refs.py
python
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2
fix: remove commit_exists filter from have anchors — server…
Sonnet 4.6
patch
22 days ago
| 1 | """muse.core.refs — ref and HEAD management for the Muse VCS. |
| 2 | |
| 3 | Every place in Muse that reads or writes ``.muse/refs/`` or ``.muse/HEAD`` |
| 4 | should be built on the primitives exported here. |
| 5 | |
| 6 | Public API |
| 7 | ---------- |
| 8 | read_ref |
| 9 | Read a single ref file and return the commit ID, or ``None`` when the file |
| 10 | is absent, empty, or unreadable. The canonical primitive for all ref-file |
| 11 | I/O. |
| 12 | |
| 13 | iter_branch_refs |
| 14 | Generator that yields ``(branch_name, commit_id)`` pairs for every |
| 15 | non-empty, non-symlink ref file under ``.muse/refs/heads/``. The |
| 16 | canonical primitive for iterating local branch heads. |
| 17 | |
| 18 | write_branch_ref |
| 19 | Atomically (optionally CAS) update a branch tip ref. |
| 20 | |
| 21 | RefConflictError |
| 22 | Raised when a CAS write finds the ref has advanced concurrently. |
| 23 | |
| 24 | SymbolicHead / DetachedHead / HeadState |
| 25 | Typed representations of ``.muse/HEAD`` content. |
| 26 | |
| 27 | read_head / read_current_branch / write_head_branch / write_head_commit |
| 28 | Typed HEAD I/O. |
| 29 | |
| 30 | get_head_commit_id / resolve_any_ref / get_all_branch_heads |
| 31 | Higher-level ref resolution helpers. |
| 32 | |
| 33 | Design notes |
| 34 | ------------ |
| 35 | * ``read_ref`` is intentionally minimal — it does one thing and handles the |
| 36 | two failure modes (missing file, empty content) that appear everywhere. |
| 37 | * ``iter_branch_refs`` skips symlinks so a crafted symlink inside |
| 38 | ``.muse/refs/heads/`` cannot escape the repository root. |
| 39 | * Neither low-level function fully validates the commit ID format (``read_ref`` |
| 40 | only checks for the ``sha256:`` prefix) — callers that need validation should call |
| 41 | ``validate_object_id`` from ``muse.core.types`` after receiving the value. |
| 42 | Separating read from validate keeps this module dependency-free (no circular imports). |
| 43 | """ |
| 44 | from __future__ import annotations |
| 45 | |
| 46 | import fcntl |
| 47 | import pathlib |
| 48 | import re |
| 49 | from collections.abc import Iterator |
| 50 | from typing import Literal, TypedDict |
| 51 | |
| 52 | from muse.core.io import write_text_atomic |
| 53 | from muse.core.paths import heads_dir as _heads_dir, head_path as _head_path, ref_path as _ref_path, remotes_dir as _remotes_dir |
| 54 | from muse.core.types import BranchHeads |
| 55 | from muse.core.validation import validate_branch_name |
| 56 | |
# Private default for the ``expected_id`` keyword of ``write_branch_ref``.
# ``None`` is itself a meaningful value there ("no prior ref exists"), so a
# unique object, compared by identity, stands for "argument not provided".
_SENTINEL = object()  # distinct from None — distinguishes "not provided" from "no prior ref"
| 58 | |
| 59 | # --------------------------------------------------------------------------- |
| 60 | # RefConflictError |
| 61 | # --------------------------------------------------------------------------- |
| 62 | |
class RefConflictError(Exception):
    """Signal that a compare-and-swap ref update lost a race.

    Raised when the ref's current value differs from the value the caller
    expected, i.e. another writer advanced the branch between the caller's
    parent-read and this write attempt.
    """
| 67 | |
| 68 | # --------------------------------------------------------------------------- |
| 69 | # read_ref |
| 70 | # --------------------------------------------------------------------------- |
| 71 | |
def read_ref(path: pathlib.Path) -> str | None:
    """Read a ref file and return the commit ID string, or ``None``.

    Returns ``None`` when:
    - The file does not exist.
    - The file cannot be read (any ``OSError``, e.g. ``PermissionError`` or
      the path being a directory).
    - The file is not valid UTF-8.
    - The file is empty or contains only whitespace.
    - The stripped content does not start with ``"sha256:"``.

    Only the ``sha256:`` prefix is checked; the remainder of the value is
    not validated here.

    Parameters
    ----------
    path:
        Absolute path to the ref file (e.g.
        ``repo / ".muse" / "refs" / "heads" / "main"``).

    Returns
    -------
    str | None
        The stripped commit ID string (e.g. ``"sha256:abc…"``), or ``None``.

    Examples
    --------
    Read the tip of the ``main`` branch::

        from muse.core.refs import read_ref
        cid = read_ref(repo / ".muse" / "refs" / "heads" / "main")
        if cid is None:
            print("branch is empty or does not exist")
    """
    try:
        # Keep the try body to the single call that can fail.
        raw = path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        # OSError already covers FileNotFoundError and PermissionError.
        return None

    commit_id = raw.strip()
    # An empty string fails this check too, so no separate emptiness test.
    if not commit_id.startswith("sha256:"):
        return None
    return commit_id
| 109 | |
| 110 | # --------------------------------------------------------------------------- |
| 111 | # iter_branch_refs |
| 112 | # --------------------------------------------------------------------------- |
| 113 | |
def iter_branch_refs(repo_root: pathlib.Path) -> Iterator[tuple[str, str]]:
    """Yield ``(branch_name, commit_id)`` for every local branch ref.

    This is the **canonical branch-ref walker** for all of Muse.  All other
    iteration over ``.muse/refs/heads/`` should be built on top of this
    function.

    Parameters
    ----------
    repo_root:
        Repository root directory (contains ``.muse/``).

    Yields
    ------
    tuple[str, str]
        ``(branch_name, commit_id)`` pairs, where *branch_name* is the ref
        file's path relative to the heads directory, written with forward
        slashes (e.g. ``"main"``, ``"dev"``, ``"feat/login"``), and
        *commit_id* is the non-empty value returned by ``read_ref``.

    Notes
    -----
    * The walk is recursive: ref files in subdirectories are yielded with
      slash-separated names.  Directories themselves are never yielded.
    * Entries that are symlinks are skipped, so a symlink placed inside the
      heads directory is never read through.
    * Ref files for which ``read_ref`` returns ``None`` are silently skipped.
    * A missing heads directory yields nothing rather than raising.
    * Entries are visited in sorted path order so results do not depend on
      filesystem enumeration order.

    Examples
    --------
    Collect all branch tips::

        tips = {name: cid for name, cid in iter_branch_refs(repo_root)}

    Feed all tips into a multi-source BFS::

        from muse.core.graph import iter_ancestors
        tips = [cid for _, cid in iter_branch_refs(repo_root)]
        for commit in iter_ancestors(repo_root, tips):
            process(commit)
    """
    heads_dir = _heads_dir(repo_root)
    if not heads_dir.is_dir():
        return
    # sorted() pins a deterministic order; rglob() alone follows whatever
    # order the filesystem happens to return.
    for ref_file in sorted(heads_dir.rglob("*")):
        # Check is_symlink() first: is_file() follows symlinks.
        if ref_file.is_symlink() or not ref_file.is_file():
            continue
        branch_name = ref_file.relative_to(heads_dir).as_posix()
        commit_id = read_ref(ref_file)
        if commit_id:
            yield branch_name, commit_id
| 166 | |
| 167 | # --------------------------------------------------------------------------- |
| 168 | # write_branch_ref |
| 169 | # --------------------------------------------------------------------------- |
| 170 | |
def write_branch_ref(
    repo_root: pathlib.Path,
    branch: str,
    commit_id: str,
    *,
    expected_id: str | None = _SENTINEL,
) -> None:
    """Atomically update the branch tip pointer in ``.muse/refs/heads/<branch>``.

    This is the **canonical** way to advance a branch ref.  All commands that
    record a new commit on a branch — ``commit``, ``merge``, ``cherry-pick``,
    ``revert``, ``reset``, ``pull``, ``rebase`` — must call this function
    rather than writing the ref file directly.

    Using a bare ``path.write_text()`` is forbidden for ref files because:
    * It is not atomic — a crash mid-write leaves a zero-length or partial file,
      orphaning all commits reachable only from this branch.
    * It is not fsynced — a power loss after the write syscall returns but
      before the page cache is flushed produces the same corruption.

    When *expected_id* is provided, the write is a compare-and-swap performed
    under an exclusive ``flock`` on a sibling ``.lock`` file: it only proceeds
    if the current ref value matches *expected_id*.  Pass the ``parent_id``
    read at the start of a commit so concurrent advances of the branch are
    detected and surfaced as ``RefConflictError`` rather than silently
    orphaning a commit.  Pass ``None`` to assert that no prior ref exists
    (first commit on a new branch).  Omit entirely for an unconditional,
    unlocked write (legacy callers — prefer providing expected_id).

    The lock file is removed before the lock is released.  To keep that safe,
    a process that acquires the lock checks that the file it locked is still
    the one at the lock path, and retries if a previous holder already
    unlinked it.  Without that check, a waiter on the old file and a newcomer
    on a freshly created one could both hold "the" lock at once.

    Args:
        repo_root: Repository root (parent of ``.muse/``).
        branch: Branch name; validated before use.
        commit_id: New tip commit ID; must match ``sha256:<64 lowercase hex>``.
        expected_id: Current ref value the caller observed.  When provided,
            raises ``RefConflictError`` if the ref has changed.

    Raises:
        ValueError: If *branch* or *commit_id* is invalid.
        RefConflictError: If *expected_id* is provided and the current ref
            does not match — another writer advanced the branch.
    """
    import os  # function-scope: only the lock-identity check needs it

    validate_branch_name(branch)
    if not re.fullmatch(r"sha256:[0-9a-f]{64}", commit_id):
        raise ValueError(f"commit_id must be 'sha256:<64 hex chars>', got: {commit_id!r}")
    ref_file = _ref_path(repo_root, branch)

    if expected_id is _SENTINEL:
        # Unconditional write: no lock, no comparison.
        write_text_atomic(ref_file, commit_id)
        return

    lock_path = ref_file.with_suffix(".lock")
    lock_path.parent.mkdir(parents=True, exist_ok=True)
    while True:
        with open(lock_path, "w") as lock_fh:
            fcntl.flock(lock_fh.fileno(), fcntl.LOCK_EX)
            # The previous holder unlinks the lock file before releasing it,
            # so the file just locked may no longer be the one at lock_path.
            # Only a lock on the file currently at that path is authoritative.
            try:
                lock_is_current = os.path.samestat(
                    os.fstat(lock_fh.fileno()), os.stat(lock_path)
                )
            except FileNotFoundError:
                lock_is_current = False
            if not lock_is_current:
                # Leaving the ``with`` closes the handle (dropping the stale
                # lock); the loop then reopens the path and tries again.
                continue
            try:
                current = read_ref(ref_file)
                if current != expected_id:
                    raise RefConflictError(
                        f"Branch '{branch}' has moved concurrently. "
                        f"Expected ref {expected_id if expected_id else 'None'}, "
                        f"found {current if current else 'None'}. "
                        "Pull the latest changes and retry your commit."
                    )
                write_text_atomic(ref_file, commit_id)
            finally:
                lock_path.unlink(missing_ok=True)
            return
| 236 | |
| 237 | # --------------------------------------------------------------------------- |
| 238 | # HEAD file — typed I/O |
| 239 | # --------------------------------------------------------------------------- |
| 240 | # |
| 241 | # Muse HEAD format |
| 242 | # ---------------- |
| 243 | # The ``.muse/HEAD`` file is always one of two self-describing forms: |
| 244 | # |
| 245 | # ref: refs/heads/<branch> — symbolic ref; HEAD points to a branch |
| 246 | # commit: <sha256> — detached HEAD; HEAD points to a commit |
| 247 | # |
| 248 | # The ``ref:`` prefix is adopted from Git because it is the right design: |
| 249 | # a file that can hold two semantically different things should say which |
| 250 | # one it holds. The ``commit:`` prefix for detached HEAD is a Muse |
| 251 | # extension — Git uses a bare SHA, which is ambiguous (SHA-1? SHA-256?). |
| 252 | # Muse makes the hash algorithm explicit in the ID's prefix, leaving the door |
| 253 | # open for future algorithm identifiers without changing the parsing rule. |
| 254 | # |
| 255 | # There is no backward-compatibility layer; every write site uses |
| 256 | # ``write_head_branch`` / ``write_head_commit`` and every read site uses |
| 257 | # ``read_head`` / ``read_current_branch``. |
| 258 | |
class SymbolicHead(TypedDict):
    """Typed form of a HEAD that refers to a named branch."""

    # Discriminator; always the literal "branch" for this variant.
    kind: Literal["branch"]
    # Name of the branch HEAD refers to.
    branch: str
| 264 | |
class DetachedHead(TypedDict):
    """Typed form of a HEAD that refers directly to a commit (detached HEAD)."""

    # Discriminator; always the literal "commit" for this variant.
    kind: Literal["commit"]
    # ID of the commit HEAD refers to.
    commit_id: str
| 270 | |
# Union of the two typed HEAD shapes; inspect the ``kind`` key to tell a
# symbolic (branch) HEAD from a detached (commit) HEAD.
HeadState = SymbolicHead | DetachedHead
| 272 | |
def read_head(repo_root: pathlib.Path) -> HeadState:
    """Parse ``.muse/HEAD`` and return a typed :data:`HeadState`.

    Two forms are accepted (surrounding whitespace is ignored):

    * ``ref: refs/heads/<branch>`` → ``SymbolicHead``; the branch name is
      passed through ``validate_branch_name`` before being returned.
    * ``commit: sha256:<64 lowercase hex>`` → ``DetachedHead``.

    Args:
        repo_root: Repository root directory (contains ``.muse/``).

    Returns:
        A ``SymbolicHead`` or ``DetachedHead`` dict.

    Raises:
        ValueError: If the HEAD file does not exist (uninitialised or corrupt
            repository; the original ``FileNotFoundError`` is attached as the
            cause), if the content matches neither form, or if the commit ID
            is malformed.  Whatever ``validate_branch_name`` raises for a bad
            branch name propagates unchanged.
        OSError: Read failures other than a missing file (for example
            ``PermissionError``) are not translated and propagate as-is.
    """
    head_path = _head_path(repo_root)
    try:
        raw = head_path.read_text(encoding="utf-8").strip()
    except FileNotFoundError as exc:
        # Chain explicitly so the traceback shows the missing file as the
        # direct cause rather than an error raised while handling another.
        raise ValueError(
            f"Repository HEAD file missing: {head_path}\n"
            "The repository may be uninitialised. Run 'muse init' to fix it."
        ) from exc
    if raw.startswith("ref: refs/heads/"):
        branch = raw.removeprefix("ref: refs/heads/").strip()
        validate_branch_name(branch)
        return SymbolicHead(kind="branch", branch=branch)
    if raw.startswith("commit: "):
        commit_id = raw.removeprefix("commit: ").strip()
        if not re.fullmatch(r"sha256:[0-9a-f]{64}", commit_id):
            raise ValueError(f"Malformed commit ID in HEAD: {commit_id!r}")
        return DetachedHead(kind="commit", commit_id=commit_id)
    raise ValueError(
        f"Malformed HEAD: {raw!r}. "
        "Expected 'ref: refs/heads/<branch>' or 'commit: <sha256>'."
    )
| 302 | |
def read_current_branch(repo_root: pathlib.Path) -> str:
    """Return the name of the branch HEAD currently points to.

    Raises :exc:`ValueError` when HEAD is detached, so callers that need a
    branch get a clear error instead of a commit ID posing as a branch name.
    """
    head = read_head(repo_root)
    if head["kind"] == "branch":
        return head["branch"]
    raise ValueError(
        "Repository is in detached HEAD state. "
        "Run 'muse checkout <branch>' to return to a branch."
    )
| 317 | |
def write_head_branch(repo_root: pathlib.Path, branch: str) -> None:
    """Point ``.muse/HEAD`` at *branch* by writing a symbolic ref.

    The file content is ``ref: refs/heads/<branch>`` followed by a newline;
    the ``ref:`` prefix marks the entry as a symbolic reference.

    *branch* is validated first, and the write goes through
    :func:`write_text_atomic` so that an interrupted ``muse checkout`` or
    ``muse init`` cannot leave HEAD corrupt or empty.
    """
    validate_branch_name(branch)
    head_file = _head_path(repo_root)
    content = f"ref: refs/heads/{branch}\n"
    write_text_atomic(head_file, content)
| 329 | |
def write_head_commit(repo_root: pathlib.Path, commit_id: str) -> None:
    """Detach HEAD by writing a direct commit reference to ``.muse/HEAD``.

    The file content is ``commit: <commit_id>`` followed by a newline.  The
    ``commit:`` prefix is a Muse extension that keeps HEAD self-describing in
    both states, instead of storing a bare hash as Git does.

    Raises :exc:`ValueError` unless *commit_id* has the form
    ``sha256:<64 lowercase hex chars>``.  The write goes through
    :func:`write_text_atomic` so a crash or power loss cannot zero-out HEAD.
    """
    well_formed = re.fullmatch(r"sha256:[0-9a-f]{64}", commit_id) is not None
    if not well_formed:
        raise ValueError(f"commit_id must be 'sha256:<64 hex chars>', got: {commit_id!r}")
    head_file = _head_path(repo_root)
    write_text_atomic(head_file, f"commit: {commit_id}\n")
| 343 | |
| 344 | # --------------------------------------------------------------------------- |
| 345 | # Higher-level ref resolution helpers |
| 346 | # --------------------------------------------------------------------------- |
| 347 | |
def get_head_commit_id(repo_root: pathlib.Path, branch: str) -> str | None:
    """Look up the tip commit ID of *branch*.

    The branch name is validated first; the result is whatever ``read_ref``
    returns for the branch's ref file, so ``None`` means the branch has no
    readable tip (for example, an empty branch).
    """
    validate_branch_name(branch)
    ref_file = _ref_path(repo_root, branch)
    return read_ref(ref_file)
| 352 | |
def resolve_any_ref(repo_root: pathlib.Path, ref: str) -> str | None:
    """Resolve *ref* to a commit ID, checking local branches then remote tracking refs.

    Handles both plain branch names (``main``) and remote-tracking ref syntax
    (``origin/main``, ``remotes/origin/main``).

    Args:
        repo_root: Repository root directory (contains ``.muse/``).
        ref: Branch name or remote-tracking ref to resolve.

    Returns:
        The commit ID, or ``None`` when the ref cannot be resolved.  A
        remote-tracking ref containing an empty, ``.`` or ``..`` path
        component is treated as unresolvable, so the lookup can never leave
        the remotes directory.
    """
    # 1. Try as a local branch name first.
    try:
        cid = get_head_commit_id(repo_root, ref)
        if cid is not None:
            return cid
    except (ValueError, OSError):
        # Deliberate best-effort: a string that is not usable as a local
        # branch name may still be a remote-tracking ref, so fall through.
        pass

    # 2. Try remote tracking ref.  Accepts two formats:
    #      "origin/main"          → .muse/remotes/origin/main
    #      "remotes/origin/main"  → .muse/remotes/origin/main
    parts = ref.split("/")
    if parts[0] == "remotes":
        parts = parts[1:]
    if len(parts) < 2:
        return None

    # Refuse components that would change where the path points.  ".."
    # climbs out of the remotes directory, and an empty component (from a
    # leading or doubled slash) would make the joined remainder absolute,
    # which pathlib treats as replacing the base path entirely.
    if any(part in ("", ".", "..") for part in parts):
        return None

    remote, *branch_parts = parts
    tracking_path = _remotes_dir(repo_root).joinpath(remote, *branch_parts)
    return read_ref(tracking_path) or None
| 384 | |
def get_all_branch_heads(repo_root: pathlib.Path) -> BranchHeads:
    """Collect every local branch tip into a ``{branch_name: commit_id}`` mapping.

    The pairs come from ``iter_branch_refs``, which walks
    ``.muse/refs/heads/``; any ref that function skips (for example an empty
    ref file) is absent from the result.

    Args:
        repo_root: Repository root directory (contains ``.muse/``).

    Returns:
        ``{branch_name: commit_id}`` for every branch ref that was yielded.
    """
    branch_tips = iter_branch_refs(repo_root)
    heads = dict(branch_tips)
    return heads
File History
4 commits
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2
fix: remove commit_exists filter from have anchors — server…
Sonnet 4.6
patch
22 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e
fix: rename objects→blobs in push client and all stale test…
Sonnet 4.6
patch
23 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a
fix: repair four test failures from post-migration audit
Sonnet 4.6
patch
30 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf
fix: unified object store migration — idempotent writes, JS…
Sonnet 4.6
minor
⚠
30 days ago