gabriel / muse public
refs.py python
397 lines 15.5 KB
Raw
1 """muse.core.refs — ref and HEAD management for the Muse VCS.
2
3 Every place in Muse that reads or writes ``.muse/refs/`` or ``.muse/HEAD``
4 should be built on the primitives exported here.
5
6 Public API
7 ----------
8 read_ref
9 Read a single ref file and return the commit ID, or ``None`` when the file
10 is absent, empty, unreadable, or lacks the ``sha256:`` prefix. The canonical
11 primitive for all ref-file I/O.
12
13 iter_branch_refs
14 Generator that yields ``(branch_name, commit_id)`` pairs for every
15 non-empty, non-symlink ref file under ``.muse/refs/heads/``. The
16 canonical primitive for iterating local branch heads.
17
18 write_branch_ref
19 Atomically (optionally CAS) update a branch tip ref.
20
21 RefConflictError
22 Raised when a CAS write finds the ref has advanced concurrently.
23
24 SymbolicHead / DetachedHead / HeadState
25 Typed representations of ``.muse/HEAD`` content.
26
27 read_head / read_current_branch / write_head_branch / write_head_commit
28 Typed HEAD I/O.
29
30 get_head_commit_id / resolve_any_ref / get_all_branch_heads
31 Higher-level ref resolution helpers.
32
33 Design notes
34 ------------
35 * ``read_ref`` is intentionally minimal — it does one thing and handles the
36 two failure modes (missing file, empty content) that appear everywhere.
37 * ``iter_branch_refs`` skips symlinks so a crafted symlink inside
38 ``.muse/refs/heads/`` cannot escape the repository root.
39 * Neither low-level function fully validates the commit ID format —
40 ``read_ref`` only checks for the ``sha256:`` prefix. Callers that need strict
41 validation should call ``validate_object_id`` from ``muse.core.types`` after
42 receiving the value. Separating read from validate keeps this module free of circular imports.
43 """
44 from __future__ import annotations
45
46 import fcntl
47 import pathlib
48 import re
49 from collections.abc import Iterator
50 from typing import Literal, TypedDict
51
52 from muse.core.io import write_text_atomic
53 from muse.core.paths import heads_dir as _heads_dir, head_path as _head_path, ref_path as _ref_path, remotes_dir as _remotes_dir
54 from muse.core.types import BranchHeads
55 from muse.core.validation import validate_branch_name
56
57 _SENTINEL = object() # distinct from None — distinguishes "not provided" from "no prior ref"
58
59 # ---------------------------------------------------------------------------
60 # RefConflictError
61 # ---------------------------------------------------------------------------
62
class RefConflictError(Exception):
    """Signal that a compare-and-swap ref write was rejected.

    The ref's current value differed from the value the caller expected,
    which means another writer moved the branch between the caller's read of
    the parent and its attempt to write the new tip.
    """
67
68 # ---------------------------------------------------------------------------
69 # read_ref
70 # ---------------------------------------------------------------------------
71
72 def read_ref(path: pathlib.Path) -> str | None:
73 """Read a ref file and return the commit ID string, or ``None``.
74
75 Returns ``None`` when:
76 - The file does not exist.
77 - The file is empty or contains only whitespace.
78 - The file cannot be read (``PermissionError``, ``OSError``).
79
80 Parameters
81 ----------
82 path:
83 Absolute path to the ref file (e.g.
84 ``repo / ".muse" / "refs" / "heads" / "main"``).
85
86 Returns
87 -------
88 str | None
89 The stripped commit ID string (e.g. ``"sha256:abc…"``), or ``None``.
90
91 Examples
92 --------
93 Read the tip of the ``main`` branch::
94
95 from muse.core.refs import read_ref
96 cid = read_ref(repo / ".muse" / "refs" / "heads" / "main")
97 if cid is None:
98 print("branch is empty or does not exist")
99 """
100 try:
101 raw = path.read_text(encoding="utf-8", errors="strict").strip()
102 if not raw:
103 return None
104 if not raw.startswith("sha256:"):
105 return None
106 return raw
107 except (FileNotFoundError, PermissionError, OSError, UnicodeDecodeError):
108 return None
109
110 # ---------------------------------------------------------------------------
111 # iter_branch_refs
112 # ---------------------------------------------------------------------------
113
def iter_branch_refs(repo_root: pathlib.Path) -> Iterator[tuple[str, str]]:
    """Yield ``(branch_name, commit_id)`` for every local branch ref.

    This is the **canonical branch-ref walker** for Muse; other code that
    needs to enumerate ``.muse/refs/heads/`` should build on it.

    Parameters
    ----------
    repo_root:
        Repository root directory (contains ``.muse/``).

    Yields
    ------
    tuple[str, str]
        ``(branch_name, commit_id)`` pairs.  *branch_name* is the ref file's
        path relative to the heads directory, written with forward slashes
        (e.g. ``"main"``, ``"dev"``, ``"feat/login"``).  *commit_id* is the
        value returned by ``read_ref`` for that file.

    Notes
    -----
    * The heads directory is walked recursively, so ref files in nested
      directories are yielded with slash-separated names.
    * Symlinked entries are skipped, so a crafted symlink inside the heads
      directory is never read through.
    * Entries that are not regular files (directories themselves, etc.) are
      skipped.
    * Ref files for which ``read_ref`` returns ``None`` are silently skipped.
    * A missing heads directory yields nothing rather than raising.
    * Iteration order is whatever ``Path.rglob`` produces; no sorting is done.

    Examples
    --------
    Collect all branch tips::

        tips = {name: cid for name, cid in iter_branch_refs(repo_root)}

    Feed all tips into a multi-source BFS::

        from muse.core.graph import iter_ancestors
        tips = [cid for _, cid in iter_branch_refs(repo_root)]
        for commit in iter_ancestors(repo_root, tips):
            process(commit)
    """
    root = _heads_dir(repo_root)
    if root.is_dir():
        for entry in root.rglob("*"):
            # Symlink test first: never follow a link just to learn its type.
            if not entry.is_symlink() and entry.is_file():
                name = entry.relative_to(root).as_posix()
                tip = read_ref(entry)
                if tip:
                    yield name, tip
166
167 # ---------------------------------------------------------------------------
168 # write_branch_ref
169 # ---------------------------------------------------------------------------
170
def write_branch_ref(
    repo_root: pathlib.Path,
    branch: str,
    commit_id: str,
    *,
    expected_id: str | None = _SENTINEL,
) -> None:
    """Atomically update the branch tip pointer in ``.muse/refs/heads/<branch>``.

    This is the **canonical** way to advance a branch ref.  All commands that
    record a new commit on a branch — ``commit``, ``merge``, ``cherry-pick``,
    ``revert``, ``reset``, ``pull``, ``rebase`` — must call this function
    rather than writing the ref file directly.

    Using a bare ``path.write_text()`` is forbidden for ref files because:
      * It is not atomic — a crash mid-write leaves a zero-length or partial
        file, orphaning all commits reachable only from this branch.
      * It is not fsynced — a power loss after the write syscall returns but
        before the page cache is flushed produces the same corruption.

    When *expected_id* is provided, the write is a compare-and-swap: it only
    proceeds if the current ref value matches *expected_id*.  Pass the
    ``parent_id`` read at the start of a commit so concurrent advances of the
    branch are detected and surfaced as ``RefConflictError`` rather than
    silently orphaning a commit.  Pass ``None`` to assert that no prior ref
    exists (first commit on a new branch).  Omit entirely for an unconditional
    write (legacy callers — prefer providing expected_id).

    Locking protocol (CAS path only): an exclusive ``flock`` is taken on a
    sibling ``.lock`` file, and that file is removed before the lock is
    released.  Because the file is removed, a waiter that opened it earlier
    may end up locking an orphaned inode.  After acquiring the lock, the
    holder therefore checks that its descriptor still refers to the file
    currently at the lock path, and retries with a fresh open if it does not.
    The unconditional path takes no lock.

    Args:
        repo_root: Repository root (parent of ``.muse/``).
        branch: Branch name; validated before use.
        commit_id: New tip commit ID (``sha256:`` followed by 64 lowercase
            hex characters).
        expected_id: Current ref value the caller observed.  When provided,
            raises ``RefConflictError`` if the ref has changed.

    Raises:
        ValueError: If *branch* or *commit_id* is invalid.
        RefConflictError: If *expected_id* is provided and the current ref
            does not match — another writer advanced the branch.
    """
    import os  # function-scope import: only the CAS locking path needs it

    validate_branch_name(branch)
    if not re.fullmatch(r"sha256:[0-9a-f]{64}", commit_id):
        raise ValueError(f"commit_id must be 'sha256:<64 hex chars>', got: {commit_id!r}")
    ref_file = _ref_path(repo_root, branch)

    if expected_id is _SENTINEL:
        # Unconditional (legacy) write: no lock, no comparison.
        write_text_atomic(ref_file, commit_id)
        return

    lock_path = ref_file.with_suffix(".lock")
    lock_path.parent.mkdir(parents=True, exist_ok=True)
    while True:
        with open(lock_path, "w") as lock_fh:
            fcntl.flock(lock_fh.fileno(), fcntl.LOCK_EX)
            # The previous holder unlinks the lock file before releasing it.
            # If we opened the file before that unlink, we now hold a lock on
            # an orphaned inode that excludes nobody.  Detect and retry.
            try:
                lock_is_live = os.path.samestat(
                    os.fstat(lock_fh.fileno()), os.stat(lock_path)
                )
            except FileNotFoundError:
                lock_is_live = False
            if not lock_is_live:
                continue  # leaving the ``with`` closes the fd and drops the lock

            try:
                current = read_ref(ref_file)
                if current != expected_id:
                    raise RefConflictError(
                        f"Branch '{branch}' has moved concurrently. "
                        f"Expected ref {expected_id if expected_id else 'None'}, "
                        f"found {current if current else 'None'}. "
                        "Pull the latest changes and retry your commit."
                    )
                write_text_atomic(ref_file, commit_id)
            finally:
                # Remove the lock file while still holding the lock, so any
                # waiter fails the liveness check above instead of racing us.
                lock_path.unlink(missing_ok=True)
            return
236
237 # ---------------------------------------------------------------------------
238 # HEAD file — typed I/O
239 # ---------------------------------------------------------------------------
240 #
241 # Muse HEAD format
242 # ----------------
243 # The ``.muse/HEAD`` file is always one of two self-describing forms:
244 #
245 # ref: refs/heads/<branch> — symbolic ref; HEAD points to a branch
246 # commit: <sha256> — detached HEAD; HEAD points to a commit
247 #
248 # The ``ref:`` prefix is adopted from Git because it is the right design:
249 # a file that can hold two semantically different things should say which
250 # one it holds. The ``commit:`` prefix for detached HEAD is a Muse
251 # extension — Git uses a bare SHA, which is ambiguous (SHA-1? SHA-256?).
252 # Muse commit IDs name their hash algorithm explicitly (``sha256:…``), leaving
253 # the door open for future algorithm identifiers without changing the parsing rule.
254 #
255 # There is no backward-compatibility layer; every write site uses
256 # ``write_head_branch`` / ``write_head_commit`` and every read site uses
257 # ``read_head`` / ``read_current_branch``.
258
class SymbolicHead(TypedDict):
    """HEAD state in which HEAD names a branch (``ref: refs/heads/<branch>``)."""

    # Discriminator; always the literal "branch" for this variant.
    kind: Literal["branch"]
    # Branch name that HEAD refers to.
    branch: str
264
class DetachedHead(TypedDict):
    """HEAD state in which HEAD names a commit directly (detached HEAD)."""

    # Discriminator; always the literal "commit" for this variant.
    kind: Literal["commit"]
    # Commit ID that HEAD refers to.
    commit_id: str
270
# Union of the two HEAD variants; narrow on the ``kind`` key before reading
# the variant-specific field.
HeadState = SymbolicHead | DetachedHead
272
def read_head(repo_root: pathlib.Path) -> HeadState:
    """Parse ``.muse/HEAD`` into a typed :data:`HeadState`.

    Two on-disk forms are recognised after stripping surrounding whitespace:

    * ``ref: refs/heads/<branch>`` → :class:`SymbolicHead`
    * ``commit: sha256:<64 lowercase hex chars>`` → :class:`DetachedHead`

    Raises:
        ValueError: If the HEAD file does not exist, if its content matches
            neither form, or if a ``commit:`` entry carries a malformed
            commit ID.  The branch name of a ``ref:`` entry is passed through
            ``validate_branch_name``, whose errors propagate unchanged.
    """
    head_file = _head_path(repo_root)
    try:
        text = head_file.read_text(encoding="utf-8").strip()
    except FileNotFoundError:
        raise ValueError(
            f"Repository HEAD file missing: {head_file}\n"
            "The repository may be uninitialised. Run 'muse init' to fix it."
        )

    branch_prefix = "ref: refs/heads/"
    commit_prefix = "commit: "

    if text.startswith(branch_prefix):
        name = text[len(branch_prefix):].strip()
        validate_branch_name(name)
        return SymbolicHead(kind="branch", branch=name)

    if text.startswith(commit_prefix):
        cid = text[len(commit_prefix):].strip()
        if re.fullmatch(r"sha256:[0-9a-f]{64}", cid) is None:
            raise ValueError(f"Malformed commit ID in HEAD: {cid!r}")
        return DetachedHead(kind="commit", commit_id=cid)

    raise ValueError(
        f"Malformed HEAD: {text!r}. "
        "Expected 'ref: refs/heads/<branch>' or 'commit: <sha256>'."
    )
302
def read_current_branch(repo_root: pathlib.Path) -> str:
    """Return the name of the branch HEAD currently points to.

    Raises:
        ValueError: If HEAD is detached (points at a commit rather than a
            branch), so callers that require a branch never receive a commit
            ID by mistake.  Errors from :func:`read_head` propagate as well.
    """
    head = read_head(repo_root)
    if head["kind"] == "branch":
        return head["branch"]
    raise ValueError(
        "Repository is in detached HEAD state. "
        "Run 'muse checkout <branch>' to return to a branch."
    )
317
def write_head_branch(repo_root: pathlib.Path, branch: str) -> None:
    """Point ``.muse/HEAD`` at *branch* by writing a symbolic ref.

    The file content is ``ref: refs/heads/<branch>`` followed by a newline.
    *branch* is validated with ``validate_branch_name`` first, and the write
    goes through :func:`write_text_atomic` rather than a direct file write.
    """
    validate_branch_name(branch)
    head_file = _head_path(repo_root)
    content = f"ref: refs/heads/{branch}\n"
    write_text_atomic(head_file, content)
329
def write_head_commit(repo_root: pathlib.Path, commit_id: str) -> None:
    """Point ``.muse/HEAD`` directly at *commit_id* (detached HEAD).

    The file content is ``commit: <commit_id>`` followed by a newline, so the
    entry is self-describing in the same way a ``ref:`` entry is.  The write
    goes through :func:`write_text_atomic` rather than a direct file write.

    Raises:
        ValueError: If *commit_id* is not ``sha256:`` followed by exactly 64
            lowercase hexadecimal characters.
    """
    if re.fullmatch(r"sha256:[0-9a-f]{64}", commit_id) is None:
        raise ValueError(f"commit_id must be 'sha256:<64 hex chars>', got: {commit_id!r}")
    head_file = _head_path(repo_root)
    write_text_atomic(head_file, f"commit: {commit_id}\n")
343
344 # ---------------------------------------------------------------------------
345 # Higher-level ref resolution helpers
346 # ---------------------------------------------------------------------------
347
def get_head_commit_id(repo_root: pathlib.Path, branch: str) -> str | None:
    """Return the tip commit ID of *branch*, or ``None`` if it has none.

    *branch* is validated with ``validate_branch_name`` before its ref file
    is read via :func:`read_ref`.
    """
    validate_branch_name(branch)
    tip_file = _ref_path(repo_root, branch)
    return read_ref(tip_file)
352
def resolve_any_ref(repo_root: pathlib.Path, ref: str) -> str | None:
    """Resolve *ref* to a commit ID, checking local branches then remote tracking refs.

    Handles both plain branch names (``main``) and remote-tracking ref syntax
    (``origin/main``, ``remotes/origin/main``).  Returns ``None`` when the ref
    cannot be resolved.

    Remote-tracking lookups reject any *ref* whose slash-separated components
    include an empty string, ``.``, ``..``, or a NUL byte.  Without this, a
    ref such as ``origin/../../x`` or ``origin//etc/x`` would make the lookup
    read a file outside the remotes directory.  As a consequence, a ref with
    a trailing slash (``origin/main/``) is not resolved as a remote ref.
    """
    # 1. Try as a local branch name first.  An invalid branch name or an I/O
    #    error just means "not a local branch"; fall through to remotes.
    try:
        cid = get_head_commit_id(repo_root, ref)
    except (ValueError, OSError):
        cid = None
    if cid is not None:
        return cid

    # 2. Try remote tracking ref.  Accepts two formats:
    #      "origin/main"          → <remotes dir>/origin/main
    #      "remotes/origin/main"  → <remotes dir>/origin/main
    parts = ref.split("/")
    if len(parts) >= 2 and parts[0] == "remotes":
        parts = parts[1:]
    if len(parts) < 2:
        return None

    # Refuse components that would let the joined path leave the remotes
    # directory.  An empty component matters too: it produces a branch string
    # starting with "/", and pathlib discards the left-hand side when the
    # right-hand operand of "/" is absolute.
    if any(part in ("", ".", "..") or "\0" in part for part in parts):
        return None

    remote, branch = parts[0], "/".join(parts[1:])
    tracking_path = _remotes_dir(repo_root) / remote / branch
    return read_ref(tracking_path) or None
384
def get_all_branch_heads(repo_root: pathlib.Path) -> BranchHeads:
    """Build a ``{branch_name: commit_id}`` mapping of every local branch tip.

    A thin wrapper that collects the pairs produced by
    :func:`iter_branch_refs`; any ref that function skips is absent here too.

    Args:
        repo_root: Repository root directory (contains ``.muse/``).

    Returns:
        ``{branch_name: commit_id}`` for every branch ref yielded by
        :func:`iter_branch_refs`.
    """
    return {name: tip for name, tip in iter_branch_refs(repo_root)}
File History 1 commit