gabriel / muse public
stage.py python
214 lines 7.0 KB
Raw
sha256:a154bc65916614c833d5a40a10d81ba3eae0d0495b0afddd34dc34f18d5e91b8 fix: test suite alignment and typing audit — zero violations Sonnet 4.6 minor ⚠ breaking 22 days ago
1 """Code-domain staging index — ``muse code add`` persistence layer.
2
3 The staging index lives at ``.muse/code/stage.json``. It records which
4 files the user has explicitly staged for the next ``muse commit``, along
5 with the content-addressed object ID of each staged version.
6
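Format (UTF-8 JSON)::

    {
      "version": 3,
      "entries": {
        "src/auth.py": {
          "object_id": "<sha256>",
          "mode": "M",
          "staged_at": "2026-03-21T14:32:00+00:00"
        }
      },
      "dir_renames": {
        "old/dir": "new/dir"
      }
    }

``dir_renames`` maps old → new directory paths (no trailing slash) for
staged directory renames; it is ``{}`` when there are none.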
19
20 Modes:
21
22 - ``"A"`` — added (file is new; not in the previous commit)
23 - ``"M"`` — modified (file exists in the previous commit)
24 - ``"D"`` — deleted (file will be removed from the next commit)
25 """
26
27 import json as _json
28 import logging
29 import os
30 import pathlib
31 import tempfile
32 from collections.abc import Mapping
33 from typing import Literal, TypedDict
34
35 from muse.core.types import blob_id, now_utc_iso
36 from muse.core.paths import code_stage_path as _code_stage_path
37
# Canonical object ID for an empty directory — sha256 of zero bytes.
# Every tracked empty directory uses this object ID in the stage index,
# making directories proper content-addressed objects (all share one object).
EMPTY_DIR_OID: str = blob_id(b"")

# Legacy sentinel written by older Muse versions — migrated on read.
# ``read_stage`` replaces an ``object_id`` equal to this value with
# ``EMPTY_DIR_OID``.
_DIR_SENTINEL_LEGACY = "dir:"

# In-memory form of the index's ``entries`` object: path key → staging record.
type StagedFileMap = dict[str, "StagedEntry"]

# Module-level logger; used for the warnings emitted by ``read_stage``.
logger = logging.getLogger(__name__)
49
class StagedEntry(TypedDict):
    """One file's staging record.

    This is the value stored under each path key in the ``entries`` object
    of the stage index.
    """

    object_id: str  # content-addressed object ID of the staged version
    mode: Literal["A", "M", "D"]  # "A" added, "M" modified, "D" deleted
    staged_at: str  # ISO-8601
56
class StageIndex(TypedDict):
    """Full contents of the stage index (JSON on disk).

    This is the top-level object that ``write_stage`` serialises.
    """

    version: int  # format version; ``write_stage`` stores ``_STAGE_VERSION`` here
    entries: StagedFileMap  # path key → staging record
    dir_renames: dict[str, str]  # old_dir_path → new_dir_path (no trailing slash)
63
# Format version stamped into every stage index written by ``write_stage``.
# NOTE: ``read_stage`` does not inspect the stored version when loading.
_STAGE_VERSION = 3
65
def stage_path(root: pathlib.Path) -> pathlib.Path:
    """Locate the stage index file (``.muse/code/stage.json``) for *root*.

    Thin wrapper: the path is computed by ``muse.core.paths.code_stage_path``
    and returned unchanged.
    """
    index_file = _code_stage_path(root)
    return index_file
69
def read_stage(root: pathlib.Path) -> StagedFileMap:
    """Read the stage index from disk.

    Returns an empty dict when no stage exists (fresh repo or after a commit),
    including when the file disappears between the existence check and the
    read (for example because another process just cleared the stage).

    Returns an empty dict and logs a warning when the file is corrupt, is not
    a JSON object at the top level, or contains old binary msgpack content —
    this is a safe reset; the user re-stages.  Only the msgpack case deletes
    the file; the other cases leave it on disk.

    Each entry is normalised while loading:

    - values that are not JSON objects are skipped;
    - any ``mode`` other than ``"A"`` or ``"D"`` is read as ``"M"``;
    - the legacy ``"dir:"`` object ID is replaced by ``EMPTY_DIR_OID``;
    - a non-string ``object_id`` or ``staged_at`` becomes ``""``.
    """
    path = stage_path(root)

    if not path.exists():
        return {}

    try:
        raw = path.read_bytes()
        # Msgpack first byte is always 0x80–0xFF (fixmap, fixarray, fixstr, etc.)
        # for any dict/list/string.  If the file starts with such a byte, it's
        # old binary msgpack — treat as stale and return empty.
        if raw and raw[0] > 0x7F:
            logger.warning(
                "⚠️ Stage index at %s contains old binary format — "
                "clearing stage. Run 'muse code add' again to re-stage.",
                path,
            )
            path.unlink(missing_ok=True)
            return {}

        data = _json.loads(raw.decode("utf-8"))
        if not isinstance(data, dict):
            # Valid JSON but not the documented object shape: report it the
            # same way as any other unreadable index instead of failing silently.
            logger.warning(
                "⚠️ Stage index at %s is not a JSON object — ignoring it. "
                "Run 'muse code add' again to re-stage.",
                path,
            )
            return {}

        raw_entries_obj = data.get("entries")
        raw_entries: Mapping[str, object] = (
            raw_entries_obj if isinstance(raw_entries_obj, dict) else {}
        )
        entries: StagedFileMap = {}
        for k, v in raw_entries.items():
            if not isinstance(v, dict):
                continue

            # Explicit branches (rather than a membership test) keep the
            # Literal type narrow for the type checker.
            raw_mode = v.get("mode")
            if raw_mode == "A":
                mode: Literal["A", "M", "D"] = "A"
            elif raw_mode == "D":
                mode = "D"
            else:
                mode = "M"

            obj_id = v.get("object_id", "")
            if obj_id == _DIR_SENTINEL_LEGACY:
                obj_id = EMPTY_DIR_OID  # migrate legacy sentinel to real content hash
            staged = v.get("staged_at", "")

            entries[str(k)] = StagedEntry(
                object_id=obj_id if isinstance(obj_id, str) else "",
                mode=mode,
                staged_at=staged if isinstance(staged, str) else "",
            )
        return entries
    except FileNotFoundError:
        # The file vanished after the exists() check.  That is "no stage",
        # not corruption, so do not emit the misleading warning below.
        return {}
    except Exception as exc:
        logger.warning(
            "⚠️ Stage index at %s is corrupt (%s) — clearing stage to prevent "
            "phantom staged entries. Run 'muse code add' again to re-stage.",
            path,
            exc,
        )
        return {}
131
def read_stage_dir_renames(root: pathlib.Path) -> Mapping[str, str]:
    """Load the ``dir_renames`` map from the stage index.

    The result is ``{}`` when the stage file is absent, cannot be read or
    parsed, is not a JSON object, or holds no ``dir_renames`` object.
    Keys and values are coerced to ``str``; they are repo-relative paths
    without trailing slash.
    """
    index_file = stage_path(root)
    if not index_file.exists():
        return {}

    try:
        text = index_file.read_bytes().decode("utf-8")
        document = _json.loads(text)
        # Anything other than a JSON object cannot carry a rename map.
        stored = document.get("dir_renames", {}) if isinstance(document, dict) else None
        if isinstance(stored, dict):
            return {str(old): str(new) for old, new in stored.items()}
        return {}
    except Exception:
        # Best-effort read: any failure is treated as "no renames recorded".
        return {}
151
def write_stage(
    root: pathlib.Path,
    entries: StagedFileMap,
    dir_renames: dict[str, str] | None = None,
) -> None:
    """Persist *entries* (and optional *dir_renames*) to ``.muse/code/stage.json``.

    Creates the ``.muse/code/`` directory if it does not exist.  Writing
    an empty dict clears the stage file (equivalent to calling
    :func:`clear_stage`); because the file is removed, any *dir_renames*
    passed alongside empty *entries* are not stored.

    ``dir_renames`` records explicit directory rename pairs produced by
    ``muse mv`` on tracked directories (old_path → new_path, no trailing
    slash).  When ``None`` the existing on-disk renames are preserved;
    pass ``{}`` to clear them.

    Writes are atomic (temp file + ``os.replace``) so a process crash
    mid-write never leaves a corrupt stage file.  If the write fails or is
    interrupted, the temporary file is removed and the exception propagates.
    """
    path = stage_path(root)
    if not entries:
        path.unlink(missing_ok=True)
        return

    # Preserve existing dir_renames when the caller doesn't supply new ones.
    # Copy into a real dict so the value matches the declared ``dict[str, str]``
    # type rather than the read-only ``Mapping`` the reader advertises.
    if dir_renames is None:
        dir_renames = dict(read_stage_dir_renames(root))

    path.parent.mkdir(parents=True, exist_ok=True)
    payload = StageIndex(
        version=_STAGE_VERSION,
        entries=entries,
        dir_renames=dir_renames,
    )
    # Serialise before creating the temp file so an encoding failure leaves
    # nothing behind on disk.
    encoded = _json.dumps(payload, ensure_ascii=False, separators=(",", ":")).encode("utf-8")
    # The temp file lives in the destination directory so the final rename
    # stays on one filesystem.
    fd, tmp_str = tempfile.mkstemp(
        dir=path.parent, prefix=".stage-tmp-", suffix=".json"
    )
    tmp = pathlib.Path(tmp_str)
    try:
        with os.fdopen(fd, "wb") as fh:
            fh.write(encoded)
            fh.flush()
            os.fsync(fh.fileno())
        tmp.replace(path)
    except BaseException:
        # BaseException, not Exception: KeyboardInterrupt / SystemExit during
        # the write must also remove the temp file, otherwise stray
        # ``.stage-tmp-*.json`` files accumulate next to the index.
        tmp.unlink(missing_ok=True)
        raise
200
def clear_stage(root: pathlib.Path) -> None:
    """Delete the stage index file, resetting to full-snapshot mode.

    A stage file that does not exist is not an error.
    """
    index_file = stage_path(root)
    index_file.unlink(missing_ok=True)
204
def make_entry(
    object_id: str,
    mode: Literal["A", "M", "D"],
) -> StagedEntry:
    """Create the :class:`StagedEntry` for *object_id* staged with *mode*.

    ``staged_at`` is filled from ``now_utc_iso()`` at call time.
    """
    timestamp = now_utc_iso()
    record: StagedEntry = {
        "object_id": object_id,
        "mode": mode,
        "staged_at": timestamp,
    }
    return record
File History 3 commits
sha256:a154bc65916614c833d5a40a10d81ba3eae0d0495b0afddd34dc34f18d5e91b8 fix: test suite alignment and typing audit — zero violations Sonnet 4.6 minor 22 days ago
sha256:3f46367650ccd121654f3bbe06ed3471a9007c3229fe9556d1069d64b6a2550a refactor: directories are proper content-addressed objects … Sonnet 4.6 patch 22 days ago
sha256:8c872e4dffa2db45a9629956256fa1c99a3d2ff33b80c055252e58d94a0e8d1b feat: staged directory renames shown as renamed in muse status Sonnet 4.6 minor 22 days ago