gabriel / muse public
test_phase4_shelf_json.py python
327 lines 11.6 KB
Raw
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 20 days ago
1 """TDD — Phase 4: shelf entries from binary msgpack to git-header+JSON.
2
3 Phase 4 requirements (issue #12):
4 - shelf_entry_path() returns a path with NO extension
5 - write_shelf_entry() writes "shelf <size>\0<json>" framing (same as commits)
6 - read_shelf_entry() parses header+JSON; falls back to .msgpack on miss (silent upgrade)
7 - list_shelf_entries() finds new-format files (no extension)
8 - delete_shelf_entry() removes new-format AND legacy .msgpack files
9 - gc._collect_shelf_objects() finds object IDs in new-format shelf entries
10 """
11
12 from __future__ import annotations
13
14 import json
15 import pathlib
16 from typing import TypedDict
17
18 import msgpack
19 import pytest
20
21 from muse.core.ids import hash_blob
22 from muse.core.paths import shelf_dir
23 from muse.core.shelf import (
24 delete_shelf_entry,
25 list_shelf_entries,
26 read_shelf_entry,
27 shelf_entry_path,
28 write_shelf_entry,
29 )
30 from muse.core.types import long_id, split_id
31
32
33 # ---------------------------------------------------------------------------
34 # Constants
35 # ---------------------------------------------------------------------------
36
# Blob IDs for two distinct dummy payloads, computed once at import time.
# _OBJ_A is the object ID placed in every snapshot built by _entry();
# _OBJ_B is not referenced elsewhere in the visible part of this module.
_OBJ_A, _OBJ_B = map(hash_blob, (b"object-a", b"object-b"))
39
40
41 # ---------------------------------------------------------------------------
42 # Helpers
43 # ---------------------------------------------------------------------------
44
def _make_repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Create the ``.muse/shelf`` directory under *tmp_path* and return *tmp_path*.

    The call is idempotent: invoking it again on a directory that already
    contains ``.muse/shelf`` succeeds instead of raising ``FileExistsError``.

    Args:
        tmp_path: Directory that should act as the repository root.

    Returns:
        The same *tmp_path*, so the call can be used inline as the repo root.
    """
    # exist_ok=True keeps the helper safe to call more than once per test.
    (tmp_path / ".muse" / "shelf").mkdir(parents=True, exist_ok=True)
    return tmp_path
48
49
class _ShelfEntry(TypedDict):
    """Typed shape of the shelf-entry dicts built by this test module.

    All keys are required. The per-field notes below describe how the
    helpers in this file populate them; the authoritative schema lives in
    ``muse.core.shelf`` and is not visible from here.
    """

    # Entry identifier; the tests pass it to shelf_entry_path() and friends.
    id: str
    # Human-readable entry name.
    name: str
    # Mapping of file name -> object ID captured by the entry.
    snapshot: dict[str, str]
    # Timestamp string (the fixtures use an ISO-8601 value with UTC offset).
    created_at: str
    # Free-form message attached to the entry.
    message: str
    # Branch name recorded with the entry.
    branch: str
    # Intent label; the fixtures use "manual".
    intent_type: str
    # Resumable flag; the fixtures set it to False.
    resumable: bool
    # Additional string-to-string metadata; empty in the fixtures.
    metadata: dict[str, str]
60
61
def _shelf_id(tag: str = "a") -> str:
    """Return the shelf-entry ID that this module associates with *tag*.

    The ID is ``hash_blob()`` of the encoded text ``shelf-entry-<tag>``, so
    the same tag always maps to the same ID.
    """
    seed = f"shelf-entry-{tag}".encode()
    return hash_blob(seed)
64
65
def _entry(tag: str = "a") -> _ShelfEntry:
    """Build a fully populated shelf-entry dict whose values are derived from *tag*.

    The ID comes from ``_shelf_id(tag)``; the snapshot holds a single file,
    ``file_<tag>.py``, pointing at ``_OBJ_A``. Every entry shares the same
    fixed ``created_at`` timestamp.
    """
    # Calling a TypedDict class produces a plain dict with these keys.
    return _ShelfEntry(
        id=_shelf_id(tag),
        name=f"entry-{tag}",
        snapshot={f"file_{tag}.py": _OBJ_A},
        created_at="2026-05-21T00:00:00+00:00",
        message=f"shelf {tag}",
        branch="dev",
        intent_type="manual",
        resumable=False,
        metadata={},
    )
79
80
def _legacy_path(repo: pathlib.Path, entry_id: str) -> pathlib.Path:
    """Return where the old ``.msgpack`` file for *entry_id* would live.

    This is the current shelf-entry path with a ``.msgpack`` suffix applied.
    """
    current_path = shelf_entry_path(repo, entry_id)
    return current_path.with_suffix(".msgpack")
84
85
def _write_legacy_shelf(repo: pathlib.Path, entry: _ShelfEntry) -> pathlib.Path:
    """Store *entry* on disk in the old binary msgpack format.

    Parent directories are created as needed. Returns the path of the
    ``.msgpack`` file that was written.
    """
    target = _legacy_path(repo, str(entry["id"]))
    target.parent.mkdir(parents=True, exist_ok=True)
    packed = msgpack.packb(entry, use_bin_type=True)
    target.write_bytes(packed)
    return target
92
93
94 # ---------------------------------------------------------------------------
95 # shelf_entry_path — no extension
96 # ---------------------------------------------------------------------------
97
class TestShelfEntryPath:
    """Path-shape checks for shelf_entry_path()."""

    def test_shelf_entry_path_has_no_extension(self, tmp_path: pathlib.Path) -> None:
        """The path returned by shelf_entry_path() carries no file suffix."""
        entry_path = shelf_entry_path(tmp_path, _shelf_id())
        suffix = entry_path.suffix
        assert suffix == "", f"Expected no extension, got {suffix!r}"

    def test_shelf_entry_path_structure(self, tmp_path: pathlib.Path) -> None:
        """The final two path components are ``<algo>/<hex>`` of the entry ID."""
        entry_id = _shelf_id()
        entry_path = shelf_entry_path(tmp_path, entry_id)
        expected_algo, expected_hex = split_id(entry_id)
        assert entry_path.name == expected_hex
        assert entry_path.parent.name == expected_algo
112
113
114 # ---------------------------------------------------------------------------
115 # write_shelf_entry — git-header+JSON format
116 # ---------------------------------------------------------------------------
117
class TestWriteShelfEntry:
    """write_shelf_entry() must emit an extension-less, header-framed JSON file."""

    @staticmethod
    def _split_frame(raw: bytes) -> tuple[bytes, bytes]:
        """Split *raw* at its first NUL byte into ``(header, payload)``.

        A missing NUL separator fails with an assertion message rather than
        an opaque ``ValueError``, so the test report names the real problem.
        """
        header, separator, payload = raw.partition(b"\0")
        assert separator == b"\0", "shelf file is missing the NUL header terminator"
        return header, payload

    def test_write_produces_shelf_header(self, tmp_path: pathlib.Path) -> None:
        """The written file starts with 'shelf <size>\\0' and <size> matches the payload."""
        repo = _make_repo(tmp_path)
        entry = _entry()
        write_shelf_entry(repo, entry)

        path = shelf_entry_path(repo, str(entry["id"]))
        assert path.exists()
        header, payload = self._split_frame(path.read_bytes())
        type_str, size_str = header.decode().split(" ", 1)
        assert type_str == "shelf"
        # The declared size must equal the byte length of everything after the NUL.
        assert int(size_str) == len(payload)

    def test_write_payload_is_valid_json(self, tmp_path: pathlib.Path) -> None:
        """The payload after the null byte must be valid UTF-8 JSON."""
        repo = _make_repo(tmp_path)
        entry = _entry()
        write_shelf_entry(repo, entry)

        path = shelf_entry_path(repo, str(entry["id"]))
        _, payload = self._split_frame(path.read_bytes())
        data = json.loads(payload.decode("utf-8"))
        assert data["id"] == entry["id"]

    def test_write_read_roundtrip(self, tmp_path: pathlib.Path) -> None:
        """write_shelf_entry() then read_shelf_entry() preserves id, message and snapshot."""
        repo = _make_repo(tmp_path)
        entry = _entry("rt")
        write_shelf_entry(repo, entry)

        result = read_shelf_entry(repo, str(entry["id"]))
        assert result is not None
        assert result["id"] == entry["id"]
        assert result["message"] == "shelf rt"
        # Plain string key: the original used an f-string with no placeholders.
        assert result["snapshot"] == {"file_rt.py": _OBJ_A}

    def test_file_has_no_extension(self, tmp_path: pathlib.Path) -> None:
        """The file created by write_shelf_entry() must have no extension."""
        repo = _make_repo(tmp_path)
        entry = _entry()
        write_shelf_entry(repo, entry)

        path = shelf_entry_path(repo, str(entry["id"]))
        assert path.suffix == ""
        assert path.exists()
167
168
169 # ---------------------------------------------------------------------------
170 # list_shelf_entries — finds new-format files
171 # ---------------------------------------------------------------------------
172
class TestListShelfEntries:
    """list_shelf_entries() must discover entries stored in the new format."""

    def test_list_finds_new_format_entries(self, tmp_path: pathlib.Path) -> None:
        """list_shelf_entries() returns entries written in the new format."""
        repo = _make_repo(tmp_path)
        entry = _entry("list")
        write_shelf_entry(repo, entry)

        results = list_shelf_entries(repo)
        assert len(results) == 1
        assert results[0]["id"] == entry["id"]

    def test_list_multiple_entries(self, tmp_path: pathlib.Path) -> None:
        """list_shelf_entries() returns exactly the new-format entries that were written.

        Ordering is deliberately not asserted: _entry() gives every entry the
        same created_at value, so a sort by that field cannot be observed here.
        """
        repo = _make_repo(tmp_path)
        written = [_entry(tag) for tag in ("x", "y", "z")]
        for entry in written:
            write_shelf_entry(repo, entry)

        results = list_shelf_entries(repo)
        assert len(results) == 3
        # Check identity as well as count, so duplicates or foreign entries fail.
        assert {r["id"] for r in results} == {e["id"] for e in written}
192
193
194 # ---------------------------------------------------------------------------
195 # Legacy .msgpack upgrade — read_shelf_entry
196 # ---------------------------------------------------------------------------
197
class TestLegacyShelfUpgrade:
    """Legacy ``.msgpack`` shelf files must stay readable and be upgraded on access."""

    def test_legacy_msgpack_readable_via_read_shelf_entry(self, tmp_path: pathlib.Path) -> None:
        """An entry stored only as legacy .msgpack is returned by read_shelf_entry()."""
        repo = _make_repo(tmp_path)
        legacy_entry = _entry("leg")
        _write_legacy_shelf(repo, legacy_entry)

        loaded = read_shelf_entry(repo, str(legacy_entry["id"]))

        assert loaded is not None
        assert loaded["id"] == legacy_entry["id"]
        assert loaded["message"] == "shelf leg"

    def test_legacy_msgpack_readable_via_list(self, tmp_path: pathlib.Path) -> None:
        """An entry stored only as legacy .msgpack shows up in list_shelf_entries()."""
        repo = _make_repo(tmp_path)
        legacy_entry = _entry("leglist")
        _write_legacy_shelf(repo, legacy_entry)

        listed = list_shelf_entries(repo)

        assert len(listed) == 1
        assert listed[0]["id"] == legacy_entry["id"]

    def test_legacy_migrated_to_new_format_on_read(self, tmp_path: pathlib.Path) -> None:
        """Reading a legacy entry leaves a new-format file with a 'shelf ' header behind."""
        repo = _make_repo(tmp_path)
        legacy_entry = _entry("migr")
        _write_legacy_shelf(repo, legacy_entry)

        # Return value is irrelevant here; only the on-disk side effect is checked.
        read_shelf_entry(repo, str(legacy_entry["id"]))

        upgraded = shelf_entry_path(repo, str(legacy_entry["id"]))
        assert upgraded.exists(), "New-format shelf entry must exist after migration"
        content = upgraded.read_bytes()
        assert content.startswith(b"shelf "), "Migrated file must use shelf header format"

    def test_legacy_msgpack_removed_after_migration(self, tmp_path: pathlib.Path) -> None:
        """Reading a legacy entry deletes its old .msgpack file."""
        repo = _make_repo(tmp_path)
        legacy_entry = _entry("del")
        old_file = _write_legacy_shelf(repo, legacy_entry)
        assert old_file.exists()

        read_shelf_entry(repo, str(legacy_entry["id"]))

        assert not old_file.exists(), "Old .msgpack shelf file must be removed after migration"

    def test_legacy_migrated_on_list(self, tmp_path: pathlib.Path) -> None:
        """Listing entries migrates legacy .msgpack files as well."""
        repo = _make_repo(tmp_path)
        legacy_entry = _entry("lmig")
        old_file = _write_legacy_shelf(repo, legacy_entry)

        list_shelf_entries(repo)

        upgraded = shelf_entry_path(repo, str(legacy_entry["id"]))
        assert upgraded.exists()
        assert not old_file.exists()

    def test_mixed_format_list(self, tmp_path: pathlib.Path) -> None:
        """A repo holding one new-format and one legacy entry lists both of them."""
        repo = _make_repo(tmp_path)
        modern_entry = _entry("new")
        legacy_entry = _entry("old")
        write_shelf_entry(repo, modern_entry)
        _write_legacy_shelf(repo, legacy_entry)

        listed = list_shelf_entries(repo)
        listed_ids = {item["id"] for item in listed}

        assert modern_entry["id"] in listed_ids
        assert legacy_entry["id"] in listed_ids
        assert len(listed) == 2
269
270
271 # ---------------------------------------------------------------------------
272 # delete_shelf_entry — removes both new and legacy files
273 # ---------------------------------------------------------------------------
274
class TestDeleteShelfEntry:
    """delete_shelf_entry() must remove entries in either on-disk format."""

    def test_delete_removes_new_format_file(self, tmp_path: pathlib.Path) -> None:
        """Deleting a new-format entry returns True and removes its file."""
        repo = _make_repo(tmp_path)
        entry = _entry("delnew")
        entry_id = str(entry["id"])
        write_shelf_entry(repo, entry)
        stored = shelf_entry_path(repo, entry_id)
        assert stored.exists()

        outcome = delete_shelf_entry(repo, entry_id)

        assert outcome is True
        assert not stored.exists()

    def test_delete_removes_legacy_msgpack_file(self, tmp_path: pathlib.Path) -> None:
        """Deleting an entry that exists only as .msgpack returns True and removes it."""
        repo = _make_repo(tmp_path)
        entry = _entry("delleg")
        old_file = _write_legacy_shelf(repo, entry)
        assert old_file.exists()

        outcome = delete_shelf_entry(repo, str(entry["id"]))

        assert outcome is True
        assert not old_file.exists()

    def test_delete_absent_entry_returns_false(self, tmp_path: pathlib.Path) -> None:
        """Deleting an ID that was never written returns False."""
        repo = _make_repo(tmp_path)
        missing_id = str(_entry("absent")["id"])

        outcome = delete_shelf_entry(repo, missing_id)

        assert outcome is False
308
309
310 # ---------------------------------------------------------------------------
311 # GC — finds object IDs in new-format shelf entries
312 # ---------------------------------------------------------------------------
313
class TestGcShelfWalk:
    """GC must treat objects referenced by new-format shelf entries as reachable."""

    def test_gc_finds_object_ids_in_new_format_entry(self, tmp_path: pathlib.Path) -> None:
        """_collect_shelf_objects() adds a new-format entry's snapshot object ID to the set."""
        # Imported inside the test, as in the original, so the private GC helper
        # is only loaded when this test runs.
        from muse.core.gc import _collect_shelf_objects

        repo = _make_repo(tmp_path)
        write_shelf_entry(repo, _entry("gc"))

        found: set[str] = set()
        _collect_shelf_objects(repo, found)

        assert _OBJ_A in found, (
            "object ID from a new-format shelf entry must appear in GC reachable set"
        )
File History 4 commits
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 20 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 22 days ago
sha256:09656d1b0772ea4c96f8911d7bf8042b33eb0596992c6546dfab3d21e9dee330 fix: align muse read --json schema and test contracts Sonnet 4.6 minor 23 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 28 days ago