gabriel / muse public
test_shelf_msgpack_storage.py python
874 lines 37.6 KB
Raw
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 20 days ago
1 """Tests for the per-entry shelf storage layer.
2
3 Shelf entries were previously serialised as a single JSON array in
4 ``.muse/shelf.json``. This test suite covers the per-entry git-header+JSON
5 layout at ``.muse/shelf/<algo>/<hex>`` (no extension) — the same content-
6 addressing scheme used by the unified object store.
7
8 Test tiers
9 ----------
10 Unit
11 Path helpers, JSON round-trips, ID derivation. No subprocess,
12 no real repo.
13 Integration
14 ``write_shelf_entry`` / ``read_shelf_entry`` / ``list_shelf_entries`` /
15 ``delete_shelf_entry`` against a real ``.muse/`` directory tree.
16 End-to-end
17 Full CLI round-trips: ``muse shelf save``, ``list``, ``read``, ``pop``,
18 ``drop``. Verifies the entry files appear on disk and ``shelf.json``
19 is never created.
20 Stress
21 100 entries written and listed back; 20 entries written concurrently from threads.
22 State
23 State-machine transitions: empty → save → list → drop → empty.
24 Name-collision invariant. Pop restores working tree.
25 Integrity
26 Content-addressed: entry ID matches sha256 of content (minus id).
27 File path encodes the algo. Tampered bytes return ``None`` on read.
28 Corrupt entry is skipped by ``list_shelf_entries`` without crashing.
29 Performance
30 ``write_shelf_entry`` < 50 ms. ``read_shelf_entry`` < 10 ms.
31 ``list_shelf_entries`` for 50 entries < 500 ms.
32 Security
33 Path traversal in entry name cannot escape ``.muse/shelf/``.
34 Symlinked shelf directory is rejected.
35 Oversized payload is rejected.
36 Entry whose serialised ID does not match its filename is rejected.
37 """
38
39 from __future__ import annotations
40
41 import json
42 import os
43 import pathlib
44 import threading
45 import time
46 from collections.abc import Mapping
47
48 import json as _json
49 import pytest
50
51 from muse.core.types import fake_id, long_id, blob_id, content_hash, split_id
52 from muse.core.object_store import object_path
53 from muse.core.paths import muse_dir, shelf_dir
54
55 type _ShelfDict = dict[str, str | bool | int | list[str] | None]
56
57 # ---------------------------------------------------------------------------
58 # Lazy imports — these symbols do not exist yet; tests drive their creation.
59 # ---------------------------------------------------------------------------
60
def _shelf_dir(root: pathlib.Path) -> pathlib.Path:
    """Return the shelf directory for *root* by delegating to ``muse.core.paths.shelf_dir``.

    The helper is imported at call time rather than taken from module scope.
    """
    from muse.core.paths import shelf_dir as _resolve_shelf_dir

    return _resolve_shelf_dir(root)
65
66
def _shelf_entry_path(root: pathlib.Path, entry_id: str) -> pathlib.Path:
    """Return the on-disk path for *entry_id* via ``muse.core.shelf.shelf_entry_path``.

    The production module is imported at call time, so a missing symbol fails
    the calling test rather than collection of this module.
    """
    from muse.core.shelf import shelf_entry_path as _resolve_entry_path

    return _resolve_entry_path(root, entry_id)
71
72
def _write_shelf_entry(root: pathlib.Path, entry: _ShelfDict) -> None:
    """Persist *entry* under *root* via ``muse.core.shelf.write_shelf_entry``.

    The production module is imported at call time. Any value returned by the
    production function is discarded.
    """
    from muse.core.shelf import write_shelf_entry as _store_entry

    _store_entry(root, entry)
76
77
def _read_shelf_entry(root: pathlib.Path, entry_id: str) -> _ShelfDict | None:
    """Load the entry *entry_id* under *root* via ``muse.core.shelf.read_shelf_entry``.

    The production module is imported at call time. The production result is
    returned unchanged.
    """
    from muse.core.shelf import read_shelf_entry as _load_entry

    loaded = _load_entry(root, entry_id)
    return loaded
81
82
def _list_shelf_entries(root: pathlib.Path) -> list[_ShelfDict]:
    """Return every shelf entry under *root* via ``muse.core.shelf.list_shelf_entries``.

    The production module is imported at call time. The production result is
    returned unchanged.
    """
    from muse.core.shelf import list_shelf_entries as _enumerate_entries

    found = _enumerate_entries(root)
    return found
86
87
def _delete_shelf_entry(root: pathlib.Path, entry_id: str) -> bool:
    """Delete the entry *entry_id* under *root* via ``muse.core.shelf.delete_shelf_entry``.

    The production module is imported at call time. The production result is
    returned unchanged.
    """
    from muse.core.shelf import delete_shelf_entry as _remove_entry

    removed = _remove_entry(root, entry_id)
    return removed
91
92
93 # ---------------------------------------------------------------------------
94 # Shared test helpers
95 # ---------------------------------------------------------------------------
96
def _init_repo(tmp_path: pathlib.Path, branch: str = "main") -> tuple[pathlib.Path, str]:
    """Lay out a minimal Muse repository under *tmp_path* without spawning a process.

    Writes ``repo.json`` and ``HEAD`` (pointing at ``refs/heads/<branch>``) and
    creates the ``refs/heads``, ``snapshots``, ``commits/sha256`` and
    ``objects`` directories inside the directory returned by ``muse_dir``.

    Returns:
        ``(tmp_path, repo_id)`` where ``repo_id`` is the ID written to ``repo.json``.
    """
    dot_muse = muse_dir(tmp_path)
    dot_muse.mkdir()

    repo_id = fake_id("repo")
    repo_meta = {
        "repo_id": repo_id,
        "domain": "code",
        "created_at": "2026-01-01T00:00:00+00:00",
        "schema_version": 1,
        "bare": False,
    }
    (dot_muse / "repo.json").write_text(json.dumps(repo_meta), encoding="utf-8")
    (dot_muse / "HEAD").write_text(f"ref: refs/heads/{branch}", encoding="utf-8")

    # (path parts, create missing parents) — nested directories need parents=True.
    layout = (
        (("refs", "heads"), True),
        (("snapshots",), False),
        (("commits", "sha256"), True),
        (("objects",), False),
    )
    for parts, with_parents in layout:
        dot_muse.joinpath(*parts).mkdir(parents=with_parents)

    return tmp_path, repo_id
115
116
def _make_entry_dict(
    name: str = "main/000",
    branch: str = "main",
    snapshot: dict[str, str] | None = None,
    created_at: str = "2026-01-01T00:00:00+00:00",
    created_by: str = "human",
    intent_type: str = "checkpoint",
    intent: str | None = None,
    resumable: bool = False,
    tags: list[str] | None = None,
) -> _ShelfDict:
    """Build a complete shelf entry dict including a derived ``id`` field.

    The ``id`` is ``content_hash`` of the entry content without the ``id``
    key itself; the original docstring states this matches the production
    derivation in ``_compute_shelf_id``.

    Args:
        name: Entry name.
        branch: Branch the entry is recorded against.
        snapshot: Path-to-object-ID mapping. ``None`` selects a one-file
            default; an explicitly passed empty dict is kept as-is.
        created_at: Creation timestamp string.
        created_by: Creator label.
        intent_type: Intent category.
        intent: Optional free-text intent.
        resumable: Resumable flag.
        tags: Tag list. ``None`` selects an empty list.

    Returns:
        The entry dict with ``id`` as its first key.
    """
    # Compare against None rather than relying on truthiness: ``snapshot or
    # default`` would silently swap a caller's empty snapshot for the default.
    if snapshot is None:
        snapshot = {"a.py": long_id("a" * 64)}
    if tags is None:
        tags = []

    without_id = {
        "name": name,
        "snapshot": snapshot,
        "deleted": [],
        "snapshot_id": long_id("b" * 64),
        "parent_commit": long_id("c" * 64),
        "branch": branch,
        "created_at": created_at,
        "created_by": created_by,
        "intent_type": intent_type,
        "intent": intent,
        "resumable": resumable,
        "tags": tags,
        "expires_at": None,
        "domain_state": {},
    }
    entry_id = content_hash(without_id)
    return {"id": entry_id, **without_id}
152
153
def _write_object(root: pathlib.Path, content: bytes) -> str:
    """Store *content* at the object-store path for its blob ID and return that ID.

    Missing parent directories of the target path are created first.
    """
    oid = blob_id(content)
    target = object_path(root, oid)
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_bytes(content)
    return oid
161
162
163 # ---------------------------------------------------------------------------
164 # Tier 1 — Unit
165 # ---------------------------------------------------------------------------
166
167
class TestShelfDirPathHelper:
    """``shelf_dir`` returns the canonical ``.muse/shelf/`` path."""

    def test_returns_dot_muse_shelf(self, tmp_path: pathlib.Path) -> None:
        """shelf_dir() must resolve to the ``shelf`` child of the muse directory.

        The expected value is built from ``muse_dir`` instead of ``shelf_dir``:
        ``_shelf_dir`` only delegates to ``shelf_dir``, so comparing the two
        would pass for any return value.
        """
        root, _ = _init_repo(tmp_path)
        assert _shelf_dir(root) == muse_dir(root) / "shelf"

    def test_is_child_of_muse_dir(self, tmp_path: pathlib.Path) -> None:
        """The shelf directory must sit directly inside the muse directory."""
        root, _ = _init_repo(tmp_path)
        assert _shelf_dir(root).parent == muse_dir(root)

    def test_does_not_create_directory(self, tmp_path: pathlib.Path) -> None:
        """Path helper is pure — it must not create directories as a side effect."""
        root, _ = _init_repo(tmp_path)
        _shelf_dir(root)
        assert not shelf_dir(root).exists()

    def test_name_is_shelf(self, tmp_path: pathlib.Path) -> None:
        """The final path component must be ``shelf``."""
        root, _ = _init_repo(tmp_path)
        assert _shelf_dir(root).name == "shelf"
190
191
class TestShelfEntryPathHelper:
    """``shelf_entry_path`` maps an entry ID to ``shelf/ALGO/HEX`` with no file extension."""

    def test_sha256_path_shape(self, tmp_path: pathlib.Path) -> None:
        """A sha256 ID must land at ``shelf_dir / "sha256" / hex``.

        The original test notes this is the same convention as commit_path
        and snapshot_path, keeping content-addressed stores uniform.
        """
        root, _ = _init_repo(tmp_path)
        hex_digits = "a" * 64
        actual = _shelf_entry_path(root, long_id(hex_digits))
        expected = shelf_dir(root) / "sha256" / hex_digits
        assert actual == expected

    def test_algo_extracted_from_prefix(self, tmp_path: pathlib.Path) -> None:
        """The directory and file names must equal the two parts ``split_id`` returns."""
        root, _ = _init_repo(tmp_path)
        entry_id = long_id("b" * 64)
        algo, hex_id = split_id(entry_id)
        located = _shelf_entry_path(root, entry_id)
        assert located.parent.name == algo
        assert located.name == hex_id

    def test_extension_is_empty(self, tmp_path: pathlib.Path) -> None:
        """Entry paths carry no file suffix."""
        root, _ = _init_repo(tmp_path)
        located = _shelf_entry_path(root, long_id("c" * 64))
        assert located.suffix == ""

    def test_parent_is_shelf_dir(self, tmp_path: pathlib.Path) -> None:
        """Two levels up from the entry file is the shelf directory."""
        root, _ = _init_repo(tmp_path)
        located = _shelf_entry_path(root, long_id("d" * 64))
        grandparent = located.parent.parent
        assert grandparent == _shelf_dir(root)

    def test_different_ids_produce_different_paths(self, tmp_path: pathlib.Path) -> None:
        """Distinct IDs must never share a path."""
        root, _ = _init_repo(tmp_path)
        first = _shelf_entry_path(root, long_id("a" * 64))
        second = _shelf_entry_path(root, long_id("b" * 64))
        assert first != second

    def test_does_not_create_directory(self, tmp_path: pathlib.Path) -> None:
        """Computing an entry path must leave the shelf directory uncreated."""
        root, _ = _init_repo(tmp_path)
        _shelf_entry_path(root, long_id("e" * 64))
        assert not shelf_dir(root).exists()
234
235
class TestJsonRoundTrip:
    """Entry dicts come back unchanged from ``json.dumps`` followed by ``json.loads``."""

    @staticmethod
    def _round_trip(entry: _ShelfDict) -> _ShelfDict:
        """Serialise *entry* to a JSON string and parse it back."""
        encoded = _json.dumps(entry)
        return _json.loads(encoded)

    def test_string_fields_survive(self) -> None:
        """``name``, ``branch`` and ``id`` keep their values."""
        original = _make_entry_dict()
        restored = self._round_trip(original)
        assert restored["name"] == original["name"]
        assert restored["branch"] == original["branch"]
        assert restored["id"] == original["id"]

    def test_none_fields_survive(self) -> None:
        """``None`` values stay ``None`` instead of being dropped or stringified."""
        restored = self._round_trip(_make_entry_dict(intent=None))
        assert restored["intent"] is None
        assert restored["expires_at"] is None

    def test_nested_snapshot_survives(self) -> None:
        """A multi-file snapshot mapping is preserved key for key."""
        snap = {"src/a.py": long_id("a" * 64), "src/b.py": long_id("b" * 64)}
        restored = self._round_trip(_make_entry_dict(snapshot=snap))
        assert restored["snapshot"] == snap

    def test_bool_fields_survive(self) -> None:
        """``resumable=True`` comes back as the ``True`` singleton."""
        restored = self._round_trip(_make_entry_dict(resumable=True))
        assert restored["resumable"] is True

    def test_list_fields_survive(self) -> None:
        """Tag lists keep their contents and order."""
        restored = self._round_trip(_make_entry_dict(tags=["hotfix", "api"]))
        assert restored["tags"] == ["hotfix", "api"]

    def test_empty_dict_domain_state_survives(self) -> None:
        """The empty ``domain_state`` dict is empty both before and after the cycle."""
        original = _make_entry_dict()
        assert original["domain_state"] == {}
        restored = self._round_trip(original)
        assert restored["domain_state"] == {}
273
274
class TestEntryIdDerivation:
    """The derived entry ID is a deterministic function of the entry content."""

    def test_same_content_same_id(self) -> None:
        """Two entries built from identical arguments share one ID."""
        first = _make_entry_dict(name="x/000")
        second = _make_entry_dict(name="x/000")
        assert first["id"] == second["id"]

    def test_different_name_different_id(self) -> None:
        """Changing only the name changes the ID."""
        first = _make_entry_dict(name="x/000")
        second = _make_entry_dict(name="x/001")
        assert first["id"] != second["id"]

    def test_id_has_sha256_prefix(self) -> None:
        """IDs are prefixed with ``sha256:``."""
        entry_id = _make_entry_dict()["id"]
        assert entry_id.startswith("sha256:")

    def test_id_hex_is_64_chars(self) -> None:
        """The hex part returned by ``split_id`` is 64 characters long."""
        entry_id = _make_entry_dict()["id"]
        _, hex_part = split_id(entry_id)
        assert len(hex_part) == 64
296
297
298 # ---------------------------------------------------------------------------
299 # Tier 2 — Integration
300 # ---------------------------------------------------------------------------
301
302
class TestWriteReadRoundTrip:
    """``write_shelf_entry`` + ``read_shelf_entry`` preserves all fields.

    Every test that inspects a read result first asserts it is not ``None``,
    so a failed read is reported as an assertion failure rather than a
    ``TypeError`` from subscripting ``None``.
    """

    def test_basic_round_trip(self, tmp_path: pathlib.Path) -> None:
        """Reading back a just-written entry returns the same id, name and snapshot."""
        root, _ = _init_repo(tmp_path)
        entry = _make_entry_dict()
        _write_shelf_entry(root, entry)
        out = _read_shelf_entry(root, entry["id"])
        assert out is not None
        assert out["id"] == entry["id"]
        assert out["name"] == entry["name"]
        assert out["snapshot"] == entry["snapshot"]

    def test_creates_file_at_correct_path(self, tmp_path: pathlib.Path) -> None:
        """The on-disk file must live at the path ``shelf_entry_path`` reports, with no extension."""
        root, _ = _init_repo(tmp_path)
        entry = _make_entry_dict()
        _write_shelf_entry(root, entry)
        expected = _shelf_entry_path(root, entry["id"])
        assert expected.exists()
        assert expected.suffix == ""

    def test_creates_algo_subdirectory(self, tmp_path: pathlib.Path) -> None:
        """Writing an entry creates the ``sha256`` directory under the shelf directory."""
        root, _ = _init_repo(tmp_path)
        entry = _make_entry_dict()
        _write_shelf_entry(root, entry)
        algo_dir = _shelf_dir(root) / "sha256"
        assert algo_dir.is_dir()

    def test_none_fields_preserved(self, tmp_path: pathlib.Path) -> None:
        """``None``-valued fields are read back as ``None``."""
        root, _ = _init_repo(tmp_path)
        entry = _make_entry_dict(intent=None)
        _write_shelf_entry(root, entry)
        out = _read_shelf_entry(root, entry["id"])
        assert out is not None
        assert out["intent"] is None
        assert out["expires_at"] is None

    def test_resumable_true_preserved(self, tmp_path: pathlib.Path) -> None:
        """``resumable=True`` is read back as ``True``."""
        root, _ = _init_repo(tmp_path)
        entry = _make_entry_dict(resumable=True)
        _write_shelf_entry(root, entry)
        out = _read_shelf_entry(root, entry["id"])
        assert out is not None
        assert out["resumable"] is True

    def test_tags_preserved(self, tmp_path: pathlib.Path) -> None:
        """The tag list is read back with the same contents and order."""
        root, _ = _init_repo(tmp_path)
        entry = _make_entry_dict(tags=["audit", "wip"])
        _write_shelf_entry(root, entry)
        out = _read_shelf_entry(root, entry["id"])
        assert out is not None
        assert out["tags"] == ["audit", "wip"]

    def test_write_is_idempotent(self, tmp_path: pathlib.Path) -> None:
        """Writing the same entry twice must not raise and must leave exactly
        one file on disk."""
        root, _ = _init_repo(tmp_path)
        entry = _make_entry_dict()
        _write_shelf_entry(root, entry)
        _write_shelf_entry(root, entry)
        files = [
            f
            for f in (_shelf_dir(root) / "sha256").glob("*")
            if f.is_file() and f.suffix == ""
        ]
        assert len(files) == 1

    def test_read_nonexistent_returns_none(self, tmp_path: pathlib.Path) -> None:
        """Reading an ID that was never written yields ``None``."""
        root, _ = _init_repo(tmp_path)
        result = _read_shelf_entry(root, long_id("f" * 64))
        assert result is None
369
370
class TestListShelfEntries:
    """``list_shelf_entries`` returns all entries sorted by created_at descending."""

    def test_empty_dir_returns_empty_list(self, tmp_path: pathlib.Path) -> None:
        """An existing but empty shelf directory lists as ``[]``.

        The directory is created explicitly; ``_init_repo`` does not create
        it, and without this step the test would only repeat the
        missing-directory case covered by the next test.
        """
        root, _ = _init_repo(tmp_path)
        _shelf_dir(root).mkdir()
        assert _list_shelf_entries(root) == []

    def test_missing_shelf_dir_returns_empty_list(self, tmp_path: pathlib.Path) -> None:
        """Listing must not raise when the shelf directory has never been created."""
        root, _ = _init_repo(tmp_path)
        assert not _shelf_dir(root).exists()
        assert _list_shelf_entries(root) == []

    def test_single_entry_returned(self, tmp_path: pathlib.Path) -> None:
        """One written entry is listed exactly once, with the same ID."""
        root, _ = _init_repo(tmp_path)
        entry = _make_entry_dict()
        _write_shelf_entry(root, entry)
        entries = _list_shelf_entries(root)
        assert len(entries) == 1
        assert entries[0]["id"] == entry["id"]

    def test_two_entries_returned(self, tmp_path: pathlib.Path) -> None:
        """Two distinct entries are both listed."""
        root, _ = _init_repo(tmp_path)
        e1 = _make_entry_dict(name="main/000", created_at="2026-01-01T00:00:00+00:00")
        e2 = _make_entry_dict(name="main/001", created_at="2026-01-02T00:00:00+00:00")
        _write_shelf_entry(root, e1)
        _write_shelf_entry(root, e2)
        entries = _list_shelf_entries(root)
        assert len(entries) == 2

    def test_sorted_newest_first(self, tmp_path: pathlib.Path) -> None:
        """Entries are ordered newest-first so CLI list shows recent work at top."""
        root, _ = _init_repo(tmp_path)
        e1 = _make_entry_dict(name="main/000", created_at="2026-01-01T00:00:00+00:00")
        e2 = _make_entry_dict(name="main/001", created_at="2026-01-03T00:00:00+00:00")
        e3 = _make_entry_dict(name="main/002", created_at="2026-01-02T00:00:00+00:00")
        for e in [e1, e2, e3]:
            _write_shelf_entry(root, e)
        entries = _list_shelf_entries(root)
        assert [e["name"] for e in entries] == ["main/001", "main/002", "main/000"]

    def test_no_shelf_json_created(self, tmp_path: pathlib.Path) -> None:
        """The legacy shelf.json file must never be created by the new storage layer."""
        root, _ = _init_repo(tmp_path)
        _write_shelf_entry(root, _make_entry_dict())
        _list_shelf_entries(root)
        assert not (muse_dir(root) / "shelf.json").exists()
418
419
class TestDeleteShelfEntry:
    """``delete_shelf_entry`` unlinks the entry file and reports whether it existed."""

    def test_delete_existing_returns_true(self, tmp_path: pathlib.Path) -> None:
        """Deleting a stored entry returns ``True``."""
        root, _ = _init_repo(tmp_path)
        stored = _make_entry_dict()
        _write_shelf_entry(root, stored)
        outcome = _delete_shelf_entry(root, stored["id"])
        assert outcome is True

    def test_delete_removes_file(self, tmp_path: pathlib.Path) -> None:
        """After deletion the entry path no longer exists."""
        root, _ = _init_repo(tmp_path)
        stored = _make_entry_dict()
        _write_shelf_entry(root, stored)
        _delete_shelf_entry(root, stored["id"])
        entry_file = _shelf_entry_path(root, stored["id"])
        assert not entry_file.exists()

    def test_delete_nonexistent_returns_false(self, tmp_path: pathlib.Path) -> None:
        """Deleting an ID that was never written returns ``False``."""
        root, _ = _init_repo(tmp_path)
        outcome = _delete_shelf_entry(root, long_id("a" * 64))
        assert outcome is False

    def test_delete_one_leaves_others(self, tmp_path: pathlib.Path) -> None:
        """Removing one of two entries leaves the other listed."""
        root, _ = _init_repo(tmp_path)
        doomed = _make_entry_dict(name="main/000")
        survivor = _make_entry_dict(name="main/001")
        _write_shelf_entry(root, doomed)
        _write_shelf_entry(root, survivor)
        _delete_shelf_entry(root, doomed["id"])
        listed = _list_shelf_entries(root)
        assert len(listed) == 1
        assert listed[0]["id"] == survivor["id"]

    def test_delete_twice_returns_false_second_time(self, tmp_path: pathlib.Path) -> None:
        """A second delete of the same ID reports ``False``."""
        root, _ = _init_repo(tmp_path)
        stored = _make_entry_dict()
        _write_shelf_entry(root, stored)
        _delete_shelf_entry(root, stored["id"])
        second_attempt = _delete_shelf_entry(root, stored["id"])
        assert second_attempt is False
457
458
459 # ---------------------------------------------------------------------------
460 # Tier 3 — End-to-end (CLI)
461 # ---------------------------------------------------------------------------
462
463
class TestCliShelfSaveHeaderJsonLayout:
    """CLI round-trips: ``muse shelf`` commands store per-entry files, never ``shelf.json``."""

    @staticmethod
    def _env(root: pathlib.Path) -> dict[str, str]:
        """Return a new environment mapping that points the CLI at *root*."""
        return {"MUSE_REPO_ROOT": str(root)}

    @staticmethod
    def _entry_files(root: pathlib.Path) -> list[pathlib.Path]:
        """Return the extensionless regular files directly under ``shelf/sha256`` of *root*."""
        algo_dir = shelf_dir(root) / "sha256"
        return [p for p in algo_dir.glob("*") if p.is_file() and p.suffix == ""]

    def test_save_creates_entry_file(self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> None:
        """``muse shelf save`` exits 0 and leaves exactly one extensionless
        file in the ``shelf/sha256`` directory."""
        from tests.cli_test_helper import CliRunner

        cli = CliRunner()
        monkeypatch.chdir(tmp_path)
        monkeypatch.setenv("MUSE_REPO_ROOT", str(tmp_path))
        cli.invoke(None, ["init"], env=self._env(tmp_path), catch_exceptions=False)
        (tmp_path / "hello.py").write_text("print('hi')\n")
        cli.invoke(None, ["commit", "-m", "base"], env=self._env(tmp_path), catch_exceptions=False)
        (tmp_path / "work.py").write_text("x = 42\n")
        saved = cli.invoke(
            None,
            ["shelf", "save", "-m", "wip"],
            env=self._env(tmp_path),
            catch_exceptions=False,
        )
        assert saved.exit_code == 0, saved.output
        assert len(self._entry_files(tmp_path)) == 1

    def test_save_does_not_create_shelf_json(self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> None:
        """After a save, ``shelf.json`` must not exist in the muse directory."""
        from tests.cli_test_helper import CliRunner

        cli = CliRunner()
        monkeypatch.chdir(tmp_path)
        monkeypatch.setenv("MUSE_REPO_ROOT", str(tmp_path))
        cli.invoke(None, ["init"], env=self._env(tmp_path), catch_exceptions=False)
        (tmp_path / "a.py").write_text("a = 1\n")
        cli.invoke(None, ["commit", "-m", "base"], env=self._env(tmp_path), catch_exceptions=False)
        (tmp_path / "b.py").write_text("b = 2\n")
        cli.invoke(None, ["shelf", "save"], env=self._env(tmp_path), catch_exceptions=False)
        legacy_file = muse_dir(tmp_path) / "shelf.json"
        assert not legacy_file.exists()

    def test_save_json_output_has_id(self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> None:
        """``shelf save --json`` prints a JSON object whose ``id`` starts with ``sha256:``."""
        from tests.cli_test_helper import CliRunner

        cli = CliRunner()
        monkeypatch.chdir(tmp_path)
        monkeypatch.setenv("MUSE_REPO_ROOT", str(tmp_path))
        cli.invoke(None, ["init"], env=self._env(tmp_path), catch_exceptions=False)
        (tmp_path / "a.py").write_text("a = 1\n")
        cli.invoke(None, ["commit", "-m", "base"], env=self._env(tmp_path), catch_exceptions=False)
        (tmp_path / "b.py").write_text("b = 2\n")
        saved = cli.invoke(
            None,
            ["shelf", "save", "--json"],
            env=self._env(tmp_path),
            catch_exceptions=False,
        )
        payload = json.loads(saved.output)
        assert payload["id"] is not None
        assert payload["id"].startswith("sha256:")

    def test_drop_removes_entry_file(self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> None:
        """Dropping the saved entry by name leaves no entry files behind."""
        from tests.cli_test_helper import CliRunner

        cli = CliRunner()
        env = self._env(tmp_path)
        monkeypatch.chdir(tmp_path)
        cli.invoke(None, ["init"], env=env, catch_exceptions=False)
        (tmp_path / "a.py").write_text("a = 1\n")
        cli.invoke(None, ["commit", "-m", "base"], env=env, catch_exceptions=False)
        (tmp_path / "b.py").write_text("b = 2\n")
        saved = cli.invoke(None, ["shelf", "save", "--json"], env=env, catch_exceptions=False)
        entry_name = json.loads(saved.output)["name"]
        cli.invoke(None, ["shelf", "drop", entry_name], env=env, catch_exceptions=False)
        assert len(self._entry_files(tmp_path)) == 0

    def test_list_returns_saved_entry(self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> None:
        """``shelf list --json`` reports the one saved entry with its ``-m`` text as ``intent``."""
        from tests.cli_test_helper import CliRunner

        cli = CliRunner()
        env = self._env(tmp_path)
        monkeypatch.chdir(tmp_path)
        cli.invoke(None, ["init"], env=env, catch_exceptions=False)
        (tmp_path / "a.py").write_text("a = 1\n")
        cli.invoke(None, ["commit", "-m", "base"], env=env, catch_exceptions=False)
        (tmp_path / "b.py").write_text("b = 2\n")
        cli.invoke(None, ["shelf", "save", "-m", "my work"], env=env, catch_exceptions=False)
        listing = cli.invoke(None, ["shelf", "list", "--json"], env=env, catch_exceptions=False)
        payload = json.loads(listing.output)
        assert len(payload["entries"]) == 1
        assert payload["entries"][0]["intent"] == "my work"

    def test_pop_removes_entry_and_restores_file(self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> None:
        """Save removes the uncommitted file; pop brings it back and deletes the entry."""
        from tests.cli_test_helper import CliRunner

        cli = CliRunner()
        env = self._env(tmp_path)
        shelved_file = tmp_path / "b.py"
        monkeypatch.chdir(tmp_path)
        cli.invoke(None, ["init"], env=env, catch_exceptions=False)
        (tmp_path / "a.py").write_text("a = 1\n")
        cli.invoke(None, ["commit", "-m", "base"], env=env, catch_exceptions=False)
        shelved_file.write_text("restored content\n")
        saved = cli.invoke(None, ["shelf", "save", "--json"], env=env, catch_exceptions=False)
        entry_name = json.loads(saved.output)["name"]
        # Saving must have taken the uncommitted file out of the working tree.
        assert not shelved_file.exists()
        cli.invoke(None, ["shelf", "pop", entry_name], env=env, catch_exceptions=False)
        assert shelved_file.read_text() == "restored content\n"
        assert len(self._entry_files(tmp_path)) == 0
556
557
558 # ---------------------------------------------------------------------------
559 # Tier 4 — Stress
560 # ---------------------------------------------------------------------------
561
562
class TestStressShelfStorage:
    """Storage layer remains correct under high entry volume."""

    def test_100_entries_all_written(self, tmp_path: pathlib.Path) -> None:
        """Writing 100 entries must produce 100 distinct entry files."""
        root, _ = _init_repo(tmp_path)
        entries = [
            _make_entry_dict(
                name=f"main/{i:03d}",
                created_at=f"2026-01-{(i % 28) + 1:02d}T00:00:00+00:00",
                snapshot={f"file_{i}.py": long_id(hex(i)[2:].zfill(64))},
            )
            for i in range(100)
        ]
        for e in entries:
            _write_shelf_entry(root, e)
        files = [
            f
            for f in (_shelf_dir(root) / "sha256").glob("*")
            if f.is_file() and f.suffix == ""
        ]
        assert len(files) == 100

    def test_100_entries_all_listable(self, tmp_path: pathlib.Path) -> None:
        """All 100 written entries are returned by the listing, matched by ID."""
        root, _ = _init_repo(tmp_path)
        ids = set()
        for i in range(100):
            e = _make_entry_dict(
                name=f"main/{i:03d}",
                snapshot={f"f{i}.py": long_id(hex(i)[2:].zfill(64))},
            )
            _write_shelf_entry(root, e)
            ids.add(e["id"])
        listed = _list_shelf_entries(root)
        assert len(listed) == 100
        assert {e["id"] for e in listed} == ids

    def test_concurrent_writes_no_corruption(self, tmp_path: pathlib.Path) -> None:
        """Concurrent writes from 20 threads must each produce their own file,
        and every entry must still be listed afterwards.

        Counting files alone cannot detect a damaged entry, so the entries
        are also listed back and compared by ID.
        """
        root, _ = _init_repo(tmp_path)
        # Built up front so the expected IDs are known independently of the threads.
        entries = [
            _make_entry_dict(
                name=f"thread/{i:03d}",
                snapshot={f"t{i}.py": long_id(hex(i * 7)[2:].zfill(64))},
            )
            for i in range(20)
        ]
        errors: list[Exception] = []

        def write_entry(entry: _ShelfDict) -> None:
            # An exception raised inside a thread would not fail the test by
            # itself, so collect it and assert on the list after joining.
            try:
                _write_shelf_entry(root, entry)
            except Exception as exc:
                errors.append(exc)

        threads = [threading.Thread(target=write_entry, args=(e,)) for e in entries]
        for t in threads:
            t.start()
        for t in threads:
            t.join()

        assert not errors, f"Concurrent write errors: {errors}"
        files = [
            f
            for f in (_shelf_dir(root) / "sha256").glob("*")
            if f.is_file() and f.suffix == ""
        ]
        assert len(files) == 20
        listed = _list_shelf_entries(root)
        assert {e["id"] for e in listed} == {e["id"] for e in entries}
621
622
623 # ---------------------------------------------------------------------------
624 # Tier 5 — State
625 # ---------------------------------------------------------------------------
626
627
class TestShelfStateMachine:
    """Walks the shelf through empty, saved and dropped states via the storage helpers."""

    def test_empty_to_save(self, tmp_path: pathlib.Path) -> None:
        """A fresh repo lists nothing; after one write it lists one entry."""
        root, _ = _init_repo(tmp_path)
        before = _list_shelf_entries(root)
        assert before == []
        _write_shelf_entry(root, _make_entry_dict())
        after = _list_shelf_entries(root)
        assert len(after) == 1

    def test_save_to_drop_to_empty(self, tmp_path: pathlib.Path) -> None:
        """Writing then deleting the same entry returns the listing to empty."""
        root, _ = _init_repo(tmp_path)
        stored = _make_entry_dict()
        _write_shelf_entry(root, stored)
        _delete_shelf_entry(root, stored["id"])
        assert _list_shelf_entries(root) == []

    def test_two_saves_then_one_drop(self, tmp_path: pathlib.Path) -> None:
        """With two entries stored, deleting the first leaves only the second."""
        root, _ = _init_repo(tmp_path)
        first = _make_entry_dict(name="main/000")
        second = _make_entry_dict(name="main/001")
        for item in (first, second):
            _write_shelf_entry(root, item)
        _delete_shelf_entry(root, first["id"])
        survivors = _list_shelf_entries(root)
        assert len(survivors) == 1
        assert survivors[0]["name"] == "main/001"

    def test_listing_after_no_writes_is_empty(self, tmp_path: pathlib.Path) -> None:
        """Listing a repo that has never had a shelf entry returns ``[]``."""
        root, _ = _init_repo(tmp_path)
        listed = _list_shelf_entries(root)
        assert listed == []

    def test_overwrite_same_entry_is_stable(self, tmp_path: pathlib.Path) -> None:
        """Writing one entry twice still lists it exactly once under its ID."""
        root, _ = _init_repo(tmp_path)
        stored = _make_entry_dict()
        for _attempt in range(2):
            _write_shelf_entry(root, stored)
        listed = _list_shelf_entries(root)
        assert len(listed) == 1
        assert listed[0]["id"] == stored["id"]
670
671
672 # ---------------------------------------------------------------------------
673 # Tier 6 — Integrity
674 # ---------------------------------------------------------------------------
675
676
class TestShelfStorageIntegrity:
    """Content-address correctness and tamper detection."""

    def test_file_path_encodes_entry_id(self, tmp_path: pathlib.Path) -> None:
        """The entry filename must be the hex portion of entry['id'].
        A mismatch would mean the file is unreachable by ID — a silent data loss."""
        root, _ = _init_repo(tmp_path)
        entry = _make_entry_dict()
        _write_shelf_entry(root, entry)
        _, hex_id = split_id(entry["id"])
        files = [
            f
            for f in (_shelf_dir(root) / "sha256").glob("*")
            if f.is_file() and f.suffix == ""
        ]
        assert len(files) == 1
        assert files[0].name == hex_id

    def test_read_back_id_matches_filename(self, tmp_path: pathlib.Path) -> None:
        """The id field inside the entry must match the filename — verifying
        no silent ID drift between serialisation and storage."""
        root, _ = _init_repo(tmp_path)
        entry = _make_entry_dict()
        _write_shelf_entry(root, entry)
        out = _read_shelf_entry(root, entry["id"])
        # Guard first so a failed read is an assertion failure, not a TypeError.
        assert out is not None
        _, hex_id = split_id(out["id"])
        expected_path = _shelf_dir(root) / "sha256" / hex_id
        assert expected_path.exists()

    def test_tampered_bytes_causes_rejection(self, tmp_path: pathlib.Path) -> None:
        """Flipping a byte in the entry file must cause read_shelf_entry to
        return None rather than silently serving corrupt data."""
        root, _ = _init_repo(tmp_path)
        entry = _make_entry_dict()
        _write_shelf_entry(root, entry)
        p = _shelf_entry_path(root, entry["id"])
        raw = bytearray(p.read_bytes())
        # Invert every bit of the fourth byte from the end of the file.
        raw[-4] ^= 0xFF
        p.write_bytes(bytes(raw))
        result = _read_shelf_entry(root, entry["id"])
        assert result is None

    def test_corrupt_entry_skipped_by_list(self, tmp_path: pathlib.Path) -> None:
        """A corrupt entry file must be silently skipped by list_shelf_entries
        so one bad file does not prevent access to all other entries."""
        root, _ = _init_repo(tmp_path)
        good = _make_entry_dict(name="main/000")
        _write_shelf_entry(root, good)
        # Write a corrupt file directly into the shelf directory (no extension).
        bad_path = _shelf_dir(root) / "sha256" / f"{'a' * 62}ff"
        bad_path.write_bytes(b"\xff\xfe garbage data \x00")
        entries = _list_shelf_entries(root)
        assert len(entries) == 1
        assert entries[0]["id"] == good["id"]

    def test_empty_entry_file_skipped_by_list(self, tmp_path: pathlib.Path) -> None:
        """A zero-byte file in the entry directory is not reported as an entry."""
        root, _ = _init_repo(tmp_path)
        (_shelf_dir(root) / "sha256").mkdir(parents=True, exist_ok=True)
        empty = _shelf_dir(root) / "sha256" / f"{'0' * 64}"
        empty.write_bytes(b"")
        assert _list_shelf_entries(root) == []

    def test_write_creates_no_temp_files(self, tmp_path: pathlib.Path) -> None:
        """After write_shelf_entry completes, no temp files must remain in
        the ``shelf/sha256`` directory.

        The suffix check alone would miss a leftover temp file that has no
        extension, so the regular files present are also compared against the
        single expected entry filename.
        """
        root, _ = _init_repo(tmp_path)
        entry = _make_entry_dict()
        _write_shelf_entry(root, entry)
        _, hex_id = split_id(entry["id"])
        algo_dir = _shelf_dir(root) / "sha256"
        all_files = list(algo_dir.iterdir())
        assert all(f.suffix == "" for f in all_files)
        assert [f.name for f in all_files if f.is_file()] == [hex_id]
745
746
747 # ---------------------------------------------------------------------------
748 # Tier 7 — Performance
749 # ---------------------------------------------------------------------------
750
751
class TestShelfStoragePerformance:
    """Storage operations must complete within latency budgets.

    Each test times a single call with ``time.perf_counter`` and compares the
    elapsed milliseconds with a fixed budget.
    """

    def test_write_entry_under_50ms(self, tmp_path: pathlib.Path) -> None:
        """A single write_shelf_entry call must complete within 50 ms.
        Shelf save is on the critical path of ``muse shelf save`` — users
        feel latency > 50 ms as sluggishness."""
        root, _ = _init_repo(tmp_path)
        entry = _make_entry_dict()
        # Import before starting the clock: the ``_write_shelf_entry`` wrapper
        # imports ``muse.core.shelf`` inside the call, so a cold first import
        # would otherwise be charged against the 50 ms budget.
        from muse.core.shelf import write_shelf_entry

        start = time.perf_counter()
        write_shelf_entry(root, entry)
        elapsed_ms = (time.perf_counter() - start) * 1000
        assert elapsed_ms < 50, f"write_shelf_entry took {elapsed_ms:.1f} ms"

    def test_read_entry_under_10ms(self, tmp_path: pathlib.Path) -> None:
        """A single read_shelf_entry call must complete within 10 ms.
        This is a hot path for ``muse shelf pop`` and ``muse shelf read``."""
        root, _ = _init_repo(tmp_path)
        entry = _make_entry_dict()
        # The untimed write below has already imported ``muse.core.shelf``.
        _write_shelf_entry(root, entry)
        start = time.perf_counter()
        _read_shelf_entry(root, entry["id"])
        elapsed_ms = (time.perf_counter() - start) * 1000
        assert elapsed_ms < 10, f"read_shelf_entry took {elapsed_ms:.1f} ms"

    def test_list_50_entries_under_500ms(self, tmp_path: pathlib.Path) -> None:
        """list_shelf_entries for 50 entries must complete within 500 ms.
        The old shelf.json approach had to parse the entire JSON array; per-file
        reads should be faster due to smaller per-read payload."""
        root, _ = _init_repo(tmp_path)
        for i in range(50):
            e = _make_entry_dict(
                name=f"main/{i:03d}",
                snapshot={f"f{i}.py": long_id(hex(i)[2:].zfill(64))},
            )
            _write_shelf_entry(root, e)
        start = time.perf_counter()
        entries = _list_shelf_entries(root)
        elapsed_ms = (time.perf_counter() - start) * 1000
        assert len(entries) == 50
        assert elapsed_ms < 500, f"list_shelf_entries took {elapsed_ms:.1f} ms"

    def test_delete_entry_under_10ms(self, tmp_path: pathlib.Path) -> None:
        """A single delete_shelf_entry call must complete within 10 ms."""
        root, _ = _init_repo(tmp_path)
        entry = _make_entry_dict()
        _write_shelf_entry(root, entry)
        start = time.perf_counter()
        _delete_shelf_entry(root, entry["id"])
        elapsed_ms = (time.perf_counter() - start) * 1000
        assert elapsed_ms < 10, f"delete_shelf_entry took {elapsed_ms:.1f} ms"
802
803
804 # ---------------------------------------------------------------------------
805 # Tier 8 — Security
806 # ---------------------------------------------------------------------------
807
808
class TestShelfStorageSecurity:
    """Guards against path traversal, symlink attacks, and oversized payloads."""

    def test_symlinked_shelf_dir_rejected_on_write(self, tmp_path: pathlib.Path) -> None:
        """If .muse/shelf/ is a symlink, write_shelf_entry must raise rather
        than follow it — prevents redirect of shelf writes to attacker paths."""
        root, _ = _init_repo(tmp_path)
        attacker_dir = tmp_path / "attacker"
        attacker_dir.mkdir()
        # Use the same ``_shelf_dir`` helper as every other test in this class.
        shelf = _shelf_dir(root)
        shelf.symlink_to(attacker_dir)
        entry = _make_entry_dict()
        with pytest.raises((ValueError, OSError)):
            _write_shelf_entry(root, entry)

    def test_entry_id_cannot_escape_shelf_dir(self, tmp_path: pathlib.Path) -> None:
        """shelf_entry_path must always resolve inside .muse/shelf/.

        Only a well-formed ID is exercised here: the path produced for it
        must be a descendant of the shelf directory.
        """
        root, _ = _init_repo(tmp_path)
        # The hex portion of the ID must be a bare hex string — any non-hex
        # content is a sign of tampering.
        legitimate_id = long_id("a" * 64)
        shelf_root = _shelf_dir(root).resolve()
        p = _shelf_entry_path(root, legitimate_id).resolve()
        # Compare resolved path components rather than string prefixes: a
        # startswith() check would also accept a sibling directory whose name
        # merely begins with the shelf directory's name.
        assert shelf_root in p.parents

    def test_oversized_entry_rejected_on_read(self, tmp_path: pathlib.Path) -> None:
        """An oversized shelf entry file (attacker injected) must be rejected by
        read_shelf_entry to prevent memory exhaustion."""
        from muse.core.io import MAX_MSGPACK_BYTES
        root, _ = _init_repo(tmp_path)
        (_shelf_dir(root) / "sha256").mkdir(parents=True, exist_ok=True)
        fake_id_str = long_id("e" * 64)
        p = _shelf_entry_path(root, fake_id_str)
        # Write a file larger than the allowed limit.
        p.write_bytes(b"\x00" * (MAX_MSGPACK_BYTES + 1))
        result = _read_shelf_entry(root, fake_id_str)
        assert result is None

    def test_non_dict_payload_rejected_on_read(self, tmp_path: pathlib.Path) -> None:
        """A shelf entry file whose top-level JSON value is not a dict (e.g. a list)
        must be rejected — guards against type-confusion attacks."""
        root, _ = _init_repo(tmp_path)
        (_shelf_dir(root) / "sha256").mkdir(parents=True, exist_ok=True)
        fake_id_str = long_id("f" * 64)
        p = _shelf_entry_path(root, fake_id_str)
        # Write a valid shelf header+JSON framing but with a non-dict payload.
        payload = _json.dumps(["not", "a", "dict"]).encode("utf-8")
        header = f"shelf {len(payload)}\0".encode("utf-8")
        p.write_bytes(header + payload)
        result = _read_shelf_entry(root, fake_id_str)
        assert result is None

    def test_shelf_dir_not_traversable_via_list(self, tmp_path: pathlib.Path) -> None:
        """list_shelf_entries must only glob inside .muse/shelf/<algo>/*.
        A file placed directly in .muse/shelf/ (wrong level) must not appear."""
        root, _ = _init_repo(tmp_path)
        # Place a valid-looking entry directly in .muse/shelf/ (wrong level — no algo dir).
        (_shelf_dir(root)).mkdir(parents=True, exist_ok=True)
        payload = _json.dumps({"id": long_id("a" * 64)}).encode("utf-8")
        header = f"shelf {len(payload)}\0".encode("utf-8")
        rogue = _shelf_dir(root) / ("a" * 64)
        rogue.write_bytes(header + payload)
        assert _list_shelf_entries(root) == []
File History 4 commits
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 20 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 22 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 28 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor 29 days ago