"""Tests for the per-entry shelf storage layer. Shelf entries were previously serialised as a single JSON array in ``.muse/shelf.json``. This test suite covers the per-entry git-header+JSON layout at ``.muse/shelf//`` (no extension) — the same content- addressing scheme used by the unified object store. Test tiers ---------- Unit Path helpers, JSON round-trips, ID derivation. No subprocess, no real repo. Integration ``write_shelf_entry`` / ``read_shelf_entry`` / ``list_shelf_entries`` / ``delete_shelf_entry`` against a real ``.muse/`` directory tree. End-to-end Full CLI round-trips: ``muse shelf save``, ``list``, ``read``, ``pop``, ``drop``. Verifies the entry files appear on disk and ``shelf.json`` is never created. Stress 100 entries written concurrently; listing returns all of them. State State-machine transitions: empty → save → list → drop → empty. Name-collision invariant. Pop restores working tree. Integrity Content-addressed: entry ID matches sha256 of content (minus id). File path encodes the algo. Tampered bytes return ``None`` on read. Corrupt entry is skipped by ``list_shelf_entries`` without crashing. Performance ``write_shelf_entry`` < 50 ms. ``read_shelf_entry`` < 10 ms. ``list_shelf_entries`` for 50 entries < 500 ms. Security Path traversal in entry name cannot escape ``.muse/shelf/``. Symlinked shelf directory is rejected. Oversized payload is rejected. Entry whose serialised ID does not match its filename is rejected. """ from __future__ import annotations import json import os import pathlib import threading import time from collections.abc import Mapping import json as _json import pytest from muse.core.types import fake_id, long_id, blob_id, content_hash, split_id from muse.core.object_store import object_path from muse.core.paths import muse_dir, shelf_dir type _ShelfDict = dict[str, str | bool | int | list[str] | None] # --------------------------------------------------------------------------- # Lazy imports — these symbols do not exist yet; tests drive their creation. # --------------------------------------------------------------------------- def _shelf_dir(root: pathlib.Path) -> pathlib.Path: """Thin wrapper so tests import the real helper once it exists.""" from muse.core.paths import shelf_dir return shelf_dir(root) def _shelf_entry_path(root: pathlib.Path, entry_id: str) -> pathlib.Path: """Thin wrapper so tests import the real helper once it exists.""" from muse.core.shelf import shelf_entry_path return shelf_entry_path(root, entry_id) def _write_shelf_entry(root: pathlib.Path, entry: _ShelfDict) -> None: from muse.core.shelf import write_shelf_entry write_shelf_entry(root, entry) def _read_shelf_entry(root: pathlib.Path, entry_id: str) -> _ShelfDict | None: from muse.core.shelf import read_shelf_entry return read_shelf_entry(root, entry_id) def _list_shelf_entries(root: pathlib.Path) -> list[_ShelfDict]: from muse.core.shelf import list_shelf_entries return list_shelf_entries(root) def _delete_shelf_entry(root: pathlib.Path, entry_id: str) -> bool: from muse.core.shelf import delete_shelf_entry return delete_shelf_entry(root, entry_id) # --------------------------------------------------------------------------- # Shared test helpers # --------------------------------------------------------------------------- def _init_repo(tmp_path: pathlib.Path, branch: str = "main") -> tuple[pathlib.Path, str]: """Create a minimal Muse repo structure — no subprocess required.""" muse = muse_dir(tmp_path) muse.mkdir() repo_id = fake_id("repo") (muse / "repo.json").write_text(json.dumps({ "repo_id": repo_id, "domain": "code", "created_at": "2026-01-01T00:00:00+00:00", "schema_version": 1, "bare": False, }), encoding="utf-8") (muse / "HEAD").write_text(f"ref: refs/heads/{branch}", encoding="utf-8") (muse / "refs" / "heads").mkdir(parents=True) (muse / "snapshots").mkdir() (muse / "commits" / "sha256").mkdir(parents=True) (muse / "objects").mkdir() return tmp_path, repo_id def _make_entry_dict( name: str = "main/000", branch: str = "main", snapshot: dict[str, str] | None = None, created_at: str = "2026-01-01T00:00:00+00:00", created_by: str = "human", intent_type: str = "checkpoint", intent: str | None = None, resumable: bool = False, tags: list[str] | None = None, ) -> _ShelfDict: """Build a complete shelf entry dict including a derived ``id`` field. The ``id`` is computed as ``sha256:`` of the entry content minus the ``id`` key itself — exactly matching the production derivation in ``_compute_shelf_id``. """ without_id = { "name": name, "snapshot": snapshot or {"a.py": long_id("a" * 64)}, "deleted": [], "snapshot_id": long_id("b" * 64), "parent_commit": long_id("c" * 64), "branch": branch, "created_at": created_at, "created_by": created_by, "intent_type": intent_type, "intent": intent, "resumable": resumable, "tags": tags or [], "expires_at": None, "domain_state": {}, } entry_id = content_hash(without_id) return {"id": entry_id, **without_id} def _write_object(root: pathlib.Path, content: bytes) -> str: """Write raw bytes to the object store and return the blob ID.""" obj_id = blob_id(content) p = object_path(root, obj_id) p.parent.mkdir(parents=True, exist_ok=True) p.write_bytes(content) return obj_id # --------------------------------------------------------------------------- # Tier 1 — Unit # --------------------------------------------------------------------------- class TestShelfDirPathHelper: """``shelf_dir`` returns the canonical ``.muse/shelf/`` path.""" def test_returns_dot_muse_shelf(self, tmp_path: pathlib.Path) -> None: """shelf_dir() must resolve to /.muse/shelf — the root of the per-entry git-header+JSON layout, consistent with objects_dir.""" root, _ = _init_repo(tmp_path) assert _shelf_dir(root) == shelf_dir(root) def test_is_child_of_muse_dir(self, tmp_path: pathlib.Path) -> None: root, _ = _init_repo(tmp_path) assert _shelf_dir(root).parent == muse_dir(root) def test_does_not_create_directory(self, tmp_path: pathlib.Path) -> None: """Path helper is pure — it must not create directories as a side effect.""" root, _ = _init_repo(tmp_path) _shelf_dir(root) assert not (shelf_dir(root)).exists() def test_name_is_shelf(self, tmp_path: pathlib.Path) -> None: root, _ = _init_repo(tmp_path) assert _shelf_dir(root).name == "shelf" class TestShelfEntryPathHelper: """``shelf_entry_path`` encodes algo and hex into ``.muse/shelf//`` (no extension).""" def test_sha256_path_shape(self, tmp_path: pathlib.Path) -> None: """Path must follow the same // convention as commit_path and snapshot_path so all content-addressed stores are structurally uniform.""" root, _ = _init_repo(tmp_path) entry_id = long_id("a" * 64) p = _shelf_entry_path(root, entry_id) assert p == shelf_dir(root) / "sha256" / f"{'a' * 64}" def test_algo_extracted_from_prefix(self, tmp_path: pathlib.Path) -> None: """The algo segment in the path must come from the prefix of entry_id, never be hardcoded as 'sha256'.""" root, _ = _init_repo(tmp_path) entry_id = long_id("b" * 64) algo, hex_id = split_id(entry_id) p = _shelf_entry_path(root, entry_id) assert p.parent.name == algo assert p.name == hex_id def test_extension_is_empty(self, tmp_path: pathlib.Path) -> None: root, _ = _init_repo(tmp_path) p = _shelf_entry_path(root, long_id("c" * 64)) assert p.suffix == "" def test_parent_is_shelf_dir(self, tmp_path: pathlib.Path) -> None: root, _ = _init_repo(tmp_path) p = _shelf_entry_path(root, long_id("d" * 64)) assert p.parent.parent == _shelf_dir(root) def test_different_ids_produce_different_paths(self, tmp_path: pathlib.Path) -> None: root, _ = _init_repo(tmp_path) p1 = _shelf_entry_path(root, long_id("a" * 64)) p2 = _shelf_entry_path(root, long_id("b" * 64)) assert p1 != p2 def test_does_not_create_directory(self, tmp_path: pathlib.Path) -> None: root, _ = _init_repo(tmp_path) _shelf_entry_path(root, long_id("e" * 64)) assert not (shelf_dir(root)).exists() class TestJsonRoundTrip: """Shelf entry dicts survive a JSON serialise → deserialise cycle unchanged.""" def test_string_fields_survive(self) -> None: entry = _make_entry_dict() out = _json.loads(_json.dumps(entry)) assert out["name"] == entry["name"] assert out["branch"] == entry["branch"] assert out["id"] == entry["id"] def test_none_fields_survive(self) -> None: entry = _make_entry_dict(intent=None) out = _json.loads(_json.dumps(entry)) assert out["intent"] is None assert out["expires_at"] is None def test_nested_snapshot_survives(self) -> None: snap = {"src/a.py": long_id("a" * 64), "src/b.py": long_id("b" * 64)} entry = _make_entry_dict(snapshot=snap) out = _json.loads(_json.dumps(entry)) assert out["snapshot"] == snap def test_bool_fields_survive(self) -> None: entry = _make_entry_dict(resumable=True) out = _json.loads(_json.dumps(entry)) assert out["resumable"] is True def test_list_fields_survive(self) -> None: entry = _make_entry_dict(tags=["hotfix", "api"]) out = _json.loads(_json.dumps(entry)) assert out["tags"] == ["hotfix", "api"] def test_empty_dict_domain_state_survives(self) -> None: entry = _make_entry_dict() assert entry["domain_state"] == {} out = _json.loads(_json.dumps(entry)) assert out["domain_state"] == {} class TestEntryIdDerivation: """Entry ID is deterministic: sha256 of content minus the id field.""" def test_same_content_same_id(self) -> None: e1 = _make_entry_dict(name="x/000") e2 = _make_entry_dict(name="x/000") assert e1["id"] == e2["id"] def test_different_name_different_id(self) -> None: e1 = _make_entry_dict(name="x/000") e2 = _make_entry_dict(name="x/001") assert e1["id"] != e2["id"] def test_id_has_sha256_prefix(self) -> None: e = _make_entry_dict() assert e["id"].startswith("sha256:") def test_id_hex_is_64_chars(self) -> None: e = _make_entry_dict() _, hex_part = split_id(e["id"]) assert len(hex_part) == 64 # --------------------------------------------------------------------------- # Tier 2 — Integration # --------------------------------------------------------------------------- class TestWriteReadRoundTrip: """``write_shelf_entry`` + ``read_shelf_entry`` preserves all fields.""" def test_basic_round_trip(self, tmp_path: pathlib.Path) -> None: """Reading back a just-written entry must return an identical dict.""" root, _ = _init_repo(tmp_path) entry = _make_entry_dict() _write_shelf_entry(root, entry) out = _read_shelf_entry(root, entry["id"]) assert out is not None assert out["id"] == entry["id"] assert out["name"] == entry["name"] assert out["snapshot"] == entry["snapshot"] def test_creates_file_at_correct_path(self, tmp_path: pathlib.Path) -> None: """The on-disk file must live at .muse/shelf// with no extension.""" root, _ = _init_repo(tmp_path) entry = _make_entry_dict() _write_shelf_entry(root, entry) expected = _shelf_entry_path(root, entry["id"]) assert expected.exists() assert expected.suffix == "" def test_creates_algo_subdirectory(self, tmp_path: pathlib.Path) -> None: root, _ = _init_repo(tmp_path) entry = _make_entry_dict() _write_shelf_entry(root, entry) algo_dir = _shelf_dir(root) / "sha256" assert algo_dir.is_dir() def test_none_fields_preserved(self, tmp_path: pathlib.Path) -> None: root, _ = _init_repo(tmp_path) entry = _make_entry_dict(intent=None) _write_shelf_entry(root, entry) out = _read_shelf_entry(root, entry["id"]) assert out["intent"] is None assert out["expires_at"] is None def test_resumable_true_preserved(self, tmp_path: pathlib.Path) -> None: root, _ = _init_repo(tmp_path) entry = _make_entry_dict(resumable=True) _write_shelf_entry(root, entry) out = _read_shelf_entry(root, entry["id"]) assert out["resumable"] is True def test_tags_preserved(self, tmp_path: pathlib.Path) -> None: root, _ = _init_repo(tmp_path) entry = _make_entry_dict(tags=["audit", "wip"]) _write_shelf_entry(root, entry) out = _read_shelf_entry(root, entry["id"]) assert out["tags"] == ["audit", "wip"] def test_write_is_idempotent(self, tmp_path: pathlib.Path) -> None: """Writing the same entry twice must not raise and must leave exactly one file on disk.""" root, _ = _init_repo(tmp_path) entry = _make_entry_dict() _write_shelf_entry(root, entry) _write_shelf_entry(root, entry) files = [f for f in (_shelf_dir(root) / "sha256").glob("*") if f.is_file() and f.suffix == ""] assert len(files) == 1 def test_read_nonexistent_returns_none(self, tmp_path: pathlib.Path) -> None: root, _ = _init_repo(tmp_path) result = _read_shelf_entry(root, long_id("f" * 64)) assert result is None class TestListShelfEntries: """``list_shelf_entries`` returns all entries sorted by created_at descending.""" def test_empty_dir_returns_empty_list(self, tmp_path: pathlib.Path) -> None: root, _ = _init_repo(tmp_path) assert _list_shelf_entries(root) == [] def test_missing_shelf_dir_returns_empty_list(self, tmp_path: pathlib.Path) -> None: """Listing must not raise when .muse/shelf/ has never been created.""" root, _ = _init_repo(tmp_path) assert not (_shelf_dir(root)).exists() assert _list_shelf_entries(root) == [] def test_single_entry_returned(self, tmp_path: pathlib.Path) -> None: root, _ = _init_repo(tmp_path) entry = _make_entry_dict() _write_shelf_entry(root, entry) entries = _list_shelf_entries(root) assert len(entries) == 1 assert entries[0]["id"] == entry["id"] def test_two_entries_returned(self, tmp_path: pathlib.Path) -> None: root, _ = _init_repo(tmp_path) e1 = _make_entry_dict(name="main/000", created_at="2026-01-01T00:00:00+00:00") e2 = _make_entry_dict(name="main/001", created_at="2026-01-02T00:00:00+00:00") _write_shelf_entry(root, e1) _write_shelf_entry(root, e2) entries = _list_shelf_entries(root) assert len(entries) == 2 def test_sorted_newest_first(self, tmp_path: pathlib.Path) -> None: """Entries are ordered newest-first so CLI list shows recent work at top.""" root, _ = _init_repo(tmp_path) e1 = _make_entry_dict(name="main/000", created_at="2026-01-01T00:00:00+00:00") e2 = _make_entry_dict(name="main/001", created_at="2026-01-03T00:00:00+00:00") e3 = _make_entry_dict(name="main/002", created_at="2026-01-02T00:00:00+00:00") for e in [e1, e2, e3]: _write_shelf_entry(root, e) entries = _list_shelf_entries(root) assert [e["name"] for e in entries] == ["main/001", "main/002", "main/000"] def test_no_shelf_json_created(self, tmp_path: pathlib.Path) -> None: """The legacy shelf.json file must never be created by the new storage layer.""" root, _ = _init_repo(tmp_path) _write_shelf_entry(root, _make_entry_dict()) _list_shelf_entries(root) assert not (muse_dir(root) / "shelf.json").exists() class TestDeleteShelfEntry: """``delete_shelf_entry`` removes the entry file and reports existence.""" def test_delete_existing_returns_true(self, tmp_path: pathlib.Path) -> None: root, _ = _init_repo(tmp_path) entry = _make_entry_dict() _write_shelf_entry(root, entry) assert _delete_shelf_entry(root, entry["id"]) is True def test_delete_removes_file(self, tmp_path: pathlib.Path) -> None: root, _ = _init_repo(tmp_path) entry = _make_entry_dict() _write_shelf_entry(root, entry) _delete_shelf_entry(root, entry["id"]) assert not _shelf_entry_path(root, entry["id"]).exists() def test_delete_nonexistent_returns_false(self, tmp_path: pathlib.Path) -> None: root, _ = _init_repo(tmp_path) assert _delete_shelf_entry(root, long_id("a" * 64)) is False def test_delete_one_leaves_others(self, tmp_path: pathlib.Path) -> None: root, _ = _init_repo(tmp_path) e1 = _make_entry_dict(name="main/000") e2 = _make_entry_dict(name="main/001") _write_shelf_entry(root, e1) _write_shelf_entry(root, e2) _delete_shelf_entry(root, e1["id"]) entries = _list_shelf_entries(root) assert len(entries) == 1 assert entries[0]["id"] == e2["id"] def test_delete_twice_returns_false_second_time(self, tmp_path: pathlib.Path) -> None: root, _ = _init_repo(tmp_path) entry = _make_entry_dict() _write_shelf_entry(root, entry) _delete_shelf_entry(root, entry["id"]) assert _delete_shelf_entry(root, entry["id"]) is False # --------------------------------------------------------------------------- # Tier 3 — End-to-end (CLI) # --------------------------------------------------------------------------- class TestCliShelfSaveHeaderJsonLayout: """``muse shelf save`` must produce per-entry git-header+JSON files, not shelf.json.""" def test_save_creates_entry_file(self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> None: """After ``muse shelf save`` the .muse/shelf/sha256/ directory must contain exactly one extensionless entry file.""" from tests.cli_test_helper import CliRunner runner = CliRunner() monkeypatch.chdir(tmp_path) monkeypatch.setenv("MUSE_REPO_ROOT", str(tmp_path)) runner.invoke(None, ["init"], env={"MUSE_REPO_ROOT": str(tmp_path)}, catch_exceptions=False) (tmp_path / "hello.py").write_text("print('hi')\n") runner.invoke(None, ["commit", "-m", "base"], env={"MUSE_REPO_ROOT": str(tmp_path)}, catch_exceptions=False) (tmp_path / "work.py").write_text("x = 42\n") result = runner.invoke(None, ["shelf", "save", "-m", "wip"], env={"MUSE_REPO_ROOT": str(tmp_path)}, catch_exceptions=False) assert result.exit_code == 0, result.output shelf_files = [f for f in (shelf_dir(tmp_path) / "sha256").glob("*") if f.is_file() and f.suffix == ""] assert len(shelf_files) == 1 def test_save_does_not_create_shelf_json(self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> None: """shelf.json must never be written by the new storage layer.""" from tests.cli_test_helper import CliRunner runner = CliRunner() monkeypatch.chdir(tmp_path) monkeypatch.setenv("MUSE_REPO_ROOT", str(tmp_path)) runner.invoke(None, ["init"], env={"MUSE_REPO_ROOT": str(tmp_path)}, catch_exceptions=False) (tmp_path / "a.py").write_text("a = 1\n") runner.invoke(None, ["commit", "-m", "base"], env={"MUSE_REPO_ROOT": str(tmp_path)}, catch_exceptions=False) (tmp_path / "b.py").write_text("b = 2\n") runner.invoke(None, ["shelf", "save"], env={"MUSE_REPO_ROOT": str(tmp_path)}, catch_exceptions=False) assert not (muse_dir(tmp_path) / "shelf.json").exists() def test_save_json_output_has_id(self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> None: from tests.cli_test_helper import CliRunner runner = CliRunner() monkeypatch.chdir(tmp_path) monkeypatch.setenv("MUSE_REPO_ROOT", str(tmp_path)) runner.invoke(None, ["init"], env={"MUSE_REPO_ROOT": str(tmp_path)}, catch_exceptions=False) (tmp_path / "a.py").write_text("a = 1\n") runner.invoke(None, ["commit", "-m", "base"], env={"MUSE_REPO_ROOT": str(tmp_path)}, catch_exceptions=False) (tmp_path / "b.py").write_text("b = 2\n") result = runner.invoke(None, ["shelf", "save", "--json"], env={"MUSE_REPO_ROOT": str(tmp_path)}, catch_exceptions=False) data = json.loads(result.output) assert data["id"] is not None assert data["id"].startswith("sha256:") def test_drop_removes_entry_file(self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> None: from tests.cli_test_helper import CliRunner runner = CliRunner() env = {"MUSE_REPO_ROOT": str(tmp_path)} monkeypatch.chdir(tmp_path) runner.invoke(None, ["init"], env=env, catch_exceptions=False) (tmp_path / "a.py").write_text("a = 1\n") runner.invoke(None, ["commit", "-m", "base"], env=env, catch_exceptions=False) (tmp_path / "b.py").write_text("b = 2\n") save_result = runner.invoke(None, ["shelf", "save", "--json"], env=env, catch_exceptions=False) name = json.loads(save_result.output)["name"] runner.invoke(None, ["shelf", "drop", name], env=env, catch_exceptions=False) remaining = [f for f in (shelf_dir(tmp_path) / "sha256").glob("*") if f.is_file() and f.suffix == ""] assert len(remaining) == 0 def test_list_returns_saved_entry(self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> None: from tests.cli_test_helper import CliRunner runner = CliRunner() env = {"MUSE_REPO_ROOT": str(tmp_path)} monkeypatch.chdir(tmp_path) runner.invoke(None, ["init"], env=env, catch_exceptions=False) (tmp_path / "a.py").write_text("a = 1\n") runner.invoke(None, ["commit", "-m", "base"], env=env, catch_exceptions=False) (tmp_path / "b.py").write_text("b = 2\n") runner.invoke(None, ["shelf", "save", "-m", "my work"], env=env, catch_exceptions=False) result = runner.invoke(None, ["shelf", "list", "--json"], env=env, catch_exceptions=False) data = json.loads(result.output) assert len(data["entries"]) == 1 assert data["entries"][0]["intent"] == "my work" def test_pop_removes_entry_and_restores_file(self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> None: from tests.cli_test_helper import CliRunner runner = CliRunner() env = {"MUSE_REPO_ROOT": str(tmp_path)} monkeypatch.chdir(tmp_path) runner.invoke(None, ["init"], env=env, catch_exceptions=False) (tmp_path / "a.py").write_text("a = 1\n") runner.invoke(None, ["commit", "-m", "base"], env=env, catch_exceptions=False) (tmp_path / "b.py").write_text("restored content\n") save_result = runner.invoke(None, ["shelf", "save", "--json"], env=env, catch_exceptions=False) name = json.loads(save_result.output)["name"] assert not (tmp_path / "b.py").exists() runner.invoke(None, ["shelf", "pop", name], env=env, catch_exceptions=False) assert (tmp_path / "b.py").read_text() == "restored content\n" remaining = [f for f in (shelf_dir(tmp_path) / "sha256").glob("*") if f.is_file() and f.suffix == ""] assert len(remaining) == 0 # --------------------------------------------------------------------------- # Tier 4 — Stress # --------------------------------------------------------------------------- class TestStressShelfStorage: """Storage layer remains correct under high entry volume.""" def test_100_entries_all_written(self, tmp_path: pathlib.Path) -> None: """Writing 100 entries must produce 100 distinct entry files.""" root, _ = _init_repo(tmp_path) entries = [ _make_entry_dict( name=f"main/{i:03d}", created_at=f"2026-01-{(i % 28) + 1:02d}T00:00:00+00:00", snapshot={f"file_{i}.py": long_id(hex(i)[2:].zfill(64))}, ) for i in range(100) ] for e in entries: _write_shelf_entry(root, e) files = [f for f in (_shelf_dir(root) / "sha256").glob("*") if f.is_file() and f.suffix == ""] assert len(files) == 100 def test_100_entries_all_listable(self, tmp_path: pathlib.Path) -> None: root, _ = _init_repo(tmp_path) ids = set() for i in range(100): e = _make_entry_dict( name=f"main/{i:03d}", snapshot={f"f{i}.py": long_id(hex(i)[2:].zfill(64))}, ) _write_shelf_entry(root, e) ids.add(e["id"]) listed = _list_shelf_entries(root) assert len(listed) == 100 assert {e["id"] for e in listed} == ids def test_concurrent_writes_no_corruption(self, tmp_path: pathlib.Path) -> None: """Concurrent writes from multiple threads must each produce their own file without corrupting one another — the atomic rename guarantee.""" root, _ = _init_repo(tmp_path) errors: list[Exception] = [] def write_entry(i: int) -> None: try: e = _make_entry_dict( name=f"thread/{i:03d}", snapshot={f"t{i}.py": long_id(hex(i * 7)[2:].zfill(64))}, ) _write_shelf_entry(root, e) except Exception as exc: errors.append(exc) threads = [threading.Thread(target=write_entry, args=(i,)) for i in range(20)] for t in threads: t.start() for t in threads: t.join() assert not errors, f"Concurrent write errors: {errors}" files = [f for f in (_shelf_dir(root) / "sha256").glob("*") if f.is_file() and f.suffix == ""] assert len(files) == 20 # --------------------------------------------------------------------------- # Tier 5 — State # --------------------------------------------------------------------------- class TestShelfStateMachine: """State transitions: empty → save → list → drop → empty.""" def test_empty_to_save(self, tmp_path: pathlib.Path) -> None: root, _ = _init_repo(tmp_path) assert _list_shelf_entries(root) == [] entry = _make_entry_dict() _write_shelf_entry(root, entry) assert len(_list_shelf_entries(root)) == 1 def test_save_to_drop_to_empty(self, tmp_path: pathlib.Path) -> None: root, _ = _init_repo(tmp_path) entry = _make_entry_dict() _write_shelf_entry(root, entry) _delete_shelf_entry(root, entry["id"]) assert _list_shelf_entries(root) == [] def test_two_saves_then_one_drop(self, tmp_path: pathlib.Path) -> None: root, _ = _init_repo(tmp_path) e1 = _make_entry_dict(name="main/000") e2 = _make_entry_dict(name="main/001") _write_shelf_entry(root, e1) _write_shelf_entry(root, e2) _delete_shelf_entry(root, e1["id"]) remaining = _list_shelf_entries(root) assert len(remaining) == 1 assert remaining[0]["name"] == "main/001" def test_listing_after_no_writes_is_empty(self, tmp_path: pathlib.Path) -> None: """list_shelf_entries must tolerate a repo that has never had a shelf entry.""" root, _ = _init_repo(tmp_path) assert _list_shelf_entries(root) == [] def test_overwrite_same_entry_is_stable(self, tmp_path: pathlib.Path) -> None: """Writing the same entry twice must leave a consistent readable state.""" root, _ = _init_repo(tmp_path) entry = _make_entry_dict() _write_shelf_entry(root, entry) _write_shelf_entry(root, entry) entries = _list_shelf_entries(root) assert len(entries) == 1 assert entries[0]["id"] == entry["id"] # --------------------------------------------------------------------------- # Tier 6 — Integrity # --------------------------------------------------------------------------- class TestShelfStorageIntegrity: """Content-address correctness and tamper detection.""" def test_file_path_encodes_entry_id(self, tmp_path: pathlib.Path) -> None: """The entry filename must be the hex portion of entry['id']. A mismatch would mean the file is unreachable by ID — a silent data loss.""" root, _ = _init_repo(tmp_path) entry = _make_entry_dict() _write_shelf_entry(root, entry) _, hex_id = split_id(entry["id"]) expected_name = hex_id files = [f for f in (_shelf_dir(root) / "sha256").glob("*") if f.is_file() and f.suffix == ""] assert len(files) == 1 assert files[0].name == expected_name def test_read_back_id_matches_filename(self, tmp_path: pathlib.Path) -> None: """The id field inside the entry must match the filename — verifying no silent ID drift between serialisation and storage.""" root, _ = _init_repo(tmp_path) entry = _make_entry_dict() _write_shelf_entry(root, entry) out = _read_shelf_entry(root, entry["id"]) _, hex_id = split_id(out["id"]) expected_path = _shelf_dir(root) / "sha256" / hex_id assert expected_path.exists() def test_tampered_bytes_causes_rejection(self, tmp_path: pathlib.Path) -> None: """Flipping a byte in the entry file must cause read_shelf_entry to return None rather than silently serving corrupt data.""" root, _ = _init_repo(tmp_path) entry = _make_entry_dict() _write_shelf_entry(root, entry) p = _shelf_entry_path(root, entry["id"]) raw = bytearray(p.read_bytes()) raw[-4] ^= 0xFF p.write_bytes(bytes(raw)) result = _read_shelf_entry(root, entry["id"]) assert result is None def test_corrupt_entry_skipped_by_list(self, tmp_path: pathlib.Path) -> None: """A corrupt entry file must be silently skipped by list_shelf_entries so one bad file does not prevent access to all other entries.""" root, _ = _init_repo(tmp_path) good = _make_entry_dict(name="main/000") _write_shelf_entry(root, good) # Write a corrupt file directly into the shelf directory (no extension). bad_path = _shelf_dir(root) / "sha256" / f"{'a' * 62}ff" bad_path.write_bytes(b"\xff\xfe garbage data \x00") entries = _list_shelf_entries(root) assert len(entries) == 1 assert entries[0]["id"] == good["id"] def test_empty_entry_file_skipped_by_list(self, tmp_path: pathlib.Path) -> None: root, _ = _init_repo(tmp_path) (_shelf_dir(root) / "sha256").mkdir(parents=True, exist_ok=True) empty = _shelf_dir(root) / "sha256" / f"{'0' * 64}" empty.write_bytes(b"") assert _list_shelf_entries(root) == [] def test_write_creates_no_temp_files(self, tmp_path: pathlib.Path) -> None: """After write_shelf_entry completes, no temp files must remain in .muse/shelf/sha256/ — atomic rename must clean up on success.""" root, _ = _init_repo(tmp_path) entry = _make_entry_dict() _write_shelf_entry(root, entry) algo_dir = _shelf_dir(root) / "sha256" all_files = list(algo_dir.iterdir()) assert all(f.suffix == "" for f in all_files) # --------------------------------------------------------------------------- # Tier 7 — Performance # --------------------------------------------------------------------------- class TestShelfStoragePerformance: """Storage operations must complete within latency budgets.""" def test_write_entry_under_50ms(self, tmp_path: pathlib.Path) -> None: """A single write_shelf_entry call must complete within 50 ms. Shelf save is on the critical path of ``muse shelf save`` — users feel latency > 50 ms as sluggishness.""" root, _ = _init_repo(tmp_path) entry = _make_entry_dict() start = time.perf_counter() _write_shelf_entry(root, entry) elapsed_ms = (time.perf_counter() - start) * 1000 assert elapsed_ms < 50, f"write_shelf_entry took {elapsed_ms:.1f} ms" def test_read_entry_under_10ms(self, tmp_path: pathlib.Path) -> None: """A single read_shelf_entry call must complete within 10 ms. This is a hot path for ``muse shelf pop`` and ``muse shelf read``.""" root, _ = _init_repo(tmp_path) entry = _make_entry_dict() _write_shelf_entry(root, entry) start = time.perf_counter() _read_shelf_entry(root, entry["id"]) elapsed_ms = (time.perf_counter() - start) * 1000 assert elapsed_ms < 10, f"read_shelf_entry took {elapsed_ms:.1f} ms" def test_list_50_entries_under_500ms(self, tmp_path: pathlib.Path) -> None: """list_shelf_entries for 50 entries must complete within 500 ms. The old shelf.json approach had to parse the entire JSON array; per-file reads should be faster due to smaller per-read payload.""" root, _ = _init_repo(tmp_path) for i in range(50): e = _make_entry_dict( name=f"main/{i:03d}", snapshot={f"f{i}.py": long_id(hex(i)[2:].zfill(64))}, ) _write_shelf_entry(root, e) start = time.perf_counter() entries = _list_shelf_entries(root) elapsed_ms = (time.perf_counter() - start) * 1000 assert len(entries) == 50 assert elapsed_ms < 500, f"list_shelf_entries took {elapsed_ms:.1f} ms" def test_delete_entry_under_10ms(self, tmp_path: pathlib.Path) -> None: root, _ = _init_repo(tmp_path) entry = _make_entry_dict() _write_shelf_entry(root, entry) start = time.perf_counter() _delete_shelf_entry(root, entry["id"]) elapsed_ms = (time.perf_counter() - start) * 1000 assert elapsed_ms < 10, f"delete_shelf_entry took {elapsed_ms:.1f} ms" # --------------------------------------------------------------------------- # Tier 8 — Security # --------------------------------------------------------------------------- class TestShelfStorageSecurity: """Guards against path traversal, symlink attacks, and oversized payloads.""" def test_symlinked_shelf_dir_rejected_on_write(self, tmp_path: pathlib.Path) -> None: """If .muse/shelf/ is a symlink, write_shelf_entry must raise rather than follow it — prevents redirect of shelf writes to attacker paths.""" root, _ = _init_repo(tmp_path) attacker_dir = tmp_path / "attacker" attacker_dir.mkdir() shelf = shelf_dir(root) shelf.symlink_to(attacker_dir) entry = _make_entry_dict() with pytest.raises((ValueError, OSError)): _write_shelf_entry(root, entry) def test_entry_id_cannot_escape_shelf_dir(self, tmp_path: pathlib.Path) -> None: """shelf_entry_path must always resolve inside .muse/shelf/. A crafted entry_id containing path separators must not produce a path that escapes the shelf directory.""" root, _ = _init_repo(tmp_path) # Construct a traversal attempt: the hex portion of split_id must be # a bare hex string — any non-hex content is a sign of tampering. # The path helper itself should produce a path inside shelf_dir. # We verify by ensuring the resolved path starts with shelf_dir. legitimate_id = long_id("a" * 64) p = _shelf_entry_path(root, legitimate_id) assert str(p).startswith(str(_shelf_dir(root))) def test_oversized_entry_rejected_on_read(self, tmp_path: pathlib.Path) -> None: """An oversized shelf entry file (attacker injected) must be rejected by read_shelf_entry to prevent memory exhaustion.""" from muse.core.io import MAX_MSGPACK_BYTES root, _ = _init_repo(tmp_path) (_shelf_dir(root) / "sha256").mkdir(parents=True, exist_ok=True) fake_id_str = long_id("e" * 64) p = _shelf_entry_path(root, fake_id_str) # Write a file larger than the allowed limit. p.write_bytes(b"\x00" * (MAX_MSGPACK_BYTES + 1)) result = _read_shelf_entry(root, fake_id_str) assert result is None def test_non_dict_payload_rejected_on_read(self, tmp_path: pathlib.Path) -> None: """A shelf entry file whose top-level JSON value is not a dict (e.g. a list) must be rejected — guards against type-confusion attacks.""" root, _ = _init_repo(tmp_path) (_shelf_dir(root) / "sha256").mkdir(parents=True, exist_ok=True) fake_id_str = long_id("f" * 64) p = _shelf_entry_path(root, fake_id_str) # Write a valid shelf header+JSON framing but with a non-dict payload. payload = _json.dumps(["not", "a", "dict"]).encode("utf-8") header = f"shelf {len(payload)}\0".encode("utf-8") p.write_bytes(header + payload) result = _read_shelf_entry(root, fake_id_str) assert result is None def test_shelf_dir_not_traversable_via_list(self, tmp_path: pathlib.Path) -> None: """list_shelf_entries must only glob inside .muse/shelf//*. A file placed directly in .muse/shelf/ (wrong level) must not appear.""" root, _ = _init_repo(tmp_path) # Place a valid-looking entry directly in .muse/shelf/ (wrong level — no algo dir). (_shelf_dir(root)).mkdir(parents=True, exist_ok=True) payload = _json.dumps({"id": long_id("a" * 64)}).encode("utf-8") header = f"shelf {len(payload)}\0".encode("utf-8") rogue = _shelf_dir(root) / ("a" * 64) rogue.write_bytes(header + payload) assert _list_shelf_entries(root) == []