"""Tests for muse.core.symbol_cache. Coverage -------- - Cache hit: get() returns stored tree without calling parse_symbols. - Cache miss: get() returns None; put() stores a tree; subsequent get() hits. - Content-addressed key: different content → different key → independent entries. - Persistence: save() / load() round-trip via .muse/cache/symbols.json. - Atomic write: tmp file replaced; no corruption. - empty(): no-op — save() is a no-op without a muse_dir. - load_symbol_cache() convenience helper. - Corrupt file: gracefully returns empty cache. - Wrong version: gracefully returns empty cache. - prune(): removes stale entries, marks dirty. - Integration with symbols_for_snapshot: warm cache skips parse_symbols. - Working-tree key: disk bytes SHA-256 ≠ object_id when file is edited. """ from __future__ import annotations import pathlib from unittest.mock import patch import pytest from muse.core.types import MsgpackValue from muse.core.symbol_cache import ( SymbolCache, _object_id_of, _is_symbol_record, load_symbol_cache, ) from muse.core.types import blob_id, fake_id from muse.core.object_store import write_object from muse.core.paths import muse_dir from muse.plugins.code.ast_parser import SymbolKind, SymbolRecord, SymbolTree # --------------------------------------------------------------------------- # Fixtures # --------------------------------------------------------------------------- def _make_muse_dir(tmp_path: pathlib.Path) -> pathlib.Path: dot_muse = muse_dir(tmp_path) dot_muse.mkdir() (dot_muse / "cache").mkdir() return dot_muse def _make_record( name: str = "my_func", kind: SymbolKind = "function", lineno: int = 1, end_lineno: int = 5, ) -> SymbolRecord: return SymbolRecord( kind=kind, name=name, qualified_name=name, content_id=fake_id(name), body_hash=blob_id(b"body"), signature_id=blob_id(b"sig"), metadata_id=blob_id(b"meta"), canonical_key=name, lineno=lineno, end_lineno=end_lineno, ) def _make_tree(*names: str) -> SymbolTree: return {f"billing.py::{n}": _make_record(n) for n in names} def _make_raw(name: str = "my_func", kind: str = "function", lineno: int = 1, end_lineno: int = 5) -> MsgpackDict: """Build a raw JSON-compatible record dict for guard testing.""" return { "kind": kind, "name": name, "qualified_name": name, "content_id": "a" * 64, "body_hash": "b" * 64, "signature_id": "c" * 64, "metadata_id": "", "canonical_key": name, "lineno": lineno, "end_lineno": end_lineno, } # --------------------------------------------------------------------------- # _object_id_of # --------------------------------------------------------------------------- class TestObjectIdOf: def test_sha256_of_bytes(self) -> None: raw = b"hello" assert _object_id_of(raw) == blob_id(b"hello") def test_different_content_different_id(self) -> None: assert _object_id_of(b"a") != _object_id_of(b"b") def test_same_content_same_id(self) -> None: assert _object_id_of(b"stable") == _object_id_of(b"stable") # --------------------------------------------------------------------------- # _is_symbol_record # --------------------------------------------------------------------------- class TestIsSymbolRecord: def test_valid_record_passes(self) -> None: assert _is_symbol_record(_make_raw()) def test_not_a_dict_fails(self) -> None: assert not _is_symbol_record("string") assert not _is_symbol_record(42) assert not _is_symbol_record(None) def test_missing_field_fails(self) -> None: rec = _make_raw() del rec["kind"] assert not _is_symbol_record(rec) def test_wrong_type_for_str_field_fails(self) -> None: rec = _make_raw() rec["name"] = 123 # should be str assert not _is_symbol_record(rec) def test_wrong_type_for_int_field_fails(self) -> None: rec = _make_raw() rec["lineno"] = "not_an_int" assert not _is_symbol_record(rec) def test_invalid_kind_fails(self) -> None: rec = _make_raw() rec["kind"] = "not_a_valid_kind" assert not _is_symbol_record(rec) # --------------------------------------------------------------------------- # SymbolCache — in-memory operations # --------------------------------------------------------------------------- class TestSymbolCacheMemory: def test_get_miss_returns_none(self) -> None: cache = SymbolCache.empty() assert cache.get("nonexistent_id") is None def test_put_then_get_hits(self) -> None: cache = SymbolCache.empty() tree = _make_tree("run", "setup") cache.put("abc123", tree) assert cache.get("abc123") == tree def test_put_marks_dirty(self) -> None: cache = SymbolCache.empty() assert not cache._dirty cache.put("id1", _make_tree("fn")) assert cache._dirty def test_different_ids_independent(self) -> None: cache = SymbolCache.empty() tree_a = _make_tree("alpha") tree_b = _make_tree("beta") cache.put("id_a", tree_a) cache.put("id_b", tree_b) assert cache.get("id_a") == tree_a assert cache.get("id_b") == tree_b def test_size_property(self) -> None: cache = SymbolCache.empty() assert cache.size == 0 cache.put("x", _make_tree("f")) cache.put("y", _make_tree("g")) assert cache.size == 2 def test_prune_removes_stale(self) -> None: cache = SymbolCache.empty() cache.put("keep", _make_tree("f")) cache.put("drop", _make_tree("g")) cache.prune({"keep"}) assert cache.get("keep") is not None assert cache.get("drop") is None assert cache._dirty def test_prune_no_stale_not_dirty(self) -> None: cache = SymbolCache.empty() cache.put("keep", _make_tree("f")) cache._dirty = False # reset after put cache.prune({"keep", "other"}) assert not cache._dirty def test_empty_save_is_noop(self, tmp_path: pathlib.Path) -> None: cache = SymbolCache.empty() cache.put("id", _make_tree("f")) cache.save() # should not raise — muse_dir is None assert not (tmp_path / ".muse" / "cache" / "symbols.json").exists() # --------------------------------------------------------------------------- # SymbolCache — persistence (save / load round-trip) # --------------------------------------------------------------------------- class TestSymbolCachePersistence: def test_save_creates_file(self, tmp_path: pathlib.Path) -> None: muse_dir = _make_muse_dir(tmp_path) cache = SymbolCache.load(muse_dir) cache.put("id1", _make_tree("fn_a")) cache.save() assert (muse_dir / "cache" / "symbols.json").is_file() def test_save_then_load_round_trip(self, tmp_path: pathlib.Path) -> None: muse_dir = _make_muse_dir(tmp_path) tree = _make_tree("compute", "validate") cache = SymbolCache.load(muse_dir) cache.put("deadbeef" * 8, tree) cache.save() loaded = SymbolCache.load(muse_dir) result = loaded.get("deadbeef" * 8) assert result is not None assert set(result) == set(tree) first_addr = next(iter(tree)) assert result[first_addr]["kind"] == tree[first_addr]["kind"] assert result[first_addr]["name"] == tree[first_addr]["name"] assert result[first_addr]["lineno"] == tree[first_addr]["lineno"] assert result[first_addr]["end_lineno"] == tree[first_addr]["end_lineno"] def test_save_no_dirty_skips_write(self, tmp_path: pathlib.Path) -> None: muse_dir = _make_muse_dir(tmp_path) cache = SymbolCache.load(muse_dir) cache.save() # _dirty is False — no file should appear assert not (muse_dir / "cache" / "symbols.json").is_file() def test_save_dirty_false_after_save(self, tmp_path: pathlib.Path) -> None: muse_dir = _make_muse_dir(tmp_path) cache = SymbolCache.load(muse_dir) cache.put("id", _make_tree("fn")) cache.save() assert not cache._dirty def test_multiple_saves_second_is_noop(self, tmp_path: pathlib.Path) -> None: muse_dir = _make_muse_dir(tmp_path) cache = SymbolCache.load(muse_dir) cache.put("id", _make_tree("fn")) cache.save() mtime1 = (muse_dir / "cache" / "symbols.json").stat().st_mtime_ns cache.save() # not dirty — should not touch file mtime2 = (muse_dir / "cache" / "symbols.json").stat().st_mtime_ns assert mtime1 == mtime2 def test_atomic_write_no_tmp_leftover(self, tmp_path: pathlib.Path) -> None: muse_dir = _make_muse_dir(tmp_path) cache = SymbolCache.load(muse_dir) cache.put("id", _make_tree("fn")) cache.save() assert not any((muse_dir / "cache").glob("*.tmp")) def test_orphaned_tmp_swept_on_startup(self, tmp_path: pathlib.Path) -> None: """A stale ``.symbols_*.tmp`` left by a crash is removed by the startup sweep.""" from muse.core.repo import _cleanup_muse_dir_temps muse_dir = _make_muse_dir(tmp_path) orphan = muse_dir / "cache" / ".symbols_abc123.tmp" orphan.write_bytes(b"stale") _cleanup_muse_dir_temps(muse_dir) assert not orphan.exists() # --------------------------------------------------------------------------- # SymbolCache — graceful error handling on load # --------------------------------------------------------------------------- class TestSymbolCacheGracefulLoad: def test_absent_file_returns_empty(self, tmp_path: pathlib.Path) -> None: muse_dir = _make_muse_dir(tmp_path) cache = SymbolCache.load(muse_dir) assert cache.size == 0 def test_corrupt_file_returns_empty(self, tmp_path: pathlib.Path) -> None: import json as _json muse_dir = _make_muse_dir(tmp_path) (muse_dir / "cache" / "symbols.json").write_bytes(b"not valid JSON !!!") cache = SymbolCache.load(muse_dir) assert cache.size == 0 def test_wrong_version_returns_empty(self, tmp_path: pathlib.Path) -> None: import json as _json muse_dir = _make_muse_dir(tmp_path) doc = {"version": 999, "entries": {}} (muse_dir / "cache" / "symbols.json").write_bytes( _json.dumps(doc).encode() ) cache = SymbolCache.load(muse_dir) assert cache.size == 0 def test_invalid_entry_skipped(self, tmp_path: pathlib.Path) -> None: """A single malformed tree entry is skipped; valid entries survive.""" import json as _json from muse.core.symbol_cache import _CACHE_VERSION muse_dir = _make_muse_dir(tmp_path) good_tree = {"billing.py::run": dict(_make_record("run"))} bad_tree = {"billing.py::broken": {"kind": "INVALID_KIND", "name": 123}} doc = { "version": _CACHE_VERSION, "entries": { "good_id": good_tree, "bad_id": bad_tree, }, } (muse_dir / "cache" / "symbols.json").write_bytes(_json.dumps(doc).encode()) cache = SymbolCache.load(muse_dir) assert cache.get("good_id") is not None assert cache.get("bad_id") is None def test_load_symbol_cache_no_muse_dir(self, tmp_path: pathlib.Path) -> None: """load_symbol_cache returns empty when there is no .muse directory.""" cache = load_symbol_cache(tmp_path) assert cache.size == 0 def test_load_symbol_cache_with_muse_dir(self, tmp_path: pathlib.Path) -> None: muse_dir = _make_muse_dir(tmp_path) tree = _make_tree("fn") seed = SymbolCache.load(muse_dir) seed.put("myid", tree) seed.save() cache = load_symbol_cache(tmp_path) assert cache.get("myid") is not None # --------------------------------------------------------------------------- # Integration: symbols_for_snapshot uses cache # --------------------------------------------------------------------------- class TestSymbolsForSnapshotCache: """Verify that symbols_for_snapshot calls parse_symbols only on cache miss.""" def _make_manifest( self, tmp_path: pathlib.Path, content: bytes = b"def run(): pass\n" ) -> tuple[pathlib.Path, dict[str, str]]: """Write a .muse object and return (root, manifest).""" root = tmp_path / "repo" root.mkdir() muse_dir(root).mkdir() oid = blob_id(content) write_object(root, oid, content) manifest = {"billing.py": oid} return root, manifest def test_cold_cache_calls_parse(self, tmp_path: pathlib.Path) -> None: root, manifest = self._make_manifest(tmp_path) from muse.plugins.code._query import symbols_for_snapshot with patch("muse.plugins.code._query.parse_symbols", wraps=__import__("muse.plugins.code.ast_parser", fromlist=["parse_symbols"]).parse_symbols) as mock_parse: result = symbols_for_snapshot(root, manifest) assert mock_parse.call_count >= 1 def test_warm_cache_skips_parse(self, tmp_path: pathlib.Path) -> None: content = b"def run(): pass\n" root, manifest = self._make_manifest(tmp_path, content) from muse.plugins.code._query import symbols_for_snapshot # First call — populates cache symbols_for_snapshot(root, manifest) # Second call — should hit cache, never call parse_symbols with patch("muse.plugins.code._query.parse_symbols") as mock_parse: result2 = symbols_for_snapshot(root, manifest) mock_parse.assert_not_called() assert "billing.py" in result2 def test_working_tree_edit_invalidates_cache(self, tmp_path: pathlib.Path) -> None: """Editing a file produces a new SHA-256 → cache miss → re-parse.""" content_v1 = b"def run(): pass\n" content_v2 = b"def run(): pass\ndef brand_new(): pass\n" root, manifest = self._make_manifest(tmp_path, content_v1) # Write v1 to disk (root / "billing.py").write_bytes(content_v1) from muse.plugins.code._query import symbols_for_snapshot result1 = symbols_for_snapshot(root, manifest, workdir=root) syms1 = set(result1.get("billing.py", {}).keys()) # Edit file on disk (v2) — cache key changes because SHA-256 changes (root / "billing.py").write_bytes(content_v2) result2 = symbols_for_snapshot(root, manifest, workdir=root) syms2 = set(result2.get("billing.py", {}).keys()) # v2 has brand_new — the working-tree edit was picked up assert any("brand_new" in addr for addr in syms2), ( f"Expected brand_new in {syms2}" )