test_core_symbol_cache.py
python
sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b
fix: try fetch/presign before fetch/mpack to avoid Cloudfla…
Sonnet 4.6
patch
6 days ago
| 1 | """Tests for muse.core.symbol_cache. |
| 2 | |
| 3 | Coverage |
| 4 | -------- |
| 5 | - Cache hit: get() returns stored tree without calling parse_symbols. |
| 6 | - Cache miss: get() returns None; put() stores a tree; subsequent get() hits. |
| 7 | - Content-addressed key: different content → different key → independent entries. |
| 8 | - Persistence: save() / load() round-trip via .muse/cache/symbols.json. |
| 9 | - Atomic write: tmp file replaced; no corruption. |
| 10 | - empty(): save() is a no-op without a muse_dir. |
| 11 | - load_symbol_cache() convenience helper. |
| 12 | - Corrupt file: gracefully returns empty cache. |
| 13 | - Wrong version: gracefully returns empty cache. |
| 14 | - prune(): removes stale entries, marks dirty. |
| 15 | - Integration with symbols_for_snapshot: warm cache skips parse_symbols. |
| 16 | - Working-tree key: disk bytes SHA-256 ≠ object_id when file is edited. |
| 17 | """ |
| 18 | |
| 19 | from __future__ import annotations |
| 20 | |
| 21 | import pathlib |
| 22 | from unittest.mock import patch |
| 23 | |
| 24 | import pytest |
| 25 | |
| 26 | from muse.core.types import MsgpackValue |
| 27 | from muse.core.symbol_cache import ( |
| 28 | SymbolCache, |
| 29 | _object_id_of, |
| 30 | _is_symbol_record, |
| 31 | load_symbol_cache, |
| 32 | ) |
| 33 | from muse.core.types import blob_id, fake_id |
| 34 | from muse.core.object_store import write_object |
| 35 | from muse.core.paths import muse_dir |
| 36 | from muse.plugins.code.ast_parser import SymbolKind, SymbolRecord, SymbolTree |
| 37 | |
| 38 | |
| 39 | # --------------------------------------------------------------------------- |
| 40 | # Fixtures |
| 41 | # --------------------------------------------------------------------------- |
| 42 | |
| 43 | |
def _make_muse_dir(tmp_path: pathlib.Path) -> pathlib.Path:
    """Create the muse directory for *tmp_path* plus its ``cache`` subdirectory.

    Returns the muse directory itself (the path produced by ``muse_dir``).
    """
    root_dir = muse_dir(tmp_path)
    # Parent first, then the child; neither is expected to exist beforehand.
    for directory in (root_dir, root_dir / "cache"):
        directory.mkdir()
    return root_dir
| 49 | |
| 50 | |
def _make_record(
    name: str = "my_func",
    kind: SymbolKind = "function",
    lineno: int = 1,
    end_lineno: int = 5,
) -> SymbolRecord:
    """Build a ``SymbolRecord`` for tests.

    *name* doubles as the qualified name and canonical key; the content id is
    derived from *name* via ``fake_id`` while the body / signature / metadata
    ids are ``blob_id`` values of fixed byte strings.
    """
    content = fake_id(name)
    body = blob_id(b"body")
    signature = blob_id(b"sig")
    metadata = blob_id(b"meta")
    # Keyword order is kept as-is so the resulting record lists its fields
    # in the same order as before.
    return SymbolRecord(
        kind=kind,
        name=name,
        qualified_name=name,
        content_id=content,
        body_hash=body,
        signature_id=signature,
        metadata_id=metadata,
        canonical_key=name,
        lineno=lineno,
        end_lineno=end_lineno,
    )
| 69 | |
| 70 | |
def _make_tree(*names: str) -> SymbolTree:
    """Return a symbol tree with one default record per name.

    Each entry is keyed by the address ``billing.py::<name>``.
    """
    tree: SymbolTree = {}
    for symbol_name in names:
        tree[f"billing.py::{symbol_name}"] = _make_record(symbol_name)
    return tree
| 73 | |
| 74 | |
def _make_raw(
    name: str = "my_func",
    kind: str = "function",
    lineno: int = 1,
    end_lineno: int = 5,
) -> dict[str, MsgpackValue]:
    """Build a raw JSON-compatible record dict for guard testing.

    The result is a plain, freshly allocated ``dict`` (not a ``SymbolRecord``),
    so callers can delete keys or assign wrongly typed values to exercise
    ``_is_symbol_record``.

    Args:
        name: Used for ``name``, ``qualified_name`` and ``canonical_key``.
        kind: Stored verbatim under ``kind``; typed as ``str`` so invalid
            kinds can be passed deliberately.
        lineno: Stored under ``lineno``.
        end_lineno: Stored under ``end_lineno``.

    Returns:
        A new dict containing the ten record fields in a fixed order.
    """
    # NOTE: the return annotation used to be ``MsgpackDict``, a name this
    # module never imports; ``MsgpackValue`` is imported at the top.
    return {
        "kind": kind,
        "name": name,
        "qualified_name": name,
        "content_id": "a" * 64,
        "body_hash": "b" * 64,
        "signature_id": "c" * 64,
        "metadata_id": "",
        "canonical_key": name,
        "lineno": lineno,
        "end_lineno": end_lineno,
    }
| 89 | |
| 90 | |
| 91 | # --------------------------------------------------------------------------- |
| 92 | # _object_id_of |
| 93 | # --------------------------------------------------------------------------- |
| 94 | |
| 95 | |
class TestObjectIdOf:
    """``_object_id_of`` agrees with ``blob_id`` and is a stable function of content."""

    def test_sha256_of_bytes(self) -> None:
        """The id of a byte string equals ``blob_id`` of the same bytes."""
        payload = b"hello"
        expected = blob_id(b"hello")
        assert _object_id_of(payload) == expected

    def test_different_content_different_id(self) -> None:
        """Distinct inputs yield distinct ids."""
        id_a = _object_id_of(b"a")
        id_b = _object_id_of(b"b")
        assert id_a != id_b

    def test_same_content_same_id(self) -> None:
        """Hashing the same bytes twice yields the same id."""
        first = _object_id_of(b"stable")
        second = _object_id_of(b"stable")
        assert first == second
| 106 | |
| 107 | |
| 108 | # --------------------------------------------------------------------------- |
| 109 | # _is_symbol_record |
| 110 | # --------------------------------------------------------------------------- |
| 111 | |
| 112 | |
class TestIsSymbolRecord:
    """Checks for the ``_is_symbol_record`` guard applied to raw record dicts."""

    def test_valid_record_passes(self) -> None:
        """A complete, well-typed raw record is accepted."""
        candidate = _make_raw()
        assert _is_symbol_record(candidate)

    def test_not_a_dict_fails(self) -> None:
        """Non-dict values are rejected."""
        for bogus in ("string", 42, None):
            assert not _is_symbol_record(bogus)

    def test_missing_field_fails(self) -> None:
        """Dropping the ``kind`` field makes the record invalid."""
        candidate = _make_raw()
        candidate.pop("kind")
        assert not _is_symbol_record(candidate)

    def test_wrong_type_for_str_field_fails(self) -> None:
        """An int where ``name`` should be a str is rejected."""
        candidate = {**_make_raw(), "name": 123}
        assert not _is_symbol_record(candidate)

    def test_wrong_type_for_int_field_fails(self) -> None:
        """A str where ``lineno`` should be an int is rejected."""
        candidate = {**_make_raw(), "lineno": "not_an_int"}
        assert not _is_symbol_record(candidate)

    def test_invalid_kind_fails(self) -> None:
        """An unrecognised ``kind`` value is rejected."""
        candidate = {**_make_raw(), "kind": "not_a_valid_kind"}
        assert not _is_symbol_record(candidate)
| 141 | |
| 142 | |
| 143 | # --------------------------------------------------------------------------- |
| 144 | # SymbolCache — in-memory operations |
| 145 | # --------------------------------------------------------------------------- |
| 146 | |
| 147 | |
class TestSymbolCacheMemory:
    """In-memory behaviour of caches created with ``SymbolCache.empty()``."""

    def test_get_miss_returns_none(self) -> None:
        """Looking up an id that was never stored yields ``None``."""
        store = SymbolCache.empty()
        lookup = store.get("nonexistent_id")
        assert lookup is None

    def test_put_then_get_hits(self) -> None:
        """A stored tree is returned for the id it was stored under."""
        store = SymbolCache.empty()
        expected = _make_tree("run", "setup")
        store.put("abc123", expected)
        assert store.get("abc123") == expected

    def test_put_marks_dirty(self) -> None:
        """``put`` flips the private ``_dirty`` flag from False to True."""
        store = SymbolCache.empty()
        assert not store._dirty
        store.put("id1", _make_tree("fn"))
        assert store._dirty

    def test_different_ids_independent(self) -> None:
        """Entries stored under different ids do not overwrite each other."""
        store = SymbolCache.empty()
        trees = {"id_a": _make_tree("alpha"), "id_b": _make_tree("beta")}
        for object_id, tree in trees.items():
            store.put(object_id, tree)
        for object_id, tree in trees.items():
            assert store.get(object_id) == tree

    def test_size_property(self) -> None:
        """``size`` counts stored entries."""
        store = SymbolCache.empty()
        assert store.size == 0
        for object_id, symbol in (("x", "f"), ("y", "g")):
            store.put(object_id, _make_tree(symbol))
        assert store.size == 2

    def test_prune_removes_stale(self) -> None:
        """``prune`` drops ids outside the given set and leaves the cache dirty."""
        store = SymbolCache.empty()
        store.put("keep", _make_tree("f"))
        store.put("drop", _make_tree("g"))
        live_ids = {"keep"}
        store.prune(live_ids)
        assert store.get("keep") is not None
        assert store.get("drop") is None
        assert store._dirty

    def test_prune_no_stale_not_dirty(self) -> None:
        """Pruning with nothing to remove does not set the dirty flag."""
        store = SymbolCache.empty()
        store.put("keep", _make_tree("f"))
        # Clear the flag raised by put() so only prune()'s effect is observed.
        store._dirty = False
        store.prune({"keep", "other"})
        assert not store._dirty

    def test_empty_save_is_noop(self, tmp_path: pathlib.Path) -> None:
        """``save`` on an ``empty()`` cache neither raises nor writes a file."""
        store = SymbolCache.empty()
        store.put("id", _make_tree("f"))
        store.save()  # must not raise — muse_dir is None
        would_be_file = tmp_path / ".muse" / "cache" / "symbols.json"
        assert not would_be_file.exists()
| 202 | |
| 203 | |
| 204 | # --------------------------------------------------------------------------- |
| 205 | # SymbolCache — persistence (save / load round-trip) |
| 206 | # --------------------------------------------------------------------------- |
| 207 | |
| 208 | |
class TestSymbolCachePersistence:
    """``save()`` / ``load()`` behaviour against ``cache/symbols.json`` on disk."""

    def test_save_creates_file(self, tmp_path: pathlib.Path) -> None:
        """Saving a dirty cache creates ``cache/symbols.json``."""
        dot_muse = _make_muse_dir(tmp_path)
        store = SymbolCache.load(dot_muse)
        store.put("id1", _make_tree("fn_a"))
        store.save()
        symbols_file = dot_muse / "cache" / "symbols.json"
        assert symbols_file.is_file()

    def test_save_then_load_round_trip(self, tmp_path: pathlib.Path) -> None:
        """A saved tree is returned, with the same addresses, by a fresh load."""
        dot_muse = _make_muse_dir(tmp_path)
        original = _make_tree("compute", "validate")
        object_id = "deadbeef" * 8
        writer = SymbolCache.load(dot_muse)
        writer.put(object_id, original)
        writer.save()

        reader = SymbolCache.load(dot_muse)
        restored = reader.get(object_id)
        assert restored is not None
        assert set(restored) == set(original)
        # Spot-check a handful of fields on the first record.
        address = next(iter(original))
        got, want = restored[address], original[address]
        assert got["kind"] == want["kind"]
        assert got["name"] == want["name"]
        assert got["lineno"] == want["lineno"]
        assert got["end_lineno"] == want["end_lineno"]

    def test_save_no_dirty_skips_write(self, tmp_path: pathlib.Path) -> None:
        """Saving a clean cache writes nothing."""
        dot_muse = _make_muse_dir(tmp_path)
        store = SymbolCache.load(dot_muse)
        store.save()  # nothing was put, so no file should appear
        symbols_file = dot_muse / "cache" / "symbols.json"
        assert not symbols_file.is_file()

    def test_save_dirty_false_after_save(self, tmp_path: pathlib.Path) -> None:
        """``save`` leaves the dirty flag cleared."""
        dot_muse = _make_muse_dir(tmp_path)
        store = SymbolCache.load(dot_muse)
        store.put("id", _make_tree("fn"))
        store.save()
        assert not store._dirty

    def test_multiple_saves_second_is_noop(self, tmp_path: pathlib.Path) -> None:
        """A second ``save`` with no changes leaves the file's mtime untouched."""
        dot_muse = _make_muse_dir(tmp_path)
        store = SymbolCache.load(dot_muse)
        store.put("id", _make_tree("fn"))
        store.save()
        symbols_file = dot_muse / "cache" / "symbols.json"
        stamp_before = symbols_file.stat().st_mtime_ns
        store.save()  # not dirty — should not touch the file
        stamp_after = symbols_file.stat().st_mtime_ns
        assert stamp_before == stamp_after

    def test_atomic_write_no_tmp_leftover(self, tmp_path: pathlib.Path) -> None:
        """No ``*.tmp`` file remains in the cache directory after ``save``."""
        dot_muse = _make_muse_dir(tmp_path)
        store = SymbolCache.load(dot_muse)
        store.put("id", _make_tree("fn"))
        store.save()
        leftovers = list((dot_muse / "cache").glob("*.tmp"))
        assert not leftovers

    def test_orphaned_tmp_swept_on_startup(self, tmp_path: pathlib.Path) -> None:
        """A stale ``.symbols_*.tmp`` left by a crash is removed by the startup sweep."""
        from muse.core.repo import _cleanup_muse_dir_temps

        dot_muse = _make_muse_dir(tmp_path)
        stale_file = dot_muse / "cache" / ".symbols_abc123.tmp"
        stale_file.write_bytes(b"stale")
        _cleanup_muse_dir_temps(dot_muse)
        assert not stale_file.exists()
| 272 | |
| 273 | |
| 274 | # --------------------------------------------------------------------------- |
| 275 | # SymbolCache — graceful error handling on load |
| 276 | # --------------------------------------------------------------------------- |
| 277 | |
| 278 | |
class TestSymbolCacheGracefulLoad:
    """Loading tolerates missing, corrupt, wrongly versioned or partly invalid files.

    Each test prepares ``cache/symbols.json`` (or omits it) under a fresh muse
    directory and checks what ``SymbolCache.load`` / ``load_symbol_cache``
    return.
    """

    def test_absent_file_returns_empty(self, tmp_path: pathlib.Path) -> None:
        """No cache file on disk yields an empty cache."""
        dot_muse = _make_muse_dir(tmp_path)
        cache = SymbolCache.load(dot_muse)
        assert cache.size == 0

    def test_corrupt_file_returns_empty(self, tmp_path: pathlib.Path) -> None:
        """Bytes that are not valid JSON yield an empty cache rather than an error."""
        dot_muse = _make_muse_dir(tmp_path)
        (dot_muse / "cache" / "symbols.json").write_bytes(b"not valid JSON !!!")
        cache = SymbolCache.load(dot_muse)
        assert cache.size == 0

    def test_wrong_version_returns_empty(self, tmp_path: pathlib.Path) -> None:
        """A document with ``version`` 999 yields an empty cache."""
        import json as _json

        dot_muse = _make_muse_dir(tmp_path)
        doc = {"version": 999, "entries": {}}
        (dot_muse / "cache" / "symbols.json").write_bytes(
            _json.dumps(doc).encode()
        )
        cache = SymbolCache.load(dot_muse)
        assert cache.size == 0

    def test_invalid_entry_skipped(self, tmp_path: pathlib.Path) -> None:
        """A single malformed tree entry is skipped; valid entries survive."""
        import json as _json
        from muse.core.symbol_cache import _CACHE_VERSION

        dot_muse = _make_muse_dir(tmp_path)
        good_tree = {"billing.py::run": dict(_make_record("run"))}
        # Invalid kind and a non-str name: this record should fail validation.
        bad_tree = {"billing.py::broken": {"kind": "INVALID_KIND", "name": 123}}
        doc = {
            "version": _CACHE_VERSION,
            "entries": {
                "good_id": good_tree,
                "bad_id": bad_tree,
            },
        }
        (dot_muse / "cache" / "symbols.json").write_bytes(_json.dumps(doc).encode())
        cache = SymbolCache.load(dot_muse)
        assert cache.get("good_id") is not None
        assert cache.get("bad_id") is None

    def test_load_symbol_cache_no_muse_dir(self, tmp_path: pathlib.Path) -> None:
        """load_symbol_cache returns empty when there is no .muse directory."""
        cache = load_symbol_cache(tmp_path)
        assert cache.size == 0

    def test_load_symbol_cache_with_muse_dir(self, tmp_path: pathlib.Path) -> None:
        """load_symbol_cache, given the repo root, sees entries saved earlier."""
        dot_muse = _make_muse_dir(tmp_path)
        tree = _make_tree("fn")
        seed = SymbolCache.load(dot_muse)
        seed.put("myid", tree)
        seed.save()

        # The helper takes the repo root, not the muse directory itself.
        cache = load_symbol_cache(tmp_path)
        assert cache.get("myid") is not None
| 335 | |
| 336 | |
| 337 | # --------------------------------------------------------------------------- |
| 338 | # Integration: symbols_for_snapshot uses cache |
| 339 | # --------------------------------------------------------------------------- |
| 340 | |
| 341 | |
class TestSymbolsForSnapshotCache:
    """Verify that symbols_for_snapshot calls parse_symbols only on cache miss."""

    def _make_manifest(
        self, tmp_path: pathlib.Path, content: bytes = b"def run(): pass\n"
    ) -> tuple[pathlib.Path, dict[str, str]]:
        """Write a .muse object and return (root, manifest).

        Creates ``tmp_path / "repo"`` with a muse directory, stores *content*
        in the object store under its ``blob_id`` and returns a manifest that
        maps ``billing.py`` to that id.
        """
        root = tmp_path / "repo"
        root.mkdir()
        muse_dir(root).mkdir()

        oid = blob_id(content)
        write_object(root, oid, content)

        manifest = {"billing.py": oid}
        return root, manifest

    def test_cold_cache_calls_parse(self, tmp_path: pathlib.Path) -> None:
        """On a freshly created repo, ``parse_symbols`` is invoked at least once."""
        from muse.plugins.code._query import symbols_for_snapshot
        from muse.plugins.code.ast_parser import parse_symbols

        root, manifest = self._make_manifest(tmp_path)
        # wraps= keeps the real parser running while recording the calls.
        with patch(
            "muse.plugins.code._query.parse_symbols", wraps=parse_symbols
        ) as mock_parse:
            symbols_for_snapshot(root, manifest)
        assert mock_parse.call_count >= 1

    def test_warm_cache_skips_parse(self, tmp_path: pathlib.Path) -> None:
        """A second call over the same manifest never reaches ``parse_symbols``."""
        from muse.plugins.code._query import symbols_for_snapshot

        content = b"def run(): pass\n"
        root, manifest = self._make_manifest(tmp_path, content)

        # First call — populates cache
        symbols_for_snapshot(root, manifest)

        # Second call — should hit cache, never call parse_symbols
        with patch("muse.plugins.code._query.parse_symbols") as mock_parse:
            result2 = symbols_for_snapshot(root, manifest)
        mock_parse.assert_not_called()
        assert "billing.py" in result2

    def test_working_tree_edit_invalidates_cache(self, tmp_path: pathlib.Path) -> None:
        """Editing a file produces a new SHA-256 → cache miss → re-parse."""
        from muse.plugins.code._query import symbols_for_snapshot

        content_v1 = b"def run(): pass\n"
        content_v2 = b"def run(): pass\ndef brand_new(): pass\n"
        root, manifest = self._make_manifest(tmp_path, content_v1)

        # Write v1 to disk
        (root / "billing.py").write_bytes(content_v1)
        result1 = symbols_for_snapshot(root, manifest, workdir=root)
        syms1 = set(result1.get("billing.py", {}).keys())

        # Baseline: the new symbol must not be reported before the edit,
        # otherwise the final assertion would prove nothing.
        assert not any("brand_new" in addr for addr in syms1), (
            f"Did not expect brand_new in {syms1}"
        )

        # Edit file on disk (v2) — cache key changes because SHA-256 changes
        (root / "billing.py").write_bytes(content_v2)
        result2 = symbols_for_snapshot(root, manifest, workdir=root)
        syms2 = set(result2.get("billing.py", {}).keys())

        # v2 has brand_new — the working-tree edit was picked up
        assert any("brand_new" in addr for addr in syms2), (
            f"Expected brand_new in {syms2}"
        )
File History
1 commit
sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b
fix: try fetch/presign before fetch/mpack to avoid Cloudfla…
Sonnet 4.6
patch
6 days ago