gabriel / muse public
test_core_symbol_cache.py python
401 lines 14.7 KB
Raw
sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b fix: try fetch/presign before fetch/mpack to avoid Cloudfla… Sonnet 4.6 patch 6 days ago
1 """Tests for muse.core.symbol_cache.
2
3 Coverage
4 --------
5 - Cache hit: get() returns stored tree without calling parse_symbols.
6 - Cache miss: get() returns None; put() stores a tree; subsequent get() hits.
7 - Content-addressed key: different content → different key → independent entries.
8 - Persistence: save() / load() round-trip via .muse/cache/symbols.json.
9 - Atomic write: tmp file replaced; no corruption.
10 - empty(): no-op — save() is a no-op without a muse_dir.
11 - load_symbol_cache() convenience helper.
12 - Corrupt file: gracefully returns empty cache.
13 - Wrong version: gracefully returns empty cache.
14 - prune(): removes stale entries, marks dirty.
15 - Integration with symbols_for_snapshot: warm cache skips parse_symbols.
16 - Working-tree key: disk bytes SHA-256 ≠ object_id when file is edited.
17 """
18
19 from __future__ import annotations
20
21 import pathlib
22 from unittest.mock import patch
23
24 import pytest
25
26 from muse.core.types import MsgpackValue
27 from muse.core.symbol_cache import (
28 SymbolCache,
29 _object_id_of,
30 _is_symbol_record,
31 load_symbol_cache,
32 )
33 from muse.core.types import blob_id, fake_id
34 from muse.core.object_store import write_object
35 from muse.core.paths import muse_dir
36 from muse.plugins.code.ast_parser import SymbolKind, SymbolRecord, SymbolTree
37
38
39 # ---------------------------------------------------------------------------
40 # Fixtures
41 # ---------------------------------------------------------------------------
42
43
def _make_muse_dir(tmp_path: pathlib.Path) -> pathlib.Path:
    """Create the muse directory for *tmp_path* plus its ``cache`` subdirectory.

    Returns the path produced by ``muse_dir(tmp_path)``.  Both directories are
    created with a plain ``mkdir()``, so the call fails if either one already
    exists or if *tmp_path* itself is missing.
    """
    repo_meta = muse_dir(tmp_path)
    cache_dir = repo_meta / "cache"
    # Parent first: ``cache`` lives inside the muse directory.
    for directory in (repo_meta, cache_dir):
        directory.mkdir()
    return repo_meta
49
50
def _make_record(
    name: str = "my_func",
    kind: SymbolKind = "function",
    lineno: int = 1,
    end_lineno: int = 5,
) -> SymbolRecord:
    """Build a ``SymbolRecord`` fixture.

    *name* doubles as the qualified name and canonical key, and seeds
    ``content_id`` through ``fake_id``.  The body, signature and metadata
    identifiers are fixed ``blob_id`` digests, so they are the same for every
    record regardless of *name*.
    """
    content = fake_id(name)
    body = blob_id(b"body")
    signature = blob_id(b"sig")
    metadata = blob_id(b"meta")
    return SymbolRecord(
        kind=kind,
        name=name,
        qualified_name=name,
        content_id=content,
        body_hash=body,
        signature_id=signature,
        metadata_id=metadata,
        canonical_key=name,
        lineno=lineno,
        end_lineno=end_lineno,
    )
69
70
def _make_tree(*names: str) -> SymbolTree:
    """Build a symbol tree with one default record per name.

    Each entry is keyed ``billing.py::<name>``; a repeated name overwrites the
    earlier entry.
    """
    tree: SymbolTree = {}
    for symbol_name in names:
        address = f"billing.py::{symbol_name}"
        tree[address] = _make_record(symbol_name)
    return tree
73
74
75 def _make_raw(name: str = "my_func", kind: str = "function", lineno: int = 1, end_lineno: int = 5) -> MsgpackDict:
76 """Build a raw JSON-compatible record dict for guard testing."""
77 return {
78 "kind": kind,
79 "name": name,
80 "qualified_name": name,
81 "content_id": "a" * 64,
82 "body_hash": "b" * 64,
83 "signature_id": "c" * 64,
84 "metadata_id": "",
85 "canonical_key": name,
86 "lineno": lineno,
87 "end_lineno": end_lineno,
88 }
89
90
91 # ---------------------------------------------------------------------------
92 # _object_id_of
93 # ---------------------------------------------------------------------------
94
95
class TestObjectIdOf:
    """``_object_id_of`` matches ``blob_id`` and is a pure function of its input."""

    def test_sha256_of_bytes(self) -> None:
        """The id of a byte string equals ``blob_id`` of the same bytes."""
        payload = b"hello"
        expected = blob_id(b"hello")
        assert _object_id_of(payload) == expected

    def test_different_content_different_id(self) -> None:
        """Distinct payloads produce distinct ids."""
        first = _object_id_of(b"a")
        second = _object_id_of(b"b")
        assert first != second

    def test_same_content_same_id(self) -> None:
        """Hashing the same payload twice gives the same id."""
        first = _object_id_of(b"stable")
        second = _object_id_of(b"stable")
        assert first == second
106
107
108 # ---------------------------------------------------------------------------
109 # _is_symbol_record
110 # ---------------------------------------------------------------------------
111
112
class TestIsSymbolRecord:
    """Guard tests: ``_is_symbol_record`` accepts a well-formed dict and rejects the rest."""

    def test_valid_record_passes(self) -> None:
        """The unmodified ``_make_raw()`` dict is accepted."""
        candidate = _make_raw()
        assert _is_symbol_record(candidate)

    def test_not_a_dict_fails(self) -> None:
        """Non-dict values are rejected."""
        for candidate in ("string", 42, None):
            assert not _is_symbol_record(candidate)

    def test_missing_field_fails(self) -> None:
        """Dropping the ``kind`` key makes the record invalid."""
        candidate = _make_raw()
        candidate.pop("kind")
        assert not _is_symbol_record(candidate)

    def test_wrong_type_for_str_field_fails(self) -> None:
        """An int where ``name`` should be a str is rejected."""
        candidate = _make_raw()
        candidate.update({"name": 123})  # should be str
        assert not _is_symbol_record(candidate)

    def test_wrong_type_for_int_field_fails(self) -> None:
        """A str where ``lineno`` should be an int is rejected."""
        candidate = _make_raw()
        candidate.update({"lineno": "not_an_int"})
        assert not _is_symbol_record(candidate)

    def test_invalid_kind_fails(self) -> None:
        """A ``kind`` string that the guard does not recognise is rejected."""
        candidate = _make_raw(kind="function")
        candidate.update({"kind": "not_a_valid_kind"})
        assert not _is_symbol_record(candidate)
141
142
143 # ---------------------------------------------------------------------------
144 # SymbolCache — in-memory operations
145 # ---------------------------------------------------------------------------
146
147
class TestSymbolCacheMemory:
    """In-memory behaviour of a cache created with ``SymbolCache.empty()``."""

    def test_get_miss_returns_none(self) -> None:
        """Looking up an id that was never stored yields ``None``."""
        store = SymbolCache.empty()
        looked_up = store.get("nonexistent_id")
        assert looked_up is None

    def test_put_then_get_hits(self) -> None:
        """A stored tree comes back equal under the same id."""
        store = SymbolCache.empty()
        stored_tree = _make_tree("run", "setup")
        store.put("abc123", stored_tree)
        assert store.get("abc123") == stored_tree

    def test_put_marks_dirty(self) -> None:
        """``put`` flips the ``_dirty`` flag from false to true."""
        store = SymbolCache.empty()
        assert not store._dirty
        store.put("id1", _make_tree("fn"))
        assert store._dirty

    def test_different_ids_independent(self) -> None:
        """Entries stored under different ids do not overwrite each other."""
        store = SymbolCache.empty()
        expected = {
            "id_a": _make_tree("alpha"),
            "id_b": _make_tree("beta"),
        }
        for object_id, tree in expected.items():
            store.put(object_id, tree)
        for object_id, tree in expected.items():
            assert store.get(object_id) == tree

    def test_size_property(self) -> None:
        """``size`` counts stored entries."""
        store = SymbolCache.empty()
        assert store.size == 0
        for object_id, fn_name in (("x", "f"), ("y", "g")):
            store.put(object_id, _make_tree(fn_name))
        assert store.size == 2

    def test_prune_removes_stale(self) -> None:
        """``prune`` keeps only the ids it is given and leaves the cache dirty."""
        store = SymbolCache.empty()
        store.put("keep", _make_tree("f"))
        store.put("drop", _make_tree("g"))
        live_ids = {"keep"}
        store.prune(live_ids)
        assert store.get("keep") is not None
        assert store.get("drop") is None
        assert store._dirty

    def test_prune_no_stale_not_dirty(self) -> None:
        """Pruning when nothing is stale leaves ``_dirty`` untouched."""
        store = SymbolCache.empty()
        store.put("keep", _make_tree("f"))
        store._dirty = False  # reset after put
        live_ids = {"keep", "other"}
        store.prune(live_ids)
        assert not store._dirty

    def test_empty_save_is_noop(self, tmp_path: pathlib.Path) -> None:
        """``save`` on an ``empty()`` cache does not raise and writes nothing under *tmp_path*."""
        store = SymbolCache.empty()
        store.put("id", _make_tree("f"))
        store.save()  # should not raise — muse_dir is None
        symbols_file = tmp_path / ".muse" / "cache" / "symbols.json"
        assert not symbols_file.exists()
202
203
204 # ---------------------------------------------------------------------------
205 # SymbolCache — persistence (save / load round-trip)
206 # ---------------------------------------------------------------------------
207
208
class TestSymbolCachePersistence:
    """``save()`` / ``load()`` behaviour against ``<muse dir>/cache/symbols.json``."""

    def test_save_creates_file(self, tmp_path: pathlib.Path) -> None:
        """Saving a dirty cache creates the symbols file."""
        dot_muse = _make_muse_dir(tmp_path)
        symbols_file = dot_muse / "cache" / "symbols.json"
        store = SymbolCache.load(dot_muse)
        store.put("id1", _make_tree("fn_a"))
        store.save()
        assert symbols_file.is_file()

    def test_save_then_load_round_trip(self, tmp_path: pathlib.Path) -> None:
        """A saved tree is readable from a freshly loaded cache."""
        dot_muse = _make_muse_dir(tmp_path)
        object_id = "deadbeef" * 8
        original = _make_tree("compute", "validate")
        writer = SymbolCache.load(dot_muse)
        writer.put(object_id, original)
        writer.save()

        reader = SymbolCache.load(dot_muse)
        restored = reader.get(object_id)
        assert restored is not None
        assert set(restored) == set(original)
        # Spot-check the fields of the first address only.
        first_addr = next(iter(original))
        restored_rec = restored[first_addr]
        original_rec = original[first_addr]
        assert restored_rec["kind"] == original_rec["kind"]
        assert restored_rec["name"] == original_rec["name"]
        assert restored_rec["lineno"] == original_rec["lineno"]
        assert restored_rec["end_lineno"] == original_rec["end_lineno"]

    def test_save_no_dirty_skips_write(self, tmp_path: pathlib.Path) -> None:
        """Saving a clean cache does not create the file."""
        dot_muse = _make_muse_dir(tmp_path)
        symbols_file = dot_muse / "cache" / "symbols.json"
        store = SymbolCache.load(dot_muse)
        store.save()  # _dirty is False — no file should appear
        assert not symbols_file.is_file()

    def test_save_dirty_false_after_save(self, tmp_path: pathlib.Path) -> None:
        """``save`` clears the ``_dirty`` flag."""
        dot_muse = _make_muse_dir(tmp_path)
        store = SymbolCache.load(dot_muse)
        store.put("id", _make_tree("fn"))
        store.save()
        assert not store._dirty

    def test_multiple_saves_second_is_noop(self, tmp_path: pathlib.Path) -> None:
        """A second ``save`` with no changes leaves the file's mtime unchanged."""
        dot_muse = _make_muse_dir(tmp_path)
        symbols_file = dot_muse / "cache" / "symbols.json"
        store = SymbolCache.load(dot_muse)
        store.put("id", _make_tree("fn"))
        store.save()
        mtime_after_first = symbols_file.stat().st_mtime_ns
        store.save()  # not dirty — should not touch file
        mtime_after_second = symbols_file.stat().st_mtime_ns
        assert mtime_after_first == mtime_after_second

    def test_atomic_write_no_tmp_leftover(self, tmp_path: pathlib.Path) -> None:
        """No ``*.tmp`` file remains in the cache directory after ``save``."""
        dot_muse = _make_muse_dir(tmp_path)
        store = SymbolCache.load(dot_muse)
        store.put("id", _make_tree("fn"))
        store.save()
        leftovers = list((dot_muse / "cache").glob("*.tmp"))
        assert not leftovers

    def test_orphaned_tmp_swept_on_startup(self, tmp_path: pathlib.Path) -> None:
        """A stale ``.symbols_*.tmp`` left by a crash is removed by the startup sweep."""
        from muse.core.repo import _cleanup_muse_dir_temps

        dot_muse = _make_muse_dir(tmp_path)
        stale_tmp = dot_muse / "cache" / ".symbols_abc123.tmp"
        stale_tmp.write_bytes(b"stale")
        _cleanup_muse_dir_temps(dot_muse)
        assert not stale_tmp.exists()
272
273
274 # ---------------------------------------------------------------------------
275 # SymbolCache — graceful error handling on load
276 # ---------------------------------------------------------------------------
277
278
class TestSymbolCacheGracefulLoad:
    """Loading tolerates a missing, corrupt, wrong-version or partly invalid cache file."""

    def test_absent_file_returns_empty(self, tmp_path: pathlib.Path) -> None:
        """With no symbols file on disk, ``load`` yields a cache of size 0."""
        dot_muse = _make_muse_dir(tmp_path)
        cache = SymbolCache.load(dot_muse)
        assert cache.size == 0

    def test_corrupt_file_returns_empty(self, tmp_path: pathlib.Path) -> None:
        """Bytes that are not valid JSON yield a cache of size 0."""
        # No json import needed here: the payload is written as raw bytes.
        dot_muse = _make_muse_dir(tmp_path)
        (dot_muse / "cache" / "symbols.json").write_bytes(b"not valid JSON !!!")
        cache = SymbolCache.load(dot_muse)
        assert cache.size == 0

    def test_wrong_version_returns_empty(self, tmp_path: pathlib.Path) -> None:
        """A well-formed document carrying version 999 yields a cache of size 0."""
        import json as _json

        dot_muse = _make_muse_dir(tmp_path)
        doc = {"version": 999, "entries": {}}
        (dot_muse / "cache" / "symbols.json").write_bytes(
            _json.dumps(doc).encode()
        )
        cache = SymbolCache.load(dot_muse)
        assert cache.size == 0

    def test_invalid_entry_skipped(self, tmp_path: pathlib.Path) -> None:
        """A single malformed tree entry is skipped; valid entries survive."""
        import json as _json

        from muse.core.symbol_cache import _CACHE_VERSION

        dot_muse = _make_muse_dir(tmp_path)
        good_tree = {"billing.py::run": dict(_make_record("run"))}
        # Invalid on two counts: unknown kind and a non-str name.
        bad_tree = {"billing.py::broken": {"kind": "INVALID_KIND", "name": 123}}
        doc = {
            "version": _CACHE_VERSION,
            "entries": {
                "good_id": good_tree,
                "bad_id": bad_tree,
            },
        }
        (dot_muse / "cache" / "symbols.json").write_bytes(_json.dumps(doc).encode())
        cache = SymbolCache.load(dot_muse)
        assert cache.get("good_id") is not None
        assert cache.get("bad_id") is None

    def test_load_symbol_cache_no_muse_dir(self, tmp_path: pathlib.Path) -> None:
        """load_symbol_cache returns empty when there is no .muse directory."""
        cache = load_symbol_cache(tmp_path)
        assert cache.size == 0

    def test_load_symbol_cache_with_muse_dir(self, tmp_path: pathlib.Path) -> None:
        """``load_symbol_cache(root)`` sees entries previously saved under that root."""
        dot_muse = _make_muse_dir(tmp_path)
        tree = _make_tree("fn")
        seed = SymbolCache.load(dot_muse)
        seed.put("myid", tree)
        seed.save()

        cache = load_symbol_cache(tmp_path)
        assert cache.get("myid") is not None
335
336
337 # ---------------------------------------------------------------------------
338 # Integration: symbols_for_snapshot uses cache
339 # ---------------------------------------------------------------------------
340
341
class TestSymbolsForSnapshotCache:
    """Verify that symbols_for_snapshot calls parse_symbols only on cache miss."""

    def _make_manifest(
        self, tmp_path: pathlib.Path, content: bytes = b"def run(): pass\n"
    ) -> tuple[pathlib.Path, dict[str, str]]:
        """Write a .muse object and return (root, manifest).

        Creates ``tmp_path / "repo"`` with its muse directory, stores *content*
        in the object store under ``blob_id(content)``, and maps
        ``billing.py`` to that id in the returned manifest.
        """
        root = tmp_path / "repo"
        root.mkdir()
        muse_dir(root).mkdir()

        oid = blob_id(content)
        write_object(root, oid, content)

        manifest = {"billing.py": oid}
        return root, manifest

    def test_cold_cache_calls_parse(self, tmp_path: pathlib.Path) -> None:
        """On a fresh repo the real parser is invoked at least once."""
        from muse.plugins.code._query import symbols_for_snapshot
        from muse.plugins.code.ast_parser import parse_symbols as real_parse_symbols

        root, manifest = self._make_manifest(tmp_path)
        # ``wraps`` keeps the real parser behaviour while recording calls.
        with patch(
            "muse.plugins.code._query.parse_symbols", wraps=real_parse_symbols
        ) as mock_parse:
            symbols_for_snapshot(root, manifest)
        assert mock_parse.call_count >= 1

    def test_warm_cache_skips_parse(self, tmp_path: pathlib.Path) -> None:
        """A second call over the same manifest is served without parsing."""
        from muse.plugins.code._query import symbols_for_snapshot

        content = b"def run(): pass\n"
        root, manifest = self._make_manifest(tmp_path, content)

        # First call — populates cache
        symbols_for_snapshot(root, manifest)

        # Second call — should hit cache, never call parse_symbols
        with patch("muse.plugins.code._query.parse_symbols") as mock_parse:
            result2 = symbols_for_snapshot(root, manifest)
        mock_parse.assert_not_called()
        assert "billing.py" in result2

    def test_working_tree_edit_invalidates_cache(self, tmp_path: pathlib.Path) -> None:
        """Editing a file produces a new SHA-256 → cache miss → re-parse."""
        from muse.plugins.code._query import symbols_for_snapshot

        content_v1 = b"def run(): pass\n"
        content_v2 = b"def run(): pass\ndef brand_new(): pass\n"
        root, manifest = self._make_manifest(tmp_path, content_v1)

        # Write v1 to disk
        (root / "billing.py").write_bytes(content_v1)

        result1 = symbols_for_snapshot(root, manifest, workdir=root)
        syms1 = set(result1.get("billing.py", {}).keys())

        # Baseline: v1 must not already expose brand_new, otherwise the final
        # assertion could pass without the edit ever being noticed.
        assert not any("brand_new" in addr for addr in syms1), (
            f"Did not expect brand_new in {syms1}"
        )

        # Edit file on disk (v2) — cache key changes because SHA-256 changes
        (root / "billing.py").write_bytes(content_v2)
        result2 = symbols_for_snapshot(root, manifest, workdir=root)
        syms2 = set(result2.get("billing.py", {}).keys())

        # v2 has brand_new — the working-tree edit was picked up
        assert any("brand_new" in addr for addr in syms2), (
            f"Expected brand_new in {syms2}"
        )
File History 1 commit
sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b fix: try fetch/presign before fetch/mpack to avoid Cloudfla… Sonnet 4.6 patch 6 days ago