gabriel / muse public
test_invariant_file_cache.py python
444 lines 16.0 KB
Raw
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 21 days ago
1 """Tests for _InvariantFileCache — the persistent per-file AST analysis cache.
2
3 Coverage
4 --------
5 The cache lives at ``.muse/cache/invariants.json`` and maps a file's
6 content hash (SHA-256) to the ``_FileData`` struct produced by a single
7 ``ast.parse`` pass. On a warm cache, ``muse code invariants`` skips every
8 ``ast.parse`` call — O(N×R) → O(1).
9
10 Tier 1 — Unit
11 In-memory operations: get/put/prune/size/empty/dirty flag. No I/O.
12
13 Tier 2 — Integration
14 Real filesystem via ``tmp_path``. Verifies the correct on-disk path,
15 save/load round-trip fidelity, dirty-flag lifecycle, and no-op behaviour.
16
17 Tier 5 — Data integrity
18 Adversarial on-disk state: corrupt bytes, wrong version, missing keys,
19 invalid entries, non-string content hashes. Also verifies atomic write
20 (no ``.tmp`` leftover after a successful save).
21
22 Tier 6 — Performance
23 Asserts that a warm cache skips ``ast.parse`` entirely (zero calls).
24 The mechanism — I/O patching — is more reliable than wall-clock ratios
25 across CI hardware and proves the exact property we care about.
26
27 Tier 7 — Security
28 Mode-000 cache file: ``load()`` must return empty and never raise.
29 Deeply nested JSON payload: ``load()`` must not crash or hang.
30 """
31
32 from __future__ import annotations
33
34 import ast
35 import os
36 import pathlib
37 import stat
38 import time
39 from collections.abc import Mapping
40
41 import pytest
42
43 from muse.core.paths import muse_dir
44 from muse.plugins.code._invariants import (
45 _FileData,
46 _InvariantFileCache,
47 _FILE_CACHE_VERSION,
48 )
49
50
51 # ---------------------------------------------------------------------------
52 # Helpers
53 # ---------------------------------------------------------------------------
54
55
def _make_repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Create the ``cache`` directory under ``muse_dir(tmp_path)`` and return *tmp_path*.

    Missing parent directories are created as well.  Because ``exist_ok`` is
    not passed, calling this twice for the same *tmp_path* raises
    ``FileExistsError``.
    """
    cache_dir = muse_dir(tmp_path) / "cache"
    cache_dir.mkdir(parents=True)
    return tmp_path
60
61
def _file_data(
    imports: list[str] | None = None,
    fns: list[str] | None = None,
    classes: list[str] | None = None,
    has_all: bool = False,
    complexity: dict[str, int] | None = None,
) -> _FileData:
    """Assemble a ``_FileData`` for tests, defaulting every omitted field to empty.

    Any falsy container argument (``None`` or empty) is replaced by a fresh
    empty list/dict.  ``from_module`` and ``from_name`` are always empty lists;
    ``has_all`` is passed through unchanged.
    """
    module_imports = imports or []
    function_names = fns or []
    class_names = classes or []
    complexity_by_symbol = complexity or {}
    return _FileData(
        raw_module_imports=module_imports,
        from_module=[],
        from_name=[],
        top_level_fns=function_names,
        top_level_classes=class_names,
        has_all=has_all,
        complexity=complexity_by_symbol,
    )
79
80
def _cache_path(root: pathlib.Path) -> pathlib.Path:
    """Return the path ``cache/invariants.json`` inside ``muse_dir(root)``."""
    cache_dir = muse_dir(root) / "cache"
    return cache_dir / "invariants.json"
83
84
85 # ---------------------------------------------------------------------------
86 # Tier 1 — Unit (in-memory, no filesystem)
87 # ---------------------------------------------------------------------------
88
89
class TestUnit:
    """In-memory get/put/prune/size/empty operations on a cache built with ``empty()``."""

    def test_get_miss_returns_none(self) -> None:
        """Looking up a hash that was never stored yields ``None``."""
        cache = _InvariantFileCache.empty()
        assert cache.get("no_such_hash") is None

    def test_put_then_get_hit(self) -> None:
        """A stored entry is returned equal to what was put."""
        cache = _InvariantFileCache.empty()
        fd = _file_data(imports=["os"], fns=["main"])
        cache.put("abc123", fd)
        assert cache.get("abc123") == fd

    def test_put_marks_dirty(self) -> None:
        """A fresh cache is clean; the first ``put`` sets the dirty flag."""
        cache = _InvariantFileCache.empty()
        assert not cache._dirty
        cache.put("id1", _file_data())
        assert cache._dirty

    def test_put_same_key_overwrites(self) -> None:
        """A second ``put`` under the same key replaces the entry without growing the cache."""
        cache = _InvariantFileCache.empty()
        cache.put("k", _file_data(fns=["old"]))
        cache.put("k", _file_data(fns=["new"]))
        entry = cache.get("k")
        # Narrow before subscripting so a miss fails as a clear assertion
        # rather than "'NoneType' object is not subscriptable".
        assert entry is not None
        assert entry["top_level_fns"] == ["new"]
        assert cache.size == 1

    def test_different_keys_independent(self) -> None:
        """Entries stored under distinct keys do not affect one another."""
        cache = _InvariantFileCache.empty()
        a = _file_data(fns=["alpha"])
        b = _file_data(fns=["beta"])
        cache.put("k_a", a)
        cache.put("k_b", b)
        assert cache.get("k_a") == a
        assert cache.get("k_b") == b

    def test_size_starts_zero(self) -> None:
        """An empty cache reports size 0."""
        assert _InvariantFileCache.empty().size == 0

    def test_size_grows_with_put(self) -> None:
        """``size`` counts distinct keys."""
        cache = _InvariantFileCache.empty()
        cache.put("x", _file_data())
        cache.put("y", _file_data())
        assert cache.size == 2

    def test_prune_removes_stale_sets_dirty(self) -> None:
        """``prune`` drops keys outside the live set and marks the cache dirty."""
        cache = _InvariantFileCache.empty()
        cache.put("keep", _file_data())
        cache.put("drop", _file_data())
        # Reset the flag so the assertion below observes prune(), not put().
        cache._dirty = False
        cache.prune({"keep"})
        assert cache.get("keep") is not None
        assert cache.get("drop") is None
        assert cache._dirty

    def test_prune_noop_when_all_live(self) -> None:
        """``prune`` leaves the dirty flag alone when nothing is removed."""
        cache = _InvariantFileCache.empty()
        cache.put("keep", _file_data())
        cache._dirty = False
        cache.prune({"keep", "other"})
        assert not cache._dirty

    def test_empty_cache_dir_is_none(self) -> None:
        """A cache built with ``empty()`` has no backing directory."""
        cache = _InvariantFileCache.empty()
        assert cache._cache_dir is None

    def test_empty_save_is_noop(
        self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """``save()`` on a directory-less cache writes no ``invariants.json``.

        The cache has no link to *tmp_path*, so the test runs with the
        working directory set to *tmp_path*: a write that fell back to a
        cwd-relative location would then land where the scan can see it.
        """
        monkeypatch.chdir(tmp_path)
        cache = _InvariantFileCache.empty()
        cache.put("id", _file_data())
        cache.save()
        assert not any(tmp_path.rglob("invariants.json"))
160
161
162 # ---------------------------------------------------------------------------
163 # Tier 2 — Integration (real filesystem)
164 # ---------------------------------------------------------------------------
165
166
class TestIntegration:
    """Save/load behaviour against a real filesystem rooted at ``tmp_path``."""

    def test_load_missing_file_returns_empty(self, tmp_path: pathlib.Path) -> None:
        """Loading from a repo with no cache file yields an empty cache."""
        root = _make_repo(tmp_path)
        cache = _InvariantFileCache.load(root)
        assert cache.size == 0

    def test_save_creates_file_at_correct_path(self, tmp_path: pathlib.Path) -> None:
        """A dirty cache is written to the path returned by ``_cache_path``."""
        root = _make_repo(tmp_path)
        cache = _InvariantFileCache.load(root)
        cache.put("h1", _file_data(fns=["compute"]))
        cache.save()
        assert _cache_path(root).is_file()

    def test_save_load_round_trip_preserves_all_fields(self, tmp_path: pathlib.Path) -> None:
        """Every populated field survives a save followed by a fresh load."""
        root = _make_repo(tmp_path)
        fd = _file_data(
            imports=["os", "sys"],
            fns=["compute", "validate"],
            classes=["MyClass"],
            has_all=True,
            complexity={"billing.py::compute": 5},
        )
        cache = _InvariantFileCache.load(root)
        cache.put("deadbeef", fd)
        cache.save()

        loaded = _InvariantFileCache.load(root)
        result = loaded.get("deadbeef")
        assert result is not None
        assert result["raw_module_imports"] == ["os", "sys"]
        assert result["top_level_fns"] == ["compute", "validate"]
        assert result["top_level_classes"] == ["MyClass"]
        assert result["has_all"] is True
        assert result["complexity"] == {"billing.py::compute": 5}

    def test_save_noop_when_not_dirty(self, tmp_path: pathlib.Path) -> None:
        """Saving a freshly loaded, unmodified cache creates no file."""
        root = _make_repo(tmp_path)
        cache = _InvariantFileCache.load(root)
        cache.save()
        assert not _cache_path(root).exists()

    def test_dirty_false_after_successful_save(self, tmp_path: pathlib.Path) -> None:
        """A successful save clears the dirty flag."""
        root = _make_repo(tmp_path)
        cache = _InvariantFileCache.load(root)
        cache.put("h", _file_data())
        cache.save()
        assert not cache._dirty

    def test_second_save_does_not_update_mtime(self, tmp_path: pathlib.Path) -> None:
        """A second ``save()`` with nothing dirty must not touch the file.

        The inode is compared alongside the mtime: a rewrite through a
        temporary file plus rename can complete within one timestamp tick
        on a coarse-granularity filesystem, leaving ``st_mtime_ns`` equal
        while still replacing the file.
        """
        root = _make_repo(tmp_path)
        cache = _InvariantFileCache.load(root)
        cache.put("h", _file_data())
        cache.save()
        before = _cache_path(root).stat()
        cache.save()  # not dirty — must not touch the file
        after = _cache_path(root).stat()
        assert before.st_mtime_ns == after.st_mtime_ns
        assert before.st_ino == after.st_ino

    def test_load_without_muse_dir_returns_empty(self, tmp_path: pathlib.Path) -> None:
        """Loading from a directory with no repo metadata returns an empty, directory-less cache."""
        # No .muse/ at all — cache_dir is None, returns empty gracefully.
        cache = _InvariantFileCache.load(tmp_path)
        assert cache.size == 0
        assert cache._cache_dir is None

    def test_multiple_entries_round_trip(self, tmp_path: pathlib.Path) -> None:
        """Ten distinct entries all survive a save/load cycle."""
        root = _make_repo(tmp_path)
        cache = _InvariantFileCache.load(root)
        for i in range(10):
            cache.put(f"hash_{i}", _file_data(fns=[f"fn_{i}"]))
        cache.save()

        loaded = _InvariantFileCache.load(root)
        assert loaded.size == 10
        for i in range(10):
            entry = loaded.get(f"hash_{i}")
            # Narrow first so a missing entry names the failing key instead of
            # raising "'NoneType' object is not subscriptable".
            assert entry is not None, f"hash_{i} missing after reload"
            assert entry["top_level_fns"] == [f"fn_{i}"]
244
245
246 # ---------------------------------------------------------------------------
247 # Tier 5 — Data integrity
248 # ---------------------------------------------------------------------------
249
250
class TestDataIntegrity:
    """Malformed, outdated, or misplaced on-disk cache state."""

    @staticmethod
    def _write_doc(root: pathlib.Path, doc: Mapping[str, object]) -> None:
        """Serialise *doc* to JSON and write it as the cache file under *root*."""
        import json as _json

        payload = _json.dumps(doc).encode()
        _cache_path(root).write_bytes(payload)

    def test_corrupt_bytes_returns_empty(self, tmp_path: pathlib.Path) -> None:
        """Bytes that are not JSON load as an empty cache."""
        root = _make_repo(tmp_path)
        _cache_path(root).write_bytes(b"not valid JSON !!!")
        assert _InvariantFileCache.load(root).size == 0

    def test_wrong_version_returns_empty(self, tmp_path: pathlib.Path) -> None:
        """A document whose version is 999 loads as an empty cache."""
        root = _make_repo(tmp_path)
        self._write_doc(root, {"version": 999, "entries": {}})
        assert _InvariantFileCache.load(root).size == 0

    def test_missing_entries_key_returns_empty(self, tmp_path: pathlib.Path) -> None:
        """A current-version document without ``entries`` loads as an empty cache."""
        root = _make_repo(tmp_path)
        self._write_doc(root, {"version": _FILE_CACHE_VERSION})
        assert _InvariantFileCache.load(root).size == 0

    def test_invalid_entry_skipped_valid_survives(self, tmp_path: pathlib.Path) -> None:
        """A list-valued entry is dropped while its well-formed sibling is kept."""
        root = _make_repo(tmp_path)
        well_formed = {
            "raw_module_imports": ["os"],
            "from_module": [],
            "from_name": [],
            "top_level_fns": ["run"],
            "top_level_classes": [],
            "has_all": False,
            "complexity": {},
        }
        entries = {
            "good_hash": well_formed,
            # non-dict value — entry skipped
            "bad_hash": ["not", "a", "dict"],
        }
        self._write_doc(root, {"version": _FILE_CACHE_VERSION, "entries": entries})
        loaded = _InvariantFileCache.load(root)
        assert loaded.get("good_hash") is not None
        assert loaded.size == 1

    def test_non_dict_entry_value_skipped(self, tmp_path: pathlib.Path) -> None:
        """A string-valued entry is dropped, leaving the cache empty."""
        root = _make_repo(tmp_path)
        self._write_doc(
            root,
            {"version": _FILE_CACHE_VERSION, "entries": {"bad_hash": "not_a_dict"}},
        )
        assert _InvariantFileCache.load(root).size == 0

    def test_no_tmp_file_leftover_after_save(self, tmp_path: pathlib.Path) -> None:
        """After a save, the cache directory holds no ``*.tmp`` file."""
        root = _make_repo(tmp_path)
        cache = _InvariantFileCache.load(root)
        cache.put("h", _file_data())
        cache.save()
        leftovers = (muse_dir(root) / "cache").glob("*.tmp")
        assert not any(leftovers)

    def test_orphaned_tmp_swept_on_startup(self, tmp_path: pathlib.Path) -> None:
        """A stale ``.invariants_*.tmp`` left by a crash is removed by the startup sweep."""
        from muse.core.repo import _cleanup_muse_dir_temps

        root = _make_repo(tmp_path)
        dot_muse = muse_dir(root)
        stale_tmp = dot_muse / "cache" / ".invariants_abc123.tmp"
        stale_tmp.write_bytes(b"stale")
        _cleanup_muse_dir_temps(dot_muse)
        assert not stale_tmp.exists()

    def test_old_location_file_is_ignored(self, tmp_path: pathlib.Path) -> None:
        """A ``code_invariants_cache.json`` at the old ``.muse/`` root is not loaded."""
        import json as _json

        root = _make_repo(tmp_path)
        stale_entry = {
            "raw_module_imports": ["stale"],
            "from_module": [],
            "from_name": [],
            "top_level_fns": ["stale_fn"],
            "top_level_classes": [],
            "has_all": False,
            "complexity": {},
        }
        stale_doc = {
            "version": _FILE_CACHE_VERSION,
            "entries": {"stale_hash": stale_entry},
        }
        legacy_file = muse_dir(root) / "code_invariants_cache.json"
        legacy_file.write_bytes(_json.dumps(stale_doc).encode())
        loaded = _InvariantFileCache.load(root)
        assert loaded.get("stale_hash") is None
353
354
355 # ---------------------------------------------------------------------------
356 # Tier 6 — Performance (warm path skips ast.parse)
357 # ---------------------------------------------------------------------------
358
359
class TestPerformance:
    """A populated cache must make ``ast.parse`` calls unnecessary."""

    def test_warm_cache_skips_ast_parse(self, tmp_path: pathlib.Path) -> None:
        """After one cold pass is saved, a second pass must not call ``ast.parse``."""
        import ast as _ast
        from unittest.mock import patch

        from muse.core.object_store import write_object
        from muse.core.types import blob_id
        from muse.plugins.code._invariants import _build_file_data

        root = _make_repo(tmp_path)
        src = b"def compute(x: int) -> int:\n return x * 2\n"
        oid = blob_id(src)
        write_object(root, oid, src)
        manifest = {"billing.py": oid}

        # Cold pass: analyse the file once and persist the result.
        first_pass = _InvariantFileCache.load(root)
        _build_file_data(manifest, root, first_pass)
        first_pass.save()

        # Warm pass: reload from disk and record every ast.parse invocation.
        second_pass = _InvariantFileCache.load(root)
        parse_calls: list[str] = []
        real_parse = _ast.parse

        def counting_parse(source: str | bytes, *args: str | int, **kwargs: str | int) -> "ast.AST":
            # Record the call, then delegate so behaviour is otherwise unchanged.
            parse_calls.append("called")
            return real_parse(source, *args, **kwargs)

        with patch("muse.plugins.code._invariants.ast.parse", counting_parse):
            _build_file_data(manifest, root, second_pass)

        failure_message = (
            f"ast.parse called {len(parse_calls)} time(s) on warm cache — "
            "cold run should have populated the cache"
        )
        assert parse_calls == [], failure_message
399
400
401 # ---------------------------------------------------------------------------
402 # Tier 7 — Security
403 # ---------------------------------------------------------------------------
404
405
class TestSecurity:
    """Untrusted cache content and unreadable files."""

    # ``os.getuid`` exists only on Unix.  The condition is evaluated while the
    # class body executes, so an unguarded call would raise AttributeError on
    # other platforms and abort collection of this whole module.
    @pytest.mark.skipif(
        hasattr(os, "getuid") and os.getuid() == 0,
        reason="root bypasses file permissions",
    )
    def test_mode_000_file_returns_empty_no_raise(self, tmp_path: pathlib.Path) -> None:
        """An unreadable cache file must be handled gracefully — never raises."""
        import json as _json

        root = _make_repo(tmp_path)
        cache_file = _cache_path(root)
        cache_file.write_bytes(
            _json.dumps({"version": _FILE_CACHE_VERSION, "entries": {}}).encode()
        )
        cache_file.chmod(0o000)
        try:
            cache = _InvariantFileCache.load(root)
            assert cache.size == 0
        finally:
            cache_file.chmod(0o644)  # restore so tmp_path cleanup succeeds

    def test_deeply_nested_payload_does_not_crash(self, tmp_path: pathlib.Path) -> None:
        """A pathologically nested JSON structure must not crash or hang.

        The load code accepts any dict as a ``_FileData`` (filling in defaults
        for missing fields), so deeply nested dicts won't be *rejected* — but
        they must not raise an exception or exhaust the stack.
        """
        import json as _json

        root = _make_repo(tmp_path)
        # Wrap a leaf string in 200 levels of single-key dicts.
        nested: str | Mapping[str, object] = "leaf"
        for _ in range(200):
            nested = {"k": nested}
        doc = {
            "version": _FILE_CACHE_VERSION,
            "entries": {"bomb": nested},
        }
        _cache_path(root).write_bytes(_json.dumps(doc).encode())
        # Must complete without raising — size is 1 (loaded with empty defaults).
        cache = _InvariantFileCache.load(root)
        assert isinstance(cache, _InvariantFileCache)
File History 5 commits
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 21 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 22 days ago
sha256:09656d1b0772ea4c96f8911d7bf8042b33eb0596992c6546dfab3d21e9dee330 fix: align muse read --json schema and test contracts Sonnet 4.6 minor 23 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 28 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor 29 days ago