gabriel / muse public
test_invariant_file_cache.py python
439 lines 15.9 KB
Raw
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor ⚠ breaking 29 days ago
1 """Tests for _InvariantFileCache — the persistent per-file AST analysis cache.
2
3 Coverage
4 --------
5 The cache lives at ``.muse/cache/invariants.msgpack`` and maps a file's
6 content hash (SHA-256) to the ``_FileData`` struct produced by a single
7 ``ast.parse`` pass. On a warm cache, ``muse code invariants`` skips every
8 ``ast.parse`` call — O(N×R) → O(1).
9
10 Tier 1 — Unit
11 In-memory operations: get/put/prune/size/empty/dirty flag. No I/O.
12
13 Tier 2 — Integration
14 Real filesystem via ``tmp_path``. Verifies the correct on-disk path,
15 save/load round-trip fidelity, dirty-flag lifecycle, and no-op behaviour.
16
17 Tier 5 — Data integrity
18 Adversarial on-disk state: corrupt bytes, wrong version, missing keys,
19 invalid entries, non-string content hashes. Also verifies atomic write
20 (no ``.tmp`` leftover after a successful save).
21
22 Tier 6 — Performance
23 Asserts that a warm cache skips ``ast.parse`` entirely (zero calls).
24 The mechanism — patching ``ast.parse`` and counting calls — is more reliable
25 than wall-clock ratios across CI hardware and proves the exact property we care about.
26
27 Tier 7 — Security
28 Mode-000 cache file: ``load()`` must return empty and never raise.
29 Deeply nested msgpack payload: ``load()`` must not crash or hang.
30 """
31
32 from __future__ import annotations
33
34 import ast
35 import os
36 import pathlib
37 import stat
38 import time
39
40 import msgpack
41 import pytest
42
43 from muse.core.paths import muse_dir
44 from muse.plugins.code._invariants import (
45 _FileData,
46 _InvariantFileCache,
47 _FILE_CACHE_VERSION,
48 )
49
50
51 # ---------------------------------------------------------------------------
52 # Helpers
53 # ---------------------------------------------------------------------------
54
55
def _make_repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Create the ``cache`` directory under ``muse_dir(tmp_path)`` and return *tmp_path*.

    The returned path is used by the tests as the repository root.
    """
    cache_dir = muse_dir(tmp_path) / "cache"
    cache_dir.mkdir(parents=True)
    return tmp_path
60
61
def _file_data(
    imports: list[str] | None = None,
    fns: list[str] | None = None,
    classes: list[str] | None = None,
    has_all: bool = False,
    complexity: dict[str, int] | None = None,
) -> _FileData:
    """Assemble a small ``_FileData`` value for use in tests.

    Any collection argument that is omitted (or otherwise falsy) is replaced
    by a fresh empty container.  ``from_module`` and ``from_name`` are always
    empty lists.
    """
    module_imports = imports or []
    function_names = fns or []
    class_names = classes or []
    complexity_by_symbol = complexity or {}
    return _FileData(
        raw_module_imports=module_imports,
        from_module=[],
        from_name=[],
        top_level_fns=function_names,
        top_level_classes=class_names,
        has_all=has_all,
        complexity=complexity_by_symbol,
    )
79
80
def _cache_path(root: pathlib.Path) -> pathlib.Path:
    """Return ``muse_dir(root) / "cache" / "invariants.msgpack"`` — the cache file path."""
    cache_dir = muse_dir(root) / "cache"
    return cache_dir / "invariants.msgpack"
83
84
85 # ---------------------------------------------------------------------------
86 # Tier 1 — Unit (in-memory, no filesystem)
87 # ---------------------------------------------------------------------------
88
89
class TestUnit:
    """In-memory get/put/prune/size/empty operations on a cache with no cache dir."""

    def test_get_miss_returns_none(self) -> None:
        """``get`` on a key that was never stored returns ``None``."""
        cache = _InvariantFileCache.empty()
        assert cache.get("no_such_hash") is None

    def test_put_then_get_hit(self) -> None:
        """A value stored with ``put`` is returned by ``get`` for the same key."""
        cache = _InvariantFileCache.empty()
        fd = _file_data(imports=["os"], fns=["main"])
        cache.put("abc123", fd)
        assert cache.get("abc123") == fd

    def test_put_marks_dirty(self) -> None:
        """A fresh empty cache is clean; ``put`` sets the dirty flag."""
        cache = _InvariantFileCache.empty()
        assert not cache._dirty
        cache.put("id1", _file_data())
        assert cache._dirty

    def test_put_same_key_overwrites(self) -> None:
        """A second ``put`` under the same key replaces the value; size stays 1."""
        cache = _InvariantFileCache.empty()
        cache.put("k", _file_data(fns=["old"]))
        cache.put("k", _file_data(fns=["new"]))
        entry = cache.get("k")
        # Guard the Optional result so a lost entry fails as an assertion,
        # not as a TypeError from subscripting None.
        assert entry is not None
        assert entry["top_level_fns"] == ["new"]
        assert cache.size == 1

    def test_different_keys_independent(self) -> None:
        """Values stored under distinct keys do not affect each other."""
        cache = _InvariantFileCache.empty()
        a = _file_data(fns=["alpha"])
        b = _file_data(fns=["beta"])
        cache.put("k_a", a)
        cache.put("k_b", b)
        assert cache.get("k_a") == a
        assert cache.get("k_b") == b

    def test_size_starts_zero(self) -> None:
        """An empty cache reports ``size == 0``."""
        assert _InvariantFileCache.empty().size == 0

    def test_size_grows_with_put(self) -> None:
        """``size`` counts the distinct keys stored."""
        cache = _InvariantFileCache.empty()
        cache.put("x", _file_data())
        cache.put("y", _file_data())
        assert cache.size == 2

    def test_prune_removes_stale_sets_dirty(self) -> None:
        """``prune`` drops keys outside the live set and marks the cache dirty."""
        cache = _InvariantFileCache.empty()
        cache.put("keep", _file_data())
        cache.put("drop", _file_data())
        # Reset the flag so the assertion below observes prune(), not put().
        cache._dirty = False
        cache.prune({"keep"})
        assert cache.get("keep") is not None
        assert cache.get("drop") is None
        assert cache._dirty

    def test_prune_noop_when_all_live(self) -> None:
        """``prune`` leaves the dirty flag clear when nothing is removed."""
        cache = _InvariantFileCache.empty()
        cache.put("keep", _file_data())
        cache._dirty = False
        cache.prune({"keep", "other"})
        assert not cache._dirty

    def test_empty_cache_dir_is_none(self) -> None:
        """``empty()`` builds a cache whose ``_cache_dir`` is ``None``."""
        cache = _InvariantFileCache.empty()
        assert cache._cache_dir is None

    def test_empty_save_is_noop(self, tmp_path: pathlib.Path) -> None:
        """``save`` on a dirty cache without a cache dir writes no cache file."""
        cache = _InvariantFileCache.empty()
        cache.put("id", _file_data())
        # The cache knows nothing about tmp_path, so scanning tmp_path alone
        # could never fail.  Run save() with tmp_path as the working directory
        # so that a stray relative-path write would land where we look.
        previous_cwd = pathlib.Path.cwd()
        os.chdir(tmp_path)
        try:
            cache.save()
        finally:
            os.chdir(previous_cwd)
        assert not any(tmp_path.rglob("invariants.msgpack"))
160
161
162 # ---------------------------------------------------------------------------
163 # Tier 2 — Integration (real filesystem)
164 # ---------------------------------------------------------------------------
165
166
class TestIntegration:
    """Real filesystem via ``tmp_path``."""

    def test_load_missing_file_returns_empty(self, tmp_path: pathlib.Path) -> None:
        """Loading from a repo with no cache file yields an empty cache."""
        root = _make_repo(tmp_path)
        cache = _InvariantFileCache.load(root)
        assert cache.size == 0

    def test_save_creates_file_at_correct_path(self, tmp_path: pathlib.Path) -> None:
        """``save`` on a dirty cache creates the file at ``_cache_path(root)``."""
        root = _make_repo(tmp_path)
        cache = _InvariantFileCache.load(root)
        cache.put("h1", _file_data(fns=["compute"]))
        cache.save()
        assert _cache_path(root).is_file()

    def test_save_load_round_trip_preserves_all_fields(self, tmp_path: pathlib.Path) -> None:
        """Every ``_FileData`` field survives a save followed by a fresh load."""
        root = _make_repo(tmp_path)
        fd = _file_data(
            imports=["os", "sys"],
            fns=["compute", "validate"],
            classes=["MyClass"],
            has_all=True,
            complexity={"billing.py::compute": 5},
        )
        cache = _InvariantFileCache.load(root)
        cache.put("deadbeef", fd)
        cache.save()

        loaded = _InvariantFileCache.load(root)
        result = loaded.get("deadbeef")
        assert result is not None
        assert result["raw_module_imports"] == ["os", "sys"]
        # _file_data() always stores empty lists for these two fields; check
        # them too so the test really covers all seven fields.
        assert result["from_module"] == []
        assert result["from_name"] == []
        assert result["top_level_fns"] == ["compute", "validate"]
        assert result["top_level_classes"] == ["MyClass"]
        assert result["has_all"] is True
        assert result["complexity"] == {"billing.py::compute": 5}

    def test_save_noop_when_not_dirty(self, tmp_path: pathlib.Path) -> None:
        """``save`` on a clean cache does not create the cache file."""
        root = _make_repo(tmp_path)
        cache = _InvariantFileCache.load(root)
        cache.save()
        assert not _cache_path(root).exists()

    def test_dirty_false_after_successful_save(self, tmp_path: pathlib.Path) -> None:
        """A successful ``save`` clears the dirty flag."""
        root = _make_repo(tmp_path)
        cache = _InvariantFileCache.load(root)
        cache.put("h", _file_data())
        cache.save()
        assert not cache._dirty

    def test_second_save_does_not_update_mtime(self, tmp_path: pathlib.Path) -> None:
        """A second ``save`` with no intervening change leaves the file's mtime alone."""
        root = _make_repo(tmp_path)
        cache = _InvariantFileCache.load(root)
        cache.put("h", _file_data())
        cache.save()
        mtime1 = _cache_path(root).stat().st_mtime_ns
        cache.save()  # not dirty — must not touch the file
        mtime2 = _cache_path(root).stat().st_mtime_ns
        assert mtime1 == mtime2

    def test_load_without_muse_dir_returns_empty(self, tmp_path: pathlib.Path) -> None:
        """``load`` on a directory that was never set up as a repo returns an empty cache."""
        # No .muse/ at all — cache_dir is None, returns empty gracefully.
        cache = _InvariantFileCache.load(tmp_path)
        assert cache.size == 0
        assert cache._cache_dir is None

    def test_multiple_entries_round_trip(self, tmp_path: pathlib.Path) -> None:
        """Ten distinct entries all survive a save followed by a fresh load."""
        root = _make_repo(tmp_path)
        cache = _InvariantFileCache.load(root)
        for i in range(10):
            cache.put(f"hash_{i}", _file_data(fns=[f"fn_{i}"]))
        cache.save()

        loaded = _InvariantFileCache.load(root)
        assert loaded.size == 10
        for i in range(10):
            entry = loaded.get(f"hash_{i}")
            # Guard the Optional result so a missing entry is reported as an
            # assertion failure naming the key, not a TypeError.
            assert entry is not None, f"hash_{i} missing after reload"
            assert entry["top_level_fns"] == [f"fn_{i}"]
244
245
246 # ---------------------------------------------------------------------------
247 # Tier 5 — Data integrity
248 # ---------------------------------------------------------------------------
249
250
class TestDataIntegrity:
    """Damaged or unexpected on-disk cache state."""

    @staticmethod
    def _write_doc(root: pathlib.Path, doc: object) -> None:
        """Pack *doc* with msgpack and write the bytes to the cache file under *root*."""
        payload = msgpack.packb(doc, use_bin_type=True)
        _cache_path(root).write_bytes(payload)

    def test_corrupt_bytes_returns_empty(self, tmp_path: pathlib.Path) -> None:
        """Bytes that are not a valid cache document load as an empty cache."""
        repo = _make_repo(tmp_path)
        _cache_path(repo).write_bytes(b"not valid msgpack !!!")
        loaded = _InvariantFileCache.load(repo)
        assert loaded.size == 0

    def test_wrong_version_returns_empty(self, tmp_path: pathlib.Path) -> None:
        """A document carrying version 999 loads as an empty cache."""
        repo = _make_repo(tmp_path)
        self._write_doc(repo, {"version": 999, "entries": {}})
        loaded = _InvariantFileCache.load(repo)
        assert loaded.size == 0

    def test_missing_entries_key_returns_empty(self, tmp_path: pathlib.Path) -> None:
        """A document with the current version but no ``entries`` key loads empty."""
        repo = _make_repo(tmp_path)
        self._write_doc(repo, {"version": _FILE_CACHE_VERSION})
        loaded = _InvariantFileCache.load(repo)
        assert loaded.size == 0

    def test_invalid_entry_skipped_valid_survives(self, tmp_path: pathlib.Path) -> None:
        """One malformed entry is dropped while its well-formed sibling is kept."""
        repo = _make_repo(tmp_path)
        well_formed = {
            "raw_module_imports": ["os"],
            "from_module": [],
            "from_name": [],
            "top_level_fns": ["run"],
            "top_level_classes": [],
            "has_all": False,
            "complexity": {},
        }
        # The second value is a list rather than a dict, so that entry is skipped.
        entries = {
            "good_hash": well_formed,
            "bad_hash": ["not", "a", "dict"],
        }
        self._write_doc(repo, {"version": _FILE_CACHE_VERSION, "entries": entries})
        loaded = _InvariantFileCache.load(repo)
        assert loaded.get("good_hash") is not None
        assert loaded.size == 1

    def test_non_dict_entry_value_skipped(self, tmp_path: pathlib.Path) -> None:
        """An entry whose value is a plain string is not loaded."""
        repo = _make_repo(tmp_path)
        self._write_doc(
            repo,
            {
                "version": _FILE_CACHE_VERSION,
                "entries": {"bad_hash": "not_a_dict"},
            },
        )
        loaded = _InvariantFileCache.load(repo)
        assert loaded.size == 0

    def test_no_tmp_file_leftover_after_save(self, tmp_path: pathlib.Path) -> None:
        """After a save, the cache directory holds no ``*.tmp`` files."""
        repo = _make_repo(tmp_path)
        cache = _InvariantFileCache.load(repo)
        cache.put("h", _file_data())
        cache.save()
        leftovers = list((muse_dir(repo) / "cache").glob("*.tmp"))
        assert not leftovers

    def test_orphaned_tmp_swept_on_startup(self, tmp_path: pathlib.Path) -> None:
        """A stale ``.invariants_*.tmp`` file is removed by ``_cleanup_muse_dir_temps``."""
        from muse.core.repo import _cleanup_muse_dir_temps

        repo = _make_repo(tmp_path)
        muse_root = muse_dir(repo)
        stale_tmp = muse_root / "cache" / ".invariants_abc123.tmp"
        stale_tmp.write_bytes(b"stale")
        _cleanup_muse_dir_temps(muse_root)
        assert not stale_tmp.exists()

    def test_old_location_file_is_ignored(self, tmp_path: pathlib.Path) -> None:
        """A ``code_invariants_cache.msgpack`` directly under the muse dir is not loaded."""
        repo = _make_repo(tmp_path)
        legacy_file = muse_dir(repo) / "code_invariants_cache.msgpack"
        legacy_entry = {
            "raw_module_imports": ["stale"],
            "from_module": [],
            "from_name": [],
            "top_level_fns": ["stale_fn"],
            "top_level_classes": [],
            "has_all": False,
            "complexity": {},
        }
        legacy_doc = {
            "version": _FILE_CACHE_VERSION,
            "entries": {"stale_hash": legacy_entry},
        }
        legacy_file.write_bytes(msgpack.packb(legacy_doc, use_bin_type=True))
        loaded = _InvariantFileCache.load(repo)
        assert loaded.get("stale_hash") is None
348
349
350 # ---------------------------------------------------------------------------
351 # Tier 6 — Performance (warm path skips ast.parse)
352 # ---------------------------------------------------------------------------
353
354
class TestPerformance:
    """Warm cache must not call ``ast.parse``."""

    def test_warm_cache_skips_ast_parse(self, tmp_path: pathlib.Path) -> None:
        """Pre-populated cache: ``_build_file_data`` must not call ``ast.parse``.

        A cold run fills and saves the cache.  The cache is then reloaded from
        disk and ``_build_file_data`` runs again with ``ast.parse`` replaced by
        a counting wrapper; the wrapper must never be invoked.
        """
        from unittest.mock import patch
        from muse.core.object_store import write_object
        from muse.core.types import blob_id
        from muse.plugins.code._invariants import _build_file_data

        root = _make_repo(tmp_path)
        src = b"def compute(x: int) -> int:\n return x * 2\n"
        oid = blob_id(src)
        write_object(root, oid, src)
        manifest = {"billing.py": oid}

        # Cold run — populates the cache in memory, then persists it.
        cold_cache = _InvariantFileCache.load(root)
        _build_file_data(manifest, root, cold_cache)
        cold_cache.save()

        # Warm run — reload from disk and patch ast.parse to detect any call.
        warm_cache = _InvariantFileCache.load(root)
        # Without this guard the test would pass vacuously if the cold run
        # stored nothing and the file were simply never parsed.
        assert warm_cache.size > 0, "cold run did not populate the persisted cache"

        parse_calls: list[str] = []
        # Capture the real function before the patch below replaces it.
        original_parse = ast.parse

        def counting_parse(source: str | bytes, *args: object, **kwargs: object) -> ast.AST:
            """Record the call, then delegate to the real ``ast.parse``."""
            parse_calls.append("called")
            return original_parse(source, *args, **kwargs)

        with patch("muse.plugins.code._invariants.ast.parse", counting_parse):
            _build_file_data(manifest, root, warm_cache)

        assert parse_calls == [], (
            f"ast.parse called {len(parse_calls)} time(s) on warm cache — "
            "cold run should have populated the cache"
        )
394
395
396 # ---------------------------------------------------------------------------
397 # Tier 7 — Security
398 # ---------------------------------------------------------------------------
399
400
class TestSecurity:
    """Untrusted cache content and unreadable files."""

    # os.getuid does not exist on Windows.  The decorator argument is evaluated
    # when this class body runs, so an unguarded call would raise
    # AttributeError and stop the whole module from importing there.
    @pytest.mark.skipif(
        not hasattr(os, "getuid") or os.getuid() == 0,
        reason="requires POSIX file permissions and a non-root user",
    )
    def test_mode_000_file_returns_empty_no_raise(self, tmp_path: pathlib.Path) -> None:
        """An unreadable cache file must be handled gracefully — never raises."""
        root = _make_repo(tmp_path)
        cache_file = _cache_path(root)
        cache_file.write_bytes(
            msgpack.packb(
                {"version": _FILE_CACHE_VERSION, "entries": {}}, use_bin_type=True
            )
        )
        cache_file.chmod(0o000)
        try:
            cache = _InvariantFileCache.load(root)
            assert cache.size == 0
        finally:
            cache_file.chmod(0o644)  # restore so tmp_path cleanup succeeds

    def test_deeply_nested_payload_does_not_crash(self, tmp_path: pathlib.Path) -> None:
        """A pathologically nested msgpack structure must not crash or hang.

        The entry value is a dict nested 200 levels deep.  This test only
        requires that ``load()`` returns a cache object without raising; it
        makes no claim about whether the entry is kept or dropped.
        """
        root = _make_repo(tmp_path)
        # Annotated as object because the name is rebound from str to dict.
        nested: object = "leaf"
        for _ in range(200):
            nested = {"k": nested}
        doc = {
            "version": _FILE_CACHE_VERSION,
            "entries": {"bomb": nested},
        }
        _cache_path(root).write_bytes(msgpack.packb(doc, use_bin_type=True))
        # Must complete without raising; the resulting size is deliberately not asserted.
        cache = _InvariantFileCache.load(root)
        assert isinstance(cache, _InvariantFileCache)
File History 1 commit
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor 29 days ago