test_invariant_file_cache.py
python
sha256:7781e508756c81b7ddb0b08b408fd2b99bad87798cefa596773373efc360952c
chore: typing audit — zero violations, zero untyped defs
Sonnet 4.6
patch
23 days ago
| 1 | """Tests for _InvariantFileCache — the persistent per-file AST analysis cache. |
| 2 | |
| 3 | Coverage |
| 4 | -------- |
| 5 | The cache lives at ``.muse/cache/invariants.json`` and maps a file's |
| 6 | content hash (SHA-256) to the ``_FileData`` struct produced by a single |
| 7 | ``ast.parse`` pass. On a warm cache, ``muse code invariants`` skips every |
| 8 | ``ast.parse`` call — O(N×R) → O(1). |
| 9 | |
| 10 | Tier 1 — Unit |
| 11 | In-memory operations: get/put/prune/size/empty/dirty flag. No I/O. |
| 12 | |
| 13 | Tier 2 — Integration |
| 14 | Real filesystem via ``tmp_path``. Verifies the correct on-disk path, |
| 15 | save/load round-trip fidelity, dirty-flag lifecycle, and no-op behaviour. |
| 16 | |
| 17 | Tier 5 — Data integrity |
| 18 | Adversarial on-disk state: corrupt bytes, wrong version, missing keys, |
| 19 | invalid entries, non-string content hashes. Also verifies atomic write |
| 20 | (no ``.tmp`` leftover after a successful save). |
| 21 | |
| 22 | Tier 6 — Performance |
| 23 | Asserts that a warm cache skips ``ast.parse`` entirely (zero calls). |
| 24 | The mechanism — I/O patching — is more reliable than wall-clock ratios |
| 25 | across CI hardware and proves the exact property we care about. |
| 26 | |
| 27 | Tier 7 — Security |
| 28 | Mode-000 cache file: ``load()`` must return empty and never raise. |
| 29 | Deeply nested JSON payload: ``load()`` must not crash or hang. |
| 30 | """ |
| 31 | |
| 32 | from __future__ import annotations |
| 33 | |
| 34 | import ast |
| 35 | import os |
| 36 | import pathlib |
| 37 | import stat |
| 38 | import time |
| 39 | from collections.abc import Mapping |
| 40 | |
| 41 | import pytest |
| 42 | |
| 43 | from muse.core.paths import muse_dir |
| 44 | from muse.plugins.code._invariants import ( |
| 45 | _FileData, |
| 46 | _InvariantFileCache, |
| 47 | _FILE_CACHE_VERSION, |
| 48 | ) |
| 49 | |
| 50 | |
| 51 | # --------------------------------------------------------------------------- |
| 52 | # Helpers |
| 53 | # --------------------------------------------------------------------------- |
| 54 | |
| 55 | |
def _make_repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Create the ``cache`` directory under ``muse_dir(tmp_path)`` and return *tmp_path*.

    Missing parent directories are created along the way.  The call does not
    pass ``exist_ok``, so invoking it twice on the same root raises
    ``FileExistsError``.
    """
    cache_dir = muse_dir(tmp_path) / "cache"
    cache_dir.mkdir(parents=True)
    return tmp_path
| 60 | |
| 61 | |
def _file_data(
    imports: list[str] | None = None,
    fns: list[str] | None = None,
    classes: list[str] | None = None,
    has_all: bool = False,
    complexity: dict[str, int] | None = None,
) -> _FileData:
    """Assemble a small ``_FileData`` value for use as a test fixture.

    Any list/dict argument that is ``None`` (or empty) is replaced by a fresh
    empty container.  ``from_module`` and ``from_name`` are always empty lists;
    ``has_all`` is passed through unchanged.
    """
    module_imports = imports if imports else []
    function_names = fns if fns else []
    class_names = classes if classes else []
    complexity_map = complexity if complexity else {}
    return _FileData(
        raw_module_imports=module_imports,
        from_module=[],
        from_name=[],
        top_level_fns=function_names,
        top_level_classes=class_names,
        has_all=has_all,
        complexity=complexity_map,
    )
| 79 | |
| 80 | |
def _cache_path(root: pathlib.Path) -> pathlib.Path:
    """Return ``cache/invariants.json`` inside ``muse_dir(root)``.

    This is the location the tests in this module read from and write to when
    they inspect or fabricate the on-disk cache file.
    """
    cache_dir = muse_dir(root) / "cache"
    return cache_dir / "invariants.json"
| 83 | |
| 84 | |
| 85 | # --------------------------------------------------------------------------- |
| 86 | # Tier 1 — Unit (in-memory, no filesystem) |
| 87 | # --------------------------------------------------------------------------- |
| 88 | |
| 89 | |
class TestUnit:
    """In-memory get/put/prune/size/empty operations — no cache file is read."""

    def test_get_miss_returns_none(self) -> None:
        """``get`` on a key that was never stored returns ``None``."""
        cache = _InvariantFileCache.empty()
        assert cache.get("no_such_hash") is None

    def test_put_then_get_hit(self) -> None:
        """A value stored with ``put`` compares equal to what ``get`` returns."""
        cache = _InvariantFileCache.empty()
        fd = _file_data(imports=["os"], fns=["main"])
        cache.put("abc123", fd)
        assert cache.get("abc123") == fd

    def test_put_marks_dirty(self) -> None:
        """A fresh empty cache is clean; the first ``put`` flips ``_dirty``."""
        cache = _InvariantFileCache.empty()
        assert not cache._dirty
        cache.put("id1", _file_data())
        assert cache._dirty

    def test_put_same_key_overwrites(self) -> None:
        """A second ``put`` under the same key replaces the entry, not adds one."""
        cache = _InvariantFileCache.empty()
        cache.put("k", _file_data(fns=["old"]))
        cache.put("k", _file_data(fns=["new"]))
        entry = cache.get("k")
        # Check for a miss explicitly so a regression shows up as an assertion
        # failure rather than a ``TypeError`` from subscripting ``None``.
        assert entry is not None
        assert entry["top_level_fns"] == ["new"]
        assert cache.size == 1

    def test_different_keys_independent(self) -> None:
        """Entries stored under distinct keys do not affect one another."""
        cache = _InvariantFileCache.empty()
        a = _file_data(fns=["alpha"])
        b = _file_data(fns=["beta"])
        cache.put("k_a", a)
        cache.put("k_b", b)
        assert cache.get("k_a") == a
        assert cache.get("k_b") == b

    def test_size_starts_zero(self) -> None:
        """An empty cache reports ``size == 0``."""
        assert _InvariantFileCache.empty().size == 0

    def test_size_grows_with_put(self) -> None:
        """``size`` counts the distinct keys that have been stored."""
        cache = _InvariantFileCache.empty()
        cache.put("x", _file_data())
        cache.put("y", _file_data())
        assert cache.size == 2

    def test_prune_removes_stale_sets_dirty(self) -> None:
        """``prune`` drops keys outside the live set and marks the cache dirty."""
        cache = _InvariantFileCache.empty()
        cache.put("keep", _file_data())
        cache.put("drop", _file_data())
        # Reset the flag so the assertion below observes prune's effect only.
        cache._dirty = False
        cache.prune({"keep"})
        assert cache.get("keep") is not None
        assert cache.get("drop") is None
        assert cache._dirty

    def test_prune_noop_when_all_live(self) -> None:
        """``prune`` leaves ``_dirty`` untouched when nothing is removed."""
        cache = _InvariantFileCache.empty()
        cache.put("keep", _file_data())
        cache._dirty = False
        cache.prune({"keep", "other"})
        assert not cache._dirty

    def test_empty_cache_dir_is_none(self) -> None:
        """``empty()`` yields a cache with no backing directory."""
        cache = _InvariantFileCache.empty()
        assert cache._cache_dir is None

    def test_empty_save_is_noop(
        self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """``save`` on a directory-less cache must not create a cache file."""
        # The cache under test has no link to ``tmp_path``.  Run from inside it
        # so that a write to a cwd-relative path would land where the assertion
        # below looks; otherwise the check could never fail.
        monkeypatch.chdir(tmp_path)
        cache = _InvariantFileCache.empty()
        cache.put("id", _file_data())
        cache.save()
        assert not any(tmp_path.rglob("invariants.json"))
| 160 | |
| 161 | |
| 162 | # --------------------------------------------------------------------------- |
| 163 | # Tier 2 — Integration (real filesystem) |
| 164 | # --------------------------------------------------------------------------- |
| 165 | |
| 166 | |
class TestIntegration:
    """Real filesystem via ``tmp_path``."""

    def test_load_missing_file_returns_empty(self, tmp_path: pathlib.Path) -> None:
        """Loading from a repo whose cache file does not exist gives size 0."""
        root = _make_repo(tmp_path)
        cache = _InvariantFileCache.load(root)
        assert cache.size == 0

    def test_save_creates_file_at_correct_path(self, tmp_path: pathlib.Path) -> None:
        """A dirty cache saved to disk appears at ``_cache_path(root)``."""
        root = _make_repo(tmp_path)
        cache = _InvariantFileCache.load(root)
        cache.put("h1", _file_data(fns=["compute"]))
        cache.save()
        assert _cache_path(root).is_file()

    def test_save_load_round_trip_preserves_all_fields(self, tmp_path: pathlib.Path) -> None:
        """Every ``_FileData`` field survives a save followed by a fresh load."""
        root = _make_repo(tmp_path)
        fd = _file_data(
            imports=["os", "sys"],
            fns=["compute", "validate"],
            classes=["MyClass"],
            has_all=True,
            complexity={"billing.py::compute": 5},
        )
        cache = _InvariantFileCache.load(root)
        cache.put("deadbeef", fd)
        cache.save()

        loaded = _InvariantFileCache.load(root)
        result = loaded.get("deadbeef")
        assert result is not None
        assert result["raw_module_imports"] == ["os", "sys"]
        # ``_file_data`` always stores empty lists for these two fields; check
        # them as well so the test covers every field, as its name promises.
        assert result["from_module"] == []
        assert result["from_name"] == []
        assert result["top_level_fns"] == ["compute", "validate"]
        assert result["top_level_classes"] == ["MyClass"]
        assert result["has_all"] is True
        assert result["complexity"] == {"billing.py::compute": 5}

    def test_save_noop_when_not_dirty(self, tmp_path: pathlib.Path) -> None:
        """Saving a freshly loaded (clean) cache does not create the file."""
        root = _make_repo(tmp_path)
        cache = _InvariantFileCache.load(root)
        cache.save()
        assert not _cache_path(root).exists()

    def test_dirty_false_after_successful_save(self, tmp_path: pathlib.Path) -> None:
        """``save`` clears the dirty flag set by ``put``."""
        root = _make_repo(tmp_path)
        cache = _InvariantFileCache.load(root)
        cache.put("h", _file_data())
        cache.save()
        assert not cache._dirty

    def test_second_save_does_not_update_mtime(self, tmp_path: pathlib.Path) -> None:
        """A second ``save`` with no intervening change leaves the file untouched."""
        root = _make_repo(tmp_path)
        cache = _InvariantFileCache.load(root)
        cache.put("h", _file_data())
        cache.save()
        mtime1 = _cache_path(root).stat().st_mtime_ns
        cache.save()  # not dirty — must not touch the file
        mtime2 = _cache_path(root).stat().st_mtime_ns
        assert mtime1 == mtime2

    def test_load_without_muse_dir_returns_empty(self, tmp_path: pathlib.Path) -> None:
        """Loading from a root with no muse directory yields an empty cache."""
        # ``_make_repo`` is deliberately not called: nothing exists under tmp_path.
        cache = _InvariantFileCache.load(tmp_path)
        assert cache.size == 0
        assert cache._cache_dir is None

    def test_multiple_entries_round_trip(self, tmp_path: pathlib.Path) -> None:
        """Ten distinct entries are all present and intact after save + load."""
        root = _make_repo(tmp_path)
        cache = _InvariantFileCache.load(root)
        for i in range(10):
            cache.put(f"hash_{i}", _file_data(fns=[f"fn_{i}"]))
        cache.save()

        loaded = _InvariantFileCache.load(root)
        assert loaded.size == 10
        for i in range(10):
            entry = loaded.get(f"hash_{i}")
            # Report a missing entry by key instead of failing with a
            # ``TypeError`` on ``None[...]``.
            assert entry is not None, f"hash_{i} missing after round trip"
            assert entry["top_level_fns"] == [f"fn_{i}"]
| 244 | |
| 245 | |
| 246 | # --------------------------------------------------------------------------- |
| 247 | # Tier 5 — Data integrity |
| 248 | # --------------------------------------------------------------------------- |
| 249 | |
| 250 | |
class TestDataIntegrity:
    """Adversarial on-disk state."""

    def test_corrupt_bytes_returns_empty(self, tmp_path: pathlib.Path) -> None:
        """Bytes that are not JSON at the cache path load as an empty cache."""
        repo = _make_repo(tmp_path)
        target = _cache_path(repo)
        target.write_bytes(b"not valid JSON !!!")
        loaded = _InvariantFileCache.load(repo)
        assert loaded.size == 0

    def test_wrong_version_returns_empty(self, tmp_path: pathlib.Path) -> None:
        """A document whose ``version`` is 999 loads as an empty cache."""
        import json

        repo = _make_repo(tmp_path)
        payload = json.dumps({"version": 999, "entries": {}})
        _cache_path(repo).write_bytes(payload.encode())
        loaded = _InvariantFileCache.load(repo)
        assert loaded.size == 0

    def test_missing_entries_key_returns_empty(self, tmp_path: pathlib.Path) -> None:
        """A document with the current version but no ``entries`` key loads empty."""
        import json

        repo = _make_repo(tmp_path)
        payload = json.dumps({"version": _FILE_CACHE_VERSION})
        _cache_path(repo).write_bytes(payload.encode())
        loaded = _InvariantFileCache.load(repo)
        assert loaded.size == 0

    def test_invalid_entry_skipped_valid_survives(self, tmp_path: pathlib.Path) -> None:
        """A list-valued entry is dropped while a well-formed sibling is kept."""
        import json

        repo = _make_repo(tmp_path)
        well_formed = {
            "raw_module_imports": ["os"],
            "from_module": [],
            "from_name": [],
            "top_level_fns": ["run"],
            "top_level_classes": [],
            "has_all": False,
            "complexity": {},
        }
        entries = {
            "good_hash": well_formed,
            # A list is not a dict, so this entry is expected to be skipped.
            "bad_hash": ["not", "a", "dict"],
        }
        document = {"version": _FILE_CACHE_VERSION, "entries": entries}
        _cache_path(repo).write_bytes(json.dumps(document).encode())
        loaded = _InvariantFileCache.load(repo)
        assert loaded.get("good_hash") is not None
        assert loaded.size == 1

    def test_non_dict_entry_value_skipped(self, tmp_path: pathlib.Path) -> None:
        """A string-valued entry is dropped, leaving the cache empty."""
        import json

        repo = _make_repo(tmp_path)
        document = {
            "version": _FILE_CACHE_VERSION,
            "entries": {"bad_hash": "not_a_dict"},
        }
        _cache_path(repo).write_bytes(json.dumps(document).encode())
        loaded = _InvariantFileCache.load(repo)
        assert loaded.size == 0

    def test_no_tmp_file_leftover_after_save(self, tmp_path: pathlib.Path) -> None:
        """After ``save`` no ``*.tmp`` file remains in the cache directory."""
        repo = _make_repo(tmp_path)
        cache = _InvariantFileCache.load(repo)
        cache.put("h", _file_data())
        cache.save()
        leftovers = (muse_dir(repo) / "cache").glob("*.tmp")
        assert not any(leftovers)

    def test_orphaned_tmp_swept_on_startup(self, tmp_path: pathlib.Path) -> None:
        """A stale ``.invariants_*.tmp`` left by a crash is removed by the startup sweep."""
        from muse.core.repo import _cleanup_muse_dir_temps

        repo = _make_repo(tmp_path)
        muse_root = muse_dir(repo)
        stale_tmp = muse_root / "cache" / ".invariants_abc123.tmp"
        stale_tmp.write_bytes(b"stale")
        _cleanup_muse_dir_temps(muse_root)
        assert not stale_tmp.exists()

    def test_old_location_file_is_ignored(self, tmp_path: pathlib.Path) -> None:
        """A ``code_invariants_cache.json`` at the old ``.muse/`` root is not loaded."""
        import json

        repo = _make_repo(tmp_path)
        legacy_file = muse_dir(repo) / "code_invariants_cache.json"
        legacy_entry = {
            "raw_module_imports": ["stale"],
            "from_module": [],
            "from_name": [],
            "top_level_fns": ["stale_fn"],
            "top_level_classes": [],
            "has_all": False,
            "complexity": {},
        }
        legacy_document = {
            "version": _FILE_CACHE_VERSION,
            "entries": {"stale_hash": legacy_entry},
        }
        legacy_file.write_bytes(json.dumps(legacy_document).encode())
        loaded = _InvariantFileCache.load(repo)
        assert loaded.get("stale_hash") is None
| 353 | |
| 354 | |
| 355 | # --------------------------------------------------------------------------- |
| 356 | # Tier 6 — Performance (warm path skips ast.parse) |
| 357 | # --------------------------------------------------------------------------- |
| 358 | |
| 359 | |
class TestPerformance:
    """Warm cache must not call ``ast.parse``."""

    def test_warm_cache_skips_ast_parse(self, tmp_path: pathlib.Path) -> None:
        """Pre-populated cache: ``_build_file_data`` must not call ``ast.parse``.

        The same counting wrapper is installed for both the cold and the warm
        run.  The cold run must record at least one parse; without that check
        the warm-run "zero calls" assertion could pass simply because the
        wrapper is never reached.
        """
        from unittest.mock import patch

        from muse.core.object_store import write_object
        from muse.core.types import blob_id
        from muse.plugins.code._invariants import _build_file_data

        root = _make_repo(tmp_path)
        src = b"def compute(x: int) -> int:\n return x * 2\n"
        oid = blob_id(src)
        write_object(root, oid, src)
        manifest = {"billing.py": oid}

        parse_calls: list[str] = []
        # Capture the real function before patching so the wrapper can delegate.
        original_parse = ast.parse

        def counting_parse(source: str | bytes, *args: str | int, **kwargs: str | int) -> ast.AST:
            parse_calls.append("called")
            return original_parse(source, *args, **kwargs)

        patch_target = "muse.plugins.code._invariants.ast.parse"

        # Cold run — populates the cache in memory, then persists it.
        cold_cache = _InvariantFileCache.load(root)
        with patch(patch_target, counting_parse):
            _build_file_data(manifest, root, cold_cache)
        assert parse_calls, (
            "cold run never reached the patched ast.parse — "
            "the warm-run check below would be vacuous"
        )
        cold_cache.save()

        # Warm run — reload from disk and expect zero parse calls.
        warm_cache = _InvariantFileCache.load(root)
        assert warm_cache.size > 0, "cold run did not persist any cache entries"
        parse_calls.clear()

        with patch(patch_target, counting_parse):
            _build_file_data(manifest, root, warm_cache)

        assert parse_calls == [], (
            f"ast.parse called {len(parse_calls)} time(s) on warm cache — "
            "cold run should have populated the cache"
        )
| 399 | |
| 400 | |
| 401 | # --------------------------------------------------------------------------- |
| 402 | # Tier 7 — Security |
| 403 | # --------------------------------------------------------------------------- |
| 404 | |
| 405 | |
class TestSecurity:
    """Untrusted cache content and unreadable files."""

    # ``os.getuid`` does not exist on every platform; guard the lookup so the
    # decorator cannot raise ``AttributeError`` while the module is collected.
    @pytest.mark.skipif(
        not hasattr(os, "getuid") or os.getuid() == 0,
        reason="needs POSIX file permissions and a non-root user",
    )
    def test_mode_000_file_returns_empty_no_raise(self, tmp_path: pathlib.Path) -> None:
        """An unreadable cache file must be handled gracefully — never raises.

        The file holds one well-formed entry, so ``size == 0`` after the
        ``chmod`` can only mean the content was not read.  With an empty
        ``entries`` map the assertion would hold even if the file had been
        read successfully.
        """
        import json as _json

        root = _make_repo(tmp_path)
        cache_file = _cache_path(root)
        entry = {
            "raw_module_imports": ["os"],
            "from_module": [],
            "from_name": [],
            "top_level_fns": ["run"],
            "top_level_classes": [],
            "has_all": False,
            "complexity": {},
        }
        cache_file.write_bytes(
            _json.dumps(
                {"version": _FILE_CACHE_VERSION, "entries": {"h": entry}}
            ).encode()
        )
        # Precondition: while readable, the entry is actually loaded.
        assert _InvariantFileCache.load(root).size == 1

        cache_file.chmod(0o000)
        try:
            cache = _InvariantFileCache.load(root)
            assert cache.size == 0
        finally:
            cache_file.chmod(0o644)  # restore so tmp_path cleanup succeeds

    def test_deeply_nested_payload_does_not_crash(self, tmp_path: pathlib.Path) -> None:
        """A 200-level nested JSON entry must load without raising.

        Only the absence of an exception is checked; whether the nested entry
        is kept or dropped is not asserted here.
        """
        import json as _json

        root = _make_repo(tmp_path)
        nested: str | Mapping[str, object] = "leaf"
        for _ in range(200):
            nested = {"k": nested}
        doc = {
            "version": _FILE_CACHE_VERSION,
            "entries": {"bomb": nested},
        }
        _cache_path(root).write_bytes(_json.dumps(doc).encode())
        # Must complete without raising; the resulting size is left unchecked.
        cache = _InvariantFileCache.load(root)
        assert isinstance(cache, _InvariantFileCache)
File History
4 commits
sha256:7781e508756c81b7ddb0b08b408fd2b99bad87798cefa596773373efc360952c
chore: typing audit — zero violations, zero untyped defs
Sonnet 4.6
patch
23 days ago
sha256:09656d1b0772ea4c96f8911d7bf8042b33eb0596992c6546dfab3d21e9dee330
fix: align muse read --json schema and test contracts
Sonnet 4.6
minor
⚠
23 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a
fix: repair four test failures from post-migration audit
Sonnet 4.6
patch
29 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf
fix: unified object store migration — idempotent writes, JS…
Sonnet 4.6
minor
⚠
29 days ago