test_query_stat_cache.py
python
sha256:f6cd81bc71702f5c1c6890bd39aaba994fe58c75f019d7c03934724fa2739bb4
fix: carry dev changes harmony dropped in merge — detached …
Sonnet 4.6
minor
⚠ breaking
16 days ago
| 1 | """TDD tests for StatCache integration into symbols_for_snapshot. |
| 2 | |
| 3 | Root cause |
| 4 | ---------- |
| 5 | ``symbols_for_snapshot(workdir=root)`` always calls ``disk_path.read_bytes()`` |
| 6 | for every Python file to compute the SHA-256 cache key, even when the file |
| 7 | hasn't changed since the last run. On the muse repo (~400 files) this costs |
| 8 | ~9,700 ms of pure disk I/O every single invocation. |
| 9 | |
| 10 | Fix |
| 11 | --- |
| 12 | Accept a ``stat_cache: StatCache | None`` parameter. On a stat-cache hit |
| 13 | (``ino + mtime + size`` match) the SHA-256 is already known — skip |
| 14 | ``read_bytes()`` entirely. Only when the SymbolCache also misses do we |
| 15 | actually read the file. |
| 16 | |
| 17 | Coverage |
| 18 | -------- |
| 19 | - ``symbols_for_snapshot`` accepts ``stat_cache=`` keyword argument. |
| 20 | - On stat-cache hit + symbol-cache hit: ``read_bytes()`` is never called. |
| 21 | - On stat-cache hit + symbol-cache miss: file is read once (to parse). |
| 22 | - On stat-cache miss: file is read (to hash + parse if needed). |
| 23 | - Stat cache is populated after a workdir call. |
| 24 | - Results are identical whether stat_cache is supplied or not. |
| 25 | - ``stat_cache`` is ignored when ``workdir=None`` (committed-blob path). |
| 26 | """ |
| 27 | |
| 28 | from __future__ import annotations |
| 29 | |
| 30 | import hashlib |
| 31 | import pathlib |
| 32 | from unittest.mock import patch, MagicMock |
| 33 | |
| 34 | import pytest |
| 35 | |
| 36 | from muse.core.types import blob_id |
| 37 | from muse.core.object_store import write_object |
| 38 | from muse.core.stat_cache import StatCache |
| 39 | from muse.core.symbol_cache import SymbolCache |
| 40 | from muse.plugins.code._query import symbols_for_snapshot |
| 41 | from muse.core.paths import muse_dir |
| 42 | |
| 43 | |
| 44 | # --------------------------------------------------------------------------- |
| 45 | # Helpers |
| 46 | # --------------------------------------------------------------------------- |
| 47 | |
| 48 | |
# Fixture source, version 1: a module with two top-level functions,
# ``compute`` and ``helper``.
_PY_SRC = b"""\
def compute(x: int) -> int:
    return x * 2

def helper() -> int:
    return 42
"""

# Fixture source, version 2: the same two function names, but ``compute``
# gains a defaulted parameter and both bodies change, so the bytes (and their
# length) differ from version 1.
_PY_SRC_V2 = b"""\
def compute(x: int, y: int = 0) -> int:
    return x * 2 + y

def helper() -> int:
    return 99
"""
| 64 | |
| 65 | |
def _make_repo(tmp_path: pathlib.Path, content: bytes = _PY_SRC) -> tuple[pathlib.Path, dict]:
    """Build a one-file repo under *tmp_path* and describe it.

    The directory returned by ``muse_dir(tmp_path)`` is created, *content* is
    stored through ``write_object`` under ``blob_id(content)``, and the same
    bytes are written to ``billing.py`` in *tmp_path*.

    Returns:
        ``(tmp_path, manifest)`` where the manifest maps ``"billing.py"`` to
        the object id of *content*.
    """
    muse_dir(tmp_path).mkdir()

    object_id = blob_id(content)
    write_object(tmp_path, object_id, content)

    # The working-tree copy carries the same bytes as the stored object.
    source_file = tmp_path / "billing.py"
    source_file.write_bytes(content)

    manifest = {"billing.py": object_id}
    return tmp_path, manifest
| 74 | |
| 75 | |
| 76 | # --------------------------------------------------------------------------- |
| 77 | # 1. symbols_for_snapshot accepts stat_cache= keyword |
| 78 | # --------------------------------------------------------------------------- |
| 79 | |
| 80 | |
class TestAcceptsStatCache:
    """``symbols_for_snapshot`` takes a ``stat_cache=`` keyword in workdir mode."""

    def test_accepts_stat_cache_none(self, tmp_path: pathlib.Path) -> None:
        """An explicit ``stat_cache=None`` still yields symbols for the file."""
        repo_root, manifest = _make_repo(tmp_path)
        symbols = symbols_for_snapshot(
            repo_root, manifest, workdir=repo_root, stat_cache=None
        )
        assert "billing.py" in symbols

    def test_accepts_stat_cache_instance(self, tmp_path: pathlib.Path) -> None:
        """A fresh, empty ``StatCache`` is accepted and symbols are returned."""
        repo_root, manifest = _make_repo(tmp_path)
        empty_cache = StatCache.empty()
        symbols = symbols_for_snapshot(
            repo_root, manifest, workdir=repo_root, stat_cache=empty_cache
        )
        assert "billing.py" in symbols

    def test_result_unchanged_with_or_without_stat_cache(
        self, tmp_path: pathlib.Path
    ) -> None:
        """The set of symbol keys for the file is the same with and without a stat cache."""
        repo_root, manifest = _make_repo(tmp_path)

        without_cache = symbols_for_snapshot(repo_root, manifest, workdir=repo_root)
        with_cache = symbols_for_snapshot(
            repo_root, manifest, workdir=repo_root, stat_cache=StatCache.empty()
        )

        # Only the keys of each per-file mapping are compared.
        keys_without = set(without_cache.get("billing.py", {}))
        keys_with = set(with_cache.get("billing.py", {}))
        assert keys_without == keys_with
| 100 | |
| 101 | |
| 102 | # --------------------------------------------------------------------------- |
| 103 | # 2. Stat-cache hit + symbol-cache hit → read_bytes never called |
| 104 | # --------------------------------------------------------------------------- |
| 105 | |
| 106 | |
class TestStatCacheHitSkipsRead:
    """Count ``Path.read_bytes`` calls on ``.py`` files when the stat cache is warm."""

    def test_warm_stat_and_symbol_cache_skips_read_bytes(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Both caches warm → file bytes never read."""
        repo_root, manifest = _make_repo(tmp_path)

        # Cold pass: run once with both caches attached, then persist them.
        first_symbols = SymbolCache.load(muse_dir(repo_root))
        first_stats = StatCache.load(muse_dir(repo_root))
        symbols_for_snapshot(
            repo_root,
            manifest,
            workdir=repo_root,
            cache=first_symbols,
            stat_cache=first_stats,
        )
        first_symbols.save()
        first_stats.save()

        # Warm pass uses fresh instances loaded back from disk.
        warm_symbols = SymbolCache.load(muse_dir(repo_root))
        warm_stats = StatCache.load(muse_dir(repo_root))

        py_reads = []
        real_read_bytes = pathlib.Path.read_bytes

        def spy_read_bytes(target: pathlib.Path) -> bytes:
            # Record only Python sources; every read is still delegated.
            if target.suffix == ".py":
                py_reads.append(str(target))
            return real_read_bytes(target)

        with patch.object(pathlib.Path, "read_bytes", spy_read_bytes):
            symbols_for_snapshot(
                repo_root,
                manifest,
                workdir=repo_root,
                cache=warm_symbols,
                stat_cache=warm_stats,
            )

        assert py_reads == [], (
            f"read_bytes called on warm cache for: {py_reads}"
        )

    def test_stat_cache_hit_symbol_cache_miss_reads_once(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Stat-cache hit but cold symbol cache → file read exactly once."""
        repo_root, manifest = _make_repo(tmp_path)

        # Only the stat cache is attached to (and saved after) the first pass.
        first_stats = StatCache.load(muse_dir(repo_root))
        symbols_for_snapshot(
            repo_root, manifest, workdir=repo_root, stat_cache=first_stats
        )
        first_stats.save()

        warm_stats = StatCache.load(muse_dir(repo_root))
        empty_symbols = SymbolCache.empty()

        py_reads = []
        real_read_bytes = pathlib.Path.read_bytes

        def spy_read_bytes(target: pathlib.Path) -> bytes:
            # Record only Python sources; every read is still delegated.
            if target.suffix == ".py":
                py_reads.append(str(target))
            return real_read_bytes(target)

        with patch.object(pathlib.Path, "read_bytes", spy_read_bytes):
            symbols_for_snapshot(
                repo_root,
                manifest,
                workdir=repo_root,
                cache=empty_symbols,
                stat_cache=warm_stats,
            )

        assert len(py_reads) == 1, (
            f"Expected exactly 1 read on stat-hit/sym-miss, got {py_reads}"
        )
| 175 | |
| 176 | |
| 177 | # --------------------------------------------------------------------------- |
| 178 | # 3. Stat cache is populated after a workdir call |
| 179 | # --------------------------------------------------------------------------- |
| 180 | |
| 181 | |
class TestStatCachePopulated:
    """A workdir call with a stat cache attached leaves the cache filled in."""

    def test_stat_cache_has_entry_after_workdir_call(
        self, tmp_path: pathlib.Path
    ) -> None:
        """After save and reload, the cache reports the blob id of ``billing.py``."""
        repo_root, manifest = _make_repo(tmp_path)

        live_cache = StatCache.load(muse_dir(repo_root))
        symbols_for_snapshot(
            repo_root, manifest, workdir=repo_root, stat_cache=live_cache
        )
        live_cache.save()

        # Query a fresh instance so the answer comes from what was persisted.
        reloaded = StatCache.load(muse_dir(repo_root))
        billing_path = repo_root / "billing.py"
        obj_hash = reloaded.get_object_hash(repo_root, billing_path)

        expected = blob_id(_PY_SRC)
        assert obj_hash == expected, (
            f"Stat cache returned wrong hash: {obj_hash}"
        )

    def test_stat_cache_file_created_on_disk(self, tmp_path: pathlib.Path) -> None:
        """Saving the cache creates ``cache/stat.json`` under the muse directory."""
        repo_root, manifest = _make_repo(tmp_path)

        live_cache = StatCache.load(muse_dir(repo_root))
        symbols_for_snapshot(
            repo_root, manifest, workdir=repo_root, stat_cache=live_cache
        )
        live_cache.save()

        stat_file = muse_dir(repo_root) / "cache" / "stat.json"
        assert stat_file.exists()
| 204 | |
| 205 | |
| 206 | # --------------------------------------------------------------------------- |
| 207 | # 4. stat_cache= ignored when workdir=None (committed-blob path unchanged) |
| 208 | # --------------------------------------------------------------------------- |
| 209 | |
| 210 | |
class TestStatCacheIgnoredWithoutWorkdir:
    """Supplying ``stat_cache=`` without ``workdir=`` must be harmless."""

    def test_no_read_bytes_called_for_committed_blobs(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Committed path reads from object store, not disk — stat_cache irrelevant.

        Only the presence of ``billing.py`` in the result is asserted here;
        ``read_bytes`` itself is not instrumented by this test.
        """
        repo_root, manifest = _make_repo(tmp_path)
        unused_cache = StatCache.empty()

        # No ``workdir`` argument: the call must complete and return symbols.
        symbols = symbols_for_snapshot(repo_root, manifest, stat_cache=unused_cache)

        assert "billing.py" in symbols
| 221 | |
| 222 | |
| 223 | # --------------------------------------------------------------------------- |
| 224 | # 5. Changed file invalidates stat cache → re-read |
| 225 | # --------------------------------------------------------------------------- |
| 226 | |
| 227 | |
class TestStatCacheInvalidation:
    """Editing a file on disk must defeat a previously warmed stat cache."""

    def test_edited_file_triggers_reread(self, tmp_path: pathlib.Path) -> None:
        """After editing a file, stat cache miss → file is re-read.

        Three things are checked on the post-edit call:

        * ``Path.read_bytes`` is invoked for a ``.py`` file, i.e. the edit
          really forced a re-read;
        * the result is still parseable (``billing.py`` present, ``compute``
          among its symbol addresses);
        * the symbols differ from the pre-edit ones, so a stale result cannot
          pass unnoticed.
        """
        root, manifest = _make_repo(tmp_path)

        # Warm stat cache with v1.
        sc = StatCache.load(muse_dir(root))
        r1 = symbols_for_snapshot(root, manifest, workdir=root, stat_cache=sc)
        sc.save()

        # Edit file on disk. v2 has different content and a different byte
        # length, so the change does not rely on mtime granularity alone.
        (root / "billing.py").write_bytes(_PY_SRC_V2)

        sc2 = StatCache.load(muse_dir(root))

        reread_paths: list[str] = []
        original_read_bytes = pathlib.Path.read_bytes

        def counting_read_bytes(self_path: pathlib.Path) -> bytes:
            # Record only Python sources; every read is still delegated.
            if self_path.suffix == ".py":
                reread_paths.append(str(self_path))
            return original_read_bytes(self_path)

        with patch.object(pathlib.Path, "read_bytes", counting_read_bytes):
            r2 = symbols_for_snapshot(root, manifest, workdir=root, stat_cache=sc2)

        # The name of this test promises a re-read: verify it happened.
        assert reread_paths, "edited file was not re-read after stat-cache miss"

        # Regression: must not crash and must still expose the file's symbols.
        assert "billing.py" in r2
        syms2 = set(r2.get("billing.py", {}))
        assert any("compute" in addr for addr in syms2)

        # Both versions define 'compute' and 'helper', so the address check
        # above cannot detect staleness; the per-file symbol data must differ
        # because the source bytes changed.
        assert r2["billing.py"] != r1.get("billing.py"), (
            "symbols for edited file are identical to the pre-edit result (stale)"
        )
File History
2 commits
sha256:43c82f6d4fa2e85dd9ed9dd1a31199ec6b481191517aba66dfa9da275dbfa1af
Merge branch 'dev' into main
Human
1 day ago
sha256:fb67fed5a4d3e40de84bdd163de94ef1386570bef1dd1a020a732c8a038962ce
Merge branch 'dev' into main
Human
20 days ago