gabriel / muse public

test_query_stat_cache.py file-level

at sha256:8 · View file ↗ · Intel ↗

History
1 files
1 commits
0 hotspots
0 🧊 dead
0 💥 blast risk
sha256:4 Merge branch 'dev' into main · gabriel · Jun 17, 2026
1 """TDD tests for StatCache integration into symbols_for_snapshot.
2
3 Root cause
4 ----------
5 ``symbols_for_snapshot(workdir=root)`` always calls ``disk_path.read_bytes()``
6 for every Python file to compute the SHA-256 cache key, even when the file
7 hasn't changed since the last run. On the muse repo (~400 files) this costs
8 ~9,700 ms of pure disk I/O every single invocation.
9
10 Fix
11 ---
12 Accept a ``stat_cache: StatCache | None`` parameter. On a stat-cache hit
13 (``ino + mtime + size`` match) the SHA-256 is already known — skip
14 ``read_bytes()`` entirely. Only when the SymbolCache also misses do we
15 actually read the file.
16
17 Coverage
18 --------
19 - ``symbols_for_snapshot`` accepts ``stat_cache=`` keyword argument.
20 - On stat-cache hit + symbol-cache hit: ``read_bytes()`` is never called.
21 - On stat-cache hit + symbol-cache miss: file is read once (to parse).
22 - On stat-cache miss: file is read (to hash + parse if needed).
23 - Stat cache is populated after a workdir call.
24 - Results are identical whether stat_cache is supplied or not.
25 - ``stat_cache`` is ignored when ``workdir=None`` (committed-blob path).
26 """
27
28 from __future__ import annotations
29
30 import hashlib
31 import pathlib
32 from unittest.mock import patch, MagicMock
33
34 import pytest
35
36 from muse.core.types import blob_id
37 from muse.core.object_store import write_object
38 from muse.core.stat_cache import StatCache
39 from muse.core.symbol_cache import SymbolCache
40 from muse.plugins.code._query import symbols_for_snapshot
41 from muse.core.paths import muse_dir
42
43
44 # ---------------------------------------------------------------------------
45 # Helpers
46 # ---------------------------------------------------------------------------
47
48
# Fixture source, version 1: a small module with two top-level functions.
# The function bodies must stay indented so the bytes are valid Python.
_PY_SRC = b"""\
def compute(x: int) -> int:
    return x * 2

def helper() -> int:
    return 42
"""

# Fixture source, version 2: same function names as version 1, but
# ``compute`` has a different signature and both bodies differ, so the
# file's bytes (and length) differ from version 1.
_PY_SRC_V2 = b"""\
def compute(x: int, y: int = 0) -> int:
    return x * 2 + y

def helper() -> int:
    return 99
"""
64
65
def _make_repo(tmp_path: pathlib.Path, content: bytes = _PY_SRC) -> tuple[pathlib.Path, dict]:
    """Create a one-file muse repo under *tmp_path*.

    The directory returned by ``muse_dir(tmp_path)`` is created, *content* is
    stored via ``write_object`` under its ``blob_id``, and the same bytes are
    written to ``billing.py`` in the working tree.

    Returns:
        ``(root, manifest)`` where *root* is *tmp_path* itself and *manifest*
        maps ``"billing.py"`` to the blob id of *content*.
    """
    muse_dir(tmp_path).mkdir()

    object_id = blob_id(content)
    write_object(tmp_path, object_id, content)

    source_file = tmp_path / "billing.py"
    source_file.write_bytes(content)

    manifest = {"billing.py": object_id}
    return tmp_path, manifest
74
75
76 # ---------------------------------------------------------------------------
77 # 1. symbols_for_snapshot accepts stat_cache= keyword
78 # ---------------------------------------------------------------------------
79
80
class TestAcceptsStatCache:
    """``symbols_for_snapshot`` accepts an optional ``stat_cache=`` keyword."""

    def test_accepts_stat_cache_none(self, tmp_path: pathlib.Path) -> None:
        """An explicit ``stat_cache=None`` still yields an entry for the file."""
        repo_root, manifest = _make_repo(tmp_path)
        symbols = symbols_for_snapshot(
            repo_root, manifest, workdir=repo_root, stat_cache=None
        )
        assert "billing.py" in symbols

    def test_accepts_stat_cache_instance(self, tmp_path: pathlib.Path) -> None:
        """An empty ``StatCache`` instance is accepted as ``stat_cache=``."""
        repo_root, manifest = _make_repo(tmp_path)
        empty_cache = StatCache.empty()
        symbols = symbols_for_snapshot(
            repo_root, manifest, workdir=repo_root, stat_cache=empty_cache
        )
        assert "billing.py" in symbols

    def test_result_unchanged_with_or_without_stat_cache(
        self, tmp_path: pathlib.Path
    ) -> None:
        """The keys recorded for ``billing.py`` match with and without a stat cache."""
        repo_root, manifest = _make_repo(tmp_path)
        without_cache = symbols_for_snapshot(repo_root, manifest, workdir=repo_root)
        with_cache = symbols_for_snapshot(
            repo_root, manifest, workdir=repo_root, stat_cache=StatCache.empty()
        )
        # Only the key sets are compared, not the values stored under them.
        keys_without = set(without_cache.get("billing.py", {}))
        keys_with = set(with_cache.get("billing.py", {}))
        assert keys_without == keys_with
100
101
102 # ---------------------------------------------------------------------------
103 # 2. Stat-cache hit + symbol-cache hit → read_bytes never called
104 # ---------------------------------------------------------------------------
105
106
class TestStatCacheHitSkipsRead:
    """Count ``read_bytes`` calls on ``.py`` files when the stat cache is warm."""

    @staticmethod
    def _py_read_spy(seen: list[str]):
        """Return a ``Path.read_bytes`` replacement that records ``.py`` reads.

        The real ``read_bytes`` is captured at the moment this helper is
        called, so the spy must be built before the patch is installed (the
        tests do this by building it in the ``patch.object`` argument list).
        Every call is forwarded to the real method; paths whose suffix is
        ``.py`` are first appended to *seen* as strings.
        """
        real_read_bytes = pathlib.Path.read_bytes

        def spy(path: pathlib.Path) -> bytes:
            if path.suffix == ".py":
                seen.append(str(path))
            return real_read_bytes(path)

        return spy

    def test_warm_stat_and_symbol_cache_skips_read_bytes(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Both caches warm → file bytes never read."""
        root, manifest = _make_repo(tmp_path)

        # Cold run: fills both caches, which are then persisted to disk.
        sym_cache = SymbolCache.load(muse_dir(root))
        stat_cache = StatCache.load(muse_dir(root))
        symbols_for_snapshot(
            root, manifest, workdir=root, cache=sym_cache, stat_cache=stat_cache
        )
        sym_cache.save()
        stat_cache.save()

        # Reload from disk so the second run starts from the saved state.
        warm_sym_cache = SymbolCache.load(muse_dir(root))
        warm_stat_cache = StatCache.load(muse_dir(root))

        py_reads: list[str] = []
        with patch.object(pathlib.Path, "read_bytes", self._py_read_spy(py_reads)):
            symbols_for_snapshot(
                root, manifest, workdir=root,
                cache=warm_sym_cache, stat_cache=warm_stat_cache,
            )

        assert py_reads == [], (
            f"read_bytes called on warm cache for: {py_reads}"
        )

    def test_stat_cache_hit_symbol_cache_miss_reads_once(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Stat-cache hit but cold symbol cache → file read exactly once."""
        root, manifest = _make_repo(tmp_path)

        # Warm only the stat cache; no symbol cache is passed to this run.
        stat_cache = StatCache.load(muse_dir(root))
        symbols_for_snapshot(root, manifest, workdir=root, stat_cache=stat_cache)
        stat_cache.save()

        warm_stat_cache = StatCache.load(muse_dir(root))
        cold_sym_cache = SymbolCache.empty()

        py_reads: list[str] = []
        with patch.object(pathlib.Path, "read_bytes", self._py_read_spy(py_reads)):
            symbols_for_snapshot(
                root, manifest, workdir=root,
                cache=cold_sym_cache, stat_cache=warm_stat_cache,
            )

        assert len(py_reads) == 1, (
            f"Expected exactly 1 read on stat-hit/sym-miss, got {py_reads}"
        )
175
176
177 # ---------------------------------------------------------------------------
178 # 3. Stat cache is populated after a workdir call
179 # ---------------------------------------------------------------------------
180
181
class TestStatCachePopulated:
    """A workdir call followed by ``save()`` leaves a usable stat cache on disk."""

    def test_stat_cache_has_entry_after_workdir_call(
        self, tmp_path: pathlib.Path
    ) -> None:
        """A reloaded stat cache reports the blob id of ``billing.py``."""
        repo_root, manifest = _make_repo(tmp_path)

        first_cache = StatCache.load(muse_dir(repo_root))
        symbols_for_snapshot(
            repo_root, manifest, workdir=repo_root, stat_cache=first_cache
        )
        first_cache.save()

        # Query a freshly loaded instance rather than the one used in the call.
        reloaded = StatCache.load(muse_dir(repo_root))
        tracked_file = repo_root / "billing.py"
        obj_hash = reloaded.get_object_hash(repo_root, tracked_file)
        assert obj_hash == blob_id(_PY_SRC), (
            f"Stat cache returned wrong hash: {obj_hash}"
        )

    def test_stat_cache_file_created_on_disk(self, tmp_path: pathlib.Path) -> None:
        """Saving the stat cache creates ``cache/stat.json`` under the muse dir."""
        repo_root, manifest = _make_repo(tmp_path)

        stat_cache = StatCache.load(muse_dir(repo_root))
        symbols_for_snapshot(
            repo_root, manifest, workdir=repo_root, stat_cache=stat_cache
        )
        stat_cache.save()

        stat_file = muse_dir(repo_root) / "cache" / "stat.json"
        assert stat_file.exists()
204
205
206 # ---------------------------------------------------------------------------
207 # 4. stat_cache= ignored when workdir=None (committed-blob path unchanged)
208 # ---------------------------------------------------------------------------
209
210
class TestStatCacheIgnoredWithoutWorkdir:
    """Passing ``stat_cache=`` without ``workdir`` must not break the call."""

    def test_no_read_bytes_called_for_committed_blobs(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Committed path reads from object store, not disk — stat_cache irrelevant.

        NOTE(review): despite its name, this test does not count
        ``read_bytes`` calls; it only checks that the call completes and
        returns an entry for ``billing.py``.
        """
        repo_root, manifest = _make_repo(tmp_path)
        spare_stat_cache = StatCache.empty()
        # No ``workdir`` argument here: only ``stat_cache`` is supplied.
        symbols = symbols_for_snapshot(
            repo_root, manifest, stat_cache=spare_stat_cache
        )
        assert "billing.py" in symbols
221
222
223 # ---------------------------------------------------------------------------
224 # 5. Changed file invalidates stat cache → re-read
225 # ---------------------------------------------------------------------------
226
227
class TestStatCacheInvalidation:
    """Editing a file after the stat cache was saved must not break a later run."""

    def test_edited_file_triggers_reread(self, tmp_path: pathlib.Path) -> None:
        """After editing a file, stat cache miss → file is re-read.

        Regression guard: the run after the edit must not crash and must
        still report ``billing.py`` with a ``compute`` entry.
        """
        root, manifest = _make_repo(tmp_path)

        # Warm the stat cache against v1 and persist it.  The result of this
        # run is not needed; only the cache side effect matters.
        sc = StatCache.load(muse_dir(root))
        symbols_for_snapshot(root, manifest, workdir=root, stat_cache=sc)
        sc.save()

        # Overwrite the working-tree file with v2 (different bytes and length).
        (root / "billing.py").write_bytes(_PY_SRC_V2)

        sc2 = StatCache.load(muse_dir(root))
        r2 = symbols_for_snapshot(root, manifest, workdir=root, stat_cache=sc2)

        # v1 and v2 define the same function names, so only the presence of
        # the file and of a ``compute`` key is checked here.
        syms2 = set(r2.get("billing.py", {}))
        assert "billing.py" in r2
        assert any("compute" in addr for addr in syms2)