gabriel / muse public
test_core_stat_cache.py python
412 lines 15.0 KB
Raw
1 """Tests for muse.core.stat_cache.
2
3 Coverage
4 --------
5 - Cache hit: file with unchanged (mtime, size) returns stored hash without I/O.
6 - Cache miss: new or modified file is re-hashed and entry is updated.
7 - Stale-entry pruning: entries for deleted files are removed.
8 - Dimension hash round-trip: set_dimension / get_dimension.
9 - Dimension eviction on object-hash miss: dimensions reset when file changes.
10 - Persistence: save() / load() round-trip via .muse/cache/stat.json.
11 - Atomic write: temp file is cleaned up; no corruption on concurrent use.
12 - empty(): cache with no cache_dir — save() is a no-op.
13 - load_cache() convenience helper.
14 - walk_workdir() integration: cache is used and persisted automatically.
15 """
16
from __future__ import annotations

import json
import os
import pathlib
import time

import pytest

from muse.core.paths import muse_dir
from muse.core.snapshot import walk_workdir
from muse.core.stat_cache import FileCacheEntry, StatCache, _hash_bytes, load_cache
from muse.core.types import blob_id
29
30
31 # ---------------------------------------------------------------------------
32 # Helpers
33 # ---------------------------------------------------------------------------
34
35
def _make_muse_dir(tmp_path: pathlib.Path) -> pathlib.Path:
    """Create the muse directory for *tmp_path* together with its ``cache`` subdirectory.

    Returns the path produced by ``muse_dir(tmp_path)``, which now exists on disk.
    """
    root = muse_dir(tmp_path)
    root.mkdir()
    cache_dir = root / "cache"
    cache_dir.mkdir()
    return root
41
42
def _write(path: pathlib.Path, content: str = "hello") -> pathlib.Path:
    """Write *content* to *path* as UTF-8, creating missing parent directories.

    Returns *path* unchanged so the call can be used inline in an assignment.
    """
    parent_dir = path.parent
    parent_dir.mkdir(exist_ok=True, parents=True)
    path.write_text(content, encoding="utf-8")
    return path
47
48
49 # ---------------------------------------------------------------------------
50 # _hash_bytes — canonical hash function
51 # ---------------------------------------------------------------------------
52
53
class TestHashBytes:
    """``_hash_bytes`` of a file must equal ``blob_id`` of that file's raw bytes."""

    def test_matches_hashlib(self, tmp_path: pathlib.Path) -> None:
        """A small text file hashes to the blob id of its encoded content."""
        target = _write(tmp_path / "f.txt", "muse")
        actual = _hash_bytes(target)
        assert actual == blob_id(b"muse")

    def test_empty_file(self, tmp_path: pathlib.Path) -> None:
        """A zero-byte file hashes to the blob id of the empty byte string."""
        target = tmp_path / "empty.txt"
        target.write_bytes(b"")
        actual = _hash_bytes(target)
        assert actual == blob_id(b"")

    def test_large_file_chunked(self, tmp_path: pathlib.Path) -> None:
        """A file larger than one read chunk still hashes to the blob id of its bytes."""
        # 200 KiB — forces multiple 64 KiB chunks
        payload = b"x" * (200 * 1024)
        target = tmp_path / "big.bin"
        target.write_bytes(payload)
        actual = _hash_bytes(target)
        assert actual == blob_id(payload)
69
70
71 # ---------------------------------------------------------------------------
72 # StatCache — construction
73 # ---------------------------------------------------------------------------
74
75
class TestStatCacheConstruction:
    """Construction paths: ``StatCache.load``, ``StatCache.empty`` and ``load_cache``."""

    def test_load_missing_file_returns_empty(self, tmp_path: pathlib.Path) -> None:
        """Loading when no ``stat.json`` exists yields a cache with no entries."""
        dot_muse = _make_muse_dir(tmp_path)
        cache = StatCache.load(dot_muse)
        assert cache._entries == {}

    def test_load_corrupt_json_returns_empty(self, tmp_path: pathlib.Path) -> None:
        """Unparseable bytes in ``stat.json`` yield a cache with no entries."""
        dot_muse = _make_muse_dir(tmp_path)
        (dot_muse / "cache" / "stat.json").write_bytes(b"not JSON !!!")
        cache = StatCache.load(dot_muse)
        assert cache._entries == {}

    def test_load_wrong_version_returns_empty(self, tmp_path: pathlib.Path) -> None:
        """A well-formed file carrying version 99 yields a cache with no entries."""
        dot_muse = _make_muse_dir(tmp_path)
        # Uses the module-level ``json`` import; no function-scope alias is needed.
        payload = json.dumps({"version": 99, "entries": {}}).encode()
        (dot_muse / "cache" / "stat.json").write_bytes(payload)
        cache = StatCache.load(dot_muse)
        assert cache._entries == {}

    def test_empty_has_no_cache_dir(self, tmp_path: pathlib.Path) -> None:
        """``StatCache.empty()`` has neither a cache directory nor entries."""
        cache = StatCache.empty()
        assert cache._cache_dir is None
        assert cache._entries == {}

    def test_load_cache_helper_with_muse_dir(self, tmp_path: pathlib.Path) -> None:
        """``load_cache`` returns a ``StatCache`` pointing at ``<muse dir>/cache``."""
        _make_muse_dir(tmp_path)
        cache = load_cache(tmp_path)
        assert isinstance(cache, StatCache)
        assert cache._cache_dir == muse_dir(tmp_path) / "cache"

    def test_load_cache_helper_without_muse_dir(self, tmp_path: pathlib.Path) -> None:
        """Without a muse directory, ``load_cache`` returns a cache with no cache dir."""
        cache = load_cache(tmp_path)
        assert cache._cache_dir is None
112
113
114 # ---------------------------------------------------------------------------
115 # StatCache — get_object_hash (hit / miss)
116 # ---------------------------------------------------------------------------
117
118
class TestGetObjectHash:
    """Hit / miss behaviour of ``StatCache.get_object_hash``."""

    @staticmethod
    def _bump_mtime(path: pathlib.Path) -> None:
        """Move *path*'s modification time two seconds forward.

        Sleeping briefly before a rewrite does not guarantee a new mtime when
        the filesystem or kernel timestamp granularity is coarser than the
        sleep.  Setting the timestamp explicitly makes the change deterministic.
        The access time is left as it was.
        """
        st = path.stat()
        os.utime(path, ns=(st.st_atime_ns, st.st_mtime_ns + 2_000_000_000))

    def test_first_call_is_cache_miss(self, tmp_path: pathlib.Path) -> None:
        """The first lookup hashes the file, marks the cache dirty and stores an entry."""
        dot_muse = _make_muse_dir(tmp_path)
        f = _write(tmp_path / "a.py", "x = 1")
        cache = StatCache.load(dot_muse)

        h = cache.get_object_hash(tmp_path, f)

        assert h == _hash_bytes(f)
        assert cache._dirty is True
        assert "a.py" in cache._entries

    def test_second_call_is_cache_hit_no_dirty(self, tmp_path: pathlib.Path) -> None:
        """A repeated lookup on an untouched file leaves the dirty flag unset."""
        dot_muse = _make_muse_dir(tmp_path)
        f = _write(tmp_path / "a.py", "x = 1")
        cache = StatCache.load(dot_muse)
        cache.get_object_hash(tmp_path, f)
        cache._dirty = False  # reset after first miss

        h2 = cache.get_object_hash(tmp_path, f)

        assert h2 == _hash_bytes(f)
        assert cache._dirty is False  # no re-hash, no dirty flag

    def test_modified_file_triggers_miss(self, tmp_path: pathlib.Path) -> None:
        """Rewriting a file with different content yields a new, correct hash."""
        dot_muse = _make_muse_dir(tmp_path)
        f = _write(tmp_path / "a.py", "x = 1")
        cache = StatCache.load(dot_muse)
        h1 = cache.get_object_hash(tmp_path, f)

        # The new content has the same length as the old, so only the mtime
        # distinguishes the two versions; force it to change.
        f.write_text("x = 2", encoding="utf-8")
        self._bump_mtime(f)
        h2 = cache.get_object_hash(tmp_path, f)

        assert h1 != h2
        assert h2 == _hash_bytes(f)

    def test_same_content_new_mtime_triggers_miss_but_same_hash(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Rewriting identical content with a new mtime still returns the same hash."""
        dot_muse = _make_muse_dir(tmp_path)
        f = _write(tmp_path / "a.py", "identical")
        cache = StatCache.load(dot_muse)
        h1 = cache.get_object_hash(tmp_path, f)

        f.write_text("identical", encoding="utf-8")
        self._bump_mtime(f)
        h2 = cache.get_object_hash(tmp_path, f)

        # The mtime changed, but the content (and therefore the hash) did not.
        assert h1 == h2
171
172
173 # ---------------------------------------------------------------------------
174 # StatCache — dimension hashes
175 # ---------------------------------------------------------------------------
176
177
class TestDimensionHashes:
    """Per-file dimension hashes: ``set_dimension`` / ``get_dimension``."""

    def test_set_and_get_dimension(self, tmp_path: pathlib.Path) -> None:
        """A dimension set on an existing entry is returned by ``get_dimension``."""
        dot_muse = _make_muse_dir(tmp_path)
        f = _write(tmp_path / "src.py")
        cache = StatCache.load(dot_muse)
        cache.get_object_hash(tmp_path, f)  # ensure entry exists

        cache.set_dimension(tmp_path, f, "symbols", "abc123")

        assert cache.get_dimension(tmp_path, f, "symbols") == "abc123"

    def test_get_dimension_missing_key_returns_none(self, tmp_path: pathlib.Path) -> None:
        """Asking for a dimension that was never set returns ``None``."""
        dot_muse = _make_muse_dir(tmp_path)
        f = _write(tmp_path / "src.py")
        cache = StatCache.load(dot_muse)
        cache.get_object_hash(tmp_path, f)

        assert cache.get_dimension(tmp_path, f, "nonexistent") is None

    def test_get_dimension_missing_entry_returns_none(self, tmp_path: pathlib.Path) -> None:
        """Asking for a dimension of a file with no cache entry returns ``None``."""
        dot_muse = _make_muse_dir(tmp_path)
        f = _write(tmp_path / "src.py")
        cache = StatCache.load(dot_muse)
        # Never called get_object_hash, so no entry exists.
        assert cache.get_dimension(tmp_path, f, "symbols") is None

    def test_dimension_evicted_on_object_hash_miss(self, tmp_path: pathlib.Path) -> None:
        """When a file changes, its dimension hashes must be cleared."""
        dot_muse = _make_muse_dir(tmp_path)
        f = _write(tmp_path / "src.py", "v1")
        cache = StatCache.load(dot_muse)
        cache.get_object_hash(tmp_path, f)
        cache.set_dimension(tmp_path, f, "symbols", "stale-hash")

        f.write_text("v2", encoding="utf-8")
        # "v1" -> "v2" keeps the size identical, so only the mtime can reveal
        # the change.  Set it forward explicitly rather than sleeping, because
        # a short sleep is not guaranteed to exceed the timestamp granularity.
        st = f.stat()
        os.utime(f, ns=(st.st_atime_ns, st.st_mtime_ns + 2_000_000_000))
        cache.get_object_hash(tmp_path, f)  # triggers miss → evicts dimensions

        assert cache.get_dimension(tmp_path, f, "symbols") is None

    def test_multiple_dimensions(self, tmp_path: pathlib.Path) -> None:
        """Several dimensions on one file are stored and returned independently."""
        dot_muse = _make_muse_dir(tmp_path)
        f = _write(tmp_path / "src.py")
        cache = StatCache.load(dot_muse)
        cache.get_object_hash(tmp_path, f)
        cache.set_dimension(tmp_path, f, "symbols", "sym-hash")
        cache.set_dimension(tmp_path, f, "imports", "imp-hash")

        assert cache.get_dimension(tmp_path, f, "symbols") == "sym-hash"
        assert cache.get_dimension(tmp_path, f, "imports") == "imp-hash"

    def test_set_dimension_noop_for_unknown_file(self, tmp_path: pathlib.Path) -> None:
        """set_dimension on a file with no entry must not crash."""
        dot_muse = _make_muse_dir(tmp_path)
        f = _write(tmp_path / "ghost.py")
        cache = StatCache.load(dot_muse)
        # No get_object_hash call → no entry.
        cache.set_dimension(tmp_path, f, "symbols", "x")  # must not raise
236
237
238 # ---------------------------------------------------------------------------
239 # StatCache — prune
240 # ---------------------------------------------------------------------------
241
242
class TestPrune:
    """``StatCache.prune`` drops entries whose paths are not in the given set."""

    def test_prune_removes_stale_entries(self, tmp_path: pathlib.Path) -> None:
        """Only the path named in the known set survives pruning."""
        dot_muse = _make_muse_dir(tmp_path)
        kept = _write(tmp_path / "keep.py")
        removed = _write(tmp_path / "gone.py")
        cache = StatCache.load(dot_muse)
        for tracked in (kept, removed):
            cache.get_object_hash(tmp_path, tracked)

        cache.prune({"keep.py"})

        remaining = cache._entries
        assert "keep.py" in remaining
        assert "gone.py" not in remaining

    def test_prune_noop_when_all_present(self, tmp_path: pathlib.Path) -> None:
        """Pruning with every cached path still known leaves the dirty flag unset."""
        dot_muse = _make_muse_dir(tmp_path)
        source = _write(tmp_path / "a.py")
        cache = StatCache.load(dot_muse)
        cache.get_object_hash(tmp_path, source)
        cache._dirty = False

        known = {"a.py"}
        cache.prune(known)

        assert cache._dirty is False

    def test_prune_empty_known_set_clears_all(self, tmp_path: pathlib.Path) -> None:
        """Pruning against an empty known set removes every entry."""
        dot_muse = _make_muse_dir(tmp_path)
        source = _write(tmp_path / "a.py")
        cache = StatCache.load(dot_muse)
        cache.get_object_hash(tmp_path, source)

        cache.prune(set())

        assert cache._entries == {}
277
278
279 # ---------------------------------------------------------------------------
280 # StatCache — persistence (save / load round-trip)
281 # ---------------------------------------------------------------------------
282
283
class TestPersistence:
    """``save()`` / ``load()`` round-trips through ``cache/stat.json``."""

    def test_save_and_reload(self, tmp_path: pathlib.Path) -> None:
        """A saved entry is served by a freshly loaded cache without dirtying it."""
        dot_muse = _make_muse_dir(tmp_path)
        f = _write(tmp_path / "mod.py", "print('hi')")
        cache = StatCache.load(dot_muse)
        h = cache.get_object_hash(tmp_path, f)
        cache.save()

        assert (dot_muse / "cache" / "stat.json").is_file()

        cache2 = StatCache.load(dot_muse)
        cache2._dirty = False
        h2 = cache2.get_object_hash(tmp_path, f)

        assert h2 == h
        assert cache2._dirty is False  # served from cache, no re-hash

    def test_save_is_atomic_no_tmp_left(self, tmp_path: pathlib.Path) -> None:
        """After ``save()`` no ``.stat_cache_*.tmp`` file remains in the cache dir."""
        dot_muse = _make_muse_dir(tmp_path)
        f = _write(tmp_path / "x.py")
        cache = StatCache.load(dot_muse)
        cache.get_object_hash(tmp_path, f)
        cache.save()

        assert not any((dot_muse / "cache").glob(".stat_cache_*.tmp"))

    def test_save_noop_when_not_dirty(self, tmp_path: pathlib.Path) -> None:
        """Saving a freshly loaded, untouched cache writes no ``stat.json``."""
        dot_muse = _make_muse_dir(tmp_path)
        cache = StatCache.load(dot_muse)
        cache.save()  # nothing written
        assert not (dot_muse / "cache" / "stat.json").exists()

    def test_empty_cache_save_is_noop(self) -> None:
        """``save()`` on ``StatCache.empty()`` must not raise."""
        cache = StatCache.empty()
        cache.save()  # must not raise

    def test_dimensions_persisted(self, tmp_path: pathlib.Path) -> None:
        """A dimension hash set before ``save()`` is readable after reloading."""
        dot_muse = _make_muse_dir(tmp_path)
        f = _write(tmp_path / "s.py")
        cache = StatCache.load(dot_muse)
        cache.get_object_hash(tmp_path, f)
        cache.set_dimension(tmp_path, f, "symbols", "sym42")
        cache.save()

        cache2 = StatCache.load(dot_muse)
        # Validate entry shape — mtime/size unchanged so entry is still valid.
        assert cache2.get_dimension(tmp_path, f, "symbols") == "sym42"

    def test_json_format_is_versioned(self, tmp_path: pathlib.Path) -> None:
        """The file on disk records version 4 and lists the hashed path under ``entries``."""
        dot_muse = _make_muse_dir(tmp_path)
        f = _write(tmp_path / "v.py")
        cache = StatCache.load(dot_muse)
        cache.get_object_hash(tmp_path, f)
        cache.save()

        # Uses the module-level ``json`` import; no function-scope alias is needed.
        raw = json.loads((dot_muse / "cache" / "stat.json").read_bytes())
        assert raw["version"] == 4
        assert "v.py" in raw["entries"]
343
344
345 # ---------------------------------------------------------------------------
346 # walk_workdir integration
347 # ---------------------------------------------------------------------------
348
349
class TestWalkWorkdirCacheIntegration:
    """``walk_workdir`` exercised together with the stat cache on disk."""

    def test_walk_creates_cache_file(self, tmp_path: pathlib.Path) -> None:
        """Walking a directory that has a muse dir leaves ``cache/stat.json`` behind."""
        repo_dir = muse_dir(tmp_path)
        repo_dir.mkdir()
        for name, text in (("a.py", "x = 1"), ("b.py", "y = 2")):
            _write(tmp_path / name, text)

        walk_workdir(tmp_path)

        stat_file = repo_dir / "cache" / "stat.json"
        assert stat_file.is_file()

    def test_walk_second_call_uses_cache(self, tmp_path: pathlib.Path) -> None:
        """After a cold walk, a freshly loaded cache serves ``a.py`` without going dirty."""
        repo_dir = muse_dir(tmp_path)
        repo_dir.mkdir()
        _write(tmp_path / "a.py", "x = 1")

        walk_workdir(tmp_path)  # cold — populates cache

        reloaded = StatCache.load(repo_dir)
        reloaded._dirty = False
        reloaded.get_object_hash(tmp_path, tmp_path / "a.py")
        # Should not set dirty because mtime/size unchanged.
        assert reloaded._dirty is False

    def test_walk_excludes_secrets_from_cache(self, tmp_path: pathlib.Path) -> None:
        """Secrets excluded by built-in blocklist must not appear in the manifest."""
        muse_dir(tmp_path).mkdir()
        _write(tmp_path / "visible.py")
        _write(tmp_path / ".env")

        result = walk_workdir(tmp_path)

        assert "visible.py" in result
        assert ".env" not in result

    def test_walk_tracks_non_secret_dotfiles(self, tmp_path: pathlib.Path) -> None:
        """Non-secret dotfiles like .cursorrules are now tracked by default."""
        muse_dir(tmp_path).mkdir()
        _write(tmp_path / ".cursorrules")
        _write(tmp_path / ".editorconfig")

        result = walk_workdir(tmp_path)

        assert ".cursorrules" in result
        assert ".editorconfig" in result

    def test_walk_without_muse_dir_still_works(self, tmp_path: pathlib.Path) -> None:
        """walk_workdir must work correctly even with no .muse directory."""
        _write(tmp_path / "a.py", "ok")
        result = walk_workdir(tmp_path)
        assert "a.py" in result

    def test_walk_hashes_match_direct_hash(self, tmp_path: pathlib.Path) -> None:
        """The manifest value for a file equals ``_hash_bytes`` of that file."""
        muse_dir(tmp_path).mkdir()
        source = _write(tmp_path / "c.py", "content")

        result = walk_workdir(tmp_path)

        assert result["c.py"] == _hash_bytes(source)
File History 1 commit