gabriel / muse public
test_core_snapshot.py python
467 lines 18.8 KB
Raw
1 """Tests for muse.core.snapshot — content-addressed snapshot computation.
2
3 Test categories
4 ---------------
5 - TestHashFile — unit: SHA-256 hash_file
6 - TestBuildSnapshotManifest — unit: full manifest walks
7 - TestNestedRepoWalk — unit/integration: nested .muse repos are excluded
8 - TestComputeSnapshotId — unit: snapshot id derivation
9 - TestComputeCommitId — unit: commit id derivation
10 - TestDiffWorkdirVsSnapshot — unit: diff logic
11 """
12
13 import os
14 import pathlib
15 import threading
16 import time
17
18 import pytest
19
20 from muse.core.types import fake_id
21 from muse.core.snapshot import (
22 build_snapshot_manifest,
23 diff_workdir_vs_snapshot,
24 hash_file,
25 walk_workdir,
26 walk_workdir_with_dirs,
27 )
28 from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id
29 from muse.core.paths import muse_dir, repo_json_path
30
31
@pytest.fixture
def workdir(tmp_path: pathlib.Path) -> pathlib.Path:
    """Expose pytest's per-test ``tmp_path`` under the name ``workdir``."""
    root = tmp_path
    return root
35
36
class TestHashFile:
    """Unit tests for ``hash_file``."""

    def test_consistent(self, tmp_path: pathlib.Path) -> None:
        """Hashing the same file twice gives the same result."""
        target = tmp_path / "file.mid"
        target.write_bytes(b"hello world")
        first = hash_file(target)
        second = hash_file(target)
        assert first == second

    def test_different_content_different_hash(self, tmp_path: pathlib.Path) -> None:
        """Two files with different bytes hash to different values."""
        path_a = tmp_path / "a.mid"
        path_b = tmp_path / "b.mid"
        path_a.write_bytes(b"aaa")
        path_b.write_bytes(b"bbb")
        digest_a = hash_file(path_a)
        digest_b = hash_file(path_b)
        assert digest_a != digest_b

    def test_known_hash(self, tmp_path: pathlib.Path) -> None:
        """``hash_file`` agrees with ``blob_id`` applied to the same bytes."""
        # Imported locally: blob_id is not part of the module-level imports.
        from muse.core.types import blob_id

        payload = b"muse"
        target = tmp_path / "f.mid"
        target.write_bytes(payload)
        assert hash_file(target) == blob_id(payload)
57
58
class TestBuildSnapshotManifest:
    """Unit tests for ``build_snapshot_manifest`` run against a temporary tree."""

    # Length the single-file test expects of a manifest value:
    # the "sha256:" prefix followed by 64 hex characters (7 + 64 == 71).
    _ID_LENGTH = len("sha256:") + 64

    def test_empty_workdir(self, workdir: pathlib.Path) -> None:
        """An empty directory yields an empty manifest."""
        assert build_snapshot_manifest(workdir) == {}

    def test_single_file(self, workdir: pathlib.Path) -> None:
        """A single file shows up keyed by its name with a full-length id."""
        (workdir / "beat.mid").write_bytes(b"drums")
        manifest = build_snapshot_manifest(workdir)
        assert "beat.mid" in manifest
        assert len(manifest["beat.mid"]) == self._ID_LENGTH  # sha256:<64 hex>

    def test_nested_file(self, workdir: pathlib.Path) -> None:
        """Files in subdirectories are keyed by their '/'-joined relative path."""
        (workdir / "tracks").mkdir()
        (workdir / "tracks" / "bass.mid").write_bytes(b"bass")
        manifest = build_snapshot_manifest(workdir)
        assert "tracks/bass.mid" in manifest

    def test_secrets_excluded_by_builtin_blocklist(self, workdir: pathlib.Path) -> None:
        """Built-in secrets blocklist protects even without a .museignore file."""
        (workdir / ".env").write_bytes(b"SECRET=abc")
        (workdir / ".DS_Store").write_bytes(b"junk")
        (workdir / "beat.mid").write_bytes(b"drums")
        manifest = build_snapshot_manifest(workdir)
        assert ".env" not in manifest
        assert ".DS_Store" not in manifest
        assert "beat.mid" in manifest

    def test_dotfiles_tracked_when_not_ignored(self, workdir: pathlib.Path) -> None:
        """Non-secret dotfiles like .cursorrules are tracked by default."""
        (workdir / ".cursorrules").write_bytes(b"# rules")
        (workdir / ".editorconfig").write_bytes(b"[*]\nindent_size=4")
        manifest = build_snapshot_manifest(workdir)
        assert ".cursorrules" in manifest
        assert ".editorconfig" in manifest

    def test_museignore_excludes_custom_pattern(self, workdir: pathlib.Path) -> None:
        """A pattern in .museignore excludes the matched file."""
        (workdir / ".museignore").write_bytes(b'[global]\npatterns = ["*.secret"]\n')
        (workdir / "api.secret").write_bytes(b"token")
        (workdir / "beat.mid").write_bytes(b"drums")
        manifest = build_snapshot_manifest(workdir)
        assert "api.secret" not in manifest
        assert "beat.mid" in manifest

    def test_deterministic_order(self, workdir: pathlib.Path) -> None:
        """Two builds of the same tree agree on content and on key order."""
        # Files are created in non-sorted order on purpose.
        for name in ["c.mid", "a.mid", "b.mid"]:
            (workdir / name).write_bytes(name.encode())
        m1 = build_snapshot_manifest(workdir)
        m2 = build_snapshot_manifest(workdir)
        assert m1 == m2
        # Mapping equality ignores key order, so compare the key sequences
        # explicitly; otherwise this test would not check ordering at all.
        assert list(m1) == list(m2)
108
109
class TestComputeSnapshotId:
    """Unit tests for snapshot id derivation (``hash_snapshot``)."""

    def test_empty_manifest(self) -> None:
        """Even an empty manifest produces a 71-character id."""
        snapshot_id = compute_snapshot_id({})
        assert len(snapshot_id) == 71

    def test_deterministic(self) -> None:
        """The same manifest always maps to the same id."""
        manifest = {"a.mid": fake_id("hash1"), "b.mid": fake_id("hash2")}
        first = compute_snapshot_id(manifest)
        second = compute_snapshot_id(manifest)
        assert first == second

    def test_order_independent(self) -> None:
        """Insertion order of manifest entries does not affect the id."""
        a_then_b = {"a.mid": fake_id("h1"), "b.mid": fake_id("h2")}
        b_then_a = {"b.mid": fake_id("h2"), "a.mid": fake_id("h1")}
        assert compute_snapshot_id(a_then_b) == compute_snapshot_id(b_then_a)

    def test_different_content_different_id(self) -> None:
        """Changing an entry's object id changes the snapshot id."""
        before = {"a.mid": fake_id("h1")}
        after = {"a.mid": fake_id("h2")}
        assert compute_snapshot_id(before) != compute_snapshot_id(after)
128
129
130
131
class TestComputeCommitId:
    """Unit tests for commit id derivation (``hash_commit``)."""

    # Baseline keyword arguments; individual tests override single fields.
    _BASE = {
        "parent_ids": [fake_id("p1")],
        "snapshot_id": fake_id("snap"),
        "message": "msg",
        "committed_at_iso": "2026-01-01T00:00:00+00:00",
        "author": "gabriel",
        "signer_public_key": "ed25519:AAAA",
    }

    def _commit_id(self, **overrides):
        """Compute a commit id from ``_BASE`` with *overrides* applied on top."""
        return compute_commit_id(**{**self._BASE, **overrides})

    def test_deterministic(self) -> None:
        """Identical inputs produce identical commit ids."""
        assert self._commit_id() == self._commit_id()

    def test_parent_order_independent(self) -> None:
        """Swapping the order of parent ids does not change the commit id."""
        forward = self._commit_id(parent_ids=[fake_id("p1"), fake_id("p2")])
        backward = self._commit_id(parent_ids=[fake_id("p2"), fake_id("p1")])
        assert forward == backward

    def test_different_messages_different_ids(self) -> None:
        """The commit message contributes to the id."""
        first = self._commit_id(message="msg1")
        second = self._commit_id(message="msg2")
        assert first != second

    def test_different_authors_different_commit_ids(self) -> None:
        """The author contributes to the id."""
        by_alice = self._commit_id(author="alice")
        by_bob = self._commit_id(author="bob")
        assert by_alice != by_bob

    def test_different_signer_keys_different_commit_ids(self) -> None:
        """The signer public key contributes to the id."""
        key_a = self._commit_id(signer_public_key="ed25519:AAAA")
        key_b = self._commit_id(signer_public_key="ed25519:BBBB")
        assert key_a != key_b

    def test_empty_author_and_key_still_deterministic(self) -> None:
        """Empty author and signer key are accepted and remain deterministic."""
        blank = {"author": "", "signer_public_key": ""}
        assert self._commit_id(**blank) == self._commit_id(**blank)

    def test_result_has_sha256_prefix(self) -> None:
        """Commit ids carry the ``sha256:`` prefix."""
        commit_id = self._commit_id()
        assert commit_id.startswith("sha256:")
172
173
174
175
class TestDiffWorkdirVsSnapshot:
    """Unit tests for ``diff_workdir_vs_snapshot``.

    The function is unpacked here as a 6-tuple:
    ``(added, modified, deleted, untracked, added_dirs, deleted_dirs)``.
    Members a test does not inspect are bound to ``_``.
    """

    def test_new_repo_all_untracked(self, workdir: pathlib.Path) -> None:
        """With an empty last snapshot, files are untracked rather than added."""
        (workdir / "beat.mid").write_bytes(b"x")
        added, _, _, untracked, _, _ = diff_workdir_vs_snapshot(workdir, {})
        assert added == set()
        assert untracked == {"beat.mid"}

    def test_added_file(self, workdir: pathlib.Path) -> None:
        """A new file is added; a snapshot entry missing from disk is deleted."""
        (workdir / "beat.mid").write_bytes(b"x")
        last = {"other.mid": "abc"}
        added, _, deleted, _, _, _ = diff_workdir_vs_snapshot(workdir, last)
        assert "beat.mid" in added
        assert "other.mid" in deleted

    def test_modified_file(self, workdir: pathlib.Path) -> None:
        """A file whose hash differs from the snapshot entry is modified."""
        f = workdir / "beat.mid"
        f.write_bytes(b"new content")
        last = {"beat.mid": "oldhash"}
        _, modified, _, _, _, _ = diff_workdir_vs_snapshot(workdir, last)
        assert "beat.mid" in modified

    def test_clean_workdir(self, workdir: pathlib.Path) -> None:
        """A workdir that matches the snapshot reports no file changes."""
        f = workdir / "beat.mid"
        f.write_bytes(b"content")
        h = hash_file(f)
        added, modified, deleted, untracked, _, _ = diff_workdir_vs_snapshot(
            workdir, {"beat.mid": h}
        )
        assert not added and not modified and not deleted and not untracked

    def test_ignored_extant_file_not_reported_as_deleted(
        self, workdir: pathlib.Path
    ) -> None:
        """A file that was tracked, is now in .museignore, and still exists on
        disk must NOT appear in ``deleted``.  It was intentionally moved out of
        tracking — reporting it as deleted would block checkout and cause shelf
        pop to unlink it."""
        (workdir / ".museignore").write_bytes(
            b'[global]\npatterns = ["app.js"]\n'
        )
        (workdir / "app.js").write_bytes(b"// build artifact")
        (workdir / "src.py").write_bytes(b"# source")
        # Pretend HEAD tracked both files.
        last = {
            "app.js": hash_file(workdir / "app.js"),
            "src.py": hash_file(workdir / "src.py"),
        }
        _, _, deleted, _, _, _ = diff_workdir_vs_snapshot(workdir, last)
        assert "app.js" not in deleted, (
            "ignored-and-extant file must not appear in deleted"
        )
        assert "src.py" not in deleted

    def test_ignored_absent_file_is_reported_as_deleted(
        self, workdir: pathlib.Path
    ) -> None:
        """A file that is in .museignore but is genuinely absent from disk IS
        deleted and must appear in ``deleted``."""
        (workdir / ".museignore").write_bytes(
            b'[global]\npatterns = ["app.js"]\n'
        )
        # app.js is in .museignore but does NOT exist on disk.
        (workdir / "src.py").write_bytes(b"# source")
        last = {
            "app.js": "a" * 64,  # was in HEAD but is gone from disk
            "src.py": hash_file(workdir / "src.py"),
        }
        _, _, deleted, _, _, _ = diff_workdir_vs_snapshot(workdir, last)
        assert "app.js" in deleted, (
            "ignored file that is genuinely absent from disk must still be deleted"
        )
248
249
250 # ---------------------------------------------------------------------------
251 # Nested repo boundary — unit / integration
252 # ---------------------------------------------------------------------------
253
def _make_nested_repo(parent: pathlib.Path, name: str) -> pathlib.Path:
    """Build ``parent / name`` so that it looks like a muse repo; return it.

    The directory itself is created if needed (parents included). The path
    given by ``muse_dir`` is then created inside it and a small JSON document
    is written to the path given by ``repo_json_path``.
    """
    repo_root = parent / name
    repo_root.mkdir(parents=True, exist_ok=True)
    marker_dir = muse_dir(repo_root)
    marker_dir.mkdir()
    config_file = repo_json_path(repo_root)
    config_file.write_text('{"repo_id": "child"}')
    return repo_root
261
262
class TestNestedRepoWalk:
    """Nested muse repos must be excluded from the parent's walk.

    The parent repo's ``os.walk`` must prune any subdirectory that contains
    its own ``.muse/`` directory.  This mirrors git submodule behaviour —
    child repo files belong to the child snapshot, not the parent.
    """

    # --- walk_workdir -------------------------------------------------------

    def test_nested_repo_files_excluded(self, tmp_path: pathlib.Path) -> None:
        """Files inside a nested repo do not appear in the parent manifest."""
        (tmp_path / "parent.py").write_bytes(b"# parent")
        child = _make_nested_repo(tmp_path, "child_repo")
        (child / "child.py").write_bytes(b"# child")

        manifest = walk_workdir(tmp_path)
        assert "parent.py" in manifest
        assert "child_repo/child.py" not in manifest

    def test_nested_repo_root_dir_excluded(self, tmp_path: pathlib.Path) -> None:
        """The child root directory itself is not descended into."""
        _make_nested_repo(tmp_path, "child_repo")
        manifest = walk_workdir(tmp_path)
        # No key should start with child_repo/
        assert not any(k.startswith("child_repo/") for k in manifest)

    def test_sibling_dirs_still_walked(self, tmp_path: pathlib.Path) -> None:
        """Normal subdirs next to a nested repo are still walked."""
        _make_nested_repo(tmp_path, "child_repo")
        sibling = tmp_path / "src"
        sibling.mkdir()
        (sibling / "main.py").write_bytes(b"# main")

        manifest = walk_workdir(tmp_path)
        assert "src/main.py" in manifest

    def test_deeply_nested_repo_excluded(self, tmp_path: pathlib.Path) -> None:
        """Nested repos two levels deep are also excluded."""
        mid = tmp_path / "packages"
        mid.mkdir()
        (mid / "shared.py").write_bytes(b"# shared")
        child = _make_nested_repo(mid, "plugin")
        (child / "plugin.py").write_bytes(b"# plugin")

        manifest = walk_workdir(tmp_path)
        assert "packages/shared.py" in manifest
        assert "packages/plugin/plugin.py" not in manifest

    def test_multiple_nested_repos_all_excluded(self, tmp_path: pathlib.Path) -> None:
        """Multiple sibling nested repos are all pruned."""
        _make_nested_repo(tmp_path, "repo_a")
        _make_nested_repo(tmp_path, "repo_b")
        _make_nested_repo(tmp_path, "repo_c")
        (tmp_path / "root.py").write_bytes(b"# root")
        for repo in ("repo_a", "repo_b", "repo_c"):
            ((tmp_path / repo) / "file.py").write_bytes(b"# file")

        manifest = walk_workdir(tmp_path)
        assert "root.py" in manifest
        for repo in ("repo_a", "repo_b", "repo_c"):
            assert f"{repo}/file.py" not in manifest

    # --- walk_workdir_with_dirs ---------------------------------------------

    def test_dirs_output_excludes_nested_repo(self, tmp_path: pathlib.Path) -> None:
        """walk_workdir_with_dirs must not list the nested repo as a directory."""
        _make_nested_repo(tmp_path, "child_repo")
        src = tmp_path / "src"
        src.mkdir()
        (src / "a.py").write_bytes(b"a")

        _, dirs = walk_workdir_with_dirs(tmp_path)
        assert "src" in dirs
        assert "child_repo" not in dirs

    # --- build_snapshot_manifest (public API) --------------------------------

    def test_build_snapshot_manifest_excludes_nested(self, tmp_path: pathlib.Path) -> None:
        """build_snapshot_manifest is the public wrapper — same boundary."""
        (tmp_path / "root.py").write_bytes(b"# root")
        child = _make_nested_repo(tmp_path, "nested")
        (child / "nested.py").write_bytes(b"# nested")

        manifest = build_snapshot_manifest(tmp_path)
        assert "root.py" in manifest
        assert "nested/nested.py" not in manifest

    # --- diff_workdir_vs_snapshot integration --------------------------------

    def test_diff_does_not_report_nested_files_as_added(
        self, tmp_path: pathlib.Path
    ) -> None:
        """diff sees an empty last-snapshot: nested files must not appear as untracked."""
        (tmp_path / "root.py").write_bytes(b"# root")
        child = _make_nested_repo(tmp_path, "sub")
        (child / "sub.py").write_bytes(b"# sub")

        # Only ``added`` and ``untracked`` are inspected; the rest is discarded.
        added, _, _, untracked, _, _ = diff_workdir_vs_snapshot(tmp_path, {})
        assert "root.py" in untracked
        assert not any(k.startswith("sub/") for k in untracked)
        assert not any(k.startswith("sub/") for k in added)

    # --- data integrity -----------------------------------------------------

    def test_manifest_keys_posix_separators(self, tmp_path: pathlib.Path) -> None:
        """Manifest keys always use '/' regardless of OS."""
        sub = tmp_path / "a" / "b"
        sub.mkdir(parents=True)
        (sub / "file.py").write_bytes(b"x")
        manifest = walk_workdir(tmp_path)
        assert "a/b/file.py" in manifest
        # No key may contain a backslash.  (An earlier extra assertion of the
        # form ``"/" in k or "/" not in k`` was always true and was dropped.)
        assert not any("\\" in k for k in manifest)

    def test_nested_muse_dir_itself_not_tracked(self, tmp_path: pathlib.Path) -> None:
        """The .muse/ directory of a nested repo is not tracked as a file."""
        child = _make_nested_repo(tmp_path, "child")
        (child / "real.py").write_bytes(b"x")
        manifest = walk_workdir(tmp_path)
        assert not any(".muse" in k for k in manifest)

    # --- security -----------------------------------------------------------

    def test_symlink_to_nested_repo_not_followed(self, tmp_path: pathlib.Path) -> None:
        """A symlink pointing at a directory that has .muse/ is not followed.
        walk_workdir uses followlinks=False so symlinks are excluded by design."""
        real = _make_nested_repo(tmp_path, "real_repo")
        (real / "secret.py").write_bytes(b"# secret")
        link = tmp_path / "link_to_repo"
        link.symlink_to(real)

        manifest = walk_workdir(tmp_path)
        assert "link_to_repo/secret.py" not in manifest

    def test_symlink_to_regular_dir_not_followed(self, tmp_path: pathlib.Path) -> None:
        """Symlinks to any directory are never followed — followlinks=False."""
        real = tmp_path / "outside"
        real.mkdir()
        (real / "file.py").write_bytes(b"x")
        link = tmp_path / "link_to_dir"
        link.symlink_to(real)

        manifest = walk_workdir(tmp_path)
        assert "link_to_dir/file.py" not in manifest

    def test_nested_repo_with_unusual_name(self, tmp_path: pathlib.Path) -> None:
        """Nested repos with names containing spaces or dots are excluded."""
        # Built by hand rather than via _make_nested_repo: only the muse
        # directory is created here, with no repo JSON file.
        for name in ("my.repo", "repo name", ".hidden_repo"):
            child = tmp_path / name
            child.mkdir()
            muse_dir(child).mkdir()
            (child / "file.py").write_bytes(b"x")

        (tmp_path / "root.py").write_bytes(b"r")
        manifest = walk_workdir(tmp_path)
        assert "root.py" in manifest
        assert not any("file.py" in k for k in manifest)

    # --- performance --------------------------------------------------------

    def test_large_parent_with_nested_repo_fast(self, tmp_path: pathlib.Path) -> None:
        """Walking 500-file parent with a nested repo completes in < 2 s."""
        for i in range(500):
            (tmp_path / f"file_{i:04d}.py").write_bytes(b"x" * 100)
        child = _make_nested_repo(tmp_path, "child")
        for i in range(200):
            (child / f"child_{i:04d}.py").write_bytes(b"x" * 100)

        # Only the walk itself is timed, not the fixture setup above.
        start = time.monotonic()
        manifest = walk_workdir(tmp_path)
        elapsed = time.monotonic() - start

        assert elapsed < 2.0, f"walk took {elapsed:.2f}s — too slow"
        # Parent files included, child files excluded.
        assert len(manifest) == 500
        assert not any(k.startswith("child/") for k in manifest)

    def test_concurrent_walks_consistent(self, tmp_path: pathlib.Path) -> None:
        """Concurrent walks of the same tree return identical manifests."""
        (tmp_path / "a.py").write_bytes(b"a")
        (tmp_path / "b.py").write_bytes(b"b")
        _make_nested_repo(tmp_path, "child")
        (tmp_path / "child" / "c.py").write_bytes(b"c")

        results: list[dict] = []
        errors: list[Exception] = []

        def _walk() -> None:
            # Exceptions are collected, not raised, so a failure inside a
            # worker thread is visible to the assertions below.
            try:
                results.append(walk_workdir(tmp_path))
            except Exception as exc:
                errors.append(exc)

        threads = [threading.Thread(target=_walk) for _ in range(8)]
        for t in threads:
            t.start()
        for t in threads:
            t.join()

        assert not errors
        assert len(results) == 8
        assert all(r == results[0] for r in results), "concurrent walks diverged"
File History 1 commit