"""Tests for muse.core.snapshot — content-addressed snapshot computation.

Test categories
---------------
- TestHashFile — unit: SHA-256 hash_file
- TestBuildSnapshotManifest — unit: full manifest walks
- TestNestedRepoWalk — unit/integration: nested .muse repos are excluded
- TestComputeSnapshotId — unit: snapshot id derivation
- TestComputeCommitId — unit: commit id derivation
- TestDiffWorkdirVsSnapshot — unit: diff logic
"""

import os
import pathlib
import threading
import time

import pytest

from muse.core.types import fake_id
from muse.core.snapshot import (
    build_snapshot_manifest,
    diff_workdir_vs_snapshot,
    hash_file,
    walk_workdir,
    walk_workdir_with_dirs,
)
from muse.core.ids import (
    hash_commit as compute_commit_id,
    hash_snapshot as compute_snapshot_id,
)
from muse.core.paths import muse_dir, repo_json_path


@pytest.fixture
def workdir(tmp_path: pathlib.Path) -> pathlib.Path:
    """Return pytest's per-test temporary directory as the working tree root."""
    return tmp_path


class TestHashFile:
    """Unit tests for ``hash_file``."""

    def test_consistent(self, tmp_path: pathlib.Path) -> None:
        """Hashing the same file twice yields the same id."""
        f = tmp_path / "file.mid"
        f.write_bytes(b"hello world")
        assert hash_file(f) == hash_file(f)

    def test_different_content_different_hash(self, tmp_path: pathlib.Path) -> None:
        """Files with different bytes hash to different ids."""
        a = tmp_path / "a.mid"
        b = tmp_path / "b.mid"
        a.write_bytes(b"aaa")
        b.write_bytes(b"bbb")
        assert hash_file(a) != hash_file(b)

    def test_known_hash(self, tmp_path: pathlib.Path) -> None:
        """``hash_file`` agrees with ``blob_id`` computed over the same bytes."""
        from muse.core.types import blob_id

        content = b"muse"
        f = tmp_path / "f.mid"
        f.write_bytes(content)
        expected = blob_id(content)
        assert hash_file(f) == expected


class TestBuildSnapshotManifest:
    """Unit tests for ``build_snapshot_manifest`` over a working directory."""

    def test_empty_workdir(self, workdir: pathlib.Path) -> None:
        """An empty directory produces an empty manifest."""
        assert build_snapshot_manifest(workdir) == {}

    def test_single_file(self, workdir: pathlib.Path) -> None:
        """A single file is keyed by its name and mapped to a 71-char id."""
        (workdir / "beat.mid").write_bytes(b"drums")
        manifest = build_snapshot_manifest(workdir)
        assert "beat.mid" in manifest
        assert len(manifest["beat.mid"]) == 71  # sha256:<64 hex>

    def test_nested_file(self, workdir: pathlib.Path) -> None:
        """Files in subdirectories are keyed by their '/'-joined relative path."""
        (workdir / "tracks").mkdir()
        (workdir / "tracks" / "bass.mid").write_bytes(b"bass")
        manifest = build_snapshot_manifest(workdir)
        assert "tracks/bass.mid" in manifest

    def test_secrets_excluded_by_builtin_blocklist(self, workdir: pathlib.Path) -> None:
        """Built-in secrets blocklist protects even without a .museignore file."""
        (workdir / ".env").write_bytes(b"SECRET=abc")
        (workdir / ".DS_Store").write_bytes(b"junk")
        (workdir / "beat.mid").write_bytes(b"drums")
        manifest = build_snapshot_manifest(workdir)
        assert ".env" not in manifest
        assert ".DS_Store" not in manifest
        assert "beat.mid" in manifest

    def test_dotfiles_tracked_when_not_ignored(self, workdir: pathlib.Path) -> None:
        """Non-secret dotfiles like .cursorrules are tracked by default."""
        (workdir / ".cursorrules").write_bytes(b"# rules")
        (workdir / ".editorconfig").write_bytes(b"[*]\nindent_size=4")
        manifest = build_snapshot_manifest(workdir)
        assert ".cursorrules" in manifest
        assert ".editorconfig" in manifest

    def test_museignore_excludes_custom_pattern(self, workdir: pathlib.Path) -> None:
        """A pattern in .museignore excludes the matched file."""
        (workdir / ".museignore").write_bytes(b'[global]\npatterns = ["*.secret"]\n')
        (workdir / "api.secret").write_bytes(b"token")
        (workdir / "beat.mid").write_bytes(b"drums")
        manifest = build_snapshot_manifest(workdir)
        assert "api.secret" not in manifest
        assert "beat.mid" in manifest

    def test_deterministic_order(self, workdir: pathlib.Path) -> None:
        """Two walks of the same tree produce equal manifests."""
        for name in ["c.mid", "a.mid", "b.mid"]:
            (workdir / name).write_bytes(name.encode())
        m1 = build_snapshot_manifest(workdir)
        m2 = build_snapshot_manifest(workdir)
        assert m1 == m2


class TestComputeSnapshotId:
    """Unit tests for snapshot id derivation (``hash_snapshot``)."""

    def test_empty_manifest(self) -> None:
        """Even an empty manifest yields a full-length (71-char) id."""
        sid = compute_snapshot_id({})
        assert len(sid) == 71

    def test_deterministic(self) -> None:
        """The same manifest always maps to the same snapshot id."""
        manifest = {"a.mid": fake_id("hash1"), "b.mid": fake_id("hash2")}
        assert compute_snapshot_id(manifest) == compute_snapshot_id(manifest)

    def test_order_independent(self) -> None:
        """Dict insertion order does not influence the snapshot id."""
        m1 = {"a.mid": fake_id("h1"), "b.mid": fake_id("h2")}
        m2 = {"b.mid": fake_id("h2"), "a.mid": fake_id("h1")}
        assert compute_snapshot_id(m1) == compute_snapshot_id(m2)

    def test_different_content_different_id(self) -> None:
        """Changing an object id for the same path changes the snapshot id."""
        m1 = {"a.mid": fake_id("h1")}
        m2 = {"a.mid": fake_id("h2")}
        assert compute_snapshot_id(m1) != compute_snapshot_id(m2)


class TestComputeCommitId:
    """Unit tests for commit id derivation (``hash_commit``)."""

    # Baseline keyword arguments; individual tests override one field at a time.
    _BASE = dict(
        parent_ids=[fake_id("p1")],
        snapshot_id=fake_id("snap"),
        message="msg",
        committed_at_iso="2026-01-01T00:00:00+00:00",
        author="gabriel",
        signer_public_key="ed25519:AAAA",
    )

    def test_deterministic(self) -> None:
        """Identical inputs produce identical commit ids."""
        assert compute_commit_id(**self._BASE) == compute_commit_id(**self._BASE)

    def test_parent_order_independent(self) -> None:
        """The order of ``parent_ids`` does not influence the commit id."""
        a = compute_commit_id(**{**self._BASE, "parent_ids": [fake_id("p1"), fake_id("p2")]})
        b = compute_commit_id(**{**self._BASE, "parent_ids": [fake_id("p2"), fake_id("p1")]})
        assert a == b

    def test_different_messages_different_ids(self) -> None:
        """The commit message is part of the hashed content."""
        a = compute_commit_id(**{**self._BASE, "message": "msg1"})
        b = compute_commit_id(**{**self._BASE, "message": "msg2"})
        assert a != b

    def test_different_authors_different_commit_ids(self) -> None:
        """The author is part of the hashed content."""
        a = compute_commit_id(**{**self._BASE, "author": "alice"})
        b = compute_commit_id(**{**self._BASE, "author": "bob"})
        assert a != b

    def test_different_signer_keys_different_commit_ids(self) -> None:
        """The signer public key is part of the hashed content."""
        a = compute_commit_id(**{**self._BASE, "signer_public_key": "ed25519:AAAA"})
        b = compute_commit_id(**{**self._BASE, "signer_public_key": "ed25519:BBBB"})
        assert a != b

    def test_empty_author_and_key_still_deterministic(self) -> None:
        """Empty author and signer key are accepted and hash deterministically."""
        kwargs = {**self._BASE, "author": "", "signer_public_key": ""}
        assert compute_commit_id(**kwargs) == compute_commit_id(**kwargs)

    def test_result_has_sha256_prefix(self) -> None:
        """Commit ids carry the ``sha256:`` prefix."""
        result = compute_commit_id(**self._BASE)
        assert result.startswith("sha256:")


class TestDiffWorkdirVsSnapshot:
    """Unit tests for ``diff_workdir_vs_snapshot``.

    The function returns a 6-tuple, unpacked in these tests as
    ``(added, modified, deleted, untracked, added_dirs, deleted_dirs)``.
    """

    def test_new_repo_all_untracked(self, workdir: pathlib.Path) -> None:
        """With an empty last snapshot, files are untracked rather than added."""
        (workdir / "beat.mid").write_bytes(b"x")
        added, _, _, untracked, _, _ = diff_workdir_vs_snapshot(workdir, {})
        assert added == set()
        assert untracked == {"beat.mid"}

    def test_added_file(self, workdir: pathlib.Path) -> None:
        """A new file is added; a snapshot entry missing from disk is deleted."""
        (workdir / "beat.mid").write_bytes(b"x")
        last = {"other.mid": "abc"}
        added, _, deleted, _, _, _ = diff_workdir_vs_snapshot(workdir, last)
        assert "beat.mid" in added
        assert "other.mid" in deleted

    def test_modified_file(self, workdir: pathlib.Path) -> None:
        """A file whose hash differs from the snapshot entry is modified."""
        f = workdir / "beat.mid"
        f.write_bytes(b"new content")
        last = {"beat.mid": "oldhash"}
        _, modified, _, _, _, _ = diff_workdir_vs_snapshot(workdir, last)
        assert "beat.mid" in modified

    def test_clean_workdir(self, workdir: pathlib.Path) -> None:
        """A workdir matching the snapshot reports no file changes."""
        f = workdir / "beat.mid"
        f.write_bytes(b"content")
        h = hash_file(f)
        added, modified, deleted, untracked, _, _ = diff_workdir_vs_snapshot(
            workdir, {"beat.mid": h}
        )
        assert not added and not modified and not deleted and not untracked

    def test_ignored_extant_file_not_reported_as_deleted(
        self, workdir: pathlib.Path
    ) -> None:
        """A file that was tracked, is now in .museignore, and still exists on
        disk must NOT appear in ``deleted``.

        It was intentionally moved out of tracking — reporting it as deleted
        would block checkout and cause shelf pop to unlink it.
        """
        (workdir / ".museignore").write_bytes(
            b'[global]\npatterns = ["app.js"]\n'
        )
        (workdir / "app.js").write_bytes(b"// build artifact")
        (workdir / "src.py").write_bytes(b"# source")
        # Pretend HEAD tracked both files.
        last = {
            "app.js": hash_file(workdir / "app.js"),
            "src.py": hash_file(workdir / "src.py"),
        }
        _, _, deleted, _, _, _ = diff_workdir_vs_snapshot(workdir, last)
        assert "app.js" not in deleted, (
            "ignored-and-extant file must not appear in deleted"
        )
        assert "src.py" not in deleted

    def test_ignored_absent_file_is_reported_as_deleted(
        self, workdir: pathlib.Path
    ) -> None:
        """A file that is in .museignore but is genuinely absent from disk IS
        deleted and must appear in ``deleted``."""
        (workdir / ".museignore").write_bytes(
            b'[global]\npatterns = ["app.js"]\n'
        )
        # app.js is in .museignore but does NOT exist on disk.
        (workdir / "src.py").write_bytes(b"# source")
        last = {
            "app.js": "a" * 64,  # was in HEAD but is gone from disk
            "src.py": hash_file(workdir / "src.py"),
        }
        _, _, deleted, _, _, _ = diff_workdir_vs_snapshot(workdir, last)
        assert "app.js" in deleted, (
            "ignored file that is genuinely absent from disk must still be deleted"
        )


# ---------------------------------------------------------------------------
# Nested repo boundary — unit / integration
# ---------------------------------------------------------------------------


def _make_nested_repo(parent: pathlib.Path, name: str) -> pathlib.Path:
    """Create a child directory that looks like a muse repo (.muse/ present)."""
    child = parent / name
    child.mkdir(parents=True, exist_ok=True)
    muse_dir(child).mkdir()
    repo_json_path(child).write_text('{"repo_id": "child"}')
    return child


def _symlink_dir_or_skip(link: pathlib.Path, target: pathlib.Path) -> None:
    """Make *link* a directory symlink to *target*, or skip the current test.

    Creating symlinks can raise ``OSError`` (for example on Windows without
    the required privilege) or ``NotImplementedError`` on platforms lacking
    symlink support; neither says anything about the code under test, so the
    test is skipped rather than reported as an error.
    """
    try:
        # target_is_directory only matters on Windows; it is ignored elsewhere.
        link.symlink_to(target, target_is_directory=True)
    except (OSError, NotImplementedError) as exc:
        pytest.skip(f"cannot create symlinks here: {exc}")


class TestNestedRepoWalk:
    """Nested muse repos must be excluded from the parent's walk.

    The parent repo's ``os.walk`` must prune any subdirectory that contains
    its own ``.muse/`` directory. This mirrors git submodule behaviour —
    child repo files belong to the child snapshot, not the parent.
    """

    # --- walk_workdir -------------------------------------------------------

    def test_nested_repo_files_excluded(self, tmp_path: pathlib.Path) -> None:
        """Files inside a nested repo do not appear in the parent manifest."""
        (tmp_path / "parent.py").write_bytes(b"# parent")
        child = _make_nested_repo(tmp_path, "child_repo")
        (child / "child.py").write_bytes(b"# child")
        manifest = walk_workdir(tmp_path)
        assert "parent.py" in manifest
        assert "child_repo/child.py" not in manifest

    def test_nested_repo_root_dir_excluded(self, tmp_path: pathlib.Path) -> None:
        """The child root directory itself is not descended into."""
        _make_nested_repo(tmp_path, "child_repo")
        manifest = walk_workdir(tmp_path)
        # No key should start with child_repo/
        assert not any(k.startswith("child_repo/") for k in manifest)

    def test_sibling_dirs_still_walked(self, tmp_path: pathlib.Path) -> None:
        """Normal subdirs next to a nested repo are still walked."""
        _make_nested_repo(tmp_path, "child_repo")
        sibling = tmp_path / "src"
        sibling.mkdir()
        (sibling / "main.py").write_bytes(b"# main")
        manifest = walk_workdir(tmp_path)
        assert "src/main.py" in manifest

    def test_deeply_nested_repo_excluded(self, tmp_path: pathlib.Path) -> None:
        """Nested repos two levels deep are also excluded."""
        mid = tmp_path / "packages"
        mid.mkdir()
        (mid / "shared.py").write_bytes(b"# shared")
        child = _make_nested_repo(mid, "plugin")
        (child / "plugin.py").write_bytes(b"# plugin")
        manifest = walk_workdir(tmp_path)
        assert "packages/shared.py" in manifest
        assert "packages/plugin/plugin.py" not in manifest

    def test_multiple_nested_repos_all_excluded(self, tmp_path: pathlib.Path) -> None:
        """Multiple sibling nested repos are all pruned."""
        _make_nested_repo(tmp_path, "repo_a")
        _make_nested_repo(tmp_path, "repo_b")
        _make_nested_repo(tmp_path, "repo_c")
        (tmp_path / "root.py").write_bytes(b"# root")
        for repo in ("repo_a", "repo_b", "repo_c"):
            ((tmp_path / repo) / "file.py").write_bytes(b"# file")
        manifest = walk_workdir(tmp_path)
        assert "root.py" in manifest
        for repo in ("repo_a", "repo_b", "repo_c"):
            assert f"{repo}/file.py" not in manifest

    # --- walk_workdir_with_dirs ---------------------------------------------

    def test_dirs_output_excludes_nested_repo(self, tmp_path: pathlib.Path) -> None:
        """walk_workdir_with_dirs must not list the nested repo as a directory."""
        _make_nested_repo(tmp_path, "child_repo")
        src = tmp_path / "src"
        src.mkdir()
        (src / "a.py").write_bytes(b"a")
        _, dirs = walk_workdir_with_dirs(tmp_path)
        assert "src" in dirs
        assert "child_repo" not in dirs

    # --- build_snapshot_manifest (public API) --------------------------------

    def test_build_snapshot_manifest_excludes_nested(self, tmp_path: pathlib.Path) -> None:
        """build_snapshot_manifest is the public wrapper — same boundary."""
        (tmp_path / "root.py").write_bytes(b"# root")
        child = _make_nested_repo(tmp_path, "nested")
        (child / "nested.py").write_bytes(b"# nested")
        manifest = build_snapshot_manifest(tmp_path)
        assert "root.py" in manifest
        assert "nested/nested.py" not in manifest

    # --- diff_workdir_vs_snapshot integration --------------------------------

    def test_diff_does_not_report_nested_files_as_added(
        self, tmp_path: pathlib.Path
    ) -> None:
        """diff sees an empty last-snapshot: nested files must not appear as untracked."""
        (tmp_path / "root.py").write_bytes(b"# root")
        child = _make_nested_repo(tmp_path, "sub")
        (child / "sub.py").write_bytes(b"# sub")
        added, _, _, untracked, _, _ = diff_workdir_vs_snapshot(tmp_path, {})
        assert "root.py" in untracked
        assert not any(k.startswith("sub/") for k in untracked)
        assert not any(k.startswith("sub/") for k in added)

    # --- data integrity -----------------------------------------------------

    def test_manifest_keys_posix_separators(self, tmp_path: pathlib.Path) -> None:
        """Manifest keys always use '/' regardless of OS."""
        sub = tmp_path / "a" / "b"
        sub.mkdir(parents=True)
        (sub / "file.py").write_bytes(b"x")
        manifest = walk_workdir(tmp_path)
        assert "a/b/file.py" in manifest
        # no backslash keys
        assert not any("\\" in k for k in manifest)

    def test_nested_muse_dir_itself_not_tracked(self, tmp_path: pathlib.Path) -> None:
        """The .muse/ directory of a nested repo is not tracked as a file."""
        child = _make_nested_repo(tmp_path, "child")
        (child / "real.py").write_bytes(b"x")
        manifest = walk_workdir(tmp_path)
        assert not any(".muse" in k for k in manifest)

    # --- security -----------------------------------------------------------

    def test_symlink_to_nested_repo_not_followed(self, tmp_path: pathlib.Path) -> None:
        """A symlink pointing at a directory that has .muse/ is not followed.

        walk_workdir uses followlinks=False so symlinks are excluded by design.
        """
        real = _make_nested_repo(tmp_path, "real_repo")
        (real / "secret.py").write_bytes(b"# secret")
        link = tmp_path / "link_to_repo"
        _symlink_dir_or_skip(link, real)
        manifest = walk_workdir(tmp_path)
        assert "link_to_repo/secret.py" not in manifest

    def test_symlink_to_regular_dir_not_followed(self, tmp_path: pathlib.Path) -> None:
        """Symlinks to any directory are never followed — followlinks=False."""
        real = tmp_path / "outside"
        real.mkdir()
        (real / "file.py").write_bytes(b"x")
        link = tmp_path / "link_to_dir"
        _symlink_dir_or_skip(link, real)
        manifest = walk_workdir(tmp_path)
        assert "link_to_dir/file.py" not in manifest

    def test_nested_repo_with_unusual_name(self, tmp_path: pathlib.Path) -> None:
        """Nested repos with names containing spaces or dots are excluded."""
        for name in ("my.repo", "repo name", ".hidden_repo"):
            child = tmp_path / name
            child.mkdir()
            # Only the .muse/ directory is created here (no repo.json).
            muse_dir(child).mkdir()
            (child / "file.py").write_bytes(b"x")
        (tmp_path / "root.py").write_bytes(b"r")
        manifest = walk_workdir(tmp_path)
        assert "root.py" in manifest
        assert not any("file.py" in k for k in manifest)

    # --- performance --------------------------------------------------------

    def test_large_parent_with_nested_repo_fast(self, tmp_path: pathlib.Path) -> None:
        """Walking 500-file parent with a nested repo completes in < 2 s."""
        for i in range(500):
            (tmp_path / f"file_{i:04d}.py").write_bytes(b"x" * 100)
        child = _make_nested_repo(tmp_path, "child")
        for i in range(200):
            (child / f"child_{i:04d}.py").write_bytes(b"x" * 100)
        start = time.monotonic()
        manifest = walk_workdir(tmp_path)
        elapsed = time.monotonic() - start
        assert elapsed < 2.0, f"walk took {elapsed:.2f}s — too slow"
        # Parent files included, child files excluded.
        assert len(manifest) == 500
        assert not any(k.startswith("child/") for k in manifest)

    def test_concurrent_walks_consistent(self, tmp_path: pathlib.Path) -> None:
        """Concurrent walks of the same tree return identical manifests."""
        (tmp_path / "a.py").write_bytes(b"a")
        (tmp_path / "b.py").write_bytes(b"b")
        _make_nested_repo(tmp_path, "child")
        (tmp_path / "child" / "c.py").write_bytes(b"c")
        results: list[dict] = []
        errors: list[Exception] = []

        def _walk() -> None:
            # Collect failures instead of raising: an exception inside a
            # worker thread would not fail the test on its own.
            try:
                results.append(walk_workdir(tmp_path))
            except Exception as exc:
                errors.append(exc)

        threads = [threading.Thread(target=_walk) for _ in range(8)]
        for t in threads:
            t.start()
        for t in threads:
            t.join()
        assert not errors
        assert len(results) == 8
        assert all(r == results[0] for r in results), "concurrent walks diverged"