"""Tests for ``muse verify`` and ``muse/core/verify.py``. Covers: empty repo, healthy repo, missing commit, missing snapshot, missing object, corrupted object (hash mismatch), --no-objects flag, --quiet flag, --format json, stress: 100-commit chain. """ from __future__ import annotations import datetime import json import pathlib import pytest from tests.cli_test_helper import CliRunner cli = None # argparse migration — CliRunner ignores this arg import os from muse.core.object_store import object_path, write_object from muse.core.ids import hash_commit, hash_snapshot from muse.core.commits import ( CommitRecord, write_commit, ) from muse.core.snapshots import ( SnapshotRecord, write_snapshot, ) from muse.core.verify import run_verify from muse.core.types import Manifest, blob_id, long_id, fake_id from muse.core.paths import muse_dir, heads_dir, ref_path runner = CliRunner() _REPO_ID = "verify-test" # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- def _init_repo(path: pathlib.Path) -> pathlib.Path: muse = muse_dir(path) for d in ("commits", "snapshots", "objects", "refs/heads"): (muse / d).mkdir(parents=True, exist_ok=True) (muse / "HEAD").write_text("ref: refs/heads/main", encoding="utf-8") (muse / "repo.json").write_text( json.dumps({"repo_id": _REPO_ID, "domain": "midi"}), encoding="utf-8" ) return path def _env(repo: pathlib.Path) -> Manifest: return {"MUSE_REPO_ROOT": str(repo)} def _make_commit( root: pathlib.Path, parent_id: str | None = None, content: bytes = b"data", branch: str = "main", idx: int = 0, ) -> str: raw = content + str(idx).encode() obj_id = blob_id(raw) write_object(root, obj_id, raw) manifest = {f"file_{idx}.txt": obj_id} snap_id = hash_snapshot(manifest) write_snapshot(root, SnapshotRecord(snapshot_id=snap_id, manifest=manifest)) committed_at = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc) + datetime.timedelta(hours=idx) parent_ids = [parent_id] if parent_id else [] commit_id = hash_commit( parent_ids=parent_ids, snapshot_id=snap_id, message=f"commit {idx}", committed_at_iso=committed_at.isoformat(), ) write_commit(root, CommitRecord( commit_id=commit_id, branch=branch, snapshot_id=snap_id, message=f"commit {idx}", committed_at=committed_at, parent_commit_id=parent_id, )) (ref_path(root, branch)).write_text(commit_id, encoding="utf-8") return commit_id # --------------------------------------------------------------------------- # Unit: core run_verify # --------------------------------------------------------------------------- def test_verify_empty_repo_no_failures(tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) result = run_verify(tmp_path) assert result["all_ok"] is True assert result["failures"] == [] assert result["nothing_checked"] is True # --------------------------------------------------------------------------- # Supercharged verify — snapshot sweep, nothing_checked, zero-byte detection # --------------------------------------------------------------------------- class TestVerifySupercharged: """Tests for the three supercharged verify capabilities: 1. Snapshot store sweep — finds missing objects even when branch refs are absent. 2. nothing_checked flag — distinguishes "empty repo" from "all healthy". 3. Truncated objects are caught by the hash check (check_objects=True); existence-only mode (check_objects=False) does not hash-verify content. """ def test_nothing_checked_false_when_commits_exist(self, tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) _make_commit(tmp_path, content=b"data", idx=0) result = run_verify(tmp_path) assert result["nothing_checked"] is False def test_nothing_checked_true_when_no_refs_and_no_snapshots(self, tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) result = run_verify(tmp_path) assert result["nothing_checked"] is True def test_orphan_snapshot_with_missing_object_detected(self, tmp_path: pathlib.Path) -> None: """Snapshot exists in .muse/snapshots/ but no commit or branch ref points to it. Its objects are missing. Verify should catch this via the snapshot store sweep.""" _init_repo(tmp_path) obj_id = long_id("a" * 64) # non-existent object manifest = {"orphan.py": obj_id} snap_id = hash_snapshot(manifest) write_snapshot(tmp_path, SnapshotRecord(snapshot_id=snap_id, manifest=manifest)) # No branch ref written, no commit written. result = run_verify(tmp_path) assert result["all_ok"] is False assert any(f["kind"] == "object" and f["id"] == obj_id for f in result["failures"]) assert result["nothing_checked"] is False # sweep found something to check def test_orphan_snapshot_with_present_object_passes(self, tmp_path: pathlib.Path) -> None: """Orphan snapshot whose object IS present should not cause failures.""" _init_repo(tmp_path) content = b"orphan content" obj_id = blob_id(content) write_object(tmp_path, obj_id, content) manifest = {"file.py": obj_id} snap_id = hash_snapshot(manifest) write_snapshot(tmp_path, SnapshotRecord(snapshot_id=snap_id, manifest=manifest)) result = run_verify(tmp_path) assert result["all_ok"] is True assert result["nothing_checked"] is False # sweep found the snapshot def test_partial_clone_missing_objects_detected(self, tmp_path: pathlib.Path) -> None: """Simulate a failed clone: commits and snapshots written to store, but the branch ref file was never created and objects are absent. Verify must detect the missing objects via the snapshot sweep.""" import datetime _init_repo(tmp_path) obj_id = blob_id(b"important file content") manifest = {"src/main.py": obj_id} snap_id = hash_snapshot(manifest) write_snapshot(tmp_path, SnapshotRecord(snapshot_id=snap_id, manifest=manifest)) committed_at = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc) commit_id = hash_commit( parent_ids=[], snapshot_id=snap_id, message="partial clone", committed_at_iso=committed_at.isoformat(), ) write_commit(tmp_path, CommitRecord( commit_id=commit_id, branch="main", snapshot_id=snap_id, message="partial clone", committed_at=committed_at, )) # Critically: the branch ref file is NOT written (simulates clone crash). # The object is also NOT written (simulates R2 gap). result = run_verify(tmp_path) assert result["all_ok"] is False object_failures = [f for f in result["failures"] if f["kind"] == "object"] assert any(f["id"] == obj_id for f in object_failures) def test_truncated_object_caught_by_hash_check(self, tmp_path: pathlib.Path) -> None: """An object file truncated to empty bytes is caught as a hash mismatch when check_objects=True. Empty bytes have OID sha256:e3b0c44… which differs from the stored OID unless the file was always empty.""" import os as _os _init_repo(tmp_path) content = b"real content here" obj_id = blob_id(content) write_object(tmp_path, obj_id, content) manifest = {"real.py": obj_id} snap_id = hash_snapshot(manifest) write_snapshot(tmp_path, SnapshotRecord(snapshot_id=snap_id, manifest=manifest)) committed_at = datetime.datetime(2026, 4, 1, tzinfo=datetime.timezone.utc) commit_id = hash_commit( parent_ids=[], snapshot_id=snap_id, message="truncated test", committed_at_iso=committed_at.isoformat(), ) write_commit(tmp_path, CommitRecord( commit_id=commit_id, branch="main", snapshot_id=snap_id, message="truncated test", committed_at=committed_at, )) (heads_dir(tmp_path) / "main").write_text(commit_id) # Simulate truncation (e.g. R2 serving empty body for a non-empty OID). obj_file = object_path(tmp_path, obj_id) _os.chmod(obj_file, 0o644) obj_file.write_bytes(b"") # Hash check catches the mismatch. result = run_verify(tmp_path, check_objects=True) assert result["all_ok"] is False assert any(f["kind"] == "object" and f["id"] == obj_id for f in result["failures"]) def test_truncated_object_passes_existence_check(self, tmp_path: pathlib.Path) -> None: """check_objects=False only verifies the object file exists — it does not re-hash. A truncated file passes existence-only mode.""" import os as _os _init_repo(tmp_path) content = b"real content here" obj_id = blob_id(content) write_object(tmp_path, obj_id, content) manifest = {"real.py": obj_id} snap_id = hash_snapshot(manifest) write_snapshot(tmp_path, SnapshotRecord(snapshot_id=snap_id, manifest=manifest)) committed_at = datetime.datetime(2026, 4, 1, tzinfo=datetime.timezone.utc) commit_id = hash_commit( parent_ids=[], snapshot_id=snap_id, message="existence test", committed_at_iso=committed_at.isoformat(), ) write_commit(tmp_path, CommitRecord( commit_id=commit_id, branch="main", snapshot_id=snap_id, message="existence test", committed_at=committed_at, )) (heads_dir(tmp_path) / "main").write_text(commit_id) obj_file = object_path(tmp_path, obj_id) _os.chmod(obj_file, 0o644) obj_file.write_bytes(b"") result = run_verify(tmp_path, check_objects=False) assert result["all_ok"] is True def test_genuinely_empty_file_passes_hash_check(self, tmp_path: pathlib.Path) -> None: """A file whose content is genuinely empty bytes has OID sha256:e3b0c44… The object file is zero bytes and the hash check must pass — empty is valid.""" _init_repo(tmp_path) content = b"" obj_id = blob_id(content) # sha256:e3b0c44... write_object(tmp_path, obj_id, content) manifest = {"__init__.py": obj_id} snap_id = hash_snapshot(manifest) write_snapshot(tmp_path, SnapshotRecord(snapshot_id=snap_id, manifest=manifest)) committed_at = datetime.datetime(2026, 4, 3, tzinfo=datetime.timezone.utc) commit_id = hash_commit( parent_ids=[], snapshot_id=snap_id, message="empty file test", committed_at_iso=committed_at.isoformat(), ) write_commit(tmp_path, CommitRecord( commit_id=commit_id, branch="main", snapshot_id=snap_id, message="empty file test", committed_at=committed_at, )) (heads_dir(tmp_path) / "main").write_text(commit_id) result = run_verify(tmp_path, check_objects=True) assert result["all_ok"] is True, f"Failures: {result['failures']}" def test_truncated_object_reported_exactly_once(self, tmp_path: pathlib.Path) -> None: """A truncated object should appear exactly once in failures — the hash mismatch check, not duplicated by any secondary check.""" import os as _os _init_repo(tmp_path) content = b"will be truncated" obj_id = blob_id(content) write_object(tmp_path, obj_id, content) manifest = {"f.py": obj_id} snap_id = hash_snapshot(manifest) write_snapshot(tmp_path, SnapshotRecord(snapshot_id=snap_id, manifest=manifest)) committed_at = datetime.datetime(2026, 4, 2, tzinfo=datetime.timezone.utc) commit_id = hash_commit( parent_ids=[], snapshot_id=snap_id, message="dup test", committed_at_iso=committed_at.isoformat(), ) write_commit(tmp_path, CommitRecord( commit_id=commit_id, branch="main", snapshot_id=snap_id, message="dup test", committed_at=committed_at, )) (heads_dir(tmp_path) / "main").write_text(commit_id) obj_file = object_path(tmp_path, obj_id) _os.chmod(obj_file, 0o644) obj_file.write_bytes(b"") result = run_verify(tmp_path, check_objects=True) matching = [f for f in result["failures"] if f["id"] == obj_id] assert len(matching) == 1, f"Expected 1 failure for {obj_id[:12]}, got {len(matching)}" def test_snapshot_sweep_does_not_recheck_already_verified(self, tmp_path: pathlib.Path) -> None: """Snapshots reachable from branch refs should not be double-counted by the orphan sweep pass.""" _init_repo(tmp_path) commit_id = _make_commit(tmp_path, content=b"data", idx=0) result = run_verify(tmp_path) assert result["snapshots_checked"] == 1 # not 2 def test_json_output_includes_nothing_checked(self, tmp_path: pathlib.Path) -> None: """The --json output must include nothing_checked so scripts can distinguish empty repos from healthy ones.""" _init_repo(tmp_path) result = runner.invoke(cli, ["verify", "--json"], env=_env(tmp_path)) assert result.exit_code == 0 data = json.loads(result.output) assert "nothing_checked" in data assert data["nothing_checked"] is True def test_verify_healthy_repo(tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) _make_commit(tmp_path, content=b"healthy", idx=0) result = run_verify(tmp_path) assert result["all_ok"] is True assert result["commits_checked"] == 1 assert result["objects_checked"] >= 1 def test_verify_missing_commit_fails(tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) # Write a ref pointing to a nonexistent commit. missing_commit = fake_id("nonexistent-commit") (heads_dir(tmp_path) / "main").write_text(missing_commit, encoding="utf-8") result = run_verify(tmp_path) assert result["all_ok"] is False kinds = [f["kind"] for f in result["failures"]] assert "commit" in kinds def test_verify_corrupted_object_detected(tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) content = b"original content" obj_id = blob_id(content) write_object(tmp_path, obj_id, content) manifest = {"file.txt": obj_id} snap_id = hash_snapshot(manifest) write_snapshot(tmp_path, SnapshotRecord(snapshot_id=snap_id, manifest=manifest)) committed_at = datetime.datetime(2026, 3, 1, tzinfo=datetime.timezone.utc) commit_id = hash_commit( parent_ids=[], snapshot_id=snap_id, message="corrupt test", committed_at_iso=committed_at.isoformat(), ) write_commit(tmp_path, CommitRecord( commit_id=commit_id, branch="main", snapshot_id=snap_id, message="corrupt test", committed_at=committed_at, )) (heads_dir(tmp_path) / "main").write_text(commit_id, encoding="utf-8") # Object store writes files as 0o444 (immutable) — chmod before corrupting. obj_file = object_path(tmp_path, obj_id) os.chmod(obj_file, 0o644) obj_file.write_bytes(b"tampered data!") result = run_verify(tmp_path, check_objects=True) assert result["all_ok"] is False kinds = [f["kind"] for f in result["failures"]] assert "object" in kinds def test_verify_no_objects_flag_skips_rehash(tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) content = b"clean" obj_id = blob_id(content) write_object(tmp_path, obj_id, content) manifest = {"f.txt": obj_id} snap_id = hash_snapshot(manifest) write_snapshot(tmp_path, SnapshotRecord(snapshot_id=snap_id, manifest=manifest)) committed_at = datetime.datetime(2026, 3, 2, tzinfo=datetime.timezone.utc) commit_id = hash_commit( parent_ids=[], snapshot_id=snap_id, message="test", committed_at_iso=committed_at.isoformat(), ) write_commit(tmp_path, CommitRecord( commit_id=commit_id, branch="main", snapshot_id=snap_id, message="test", committed_at=committed_at, )) (heads_dir(tmp_path) / "main").write_text(commit_id, encoding="utf-8") # Object store writes files as 0o444 (immutable) — chmod before corrupting. obj_file = object_path(tmp_path, obj_id) os.chmod(obj_file, 0o644) obj_file.write_bytes(b"corrupted!") result = run_verify(tmp_path, check_objects=False) # Should not flag the corruption since we skipped re-hashing. assert result["all_ok"] is True # --------------------------------------------------------------------------- # CLI: muse verify # --------------------------------------------------------------------------- def test_verify_cli_help() -> None: result = runner.invoke(cli, ["verify", "--help"]) assert result.exit_code == 0 # Rich injects ANSI codes between '--' dashes; the short flag '-O' is reliable. assert "--no-objects" in result.output or "-O" in result.output def test_verify_cli_healthy(tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) _make_commit(tmp_path, content=b"cli healthy", idx=99) result = runner.invoke(cli, ["verify"], env=_env(tmp_path)) assert result.exit_code == 0 assert "healthy" in result.output.lower() def test_verify_cli_json(tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) _make_commit(tmp_path, content=b"json verify", idx=88) result = runner.invoke(cli, ["verify", "--json"], env=_env(tmp_path)) assert result.exit_code == 0 data = json.loads(result.output) assert data["all_ok"] is True assert data["failures"] == [] def test_verify_cli_quiet_exit_zero_when_clean(tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) _make_commit(tmp_path, content=b"quiet clean", idx=77) result = runner.invoke(cli, ["verify", "--quiet"], env=_env(tmp_path)) assert result.exit_code == 0 def test_verify_cli_quiet_exit_one_when_broken(tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) fake_id = "b" * 64 (heads_dir(tmp_path) / "main").write_text(fake_id, encoding="utf-8") result = runner.invoke(cli, ["verify", "-q"], env=_env(tmp_path)) assert result.exit_code != 0 def test_verify_cli_no_objects_flag(tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) _make_commit(tmp_path, content=b"no-obj flag", idx=66) result = runner.invoke(cli, ["verify", "--no-objects"], env=_env(tmp_path)) assert result.exit_code == 0 # --------------------------------------------------------------------------- # Stress: 100-commit chain # --------------------------------------------------------------------------- def test_verify_stress_100_commit_chain(tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) prev: str | None = None for i in range(100): prev = _make_commit(tmp_path, parent_id=prev, content=b"chain", idx=i) result = run_verify(tmp_path, check_objects=True) assert result["all_ok"] is True assert result["commits_checked"] == 100