"""Integration tests for shallow-object-store + promisor-remote architecture. Core semantics -------------- A Muse repo's local object store is legitimately *shallow*: it may not hold every historical object blob. Missing objects are not automatically failures. Their status depends on what is known about the remote: PRESENT → object file exists locally → verified (checked) PROMISED → absent but a promisor remote exists → not a failure; counted MISSING → absent AND no promisor remote at all → kind="object" failure Shallow graft semantics ----------------------- `.muse/shallow` lists the deepest commits included in local history. The BFS walk in run_verify stops at these commits — it does NOT enqueue their parents. Objects beyond the graft boundary are not expected locally. strict mode ----------- `run_verify(strict=True)` treats every absent object as a failure, regardless of promisor remotes. Use this when you need to prove complete local integrity. Coverage -------- U — unit: VerifyResult has promised_objects, shallow_commits, is_shallow, promisor_remotes fields E — promisor: missing objects with promisor → not failures missing objects without promisor → failures F — strict: strict=True fails on promised objects S — shallow: BFS stops at graft boundary; parents beyond not checked C — CLI: --strict flag; JSON output includes new fields I — integration: real repo layout, multi-branch, orphan sweep """ from __future__ import annotations from collections.abc import Mapping import datetime import json import pathlib import threading import pytest from tests.cli_test_helper import CliRunner, InvokeResult from muse.core.types import blob_id, long_id, fake_id from muse.core.object_store import object_path, write_object from muse.core.shallow import add_shallow, write_shallow from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id from muse.core.commits import ( CommitRecord, write_commit, ) from muse.core.snapshots import ( SnapshotRecord, write_snapshot, ) from muse.core.verify import run_verify from muse.core.paths import muse_dir, ref_path runner = CliRunner() _REPO_ID = "shallow-verify-test" # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- def _init_repo( path: pathlib.Path, remotes: Mapping[str, object] | None = None, ) -> pathlib.Path: dot_muse = muse_dir(path) for d in ("commits", "snapshots", "objects", "refs/heads"): (dot_muse / d).mkdir(parents=True, exist_ok=True) (dot_muse / "HEAD").write_text("ref: refs/heads/main") (dot_muse / "repo.json").write_text( json.dumps({"repo_id": _REPO_ID, "domain": "code"}) ) if remotes: lines = [] for name, cfg in remotes.items(): lines.append(f"[remotes.{name}]") lines.append(f'url = "{cfg["url"]}"') if "promisor" in cfg: val = "true" if cfg["promisor"] else "false" lines.append(f"promisor = {val}") (dot_muse / "config.toml").write_text("\n".join(lines) + "\n") return path def _make_commit( root: pathlib.Path, parent_id: str | None = None, content: bytes = b"data", branch: str = "main", idx: int = 0, write_objects: bool = True, ) -> tuple[str, str]: """Create a commit and return (commit_id, obj_id). When write_objects=False, the object is NOT written to the store — simulating a shallow gap. """ raw = content + str(idx).encode() obj_id = blob_id(raw) if write_objects: write_object(root, obj_id, raw) manifest = {f"file_{idx}.txt": obj_id} snap_id = compute_snapshot_id(manifest) if write_objects: write_snapshot(root, SnapshotRecord(snapshot_id=snap_id, manifest=manifest)) else: # Write the snapshot record even for shallow commits so the commit # can be read back, but omit the object file. write_snapshot(root, SnapshotRecord(snapshot_id=snap_id, manifest=manifest)) committed_at = ( datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc) + datetime.timedelta(hours=idx) ) parent_ids = [parent_id] if parent_id else [] commit_id = compute_commit_id( parent_ids, snap_id, f"commit {idx}", committed_at.isoformat(), ) write_commit( root, CommitRecord( commit_id=commit_id, branch=branch, snapshot_id=snap_id, message=f"commit {idx}", committed_at=committed_at, parent_commit_id=parent_id, ), ) (ref_path(root, branch)).write_text(commit_id) return commit_id, obj_id def _env(repo: pathlib.Path) -> Mapping[str, str]: return {"MUSE_REPO_ROOT": str(repo)} def _invoke(repo: pathlib.Path, *args: str) -> InvokeResult: from muse.cli.app import main as cli_main return runner.invoke(cli_main, ["verify", *args], env=_env(repo)) # --------------------------------------------------------------------------- # U — VerifyResult shape: new fields present # --------------------------------------------------------------------------- class TestVerifyResultShape: def test_promised_objects_field_present(self, tmp_path: pathlib.Path) -> None: repo = _init_repo(tmp_path) result = run_verify(repo) assert "promised_objects" in result def test_shallow_commits_field_present(self, tmp_path: pathlib.Path) -> None: repo = _init_repo(tmp_path) result = run_verify(repo) assert "shallow_commits" in result def test_is_shallow_field_present(self, tmp_path: pathlib.Path) -> None: repo = _init_repo(tmp_path) result = run_verify(repo) assert "is_shallow" in result def test_promisor_remotes_field_present(self, tmp_path: pathlib.Path) -> None: repo = _init_repo(tmp_path) result = run_verify(repo) assert "promisor_remotes" in result def test_promised_objects_zero_for_clean_repo(self, tmp_path: pathlib.Path) -> None: repo = _init_repo(tmp_path) _make_commit(repo, idx=0) result = run_verify(repo) assert result["promised_objects"] == 0 def test_is_shallow_false_without_shallow_file(self, tmp_path: pathlib.Path) -> None: repo = _init_repo(tmp_path) result = run_verify(repo) assert result["is_shallow"] is False def test_promisor_remotes_empty_without_config(self, tmp_path: pathlib.Path) -> None: repo = _init_repo(tmp_path) result = run_verify(repo) assert result["promisor_remotes"] == [] # --------------------------------------------------------------------------- # E — Promisor semantics: PROMISED ≠ failure # --------------------------------------------------------------------------- class TestPromisorSemantics: def test_missing_object_with_promisor_not_a_failure( self, tmp_path: pathlib.Path ) -> None: repo = _init_repo(tmp_path, remotes={ "local": {"url": "https://localhost:1337/gabriel/muse"}, }) # Write commit + snapshot but NOT the object — shallow gap _make_commit(repo, idx=0, write_objects=False) result = run_verify(repo) assert result["all_ok"] is True assert result["promised_objects"] >= 1 assert result["failures"] == [] def test_missing_object_without_promisor_is_failure( self, tmp_path: pathlib.Path ) -> None: repo = _init_repo(tmp_path) # no remotes _make_commit(repo, idx=0, write_objects=False) result = run_verify(repo) assert result["all_ok"] is False assert any(f["kind"] == "object" for f in result["failures"]) def test_promised_objects_counted_correctly( self, tmp_path: pathlib.Path ) -> None: repo = _init_repo(tmp_path, remotes={ "local": {"url": "https://localhost:1337/gabriel/muse"}, }) # 3 commits, each with a missing object prev: str | None = None for i in range(3): prev, _ = _make_commit(repo, parent_id=prev, idx=i, write_objects=False) result = run_verify(repo) assert result["promised_objects"] == 3 assert result["all_ok"] is True def test_present_objects_not_counted_as_promised( self, tmp_path: pathlib.Path ) -> None: repo = _init_repo(tmp_path, remotes={ "local": {"url": "https://localhost:1337/gabriel/muse"}, }) _make_commit(repo, idx=0, write_objects=True) # object IS present result = run_verify(repo) assert result["promised_objects"] == 0 def test_promisor_false_opt_out_causes_failure( self, tmp_path: pathlib.Path ) -> None: repo = _init_repo(tmp_path, remotes={ "mirror": {"url": "http://mirror.example.com/muse", "promisor": False}, }) _make_commit(repo, idx=0, write_objects=False) result = run_verify(repo) assert result["all_ok"] is False assert result["promised_objects"] == 0 def test_promisor_remotes_listed_in_result( self, tmp_path: pathlib.Path ) -> None: repo = _init_repo(tmp_path, remotes={ "local": {"url": "https://localhost:1337/gabriel/muse"}, "staging": {"url": "https://staging.musehub.ai/gabriel/muse"}, }) result = run_verify(repo) assert "local" in result["promisor_remotes"] assert "staging" in result["promisor_remotes"] def test_mixed_present_and_promised(self, tmp_path: pathlib.Path) -> None: repo = _init_repo(tmp_path, remotes={ "local": {"url": "https://localhost:1337/gabriel/muse"}, }) _make_commit(repo, idx=0, write_objects=True) # PRESENT prev, _ = _make_commit(repo, parent_id=None, idx=1, write_objects=False) # PROMISED # update ref to idx=1 result = run_verify(repo) assert result["all_ok"] is True assert result["objects_checked"] >= 1 # idx=0 present and checked assert result["promised_objects"] >= 1 # idx=1 promised # --------------------------------------------------------------------------- # F — strict mode: promised objects become failures # --------------------------------------------------------------------------- class TestStrictMode: def test_strict_fails_on_promised_object(self, tmp_path: pathlib.Path) -> None: repo = _init_repo(tmp_path, remotes={ "local": {"url": "https://localhost:1337/gabriel/muse"}, }) _make_commit(repo, idx=0, write_objects=False) result = run_verify(repo, strict=True) assert result["all_ok"] is False assert any(f["kind"] == "object" for f in result["failures"]) def test_strict_does_not_change_result_for_present_objects( self, tmp_path: pathlib.Path ) -> None: repo = _init_repo(tmp_path, remotes={ "local": {"url": "https://localhost:1337/gabriel/muse"}, }) _make_commit(repo, idx=0, write_objects=True) result = run_verify(repo, strict=True) assert result["all_ok"] is True assert result["promised_objects"] == 0 def test_strict_promised_objects_still_zero_in_strict( self, tmp_path: pathlib.Path ) -> None: repo = _init_repo(tmp_path, remotes={ "local": {"url": "https://localhost:1337/gabriel/muse"}, }) _make_commit(repo, idx=0, write_objects=False) result = run_verify(repo, strict=True) # In strict mode, absent objects go to failures, not promised_objects assert result["promised_objects"] == 0 def test_strict_fail_fast(self, tmp_path: pathlib.Path) -> None: repo = _init_repo(tmp_path, remotes={ "local": {"url": "https://localhost:1337/gabriel/muse"}, }) prev: str | None = None for i in range(5): prev, _ = _make_commit(repo, parent_id=prev, idx=i, write_objects=False) result = run_verify(repo, strict=True, fail_fast=True) assert result["all_ok"] is False assert len(result["failures"]) == 1 # --------------------------------------------------------------------------- # S — shallow graft: BFS stops at boundary # --------------------------------------------------------------------------- class TestShallowGraft: def test_is_shallow_true_when_shallow_file_exists( self, tmp_path: pathlib.Path ) -> None: repo = _init_repo(tmp_path) cid, _ = _make_commit(repo, idx=0) add_shallow(repo, cid) result = run_verify(repo) assert result["is_shallow"] is True def test_shallow_commits_counted(self, tmp_path: pathlib.Path) -> None: repo = _init_repo(tmp_path) cid, _ = _make_commit(repo, idx=0) add_shallow(repo, cid) result = run_verify(repo) assert result["shallow_commits"] >= 1 def test_parents_beyond_graft_not_checked(self, tmp_path: pathlib.Path) -> None: """Commit chain: old → graft → new. The graft is in .muse/shallow. The old commit's objects are not in the local store. Verify must NOT report the old commit's objects as missing — they're beyond the graft boundary. """ repo = _init_repo(tmp_path) # no remotes — would fail if walked past graft # old commit: object NOT in store old_cid, old_obj_id = _make_commit(repo, idx=0, write_objects=False) # graft commit: parents=old, object IS in store graft_cid, _ = _make_commit(repo, parent_id=old_cid, idx=1, write_objects=True) add_shallow(repo, graft_cid) # current tip: parent=graft, object IS in store tip_cid, _ = _make_commit(repo, parent_id=graft_cid, idx=2, write_objects=True) result = run_verify(repo) # No failures: old commit's objects are beyond the graft, not checked assert result["all_ok"] is True, f"Unexpected failures: {result['failures']}" def test_graft_objects_themselves_are_checked(self, tmp_path: pathlib.Path) -> None: """The graft commit's own objects ARE expected locally.""" repo = _init_repo(tmp_path) cid, obj_id = _make_commit(repo, idx=0, write_objects=True) add_shallow(repo, cid) result = run_verify(repo) assert result["all_ok"] is True assert result["objects_checked"] >= 1 def test_multiple_grafts(self, tmp_path: pathlib.Path) -> None: # Two grafts on separate branches so both are reachable from branch refs. repo = _init_repo(tmp_path) cid_a, _ = _make_commit(repo, idx=0, branch="main", write_objects=True) cid_b, _ = _make_commit(repo, idx=1, branch="dev", write_objects=True) write_shallow(repo, {cid_a, cid_b}) result = run_verify(repo) assert result["shallow_commits"] >= 2 assert result["is_shallow"] is True # --------------------------------------------------------------------------- # C — CLI surface # --------------------------------------------------------------------------- class TestCLIShallow: def test_json_has_promised_objects(self, tmp_path: pathlib.Path) -> None: repo = _init_repo(tmp_path) _make_commit(repo, idx=0) d = json.loads(_invoke(repo, "--json").output) assert "promised_objects" in d def test_json_has_shallow_commits(self, tmp_path: pathlib.Path) -> None: repo = _init_repo(tmp_path) _make_commit(repo, idx=0) d = json.loads(_invoke(repo, "--json").output) assert "shallow_commits" in d def test_json_has_is_shallow(self, tmp_path: pathlib.Path) -> None: repo = _init_repo(tmp_path) _make_commit(repo, idx=0) d = json.loads(_invoke(repo, "--json").output) assert "is_shallow" in d def test_json_has_promisor_remotes(self, tmp_path: pathlib.Path) -> None: repo = _init_repo(tmp_path) _make_commit(repo, idx=0) d = json.loads(_invoke(repo, "--json").output) assert "promisor_remotes" in d def test_strict_flag_exists(self, tmp_path: pathlib.Path) -> None: repo = _init_repo(tmp_path) _make_commit(repo, idx=0) r = _invoke(repo, "--strict", "--json") # Just check it doesn't error on unknown flag assert r.exit_code in (0, 1) # 0=ok 1=failures def test_strict_fails_on_promised_via_cli(self, tmp_path: pathlib.Path) -> None: repo = _init_repo(tmp_path, remotes={ "local": {"url": "https://localhost:1337/gabriel/muse"}, }) _make_commit(repo, idx=0, write_objects=False) # Without --strict: ok r_default = _invoke(repo, "--json") d_default = json.loads(r_default.output) assert d_default["all_ok"] is True # With --strict: failure r_strict = _invoke(repo, "--strict", "--json") assert r_strict.exit_code == 1 d_strict = json.loads(r_strict.output) assert d_strict["all_ok"] is False def test_is_shallow_true_in_json_when_shallow_file( self, tmp_path: pathlib.Path ) -> None: repo = _init_repo(tmp_path) cid, _ = _make_commit(repo, idx=0) add_shallow(repo, cid) d = json.loads(_invoke(repo, "--json").output) assert d["is_shallow"] is True def test_promisor_remotes_listed_in_json(self, tmp_path: pathlib.Path) -> None: repo = _init_repo(tmp_path, remotes={ "local": {"url": "https://localhost:1337/gabriel/muse"}, }) _make_commit(repo, idx=0) d = json.loads(_invoke(repo, "--json").output) assert "local" in d["promisor_remotes"] # --------------------------------------------------------------------------- # I — Integration: realistic scenario # --------------------------------------------------------------------------- class TestIntegration: def test_clean_repo_no_remotes_all_ok(self, tmp_path: pathlib.Path) -> None: repo = _init_repo(tmp_path) prev: str | None = None for i in range(5): prev, _ = _make_commit(repo, parent_id=prev, idx=i) result = run_verify(repo) assert result["all_ok"] is True assert result["promised_objects"] == 0 def test_shallow_repo_with_promisor_all_ok(self, tmp_path: pathlib.Path) -> None: """Simulate a normal agent repo: recent objects present, history shallow.""" repo = _init_repo(tmp_path, remotes={ "local": {"url": "https://localhost:1337/gabriel/muse"}, }) # "old" history: objects not local (shallow gap) prev: str | None = None for i in range(10): prev, _ = _make_commit(repo, parent_id=prev, idx=i, write_objects=False) graft = prev add_shallow(repo, graft) # "recent" history: objects local for i in range(10, 15): prev, _ = _make_commit(repo, parent_id=prev, idx=i, write_objects=True) result = run_verify(repo) assert result["all_ok"] is True assert result["is_shallow"] is True # The graft commit's own objects are verified (they may be absent/promised). # Its ancestors' snapshots are collected during the graft walk and skipped # by the orphan sweep — so only the graft's own missing object counts. assert result["promised_objects"] <= 1 # at most the graft's own object assert result["objects_checked"] >= 5 # recent objects verified def test_orphan_snapshot_with_missing_object_and_promisor( self, tmp_path: pathlib.Path ) -> None: repo = _init_repo(tmp_path, remotes={ "local": {"url": "https://localhost:1337/gabriel/muse"}, }) # Orphan snapshot (no branch ref) with missing object obj_id = fake_id("orphan-obj-f") manifest = {"orphan.py": obj_id} snap_id = compute_snapshot_id(manifest) write_snapshot(repo, SnapshotRecord(snapshot_id=snap_id, manifest=manifest)) result = run_verify(repo) assert result["all_ok"] is True assert result["promised_objects"] >= 1 def test_concurrent_reads_stable(self, tmp_path: pathlib.Path) -> None: repo = _init_repo(tmp_path, remotes={ "local": {"url": "https://localhost:1337/gabriel/muse"}, }) prev: str | None = None for i in range(5): prev, _ = _make_commit(repo, parent_id=prev, idx=i) results: list[dict] = [] errors: list[Exception] = [] lock = threading.Lock() def _read() -> None: try: r = _invoke(repo, "--json") d = json.loads(r.output) with lock: results.append(d) except Exception as exc: with lock: errors.append(exc) threads = [threading.Thread(target=_read) for _ in range(8)] for t in threads: t.start() for t in threads: t.join() assert errors == [] assert len(results) == 8 assert all(d["all_ok"] is True for d in results)