"""TDD — Phase 2 follow-up: two BFS sites missed in the original Phase 2 pass.

verify.py::_collect_ancestor_snapshots (line 248)
    Pure BFS — collects snapshot IDs from ancestors of a shallow graft commit.
    Stops at commits already in the main BFS ``visited`` set.  Replaced with
    ``iter_ancestors(root, starts, exclude=visited)``.

    The main ``run_verify`` BFS (line 342) is a documented exception: it must
    report missing commits as ``VerifyFailure`` entries rather than silently
    skipping them, which ``iter_ancestors`` cannot do.  This mirrors the
    ``gc.py`` exception.

plugins/midi/_midi_query.py::run_query (line 465)
    First-parent walk with ``from_commit_id`` stop and ``max_commits`` cap.
    Replaced with
    ``iter_ancestors(first_parent_only=True, prune=..., max_commits=...)``.

Coverage
--------
V1  Structural  — ``_collect_ancestor_snapshots`` uses ``iter_ancestors``;
                  no inline ``deque`` BFS
V2  Behavioural — ancestor snapshots are collected, stopping at ``visited``
M1  Structural  — ``run_query`` uses ``iter_ancestors``; no ``while commit_id`` loop
M2  Behavioural — first-parent walk stops at ``from_commit_id`` (exclusive)
M3  Behavioural — walk respects ``max_commits`` cap
"""
from __future__ import annotations

import datetime
import inspect
import json
import pathlib

import pytest

from muse._version import __version__
from muse.core.object_store import write_object
from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id
from muse.core.commits import (
    CommitRecord,
    write_commit,
)
from muse.core.snapshots import (
    SnapshotRecord,
    write_snapshot,
)
from muse.core.types import blob_id
from muse.core.paths import muse_dir


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def _repo(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> pathlib.Path:
    """Lay out a minimal repository skeleton under *tmp_path* and return it.

    Creates the store sub-directories, a ``HEAD`` file pointing at
    ``refs/heads/main`` and a ``repo.json`` descriptor, then sets the
    ``MUSE_REPO_ROOT`` environment variable and the working directory to
    *tmp_path* through *monkeypatch*.
    """
    store = muse_dir(tmp_path)
    for subdir in ("commits", "snapshots", "objects", "refs/heads", "remotes"):
        target = store / subdir
        target.mkdir(parents=True, exist_ok=True)

    head_file = store / "HEAD"
    head_file.write_text("ref: refs/heads/main\n")

    descriptor = {"repo_id": "test-repo", "schema_version": __version__, "domain": "code"}
    descriptor_file = store / "repo.json"
    descriptor_file.write_text(json.dumps(descriptor))

    monkeypatch.setenv("MUSE_REPO_ROOT", str(tmp_path))
    monkeypatch.chdir(tmp_path)
    return tmp_path


def _make_commit(
    root: pathlib.Path,
    manifest: dict[str, str],
    parent_id: str | None = None,
    *,
    message: str = "test",
) -> CommitRecord:
    """Write an object, a snapshot and a commit to *root*; return the commit.

    The object payload is ``b"data-"`` followed by the encoded *message*.
    A falsy *manifest* is replaced by a one-entry manifest mapping ``f.py``
    to that object.  The commit is on branch ``main`` with a fixed timestamp
    (2026-01-01 UTC) and has *parent_id* as its parent when one is given.
    """
    payload = b"data-" + message.encode()
    object_id = blob_id(payload)
    write_object(root, object_id, payload)

    if not manifest:
        manifest = {"f.py": object_id}
    snapshot_id = compute_snapshot_id(manifest)
    write_snapshot(root, SnapshotRecord(snapshot_id=snapshot_id, manifest=manifest))

    committed_at = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
    parents = [parent_id] if parent_id else []
    commit_id = compute_commit_id(
        parent_ids=parents,
        snapshot_id=snapshot_id,
        message=message,
        committed_at_iso=committed_at.isoformat(),
    )

    record = CommitRecord(
        commit_id=commit_id,
        branch="main",
        snapshot_id=snapshot_id,
        message=message,
        committed_at=committed_at,
        parent_commit_id=parent_id,
    )
    write_commit(root, record)
    return record


# ---------------------------------------------------------------------------
# V1  Structural — _collect_ancestor_snapshots uses iter_ancestors
# ---------------------------------------------------------------------------


def test_v1_collect_ancestor_snapshots_uses_iter_ancestors() -> None:
    """The source of _collect_ancestor_snapshots mentions iter_ancestors and no deque."""
    from muse.core import verify as verify_mod

    helper = verify_mod._collect_ancestor_snapshots  # type: ignore[attr-defined]
    source_text = inspect.getsource(helper)

    assert "iter_ancestors" in source_text, (
        "_collect_ancestor_snapshots must delegate to iter_ancestors. "
        "Replace the inline deque BFS with iter_ancestors(exclude=visited)."
    )
    assert "deque" not in source_text, (
        "_collect_ancestor_snapshots still uses an inline deque. "
        "Replace with iter_ancestors."
    )


# ---------------------------------------------------------------------------
# V2  Behavioural — ancestor snapshots collected, stopping at visited
# ---------------------------------------------------------------------------


def test_v2_collect_ancestor_snapshots_collects_and_stops(
    tmp_path: pathlib.Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Unvisited ancestors contribute their snapshot IDs; visited ones do not.

    Chain: C1 → C2 → C3(graft)
    visited = {C1} — C1 is the main BFS boundary.

    After calling _collect_ancestor_snapshots for C3, C2's snapshot_id must
    be present in ``verified_snapshots`` and C1's must be absent.
    """
    from muse.core.verify import _collect_ancestor_snapshots  # type: ignore[attr-defined]

    root = _repo(tmp_path, monkeypatch)

    base = _make_commit(root, {}, message="c1")
    middle = _make_commit(root, {}, base.commit_id, message="c2")
    graft = _make_commit(root, {}, middle.commit_id, message="c3")

    boundary: set[str] = {base.commit_id}
    collected: set[str] = set()

    _collect_ancestor_snapshots(
        root,
        graft,
        visited=boundary,
        verified_snapshots=collected,
    )

    assert middle.snapshot_id in collected, (
        "C2's snapshot must be collected — it is an unvisited ancestor of C3"
    )
    assert base.snapshot_id not in collected, (
        "C1's snapshot must NOT be collected — C1 is in visited (boundary)"
    )


# ---------------------------------------------------------------------------
# M1  Structural — run_query uses iter_ancestors
# ---------------------------------------------------------------------------


def test_m1run_query_uses_iter_ancestors() -> None:
    """The source of run_query mentions iter_ancestors and no ``while commit_id``."""
    from muse.plugins.midi import _midi_query as mq_mod

    source_text = inspect.getsource(mq_mod.run_query)

    assert "iter_ancestors" in source_text, (
        "run_query must use iter_ancestors(first_parent_only=True). "
        "Replace the inline while commit_id loop."
    )
    assert "while commit_id" not in source_text, (
        "run_query still has an inline while commit_id loop. "
        "Replace with iter_ancestors."
    )


# ---------------------------------------------------------------------------
# M2  Behavioural — walk stops at from_commit_id (exclusive)
# ---------------------------------------------------------------------------


def test_m2run_query_stops_at_from_commit(
    tmp_path: pathlib.Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """run_query completes and returns a list when from_commit_id is given.

    Chain: C1 → C2 → C3, queried from C3 with ``from_commit_id=C2``.  The
    repository holds no MIDI files, so this is a smoke check only: the call
    must not raise and must hand back a list.  The number of commits walked
    is not observed here.
    """
    from muse.plugins.midi._midi_query import run_query  # type: ignore[attr-defined]

    root = _repo(tmp_path, monkeypatch)

    oldest = _make_commit(root, {}, message="c1")
    boundary = _make_commit(root, {}, oldest.commit_id, message="c2")
    tip = _make_commit(root, {}, boundary.commit_id, message="c3")

    outcome = run_query(
        "bar == 999",
        root,
        tip.commit_id,
        from_commit_id=boundary.commit_id,
        max_commits=1_000,
    )

    assert isinstance(outcome, list)


# ---------------------------------------------------------------------------
# M3  Behavioural — walk respects max_commits cap
# ---------------------------------------------------------------------------


def test_m3run_query_respects_max_commits(
    tmp_path: pathlib.Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """run_query completes and returns a list when max_commits is below chain length.

    A five-commit chain is queried from its tip with ``max_commits=2``.  The
    only assertion is that a list comes back without an exception.
    """
    from muse.plugins.midi._midi_query import run_query  # type: ignore[attr-defined]

    root = _repo(tmp_path, monkeypatch)

    tip_id: str | None = None
    for index in range(5):
        tip_id = _make_commit(root, {}, tip_id, message=f"c{index}").commit_id

    outcome = run_query(
        "bar == 999",
        root,
        tip_id,  # type: ignore[arg-type]
        max_commits=2,
    )

    assert isinstance(outcome, list)