test_phase2b_missed_bfs_sites.py
python
sha256:a73c3f57b665e8c0be2c9e977b3ebefdb7ae8d46f196986d911c6a8f5d8b8d49
docs: update store.py references to focused module paths
Sonnet 4.6
28 days ago
| 1 | """TDD — Phase 2 follow-up: two BFS sites missed in the original Phase 2 pass. |
| 2 | |
| 3 | verify.py::_collect_ancestor_snapshots (line 248) |
| 4 | Pure BFS — collects snapshot IDs from ancestors of a shallow graft commit. |
| 5 | Stops at commits already in the main BFS ``visited`` set. |
| 6 | Replaced with ``iter_ancestors(root, starts, exclude=visited)``. |
| 7 | |
| 8 | The main ``run_verify`` BFS (line 342) is a documented exception: it must |
| 9 | report missing commits as ``VerifyFailure`` entries rather than silently |
| 10 | skipping them, which ``iter_ancestors`` cannot do. This mirrors the |
| 11 | ``gc.py`` exception. |
| 12 | |
| 13 | plugins/midi/_midi_query.py::run_query (line 465) |
| 14 | First-parent walk with ``from_commit_id`` stop and ``max_commits`` cap. |
| 15 | Replaced with ``iter_ancestors(first_parent_only=True, prune=..., |
| 16 | max_commits=...)``. |
| 17 | |
| 18 | Coverage |
| 19 | -------- |
| 20 | V1 Structural — ``_collect_ancestor_snapshots`` uses ``iter_ancestors``; |
| 21 | no inline ``deque`` BFS |
| 22 | V2 Behavioural — ancestor snapshots are collected, stopping at ``visited`` |
| 23 | M1 Structural — ``run_query`` uses ``iter_ancestors``; no ``while |
| 24 | commit_id`` loop |
| 25 | M2 Behavioural — first-parent walk stops at ``from_commit_id`` (exclusive) |
| 26 | M3 Behavioural — walk respects ``max_commits`` cap |
| 27 | """ |
| 28 | from __future__ import annotations |
| 29 | |
| 30 | import datetime |
| 31 | import inspect |
| 32 | import json |
| 33 | import pathlib |
| 34 | |
| 35 | import pytest |
| 36 | |
| 37 | from muse._version import __version__ |
| 38 | from muse.core.object_store import write_object |
| 39 | from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id |
| 40 | from muse.core.commits import ( |
| 41 | CommitRecord, |
| 42 | write_commit, |
| 43 | ) |
| 44 | from muse.core.snapshots import ( |
| 45 | SnapshotRecord, |
| 46 | write_snapshot, |
| 47 | ) |
| 48 | from muse.core.types import blob_id |
| 49 | from muse.core.paths import muse_dir |
| 50 | |
| 51 | |
| 52 | # --------------------------------------------------------------------------- |
| 53 | # Helpers |
| 54 | # --------------------------------------------------------------------------- |
| 55 | |
def _repo(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> pathlib.Path:
    """Lay out a minimal repository skeleton under *tmp_path* and return it.

    Creates the ``commits``, ``snapshots``, ``objects``, ``refs/heads`` and
    ``remotes`` directories inside ``muse_dir(tmp_path)``, writes a ``HEAD``
    file pointing at ``refs/heads/main`` and a ``repo.json`` descriptor, then
    sets ``MUSE_REPO_ROOT`` and the working directory to *tmp_path*.

    Args:
        tmp_path: Directory that becomes the repository root.
        monkeypatch: pytest fixture used to set the environment variable and
            change the working directory for the duration of the test.

    Returns:
        *tmp_path*, for use as the repository root in the calling test.
    """
    dot_muse = muse_dir(tmp_path)
    for d in ("commits", "snapshots", "objects", "refs/heads", "remotes"):
        (dot_muse / d).mkdir(parents=True, exist_ok=True)
    # Pass the encoding explicitly: without it ``write_text`` falls back to
    # the locale's preferred encoding, which differs between platforms.
    (dot_muse / "HEAD").write_text("ref: refs/heads/main\n", encoding="utf-8")
    (dot_muse / "repo.json").write_text(
        json.dumps({"repo_id": "test-repo", "schema_version": __version__, "domain": "code"}),
        encoding="utf-8",
    )
    monkeypatch.setenv("MUSE_REPO_ROOT", str(tmp_path))
    monkeypatch.chdir(tmp_path)
    return tmp_path
| 67 | |
| 68 | |
def _make_commit(
    root: pathlib.Path,
    manifest: dict[str, str],
    parent_id: str | None = None,
    *,
    message: str = "test",
) -> CommitRecord:
    """Write one blob, one snapshot and one commit under *root*.

    The blob content is ``b"data-"`` followed by the encoded *message*; it is
    written regardless of whether a manifest is supplied. A falsy *manifest*
    (e.g. ``{}``) is replaced by a single-entry manifest mapping ``"f.py"``
    to that blob's id.

    Args:
        root: Repository root the records are written under.
        manifest: Path-to-object-id mapping for the snapshot, or an empty
            mapping to use the generated single-file manifest.
        parent_id: Commit id of the parent; a falsy value yields an empty
            parent list when the commit id is hashed.
        message: Commit message; also determines the blob content.

    Returns:
        The ``CommitRecord`` that was written (branch ``"main"``, timestamp
        fixed at 2026-01-01 UTC).
    """
    payload = b"data-" + message.encode()
    object_id = blob_id(payload)
    write_object(root, object_id, payload)

    # Fall back to a one-file manifest when the caller passed nothing useful.
    if not manifest:
        manifest = {"f.py": object_id}

    snapshot_id = compute_snapshot_id(manifest)
    snapshot = SnapshotRecord(snapshot_id=snapshot_id, manifest=manifest)
    write_snapshot(root, snapshot)

    committed_at = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
    parents = [parent_id] if parent_id else []
    commit_id = compute_commit_id(
        parent_ids=parents,
        snapshot_id=snapshot_id,
        message=message,
        committed_at_iso=committed_at.isoformat(),
    )

    record = CommitRecord(
        commit_id=commit_id,
        branch="main",
        snapshot_id=snapshot_id,
        message=message,
        committed_at=committed_at,
        parent_commit_id=parent_id,
    )
    write_commit(root, record)
    return record
| 98 | |
| 99 | |
| 100 | # --------------------------------------------------------------------------- |
| 101 | # V1 Structural — _collect_ancestor_snapshots uses iter_ancestors |
| 102 | # --------------------------------------------------------------------------- |
| 103 | |
def test_v1_collect_ancestor_snapshots_uses_iter_ancestors() -> None:
    """Structural check on the source text of ``_collect_ancestor_snapshots``.

    The function's source must mention ``iter_ancestors`` and must not
    mention ``deque``.
    """
    from muse.core import verify as verify_mod

    target = verify_mod._collect_ancestor_snapshots  # type: ignore[attr-defined]
    source_text = inspect.getsource(target)

    missing_helper_msg = (
        "_collect_ancestor_snapshots must delegate to iter_ancestors. "
        "Replace the inline deque BFS with iter_ancestors(exclude=visited)."
    )
    leftover_deque_msg = (
        "_collect_ancestor_snapshots still uses an inline deque. "
        "Replace with iter_ancestors."
    )

    assert "iter_ancestors" in source_text, missing_helper_msg
    assert "deque" not in source_text, leftover_deque_msg
| 118 | |
| 119 | |
| 120 | # --------------------------------------------------------------------------- |
| 121 | # V2 Behavioural — ancestor snapshots collected, stopping at visited |
| 122 | # --------------------------------------------------------------------------- |
| 123 | |
def test_v2_collect_ancestor_snapshots_collects_and_stops(
    tmp_path: pathlib.Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Ancestor snapshots are gathered up to, but not including, ``visited``.

    Builds the linear chain C1 → C2 → C3, where C3 plays the graft commit,
    and marks C1 as already visited. After calling
    ``_collect_ancestor_snapshots`` with C3:

    * C2's snapshot id must be present in ``verified_snapshots``;
    * C1's snapshot id must be absent, because C1 is the boundary.
    """
    from muse.core.verify import _collect_ancestor_snapshots  # type: ignore[attr-defined]

    root = _repo(tmp_path, monkeypatch)

    boundary = _make_commit(root, {}, message="c1")
    middle = _make_commit(root, {}, boundary.commit_id, message="c2")
    graft = _make_commit(root, {}, middle.commit_id, message="c3")

    already_seen: set[str] = {boundary.commit_id}
    collected: set[str] = set()

    _collect_ancestor_snapshots(
        root,
        graft,
        visited=already_seen,
        verified_snapshots=collected,
    )

    assert middle.snapshot_id in collected, (
        "C2's snapshot must be collected — it is an unvisited ancestor of C3"
    )
    assert boundary.snapshot_id not in collected, (
        "C1's snapshot must NOT be collected — C1 is in visited (boundary)"
    )
| 161 | |
| 162 | |
| 163 | # --------------------------------------------------------------------------- |
| 164 | # M1 Structural — run_query uses iter_ancestors |
| 165 | # --------------------------------------------------------------------------- |
| 166 | |
def test_m1run_query_uses_iter_ancestors() -> None:
    """Structural check on the source text of ``run_query``.

    The function's source must mention ``iter_ancestors`` and must not
    contain the text ``while commit_id``.
    """
    from muse.plugins.midi import _midi_query as mq_mod

    source_text = inspect.getsource(mq_mod.run_query)

    missing_helper_msg = (
        "run_query must use iter_ancestors(first_parent_only=True). "
        "Replace the inline while commit_id loop."
    )
    leftover_loop_msg = (
        "run_query still has an inline while commit_id loop. "
        "Replace with iter_ancestors."
    )

    assert "iter_ancestors" in source_text, missing_helper_msg
    assert "while commit_id" not in source_text, leftover_loop_msg
| 181 | |
| 182 | |
| 183 | # --------------------------------------------------------------------------- |
| 184 | # M2 Behavioural — walk stops at from_commit_id (exclusive) |
| 185 | # --------------------------------------------------------------------------- |
| 186 | |
def test_m2run_query_stops_at_from_commit(
    tmp_path: pathlib.Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Smoke test: ``run_query`` accepts ``from_commit_id`` and returns a list.

    Builds the chain C1 → C2 → C3 and queries from C3 with
    ``from_commit_id`` set to C2 and a generous ``max_commits`` of 1000.
    The commits carry no MIDI files, so no matches are expected.

    NOTE(review): the only assertion is that a list comes back. Nothing here
    observes which commits were walked, so the exclusive boundary at
    ``from_commit_id`` is not actually pinned by this test.
    """
    from muse.plugins.midi._midi_query import run_query  # type: ignore[attr-defined]

    root = _repo(tmp_path, monkeypatch)

    oldest = _make_commit(root, {}, message="c1")
    boundary = _make_commit(root, {}, oldest.commit_id, message="c2")
    head = _make_commit(root, {}, boundary.commit_id, message="c3")

    # The call must complete without raising when given a stop commit.
    outcome = run_query(
        "bar == 999",
        root,
        head.commit_id,
        from_commit_id=boundary.commit_id,
        max_commits=1000,
    )

    assert isinstance(outcome, list)
| 218 | |
| 219 | |
| 220 | # --------------------------------------------------------------------------- |
| 221 | # M3 Behavioural — walk respects max_commits cap |
| 222 | # --------------------------------------------------------------------------- |
| 223 | |
def test_m3run_query_respects_max_commits(
    tmp_path: pathlib.Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Smoke test: ``run_query`` accepts a small ``max_commits`` cap.

    Builds a five-commit linear chain (messages ``c0`` … ``c4``) and queries
    from its tip with ``max_commits=2``. The call must return a list without
    raising.

    NOTE(review): the number of commits actually walked is not observed, so
    the cap itself is not asserted here.
    """
    from muse.plugins.midi._midi_query import run_query  # type: ignore[attr-defined]

    root = _repo(tmp_path, monkeypatch)

    # Each new commit takes the previous one as its parent; after the loop
    # ``tip_id`` is the id of the newest commit.
    tip_id: str | None = None
    for index in range(5):
        commit = _make_commit(root, {}, tip_id, message=f"c{index}")
        tip_id = commit.commit_id

    outcome = run_query(
        "bar == 999",
        root,
        tip_id,  # type: ignore[arg-type]
        max_commits=2,
    )

    assert isinstance(outcome, list)
File History
2 commits
sha256:a73c3f57b665e8c0be2c9e977b3ebefdb7ae8d46f196986d911c6a8f5d8b8d49
docs: update store.py references to focused module paths
Sonnet 4.6
28 days ago
sha256:b6cae4448122b2cc690d913be26f7e0a539f11855b8d288bd48be43eb532b5b2
refactor: migrate all source callers off muse.core.store re…
Sonnet 4.6
minor
⚠
28 days ago