gabriel / muse public
test_phase2b_missed_bfs_sites.py python
248 lines 8.9 KB
Raw
sha256:a73c3f57b665e8c0be2c9e977b3ebefdb7ae8d46f196986d911c6a8f5d8b8d49 docs: update store.py references to focused module paths Sonnet 4.6 28 days ago
1 """TDD — Phase 2 follow-up: two BFS sites missed in the original Phase 2 pass.
2
3 verify.py::_collect_ancestor_snapshots (line 248)
4 Pure BFS — collects snapshot IDs from ancestors of a shallow graft commit.
5 Stops at commits already in the main BFS ``visited`` set.
6 Replaced with ``iter_ancestors(root, starts, exclude=visited)``.
7
8 The main ``run_verify`` BFS (line 342) is a documented exception: it must
9 report missing commits as ``VerifyFailure`` entries rather than silently
10 skipping them, which ``iter_ancestors`` cannot do. This mirrors the
11 ``gc.py`` exception.
12
13 plugins/midi/_midi_query.py::run_query (line 465)
14 First-parent walk with ``from_commit_id`` stop and ``max_commits`` cap.
15 Replaced with ``iter_ancestors(first_parent_only=True, prune=...,
16 max_commits=...)``.
17
18 Coverage
19 --------
20 V1 Structural — ``_collect_ancestor_snapshots`` uses ``iter_ancestors``;
21 no inline ``deque`` BFS
22 V2 Behavioural — ancestor snapshots are collected, stopping at ``visited``
23 M1 Structural — ``run_query`` uses ``iter_ancestors``; no ``while
24 commit_id`` loop
25 M2 Behavioural — first-parent walk stops at ``from_commit_id`` (exclusive)
26 M3 Behavioural — walk respects ``max_commits`` cap
27 """
28 from __future__ import annotations
29
30 import datetime
31 import inspect
32 import json
33 import pathlib
34
35 import pytest
36
37 from muse._version import __version__
38 from muse.core.object_store import write_object
39 from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id
40 from muse.core.commits import (
41 CommitRecord,
42 write_commit,
43 )
44 from muse.core.snapshots import (
45 SnapshotRecord,
46 write_snapshot,
47 )
48 from muse.core.types import blob_id
49 from muse.core.paths import muse_dir
50
51
52 # ---------------------------------------------------------------------------
53 # Helpers
54 # ---------------------------------------------------------------------------
55
def _repo(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> pathlib.Path:
    """Lay out a minimal repository skeleton under *tmp_path* and return it.

    Creates the store sub-directories inside ``muse_dir(tmp_path)``, writes
    ``HEAD`` (pointing at ``refs/heads/main``) and ``repo.json``, then uses
    *monkeypatch* to set ``MUSE_REPO_ROOT`` and the working directory to
    *tmp_path*.
    """
    store_dir = muse_dir(tmp_path)
    subdirs = ("commits", "snapshots", "objects", "refs/heads", "remotes")
    for name in subdirs:
        (store_dir / name).mkdir(parents=True, exist_ok=True)

    (store_dir / "HEAD").write_text("ref: refs/heads/main\n")

    repo_meta = {"repo_id": "test-repo", "schema_version": __version__, "domain": "code"}
    (store_dir / "repo.json").write_text(json.dumps(repo_meta))

    # Point both the environment and the cwd at the temporary repository.
    monkeypatch.setenv("MUSE_REPO_ROOT", str(tmp_path))
    monkeypatch.chdir(tmp_path)
    return tmp_path
67
68
def _make_commit(
    root: pathlib.Path,
    manifest: dict[str, str],
    parent_id: str | None = None,
    *,
    message: str = "test",
) -> CommitRecord:
    """Write one object, one snapshot and one commit under *root*.

    The object payload is ``b"data-"`` followed by the encoded *message*.
    A falsy *manifest* is replaced by a single-entry manifest mapping
    ``f.py`` to that object's ID.  When *parent_id* is truthy it becomes the
    commit's only parent; otherwise the commit has no parents.  Every commit
    uses the same fixed timestamp (2026-01-01 UTC) and the ``main`` branch.

    Returns the ``CommitRecord`` that was written.
    """
    payload = b"data-" + message.encode()
    oid = blob_id(payload)
    write_object(root, oid, payload)

    effective_manifest = manifest if manifest else {"f.py": oid}
    snap_id = compute_snapshot_id(effective_manifest)
    write_snapshot(root, SnapshotRecord(snapshot_id=snap_id, manifest=effective_manifest))

    committed_at = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
    parents = [parent_id] if parent_id else []
    commit_id = compute_commit_id(
        parent_ids=parents,
        snapshot_id=snap_id,
        message=message,
        committed_at_iso=committed_at.isoformat(),
    )

    record = CommitRecord(
        commit_id=commit_id,
        branch="main",
        snapshot_id=snap_id,
        message=message,
        committed_at=committed_at,
        parent_commit_id=parent_id,
    )
    write_commit(root, record)
    return record
98
99
100 # ---------------------------------------------------------------------------
101 # V1 Structural — _collect_ancestor_snapshots uses iter_ancestors
102 # ---------------------------------------------------------------------------
103
def test_v1_collect_ancestor_snapshots_uses_iter_ancestors() -> None:
    """Check the source of ``_collect_ancestor_snapshots`` structurally.

    The function's source text must mention ``iter_ancestors`` and must not
    mention ``deque``.
    """
    from muse.core import verify as verify_mod

    target = verify_mod._collect_ancestor_snapshots  # type: ignore[attr-defined]
    source_text = inspect.getsource(target)

    assert "iter_ancestors" in source_text, (
        "_collect_ancestor_snapshots must delegate to iter_ancestors. "
        "Replace the inline deque BFS with iter_ancestors(exclude=visited)."
    )
    assert "deque" not in source_text, (
        "_collect_ancestor_snapshots still uses an inline deque. "
        "Replace with iter_ancestors."
    )
118
119
120 # ---------------------------------------------------------------------------
121 # V2 Behavioural — ancestor snapshots collected, stopping at visited
122 # ---------------------------------------------------------------------------
123
def test_v2_collect_ancestor_snapshots_collects_and_stops(
    tmp_path: pathlib.Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Ancestor snapshots are collected up to, but not including, ``visited``.

    Chain: C1 → C2 → C3(graft)

    ``visited`` is seeded with C1, so C1 acts as the traversal boundary.
    After calling ``_collect_ancestor_snapshots`` with C3, the test expects
    C2's snapshot ID to be in ``verified_snapshots`` and C1's to be absent.
    """
    from muse.core.verify import _collect_ancestor_snapshots  # type: ignore[attr-defined]

    root = _repo(tmp_path, monkeypatch)

    base = _make_commit(root, {}, message="c1")
    middle = _make_commit(root, {}, base.commit_id, message="c2")
    graft = _make_commit(root, {}, middle.commit_id, message="c3")

    already_seen: set[str] = {base.commit_id}
    collected: set[str] = set()

    _collect_ancestor_snapshots(
        root,
        graft,
        visited=already_seen,
        verified_snapshots=collected,
    )

    assert middle.snapshot_id in collected, (
        "C2's snapshot must be collected — it is an unvisited ancestor of C3"
    )
    assert base.snapshot_id not in collected, (
        "C1's snapshot must NOT be collected — C1 is in visited (boundary)"
    )
161
162
163 # ---------------------------------------------------------------------------
164 # M1 Structural — run_query uses iter_ancestors
165 # ---------------------------------------------------------------------------
166
def test_m1run_query_uses_iter_ancestors() -> None:
    """Check the source of ``run_query`` structurally.

    The function's source text must mention ``iter_ancestors`` and must not
    contain the substring ``while commit_id``.
    """
    from muse.plugins.midi import _midi_query as mq_mod

    source_text = inspect.getsource(mq_mod.run_query)

    assert "iter_ancestors" in source_text, (
        "run_query must use iter_ancestors(first_parent_only=True). "
        "Replace the inline while commit_id loop."
    )
    assert "while commit_id" not in source_text, (
        "run_query still has an inline while commit_id loop. "
        "Replace with iter_ancestors."
    )
181
182
183 # ---------------------------------------------------------------------------
184 # M2 Behavioural — walk stops at from_commit_id (exclusive)
185 # ---------------------------------------------------------------------------
186
def test_m2run_query_stops_at_from_commit(
    tmp_path: pathlib.Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Smoke-test ``run_query`` with a ``from_commit_id`` boundary.

    Chain: C1 → C2 → C3.  The walk starts at C3 with ``from_commit_id=C2``,
    which is intended to be an exclusive boundary.

    NOTE(review): this test only asserts that the call returns a ``list``
    without raising.  The repository holds no MIDI files and the query is
    ``bar == 999``, so the set of commits actually walked is not observed
    here — the boundary itself is not verified by this assertion.
    """
    from muse.plugins.midi._midi_query import run_query  # type: ignore[attr-defined]

    root = _repo(tmp_path, monkeypatch)

    oldest = _make_commit(root, {}, message="c1")
    boundary = _make_commit(root, {}, oldest.commit_id, message="c2")
    tip = _make_commit(root, {}, boundary.commit_id, message="c3")

    # max_commits is deliberately large so only from_commit_id can limit the walk.
    matches = run_query(
        "bar == 999",
        root,
        tip.commit_id,
        from_commit_id=boundary.commit_id,
        max_commits=1_000,
    )

    assert isinstance(matches, list)
218
219
220 # ---------------------------------------------------------------------------
221 # M3 Behavioural — walk respects max_commits cap
222 # ---------------------------------------------------------------------------
223
def test_m3run_query_respects_max_commits(
    tmp_path: pathlib.Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Smoke-test ``run_query`` with a small ``max_commits`` cap.

    Builds a linear chain of five commits and queries from the newest one
    with ``max_commits=2``.

    NOTE(review): the only assertion is that a ``list`` is returned without
    raising; the number of commits actually walked is not observed here.
    """
    from muse.plugins.midi._midi_query import run_query  # type: ignore[attr-defined]

    root = _repo(tmp_path, monkeypatch)

    # Each new commit takes the previous one as its parent; the first has none.
    tip_id: str | None = None
    for index in range(5):
        tip_id = _make_commit(root, {}, tip_id, message=f"c{index}").commit_id

    matches = run_query(
        "bar == 999",
        root,
        tip_id,  # type: ignore[arg-type]
        max_commits=2,
    )

    assert isinstance(matches, list)
File History 2 commits
sha256:a73c3f57b665e8c0be2c9e977b3ebefdb7ae8d46f196986d911c6a8f5d8b8d49 docs: update store.py references to focused module paths Sonnet 4.6 28 days ago
sha256:b6cae4448122b2cc690d913be26f7e0a539f11855b8d288bd48be43eb532b5b2 refactor: migrate all source callers off muse.core.store re… Sonnet 4.6 minor 28 days ago