gabriel / muse public
test_phase2_bfs_migration.py python
454 lines 17.7 KB
Raw
sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b fix: try fetch/presign before fetch/mpack to avoid Cloudfla… Sonnet 4.6 patch 6 days ago
1 """TDD Phase 2 — migrate inline BFS/DFS implementations to walk_dag / iter_ancestors.
2
3 Structural tests FAIL now (inline patterns still present); PASS after migration.
4 Behavioral tests PASS before and after (pin the public contract).
5
6 Target sites
7 ------------
8 bisect._ancestors — DFS list[str] oldest-first
9 bisect._reachable_from_good — DFS set[str]
10 branch._commit_ancestors — DFS set[str]
11 pack.build_mpack BFS block — BFS with have-set exclusion
12 pack.collect_blob_ids BFS block — BFS with have-set exclusion
13 _query.walk_commits_bfs — BFS with stop_at_commit_id
14 midi/_query.walk_commits_for_track — linear first-parent walk
15 """
16
17 from __future__ import annotations
18
19 import datetime
20 import inspect
21 import json
22 import pathlib
23 import types
24
25 import pytest
26
27 from muse.core.commits import (
28 CommitRecord,
29 write_commit,
30 )
31 from muse.core.snapshots import (
32 SnapshotRecord,
33 write_snapshot,
34 )
35 from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id
36 from muse.core.paths import muse_dir
37
38 _BASE_DT = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
39
40
41 # ---------------------------------------------------------------------------
42 # Shared helpers (same pattern as test_core_graph.py)
43 # ---------------------------------------------------------------------------
44
45 def _make_repo(tmp_path: pathlib.Path) -> pathlib.Path:
46 dot_muse = muse_dir(tmp_path)
47 for d in ("objects", "commits", "snapshots", "refs/heads"):
48 (dot_muse / d).mkdir(parents=True, exist_ok=True)
49 (dot_muse / "repo.json").write_text(json.dumps({"repo_id": "test-repo"}))
50 (dot_muse / "HEAD").write_text("ref: refs/heads/main\n")
51 return tmp_path
52
53
54 def _write_commit(
55 repo: pathlib.Path,
56 message: str = "commit",
57 parent: str | None = None,
58 parent2: str | None = None,
59 author: str = "Author",
60 dt: datetime.datetime = _BASE_DT,
61 ) -> str:
62 snap_id = compute_snapshot_id({})
63 write_snapshot(repo, SnapshotRecord(snapshot_id=snap_id, manifest={}))
64 parent_ids = [p for p in (parent, parent2) if p is not None]
65 commit_id = compute_commit_id(
66 parent_ids=parent_ids,
67 snapshot_id=snap_id,
68 message=message,
69 committed_at_iso=dt.isoformat(),
70 author=author,
71 )
72 write_commit(
73 repo,
74 CommitRecord(
75 commit_id=commit_id,
76 branch="main",
77 snapshot_id=snap_id,
78 message=message,
79 committed_at=dt,
80 parent_commit_id=parent,
81 author=author,
82 parent2_commit_id=parent2,
83 ),
84 )
85 return commit_id
86
87
88 def _no_inline_bfs(source: str) -> bool:
89 """True when source has no standalone BFS queue patterns."""
90 return "while queue:" not in source and "queue.popleft()" not in source and "queue.pop()" not in source
91
92
93 # ---------------------------------------------------------------------------
94 # bisect._ancestors
95 # ---------------------------------------------------------------------------
96
97 class TestBisectAncestors:
98 def _fn(self) -> types.FunctionType:
99 from muse.core import bisect
100 return bisect._ancestors
101
102 def test_structural_no_inline_bfs(self) -> None:
103 """_ancestors must not use an inline BFS/DFS queue after migration."""
104 src = inspect.getsource(self._fn())
105 assert _no_inline_bfs(src), (
106 "bisect._ancestors still uses an inline queue — migrate to ancestor_ids"
107 )
108
109 def test_linear_chain_all_ancestors_returned(self, tmp_path: pathlib.Path) -> None:
110 repo = _make_repo(tmp_path)
111 root = _write_commit(repo, "root")
112 mid = _write_commit(repo, "mid", parent=root)
113 tip = _write_commit(repo, "tip", parent=mid)
114 result = self._fn()(repo, tip)
115 assert set(result) == {root, mid, tip}
116
117 def test_returns_oldest_first(self, tmp_path: pathlib.Path) -> None:
118 repo = _make_repo(tmp_path)
119 root = _write_commit(repo, "root")
120 mid = _write_commit(repo, "mid", parent=root)
121 tip = _write_commit(repo, "tip", parent=mid)
122 result = self._fn()(repo, tip)
123 # oldest-first: root then mid then tip
124 assert result.index(root) < result.index(mid) < result.index(tip)
125
126 def test_single_commit(self, tmp_path: pathlib.Path) -> None:
127 repo = _make_repo(tmp_path)
128 cid = _write_commit(repo, "solo")
129 assert self._fn()(repo, cid) == [cid]
130
131 def test_diamond_no_duplicates(self, tmp_path: pathlib.Path) -> None:
132 repo = _make_repo(tmp_path)
133 d = _write_commit(repo, "D")
134 b = _write_commit(repo, "B", parent=d)
135 c = _write_commit(repo, "C", parent=d)
136 a = _write_commit(repo, "A", parent=b, parent2=c)
137 result = self._fn()(repo, a)
138 assert result.count(d) == 1
139 assert set(result) == {a, b, c, d}
140
141
142 # ---------------------------------------------------------------------------
143 # bisect._reachable_from_good
144 # ---------------------------------------------------------------------------
145
146 class TestBisectReachableFromGood:
147 def _fn(self) -> types.FunctionType:
148 from muse.core import bisect
149 return bisect._reachable_from_good
150
151 def test_structural_no_inline_bfs(self) -> None:
152 """_reachable_from_good must not use an inline BFS/DFS queue."""
153 src = inspect.getsource(self._fn())
154 assert _no_inline_bfs(src), (
155 "bisect._reachable_from_good still uses an inline queue — migrate to ancestor_ids"
156 )
157
158 def test_single_good_includes_ancestors(self, tmp_path: pathlib.Path) -> None:
159 repo = _make_repo(tmp_path)
160 root = _write_commit(repo, "root")
161 mid = _write_commit(repo, "mid", parent=root)
162 tip = _write_commit(repo, "tip", parent=mid)
163 result = self._fn()(repo, [tip])
164 assert result == {tip, mid, root}
165
166 def test_multi_source_union(self, tmp_path: pathlib.Path) -> None:
167 repo = _make_repo(tmp_path)
168 shared = _write_commit(repo, "shared")
169 a = _write_commit(repo, "a", parent=shared)
170 b = _write_commit(repo, "b", parent=shared)
171 result = self._fn()(repo, [a, b])
172 assert result == {a, b, shared}
173
174 def test_shared_ancestor_once(self, tmp_path: pathlib.Path) -> None:
175 repo = _make_repo(tmp_path)
176 shared = _write_commit(repo, "shared")
177 a = _write_commit(repo, "a", parent=shared)
178 b = _write_commit(repo, "b", parent=shared)
179 result = self._fn()(repo, [a, b])
180 assert shared in result
181
182 def test_empty_good_ids(self, tmp_path: pathlib.Path) -> None:
183 repo = _make_repo(tmp_path)
184 result = self._fn()(repo, [])
185 assert result == set()
186
187
188 # ---------------------------------------------------------------------------
189 # branch._commit_ancestors
190 # ---------------------------------------------------------------------------
191
192 class TestBranchCommitAncestors:
193 def _fn(self) -> types.FunctionType:
194 from muse.cli.commands import branch as branch_module
195 return branch_module._commit_ancestors
196
197 def test_structural_no_inline_bfs(self) -> None:
198 """_commit_ancestors must not use an inline BFS/DFS queue."""
199 src = inspect.getsource(self._fn())
200 assert _no_inline_bfs(src), (
201 "branch._commit_ancestors still uses an inline queue — migrate to ancestor_ids"
202 )
203
204 def test_returns_set_including_self(self, tmp_path: pathlib.Path) -> None:
205 repo = _make_repo(tmp_path)
206 root = _write_commit(repo, "root")
207 tip = _write_commit(repo, "tip", parent=root)
208 result = self._fn()(repo, tip)
209 assert tip in result
210 assert root in result
211 assert isinstance(result, set)
212
213 def test_diamond_no_duplicates(self, tmp_path: pathlib.Path) -> None:
214 repo = _make_repo(tmp_path)
215 d = _write_commit(repo, "D")
216 b = _write_commit(repo, "B", parent=d)
217 c = _write_commit(repo, "C", parent=d)
218 a = _write_commit(repo, "A", parent=b, parent2=c)
219 result = self._fn()(repo, a)
220 assert result == {a, b, c, d}
221
222 def test_single_commit(self, tmp_path: pathlib.Path) -> None:
223 repo = _make_repo(tmp_path)
224 cid = _write_commit(repo, "solo")
225 assert self._fn()(repo, cid) == {cid}
226
227
228 # ---------------------------------------------------------------------------
229 # pack.build_mpack BFS block
230 # ---------------------------------------------------------------------------
231
232 class TestPackBuildMpackBFS:
233 def test_structural_no_inline_bfs(self) -> None:
234 """build_mpack must not contain its own inline BFS queue."""
235 from muse.core import mpack as pack_module
236 src = inspect.getsource(pack_module.build_mpack)
237 assert _no_inline_bfs(src), (
238 "pack.build_mpack still has an inline BFS queue — migrate to iter_ancestors"
239 )
240
241 def test_have_set_excludes_old_commits(self, tmp_path: pathlib.Path) -> None:
242 """build_mpack with have=[old] must not include old commit in mpack."""
243 from muse.core.mpack import build_mpack
244 from muse.core.object_store import write_object
245 from muse.core.types import blob_id
246
247 repo = _make_repo(tmp_path)
248 old_content = b"old-object"
249 old_oid = blob_id(old_content)
250 write_object(repo, old_oid, old_content)
251
252 old_snap_id = compute_snapshot_id({"f.txt": old_oid})
253 write_snapshot(repo, SnapshotRecord(snapshot_id=old_snap_id, manifest={"f.txt": old_oid}))
254
255 old_cid = compute_commit_id(
256 parent_ids=[],
257 snapshot_id=old_snap_id,
258 message="old",
259 committed_at_iso=_BASE_DT.isoformat(),
260 author="A",
261 )
262 write_commit(repo, CommitRecord(
263 commit_id=old_cid, branch="main",
264 snapshot_id=old_snap_id, message="old",
265 committed_at=_BASE_DT, author="A",
266 ))
267
268 new_content = b"new-object"
269 new_oid = blob_id(new_content)
270 write_object(repo, new_oid, new_content)
271
272 new_snap_id = compute_snapshot_id({"g.txt": new_oid})
273 write_snapshot(repo, SnapshotRecord(snapshot_id=new_snap_id, manifest={"g.txt": new_oid}))
274
275 new_cid = compute_commit_id(
276 parent_ids=[old_cid],
277 snapshot_id=new_snap_id,
278 message="new",
279 committed_at_iso=_BASE_DT.isoformat(),
280 author="A",
281 )
282 write_commit(repo, CommitRecord(
283 commit_id=new_cid, branch="main",
284 snapshot_id=new_snap_id, message="new",
285 committed_at=_BASE_DT, parent_commit_id=old_cid, author="A",
286 ))
287
288 mpack = build_mpack(repo, [new_cid], have=[old_cid])
289 sent_ids = {c["commit_id"] for c in mpack["commits"]}
290 assert new_cid in sent_ids
291 assert old_cid not in sent_ids
292
293
294 # ---------------------------------------------------------------------------
295 # pack.collect_blob_ids BFS block
296 # ---------------------------------------------------------------------------
297
298 class TestPackCollectObjectIds:
299 def test_structural_no_inline_bfs(self) -> None:
300 """collect_blob_ids must not contain its own inline BFS queue."""
301 from muse.core import mpack as pack_module
302 src = inspect.getsource(pack_module.collect_blob_ids)
303 assert _no_inline_bfs(src), (
304 "pack.collect_blob_ids still has an inline BFS queue — migrate to iter_ancestors"
305 )
306
307 def test_have_excludes_known_objects(self, tmp_path: pathlib.Path) -> None:
308 """Objects reachable only via have-commits are excluded from result."""
309 from muse.core.mpack import collect_blob_ids
310 from muse.core.object_store import write_object
311 from muse.core.types import blob_id
312
313 repo = _make_repo(tmp_path)
314
315 old_content = b"old"
316 old_oid = blob_id(old_content)
317 write_object(repo, old_oid, old_content)
318 old_snap_id = compute_snapshot_id({"a.txt": old_oid})
319 write_snapshot(repo, SnapshotRecord(snapshot_id=old_snap_id, manifest={"a.txt": old_oid}))
320 old_cid = compute_commit_id(
321 parent_ids=[], snapshot_id=old_snap_id,
322 message="old", committed_at_iso=_BASE_DT.isoformat(), author="A",
323 )
324 write_commit(repo, CommitRecord(
325 commit_id=old_cid, branch="main",
326 snapshot_id=old_snap_id, message="old",
327 committed_at=_BASE_DT, author="A",
328 ))
329
330 new_content = b"new"
331 new_oid = blob_id(new_content)
332 write_object(repo, new_oid, new_content)
333 new_snap_id = compute_snapshot_id({"b.txt": new_oid})
334 write_snapshot(repo, SnapshotRecord(snapshot_id=new_snap_id, manifest={"b.txt": new_oid}))
335 new_cid = compute_commit_id(
336 parent_ids=[old_cid], snapshot_id=new_snap_id,
337 message="new", committed_at_iso=_BASE_DT.isoformat(), author="A",
338 )
339 write_commit(repo, CommitRecord(
340 commit_id=new_cid, branch="main",
341 snapshot_id=new_snap_id, message="new",
342 committed_at=_BASE_DT, parent_commit_id=old_cid, author="A",
343 ))
344
345 result = collect_blob_ids(repo, [new_cid], have=[old_cid])
346 assert new_oid in result
347 assert old_oid not in result
348
349
350 # ---------------------------------------------------------------------------
351 # code._query.walk_commits_bfs
352 # ---------------------------------------------------------------------------
353
354 class TestWalkCommitsBfs:
355 def _fn(self) -> types.FunctionType:
356 from muse.plugins.code import _query
357 return _query.walk_commits_bfs
358
359 def test_structural_no_inline_bfs(self) -> None:
360 """walk_commits_bfs must not use an inline BFS queue after migration."""
361 src = inspect.getsource(self._fn())
362 assert _no_inline_bfs(src), (
363 "code._query.walk_commits_bfs still has an inline queue — migrate to iter_ancestors"
364 )
365
366 def test_returns_all_commits_no_stop(self, tmp_path: pathlib.Path) -> None:
367 repo = _make_repo(tmp_path)
368 root = _write_commit(repo, "root")
369 mid = _write_commit(repo, "mid", parent=root)
370 tip = _write_commit(repo, "tip", parent=mid)
371 commits, truncated = self._fn()(repo, tip)
372 assert {c.commit_id for c in commits} == {root, mid, tip}
373 assert truncated is False
374
375 def test_stop_at_commit_id_excludes_boundary(self, tmp_path: pathlib.Path) -> None:
376 repo = _make_repo(tmp_path)
377 root = _write_commit(repo, "root")
378 mid = _write_commit(repo, "mid", parent=root)
379 tip = _write_commit(repo, "tip", parent=mid)
380 commits, truncated = self._fn()(repo, tip, stop_at_commit_id=mid)
381 ids = {c.commit_id for c in commits}
382 assert tip in ids
383 assert mid not in ids
384 assert root not in ids
385
386 def test_max_commits_returns_truncated_true(self, tmp_path: pathlib.Path) -> None:
387 repo = _make_repo(tmp_path)
388 prev = None
389 for i in range(5):
390 prev = _write_commit(repo, f"c{i}", parent=prev)
391 tip = prev
392 commits, truncated = self._fn()(repo, tip, max_commits=3)
393 assert len(commits) <= 3
394 assert truncated is True
395
396 def test_merge_commit_visits_both_parents(self, tmp_path: pathlib.Path) -> None:
397 repo = _make_repo(tmp_path)
398 root = _write_commit(repo, "root")
399 left = _write_commit(repo, "left", parent=root)
400 right = _write_commit(repo, "right", parent=root)
401 merge = _write_commit(repo, "merge", parent=left, parent2=right)
402 commits, _ = self._fn()(repo, merge)
403 ids = {c.commit_id for c in commits}
404 assert ids == {merge, left, right, root}
405
406
407 # ---------------------------------------------------------------------------
408 # midi._query.walk_commits_for_track (first-parent walk)
409 # ---------------------------------------------------------------------------
410
411 class TestWalkCommitsForTrack:
412 def _fn(self) -> types.FunctionType:
413 from muse.plugins.midi import _query
414 return _query.walk_commits_for_track
415
416 def test_structural_no_inline_while(self) -> None:
417 """walk_commits_for_track must not use an inline while-loop walk."""
418 src = inspect.getsource(self._fn())
419 assert "while current_id" not in src and "while commit_id" not in src, (
420 "midi._query.walk_commits_for_track still uses an inline while-loop — "
421 "migrate to iter_ancestors(first_parent_only=True)"
422 )
423
424 def test_linear_chain_all_returned(self, tmp_path: pathlib.Path) -> None:
425 repo = _make_repo(tmp_path)
426 root = _write_commit(repo, "root")
427 mid = _write_commit(repo, "mid", parent=root)
428 tip = _write_commit(repo, "tip", parent=mid)
429 result = self._fn()(repo, tip, track_path="song.mid")
430 commit_ids = {r[0].commit_id for r in result}
431 assert commit_ids == {root, mid, tip}
432
433 def test_first_parent_only_skips_second_parent(self, tmp_path: pathlib.Path) -> None:
434 """walk_commits_for_track must follow only first-parent at merge commits."""
435 repo = _make_repo(tmp_path)
436 root = _write_commit(repo, "root")
437 branch = _write_commit(repo, "branch", parent=root)
438 main_tip = _write_commit(repo, "main_tip", parent=root)
439 merge = _write_commit(repo, "merge", parent=main_tip, parent2=branch)
440 result = self._fn()(repo, merge, track_path="song.mid")
441 commit_ids = {r[0].commit_id for r in result}
442 assert merge in commit_ids
443 assert main_tip in commit_ids
444 assert root in commit_ids
445 assert branch not in commit_ids # second-parent branch skipped
446
447 def test_max_commits_cap(self, tmp_path: pathlib.Path) -> None:
448 repo = _make_repo(tmp_path)
449 prev = None
450 for i in range(10):
451 prev = _write_commit(repo, f"c{i}", parent=prev)
452 tip = prev
453 result = self._fn()(repo, tip, track_path="song.mid", max_commits=3)
454 assert len(result) <= 3
File History 1 commit
sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b fix: try fetch/presign before fetch/mpack to avoid Cloudfla… Sonnet 4.6 patch 6 days ago