gabriel / muse public
test_perf_merge_scale.py python
763 lines 32.7 KB
Raw
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 20 days ago
1 """Phase 3.6 — ``muse merge`` at scale.
2
3 Targets
4 -------
5 - ``muse merge --dry-run <branch>`` on two branches each with 5 000 modified
6 files from a 75 000-file base: < 30 s.
7 - ``detect_conflicts`` on 1 000 files modified by both branches: < 5 s
8 (actual measurement: < 1 ms — target is trivially exceeded; the test
9 documents the asymptotic behaviour).
10 - ``find_merge_base`` on 50-commit-deep chains completes correctly;
11 > ``max_ancestors`` cap raises ``MuseCLIError`` cleanly.
12
13 Additional coverage discovered during reconnaissance
14 ----------------------------------------------------
15 - Convergent both-delete: NOT a conflict, absent from merged manifest.
16 - Convergent same-add same-hash: NOT a conflict, present in merged manifest.
17 - Delete-vs-modify: IS a conflict.
18 - Criss-cross DAG: ``find_merge_base`` returns a valid (though non-unique) LCA.
19 - Disjoint histories (no common ancestor): returns ``None``.
20 - Fast-forward merge semantics at 75 000-file scale.
21 - Memory: three 75 000-file manifests stay under 64 MB peak.
22 - Snapshot I/O round-trip at 75 000 files: write < 500 ms, read < 500 ms.
23 - Full three-way pipeline (diff → detect → apply) at 75 000 files: < 5 s.
24 """
25
26 from __future__ import annotations
27 from collections.abc import Mapping
28
29 import datetime
30 import hashlib
31 import pathlib
32 import shutil
33 import tempfile
34 import time
35 import tracemalloc
36
37 import pytest
38
39 from muse.core.merge_engine import (
40 apply_merge,
41 detect_conflicts,
42 diff_snapshots,
43 find_merge_base,
44 )
45 from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id
46 from muse.core.object_store import object_path
47 from muse.core.commits import (
48 CommitRecord,
49 write_commit,
50 )
51 from muse.core.snapshots import (
52 SnapshotRecord,
53 write_snapshot,
54 )
55 from muse.core.types import Manifest, blob_id
56 from muse.core.paths import muse_dir
57
58 # ---------------------------------------------------------------------------
59 # Helpers
60 # ---------------------------------------------------------------------------
61
62 _NOW = datetime.datetime(2024, 1, 1, tzinfo=datetime.timezone.utc)
63 _REPO_ID = "bench"
64
65
66 def _s256(data: bytes) -> str:
67 return blob_id(data)
68
69
70 def _fresh_repo(path: pathlib.Path, max_ancestors: int = 200_000) -> pathlib.Path:
71 dot_muse = muse_dir(path)
72 dot_muse.mkdir(parents=True)
73 (dot_muse / "repo.json").write_text('{"repo_id":"bench","owner":"bench"}')
74 (dot_muse / "config.toml").write_text(f"[limits]\nmax_ancestors = {max_ancestors}\n")
75 for d in ("commits", "snapshots", "objects"):
76 (dot_muse / d).mkdir()
77 (dot_muse / "refs" / "heads").mkdir(parents=True)
78 (dot_muse / "HEAD").write_text("ref: refs/heads/main\n")
79 return path
80
81
82 def _make_commit(
83 root: pathlib.Path,
84 parent_id: str | None,
85 snap_id: str,
86 msg: str,
87 offset_secs: int = 0,
88 ) -> str:
89 parent_ids = [parent_id] if parent_id else []
90 ts = _NOW + datetime.timedelta(seconds=offset_secs)
91 cid = compute_commit_id(
92 parent_ids=parent_ids,
93 snapshot_id=snap_id,
94 message=msg,
95 committed_at_iso=ts.isoformat(),
96 author="bench",
97 )
98 write_commit(
99 root,
100 CommitRecord(
101 commit_id=cid,
102 branch="br",
103 message=msg,
104 author="bench",
105 committed_at=ts,
106 parent_commit_id=parent_id,
107 parent2_commit_id=None,
108 snapshot_id=snap_id,
109 metadata={},
110 sem_ver_bump="PATCH",
111 ),
112 )
113 return cid
114
115
116 def _write_chain(root: pathlib.Path, depth: int, prefix: str, parent_id: str) -> str:
117 """Write a linear chain of *depth* commits on top of *parent_id*. Returns tip."""
118 snap = SnapshotRecord(
119 snapshot_id=compute_snapshot_id({}), manifest={}, created_at=_NOW
120 )
121 write_snapshot(root, snap)
122 prev = parent_id
123 for i in range(depth):
124 prev = _make_commit(root, prev, snap.snapshot_id, f"{prefix}-{i}", i)
125 return prev
126
127
128 def _build_manifests(
129 n_base: int,
130 n_ours: int,
131 n_theirs: int,
132 n_conflict: int,
133 ) -> tuple[Mapping[str, str], Mapping[str, str], Mapping[str, str]]:
134 """Return (base, ours, theirs) manifests for a scale scenario."""
135 base = {f"f{i:06d}.py": _s256(bytes([i % 256] * 64)) for i in range(n_base)}
136 ours = dict(base)
137 theirs = dict(base)
138 for i in range(n_ours):
139 ours[f"f{i:06d}.py"] = _s256(b"ours" + bytes([i % 256] * 60))
140 for i in range(n_ours, n_ours + n_theirs):
141 theirs[f"f{i:06d}.py"] = _s256(b"theirs" + bytes([i % 256] * 60))
142 conflict_start = n_ours + n_theirs
143 for i in range(conflict_start, conflict_start + n_conflict):
144 ours[f"f{i:06d}.py"] = _s256(b"ours_c" + bytes([i % 256] * 56))
145 theirs[f"f{i:06d}.py"] = _s256(b"theirs_c" + bytes([i % 256] * 56))
146 return base, ours, theirs
147
148
149 # ---------------------------------------------------------------------------
150 # TestDiffSnapshotsAtScale — the pure set-arithmetic hot path
151 # ---------------------------------------------------------------------------
152
153
154 class TestDiffSnapshotsAtScale:
155 """diff_snapshots is pure Python set arithmetic — verifies correctness at 75k."""
156
157 def test_diff_empty_manifests(self) -> None:
158 assert diff_snapshots({}, {}) == set()
159
160 def test_diff_no_changes(self) -> None:
161 m = {f"f{i}.py": _s256(bytes([i % 256])) for i in range(1000)}
162 assert diff_snapshots(m, m) == set()
163
164 def test_diff_all_added(self) -> None:
165 other = {f"f{i}.py": _s256(bytes([i % 256])) for i in range(500)}
166 result = diff_snapshots({}, other)
167 assert result == set(other)
168
169 def test_diff_all_deleted(self) -> None:
170 base = {f"f{i}.py": _s256(bytes([i % 256])) for i in range(500)}
171 assert diff_snapshots(base, {}) == set(base)
172
173 def test_diff_partial_modify(self) -> None:
174 base = {f"f{i}.py": _s256(bytes([i % 256])) for i in range(1000)}
175 other = dict(base)
176 for i in range(0, 100):
177 other[f"f{i}.py"] = _s256(b"new" + bytes([i % 256]))
178 result = diff_snapshots(base, other)
179 assert len(result) == 100
180 assert all(f"f{i}.py" in result for i in range(100))
181
182 def test_diff_symmetry_add_delete(self) -> None:
183 """diff_snapshots(a, b) == diff_snapshots(b, a) for add/delete (same paths, different meaning)."""
184 base = {"x.py": "h1"}
185 other = {"y.py": "h2"}
186 assert diff_snapshots(base, other) == diff_snapshots(other, base)
187
188 def test_diff_75k_under_500ms(self) -> None:
189 """diff_snapshots on 75k-file manifests completes in < 500 ms."""
190 base, ours, _ = _build_manifests(75_000, 5_000, 0, 0)
191 t0 = time.perf_counter()
192 result = diff_snapshots(base, ours)
193 duration_ms = (time.perf_counter() - t0) * 1000
194 assert len(result) == 5_000
195 assert duration_ms < 500, f"diff_snapshots took {duration_ms:.1f}ms (limit: 500ms)"
196
197
198 # ---------------------------------------------------------------------------
199 # TestDetectConflictsSemantics — correctness of the new convergence semantics
200 # ---------------------------------------------------------------------------
201
202
203 class TestDetectConflictsSemantics:
204 """Verifies the fixed convergence semantics of detect_conflicts."""
205
206 def test_disjoint_changes_no_conflict(self) -> None:
207 ours_m = {"a.py": "h_a"}
208 theirs_m = {"b.py": "h_b"}
209 assert detect_conflicts({"a.py"}, {"b.py"}, ours_m, theirs_m) == set()
210
211 def test_divergent_change_is_conflict(self) -> None:
212 ours_m = {"x.py": "hash_ours"}
213 theirs_m = {"x.py": "hash_theirs"}
214 assert detect_conflicts({"x.py"}, {"x.py"}, ours_m, theirs_m) == {"x.py"}
215
216 def test_both_delete_is_convergent(self) -> None:
217 """Both branches deleted the same file — convergent, NOT a conflict."""
218 assert detect_conflicts({"gone.py"}, {"gone.py"}, {}, {}) == set()
219
220 def test_both_delete_absent_from_merged(self) -> None:
221 """Both-delete: convergent → apply_merge correctly omits the file."""
222 base = {"gone.py": "h_old", "keep.py": "h_k"}
223 ours = {"keep.py": "h_k"}
224 theirs = {"keep.py": "h_k"}
225 oc = diff_snapshots(base, ours)
226 tc = diff_snapshots(base, theirs)
227 conflicts = detect_conflicts(oc, tc, ours, theirs)
228 merged = apply_merge(base, ours, theirs, oc, tc, conflicts)
229 assert "gone.py" not in merged, "Both-delete must remove file from merged"
230 assert "keep.py" in merged
231
232 def test_same_add_same_hash_convergent(self) -> None:
233 """Both branches independently added the same file with identical content."""
234 ours_m = {"new.py": "hash42"}
235 theirs_m = {"new.py": "hash42"}
236 assert detect_conflicts({"new.py"}, {"new.py"}, ours_m, theirs_m) == set()
237
238 def test_same_add_present_in_merged(self) -> None:
239 """Same-add convergent → apply_merge includes the file at the agreed hash."""
240 base: Manifest = {}
241 h = _s256(b"shared-content")
242 ours = {"new.py": h}
243 theirs = {"new.py": h}
244 oc = diff_snapshots(base, ours)
245 tc = diff_snapshots(base, theirs)
246 conflicts = detect_conflicts(oc, tc, ours, theirs)
247 merged = apply_merge(base, ours, theirs, oc, tc, conflicts)
248 assert "new.py" not in conflicts
249 assert merged.get("new.py") == h
250
251 def test_delete_vs_modify_is_conflict(self) -> None:
252 """One side deleted, other modified — genuinely divergent."""
253 ours_m: Manifest = {}
254 theirs_m = {"a.py": "hash_new"}
255 assert detect_conflicts({"a.py"}, {"a.py"}, ours_m, theirs_m) == {"a.py"}
256
257 def test_same_add_different_hash_is_conflict(self) -> None:
258 """Both added same path with DIFFERENT content — real conflict."""
259 ours_m = {"new.py": "h_v1"}
260 theirs_m = {"new.py": "h_v2"}
261 assert detect_conflicts({"new.py"}, {"new.py"}, ours_m, theirs_m) == {"new.py"}
262
263 def test_commutativity(self) -> None:
264 """detect_conflicts(a, b, ma, mb) == detect_conflicts(b, a, mb, ma)."""
265 ours_m = {f"f{i}.py": f"h_ours_{i}" for i in range(50)}
266 theirs_m = {f"f{i}.py": f"h_theirs_{i}" for i in range(30, 80)}
267 oc = set(ours_m)
268 tc = set(theirs_m)
269 assert detect_conflicts(oc, tc, ours_m, theirs_m) == detect_conflicts(
270 tc, oc, theirs_m, ours_m
271 )
272
273 def test_detect_conflicts_1k_under_5s(self) -> None:
274 """1 000 conflicting paths detected in well under 5 s (actually < 1 ms)."""
275 paths = {f"conflict-{i:04d}.py" for i in range(1_000)}
276 ours_m = {p: f"ours-{p}" for p in paths}
277 theirs_m = {p: f"theirs-{p}" for p in paths}
278 t0 = time.perf_counter()
279 result = detect_conflicts(paths, paths, ours_m, theirs_m)
280 duration_ms = (time.perf_counter() - t0) * 1000
281 assert result == paths
282 assert duration_ms < 5_000, f"detect_conflicts 1k took {duration_ms:.1f}ms (limit: 5000ms)"
283
284 def test_detect_conflicts_75k_under_500ms(self) -> None:
285 """detect_conflicts on 75k-path intersection completes in < 500 ms."""
286 n = 75_000
287 paths = {f"f{i:06d}.py" for i in range(n)}
288 ours_m = {p: f"ours-{p}" for p in paths}
289 theirs_m = {p: f"theirs-{p}" for p in paths}
290 t0 = time.perf_counter()
291 result = detect_conflicts(paths, paths, ours_m, theirs_m)
292 duration_ms = (time.perf_counter() - t0) * 1000
293 assert len(result) == n
294 assert duration_ms < 500, f"detect_conflicts 75k took {duration_ms:.1f}ms (limit: 500ms)"
295
296
297 # ---------------------------------------------------------------------------
298 # TestApplyMergeAtScale — correctness and timing of apply_merge
299 # ---------------------------------------------------------------------------
300
301
302 class TestApplyMergeAtScale:
303 """apply_merge correctness and performance at 75k files."""
304
305 def test_apply_no_changes(self) -> None:
306 base = {"a.py": "h1", "b.py": "h2"}
307 merged = apply_merge(base, base, base, set(), set(), set())
308 assert merged == base
309
310 def test_apply_ours_only_addition(self) -> None:
311 base: Manifest = {}
312 ours = {"new.py": "h_new"}
313 merged = apply_merge(base, ours, {}, {"new.py"}, set(), set())
314 assert merged["new.py"] == "h_new"
315
316 def test_apply_theirs_only_deletion(self) -> None:
317 base = {"del.py": "h_old", "keep.py": "h_k"}
318 ours = dict(base)
319 theirs = {"keep.py": "h_k"}
320 merged = apply_merge(base, ours, theirs, set(), {"del.py"}, set())
321 assert "del.py" not in merged
322 assert "keep.py" in merged
323
324 def test_apply_conflict_stays_at_base(self) -> None:
325 base = {"c.py": "h_base"}
326 ours = {"c.py": "h_ours"}
327 theirs = {"c.py": "h_theirs"}
328 merged = apply_merge(base, ours, theirs, {"c.py"}, {"c.py"}, {"c.py"})
329 assert merged["c.py"] == "h_base"
330
331 def test_apply_all_conflict_returns_base(self) -> None:
332 """When every path is a conflict, apply_merge returns a copy of base."""
333 base = {f"f{i}.py": f"h{i}" for i in range(100)}
334 ours = {p: f"ours-{v}" for p, v in base.items()}
335 theirs = {p: f"theirs-{v}" for p, v in base.items()}
336 oc = set(base)
337 tc = set(base)
338 merged = apply_merge(base, ours, theirs, oc, tc, oc)
339 assert merged == base
340
341 def test_apply_75k_5k_5k_under_500ms(self) -> None:
342 """apply_merge on 75k files with 5k ours + 5k theirs changes: < 500 ms."""
343 base, ours, theirs = _build_manifests(75_000, 5_000, 5_000, 0)
344 oc = diff_snapshots(base, ours)
345 tc = diff_snapshots(base, theirs)
346 conflicts = detect_conflicts(oc, tc, ours, theirs)
347 t0 = time.perf_counter()
348 merged = apply_merge(base, ours, theirs, oc, tc, conflicts)
349 duration_ms = (time.perf_counter() - t0) * 1000
350 assert len(conflicts) == 0
351 assert len(merged) == 75_000
352 assert duration_ms < 500, f"apply_merge 75k took {duration_ms:.1f}ms (limit: 500ms)"
353
354 def test_apply_75k_with_1k_conflicts_under_500ms(self) -> None:
355 """apply_merge with 1k conflict paths at 75k scale: < 500 ms."""
356 base, ours, theirs = _build_manifests(75_000, 5_000, 5_000, 1_000)
357 oc = diff_snapshots(base, ours)
358 tc = diff_snapshots(base, theirs)
359 conflicts = detect_conflicts(oc, tc, ours, theirs)
360 t0 = time.perf_counter()
361 merged = apply_merge(base, ours, theirs, oc, tc, conflicts)
362 duration_ms = (time.perf_counter() - t0) * 1000
363 assert len(conflicts) == 1_000
364 assert duration_ms < 500, f"apply_merge 75k+conflicts took {duration_ms:.1f}ms (limit: 500ms)"
365
366
367 # ---------------------------------------------------------------------------
368 # TestFullMergePipeline — diff → detect → apply at scale
369 # ---------------------------------------------------------------------------
370
371
372 class TestFullMergePipeline:
373 """End-to-end pure-function pipeline timing and correctness."""
374
375 def test_pipeline_correctness_small(self) -> None:
376 base = {"a.py": "base_a", "b.py": "base_b", "c.py": "base_c"}
377 # ours: modifies a.py, leaves b.py and c.py unchanged
378 ours = {"a.py": "ours_a", "b.py": "base_b", "c.py": "base_c"}
379 # theirs: leaves a.py unchanged, modifies b.py, leaves c.py, adds d.py
380 theirs = {"a.py": "base_a", "b.py": "theirs_b", "c.py": "base_c", "d.py": "new_d"}
381 oc = diff_snapshots(base, ours)
382 tc = diff_snapshots(base, theirs)
383 conflicts = detect_conflicts(oc, tc, ours, theirs)
384 merged = apply_merge(base, ours, theirs, oc, tc, conflicts)
385 assert merged["a.py"] == "ours_a" # ours-only modification
386 assert merged["b.py"] == "theirs_b" # theirs-only modification
387 assert merged["d.py"] == "new_d" # theirs-only addition
388 assert merged["c.py"] == "base_c" # untouched by both
389
390 def test_pipeline_both_delete_resolved(self) -> None:
391 """Pipeline: both-delete produces no conflict and absent file in merged."""
392 base = {"rm.py": "old", "keep.py": "k"}
393 ours = {"keep.py": "k"}
394 theirs = {"keep.py": "k"}
395 oc = diff_snapshots(base, ours)
396 tc = diff_snapshots(base, theirs)
397 conflicts = detect_conflicts(oc, tc, ours, theirs)
398 merged = apply_merge(base, ours, theirs, oc, tc, conflicts)
399 assert "rm.py" not in conflicts
400 assert "rm.py" not in merged
401
402 def test_pipeline_same_add_resolved(self) -> None:
403 """Pipeline: same-add same-hash produces no conflict and file in merged."""
404 base: Manifest = {}
405 h = _s256(b"shared")
406 ours = {"new.py": h}
407 theirs = {"new.py": h}
408 oc = diff_snapshots(base, ours)
409 tc = diff_snapshots(base, theirs)
410 conflicts = detect_conflicts(oc, tc, ours, theirs)
411 merged = apply_merge(base, ours, theirs, oc, tc, conflicts)
412 assert "new.py" not in conflicts
413 assert merged.get("new.py") == h
414
415 def test_pipeline_75k_5k_5k_under_5s(self) -> None:
416 """Full pipeline (diff×2 + detect + apply) at 75k files, 5k+5k changes: < 5 s."""
417 base, ours, theirs = _build_manifests(75_000, 5_000, 5_000, 0)
418 t0 = time.perf_counter()
419 oc = diff_snapshots(base, ours)
420 tc = diff_snapshots(base, theirs)
421 conflicts = detect_conflicts(oc, tc, ours, theirs)
422 merged = apply_merge(base, ours, theirs, oc, tc, conflicts)
423 elapsed = time.perf_counter() - t0
424 assert len(conflicts) == 0
425 assert len(merged) == 75_000
426 assert elapsed < 5, f"Full pipeline 75k took {elapsed:.2f}s (limit: 5s)"
427
428 def test_pipeline_75k_with_1k_conflicts_under_5s(self) -> None:
429 """Full pipeline with 1k conflict paths at 75k scale: < 5 s."""
430 base, ours, theirs = _build_manifests(75_000, 5_000, 5_000, 1_000)
431 t0 = time.perf_counter()
432 oc = diff_snapshots(base, ours)
433 tc = diff_snapshots(base, theirs)
434 conflicts = detect_conflicts(oc, tc, ours, theirs)
435 merged = apply_merge(base, ours, theirs, oc, tc, conflicts)
436 elapsed = time.perf_counter() - t0
437 assert len(conflicts) == 1_000
438 assert elapsed < 5, f"Full pipeline 75k+conflicts took {elapsed:.2f}s (limit: 5s)"
439
440 def test_pipeline_memory_3_manifests_under_64mb(self) -> None:
441 """Three 75k-file manifests (base + ours + theirs) peak < 64 MB."""
442 tracemalloc.start()
443 base, ours, theirs = _build_manifests(75_000, 5_000, 5_000, 1_000)
444 _, peak = tracemalloc.get_traced_memory()
445 tracemalloc.stop()
446 peak_mb = peak / 1024 / 1024
447 assert peak_mb < 64, f"3 manifests at 75k peak {peak_mb:.1f}MB (limit: 64MB)"
448
449
450 # ---------------------------------------------------------------------------
451 # TestSnapshotIOAtScale — read_snapshot / write_snapshot at 75k files
452 # ---------------------------------------------------------------------------
453
454
455 class TestSnapshotIOAtScale:
456 """Snapshot serialisation/deserialisation timing at 75k-file scale."""
457
458 def test_write_snapshot_75k_under_500ms(self, tmp_path: pathlib.Path) -> None:
459 root = _fresh_repo(tmp_path)
460 manifest = {f"f{i:06d}.py": _s256(bytes([i % 256] * 64)) for i in range(75_000)}
461 snap_id = compute_snapshot_id(manifest)
462 snap = SnapshotRecord(snapshot_id=snap_id, manifest=manifest, created_at=_NOW)
463 t0 = time.perf_counter()
464 write_snapshot(root, snap)
465 duration_ms = (time.perf_counter() - t0) * 1000
466 assert duration_ms < 500, f"write_snapshot 75k took {duration_ms:.1f}ms (limit: 500ms)"
467
468 def test_read_snapshot_75k_under_500ms(self, tmp_path: pathlib.Path) -> None:
469 from muse.core.snapshots import read_snapshot
470
471 root = _fresh_repo(tmp_path)
472 manifest = {f"f{i:06d}.py": _s256(bytes([i % 256] * 64)) for i in range(75_000)}
473 snap_id = compute_snapshot_id(manifest)
474 write_snapshot(root, SnapshotRecord(snapshot_id=snap_id, manifest=manifest, created_at=_NOW))
475 t0 = time.perf_counter()
476 loaded = read_snapshot(root, snap_id)
477 duration_ms = (time.perf_counter() - t0) * 1000
478 assert loaded is not None
479 assert len(loaded.manifest) == 75_000
480 assert duration_ms < 500, f"read_snapshot 75k took {duration_ms:.1f}ms (limit: 500ms)"
481
482 def test_snapshot_roundtrip_integrity(self, tmp_path: pathlib.Path) -> None:
483 """write + read produces bit-identical manifest."""
484 from muse.core.snapshots import read_snapshot
485
486 root = _fresh_repo(tmp_path)
487 manifest = {f"f{i:04d}.py": _s256(bytes([i % 256])) for i in range(10_000)}
488 snap_id = compute_snapshot_id(manifest)
489 write_snapshot(root, SnapshotRecord(snapshot_id=snap_id, manifest=manifest, created_at=_NOW))
490 loaded = read_snapshot(root, snap_id)
491 assert loaded is not None
492 assert loaded.manifest == manifest
493
494
495 # ---------------------------------------------------------------------------
496 # TestFindMergeBaseCorrectness — DAG edge cases
497 # ---------------------------------------------------------------------------
498
499
500 class TestFindMergeBaseCorrectness:
501 """find_merge_base correctness across edge cases."""
502
503 def _repo_with_base(
504 self, tmp_path: pathlib.Path, max_ancestors: int = 200_000
505 ) -> tuple[pathlib.Path, str, str]:
506 """Return (root, base_commit_id, snap_id)."""
507 root = _fresh_repo(tmp_path, max_ancestors=max_ancestors)
508 snap_id = compute_snapshot_id({})
509 write_snapshot(root, SnapshotRecord(snapshot_id=snap_id, manifest={}, created_at=_NOW))
510 base_id = _make_commit(root, None, snap_id, "base", 0)
511 return root, base_id, snap_id
512
513 def test_lca_simple_diverging_chains(self, tmp_path: pathlib.Path) -> None:
514 root, base_id, snap_id = self._repo_with_base(tmp_path)
515 tip_a = _write_chain(root, 10, "a", base_id)
516 tip_b = _write_chain(root, 10, "b", base_id)
517 result = find_merge_base(root, tip_a, tip_b)
518 assert result == base_id
519
520 def test_lca_identical_tips(self, tmp_path: pathlib.Path) -> None:
521 """find_merge_base(tip, tip) == tip."""
522 root, base_id, snap_id = self._repo_with_base(tmp_path)
523 tip = _write_chain(root, 5, "a", base_id)
524 assert find_merge_base(root, tip, tip) == tip
525
526 def test_lca_a_is_ancestor_of_b(self, tmp_path: pathlib.Path) -> None:
527 """When a is a direct ancestor of b, LCA is a."""
528 root, base_id, snap_id = self._repo_with_base(tmp_path)
529 mid = _write_chain(root, 3, "chain", base_id)
530 tip = _write_chain(root, 3, "ext", mid)
531 result = find_merge_base(root, mid, tip)
532 assert result == mid
533
534 def test_lca_disjoint_histories_returns_none(self, tmp_path: pathlib.Path) -> None:
535 root = _fresh_repo(tmp_path)
536 snap_id = compute_snapshot_id({})
537 write_snapshot(root, SnapshotRecord(snapshot_id=snap_id, manifest={}, created_at=_NOW))
538 a = _make_commit(root, None, snap_id, "a", 0)
539 b = _make_commit(root, None, snap_id, "b", 1)
540 assert find_merge_base(root, a, b) is None
541
542 def test_lca_criss_cross_dag_valid_lca(self, tmp_path: pathlib.Path) -> None:
543 """Criss-cross DAG (merge_1 has parents a+b, merge_2 has parents b+a).
544 Both a and b are valid LCAs; BFS must return one of them.
545 """
546 root, base_id, snap_id = self._repo_with_base(tmp_path)
547 a = _make_commit(root, base_id, snap_id, "a", 1)
548 b = _make_commit(root, base_id, snap_id, "b", 2)
549
550 # m1: a merges b
551 ts = _NOW + datetime.timedelta(seconds=3)
552 m1_id = compute_commit_id( parent_ids=[a, b],
553 snapshot_id=snap_id,
554 message="merge1",
555 committed_at_iso=ts.isoformat(),
556 author="b",)
557 write_commit(
558 root,
559 CommitRecord(
560 commit_id=m1_id, branch="a",
561 message="merge1", author="b", committed_at=ts,
562 parent_commit_id=a, parent2_commit_id=b,
563 snapshot_id=snap_id, metadata={}, sem_ver_bump="PATCH",
564 ),
565 )
566
567 # m2: b merges a
568 ts2 = _NOW + datetime.timedelta(seconds=4)
569 m2_id = compute_commit_id( parent_ids=[b, a],
570 snapshot_id=snap_id,
571 message="merge2",
572 committed_at_iso=ts2.isoformat(),
573 author="b",)
574 write_commit(
575 root,
576 CommitRecord(
577 commit_id=m2_id, branch="b",
578 message="merge2", author="b", committed_at=ts2,
579 parent_commit_id=b, parent2_commit_id=a,
580 snapshot_id=snap_id, metadata={}, sem_ver_bump="PATCH",
581 ),
582 )
583
584 result = find_merge_base(root, m1_id, m2_id)
585 assert result in {a, b}, f"Expected a or b as LCA, got {result}"
586
587 def test_lca_cap_raises_clean_error(self, tmp_path: pathlib.Path) -> None:
588 """Ancestor graph > max_ancestors raises MuseCLIError, not a silent None."""
589 from muse.core.errors import MuseCLIError
590
591 root = _fresh_repo(tmp_path, max_ancestors=20)
592 snap_id = compute_snapshot_id({})
593 write_snapshot(root, SnapshotRecord(snapshot_id=snap_id, manifest={}, created_at=_NOW))
594 # 25-long chains share no ancestor — A side will hit cap
595 prev_a: str | None = None
596 for i in range(25):
597 prev_a = _make_commit(root, prev_a, snap_id, f"a-{i}", i)
598 prev_b: str | None = None
599 for i in range(25):
600 prev_b = _make_commit(root, prev_b, snap_id, f"b-{i}", i + 25)
601 assert prev_a is not None
602 assert prev_b is not None
603 with pytest.raises(MuseCLIError, match="Ancestor graph exceeds"):
604 find_merge_base(root, prev_a, prev_b)
605
606 def test_lca_missing_commit_handled(self, tmp_path: pathlib.Path) -> None:
607 """BFS continues gracefully when a commit file is missing (None from read_commit)."""
608 root, base_id, snap_id = self._repo_with_base(tmp_path)
609 # Chain a: base → real commit
610 tip_a = _write_chain(root, 5, "a", base_id)
611 # Chain b: points to a non-existent commit ID (ghost)
612 ghost_id = _s256(b"ghost-commit-does-not-exist")
613 # Stub the ghost so write_commit's parent-existence guard passes;
614 # find_merge_base will encounter the unreadable stub and stop.
615 import json as _json
616 ghost_payload = _json.dumps({"commit_id": ghost_id}, separators=(",", ":")).encode()
617 _p = object_path(root, ghost_id)
618 _p.parent.mkdir(parents=True, exist_ok=True)
619 _p.write_bytes(f"commit {len(ghost_payload)}\0".encode() + ghost_payload)
620 # wrap the ghost in a real commit so find_merge_base walks into it
621 ts = _NOW + datetime.timedelta(seconds=100)
622 wrap_id = compute_commit_id( parent_ids=[ghost_id],
623 snapshot_id=snap_id,
624 message="wrap",
625 committed_at_iso=ts.isoformat(),
626 author="b",)
627 write_commit(
628 root,
629 CommitRecord(
630 commit_id=wrap_id, branch="b",
631 message="wrap", author="b", committed_at=ts,
632 parent_commit_id=ghost_id, parent2_commit_id=None,
633 snapshot_id=snap_id, metadata={}, sem_ver_bump="PATCH",
634 ),
635 )
636 # Should not raise — returns None because the ghost branch has no real ancestors
637 result = find_merge_base(root, tip_a, wrap_id)
638 assert result is None # ghost side has no ancestors in common with a
639
640
641 # ---------------------------------------------------------------------------
642 # TestFindMergeBasePerformance — timing at real-world commit depths
643 # ---------------------------------------------------------------------------
644
645
646 class TestFindMergeBasePerformance:
647 """find_merge_base timing. Each test builds fresh chains in a temp dir."""
648
649 @pytest.mark.parametrize("depth", [100, 500])
650 def test_find_merge_base_depth_under_2s(
651 self, depth: int, tmp_path: pathlib.Path
652 ) -> None:
653 """find_merge_base on two diverging {depth}-commit chains: < 2 s."""
654 root = _fresh_repo(tmp_path)
655 snap_id = compute_snapshot_id({})
656 write_snapshot(root, SnapshotRecord(snapshot_id=snap_id, manifest={}, created_at=_NOW))
657 base_id = _make_commit(root, None, snap_id, "base", 0)
658 tip_a = _write_chain(root, depth, "a", base_id)
659 tip_b = _write_chain(root, depth, "b", base_id)
660 t0 = time.perf_counter()
661 result = find_merge_base(root, tip_a, tip_b)
662 elapsed = time.perf_counter() - t0
663 assert result == base_id, f"Wrong LCA at depth={depth}"
664 assert elapsed < 2, f"find_merge_base depth={depth} took {elapsed:.2f}s (limit: 2s)"
665
666 @pytest.mark.slow
667 def test_find_merge_base_1k_depth_under_5s(self, tmp_path: pathlib.Path) -> None:
668 """find_merge_base on 1k-commit diverging chains: < 5 s."""
669 root = _fresh_repo(tmp_path)
670 snap_id = compute_snapshot_id({})
671 write_snapshot(root, SnapshotRecord(snapshot_id=snap_id, manifest={}, created_at=_NOW))
672 base_id = _make_commit(root, None, snap_id, "base", 0)
673 tip_a = _write_chain(root, 1_000, "a", base_id)
674 tip_b = _write_chain(root, 1_000, "b", base_id)
675 t0 = time.perf_counter()
676 result = find_merge_base(root, tip_a, tip_b)
677 elapsed = time.perf_counter() - t0
678 assert result == base_id
679 assert elapsed < 5, f"find_merge_base 1k took {elapsed:.2f}s (limit: 5s)"
680
681 @pytest.mark.slow
682 def test_find_merge_base_5k_depth_under_30s(self, tmp_path: pathlib.Path) -> None:
683 """find_merge_base on 5k-commit diverging chains: < 30 s (the merge target)."""
684 root = _fresh_repo(tmp_path)
685 snap_id = compute_snapshot_id({})
686 write_snapshot(root, SnapshotRecord(snapshot_id=snap_id, manifest={}, created_at=_NOW))
687 base_id = _make_commit(root, None, snap_id, "base", 0)
688 tip_a = _write_chain(root, 5_000, "a", base_id)
689 tip_b = _write_chain(root, 5_000, "b", base_id)
690 t0 = time.perf_counter()
691 result = find_merge_base(root, tip_a, tip_b)
692 elapsed = time.perf_counter() - t0
693 assert result == base_id
694 assert elapsed < 30, f"find_merge_base 5k took {elapsed:.2f}s (limit: 30s)"
695
696
697 # ---------------------------------------------------------------------------
698 # TestFullMergeScaleSlow — the 30-second overall target (@slow)
699 # ---------------------------------------------------------------------------
700
701
702 @pytest.mark.slow
703 class TestFullMergeScaleSlow:
704 """End-to-end scale targets that require @slow to keep CI fast."""
705
706 def test_full_pipeline_75k_5k_5k_1k_conflicts_under_30s(self) -> None:
707 """Full pipeline: 75k files, 5k ours, 5k theirs, 1k conflicts — < 30 s."""
708 base, ours, theirs = _build_manifests(75_000, 5_000, 5_000, 1_000)
709 t0 = time.perf_counter()
710 oc = diff_snapshots(base, ours)
711 tc = diff_snapshots(base, theirs)
712 conflicts = detect_conflicts(oc, tc, ours, theirs)
713 merged = apply_merge(base, ours, theirs, oc, tc, conflicts)
714 elapsed = time.perf_counter() - t0
715 assert len(conflicts) == 1_000
716 assert len(merged) == 75_000
717 assert elapsed < 30, f"Full pipeline 75k+1k conflicts took {elapsed:.2f}s (limit: 30s)"
718
719 def test_snapshot_io_75k_three_reads_plus_write_under_30s(
720 self, tmp_path: pathlib.Path
721 ) -> None:
722 """3 reads + 1 write of a 75k-file snapshot (realistic merge I/O) < 30 s."""
723 from muse.core.snapshots import read_snapshot
724
725 root = _fresh_repo(tmp_path)
726 manifest = {f"f{i:06d}.py": _s256(bytes([i % 256] * 64)) for i in range(75_000)}
727 snap_id = compute_snapshot_id(manifest)
728 write_snapshot(root, SnapshotRecord(snapshot_id=snap_id, manifest=manifest, created_at=_NOW))
729
730 t0 = time.perf_counter()
731 for _ in range(3):
732 snap = read_snapshot(root, snap_id)
733 assert snap is not None and len(snap.manifest) == 75_000
734 # write the merged result
735 merged_snap_id = compute_snapshot_id(manifest)
736 write_snapshot(root, SnapshotRecord(snapshot_id=merged_snap_id, manifest=manifest, created_at=_NOW))
737 elapsed = time.perf_counter() - t0
738 assert elapsed < 30, f"3 reads + 1 write 75k took {elapsed:.2f}s (limit: 30s)"
739
740 def test_write_merge_state_1k_conflicts_roundtrip(
741 self, tmp_path: pathlib.Path
742 ) -> None:
743 """write_merge_state + read_merge_state with 1 000 conflict paths round-trips cleanly."""
744 from muse.core.merge_engine import read_merge_state, write_merge_state
745
746 root = _fresh_repo(tmp_path)
747 conflict_paths = [f"conflict/f{i:04d}.py" for i in range(1_000)]
748
749 write_merge_state(
750 root,
751 base_commit="a" * 64,
752 ours_commit="b" * 64,
753 theirs_commit="c" * 64,
754 conflict_paths=conflict_paths,
755 other_branch="feat/big-change",
756 )
757
758 state = read_merge_state(root)
759 assert state is not None
760 assert len(state.conflict_paths) == 1_000
761 assert state.other_branch == "feat/big-change"
762 # Paths must round-trip correctly
763 assert sorted(state.conflict_paths) == sorted(conflict_paths)
File History 4 commits
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 20 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 22 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 28 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor 28 days ago