test_perf_merge_scale.py
python
sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b
fix: try fetch/presign before fetch/mpack to avoid Cloudfla…
Sonnet 4.6
patch
6 days ago
| 1 | """Phase 3.6 — ``muse merge`` at scale. |
| 2 | |
| 3 | Targets |
| 4 | ------- |
| 5 | - ``muse merge --dry-run <branch>`` on two branches each with 5 000 modified |
| 6 | files from a 75 000-file base: < 30 s. |
| 7 | - ``detect_conflicts`` on 1 000 files modified by both branches: < 5 s |
| 8 | (actual measurement: < 1 ms — target is trivially exceeded; the test |
| 9 | documents the asymptotic behaviour). |
| 10 | - ``find_merge_base`` on 50-commit-deep chains completes correctly; |
| 11 | > ``max_ancestors`` cap raises ``MuseCLIError`` cleanly. |
| 12 | |
| 13 | Additional coverage discovered during reconnaissance |
| 14 | ---------------------------------------------------- |
| 15 | - Convergent both-delete: NOT a conflict, absent from merged manifest. |
| 16 | - Convergent same-add same-hash: NOT a conflict, present in merged manifest. |
| 17 | - Delete-vs-modify: IS a conflict. |
| 18 | - Criss-cross DAG: ``find_merge_base`` returns a valid (though non-unique) LCA. |
| 19 | - Disjoint histories (no common ancestor): returns ``None``. |
| 20 | - Fast-forward merge semantics at 75 000-file scale. |
| 21 | - Memory: three 75 000-file manifests stay under 64 MB peak. |
| 22 | - Snapshot I/O round-trip at 75 000 files: write < 500 ms, read < 500 ms. |
| 23 | - Full three-way pipeline (diff → detect → apply) at 75 000 files: < 5 s. |
| 24 | """ |
| 25 | |
| 26 | from __future__ import annotations |
| 27 | from collections.abc import Mapping |
| 28 | |
| 29 | import datetime |
| 30 | import hashlib |
| 31 | import pathlib |
| 32 | import shutil |
| 33 | import tempfile |
| 34 | import time |
| 35 | import tracemalloc |
| 36 | |
| 37 | import pytest |
| 38 | |
| 39 | from muse.core.merge_engine import ( |
| 40 | apply_merge, |
| 41 | detect_conflicts, |
| 42 | diff_snapshots, |
| 43 | find_merge_base, |
| 44 | ) |
| 45 | from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id |
| 46 | from muse.core.object_store import object_path |
| 47 | from muse.core.commits import ( |
| 48 | CommitRecord, |
| 49 | write_commit, |
| 50 | ) |
| 51 | from muse.core.snapshots import ( |
| 52 | SnapshotRecord, |
| 53 | write_snapshot, |
| 54 | ) |
| 55 | from muse.core.types import Manifest, blob_id |
| 56 | from muse.core.paths import muse_dir |
| 57 | |
| 58 | # --------------------------------------------------------------------------- |
| 59 | # Helpers |
| 60 | # --------------------------------------------------------------------------- |
| 61 | |
| 62 | _NOW = datetime.datetime(2024, 1, 1, tzinfo=datetime.timezone.utc) |
| 63 | _REPO_ID = "bench" |
| 64 | |
| 65 | |
| 66 | def _s256(data: bytes) -> str: |
| 67 | return blob_id(data) |
| 68 | |
| 69 | |
| 70 | def _fresh_repo(path: pathlib.Path, max_ancestors: int = 200_000) -> pathlib.Path: |
| 71 | dot_muse = muse_dir(path) |
| 72 | dot_muse.mkdir(parents=True) |
| 73 | (dot_muse / "repo.json").write_text('{"repo_id":"bench","owner":"bench"}') |
| 74 | (dot_muse / "config.toml").write_text(f"[limits]\nmax_ancestors = {max_ancestors}\n") |
| 75 | for d in ("commits", "snapshots", "objects"): |
| 76 | (dot_muse / d).mkdir() |
| 77 | (dot_muse / "refs" / "heads").mkdir(parents=True) |
| 78 | (dot_muse / "HEAD").write_text("ref: refs/heads/main\n") |
| 79 | return path |
| 80 | |
| 81 | |
| 82 | def _make_commit( |
| 83 | root: pathlib.Path, |
| 84 | parent_id: str | None, |
| 85 | snap_id: str, |
| 86 | msg: str, |
| 87 | offset_secs: int = 0, |
| 88 | ) -> str: |
| 89 | parent_ids = [parent_id] if parent_id else [] |
| 90 | ts = _NOW + datetime.timedelta(seconds=offset_secs) |
| 91 | cid = compute_commit_id( |
| 92 | parent_ids=parent_ids, |
| 93 | snapshot_id=snap_id, |
| 94 | message=msg, |
| 95 | committed_at_iso=ts.isoformat(), |
| 96 | author="bench", |
| 97 | ) |
| 98 | write_commit( |
| 99 | root, |
| 100 | CommitRecord( |
| 101 | commit_id=cid, |
| 102 | branch="br", |
| 103 | message=msg, |
| 104 | author="bench", |
| 105 | committed_at=ts, |
| 106 | parent_commit_id=parent_id, |
| 107 | parent2_commit_id=None, |
| 108 | snapshot_id=snap_id, |
| 109 | metadata={}, |
| 110 | sem_ver_bump="PATCH", |
| 111 | ), |
| 112 | ) |
| 113 | return cid |
| 114 | |
| 115 | |
| 116 | def _write_chain(root: pathlib.Path, depth: int, prefix: str, parent_id: str) -> str: |
| 117 | """Write a linear chain of *depth* commits on top of *parent_id*. Returns tip.""" |
| 118 | snap = SnapshotRecord( |
| 119 | snapshot_id=compute_snapshot_id({}), manifest={}, created_at=_NOW |
| 120 | ) |
| 121 | write_snapshot(root, snap) |
| 122 | prev = parent_id |
| 123 | for i in range(depth): |
| 124 | prev = _make_commit(root, prev, snap.snapshot_id, f"{prefix}-{i}", i) |
| 125 | return prev |
| 126 | |
| 127 | |
| 128 | def _build_manifests( |
| 129 | n_base: int, |
| 130 | n_ours: int, |
| 131 | n_theirs: int, |
| 132 | n_conflict: int, |
| 133 | ) -> tuple[Mapping[str, str], Mapping[str, str], Mapping[str, str]]: |
| 134 | """Return (base, ours, theirs) manifests for a scale scenario.""" |
| 135 | base = {f"f{i:06d}.py": _s256(bytes([i % 256] * 64)) for i in range(n_base)} |
| 136 | ours = dict(base) |
| 137 | theirs = dict(base) |
| 138 | for i in range(n_ours): |
| 139 | ours[f"f{i:06d}.py"] = _s256(b"ours" + bytes([i % 256] * 60)) |
| 140 | for i in range(n_ours, n_ours + n_theirs): |
| 141 | theirs[f"f{i:06d}.py"] = _s256(b"theirs" + bytes([i % 256] * 60)) |
| 142 | conflict_start = n_ours + n_theirs |
| 143 | for i in range(conflict_start, conflict_start + n_conflict): |
| 144 | ours[f"f{i:06d}.py"] = _s256(b"ours_c" + bytes([i % 256] * 56)) |
| 145 | theirs[f"f{i:06d}.py"] = _s256(b"theirs_c" + bytes([i % 256] * 56)) |
| 146 | return base, ours, theirs |
| 147 | |
| 148 | |
| 149 | # --------------------------------------------------------------------------- |
| 150 | # TestDiffSnapshotsAtScale — the pure set-arithmetic hot path |
| 151 | # --------------------------------------------------------------------------- |
| 152 | |
| 153 | |
| 154 | class TestDiffSnapshotsAtScale: |
| 155 | """diff_snapshots is pure Python set arithmetic — verifies correctness at 75k.""" |
| 156 | |
| 157 | def test_diff_empty_manifests(self) -> None: |
| 158 | assert diff_snapshots({}, {}) == set() |
| 159 | |
| 160 | def test_diff_no_changes(self) -> None: |
| 161 | m = {f"f{i}.py": _s256(bytes([i % 256])) for i in range(1000)} |
| 162 | assert diff_snapshots(m, m) == set() |
| 163 | |
| 164 | def test_diff_all_added(self) -> None: |
| 165 | other = {f"f{i}.py": _s256(bytes([i % 256])) for i in range(500)} |
| 166 | result = diff_snapshots({}, other) |
| 167 | assert result == set(other) |
| 168 | |
| 169 | def test_diff_all_deleted(self) -> None: |
| 170 | base = {f"f{i}.py": _s256(bytes([i % 256])) for i in range(500)} |
| 171 | assert diff_snapshots(base, {}) == set(base) |
| 172 | |
| 173 | def test_diff_partial_modify(self) -> None: |
| 174 | base = {f"f{i}.py": _s256(bytes([i % 256])) for i in range(1000)} |
| 175 | other = dict(base) |
| 176 | for i in range(0, 100): |
| 177 | other[f"f{i}.py"] = _s256(b"new" + bytes([i % 256])) |
| 178 | result = diff_snapshots(base, other) |
| 179 | assert len(result) == 100 |
| 180 | assert all(f"f{i}.py" in result for i in range(100)) |
| 181 | |
| 182 | def test_diff_symmetry_add_delete(self) -> None: |
| 183 | """diff_snapshots(a, b) == diff_snapshots(b, a) for add/delete (same paths, different meaning).""" |
| 184 | base = {"x.py": "h1"} |
| 185 | other = {"y.py": "h2"} |
| 186 | assert diff_snapshots(base, other) == diff_snapshots(other, base) |
| 187 | |
| 188 | def test_diff_75k_under_500ms(self) -> None: |
| 189 | """diff_snapshots on 75k-file manifests completes in < 500 ms.""" |
| 190 | base, ours, _ = _build_manifests(75_000, 5_000, 0, 0) |
| 191 | t0 = time.perf_counter() |
| 192 | result = diff_snapshots(base, ours) |
| 193 | duration_ms = (time.perf_counter() - t0) * 1000 |
| 194 | assert len(result) == 5_000 |
| 195 | assert duration_ms < 500, f"diff_snapshots took {duration_ms:.1f}ms (limit: 500ms)" |
| 196 | |
| 197 | |
| 198 | # --------------------------------------------------------------------------- |
| 199 | # TestDetectConflictsSemantics — correctness of the new convergence semantics |
| 200 | # --------------------------------------------------------------------------- |
| 201 | |
| 202 | |
| 203 | class TestDetectConflictsSemantics: |
| 204 | """Verifies the fixed convergence semantics of detect_conflicts.""" |
| 205 | |
| 206 | def test_disjoint_changes_no_conflict(self) -> None: |
| 207 | ours_m = {"a.py": "h_a"} |
| 208 | theirs_m = {"b.py": "h_b"} |
| 209 | assert detect_conflicts({"a.py"}, {"b.py"}, ours_m, theirs_m) == set() |
| 210 | |
| 211 | def test_divergent_change_is_conflict(self) -> None: |
| 212 | ours_m = {"x.py": "hash_ours"} |
| 213 | theirs_m = {"x.py": "hash_theirs"} |
| 214 | assert detect_conflicts({"x.py"}, {"x.py"}, ours_m, theirs_m) == {"x.py"} |
| 215 | |
| 216 | def test_both_delete_is_convergent(self) -> None: |
| 217 | """Both branches deleted the same file — convergent, NOT a conflict.""" |
| 218 | assert detect_conflicts({"gone.py"}, {"gone.py"}, {}, {}) == set() |
| 219 | |
| 220 | def test_both_delete_absent_from_merged(self) -> None: |
| 221 | """Both-delete: convergent → apply_merge correctly omits the file.""" |
| 222 | base = {"gone.py": "h_old", "keep.py": "h_k"} |
| 223 | ours = {"keep.py": "h_k"} |
| 224 | theirs = {"keep.py": "h_k"} |
| 225 | oc = diff_snapshots(base, ours) |
| 226 | tc = diff_snapshots(base, theirs) |
| 227 | conflicts = detect_conflicts(oc, tc, ours, theirs) |
| 228 | merged = apply_merge(base, ours, theirs, oc, tc, conflicts) |
| 229 | assert "gone.py" not in merged, "Both-delete must remove file from merged" |
| 230 | assert "keep.py" in merged |
| 231 | |
| 232 | def test_same_add_same_hash_convergent(self) -> None: |
| 233 | """Both branches independently added the same file with identical content.""" |
| 234 | ours_m = {"new.py": "hash42"} |
| 235 | theirs_m = {"new.py": "hash42"} |
| 236 | assert detect_conflicts({"new.py"}, {"new.py"}, ours_m, theirs_m) == set() |
| 237 | |
| 238 | def test_same_add_present_in_merged(self) -> None: |
| 239 | """Same-add convergent → apply_merge includes the file at the agreed hash.""" |
| 240 | base: Manifest = {} |
| 241 | h = _s256(b"shared-content") |
| 242 | ours = {"new.py": h} |
| 243 | theirs = {"new.py": h} |
| 244 | oc = diff_snapshots(base, ours) |
| 245 | tc = diff_snapshots(base, theirs) |
| 246 | conflicts = detect_conflicts(oc, tc, ours, theirs) |
| 247 | merged = apply_merge(base, ours, theirs, oc, tc, conflicts) |
| 248 | assert "new.py" not in conflicts |
| 249 | assert merged.get("new.py") == h |
| 250 | |
| 251 | def test_delete_vs_modify_is_conflict(self) -> None: |
| 252 | """One side deleted, other modified — genuinely divergent.""" |
| 253 | ours_m: Manifest = {} |
| 254 | theirs_m = {"a.py": "hash_new"} |
| 255 | assert detect_conflicts({"a.py"}, {"a.py"}, ours_m, theirs_m) == {"a.py"} |
| 256 | |
| 257 | def test_same_add_different_hash_is_conflict(self) -> None: |
| 258 | """Both added same path with DIFFERENT content — real conflict.""" |
| 259 | ours_m = {"new.py": "h_v1"} |
| 260 | theirs_m = {"new.py": "h_v2"} |
| 261 | assert detect_conflicts({"new.py"}, {"new.py"}, ours_m, theirs_m) == {"new.py"} |
| 262 | |
| 263 | def test_commutativity(self) -> None: |
| 264 | """detect_conflicts(a, b, ma, mb) == detect_conflicts(b, a, mb, ma).""" |
| 265 | ours_m = {f"f{i}.py": f"h_ours_{i}" for i in range(50)} |
| 266 | theirs_m = {f"f{i}.py": f"h_theirs_{i}" for i in range(30, 80)} |
| 267 | oc = set(ours_m) |
| 268 | tc = set(theirs_m) |
| 269 | assert detect_conflicts(oc, tc, ours_m, theirs_m) == detect_conflicts( |
| 270 | tc, oc, theirs_m, ours_m |
| 271 | ) |
| 272 | |
| 273 | def test_detect_conflicts_1k_under_5s(self) -> None: |
| 274 | """1 000 conflicting paths detected in well under 5 s (actually < 1 ms).""" |
| 275 | paths = {f"conflict-{i:04d}.py" for i in range(1_000)} |
| 276 | ours_m = {p: f"ours-{p}" for p in paths} |
| 277 | theirs_m = {p: f"theirs-{p}" for p in paths} |
| 278 | t0 = time.perf_counter() |
| 279 | result = detect_conflicts(paths, paths, ours_m, theirs_m) |
| 280 | duration_ms = (time.perf_counter() - t0) * 1000 |
| 281 | assert result == paths |
| 282 | assert duration_ms < 5_000, f"detect_conflicts 1k took {duration_ms:.1f}ms (limit: 5000ms)" |
| 283 | |
| 284 | def test_detect_conflicts_75k_under_500ms(self) -> None: |
| 285 | """detect_conflicts on 75k-path intersection completes in < 500 ms.""" |
| 286 | n = 75_000 |
| 287 | paths = {f"f{i:06d}.py" for i in range(n)} |
| 288 | ours_m = {p: f"ours-{p}" for p in paths} |
| 289 | theirs_m = {p: f"theirs-{p}" for p in paths} |
| 290 | t0 = time.perf_counter() |
| 291 | result = detect_conflicts(paths, paths, ours_m, theirs_m) |
| 292 | duration_ms = (time.perf_counter() - t0) * 1000 |
| 293 | assert len(result) == n |
| 294 | assert duration_ms < 500, f"detect_conflicts 75k took {duration_ms:.1f}ms (limit: 500ms)" |
| 295 | |
| 296 | |
| 297 | # --------------------------------------------------------------------------- |
| 298 | # TestApplyMergeAtScale — correctness and timing of apply_merge |
| 299 | # --------------------------------------------------------------------------- |
| 300 | |
| 301 | |
| 302 | class TestApplyMergeAtScale: |
| 303 | """apply_merge correctness and performance at 75k files.""" |
| 304 | |
| 305 | def test_apply_no_changes(self) -> None: |
| 306 | base = {"a.py": "h1", "b.py": "h2"} |
| 307 | merged = apply_merge(base, base, base, set(), set(), set()) |
| 308 | assert merged == base |
| 309 | |
| 310 | def test_apply_ours_only_addition(self) -> None: |
| 311 | base: Manifest = {} |
| 312 | ours = {"new.py": "h_new"} |
| 313 | merged = apply_merge(base, ours, {}, {"new.py"}, set(), set()) |
| 314 | assert merged["new.py"] == "h_new" |
| 315 | |
| 316 | def test_apply_theirs_only_deletion(self) -> None: |
| 317 | base = {"del.py": "h_old", "keep.py": "h_k"} |
| 318 | ours = dict(base) |
| 319 | theirs = {"keep.py": "h_k"} |
| 320 | merged = apply_merge(base, ours, theirs, set(), {"del.py"}, set()) |
| 321 | assert "del.py" not in merged |
| 322 | assert "keep.py" in merged |
| 323 | |
| 324 | def test_apply_conflict_stays_at_base(self) -> None: |
| 325 | base = {"c.py": "h_base"} |
| 326 | ours = {"c.py": "h_ours"} |
| 327 | theirs = {"c.py": "h_theirs"} |
| 328 | merged = apply_merge(base, ours, theirs, {"c.py"}, {"c.py"}, {"c.py"}) |
| 329 | assert merged["c.py"] == "h_base" |
| 330 | |
| 331 | def test_apply_all_conflict_returns_base(self) -> None: |
| 332 | """When every path is a conflict, apply_merge returns a copy of base.""" |
| 333 | base = {f"f{i}.py": f"h{i}" for i in range(100)} |
| 334 | ours = {p: f"ours-{v}" for p, v in base.items()} |
| 335 | theirs = {p: f"theirs-{v}" for p, v in base.items()} |
| 336 | oc = set(base) |
| 337 | tc = set(base) |
| 338 | merged = apply_merge(base, ours, theirs, oc, tc, oc) |
| 339 | assert merged == base |
| 340 | |
| 341 | def test_apply_75k_5k_5k_under_500ms(self) -> None: |
| 342 | """apply_merge on 75k files with 5k ours + 5k theirs changes: < 500 ms.""" |
| 343 | base, ours, theirs = _build_manifests(75_000, 5_000, 5_000, 0) |
| 344 | oc = diff_snapshots(base, ours) |
| 345 | tc = diff_snapshots(base, theirs) |
| 346 | conflicts = detect_conflicts(oc, tc, ours, theirs) |
| 347 | t0 = time.perf_counter() |
| 348 | merged = apply_merge(base, ours, theirs, oc, tc, conflicts) |
| 349 | duration_ms = (time.perf_counter() - t0) * 1000 |
| 350 | assert len(conflicts) == 0 |
| 351 | assert len(merged) == 75_000 |
| 352 | assert duration_ms < 500, f"apply_merge 75k took {duration_ms:.1f}ms (limit: 500ms)" |
| 353 | |
| 354 | def test_apply_75k_with_1k_conflicts_under_500ms(self) -> None: |
| 355 | """apply_merge with 1k conflict paths at 75k scale: < 500 ms.""" |
| 356 | base, ours, theirs = _build_manifests(75_000, 5_000, 5_000, 1_000) |
| 357 | oc = diff_snapshots(base, ours) |
| 358 | tc = diff_snapshots(base, theirs) |
| 359 | conflicts = detect_conflicts(oc, tc, ours, theirs) |
| 360 | t0 = time.perf_counter() |
| 361 | merged = apply_merge(base, ours, theirs, oc, tc, conflicts) |
| 362 | duration_ms = (time.perf_counter() - t0) * 1000 |
| 363 | assert len(conflicts) == 1_000 |
| 364 | assert duration_ms < 500, f"apply_merge 75k+conflicts took {duration_ms:.1f}ms (limit: 500ms)" |
| 365 | |
| 366 | |
| 367 | # --------------------------------------------------------------------------- |
| 368 | # TestFullMergePipeline — diff → detect → apply at scale |
| 369 | # --------------------------------------------------------------------------- |
| 370 | |
| 371 | |
| 372 | class TestFullMergePipeline: |
| 373 | """End-to-end pure-function pipeline timing and correctness.""" |
| 374 | |
| 375 | def test_pipeline_correctness_small(self) -> None: |
| 376 | base = {"a.py": "base_a", "b.py": "base_b", "c.py": "base_c"} |
| 377 | # ours: modifies a.py, leaves b.py and c.py unchanged |
| 378 | ours = {"a.py": "ours_a", "b.py": "base_b", "c.py": "base_c"} |
| 379 | # theirs: leaves a.py unchanged, modifies b.py, leaves c.py, adds d.py |
| 380 | theirs = {"a.py": "base_a", "b.py": "theirs_b", "c.py": "base_c", "d.py": "new_d"} |
| 381 | oc = diff_snapshots(base, ours) |
| 382 | tc = diff_snapshots(base, theirs) |
| 383 | conflicts = detect_conflicts(oc, tc, ours, theirs) |
| 384 | merged = apply_merge(base, ours, theirs, oc, tc, conflicts) |
| 385 | assert merged["a.py"] == "ours_a" # ours-only modification |
| 386 | assert merged["b.py"] == "theirs_b" # theirs-only modification |
| 387 | assert merged["d.py"] == "new_d" # theirs-only addition |
| 388 | assert merged["c.py"] == "base_c" # untouched by both |
| 389 | |
| 390 | def test_pipeline_both_delete_resolved(self) -> None: |
| 391 | """Pipeline: both-delete produces no conflict and absent file in merged.""" |
| 392 | base = {"rm.py": "old", "keep.py": "k"} |
| 393 | ours = {"keep.py": "k"} |
| 394 | theirs = {"keep.py": "k"} |
| 395 | oc = diff_snapshots(base, ours) |
| 396 | tc = diff_snapshots(base, theirs) |
| 397 | conflicts = detect_conflicts(oc, tc, ours, theirs) |
| 398 | merged = apply_merge(base, ours, theirs, oc, tc, conflicts) |
| 399 | assert "rm.py" not in conflicts |
| 400 | assert "rm.py" not in merged |
| 401 | |
| 402 | def test_pipeline_same_add_resolved(self) -> None: |
| 403 | """Pipeline: same-add same-hash produces no conflict and file in merged.""" |
| 404 | base: Manifest = {} |
| 405 | h = _s256(b"shared") |
| 406 | ours = {"new.py": h} |
| 407 | theirs = {"new.py": h} |
| 408 | oc = diff_snapshots(base, ours) |
| 409 | tc = diff_snapshots(base, theirs) |
| 410 | conflicts = detect_conflicts(oc, tc, ours, theirs) |
| 411 | merged = apply_merge(base, ours, theirs, oc, tc, conflicts) |
| 412 | assert "new.py" not in conflicts |
| 413 | assert merged.get("new.py") == h |
| 414 | |
| 415 | def test_pipeline_75k_5k_5k_under_5s(self) -> None: |
| 416 | """Full pipeline (diff×2 + detect + apply) at 75k files, 5k+5k changes: < 5 s.""" |
| 417 | base, ours, theirs = _build_manifests(75_000, 5_000, 5_000, 0) |
| 418 | t0 = time.perf_counter() |
| 419 | oc = diff_snapshots(base, ours) |
| 420 | tc = diff_snapshots(base, theirs) |
| 421 | conflicts = detect_conflicts(oc, tc, ours, theirs) |
| 422 | merged = apply_merge(base, ours, theirs, oc, tc, conflicts) |
| 423 | elapsed = time.perf_counter() - t0 |
| 424 | assert len(conflicts) == 0 |
| 425 | assert len(merged) == 75_000 |
| 426 | assert elapsed < 5, f"Full pipeline 75k took {elapsed:.2f}s (limit: 5s)" |
| 427 | |
| 428 | def test_pipeline_75k_with_1k_conflicts_under_5s(self) -> None: |
| 429 | """Full pipeline with 1k conflict paths at 75k scale: < 5 s.""" |
| 430 | base, ours, theirs = _build_manifests(75_000, 5_000, 5_000, 1_000) |
| 431 | t0 = time.perf_counter() |
| 432 | oc = diff_snapshots(base, ours) |
| 433 | tc = diff_snapshots(base, theirs) |
| 434 | conflicts = detect_conflicts(oc, tc, ours, theirs) |
| 435 | merged = apply_merge(base, ours, theirs, oc, tc, conflicts) |
| 436 | elapsed = time.perf_counter() - t0 |
| 437 | assert len(conflicts) == 1_000 |
| 438 | assert elapsed < 5, f"Full pipeline 75k+conflicts took {elapsed:.2f}s (limit: 5s)" |
| 439 | |
| 440 | def test_pipeline_memory_3_manifests_under_64mb(self) -> None: |
| 441 | """Three 75k-file manifests (base + ours + theirs) peak < 64 MB.""" |
| 442 | tracemalloc.start() |
| 443 | base, ours, theirs = _build_manifests(75_000, 5_000, 5_000, 1_000) |
| 444 | _, peak = tracemalloc.get_traced_memory() |
| 445 | tracemalloc.stop() |
| 446 | peak_mb = peak / 1024 / 1024 |
| 447 | assert peak_mb < 64, f"3 manifests at 75k peak {peak_mb:.1f}MB (limit: 64MB)" |
| 448 | |
| 449 | |
| 450 | # --------------------------------------------------------------------------- |
| 451 | # TestSnapshotIOAtScale — read_snapshot / write_snapshot at 75k files |
| 452 | # --------------------------------------------------------------------------- |
| 453 | |
| 454 | |
| 455 | class TestSnapshotIOAtScale: |
| 456 | """Snapshot serialisation/deserialisation timing at 75k-file scale.""" |
| 457 | |
| 458 | def test_write_snapshot_75k_under_500ms(self, tmp_path: pathlib.Path) -> None: |
| 459 | root = _fresh_repo(tmp_path) |
| 460 | manifest = {f"f{i:06d}.py": _s256(bytes([i % 256] * 64)) for i in range(75_000)} |
| 461 | snap_id = compute_snapshot_id(manifest) |
| 462 | snap = SnapshotRecord(snapshot_id=snap_id, manifest=manifest, created_at=_NOW) |
| 463 | t0 = time.perf_counter() |
| 464 | write_snapshot(root, snap) |
| 465 | duration_ms = (time.perf_counter() - t0) * 1000 |
| 466 | assert duration_ms < 500, f"write_snapshot 75k took {duration_ms:.1f}ms (limit: 500ms)" |
| 467 | |
| 468 | def test_read_snapshot_75k_under_500ms(self, tmp_path: pathlib.Path) -> None: |
| 469 | from muse.core.snapshots import read_snapshot |
| 470 | |
| 471 | root = _fresh_repo(tmp_path) |
| 472 | manifest = {f"f{i:06d}.py": _s256(bytes([i % 256] * 64)) for i in range(75_000)} |
| 473 | snap_id = compute_snapshot_id(manifest) |
| 474 | write_snapshot(root, SnapshotRecord(snapshot_id=snap_id, manifest=manifest, created_at=_NOW)) |
| 475 | t0 = time.perf_counter() |
| 476 | loaded = read_snapshot(root, snap_id) |
| 477 | duration_ms = (time.perf_counter() - t0) * 1000 |
| 478 | assert loaded is not None |
| 479 | assert len(loaded.manifest) == 75_000 |
| 480 | assert duration_ms < 500, f"read_snapshot 75k took {duration_ms:.1f}ms (limit: 500ms)" |
| 481 | |
| 482 | def test_snapshot_roundtrip_integrity(self, tmp_path: pathlib.Path) -> None: |
| 483 | """write + read produces bit-identical manifest.""" |
| 484 | from muse.core.snapshots import read_snapshot |
| 485 | |
| 486 | root = _fresh_repo(tmp_path) |
| 487 | manifest = {f"f{i:04d}.py": _s256(bytes([i % 256])) for i in range(10_000)} |
| 488 | snap_id = compute_snapshot_id(manifest) |
| 489 | write_snapshot(root, SnapshotRecord(snapshot_id=snap_id, manifest=manifest, created_at=_NOW)) |
| 490 | loaded = read_snapshot(root, snap_id) |
| 491 | assert loaded is not None |
| 492 | assert loaded.manifest == manifest |
| 493 | |
| 494 | |
| 495 | # --------------------------------------------------------------------------- |
| 496 | # TestFindMergeBaseCorrectness — DAG edge cases |
| 497 | # --------------------------------------------------------------------------- |
| 498 | |
| 499 | |
| 500 | class TestFindMergeBaseCorrectness: |
| 501 | """find_merge_base correctness across edge cases.""" |
| 502 | |
| 503 | def _repo_with_base( |
| 504 | self, tmp_path: pathlib.Path, max_ancestors: int = 200_000 |
| 505 | ) -> tuple[pathlib.Path, str, str]: |
| 506 | """Return (root, base_commit_id, snap_id).""" |
| 507 | root = _fresh_repo(tmp_path, max_ancestors=max_ancestors) |
| 508 | snap_id = compute_snapshot_id({}) |
| 509 | write_snapshot(root, SnapshotRecord(snapshot_id=snap_id, manifest={}, created_at=_NOW)) |
| 510 | base_id = _make_commit(root, None, snap_id, "base", 0) |
| 511 | return root, base_id, snap_id |
| 512 | |
| 513 | def test_lca_simple_diverging_chains(self, tmp_path: pathlib.Path) -> None: |
| 514 | root, base_id, snap_id = self._repo_with_base(tmp_path) |
| 515 | tip_a = _write_chain(root, 10, "a", base_id) |
| 516 | tip_b = _write_chain(root, 10, "b", base_id) |
| 517 | result = find_merge_base(root, tip_a, tip_b) |
| 518 | assert result == base_id |
| 519 | |
| 520 | def test_lca_identical_tips(self, tmp_path: pathlib.Path) -> None: |
| 521 | """find_merge_base(tip, tip) == tip.""" |
| 522 | root, base_id, snap_id = self._repo_with_base(tmp_path) |
| 523 | tip = _write_chain(root, 5, "a", base_id) |
| 524 | assert find_merge_base(root, tip, tip) == tip |
| 525 | |
| 526 | def test_lca_a_is_ancestor_of_b(self, tmp_path: pathlib.Path) -> None: |
| 527 | """When a is a direct ancestor of b, LCA is a.""" |
| 528 | root, base_id, snap_id = self._repo_with_base(tmp_path) |
| 529 | mid = _write_chain(root, 3, "chain", base_id) |
| 530 | tip = _write_chain(root, 3, "ext", mid) |
| 531 | result = find_merge_base(root, mid, tip) |
| 532 | assert result == mid |
| 533 | |
| 534 | def test_lca_disjoint_histories_returns_none(self, tmp_path: pathlib.Path) -> None: |
| 535 | root = _fresh_repo(tmp_path) |
| 536 | snap_id = compute_snapshot_id({}) |
| 537 | write_snapshot(root, SnapshotRecord(snapshot_id=snap_id, manifest={}, created_at=_NOW)) |
| 538 | a = _make_commit(root, None, snap_id, "a", 0) |
| 539 | b = _make_commit(root, None, snap_id, "b", 1) |
| 540 | assert find_merge_base(root, a, b) is None |
| 541 | |
| 542 | def test_lca_criss_cross_dag_valid_lca(self, tmp_path: pathlib.Path) -> None: |
| 543 | """Criss-cross DAG (merge_1 has parents a+b, merge_2 has parents b+a). |
| 544 | Both a and b are valid LCAs; BFS must return one of them. |
| 545 | """ |
| 546 | root, base_id, snap_id = self._repo_with_base(tmp_path) |
| 547 | a = _make_commit(root, base_id, snap_id, "a", 1) |
| 548 | b = _make_commit(root, base_id, snap_id, "b", 2) |
| 549 | |
| 550 | # m1: a merges b |
| 551 | ts = _NOW + datetime.timedelta(seconds=3) |
| 552 | m1_id = compute_commit_id( parent_ids=[a, b], |
| 553 | snapshot_id=snap_id, |
| 554 | message="merge1", |
| 555 | committed_at_iso=ts.isoformat(), |
| 556 | author="b",) |
| 557 | write_commit( |
| 558 | root, |
| 559 | CommitRecord( |
| 560 | commit_id=m1_id, branch="a", |
| 561 | message="merge1", author="b", committed_at=ts, |
| 562 | parent_commit_id=a, parent2_commit_id=b, |
| 563 | snapshot_id=snap_id, metadata={}, sem_ver_bump="PATCH", |
| 564 | ), |
| 565 | ) |
| 566 | |
| 567 | # m2: b merges a |
| 568 | ts2 = _NOW + datetime.timedelta(seconds=4) |
| 569 | m2_id = compute_commit_id( parent_ids=[b, a], |
| 570 | snapshot_id=snap_id, |
| 571 | message="merge2", |
| 572 | committed_at_iso=ts2.isoformat(), |
| 573 | author="b",) |
| 574 | write_commit( |
| 575 | root, |
| 576 | CommitRecord( |
| 577 | commit_id=m2_id, branch="b", |
| 578 | message="merge2", author="b", committed_at=ts2, |
| 579 | parent_commit_id=b, parent2_commit_id=a, |
| 580 | snapshot_id=snap_id, metadata={}, sem_ver_bump="PATCH", |
| 581 | ), |
| 582 | ) |
| 583 | |
| 584 | result = find_merge_base(root, m1_id, m2_id) |
| 585 | assert result in {a, b}, f"Expected a or b as LCA, got {result}" |
| 586 | |
| 587 | def test_lca_cap_raises_clean_error(self, tmp_path: pathlib.Path) -> None: |
| 588 | """Ancestor graph > max_ancestors raises MuseCLIError, not a silent None.""" |
| 589 | from muse.core.errors import MuseCLIError |
| 590 | |
| 591 | root = _fresh_repo(tmp_path, max_ancestors=20) |
| 592 | snap_id = compute_snapshot_id({}) |
| 593 | write_snapshot(root, SnapshotRecord(snapshot_id=snap_id, manifest={}, created_at=_NOW)) |
| 594 | # 25-long chains share no ancestor — A side will hit cap |
| 595 | prev_a: str | None = None |
| 596 | for i in range(25): |
| 597 | prev_a = _make_commit(root, prev_a, snap_id, f"a-{i}", i) |
| 598 | prev_b: str | None = None |
| 599 | for i in range(25): |
| 600 | prev_b = _make_commit(root, prev_b, snap_id, f"b-{i}", i + 25) |
| 601 | assert prev_a is not None |
| 602 | assert prev_b is not None |
| 603 | with pytest.raises(MuseCLIError, match="Ancestor graph exceeds"): |
| 604 | find_merge_base(root, prev_a, prev_b) |
| 605 | |
| 606 | def test_lca_missing_commit_handled(self, tmp_path: pathlib.Path) -> None: |
| 607 | """BFS continues gracefully when a commit file is missing (None from read_commit).""" |
| 608 | root, base_id, snap_id = self._repo_with_base(tmp_path) |
| 609 | # Chain a: base → real commit |
| 610 | tip_a = _write_chain(root, 5, "a", base_id) |
| 611 | # Chain b: points to a non-existent commit ID (ghost) |
| 612 | ghost_id = _s256(b"ghost-commit-does-not-exist") |
| 613 | # Stub the ghost so write_commit's parent-existence guard passes; |
| 614 | # find_merge_base will encounter the unreadable stub and stop. |
| 615 | import json as _json |
| 616 | ghost_payload = _json.dumps({"commit_id": ghost_id}, separators=(",", ":")).encode() |
| 617 | _p = object_path(root, ghost_id) |
| 618 | _p.parent.mkdir(parents=True, exist_ok=True) |
| 619 | _p.write_bytes(f"commit {len(ghost_payload)}\0".encode() + ghost_payload) |
| 620 | # wrap the ghost in a real commit so find_merge_base walks into it |
| 621 | ts = _NOW + datetime.timedelta(seconds=100) |
| 622 | wrap_id = compute_commit_id( parent_ids=[ghost_id], |
| 623 | snapshot_id=snap_id, |
| 624 | message="wrap", |
| 625 | committed_at_iso=ts.isoformat(), |
| 626 | author="b",) |
| 627 | write_commit( |
| 628 | root, |
| 629 | CommitRecord( |
| 630 | commit_id=wrap_id, branch="b", |
| 631 | message="wrap", author="b", committed_at=ts, |
| 632 | parent_commit_id=ghost_id, parent2_commit_id=None, |
| 633 | snapshot_id=snap_id, metadata={}, sem_ver_bump="PATCH", |
| 634 | ), |
| 635 | ) |
| 636 | # Should not raise — returns None because the ghost branch has no real ancestors |
| 637 | result = find_merge_base(root, tip_a, wrap_id) |
| 638 | assert result is None # ghost side has no ancestors in common with a |
| 639 | |
| 640 | |
| 641 | # --------------------------------------------------------------------------- |
| 642 | # TestFindMergeBasePerformance — timing at real-world commit depths |
| 643 | # --------------------------------------------------------------------------- |
| 644 | |
| 645 | |
| 646 | class TestFindMergeBasePerformance: |
| 647 | """find_merge_base timing. Each test builds fresh chains in a temp dir.""" |
| 648 | |
| 649 | @pytest.mark.parametrize("depth", [100, 500]) |
| 650 | def test_find_merge_base_depth_under_2s( |
| 651 | self, depth: int, tmp_path: pathlib.Path |
| 652 | ) -> None: |
| 653 | """find_merge_base on two diverging {depth}-commit chains: < 2 s.""" |
| 654 | root = _fresh_repo(tmp_path) |
| 655 | snap_id = compute_snapshot_id({}) |
| 656 | write_snapshot(root, SnapshotRecord(snapshot_id=snap_id, manifest={}, created_at=_NOW)) |
| 657 | base_id = _make_commit(root, None, snap_id, "base", 0) |
| 658 | tip_a = _write_chain(root, depth, "a", base_id) |
| 659 | tip_b = _write_chain(root, depth, "b", base_id) |
| 660 | t0 = time.perf_counter() |
| 661 | result = find_merge_base(root, tip_a, tip_b) |
| 662 | elapsed = time.perf_counter() - t0 |
| 663 | assert result == base_id, f"Wrong LCA at depth={depth}" |
| 664 | assert elapsed < 2, f"find_merge_base depth={depth} took {elapsed:.2f}s (limit: 2s)" |
| 665 | |
| 666 | @pytest.mark.slow |
| 667 | def test_find_merge_base_1k_depth_under_5s(self, tmp_path: pathlib.Path) -> None: |
| 668 | """find_merge_base on 1k-commit diverging chains: < 5 s.""" |
| 669 | root = _fresh_repo(tmp_path) |
| 670 | snap_id = compute_snapshot_id({}) |
| 671 | write_snapshot(root, SnapshotRecord(snapshot_id=snap_id, manifest={}, created_at=_NOW)) |
| 672 | base_id = _make_commit(root, None, snap_id, "base", 0) |
| 673 | tip_a = _write_chain(root, 1_000, "a", base_id) |
| 674 | tip_b = _write_chain(root, 1_000, "b", base_id) |
| 675 | t0 = time.perf_counter() |
| 676 | result = find_merge_base(root, tip_a, tip_b) |
| 677 | elapsed = time.perf_counter() - t0 |
| 678 | assert result == base_id |
| 679 | assert elapsed < 5, f"find_merge_base 1k took {elapsed:.2f}s (limit: 5s)" |
| 680 | |
| 681 | @pytest.mark.slow |
| 682 | def test_find_merge_base_5k_depth_under_30s(self, tmp_path: pathlib.Path) -> None: |
| 683 | """find_merge_base on 5k-commit diverging chains: < 30 s (the merge target).""" |
| 684 | root = _fresh_repo(tmp_path) |
| 685 | snap_id = compute_snapshot_id({}) |
| 686 | write_snapshot(root, SnapshotRecord(snapshot_id=snap_id, manifest={}, created_at=_NOW)) |
| 687 | base_id = _make_commit(root, None, snap_id, "base", 0) |
| 688 | tip_a = _write_chain(root, 5_000, "a", base_id) |
| 689 | tip_b = _write_chain(root, 5_000, "b", base_id) |
| 690 | t0 = time.perf_counter() |
| 691 | result = find_merge_base(root, tip_a, tip_b) |
| 692 | elapsed = time.perf_counter() - t0 |
| 693 | assert result == base_id |
| 694 | assert elapsed < 30, f"find_merge_base 5k took {elapsed:.2f}s (limit: 30s)" |
| 695 | |
| 696 | |
| 697 | # --------------------------------------------------------------------------- |
| 698 | # TestFullMergeScaleSlow — the 30-second overall target (@slow) |
| 699 | # --------------------------------------------------------------------------- |
| 700 | |
| 701 | |
| 702 | @pytest.mark.slow |
| 703 | class TestFullMergeScaleSlow: |
| 704 | """End-to-end scale targets that require @slow to keep CI fast.""" |
| 705 | |
| 706 | def test_full_pipeline_75k_5k_5k_1k_conflicts_under_30s(self) -> None: |
| 707 | """Full pipeline: 75k files, 5k ours, 5k theirs, 1k conflicts — < 30 s.""" |
| 708 | base, ours, theirs = _build_manifests(75_000, 5_000, 5_000, 1_000) |
| 709 | t0 = time.perf_counter() |
| 710 | oc = diff_snapshots(base, ours) |
| 711 | tc = diff_snapshots(base, theirs) |
| 712 | conflicts = detect_conflicts(oc, tc, ours, theirs) |
| 713 | merged = apply_merge(base, ours, theirs, oc, tc, conflicts) |
| 714 | elapsed = time.perf_counter() - t0 |
| 715 | assert len(conflicts) == 1_000 |
| 716 | assert len(merged) == 75_000 |
| 717 | assert elapsed < 30, f"Full pipeline 75k+1k conflicts took {elapsed:.2f}s (limit: 30s)" |
| 718 | |
| 719 | def test_snapshot_io_75k_three_reads_plus_write_under_30s( |
| 720 | self, tmp_path: pathlib.Path |
| 721 | ) -> None: |
| 722 | """3 reads + 1 write of a 75k-file snapshot (realistic merge I/O) < 30 s.""" |
| 723 | from muse.core.snapshots import read_snapshot |
| 724 | |
| 725 | root = _fresh_repo(tmp_path) |
| 726 | manifest = {f"f{i:06d}.py": _s256(bytes([i % 256] * 64)) for i in range(75_000)} |
| 727 | snap_id = compute_snapshot_id(manifest) |
| 728 | write_snapshot(root, SnapshotRecord(snapshot_id=snap_id, manifest=manifest, created_at=_NOW)) |
| 729 | |
| 730 | t0 = time.perf_counter() |
| 731 | for _ in range(3): |
| 732 | snap = read_snapshot(root, snap_id) |
| 733 | assert snap is not None and len(snap.manifest) == 75_000 |
| 734 | # write the merged result |
| 735 | merged_snap_id = compute_snapshot_id(manifest) |
| 736 | write_snapshot(root, SnapshotRecord(snapshot_id=merged_snap_id, manifest=manifest, created_at=_NOW)) |
| 737 | elapsed = time.perf_counter() - t0 |
| 738 | assert elapsed < 30, f"3 reads + 1 write 75k took {elapsed:.2f}s (limit: 30s)" |
| 739 | |
| 740 | def test_write_merge_state_1k_conflicts_roundtrip( |
| 741 | self, tmp_path: pathlib.Path |
| 742 | ) -> None: |
| 743 | """write_merge_state + read_merge_state with 1 000 conflict paths round-trips cleanly.""" |
| 744 | from muse.core.merge_engine import read_merge_state, write_merge_state |
| 745 | |
| 746 | root = _fresh_repo(tmp_path) |
| 747 | conflict_paths = [f"conflict/f{i:04d}.py" for i in range(1_000)] |
| 748 | |
| 749 | write_merge_state( |
| 750 | root, |
| 751 | base_commit="a" * 64, |
| 752 | ours_commit="b" * 64, |
| 753 | theirs_commit="c" * 64, |
| 754 | conflict_paths=conflict_paths, |
| 755 | other_branch="feat/big-change", |
| 756 | ) |
| 757 | |
| 758 | state = read_merge_state(root) |
| 759 | assert state is not None |
| 760 | assert len(state.conflict_paths) == 1_000 |
| 761 | assert state.other_branch == "feat/big-change" |
| 762 | # Paths must round-trip correctly |
| 763 | assert sorted(state.conflict_paths) == sorted(conflict_paths) |
File History
1 commit
sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b
fix: try fetch/presign before fetch/mpack to avoid Cloudfla…
Sonnet 4.6
patch
6 days ago