gabriel / muse public
test_stress_merge_regression.py python
1,497 lines 67.2 KB
Raw
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 20 days ago
1 """Regression stress tests for the three-way merge engine — all permutations.
2
3 Root cause (fixed in commit 73427a30):
4 CodePlugin.merge_ops silently dropped theirs-only changes when OT symbol
5 commutation masked a file-level conflict. The merged blob was the ours blob
6 verbatim, so the result reported "clean merge" with no-op file content.
7
8 Real-world impact:
9 MuseHub's executor.py ``--pid=private`` fix (removed in fix/pool-pre-ping)
10 was silently discarded when the user merged local/dev (which had a commuting
11 pool_pre_ping change to database.py). Every subsequent CI run failed with
12 "docker: --pid: invalid PID mode" until the regression was manually tracked
13 down through the object store.
14
15 This file tests every permutation of merge topology that could lead to silent
16 data loss — not just the one that burned us.
17
18 Categories
19 ----------
20 A Fast-forward / up-to-date detection (no data-loss risk, but correctness)
21 B Three-way clean merges — no conflicts anywhere
22 C Three-way with conflicts surfaced — the merge MUST stop, not silently pass
23 D The silent-drop regression — commuting OT ops on same file
24 E Theirs-only files MUST survive when there are conflicts elsewhere
25 F Strategy shortcuts (--strategy=ours / --strategy=theirs) correctness
26 G MuseHub regression scenario (pool_pre_ping + executor + AGENTS.md)
27 H Merge-base correctness for complex DAG topologies
28 I False-conflict regression — theirs-only additions falsely reported as
29 conflicts and deleted from disk when ours-snapshot == base-snapshot.
30 Root cause: muse/core/patch_record.py was deleted from disk during a
31 dev→main merge where main's previous merge had left its snapshot
32 identical to the merge base. The engine must NEVER report a theirs-only
33 addition as a conflict, and must NEVER delete it from the working tree.
34 """
35 from __future__ import annotations
36
37 import datetime
38 import json
39 import pathlib
40 import textwrap
41
42 import pytest
43 from tests.cli_test_helper import CliRunner
44 from muse.core.types import Manifest, blob_id, fake_id
45 from muse.core.paths import merge_state_path, muse_dir, ref_path
46
# Single CLI runner instance shared by every helper and test in this module.
runner = CliRunner()
# Placeholder passed as the first positional argument of ``runner.invoke``.
cli = None  # CliRunner ignores this positional arg
49
50
51 # ---------------------------------------------------------------------------
52 # Low-level repo helpers
53 # ---------------------------------------------------------------------------
54
55
def _h(label: str) -> str:
    """Map a text *label* to a stable fake content hash.

    Thin wrapper around ``fake_id``; the same label always yields the same
    ID, which lets tests build manifests without writing real objects.
    """
    placeholder_id = fake_id(label)
    return placeholder_id
59
60
def _env(root: pathlib.Path) -> Manifest:
    """Build the environment mapping that points the CLI at the repo in *root*.

    The mapping has a single key, ``MUSE_REPO_ROOT``, holding ``str(root)``.
    """
    env_vars = {"MUSE_REPO_ROOT": str(root)}
    return env_vars
63
64
def _run(root: pathlib.Path, *args: str) -> tuple[int, str]:
    """Invoke a muse CLI command against *root*, letting exceptions propagate.

    ``merge`` invocations get ``--force`` inserted directly after the
    subcommand unless the caller already passed it. The tests build
    manifest-only repos (no files on disk), so the working-tree cleanliness
    guard would otherwise always fire; ``--force`` bypasses that guard
    without affecting the merge logic being tested.

    Returns the ``(exit_code, output)`` pair reported by the runner.
    """
    argv = [*args]
    is_merge = bool(argv) and argv[0] == "merge"
    if is_merge and "--force" not in argv:
        # Slice-assign at index 1 so the flag lands right after "merge".
        argv[1:1] = ["--force"]
    outcome = runner.invoke(cli, argv, env=_env(root), catch_exceptions=False)
    return outcome.exit_code, outcome.output
77
78
def _run_unchecked(root: pathlib.Path, *args: str) -> tuple[int, str]:
    """Invoke a muse CLI command without ``catch_exceptions=False``.

    Same ``--force`` injection for ``merge`` as ``_run``, but the runner's
    default exception handling stays in place, so a failing command is
    reported through the returned ``(exit_code, output)`` pair.
    """
    cmd = list(args)
    needs_force = cmd[:1] == ["merge"] and "--force" not in cmd
    if needs_force:
        cmd = [cmd[0], "--force", *cmd[1:]]
    res = runner.invoke(cli, cmd, env=_env(root))
    return (res.exit_code, res.output)
86
87
def _write_object(root: pathlib.Path, content: bytes) -> str:
    """Store *content* in the repo's object store and return its object ID.

    The ID is derived from the bytes via ``blob_id`` and is the key under
    which the object is written.
    """
    from muse.core.object_store import write_object

    object_id = blob_id(content)
    write_object(root, object_id, content)
    return object_id
94
95
def _init_code_repo(tmp_path: pathlib.Path) -> tuple[pathlib.Path, str]:
    """Initialise a bare code-domain repo and return (root, repo_id).

    Creates the repository metadata directory (``muse_dir(tmp_path)``) with:

    * ``repo.json`` describing a ``code``-domain repo whose default branch
      is ``main``;
    * ``HEAD`` pointing at ``refs/heads/main``;
    * empty ``refs/heads``, ``snapshots``, ``commits`` and ``objects``
      directories.

    No commit or ref is written here; ``_make_commit`` does that.
    """
    dot_muse = muse_dir(tmp_path)
    dot_muse.mkdir()
    repo_id = fake_id("repo")
    repo_meta = {
        "repo_id": repo_id,
        "domain": "code",
        "default_branch": "main",
        "created_at": "2025-01-01T00:00:00+00:00",
    }
    (dot_muse / "repo.json").write_text(json.dumps(repo_meta), encoding="utf-8")
    (dot_muse / "HEAD").write_text("ref: refs/heads/main", encoding="utf-8")
    (dot_muse / "refs" / "heads").mkdir(parents=True)
    for store in ("snapshots", "commits", "objects"):
        (dot_muse / store).mkdir()
    return tmp_path, repo_id
123
124
def _make_commit(
    root: pathlib.Path,
    repo_id: str,
    branch: str = "main",
    message: str = "test",
    manifest: Manifest | None = None,
    parent_commit_id: str | None = None,
    parent2_commit_id: str | None = None,
) -> str:
    """Write a snapshot + commit and advance the branch ref.

    Args:
        root: Repository root.
        repo_id: Accepted for call-site symmetry; not used by this helper.
        branch: Branch whose ref file is advanced to the new commit.
        message: Commit message.
        manifest: Snapshot manifest; ``None`` (or empty) means an empty one.
        parent_commit_id: First parent. When ``None`` and the branch ref
            already exists, the ref's current content is used instead.
        parent2_commit_id: Optional second parent (merge commits).

    Returns:
        The ID of the commit that was written.
    """
    from muse.core.commits import (
        CommitRecord,
        write_commit,
    )
    from muse.core.snapshots import (
        SnapshotRecord,
        write_snapshot,
    )
    from muse.core.ids import hash_snapshot as compute_snapshot_id, hash_commit as compute_commit_id

    ref_file = ref_path(root, branch)
    if parent_commit_id is None and ref_file.exists():
        # Read with the same encoding the ref is written with below.
        parent_commit_id = ref_file.read_text(encoding="utf-8").strip() or None

    m = manifest or {}
    snap_id = compute_snapshot_id(m)
    committed_at = datetime.datetime.now(datetime.timezone.utc)
    # Parent order matters for the commit hash: first parent, then second.
    parent_ids: list[str] = [
        pid for pid in (parent_commit_id, parent2_commit_id) if pid
    ]
    commit_id = compute_commit_id(
        parent_ids=parent_ids,
        snapshot_id=snap_id,
        message=message,
        committed_at_iso=committed_at.isoformat(),
    )
    write_snapshot(root, SnapshotRecord(snapshot_id=snap_id, manifest=m))
    write_commit(root, CommitRecord(
        commit_id=commit_id,
        branch=branch,
        snapshot_id=snap_id,
        message=message,
        committed_at=committed_at,
        parent_commit_id=parent_commit_id,
        parent2_commit_id=parent2_commit_id,
    ))
    ref_file.parent.mkdir(parents=True, exist_ok=True)
    ref_file.write_text(commit_id, encoding="utf-8")
    return commit_id
176
177
def _write_py(root: pathlib.Path, filename: str, content: str) -> str:
    """Put Python source text into the object store and return its object ID.

    *filename* is not used: nothing is written to the working tree, only
    to the object store. That is deliberate — keeping the working tree
    empty means ``require_clean_workdir`` never aborts a merge over
    uncommitted changes, and (per the original note on this helper) the
    code plugin reads file bytes from the object store via
    ``read_object(root, obj_id)``, so on-disk presence is not needed.
    """
    encoded = content.encode()
    return _write_object(root, encoded)
187
188
def _ref(root: pathlib.Path, branch: str) -> str:
    """Return the stripped content of *branch*'s ref file (its HEAD commit ID)."""
    raw = ref_path(root, branch).read_text(encoding="utf-8")
    return raw.strip()
191
192
def _snapshot_manifest(root: pathlib.Path, branch: str) -> Manifest:
    """Look up the manifest of the snapshot that *branch* currently points at.

    Follows ref → commit → snapshot, asserting that both the commit and the
    snapshot records can be read.
    """
    from muse.core.snapshots import read_snapshot
    from muse.core.commits import read_commit

    head_commit = read_commit(root, _ref(root, branch))
    assert head_commit is not None
    head_snapshot = read_snapshot(root, head_commit.snapshot_id)
    assert head_snapshot is not None
    return head_snapshot.manifest
203
204
205 # ===========================================================================
206 # A — Fast-forward / up-to-date
207 # ===========================================================================
208
209
class TestMergeTopologyA:
    """Ensure merge base detection is correct and no data is corrupted.

    Every test forks a second branch by writing main's base commit ID into
    that branch's ref file, then advances one side with ``_make_commit``.
    """

    def test_A1_fast_forward_updates_head_and_files(self, tmp_path: pathlib.Path) -> None:
        """A1: ours is ancestor of theirs → fast-forward, working tree = theirs."""
        root, repo_id = _init_code_repo(tmp_path)
        a_id = _write_py(root, "app.py", "x = 1\n")
        _make_commit(root, repo_id, branch="main", message="base",
                     manifest={"app.py": a_id})
        base_commit = _ref(root, "main")

        # Create feature branch from same base.
        ref_path(root, "feat").write_text(base_commit, encoding="utf-8")
        b_id = _write_py(root, "app.py", "x = 2\n")
        _make_commit(root, repo_id, branch="feat", message="feat commit",
                     manifest={"app.py": b_id})

        code, out = _run(root, "merge", "feat")
        assert code == 0, out
        # main HEAD must now equal feat HEAD.
        assert _ref(root, "main") == _ref(root, "feat")
        # Manifest must equal feat's snapshot.
        assert _snapshot_manifest(root, "main") == {"app.py": b_id}

    def test_A2_already_up_to_date_prints_message(self, tmp_path: pathlib.Path) -> None:
        """A2: theirs is ancestor of ours → 'Already up to date.'"""
        root, repo_id = _init_code_repo(tmp_path)
        a_id = _write_py(root, "f.py", "a = 1\n")
        base_c = _make_commit(root, repo_id, branch="main", message="base",
                              manifest={"f.py": a_id})
        ref_path(root, "old").write_text(base_c, encoding="utf-8")
        b_id = _write_py(root, "f.py", "a = 2\n")
        _make_commit(root, repo_id, branch="main", message="advance",
                     manifest={"f.py": b_id})

        code, out = _run(root, "merge", "old")
        assert code == 0, out
        assert "up to date" in out.lower()
        # main must not have moved back.
        assert _snapshot_manifest(root, "main") == {"f.py": b_id}

    def test_A3_fast_forward_json_reports_fast_forward_status(self, tmp_path: pathlib.Path) -> None:
        """A3: JSON output for fast-forward has status='fast_forward'."""
        root, repo_id = _init_code_repo(tmp_path)
        a_id = _write_py(root, "f.py", "a = 1\n")
        base_c = _make_commit(root, repo_id, branch="main", message="base",
                              manifest={"f.py": a_id})
        ref_path(root, "feat").write_text(base_c, encoding="utf-8")
        b_id = _write_py(root, "f.py", "a = 2\n")
        _make_commit(root, repo_id, branch="feat", message="feat",
                     manifest={"f.py": b_id})

        code, out = _run(root, "merge", "--json", "feat")
        assert code == 0, out
        data = json.loads(out)
        assert data["status"] == "fast_forward"
        assert data["conflicts"] == []

    def test_A4_fast_forward_preserves_all_theirs_files(self, tmp_path: pathlib.Path) -> None:
        """A4: fast-forward with 50 files — all must appear in main's manifest."""
        root, repo_id = _init_code_repo(tmp_path)
        a_id = _write_py(root, "base.py", "base = True\n")
        base_c = _make_commit(root, repo_id, branch="main", message="base",
                              manifest={"base.py": a_id})
        ref_path(root, "feat").write_text(base_c, encoding="utf-8")

        manifest: Manifest = {"base.py": a_id}
        for i in range(50):
            oid = _write_py(root, f"module_{i:02d}.py", f"x_{i} = {i}\n")
            manifest[f"module_{i:02d}.py"] = oid
        _make_commit(root, repo_id, branch="feat", message="many files",
                     manifest=manifest)

        code, out = _run(root, "merge", "feat")
        # Surface the CLI output on failure, like the other tests in this class.
        assert code == 0, out
        merged = _snapshot_manifest(root, "main")
        for i in range(50):
            assert f"module_{i:02d}.py" in merged, f"module_{i:02d}.py missing after fast-forward"

    def test_A5_no_ff_creates_merge_commit(self, tmp_path: pathlib.Path) -> None:
        """A5: --no-ff skips fast-forward and always creates a merge commit."""
        from muse.core.commits import read_commit

        root, repo_id = _init_code_repo(tmp_path)
        a_id = _write_py(root, "f.py", "a = 1\n")
        base_c = _make_commit(root, repo_id, branch="main", message="base",
                              manifest={"f.py": a_id})
        ref_path(root, "feat").write_text(base_c, encoding="utf-8")
        b_id = _write_py(root, "f.py", "a = 2\n")
        _make_commit(root, repo_id, branch="feat", message="feat",
                     manifest={"f.py": b_id})

        pre_main = _ref(root, "main")
        code, out = _run(root, "merge", "--no-ff", "feat")
        assert code == 0, out
        post_main = _ref(root, "main")
        # HEAD must have advanced (new merge commit created).
        assert post_main != pre_main
        # The new commit must have TWO parents.
        commit = read_commit(root, post_main)
        assert commit is not None
        assert commit.parent2_commit_id is not None, "no-ff must create merge commit with 2 parents"
311
312
313 # ===========================================================================
314 # B — Three-way clean merges (no conflicts anywhere)
315 # ===========================================================================
316
317
class TestThreeWayCleanMergeB:
    """Clean three-way merges: every one-sided change must land in the merged snapshot."""

    def test_B1_disjoint_file_changes_both_survive(self, tmp_path: pathlib.Path) -> None:
        """B1: main edits a.py while feat edits b.py; the merge must carry both edits."""
        root, repo_id = _init_code_repo(tmp_path)
        a_base = _write_py(root, "a.py", "a = 0\n")
        b_base = _write_py(root, "b.py", "b = 0\n")
        fork_point = _make_commit(
            root, repo_id, branch="main", message="base",
            manifest={"a.py": a_base, "b.py": b_base},
        )
        (ref_path(root, "feat")).write_text(fork_point)

        # Ours touches only a.py.
        a_new = _write_py(root, "a.py", "a = 1\n")
        ours_manifest = {"a.py": a_new, "b.py": b_base}
        _make_commit(root, repo_id, branch="main", message="ours: change a",
                     manifest=ours_manifest)

        # Theirs touches only b.py.
        b_new = _write_py(root, "b.py", "b = 1\n")
        theirs_manifest = {"a.py": a_base, "b.py": b_new}
        _make_commit(root, repo_id, branch="feat", message="theirs: change b",
                     manifest=theirs_manifest)

        exit_code, output = _run(root, "merge", "feat")
        assert exit_code == 0, output
        merged = _snapshot_manifest(root, "main")
        assert merged.get("a.py") == a_new, "ours change to a.py lost after clean merge"
        assert merged.get("b.py") == b_new, "theirs change to b.py lost after clean merge"

    def test_B2_theirs_adds_new_file(self, tmp_path: pathlib.Path) -> None:
        """B2: feat introduces new.py, which main never had; it must be merged in."""
        root, repo_id = _init_code_repo(tmp_path)
        a_base = _write_py(root, "a.py", "a = 0\n")
        fork_point = _make_commit(
            root, repo_id, branch="main", message="base",
            manifest={"a.py": a_base},
        )
        (ref_path(root, "feat")).write_text(fork_point)

        added_id = _write_py(root, "new.py", "new = True\n")
        theirs_manifest = {"a.py": a_base, "new.py": added_id}
        _make_commit(root, repo_id, branch="feat", message="add new.py",
                     manifest=theirs_manifest)

        exit_code, output = _run(root, "merge", "feat")
        assert exit_code == 0, output
        merged = _snapshot_manifest(root, "main")
        assert "new.py" in merged, "theirs new file lost"

    def test_B3_theirs_deletes_file_ours_never_touched(self, tmp_path: pathlib.Path) -> None:
        """B3: feat removes stale.py while main only edits a.py; the removal must apply."""
        root, repo_id = _init_code_repo(tmp_path)
        a_base = _write_py(root, "a.py", "a = 0\n")
        stale_base = _write_py(root, "stale.py", "dead = True\n")
        fork_point = _make_commit(
            root, repo_id, branch="main", message="base",
            manifest={"a.py": a_base, "stale.py": stale_base},
        )
        (ref_path(root, "feat")).write_text(fork_point)

        a_new = _write_py(root, "a.py", "a = 1\n")
        ours_manifest = {"a.py": a_new, "stale.py": stale_base}
        theirs_manifest = {"a.py": a_base}
        _make_commit(root, repo_id, branch="main", message="ours: tweak a",
                     manifest=ours_manifest)
        _make_commit(root, repo_id, branch="feat", message="theirs: rm stale.py",
                     manifest=theirs_manifest)

        exit_code, output = _run(root, "merge", "feat")
        assert exit_code == 0, output
        merged = _snapshot_manifest(root, "main")
        assert "stale.py" not in merged, "theirs deletion of stale.py was not applied"
        assert merged.get("a.py") == a_new, "ours change to a.py lost"

    def test_B4_many_theirs_only_additions_all_survive(self, tmp_path: pathlib.Path) -> None:
        """B4: feat adds 30 modules while main bumps one file; all 30 must be merged in."""
        root, repo_id = _init_code_repo(tmp_path)
        base_id = _write_py(root, "main.py", "x = 0\n")
        fork_point = _make_commit(
            root, repo_id, branch="main", message="base",
            manifest={"main.py": base_id},
        )
        (ref_path(root, "feat")).write_text(fork_point)

        # Ours: bump main.py.
        bumped_id = _write_py(root, "main.py", "x = 1\n")
        _make_commit(root, repo_id, branch="main", message="ours: bump",
                     manifest={"main.py": bumped_id})

        # Theirs: keep main.py at base and add 30 new modules.
        added_modules = {
            f"mod_{n}.py": _write_py(root, f"mod_{n}.py", f"MOD_{n} = True\n")
            for n in range(30)
        }
        theirs_manifest = {"main.py": base_id, **added_modules}
        _make_commit(root, repo_id, branch="feat", message="theirs: add 30 mods",
                     manifest=theirs_manifest)

        exit_code, output = _run(root, "merge", "feat")
        assert exit_code == 0, output
        merged = _snapshot_manifest(root, "main")
        for n in range(30):
            assert f"mod_{n}.py" in merged, f"mod_{n}.py missing after clean three-way merge"
409
410
411 # ===========================================================================
412 # C — Three-way with conflicts that MUST be surfaced
413 # ===========================================================================
414
415
class TestThreeWayConflictSurfacedC:
    """Conflicts must be reported; the merge must NOT silently produce wrong content.

    Each test has both branches rewrite the body of the same function in the
    same file, then runs the merge through ``_run_unchecked`` so a non-zero
    exit is observed rather than raised.
    """

    def test_C1_genuine_conflict_exits_nonzero(self, tmp_path: pathlib.Path) -> None:
        """C1: both sides change the same symbol in the same file → exit nonzero."""
        root, repo_id = _init_code_repo(tmp_path)
        a0 = _write_py(root, "service.py", textwrap.dedent("""\
            def charge():
                return 'v1'
        """))
        base_c = _make_commit(root, repo_id, branch="main", message="base",
                              manifest={"service.py": a0})
        ref_path(root, "feat").write_text(base_c, encoding="utf-8")

        a_ours = _write_py(root, "service.py", textwrap.dedent("""\
            def charge():
                return 'ours-v2'
        """))
        _make_commit(root, repo_id, branch="main", message="ours: change charge",
                     manifest={"service.py": a_ours})

        a_theirs = _write_py(root, "service.py", textwrap.dedent("""\
            def charge():
                return 'theirs-v2'
        """))
        _make_commit(root, repo_id, branch="feat", message="theirs: change charge",
                     manifest={"service.py": a_theirs})

        code, out = _run_unchecked(root, "merge", "feat")
        # Include the CLI output so an unexpected success is diagnosable.
        assert code != 0, f"conflict must exit nonzero, not silently succeed\n{out}"

    def test_C2_conflict_creates_merge_state_json(self, tmp_path: pathlib.Path) -> None:
        """C2: conflict writes MERGE_STATE.json with the right fields."""
        root, repo_id = _init_code_repo(tmp_path)
        f0 = _write_py(root, "f.py", textwrap.dedent("""\
            def foo():
                return 1
        """))
        base_c = _make_commit(root, repo_id, branch="main", message="base",
                              manifest={"f.py": f0})
        ref_path(root, "feat").write_text(base_c, encoding="utf-8")

        f_ours = _write_py(root, "f.py", textwrap.dedent("""\
            def foo():
                return 2
        """))
        _make_commit(root, repo_id, branch="main", message="ours",
                     manifest={"f.py": f_ours})

        f_theirs = _write_py(root, "f.py", textwrap.dedent("""\
            def foo():
                return 99
        """))
        _make_commit(root, repo_id, branch="feat", message="theirs",
                     manifest={"f.py": f_theirs})

        # Exit code and output are not the subject of this test.
        _run_unchecked(root, "merge", "feat")
        state_path = merge_state_path(root)
        assert state_path.exists(), "MERGE_STATE.json must be written on conflict"
        state = json.loads(state_path.read_text(encoding="utf-8"))
        assert "ours_commit" in state
        assert "theirs_commit" in state
        assert "conflict_paths" in state

    def test_C3_conflict_json_format_lists_paths(self, tmp_path: pathlib.Path) -> None:
        """C3: ``--json`` reports status 'conflict' with a non-empty conflicts list."""
        root, repo_id = _init_code_repo(tmp_path)
        f0 = _write_py(root, "svc.py", textwrap.dedent("""\
            def go():
                pass
        """))
        base_c = _make_commit(root, repo_id, branch="main", message="base",
                              manifest={"svc.py": f0})
        ref_path(root, "feat").write_text(base_c, encoding="utf-8")

        f1 = _write_py(root, "svc.py", textwrap.dedent("""\
            def go():
                return 'ours'
        """))
        _make_commit(root, repo_id, branch="main", message="ours", manifest={"svc.py": f1})

        f2 = _write_py(root, "svc.py", textwrap.dedent("""\
            def go():
                return 'theirs'
        """))
        _make_commit(root, repo_id, branch="feat", message="theirs", manifest={"svc.py": f2})

        # _run_unchecked builds the argv ["merge", "--force", "--json", "feat"].
        _, out = _run_unchecked(root, "merge", "--json", "feat")
        data = json.loads(out)
        assert data["status"] == "conflict"
        assert len(data["conflicts"]) > 0
508
509
510 # ===========================================================================
511 # D — The silent-drop regression (commuting OT ops on same file)
512 # ===========================================================================
513
514
class TestSilentDropRegressionD:
    """
    The exact bug that burned us: two branches modify DIFFERENT symbols in
    the same file. OT sees them as commuting (non-conflicting at symbol level),
    but cannot reconstruct the merged blob. Before the fix this silently
    produced the ours blob and dropped all theirs changes in that file.
    After the fix, this must either surface a conflict or correctly auto-merge.

    In either case: theirs-only CHANGES to OTHER FILES must always survive.
    """

    def test_D1_commuting_symbol_changes_do_not_silently_succeed(
        self, tmp_path: pathlib.Path
    ) -> None:
        """D1: ours changes func_a, theirs changes func_b — must conflict or merge, never silently lose theirs."""
        root, repo_id = _init_code_repo(tmp_path)
        base_code = textwrap.dedent("""\
            def func_a():
                return 'a-v1'

            def func_b():
                return 'b-v1'
        """)
        f0 = _write_py(root, "lib.py", base_code)
        base_c = _make_commit(root, repo_id, branch="main", message="base",
                              manifest={"lib.py": f0})
        ref_path(root, "feat").write_text(base_c, encoding="utf-8")

        ours_code = textwrap.dedent("""\
            def func_a():
                return 'a-v2'

            def func_b():
                return 'b-v1'
        """)
        f_ours = _write_py(root, "lib.py", ours_code)
        _make_commit(root, repo_id, branch="main", message="ours: change func_a",
                     manifest={"lib.py": f_ours})

        theirs_code = textwrap.dedent("""\
            def func_a():
                return 'a-v1'

            def func_b():
                return 'b-v2'
        """)
        f_theirs = _write_py(root, "lib.py", theirs_code)
        _make_commit(root, repo_id, branch="feat", message="theirs: change func_b",
                     manifest={"lib.py": f_theirs})

        _, out = _run_unchecked(root, "merge", "--json", "feat")
        data = json.loads(out)

        if data["status"] == "merged":
            merged = _snapshot_manifest(root, "main")
            assert "lib.py" in merged
            merged_blob = merged["lib.py"]
            # Object IDs are content-derived, so a blob carrying BOTH edits
            # cannot share an ID with any single-sided (or base) blob. The
            # original bug stored the ours blob verbatim.
            assert merged_blob != f_ours, (
                "REGRESSION: merged lib.py is the ours blob verbatim — "
                "theirs change to func_b was silently dropped"
            )
            assert merged_blob != f_theirs, (
                "merged lib.py is the theirs blob verbatim — "
                "ours change to func_a was silently dropped"
            )
            assert merged_blob != f0, "merged lib.py reverted to the base blob"
        else:
            # If conflict: that is correct — better a conflict than silent data loss.
            assert data["status"] == "conflict"
            assert len(data["conflicts"]) > 0

    def test_D2_theirs_only_file_survives_commuting_conflict(
        self, tmp_path: pathlib.Path
    ) -> None:
        """D2: regression core — theirs-only executor.py change must survive even if database.py conflicts."""
        root, repo_id = _init_code_repo(tmp_path)
        base_db = textwrap.dedent("""\
            def pool():
                pass
        """)
        base_exec = textwrap.dedent("""\
            def run():
                args = ['--pid=private']
                return args
        """)
        db0 = _write_py(root, "database.py", base_db)
        exec0 = _write_py(root, "executor.py", base_exec)
        base_c = _make_commit(root, repo_id, branch="main", message="base",
                              manifest={"database.py": db0, "executor.py": exec0})
        ref_path(root, "fix-branch").write_text(base_c, encoding="utf-8")

        # ours (dev): fix pool_pre_ping in database.py, don't touch executor.py
        ours_db = textwrap.dedent("""\
            def pool():
                return 'pool_pre_ping=True'
        """)
        db_ours = _write_py(root, "database.py", ours_db)
        _make_commit(root, repo_id, branch="main", message="ours: pool_pre_ping fix",
                     manifest={"database.py": db_ours, "executor.py": exec0})

        # theirs (fix-branch): fix pool_pre_ping the same way AND fix executor.py
        theirs_db = textwrap.dedent("""\
            def pool():
                return 'pool_pre_ping=True'
        """)
        theirs_exec = textwrap.dedent("""\
            def run():
                args = [] # --pid=private removed (invalid Docker flag)
                return args
        """)
        db_theirs = _write_py(root, "database.py", theirs_db)
        exec_theirs = _write_py(root, "executor.py", theirs_exec)
        _make_commit(root, repo_id, branch="fix-branch",
                     message="theirs: pool_pre_ping + remove --pid=private",
                     manifest={"database.py": db_theirs, "executor.py": exec_theirs})

        _, out = _run_unchecked(root, "merge", "--json", "fix-branch")
        data = json.loads(out)

        # The critical assertion: in any outcome, executor.py must NOT be the old version.
        # Either the merge succeeded and executor.py has the fix, OR a conflict is raised
        # so the user can resolve it. What is NEVER acceptable: silent success with old content.
        if data["status"] == "merged":
            merged = _snapshot_manifest(root, "main")
            # executor.py must be the FIXED version (no --pid=private), not the base.
            assert merged.get("executor.py") == exec_theirs, (
                "REGRESSION: executor.py fix was silently dropped — "
                "the theirs-only change was lost in the merge"
            )
        else:
            # Conflict is acceptable (user can resolve), silent data loss is not.
            assert data["status"] == "conflict"

    def test_D3_identical_object_hash_on_both_sides_no_file_conflict(
        self, tmp_path: pathlib.Path
    ) -> None:
        """D3: both sides converge to the EXACT same object hash — file-level conflict impossible.

        When ours and theirs both arrive at the same content hash for a file,
        diff_snapshots sees them as identical (no change relative to each other).
        The merge engine must treat this as a clean convergence — or at minimum,
        the resulting manifest must contain that file at the shared hash.

        This tests the file-level merge_engine layer (diff_snapshots / apply_merge).
        Symbol-level conflict detection (within the file) is separate and handled
        by the plugin — if the plugin marks it as conflicting despite identical
        hashes, that is a plugin-level decision, not a data-loss scenario.
        """
        from muse.core.merge_engine import diff_snapshots, detect_conflicts, apply_merge

        fixed_hash = _h("pool_pre_ping_fix_content")
        base_hash = _h("original_pool_content")
        other_hash = _h("other")  # untouched on every side

        base_manifest = {"database.py": base_hash, "other.py": other_hash}
        ours_manifest = {"database.py": fixed_hash, "other.py": other_hash}
        theirs_manifest = {"database.py": fixed_hash, "other.py": other_hash}

        ours_changed = diff_snapshots(base_manifest, ours_manifest)
        theirs_changed = diff_snapshots(base_manifest, theirs_manifest)
        conflicts = detect_conflicts(ours_changed, theirs_changed, ours_manifest, theirs_manifest)
        merged = apply_merge(base_manifest, ours_manifest, theirs_manifest,
                             ours_changed, theirs_changed, conflicts)

        # Both sides converged to the SAME hash — detect_conflicts must not flag it.
        assert "database.py" not in conflicts, (
            "D3 VIOLATED: convergent same-hash change wrongly reported as conflict"
        )
        # apply_merge must include database.py at the agreed fixed hash.
        assert merged.get("database.py") == fixed_hash, (
            "D3 VIOLATED: database.py absent or at wrong hash after convergent merge"
        )

    def test_D4_the_musehub_regression_scenario(self, tmp_path: pathlib.Path) -> None:
        """D4: topology from the MuseHub incident — two branches diverging from one base.

        Timeline:
        base → ours (dev): pool_pre_ping DB fix
        base → theirs (fix-branch): pool_pre_ping fix + --pid fix + AGENTS.md rewrite + new_feature.py

        When user merges fix-branch into dev:
        - database.py: both changed (same content, should be clean OR conflict)
        - executor.py: theirs-only change → MUST survive in merged
        - agents.md: theirs-only change → MUST survive in merged
        - new_feature.py: theirs-only addition → MUST survive in merged
        """
        root, repo_id = _init_code_repo(tmp_path)

        # Base state
        db0 = _write_py(root, "database.py", "def pool(): pass\n")
        exec0 = _write_py(root, "executor.py", "args = ['--pid=private']\n")
        agents0 = _write_py(root, "agents.md", "# Short docs\n")
        base_c = _make_commit(root, repo_id, branch="main", message="base",
                              manifest={"database.py": db0, "executor.py": exec0,
                                        "agents.md": agents0})
        ref_path(root, "fix-branch").write_text(base_c, encoding="utf-8")

        # ours (dev): pool_pre_ping only
        db_ours = _write_py(root, "database.py", "def pool(): return 'pool_pre_ping=True'\n")
        _make_commit(root, repo_id, branch="main", message="ours: pool_pre_ping",
                     manifest={"database.py": db_ours, "executor.py": exec0,
                               "agents.md": agents0})

        # theirs (fix-branch): pool_pre_ping + pid fix + AGENTS.md rewrite + new file
        db_theirs = _write_py(root, "database.py", "def pool(): return 'pool_pre_ping=True'\n")
        exec_theirs = _write_py(root, "executor.py", "args = [] # no --pid\n")
        agents_theirs = _write_py(root, "agents.md", "# Comprehensive 700-line rewrite\n" * 10)
        new_feat = _write_py(root, "new_feature.py", "NEW = True\n")
        _make_commit(root, repo_id, branch="fix-branch",
                     message="theirs: comprehensive fix mpack",
                     manifest={"database.py": db_theirs, "executor.py": exec_theirs,
                               "agents.md": agents_theirs, "new_feature.py": new_feat})

        _, out = _run_unchecked(root, "merge", "--json", "fix-branch")
        data = json.loads(out)

        if data["status"] == "merged":
            m = _snapshot_manifest(root, "main")

            assert m.get("executor.py") == exec_theirs, (
                "REGRESSION: executor.py (--pid fix) was silently dropped"
            )
            assert m.get("agents.md") == agents_theirs, (
                "REGRESSION: agents.md rewrite was silently dropped"
            )
            assert "new_feature.py" in m, (
                "REGRESSION: new_feature.py addition was silently dropped"
            )
        else:
            # A conflict is an acceptable outcome.
            # But check that it's not some other failure mode.
            assert data["status"] == "conflict", f"unexpected status: {data['status']}"
761
762
763 # ===========================================================================
764 # E — Theirs-only files MUST survive even when there are conflicts elsewhere
765 # ===========================================================================
766
767
class TestTheirsOnlySurvivesConflictE:
    """Category E: theirs-only additions alongside a divergent edit elsewhere.

    Every scenario has ours and theirs rewrite the same function differently
    while theirs also introduces brand-new files.  Those new files are not
    conflicts: after a clean merge they must be present in main's snapshot,
    and after a conflict stop they must not be listed under ``conflicts``.

    The checks read only the ``merge --json`` output and, on a clean merge,
    the snapshot manifest of ``main``.
    """

    def test_E1_theirs_additions_included_in_merged_manifest_despite_conflict(
        self, tmp_path: pathlib.Path
    ) -> None:
        """E1: a.py diverges on both sides; theirs additionally adds b.py and c.py."""
        root, repo_id = _init_code_repo(tmp_path)

        # Common ancestor: main and feat both start from this commit.
        base_blob = _write_py(root, "a.py", textwrap.dedent("""\
            def go():
                return 1
        """))
        fork_commit = _make_commit(
            root, repo_id, branch="main", message="base",
            manifest={"a.py": base_blob},
        )
        ref_path(root, "feat").write_text(fork_commit)

        # Ours: go() now returns 'ours'.
        ours_blob = _write_py(root, "a.py", textwrap.dedent("""\
            def go():
                return 'ours'
        """))
        _make_commit(
            root, repo_id, branch="main", message="ours: change a.py",
            manifest={"a.py": ours_blob},
        )

        # Theirs: go() returns 'theirs', plus two files nobody else touched.
        theirs_a = _write_py(root, "a.py", textwrap.dedent("""\
            def go():
                return 'theirs'
        """))
        theirs_b = _write_py(root, "b.py", "B = True\n")
        theirs_c = _write_py(root, "c.py", "C = True\n")
        _make_commit(
            root, repo_id, branch="feat", message="theirs: change a + add b + add c",
            manifest={"a.py": theirs_a, "b.py": theirs_b, "c.py": theirs_c},
        )

        merge_argv = ["merge", "--force", "--json", "feat"]
        outcome = runner.invoke(cli, merge_argv, env=_env(root))
        report = json.loads(outcome.output)
        status = report["status"]

        if status != "merged":
            # The only other accepted outcome is a conflict stop.  The new
            # files were added by theirs alone, so they must not be reported
            # as conflicting paths.
            assert status == "conflict"
            flagged = report.get("conflicts", [])
            assert "b.py" not in flagged, "b.py incorrectly marked as conflict"
            assert "c.py" not in flagged, "c.py incorrectly marked as conflict"
            return

        # Clean (auto-resolved) merge: both additions must be in main's snapshot.
        merged = _snapshot_manifest(root, "main")
        assert "b.py" in merged, "theirs-only b.py was lost despite clean merge of other files"
        assert "c.py" in merged, "theirs-only c.py was lost despite clean merge of other files"

    def test_E2_ten_theirs_only_files_all_excluded_from_conflict_list(
        self, tmp_path: pathlib.Path
    ) -> None:
        """E2: ten files added only by theirs never show up in the conflict list."""
        root, repo_id = _init_code_repo(tmp_path)

        # Common ancestor with a single main.py.
        base_blob = _write_py(root, "main.py", textwrap.dedent("""\
            def run():
                pass
        """))
        fork_commit = _make_commit(
            root, repo_id, branch="main", message="base",
            manifest={"main.py": base_blob},
        )
        ref_path(root, "feat").write_text(fork_commit)

        # Ours and theirs rewrite run() differently.
        ours_blob = _write_py(root, "main.py", textwrap.dedent("""\
            def run():
                return 'ours'
        """))
        _make_commit(
            root, repo_id, branch="main", message="ours: modify run",
            manifest={"main.py": ours_blob},
        )

        theirs_blob = _write_py(root, "main.py", textwrap.dedent("""\
            def run():
                return 'theirs'
        """))
        theirs_files: Manifest = {"main.py": theirs_blob}

        # Theirs also adds extra_0.py .. extra_9.py, written in index order.
        extra_names = [f"extra_{i}.py" for i in range(10)]
        for i, name in enumerate(extra_names):
            theirs_files[name] = _write_py(root, name, f"EXTRA_{i} = True\n")
        _make_commit(
            root, repo_id, branch="feat", message="theirs: conflict + 10 extras",
            manifest=theirs_files,
        )

        merge_argv = ["merge", "--force", "--json", "feat"]
        outcome = runner.invoke(cli, merge_argv, env=_env(root))
        report = json.loads(outcome.output)

        # Whatever the status, none of the additions may be called a conflict.
        flagged = report.get("conflicts", [])
        for name in extra_names:
            assert name not in flagged, (
                f"{name} is a theirs-only addition — must not appear in conflicts"
            )
872
873
874 # ===========================================================================
875 # F — Strategy shortcuts correctness
876 # ===========================================================================
877
878
class TestStrategyShortcutsF:
    """Category F: ``--strategy=ours`` / ``--strategy=theirs`` keep the other side's clean changes.

    A strategy flag decides which side wins for files both branches changed.
    Files changed by only one branch are not conflicts and must still end up
    in the merged snapshot, whichever strategy is chosen.

    The bug guarded against here: ``--strategy=ours`` once took the whole
    ours manifest and thereby dropped every theirs-only change.
    """

    def test_F1_strategy_ours_preserves_theirs_only_files(self, tmp_path: pathlib.Path) -> None:
        """F1: with --strategy=ours, a.py stays ours and theirs-only b.py is kept."""
        root, repo_id = _init_code_repo(tmp_path)

        # Common ancestor shared by main and feat.
        base_blob = _write_py(root, "a.py", textwrap.dedent("""\
            def go():
                return 1
        """))
        fork_commit = _make_commit(
            root, repo_id, branch="main", message="base",
            manifest={"a.py": base_blob},
        )
        ref_path(root, "feat").write_text(fork_commit)

        # Ours edits a.py.
        ours_a = _write_py(root, "a.py", textwrap.dedent("""\
            def go():
                return 'ours'
        """))
        _make_commit(root, repo_id, branch="main", message="ours", manifest={"a.py": ours_a})

        # Theirs edits a.py differently and adds b.py.
        theirs_a = _write_py(root, "a.py", textwrap.dedent("""\
            def go():
                return 'theirs'
        """))
        theirs_b = _write_py(root, "b.py", "B = True\n")
        _make_commit(
            root, repo_id, branch="feat", message="theirs",
            manifest={"a.py": theirs_a, "b.py": theirs_b},
        )

        exit_code, output = _run(root, "merge", "--strategy", "ours", "feat")
        assert exit_code == 0, output

        merged = _snapshot_manifest(root, "main")
        # The file both sides touched resolves to ours ...
        assert merged.get("a.py") == ours_a, "--strategy=ours must keep ours version of conflicting file"
        # ... while the file only theirs touched is carried over.
        assert "b.py" in merged, (
            "REGRESSION: --strategy=ours discarded theirs-only b.py. "
            "Non-conflicting theirs changes must still be applied."
        )

    def test_F2_strategy_theirs_preserves_ours_only_files(self, tmp_path: pathlib.Path) -> None:
        """F2: with --strategy=theirs, a.py becomes theirs and ours-only c.py is kept."""
        root, repo_id = _init_code_repo(tmp_path)

        # Common ancestor shared by main and feat.
        base_blob = _write_py(root, "a.py", textwrap.dedent("""\
            def go():
                return 1
        """))
        fork_commit = _make_commit(
            root, repo_id, branch="main", message="base",
            manifest={"a.py": base_blob},
        )
        ref_path(root, "feat").write_text(fork_commit)

        # Ours edits a.py and adds c.py.
        ours_a = _write_py(root, "a.py", textwrap.dedent("""\
            def go():
                return 'ours'
        """))
        ours_c = _write_py(root, "c.py", "C = True\n")
        _make_commit(
            root, repo_id, branch="main", message="ours",
            manifest={"a.py": ours_a, "c.py": ours_c},
        )

        # Theirs only edits a.py.
        theirs_a = _write_py(root, "a.py", textwrap.dedent("""\
            def go():
                return 'theirs'
        """))
        _make_commit(
            root, repo_id, branch="feat", message="theirs",
            manifest={"a.py": theirs_a},
        )

        exit_code, output = _run(root, "merge", "--strategy", "theirs", "feat")
        assert exit_code == 0, output

        merged = _snapshot_manifest(root, "main")
        # The file both sides touched resolves to theirs ...
        assert merged.get("a.py") == theirs_a, "--strategy=theirs must keep theirs version"
        # ... while the file only ours touched survives.
        assert "c.py" in merged, (
            "REGRESSION: --strategy=theirs discarded ours-only c.py. "
            "Non-conflicting ours changes must still be applied."
        )

    def test_F3_strategy_ours_with_zero_ours_changes_is_up_to_date(
        self, tmp_path: pathlib.Path
    ) -> None:
        """F3: ours has no commits after base; --strategy=ours must still bring in g.py.

        The assertion is on the resulting snapshot of main (g.py present),
        not on a particular status string.
        """
        root, repo_id = _init_code_repo(tmp_path)

        base_blob = _write_py(root, "f.py", "x = 0\n")
        fork_commit = _make_commit(
            root, repo_id, branch="main", message="base",
            manifest={"f.py": base_blob},
        )
        ref_path(root, "feat").write_text(fork_commit)

        # Theirs adds g.py; main receives no further commits.
        theirs_g = _write_py(root, "g.py", "g = True\n")
        _make_commit(
            root, repo_id, branch="feat", message="theirs: add g.py",
            manifest={"f.py": base_blob, "g.py": theirs_g},
        )

        exit_code, output = _run(root, "merge", "--strategy", "ours", "feat")
        assert exit_code == 0, output

        merged = _snapshot_manifest(root, "main")
        assert "g.py" in merged, "theirs-only addition lost with --strategy=ours when ours has no changes"
984
985
986 # ===========================================================================
987 # G — Full MuseHub regression scenario: pool_pre_ping + executor + AGENTS.md
988 # ===========================================================================
989
990
class TestMuseHubRegressionScenarioG:
    """Category G: the MuseHub incident, reduced to a three-file repository.

    The scenario rebuilds the branch shape behind the
    'docker: --pid: invalid PID mode' CI failures: both sides make the same
    database.py change while theirs alone fixes executor.py and rewrites
    agents.md.  G1 is intended to fail on code from before commit 73427a30
    and pass afterwards; G2 and G3 pin structural properties of the merge
    commit itself.
    """

    def test_G1_musehub_incident_executor_fix_not_lost(self, tmp_path: pathlib.Path) -> None:
        """G1: theirs-only executor.py / agents.md changes survive the merge.

        Commits built by the test:
          C0 (main, base)  database.py v1, executor.py v1 (has --pid=private), agents.md v1
          C1 (main, ours)  database.py gets the pool_pre_ping change
          C2 (fix-pool)    the same database.py change, executor.py without
                           --pid=private, and a rewritten agents.md

        On status "merged" the snapshot of main must hold theirs' executor.py
        and agents.md and the pool_pre_ping database.py.  On status "conflict"
        neither executor.py nor agents.md may be listed as conflicting.  Any
        other status fails the test.
        """
        from muse.core.commits import read_commit
        from muse.core.snapshots import read_snapshot

        root, repo_id = _init_code_repo(tmp_path)

        # C0: the common base; fix-pool starts here.
        base_db = _write_py(root, "database.py",
                            "def init_db(): return engine\n")
        base_exec = _write_py(root, "executor.py",
                              "DOCKER_ARGS = ['--memory=1g', '--pid=private']\n")
        base_agents = _write_py(root, "agents.md",
                                "# MuseHub Agent Contract\nDo stuff.\n")
        base_commit = _make_commit(
            root, repo_id, branch="main", message="C0: base",
            manifest={"database.py": base_db, "executor.py": base_exec,
                      "agents.md": base_agents},
        )
        ref_path(root, "fix-pool").write_text(base_commit)

        # C1 (ours): only database.py changes.
        ours_db = _write_py(root, "database.py",
                            "def init_db(): return engine.execution_options(pool_pre_ping=True)\n")
        _make_commit(
            root, repo_id, branch="main", message="C1: pool_pre_ping",
            manifest={"database.py": ours_db, "executor.py": base_exec,
                      "agents.md": base_agents},
        )

        # C2 (theirs): identical database.py text, plus the two theirs-only edits.
        theirs_db = _write_py(root, "database.py",
                              "def init_db(): return engine.execution_options(pool_pre_ping=True)\n")
        theirs_exec = _write_py(root, "executor.py",
                                "DOCKER_ARGS = ['--memory=1g'] # --pid=private removed\n")
        theirs_agents = _write_py(root, "agents.md",
                                  "# Comprehensive 700-line rewrite\n" * 20)
        _make_commit(
            root, repo_id, branch="fix-pool",
            message="C2: pool_pre_ping + executor fix + agents rewrite",
            manifest={"database.py": theirs_db, "executor.py": theirs_exec,
                      "agents.md": theirs_agents},
        )

        merge_argv = ["merge", "--force", "--json", "fix-pool"]
        outcome = runner.invoke(cli, merge_argv, env=_env(root))
        report = json.loads(outcome.output)
        status = report["status"]

        if status == "conflict":
            # Stopping is acceptable, but the theirs-only files are not conflicts.
            flagged = report.get("conflicts", [])
            assert "executor.py" not in flagged, (
                "executor.py is theirs-only — must not appear in conflicts, only in merged manifest"
            )
            assert "agents.md" not in flagged, (
                "agents.md is theirs-only — must not appear in conflicts"
            )
            return

        if status != "merged":
            pytest.fail(f"Unexpected merge status: {status}\n{report}")

        # Clean merge: inspect the snapshot that main's head commit points at.
        head = read_commit(root, _ref(root, "main"))
        assert head is not None
        snapshot = read_snapshot(root, head.snapshot_id)
        assert snapshot is not None
        merged = snapshot.manifest

        assert merged.get("executor.py") == theirs_exec, (
            "\n\nREGRESSION DETECTED — test_G1_musehub_incident_executor_fix_not_lost\n"
            "executor.py still has '--pid=private' after merge.\n"
            "The silent-drop bug in CodePlugin.merge_ops has returned.\n"
            "See commit 73427a30 for the fix that must be applied.\n"
        )
        assert merged.get("agents.md") == theirs_agents, (
            "\n\nREGRESSION DETECTED — agents.md rewrite was silently dropped.\n"
        )
        # Both sides wrote the same database.py text, so either id is accepted.
        assert merged.get("database.py") in (ours_db, theirs_db), (
            "database.py pool_pre_ping fix was lost"
        )

    def test_G2_merge_commit_has_two_parents(self, tmp_path: pathlib.Path) -> None:
        """G2: the commit produced by a three-way merge records a second parent."""
        from muse.core.commits import read_commit

        root, repo_id = _init_code_repo(tmp_path)

        base_a = _write_py(root, "a.py", "x = 0\n")
        fork_commit = _make_commit(
            root, repo_id, branch="main", message="base",
            manifest={"a.py": base_a},
        )
        ref_path(root, "feat").write_text(fork_commit)

        # Ours edits a.py; theirs leaves a.py alone and adds b.py.
        ours_a = _write_py(root, "a.py", "x = 1\n")
        _make_commit(root, repo_id, branch="main", message="ours", manifest={"a.py": ours_a})

        theirs_b = _write_py(root, "b.py", "b = 1\n")
        _make_commit(
            root, repo_id, branch="feat", message="theirs",
            manifest={"a.py": base_a, "b.py": theirs_b},
        )

        exit_code, output = _run(root, "merge", "feat")
        assert exit_code == 0, output

        head = read_commit(root, _ref(root, "main"))
        assert head is not None
        # Without the second parent the history would look linear.
        assert head.parent2_commit_id is not None, (
            "three-way merge commit missing second parent — "
            "merge history will appear linear in `muse log`"
        )

    def test_G3_merged_snapshot_is_not_ours_snapshot_verbatim(
        self, tmp_path: pathlib.Path
    ) -> None:
        """G3: the merge commit's snapshot id differs from ours' pre-merge snapshot id.

        Theirs adds b.py, so a merge result that reuses ours' snapshot
        unchanged means that addition was dropped.
        """
        from muse.core.commits import read_commit

        root, repo_id = _init_code_repo(tmp_path)

        base_a = _write_py(root, "a.py", "x = 0\n")
        fork_commit = _make_commit(
            root, repo_id, branch="main", message="base",
            manifest={"a.py": base_a},
        )
        ref_path(root, "feat").write_text(fork_commit)

        ours_a = _write_py(root, "a.py", "x = 1\n")
        ours_commit_id = _make_commit(
            root, repo_id, branch="main", message="ours",
            manifest={"a.py": ours_a},
        )

        theirs_b = _write_py(root, "b.py", "b = True\n")
        _make_commit(
            root, repo_id, branch="feat", message="theirs: add b.py",
            manifest={"a.py": base_a, "b.py": theirs_b},
        )

        # Capture ours' snapshot id before the merge moves main.
        ours_before = read_commit(root, ours_commit_id)
        assert ours_before is not None
        snapshot_before = ours_before.snapshot_id

        exit_code, output = _run(root, "merge", "feat")
        assert exit_code == 0, output

        head = read_commit(root, _ref(root, "main"))
        assert head is not None
        assert head.snapshot_id != snapshot_before, (
            "REGRESSION: merged snapshot equals ours snapshot verbatim. "
            "Theirs changes (b.py addition) were silently discarded."
        )
1151
1152
1153 # ===========================================================================
1154 # H — Merge-base correctness for complex DAG topologies
1155 # ===========================================================================
1156
1157
class TestMergeBaseCorrectnessH:
    """Category H: ``find_merge_base`` on hand-built commit graphs.

    A wrong merge base gives the three-way merge wrong change-sets, which
    shows up as phantom conflicts or as conflicts that go unnoticed.  These
    tests call ``find_merge_base`` directly and pin the commit id it returns.
    """

    def test_H1_diamond_topology_correct_lca(self, tmp_path: pathlib.Path) -> None:
        """H1: diamond-shaped graph with an explicit two-parent commit.

                C0
               /  \\
             C1    C2
               \\  /
                C3   (created with parents C1 and C2)

        Pinned results: the base of (C1, C3) is C1, and the base of (C0, C3)
        is C0.
        """
        from muse.core.merge_engine import find_merge_base

        root, repo_id = _init_code_repo(tmp_path)

        # C0 on main; both side branches start from it.
        blob_v0 = _write_py(root, "f.py", "v = 0\n")
        top = _make_commit(root, repo_id, branch="main", message="C0",
                           manifest={"f.py": blob_v0})
        ref_path(root, "branch-a").write_text(top)
        ref_path(root, "branch-b").write_text(top)

        blob_v1 = _write_py(root, "f.py", "v = 1\n")
        left = _make_commit(root, repo_id, branch="branch-a", message="C1",
                            manifest={"f.py": blob_v1})

        blob_v2 = _write_py(root, "f.py", "v = 2\n")
        right = _make_commit(root, repo_id, branch="branch-b", message="C2",
                             manifest={"f.py": blob_v2})

        # C3 joins C1 and C2; its content is simply C1's, which is enough here.
        bottom = _make_commit(root, repo_id, branch="main", message="C3: merge",
                              manifest={"f.py": blob_v1},
                              parent_commit_id=left, parent2_commit_id=right)

        lca = find_merge_base(root, left, bottom)
        assert lca == left, (
            f"LCA(C1, C3) should be C1 (C3 is a descendant of C1), got {lca}"
        )

        lca2 = find_merge_base(root, top, bottom)
        assert lca2 == top, (
            f"LCA(C0, C3) should be C0 (common ancestor of C0-C3 chain), got {lca2}"
        )

    def test_H2_long_linear_chain_lca(self, tmp_path: pathlib.Path) -> None:
        """H2: C0 followed by 20 more commits on main; base of (C0, tip) is C0."""
        from muse.core.merge_engine import find_merge_base

        root, repo_id = _init_code_repo(tmp_path)

        blob_v0 = _write_py(root, "f.py", "v = 0\n")
        chain_start = _make_commit(root, repo_id, branch="main", message="C0",
                                   manifest={"f.py": blob_v0})
        ref_path(root, "branch").write_text(chain_start)

        # Extend main with C1 .. C20, remembering the newest commit id.
        chain_tip = chain_start
        for version in range(1, 21):
            blob = _write_py(root, "f.py", f"v = {version}\n")
            chain_tip = _make_commit(root, repo_id, branch="main", message=f"C{version}",
                                     manifest={"f.py": blob})

        lca = find_merge_base(root, chain_start, chain_tip)
        assert lca == chain_start, "LCA of linear chain tip and base should be the base"

    def test_H3_lca_of_equal_commits_is_that_commit(self, tmp_path: pathlib.Path) -> None:
        """H3: the merge base of a commit with itself is that commit."""
        from muse.core.merge_engine import find_merge_base

        root, repo_id = _init_code_repo(tmp_path)
        blob_v0 = _write_py(root, "f.py", "v = 0\n")
        only_commit = _make_commit(root, repo_id, branch="main", message="C0",
                                   manifest={"f.py": blob_v0})

        lca = find_merge_base(root, only_commit, only_commit)
        assert lca == only_commit

    def test_H4_merge_base_with_remote_tracking_branch_topology(
        self, tmp_path: pathlib.Path
    ) -> None:
        """H4: the commit graph of the MuseHub incident, with stand-in commits.

        Stand-ins (named after the real commit ids):
          5e6c  first commit on main
          727d  next commit on main; adds database.py (local dev)
          d40f  commit on fix-branch, whose ref is set to 727d beforehand;
                adds executor.py
          e010  commit on remote-dev with explicit parents [5e6c, d40f]

        Pinned result: the base of (727d, e010) is 5e6c, not 727d.  The
        original rationale is that 5e6c is the common ancestor met first
        when walking e010's parents.  With that base, executor.py is a change
        made by theirs only and has to appear in the merged manifest.
        """
        from muse.core.merge_engine import find_merge_base

        root, repo_id = _init_code_repo(tmp_path)

        # 5e6c: the proposal-list-revamp merge.
        blob_f = _write_py(root, "f.py", "v = 0\n")
        commit_5e6c = _make_commit(root, repo_id, branch="main",
                                   message="5e6c: proposal-list-revamp",
                                   manifest={"f.py": blob_f})

        # 727d: pool_pre_ping change committed on main after 5e6c.
        blob_db = _write_py(root, "database.py", "pool_pre_ping = True\n")
        commit_727d = _make_commit(root, repo_id, branch="main",
                                   message="727d: pool_pre_ping",
                                   manifest={"f.py": blob_f, "database.py": blob_db})

        # d40f: fix-branch head, created after pointing fix-branch at 727d.
        blob_exec = _write_py(root, "executor.py", "args = [] # fixed\n")
        ref_path(root, "fix-branch").write_text(commit_727d)
        commit_d40f = _make_commit(root, repo_id, branch="fix-branch",
                                   message="d40f: executor fix",
                                   manifest={"f.py": blob_f, "database.py": blob_db,
                                             "executor.py": blob_exec})

        # e010: the hub-side merge of fix-branch into dev, parents [5e6c, d40f].
        ref_path(root, "remote-dev").write_text(commit_5e6c)
        commit_e010 = _make_commit(root, repo_id, branch="remote-dev",
                                   message="e010: Merge fix-branch into dev",
                                   manifest={"f.py": blob_f, "database.py": blob_db,
                                             "executor.py": blob_exec},
                                   parent_commit_id=commit_5e6c,
                                   parent2_commit_id=commit_d40f)

        # Base of local dev (727d) and remote dev (e010).
        lca = find_merge_base(root, commit_727d, commit_e010)
        assert lca == commit_5e6c, (
            f"LCA(727dad83, e01007b4) should be 5e6c6476, got {lca}. "
            "With the wrong LCA, the three-way merge computes wrong change-sets "
            "and silently drops theirs-only files."
        )
1293
1294
1295 # ===========================================================================
1296 # I — False-conflict regression: theirs-only additions when ours==base
1297 # ===========================================================================
1298 # Real incident: muse/core/patch_record.py was added on dev. When dev was
1299 # merged into main, main's HEAD was a previous merge commit whose snapshot
1300 # was IDENTICAL to the merge base snapshot (the prior merge had introduced
1301 # no net manifest changes). The engine falsely reported patch_record.py as
1302 # a conflict and apply_manifest deleted it from disk.
1303 #
1304 # Root invariant: if base_manifest[p] is absent AND ours_manifest[p] is absent
1305 # AND theirs_manifest[p] is present → this is a PURE THEIRS ADDITION. It must
1306 # NEVER appear in conflict_paths. It MUST appear on disk after the merge stops.
1307 # ===========================================================================
1308
1309
class TestFalseConflictTheirsOnlyI:
    """I: theirs-only additions must never be false-conflicted or deleted.

    I1-I4 share one topology: main's head is a commit whose manifest is
    identical to the base commit's manifest (created with both parents set to
    the base commit), and dev, branched from the base, only adds files.  I5
    adds a divergent edit to a.py on both sides next to a theirs-only file.
    """

    def test_I1_theirs_only_addition_not_in_conflict_list(
        self, tmp_path: pathlib.Path
    ) -> None:
        """I1: when ours-snapshot == base-snapshot, theirs-only new files are clean."""
        root, repo_id = _init_code_repo(tmp_path)

        # Base commit: a.py only
        a_oid = _write_py(root, "a.py", "A = 1\n")
        base_c = _make_commit(root, repo_id, branch="main", message="base: a.py",
                              manifest={"a.py": a_oid})

        # Main: a no-op merge commit (snapshot identical to base — mirrors the real
        # incident where main's last commit was a merge with no manifest changes).
        _make_commit(root, repo_id, branch="main", message="Merge dev into main (noop)",
                     manifest={"a.py": a_oid},
                     parent_commit_id=base_c,
                     parent2_commit_id=base_c)

        # Dev: adds patch_record.py — theirs-only addition
        pr_oid = _write_py(root, "patch_record.py",
                           "\"\"\"Patch record.\"\"\"\n\nclass PatchRecord:\n pass\n")
        ref_path(root, "dev").write_text(base_c)
        _make_commit(root, repo_id, branch="dev", message="feat: add patch_record",
                     manifest={"a.py": a_oid, "patch_record.py": pr_oid},
                     parent_commit_id=base_c)

        result = runner.invoke(cli, ["merge", "--force", "--json", "dev"],
                               env=_env(root))
        assert result.exit_code == 0, f"merge failed:\n{result.output}"
        data = json.loads(result.output)
        assert "patch_record.py" not in data.get("conflicts", []), (
            "patch_record.py is a pure theirs-only addition — must not appear in conflicts"
        )

    def test_I2_theirs_only_addition_lands_on_disk(
        self, tmp_path: pathlib.Path
    ) -> None:
        """I2: theirs-only file must exist on disk after merge (not deleted by apply_manifest).

        The merge must finish with a clean status (the same requirement I3
        places on this topology) and patch_record.py must then be present in
        the working tree.  The test never writes that file to disk itself;
        only its blob is stored via ``_write_object``.
        """
        root, repo_id = _init_code_repo(tmp_path)

        a_oid = _write_py(root, "a.py", "A = 1\n")
        base_c = _make_commit(root, repo_id, branch="main", message="base: a.py",
                              manifest={"a.py": a_oid})

        # Write a.py to disk so the workdir guard doesn't fire
        (root / "a.py").write_bytes(b"A = 1\n")

        # Main: no-op merge commit, manifest identical to base.
        _make_commit(root, repo_id, branch="main", message="Merge dev into main (noop)",
                     manifest={"a.py": a_oid},
                     parent_commit_id=base_c,
                     parent2_commit_id=base_c)

        # Dev: adds patch_record.py as a stored object only.
        pr_content = b"\"\"\"Patch record.\"\"\"\n\nclass PatchRecord:\n pass\n"
        pr_oid = _write_object(root, pr_content)
        ref_path(root, "dev").write_text(base_c)
        _make_commit(root, repo_id, branch="dev", message="feat: add patch_record",
                     manifest={"a.py": a_oid, "patch_record.py": pr_oid},
                     parent_commit_id=base_c)

        result = runner.invoke(cli, ["merge", "--force", "--json", "dev"],
                               env=_env(root))
        assert result.exit_code == 0, f"merge failed:\n{result.output}"
        data = json.loads(result.output)

        # Checking the disk only when status == "merged" would let this test
        # pass without asserting anything on every other status — including
        # the false-conflict outcome it exists to catch.  Require a clean
        # status first, then check the disk unconditionally.
        assert data["status"] in ("merged", "fast_forward"), (
            f"expected clean merge, got status={data['status']!r}; "
            f"conflicts={data.get('conflicts')}"
        )
        assert (root / "patch_record.py").exists(), (
            "patch_record.py must exist on disk after clean merge — "
            "apply_manifest must not delete it"
        )

    def test_I3_merge_succeeds_cleanly_when_ours_equals_base_snapshot(
        self, tmp_path: pathlib.Path
    ) -> None:
        """I3: merge status must be 'merged' or 'fast_forward', never 'conflict'
        when ours snapshot equals base snapshot and theirs only adds files."""
        root, repo_id = _init_code_repo(tmp_path)

        a_oid = _write_py(root, "a.py", "A = 1\n")
        base_c = _make_commit(root, repo_id, branch="main", message="base",
                              manifest={"a.py": a_oid})

        # ours == base snapshot exactly
        _make_commit(root, repo_id, branch="main", message="noop merge",
                     manifest={"a.py": a_oid},
                     parent_commit_id=base_c,
                     parent2_commit_id=base_c)

        new_oid = _write_py(root, "new_module.py", "X = True\n")
        ref_path(root, "dev").write_text(base_c)
        _make_commit(root, repo_id, branch="dev", message="add new_module.py",
                     manifest={"a.py": a_oid, "new_module.py": new_oid},
                     parent_commit_id=base_c)

        result = runner.invoke(cli, ["merge", "--force", "--json", "dev"],
                               env=_env(root))
        assert result.exit_code == 0
        data = json.loads(result.output)
        assert data["status"] in ("merged", "fast_forward"), (
            f"expected clean merge, got status={data['status']!r}; "
            f"conflicts={data.get('conflicts')}"
        )

    def test_I4_multiple_theirs_only_files_no_conflict_when_ours_equals_base(
        self, tmp_path: pathlib.Path
    ) -> None:
        """I4: multiple theirs-only additions, none must appear in conflict list."""
        root, repo_id = _init_code_repo(tmp_path)

        a_oid = _write_py(root, "a.py", "A = 1\n")
        base_c = _make_commit(root, repo_id, branch="main", message="base",
                              manifest={"a.py": a_oid})
        # Main: no-op commit, manifest identical to base.
        _make_commit(root, repo_id, branch="main", message="noop",
                     manifest={"a.py": a_oid},
                     parent_commit_id=base_c,
                     parent2_commit_id=base_c)

        # Dev: five new files on top of the unchanged a.py.
        theirs_manifest: Manifest = {"a.py": a_oid}
        new_files = ["patch_record.py", "apply_patch.py", "format_patch.py",
                     "patch_utils.py", "patch_schema.py"]
        for fname in new_files:
            theirs_manifest[fname] = _write_py(root, fname, f"# {fname}\n")

        ref_path(root, "dev").write_text(base_c)
        _make_commit(root, repo_id, branch="dev", message="add patch files",
                     manifest=theirs_manifest, parent_commit_id=base_c)

        result = runner.invoke(cli, ["merge", "--force", "--json", "dev"],
                               env=_env(root))
        assert result.exit_code == 0
        data = json.loads(result.output)
        conflicts = data.get("conflicts", [])
        for fname in new_files:
            assert fname not in conflicts, (
                f"{fname} is a pure theirs-only addition — must not appear in conflicts. "
                f"Full conflict list: {conflicts}"
            )

    def test_I5_partial_merged_manifest_must_include_theirs_only_files_on_conflict(
        self, tmp_path: pathlib.Path
    ) -> None:
        """I5: when there IS a genuine conflict elsewhere, theirs-only additions
        must still be in the working tree (apply_manifest must not delete them)."""
        root, repo_id = _init_code_repo(tmp_path)

        a_oid = _write_py(root, "a.py", "def go(): return 'base'\n")
        b_oid = _write_py(root, "b.py", "B = True\n")
        base_c = _make_commit(root, repo_id, branch="main", message="base",
                              manifest={"a.py": a_oid, "b.py": b_oid})

        # ours: modifies a.py (the conflict candidate); b.py is unchanged from base
        a_ours = _write_py(root, "a.py", "def go(): return 'ours'\n")
        _make_commit(root, repo_id, branch="main", message="ours: change a.py",
                     manifest={"a.py": a_ours, "b.py": b_oid},
                     parent_commit_id=base_c)

        # Write working tree for ours
        (root / "a.py").write_bytes(b"def go(): return 'ours'\n")
        (root / "b.py").write_bytes(b"B = True\n")

        # theirs: modifies a.py differently + adds new_module.py (object only,
        # so the file can reach the disk only through the merge itself)
        a_theirs = _write_py(root, "a.py", "def go(): return 'theirs'\n")
        new_oid = _write_object(root, b"NEW = True\n")
        ref_path(root, "dev").write_text(base_c)
        _make_commit(root, repo_id, branch="dev", message="theirs: change a + add new",
                     manifest={"a.py": a_theirs, "b.py": b_oid, "new_module.py": new_oid},
                     parent_commit_id=base_c)

        result = runner.invoke(cli, ["merge", "--force", "--json", "dev"],
                               env=_env(root))
        data = json.loads(result.output)

        # There should be a conflict on a.py, but new_module.py must NOT be in conflicts.
        assert "new_module.py" not in data.get("conflicts", []), (
            "new_module.py is theirs-only — must not appear in conflicts even when "
            "there is a genuine conflict in a.py"
        )
        if data["status"] == "conflict":
            # The partial_merged manifest (applied to disk) must contain new_module.py.
            # Verify it's on disk — if apply_manifest deleted it, that's the bug.
            assert (root / "new_module.py").exists(), (
                "new_module.py must be on disk after conflict-stop. "
                "apply_manifest must include theirs-only files in partial_merged, "
                "not delete them because they're absent from ours_manifest."
            )
File History 4 commits
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 20 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 22 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 28 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor 28 days ago