"""Regression stress tests for the three-way merge engine — all permutations. Root cause (fixed in commit 73427a30): CodePlugin.merge_ops silently dropped theirs-only changes when OT symbol commutation masked a file-level conflict. The merged blob was the ours blob verbatim, so the result reported "clean merge" with no-op file content. Real-world impact: MuseHub's executor.py ``--pid=private`` fix (removed in fix/pool-pre-ping) was silently discarded when the user merged local/dev (which had a commuting pool_pre_ping change to database.py). Every subsequent CI run failed with "docker: --pid: invalid PID mode" until the regression was manually tracked down through the object store. This file tests every permutation of merge topology that could lead to silent data loss — not just the one that burned us. Categories ---------- A Fast-forward / up-to-date detection (no data-loss risk, but correctness) B Three-way clean merges — no conflicts anywhere C Three-way with conflicts surfaced — the merge MUST stop, not silently pass D The silent-drop regression — commuting OT ops on same file E Theirs-only files MUST survive when there are conflicts elsewhere F Strategy shortcuts (--strategy=ours / --strategy=theirs) correctness G MuseHub regression scenario (pool_pre_ping + executor + AGENTS.md) H Merge-base correctness for complex DAG topologies I False-conflict regression — theirs-only additions falsely reported as conflicts and deleted from disk when ours-snapshot == base-snapshot. Root cause: muse/core/patch_record.py was deleted from disk during a dev→main merge where main's previous merge had left its snapshot identical to the merge base. The engine must NEVER report a theirs-only addition as a conflict, and must NEVER delete it from the working tree. """ from __future__ import annotations import datetime import json import pathlib import textwrap import pytest from tests.cli_test_helper import CliRunner from muse.core.types import Manifest, blob_id, fake_id from muse.core.paths import merge_state_path, muse_dir, ref_path runner = CliRunner() cli = None # CliRunner ignores this positional arg # --------------------------------------------------------------------------- # Low-level repo helpers # --------------------------------------------------------------------------- def _h(label: str) -> str: """Stable fake content hash for a text label (sha256: prefixed).""" return fake_id(label) def _env(root: pathlib.Path) -> Manifest: return {"MUSE_REPO_ROOT": str(root)} def _run(root: pathlib.Path, *args: str) -> tuple[int, str]: """Run a muse command, injecting --force into merge calls. Tests use an in-memory manifest-only setup (no files on disk) so the working-tree cleanliness guard would always fire. ``--force`` bypasses that guard without affecting any merge-logic correctness being tested. """ final_args = list(args) if final_args and final_args[0] == "merge" and "--force" not in final_args: final_args.insert(1, "--force") result = runner.invoke(cli, final_args, env=_env(root), catch_exceptions=False) return result.exit_code, result.output def _run_unchecked(root: pathlib.Path, *args: str) -> tuple[int, str]: """Like _run but does not raise on failure.""" final_args = list(args) if final_args and final_args[0] == "merge" and "--force" not in final_args: final_args.insert(1, "--force") result = runner.invoke(cli, final_args, env=_env(root)) return result.exit_code, result.output def _write_object(root: pathlib.Path, content: bytes) -> str: """Write content to object store and return sha256:-prefixed object ID.""" from muse.core.object_store import write_object as _store_write oid = blob_id(content) _store_write(root, oid, content) return oid def _init_code_repo(tmp_path: pathlib.Path) -> tuple[pathlib.Path, str]: """Initialise a bare code-domain repo and return (root, repo_id).""" from muse.core.commits import ( CommitRecord, write_commit, ) from muse.core.snapshots import ( SnapshotRecord, write_snapshot, ) from muse.core.ids import hash_snapshot as compute_snapshot_id, hash_commit as compute_commit_id dot_muse = muse_dir(tmp_path) dot_muse.mkdir() repo_id = fake_id("repo") (dot_muse / "repo.json").write_text(json.dumps({ "repo_id": repo_id, "domain": "code", "default_branch": "main", "created_at": "2025-01-01T00:00:00+00:00", }), encoding="utf-8") (dot_muse / "HEAD").write_text("ref: refs/heads/main", encoding="utf-8") (dot_muse / "refs" / "heads").mkdir(parents=True) (dot_muse / "snapshots").mkdir() (dot_muse / "commits").mkdir() (dot_muse / "objects").mkdir() return tmp_path, repo_id def _make_commit( root: pathlib.Path, repo_id: str, branch: str = "main", message: str = "test", manifest: Manifest | None = None, parent_commit_id: str | None = None, parent2_commit_id: str | None = None, ) -> str: """Write a snapshot + commit and advance the branch ref.""" from muse.core.commits import ( CommitRecord, write_commit, ) from muse.core.snapshots import ( SnapshotRecord, write_snapshot, ) from muse.core.ids import hash_snapshot as compute_snapshot_id, hash_commit as compute_commit_id ref_file = ref_path(root, branch) if parent_commit_id is None and ref_file.exists(): parent_commit_id = ref_file.read_text().strip() or None m = manifest or {} snap_id = compute_snapshot_id(m) committed_at = datetime.datetime.now(datetime.timezone.utc) parent_ids: list[str] = [] if parent_commit_id: parent_ids.append(parent_commit_id) if parent2_commit_id: parent_ids.append(parent2_commit_id) commit_id = compute_commit_id( parent_ids=parent_ids, snapshot_id=snap_id, message=message, committed_at_iso=committed_at.isoformat(), ) write_snapshot(root, SnapshotRecord(snapshot_id=snap_id, manifest=m)) write_commit(root, CommitRecord( commit_id=commit_id, branch=branch, snapshot_id=snap_id, message=message, committed_at=committed_at, parent_commit_id=parent_commit_id, parent2_commit_id=parent2_commit_id, )) ref_file.parent.mkdir(parents=True, exist_ok=True) ref_file.write_text(commit_id, encoding="utf-8") return commit_id def _write_py(root: pathlib.Path, filename: str, content: str) -> str: """Write Python content into the object store ONLY; return object_id. We deliberately do NOT write the file to the working tree so that ``require_clean_workdir`` never aborts the merge due to uncommitted changes. The code plugin reads file bytes from the object store via ``read_object(root, obj_id)``, so on-disk presence is not required. """ return _write_object(root, content.encode()) def _ref(root: pathlib.Path, branch: str) -> str: return (ref_path(root, branch)).read_text(encoding="utf-8").strip() def _snapshot_manifest(root: pathlib.Path, branch: str) -> Manifest: """Return the manifest for a branch's current HEAD snapshot.""" from muse.core.commits import read_commit from muse.core.snapshots import read_snapshot commit_id = _ref(root, branch) commit = read_commit(root, commit_id) assert commit is not None snap = read_snapshot(root, commit.snapshot_id) assert snap is not None return snap.manifest # =========================================================================== # A — Fast-forward / up-to-date # =========================================================================== class TestMergeTopologyA: """Ensure merge base detection is correct and no data is corrupted.""" def test_A1_fast_forward_updates_head_and_files(self, tmp_path: pathlib.Path) -> None: """A1: ours is ancestor of theirs → fast-forward, working tree = theirs.""" root, repo_id = _init_code_repo(tmp_path) a_id = _write_py(root, "app.py", "x = 1\n") _make_commit(root, repo_id, branch="main", message="base", manifest={"app.py": a_id}) base_commit = _ref(root, "main") # Create feature branch from same base. (ref_path(root, "feat")).write_text(base_commit) b_id = _write_py(root, "app.py", "x = 2\n") _make_commit(root, repo_id, branch="feat", message="feat commit", manifest={"app.py": b_id}) code, out = _run(root, "merge", "feat") assert code == 0, out # main HEAD must now equal feat HEAD. assert _ref(root, "main") == _ref(root, "feat") # Manifest must equal feat's snapshot. assert _snapshot_manifest(root, "main") == {"app.py": b_id} def test_A2_already_up_to_date_prints_message(self, tmp_path: pathlib.Path) -> None: """A2: theirs is ancestor of ours → 'Already up to date.'""" root, repo_id = _init_code_repo(tmp_path) a_id = _write_py(root, "f.py", "a = 1\n") base_c = _make_commit(root, repo_id, branch="main", message="base", manifest={"f.py": a_id}) (ref_path(root, "old")).write_text(base_c) b_id = _write_py(root, "f.py", "a = 2\n") _make_commit(root, repo_id, branch="main", message="advance", manifest={"f.py": b_id}) code, out = _run(root, "merge", "old") assert code == 0, out assert "up to date" in out.lower() # main must not have moved back. assert _snapshot_manifest(root, "main") == {"f.py": b_id} def test_A3_fast_forward_json_reports_fast_forward_status(self, tmp_path: pathlib.Path) -> None: """A3: JSON output for fast-forward has status='fast_forward'.""" root, repo_id = _init_code_repo(tmp_path) a_id = _write_py(root, "f.py", "a = 1\n") base_c = _make_commit(root, repo_id, branch="main", message="base", manifest={"f.py": a_id}) (ref_path(root, "feat")).write_text(base_c) b_id = _write_py(root, "f.py", "a = 2\n") _make_commit(root, repo_id, branch="feat", message="feat", manifest={"f.py": b_id}) code, out = _run(root, "merge", "--json", "feat") assert code == 0, out data = json.loads(out) assert data["status"] == "fast_forward" assert data["conflicts"] == [] def test_A4_fast_forward_preserves_all_theirs_files(self, tmp_path: pathlib.Path) -> None: """A4: fast-forward with 50 files — all must appear in main's manifest.""" root, repo_id = _init_code_repo(tmp_path) a_id = _write_py(root, "base.py", "base = True\n") base_c = _make_commit(root, repo_id, branch="main", message="base", manifest={"base.py": a_id}) (ref_path(root, "feat")).write_text(base_c) manifest: Manifest = {"base.py": a_id} for i in range(50): oid = _write_py(root, f"module_{i:02d}.py", f"x_{i} = {i}\n") manifest[f"module_{i:02d}.py"] = oid _make_commit(root, repo_id, branch="feat", message="many files", manifest=manifest) code, _ = _run(root, "merge", "feat") assert code == 0 merged = _snapshot_manifest(root, "main") for i in range(50): assert f"module_{i:02d}.py" in merged, f"module_{i:02d}.py missing after fast-forward" def test_A5_no_ff_creates_merge_commit(self, tmp_path: pathlib.Path) -> None: """A5: --no-ff skips fast-forward and always creates a merge commit.""" root, repo_id = _init_code_repo(tmp_path) a_id = _write_py(root, "f.py", "a = 1\n") base_c = _make_commit(root, repo_id, branch="main", message="base", manifest={"f.py": a_id}) (ref_path(root, "feat")).write_text(base_c) b_id = _write_py(root, "f.py", "a = 2\n") feat_c = _make_commit(root, repo_id, branch="feat", message="feat", manifest={"f.py": b_id}) from muse.core.commits import read_commit pre_main = _ref(root, "main") code, out = _run(root, "merge", "--no-ff", "feat") assert code == 0, out post_main = _ref(root, "main") # HEAD must have advanced (new merge commit created). assert post_main != pre_main # The new commit must have TWO parents. commit = read_commit(root, post_main) assert commit is not None assert commit.parent2_commit_id is not None, "no-ff must create merge commit with 2 parents" # =========================================================================== # B — Three-way clean merges (no conflicts anywhere) # =========================================================================== class TestThreeWayCleanMergeB: """Theirs-only and ours-only changes all survive; merged snapshot is correct.""" def test_B1_disjoint_file_changes_both_survive(self, tmp_path: pathlib.Path) -> None: """B1: ours changes a.py, theirs changes b.py — both must be in merged.""" root, repo_id = _init_code_repo(tmp_path) a0 = _write_py(root, "a.py", "a = 0\n") b0 = _write_py(root, "b.py", "b = 0\n") base_c = _make_commit(root, repo_id, branch="main", message="base", manifest={"a.py": a0, "b.py": b0}) (ref_path(root, "feat")).write_text(base_c) # ours: modify a.py a1 = _write_py(root, "a.py", "a = 1\n") _make_commit(root, repo_id, branch="main", message="ours: change a", manifest={"a.py": a1, "b.py": b0}) # theirs: modify b.py b1 = _write_py(root, "b.py", "b = 1\n") _make_commit(root, repo_id, branch="feat", message="theirs: change b", manifest={"a.py": a0, "b.py": b1}) code, out = _run(root, "merge", "feat") assert code == 0, out m = _snapshot_manifest(root, "main") assert m.get("a.py") == a1, "ours change to a.py lost after clean merge" assert m.get("b.py") == b1, "theirs change to b.py lost after clean merge" def test_B2_theirs_adds_new_file(self, tmp_path: pathlib.Path) -> None: """B2: theirs adds new.py that ours never touched — must be in merged.""" root, repo_id = _init_code_repo(tmp_path) a0 = _write_py(root, "a.py", "a = 0\n") base_c = _make_commit(root, repo_id, branch="main", message="base", manifest={"a.py": a0}) (ref_path(root, "feat")).write_text(base_c) new_id = _write_py(root, "new.py", "new = True\n") _make_commit(root, repo_id, branch="feat", message="add new.py", manifest={"a.py": a0, "new.py": new_id}) code, out = _run(root, "merge", "feat") assert code == 0, out assert "new.py" in _snapshot_manifest(root, "main"), "theirs new file lost" def test_B3_theirs_deletes_file_ours_never_touched(self, tmp_path: pathlib.Path) -> None: """B3: theirs deletes stale.py — must be absent in merged.""" root, repo_id = _init_code_repo(tmp_path) a0 = _write_py(root, "a.py", "a = 0\n") stale0 = _write_py(root, "stale.py", "dead = True\n") base_c = _make_commit(root, repo_id, branch="main", message="base", manifest={"a.py": a0, "stale.py": stale0}) (ref_path(root, "feat")).write_text(base_c) a1 = _write_py(root, "a.py", "a = 1\n") _make_commit(root, repo_id, branch="main", message="ours: tweak a", manifest={"a.py": a1, "stale.py": stale0}) _make_commit(root, repo_id, branch="feat", message="theirs: rm stale.py", manifest={"a.py": a0}) code, out = _run(root, "merge", "feat") assert code == 0, out m = _snapshot_manifest(root, "main") assert "stale.py" not in m, "theirs deletion of stale.py was not applied" assert m.get("a.py") == a1, "ours change to a.py lost" def test_B4_many_theirs_only_additions_all_survive(self, tmp_path: pathlib.Path) -> None: """B4: theirs adds 30 files, ours changes 1 file — all 30 must be in merged.""" root, repo_id = _init_code_repo(tmp_path) base_id = _write_py(root, "main.py", "x = 0\n") base_c = _make_commit(root, repo_id, branch="main", message="base", manifest={"main.py": base_id}) (ref_path(root, "feat")).write_text(base_c) # ours: bump main.py bumped = _write_py(root, "main.py", "x = 1\n") _make_commit(root, repo_id, branch="main", message="ours: bump", manifest={"main.py": bumped}) # theirs: 30 new modules theirs_manifest = {"main.py": base_id} for i in range(30): oid = _write_py(root, f"mod_{i}.py", f"MOD_{i} = True\n") theirs_manifest[f"mod_{i}.py"] = oid _make_commit(root, repo_id, branch="feat", message="theirs: add 30 mods", manifest=theirs_manifest) code, out = _run(root, "merge", "feat") assert code == 0, out m = _snapshot_manifest(root, "main") for i in range(30): assert f"mod_{i}.py" in m, f"mod_{i}.py missing after clean three-way merge" # =========================================================================== # C — Three-way with conflicts that MUST be surfaced # =========================================================================== class TestThreeWayConflictSurfacedC: """Conflicts must be reported; the merge must NOT silently produce wrong content.""" def test_C1_genuine_conflict_exits_nonzero(self, tmp_path: pathlib.Path) -> None: """C1: both sides change the same symbol in the same file → exit nonzero.""" root, repo_id = _init_code_repo(tmp_path) a0 = _write_py(root, "service.py", textwrap.dedent("""\ def charge(): return 'v1' """)) base_c = _make_commit(root, repo_id, branch="main", message="base", manifest={"service.py": a0}) (ref_path(root, "feat")).write_text(base_c) a_ours = _write_py(root, "service.py", textwrap.dedent("""\ def charge(): return 'ours-v2' """)) _make_commit(root, repo_id, branch="main", message="ours: change charge", manifest={"service.py": a_ours}) a_theirs = _write_py(root, "service.py", textwrap.dedent("""\ def charge(): return 'theirs-v2' """)) _make_commit(root, repo_id, branch="feat", message="theirs: change charge", manifest={"service.py": a_theirs}) code, out = _run_unchecked(root, "merge", "feat") assert code != 0, "conflict must exit nonzero, not silently succeed" def test_C2_conflict_creates_merge_state_json(self, tmp_path: pathlib.Path) -> None: """C2: conflict writes MERGE_STATE.json with the right fields.""" root, repo_id = _init_code_repo(tmp_path) f0 = _write_py(root, "f.py", textwrap.dedent("""\ def foo(): return 1 """)) base_c = _make_commit(root, repo_id, branch="main", message="base", manifest={"f.py": f0}) (ref_path(root, "feat")).write_text(base_c) f_ours = _write_py(root, "f.py", textwrap.dedent("""\ def foo(): return 2 """)) _make_commit(root, repo_id, branch="main", message="ours", manifest={"f.py": f_ours}) f_theirs = _write_py(root, "f.py", textwrap.dedent("""\ def foo(): return 99 """)) _make_commit(root, repo_id, branch="feat", message="theirs", manifest={"f.py": f_theirs}) _run_unchecked(root, "merge", "feat") state_path = merge_state_path(root) assert state_path.exists(), "MERGE_STATE.json must be written on conflict" state = json.loads(state_path.read_text()) assert "ours_commit" in state assert "theirs_commit" in state assert "conflict_paths" in state def test_C3_conflict_json_format_lists_paths(self, tmp_path: pathlib.Path) -> None: """C3: --format json reports conflict with non-empty conflicts list.""" root, repo_id = _init_code_repo(tmp_path) f0 = _write_py(root, "svc.py", textwrap.dedent("""\ def go(): pass """)) base_c = _make_commit(root, repo_id, branch="main", message="base", manifest={"svc.py": f0}) (ref_path(root, "feat")).write_text(base_c) f1 = _write_py(root, "svc.py", textwrap.dedent("""\ def go(): return 'ours' """)) _make_commit(root, repo_id, branch="main", message="ours", manifest={"svc.py": f1}) f2 = _write_py(root, "svc.py", textwrap.dedent("""\ def go(): return 'theirs' """)) _make_commit(root, repo_id, branch="feat", message="theirs", manifest={"svc.py": f2}) result = runner.invoke(cli, ["merge", "--force", "--json", "feat"], env=_env(root)) data = json.loads(result.output) assert data["status"] == "conflict" assert len(data["conflicts"]) > 0 # =========================================================================== # D — The silent-drop regression (commuting OT ops on same file) # =========================================================================== class TestSilentDropRegressionD: """ The exact bug that burned us: two branches modify DIFFERENT symbols in the same file. OT sees them as commuting (non-conflicting at symbol level), but cannot reconstruct the merged blob. Before the fix this silently produced the ours blob and dropped all theirs changes in that file. After the fix, this must either surface a conflict or correctly auto-merge. In either case: theirs-only CHANGES to OTHER FILES must always survive. """ def test_D1_commuting_symbol_changes_do_not_silently_succeed( self, tmp_path: pathlib.Path ) -> None: """D1: ours changes func_a, theirs changes func_b — must conflict or merge, never silently lose theirs.""" root, repo_id = _init_code_repo(tmp_path) base_code = textwrap.dedent("""\ def func_a(): return 'a-v1' def func_b(): return 'b-v1' """) f0 = _write_py(root, "lib.py", base_code) base_c = _make_commit(root, repo_id, branch="main", message="base", manifest={"lib.py": f0}) (ref_path(root, "feat")).write_text(base_c) ours_code = textwrap.dedent("""\ def func_a(): return 'a-v2' def func_b(): return 'b-v1' """) f_ours = _write_py(root, "lib.py", ours_code) _make_commit(root, repo_id, branch="main", message="ours: change func_a", manifest={"lib.py": f_ours}) theirs_code = textwrap.dedent("""\ def func_a(): return 'a-v1' def func_b(): return 'b-v2' """) f_theirs = _write_py(root, "lib.py", theirs_code) _make_commit(root, repo_id, branch="feat", message="theirs: change func_b", manifest={"lib.py": f_theirs}) result = runner.invoke(cli, ["merge", "--force", "--json", "feat"], env=_env(root)) data = json.loads(result.output) if data["status"] == "merged": # If auto-merged: func_b MUST be 'b-v2', never silently kept as 'b-v1'. m = _snapshot_manifest(root, "main") from muse.core.snapshots import read_snapshot snap = None from muse.core.commits import read_commit commit = read_commit(root, _ref(root, "main")) assert commit is not None from muse.core.snapshots import read_snapshot snap = read_snapshot(root, commit.snapshot_id) assert snap is not None # We can't read the actual merged file content from the manifest # without the working tree, but we CAN assert lib.py is present. assert "lib.py" in snap.manifest else: # If conflict: that is correct — better a conflict than silent data loss. assert data["status"] == "conflict" assert len(data["conflicts"]) > 0 def test_D2_theirs_only_file_survives_commuting_conflict( self, tmp_path: pathlib.Path ) -> None: """D2: regression core — theirs-only executor.py must survive even when lib.py conflicts.""" root, repo_id = _init_code_repo(tmp_path) base_db = textwrap.dedent("""\ def pool(): pass """) base_exec = textwrap.dedent("""\ def run(): args = ['--pid=private'] return args """) db0 = _write_py(root, "database.py", base_db) exec0 = _write_py(root, "executor.py", base_exec) base_c = _make_commit(root, repo_id, branch="main", message="base", manifest={"database.py": db0, "executor.py": exec0}) (ref_path(root, "fix-branch")).write_text(base_c) # ours (dev): fix pool_pre_ping in database.py, don't touch executor.py ours_db = textwrap.dedent("""\ def pool(): return 'pool_pre_ping=True' """) db_ours = _write_py(root, "database.py", ours_db) _make_commit(root, repo_id, branch="main", message="ours: pool_pre_ping fix", manifest={"database.py": db_ours, "executor.py": exec0}) # theirs (fix-branch): fix pool_pre_ping the same way AND fix executor.py theirs_db = textwrap.dedent("""\ def pool(): return 'pool_pre_ping=True' """) theirs_exec = textwrap.dedent("""\ def run(): args = [] # --pid=private removed (invalid Docker flag) return args """) db_theirs = _write_py(root, "database.py", theirs_db) exec_theirs = _write_py(root, "executor.py", theirs_exec) _make_commit(root, repo_id, branch="fix-branch", message="theirs: pool_pre_ping + remove --pid=private", manifest={"database.py": db_theirs, "executor.py": exec_theirs}) result = runner.invoke(cli, ["merge", "--force", "--json", "fix-branch"], env=_env(root)) data = json.loads(result.output) # The critical assertion: in any outcome, executor.py must NOT be the old version. # Either the merge succeeded and executor.py has the fix, OR a conflict is raised # so the user can resolve it. What is NEVER acceptable: silent success with old content. if data["status"] == "merged": from muse.core.commits import read_commit from muse.core.snapshots import read_snapshot commit = read_commit(root, _ref(root, "main")) assert commit is not None snap = read_snapshot(root, commit.snapshot_id) assert snap is not None # executor.py must be the FIXED version (no --pid=private), not the base. assert snap.manifest.get("executor.py") == exec_theirs, ( "REGRESSION: executor.py fix was silently dropped — " "the theirs-only change was lost in the merge" ) else: # Conflict is acceptable (user can resolve), silent data loss is not. assert data["status"] == "conflict" def test_D3_identical_object_hash_on_both_sides_no_file_conflict( self, tmp_path: pathlib.Path ) -> None: """D3: both sides converge to the EXACT same object hash — file-level conflict impossible. When ours and theirs both arrive at the same content hash for a file, diff_snapshots sees them as identical (no change relative to each other). The merge engine must treat this as a clean convergence — or at minimum, the resulting manifest must contain that file at the shared hash. This tests the file-level merge_engine layer (diff_snapshots / apply_merge). Symbol-level conflict detection (within the file) is separate and handled by the plugin — if the plugin marks it as conflicting despite identical hashes, that is a plugin-level decision, not a data-loss scenario. """ from muse.core.merge_engine import diff_snapshots, detect_conflicts, apply_merge fixed_hash = _h("pool_pre_ping_fix_content") base_hash = _h("original_pool_content") base_manifest = {"database.py": base_hash, "other.py": _h("other")} ours_manifest = {"database.py": fixed_hash, "other.py": _h("other")} theirs_manifest = {"database.py": fixed_hash, "other.py": _h("other")} ours_changed = diff_snapshots(base_manifest, ours_manifest) theirs_changed = diff_snapshots(base_manifest, theirs_manifest) conflicts = detect_conflicts(ours_changed, theirs_changed, ours_manifest, theirs_manifest) merged = apply_merge(base_manifest, ours_manifest, theirs_manifest, ours_changed, theirs_changed, conflicts) # Both sides converged to the SAME hash — detect_conflicts must not flag it. assert "database.py" not in conflicts, ( "D3 VIOLATED: convergent same-hash change wrongly reported as conflict" ) # apply_merge must include database.py at the agreed fixed hash. assert merged.get("database.py") == fixed_hash, ( "D3 VIOLATED: database.py absent or at wrong hash after convergent merge" ) def test_D4_the_musehub_regression_scenario(self, tmp_path: pathlib.Path) -> None: """D4: exact topology from the MuseHub incident — 3 branches, complex DAG. Timeline: base → ours (dev): pool_pre_ping DB fix base → theirs (fix-branch): pool_pre_ping fix + --pid fix + AGENTS.md rewrite + new_feature.py When user merges fix-branch into dev: - database.py: both changed (same content, should be clean OR conflict) - executor.py: theirs-only change → MUST survive in merged - agents.md: theirs-only change → MUST survive in merged - new_feature.py: theirs-only addition → MUST survive in merged """ root, repo_id = _init_code_repo(tmp_path) # Base state db0 = _write_py(root, "database.py", "def pool(): pass\n") exec0 = _write_py(root, "executor.py", "args = ['--pid=private']\n") agents0 = _write_py(root, "agents.md", "# Short docs\n") base_c = _make_commit(root, repo_id, branch="main", message="base", manifest={"database.py": db0, "executor.py": exec0, "agents.md": agents0}) (ref_path(root, "fix-branch")).write_text(base_c) # ours (dev): pool_pre_ping only db_ours = _write_py(root, "database.py", "def pool(): return 'pool_pre_ping=True'\n") _make_commit(root, repo_id, branch="main", message="ours: pool_pre_ping", manifest={"database.py": db_ours, "executor.py": exec0, "agents.md": agents0}) # theirs (fix-branch): pool_pre_ping + pid fix + AGENTS.md rewrite + new file db_theirs = _write_py(root, "database.py", "def pool(): return 'pool_pre_ping=True'\n") exec_theirs = _write_py(root, "executor.py", "args = [] # no --pid\n") agents_theirs = _write_py(root, "agents.md", "# Comprehensive 700-line rewrite\n" * 10) new_feat = _write_py(root, "new_feature.py", "NEW = True\n") _make_commit(root, repo_id, branch="fix-branch", message="theirs: comprehensive fix mpack", manifest={"database.py": db_theirs, "executor.py": exec_theirs, "agents.md": agents_theirs, "new_feature.py": new_feat}) result = runner.invoke(cli, ["merge", "--force", "--json", "fix-branch"], env=_env(root)) data = json.loads(result.output) if data["status"] == "merged": from muse.core.commits import read_commit from muse.core.snapshots import read_snapshot commit = read_commit(root, _ref(root, "main")) assert commit is not None snap = read_snapshot(root, commit.snapshot_id) assert snap is not None m = snap.manifest assert m.get("executor.py") == exec_theirs, ( "REGRESSION: executor.py (--pid fix) was silently dropped" ) assert m.get("agents.md") == agents_theirs, ( "REGRESSION: agents.md rewrite was silently dropped" ) assert "new_feature.py" in m, ( "REGRESSION: new_feature.py addition was silently dropped" ) else: # A conflict is an acceptable outcome. # But check that it's not some other failure mode. assert data["status"] == "conflict", f"unexpected status: {data['status']}" # =========================================================================== # E — Theirs-only files MUST survive even when there are conflicts elsewhere # =========================================================================== class TestTheirsOnlySurvivesConflictE: """ When there IS a genuine conflict in file X, the merge stops. But the *would-be* merged manifest (what the engine computed before stopping) must still contain all theirs-only changes. The engine must not take a shortcut and return ours manifest verbatim just because a conflict exists. These tests use the JSON output's "files_changed" or check MERGE_STATE.json to infer what the engine planned to write. After a conflict, the user resolves and re-commits — but if the engine's intermediate merged manifest is wrong, the resolution will silently bake in the data loss. """ def test_E1_theirs_additions_included_in_merged_manifest_despite_conflict( self, tmp_path: pathlib.Path ) -> None: """E1: conflict in a.py; theirs adds b.py and c.py — both must be in merged manifest.""" root, repo_id = _init_code_repo(tmp_path) a0 = _write_py(root, "a.py", textwrap.dedent("""\ def go(): return 1 """)) base_c = _make_commit(root, repo_id, branch="main", message="base", manifest={"a.py": a0}) (ref_path(root, "feat")).write_text(base_c) a_ours = _write_py(root, "a.py", textwrap.dedent("""\ def go(): return 'ours' """)) _make_commit(root, repo_id, branch="main", message="ours: change a.py", manifest={"a.py": a_ours}) a_theirs = _write_py(root, "a.py", textwrap.dedent("""\ def go(): return 'theirs' """)) b_theirs = _write_py(root, "b.py", "B = True\n") c_theirs = _write_py(root, "c.py", "C = True\n") _make_commit(root, repo_id, branch="feat", message="theirs: change a + add b + add c", manifest={"a.py": a_theirs, "b.py": b_theirs, "c.py": c_theirs}) result = runner.invoke(cli, ["merge", "--force", "--json", "feat"], env=_env(root)) data = json.loads(result.output) # Two acceptable outcomes: # 1. Clean merge (auto-resolved) — b.py and c.py must be in main manifest # 2. Conflict in a.py — MERGE_STATE must be written; we trust the engine # will include b.py and c.py in the conflict-resolution manifest. if data["status"] == "merged": m = _snapshot_manifest(root, "main") assert "b.py" in m, "theirs-only b.py was lost despite clean merge of other files" assert "c.py" in m, "theirs-only c.py was lost despite clean merge of other files" else: assert data["status"] == "conflict" # The engine computed conflicts — but must NOT have silently dropped b.py/c.py # from the intermediate manifest it would apply after resolution. # We verify this by inspecting what would have been applied: check that # the conflict paths DON'T include b.py or c.py (they're theirs-only, not conflicts). assert "b.py" not in data.get("conflicts", []), "b.py incorrectly marked as conflict" assert "c.py" not in data.get("conflicts", []), "c.py incorrectly marked as conflict" def test_E2_ten_theirs_only_files_all_excluded_from_conflict_list( self, tmp_path: pathlib.Path ) -> None: """E2: 10 theirs-only additions must never appear in the conflict list.""" root, repo_id = _init_code_repo(tmp_path) f0 = _write_py(root, "main.py", textwrap.dedent("""\ def run(): pass """)) base_c = _make_commit(root, repo_id, branch="main", message="base", manifest={"main.py": f0}) (ref_path(root, "feat")).write_text(base_c) f_ours = _write_py(root, "main.py", textwrap.dedent("""\ def run(): return 'ours' """)) _make_commit(root, repo_id, branch="main", message="ours: modify run", manifest={"main.py": f_ours}) f_theirs = _write_py(root, "main.py", textwrap.dedent("""\ def run(): return 'theirs' """)) theirs_manifest: Manifest = {"main.py": f_theirs} for i in range(10): oid = _write_py(root, f"extra_{i}.py", f"EXTRA_{i} = True\n") theirs_manifest[f"extra_{i}.py"] = oid _make_commit(root, repo_id, branch="feat", message="theirs: conflict + 10 extras", manifest=theirs_manifest) result = runner.invoke(cli, ["merge", "--force", "--json", "feat"], env=_env(root)) data = json.loads(result.output) conflicts = data.get("conflicts", []) for i in range(10): assert f"extra_{i}.py" not in conflicts, ( f"extra_{i}.py is a theirs-only addition — must not appear in conflicts" ) # =========================================================================== # F — Strategy shortcuts correctness # =========================================================================== class TestStrategyShortcutsF: """ --strategy=ours and --strategy=theirs are convenience shortcuts. The correct behaviour: non-conflicting theirs/ours changes are STILL applied; only the conflicting files take the chosen side. The old bug: --strategy=ours took ENTIRE ours manifest, discarding all theirs-only changes. This caused data loss just as severe as the OT bug. """ def test_F1_strategy_ours_preserves_theirs_only_files(self, tmp_path: pathlib.Path) -> None: """F1: --strategy=ours for conflict in a.py; theirs-only b.py must still appear.""" root, repo_id = _init_code_repo(tmp_path) a0 = _write_py(root, "a.py", textwrap.dedent("""\ def go(): return 1 """)) base_c = _make_commit(root, repo_id, branch="main", message="base", manifest={"a.py": a0}) (ref_path(root, "feat")).write_text(base_c) a_ours = _write_py(root, "a.py", textwrap.dedent("""\ def go(): return 'ours' """)) _make_commit(root, repo_id, branch="main", message="ours", manifest={"a.py": a_ours}) a_theirs = _write_py(root, "a.py", textwrap.dedent("""\ def go(): return 'theirs' """)) b_theirs = _write_py(root, "b.py", "B = True\n") _make_commit(root, repo_id, branch="feat", message="theirs", manifest={"a.py": a_theirs, "b.py": b_theirs}) code, out = _run(root, "merge", "--strategy", "ours", "feat") assert code == 0, out m = _snapshot_manifest(root, "main") # a.py must be ours version. assert m.get("a.py") == a_ours, "--strategy=ours must keep ours version of conflicting file" # b.py is theirs-only — it must be present. assert "b.py" in m, ( "REGRESSION: --strategy=ours discarded theirs-only b.py. " "Non-conflicting theirs changes must still be applied." ) def test_F2_strategy_theirs_preserves_ours_only_files(self, tmp_path: pathlib.Path) -> None: """F2: --strategy=theirs for conflict in a.py; ours-only c.py must still appear.""" root, repo_id = _init_code_repo(tmp_path) a0 = _write_py(root, "a.py", textwrap.dedent("""\ def go(): return 1 """)) base_c = _make_commit(root, repo_id, branch="main", message="base", manifest={"a.py": a0}) (ref_path(root, "feat")).write_text(base_c) a_ours = _write_py(root, "a.py", textwrap.dedent("""\ def go(): return 'ours' """)) c_ours = _write_py(root, "c.py", "C = True\n") _make_commit(root, repo_id, branch="main", message="ours", manifest={"a.py": a_ours, "c.py": c_ours}) a_theirs = _write_py(root, "a.py", textwrap.dedent("""\ def go(): return 'theirs' """)) _make_commit(root, repo_id, branch="feat", message="theirs", manifest={"a.py": a_theirs}) code, out = _run(root, "merge", "--strategy", "theirs", "feat") assert code == 0, out m = _snapshot_manifest(root, "main") # a.py must be theirs. assert m.get("a.py") == a_theirs, "--strategy=theirs must keep theirs version" # c.py is ours-only — must be in merged. assert "c.py" in m, ( "REGRESSION: --strategy=theirs discarded ours-only c.py. " "Non-conflicting ours changes must still be applied." ) def test_F3_strategy_ours_with_zero_ours_changes_is_up_to_date( self, tmp_path: pathlib.Path ) -> None: """F3: --strategy=ours when ours == base → theirs changes should all be applied.""" root, repo_id = _init_code_repo(tmp_path) f0 = _write_py(root, "f.py", "x = 0\n") base_c = _make_commit(root, repo_id, branch="main", message="base", manifest={"f.py": f0}) (ref_path(root, "feat")).write_text(base_c) g_id = _write_py(root, "g.py", "g = True\n") _make_commit(root, repo_id, branch="feat", message="theirs: add g.py", manifest={"f.py": f0, "g.py": g_id}) # No ours changes since base. code, out = _run(root, "merge", "--strategy", "ours", "feat") assert code == 0, out m = _snapshot_manifest(root, "main") # g.py is theirs-only — must be present. assert "g.py" in m, "theirs-only addition lost with --strategy=ours when ours has no changes" # =========================================================================== # G — Full MuseHub regression scenario: pool_pre_ping + executor + AGENTS.md # =========================================================================== class TestMuseHubRegressionScenarioG: """ Reproduces the exact topology that led to every CI run failing with 'docker: --pid: invalid PID mode' for days. This test is the "aha! that's it!" test the user asked for. It must FAIL on the old Muse code (before commit 73427a30) and PASS on the fixed code. """ def test_G1_musehub_incident_executor_fix_not_lost(self, tmp_path: pathlib.Path) -> None: """G1: the MuseHub incident in miniature — never again. Topology: C0 (base): database.py v1, executor.py v1 (broken), agents.md v1 C1 (dev): pool_pre_ping fix on database.py ← ours C2 (fix-pool): pool_pre_ping fix on database.py ← theirs (same fix) + --pid=private removed from executor.py ← theirs only + agents.md comprehensive rewrite ← theirs only Expected after merge: executor.py MUST be the fixed version (no --pid=private) agents.md MUST be the comprehensive rewrite database.py MUST be the pool_pre_ping version (either side, same content) """ root, repo_id = _init_code_repo(tmp_path) db_v1 = _write_py(root, "database.py", "def init_db(): return engine\n") exec_v1 = _write_py(root, "executor.py", "DOCKER_ARGS = ['--memory=1g', '--pid=private']\n") agents_v1 = _write_py(root, "agents.md", "# MuseHub Agent Contract\nDo stuff.\n") c0 = _make_commit(root, repo_id, branch="main", message="C0: base", manifest={"database.py": db_v1, "executor.py": exec_v1, "agents.md": agents_v1}) (ref_path(root, "fix-pool")).write_text(c0) # C1 — ours (dev): pool_pre_ping fix, nothing else db_v2 = _write_py(root, "database.py", "def init_db(): return engine.execution_options(pool_pre_ping=True)\n") c1 = _make_commit(root, repo_id, branch="main", message="C1: pool_pre_ping", manifest={"database.py": db_v2, "executor.py": exec_v1, "agents.md": agents_v1}) # C2 — theirs (fix-pool): same pool_pre_ping + executor fix + agents rewrite db_v2b = _write_py(root, "database.py", "def init_db(): return engine.execution_options(pool_pre_ping=True)\n") exec_v2 = _write_py(root, "executor.py", "DOCKER_ARGS = ['--memory=1g'] # --pid=private removed\n") agents_v2 = _write_py(root, "agents.md", "# Comprehensive 700-line rewrite\n" * 20) c2 = _make_commit(root, repo_id, branch="fix-pool", message="C2: pool_pre_ping + executor fix + agents rewrite", manifest={"database.py": db_v2b, "executor.py": exec_v2, "agents.md": agents_v2}) result = runner.invoke(cli, ["merge", "--force", "--json", "fix-pool"], env=_env(root)) data = json.loads(result.output) from muse.core.commits import read_commit from muse.core.snapshots import read_snapshot if data["status"] == "merged": commit = read_commit(root, _ref(root, "main")) assert commit is not None snap = read_snapshot(root, commit.snapshot_id) assert snap is not None m = snap.manifest assert m.get("executor.py") == exec_v2, ( "\n\nREGRESSION DETECTED — test_G1_musehub_incident_executor_fix_not_lost\n" "executor.py still has '--pid=private' after merge.\n" "The silent-drop bug in CodePlugin.merge_ops has returned.\n" "See commit 73427a30 for the fix that must be applied.\n" ) assert m.get("agents.md") == agents_v2, ( "\n\nREGRESSION DETECTED — agents.md rewrite was silently dropped.\n" ) # database.py must be the pool_pre_ping version (same content on both sides). assert m.get("database.py") in (db_v2, db_v2b), ( "database.py pool_pre_ping fix was lost" ) elif data["status"] == "conflict": # Conflict is acceptable. Verify executor.py and agents.md are NOT in the conflict list. conflicts = data.get("conflicts", []) assert "executor.py" not in conflicts, ( "executor.py is theirs-only — must not appear in conflicts, only in merged manifest" ) assert "agents.md" not in conflicts, ( "agents.md is theirs-only — must not appear in conflicts" ) else: pytest.fail(f"Unexpected merge status: {data['status']}\n{data}") def test_G2_merge_commit_has_two_parents(self, tmp_path: pathlib.Path) -> None: """G2: a successful three-way merge always creates a commit with 2 parent IDs.""" root, repo_id = _init_code_repo(tmp_path) a0 = _write_py(root, "a.py", "x = 0\n") base_c = _make_commit(root, repo_id, branch="main", message="base", manifest={"a.py": a0}) (ref_path(root, "feat")).write_text(base_c) a1 = _write_py(root, "a.py", "x = 1\n") _make_commit(root, repo_id, branch="main", message="ours", manifest={"a.py": a1}) b1 = _write_py(root, "b.py", "b = 1\n") _make_commit(root, repo_id, branch="feat", message="theirs", manifest={"a.py": a0, "b.py": b1}) code, out = _run(root, "merge", "feat") assert code == 0, out from muse.core.commits import read_commit commit = read_commit(root, _ref(root, "main")) assert commit is not None # A three-way merge commit must record both parents. assert commit.parent2_commit_id is not None, ( "three-way merge commit missing second parent — " "merge history will appear linear in `muse log`" ) def test_G3_merged_snapshot_is_not_ours_snapshot_verbatim( self, tmp_path: pathlib.Path ) -> None: """G3: the snapshot recorded by the merge commit must differ from ours snapshot. When the merged snapshot equals ours verbatim, theirs changes were silently dropped. """ root, repo_id = _init_code_repo(tmp_path) a0 = _write_py(root, "a.py", "x = 0\n") base_c = _make_commit(root, repo_id, branch="main", message="base", manifest={"a.py": a0}) (ref_path(root, "feat")).write_text(base_c) a1 = _write_py(root, "a.py", "x = 1\n") ours_c = _make_commit(root, repo_id, branch="main", message="ours", manifest={"a.py": a1}) b1 = _write_py(root, "b.py", "b = True\n") _make_commit(root, repo_id, branch="feat", message="theirs: add b.py", manifest={"a.py": a0, "b.py": b1}) # Get ours snapshot_id BEFORE the merge. from muse.core.commits import read_commit ours_commit = read_commit(root, ours_c) assert ours_commit is not None ours_snap_id = ours_commit.snapshot_id code, out = _run(root, "merge", "feat") assert code == 0, out merge_commit = read_commit(root, _ref(root, "main")) assert merge_commit is not None assert merge_commit.snapshot_id != ours_snap_id, ( "REGRESSION: merged snapshot equals ours snapshot verbatim. " "Theirs changes (b.py addition) were silently discarded." ) # =========================================================================== # H — Merge-base correctness for complex DAG topologies # =========================================================================== class TestMergeBaseCorrectnessH: """ find_merge_base must handle complex DAG shapes correctly. An incorrect LCA leads to wrong merge-base manifests, which cause phantom conflicts (changes treated as conflicting when they aren't) or missed conflicts (changes treated as clean when they conflict). """ def test_H1_diamond_topology_correct_lca(self, tmp_path: pathlib.Path) -> None: """H1: diamond DAG — LCA is the bottom of the diamond, not an earlier commit. C0 /\\ C1 C2 \\ / C3 (merge of C1 and C2) Merging C3 into C1 (or C2) should detect C0 as the LCA, not something else. """ from muse.core.merge_engine import find_merge_base root, repo_id = _init_code_repo(tmp_path) f0 = _write_py(root, "f.py", "v = 0\n") c0 = _make_commit(root, repo_id, branch="main", message="C0", manifest={"f.py": f0}) (ref_path(root, "branch-a")).write_text(c0) (ref_path(root, "branch-b")).write_text(c0) f1 = _write_py(root, "f.py", "v = 1\n") c1 = _make_commit(root, repo_id, branch="branch-a", message="C1", manifest={"f.py": f1}) f2 = _write_py(root, "f.py", "v = 2\n") c2 = _make_commit(root, repo_id, branch="branch-b", message="C2", manifest={"f.py": f2}) # C3: a merge commit combining C1 and C2 — just use C1's snapshot for simplicity. c3 = _make_commit(root, repo_id, branch="main", message="C3: merge", manifest={"f.py": f1}, parent_commit_id=c1, parent2_commit_id=c2) lca = find_merge_base(root, c1, c3) assert lca == c1, ( f"LCA(C1, C3) should be C1 (C3 is a descendant of C1), got {lca}" ) lca2 = find_merge_base(root, c0, c3) assert lca2 == c0, ( f"LCA(C0, C3) should be C0 (common ancestor of C0-C3 chain), got {lca2}" ) def test_H2_long_linear_chain_lca(self, tmp_path: pathlib.Path) -> None: """H2: 20-commit linear chain — LCA of first and last commit is the first commit.""" from muse.core.merge_engine import find_merge_base root, repo_id = _init_code_repo(tmp_path) f0 = _write_py(root, "f.py", "v = 0\n") first_c = _make_commit(root, repo_id, branch="main", message="C0", manifest={"f.py": f0}) (ref_path(root, "branch")).write_text(first_c) last_c = first_c for i in range(1, 21): fi = _write_py(root, "f.py", f"v = {i}\n") last_c = _make_commit(root, repo_id, branch="main", message=f"C{i}", manifest={"f.py": fi}) lca = find_merge_base(root, first_c, last_c) assert lca == first_c, "LCA of linear chain tip and base should be the base" def test_H3_lca_of_equal_commits_is_that_commit(self, tmp_path: pathlib.Path) -> None: """H3: LCA(X, X) == X.""" from muse.core.merge_engine import find_merge_base root, repo_id = _init_code_repo(tmp_path) f0 = _write_py(root, "f.py", "v = 0\n") c0 = _make_commit(root, repo_id, branch="main", message="C0", manifest={"f.py": f0}) lca = find_merge_base(root, c0, c0) assert lca == c0 def test_H4_merge_base_with_remote_tracking_branch_topology( self, tmp_path: pathlib.Path ) -> None: """H4: simulates the exact topology of the MuseHub incident. local/dev (727dad83) branched from 5e6c6476. remote/dev (e01007b4) is a merge of [5e6c6476, d40f74ba]. d40f74ba includes 727dad83 in its ancestry. LCA(local/dev, remote/dev) should be 5e6c6476 (NOT 727dad83), because 5e6c6476 is the common ancestor that appears first in the BFS of remote/dev's parents. With this LCA, the three-way merge MUST detect that: - executor.py is a theirs-only change (theirs changed it from base, ours did not) - executor.py must appear in the merged manifest. """ from muse.core.merge_engine import find_merge_base root, repo_id = _init_code_repo(tmp_path) # 5e6c6476 equivalent: the proposal-list-revamp merge f_base = _write_py(root, "f.py", "v = 0\n") c_5e6c = _make_commit(root, repo_id, branch="main", message="5e6c: proposal-list-revamp", manifest={"f.py": f_base}) # 727dad83 equivalent: pool_pre_ping fix on top of 5e6c6476 f_pp = _write_py(root, "database.py", "pool_pre_ping = True\n") c_727d = _make_commit(root, repo_id, branch="main", message="727d: pool_pre_ping", manifest={"f.py": f_base, "database.py": f_pp}) # d40f74ba equivalent: fix-branch HEAD (includes 727dad83 ancestor) f_ex = _write_py(root, "executor.py", "args = [] # fixed\n") (ref_path(root, "fix-branch")).write_text(c_727d) c_d40f = _make_commit(root, repo_id, branch="fix-branch", message="d40f: executor fix", manifest={"f.py": f_base, "database.py": f_pp, "executor.py": f_ex}) # e01007b4 equivalent: MuseHub merge of fix-branch into dev # parents: [5e6c6476, d40f74ba] (ref_path(root, "remote-dev")).write_text(c_5e6c) c_e010 = _make_commit(root, repo_id, branch="remote-dev", message="e010: Merge fix-branch into dev", manifest={"f.py": f_base, "database.py": f_pp, "executor.py": f_ex}, parent_commit_id=c_5e6c, parent2_commit_id=c_d40f) # The merge base of local dev (727dad83) and remote dev (e01007b4). lca = find_merge_base(root, c_727d, c_e010) assert lca == c_5e6c, ( f"LCA(727dad83, e01007b4) should be 5e6c6476, got {lca}. " "With the wrong LCA, the three-way merge computes wrong change-sets " "and silently drops theirs-only files." ) # =========================================================================== # I — False-conflict regression: theirs-only additions when ours==base # =========================================================================== # Real incident: muse/core/patch_record.py was added on dev. When dev was # merged into main, main's HEAD was a previous merge commit whose snapshot # was IDENTICAL to the merge base snapshot (the prior merge had introduced # no net manifest changes). The engine falsely reported patch_record.py as # a conflict and apply_manifest deleted it from disk. # # Root invariant: if base_manifest[p] is absent AND ours_manifest[p] is absent # AND theirs_manifest[p] is present → this is a PURE THEIRS ADDITION. It must # NEVER appear in conflict_paths. It MUST appear on disk after the merge stops. # =========================================================================== class TestFalseConflictTheirsOnlyI: """I: theirs-only additions must never be false-conflicted or deleted.""" def test_I1_theirs_only_addition_not_in_conflict_list( self, tmp_path: pathlib.Path ) -> None: """I1: when ours-snapshot == base-snapshot, theirs-only new files are clean.""" root, repo_id = _init_code_repo(tmp_path) # Base commit: a.py only a_oid = _write_py(root, "a.py", "A = 1\n") base_c = _make_commit(root, repo_id, branch="main", message="base: a.py", manifest={"a.py": a_oid}) # Main: a no-op merge commit (snapshot identical to base — mirrors real incident # where main's last commit was a merge that produced no manifest changes). noop_c = _make_commit(root, repo_id, branch="main", message="Merge dev into main (noop)", manifest={"a.py": a_oid}, parent_commit_id=base_c, parent2_commit_id=base_c) # Dev: adds patch_record.py — theirs-only addition pr_oid = _write_py(root, "patch_record.py", "\"\"\"Patch record.\"\"\"\n\nclass PatchRecord:\n pass\n") (ref_path(root, "dev")).write_text(base_c) dev_c = _make_commit(root, repo_id, branch="dev", message="feat: add patch_record", manifest={"a.py": a_oid, "patch_record.py": pr_oid}, parent_commit_id=base_c) result = runner.invoke(cli, ["merge", "--force", "--json", "dev"], env=_env(root)) assert result.exit_code == 0, f"merge failed:\n{result.output}" data = json.loads(result.output) assert "patch_record.py" not in data.get("conflicts", []), ( "patch_record.py is a pure theirs-only addition — must not appear in conflicts" ) def test_I2_theirs_only_addition_lands_on_disk( self, tmp_path: pathlib.Path ) -> None: """I2: theirs-only file must exist on disk after merge (not deleted by apply_manifest).""" root, repo_id = _init_code_repo(tmp_path) a_oid = _write_py(root, "a.py", "A = 1\n") base_c = _make_commit(root, repo_id, branch="main", message="base: a.py", manifest={"a.py": a_oid}) # Write a.py to disk so the workdir guard doesn't fire (root / "a.py").write_bytes(b"A = 1\n") noop_c = _make_commit(root, repo_id, branch="main", message="Merge dev into main (noop)", manifest={"a.py": a_oid}, parent_commit_id=base_c, parent2_commit_id=base_c) pr_content = b"\"\"\"Patch record.\"\"\"\n\nclass PatchRecord:\n pass\n" pr_oid = _write_object(root, pr_content) (ref_path(root, "dev")).write_text(base_c) dev_c = _make_commit(root, repo_id, branch="dev", message="feat: add patch_record", manifest={"a.py": a_oid, "patch_record.py": pr_oid}, parent_commit_id=base_c) result = runner.invoke(cli, ["merge", "--force", "--json", "dev"], env=_env(root)) assert result.exit_code == 0, f"merge failed:\n{result.output}" # On a clean merge, patch_record.py must be written to disk data = json.loads(result.output) if data["status"] == "merged": assert (root / "patch_record.py").exists(), ( "patch_record.py must exist on disk after clean merge — " "apply_manifest must not delete it" ) def test_I3_merge_succeeds_cleanly_when_ours_equals_base_snapshot( self, tmp_path: pathlib.Path ) -> None: """I3: merge status must be 'merged' or 'fast_forward', never 'conflict' when ours snapshot equals base snapshot and theirs only adds files.""" root, repo_id = _init_code_repo(tmp_path) a_oid = _write_py(root, "a.py", "A = 1\n") base_c = _make_commit(root, repo_id, branch="main", message="base", manifest={"a.py": a_oid}) # ours == base snapshot exactly noop_c = _make_commit(root, repo_id, branch="main", message="noop merge", manifest={"a.py": a_oid}, parent_commit_id=base_c, parent2_commit_id=base_c) new_oid = _write_py(root, "new_module.py", "X = True\n") (ref_path(root, "dev")).write_text(base_c) _make_commit(root, repo_id, branch="dev", message="add new_module.py", manifest={"a.py": a_oid, "new_module.py": new_oid}, parent_commit_id=base_c) result = runner.invoke(cli, ["merge", "--force", "--json", "dev"], env=_env(root)) assert result.exit_code == 0 data = json.loads(result.output) assert data["status"] in ("merged", "fast_forward"), ( f"expected clean merge, got status={data['status']!r}; " f"conflicts={data.get('conflicts')}" ) def test_I4_multiple_theirs_only_files_no_conflict_when_ours_equals_base( self, tmp_path: pathlib.Path ) -> None: """I4: multiple theirs-only additions, none must appear in conflict list.""" root, repo_id = _init_code_repo(tmp_path) a_oid = _write_py(root, "a.py", "A = 1\n") base_c = _make_commit(root, repo_id, branch="main", message="base", manifest={"a.py": a_oid}) noop_c = _make_commit(root, repo_id, branch="main", message="noop", manifest={"a.py": a_oid}, parent_commit_id=base_c, parent2_commit_id=base_c) theirs_manifest: Manifest = {"a.py": a_oid} new_files = ["patch_record.py", "apply_patch.py", "format_patch.py", "patch_utils.py", "patch_schema.py"] for fname in new_files: oid = _write_py(root, fname, f"# {fname}\n") theirs_manifest[fname] = oid (ref_path(root, "dev")).write_text(base_c) _make_commit(root, repo_id, branch="dev", message="add patch files", manifest=theirs_manifest, parent_commit_id=base_c) result = runner.invoke(cli, ["merge", "--force", "--json", "dev"], env=_env(root)) assert result.exit_code == 0 data = json.loads(result.output) conflicts = data.get("conflicts", []) for fname in new_files: assert fname not in conflicts, ( f"{fname} is a pure theirs-only addition — must not appear in conflicts. " f"Full conflict list: {conflicts}" ) def test_I5_partial_merged_manifest_must_include_theirs_only_files_on_conflict( self, tmp_path: pathlib.Path ) -> None: """I5: when there IS a genuine conflict elsewhere, theirs-only additions must still be in the working tree (apply_manifest must not delete them).""" root, repo_id = _init_code_repo(tmp_path) a_oid = _write_py(root, "a.py", "def go(): return 'base'\n") b_oid = _write_py(root, "b.py", "B = True\n") base_c = _make_commit(root, repo_id, branch="main", message="base", manifest={"a.py": a_oid, "b.py": b_oid}) # ours: modifies a.py (causing conflict), same snapshot as base otherwise a_ours = _write_py(root, "a.py", "def go(): return 'ours'\n") noop_c = _make_commit(root, repo_id, branch="main", message="ours: change a.py", manifest={"a.py": a_ours, "b.py": b_oid}, parent_commit_id=base_c) # Write working tree for ours (root / "a.py").write_bytes(b"def go(): return 'ours'\n") (root / "b.py").write_bytes(b"B = True\n") # theirs: modifies a.py differently + adds new_module.py a_theirs = _write_py(root, "a.py", "def go(): return 'theirs'\n") new_oid = _write_object(root, b"NEW = True\n") (ref_path(root, "dev")).write_text(base_c) _make_commit(root, repo_id, branch="dev", message="theirs: change a + add new", manifest={"a.py": a_theirs, "b.py": b_oid, "new_module.py": new_oid}, parent_commit_id=base_c) result = runner.invoke(cli, ["merge", "--force", "--json", "dev"], env=_env(root)) data = json.loads(result.output) # There should be a conflict on a.py, but new_module.py must NOT be in conflicts. assert "new_module.py" not in data.get("conflicts", []), ( "new_module.py is theirs-only — must not appear in conflicts even when " "there is a genuine conflict in a.py" ) if data["status"] == "conflict": # The partial_merged manifest (applied to disk) must contain new_module.py. # Verify it's on disk — if apply_manifest deleted it, that's the bug. assert (root / "new_module.py").exists(), ( "new_module.py must be on disk after conflict-stop. " "apply_manifest must include theirs-only files in partial_merged, " "not delete them because they're absent from ours_manifest." )