"""TDD tests for Phase 7 — Step 1.5 correctness + Harmony confidence gating. Issue #86 Phase 7 deliverables: Step 1.5 independence-merge guard (IC tests): IC_01 — Divergent unparseable Python (SyntaxError on both sides) → conflict surfaces correctly; no silent union-merge. IC_02 — Divergent valid Python, non-overlapping symbols → independence merge fires and produces a clean merge. IC_03 — Divergent valid Python, same symbol changed on both sides → conflict surfaces; independence merge does NOT fire. IC_04 — merge_ops Step 1.5 skipped when child_ops empty on both sides (unit test on the skip condition). Harmony confidence gating (HC tests): HC_01 — Resolution with confidence >= 0.85 (default threshold) auto-applies. HC_02 — Resolution with confidence < 0.85 (e.g. 0.8 from --ours) does NOT auto-apply; conflict stays in remaining. HC_03 — harmony.min_auto_apply_confidence config key overrides default; setting it to 0.75 causes the 0.8-confidence pattern to auto-apply. HC_04 — When no resolution meets the threshold, Harmony escalates cleanly (returns the conflict in remaining, does not error). Background ---------- Gap 1 (Step 1.5): When a .py file can't be parsed (SyntaxError), _semantic_ops returns a PatchOp with child_ops=[]. The commute-check loop is vacuously False over empty lists, so _independence_merge_blob fires and silently union-merges divergent lines — writing both versions without surfacing a conflict. This is data corruption on real divergence in unparseable files. The guard: if not our_patch["child_ops"] and not their_patch["child_ops"]: continue skips the independence path when neither side produced any symbols. Gap 2 (Harmony confidence): auto_apply calls best_resolution() and applies it unconditionally regardless of confidence. A pattern learned from muse checkout --ours (confidence=0.8) replays at the same strength as a hand-edited resolution (confidence=1.0). The fix reads harmony.min_auto_apply_confidence from config (default 0.85) and escalates patterns below the threshold. """ from __future__ import annotations import datetime import json import pathlib import tomllib import pytest from tests.cli_test_helper import CliRunner from muse.core.types import blob_id, fake_id from muse.core.object_store import write_object, read_object from muse.core.paths import heads_dir, muse_dir, ref_path runner = CliRunner() cli = None # --------------------------------------------------------------------------- # Shared helpers # --------------------------------------------------------------------------- def _env(root: pathlib.Path) -> dict: return {"MUSE_REPO_ROOT": str(root)} def _init_repo(tmp_path: pathlib.Path) -> tuple[pathlib.Path, str]: dot_muse = muse_dir(tmp_path) dot_muse.mkdir() repo_id = fake_id("repo") (dot_muse / "repo.json").write_text(json.dumps({ "repo_id": repo_id, "domain": "code", "default_branch": "main", "created_at": "2025-01-01T00:00:00+00:00", }), encoding="utf-8") (dot_muse / "HEAD").write_text("ref: refs/heads/main", encoding="utf-8") (dot_muse / "refs" / "heads").mkdir(parents=True) (dot_muse / "snapshots").mkdir() (dot_muse / "commits").mkdir() (dot_muse / "objects").mkdir() return tmp_path, repo_id def _write_obj(root: pathlib.Path, content: bytes) -> str: oid = blob_id(content) write_object(root, oid, content) return oid def _make_commit( root: pathlib.Path, repo_id: str, branch: str = "main", message: str = "test", manifest: dict | None = None, parent_id: str | None = None, ) -> str: from muse.core.commits import CommitRecord, write_commit from muse.core.snapshots import SnapshotRecord, write_snapshot from muse.core.ids import hash_snapshot, hash_commit ref_file = ref_path(root, branch) if parent_id is None: parent_id = ref_file.read_text().strip() if ref_file.exists() else None m = manifest or {} snap_id = hash_snapshot(m) committed_at = datetime.datetime.now(datetime.timezone.utc) commit_id = hash_commit( parent_ids=[parent_id] if parent_id else [], snapshot_id=snap_id, message=message, committed_at_iso=committed_at.isoformat(), ) write_snapshot(root, SnapshotRecord(snapshot_id=snap_id, manifest=m)) write_commit(root, CommitRecord( commit_id=commit_id, branch=branch, snapshot_id=snap_id, message=message, committed_at=committed_at, parent_commit_id=parent_id, )) ref_file.parent.mkdir(parents=True, exist_ok=True) ref_file.write_text(commit_id, encoding="utf-8") return commit_id # --------------------------------------------------------------------------- # IC_01 — Divergent unparseable Python → conflict surfaces, no silent union-merge # --------------------------------------------------------------------------- def test_IC_01_divergent_unparseable_surfaces_conflict(tmp_path: pathlib.Path) -> None: """Divergent unparseable Python (SyntaxError on both sides) → conflict surfaces. Before the fix: child_ops=[] causes the commute check to be vacuously False, _independence_merge_blob fires, and silently union-merges both divergent versions into the output without surfacing a conflict. After the fix: the empty-child_ops guard skips the independence path; the file-level fallback detects divergent content and surfaces the conflict. """ root, repo_id = _init_repo(tmp_path) # Base: valid Python base_content = b"value = 1\n" base_oid = _write_obj(root, base_content) base_id = _make_commit(root, repo_id, "main", "base", {"config.py": base_oid}) # Ours: SyntaxError (unclosed bracket) + changes value line ours_content = b"(((\nvalue = 2\n" ours_oid = _write_obj(root, ours_content) _make_commit(root, repo_id, "main", "ours changes config", {"config.py": ours_oid}, parent_id=base_id) # Theirs: different SyntaxError, diverges from ours on the bracket line theirs_content = b")))\nvalue = 3\n" theirs_oid = _write_obj(root, theirs_content) (heads_dir(root) / "feat").write_text(base_id, encoding="utf-8") _make_commit(root, repo_id, "feat", "feat changes config", {"config.py": theirs_oid}, parent_id=base_id) # Write ours content to disk so the CLI sees the working tree (root / "config.py").write_bytes(ours_content) result = runner.invoke(cli, ["merge", "feat", "--json"], env=_env(root), catch_exceptions=False) data = json.loads(result.output) assert "config.py" in data.get("conflicts", []), ( f"IC_01: divergent unparseable file must surface conflict, " f"got conflicts={data.get('conflicts')}" ) # --------------------------------------------------------------------------- # IC_02 — Divergent valid Python, non-overlapping symbols → clean independence merge # --------------------------------------------------------------------------- def test_IC_02_non_overlapping_symbols_merge_cleanly(tmp_path: pathlib.Path) -> None: """Divergent valid Python with non-overlapping symbols → independence merge, clean. Branch A adds def foo(), branch B adds def bar() — both to the same file. The independence merge should produce a clean result with both functions. """ root, repo_id = _init_repo(tmp_path) base_content = b"# shared module\n" base_oid = _write_obj(root, base_content) base_id = _make_commit(root, repo_id, "main", "base", {"utils.py": base_oid}) # Ours: add def foo() ours_content = b"# shared module\n\ndef foo():\n return 1\n" ours_oid = _write_obj(root, ours_content) _make_commit(root, repo_id, "main", "add foo", {"utils.py": ours_oid}, parent_id=base_id) # Theirs: add def bar() — independent of foo theirs_content = b"# shared module\n\ndef bar():\n return 2\n" theirs_oid = _write_obj(root, theirs_content) (heads_dir(root) / "feat").write_text(base_id, encoding="utf-8") _make_commit(root, repo_id, "feat", "add bar", {"utils.py": theirs_oid}, parent_id=base_id) (root / "utils.py").write_bytes(ours_content) result = runner.invoke(cli, ["merge", "feat", "--json"], env=_env(root), catch_exceptions=False) data = json.loads(result.output) assert "utils.py" not in data.get("conflicts", []), ( f"IC_02: non-overlapping symbol additions must merge cleanly, " f"got conflicts={data.get('conflicts')}" ) assert data.get("status") in ("clean", "merged", "committed"), ( f"IC_02: expected clean merge status, got {data.get('status')}" ) # --------------------------------------------------------------------------- # IC_03 — Same symbol changed on both sides → conflict surfaces # --------------------------------------------------------------------------- def test_IC_03_same_symbol_conflict_surfaces(tmp_path: pathlib.Path) -> None: """Same symbol modified on both sides → conflict surfaces; independence merge does not fire.""" root, repo_id = _init_repo(tmp_path) base_content = b"def compute():\n return 1\n" base_oid = _write_obj(root, base_content) base_id = _make_commit(root, repo_id, "main", "base", {"engine.py": base_oid}) # Ours: modify compute() to return 2 ours_content = b"def compute():\n return 2\n" ours_oid = _write_obj(root, ours_content) _make_commit(root, repo_id, "main", "ours modifies compute", {"engine.py": ours_oid}, parent_id=base_id) # Theirs: modify the same compute() to return 3 theirs_content = b"def compute():\n return 3\n" theirs_oid = _write_obj(root, theirs_content) (heads_dir(root) / "feat").write_text(base_id, encoding="utf-8") _make_commit(root, repo_id, "feat", "feat modifies compute", {"engine.py": theirs_oid}, parent_id=base_id) (root / "engine.py").write_bytes(ours_content) result = runner.invoke(cli, ["merge", "feat", "--json"], env=_env(root), catch_exceptions=False) data = json.loads(result.output) # Conflict must surface — either at symbol level or file level conflicts = data.get("conflicts", []) assert any("engine.py" in c for c in conflicts), ( f"IC_03: same symbol divergence must surface conflict, got conflicts={conflicts}" ) # --------------------------------------------------------------------------- # IC_04 — Unit test: Step 1.5 skipped when both patches have empty child_ops # --------------------------------------------------------------------------- def test_IC_04_step15_skips_empty_child_ops(tmp_path: pathlib.Path) -> None: """merge_ops Step 1.5 must skip independence merge when child_ops empty on both sides. This is the unit-level proof of the guard: if not our_patch["child_ops"] and not their_patch["child_ops"]: continue Verifies that an unparseable file with divergent content ends up as a conflict in merge_ops output rather than in independence_resolved. """ from muse.plugins.code.plugin import CodePlugin root, _ = _init_repo(tmp_path) # Unparseable on both sides — _semantic_ops will return child_ops=[] base_content = b"value = 1\n" ours_content = b"(((\nvalue = 2\n" # SyntaxError theirs_content = b")))\nvalue = 3\n" # SyntaxError, diverges base_oid = _write_obj(root, base_content) ours_oid = _write_obj(root, ours_content) theirs_oid = _write_obj(root, theirs_content) base_snap = {"files": {"config.py": base_oid}, "domain": "code", "directories": {}} ours_snap = {"files": {"config.py": ours_oid}, "domain": "code", "directories": {}} theirs_snap = {"files": {"config.py": theirs_oid}, "domain": "code", "directories": {}} # Construct minimal PatchOps with empty child_ops — simulates what _semantic_ops # returns when a file cannot be parsed (SyntaxError → child_ops=[]). ours_ops = [{"op": "patch", "address": "config.py", "child_domain": "python", "child_ops": []}] theirs_ops = [{"op": "patch", "address": "config.py", "child_domain": "python", "child_ops": []}] plugin = CodePlugin() result = plugin.merge_ops(base_snap, ours_snap, theirs_snap, ours_ops, theirs_ops, repo_root=root) # config.py must appear as a conflict — the empty-child_ops guard prevented # silent union-merge, so the file-level fallback detected divergence. assert "config.py" in result.conflicts, ( f"IC_04: empty child_ops must cause Step 1.5 skip; " f"config.py must conflict, got conflicts={result.conflicts}" ) # --------------------------------------------------------------------------- # Harmony HC test helpers # --------------------------------------------------------------------------- _NOW = datetime.datetime(2025, 6, 1, 12, 0, 0, tzinfo=datetime.timezone.utc) def _plant_harmony_resolution( root: pathlib.Path, path: str, ours_content: bytes, theirs_content: bytes, outcome_content: bytes, confidence: float, human_verified: bool = False, ) -> tuple[str, str]: """Create a harmony pattern + resolution with the given confidence. Returns (pattern_id, outcome_oid). """ from muse.core.harmony.types import ( AgentProvenance, ConflictPattern, ConflictType, Resolution, ResolutionStrategy, ) from muse.core.harmony.fingerprint import ( blob_fingerprint, compute_pattern_id, compute_resolution_id, ) from muse.core.harmony.patterns import record_pattern from muse.core.harmony.resolutions import save_resolution ours_oid = _write_obj(root, ours_content) theirs_oid = _write_obj(root, theirs_content) outcome_oid = _write_obj(root, outcome_content) blob_fp = blob_fingerprint(ours_oid, theirs_oid) pattern_id = compute_pattern_id(path, blob_fp, blob_fp) pattern = ConflictPattern( pattern_id=pattern_id, path=path, domain="code", conflict_type=ConflictType.CONTENT, blob_fingerprint=blob_fp, semantic_fingerprint=blob_fp, ours_id=ours_oid, theirs_id=theirs_oid, description={}, recorded_at=_NOW, recorded_by="test", ) record_pattern(root, pattern) by = AgentProvenance.human() resolution_id = compute_resolution_id(pattern_id, outcome_oid, ResolutionStrategy.MANUAL, by, _NOW) resolution = Resolution( resolution_id=resolution_id, pattern_id=pattern_id, strategy=ResolutionStrategy.MANUAL, policy_id=None, outcome_blob=outcome_oid, resolved_by=by, human_verified=human_verified, confidence=confidence, rationale="test resolution", resolved_at=_NOW, ) save_resolution(root, resolution) return pattern_id, outcome_oid # --------------------------------------------------------------------------- # HC_01 — High confidence auto-applies (>= default 0.85) # --------------------------------------------------------------------------- def test_HC_01_high_confidence_auto_applies(tmp_path: pathlib.Path) -> None: """Resolution with confidence >= 0.85 (default threshold) auto-applies on re-merge.""" from muse.core.harmony.engine import auto_apply from muse.plugins.code.plugin import CodePlugin root, _ = _init_repo(tmp_path) ours_content = b"value = 2\n" theirs_content = b"value = 3\n" outcome_content = b"value = 2 # merged\n" _, outcome_oid = _plant_harmony_resolution( root, "config.py", ours_content, theirs_content, outcome_content, confidence=0.9 ) ours_oid = blob_id(ours_content) theirs_oid = blob_id(theirs_content) ours_manifest = {"config.py": ours_oid} theirs_manifest = {"config.py": theirs_oid} plugin = CodePlugin() resolved, remaining = auto_apply( root, ["config.py"], ours_manifest, theirs_manifest, "code", plugin ) assert "config.py" in resolved, ( f"HC_01: confidence=0.9 >= threshold=0.85 must auto-apply, " f"got resolved={resolved}, remaining={remaining}" ) assert "config.py" not in remaining # --------------------------------------------------------------------------- # HC_02 — Low confidence does NOT auto-apply (< default 0.85) # --------------------------------------------------------------------------- def test_HC_02_low_confidence_does_not_auto_apply(tmp_path: pathlib.Path) -> None: """Resolution with confidence < 0.85 (e.g. 0.8 from --ours) must NOT auto-apply.""" from muse.core.harmony.engine import auto_apply from muse.plugins.code.plugin import CodePlugin root, _ = _init_repo(tmp_path) ours_content = b"value = 2\n" theirs_content = b"value = 3\n" outcome_content = b"value = 2\n" # same as ours — typical --ours resolution _plant_harmony_resolution( root, "config.py", ours_content, theirs_content, outcome_content, confidence=0.8 ) ours_oid = blob_id(ours_content) theirs_oid = blob_id(theirs_content) ours_manifest = {"config.py": ours_oid} theirs_manifest = {"config.py": theirs_oid} plugin = CodePlugin() resolved, remaining = auto_apply( root, ["config.py"], ours_manifest, theirs_manifest, "code", plugin ) assert "config.py" not in resolved, ( f"HC_02: confidence=0.8 < threshold=0.85 must NOT auto-apply, " f"got resolved={resolved}" ) assert "config.py" in remaining, ( f"HC_02: low-confidence path must be in remaining, got remaining={remaining}" ) # --------------------------------------------------------------------------- # HC_03 — Config key overrides default threshold # --------------------------------------------------------------------------- def test_HC_03_config_key_overrides_threshold(tmp_path: pathlib.Path) -> None: """harmony.min_auto_apply_confidence=0.75 causes the 0.8-confidence pattern to auto-apply.""" from muse.core.harmony.engine import auto_apply from muse.plugins.code.plugin import CodePlugin root, _ = _init_repo(tmp_path) # Write config: lower threshold to 0.75 try: import tomllib as _tomllib_check # noqa: F401 except ImportError: pytest.skip("tomllib not available") config_content = "[harmony]\nmin_auto_apply_confidence = 0.75\n" (root / ".muse" / "config.toml").write_text(config_content, encoding="utf-8") ours_content = b"value = 2\n" theirs_content = b"value = 3\n" outcome_content = b"value = 2\n" _plant_harmony_resolution( root, "config.py", ours_content, theirs_content, outcome_content, confidence=0.8 ) ours_oid = blob_id(ours_content) theirs_oid = blob_id(theirs_content) ours_manifest = {"config.py": ours_oid} theirs_manifest = {"config.py": theirs_oid} plugin = CodePlugin() resolved, remaining = auto_apply( root, ["config.py"], ours_manifest, theirs_manifest, "code", plugin ) assert "config.py" in resolved, ( f"HC_03: with threshold=0.75, confidence=0.8 must auto-apply, " f"got resolved={resolved}, remaining={remaining}" ) assert "config.py" not in remaining # --------------------------------------------------------------------------- # HC_04 — No resolution meeting threshold → escalates cleanly, no error # --------------------------------------------------------------------------- def test_HC_04_no_qualifying_resolution_escalates_cleanly(tmp_path: pathlib.Path) -> None: """When no resolution meets the confidence threshold, auto_apply escalates cleanly.""" from muse.core.harmony.engine import auto_apply from muse.plugins.code.plugin import CodePlugin root, _ = _init_repo(tmp_path) ours_content = b"value = 2\n" theirs_content = b"value = 3\n" outcome_content = b"value = 2\n" # Plant a resolution at 0.5 — well below the 0.85 default threshold _plant_harmony_resolution( root, "config.py", ours_content, theirs_content, outcome_content, confidence=0.5 ) ours_oid = blob_id(ours_content) theirs_oid = blob_id(theirs_content) ours_manifest = {"config.py": ours_oid} theirs_manifest = {"config.py": theirs_oid} plugin = CodePlugin() # Must not raise — escalation is a normal outcome resolved, remaining = auto_apply( root, ["config.py"], ours_manifest, theirs_manifest, "code", plugin ) assert "config.py" not in resolved, ( f"HC_04: confidence=0.5 << threshold=0.85 must not auto-apply, got resolved={resolved}" ) assert "config.py" in remaining, ( f"HC_04: unresolved path must be in remaining, got remaining={remaining}" )