"""Tests for harmony.auto_apply and harmony.record_resolutions. Exercises the high-level integration helpers that sit between the merge and commit commands and the harmony store. These are the functions that actually wire conflict fingerprinting → pattern storage → resolution replay. Bugs documented here: Bug 1 — symbol-level conflict paths (e.g. "config.py::SomeSymbol"): The file portion must be extracted for manifest lookups while the full address is stored as the ConflictPattern path. Bug 2 — record_resolutions not idempotent: Two calls with the same outcome_blob must produce only one resolution. Bug 3 — MERGE_STATE original_conflict_paths: ``muse checkout --ours/--theirs`` clears conflict_paths from MERGE_STATE as each is resolved. By commit time conflict_paths is empty, so record_resolutions is called with [] and nothing is ever recorded. MERGE_STATE must preserve the original conflict list so commit can record. """ from __future__ import annotations from collections.abc import Mapping import pathlib import tempfile import pytest import muse.core.harmony as h from muse.core.harmony import ( auto_apply, blob_fingerprint, compute_pattern_id, compute_semantic_fingerprint, list_patterns, list_resolutions, record_resolutions, ) from muse.core.object_store import write_object from muse.core.types import Manifest, NULL_LONG_ID, blob_id, long_id from muse.core.paths import muse_dir # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- def _fake_object_id(content: bytes) -> str: """Return a canonical sha256:-prefixed object ID for content.""" return blob_id(content) def _write_fake_object(root: pathlib.Path, content: bytes) -> str: """Write content to the object store and return its object ID.""" oid = _fake_object_id(content) write_object(root, oid, content) return oid class _FakePlugin: """Minimal MuseDomainPlugin — no HarmonyPlugin sub-protocol.""" name = "test" def schema(self) -> "Mapping[str, object]": return {} @pytest.fixture() def repo(tmp_path: pathlib.Path) -> pathlib.Path: muse_dir(tmp_path).mkdir() return tmp_path # --------------------------------------------------------------------------- # compute_semantic_fingerprint # --------------------------------------------------------------------------- class TestComputeSemanticFingerprint: def test_no_plugin_returns_blob_fingerprint(self, repo: pathlib.Path) -> None: ours = long_id("a" * 64) theirs = long_id("b" * 64) plugin = _FakePlugin() result = compute_semantic_fingerprint("config.py", ours, theirs, plugin, repo) assert result == blob_fingerprint(ours, theirs) def test_commutative(self, repo: pathlib.Path) -> None: ours = long_id("a" * 64) theirs = long_id("b" * 64) plugin = _FakePlugin() r1 = compute_semantic_fingerprint("config.py", ours, theirs, plugin, repo) r2 = compute_semantic_fingerprint("config.py", theirs, ours, plugin, repo) assert r1 == r2 def test_different_paths_different_fingerprints(self, repo: pathlib.Path) -> None: ours = long_id("a" * 64) theirs = long_id("b" * 64) plugin = _FakePlugin() # blob_fingerprint is path-independent; semantic is too by default # but compute_pattern_id incorporates path — verified separately r1 = compute_semantic_fingerprint("a.py", ours, theirs, plugin, repo) r2 = compute_semantic_fingerprint("b.py", ours, theirs, plugin, repo) # Default (no HarmonyPlugin) → same blob_fp regardless of path assert r1 == r2 # --------------------------------------------------------------------------- # record_resolutions — file-level paths # --------------------------------------------------------------------------- class TestRecordResolutionsFilePaths: """record_resolutions with plain file paths (no :: separator).""" def test_records_pattern_and_resolution(self, repo: pathlib.Path) -> None: ours_content = b"version = 1" theirs_content = b"version = 2" resolved_content = b"version = 3" ours_id = _write_fake_object(repo, ours_content) theirs_id = _write_fake_object(repo, theirs_content) resolution_id = _write_fake_object(repo, resolved_content) ours_manifest: Manifest = {"config.py": ours_id} theirs_manifest: Manifest = {"config.py": theirs_id} new_manifest: Manifest = {"config.py": resolution_id} plugin = _FakePlugin() saved = record_resolutions( repo, ["config.py"], ours_manifest, theirs_manifest, new_manifest, "code", plugin, ) assert saved == ["config.py"] patterns = list_patterns(repo) assert len(patterns) == 1 assert patterns[0].path == "config.py" assert patterns[0].domain == "code" resolutions = list_resolutions(repo, patterns[0].pattern_id) assert len(resolutions) == 1 assert resolutions[0].outcome_blob == resolution_id assert resolutions[0].human_verified is True assert resolutions[0].confidence == 1.0 def test_skips_path_not_in_manifests(self, repo: pathlib.Path) -> None: plugin = _FakePlugin() saved = record_resolutions( repo, ["missing.py"], {}, {}, {}, "code", plugin, ) assert saved == [] assert list_patterns(repo) == [] def test_idempotent_second_call(self, repo: pathlib.Path) -> None: ours_id = _write_fake_object(repo, b"a") theirs_id = _write_fake_object(repo, b"b") resolution_id = _write_fake_object(repo, b"c") ours_m: Manifest = {"f.py": ours_id} theirs_m: Manifest = {"f.py": theirs_id} new_m: Manifest = {"f.py": resolution_id} plugin = _FakePlugin() record_resolutions(repo, ["f.py"], ours_m, theirs_m, new_m, "code", plugin) record_resolutions(repo, ["f.py"], ours_m, theirs_m, new_m, "code", plugin) assert len(list_patterns(repo)) == 1 assert len(list_resolutions(repo, list_patterns(repo)[0].pattern_id)) == 1 # --------------------------------------------------------------------------- # BUG: record_resolutions — symbol-level paths ("file.py::Symbol") # --------------------------------------------------------------------------- class TestRecordResolutionsSymbolPaths: """record_resolutions with symbol-level conflict paths. Conflict paths from the code-domain merge engine are symbol addresses of the form "config.py::MAX_CONNECTIONS". Manifests are keyed by file path. The function must extract the file portion for manifest lookups while storing the full symbol address in the ConflictPattern. """ def test_symbol_path_records_pattern(self, repo: pathlib.Path) -> None: ours_id = _write_fake_object(repo, b"MAX_CONNECTIONS = 10") theirs_id = _write_fake_object(repo, b"MAX_CONNECTIONS = 25") resolution_id = _write_fake_object(repo, b"MAX_CONNECTIONS = 50") # Manifests are keyed by FILE path ours_manifest: Manifest = {"config.py": ours_id} theirs_manifest: Manifest = {"config.py": theirs_id} new_manifest: Manifest = {"config.py": resolution_id} plugin = _FakePlugin() saved = record_resolutions( repo, ["config.py::MAX_CONNECTIONS"], # symbol-level conflict path ours_manifest, theirs_manifest, new_manifest, "code", plugin, ) assert saved == ["config.py::MAX_CONNECTIONS"], ( "record_resolutions silently skipped a symbol-level conflict path — " "it must extract 'config.py' from 'config.py::MAX_CONNECTIONS' " "for manifest lookups" ) patterns = list_patterns(repo) assert len(patterns) == 1, "Expected exactly one pattern recorded" # The full symbol address should be stored as the path assert patterns[0].path == "config.py::MAX_CONNECTIONS" resolutions = list_resolutions(repo, patterns[0].pattern_id) assert len(resolutions) == 1 assert resolutions[0].outcome_blob == resolution_id def test_multiple_symbol_paths_same_file(self, repo: pathlib.Path) -> None: """Two conflicting symbols in the same file → two distinct patterns.""" file_ours = _write_fake_object(repo, b"file ours") file_theirs = _write_fake_object(repo, b"file theirs") file_resolved = _write_fake_object(repo, b"file resolved") ours_m: Manifest = {"app.py": file_ours} theirs_m: Manifest = {"app.py": file_theirs} new_m: Manifest = {"app.py": file_resolved} plugin = _FakePlugin() saved = record_resolutions( repo, ["app.py::foo", "app.py::bar"], ours_m, theirs_m, new_m, "code", plugin, ) assert saved == ["app.py::foo", "app.py::bar"] patterns = list_patterns(repo) assert len(patterns) == 2, ( "Each symbol address should produce a distinct pattern " "(pattern_id incorporates path)" ) paths = {p.path for p in patterns} assert paths == {"app.py::foo", "app.py::bar"} def test_symbol_path_no_file_portion_in_manifest(self, repo: pathlib.Path) -> None: """If the file portion of the symbol path is not in the manifest, skip.""" plugin = _FakePlugin() saved = record_resolutions( repo, ["missing.py::SomeSymbol"], {}, # empty manifests {}, {}, "code", plugin, ) assert saved == [] # --------------------------------------------------------------------------- # BUG: auto_apply — symbol-level paths # --------------------------------------------------------------------------- class TestAutoApplySymbolPaths: """auto_apply must also extract the file portion from symbol paths.""" def test_auto_apply_with_symbol_path_records_pattern( self, repo: pathlib.Path ) -> None: ours_id = _write_fake_object(repo, b"DEBUG = False") theirs_id = _write_fake_object(repo, b"DEBUG = True") ours_m: Manifest = {"settings.py": ours_id} theirs_m: Manifest = {"settings.py": theirs_id} plugin = _FakePlugin() resolved, remaining = auto_apply( repo, ["settings.py::DEBUG"], # symbol-level path ours_m, theirs_m, "code", plugin, ) assert "settings.py::DEBUG" in remaining patterns = list_patterns(repo) assert len(patterns) == 1, ( "auto_apply must record the pattern even when no resolution exists yet " "— but it silently skipped the symbol-level path" ) assert patterns[0].path == "settings.py::DEBUG" def test_auto_apply_replays_symbol_resolution( self, repo: pathlib.Path ) -> None: """After record_resolutions saves a resolution, auto_apply replays it.""" ours_id = _write_fake_object(repo, b"TIMEOUT = 30") theirs_id = _write_fake_object(repo, b"TIMEOUT = 60") resolution_content = b"TIMEOUT = 45" resolution_id = _write_fake_object(repo, resolution_content) ours_m: Manifest = {"config.py": ours_id} theirs_m: Manifest = {"config.py": theirs_id} new_m: Manifest = {"config.py": resolution_id} plugin = _FakePlugin() # Simulate commit recording the resolution saved = record_resolutions( repo, ["config.py::TIMEOUT"], ours_m, theirs_m, new_m, "code", plugin, ) assert saved == ["config.py::TIMEOUT"] # Now the same conflict recurs — auto_apply should replay it dest = repo / "config.py" resolved, remaining = auto_apply( repo, ["config.py::TIMEOUT"], ours_m, theirs_m, "code", plugin, ) assert "config.py::TIMEOUT" in resolved, ( "auto_apply failed to replay a saved resolution for a symbol-level path" ) assert remaining == [] assert dest.read_bytes() == resolution_content def test_auto_apply_file_path_still_works( self, repo: pathlib.Path ) -> None: """Plain file paths (no ::) still work after the fix.""" ours_id = _write_fake_object(repo, b"v1") theirs_id = _write_fake_object(repo, b"v2") resolution_content = b"v3" resolution_id = _write_fake_object(repo, resolution_content) ours_m: Manifest = {"README.md": ours_id} theirs_m: Manifest = {"README.md": theirs_id} new_m: Manifest = {"README.md": resolution_id} plugin = _FakePlugin() record_resolutions(repo, ["README.md"], ours_m, theirs_m, new_m, "code", plugin) dest = repo / "README.md" resolved, remaining = auto_apply( repo, ["README.md"], ours_m, theirs_m, "code", plugin ) assert "README.md" in resolved assert remaining == [] assert dest.read_bytes() == resolution_content # --------------------------------------------------------------------------- # auto_apply — path traversal guard still applies # --------------------------------------------------------------------------- class TestAutoApplyPathTraversal: def test_traversal_path_skipped(self, repo: pathlib.Path) -> None: ours_id = _write_fake_object(repo, b"x") theirs_id = _write_fake_object(repo, b"y") ours_m: Manifest = {"../traversal.py": ours_id} theirs_m: Manifest = {"../traversal.py": theirs_id} plugin = _FakePlugin() resolved, remaining = auto_apply( repo, ["../traversal.py"], ours_m, theirs_m, "code", plugin ) assert resolved == {} assert "../traversal.py" in remaining def test_symbol_traversal_skipped(self, repo: pathlib.Path) -> None: ours_id = _write_fake_object(repo, b"x") theirs_id = _write_fake_object(repo, b"y") ours_m: Manifest = {"../traversal.py": ours_id} theirs_m: Manifest = {"../traversal.py": theirs_id} plugin = _FakePlugin() resolved, remaining = auto_apply( repo, ["../traversal.py::Symbol"], ours_m, theirs_m, "code", plugin ) assert resolved == {} assert "../traversal.py::Symbol" in remaining # --------------------------------------------------------------------------- # BUG 3: MERGE_STATE must preserve original_conflict_paths # --------------------------------------------------------------------------- class TestMergeStateOriginalConflictPaths: """MERGE_STATE.original_conflict_paths must survive checkout --ours/--theirs. Workflow: 1. muse merge → MERGE_STATE written with conflict_paths=[A, B] 2. muse checkout --ours A → MERGE_STATE updated: conflict_paths=[B] 3. muse checkout --ours B → MERGE_STATE updated: conflict_paths=[] 4. muse commit → reads merge_state; calls record_resolutions(conflict_paths=[]) → nothing recorded ← BUG Fix: MERGE_STATE preserves original_conflict_paths=[A, B] through all checkout calls. Commit reads original_conflict_paths for record_resolutions. """ def test_write_merge_state_sets_original_conflict_paths( self, repo: pathlib.Path ) -> None: from muse.core.merge_engine import write_merge_state, read_merge_state write_merge_state( repo, base_commit=NULL_LONG_ID, ours_commit=long_id("1" * 64), theirs_commit=long_id("2" * 64), conflict_paths=["config.py::MAX_CONNECTIONS", "utils.py::clamp"], ) state = read_merge_state(repo) assert state is not None assert state.original_conflict_paths == [ "config.py::MAX_CONNECTIONS", "utils.py::clamp", ], ( "write_merge_state must populate original_conflict_paths " "equal to conflict_paths on first write" ) def test_original_conflict_paths_preserved_after_partial_resolution( self, repo: pathlib.Path ) -> None: from muse.core.merge_engine import write_merge_state, read_merge_state # First write — merge produces two conflicts write_merge_state( repo, base_commit=NULL_LONG_ID, ours_commit=long_id("1" * 64), theirs_commit=long_id("2" * 64), conflict_paths=["config.py::A", "config.py::B"], ) # Second write — checkout --ours resolved A; only B remains write_merge_state( repo, base_commit=NULL_LONG_ID, ours_commit=long_id("1" * 64), theirs_commit=long_id("2" * 64), conflict_paths=["config.py::B"], ) state = read_merge_state(repo) assert state is not None assert state.conflict_paths == ["config.py::B"] assert state.original_conflict_paths == ["config.py::A", "config.py::B"], ( "original_conflict_paths must be preserved across writes — " "checkout --ours updates conflict_paths but not original_conflict_paths" ) def test_original_conflict_paths_preserved_after_all_resolved( self, repo: pathlib.Path ) -> None: from muse.core.merge_engine import write_merge_state, read_merge_state write_merge_state( repo, base_commit=NULL_LONG_ID, ours_commit=long_id("1" * 64), theirs_commit=long_id("2" * 64), conflict_paths=["config.py::MAX_CONNECTIONS"], ) # All resolved via checkout --ours write_merge_state( repo, base_commit=NULL_LONG_ID, ours_commit=long_id("1" * 64), theirs_commit=long_id("2" * 64), conflict_paths=[], ) state = read_merge_state(repo) assert state is not None assert state.conflict_paths == [] assert state.original_conflict_paths == ["config.py::MAX_CONNECTIONS"], ( "original_conflict_paths must survive even when all conflicts are cleared" ) def test_commit_uses_original_conflict_paths_for_harmony( self, repo: pathlib.Path ) -> None: """Commit must pass original_conflict_paths to record_resolutions. When all conflicts have been resolved via checkout --ours/--theirs, merge_state.conflict_paths is empty. Commit must fall back to merge_state.original_conflict_paths so harmony still learns. """ from muse.core.merge_engine import write_merge_state, read_merge_state ours_id = _write_fake_object(repo, b"MAX_CONNECTIONS = 50") theirs_id = _write_fake_object(repo, b"MAX_CONNECTIONS = 25") resolution_id = _write_fake_object(repo, b"MAX_CONNECTIONS = 50") # ours wins # Simulate: merge wrote state with conflict, then checkout --ours cleared it write_merge_state( repo, base_commit=NULL_LONG_ID, ours_commit=long_id("1" * 64), theirs_commit=long_id("2" * 64), conflict_paths=["config.py::MAX_CONNECTIONS"], ) write_merge_state( repo, base_commit=NULL_LONG_ID, ours_commit=long_id("1" * 64), theirs_commit=long_id("2" * 64), conflict_paths=[], # all resolved ) state = read_merge_state(repo) assert state is not None assert state.conflict_paths == [] assert state.original_conflict_paths == ["config.py::MAX_CONNECTIONS"] # The commit should use original_conflict_paths, not conflict_paths ours_m: Manifest = {"config.py": ours_id} theirs_m: Manifest = {"config.py": theirs_id} new_m: Manifest = {"config.py": resolution_id} plugin = _FakePlugin() paths_for_harmony = state.original_conflict_paths or state.conflict_paths saved = record_resolutions( repo, paths_for_harmony, ours_m, theirs_m, new_m, "code", plugin ) assert saved == ["config.py::MAX_CONNECTIONS"], ( "commit.py must use merge_state.original_conflict_paths when " "conflict_paths is empty — harmony must learn the resolution" )