"""Tests for Phase 3 CLI additions to ``muse harmony``. New subcommands: ``muse harmony engine `` — run the three-tier resolution engine ``muse harmony similar `` — find semantically similar patterns Coverage tiers -------------- I Unit — TypedDict schemas for engine + similar JSON output II Success — engine applied/proposed/escalated; similar with matches III Errors — invalid IDs; pattern not found IV E2E — full policy → engine → audit lifecycle via CLI V Integrity — all JSON fields always present; confidence in range VI Security — path-traversal IDs rejected VII Perf — both subcommands <300 ms """ from __future__ import annotations from collections.abc import Mapping from muse.core.types import fake_id from muse.core.paths import muse_dir import json import pathlib import time import typing import pytest from tests.cli_test_helper import CliRunner runner = CliRunner() # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- @pytest.fixture() def repo(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> pathlib.Path: dot_muse = muse_dir(tmp_path) dot_muse.mkdir() (dot_muse / "config.toml").write_text('[repo]\nname = "test"\nid = "abc"\n') monkeypatch.chdir(tmp_path) return tmp_path def _record( path: str = "track.mid", domain: str = "midi", conflict_type: str = "content", ours: str = "ours", theirs: str = "theirs", semantic_fp: str | None = None, ) -> str: args = [ "harmony", "record", "--path", path, "--domain", domain, "--conflict-type", conflict_type, "--ours-id", fake_id(ours), "--theirs-id", fake_id(theirs), "--json", ] if semantic_fp is not None: args += ["--semantic-fingerprint", semantic_fp] r = runner.invoke(None, args) assert r.exit_code == 0, r.output return json.loads(r.output)["pattern_id"] def _resolve( pattern_id: str, confidence: str = "0.9", strategy: str = "manual", outcome: str = "outcome", ) -> str: r = runner.invoke(None, [ "harmony", "resolve", "--pattern-id", pattern_id, "--strategy", strategy, "--outcome-blob", fake_id(outcome), "--confidence", confidence, "--json", ]) assert r.exit_code == 0, r.output return json.loads(r.output)["resolution_id"] def _add_policy( policy_id: str = "auto-policy", scope: str = "repo", action: str = "prefer-ours", confidence: str = "0.95", domain: str | None = None, ) -> None: args = [ "harmony", "policy-add", "--policy-id", policy_id, "--description", "Test policy", "--scope", scope, "--action", action, "--confidence", confidence, ] if domain: args += ["--domain", domain] runner.invoke(None, args) # =========================================================================== # Tier I — Unit: TypedDict schemas # =========================================================================== class TestTypedDictSchemas: """I: Engine and similar TypedDicts declare expected keys.""" def _hints(self, name: str) -> Mapping[str, object]: import muse.cli.commands.harmony as h td = getattr(h, name) return typing.get_type_hints(td) def test_engine_json_has_status(self) -> None: assert "status" in self._hints("_HarmonyEngineJson") def test_engine_json_has_pattern_id(self) -> None: assert "pattern_id" in self._hints("_HarmonyEngineJson") def test_engine_json_has_proposal(self) -> None: assert "proposal" in self._hints("_HarmonyEngineJson") def test_engine_json_has_applied_resolution_id(self) -> None: assert "applied_resolution_id" in self._hints("_HarmonyEngineJson") def test_engine_json_has_escalation_reason(self) -> None: assert "escalation_reason" in self._hints("_HarmonyEngineJson") def test_similar_json_has_pattern_id(self) -> None: assert "pattern_id" in self._hints("_HarmonySimilarJson") def test_similar_json_has_total(self) -> None: assert "total" in self._hints("_HarmonySimilarJson") def test_similar_json_has_proposals(self) -> None: assert "proposals" in self._hints("_HarmonySimilarJson") class TestRegistration: """I: engine and similar subcommands are reachable.""" def test_engine_help(self, repo: pathlib.Path) -> None: r = runner.invoke(None, ["harmony", "engine", "--help"]) assert r.exit_code == 0 def test_similar_help(self, repo: pathlib.Path) -> None: r = runner.invoke(None, ["harmony", "similar", "--help"]) assert r.exit_code == 0 # =========================================================================== # Tier II — Integration: success paths # =========================================================================== class TestEngineSuccess: """II: muse harmony engine — success paths for all three statuses.""" def test_engine_escalates_no_policy_no_resolution(self, repo: pathlib.Path) -> None: pid = _record() r = runner.invoke(None, ["harmony", "engine", pid, "--json"]) assert r.exit_code == 0 data = json.loads(r.output) assert data["status"] == "escalated" assert data["pattern_id"] == pid assert data["escalation_reason"] is not None def test_engine_applied_via_policy(self, repo: pathlib.Path) -> None: _add_policy(confidence="0.95", action="prefer-ours") pid = _record() r = runner.invoke(None, ["harmony", "engine", pid, "--json"]) assert r.exit_code == 0 data = json.loads(r.output) assert data["status"] == "applied" assert data["proposal"] is not None assert data["proposal"]["strategy"] == "policy" def test_engine_applied_via_exact_replay(self, repo: pathlib.Path) -> None: pid = _record() _resolve(pid, confidence="0.90") r = runner.invoke(None, ["harmony", "engine", pid, "--json"]) assert r.exit_code == 0 data = json.loads(r.output) assert data["status"] == "applied" assert data["applied_resolution_id"] is not None def test_engine_proposed_low_confidence(self, repo: pathlib.Path) -> None: pid = _record() _resolve(pid, confidence="0.60") r = runner.invoke(None, ["harmony", "engine", pid, "--json"]) assert r.exit_code == 0 data = json.loads(r.output) assert data["status"] == "proposed" assert data["proposal"] is not None assert data["proposal"]["requires_confirmation"] is True def test_engine_text_output(self, repo: pathlib.Path) -> None: pid = _record() r = runner.invoke(None, ["harmony", "engine", pid]) assert r.exit_code == 0 assert pid[:12] in r.output def test_engine_with_custom_threshold(self, repo: pathlib.Path) -> None: """--auto-apply-threshold overrides default.""" pid = _record() _resolve(pid, confidence="0.80") # Below default threshold (0.85) → would be proposed. Above 0.75 → applied. r = runner.invoke(None, ["harmony", "engine", pid, "--auto-apply-threshold", "0.75", "--json"]) assert r.exit_code == 0 data = json.loads(r.output) assert data["status"] == "applied" def test_engine_proposed_via_policy_low_confidence(self, repo: pathlib.Path) -> None: _add_policy(policy_id="low-conf", confidence="0.60", action="prefer-ours") pid = _record() r = runner.invoke(None, ["harmony", "engine", pid, "--json"]) assert r.exit_code == 0 data = json.loads(r.output) assert data["status"] == "proposed" def test_engine_escalated_via_escalate_policy(self, repo: pathlib.Path) -> None: _add_policy(policy_id="esc-policy", confidence="1.0", action="escalate") pid = _record() r = runner.invoke(None, ["harmony", "engine", pid, "--json"]) assert r.exit_code == 0 assert json.loads(r.output)["status"] == "escalated" class TestSimilarSuccess: """II: muse harmony similar — success paths.""" def test_similar_empty_when_no_match(self, repo: pathlib.Path) -> None: pid = _record() r = runner.invoke(None, ["harmony", "similar", pid, "--json"]) assert r.exit_code == 0 data = json.loads(r.output) assert data["pattern_id"] == pid assert data["total"] == 0 assert data["proposals"] == [] def test_similar_finds_shared_semantic_fingerprint(self, repo: pathlib.Path) -> None: shared_fp = fake_id("shared-semantic-cli") source_pid = _record(path="source.mid", ours="so", theirs="st", semantic_fp=shared_fp) target_pid = _record(path="target.mid", ours="to", theirs="tt", semantic_fp=shared_fp) # Give source a resolution _resolve(source_pid, confidence="0.88") r = runner.invoke(None, ["harmony", "similar", target_pid, "--json"]) assert r.exit_code == 0 data = json.loads(r.output) assert data["total"] >= 1 assert data["proposals"][0]["similar_pattern_id"] == source_pid def test_similar_entry_has_required_fields(self, repo: pathlib.Path) -> None: shared_fp = fake_id("shared-fields") source_pid = _record(path="s.mid", ours="so", theirs="st", semantic_fp=shared_fp) target_pid = _record(path="t.mid", ours="to", theirs="tt", semantic_fp=shared_fp) _resolve(source_pid, confidence="0.85") r = runner.invoke(None, ["harmony", "similar", target_pid, "--json"]) entry = json.loads(r.output)["proposals"][0] for field in ("similar_pattern_id", "similarity", "confidence", "strategy", "rationale"): assert field in entry, f"missing field: {field}" def test_similar_text_output(self, repo: pathlib.Path) -> None: shared_fp = fake_id("shared-text") src = _record(path="text-src.mid", ours="so", theirs="st", semantic_fp=shared_fp) tgt = _record(path="text-tgt.mid", ours="to", theirs="tt", semantic_fp=shared_fp) _resolve(src) r = runner.invoke(None, ["harmony", "similar", tgt]) assert r.exit_code == 0 def test_similar_limit(self, repo: pathlib.Path) -> None: shared_fp = fake_id("limit-shared") target_pid = _record(path="lim-tgt.mid", ours="to", theirs="tt", semantic_fp=shared_fp) for i in range(10): src = _record( path=f"lim{i}.mid", ours=f"o{i}", theirs=f"t{i}", semantic_fp=shared_fp ) _resolve(src, confidence=f"0.{70+i}", outcome=f"o{i}") r = runner.invoke(None, ["harmony", "similar", target_pid, "--limit", "3", "--json"]) data = json.loads(r.output) assert len(data["proposals"]) <= 3 # =========================================================================== # Tier III — Error paths # =========================================================================== class TestEngineErrors: """III: muse harmony engine — error paths.""" def test_engine_invalid_id_exits_1(self, repo: pathlib.Path) -> None: r = runner.invoke(None, ["harmony", "engine", "bad-id", "--json"]) assert r.exit_code == 1 def test_engine_nonexistent_id_exits_0_escalated(self, repo: pathlib.Path) -> None: # Unknown pattern → engine escalates rather than errors r = runner.invoke(None, ["harmony", "engine", fake_id("nonexistent"), "--json"]) assert r.exit_code == 0 assert json.loads(r.output)["status"] == "escalated" def test_engine_invalid_threshold_exits_1(self, repo: pathlib.Path) -> None: pid = _record() r = runner.invoke(None, [ "harmony", "engine", pid, "--auto-apply-threshold", "1.5", "--json", ]) assert r.exit_code == 1 def test_engine_negative_threshold_exits_1(self, repo: pathlib.Path) -> None: pid = _record() r = runner.invoke(None, [ "harmony", "engine", pid, "--auto-apply-threshold", "-0.1", "--json", ]) assert r.exit_code == 1 class TestSimilarErrors: """III: muse harmony similar — error paths.""" def test_similar_invalid_id_exits_1(self, repo: pathlib.Path) -> None: r = runner.invoke(None, ["harmony", "similar", "bad-id", "--json"]) assert r.exit_code == 1 def test_similar_nonexistent_exits_0_empty(self, repo: pathlib.Path) -> None: r = runner.invoke(None, ["harmony", "similar", fake_id("nonexistent"), "--json"]) assert r.exit_code == 0 data = json.loads(r.output) assert data["total"] == 0 # =========================================================================== # Tier IV — End-to-end # =========================================================================== class TestEndToEnd: """IV: Full lifecycle via CLI layer.""" def test_policy_engine_audit_trail(self, repo: pathlib.Path) -> None: _add_policy(confidence="0.95", action="prefer-ours") pid = _record() runner.invoke(None, ["harmony", "engine", pid]) r = runner.invoke(None, ["harmony", "audit", "--json"]) event_types = [e["event_type"] for e in json.loads(r.output)["entries"]] assert "resolution_applied" in event_types def test_escalation_audit_trail(self, repo: pathlib.Path) -> None: pid = _record() runner.invoke(None, ["harmony", "engine", pid]) r = runner.invoke(None, ["harmony", "audit", "--json"]) event_types = [e["event_type"] for e in json.loads(r.output)["entries"]] assert "escalation_recorded" in event_types def test_exact_replay_increments_applied_count_via_cli(self, repo: pathlib.Path) -> None: pid = _record() _resolve(pid, confidence="0.90") runner.invoke(None, ["harmony", "engine", pid]) r = runner.invoke(None, ["harmony", "show", pid, "--json"]) res = json.loads(r.output)["resolutions"][0] assert res["applied_count"] == 1 def test_similar_then_engine_workflow(self, repo: pathlib.Path) -> None: """Agent workflow: find_similar to discover candidates, engine to resolve.""" shared_fp = fake_id("workflow-shared") src = _record(path="src.mid", ours="so", theirs="st", semantic_fp=shared_fp) tgt = _record(path="tgt.mid", ours="to", theirs="tt", semantic_fp=shared_fp) _resolve(src, confidence="0.88") # Step 1: find similar rs = runner.invoke(None, ["harmony", "similar", tgt, "--json"]) assert json.loads(rs.output)["total"] >= 1 # Step 2: run engine (semantic tier fires, requires confirmation) re = runner.invoke(None, ["harmony", "engine", tgt, "--json"]) data = json.loads(re.output) assert data["status"] == "proposed" assert data["proposal"]["strategy"] == "semantic-proposal" # =========================================================================== # Tier V — Data integrity # =========================================================================== class TestDataIntegrity: """V: All JSON fields always present; types correct.""" def test_engine_escalated_fields_all_present(self, repo: pathlib.Path) -> None: pid = _record() r = runner.invoke(None, ["harmony", "engine", pid, "--json"]) data = json.loads(r.output) for field in ("status", "pattern_id", "proposal", "applied_resolution_id", "escalation_reason"): assert field in data, f"missing field: {field}" def test_engine_applied_applied_resolution_id_is_hex64(self, repo: pathlib.Path) -> None: pid = _record() _resolve(pid, confidence="0.90") r = runner.invoke(None, ["harmony", "engine", pid, "--json"]) rid = json.loads(r.output)["applied_resolution_id"] assert rid is not None assert len(rid) == 71 def test_similar_empty_proposals_is_list_not_null(self, repo: pathlib.Path) -> None: pid = _record() r = runner.invoke(None, ["harmony", "similar", pid, "--json"]) data = json.loads(r.output) assert isinstance(data["proposals"], list) def test_engine_proposed_proposal_confidence_in_range(self, repo: pathlib.Path) -> None: pid = _record() _resolve(pid, confidence="0.60") r = runner.invoke(None, ["harmony", "engine", pid, "--json"]) prop = json.loads(r.output)["proposal"] assert prop is not None assert 0.0 <= prop["confidence"] <= 1.0 def test_similar_similarity_in_range(self, repo: pathlib.Path) -> None: shared_fp = fake_id("range-check") src = _record(path="rc-src.mid", ours="so", theirs="st", semantic_fp=shared_fp) tgt = _record(path="rc-tgt.mid", ours="to", theirs="tt", semantic_fp=shared_fp) _resolve(src) r = runner.invoke(None, ["harmony", "similar", tgt, "--json"]) for prop in json.loads(r.output)["proposals"]: assert 0.0 <= prop["similarity"] <= 1.0 # =========================================================================== # Tier VI — Security # =========================================================================== class TestSecurity: """VI: Path-traversal IDs rejected at engine and similar entry points.""" def test_engine_traversal_rejected(self, repo: pathlib.Path) -> None: r = runner.invoke(None, ["harmony", "engine", "../../malicious", "--json"]) assert r.exit_code == 1 def test_similar_traversal_rejected(self, repo: pathlib.Path) -> None: r = runner.invoke(None, ["harmony", "similar", "../../malicious", "--json"]) assert r.exit_code == 1 def test_engine_null_byte_rejected(self, repo: pathlib.Path) -> None: r = runner.invoke(None, ["harmony", "engine", "a" * 63 + "\x00", "--json"]) assert r.exit_code == 1 # =========================================================================== # Tier VII — Performance # =========================================================================== class TestPerformance: """VII: engine and similar complete within 300 ms.""" def test_engine_under_300ms(self, repo: pathlib.Path) -> None: pid = _record() start = time.monotonic() runner.invoke(None, ["harmony", "engine", pid, "--json"]) elapsed = (time.monotonic() - start) * 1000 assert elapsed < 300, f"engine took {elapsed:.0f}ms" def test_similar_under_300ms(self, repo: pathlib.Path) -> None: pid = _record() start = time.monotonic() runner.invoke(None, ["harmony", "similar", pid, "--json"]) elapsed = (time.monotonic() - start) * 1000 assert elapsed < 600, f"similar took {elapsed:.0f}ms"