"""Tests for muse/core/harmony/engine.py — Phase 3: Resolution Engine. Coverage tiers -------------- I Unit — EngineStatus, EngineConfig, EngineResult, DefaultPlugin, HarmonyPlugin protocol, find_similar II Integration — all four resolution tiers (policy, exact-replay, semantic, escalate); action/threshold branching III End-to-end — full pipeline via resolve() with audit trail IV Stress — 100-pattern semantic search; many-policy matching V Data integrity— EngineResult fields always populated; JSON round-trip VI Security — crafted inputs don't escape engine; plugin errors contained VII Performance — resolve() <50 ms; find_similar(100 patterns) <500 ms """ from __future__ import annotations import dataclasses import datetime from muse.core.types import fake_id import pathlib import time from typing import Any from unittest import mock import pytest import muse.core.harmony as h from muse.core.harmony import ( AgentProvenance, AuditEventType, ConflictPattern, ConflictType, Policy, PolicyAction, PolicyCondition, PolicyScope, Resolution, ResolutionProposal, ResolutionStrategy, append_audit, best_resolution, blob_fingerprint, compute_pattern_id, compute_resolution_id, list_audit, list_patterns, list_resolutions, record_pattern, save_policy, save_resolution, ) from muse.core.harmony.engine import ( DefaultPlugin, EngineConfig, EngineResult, EngineStatus, HarmonyPlugin, find_similar, resolve, ) from muse.core.paths import muse_dir # --------------------------------------------------------------------------- # Shared helpers # --------------------------------------------------------------------------- def _utc_now() -> datetime.datetime: return datetime.datetime.now(datetime.timezone.utc) @pytest.fixture() def repo(tmp_path: pathlib.Path) -> pathlib.Path: muse_dir(tmp_path).mkdir() return tmp_path def _make_pattern( path: str = "track.mid", domain: str = "midi", conflict_type: str = ConflictType.CONTENT, ours: str = "ours", theirs: str = "theirs", semantic_fp: str | None = 
None, ) -> ConflictPattern: ours_id = fake_id(ours) theirs_id = fake_id(theirs) blob_fp = blob_fingerprint(ours_id, theirs_id) sfp = semantic_fp if semantic_fp is not None else blob_fp pid = compute_pattern_id(path, blob_fp, sfp) return ConflictPattern( pattern_id=pid, path=path, domain=domain, conflict_type=conflict_type, blob_fingerprint=blob_fp, semantic_fingerprint=sfp, ours_id=ours_id, theirs_id=theirs_id, description={}, recorded_at=_utc_now(), recorded_by="claude-code", ) def _make_resolution( pattern: ConflictPattern, confidence: float = 0.9, human_verified: bool = False, strategy: str = ResolutionStrategy.MANUAL, outcome_seed: str = "outcome", applied_count: int = 0, ) -> Resolution: prov = AgentProvenance.agent("claude-code", "claude-sonnet-4-6") outcome_blob = fake_id(outcome_seed) resolved_at = _utc_now() rid = compute_resolution_id(pattern.pattern_id, outcome_blob, strategy, prov, resolved_at) return dataclasses.replace( Resolution( resolution_id=rid, pattern_id=pattern.pattern_id, strategy=strategy, policy_id=None, outcome_blob=outcome_blob, resolved_by=prov, human_verified=human_verified, confidence=confidence, rationale="Test resolution", resolved_at=resolved_at, applied_count=applied_count, ), ) def _make_policy( policy_id: str = "default-policy", scope: str = PolicyScope.REPO, action: str = PolicyAction.PREFER_OURS, confidence: float = 0.95, conflict_type: str | None = None, domain: str | None = None, path_pattern: str | None = None, escalate_to: str | None = None, delegate_to: str | None = None, ) -> Policy: return Policy( policy_id=policy_id, description="Test policy", when=PolicyCondition( conflict_type=conflict_type, domain=domain, path_pattern=path_pattern, ), action=action, confidence=confidence, escalate_to=escalate_to, delegate_to=delegate_to, scope=scope, created_at=_utc_now(), created_by="claude-code", ) # =========================================================================== # Tier I — Unit tests # 
# ===========================================================================


class TestEngineStatus:
    """I: EngineStatus constants are plain strings."""

    def test_applied_is_string(self) -> None:
        assert isinstance(EngineStatus.APPLIED, str)

    def test_proposed_is_string(self) -> None:
        assert isinstance(EngineStatus.PROPOSED, str)

    def test_escalated_is_string(self) -> None:
        assert isinstance(EngineStatus.ESCALATED, str)

    def test_distinct_values(self) -> None:
        """The three statuses must not collide with one another."""
        statuses = {
            EngineStatus.APPLIED,
            EngineStatus.PROPOSED,
            EngineStatus.ESCALATED,
        }
        assert len(statuses) == 3


class TestEngineConfig:
    """I: EngineConfig defaults and frozen nature."""

    def test_default_auto_apply_threshold(self) -> None:
        assert EngineConfig().auto_apply_threshold == pytest.approx(0.85)

    def test_default_semantic_threshold(self) -> None:
        assert EngineConfig().semantic_threshold == pytest.approx(0.70)

    def test_default_max_proposals(self) -> None:
        assert EngineConfig().max_proposals == 5

    def test_custom_config(self) -> None:
        """Explicit constructor arguments override every default."""
        cfg = EngineConfig(
            auto_apply_threshold=0.99,
            semantic_threshold=0.50,
            max_proposals=3,
        )
        assert cfg.auto_apply_threshold == pytest.approx(0.99)
        assert cfg.semantic_threshold == pytest.approx(0.50)
        assert cfg.max_proposals == 3

    def test_frozen(self) -> None:
        """Assigning to a field must raise FrozenInstanceError."""
        cfg = EngineConfig()
        # Narrow exception type so an unrelated error cannot satisfy the test.
        with pytest.raises(dataclasses.FrozenInstanceError):
            cfg.auto_apply_threshold = 0.0  # type: ignore[misc]


class TestEngineResult:
    """I: EngineResult fields, defaults, and immutability."""

    def test_applied_result(self) -> None:
        """An applied result leaves proposal and escalation reason unset."""
        r = EngineResult(
            status=EngineStatus.APPLIED,
            pattern_id=fake_id("p"),
            applied_resolution_id=fake_id("r"),
        )
        assert r.status == EngineStatus.APPLIED
        assert r.proposal is None
        assert r.escalation_reason is None

    def test_proposed_result(self) -> None:
        """A proposed result keeps the exact proposal object it was given."""
        prop = ResolutionProposal(
            pattern_id=fake_id("p"),
            strategy=ResolutionStrategy.POLICY,
            proposed_action=PolicyAction.PREFER_OURS,
            confidence=0.9,
            rationale="test",
        )
        r = EngineResult(
            status=EngineStatus.PROPOSED,
            pattern_id=fake_id("p"),
            proposal=prop,
        )
        assert r.proposal is prop
        assert r.applied_resolution_id is None

    def test_escalated_result(self) -> None:
        r = EngineResult(
            status=EngineStatus.ESCALATED,
            pattern_id=fake_id("p"),
            escalation_reason="no match found",
        )
        assert r.escalation_reason == "no match found"

    def test_frozen(self) -> None:
        """Assigning to a field must raise FrozenInstanceError."""
        r = EngineResult(status=EngineStatus.ESCALATED, pattern_id=fake_id("p"))
        with pytest.raises(dataclasses.FrozenInstanceError):
            r.status = EngineStatus.APPLIED  # type: ignore[misc]


class TestDefaultPlugin:
    """I: DefaultPlugin — exact-match similarity, no semantic fingerprint."""

    def test_identical_fps_return_1(self) -> None:
        fp = fake_id("same")
        assert DefaultPlugin().similarity(fp, fp) == pytest.approx(1.0)

    def test_different_fps_return_0(self) -> None:
        score = DefaultPlugin().similarity(fake_id("a"), fake_id("b"))
        assert score == pytest.approx(0.0)

    def test_similarity_commutative(self) -> None:
        a, b = fake_id("x"), fake_id("y")
        p = DefaultPlugin()
        assert p.similarity(a, b) == p.similarity(b, a)

    def test_similarity_range(self) -> None:
        """Every score lies in the closed interval [0, 1]."""
        for seed1, seed2 in [("a", "a"), ("a", "b"), ("c", "d")]:
            s = DefaultPlugin().similarity(fake_id(seed1), fake_id(seed2))
            assert 0.0 <= s <= 1.0


class TestHarmonyPluginProtocol:
    """I: HarmonyPlugin is a structural Protocol — custom plugins duck-type in."""

    def test_custom_plugin_accepted(self) -> None:
        class MyPlugin:
            def similarity(self, fp_a: str, fp_b: str) -> float:
                return 0.5

        plugin = MyPlugin()
        # Should not raise — duck-typing is sufficient
        result = resolve(
            pathlib.Path("/nonexistent"),
            _make_pattern(),
            plugin=plugin,
        )
        # Will escalate because repo doesn't exist, but plugin was accepted
        assert result.status == EngineStatus.ESCALATED


class TestFindSimilar:
    """I: find_similar returns correctly sorted proposals."""

    def test_find_similar_empty_store(self, repo: pathlib.Path) -> None:
        """A store holding only the queried pattern yields no proposals."""
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        results = find_similar(repo, pattern)
        assert results == []

    def test_find_similar_skips_self(self, repo: pathlib.Path) -> None:
        """Even if pattern has the same semantic_fp, it shouldn't match itself."""
        fp = fake_id("shared-semantic")
        pattern = _make_pattern(semantic_fp=fp)
        record_pattern(repo, pattern)
        res = _make_resolution(pattern)
        save_resolution(repo, res)
        results = find_similar(repo, pattern)
        assert all(p.similar_pattern_id != pattern.pattern_id for p in results)

    def test_find_similar_returns_semantically_matching_pattern(
        self, repo: pathlib.Path
    ) -> None:
        """Two patterns with same semantic_fp but different paths should match."""
        shared_fp = fake_id("shared-semantic")
        source = _make_pattern(
            path="source.mid", ours="so", theirs="st", semantic_fp=shared_fp
        )
        target = _make_pattern(
            path="target.mid", ours="to", theirs="tt", semantic_fp=shared_fp
        )
        record_pattern(repo, source)
        record_pattern(repo, target)
        # Give source a resolution — target has none
        res = _make_resolution(source, confidence=0.88)
        save_resolution(repo, res)
        proposals = find_similar(repo, target)
        assert len(proposals) >= 1
        assert proposals[0].similar_pattern_id == source.pattern_id
        assert proposals[0].similarity == pytest.approx(1.0)

    def test_find_similar_sorted_by_confidence_desc(self, repo: pathlib.Path) -> None:
        """Proposals come back ordered from highest to lowest confidence."""
        shared_fp = fake_id("shared-fp")
        target = _make_pattern(
            path="t.mid", ours="to", theirs="tt", semantic_fp=shared_fp
        )
        record_pattern(repo, target)
        # Two sources with different confidence
        for i, conf in enumerate([0.5, 0.9]):
            src = _make_pattern(
                path=f"src{i}.mid",
                ours=f"o{i}",
                theirs=f"t{i}",
                semantic_fp=shared_fp,
            )
            record_pattern(repo, src)
            r = _make_resolution(src, confidence=conf, outcome_seed=f"out{i}")
            save_resolution(repo, r)
        proposals = find_similar(repo, target)
        # Fail with a clear assertion rather than an IndexError when empty.
        assert proposals
        # Check the whole ordering, not just the first and last elements.
        confidences = [p.confidence for p in proposals]
        assert confidences == sorted(confidences, reverse=True)

    def test_find_similar_respects_max_proposals(self, repo: pathlib.Path) -> None:
        """No more than ``max_proposals`` results are returned."""
        shared_fp = fake_id("common")
        target = _make_pattern(
            path="t.mid", ours="to", theirs="tt", semantic_fp=shared_fp
        )
        record_pattern(repo, target)
        for i in range(10):
            src = _make_pattern(
                path=f"s{i}.mid", ours=f"o{i}", theirs=f"t{i}", semantic_fp=shared_fp
            )
            record_pattern(repo, src)
            r = _make_resolution(src, confidence=0.7 + i * 0.02, outcome_seed=f"o{i}")
            save_resolution(repo, r)
        cfg = EngineConfig(max_proposals=3)
        proposals = find_similar(repo, target, config=cfg)
        assert len(proposals) <= 3

    def test_find_similar_below_threshold_excluded(self, repo: pathlib.Path) -> None:
        """DefaultPlugin returns 0.0 for different fingerprints → excluded."""
        target = _make_pattern(path="t.mid", ours="to", theirs="tt")
        source = _make_pattern(path="s.mid", ours="so", theirs="st")  # different semantic_fp
        record_pattern(repo, target)
        record_pattern(repo, source)
        r = _make_resolution(source)
        save_resolution(repo, r)
        # With DefaultPlugin, similarity = 0 for different fps → below any threshold
        proposals = find_similar(repo, target)
        assert proposals == []


# ===========================================================================
# Tier II — Integration: four resolution tiers
# ===========================================================================


class TestTierPolicy:
    """II: Tier 1 — Policy match."""

    def test_policy_prefer_ours_above_threshold_returns_applied(
        self, repo: pathlib.Path
    ) -> None:
        policy = _make_policy(confidence=0.95, action=PolicyAction.PREFER_OURS)
        save_policy(repo, policy)
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        result = resolve(repo, pattern)
        assert result.status == EngineStatus.APPLIED
        assert result.proposal is not None
        assert result.proposal.strategy == ResolutionStrategy.POLICY
        assert result.proposal.proposed_action == PolicyAction.PREFER_OURS
        assert result.proposal.policy_id == policy.policy_id

    def test_policy_prefer_theirs_above_threshold_returns_applied(
        self, repo: pathlib.Path
    ) -> None:
        policy = _make_policy(confidence=0.90, action=PolicyAction.PREFER_THEIRS)
        save_policy(repo, policy)
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        result = resolve(repo, pattern)
        assert result.status == EngineStatus.APPLIED
        assert result.proposal is not None
        assert result.proposal.proposed_action == PolicyAction.PREFER_THEIRS

    def test_policy_below_threshold_returns_proposed(self, repo: pathlib.Path) -> None:
        """Policy confidence under the auto-apply threshold only proposes."""
        policy = _make_policy(confidence=0.60, action=PolicyAction.PREFER_OURS)
        save_policy(repo, policy)
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        cfg = EngineConfig(auto_apply_threshold=0.85)
        result = resolve(repo, pattern, config=cfg)
        assert result.status == EngineStatus.PROPOSED
        assert result.proposal is not None
        assert result.proposal.requires_confirmation is True

    def test_policy_escalate_action_returns_escalated(self, repo: pathlib.Path) -> None:
        policy = _make_policy(
            confidence=1.0,
            action=PolicyAction.ESCALATE,
            escalate_to="human",
        )
        save_policy(repo, policy)
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        result = resolve(repo, pattern)
        assert result.status == EngineStatus.ESCALATED
        assert result.escalation_reason is not None
        reason = result.escalation_reason.lower()
        assert "human" in reason or "escalat" in reason

    def test_policy_require_human_returns_escalated(self, repo: pathlib.Path) -> None:
        policy = _make_policy(confidence=1.0, action=PolicyAction.REQUIRE_HUMAN)
        save_policy(repo, policy)
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        result = resolve(repo, pattern)
        assert result.status == EngineStatus.ESCALATED

    def test_policy_delegate_returns_escalated(self, repo: pathlib.Path) -> None:
        """A delegate policy escalates and names the delegate in the reason."""
        policy = _make_policy(
            confidence=1.0,
            action=PolicyAction.DELEGATE,
            delegate_to="harmony-specialist",
        )
        save_policy(repo, policy)
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        result = resolve(repo, pattern)
        assert result.status == EngineStatus.ESCALATED
        assert "harmony-specialist" in (result.escalation_reason or "")

    def test_policy_domain_filter_does_not_fire_for_wrong_domain(
        self, repo: pathlib.Path
    ) -> None:
        policy = _make_policy(
            domain="code", action=PolicyAction.PREFER_OURS, confidence=1.0
        )
        save_policy(repo, policy)
        pattern = _make_pattern(domain="midi")
        record_pattern(repo, pattern)
        # Should fall through to escalate (no resolution)
        result = resolve(repo, pattern)
        assert result.status == EngineStatus.ESCALATED

    def test_workspace_policy_fires_before_repo_policy(self, repo: pathlib.Path) -> None:
        workspace_p = _make_policy(
            "workspace-p",
            scope=PolicyScope.WORKSPACE,
            action=PolicyAction.PREFER_OURS,
            confidence=0.95,
        )
        repo_p = _make_policy(
            "repo-p",
            scope=PolicyScope.REPO,
            action=PolicyAction.PREFER_THEIRS,
            confidence=0.95,
        )
        save_policy(repo, workspace_p)
        save_policy(repo, repo_p)
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        result = resolve(repo, pattern)
        assert result.status == EngineStatus.APPLIED
        assert result.proposal is not None
        assert result.proposal.proposed_action == PolicyAction.PREFER_OURS  # workspace wins


class TestTierExactReplay:
    """II: Tier 2 — Exact replay (no policy match)."""

    def test_high_confidence_resolution_auto_applied(self, repo: pathlib.Path) -> None:
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        res = _make_resolution(pattern, confidence=0.90)
        save_resolution(repo, res)
        result = resolve(repo, pattern)
        assert result.status == EngineStatus.APPLIED
        assert result.applied_resolution_id == res.resolution_id

    def test_human_verified_always_auto_applied(self, repo: pathlib.Path) -> None:
        """A human-verified resolution is applied even below the threshold."""
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        res = _make_resolution(pattern, confidence=0.50, human_verified=True)
        save_resolution(repo, res)
        cfg = EngineConfig(auto_apply_threshold=0.85)
        result = resolve(repo, pattern, config=cfg)
        assert result.status == EngineStatus.APPLIED

    def test_low_confidence_resolution_proposed(self, repo: pathlib.Path) -> None:
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        res = _make_resolution(pattern, confidence=0.60)
        save_resolution(repo, res)
        cfg = EngineConfig(auto_apply_threshold=0.85)
        result = resolve(repo, pattern, config=cfg)
        assert result.status == EngineStatus.PROPOSED
        assert result.proposal is not None
        assert result.proposal.strategy == ResolutionStrategy.EXACT_REPLAY
        assert result.proposal.requires_confirmation is True

    def test_exact_replay_increments_applied_count(self, repo: pathlib.Path) -> None:
        """After one replay the stored resolution reports applied_count == 1."""
        # Imported locally: load_resolution is not among the module-level imports.
        from muse.core.harmony import load_resolution

        pattern = _make_pattern()
        record_pattern(repo, pattern)
        res = _make_resolution(pattern, confidence=0.90)
        save_resolution(repo, res)
        resolve(repo, pattern)
        loaded = load_resolution(repo, pattern.pattern_id, res.resolution_id)
        assert loaded is not None
        assert loaded.applied_count == 1

    def test_exact_replay_prefers_highest_quality_resolution(
        self, repo: pathlib.Path
    ) -> None:
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        low = _make_resolution(pattern, confidence=0.70, outcome_seed="low")
        high = _make_resolution(pattern, confidence=0.95, outcome_seed="high")
        save_resolution(repo, low)
        save_resolution(repo, high)
        result = resolve(repo, pattern)
        assert result.applied_resolution_id == high.resolution_id


class TestTierSemantic:
    """II: Tier 3 — Semantic match (no policy, no exact replay)."""

    def test_semantic_match_returns_proposed(self, repo: pathlib.Path) -> None:
        shared_fp = fake_id("shared-semantic")
        source = _make_pattern(
            path="s.mid", ours="so", theirs="st", semantic_fp=shared_fp
        )
        target = _make_pattern(
            path="t.mid", ours="to", theirs="tt", semantic_fp=shared_fp
        )
        record_pattern(repo, source)
        record_pattern(repo, target)
        res = _make_resolution(source, confidence=0.85)
        save_resolution(repo, res)
        result = resolve(repo, target)
        assert result.status == EngineStatus.PROPOSED
        assert result.proposal is not None
        assert result.proposal.strategy == ResolutionStrategy.SEMANTIC_PROPOSAL
        assert result.proposal.similar_pattern_id == source.pattern_id
        assert result.proposal.similarity == pytest.approx(1.0)
        assert result.proposal.requires_confirmation is True

    def test_semantic_match_below_threshold_escalates(self, repo: pathlib.Path) -> None:
        """DefaultPlugin returns 0.0 for non-matching fps → no semantic match."""
        source = _make_pattern(path="s.mid", ours="so", theirs="st")
        target = _make_pattern(path="t.mid", ours="to", theirs="tt")
        record_pattern(repo, source)
        record_pattern(repo, target)
        res = _make_resolution(source)
        save_resolution(repo, res)
        result = resolve(repo, target)
        assert result.status == EngineStatus.ESCALATED

    def test_custom_plugin_similarity_drives_semantic_match(
        self, repo: pathlib.Path
    ) -> None:
        """A custom plugin returning 0.8 similarity enables semantic matching."""

        class AlwaysMatchPlugin:
            def similarity(self, fp_a: str, fp_b: str) -> float:
                return 0.8

        source = _make_pattern(path="s.mid", ours="so", theirs="st")
        target = _make_pattern(path="t.mid", ours="to", theirs="tt")
        record_pattern(repo, source)
        record_pattern(repo, target)
        res = _make_resolution(source, confidence=0.85)
        save_resolution(repo, res)
        cfg = EngineConfig(semantic_threshold=0.70)
        result = resolve(repo, target, config=cfg, plugin=AlwaysMatchPlugin())
        assert result.status == EngineStatus.PROPOSED
        assert result.proposal is not None
        assert result.proposal.strategy == ResolutionStrategy.SEMANTIC_PROPOSAL


class TestTierEscalate:
    """II: Tier 4 — Escalation when no policy/replay/semantic matches."""

    def test_no_policy_no_resolution_escalates(self, repo: pathlib.Path) -> None:
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        result = resolve(repo, pattern)
        assert result.status == EngineStatus.ESCALATED
        assert result.escalation_reason is not None
        assert len(result.escalation_reason) > 0

    def test_escalation_has_no_applied_resolution(self, repo: pathlib.Path) -> None:
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        result = resolve(repo, pattern)
        assert result.applied_resolution_id is None

    def test_escalation_has_no_proposal(self, repo: pathlib.Path) -> None:
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        result = resolve(repo, pattern)
        assert result.proposal is None

    def test_unrecorded_pattern_escalates(self, repo: pathlib.Path) -> None:
        """Pattern not in store → escalate (engine is safe with unknown patterns)."""
        pattern = _make_pattern()
        # Not recorded
        result = resolve(repo, pattern)
        assert result.status == EngineStatus.ESCALATED
# ===========================================================================
# Tier III — End-to-end
# ===========================================================================


class TestEndToEnd:
    """III: Full resolution pipeline with audit trail."""

    def test_policy_applied_writes_audit(self, repo: pathlib.Path) -> None:
        """An applied policy leaves a RESOLUTION_APPLIED audit entry."""
        policy = _make_policy(confidence=0.95, action=PolicyAction.PREFER_OURS)
        save_policy(repo, policy)
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        actor = AgentProvenance.agent("claude-code", "claude-sonnet-4-6")
        result = resolve(repo, pattern, actor=actor)
        assert result.status == EngineStatus.APPLIED
        entries = list_audit(repo)
        event_types = [e["event_type"] for e in entries]
        assert AuditEventType.RESOLUTION_APPLIED in event_types

    def test_exact_replay_applied_writes_audit(self, repo: pathlib.Path) -> None:
        """An applied exact replay leaves a RESOLUTION_APPLIED audit entry."""
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        res = _make_resolution(pattern, confidence=0.90)
        save_resolution(repo, res)
        actor = AgentProvenance.human()
        result = resolve(repo, pattern, actor=actor)
        assert result.status == EngineStatus.APPLIED
        entries = list_audit(repo)
        event_types = [e["event_type"] for e in entries]
        assert AuditEventType.RESOLUTION_APPLIED in event_types

    def test_escalation_writes_audit(self, repo: pathlib.Path) -> None:
        """An escalation leaves an ESCALATION_RECORDED audit entry."""
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        actor = AgentProvenance.human()
        result = resolve(repo, pattern, actor=actor)
        assert result.status == EngineStatus.ESCALATED
        entries = list_audit(repo)
        event_types = [e["event_type"] for e in entries]
        assert AuditEventType.ESCALATION_RECORDED in event_types

    def test_full_lifecycle_policy_to_replay(self, repo: pathlib.Path) -> None:
        """First resolution via policy; subsequent calls use exact replay."""
        # Imported locally: remove_policy is not among the module-level imports.
        # compute_resolution_id, save_resolution and datetime already are, so
        # they are no longer re-imported here.
        from muse.core.harmony import remove_policy

        policy = _make_policy(confidence=0.95, action=PolicyAction.PREFER_OURS)
        save_policy(repo, policy)
        pattern = _make_pattern()
        record_pattern(repo, pattern)

        # First call → policy fires
        r1 = resolve(repo, pattern)
        assert r1.status == EngineStatus.APPLIED
        assert r1.proposal is not None
        assert r1.proposal.strategy == ResolutionStrategy.POLICY

        # Save the policy resolution so it can be replayed
        actor = AgentProvenance.agent("claude-code")
        resolved_at = _utc_now()
        outcome_blob = fake_id("policy-outcome")
        rid = compute_resolution_id(
            pattern.pattern_id,
            outcome_blob,
            ResolutionStrategy.POLICY,
            actor,
            resolved_at,
        )
        res = Resolution(
            resolution_id=rid,
            pattern_id=pattern.pattern_id,
            strategy=ResolutionStrategy.POLICY,
            policy_id=policy.policy_id,
            outcome_blob=outcome_blob,
            resolved_by=actor,
            human_verified=False,
            confidence=0.95,
            rationale="Policy applied",
            resolved_at=resolved_at,
        )
        save_resolution(repo, res)

        # Remove policy — next call should use exact replay
        remove_policy(repo, policy.policy_id)
        r2 = resolve(repo, pattern)
        assert r2.status == EngineStatus.APPLIED
        assert r2.proposal is not None
        assert r2.proposal.strategy == ResolutionStrategy.EXACT_REPLAY

    def test_resolve_with_explicit_config(self, repo: pathlib.Path) -> None:
        """The same 0.80 resolution is applied or proposed per the threshold."""
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        res = _make_resolution(pattern, confidence=0.80)
        save_resolution(repo, res)

        # Low threshold → auto-apply
        cfg_low = EngineConfig(auto_apply_threshold=0.75)
        r = resolve(repo, pattern, config=cfg_low)
        assert r.status == EngineStatus.APPLIED

        # High threshold → propose
        cfg_high = EngineConfig(auto_apply_threshold=0.95)
        r2 = resolve(repo, pattern, config=cfg_high)
        assert r2.status == EngineStatus.PROPOSED


# ===========================================================================
# Tier IV — Stress
# ===========================================================================


class TestStress:
    """IV: Engine performance under load."""

    def test_engine_with_100_patterns_completes(self, repo: pathlib.Path) -> None:
        """Engine must handle 100 patterns in the store without crashing."""
        shared_fp = fake_id("stress-shared")
        target = _make_pattern(
            path="target.mid", ours="to", theirs="tt", semantic_fp=shared_fp
        )
        record_pattern(repo, target)
        for i in range(100):
            p = _make_pattern(
                path=f"stress{i}.mid",
                ours=f"so{i}",
                theirs=f"st{i}",
                semantic_fp=shared_fp,
            )
            record_pattern(repo, p)
            r = _make_resolution(p, confidence=0.7, outcome_seed=f"o{i}")
            save_resolution(repo, r)
        result = resolve(repo, target)
        # Should get a semantic match from one of the 100
        assert result.status in (EngineStatus.PROPOSED, EngineStatus.ESCALATED)

    def test_find_similar_100_patterns(self, repo: pathlib.Path) -> None:
        """find_similar with 100 patterns returns correct results."""
        shared_fp = fake_id("bulk-shared")
        target = _make_pattern(
            path="bulk-target.mid", ours="bto", theirs="btt", semantic_fp=shared_fp
        )
        record_pattern(repo, target)
        for i in range(100):
            p = _make_pattern(
                path=f"bulk{i}.mid",
                ours=f"bso{i}",
                theirs=f"bst{i}",
                semantic_fp=shared_fp,
            )
            record_pattern(repo, p)
            r = _make_resolution(
                p, confidence=0.7 + (i % 10) * 0.02, outcome_seed=f"bo{i}"
            )
            save_resolution(repo, r)
        cfg = EngineConfig(max_proposals=5)
        proposals = find_similar(repo, target, config=cfg)
        assert len(proposals) <= 5
        # All returned proposals should have similarity ≥ threshold
        for prop in proposals:
            assert prop.similarity is not None and prop.similarity >= cfg.semantic_threshold

    def test_many_policy_first_match_wins(self, repo: pathlib.Path) -> None:
        """With 50 policies, the first matching one wins."""
        for i in range(50):
            # Only policy-00 prefers ours; the other 49 prefer theirs.
            action = PolicyAction.PREFER_OURS if i == 0 else PolicyAction.PREFER_THEIRS
            p = _make_policy(
                f"policy-{i:02d}",
                scope=PolicyScope.REPO,
                action=action,
                confidence=0.95,
            )
            save_policy(repo, p)
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        result = resolve(repo, pattern)
        # First policy (alphabetically / insertion order) should win
        # NOTE(review): only the APPLIED status is asserted; the winning
        # policy's action is not checked here.
        assert result.status == EngineStatus.APPLIED


# ===========================================================================
# Tier V — Data integrity
# ===========================================================================


class TestDataIntegrity:
    """V: EngineResult fields always populated; JSON-safe."""

    def test_escalated_result_fields_complete(self, repo: pathlib.Path) -> None:
        """An escalated result still exposes every optional attribute."""
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        result = resolve(repo, pattern)
        assert result.status is not None
        assert result.pattern_id is not None
        # Optional fields default to None (not absent)
        assert hasattr(result, "proposal")
        assert hasattr(result, "applied_resolution_id")
        assert hasattr(result, "escalation_reason")

    def test_applied_result_has_no_escalation_reason(self, repo: pathlib.Path) -> None:
        policy = _make_policy(confidence=0.95)
        save_policy(repo, policy)
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        result = resolve(repo, pattern)
        assert result.escalation_reason is None

    def test_proposal_confidence_in_range(self, repo: pathlib.Path) -> None:
        policy = _make_policy(confidence=0.95)
        save_policy(repo, policy)
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        result = resolve(repo, pattern)
        assert result.proposal is not None
        assert 0.0 <= result.proposal.confidence <= 1.0

    def test_engine_result_pattern_id_matches_input(self, repo: pathlib.Path) -> None:
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        result = resolve(repo, pattern)
        assert result.pattern_id == pattern.pattern_id

    def test_proposal_is_json_serialisable(self, repo: pathlib.Path) -> None:
        """The proposal produced by a matching policy survives json.dumps."""
        import json

        policy = _make_policy(confidence=0.95)
        save_policy(repo, policy)
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        result = resolve(repo, pattern)
        # Assert rather than skip: with a conditional guard the test passed
        # without serialising anything whenever no proposal was produced.
        assert result.proposal is not None
        d = dataclasses.asdict(result.proposal)
        json.dumps(d)  # must not raise

    def test_engine_result_is_json_serialisable(self, repo: pathlib.Path) -> None:
        """A whole escalated EngineResult survives json.dumps."""
        import json

        pattern = _make_pattern()
        record_pattern(repo, pattern)
        result = resolve(repo, pattern)
        d = dataclasses.asdict(result)
        json.dumps(d)  # must not raise
# ===========================================================================
# Tier VI — Security
# ===========================================================================


class TestSecurity:
    """VI: Engine handles adversarial inputs safely."""

    def test_plugin_exception_caught(self, repo: pathlib.Path) -> None:
        """A crashing plugin must not propagate — engine falls back to escalate."""

        class CrashingPlugin:
            def similarity(self, fp_a: str, fp_b: str) -> float:
                raise RuntimeError("Plugin exploded")

        source = _make_pattern(path="s.mid", ours="so", theirs="st")
        target = _make_pattern(path="t.mid", ours="to", theirs="tt")
        record_pattern(repo, source)
        record_pattern(repo, target)
        res = _make_resolution(source)
        save_resolution(repo, res)
        result = resolve(repo, target, plugin=CrashingPlugin())
        # Must not raise; engine falls back
        assert result.status in (EngineStatus.ESCALATED, EngineStatus.PROPOSED)

    def test_plugin_returning_out_of_range_similarity_clamped(
        self, repo: pathlib.Path
    ) -> None:
        """Similarity > 1.0 or < 0.0 from a buggy plugin must be clamped."""

        class OverflowPlugin:
            def similarity(self, fp_a: str, fp_b: str) -> float:
                return 999.0

        shared_fp = fake_id("shared")
        source = _make_pattern(
            path="s.mid", ours="so", theirs="st", semantic_fp=shared_fp
        )
        target = _make_pattern(
            path="t.mid", ours="to", theirs="tt", semantic_fp=shared_fp
        )
        record_pattern(repo, source)
        record_pattern(repo, target)
        res = _make_resolution(source, confidence=0.85)
        save_resolution(repo, res)
        result = resolve(repo, target, plugin=OverflowPlugin())
        # Deliberately conditional: the range is checked only when a proposal
        # carrying a similarity value is actually returned.
        if result.proposal is not None and result.proposal.similarity is not None:
            assert 0.0 <= result.proposal.similarity <= 1.0

    def test_resolve_safe_with_empty_repo(self, repo: pathlib.Path) -> None:
        """Engine against empty store must not crash."""
        pattern = _make_pattern()
        result = resolve(repo, pattern)
        assert result.status == EngineStatus.ESCALATED

    def test_resolve_safe_with_nonexistent_root(self) -> None:
        """Engine with nonexistent root must escalate, not crash."""
        pattern = _make_pattern()
        result = resolve(pathlib.Path("/nonexistent/repo"), pattern)
        assert result.status == EngineStatus.ESCALATED


# ===========================================================================
# Tier VII — Performance
# ===========================================================================


class TestPerformance:
    """VII: Engine timing assertions.

    Durations are measured with ``time.perf_counter()``, the clock intended
    for short intervals, so the millisecond budgets are not distorted by a
    coarse timer.
    """

    def test_resolve_no_match_under_50ms(self, repo: pathlib.Path) -> None:
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        start = time.perf_counter()
        resolve(repo, pattern)
        elapsed = (time.perf_counter() - start) * 1000
        assert elapsed < 50, f"resolve (escalate) took {elapsed:.1f}ms"

    def test_resolve_policy_under_50ms(self, repo: pathlib.Path) -> None:
        policy = _make_policy(confidence=0.95)
        save_policy(repo, policy)
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        start = time.perf_counter()
        resolve(repo, pattern)
        elapsed = (time.perf_counter() - start) * 1000
        assert elapsed < 50, f"resolve (policy) took {elapsed:.1f}ms"

    def test_resolve_exact_replay_under_50ms(self, repo: pathlib.Path) -> None:
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        res = _make_resolution(pattern, confidence=0.90)
        save_resolution(repo, res)
        start = time.perf_counter()
        resolve(repo, pattern)
        elapsed = (time.perf_counter() - start) * 1000
        assert elapsed < 50, f"resolve (exact replay) took {elapsed:.1f}ms"

    def test_find_similar_100_patterns_under_500ms(self, repo: pathlib.Path) -> None:
        shared_fp = fake_id("perf-shared")
        target = _make_pattern(
            path="perf-target.mid", ours="pto", theirs="ptt", semantic_fp=shared_fp
        )
        record_pattern(repo, target)
        for i in range(100):
            p = _make_pattern(
                path=f"p{i}.mid", ours=f"po{i}", theirs=f"pt{i}", semantic_fp=shared_fp
            )
            record_pattern(repo, p)
            r = _make_resolution(p, confidence=0.7, outcome_seed=f"perf{i}")
            save_resolution(repo, r)
        # Only the search itself is timed, not the store setup above.
        start = time.perf_counter()
        find_similar(repo, target)
        elapsed = (time.perf_counter() - start) * 1000
        assert elapsed < 500, f"find_similar(100) took {elapsed:.1f}ms"