gabriel / muse public
test_harmony_engine.py python
1,008 lines 37.1 KB
Raw
sha256:51116ec824246acde6abf729e6ba854c223dc5173eff31a645520208023b0652 refactor(bridge): comprehensive spec sweep — close all issu… Sonnet 4.6 minor ⚠ breaking 29 days ago
1 """Tests for muse/core/harmony_engine.py — Phase 3: Resolution Engine.
2
3 Coverage tiers
4 --------------
5 I Unit — EngineStatus, EngineConfig, EngineResult, DefaultPlugin,
6 HarmonyPlugin protocol, find_similar
7 II Integration — all four resolution tiers (policy, exact-replay,
8 semantic, escalate); action/threshold branching
9 III End-to-end — full pipeline via resolve() with audit trail
10 IV Stress — 100-pattern semantic search; many-policy matching
11 V Data integrity— EngineResult fields always populated; JSON round-trip
12 VI Security — crafted inputs don't escape engine; plugin errors contained
13 VII Performance — resolve() <50 ms; find_similar(100 patterns) <500 ms
14 """
15 from __future__ import annotations
16
17 import dataclasses
18 import datetime
19 from muse.core.types import fake_id
20 import pathlib
21 import time
22 from typing import Any
23 from unittest import mock
24
25 import pytest
26
27 import muse.core.harmony as h
28 import muse.core.harmony_engine as eng
29 from muse.core.harmony import (
30 AgentProvenance,
31 AuditEventType,
32 ConflictPattern,
33 ConflictType,
34 Policy,
35 PolicyAction,
36 PolicyCondition,
37 PolicyScope,
38 Resolution,
39 ResolutionProposal,
40 ResolutionStrategy,
41 append_audit,
42 best_resolution,
43 blob_fingerprint,
44 compute_pattern_id,
45 compute_resolution_id,
46 list_audit,
47 list_patterns,
48 list_resolutions,
49 record_pattern,
50 save_policy,
51 save_resolution,
52 )
53 from muse.core.harmony_engine import (
54 DefaultPlugin,
55 EngineConfig,
56 EngineResult,
57 EngineStatus,
58 HarmonyPlugin,
59 find_similar,
60 resolve,
61 )
62 from muse.core.paths import muse_dir
63
64
65 # ---------------------------------------------------------------------------
66 # Shared helpers
67 # ---------------------------------------------------------------------------
68
69
70
def _utc_now() -> datetime.datetime:
    """Return the current instant as a timezone-aware UTC datetime."""
    utc = datetime.timezone.utc
    return datetime.datetime.now(tz=utc)
73
74
@pytest.fixture()
def repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Yield a temporary repository root whose muse directory already exists."""
    root = tmp_path
    store_dir = muse_dir(root)
    store_dir.mkdir()
    return root
79
80
def _make_pattern(
    path: str = "track.mid",
    domain: str = "midi",
    conflict_type: str = ConflictType.CONTENT,
    ours: str = "ours",
    theirs: str = "theirs",
    semantic_fp: str | None = None,
) -> ConflictPattern:
    """Build a ConflictPattern for tests from short seed strings.

    ``ours`` and ``theirs`` are seeds passed to ``fake_id`` to derive the two
    side identifiers. When ``semantic_fp`` is omitted, the blob fingerprint
    doubles as the semantic fingerprint.
    """
    ours_id, theirs_id = fake_id(ours), fake_id(theirs)
    blob_fp = blob_fingerprint(ours_id, theirs_id)

    # Fall back to the blob fingerprint when no semantic one was supplied.
    if semantic_fp is None:
        semantic_fp = blob_fp

    return ConflictPattern(
        pattern_id=compute_pattern_id(path, blob_fp, semantic_fp),
        path=path,
        domain=domain,
        conflict_type=conflict_type,
        blob_fingerprint=blob_fp,
        semantic_fingerprint=semantic_fp,
        ours_id=ours_id,
        theirs_id=theirs_id,
        description={},
        recorded_at=_utc_now(),
        recorded_by="claude-code",
    )
107
108
def _make_resolution(
    pattern: ConflictPattern,
    confidence: float = 0.9,
    human_verified: bool = False,
    strategy: str = ResolutionStrategy.MANUAL,
    outcome_seed: str = "outcome",
    applied_count: int = 0,
) -> Resolution:
    """Build a Resolution for ``pattern`` attributed to a test agent.

    ``outcome_seed`` is passed to ``fake_id`` to derive the outcome blob id, so
    distinct seeds give distinct resolutions for the same pattern. The
    resolution id is derived from the pattern id, outcome blob, strategy,
    provenance and timestamp via ``compute_resolution_id``.
    """
    prov = AgentProvenance.agent("claude-code", "claude-sonnet-4-6")
    outcome_blob = fake_id(outcome_seed)
    resolved_at = _utc_now()
    rid = compute_resolution_id(
        pattern.pattern_id, outcome_blob, strategy, prov, resolved_at
    )
    # Constructed directly: the previous dataclasses.replace() wrapper changed
    # no fields and only produced a redundant copy.
    return Resolution(
        resolution_id=rid,
        pattern_id=pattern.pattern_id,
        strategy=strategy,
        policy_id=None,
        outcome_blob=outcome_blob,
        resolved_by=prov,
        human_verified=human_verified,
        confidence=confidence,
        rationale="Test resolution",
        resolved_at=resolved_at,
        applied_count=applied_count,
    )
136
137
def _make_policy(
    policy_id: str = "default-policy",
    scope: str = PolicyScope.REPO,
    action: str = PolicyAction.PREFER_OURS,
    confidence: float = 0.95,
    conflict_type: str | None = None,
    domain: str | None = None,
    path_pattern: str | None = None,
    escalate_to: str | None = None,
    delegate_to: str | None = None,
) -> Policy:
    """Build a Policy for tests.

    ``conflict_type``, ``domain`` and ``path_pattern`` populate the policy's
    ``when`` condition; they all default to ``None``.
    """
    condition = PolicyCondition(
        conflict_type=conflict_type,
        domain=domain,
        path_pattern=path_pattern,
    )
    created_at = _utc_now()
    return Policy(
        policy_id=policy_id,
        description="Test policy",
        when=condition,
        action=action,
        confidence=confidence,
        escalate_to=escalate_to,
        delegate_to=delegate_to,
        scope=scope,
        created_at=created_at,
        created_by="claude-code",
    )
165
166
167 # ===========================================================================
168 # Tier I — Unit tests
169 # ===========================================================================
170
171
class TestEngineStatus:
    """I: EngineStatus constants are plain strings."""

    def test_applied_is_string(self) -> None:
        """APPLIED is a str instance."""
        applied = EngineStatus.APPLIED
        assert isinstance(applied, str)

    def test_proposed_is_string(self) -> None:
        """PROPOSED is a str instance."""
        proposed = EngineStatus.PROPOSED
        assert isinstance(proposed, str)

    def test_escalated_is_string(self) -> None:
        """ESCALATED is a str instance."""
        escalated = EngineStatus.ESCALATED
        assert isinstance(escalated, str)

    def test_distinct_values(self) -> None:
        """The three statuses never collapse into the same value."""
        unique = set()
        unique.add(EngineStatus.APPLIED)
        unique.add(EngineStatus.PROPOSED)
        unique.add(EngineStatus.ESCALATED)
        assert len(unique) == 3
187
188
class TestEngineConfig:
    """I: EngineConfig defaults and frozen nature."""

    def test_default_auto_apply_threshold(self) -> None:
        """The default auto-apply threshold is 0.85."""
        assert EngineConfig().auto_apply_threshold == pytest.approx(0.85)

    def test_default_semantic_threshold(self) -> None:
        """The default semantic threshold is 0.70."""
        assert EngineConfig().semantic_threshold == pytest.approx(0.70)

    def test_default_max_proposals(self) -> None:
        """The default proposal cap is 5."""
        assert EngineConfig().max_proposals == 5

    def test_custom_config(self) -> None:
        """Explicit constructor arguments override every default."""
        cfg = EngineConfig(
            auto_apply_threshold=0.99,
            semantic_threshold=0.50,
            max_proposals=3,
        )
        assert cfg.auto_apply_threshold == pytest.approx(0.99)
        assert cfg.semantic_threshold == pytest.approx(0.50)
        assert cfg.max_proposals == 3

    def test_frozen(self) -> None:
        """Assigning to a field of a config instance is rejected."""
        cfg = EngineConfig()
        # Expect the specific frozen-dataclass error; a blanket Exception
        # would also pass on unrelated failures and hide real bugs.
        with pytest.raises(dataclasses.FrozenInstanceError):
            cfg.auto_apply_threshold = 0.0  # type: ignore[misc]
211
212
class TestEngineResult:
    """I: EngineResult fields, defaults, and immutability."""

    def test_applied_result(self) -> None:
        """An applied result leaves proposal and escalation_reason unset."""
        r = EngineResult(
            status=EngineStatus.APPLIED,
            pattern_id=fake_id("p"),
            applied_resolution_id=fake_id("r"),
        )
        assert r.status == EngineStatus.APPLIED
        assert r.proposal is None
        assert r.escalation_reason is None

    def test_proposed_result(self) -> None:
        """A proposed result carries the proposal object it was given."""
        prop = ResolutionProposal(
            pattern_id=fake_id("p"),
            strategy=ResolutionStrategy.POLICY,
            proposed_action=PolicyAction.PREFER_OURS,
            confidence=0.9,
            rationale="test",
        )
        r = EngineResult(
            status=EngineStatus.PROPOSED,
            pattern_id=fake_id("p"),
            proposal=prop,
        )
        assert r.proposal is prop
        assert r.applied_resolution_id is None

    def test_escalated_result(self) -> None:
        """An escalated result exposes the reason it was given."""
        r = EngineResult(
            status=EngineStatus.ESCALATED,
            pattern_id=fake_id("p"),
            escalation_reason="no match found",
        )
        assert r.escalation_reason == "no match found"

    def test_frozen(self) -> None:
        """Assigning to a field of a result instance is rejected."""
        r = EngineResult(status=EngineStatus.ESCALATED, pattern_id=fake_id("p"))
        # Expect the specific frozen-dataclass error; a blanket Exception
        # would also pass on unrelated failures and hide real bugs.
        with pytest.raises(dataclasses.FrozenInstanceError):
            r.status = EngineStatus.APPLIED  # type: ignore[misc]
254
255
class TestDefaultPlugin:
    """I: DefaultPlugin — exact-match similarity, no semantic fingerprint."""

    def test_identical_fps_return_1(self) -> None:
        """Comparing a fingerprint with itself scores 1.0."""
        fingerprint = fake_id("same")
        score = DefaultPlugin().similarity(fingerprint, fingerprint)
        assert score == pytest.approx(1.0)

    def test_different_fps_return_0(self) -> None:
        """Two different fingerprints score 0.0."""
        score = DefaultPlugin().similarity(fake_id("a"), fake_id("b"))
        assert score == pytest.approx(0.0)

    def test_similarity_commutative(self) -> None:
        """Argument order does not affect the score."""
        left, right = fake_id("x"), fake_id("y")
        plugin = DefaultPlugin()
        forward = plugin.similarity(left, right)
        backward = plugin.similarity(right, left)
        assert forward == backward

    def test_similarity_range(self) -> None:
        """Scores for a few seed pairs stay inside [0.0, 1.0]."""
        seed_pairs = [("a", "a"), ("a", "b"), ("c", "d")]
        scores = [
            DefaultPlugin().similarity(fake_id(first), fake_id(second))
            for first, second in seed_pairs
        ]
        assert all(0.0 <= score <= 1.0 for score in scores)
275
276
class TestHarmonyPluginProtocol:
    """I: HarmonyPlugin is a structural Protocol — custom plugins duck-type in."""

    def test_custom_plugin_accepted(self) -> None:
        """An object that only defines ``similarity`` can be passed as plugin."""

        class ConstantPlugin:
            def similarity(self, fp_a: str, fp_b: str) -> float:
                return 0.5

        missing_root = pathlib.Path("/nonexistent")
        pattern = _make_pattern()

        # Passing the duck-typed plugin must not raise.
        outcome = resolve(missing_root, pattern, plugin=ConstantPlugin())

        # The repo doesn't exist, so the engine escalates — but it got that
        # far with the custom plugin in place.
        assert outcome.status == EngineStatus.ESCALATED
294
295
class TestFindSimilar:
    """I: find_similar returns correctly sorted proposals."""

    def test_find_similar_empty_store(self, repo: pathlib.Path) -> None:
        """With only the queried pattern recorded, nothing is proposed."""
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        results = find_similar(repo, pattern)
        assert results == []

    def test_find_similar_skips_self(self, repo: pathlib.Path) -> None:
        """Even if pattern has the same semantic_fp, it shouldn't match itself."""
        fp = fake_id("shared-semantic")
        pattern = _make_pattern(semantic_fp=fp)
        record_pattern(repo, pattern)
        res = _make_resolution(pattern)
        save_resolution(repo, res)
        results = find_similar(repo, pattern)
        assert all(p.similar_pattern_id != pattern.pattern_id for p in results)

    def test_find_similar_returns_semantically_matching_pattern(
        self, repo: pathlib.Path
    ) -> None:
        """Two patterns with same semantic_fp but different paths should match."""
        shared_fp = fake_id("shared-semantic")

        source = _make_pattern(
            path="source.mid", ours="so", theirs="st", semantic_fp=shared_fp
        )
        target = _make_pattern(
            path="target.mid", ours="to", theirs="tt", semantic_fp=shared_fp
        )
        record_pattern(repo, source)
        record_pattern(repo, target)

        # Give source a resolution — target has none
        res = _make_resolution(source, confidence=0.88)
        save_resolution(repo, res)

        proposals = find_similar(repo, target)
        assert len(proposals) >= 1
        assert proposals[0].similar_pattern_id == source.pattern_id
        assert proposals[0].similarity == pytest.approx(1.0)

    def test_find_similar_sorted_by_confidence_desc(self, repo: pathlib.Path) -> None:
        """Proposals come back ordered from highest to lowest confidence."""
        shared_fp = fake_id("shared-fp")
        target = _make_pattern(
            path="t.mid", ours="to", theirs="tt", semantic_fp=shared_fp
        )
        record_pattern(repo, target)

        # Two sources with different confidence
        for i, conf in enumerate([0.5, 0.9]):
            src = _make_pattern(
                path=f"src{i}.mid",
                ours=f"o{i}",
                theirs=f"t{i}",
                semantic_fp=shared_fp,
            )
            record_pattern(repo, src)
            r = _make_resolution(src, confidence=conf, outcome_seed=f"out{i}")
            save_resolution(repo, r)

        proposals = find_similar(repo, target)
        # Fail with a clear message instead of an IndexError on an empty list.
        assert proposals, "expected at least one semantic proposal"
        # Check the whole ordering, not just the first and last entries.
        confidences = [p.confidence for p in proposals]
        assert confidences == sorted(confidences, reverse=True)

    def test_find_similar_respects_max_proposals(self, repo: pathlib.Path) -> None:
        """The result list never exceeds ``EngineConfig.max_proposals``."""
        shared_fp = fake_id("common")
        target = _make_pattern(
            path="t.mid", ours="to", theirs="tt", semantic_fp=shared_fp
        )
        record_pattern(repo, target)

        for i in range(10):
            src = _make_pattern(
                path=f"s{i}.mid", ours=f"o{i}", theirs=f"t{i}", semantic_fp=shared_fp
            )
            record_pattern(repo, src)
            r = _make_resolution(src, confidence=0.7 + i * 0.02, outcome_seed=f"o{i}")
            save_resolution(repo, r)

        cfg = EngineConfig(max_proposals=3)
        proposals = find_similar(repo, target, config=cfg)
        assert len(proposals) <= 3

    def test_find_similar_below_threshold_excluded(self, repo: pathlib.Path) -> None:
        """DefaultPlugin returns 0.0 for different fingerprints → excluded."""
        target = _make_pattern(path="t.mid", ours="to", theirs="tt")
        # Different seeds give this source a different semantic fingerprint.
        source = _make_pattern(path="s.mid", ours="so", theirs="st")
        record_pattern(repo, target)
        record_pattern(repo, source)
        r = _make_resolution(source)
        save_resolution(repo, r)

        # With DefaultPlugin, similarity = 0 for different fps → below any threshold
        proposals = find_similar(repo, target)
        assert proposals == []
384
385
386 # ===========================================================================
387 # Tier II — Integration: four resolution tiers
388 # ===========================================================================
389
390
class TestTierPolicy:
    """II: Tier 1 — Policy match."""

    def test_policy_prefer_ours_above_threshold_returns_applied(
        self, repo: pathlib.Path
    ) -> None:
        """A 0.95-confidence PREFER_OURS policy is applied and reported."""
        policy = _make_policy(confidence=0.95, action=PolicyAction.PREFER_OURS)
        save_policy(repo, policy)

        pattern = _make_pattern()
        record_pattern(repo, pattern)

        result = resolve(repo, pattern)
        assert result.status == EngineStatus.APPLIED
        assert result.proposal is not None
        assert result.proposal.strategy == ResolutionStrategy.POLICY
        assert result.proposal.proposed_action == PolicyAction.PREFER_OURS
        assert result.proposal.policy_id == policy.policy_id

    def test_policy_prefer_theirs_above_threshold_returns_applied(
        self, repo: pathlib.Path
    ) -> None:
        """A 0.90-confidence PREFER_THEIRS policy is applied."""
        policy = _make_policy(confidence=0.90, action=PolicyAction.PREFER_THEIRS)
        save_policy(repo, policy)
        pattern = _make_pattern()
        record_pattern(repo, pattern)

        result = resolve(repo, pattern)
        assert result.status == EngineStatus.APPLIED
        # Guard the Optional so a missing proposal fails as an assertion.
        assert result.proposal is not None
        assert result.proposal.proposed_action == PolicyAction.PREFER_THEIRS

    def test_policy_below_threshold_returns_proposed(self, repo: pathlib.Path) -> None:
        """A policy below the auto-apply threshold is only proposed."""
        policy = _make_policy(confidence=0.60, action=PolicyAction.PREFER_OURS)
        save_policy(repo, policy)
        pattern = _make_pattern()
        record_pattern(repo, pattern)

        cfg = EngineConfig(auto_apply_threshold=0.85)
        result = resolve(repo, pattern, config=cfg)
        assert result.status == EngineStatus.PROPOSED
        assert result.proposal is not None
        assert result.proposal.requires_confirmation is True

    def test_policy_escalate_action_returns_escalated(self, repo: pathlib.Path) -> None:
        """An ESCALATE policy yields an escalated result with a reason."""
        policy = _make_policy(
            confidence=1.0,
            action=PolicyAction.ESCALATE,
            escalate_to="human",
        )
        save_policy(repo, policy)
        pattern = _make_pattern()
        record_pattern(repo, pattern)

        result = resolve(repo, pattern)
        assert result.status == EngineStatus.ESCALATED
        assert result.escalation_reason is not None
        reason = result.escalation_reason.lower()
        assert "human" in reason or "escalat" in reason

    def test_policy_require_human_returns_escalated(self, repo: pathlib.Path) -> None:
        """A REQUIRE_HUMAN policy yields an escalated result."""
        policy = _make_policy(confidence=1.0, action=PolicyAction.REQUIRE_HUMAN)
        save_policy(repo, policy)
        pattern = _make_pattern()
        record_pattern(repo, pattern)

        result = resolve(repo, pattern)
        assert result.status == EngineStatus.ESCALATED

    def test_policy_delegate_returns_escalated(self, repo: pathlib.Path) -> None:
        """A DELEGATE policy escalates and names the delegate in the reason."""
        policy = _make_policy(
            confidence=1.0,
            action=PolicyAction.DELEGATE,
            delegate_to="harmony-specialist",
        )
        save_policy(repo, policy)
        pattern = _make_pattern()
        record_pattern(repo, pattern)

        result = resolve(repo, pattern)
        assert result.status == EngineStatus.ESCALATED
        assert "harmony-specialist" in (result.escalation_reason or "")

    def test_policy_domain_filter_does_not_fire_for_wrong_domain(
        self, repo: pathlib.Path
    ) -> None:
        """A policy scoped to domain "code" is ignored for a "midi" pattern."""
        policy = _make_policy(
            domain="code", action=PolicyAction.PREFER_OURS, confidence=1.0
        )
        save_policy(repo, policy)
        pattern = _make_pattern(domain="midi")
        record_pattern(repo, pattern)

        # Should fall through to escalate (no resolution)
        result = resolve(repo, pattern)
        assert result.status == EngineStatus.ESCALATED

    def test_workspace_policy_fires_before_repo_policy(self, repo: pathlib.Path) -> None:
        """When both scopes match, the workspace policy's action is chosen."""
        workspace_p = _make_policy(
            "workspace-p",
            scope=PolicyScope.WORKSPACE,
            action=PolicyAction.PREFER_OURS,
            confidence=0.95,
        )
        repo_p = _make_policy(
            "repo-p",
            scope=PolicyScope.REPO,
            action=PolicyAction.PREFER_THEIRS,
            confidence=0.95,
        )
        save_policy(repo, workspace_p)
        save_policy(repo, repo_p)
        pattern = _make_pattern()
        record_pattern(repo, pattern)

        result = resolve(repo, pattern)
        assert result.status == EngineStatus.APPLIED
        assert result.proposal is not None
        # workspace wins
        assert result.proposal.proposed_action == PolicyAction.PREFER_OURS
504
505
class TestTierExactReplay:
    """II: Tier 2 — Exact replay (no policy match)."""

    def test_high_confidence_resolution_auto_applied(self, repo: pathlib.Path) -> None:
        """A stored 0.90-confidence resolution is applied and its id reported."""
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        res = _make_resolution(pattern, confidence=0.90)
        save_resolution(repo, res)

        result = resolve(repo, pattern)
        assert result.status == EngineStatus.APPLIED
        assert result.applied_resolution_id == res.resolution_id

    def test_human_verified_always_auto_applied(self, repo: pathlib.Path) -> None:
        """A human-verified resolution is applied even below the threshold."""
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        res = _make_resolution(pattern, confidence=0.50, human_verified=True)
        save_resolution(repo, res)

        cfg = EngineConfig(auto_apply_threshold=0.85)
        result = resolve(repo, pattern, config=cfg)
        assert result.status == EngineStatus.APPLIED

    def test_low_confidence_resolution_proposed(self, repo: pathlib.Path) -> None:
        """A stored resolution below the threshold is only proposed."""
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        res = _make_resolution(pattern, confidence=0.60)
        save_resolution(repo, res)

        cfg = EngineConfig(auto_apply_threshold=0.85)
        result = resolve(repo, pattern, config=cfg)
        assert result.status == EngineStatus.PROPOSED
        # Guard the Optional so a missing proposal fails as an assertion.
        assert result.proposal is not None
        assert result.proposal.strategy == ResolutionStrategy.EXACT_REPLAY
        assert result.proposal.requires_confirmation is True

    def test_exact_replay_increments_applied_count(self, repo: pathlib.Path) -> None:
        """One replay bumps the stored resolution's applied_count from 0 to 1."""
        # load_resolution is not among this module's top-level imports.
        from muse.core.harmony import load_resolution

        pattern = _make_pattern()
        record_pattern(repo, pattern)
        res = _make_resolution(pattern, confidence=0.90)
        save_resolution(repo, res)

        resolve(repo, pattern)

        loaded = load_resolution(repo, pattern.pattern_id, res.resolution_id)
        assert loaded is not None
        assert loaded.applied_count == 1

    def test_exact_replay_prefers_highest_quality_resolution(
        self, repo: pathlib.Path
    ) -> None:
        """Of two stored resolutions, the higher-confidence one is applied."""
        pattern = _make_pattern()
        record_pattern(repo, pattern)

        low = _make_resolution(pattern, confidence=0.70, outcome_seed="low")
        high = _make_resolution(pattern, confidence=0.95, outcome_seed="high")
        save_resolution(repo, low)
        save_resolution(repo, high)

        result = resolve(repo, pattern)
        assert result.applied_resolution_id == high.resolution_id
567
568
class TestTierSemantic:
    """II: Tier 3 — Semantic match (no policy, no exact replay)."""

    def test_semantic_match_returns_proposed(self, repo: pathlib.Path) -> None:
        """A resolved neighbour sharing the semantic fingerprint is proposed."""
        shared_fp = fake_id("shared-semantic")

        source = _make_pattern(
            path="s.mid", ours="so", theirs="st", semantic_fp=shared_fp
        )
        target = _make_pattern(
            path="t.mid", ours="to", theirs="tt", semantic_fp=shared_fp
        )
        record_pattern(repo, source)
        record_pattern(repo, target)

        res = _make_resolution(source, confidence=0.85)
        save_resolution(repo, res)

        result = resolve(repo, target)
        assert result.status == EngineStatus.PROPOSED
        # Guard the Optional so a missing proposal fails as an assertion.
        assert result.proposal is not None
        assert result.proposal.strategy == ResolutionStrategy.SEMANTIC_PROPOSAL
        assert result.proposal.similar_pattern_id == source.pattern_id
        assert result.proposal.similarity == pytest.approx(1.0)
        assert result.proposal.requires_confirmation is True

    def test_semantic_match_below_threshold_escalates(self, repo: pathlib.Path) -> None:
        """DefaultPlugin returns 0.0 for non-matching fps → no semantic match."""
        source = _make_pattern(path="s.mid", ours="so", theirs="st")
        target = _make_pattern(path="t.mid", ours="to", theirs="tt")
        record_pattern(repo, source)
        record_pattern(repo, target)
        res = _make_resolution(source)
        save_resolution(repo, res)

        result = resolve(repo, target)
        assert result.status == EngineStatus.ESCALATED

    def test_custom_plugin_similarity_drives_semantic_match(
        self, repo: pathlib.Path
    ) -> None:
        """A custom plugin returning 0.8 similarity enables semantic matching."""

        class AlwaysMatchPlugin:
            def similarity(self, fp_a: str, fp_b: str) -> float:
                return 0.8

        source = _make_pattern(path="s.mid", ours="so", theirs="st")
        target = _make_pattern(path="t.mid", ours="to", theirs="tt")
        record_pattern(repo, source)
        record_pattern(repo, target)
        res = _make_resolution(source, confidence=0.85)
        save_resolution(repo, res)

        cfg = EngineConfig(semantic_threshold=0.70)
        result = resolve(repo, target, config=cfg, plugin=AlwaysMatchPlugin())
        assert result.status == EngineStatus.PROPOSED
        assert result.proposal is not None
        assert result.proposal.strategy == ResolutionStrategy.SEMANTIC_PROPOSAL
622
623
class TestTierEscalate:
    """II: Tier 4 — Escalation when no policy/replay/semantic matches."""

    def test_no_policy_no_resolution_escalates(self, repo: pathlib.Path) -> None:
        """A recorded pattern with nothing to match escalates with a reason."""
        lonely = _make_pattern()
        record_pattern(repo, lonely)

        outcome = resolve(repo, lonely)
        reason = outcome.escalation_reason
        assert outcome.status == EngineStatus.ESCALATED
        assert reason is not None
        assert len(reason) > 0

    def test_escalation_has_no_applied_resolution(self, repo: pathlib.Path) -> None:
        """An escalated result reports no applied resolution id."""
        lonely = _make_pattern()
        record_pattern(repo, lonely)

        outcome = resolve(repo, lonely)
        assert outcome.applied_resolution_id is None

    def test_escalation_has_no_proposal(self, repo: pathlib.Path) -> None:
        """An escalated result carries no proposal."""
        lonely = _make_pattern()
        record_pattern(repo, lonely)

        outcome = resolve(repo, lonely)
        assert outcome.proposal is None

    def test_unrecorded_pattern_escalates(self, repo: pathlib.Path) -> None:
        """Pattern not in store → escalate (engine is safe with unknown patterns)."""
        # Deliberately never passed to record_pattern.
        unknown = _make_pattern()
        outcome = resolve(repo, unknown)
        assert outcome.status == EngineStatus.ESCALATED
656
657
658 # ===========================================================================
659 # Tier III — End-to-end
660 # ===========================================================================
661
662
class TestEndToEnd:
    """III: Full resolution pipeline with audit trail."""

    def test_policy_applied_writes_audit(self, repo: pathlib.Path) -> None:
        """Applying a policy records a RESOLUTION_APPLIED audit event."""
        policy = _make_policy(confidence=0.95, action=PolicyAction.PREFER_OURS)
        save_policy(repo, policy)
        pattern = _make_pattern()
        record_pattern(repo, pattern)

        actor = AgentProvenance.agent("claude-code", "claude-sonnet-4-6")
        result = resolve(repo, pattern, actor=actor)
        assert result.status == EngineStatus.APPLIED

        entries = list_audit(repo)
        event_types = [e["event_type"] for e in entries]
        assert AuditEventType.RESOLUTION_APPLIED in event_types

    def test_exact_replay_applied_writes_audit(self, repo: pathlib.Path) -> None:
        """Replaying a stored resolution records a RESOLUTION_APPLIED event."""
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        res = _make_resolution(pattern, confidence=0.90)
        save_resolution(repo, res)

        actor = AgentProvenance.human()
        result = resolve(repo, pattern, actor=actor)
        assert result.status == EngineStatus.APPLIED

        entries = list_audit(repo)
        event_types = [e["event_type"] for e in entries]
        assert AuditEventType.RESOLUTION_APPLIED in event_types

    def test_escalation_writes_audit(self, repo: pathlib.Path) -> None:
        """An escalation records an ESCALATION_RECORDED audit event."""
        pattern = _make_pattern()
        record_pattern(repo, pattern)

        actor = AgentProvenance.human()
        result = resolve(repo, pattern, actor=actor)
        assert result.status == EngineStatus.ESCALATED

        entries = list_audit(repo)
        event_types = [e["event_type"] for e in entries]
        assert AuditEventType.ESCALATION_RECORDED in event_types

    def test_full_lifecycle_policy_to_replay(self, repo: pathlib.Path) -> None:
        """First resolution via policy; subsequent calls use exact replay."""
        # remove_policy is not among this module's top-level imports; the
        # other harmony helpers and datetime already are, so they are not
        # re-imported here.
        from muse.core.harmony import remove_policy

        policy = _make_policy(confidence=0.95, action=PolicyAction.PREFER_OURS)
        save_policy(repo, policy)

        pattern = _make_pattern()
        record_pattern(repo, pattern)

        # First call → policy fires
        r1 = resolve(repo, pattern)
        assert r1.status == EngineStatus.APPLIED
        assert r1.proposal is not None
        assert r1.proposal.strategy == ResolutionStrategy.POLICY

        # Save the policy resolution so it can be replayed
        actor = AgentProvenance.agent("claude-code")
        resolved_at = _utc_now()
        outcome_blob = fake_id("policy-outcome")
        rid = compute_resolution_id(
            pattern.pattern_id,
            outcome_blob,
            ResolutionStrategy.POLICY,
            actor,
            resolved_at,
        )
        res = Resolution(
            resolution_id=rid,
            pattern_id=pattern.pattern_id,
            strategy=ResolutionStrategy.POLICY,
            policy_id=policy.policy_id,
            outcome_blob=outcome_blob,
            resolved_by=actor,
            human_verified=False,
            confidence=0.95,
            rationale="Policy applied",
            resolved_at=resolved_at,
        )
        save_resolution(repo, res)

        # Remove policy — next call should use exact replay
        remove_policy(repo, policy.policy_id)

        r2 = resolve(repo, pattern)
        assert r2.status == EngineStatus.APPLIED
        assert r2.proposal is not None
        assert r2.proposal.strategy == ResolutionStrategy.EXACT_REPLAY

    def test_resolve_with_explicit_config(self, repo: pathlib.Path) -> None:
        """The same 0.80 resolution is applied or proposed by threshold."""
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        res = _make_resolution(pattern, confidence=0.80)
        save_resolution(repo, res)

        # Low threshold → auto-apply
        cfg_low = EngineConfig(auto_apply_threshold=0.75)
        r = resolve(repo, pattern, config=cfg_low)
        assert r.status == EngineStatus.APPLIED

        # High threshold → propose
        cfg_high = EngineConfig(auto_apply_threshold=0.95)
        r2 = resolve(repo, pattern, config=cfg_high)
        assert r2.status == EngineStatus.PROPOSED
765
766
767 # ===========================================================================
768 # Tier IV — Stress
769 # ===========================================================================
770
771
class TestStress:
    """IV: Engine performance under load."""

    def test_engine_with_100_patterns_completes(self, repo: pathlib.Path) -> None:
        """Engine must handle 100 patterns in the store without crashing."""
        shared_fp = fake_id("stress-shared")
        target = _make_pattern(
            path="target.mid", ours="to", theirs="tt", semantic_fp=shared_fp
        )
        record_pattern(repo, target)

        for idx in range(100):
            neighbour = _make_pattern(
                path=f"stress{idx}.mid",
                ours=f"so{idx}",
                theirs=f"st{idx}",
                semantic_fp=shared_fp,
            )
            record_pattern(repo, neighbour)
            save_resolution(
                repo,
                _make_resolution(neighbour, confidence=0.7, outcome_seed=f"o{idx}"),
            )

        outcome = resolve(repo, target)
        # Should get a semantic match from one of the 100
        acceptable = (EngineStatus.PROPOSED, EngineStatus.ESCALATED)
        assert outcome.status in acceptable

    def test_find_similar_100_patterns(self, repo: pathlib.Path) -> None:
        """find_similar with 100 patterns returns correct results."""
        shared_fp = fake_id("bulk-shared")
        target = _make_pattern(
            path="bulk-target.mid", ours="bto", theirs="btt", semantic_fp=shared_fp
        )
        record_pattern(repo, target)

        for idx in range(100):
            neighbour = _make_pattern(
                path=f"bulk{idx}.mid",
                ours=f"bso{idx}",
                theirs=f"bst{idx}",
                semantic_fp=shared_fp,
            )
            record_pattern(repo, neighbour)
            stored = _make_resolution(
                neighbour,
                confidence=0.7 + (idx % 10) * 0.02,
                outcome_seed=f"bo{idx}",
            )
            save_resolution(repo, stored)

        cfg = EngineConfig(max_proposals=5)
        found = find_similar(repo, target, config=cfg)
        assert len(found) <= 5
        # All returned proposals should have similarity ≥ threshold
        floor = cfg.semantic_threshold
        assert all(
            item.similarity is not None and item.similarity >= floor
            for item in found
        )

    def test_many_policy_first_match_wins(self, repo: pathlib.Path) -> None:
        """With 50 policies, the first matching one wins."""
        for idx in range(50):
            # Only the very first policy prefers "ours".
            if idx == 0:
                chosen_action = PolicyAction.PREFER_OURS
            else:
                chosen_action = PolicyAction.PREFER_THEIRS
            candidate = _make_policy(
                f"policy-{idx:02d}",
                scope=PolicyScope.REPO,
                action=chosen_action,
                confidence=0.95,
            )
            save_policy(repo, candidate)

        pattern = _make_pattern()
        record_pattern(repo, pattern)
        outcome = resolve(repo, pattern)
        # First policy (alphabetically / insertion order) should win
        assert outcome.status == EngineStatus.APPLIED
836
837
838 # ===========================================================================
839 # Tier V — Data integrity
840 # ===========================================================================
841
842
class TestDataIntegrity:
    """V: EngineResult fields always populated; JSON-safe."""

    def test_escalated_result_fields_complete(self, repo: pathlib.Path) -> None:
        """An escalated result has status, pattern_id and all optional fields."""
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        result = resolve(repo, pattern)
        assert result.status is not None
        assert result.pattern_id is not None
        # Optional fields default to None (not absent)
        assert hasattr(result, "proposal")
        assert hasattr(result, "applied_resolution_id")
        assert hasattr(result, "escalation_reason")

    def test_applied_result_has_no_escalation_reason(self, repo: pathlib.Path) -> None:
        """A result produced by a matching policy has no escalation reason."""
        policy = _make_policy(confidence=0.95)
        save_policy(repo, policy)
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        result = resolve(repo, pattern)
        assert result.escalation_reason is None

    def test_proposal_confidence_in_range(self, repo: pathlib.Path) -> None:
        """The proposal's confidence lies inside [0.0, 1.0]."""
        policy = _make_policy(confidence=0.95)
        save_policy(repo, policy)
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        result = resolve(repo, pattern)
        assert result.proposal is not None
        assert 0.0 <= result.proposal.confidence <= 1.0

    def test_engine_result_pattern_id_matches_input(self, repo: pathlib.Path) -> None:
        """The result echoes the pattern id it was asked to resolve."""
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        result = resolve(repo, pattern)
        assert result.pattern_id == pattern.pattern_id

    def test_proposal_is_json_serialisable(self, repo: pathlib.Path) -> None:
        """The proposal, converted with dataclasses.asdict, dumps to JSON."""
        import json

        policy = _make_policy(confidence=0.95)
        save_policy(repo, policy)
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        result = resolve(repo, pattern)
        # Assert rather than branch: with a silent `if`, a missing proposal
        # would let this test pass without checking anything.
        assert result.proposal is not None
        d = dataclasses.asdict(result.proposal)
        json.dumps(d)  # must not raise

    def test_engine_result_is_json_serialisable(self, repo: pathlib.Path) -> None:
        """The whole result, converted with dataclasses.asdict, dumps to JSON."""
        import json

        pattern = _make_pattern()
        record_pattern(repo, pattern)
        result = resolve(repo, pattern)
        d = dataclasses.asdict(result)
        json.dumps(d)  # must not raise
898
899
900 # ===========================================================================
901 # Tier VI — Security
902 # ===========================================================================
903
904
class TestSecurity:
    """VI: Engine handles adversarial inputs safely."""

    def test_plugin_exception_caught(self, repo: pathlib.Path) -> None:
        """A crashing plugin must not propagate — engine falls back to escalate."""

        class CrashingPlugin:
            def similarity(self, fp_a: str, fp_b: str) -> float:
                raise RuntimeError("Plugin exploded")

        resolved_neighbour = _make_pattern(path="s.mid", ours="so", theirs="st")
        query = _make_pattern(path="t.mid", ours="to", theirs="tt")
        for recorded in (resolved_neighbour, query):
            record_pattern(repo, recorded)
        save_resolution(repo, _make_resolution(resolved_neighbour))

        # Must not raise; engine falls back
        outcome = resolve(repo, query, plugin=CrashingPlugin())
        tolerated = (EngineStatus.ESCALATED, EngineStatus.PROPOSED)
        assert outcome.status in tolerated

    def test_plugin_returning_out_of_range_similarity_clamped(
        self, repo: pathlib.Path
    ) -> None:
        """Similarity > 1.0 or < 0.0 from a buggy plugin must be clamped."""

        class OverflowPlugin:
            def similarity(self, fp_a: str, fp_b: str) -> float:
                return 999.0

        shared_fp = fake_id("shared")
        resolved_neighbour = _make_pattern(
            path="s.mid", ours="so", theirs="st", semantic_fp=shared_fp
        )
        query = _make_pattern(
            path="t.mid", ours="to", theirs="tt", semantic_fp=shared_fp
        )
        for recorded in (resolved_neighbour, query):
            record_pattern(repo, recorded)
        save_resolution(repo, _make_resolution(resolved_neighbour, confidence=0.85))

        outcome = resolve(repo, query, plugin=OverflowPlugin())
        proposal = outcome.proposal
        reported = None if proposal is None else proposal.similarity
        # Only checked when the engine actually reports a similarity.
        if reported is not None:
            assert 0.0 <= reported <= 1.0

    def test_resolve_safe_with_empty_repo(self, repo: pathlib.Path) -> None:
        """Engine against empty store must not crash."""
        outcome = resolve(repo, _make_pattern())
        assert outcome.status == EngineStatus.ESCALATED

    def test_resolve_safe_with_nonexistent_root(self) -> None:
        """Engine with nonexistent root must escalate, not crash."""
        missing_root = pathlib.Path("/nonexistent/repo")
        query = _make_pattern()
        outcome = resolve(missing_root, query)
        assert outcome.status == EngineStatus.ESCALATED
958
959
960 # ===========================================================================
961 # Tier VII — Performance
962 # ===========================================================================
963
964
class TestPerformance:
    """VII: Engine timing assertions.

    Each test does its setup first, then times a single call and asserts
    the elapsed wall time in milliseconds is under a fixed budget.

    Intervals are measured with ``time.perf_counter()``, the clock
    documented as having the highest available resolution for short
    durations. ``time.monotonic()`` can be too coarse on some platforms to
    judge a 50 ms budget reliably.
    """

    def test_resolve_no_match_under_50ms(self, repo: pathlib.Path) -> None:
        """resolve() with no stored policy or resolution finishes within 50 ms."""
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        start = time.perf_counter()
        resolve(repo, pattern)
        elapsed = (time.perf_counter() - start) * 1000
        assert elapsed < 50, f"resolve (escalate) took {elapsed:.1f}ms"

    def test_resolve_policy_under_50ms(self, repo: pathlib.Path) -> None:
        """resolve() with a 0.95-confidence policy saved finishes within 50 ms."""
        policy = _make_policy(confidence=0.95)
        save_policy(repo, policy)
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        start = time.perf_counter()
        resolve(repo, pattern)
        elapsed = (time.perf_counter() - start) * 1000
        assert elapsed < 50, f"resolve (policy) took {elapsed:.1f}ms"

    def test_resolve_exact_replay_under_50ms(self, repo: pathlib.Path) -> None:
        """resolve() with a resolution saved for the same pattern finishes within 50 ms."""
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        res = _make_resolution(pattern, confidence=0.90)
        save_resolution(repo, res)
        start = time.perf_counter()
        resolve(repo, pattern)
        elapsed = (time.perf_counter() - start) * 1000
        assert elapsed < 50, f"resolve (exact replay) took {elapsed:.1f}ms"

    def test_find_similar_100_patterns_under_500ms(self, repo: pathlib.Path) -> None:
        """find_similar() over 100 stored patterns finishes within 500 ms.

        All 100 patterns share the target's semantic fingerprint and each
        has one saved resolution. Only the find_similar() call is timed;
        populating the store is excluded.
        """
        shared_fp = fake_id("perf-shared")
        target = _make_pattern(
            path="perf-target.mid", ours="pto", theirs="ptt", semantic_fp=shared_fp
        )
        record_pattern(repo, target)
        for i in range(100):
            p = _make_pattern(
                path=f"p{i}.mid", ours=f"po{i}", theirs=f"pt{i}", semantic_fp=shared_fp
            )
            record_pattern(repo, p)
            r = _make_resolution(p, confidence=0.7, outcome_seed=f"perf{i}")
            save_resolution(repo, r)
        start = time.perf_counter()
        find_similar(repo, target)
        elapsed = (time.perf_counter() - start) * 1000
        assert elapsed < 500, f"find_similar(100) took {elapsed:.1f}ms"
File History 1 commit
sha256:51116ec824246acde6abf729e6ba854c223dc5173eff31a645520208023b0652 refactor(bridge): comprehensive spec sweep — close all issu… Sonnet 4.6 minor 29 days ago