gabriel / muse public
test_harmony_engine.py python
1,007 lines 37.1 KB
Raw
sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b fix: try fetch/presign before fetch/mpack to avoid Cloudfla… Sonnet 4.6 patch 7 days ago
1 """Tests for muse/core/harmony/engine.py — Phase 3: Resolution Engine.
2
3 Coverage tiers
4 --------------
5 I Unit — EngineStatus, EngineConfig, EngineResult, DefaultPlugin,
6 HarmonyPlugin protocol, find_similar
7 II Integration — all four resolution tiers (policy, exact-replay,
8 semantic, escalate); action/threshold branching
9 III End-to-end — full pipeline via resolve() with audit trail
10 IV Stress — 100-pattern semantic search; many-policy matching
11 V Data integrity— EngineResult fields always populated; JSON round-trip
12 VI Security — crafted inputs don't escape engine; plugin errors contained
13 VII Performance — resolve() <50 ms; find_similar(100 patterns) <500 ms
14 """
15 from __future__ import annotations
16
17 import dataclasses
18 import datetime
19 from muse.core.types import fake_id
20 import pathlib
21 import time
22 from typing import Any
23 from unittest import mock
24
25 import pytest
26
27 import muse.core.harmony as h
28 from muse.core.harmony import (
29 AgentProvenance,
30 AuditEventType,
31 ConflictPattern,
32 ConflictType,
33 Policy,
34 PolicyAction,
35 PolicyCondition,
36 PolicyScope,
37 Resolution,
38 ResolutionProposal,
39 ResolutionStrategy,
40 append_audit,
41 best_resolution,
42 blob_fingerprint,
43 compute_pattern_id,
44 compute_resolution_id,
45 list_audit,
46 list_patterns,
47 list_resolutions,
48 record_pattern,
49 save_policy,
50 save_resolution,
51 )
52 from muse.core.harmony.engine import (
53 DefaultPlugin,
54 EngineConfig,
55 EngineResult,
56 EngineStatus,
57 HarmonyPlugin,
58 find_similar,
59 resolve,
60 )
61 from muse.core.paths import muse_dir
62
63
64 # ---------------------------------------------------------------------------
65 # Shared helpers
66 # ---------------------------------------------------------------------------
67
68
69
def _utc_now() -> datetime.datetime:
    """Return the current moment as a timezone-aware UTC datetime."""
    utc = datetime.timezone.utc
    return datetime.datetime.now(tz=utc)
72
73
@pytest.fixture()
def repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Provide a temporary repository root whose muse directory already exists.

    The directory returned by ``muse_dir(tmp_path)`` is created, and the
    temporary root itself (not the muse directory) is handed to the test.
    """
    store_dir = muse_dir(tmp_path)
    store_dir.mkdir()
    return tmp_path
78
79
def _make_pattern(
    path: str = "track.mid",
    domain: str = "midi",
    conflict_type: str = ConflictType.CONTENT,
    ours: str = "ours",
    theirs: str = "theirs",
    semantic_fp: str | None = None,
) -> ConflictPattern:
    """Build a ``ConflictPattern`` for tests from simple seed strings.

    ``ours`` and ``theirs`` are seeds fed to ``fake_id`` to derive the two
    blob ids.  When ``semantic_fp`` is omitted, the blob fingerprint is reused
    as the semantic fingerprint.
    """
    our_blob = fake_id(ours)
    their_blob = fake_id(theirs)
    fingerprint = blob_fingerprint(our_blob, their_blob)
    # Fall back to the blob fingerprint only when no semantic one was supplied.
    semantic = fingerprint if semantic_fp is None else semantic_fp
    return ConflictPattern(
        pattern_id=compute_pattern_id(path, fingerprint, semantic),
        path=path,
        domain=domain,
        conflict_type=conflict_type,
        blob_fingerprint=fingerprint,
        semantic_fingerprint=semantic,
        ours_id=our_blob,
        theirs_id=their_blob,
        description={},
        recorded_at=_utc_now(),
        recorded_by="claude-code",
    )
106
107
def _make_resolution(
    pattern: ConflictPattern,
    confidence: float = 0.9,
    human_verified: bool = False,
    strategy: str = ResolutionStrategy.MANUAL,
    outcome_seed: str = "outcome",
    applied_count: int = 0,
) -> Resolution:
    """Build a ``Resolution`` for *pattern*, attributed to the claude-code agent.

    ``outcome_seed`` is passed to ``fake_id`` to derive the outcome blob id;
    distinct seeds therefore give distinct resolutions for the same pattern.
    The resolution id is computed from the pattern id, outcome blob, strategy,
    provenance and timestamp via ``compute_resolution_id``.
    """
    prov = AgentProvenance.agent("claude-code", "claude-sonnet-4-6")
    outcome_blob = fake_id(outcome_seed)
    # Capture the timestamp once so the id and the record agree on it.
    resolved_at = _utc_now()
    rid = compute_resolution_id(pattern.pattern_id, outcome_blob, strategy, prov, resolved_at)
    # Construct the record directly: the previous ``dataclasses.replace`` wrapper
    # passed no overrides and merely produced an identical copy.
    return Resolution(
        resolution_id=rid,
        pattern_id=pattern.pattern_id,
        strategy=strategy,
        policy_id=None,
        outcome_blob=outcome_blob,
        resolved_by=prov,
        human_verified=human_verified,
        confidence=confidence,
        rationale="Test resolution",
        resolved_at=resolved_at,
        applied_count=applied_count,
    )
135
136
def _make_policy(
    policy_id: str = "default-policy",
    scope: str = PolicyScope.REPO,
    action: str = PolicyAction.PREFER_OURS,
    confidence: float = 0.95,
    conflict_type: str | None = None,
    domain: str | None = None,
    path_pattern: str | None = None,
    escalate_to: str | None = None,
    delegate_to: str | None = None,
) -> Policy:
    """Build a ``Policy`` for tests.

    ``conflict_type``, ``domain`` and ``path_pattern`` populate the policy's
    ``when`` condition; every other argument maps to the ``Policy`` field of
    the same name.
    """
    condition = PolicyCondition(
        conflict_type=conflict_type,
        domain=domain,
        path_pattern=path_pattern,
    )
    return Policy(
        policy_id=policy_id,
        description="Test policy",
        when=condition,
        action=action,
        confidence=confidence,
        escalate_to=escalate_to,
        delegate_to=delegate_to,
        scope=scope,
        created_at=_utc_now(),
        created_by="claude-code",
    )
164
165
166 # ===========================================================================
167 # Tier I — Unit tests
168 # ===========================================================================
169
170
class TestEngineStatus:
    """I: each EngineStatus constant is a ``str`` and the three are distinct."""

    def test_applied_is_string(self) -> None:
        applied = EngineStatus.APPLIED
        assert isinstance(applied, str)

    def test_proposed_is_string(self) -> None:
        proposed = EngineStatus.PROPOSED
        assert isinstance(proposed, str)

    def test_escalated_is_string(self) -> None:
        escalated = EngineStatus.ESCALATED
        assert isinstance(escalated, str)

    def test_distinct_values(self) -> None:
        # Collapsing into a set drops duplicates; all three must survive.
        unique = set()
        unique.add(EngineStatus.APPLIED)
        unique.add(EngineStatus.PROPOSED)
        unique.add(EngineStatus.ESCALATED)
        assert len(unique) == 3
186
187
class TestEngineConfig:
    """I: EngineConfig defaults, explicit overrides, and immutability."""

    def test_default_auto_apply_threshold(self) -> None:
        assert EngineConfig().auto_apply_threshold == pytest.approx(0.85)

    def test_default_semantic_threshold(self) -> None:
        assert EngineConfig().semantic_threshold == pytest.approx(0.70)

    def test_default_max_proposals(self) -> None:
        assert EngineConfig().max_proposals == 5

    def test_custom_config(self) -> None:
        cfg = EngineConfig(auto_apply_threshold=0.99, semantic_threshold=0.50, max_proposals=3)
        assert cfg.auto_apply_threshold == pytest.approx(0.99)
        assert cfg.semantic_threshold == pytest.approx(0.50)
        assert cfg.max_proposals == 3

    def test_frozen(self) -> None:
        """Assigning to a field must be rejected with FrozenInstanceError."""
        cfg = EngineConfig()
        # Expect the frozen-dataclass error specifically: a bare ``Exception``
        # would also accept unrelated failures and pass for the wrong reason.
        with pytest.raises(dataclasses.FrozenInstanceError):
            cfg.auto_apply_threshold = 0.0  # type: ignore[misc]
210
211
class TestEngineResult:
    """I: EngineResult fields, defaults, and immutability."""

    def test_applied_result(self) -> None:
        """Fields not supplied for an applied result default to ``None``."""
        r = EngineResult(
            status=EngineStatus.APPLIED,
            pattern_id=fake_id("p"),
            applied_resolution_id=fake_id("r"),
        )
        assert r.status == EngineStatus.APPLIED
        assert r.proposal is None
        assert r.escalation_reason is None

    def test_proposed_result(self) -> None:
        """The proposal object is stored as-is; no resolution id is set."""
        prop = ResolutionProposal(
            pattern_id=fake_id("p"),
            strategy=ResolutionStrategy.POLICY,
            proposed_action=PolicyAction.PREFER_OURS,
            confidence=0.9,
            rationale="test",
        )
        r = EngineResult(
            status=EngineStatus.PROPOSED,
            pattern_id=fake_id("p"),
            proposal=prop,
        )
        assert r.proposal is prop
        assert r.applied_resolution_id is None

    def test_escalated_result(self) -> None:
        r = EngineResult(
            status=EngineStatus.ESCALATED,
            pattern_id=fake_id("p"),
            escalation_reason="no match found",
        )
        assert r.escalation_reason == "no match found"

    def test_frozen(self) -> None:
        """Assigning to a field must be rejected with FrozenInstanceError."""
        r = EngineResult(status=EngineStatus.ESCALATED, pattern_id=fake_id("p"))
        # Expect the frozen-dataclass error specifically: a bare ``Exception``
        # would also accept unrelated failures and pass for the wrong reason.
        with pytest.raises(dataclasses.FrozenInstanceError):
            r.status = EngineStatus.APPLIED  # type: ignore[misc]
253
254
class TestDefaultPlugin:
    """I: DefaultPlugin similarity — 1.0 for equal fingerprints, 0.0 otherwise."""

    def test_identical_fps_return_1(self) -> None:
        fingerprint = fake_id("same")
        score = DefaultPlugin().similarity(fingerprint, fingerprint)
        assert score == pytest.approx(1.0)

    def test_different_fps_return_0(self) -> None:
        score = DefaultPlugin().similarity(fake_id("a"), fake_id("b"))
        assert score == pytest.approx(0.0)

    def test_similarity_commutative(self) -> None:
        left = fake_id("x")
        right = fake_id("y")
        plugin = DefaultPlugin()
        forward = plugin.similarity(left, right)
        backward = plugin.similarity(right, left)
        assert forward == backward

    def test_similarity_range(self) -> None:
        # One equal pair and two unequal pairs; every score must stay in [0, 1].
        seed_pairs = (("a", "a"), ("a", "b"), ("c", "d"))
        for first_seed, second_seed in seed_pairs:
            score = DefaultPlugin().similarity(fake_id(first_seed), fake_id(second_seed))
            assert 0.0 <= score <= 1.0
274
275
class TestHarmonyPluginProtocol:
    """I: any object with a matching ``similarity`` method works as a plugin."""

    def test_custom_plugin_accepted(self) -> None:
        class HalfSimilarityPlugin:
            """Plugin that does not subclass anything and always scores 0.5."""

            def similarity(self, fp_a: str, fp_b: str) -> float:
                return 0.5

        missing_root = pathlib.Path("/nonexistent")
        # Passing the duck-typed plugin must not raise.
        outcome = resolve(
            missing_root,
            _make_pattern(),
            plugin=HalfSimilarityPlugin(),
        )
        # The root does not exist, so the expected outcome is an escalation;
        # reaching this point shows the plugin itself was accepted.
        assert outcome.status == EngineStatus.ESCALATED
293
294
class TestFindSimilar:
    """I: find_similar returns correctly sorted proposals."""

    def test_find_similar_empty_store(self, repo: pathlib.Path) -> None:
        """With only the target pattern recorded there is nothing to propose."""
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        results = find_similar(repo, pattern)
        assert results == []

    def test_find_similar_skips_self(self, repo: pathlib.Path) -> None:
        """Even if pattern has the same semantic_fp, it shouldn't match itself."""
        fp = fake_id("shared-semantic")
        pattern = _make_pattern(semantic_fp=fp)
        record_pattern(repo, pattern)
        res = _make_resolution(pattern)
        save_resolution(repo, res)
        results = find_similar(repo, pattern)
        assert all(p.similar_pattern_id != pattern.pattern_id for p in results)

    def test_find_similar_returns_semantically_matching_pattern(
        self, repo: pathlib.Path
    ) -> None:
        """Two patterns with same semantic_fp but different paths should match."""
        shared_fp = fake_id("shared-semantic")

        source = _make_pattern(path="source.mid", ours="so", theirs="st", semantic_fp=shared_fp)
        target = _make_pattern(path="target.mid", ours="to", theirs="tt", semantic_fp=shared_fp)
        record_pattern(repo, source)
        record_pattern(repo, target)

        # Give source a resolution — target has none
        res = _make_resolution(source, confidence=0.88)
        save_resolution(repo, res)

        proposals = find_similar(repo, target)
        assert len(proposals) >= 1
        assert proposals[0].similar_pattern_id == source.pattern_id
        assert proposals[0].similarity == pytest.approx(1.0)

    def test_find_similar_sorted_by_confidence_desc(self, repo: pathlib.Path) -> None:
        """Proposals must come back ordered by confidence, highest first."""
        shared_fp = fake_id("shared-fp")
        target = _make_pattern(path="t.mid", ours="to", theirs="tt", semantic_fp=shared_fp)
        record_pattern(repo, target)

        # Two sources with different confidence, saved lowest first so that
        # insertion order alone cannot satisfy the ordering check.
        for i, conf in enumerate([0.5, 0.9]):
            src = _make_pattern(
                path=f"src{i}.mid",
                ours=f"o{i}",
                theirs=f"t{i}",
                semantic_fp=shared_fp,
            )
            record_pattern(repo, src)
            r = _make_resolution(src, confidence=conf, outcome_seed=f"out{i}")
            save_resolution(repo, r)

        proposals = find_similar(repo, target)
        # Fail with a clear message instead of an IndexError on an empty result.
        assert proposals, "expected at least one semantic proposal"
        # Check the whole sequence, not only the first and last elements.
        confidences = [p.confidence for p in proposals]
        assert confidences == sorted(confidences, reverse=True)

    def test_find_similar_respects_max_proposals(self, repo: pathlib.Path) -> None:
        """Ten candidates exist, but ``max_proposals=3`` caps the result."""
        shared_fp = fake_id("common")
        target = _make_pattern(path="t.mid", ours="to", theirs="tt", semantic_fp=shared_fp)
        record_pattern(repo, target)

        for i in range(10):
            src = _make_pattern(
                path=f"s{i}.mid", ours=f"o{i}", theirs=f"t{i}", semantic_fp=shared_fp
            )
            record_pattern(repo, src)
            r = _make_resolution(src, confidence=0.7 + i * 0.02, outcome_seed=f"o{i}")
            save_resolution(repo, r)

        cfg = EngineConfig(max_proposals=3)
        proposals = find_similar(repo, target, config=cfg)
        assert len(proposals) <= 3

    def test_find_similar_below_threshold_excluded(self, repo: pathlib.Path) -> None:
        """DefaultPlugin returns 0.0 for different fingerprints → excluded."""
        target = _make_pattern(path="t.mid", ours="to", theirs="tt")
        source = _make_pattern(path="s.mid", ours="so", theirs="st")  # different semantic_fp
        record_pattern(repo, target)
        record_pattern(repo, source)
        r = _make_resolution(source)
        save_resolution(repo, r)

        # With DefaultPlugin, similarity = 0 for different fps → below any threshold
        proposals = find_similar(repo, target)
        assert proposals == []
383
384
385 # ===========================================================================
386 # Tier II — Integration: four resolution tiers
387 # ===========================================================================
388
389
class TestTierPolicy:
    """II: Tier 1 — Policy match."""

    def test_policy_prefer_ours_above_threshold_returns_applied(
        self, repo: pathlib.Path
    ) -> None:
        policy = _make_policy(confidence=0.95, action=PolicyAction.PREFER_OURS)
        save_policy(repo, policy)

        pattern = _make_pattern()
        record_pattern(repo, pattern)

        result = resolve(repo, pattern)
        assert result.status == EngineStatus.APPLIED
        assert result.proposal is not None
        assert result.proposal.strategy == ResolutionStrategy.POLICY
        assert result.proposal.proposed_action == PolicyAction.PREFER_OURS
        assert result.proposal.policy_id == policy.policy_id

    def test_policy_prefer_theirs_above_threshold_returns_applied(
        self, repo: pathlib.Path
    ) -> None:
        policy = _make_policy(confidence=0.90, action=PolicyAction.PREFER_THEIRS)
        save_policy(repo, policy)
        pattern = _make_pattern()
        record_pattern(repo, pattern)

        result = resolve(repo, pattern)
        assert result.status == EngineStatus.APPLIED
        # Guard the Optional before dereferencing so a missing proposal is
        # reported as an assertion failure rather than an AttributeError.
        assert result.proposal is not None
        assert result.proposal.proposed_action == PolicyAction.PREFER_THEIRS

    def test_policy_below_threshold_returns_proposed(self, repo: pathlib.Path) -> None:
        """Policy confidence 0.60 is under the 0.85 threshold → proposed only."""
        policy = _make_policy(confidence=0.60, action=PolicyAction.PREFER_OURS)
        save_policy(repo, policy)
        pattern = _make_pattern()
        record_pattern(repo, pattern)

        cfg = EngineConfig(auto_apply_threshold=0.85)
        result = resolve(repo, pattern, config=cfg)
        assert result.status == EngineStatus.PROPOSED
        assert result.proposal is not None
        assert result.proposal.requires_confirmation is True

    def test_policy_escalate_action_returns_escalated(self, repo: pathlib.Path) -> None:
        policy = _make_policy(
            confidence=1.0,
            action=PolicyAction.ESCALATE,
            escalate_to="human",
        )
        save_policy(repo, policy)
        pattern = _make_pattern()
        record_pattern(repo, pattern)

        result = resolve(repo, pattern)
        assert result.status == EngineStatus.ESCALATED
        assert result.escalation_reason is not None
        # Lower-case once; either keyword is accepted in the reason text.
        reason = result.escalation_reason.lower()
        assert "human" in reason or "escalat" in reason

    def test_policy_require_human_returns_escalated(self, repo: pathlib.Path) -> None:
        policy = _make_policy(confidence=1.0, action=PolicyAction.REQUIRE_HUMAN)
        save_policy(repo, policy)
        pattern = _make_pattern()
        record_pattern(repo, pattern)

        result = resolve(repo, pattern)
        assert result.status == EngineStatus.ESCALATED

    def test_policy_delegate_returns_escalated(self, repo: pathlib.Path) -> None:
        """A DELEGATE policy escalates and names the delegate in the reason."""
        policy = _make_policy(
            confidence=1.0,
            action=PolicyAction.DELEGATE,
            delegate_to="harmony-specialist",
        )
        save_policy(repo, policy)
        pattern = _make_pattern()
        record_pattern(repo, pattern)

        result = resolve(repo, pattern)
        assert result.status == EngineStatus.ESCALATED
        assert "harmony-specialist" in (result.escalation_reason or "")

    def test_policy_domain_filter_does_not_fire_for_wrong_domain(
        self, repo: pathlib.Path
    ) -> None:
        policy = _make_policy(domain="code", action=PolicyAction.PREFER_OURS, confidence=1.0)
        save_policy(repo, policy)
        pattern = _make_pattern(domain="midi")
        record_pattern(repo, pattern)

        # Should fall through to escalate (no resolution)
        result = resolve(repo, pattern)
        assert result.status == EngineStatus.ESCALATED

    def test_workspace_policy_fires_before_repo_policy(self, repo: pathlib.Path) -> None:
        """Both policies match; the workspace-scoped action must be chosen."""
        workspace_p = _make_policy(
            "workspace-p",
            scope=PolicyScope.WORKSPACE,
            action=PolicyAction.PREFER_OURS,
            confidence=0.95,
        )
        repo_p = _make_policy(
            "repo-p",
            scope=PolicyScope.REPO,
            action=PolicyAction.PREFER_THEIRS,
            confidence=0.95,
        )
        save_policy(repo, workspace_p)
        save_policy(repo, repo_p)
        pattern = _make_pattern()
        record_pattern(repo, pattern)

        result = resolve(repo, pattern)
        assert result.status == EngineStatus.APPLIED
        assert result.proposal is not None
        assert result.proposal.proposed_action == PolicyAction.PREFER_OURS  # workspace wins
503
504
class TestTierExactReplay:
    """II: Tier 2 — Exact replay (no policy match)."""

    def test_high_confidence_resolution_auto_applied(self, repo: pathlib.Path) -> None:
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        res = _make_resolution(pattern, confidence=0.90)
        save_resolution(repo, res)

        result = resolve(repo, pattern)
        assert result.status == EngineStatus.APPLIED
        assert result.applied_resolution_id == res.resolution_id

    def test_human_verified_always_auto_applied(self, repo: pathlib.Path) -> None:
        """Human-verified resolutions apply even below the confidence threshold."""
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        res = _make_resolution(pattern, confidence=0.50, human_verified=True)
        save_resolution(repo, res)

        cfg = EngineConfig(auto_apply_threshold=0.85)
        result = resolve(repo, pattern, config=cfg)
        assert result.status == EngineStatus.APPLIED

    def test_low_confidence_resolution_proposed(self, repo: pathlib.Path) -> None:
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        res = _make_resolution(pattern, confidence=0.60)
        save_resolution(repo, res)

        cfg = EngineConfig(auto_apply_threshold=0.85)
        result = resolve(repo, pattern, config=cfg)
        assert result.status == EngineStatus.PROPOSED
        # Guard the Optional before dereferencing so a missing proposal is
        # reported as an assertion failure rather than an AttributeError.
        assert result.proposal is not None
        assert result.proposal.strategy == ResolutionStrategy.EXACT_REPLAY
        assert result.proposal.requires_confirmation is True

    def test_exact_replay_increments_applied_count(self, repo: pathlib.Path) -> None:
        """After one resolve() the stored resolution reports applied_count == 1."""
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        res = _make_resolution(pattern, confidence=0.90)
        save_resolution(repo, res)

        resolve(repo, pattern)

        # Imported here because it is not part of the module-level import list.
        from muse.core.harmony import load_resolution

        loaded = load_resolution(repo, pattern.pattern_id, res.resolution_id)
        assert loaded is not None
        assert loaded.applied_count == 1

    def test_exact_replay_prefers_highest_quality_resolution(
        self, repo: pathlib.Path
    ) -> None:
        pattern = _make_pattern()
        record_pattern(repo, pattern)

        low = _make_resolution(pattern, confidence=0.70, outcome_seed="low")
        high = _make_resolution(pattern, confidence=0.95, outcome_seed="high")
        save_resolution(repo, low)
        save_resolution(repo, high)

        result = resolve(repo, pattern)
        assert result.applied_resolution_id == high.resolution_id
566
567
class TestTierSemantic:
    """II: Tier 3 — Semantic match (no policy, no exact replay)."""

    def test_semantic_match_returns_proposed(self, repo: pathlib.Path) -> None:
        """A resolved pattern sharing the target's semantic_fp yields a proposal."""
        shared_fp = fake_id("shared-semantic")

        source = _make_pattern(path="s.mid", ours="so", theirs="st", semantic_fp=shared_fp)
        target = _make_pattern(path="t.mid", ours="to", theirs="tt", semantic_fp=shared_fp)
        record_pattern(repo, source)
        record_pattern(repo, target)

        res = _make_resolution(source, confidence=0.85)
        save_resolution(repo, res)

        result = resolve(repo, target)
        assert result.status == EngineStatus.PROPOSED
        # Guard the Optional before dereferencing so a missing proposal is
        # reported as an assertion failure rather than an AttributeError.
        assert result.proposal is not None
        assert result.proposal.strategy == ResolutionStrategy.SEMANTIC_PROPOSAL
        assert result.proposal.similar_pattern_id == source.pattern_id
        assert result.proposal.similarity == pytest.approx(1.0)
        assert result.proposal.requires_confirmation is True

    def test_semantic_match_below_threshold_escalates(self, repo: pathlib.Path) -> None:
        """DefaultPlugin returns 0.0 for non-matching fps → no semantic match."""
        source = _make_pattern(path="s.mid", ours="so", theirs="st")
        target = _make_pattern(path="t.mid", ours="to", theirs="tt")
        record_pattern(repo, source)
        record_pattern(repo, target)
        res = _make_resolution(source)
        save_resolution(repo, res)

        result = resolve(repo, target)
        assert result.status == EngineStatus.ESCALATED

    def test_custom_plugin_similarity_drives_semantic_match(
        self, repo: pathlib.Path
    ) -> None:
        """A custom plugin returning 0.8 similarity enables semantic matching."""

        class AlwaysMatchPlugin:
            def similarity(self, fp_a: str, fp_b: str) -> float:
                return 0.8

        source = _make_pattern(path="s.mid", ours="so", theirs="st")
        target = _make_pattern(path="t.mid", ours="to", theirs="tt")
        record_pattern(repo, source)
        record_pattern(repo, target)
        res = _make_resolution(source, confidence=0.85)
        save_resolution(repo, res)

        cfg = EngineConfig(semantic_threshold=0.70)
        result = resolve(repo, target, config=cfg, plugin=AlwaysMatchPlugin())
        assert result.status == EngineStatus.PROPOSED
        assert result.proposal is not None
        assert result.proposal.strategy == ResolutionStrategy.SEMANTIC_PROPOSAL
621
622
class TestTierEscalate:
    """II: Tier 4 — nothing matches, so the engine escalates."""

    def test_no_policy_no_resolution_escalates(self, repo: pathlib.Path) -> None:
        conflict = _make_pattern()
        record_pattern(repo, conflict)

        outcome = resolve(repo, conflict)
        assert outcome.status == EngineStatus.ESCALATED
        # The reason must be present and non-empty.
        reason = outcome.escalation_reason
        assert reason is not None
        assert len(reason) > 0

    def test_escalation_has_no_applied_resolution(self, repo: pathlib.Path) -> None:
        conflict = _make_pattern()
        record_pattern(repo, conflict)

        outcome = resolve(repo, conflict)
        assert outcome.applied_resolution_id is None

    def test_escalation_has_no_proposal(self, repo: pathlib.Path) -> None:
        conflict = _make_pattern()
        record_pattern(repo, conflict)

        outcome = resolve(repo, conflict)
        assert outcome.proposal is None

    def test_unrecorded_pattern_escalates(self, repo: pathlib.Path) -> None:
        """A pattern that was never recorded in the store still escalates."""
        # Deliberately skip record_pattern().
        unrecorded = _make_pattern()
        outcome = resolve(repo, unrecorded)
        assert outcome.status == EngineStatus.ESCALATED
655
656
657 # ===========================================================================
658 # Tier III — End-to-end
659 # ===========================================================================
660
661
class TestEndToEnd:
    """III: Full resolution pipeline with audit trail."""

    def test_policy_applied_writes_audit(self, repo: pathlib.Path) -> None:
        policy = _make_policy(confidence=0.95, action=PolicyAction.PREFER_OURS)
        save_policy(repo, policy)
        pattern = _make_pattern()
        record_pattern(repo, pattern)

        actor = AgentProvenance.agent("claude-code", "claude-sonnet-4-6")
        result = resolve(repo, pattern, actor=actor)
        assert result.status == EngineStatus.APPLIED

        entries = list_audit(repo)
        event_types = [e["event_type"] for e in entries]
        assert AuditEventType.RESOLUTION_APPLIED in event_types

    def test_exact_replay_applied_writes_audit(self, repo: pathlib.Path) -> None:
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        res = _make_resolution(pattern, confidence=0.90)
        save_resolution(repo, res)

        actor = AgentProvenance.human()
        result = resolve(repo, pattern, actor=actor)
        assert result.status == EngineStatus.APPLIED

        entries = list_audit(repo)
        event_types = [e["event_type"] for e in entries]
        assert AuditEventType.RESOLUTION_APPLIED in event_types

    def test_escalation_writes_audit(self, repo: pathlib.Path) -> None:
        pattern = _make_pattern()
        record_pattern(repo, pattern)

        actor = AgentProvenance.human()
        result = resolve(repo, pattern, actor=actor)
        assert result.status == EngineStatus.ESCALATED

        entries = list_audit(repo)
        event_types = [e["event_type"] for e in entries]
        assert AuditEventType.ESCALATION_RECORDED in event_types

    def test_full_lifecycle_policy_to_replay(self, repo: pathlib.Path) -> None:
        """First resolution via policy; subsequent calls use exact replay."""
        policy = _make_policy(confidence=0.95, action=PolicyAction.PREFER_OURS)
        save_policy(repo, policy)

        pattern = _make_pattern()
        record_pattern(repo, pattern)

        # First call → policy fires
        r1 = resolve(repo, pattern)
        assert r1.status == EngineStatus.APPLIED
        assert r1.proposal is not None
        assert r1.proposal.strategy == ResolutionStrategy.POLICY

        # Save the policy resolution so it can be replayed.  The helpers used
        # here are the module-level imports; no local re-import is needed.
        actor = AgentProvenance.agent("claude-code")
        resolved_at = _utc_now()
        # Derive the outcome blob once so the id and the record share it.
        outcome_blob = fake_id("policy-outcome")
        rid = compute_resolution_id(
            pattern.pattern_id,
            outcome_blob,
            ResolutionStrategy.POLICY,
            actor,
            resolved_at,
        )
        res = Resolution(
            resolution_id=rid,
            pattern_id=pattern.pattern_id,
            strategy=ResolutionStrategy.POLICY,
            policy_id=policy.policy_id,
            outcome_blob=outcome_blob,
            resolved_by=actor,
            human_verified=False,
            confidence=0.95,
            rationale="Policy applied",
            resolved_at=resolved_at,
        )
        save_resolution(repo, res)

        # Remove policy — next call should use exact replay.  Imported here
        # because it is not part of the module-level import list.
        from muse.core.harmony import remove_policy

        remove_policy(repo, policy.policy_id)

        r2 = resolve(repo, pattern)
        assert r2.status == EngineStatus.APPLIED
        assert r2.proposal is not None
        assert r2.proposal.strategy == ResolutionStrategy.EXACT_REPLAY

    def test_resolve_with_explicit_config(self, repo: pathlib.Path) -> None:
        """The same 0.80-confidence resolution flips outcome with the threshold."""
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        res = _make_resolution(pattern, confidence=0.80)
        save_resolution(repo, res)

        # Low threshold → auto-apply
        cfg_low = EngineConfig(auto_apply_threshold=0.75)
        r = resolve(repo, pattern, config=cfg_low)
        assert r.status == EngineStatus.APPLIED

        # High threshold → propose
        cfg_high = EngineConfig(auto_apply_threshold=0.95)
        r2 = resolve(repo, pattern, config=cfg_high)
        assert r2.status == EngineStatus.PROPOSED
764
765
766 # ===========================================================================
767 # Tier IV — Stress
768 # ===========================================================================
769
770
class TestStress:
    """IV: engine behaviour with many patterns and many policies in the store."""

    def test_engine_with_100_patterns_completes(self, repo: pathlib.Path) -> None:
        """resolve() must finish without raising when 100 patterns are stored."""
        common_fp = fake_id("stress-shared")
        target = _make_pattern(path="target.mid", ours="to", theirs="tt", semantic_fp=common_fp)
        record_pattern(repo, target)

        for index in range(100):
            candidate = _make_pattern(
                path=f"stress{index}.mid",
                ours=f"so{index}",
                theirs=f"st{index}",
                semantic_fp=common_fp,
            )
            record_pattern(repo, candidate)
            save_resolution(
                repo,
                _make_resolution(candidate, confidence=0.7, outcome_seed=f"o{index}"),
            )

        outcome = resolve(repo, target)
        # Either a semantic proposal or an escalation is acceptable here.
        acceptable = (EngineStatus.PROPOSED, EngineStatus.ESCALATED)
        assert outcome.status in acceptable

    def test_find_similar_100_patterns(self, repo: pathlib.Path) -> None:
        """With 100 candidates, results stay capped and above the threshold."""
        common_fp = fake_id("bulk-shared")
        target = _make_pattern(
            path="bulk-target.mid", ours="bto", theirs="btt", semantic_fp=common_fp
        )
        record_pattern(repo, target)

        for index in range(100):
            candidate = _make_pattern(
                path=f"bulk{index}.mid",
                ours=f"bso{index}",
                theirs=f"bst{index}",
                semantic_fp=common_fp,
            )
            record_pattern(repo, candidate)
            # Confidence cycles through ten values as the index grows.
            stored = _make_resolution(
                candidate,
                confidence=0.7 + (index % 10) * 0.02,
                outcome_seed=f"bo{index}",
            )
            save_resolution(repo, stored)

        cfg = EngineConfig(max_proposals=5)
        found = find_similar(repo, target, config=cfg)
        assert len(found) <= 5
        # Every returned proposal carries a similarity at or above the threshold.
        for candidate_proposal in found:
            assert candidate_proposal.similarity is not None
            assert candidate_proposal.similarity >= cfg.semantic_threshold

    def test_many_policy_first_match_wins(self, repo: pathlib.Path) -> None:
        """With 50 policies, the first matching one wins."""
        for index in range(50):
            # Only policy-00 prefers ours; the other 49 prefer theirs.
            if index == 0:
                chosen_action = PolicyAction.PREFER_OURS
            else:
                chosen_action = PolicyAction.PREFER_THEIRS
            save_policy(
                repo,
                _make_policy(
                    f"policy-{index:02d}",
                    scope=PolicyScope.REPO,
                    action=chosen_action,
                    confidence=0.95,
                ),
            )

        conflict = _make_pattern()
        record_pattern(repo, conflict)
        outcome = resolve(repo, conflict)
        # Only the status is checked; which policy won is not asserted.
        assert outcome.status == EngineStatus.APPLIED
835
836
837 # ===========================================================================
838 # Tier V — Data integrity
839 # ===========================================================================
840
841
class TestDataIntegrity:
    """V: EngineResult fields always populated; JSON-safe."""

    def test_escalated_result_fields_complete(self, repo: pathlib.Path) -> None:
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        result = resolve(repo, pattern)
        assert result.status is not None
        assert result.pattern_id is not None
        # Optional fields default to None (not absent)
        assert hasattr(result, "proposal")
        assert hasattr(result, "applied_resolution_id")
        assert hasattr(result, "escalation_reason")

    def test_applied_result_has_no_escalation_reason(self, repo: pathlib.Path) -> None:
        policy = _make_policy(confidence=0.95)
        save_policy(repo, policy)
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        result = resolve(repo, pattern)
        assert result.escalation_reason is None

    def test_proposal_confidence_in_range(self, repo: pathlib.Path) -> None:
        policy = _make_policy(confidence=0.95)
        save_policy(repo, policy)
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        result = resolve(repo, pattern)
        assert result.proposal is not None
        assert 0.0 <= result.proposal.confidence <= 1.0

    def test_engine_result_pattern_id_matches_input(self, repo: pathlib.Path) -> None:
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        result = resolve(repo, pattern)
        assert result.pattern_id == pattern.pattern_id

    def test_proposal_is_json_serialisable(self, repo: pathlib.Path) -> None:
        """A policy-driven proposal must survive ``json.dumps`` unchanged in type."""
        import json

        policy = _make_policy(confidence=0.95)
        save_policy(repo, policy)
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        result = resolve(repo, pattern)
        # Require the proposal instead of skipping when it is missing; a
        # conditional here would let the test pass without checking anything.
        assert result.proposal is not None
        d = dataclasses.asdict(result.proposal)
        json.dumps(d)  # must not raise

    def test_engine_result_is_json_serialisable(self, repo: pathlib.Path) -> None:
        import json

        pattern = _make_pattern()
        record_pattern(repo, pattern)
        result = resolve(repo, pattern)
        d = dataclasses.asdict(result)
        json.dumps(d)  # must not raise
897
898
899 # ===========================================================================
900 # Tier VI — Security
901 # ===========================================================================
902
903
class TestSecurity:
    """VI: misbehaving plugins and missing stores must not crash the engine."""

    def test_plugin_exception_caught(self, repo: pathlib.Path) -> None:
        """An exception raised inside a plugin must not escape resolve()."""

        class CrashingPlugin:
            def similarity(self, fp_a: str, fp_b: str) -> float:
                raise RuntimeError("Plugin exploded")

        resolved_source = _make_pattern(path="s.mid", ours="so", theirs="st")
        unresolved_target = _make_pattern(path="t.mid", ours="to", theirs="tt")
        for recorded in (resolved_source, unresolved_target):
            record_pattern(repo, recorded)
        save_resolution(repo, _make_resolution(resolved_source))

        outcome = resolve(repo, unresolved_target, plugin=CrashingPlugin())
        # Reaching this line means nothing propagated; either status is fine.
        allowed = (EngineStatus.ESCALATED, EngineStatus.PROPOSED)
        assert outcome.status in allowed

    def test_plugin_returning_out_of_range_similarity_clamped(
        self, repo: pathlib.Path
    ) -> None:
        """A plugin reporting 999.0 must not leak an out-of-range similarity."""

        class OverflowPlugin:
            def similarity(self, fp_a: str, fp_b: str) -> float:
                return 999.0

        common_fp = fake_id("shared")
        resolved_source = _make_pattern(
            path="s.mid", ours="so", theirs="st", semantic_fp=common_fp
        )
        unresolved_target = _make_pattern(
            path="t.mid", ours="to", theirs="tt", semantic_fp=common_fp
        )
        for recorded in (resolved_source, unresolved_target):
            record_pattern(repo, recorded)
        save_resolution(repo, _make_resolution(resolved_source, confidence=0.85))

        outcome = resolve(repo, unresolved_target, plugin=OverflowPlugin())
        # The range is only checked when a proposal with a similarity exists.
        proposal = outcome.proposal
        if proposal is not None:
            if proposal.similarity is not None:
                assert 0.0 <= proposal.similarity <= 1.0

    def test_resolve_safe_with_empty_repo(self, repo: pathlib.Path) -> None:
        """Resolving against a store with nothing recorded escalates."""
        conflict = _make_pattern()
        outcome = resolve(repo, conflict)
        assert outcome.status == EngineStatus.ESCALATED

    def test_resolve_safe_with_nonexistent_root(self) -> None:
        """Resolving against a root path that does not exist escalates."""
        conflict = _make_pattern()
        missing_root = pathlib.Path("/nonexistent/repo")
        outcome = resolve(missing_root, conflict)
        assert outcome.status == EngineStatus.ESCALATED
957
958
959 # ===========================================================================
960 # Tier VII — Performance
961 # ===========================================================================
962
963
class TestPerformance:
    """VII: Engine timing assertions.

    Intervals are measured with ``time.perf_counter()``, the clock the
    standard library documents as having the highest available resolution
    for short durations. Only the call under test sits inside each timed
    region; fixture setup is excluded.
    """

    def test_resolve_no_match_under_50ms(self, repo: pathlib.Path) -> None:
        """resolve() on a recorded pattern, with nothing else saved by this test, takes < 50 ms."""
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        start = time.perf_counter()
        resolve(repo, pattern)
        elapsed = (time.perf_counter() - start) * 1000  # seconds -> milliseconds
        assert elapsed < 50, f"resolve (escalate) took {elapsed:.1f}ms"

    def test_resolve_policy_under_50ms(self, repo: pathlib.Path) -> None:
        """resolve() with a 0.95-confidence policy saved takes < 50 ms."""
        policy = _make_policy(confidence=0.95)
        save_policy(repo, policy)
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        start = time.perf_counter()
        resolve(repo, pattern)
        elapsed = (time.perf_counter() - start) * 1000  # seconds -> milliseconds
        assert elapsed < 50, f"resolve (policy) took {elapsed:.1f}ms"

    def test_resolve_exact_replay_under_50ms(self, repo: pathlib.Path) -> None:
        """resolve() with a 0.90-confidence resolution saved for the same pattern takes < 50 ms."""
        pattern = _make_pattern()
        record_pattern(repo, pattern)
        res = _make_resolution(pattern, confidence=0.90)
        save_resolution(repo, res)
        start = time.perf_counter()
        resolve(repo, pattern)
        elapsed = (time.perf_counter() - start) * 1000  # seconds -> milliseconds
        assert elapsed < 50, f"resolve (exact replay) took {elapsed:.1f}ms"

    def test_find_similar_100_patterns_under_500ms(self, repo: pathlib.Path) -> None:
        """find_similar() over 100 recorded patterns sharing one semantic_fp takes < 500 ms."""
        shared_fp = fake_id("perf-shared")
        target = _make_pattern(
            path="perf-target.mid", ours="pto", theirs="ptt", semantic_fp=shared_fp
        )
        record_pattern(repo, target)
        # Populate the store: every pattern gets its own saved resolution and
        # the same semantic fingerprint as the target.
        for i in range(100):
            p = _make_pattern(
                path=f"p{i}.mid", ours=f"po{i}", theirs=f"pt{i}", semantic_fp=shared_fp
            )
            record_pattern(repo, p)
            r = _make_resolution(p, confidence=0.7, outcome_seed=f"perf{i}")
            save_resolution(repo, r)
        start = time.perf_counter()
        find_similar(repo, target)
        elapsed = (time.perf_counter() - start) * 1000  # seconds -> milliseconds
        assert elapsed < 500, f"find_similar(100) took {elapsed:.1f}ms"
File History 1 commit
sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b fix: try fetch/presign before fetch/mpack to avoid Cloudfla… Sonnet 4.6 patch 7 days ago