gabriel / muse public
test_harmony_cli_phase3.py python
493 lines 18.7 KB
Raw
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 21 days ago
1 """Tests for Phase 3 CLI additions to ``muse harmony``.
2
3 New subcommands:
4 ``muse harmony engine <pattern_id>`` — run the three-tier resolution engine
5 ``muse harmony similar <pattern_id>`` — find semantically similar patterns
6
7 Coverage tiers
8 --------------
9 I Unit — TypedDict schemas for engine + similar JSON output
10 II Success — engine applied/proposed/escalated; similar with matches
11 III Errors — invalid IDs; pattern not found
12 IV E2E — full policy → engine → audit lifecycle via CLI
13 V Integrity — all JSON fields always present; confidence in range
14 VI Security — path-traversal IDs rejected
15 VII Perf — both subcommands <300 ms
16 """
17 from __future__ import annotations
18 from collections.abc import Mapping
19
20 from muse.core.types import fake_id
21 from muse.core.paths import muse_dir
22 import json
23 import pathlib
24 import time
25 import typing
26
27 import pytest
28
29 from tests.cli_test_helper import CliRunner
30
31 runner = CliRunner()
32
33
34 # ---------------------------------------------------------------------------
35 # Helpers
36 # ---------------------------------------------------------------------------
37
38
39
@pytest.fixture()
def repo(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> pathlib.Path:
    """Provide a throwaway repository rooted at ``tmp_path``.

    Creates the directory returned by ``muse_dir(tmp_path)``, writes a small
    ``config.toml`` into it, and makes ``tmp_path`` the working directory for
    the duration of the test.

    Returns:
        The repository root (``tmp_path`` itself).
    """
    muse_root = muse_dir(tmp_path)
    muse_root.mkdir()
    config_file = muse_root / "config.toml"
    config_file.write_text('[repo]\nname = "test"\nid = "abc"\n')
    monkeypatch.chdir(tmp_path)
    return tmp_path
47
48
def _record(
    path: str = "track.mid",
    domain: str = "midi",
    conflict_type: str = "content",
    ours: str = "ours",
    theirs: str = "theirs",
    semantic_fp: str | None = None,
) -> str:
    """Record a conflict pattern via ``muse harmony record --json``.

    ``ours`` and ``theirs`` are passed through ``fake_id`` before being handed
    to ``--ours-id`` / ``--theirs-id``. ``--semantic-fingerprint`` is appended
    only when ``semantic_fp`` is not ``None``.

    Returns:
        The ``pattern_id`` field of the command's JSON output.

    Raises:
        AssertionError: If the command exits with a non-zero code.
    """
    fingerprint_args = (
        [] if semantic_fp is None else ["--semantic-fingerprint", semantic_fp]
    )
    argv = [
        "harmony", "record",
        "--path", path,
        "--domain", domain,
        "--conflict-type", conflict_type,
        "--ours-id", fake_id(ours),
        "--theirs-id", fake_id(theirs),
        "--json",
        *fingerprint_args,
    ]
    result = runner.invoke(None, argv)
    assert result.exit_code == 0, result.output
    payload = json.loads(result.output)
    return payload["pattern_id"]
71
72
def _resolve(
    pattern_id: str,
    confidence: str = "0.9",
    strategy: str = "manual",
    outcome: str = "outcome",
) -> str:
    """Attach a resolution to ``pattern_id`` via ``muse harmony resolve --json``.

    ``outcome`` is passed through ``fake_id`` to produce the ``--outcome-blob``
    value; ``confidence`` is forwarded as the string given.

    Returns:
        The ``resolution_id`` field of the command's JSON output.

    Raises:
        AssertionError: If the command exits with a non-zero code.
    """
    outcome_blob = fake_id(outcome)
    argv = [
        "harmony", "resolve",
        "--pattern-id", pattern_id,
        "--strategy", strategy,
        "--outcome-blob", outcome_blob,
        "--confidence", confidence,
        "--json",
    ]
    result = runner.invoke(None, argv)
    assert result.exit_code == 0, result.output
    payload = json.loads(result.output)
    return payload["resolution_id"]
89
90
def _add_policy(
    policy_id: str = "auto-policy",
    scope: str = "repo",
    action: str = "prefer-ours",
    confidence: str = "0.95",
    domain: str | None = None,
) -> None:
    """Register a policy via ``muse harmony policy-add``.

    The description is always ``"Test policy"``. ``--domain`` is appended only
    when ``domain`` is truthy.

    Raises:
        AssertionError: If the command exits with a non-zero code.
    """
    args = [
        "harmony", "policy-add",
        "--policy-id", policy_id,
        "--description", "Test policy",
        "--scope", scope,
        "--action", action,
        "--confidence", confidence,
    ]
    if domain:
        args += ["--domain", domain]
    result = runner.invoke(None, args)
    # Fail at the setup step, like the other helpers do: a silently rejected
    # policy would otherwise let policy-dependent tests pass or fail for the
    # wrong reason (e.g. an "escalated" status caused by the policy not existing).
    assert result.exit_code == 0, result.output
109
110
111 # ===========================================================================
112 # Tier I — Unit: TypedDict schemas
113 # ===========================================================================
114
115
class TestTypedDictSchemas:
    """I: The engine and similar TypedDicts declare the expected keys."""

    def _hints(self, name: str) -> Mapping[str, object]:
        """Return the resolved type hints of attribute ``name`` on the harmony CLI module."""
        import muse.cli.commands.harmony as harmony_module

        return typing.get_type_hints(getattr(harmony_module, name))

    # -- _HarmonyEngineJson ---------------------------------------------------

    def test_engine_json_has_status(self) -> None:
        engine_hints = self._hints("_HarmonyEngineJson")
        assert "status" in engine_hints

    def test_engine_json_has_pattern_id(self) -> None:
        engine_hints = self._hints("_HarmonyEngineJson")
        assert "pattern_id" in engine_hints

    def test_engine_json_has_proposal(self) -> None:
        engine_hints = self._hints("_HarmonyEngineJson")
        assert "proposal" in engine_hints

    def test_engine_json_has_applied_resolution_id(self) -> None:
        engine_hints = self._hints("_HarmonyEngineJson")
        assert "applied_resolution_id" in engine_hints

    def test_engine_json_has_escalation_reason(self) -> None:
        engine_hints = self._hints("_HarmonyEngineJson")
        assert "escalation_reason" in engine_hints

    # -- _HarmonySimilarJson --------------------------------------------------

    def test_similar_json_has_pattern_id(self) -> None:
        similar_hints = self._hints("_HarmonySimilarJson")
        assert "pattern_id" in similar_hints

    def test_similar_json_has_total(self) -> None:
        similar_hints = self._hints("_HarmonySimilarJson")
        assert "total" in similar_hints

    def test_similar_json_has_proposals(self) -> None:
        similar_hints = self._hints("_HarmonySimilarJson")
        assert "proposals" in similar_hints
147
148
class TestRegistration:
    """I: The ``engine`` and ``similar`` subcommands are registered and reachable."""

    def test_engine_help(self, repo: pathlib.Path) -> None:
        """``harmony engine --help`` exits cleanly."""
        help_argv = ["harmony", "engine", "--help"]
        result = runner.invoke(None, help_argv)
        assert result.exit_code == 0

    def test_similar_help(self, repo: pathlib.Path) -> None:
        """``harmony similar --help`` exits cleanly."""
        help_argv = ["harmony", "similar", "--help"]
        result = runner.invoke(None, help_argv)
        assert result.exit_code == 0
159
160
161 # ===========================================================================
162 # Tier II — Integration: success paths
163 # ===========================================================================
164
165
class TestEngineSuccess:
    """II: ``muse harmony engine`` — success paths covering all three statuses."""

    @staticmethod
    def _engine_json(pattern_id: str, *options: str) -> dict[str, typing.Any]:
        """Run ``harmony engine`` in JSON mode, require exit code 0, and decode the output.

        ``options`` are placed between the pattern ID and ``--json``.
        """
        result = runner.invoke(
            None, ["harmony", "engine", pattern_id, *options, "--json"]
        )
        assert result.exit_code == 0
        return json.loads(result.output)

    def test_engine_escalates_no_policy_no_resolution(self, repo: pathlib.Path) -> None:
        """With neither a policy nor a resolution, the engine escalates."""
        pattern_id = _record()
        payload = self._engine_json(pattern_id)
        assert payload["status"] == "escalated"
        assert payload["pattern_id"] == pattern_id
        assert payload["escalation_reason"] is not None

    def test_engine_applied_via_policy(self, repo: pathlib.Path) -> None:
        """A 0.95-confidence prefer-ours policy is applied with strategy ``policy``."""
        _add_policy(confidence="0.95", action="prefer-ours")
        pattern_id = _record()
        payload = self._engine_json(pattern_id)
        assert payload["status"] == "applied"
        proposal = payload["proposal"]
        assert proposal is not None
        assert proposal["strategy"] == "policy"

    def test_engine_applied_via_exact_replay(self, repo: pathlib.Path) -> None:
        """A 0.90-confidence resolution on the same pattern is applied."""
        pattern_id = _record()
        _resolve(pattern_id, confidence="0.90")
        payload = self._engine_json(pattern_id)
        assert payload["status"] == "applied"
        assert payload["applied_resolution_id"] is not None

    def test_engine_proposed_low_confidence(self, repo: pathlib.Path) -> None:
        """A 0.60-confidence resolution is only proposed and needs confirmation."""
        pattern_id = _record()
        _resolve(pattern_id, confidence="0.60")
        payload = self._engine_json(pattern_id)
        assert payload["status"] == "proposed"
        proposal = payload["proposal"]
        assert proposal is not None
        assert proposal["requires_confirmation"] is True

    def test_engine_text_output(self, repo: pathlib.Path) -> None:
        """Without ``--json`` the output contains the first 12 characters of the ID."""
        pattern_id = _record()
        result = runner.invoke(None, ["harmony", "engine", pattern_id])
        assert result.exit_code == 0
        short_id = pattern_id[:12]
        assert short_id in result.output

    def test_engine_with_custom_threshold(self, repo: pathlib.Path) -> None:
        """--auto-apply-threshold overrides default."""
        pattern_id = _record()
        _resolve(pattern_id, confidence="0.80")
        # 0.80 sits under the default threshold (0.85), where it would only be
        # proposed; lowering the threshold to 0.75 lets it be applied.
        payload = self._engine_json(pattern_id, "--auto-apply-threshold", "0.75")
        assert payload["status"] == "applied"

    def test_engine_proposed_via_policy_low_confidence(self, repo: pathlib.Path) -> None:
        """A 0.60-confidence policy yields a proposal rather than an application."""
        _add_policy(policy_id="low-conf", confidence="0.60", action="prefer-ours")
        pattern_id = _record()
        payload = self._engine_json(pattern_id)
        assert payload["status"] == "proposed"

    def test_engine_escalated_via_escalate_policy(self, repo: pathlib.Path) -> None:
        """A policy whose action is ``escalate`` produces an escalated status."""
        _add_policy(policy_id="esc-policy", confidence="1.0", action="escalate")
        pattern_id = _record()
        payload = self._engine_json(pattern_id)
        assert payload["status"] == "escalated"
237
238
class TestSimilarSuccess:
    """II: muse harmony similar — success paths."""

    def test_similar_empty_when_no_match(self, repo: pathlib.Path) -> None:
        """A lone pattern has no similar proposals."""
        pid = _record()
        r = runner.invoke(None, ["harmony", "similar", pid, "--json"])
        assert r.exit_code == 0
        data = json.loads(r.output)
        assert data["pattern_id"] == pid
        assert data["total"] == 0
        assert data["proposals"] == []

    def test_similar_finds_shared_semantic_fingerprint(self, repo: pathlib.Path) -> None:
        """A resolved pattern sharing the semantic fingerprint is the first proposal."""
        shared_fp = fake_id("shared-semantic-cli")
        source_pid = _record(path="source.mid", ours="so", theirs="st", semantic_fp=shared_fp)
        target_pid = _record(path="target.mid", ours="to", theirs="tt", semantic_fp=shared_fp)

        # Give source a resolution
        _resolve(source_pid, confidence="0.88")

        r = runner.invoke(None, ["harmony", "similar", target_pid, "--json"])
        assert r.exit_code == 0
        data = json.loads(r.output)
        assert data["total"] >= 1
        assert data["proposals"][0]["similar_pattern_id"] == source_pid

    def test_similar_entry_has_required_fields(self, repo: pathlib.Path) -> None:
        """Each proposal entry carries the full set of documented keys."""
        shared_fp = fake_id("shared-fields")
        source_pid = _record(path="s.mid", ours="so", theirs="st", semantic_fp=shared_fp)
        target_pid = _record(path="t.mid", ours="to", theirs="tt", semantic_fp=shared_fp)
        _resolve(source_pid, confidence="0.85")

        r = runner.invoke(None, ["harmony", "similar", target_pid, "--json"])
        # Check the exit code before decoding so a CLI failure is reported as
        # such instead of as a JSON/Index error further down.
        assert r.exit_code == 0, r.output
        entry = json.loads(r.output)["proposals"][0]
        for field in ("similar_pattern_id", "similarity", "confidence", "strategy", "rationale"):
            assert field in entry, f"missing field: {field}"

    def test_similar_text_output(self, repo: pathlib.Path) -> None:
        """Text mode (no ``--json``) exits cleanly when a match exists."""
        shared_fp = fake_id("shared-text")
        src = _record(path="text-src.mid", ours="so", theirs="st", semantic_fp=shared_fp)
        tgt = _record(path="text-tgt.mid", ours="to", theirs="tt", semantic_fp=shared_fp)
        _resolve(src)
        r = runner.invoke(None, ["harmony", "similar", tgt])
        assert r.exit_code == 0

    def test_similar_limit(self, repo: pathlib.Path) -> None:
        """``--limit 3`` caps the number of returned proposals at three."""
        shared_fp = fake_id("limit-shared")
        target_pid = _record(path="lim-tgt.mid", ours="to", theirs="tt", semantic_fp=shared_fp)
        # Ten resolved sources with confidences "0.70" through "0.79".
        for i in range(10):
            src = _record(
                path=f"lim{i}.mid", ours=f"o{i}", theirs=f"t{i}", semantic_fp=shared_fp
            )
            _resolve(src, confidence=f"0.{70+i}", outcome=f"o{i}")

        r = runner.invoke(None, ["harmony", "similar", target_pid, "--limit", "3", "--json"])
        # Without this check, a failing command that prints an empty proposal
        # list would satisfy the upper-bound assertion below.
        assert r.exit_code == 0, r.output
        data = json.loads(r.output)
        assert len(data["proposals"]) <= 3
296
297
298 # ===========================================================================
299 # Tier III — Error paths
300 # ===========================================================================
301
302
class TestEngineErrors:
    """III: ``muse harmony engine`` — error paths."""

    def test_engine_invalid_id_exits_1(self, repo: pathlib.Path) -> None:
        """A malformed pattern ID makes the command exit with code 1."""
        result = runner.invoke(None, ["harmony", "engine", "bad-id", "--json"])
        assert result.exit_code == 1

    def test_engine_nonexistent_id_exits_0_escalated(self, repo: pathlib.Path) -> None:
        """An ID that was never recorded is escalated, not treated as an error."""
        unknown_id = fake_id("nonexistent")
        result = runner.invoke(None, ["harmony", "engine", unknown_id, "--json"])
        assert result.exit_code == 0
        payload = json.loads(result.output)
        assert payload["status"] == "escalated"

    def test_engine_invalid_threshold_exits_1(self, repo: pathlib.Path) -> None:
        """A threshold of 1.5 is rejected with exit code 1."""
        pattern_id = _record()
        argv = [
            "harmony", "engine", pattern_id,
            "--auto-apply-threshold", "1.5",
            "--json",
        ]
        result = runner.invoke(None, argv)
        assert result.exit_code == 1

    def test_engine_negative_threshold_exits_1(self, repo: pathlib.Path) -> None:
        """A threshold of -0.1 is rejected with exit code 1."""
        pattern_id = _record()
        argv = [
            "harmony", "engine", pattern_id,
            "--auto-apply-threshold", "-0.1",
            "--json",
        ]
        result = runner.invoke(None, argv)
        assert result.exit_code == 1
333
334
class TestSimilarErrors:
    """III: ``muse harmony similar`` — error paths."""

    def test_similar_invalid_id_exits_1(self, repo: pathlib.Path) -> None:
        """A malformed pattern ID makes the command exit with code 1."""
        argv = ["harmony", "similar", "bad-id", "--json"]
        result = runner.invoke(None, argv)
        assert result.exit_code == 1

    def test_similar_nonexistent_exits_0_empty(self, repo: pathlib.Path) -> None:
        """An ID that was never recorded yields a successful, empty result."""
        unknown_id = fake_id("nonexistent")
        result = runner.invoke(None, ["harmony", "similar", unknown_id, "--json"])
        assert result.exit_code == 0
        payload = json.loads(result.output)
        assert payload["total"] == 0
347
348
349 # ===========================================================================
350 # Tier IV — End-to-end
351 # ===========================================================================
352
353
class TestEndToEnd:
    """IV: Full lifecycle via CLI layer.

    Every intermediate CLI call is checked for exit code 0 so that a failing
    step is reported where it happens rather than as a missing audit event or
    a JSON decoding error in a later step.
    """

    @staticmethod
    def _run_engine(pattern_id: str) -> None:
        """Run ``harmony engine <pattern_id>`` in text mode and require success."""
        r = runner.invoke(None, ["harmony", "engine", pattern_id])
        assert r.exit_code == 0, r.output

    @staticmethod
    def _audit_event_types() -> list[str]:
        """Return the ``event_type`` of every entry reported by ``harmony audit --json``."""
        r = runner.invoke(None, ["harmony", "audit", "--json"])
        assert r.exit_code == 0, r.output
        return [e["event_type"] for e in json.loads(r.output)["entries"]]

    def test_policy_engine_audit_trail(self, repo: pathlib.Path) -> None:
        """Applying a policy leaves a ``resolution_applied`` audit event."""
        _add_policy(confidence="0.95", action="prefer-ours")
        pid = _record()

        self._run_engine(pid)

        assert "resolution_applied" in self._audit_event_types()

    def test_escalation_audit_trail(self, repo: pathlib.Path) -> None:
        """An escalation leaves an ``escalation_recorded`` audit event."""
        pid = _record()
        self._run_engine(pid)

        assert "escalation_recorded" in self._audit_event_types()

    def test_exact_replay_increments_applied_count_via_cli(self, repo: pathlib.Path) -> None:
        """One engine run over a resolved pattern sets ``applied_count`` to 1."""
        pid = _record()
        _resolve(pid, confidence="0.90")

        self._run_engine(pid)

        r = runner.invoke(None, ["harmony", "show", pid, "--json"])
        assert r.exit_code == 0, r.output
        res = json.loads(r.output)["resolutions"][0]
        assert res["applied_count"] == 1

    def test_similar_then_engine_workflow(self, repo: pathlib.Path) -> None:
        """Agent workflow: find_similar to discover candidates, engine to resolve."""
        shared_fp = fake_id("workflow-shared")
        src = _record(path="src.mid", ours="so", theirs="st", semantic_fp=shared_fp)
        tgt = _record(path="tgt.mid", ours="to", theirs="tt", semantic_fp=shared_fp)
        _resolve(src, confidence="0.88")

        # Step 1: find similar
        similar_result = runner.invoke(None, ["harmony", "similar", tgt, "--json"])
        assert similar_result.exit_code == 0, similar_result.output
        assert json.loads(similar_result.output)["total"] >= 1

        # Step 2: run engine (semantic tier fires, requires confirmation)
        engine_result = runner.invoke(None, ["harmony", "engine", tgt, "--json"])
        assert engine_result.exit_code == 0, engine_result.output
        data = json.loads(engine_result.output)
        assert data["status"] == "proposed"
        assert data["proposal"]["strategy"] == "semantic-proposal"
401
402
403 # ===========================================================================
404 # Tier V — Data integrity
405 # ===========================================================================
406
407
class TestDataIntegrity:
    """V: All JSON fields always present; types correct.

    Each command's exit code is asserted before its output is decoded, so a
    CLI failure is reported as such rather than as a JSON or key error.
    """

    def test_engine_escalated_fields_all_present(self, repo: pathlib.Path) -> None:
        """The engine payload contains all five top-level keys."""
        pid = _record()
        r = runner.invoke(None, ["harmony", "engine", pid, "--json"])
        assert r.exit_code == 0, r.output
        data = json.loads(r.output)
        for field in ("status", "pattern_id", "proposal", "applied_resolution_id",
                      "escalation_reason"):
            assert field in data, f"missing field: {field}"

    def test_engine_applied_applied_resolution_id_is_hex64(self, repo: pathlib.Path) -> None:
        """The applied resolution ID is present and has the expected length."""
        pid = _record()
        _resolve(pid, confidence="0.90")
        r = runner.invoke(None, ["harmony", "engine", pid, "--json"])
        assert r.exit_code == 0, r.output
        rid = json.loads(r.output)["applied_resolution_id"]
        assert rid is not None
        # NOTE(review): the test name says "hex64" but the expected length is
        # 71 — presumably a 7-character "sha256:" prefix plus 64 hex digits;
        # confirm against the ID format and consider renaming the test.
        assert len(rid) == 71

    def test_similar_empty_proposals_is_list_not_null(self, repo: pathlib.Path) -> None:
        """With no matches, ``proposals`` is a list rather than ``null``."""
        pid = _record()
        r = runner.invoke(None, ["harmony", "similar", pid, "--json"])
        assert r.exit_code == 0, r.output
        data = json.loads(r.output)
        assert isinstance(data["proposals"], list)

    def test_engine_proposed_proposal_confidence_in_range(self, repo: pathlib.Path) -> None:
        """The proposal's confidence lies in the closed interval [0, 1]."""
        pid = _record()
        _resolve(pid, confidence="0.60")
        r = runner.invoke(None, ["harmony", "engine", pid, "--json"])
        assert r.exit_code == 0, r.output
        prop = json.loads(r.output)["proposal"]
        assert prop is not None
        assert 0.0 <= prop["confidence"] <= 1.0

    def test_similar_similarity_in_range(self, repo: pathlib.Path) -> None:
        """Every returned proposal's similarity lies in the closed interval [0, 1]."""
        shared_fp = fake_id("range-check")
        src = _record(path="rc-src.mid", ours="so", theirs="st", semantic_fp=shared_fp)
        tgt = _record(path="rc-tgt.mid", ours="to", theirs="tt", semantic_fp=shared_fp)
        _resolve(src)

        r = runner.invoke(None, ["harmony", "similar", tgt, "--json"])
        assert r.exit_code == 0, r.output
        for prop in json.loads(r.output)["proposals"]:
            assert 0.0 <= prop["similarity"] <= 1.0
450
451
452 # ===========================================================================
453 # Tier VI — Security
454 # ===========================================================================
455
456
class TestSecurity:
    """VI: Hostile pattern IDs are rejected at the engine and similar entry points."""

    @staticmethod
    def _exit_code(subcommand: str, pattern_id: str) -> int:
        """Return the exit code of ``harmony <subcommand> <pattern_id> --json``."""
        result = runner.invoke(None, ["harmony", subcommand, pattern_id, "--json"])
        return result.exit_code

    def test_engine_traversal_rejected(self, repo: pathlib.Path) -> None:
        """``engine`` refuses a path-traversal string as a pattern ID."""
        assert self._exit_code("engine", "../../malicious") == 1

    def test_similar_traversal_rejected(self, repo: pathlib.Path) -> None:
        """``similar`` refuses a path-traversal string as a pattern ID."""
        assert self._exit_code("similar", "../../malicious") == 1

    def test_engine_null_byte_rejected(self, repo: pathlib.Path) -> None:
        """``engine`` refuses a 64-character ID whose last character is NUL."""
        hostile_id = "a" * 63 + "\x00"
        assert self._exit_code("engine", hostile_id) == 1
471
472
473 # ===========================================================================
474 # Tier VII — Performance
475 # ===========================================================================
476
477
class TestPerformance:
    """VII: engine and similar complete within their latency budgets.

    Each test also requires exit code 0: a command that fails immediately
    would otherwise be measured as "fast" and pass.
    """

    # Wall-clock budgets in milliseconds.
    # NOTE(review): the similar budget (600) is twice what the test name and
    # the module docstring advertise (300) — presumably relaxed on purpose;
    # confirm, then either tighten the budget or update the documentation.
    _ENGINE_BUDGET_MS = 300
    _SIMILAR_BUDGET_MS = 600

    def test_engine_under_300ms(self, repo: pathlib.Path) -> None:
        """``harmony engine --json`` on a fresh pattern finishes within budget."""
        pid = _record()
        # Only the engine invocation is timed; recording the pattern is setup.
        start = time.perf_counter()
        r = runner.invoke(None, ["harmony", "engine", pid, "--json"])
        elapsed = (time.perf_counter() - start) * 1000
        assert r.exit_code == 0, r.output
        assert elapsed < self._ENGINE_BUDGET_MS, f"engine took {elapsed:.0f}ms"

    def test_similar_under_300ms(self, repo: pathlib.Path) -> None:
        """``harmony similar --json`` on a fresh pattern finishes within budget."""
        pid = _record()
        # Only the similar invocation is timed; recording the pattern is setup.
        start = time.perf_counter()
        r = runner.invoke(None, ["harmony", "similar", pid, "--json"])
        elapsed = (time.perf_counter() - start) * 1000
        assert r.exit_code == 0, r.output
        assert elapsed < self._SIMILAR_BUDGET_MS, f"similar took {elapsed:.0f}ms"
File History 4 commits
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 21 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 22 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 28 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor 29 days ago