tests/test_harmony_cli_phase3.py · gabriel/muse

test_harmony_cli_phase3.py python

493 lines 18.7 KB

sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 21 days ago

1	"""Tests for Phase 3 CLI additions to ``muse harmony``.
2
3	New subcommands:
4	``muse harmony engine <pattern_id>`` — run the three-tier resolution engine
5	``muse harmony similar <pattern_id>`` — find semantically similar patterns
6
7	Coverage tiers
8	--------------
9	I Unit — TypedDict schemas for engine + similar JSON output
10	II Success — engine applied/proposed/escalated; similar with matches
11	III Errors — invalid IDs; pattern not found
12	IV E2E — full policy → engine → audit lifecycle via CLI
13	V Integrity — all JSON fields always present; confidence in range
14	VI Security — path-traversal IDs rejected
15	VII Perf — both subcommands <300 ms
16	"""
17	from __future__ import annotations
18	from collections.abc import Mapping
19
20	from muse.core.types import fake_id
21	from muse.core.paths import muse_dir
22	import json
23	import pathlib
24	import time
25	import typing
26
27	import pytest
28
29	from tests.cli_test_helper import CliRunner
30
# Module-wide CLI runner shared by the helpers and every test class below.
runner = CliRunner()
32
33
34	# ---------------------------------------------------------------------------
35	# Helpers
36	# ---------------------------------------------------------------------------
37
38
39
@pytest.fixture()
def repo(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> pathlib.Path:
    """Create a minimal muse repository under *tmp_path* and chdir into it.

    The directory returned by ``muse_dir(tmp_path)`` is created and given a
    ``config.toml`` containing a ``[repo]`` table. The working directory is
    then switched to *tmp_path* for the duration of the test.

    Returns:
        The repository root (*tmp_path* itself).
    """
    meta_dir = muse_dir(tmp_path)
    meta_dir.mkdir()
    config_file = meta_dir / "config.toml"
    config_file.write_text('[repo]\nname = "test"\nid = "abc"\n')
    # The CLI invocations in this module pass no repository path, so the
    # tests run from inside the freshly created repository.
    monkeypatch.chdir(tmp_path)
    return tmp_path
47
48
def _record(
    path: str = "track.mid",
    domain: str = "midi",
    conflict_type: str = "content",
    ours: str = "ours",
    theirs: str = "theirs",
    semantic_fp: str | None = None,
) -> str:
    """Run ``muse harmony record --json`` and return the new pattern ID.

    Args:
        path: Value passed to ``--path``.
        domain: Value passed to ``--domain``.
        conflict_type: Value passed to ``--conflict-type``.
        ours: Seed handed to ``fake_id`` to build the ``--ours-id`` value.
        theirs: Seed handed to ``fake_id`` to build the ``--theirs-id`` value.
        semantic_fp: When not ``None``, forwarded verbatim as
            ``--semantic-fingerprint``.

    Returns:
        The ``pattern_id`` field of the command's JSON output.

    Raises:
        AssertionError: If the command exits with a non-zero status.
    """
    cli_args = [
        "harmony", "record",
        "--path", path,
        "--domain", domain,
        "--conflict-type", conflict_type,
        "--ours-id", fake_id(ours),
        "--theirs-id", fake_id(theirs),
        "--json",
    ]
    if semantic_fp is not None:
        cli_args.extend(["--semantic-fingerprint", semantic_fp])

    result = runner.invoke(None, cli_args)
    assert result.exit_code == 0, result.output

    payload = json.loads(result.output)
    return payload["pattern_id"]
71
72
def _resolve(
    pattern_id: str,
    confidence: str = "0.9",
    strategy: str = "manual",
    outcome: str = "outcome",
) -> str:
    """Run ``muse harmony resolve --json`` and return the new resolution ID.

    Args:
        pattern_id: Pattern to attach the resolution to (``--pattern-id``).
        confidence: Value passed to ``--confidence`` (kept as a string because
            it goes straight onto the command line).
        strategy: Value passed to ``--strategy``.
        outcome: Seed handed to ``fake_id`` to build the ``--outcome-blob``
            value.

    Returns:
        The ``resolution_id`` field of the command's JSON output.

    Raises:
        AssertionError: If the command exits with a non-zero status.
    """
    cli_args = [
        "harmony", "resolve",
        "--pattern-id", pattern_id,
        "--strategy", strategy,
        "--outcome-blob", fake_id(outcome),
        "--confidence", confidence,
        "--json",
    ]
    result = runner.invoke(None, cli_args)
    assert result.exit_code == 0, result.output

    payload = json.loads(result.output)
    return payload["resolution_id"]
89
90
def _add_policy(
    policy_id: str = "auto-policy",
    scope: str = "repo",
    action: str = "prefer-ours",
    confidence: str = "0.95",
    domain: str | None = None,
) -> None:
    """Register a policy via ``muse harmony policy-add``.

    Args:
        policy_id: Value passed to ``--policy-id``.
        scope: Value passed to ``--scope``.
        action: Value passed to ``--action``.
        confidence: Value passed to ``--confidence`` (string, forwarded as-is).
        domain: When truthy, forwarded as ``--domain``; otherwise the flag is
            omitted.

    Raises:
        AssertionError: If the command exits with a non-zero status.
    """
    args = [
        "harmony", "policy-add",
        "--policy-id", policy_id,
        "--description", "Test policy",
        "--scope", scope,
        "--action", action,
        "--confidence", confidence,
    ]
    if domain:
        args += ["--domain", domain]
    r = runner.invoke(None, args)
    # Fail fast, like _record/_resolve do: a silently rejected policy would
    # let tests that expect "escalated" pass for the wrong reason, since the
    # engine also escalates when no policy exists at all.
    assert r.exit_code == 0, r.output
109
110
111	# ===========================================================================
112	# Tier I — Unit: TypedDict schemas
113	# ===========================================================================
114
115
class TestTypedDictSchemas:
    """I: Engine and similar TypedDicts declare expected keys."""

    def _hints(self, name: str) -> Mapping[str, object]:
        """Return the resolved type hints of the harmony-module attribute *name*."""
        # Imported lazily so the module is only loaded when a schema test runs.
        import muse.cli.commands.harmony as harmony_module

        return typing.get_type_hints(getattr(harmony_module, name))

    # --- _HarmonyEngineJson -------------------------------------------------

    def test_engine_json_has_status(self) -> None:
        engine_keys = self._hints("_HarmonyEngineJson")
        assert "status" in engine_keys

    def test_engine_json_has_pattern_id(self) -> None:
        engine_keys = self._hints("_HarmonyEngineJson")
        assert "pattern_id" in engine_keys

    def test_engine_json_has_proposal(self) -> None:
        engine_keys = self._hints("_HarmonyEngineJson")
        assert "proposal" in engine_keys

    def test_engine_json_has_applied_resolution_id(self) -> None:
        engine_keys = self._hints("_HarmonyEngineJson")
        assert "applied_resolution_id" in engine_keys

    def test_engine_json_has_escalation_reason(self) -> None:
        engine_keys = self._hints("_HarmonyEngineJson")
        assert "escalation_reason" in engine_keys

    # --- _HarmonySimilarJson ------------------------------------------------

    def test_similar_json_has_pattern_id(self) -> None:
        similar_keys = self._hints("_HarmonySimilarJson")
        assert "pattern_id" in similar_keys

    def test_similar_json_has_total(self) -> None:
        similar_keys = self._hints("_HarmonySimilarJson")
        assert "total" in similar_keys

    def test_similar_json_has_proposals(self) -> None:
        similar_keys = self._hints("_HarmonySimilarJson")
        assert "proposals" in similar_keys
147
148
class TestRegistration:
    """I: engine and similar subcommands are reachable."""

    def test_engine_help(self, repo: pathlib.Path) -> None:
        """``muse harmony engine --help`` exits cleanly."""
        help_result = runner.invoke(None, ["harmony", "engine", "--help"])
        assert help_result.exit_code == 0

    def test_similar_help(self, repo: pathlib.Path) -> None:
        """``muse harmony similar --help`` exits cleanly."""
        help_result = runner.invoke(None, ["harmony", "similar", "--help"])
        assert help_result.exit_code == 0
159
160
161	# ===========================================================================
162	# Tier II — Integration: success paths
163	# ===========================================================================
164
165
class TestEngineSuccess:
    """II: muse harmony engine — success paths for all three statuses."""

    @staticmethod
    def _engine_json(pattern_id: str, *extra_args: str) -> dict:
        """Run ``harmony engine <pattern_id> [extra_args] --json`` and parse it.

        Asserts a zero exit code before decoding the output.
        """
        result = runner.invoke(
            None, ["harmony", "engine", pattern_id, *extra_args, "--json"]
        )
        assert result.exit_code == 0
        return json.loads(result.output)

    def test_engine_escalates_no_policy_no_resolution(self, repo: pathlib.Path) -> None:
        pattern_id = _record()
        payload = self._engine_json(pattern_id)
        assert payload["status"] == "escalated"
        assert payload["pattern_id"] == pattern_id
        assert payload["escalation_reason"] is not None

    def test_engine_applied_via_policy(self, repo: pathlib.Path) -> None:
        _add_policy(confidence="0.95", action="prefer-ours")
        pattern_id = _record()
        payload = self._engine_json(pattern_id)
        assert payload["status"] == "applied"
        proposal = payload["proposal"]
        assert proposal is not None
        assert proposal["strategy"] == "policy"

    def test_engine_applied_via_exact_replay(self, repo: pathlib.Path) -> None:
        pattern_id = _record()
        _resolve(pattern_id, confidence="0.90")
        payload = self._engine_json(pattern_id)
        assert payload["status"] == "applied"
        assert payload["applied_resolution_id"] is not None

    def test_engine_proposed_low_confidence(self, repo: pathlib.Path) -> None:
        pattern_id = _record()
        _resolve(pattern_id, confidence="0.60")
        payload = self._engine_json(pattern_id)
        assert payload["status"] == "proposed"
        proposal = payload["proposal"]
        assert proposal is not None
        assert proposal["requires_confirmation"] is True

    def test_engine_text_output(self, repo: pathlib.Path) -> None:
        pattern_id = _record()
        # No --json here: the human-readable output must mention the ID prefix.
        result = runner.invoke(None, ["harmony", "engine", pattern_id])
        assert result.exit_code == 0
        assert pattern_id[:12] in result.output

    def test_engine_with_custom_threshold(self, repo: pathlib.Path) -> None:
        """--auto-apply-threshold overrides default."""
        pattern_id = _record()
        _resolve(pattern_id, confidence="0.80")
        # Below default threshold (0.85) → would be proposed. Above 0.75 → applied.
        payload = self._engine_json(pattern_id, "--auto-apply-threshold", "0.75")
        assert payload["status"] == "applied"

    def test_engine_proposed_via_policy_low_confidence(self, repo: pathlib.Path) -> None:
        _add_policy(policy_id="low-conf", confidence="0.60", action="prefer-ours")
        pattern_id = _record()
        payload = self._engine_json(pattern_id)
        assert payload["status"] == "proposed"

    def test_engine_escalated_via_escalate_policy(self, repo: pathlib.Path) -> None:
        _add_policy(policy_id="esc-policy", confidence="1.0", action="escalate")
        pattern_id = _record()
        payload = self._engine_json(pattern_id)
        assert payload["status"] == "escalated"
237
238
class TestSimilarSuccess:
    """II: muse harmony similar — success paths."""

    def test_similar_empty_when_no_match(self, repo: pathlib.Path) -> None:
        pattern_id = _record()
        result = runner.invoke(None, ["harmony", "similar", pattern_id, "--json"])
        assert result.exit_code == 0
        payload = json.loads(result.output)
        assert payload["pattern_id"] == pattern_id
        assert payload["total"] == 0
        assert payload["proposals"] == []

    def test_similar_finds_shared_semantic_fingerprint(self, repo: pathlib.Path) -> None:
        fingerprint = fake_id("shared-semantic-cli")
        source_id = _record(
            path="source.mid", ours="so", theirs="st", semantic_fp=fingerprint
        )
        target_id = _record(
            path="target.mid", ours="to", theirs="tt", semantic_fp=fingerprint
        )

        # Only the source pattern gets a resolution.
        _resolve(source_id, confidence="0.88")

        result = runner.invoke(None, ["harmony", "similar", target_id, "--json"])
        assert result.exit_code == 0
        payload = json.loads(result.output)
        assert payload["total"] >= 1
        first_proposal = payload["proposals"][0]
        assert first_proposal["similar_pattern_id"] == source_id

    def test_similar_entry_has_required_fields(self, repo: pathlib.Path) -> None:
        fingerprint = fake_id("shared-fields")
        source_id = _record(path="s.mid", ours="so", theirs="st", semantic_fp=fingerprint)
        target_id = _record(path="t.mid", ours="to", theirs="tt", semantic_fp=fingerprint)
        _resolve(source_id, confidence="0.85")

        result = runner.invoke(None, ["harmony", "similar", target_id, "--json"])
        entry = json.loads(result.output)["proposals"][0]
        required = ("similar_pattern_id", "similarity", "confidence", "strategy", "rationale")
        for field in required:
            assert field in entry, f"missing field: {field}"

    def test_similar_text_output(self, repo: pathlib.Path) -> None:
        fingerprint = fake_id("shared-text")
        source_id = _record(
            path="text-src.mid", ours="so", theirs="st", semantic_fp=fingerprint
        )
        target_id = _record(
            path="text-tgt.mid", ours="to", theirs="tt", semantic_fp=fingerprint
        )
        _resolve(source_id)
        result = runner.invoke(None, ["harmony", "similar", target_id])
        assert result.exit_code == 0

    def test_similar_limit(self, repo: pathlib.Path) -> None:
        fingerprint = fake_id("limit-shared")
        target_id = _record(
            path="lim-tgt.mid", ours="to", theirs="tt", semantic_fp=fingerprint
        )
        # Ten resolved sources sharing the fingerprint, confidences 0.70..0.79.
        for idx in range(10):
            source_id = _record(
                path=f"lim{idx}.mid",
                ours=f"o{idx}",
                theirs=f"t{idx}",
                semantic_fp=fingerprint,
            )
            _resolve(source_id, confidence=f"0.{70 + idx}", outcome=f"o{idx}")

        result = runner.invoke(
            None, ["harmony", "similar", target_id, "--limit", "3", "--json"]
        )
        proposals = json.loads(result.output)["proposals"]
        assert len(proposals) <= 3
296
297
298	# ===========================================================================
299	# Tier III — Error paths
300	# ===========================================================================
301
302
class TestEngineErrors:
    """III: muse harmony engine — error paths."""

    def test_engine_invalid_id_exits_1(self, repo: pathlib.Path) -> None:
        result = runner.invoke(None, ["harmony", "engine", "bad-id", "--json"])
        assert result.exit_code == 1

    def test_engine_nonexistent_id_exits_0_escalated(self, repo: pathlib.Path) -> None:
        # Unknown pattern → engine escalates rather than errors
        unknown_id = fake_id("nonexistent")
        result = runner.invoke(None, ["harmony", "engine", unknown_id, "--json"])
        assert result.exit_code == 0
        payload = json.loads(result.output)
        assert payload["status"] == "escalated"

    def test_engine_invalid_threshold_exits_1(self, repo: pathlib.Path) -> None:
        pattern_id = _record()
        cli_args = [
            "harmony", "engine", pattern_id,
            "--auto-apply-threshold", "1.5",
            "--json",
        ]
        result = runner.invoke(None, cli_args)
        assert result.exit_code == 1

    def test_engine_negative_threshold_exits_1(self, repo: pathlib.Path) -> None:
        pattern_id = _record()
        cli_args = [
            "harmony", "engine", pattern_id,
            "--auto-apply-threshold", "-0.1",
            "--json",
        ]
        result = runner.invoke(None, cli_args)
        assert result.exit_code == 1
333
334
class TestSimilarErrors:
    """III: muse harmony similar — error paths."""

    def test_similar_invalid_id_exits_1(self, repo: pathlib.Path) -> None:
        result = runner.invoke(None, ["harmony", "similar", "bad-id", "--json"])
        assert result.exit_code == 1

    def test_similar_nonexistent_exits_0_empty(self, repo: pathlib.Path) -> None:
        # A well-formed but unknown ID yields an empty result, not an error.
        unknown_id = fake_id("nonexistent")
        result = runner.invoke(None, ["harmony", "similar", unknown_id, "--json"])
        assert result.exit_code == 0
        payload = json.loads(result.output)
        assert payload["total"] == 0
347
348
349	# ===========================================================================
350	# Tier IV — End-to-end
351	# ===========================================================================
352
353
class TestEndToEnd:
    """IV: Full lifecycle via CLI layer."""

    def test_policy_engine_audit_trail(self, repo: pathlib.Path) -> None:
        _add_policy(confidence="0.95", action="prefer-ours")
        pattern_id = _record()

        runner.invoke(None, ["harmony", "engine", pattern_id])

        audit_result = runner.invoke(None, ["harmony", "audit", "--json"])
        entries = json.loads(audit_result.output)["entries"]
        seen_events = [entry["event_type"] for entry in entries]
        assert "resolution_applied" in seen_events

    def test_escalation_audit_trail(self, repo: pathlib.Path) -> None:
        pattern_id = _record()
        runner.invoke(None, ["harmony", "engine", pattern_id])

        audit_result = runner.invoke(None, ["harmony", "audit", "--json"])
        entries = json.loads(audit_result.output)["entries"]
        seen_events = [entry["event_type"] for entry in entries]
        assert "escalation_recorded" in seen_events

    def test_exact_replay_increments_applied_count_via_cli(self, repo: pathlib.Path) -> None:
        pattern_id = _record()
        _resolve(pattern_id, confidence="0.90")

        runner.invoke(None, ["harmony", "engine", pattern_id])

        show_result = runner.invoke(None, ["harmony", "show", pattern_id, "--json"])
        first_resolution = json.loads(show_result.output)["resolutions"][0]
        assert first_resolution["applied_count"] == 1

    def test_similar_then_engine_workflow(self, repo: pathlib.Path) -> None:
        """Agent workflow: find_similar to discover candidates, engine to resolve."""
        fingerprint = fake_id("workflow-shared")
        source_id = _record(path="src.mid", ours="so", theirs="st", semantic_fp=fingerprint)
        target_id = _record(path="tgt.mid", ours="to", theirs="tt", semantic_fp=fingerprint)
        _resolve(source_id, confidence="0.88")

        # Step 1: find similar
        similar_result = runner.invoke(None, ["harmony", "similar", target_id, "--json"])
        similar_payload = json.loads(similar_result.output)
        assert similar_payload["total"] >= 1

        # Step 2: run engine (semantic tier fires, requires confirmation)
        engine_result = runner.invoke(None, ["harmony", "engine", target_id, "--json"])
        engine_payload = json.loads(engine_result.output)
        assert engine_payload["status"] == "proposed"
        assert engine_payload["proposal"]["strategy"] == "semantic-proposal"
401
402
403	# ===========================================================================
404	# Tier V — Data integrity
405	# ===========================================================================
406
407
class TestDataIntegrity:
    """V: All JSON fields always present; types correct."""

    def test_engine_escalated_fields_all_present(self, repo: pathlib.Path) -> None:
        pattern_id = _record()
        result = runner.invoke(None, ["harmony", "engine", pattern_id, "--json"])
        payload = json.loads(result.output)
        expected_fields = (
            "status",
            "pattern_id",
            "proposal",
            "applied_resolution_id",
            "escalation_reason",
        )
        for field in expected_fields:
            assert field in payload, f"missing field: {field}"

    def test_engine_applied_applied_resolution_id_is_hex64(self, repo: pathlib.Path) -> None:
        pattern_id = _record()
        _resolve(pattern_id, confidence="0.90")
        result = runner.invoke(None, ["harmony", "engine", pattern_id, "--json"])
        resolution_id = json.loads(result.output)["applied_resolution_id"]
        assert resolution_id is not None
        # NOTE(review): 71 presumably equals len("sha256:") + 64 hex digits —
        # confirm against the ID format; the test name still says "hex64".
        assert len(resolution_id) == 71

    def test_similar_empty_proposals_is_list_not_null(self, repo: pathlib.Path) -> None:
        pattern_id = _record()
        result = runner.invoke(None, ["harmony", "similar", pattern_id, "--json"])
        proposals = json.loads(result.output)["proposals"]
        assert isinstance(proposals, list)

    def test_engine_proposed_proposal_confidence_in_range(self, repo: pathlib.Path) -> None:
        pattern_id = _record()
        _resolve(pattern_id, confidence="0.60")
        result = runner.invoke(None, ["harmony", "engine", pattern_id, "--json"])
        proposal = json.loads(result.output)["proposal"]
        assert proposal is not None
        assert 0.0 <= proposal["confidence"] <= 1.0

    def test_similar_similarity_in_range(self, repo: pathlib.Path) -> None:
        fingerprint = fake_id("range-check")
        source_id = _record(path="rc-src.mid", ours="so", theirs="st", semantic_fp=fingerprint)
        target_id = _record(path="rc-tgt.mid", ours="to", theirs="tt", semantic_fp=fingerprint)
        _resolve(source_id)

        result = runner.invoke(None, ["harmony", "similar", target_id, "--json"])
        proposals = json.loads(result.output)["proposals"]
        for proposal in proposals:
            assert 0.0 <= proposal["similarity"] <= 1.0
450
451
452	# ===========================================================================
453	# Tier VI — Security
454	# ===========================================================================
455
456
class TestSecurity:
    """VI: Path-traversal IDs rejected at engine and similar entry points."""

    def test_engine_traversal_rejected(self, repo: pathlib.Path) -> None:
        cli_args = ["harmony", "engine", "../../malicious", "--json"]
        result = runner.invoke(None, cli_args)
        assert result.exit_code == 1

    def test_similar_traversal_rejected(self, repo: pathlib.Path) -> None:
        cli_args = ["harmony", "similar", "../../malicious", "--json"]
        result = runner.invoke(None, cli_args)
        assert result.exit_code == 1

    def test_engine_null_byte_rejected(self, repo: pathlib.Path) -> None:
        # 63 valid-looking characters followed by an embedded NUL byte.
        poisoned_id = "a" * 63 + "\x00"
        result = runner.invoke(None, ["harmony", "engine", poisoned_id, "--json"])
        assert result.exit_code == 1
471
472
473	# ===========================================================================
474	# Tier VII — Performance
475	# ===========================================================================
476
477
class TestPerformance:
    """VII: engine and similar complete within 300 ms."""

    # Wall-clock budget in milliseconds, shared by both subcommands so the
    # assertions cannot drift away from the test names and the documented
    # "<300 ms" target (the similar test previously allowed 600 ms).
    _BUDGET_MS = 300

    def test_engine_under_300ms(self, repo: pathlib.Path) -> None:
        """A single ``harmony engine --json`` call stays inside the budget."""
        pid = _record()
        # Only the engine invocation is timed; recording the pattern is setup.
        start = time.monotonic()
        runner.invoke(None, ["harmony", "engine", pid, "--json"])
        elapsed = (time.monotonic() - start) * 1000
        assert elapsed < self._BUDGET_MS, f"engine took {elapsed:.0f}ms"

    def test_similar_under_300ms(self, repo: pathlib.Path) -> None:
        """A single ``harmony similar --json`` call stays inside the budget."""
        pid = _record()
        # Only the similar invocation is timed; recording the pattern is setup.
        start = time.monotonic()
        runner.invoke(None, ["harmony", "similar", pid, "--json"])
        elapsed = (time.monotonic() - start) * 1000
        assert elapsed < self._BUDGET_MS, f"similar took {elapsed:.0f}ms"

File History 4 commits

sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 21 days ago

sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 22 days ago

sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 28 days ago

sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor ⚠ 29 days ago

function repo

function _record

function _resolve

function _add_policy

class TestTypedDictSchemas

function _hints

function test_engine_json_has_status

function test_engine_json_has_pattern_id

function test_engine_json_has_proposal

function test_engine_json_has_applied_resolution_id

function test_engine_json_has_escalation_reason

function test_similar_json_has_pattern_id

function test_similar_json_has_total

function test_similar_json_has_proposals

class TestRegistration

function test_engine_help

function test_similar_help

class TestEngineSuccess

function test_engine_escalates_no_policy_no_resolution

function test_engine_applied_via_policy

function test_engine_applied_via_exact_replay

function test_engine_proposed_low_confidence

function test_engine_text_output

function test_engine_with_custom_threshold

function test_engine_proposed_via_policy_low_confidence

function test_engine_escalated_via_escalate_policy

class TestSimilarSuccess

function test_similar_empty_when_no_match

function test_similar_finds_shared_semantic_fingerprint

function test_similar_entry_has_required_fields

function test_similar_text_output

function test_similar_limit

class TestEngineErrors

function test_engine_invalid_id_exits_1

function test_engine_nonexistent_id_exits_0_escalated

function test_engine_invalid_threshold_exits_1

function test_engine_negative_threshold_exits_1

class TestSimilarErrors

function test_similar_invalid_id_exits_1

function test_similar_nonexistent_exits_0_empty

class TestEndToEnd

function test_policy_engine_audit_trail

function test_escalation_audit_trail

function test_exact_replay_increments_applied_count_via_cli

function test_similar_then_engine_workflow

class TestDataIntegrity

function test_engine_escalated_fields_all_present

function test_engine_applied_applied_resolution_id_is_hex64

function test_similar_empty_proposals_is_list_not_null

function test_engine_proposed_proposal_confidence_in_range

function test_similar_similarity_in_range

class TestSecurity

function test_engine_traversal_rejected

function test_similar_traversal_rejected

function test_engine_null_byte_rejected

class TestPerformance

function test_engine_under_300ms

function test_similar_under_300ms

Pathtests/test_harmony_cli_phase3.py

Lines493

Size18.7 KB

LangPython

Refsha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2

Object ID

sha256:dd9fe2faf59723d359a1156f47e901d1b4f4a0993c17c4951c7719c2d2aa4ed1…

Last commit

sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2

fix: remove commit_exists filter from have anchor…

21 days ago

Quick links

Blame History