gabriel / muse public
test_phase7_merge_correctness.py python
529 lines 20.4 KB
Raw
sha256:f0cc3f3fe40f1df9a488585b5b2aef6aa4dcb0b22b5fdbf7bab7071a5b0c7f7c feat(merge): Phase 7 — independence merge correctness + Har… Sonnet 4.6 minor ⚠ breaking 1 day ago
1 """TDD tests for Phase 7 — Step 1.5 correctness + Harmony confidence gating.
2
3 Issue #86 Phase 7 deliverables:
4
5 Step 1.5 independence-merge guard (IC tests):
6 IC_01 — Divergent unparseable Python (SyntaxError on both sides) → conflict
7 surfaces correctly; no silent union-merge.
8 IC_02 — Divergent valid Python, non-overlapping symbols → independence merge
9 fires and produces a clean merge.
10 IC_03 — Divergent valid Python, same symbol changed on both sides → conflict
11 surfaces; independence merge does NOT fire.
12 IC_04 — merge_ops Step 1.5 skipped when child_ops empty on both sides (unit
13 test on the skip condition).
14
15 Harmony confidence gating (HC tests):
16 HC_01 — Resolution with confidence >= 0.85 (default threshold) auto-applies.
17 HC_02 — Resolution with confidence < 0.85 (e.g. 0.8 from --ours) does NOT
18 auto-apply; conflict stays in remaining.
19 HC_03 — harmony.min_auto_apply_confidence config key overrides default;
20 setting it to 0.75 causes the 0.8-confidence pattern to auto-apply.
21 HC_04 — When no resolution meets the threshold, Harmony escalates cleanly
22 (returns the conflict in remaining, does not error).
23
24 Background
25 ----------
26 Gap 1 (Step 1.5): When a .py file can't be parsed (SyntaxError), _semantic_ops
27 returns a PatchOp with child_ops=[]. The commute-check loop is vacuously False
28 over empty lists, so _independence_merge_blob fires and silently union-merges
29 divergent lines — writing both versions without surfacing a conflict. This is
30 data corruption on real divergence in unparseable files. The guard:
31
32 if not our_patch["child_ops"] and not their_patch["child_ops"]:
33 continue
34
35 skips the independence path when neither side produced any symbols.
36
37 Gap 2 (Harmony confidence): auto_apply calls best_resolution() and applies it
38 unconditionally regardless of confidence. A pattern learned from
39 muse checkout --ours (confidence=0.8) replays at the same strength as a
40 hand-edited resolution (confidence=1.0). The fix reads
41 harmony.min_auto_apply_confidence from config (default 0.85) and escalates
42 patterns below the threshold.
43 """
44 from __future__ import annotations
45
46 import datetime
47 import json
48 import pathlib
49 import tomllib
50
51 import pytest
52 from tests.cli_test_helper import CliRunner
53 from muse.core.types import blob_id, fake_id
54 from muse.core.object_store import write_object, read_object
55 from muse.core.paths import heads_dir, muse_dir, ref_path
56
# Shared CLI runner used by the IC tests that drive ``merge`` end to end.
runner = CliRunner()
# Passed as the first argument of ``runner.invoke``; it is None in this module.
# NOTE(review): presumably the project's CliRunner helper locates the CLI entry
# point on its own and ignores this argument — confirm in tests.cli_test_helper.
cli = None
59
60
61 # ---------------------------------------------------------------------------
62 # Shared helpers
63 # ---------------------------------------------------------------------------
64
def _env(root: pathlib.Path) -> dict:
    """Build the environment mapping handed to ``runner.invoke``.

    The mapping has a single entry, ``MUSE_REPO_ROOT``, holding *root*
    converted to a string.
    """
    repo_root = str(root)
    return dict(MUSE_REPO_ROOT=repo_root)
67
68
def _init_repo(tmp_path: pathlib.Path) -> tuple[pathlib.Path, str]:
    """Lay out a minimal repository skeleton under *tmp_path*.

    Creates the directory returned by ``muse_dir(tmp_path)`` and fills it with:

    * ``repo.json`` — repo id, domain ``code``, default branch ``main`` and a
      fixed ``created_at`` stamp;
    * ``HEAD`` — a symbolic ref to ``refs/heads/main``;
    * empty ``refs/heads``, ``snapshots``, ``commits`` and ``objects``
      directories.

    Returns:
        ``(tmp_path, repo_id)`` where ``repo_id`` is ``fake_id("repo")``.
    """
    repo_dir = muse_dir(tmp_path)
    repo_dir.mkdir()

    repo_id = fake_id("repo")
    descriptor = {
        "repo_id": repo_id,
        "domain": "code",
        "default_branch": "main",
        "created_at": "2025-01-01T00:00:00+00:00",
    }
    (repo_dir / "repo.json").write_text(json.dumps(descriptor), encoding="utf-8")
    (repo_dir / "HEAD").write_text("ref: refs/heads/main", encoding="utf-8")

    # refs/heads needs its parent created too; the other stores are single-level.
    (repo_dir / "refs" / "heads").mkdir(parents=True)
    for store in ("snapshots", "commits", "objects"):
        (repo_dir / store).mkdir()

    return tmp_path, repo_id
85
86
def _write_obj(root: pathlib.Path, content: bytes) -> str:
    """Persist *content* in the object store under *root* and return its id.

    The id is whatever ``blob_id(content)`` yields; the bytes are stored under
    that id through ``write_object``.
    """
    object_id = blob_id(content)
    write_object(root, object_id, content)
    return object_id
91
92
def _make_commit(
    root: pathlib.Path,
    repo_id: str,
    branch: str = "main",
    message: str = "test",
    manifest: dict | None = None,
    parent_id: str | None = None,
) -> str:
    """Write a snapshot plus a commit on *branch* and advance the branch ref.

    Args:
        root: Repository root.
        repo_id: Accepted for call-site symmetry; not used by this helper.
        branch: Branch whose ref file is read (for the default parent) and
            rewritten to point at the new commit.
        message: Commit message.
        manifest: Path → object-id mapping for the snapshot; ``None`` or an
            empty mapping produces an empty snapshot.
        parent_id: Explicit parent commit id. When ``None`` the current content
            of the branch ref is used, or no parent if the ref does not exist
            or is empty.

    Returns:
        The id of the commit that was written.
    """
    from muse.core.commits import CommitRecord, write_commit
    from muse.core.snapshots import SnapshotRecord, write_snapshot
    from muse.core.ids import hash_snapshot, hash_commit

    ref_file = ref_path(root, branch)
    if parent_id is None and ref_file.exists():
        # Read with the same explicit encoding used for every write in this
        # module, and treat an empty ref as "no parent" so that ``parent_ids``
        # and ``parent_commit_id`` below always agree.
        parent_id = ref_file.read_text(encoding="utf-8").strip() or None

    m = manifest or {}
    snap_id = hash_snapshot(m)
    committed_at = datetime.datetime.now(datetime.timezone.utc)
    commit_id = hash_commit(
        parent_ids=[parent_id] if parent_id else [],
        snapshot_id=snap_id,
        message=message,
        committed_at_iso=committed_at.isoformat(),
    )

    write_snapshot(root, SnapshotRecord(snapshot_id=snap_id, manifest=m))
    write_commit(root, CommitRecord(
        commit_id=commit_id,
        branch=branch,
        snapshot_id=snap_id,
        message=message,
        committed_at=committed_at,
        parent_commit_id=parent_id,
    ))

    # The ref directory may not exist yet for a branch that is being created.
    ref_file.parent.mkdir(parents=True, exist_ok=True)
    ref_file.write_text(commit_id, encoding="utf-8")
    return commit_id
129
130
131 # ---------------------------------------------------------------------------
132 # IC_01 — Divergent unparseable Python → conflict surfaces, no silent union-merge
133 # ---------------------------------------------------------------------------
134
def test_IC_01_divergent_unparseable_surfaces_conflict(tmp_path: pathlib.Path) -> None:
    """IC_01: unparseable, divergent ``config.py`` on both branches must conflict.

    Pre-fix behaviour: with ``child_ops=[]`` the commute check is vacuously
    False, ``_independence_merge_blob`` runs, and both divergent versions are
    union-merged into the output with no conflict reported.

    Post-fix behaviour: the empty-``child_ops`` guard bypasses the independence
    path, and the file-level fallback reports the divergent content as a
    conflict.
    """
    root, repo_id = _init_repo(tmp_path)

    # Common ancestor holds valid Python.
    ancestor_blob = _write_obj(root, b"value = 1\n")
    base_id = _make_commit(root, repo_id, "main", "base", {"config.py": ancestor_blob})

    # main: unclosed brackets (SyntaxError) plus an edited value line.
    ours_content = b"(((\nvalue = 2\n"
    main_manifest = {"config.py": _write_obj(root, ours_content)}
    _make_commit(
        root, repo_id, "main", "ours changes config", main_manifest, parent_id=base_id
    )

    # feat: a different SyntaxError that diverges from main on the bracket line.
    feat_manifest = {"config.py": _write_obj(root, b")))\nvalue = 3\n")}
    (heads_dir(root) / "feat").write_text(base_id, encoding="utf-8")
    _make_commit(
        root, repo_id, "feat", "feat changes config", feat_manifest, parent_id=base_id
    )

    # Materialise our side on disk so the CLI sees a working tree.
    (root / "config.py").write_bytes(ours_content)

    result = runner.invoke(
        cli, ["merge", "feat", "--json"], env=_env(root), catch_exceptions=False
    )
    data = json.loads(result.output)

    assert "config.py" in data.get("conflicts", []), (
        f"IC_01: divergent unparseable file must surface conflict, "
        f"got conflicts={data.get('conflicts')}"
    )
175
176
177 # ---------------------------------------------------------------------------
178 # IC_02 — Divergent valid Python, non-overlapping symbols → clean independence merge
179 # ---------------------------------------------------------------------------
180
def test_IC_02_non_overlapping_symbols_merge_cleanly(tmp_path: pathlib.Path) -> None:
    """IC_02: independent symbol additions to one file merge without conflict.

    ``main`` adds ``foo()`` and ``feat`` adds ``bar()`` to the same
    ``utils.py``. The merge must not list ``utils.py`` as a conflict and must
    report a clean status.
    """
    root, repo_id = _init_repo(tmp_path)

    ancestor_blob = _write_obj(root, b"# shared module\n")
    base_id = _make_commit(root, repo_id, "main", "base", {"utils.py": ancestor_blob})

    # main gains foo().
    ours_content = b"# shared module\n\ndef foo():\n return 1\n"
    main_manifest = {"utils.py": _write_obj(root, ours_content)}
    _make_commit(root, repo_id, "main", "add foo", main_manifest, parent_id=base_id)

    # feat gains bar(), which does not touch foo().
    feat_manifest = {
        "utils.py": _write_obj(root, b"# shared module\n\ndef bar():\n return 2\n"),
    }
    (heads_dir(root) / "feat").write_text(base_id, encoding="utf-8")
    _make_commit(root, repo_id, "feat", "add bar", feat_manifest, parent_id=base_id)

    # Working tree mirrors the tip of main.
    (root / "utils.py").write_bytes(ours_content)

    result = runner.invoke(
        cli, ["merge", "feat", "--json"], env=_env(root), catch_exceptions=False
    )
    data = json.loads(result.output)

    assert "utils.py" not in data.get("conflicts", []), (
        f"IC_02: non-overlapping symbol additions must merge cleanly, "
        f"got conflicts={data.get('conflicts')}"
    )
    assert data.get("status") in ("clean", "merged", "committed"), (
        f"IC_02: expected clean merge status, got {data.get('status')}"
    )
218
219
220 # ---------------------------------------------------------------------------
221 # IC_03 — Same symbol changed on both sides → conflict surfaces
222 # ---------------------------------------------------------------------------
223
def test_IC_03_same_symbol_conflict_surfaces(tmp_path: pathlib.Path) -> None:
    """IC_03: both branches edit ``compute()`` → a conflict naming ``engine.py`` appears.

    The independence merge must not fire when the same symbol diverges.
    """
    root, repo_id = _init_repo(tmp_path)

    ancestor_blob = _write_obj(root, b"def compute():\n return 1\n")
    base_id = _make_commit(root, repo_id, "main", "base", {"engine.py": ancestor_blob})

    # main: compute() now returns 2.
    ours_content = b"def compute():\n return 2\n"
    main_manifest = {"engine.py": _write_obj(root, ours_content)}
    _make_commit(
        root, repo_id, "main", "ours modifies compute", main_manifest, parent_id=base_id
    )

    # feat: the very same compute() now returns 3.
    feat_manifest = {"engine.py": _write_obj(root, b"def compute():\n return 3\n")}
    (heads_dir(root) / "feat").write_text(base_id, encoding="utf-8")
    _make_commit(
        root, repo_id, "feat", "feat modifies compute", feat_manifest, parent_id=base_id
    )

    (root / "engine.py").write_bytes(ours_content)

    result = runner.invoke(
        cli, ["merge", "feat", "--json"], env=_env(root), catch_exceptions=False
    )
    data = json.loads(result.output)

    # Substring match: the conflict entry may be symbol-level or file-level.
    conflicts = data.get("conflicts", [])
    assert any("engine.py" in c for c in conflicts), (
        f"IC_03: same symbol divergence must surface conflict, got conflicts={conflicts}"
    )
255
256
257 # ---------------------------------------------------------------------------
258 # IC_04 — Unit test: Step 1.5 skipped when both patches have empty child_ops
259 # ---------------------------------------------------------------------------
260
def test_IC_04_step15_skips_empty_child_ops(tmp_path: pathlib.Path) -> None:
    """IC_04: ``merge_ops`` must not independence-merge when both ``child_ops`` are empty.

    Unit-level check of the guard::

        if not our_patch["child_ops"] and not their_patch["child_ops"]:
            continue

    An unparseable file with divergent content has to come back from
    ``merge_ops`` as a conflict rather than as an independence-resolved path.
    """
    from muse.plugins.code.plugin import CodePlugin

    root, _ = _init_repo(tmp_path)

    def _snapshot(blob: str) -> dict:
        # Single-file snapshot in the shape passed to merge_ops below.
        return {"files": {"config.py": blob}, "domain": "code", "directories": {}}

    def _empty_patch() -> dict:
        # Stand-in for what _semantic_ops returns when a file cannot be parsed
        # (SyntaxError → child_ops=[]).
        return {"op": "patch", "address": "config.py", "child_domain": "python", "child_ops": []}

    # Base is valid; both sides are SyntaxErrors that also diverge from each other.
    base_oid = _write_obj(root, b"value = 1\n")
    ours_oid = _write_obj(root, b"(((\nvalue = 2\n")
    theirs_oid = _write_obj(root, b")))\nvalue = 3\n")

    result = CodePlugin().merge_ops(
        _snapshot(base_oid),
        _snapshot(ours_oid),
        _snapshot(theirs_oid),
        [_empty_patch()],
        [_empty_patch()],
        repo_root=root,
    )

    # With the guard in place Step 1.5 is skipped, so the file-level fallback
    # must flag config.py instead of silently union-merging it.
    assert "config.py" in result.conflicts, (
        f"IC_04: empty child_ops must cause Step 1.5 skip; "
        f"config.py must conflict, got conflicts={result.conflicts}"
    )
302
303
304 # ---------------------------------------------------------------------------
305 # Harmony HC test helpers
306 # ---------------------------------------------------------------------------
307
# Fixed, timezone-aware (UTC) timestamp. _plant_harmony_resolution uses it for
# recorded_at / resolved_at and as an input to compute_resolution_id, so the
# planted records do not depend on the wall clock.
_NOW = datetime.datetime(2025, 6, 1, 12, 0, 0, tzinfo=datetime.timezone.utc)
309
310
def _plant_harmony_resolution(
    root: pathlib.Path,
    path: str,
    ours_content: bytes,
    theirs_content: bytes,
    outcome_content: bytes,
    confidence: float,
    human_verified: bool = False,
) -> tuple[str, str]:
    """Seed the repo with one harmony pattern and one resolution for it.

    The three byte strings are written to the object store. A CONTENT-type
    ``ConflictPattern`` for *path* is recorded from the ours/theirs blob
    fingerprint (used for both the blob and the semantic fingerprint), and a
    MANUAL ``Resolution`` pointing at the outcome blob is saved with the
    requested *confidence* and *human_verified* flag.

    Returns:
        ``(pattern_id, outcome_oid)``.
    """
    from muse.core.harmony.types import (
        AgentProvenance, ConflictPattern, ConflictType, Resolution, ResolutionStrategy,
    )
    from muse.core.harmony.fingerprint import (
        blob_fingerprint, compute_pattern_id, compute_resolution_id,
    )
    from muse.core.harmony.patterns import record_pattern
    from muse.core.harmony.resolutions import save_resolution

    ours_oid = _write_obj(root, ours_content)
    theirs_oid = _write_obj(root, theirs_content)
    outcome_oid = _write_obj(root, outcome_content)

    # --- pattern -----------------------------------------------------------
    fingerprint = blob_fingerprint(ours_oid, theirs_oid)
    pattern_id = compute_pattern_id(path, fingerprint, fingerprint)
    record_pattern(
        root,
        ConflictPattern(
            pattern_id=pattern_id,
            path=path,
            domain="code",
            conflict_type=ConflictType.CONTENT,
            blob_fingerprint=fingerprint,
            semantic_fingerprint=fingerprint,
            ours_id=ours_oid,
            theirs_id=theirs_oid,
            description={},
            recorded_at=_NOW,
            recorded_by="test",
        ),
    )

    # --- resolution --------------------------------------------------------
    resolver = AgentProvenance.human()
    strategy = ResolutionStrategy.MANUAL
    resolution_id = compute_resolution_id(pattern_id, outcome_oid, strategy, resolver, _NOW)
    save_resolution(
        root,
        Resolution(
            resolution_id=resolution_id,
            pattern_id=pattern_id,
            strategy=strategy,
            policy_id=None,
            outcome_blob=outcome_oid,
            resolved_by=resolver,
            human_verified=human_verified,
            confidence=confidence,
            rationale="test resolution",
            resolved_at=_NOW,
        ),
    )

    return pattern_id, outcome_oid
372
373
374 # ---------------------------------------------------------------------------
375 # HC_01 — High confidence auto-applies (>= default 0.85)
376 # ---------------------------------------------------------------------------
377
def test_HC_01_high_confidence_auto_applies(tmp_path: pathlib.Path) -> None:
    """Resolution with confidence >= 0.85 (default threshold) auto-applies on re-merge.

    A resolution with confidence 0.9 is planted for ``config.py``; ``auto_apply``
    must then report the path as resolved and leave nothing for it in the
    remaining conflicts.
    """
    from muse.core.harmony.engine import auto_apply
    from muse.plugins.code.plugin import CodePlugin

    root, _ = _init_repo(tmp_path)

    ours_content = b"value = 2\n"
    theirs_content = b"value = 3\n"
    outcome_content = b"value = 2 # merged\n"

    # The helper's return value is not needed here, so it is not bound.
    _plant_harmony_resolution(
        root, "config.py", ours_content, theirs_content, outcome_content, confidence=0.9
    )

    # Manifests reference the same blobs the planted pattern was built from.
    ours_manifest = {"config.py": blob_id(ours_content)}
    theirs_manifest = {"config.py": blob_id(theirs_content)}

    plugin = CodePlugin()
    resolved, remaining = auto_apply(
        root, ["config.py"], ours_manifest, theirs_manifest, "code", plugin
    )

    assert "config.py" in resolved, (
        f"HC_01: confidence=0.9 >= threshold=0.85 must auto-apply, "
        f"got resolved={resolved}, remaining={remaining}"
    )
    assert "config.py" not in remaining
408
409
410 # ---------------------------------------------------------------------------
411 # HC_02 — Low confidence does NOT auto-apply (< default 0.85)
412 # ---------------------------------------------------------------------------
413
def test_HC_02_low_confidence_does_not_auto_apply(tmp_path: pathlib.Path) -> None:
    """HC_02: a 0.8-confidence resolution (below the 0.85 default) stays unresolved."""
    from muse.core.harmony.engine import auto_apply
    from muse.plugins.code.plugin import CodePlugin

    root, _ = _init_repo(tmp_path)

    ours_content = b"value = 2\n"
    theirs_content = b"value = 3\n"

    # Outcome equals our side — the shape a typical --ours resolution takes.
    _plant_harmony_resolution(
        root, "config.py", ours_content, theirs_content, b"value = 2\n", confidence=0.8
    )

    resolved, remaining = auto_apply(
        root,
        ["config.py"],
        {"config.py": blob_id(ours_content)},
        {"config.py": blob_id(theirs_content)},
        "code",
        CodePlugin(),
    )

    assert "config.py" not in resolved, (
        f"HC_02: confidence=0.8 < threshold=0.85 must NOT auto-apply, "
        f"got resolved={resolved}"
    )
    assert "config.py" in remaining, (
        f"HC_02: low-confidence path must be in remaining, got remaining={remaining}"
    )
446
447
448 # ---------------------------------------------------------------------------
449 # HC_03 — Config key overrides default threshold
450 # ---------------------------------------------------------------------------
451
def test_HC_03_config_key_overrides_threshold(tmp_path: pathlib.Path) -> None:
    """harmony.min_auto_apply_confidence=0.75 causes the 0.8-confidence pattern to auto-apply.

    The test writes a ``config.toml`` with the lowered threshold into the repo's
    muse directory, plants a 0.8-confidence resolution for ``config.py`` and
    expects ``auto_apply`` to resolve the path.
    """
    # Skip up front, before any repo setup, when TOML parsing is unavailable.
    # NOTE(review): presumably the config loader depends on tomllib — confirm.
    pytest.importorskip("tomllib")

    from muse.core.harmony.engine import auto_apply
    from muse.plugins.code.plugin import CodePlugin

    root, _ = _init_repo(tmp_path)

    # Lower the threshold to 0.75. The path is derived with muse_dir(), the
    # same helper _init_repo uses to create the directory, rather than a
    # hard-coded ".muse" segment.
    config_content = "[harmony]\nmin_auto_apply_confidence = 0.75\n"
    (muse_dir(root) / "config.toml").write_text(config_content, encoding="utf-8")

    ours_content = b"value = 2\n"
    theirs_content = b"value = 3\n"
    outcome_content = b"value = 2\n"

    _plant_harmony_resolution(
        root, "config.py", ours_content, theirs_content, outcome_content, confidence=0.8
    )

    ours_manifest = {"config.py": blob_id(ours_content)}
    theirs_manifest = {"config.py": blob_id(theirs_content)}

    plugin = CodePlugin()
    resolved, remaining = auto_apply(
        root, ["config.py"], ours_manifest, theirs_manifest, "code", plugin
    )

    assert "config.py" in resolved, (
        f"HC_03: with threshold=0.75, confidence=0.8 must auto-apply, "
        f"got resolved={resolved}, remaining={remaining}"
    )
    assert "config.py" not in remaining
491
492
493 # ---------------------------------------------------------------------------
494 # HC_04 — No resolution meeting threshold → escalates cleanly, no error
495 # ---------------------------------------------------------------------------
496
def test_HC_04_no_qualifying_resolution_escalates_cleanly(tmp_path: pathlib.Path) -> None:
    """HC_04: with no resolution above the threshold, ``auto_apply`` escalates without error."""
    from muse.core.harmony.engine import auto_apply
    from muse.plugins.code.plugin import CodePlugin

    root, _ = _init_repo(tmp_path)

    ours_content = b"value = 2\n"
    theirs_content = b"value = 3\n"

    # Only resolution on record sits at 0.5, far under the 0.85 default.
    _plant_harmony_resolution(
        root, "config.py", ours_content, theirs_content, b"value = 2\n", confidence=0.5
    )

    # Escalation is an ordinary outcome: this call has to return, not raise.
    resolved, remaining = auto_apply(
        root,
        ["config.py"],
        {"config.py": blob_id(ours_content)},
        {"config.py": blob_id(theirs_content)},
        "code",
        CodePlugin(),
    )

    assert "config.py" not in resolved, (
        f"HC_04: confidence=0.5 << threshold=0.85 must not auto-apply, got resolved={resolved}"
    )
    assert "config.py" in remaining, (
        f"HC_04: unresolved path must be in remaining, got remaining={remaining}"
    )
File History 1 commit
sha256:f0cc3f3fe40f1df9a488585b5b2aef6aa4dcb0b22b5fdbf7bab7071a5b0c7f7c feat(merge): Phase 7 — independence merge correctness + Har… Sonnet 4.6 minor 1 day ago