gabriel / muse public
test_harmony_integration.py python
565 lines 20.5 KB
Raw
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 21 days ago
1 """Tests for harmony.auto_apply and harmony.record_resolutions.
2
3 Exercises the high-level integration helpers that sit between the merge and
4 commit commands and the harmony store. These are the functions that actually
5 wire conflict fingerprinting → pattern storage → resolution replay.
6
7 Bugs documented here:
8
9 Bug 1 — symbol-level conflict paths (e.g. "config.py::SomeSymbol"):
10 The file portion must be extracted for manifest lookups while the full
11 address is stored as the ConflictPattern path.
12
13 Bug 2 — record_resolutions not idempotent:
14 Two calls with the same outcome_blob must produce only one resolution.
15
16 Bug 3 — MERGE_STATE original_conflict_paths:
17 ``muse checkout --ours/--theirs`` clears conflict_paths from MERGE_STATE
18 as each is resolved. By commit time conflict_paths is empty, so
19 record_resolutions is called with [] and nothing is ever recorded.
20 MERGE_STATE must preserve the original conflict list so commit can record.
21 """
22 from __future__ import annotations
23
24 from collections.abc import Mapping
25 import pathlib
26 import tempfile
27
28 import pytest
29
30 import muse.core.harmony as h
31 from muse.core.harmony import (
32 auto_apply,
33 blob_fingerprint,
34 compute_pattern_id,
35 compute_semantic_fingerprint,
36 list_patterns,
37 list_resolutions,
38 record_resolutions,
39 )
40 from muse.core.object_store import write_object
41 from muse.core.types import Manifest, NULL_LONG_ID, blob_id, long_id
42 from muse.core.paths import muse_dir
43
44
45 # ---------------------------------------------------------------------------
46 # Helpers
47 # ---------------------------------------------------------------------------
48
49
def _fake_object_id(content: bytes) -> str:
    """Derive the object ID for *content* by delegating to ``blob_id``.

    Per the original helper's contract this is the canonical
    ``sha256:``-prefixed identifier for the given bytes.
    """
    object_id = blob_id(content)
    return object_id
53
54
def _write_fake_object(root: pathlib.Path, content: bytes) -> str:
    """Persist *content* in the object store under *root*; return its ID.

    The ID is computed with ``blob_id`` and handed to ``write_object``
    together with the raw bytes.
    """
    object_id = blob_id(content)
    write_object(root, object_id, content)
    return object_id
60
61
62 class _FakePlugin:
63 """Minimal MuseDomainPlugin — no HarmonyPlugin sub-protocol."""
64 name = "test"
65 def schema(self) -> "Mapping[str, object]": return {}
66
67
@pytest.fixture()
def repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Provide a temporary repository root.

    Creates the directory returned by ``muse_dir(tmp_path)`` and hands the
    pytest-managed ``tmp_path`` itself back to the test.
    """
    metadata_dir = muse_dir(tmp_path)
    metadata_dir.mkdir()
    return tmp_path
72
73
74 # ---------------------------------------------------------------------------
75 # compute_semantic_fingerprint
76 # ---------------------------------------------------------------------------
77
78
class TestComputeSemanticFingerprint:
    """compute_semantic_fingerprint with a plugin lacking HarmonyPlugin support.

    Every test passes ``_FakePlugin``; the assertions expect the result to
    match the plain ``blob_fingerprint`` of the two blob IDs in that case.
    """

    def test_no_plugin_returns_blob_fingerprint(self, repo: pathlib.Path) -> None:
        """The semantic fingerprint equals ``blob_fingerprint(ours, theirs)``."""
        ours = long_id("a" * 64)
        theirs = long_id("b" * 64)
        plugin = _FakePlugin()
        result = compute_semantic_fingerprint("config.py", ours, theirs, plugin, repo)
        assert result == blob_fingerprint(ours, theirs)

    def test_commutative(self, repo: pathlib.Path) -> None:
        """Swapping ours and theirs must not change the fingerprint."""
        ours = long_id("a" * 64)
        theirs = long_id("b" * 64)
        plugin = _FakePlugin()
        r1 = compute_semantic_fingerprint("config.py", ours, theirs, plugin, repo)
        r2 = compute_semantic_fingerprint("config.py", theirs, ours, plugin, repo)
        assert r1 == r2

    def test_different_paths_same_fingerprint_without_harmony_plugin(
        self, repo: pathlib.Path
    ) -> None:
        """The path does not influence the fingerprint for a plain plugin.

        Previously named ``test_different_paths_different_fingerprints``,
        which stated the opposite of what the assertion checks.
        """
        ours = long_id("a" * 64)
        theirs = long_id("b" * 64)
        plugin = _FakePlugin()
        # blob_fingerprint is path-independent; semantic is too by default
        # but compute_pattern_id incorporates path — verified separately
        r1 = compute_semantic_fingerprint("a.py", ours, theirs, plugin, repo)
        r2 = compute_semantic_fingerprint("b.py", ours, theirs, plugin, repo)
        # Default (no HarmonyPlugin) → same blob_fp regardless of path
        assert r1 == r2
105
106
107 # ---------------------------------------------------------------------------
108 # record_resolutions — file-level paths
109 # ---------------------------------------------------------------------------
110
111
class TestRecordResolutionsFilePaths:
    """Exercise record_resolutions with bare file paths (no ``::`` separator)."""

    def test_records_pattern_and_resolution(self, repo: pathlib.Path) -> None:
        """One conflict path yields one pattern carrying one resolution."""
        # Store both conflicting sides and the merged outcome as blobs.
        ours_id = _write_fake_object(repo, b"version = 1")
        theirs_id = _write_fake_object(repo, b"version = 2")
        resolution_id = _write_fake_object(repo, b"version = 3")

        ours_manifest: Manifest = {"config.py": ours_id}
        theirs_manifest: Manifest = {"config.py": theirs_id}
        new_manifest: Manifest = {"config.py": resolution_id}

        saved = record_resolutions(
            repo,
            ["config.py"],
            ours_manifest,
            theirs_manifest,
            new_manifest,
            "code",
            _FakePlugin(),
        )
        assert saved == ["config.py"]

        patterns = list_patterns(repo)
        assert len(patterns) == 1
        pattern = patterns[0]
        assert pattern.path == "config.py"
        assert pattern.domain == "code"

        resolutions = list_resolutions(repo, pattern.pattern_id)
        assert len(resolutions) == 1
        recorded = resolutions[0]
        assert recorded.outcome_blob == resolution_id
        assert recorded.human_verified is True
        assert recorded.confidence == 1.0

    def test_skips_path_not_in_manifests(self, repo: pathlib.Path) -> None:
        """A path absent from every manifest is skipped and stores nothing."""
        empty_ours: Manifest = {}
        empty_theirs: Manifest = {}
        empty_new: Manifest = {}

        saved = record_resolutions(
            repo,
            ["missing.py"],
            empty_ours,
            empty_theirs,
            empty_new,
            "code",
            _FakePlugin(),
        )

        assert saved == []
        assert list_patterns(repo) == []

    def test_idempotent_second_call(self, repo: pathlib.Path) -> None:
        """Recording the same outcome twice leaves one pattern, one resolution."""
        ours_id = _write_fake_object(repo, b"a")
        theirs_id = _write_fake_object(repo, b"b")
        resolution_id = _write_fake_object(repo, b"c")

        ours_m: Manifest = {"f.py": ours_id}
        theirs_m: Manifest = {"f.py": theirs_id}
        new_m: Manifest = {"f.py": resolution_id}
        plugin = _FakePlugin()

        # Two identical calls, as would happen if commit ran twice.
        for _ in range(2):
            record_resolutions(repo, ["f.py"], ours_m, theirs_m, new_m, "code", plugin)

        patterns = list_patterns(repo)
        assert len(patterns) == 1
        stored = list_resolutions(repo, patterns[0].pattern_id)
        assert len(stored) == 1
180
181
182 # ---------------------------------------------------------------------------
183 # BUG: record_resolutions — symbol-level paths ("file.py::Symbol")
184 # ---------------------------------------------------------------------------
185
186
class TestRecordResolutionsSymbolPaths:
    """Exercise record_resolutions with symbol-level conflict paths.

    The code-domain merge engine reports conflicts as symbol addresses such
    as "config.py::MAX_CONNECTIONS", whereas manifests are keyed by file
    path. These tests expect the file portion to be used for manifest
    lookups and the full symbol address to be kept on the ConflictPattern.
    """

    def test_symbol_path_records_pattern(self, repo: pathlib.Path) -> None:
        """A single symbol address is recorded under its full address."""
        symbol = "config.py::MAX_CONNECTIONS"  # symbol-level conflict path

        ours_id = _write_fake_object(repo, b"MAX_CONNECTIONS = 10")
        theirs_id = _write_fake_object(repo, b"MAX_CONNECTIONS = 25")
        resolution_id = _write_fake_object(repo, b"MAX_CONNECTIONS = 50")

        # Manifest keys are FILE paths, not symbol addresses.
        ours_manifest: Manifest = {"config.py": ours_id}
        theirs_manifest: Manifest = {"config.py": theirs_id}
        new_manifest: Manifest = {"config.py": resolution_id}

        saved = record_resolutions(
            repo,
            [symbol],
            ours_manifest,
            theirs_manifest,
            new_manifest,
            "code",
            _FakePlugin(),
        )

        assert saved == ["config.py::MAX_CONNECTIONS"], (
            "record_resolutions silently skipped a symbol-level conflict path — "
            "it must extract 'config.py' from 'config.py::MAX_CONNECTIONS' "
            "for manifest lookups"
        )

        patterns = list_patterns(repo)
        assert len(patterns) == 1, "Expected exactly one pattern recorded"
        pattern = patterns[0]
        # The stored path is the complete symbol address.
        assert pattern.path == "config.py::MAX_CONNECTIONS"

        resolutions = list_resolutions(repo, pattern.pattern_id)
        assert len(resolutions) == 1
        assert resolutions[0].outcome_blob == resolution_id

    def test_multiple_symbol_paths_same_file(self, repo: pathlib.Path) -> None:
        """Two conflicting symbols in the same file → two distinct patterns."""
        file_ours = _write_fake_object(repo, b"file ours")
        file_theirs = _write_fake_object(repo, b"file theirs")
        file_resolved = _write_fake_object(repo, b"file resolved")

        ours_m: Manifest = {"app.py": file_ours}
        theirs_m: Manifest = {"app.py": file_theirs}
        new_m: Manifest = {"app.py": file_resolved}
        symbols = ["app.py::foo", "app.py::bar"]

        saved = record_resolutions(
            repo, symbols, ours_m, theirs_m, new_m, "code", _FakePlugin()
        )

        assert saved == ["app.py::foo", "app.py::bar"]
        patterns = list_patterns(repo)
        assert len(patterns) == 2, (
            "Each symbol address should produce a distinct pattern "
            "(pattern_id incorporates path)"
        )
        recorded_paths = {pattern.path for pattern in patterns}
        assert recorded_paths == {"app.py::foo", "app.py::bar"}

    def test_symbol_path_no_file_portion_in_manifest(self, repo: pathlib.Path) -> None:
        """If the file portion of the symbol path is not in the manifest, skip."""
        no_entries: Manifest = {}  # stands in for all three empty manifests
        saved = record_resolutions(
            repo,
            ["missing.py::SomeSymbol"],
            no_entries,
            {},
            {},
            "code",
            _FakePlugin(),
        )
        assert saved == []
275
276
277 # ---------------------------------------------------------------------------
278 # BUG: auto_apply — symbol-level paths
279 # ---------------------------------------------------------------------------
280
281
class TestAutoApplySymbolPaths:
    """auto_apply must resolve the file portion of symbol paths as well."""

    def test_auto_apply_with_symbol_path_records_pattern(
        self, repo: pathlib.Path
    ) -> None:
        """With no saved resolution the path stays unresolved but a pattern is stored."""
        symbol = "settings.py::DEBUG"  # symbol-level path

        ours_id = _write_fake_object(repo, b"DEBUG = False")
        theirs_id = _write_fake_object(repo, b"DEBUG = True")
        ours_m: Manifest = {"settings.py": ours_id}
        theirs_m: Manifest = {"settings.py": theirs_id}

        _resolved, remaining = auto_apply(
            repo, [symbol], ours_m, theirs_m, "code", _FakePlugin()
        )

        assert "settings.py::DEBUG" in remaining
        patterns = list_patterns(repo)
        assert len(patterns) == 1, (
            "auto_apply must record the pattern even when no resolution exists yet "
            "— but it silently skipped the symbol-level path"
        )
        assert patterns[0].path == "settings.py::DEBUG"

    def test_auto_apply_replays_symbol_resolution(
        self, repo: pathlib.Path
    ) -> None:
        """After record_resolutions saves a resolution, auto_apply replays it."""
        resolution_content = b"TIMEOUT = 45"
        ours_id = _write_fake_object(repo, b"TIMEOUT = 30")
        theirs_id = _write_fake_object(repo, b"TIMEOUT = 60")
        resolution_id = _write_fake_object(repo, resolution_content)

        ours_m: Manifest = {"config.py": ours_id}
        theirs_m: Manifest = {"config.py": theirs_id}
        new_m: Manifest = {"config.py": resolution_id}
        plugin = _FakePlugin()
        conflicts = ["config.py::TIMEOUT"]

        # Step 1: the commit path stores the human resolution.
        saved = record_resolutions(
            repo, conflicts, ours_m, theirs_m, new_m, "code", plugin
        )
        assert saved == ["config.py::TIMEOUT"]

        # Step 2: the identical conflict shows up again and should be replayed.
        resolved, remaining = auto_apply(
            repo, ["config.py::TIMEOUT"], ours_m, theirs_m, "code", plugin
        )

        assert "config.py::TIMEOUT" in resolved, (
            "auto_apply failed to replay a saved resolution for a symbol-level path"
        )
        assert remaining == []
        written = (repo / "config.py").read_bytes()
        assert written == resolution_content

    def test_auto_apply_file_path_still_works(
        self, repo: pathlib.Path
    ) -> None:
        """Plain file paths (no ::) still work after the fix."""
        resolution_content = b"v3"
        ours_id = _write_fake_object(repo, b"v1")
        theirs_id = _write_fake_object(repo, b"v2")
        resolution_id = _write_fake_object(repo, resolution_content)

        ours_m: Manifest = {"README.md": ours_id}
        theirs_m: Manifest = {"README.md": theirs_id}
        new_m: Manifest = {"README.md": resolution_id}
        plugin = _FakePlugin()

        record_resolutions(repo, ["README.md"], ours_m, theirs_m, new_m, "code", plugin)

        resolved, remaining = auto_apply(
            repo, ["README.md"], ours_m, theirs_m, "code", plugin
        )

        assert "README.md" in resolved
        assert remaining == []
        written = (repo / "README.md").read_bytes()
        assert written == resolution_content
379
380
381 # ---------------------------------------------------------------------------
382 # auto_apply — path traversal guard still applies
383 # ---------------------------------------------------------------------------
384
385
class TestAutoApplyPathTraversal:
    """auto_apply must refuse to replay a resolution outside the repo root.

    Each test first records a resolution for the traversal path. Without
    that seed auto_apply has nothing to replay, the path ends up in
    ``remaining`` regardless, and the assertions would pass even if no
    traversal guard existed.
    """

    @staticmethod
    def _seed_and_apply(
        repo: pathlib.Path, conflict_path: str
    ) -> tuple[object, object]:
        """Record a resolution for *conflict_path*, then run auto_apply on it.

        Manifests are keyed by the file portion ``"../traversal.py"``.
        Returns auto_apply's ``(resolved, remaining)`` pair unchanged.
        """
        ours_id = _write_fake_object(repo, b"x")
        theirs_id = _write_fake_object(repo, b"y")
        resolution_id = _write_fake_object(repo, b"z")
        ours_m: Manifest = {"../traversal.py": ours_id}
        theirs_m: Manifest = {"../traversal.py": theirs_id}
        new_m: Manifest = {"../traversal.py": resolution_id}
        plugin = _FakePlugin()

        # NOTE(review): assumes record_resolutions either stores or silently
        # skips a traversal path (it must not raise) — confirm against
        # muse.core.harmony.
        record_resolutions(
            repo, [conflict_path], ours_m, theirs_m, new_m, "code", plugin
        )
        return auto_apply(
            repo, [conflict_path], ours_m, theirs_m, "code", plugin
        )

    def test_traversal_path_skipped(self, repo: pathlib.Path) -> None:
        """A plain ``../`` file path is left unresolved and nothing escapes."""
        escape_target = repo.parent / "traversal.py"

        resolved, remaining = self._seed_and_apply(repo, "../traversal.py")

        assert resolved == {}
        assert "../traversal.py" in remaining
        # The guard's real job: no file may appear outside the repo root.
        assert not escape_target.exists()

    def test_symbol_traversal_skipped(self, repo: pathlib.Path) -> None:
        """A symbol address whose file portion escapes the repo is skipped too."""
        escape_target = repo.parent / "traversal.py"

        resolved, remaining = self._seed_and_apply(
            repo, "../traversal.py::Symbol"
        )

        assert resolved == {}
        assert "../traversal.py::Symbol" in remaining
        # The guard's real job: no file may appear outside the repo root.
        assert not escape_target.exists()
412
413
414 # ---------------------------------------------------------------------------
415 # BUG 3: MERGE_STATE must preserve original_conflict_paths
416 # ---------------------------------------------------------------------------
417
418
class TestMergeStateOriginalConflictPaths:
    """MERGE_STATE.original_conflict_paths must survive checkout --ours/--theirs.

    Sequence that exposed the bug:
      1. muse merge            → MERGE_STATE written with conflict_paths=[A, B]
      2. muse checkout --ours A → MERGE_STATE updated: conflict_paths=[B]
      3. muse checkout --ours B → MERGE_STATE updated: conflict_paths=[]
      4. muse commit           → reads merge_state; calls
                                 record_resolutions(conflict_paths=[])
                                 → nothing recorded  ← BUG

    Expected behaviour: MERGE_STATE keeps original_conflict_paths=[A, B]
    across every checkout call, and commit feeds original_conflict_paths to
    record_resolutions.
    """

    @staticmethod
    def _write_state(repo: pathlib.Path, conflict_paths: list[str]) -> None:
        """Write MERGE_STATE with fixed commit IDs and the given conflict list."""
        # Imported lazily, as in the original per-test imports.
        from muse.core.merge_engine import write_merge_state

        write_merge_state(
            repo,
            base_commit=NULL_LONG_ID,
            ours_commit=long_id("1" * 64),
            theirs_commit=long_id("2" * 64),
            conflict_paths=conflict_paths,
        )

    @staticmethod
    def _read_state(repo: pathlib.Path):
        """Read MERGE_STATE back and assert that it exists."""
        from muse.core.merge_engine import read_merge_state

        state = read_merge_state(repo)
        assert state is not None
        return state

    def test_write_merge_state_sets_original_conflict_paths(
        self, repo: pathlib.Path
    ) -> None:
        """The first write copies conflict_paths into original_conflict_paths."""
        self._write_state(repo, ["config.py::MAX_CONNECTIONS", "utils.py::clamp"])

        state = self._read_state(repo)
        assert state.original_conflict_paths == [
            "config.py::MAX_CONNECTIONS",
            "utils.py::clamp",
        ], (
            "write_merge_state must populate original_conflict_paths "
            "equal to conflict_paths on first write"
        )

    def test_original_conflict_paths_preserved_after_partial_resolution(
        self, repo: pathlib.Path
    ) -> None:
        """Resolving one of two conflicts leaves the original list untouched."""
        # Initial merge: two conflicts.
        self._write_state(repo, ["config.py::A", "config.py::B"])
        # checkout --ours took care of A, so only B is rewritten.
        self._write_state(repo, ["config.py::B"])

        state = self._read_state(repo)
        assert state.conflict_paths == ["config.py::B"]
        assert state.original_conflict_paths == ["config.py::A", "config.py::B"], (
            "original_conflict_paths must be preserved across writes — "
            "checkout --ours updates conflict_paths but not original_conflict_paths"
        )

    def test_original_conflict_paths_preserved_after_all_resolved(
        self, repo: pathlib.Path
    ) -> None:
        """Clearing every conflict still keeps the original list."""
        self._write_state(repo, ["config.py::MAX_CONNECTIONS"])
        # Everything resolved through checkout --ours.
        self._write_state(repo, [])

        state = self._read_state(repo)
        assert state.conflict_paths == []
        assert state.original_conflict_paths == ["config.py::MAX_CONNECTIONS"], (
            "original_conflict_paths must survive even when all conflicts are cleared"
        )

    def test_commit_uses_original_conflict_paths_for_harmony(
        self, repo: pathlib.Path
    ) -> None:
        """Commit must pass original_conflict_paths to record_resolutions.

        Once every conflict has been resolved with checkout --ours/--theirs,
        merge_state.conflict_paths is empty. Commit has to fall back to
        merge_state.original_conflict_paths so harmony still learns.
        """
        ours_id = _write_fake_object(repo, b"MAX_CONNECTIONS = 50")
        theirs_id = _write_fake_object(repo, b"MAX_CONNECTIONS = 25")
        resolution_id = _write_fake_object(repo, b"MAX_CONNECTIONS = 50")  # ours wins

        # Merge records the conflict; checkout --ours then empties the list.
        self._write_state(repo, ["config.py::MAX_CONNECTIONS"])
        self._write_state(repo, [])  # all resolved

        state = self._read_state(repo)
        assert state.conflict_paths == []
        assert state.original_conflict_paths == ["config.py::MAX_CONNECTIONS"]

        ours_m: Manifest = {"config.py": ours_id}
        theirs_m: Manifest = {"config.py": theirs_id}
        new_m: Manifest = {"config.py": resolution_id}

        # Mirror the commit logic: prefer the preserved original list.
        paths_for_harmony = state.original_conflict_paths or state.conflict_paths
        saved = record_resolutions(
            repo, paths_for_harmony, ours_m, theirs_m, new_m, "code", _FakePlugin()
        )

        assert saved == ["config.py::MAX_CONNECTIONS"], (
            "commit.py must use merge_state.original_conflict_paths when "
            "conflict_paths is empty — harmony must learn the resolution"
        )
File History 4 commits
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 21 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 22 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 28 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor 29 days ago