gabriel / muse public

test_property_merge_invariants.py file-level

at sha256:8 · View file ↗ · Intel ↗

History
1 files
1 commits
0 hotspots
0 🧊 dead
0 πŸ’₯ blast risk
sha256:4 Merge branch 'dev' into main · gabriel · Jun 17, 2026
1 """Property-based tests for the three-way merge engine invariants.
2
3 These tests use Hypothesis to generate random manifests and DAG shapes and
4 verify mathematical invariants that must hold for ANY input β€” not just the
5 specific inputs we thought to write as unit tests.
6
7 Why these tests?
8 ----------------
9 The silent-drop bug (fixed in 73427a30) survived all existing tests because
10 the existing tests only exercised SPECIFIC manifests. A property test with
11 the invariant "theirs-only files must survive merge" would have caught it
12 immediately: Hypothesis would have generated a manifest where theirs added
13 a file that ours didn't touch, run apply_merge, and found the file absent.
14
15 Invariants tested
16 -----------------
17 M1 Theirs-only additions survive in merged manifest.
18 M2 Ours-only additions survive in merged manifest.
19 M3 Theirs-only deletions are applied in merged manifest.
20 M4 Ours-only deletions are applied in merged manifest.
21 M5 Conflict paths are exactly the intersection of ours_changed and theirs_changed.
22 M6 Conflict paths are absent from apply_merge output (caller resolves them).
23 M7 Non-conflicting paths from both sides are correct in output.
24 M8 Identical content on both sides is never a conflict in diff_snapshots.
25 M9 apply_merge is idempotent when ours == theirs (convergence).
26 M10 merged result contains no paths from conflict_paths when they exist.
27
28 LCA invariants
29 --------------
30 L1 LCA(A, A) == A.
31 L2 LCA(A, B) is an ancestor of A (reachable from A).
32 L3 LCA(A, B) is an ancestor of B (reachable from B).
33 L4 LCA(A, B) == LCA(B, A) β€” commutativity.
34 L5 If A is ancestor of B, LCA(A, B) == A.
35
36 Snapshot integrity invariant
37 ----------------------------
38 SI1 Every object_id referenced in a merged manifest exists in the object store.
39 """
40 from __future__ import annotations
41
42 import datetime
43 import json
44 import pathlib
45
46 import pytest
47 from hypothesis import HealthCheck, given, settings
48 from hypothesis import strategies as st
49
50 from muse.core.merge_engine import (
51 apply_merge,
52 detect_conflicts,
53 diff_snapshots,
54 find_merge_base,
55 )
56 from muse.core.types import Manifest, fake_id, blob_id
57 from muse.core.paths import head_path, heads_dir, muse_dir
58
59
60 # ---------------------------------------------------------------------------
61 # Strategies β€” building blocks for random manifests and DAGs
62 # ---------------------------------------------------------------------------
63
64
65 def _h(label: str) -> str:
66 return fake_id(label)
67
68
69 # A path looks like "src/module_3.py" or "README.md".
70 _path_strategy = st.text(
71 alphabet=st.characters(whitelist_categories=("Ll", "Lu", "Nd"), whitelist_characters="/_-."),
72 min_size=1,
73 max_size=32,
74 ).filter(lambda s: "/" not in s or not s.startswith("/"))
75
76 # A content hash is a 64-char hex string (we use sha256 of a label for uniqueness).
77 _hash_strategy = st.text(
78 alphabet="0123456789abcdef",
79 min_size=64,
80 max_size=64,
81 )
82
83 # A manifest is a dict of path β†’ hash with at most 40 entries.
84 _manifest_strategy = st.dictionaries(
85 keys=_path_strategy,
86 values=_hash_strategy,
87 min_size=0,
88 max_size=40,
89 )
90
91
92 # ---------------------------------------------------------------------------
93 # Helpers
94 # ---------------------------------------------------------------------------
95
96
97 def _linear_repo(
98 tmp_path: pathlib.Path,
99 ) -> tuple[pathlib.Path, str]:
100 """Create a minimal code-domain Muse repo for LCA tests."""
101 from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id
102 from muse.core.commits import (
103 CommitRecord,
104 write_commit,
105 )
106 from muse.core.snapshots import (
107 SnapshotRecord,
108 write_snapshot,
109 )
110
111 dot_muse = muse_dir(tmp_path)
112 dot_muse.mkdir(exist_ok=True)
113 repo_id = fake_id("repo")
114 (dot_muse / "repo.json").write_text(
115 json.dumps({"repo_id": repo_id, "domain": "code", "default_branch": "main"}),
116 encoding="utf-8",
117 )
118 (dot_muse / "refs" / "heads").mkdir(parents=True, exist_ok=True)
119 (dot_muse / "snapshots").mkdir(exist_ok=True)
120 (dot_muse / "commits").mkdir(exist_ok=True)
121 (dot_muse / "objects").mkdir(exist_ok=True)
122 return tmp_path, repo_id
123
124
125 def _make_commit_lca(
126 root: pathlib.Path,
127 repo_id: str,
128 manifest: Manifest,
129 message: str = "c",
130 parent_id: str | None = None,
131 parent2_id: str | None = None,
132 ) -> str:
133 from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id
134 from muse.core.commits import (
135 CommitRecord,
136 write_commit,
137 )
138 from muse.core.snapshots import (
139 SnapshotRecord,
140 write_snapshot,
141 )
142
143 snap_id = compute_snapshot_id(manifest)
144 committed_at = datetime.datetime.now(datetime.timezone.utc)
145 parent_ids: list[str] = []
146 if parent_id:
147 parent_ids.append(parent_id)
148 if parent2_id:
149 parent_ids.append(parent2_id)
150 commit_id = compute_commit_id(
151 parent_ids=parent_ids,
152 snapshot_id=snap_id,
153 message=message,
154 committed_at_iso=committed_at.isoformat(),
155 )
156 write_snapshot(root, SnapshotRecord(snapshot_id=snap_id, manifest=manifest))
157 write_commit(root, CommitRecord(
158 commit_id=commit_id,
159 branch="main",
160 snapshot_id=snap_id,
161 message=message,
162 committed_at=committed_at,
163 parent_commit_id=parent_id,
164 parent2_commit_id=parent2_id,
165 ))
166 return commit_id
167
168
169 # ===========================================================================
170 # M β€” apply_merge / diff_snapshots / detect_conflicts invariants
171 # ===========================================================================
172
173
174 class TestMergeEngineInvariantsM:
175 """Property-based invariants for the pure merge functions."""
176
177 @given(
178 base=_manifest_strategy,
179 extra_theirs=_manifest_strategy,
180 )
181 @settings(max_examples=200, suppress_health_check=[HealthCheck.too_slow])
182 def test_M1_theirs_only_additions_survive(
183 self, base: Manifest, extra_theirs: Manifest
184 ) -> None:
185 """M1: every file theirs adds (not in base, not touched by ours) is in merged."""
186 # ours: no changes from base
187 ours = dict(base)
188 # theirs: base + extra_theirs (disjoint new keys only)
189 theirs = dict(base)
190 new_keys = {k: v for k, v in extra_theirs.items() if k not in base}
191 theirs.update(new_keys)
192
193 ours_changed = diff_snapshots(base, ours)
194 theirs_changed = diff_snapshots(base, theirs)
195 conflicts = detect_conflicts(ours_changed, theirs_changed, ours, theirs)
196
197 merged = apply_merge(base, ours, theirs, ours_changed, theirs_changed, conflicts)
198
199 for path, obj_id in new_keys.items():
200 assert path in merged, (
201 f"M1 VIOLATED: theirs-only addition '{path}' absent from merged.\n"
202 f"base keys: {set(base)}\n"
203 f"extra_theirs keys: {set(new_keys)}\n"
204 f"merged keys: {set(merged)}"
205 )
206 assert merged[path] == obj_id, (
207 f"M1 VIOLATED: theirs-only addition '{path}' has wrong hash in merged."
208 )
209
210 @given(
211 base=_manifest_strategy,
212 extra_ours=_manifest_strategy,
213 )
214 @settings(max_examples=200, suppress_health_check=[HealthCheck.too_slow])
215 def test_M2_ours_only_additions_survive(
216 self, base: Manifest, extra_ours: Manifest
217 ) -> None:
218 """M2: every file ours adds (not in base, not touched by theirs) is in merged."""
219 ours = dict(base)
220 new_keys = {k: v for k, v in extra_ours.items() if k not in base}
221 ours.update(new_keys)
222 theirs = dict(base)
223
224 ours_changed = diff_snapshots(base, ours)
225 theirs_changed = diff_snapshots(base, theirs)
226 conflicts = detect_conflicts(ours_changed, theirs_changed, ours, theirs)
227
228 merged = apply_merge(base, ours, theirs, ours_changed, theirs_changed, conflicts)
229
230 for path, obj_id in new_keys.items():
231 assert path in merged, f"M2 VIOLATED: ours-only addition '{path}' absent from merged."
232 assert merged[path] == obj_id
233
234 @given(base=_manifest_strategy, del_keys=st.frozensets(st.text(min_size=1, max_size=20)))
235 @settings(max_examples=200, suppress_health_check=[HealthCheck.too_slow])
236 def test_M3_theirs_only_deletions_applied(
237 self, base: Manifest, del_keys: frozenset[str]
238 ) -> None:
239 """M3: files theirs deletes (and ours does not touch) are absent from merged."""
240 to_delete = {k for k in del_keys if k in base}
241 if not to_delete:
242 return # no relevant keys to test
243
244 ours = dict(base)
245 theirs = {k: v for k, v in base.items() if k not in to_delete}
246
247 ours_changed = diff_snapshots(base, ours)
248 theirs_changed = diff_snapshots(base, theirs)
249 conflicts = detect_conflicts(ours_changed, theirs_changed, ours, theirs)
250
251 merged = apply_merge(base, ours, theirs, ours_changed, theirs_changed, conflicts)
252
253 for path in to_delete:
254 assert path not in merged, (
255 f"M3 VIOLATED: theirs-only deletion '{path}' still present in merged."
256 )
257
258 @given(base=_manifest_strategy, del_keys=st.frozensets(st.text(min_size=1, max_size=20)))
259 @settings(max_examples=200, suppress_health_check=[HealthCheck.too_slow])
260 def test_M4_ours_only_deletions_applied(
261 self, base: Manifest, del_keys: frozenset[str]
262 ) -> None:
263 """M4: files ours deletes (and theirs does not touch) are absent from merged."""
264 to_delete = {k for k in del_keys if k in base}
265 if not to_delete:
266 return
267
268 theirs = dict(base)
269 ours = {k: v for k, v in base.items() if k not in to_delete}
270
271 ours_changed = diff_snapshots(base, ours)
272 theirs_changed = diff_snapshots(base, theirs)
273 conflicts = detect_conflicts(ours_changed, theirs_changed, ours, theirs)
274
275 merged = apply_merge(base, ours, theirs, ours_changed, theirs_changed, conflicts)
276
277 for path in to_delete:
278 assert path not in merged, (
279 f"M4 VIOLATED: ours-only deletion '{path}' still present in merged."
280 )
281
282 @given(base=_manifest_strategy, ours=_manifest_strategy, theirs=_manifest_strategy)
283 @settings(max_examples=300, suppress_health_check=[HealthCheck.too_slow])
284 def test_M5_conflicts_are_divergent_intersection(
285 self,
286 base: Manifest,
287 ours: Manifest,
288 theirs: Manifest,
289 ) -> None:
290 """M5: detect_conflicts returns exactly paths where both changed AND disagree.
291
292 Formally: conflicts = {p ∈ ours_changed ∩ theirs_changed | ours.get(p) β‰  theirs.get(p)}.
293
294 Convergent changes β€” both deleted the same file (both .get(p) == None), or
295 both added/modified to the same hash β€” are NOT conflicts. The old invariant
296 that conflicts == ours_changed ∩ theirs_changed was incorrect for these cases.
297 """
298 ours_changed = diff_snapshots(base, ours)
299 theirs_changed = diff_snapshots(base, theirs)
300 conflicts = detect_conflicts(ours_changed, theirs_changed, ours, theirs)
301
302 expected = {
303 p for p in ours_changed & theirs_changed
304 if ours.get(p) != theirs.get(p)
305 }
306 assert conflicts == expected, (
307 f"M5 VIOLATED: conflict set {conflicts!r} != expected {expected!r}.\n"
308 f" ours_changed={ours_changed!r}\n"
309 f" theirs_changed={theirs_changed!r}"
310 )
311
312 @given(base=_manifest_strategy, ours=_manifest_strategy, theirs=_manifest_strategy)
313 @settings(max_examples=200, suppress_health_check=[HealthCheck.too_slow])
314 def test_M6_conflict_paths_absent_from_apply_merge(
315 self,
316 base: Manifest,
317 ours: Manifest,
318 theirs: Manifest,
319 ) -> None:
320 """M6: apply_merge never writes conflict paths β€” callers resolve them."""
321 ours_changed = diff_snapshots(base, ours)
322 theirs_changed = diff_snapshots(base, theirs)
323 conflicts = detect_conflicts(ours_changed, theirs_changed, ours, theirs)
324
325 merged = apply_merge(base, ours, theirs, ours_changed, theirs_changed, conflicts)
326
327 # Conflict paths must not be in merged at a value taken from ours or theirs
328 # (they stay at base or absent). The strict invariant: a conflict path
329 # must have been either absent from base (so it should be absent in merged)
330 # or present at base's value.
331 for path in conflicts:
332 if path in merged:
333 # If present, it must be at base's value (not ours or theirs override).
334 assert merged[path] == base.get(path), (
335 f"M6 VIOLATED: conflict path '{path}' in merged at non-base value."
336 )
337
338 @given(base=_manifest_strategy, ours=_manifest_strategy, theirs=_manifest_strategy)
339 @settings(max_examples=200, suppress_health_check=[HealthCheck.too_slow])
340 def test_M7_non_conflicting_paths_correct_in_output(
341 self,
342 base: Manifest,
343 ours: Manifest,
344 theirs: Manifest,
345 ) -> None:
346 """M7: non-conflicting ours-only changes are at ours value; theirs-only at theirs value."""
347 ours_changed = diff_snapshots(base, ours)
348 theirs_changed = diff_snapshots(base, theirs)
349 conflicts = detect_conflicts(ours_changed, theirs_changed, ours, theirs)
350 merged = apply_merge(base, ours, theirs, ours_changed, theirs_changed, conflicts)
351
352 for path in ours_changed - conflicts:
353 if path in ours:
354 assert merged.get(path) == ours[path], (
355 f"M7 VIOLATED: ours-only change '{path}' not at ours value in merged."
356 )
357 else:
358 assert path not in merged, (
359 f"M7 VIOLATED: ours-only deletion '{path}' still present in merged."
360 )
361
362 for path in theirs_changed - conflicts:
363 if path in theirs:
364 assert merged.get(path) == theirs[path], (
365 f"M7 VIOLATED: theirs-only change '{path}' not at theirs value in merged."
366 )
367 else:
368 assert path not in merged, (
369 f"M7 VIOLATED: theirs-only deletion '{path}' still present in merged."
370 )
371
372 @given(base=_manifest_strategy, common_changes=_manifest_strategy)
373 @settings(max_examples=200, suppress_health_check=[HealthCheck.too_slow])
374 def test_M8_convergent_modify_auto_resolves(
375 self, base: Manifest, common_changes: Manifest
376 ) -> None:
377 """M8: when both branches independently arrive at the same hash, no conflict fires
378 and the merged manifest contains the path at the agreed hash.
379
380 This is the canonical convergent-change invariant: identical outcomes are not
381 conflicts regardless of whether both sides changed the path.
382 """
383 shared_hash = _h("shared-content")
384 path = "shared.py"
385 base_with = dict(base)
386 base_with[path] = _h("original")
387 ours = dict(base_with)
388 ours[path] = shared_hash
389 theirs = dict(base_with)
390 theirs[path] = shared_hash
391
392 ours_changed = diff_snapshots(base_with, ours)
393 theirs_changed = diff_snapshots(base_with, theirs)
394
395 conflicts = detect_conflicts(ours_changed, theirs_changed, ours, theirs)
396 merged = apply_merge(base_with, ours, theirs, ours_changed, theirs_changed, conflicts)
397
398 # Convergent change: detect_conflicts must NOT flag 'path'.
399 assert path not in conflicts, (
400 f"M8 VIOLATED: convergent path '{path}' (same hash on both sides) "
401 f"wrongly reported as conflict."
402 )
403 # apply_merge must include 'path' at the agreed hash.
404 assert merged.get(path) == shared_hash, (
405 f"M8 VIOLATED: merged['{path}'] = {merged.get(path)!r}, "
406 f"expected {shared_hash!r}."
407 )
408
409 @given(base=_manifest_strategy, changes=_manifest_strategy)
410 @settings(max_examples=200, suppress_health_check=[HealthCheck.too_slow])
411 def test_M9_merge_ours_equals_theirs_is_convergent(
412 self, base: Manifest, changes: Manifest
413 ) -> None:
414 """M9: when ours == theirs, apply_merge produces ours exactly (no-conflict convergence)."""
415 ours = dict(changes)
416 theirs = dict(changes)
417
418 ours_changed = diff_snapshots(base, ours)
419 theirs_changed = diff_snapshots(base, theirs)
420 conflicts = detect_conflicts(ours_changed, theirs_changed, ours, theirs)
421
422 merged = apply_merge(base, ours, theirs, ours_changed, theirs_changed, conflicts)
423
424 # Every non-conflicting path must be at ours (== theirs) value.
425 for path in set(ours) | set(theirs):
426 if path in conflicts:
427 continue
428 expected = ours.get(path)
429 if expected is not None:
430 assert merged.get(path) == expected, (
431 f"M9 VIOLATED: ours == theirs but merged[{path!r}] != ours[{path!r}]."
432 )
433
434 @given(base=_manifest_strategy, ours=_manifest_strategy, theirs=_manifest_strategy)
435 @settings(max_examples=200, suppress_health_check=[HealthCheck.too_slow])
436 def test_M10_untouched_base_paths_preserved(
437 self, base: Manifest, ours: Manifest, theirs: Manifest
438 ) -> None:
439 """M10: files neither branch touched remain in merged at base value."""
440 ours_changed = diff_snapshots(base, ours)
441 theirs_changed = diff_snapshots(base, theirs)
442 conflicts = detect_conflicts(ours_changed, theirs_changed, ours, theirs)
443 merged = apply_merge(base, ours, theirs, ours_changed, theirs_changed, conflicts)
444
445 untouched = set(base) - ours_changed - theirs_changed
446 for path in untouched:
447 assert merged.get(path) == base[path], (
448 f"M10 VIOLATED: untouched path '{path}' changed value in merged."
449 )
450
451
452 # ===========================================================================
453 # L β€” LCA / find_merge_base invariants
454 # ===========================================================================
455
456
457 class TestLCAInvariantsL:
458 """Property-based invariants for find_merge_base (Lowest Common Ancestor)."""
459
460 @pytest.fixture
461 def repo(self, tmp_path: pathlib.Path) -> tuple[pathlib.Path, str]:
462 return _linear_repo(tmp_path)
463
464 def test_L1_lca_of_same_commit_is_itself(
465 self, tmp_path: pathlib.Path
466 ) -> None:
467 """L1: LCA(X, X) == X for any commit X."""
468 root, repo_id = _linear_repo(tmp_path)
469 c = _make_commit_lca(root, repo_id, {}, "root")
470 lca = find_merge_base(root, c, c)
471 assert lca == c, f"L1: LCA({c[:8]}, {c[:8]}) should be {c[:8]}, got {lca}"
472
473 def test_L2_L3_lca_is_ancestor_of_both(self, tmp_path: pathlib.Path) -> None:
474 """L2+L3: LCA(A,B) must be in the ancestry of both A and B."""
475 root, repo_id = _linear_repo(tmp_path)
476 c0 = _make_commit_lca(root, repo_id, {}, "base")
477 c1 = _make_commit_lca(root, repo_id, {"a.py": _h("a1")}, "ours", parent_id=c0)
478 c2 = _make_commit_lca(root, repo_id, {"b.py": _h("b1")}, "theirs", parent_id=c0)
479
480 lca = find_merge_base(root, c1, c2)
481 assert lca is not None, "LCA of two commits with common ancestor must exist"
482
483 from muse.core.commits import read_commit
484
485 def _ancestors(start: str) -> set[str]:
486 visited: set[str] = set()
487 q = [start]
488 while q:
489 cid = q.pop()
490 if cid in visited:
491 continue
492 visited.add(cid)
493 commit = read_commit(root, cid)
494 if commit is None:
495 continue
496 if commit.parent_commit_id:
497 q.append(commit.parent_commit_id)
498 if commit.parent2_commit_id:
499 q.append(commit.parent2_commit_id)
500 return visited
501
502 assert lca in _ancestors(c1), f"L2: LCA {lca[:8]} not in ancestry of A {c1[:8]}"
503 assert lca in _ancestors(c2), f"L3: LCA {lca[:8]} not in ancestry of B {c2[:8]}"
504
505 def test_L4_lca_commutativity(self, tmp_path: pathlib.Path) -> None:
506 """L4: LCA(A, B) == LCA(B, A)."""
507 root, repo_id = _linear_repo(tmp_path)
508 c0 = _make_commit_lca(root, repo_id, {}, "base")
509 c1 = _make_commit_lca(root, repo_id, {"a.py": _h("a1")}, "c1", parent_id=c0)
510 c2 = _make_commit_lca(root, repo_id, {"b.py": _h("b1")}, "c2", parent_id=c0)
511
512 lca_ab = find_merge_base(root, c1, c2)
513 lca_ba = find_merge_base(root, c2, c1)
514 assert lca_ab == lca_ba, (
515 f"L4: LCA({c1[:8]}, {c2[:8]}) = {lca_ab}, "
516 f"LCA({c2[:8]}, {c1[:8]}) = {lca_ba} β€” not commutative"
517 )
518
519 def test_L5_if_a_is_ancestor_of_b_lca_is_a(self, tmp_path: pathlib.Path) -> None:
520 """L5: if A is ancestor of B, LCA(A, B) == A."""
521 root, repo_id = _linear_repo(tmp_path)
522 c0 = _make_commit_lca(root, repo_id, {}, "base")
523 c1 = _make_commit_lca(root, repo_id, {"a.py": _h("a1")}, "c1", parent_id=c0)
524 c2 = _make_commit_lca(root, repo_id, {"b.py": _h("b1")}, "c2", parent_id=c1)
525
526 lca = find_merge_base(root, c0, c2)
527 assert lca == c0, f"L5: c0 is ancestor of c2 so LCA should be c0, got {lca}"
528
529 lca2 = find_merge_base(root, c1, c2)
530 assert lca2 == c1, f"L5: c1 is ancestor of c2 so LCA should be c1, got {lca2}"
531
532 def test_L6_lca_for_merge_commit_topology(self, tmp_path: pathlib.Path) -> None:
533 """L6: diamond topology β€” LCA of the two branches is the fork point, not deeper."""
534 root, repo_id = _linear_repo(tmp_path)
535 base = _make_commit_lca(root, repo_id, {}, "base")
536 a = _make_commit_lca(root, repo_id, {"a.py": _h("a")}, "a", parent_id=base)
537 b = _make_commit_lca(root, repo_id, {"b.py": _h("b")}, "b", parent_id=base)
538 # Merge commit of a and b
539 m = _make_commit_lca(
540 root, repo_id, {"a.py": _h("a"), "b.py": _h("b")}, "merge",
541 parent_id=a, parent2_id=b
542 )
543
544 # LCA of a and m should be a (m is a descendant of a).
545 assert find_merge_base(root, a, m) == a, "L6a"
546 # LCA of b and m should be b.
547 assert find_merge_base(root, b, m) == b, "L6b"
548 # LCA of base and m should be base.
549 assert find_merge_base(root, base, m) == base, "L6c"
550
551
552 # ===========================================================================
553 # SI β€” Snapshot integrity invariants
554 # ===========================================================================
555
556
557 class TestSnapshotIntegritySI:
558 """After every merge, every object_id in the snapshot must be in the store."""
559
560 def _object_exists(self, root: pathlib.Path, obj_id: str) -> bool:
561 """Return True if obj_id is readable from the object store."""
562 from muse.core.object_store import read_object
563 return read_object(root, obj_id) is not None
564
565 def test_SI1_fast_forward_snapshot_objects_all_present(
566 self, tmp_path: pathlib.Path
567 ) -> None:
568 """SI1: after fast-forward, every object in the new snapshot is in the store."""
569 from muse.core.object_store import write_object
570 from muse.core.commits import read_commit
571 from muse.core.snapshots import read_snapshot
572
573 root, repo_id = _linear_repo(tmp_path)
574 (head_path(root)).write_text("ref: refs/heads/main", encoding="utf-8")
575
576 # Write real objects to the store.
577 contents = {f"file_{i}.py": f"x = {i}\n".encode() for i in range(10)}
578 manifest: Manifest = {}
579 for path, data in contents.items():
580 obj_id = blob_id(data)
581 write_object(root, obj_id, data)
582 manifest[path] = obj_id
583
584 base_c = _make_commit_lca(root, repo_id, {}, "base")
585 # Write feat branch with the manifest.
586 (heads_dir(root) / "main").write_text(base_c, encoding="utf-8")
587 feat_c = _make_commit_lca(root, repo_id, manifest, "feat", parent_id=base_c)
588 (heads_dir(root) / "feat").write_text(feat_c, encoding="utf-8")
589
590 # Run merge (fast-forward).
591 from tests.cli_test_helper import CliRunner
592 runner = CliRunner()
593 env = {"MUSE_REPO_ROOT": str(root)}
594 runner.invoke(None, ["merge", "--force", "feat"], env=env, catch_exceptions=False)
595
596 # Verify every object in main's snapshot is in the store.
597 main_head = (heads_dir(root) / "main").read_text().strip()
598 commit = read_commit(root, main_head)
599 assert commit is not None
600 snap = read_snapshot(root, commit.snapshot_id)
601 assert snap is not None
602
603 for path, obj_id in snap.manifest.items():
604 assert self._object_exists(root, obj_id), (
605 f"SI1 VIOLATED: blob {obj_id[:8]} for '{path}' missing from store "
606 f"after fast-forward merge."
607 )
608
609 def test_SI2_apply_merge_output_all_hashes_from_inputs(
610 self, tmp_path: pathlib.Path
611 ) -> None:
612 """SI2: every hash in apply_merge output came from base, ours, or theirs.
613
614 This ensures apply_merge never invents a hash. Any invented hash would
615 reference a non-existent object and corrupt the repo on checkout.
616 """
617 base = {"a.py": _h("a-base"), "b.py": _h("b-base"), "c.py": _h("c-base")}
618 ours = {"a.py": _h("a-ours"), "b.py": _h("b-base"), "d.py": _h("d-ours")}
619 theirs = {"a.py": _h("a-theirs"), "c.py": _h("c-theirs"), "e.py": _h("e-theirs")}
620
621 all_known_hashes = (
622 set(base.values()) | set(ours.values()) | set(theirs.values())
623 )
624
625 ours_changed = diff_snapshots(base, ours)
626 theirs_changed = diff_snapshots(base, theirs)
627 conflicts = detect_conflicts(ours_changed, theirs_changed, ours, theirs)
628 merged = apply_merge(base, ours, theirs, ours_changed, theirs_changed, conflicts)
629
630 for path, obj_id in merged.items():
631 assert obj_id in all_known_hashes, (
632 f"SI2 VIOLATED: apply_merge produced unknown hash {obj_id[:8]} for '{path}'. "
633 f"Invented hashes corrupt the object store."
634 )
635
636 @given(base=_manifest_strategy, ours=_manifest_strategy, theirs=_manifest_strategy)
637 @settings(max_examples=300, suppress_health_check=[HealthCheck.too_slow])
638 def test_SI3_all_merged_hashes_come_from_known_inputs(
639 self, base: Manifest, ours: Manifest, theirs: Manifest
640 ) -> None:
641 """SI3: property version of SI2 β€” apply_merge never generates new hashes."""
642 all_known = set(base.values()) | set(ours.values()) | set(theirs.values())
643
644 ours_changed = diff_snapshots(base, ours)
645 theirs_changed = diff_snapshots(base, theirs)
646 conflicts = detect_conflicts(ours_changed, theirs_changed, ours, theirs)
647 merged = apply_merge(base, ours, theirs, ours_changed, theirs_changed, conflicts)
648
649 for path, obj_id in merged.items():
650 assert obj_id in all_known, (
651 f"SI3 VIOLATED: merged[{path!r}] = {obj_id[:8]} was not in any input manifest."
652 )
653
654
655 # ===========================================================================
656 # DT β€” Determinism tests
657 # ===========================================================================
658
659
660 class TestDeterminismDT:
661 """Snapshot IDs and commit IDs must be deterministic across calls and dict orderings."""
662
663 def test_DT1_snapshot_id_independent_of_dict_insertion_order(self) -> None:
664 """DT1: compute_snapshot_id produces the same ID regardless of dict key order."""
665 from muse.core.ids import hash_snapshot as compute_snapshot_id
666
667 manifest = {f"file_{i:03d}.py": _h(f"content-{i}") for i in range(50)}
668
669 # Build the same dict in reverse insertion order.
670 manifest_reversed: Manifest = {}
671 for k in reversed(list(manifest.keys())):
672 manifest_reversed[k] = manifest[k]
673
674 id_forward = compute_snapshot_id(manifest)
675 id_reversed = compute_snapshot_id(manifest_reversed)
676 assert id_forward == id_reversed, (
677 "DT1 VIOLATED: compute_snapshot_id is sensitive to dict insertion order. "
678 "Two repos with the same content would get different snapshot IDs."
679 )
680
681 def test_DT2_snapshot_id_is_stable_across_calls(self) -> None:
682 """DT2: the same manifest always produces the same snapshot_id."""
683 from muse.core.ids import hash_snapshot as compute_snapshot_id
684
685 manifest = {"app.py": _h("app"), "README.md": _h("readme")}
686 ids = {compute_snapshot_id(manifest) for _ in range(100)}
687 assert len(ids) == 1, "DT2 VIOLATED: compute_snapshot_id is not deterministic."
688
689 def test_DT3_empty_manifest_has_stable_id(self) -> None:
690 """DT3: the empty manifest always has the same snapshot_id."""
691 from muse.core.ids import hash_snapshot as compute_snapshot_id
692 ids = {compute_snapshot_id({}) for _ in range(50)}
693 assert len(ids) == 1, "DT3: empty manifest snapshot_id is not stable."
694
695 def test_DT4_commit_id_stable_for_same_inputs(self) -> None:
696 """DT4: compute_commit_id with the same inputs always returns the same ID."""
697 from muse.core.ids import hash_commit as compute_commit_id
698 kwargs = dict(
699 parent_ids=["abc" * 21 + "a"],
700 snapshot_id=_h("snap"),
701 message="test commit",
702 committed_at_iso="2026-01-01T00:00:00+00:00",
703 )
704 ids = {compute_commit_id(**kwargs) for _ in range(50)}
705 assert len(ids) == 1, "DT4: compute_commit_id is not deterministic."
706
707 @given(manifest=_manifest_strategy)
708 @settings(max_examples=200)
709 def test_DT5_snapshot_id_property_stable(self, manifest: Manifest) -> None:
710 """DT5: for any random manifest, snapshot_id is the same when called twice."""
711 from muse.core.ids import hash_snapshot as compute_snapshot_id
712 assert compute_snapshot_id(manifest) == compute_snapshot_id(manifest)
713
714 @given(manifest=_manifest_strategy)
715 @settings(max_examples=200)
716 def test_DT6_snapshot_id_order_independent_property(self, manifest: Manifest) -> None:
717 """DT6: for any manifest, shuffling key insertion order doesn't change the ID."""
718 from muse.core.ids import hash_snapshot as compute_snapshot_id
719 keys = list(manifest.keys())
720 # Build a copy with reversed key order
721 shuffled = {k: manifest[k] for k in reversed(keys)}
722 assert compute_snapshot_id(manifest) == compute_snapshot_id(shuffled), (
723 "DT6 VIOLATED: snapshot_id changed when dict key order changed."
724 )