gabriel / muse public
test_merge_data_integrity.py python
1,313 lines 55.9 KB
Raw
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 21 days ago
1 """Data-integrity stress tests for the entire muse merge code path.
2
3 Root cause of the data-loss incident
4 -------------------------------------
5 ``muse merge`` silently defaulted every unreadable snapshot to ``{}`` via
6 ``get_head_snapshot_manifest(...) or {}``. When a snapshot file was missing
7 or in the wrong format (e.g. a plain JSON blob without the muse object header), all three
8 manifests (base/ours/theirs) resolved to ``{}``. This caused:
9
10 1. ``apply_merge({}, {}, {}, …)`` → ``{}``
11 2. ``compute_snapshot_id({})`` → SHA-256 of ``b""`` = ``e3b0c44…``
12 3. ``_restore_from_manifest(root, {})`` → ``apply_manifest(root, {})`` → ALL
13 tracked files deleted.
14
15 The fix is dual-layered:
16
17 * **merge.py**: every snapshot read is now a hard fail — ``None`` returns
18 abort the merge with an error before any manifest is applied to the tree.
19 * **merge.py**: before ``_restore_from_manifest`` is called, the merged
20 manifest is validated; applying an empty result to a non-empty working
21 tree is rejected.
22
23 Test categories
24 ---------------
25 I Sentinel-value unit tests (document the dangerous constants).
26 II Store-read-failure guard tests (missing/corrupt snapshot → abort).
27 III Empty-manifest guard (merged result empty despite non-empty inputs → abort).
28 IV Working-tree integrity (full CLI round-trip — count files, verify content).
29 V apply_manifest safety (workdir.py layer).
30 VI The exact regression scenario (format-migration topology).
31 VII Stress tests (100-file repos, repeated merges, diamond DAGs).
32 """
33 from __future__ import annotations
34
35 import datetime
36 import json
37 import pathlib
38
39 import pytest
40 from tests.cli_test_helper import CliRunner
41 from muse.core.types import Manifest, blob_id, fake_id
42 from muse.core.object_store import object_path
43 from muse.core.paths import commits_dir, muse_dir, ref_path, snapshots_dir
44
# Alias for the string-to-string environment mapping built by _env() and
# passed to the CLI runner.
type _EnvMap = dict[str, str]

# One runner instance shared by every test in this module.
runner = CliRunner()
cli = None  # CliRunner ignores this positional arg

# Sentinel ID produced by hash_snapshot({}) — empty manifest = data-loss indicator.
# The value is computed once at import time rather than hard-coded.
from muse.core.ids import hash_snapshot as _hash_snapshot_fn
_SHA256_EMPTY = _hash_snapshot_fn({})
53
54
55 # ---------------------------------------------------------------------------
56 # Repo helpers (mirror test_stress_merge_regression.py conventions)
57 # ---------------------------------------------------------------------------
58
59
def _h(label: str) -> str:
    """Return the stable fake content hash that ``fake_id`` derives from *label*."""
    placeholder_id = fake_id(label)
    return placeholder_id
63
64
def _env(root: pathlib.Path) -> _EnvMap:
    """Build the environment mapping that points the CLI at the repo rooted at *root*.

    The mapping has a single key, ``MUSE_REPO_ROOT``, holding ``str(root)``.
    """
    return dict(MUSE_REPO_ROOT=str(root))
67
68
def _run(root: pathlib.Path, *args: str) -> tuple[int, str]:
    """Invoke the muse CLI against *root* and return ``(exit_code, output)``.

    When the first argument is ``merge`` and ``--force`` is not already present,
    ``--force`` is inserted directly after ``merge``.  The runner is invoked with
    ``catch_exceptions=False``.
    """
    argv = list(args)
    is_merge = bool(argv) and argv[0] == "merge"
    if is_merge and "--force" not in argv:
        # Slice assignment at index 1 == insert(1, ...): right after "merge".
        argv[1:1] = ["--force"]
    outcome = runner.invoke(cli, argv, env=_env(root), catch_exceptions=False)
    return outcome.exit_code, outcome.output
76
77
def _run_unchecked(root: pathlib.Path, *args: str) -> tuple[int, str]:
    """Invoke the muse CLI against *root* and return ``(exit_code, output)``.

    Applies the same ``--force`` injection for ``merge`` as ``_run``, but does
    not pass ``catch_exceptions=False`` to the runner, so the runner's default
    exception handling applies.  Used by tests that expect the command to fail.
    """
    argv = list(args)
    is_merge = bool(argv) and argv[0] == "merge"
    if is_merge and "--force" not in argv:
        argv[1:1] = ["--force"]
    outcome = runner.invoke(cli, argv, env=_env(root))
    return outcome.exit_code, outcome.output
84
85
def _write_object(root: pathlib.Path, content: bytes) -> str:
    """Store *content* in the object store under *root* and return its object ID.

    The ID is ``blob_id(content)``; the bytes are persisted via ``write_object``.
    """
    # Only write_object is needed here; object_path is already a module-level
    # import and is not used by this helper.
    from muse.core.object_store import write_object

    oid = blob_id(content)
    write_object(root, oid, content)
    return oid
91
92
def _write_file(root: pathlib.Path, content: str) -> str:
    """Encode *content* to bytes, store it via ``_write_object``, and return the object ID."""
    payload = content.encode()
    return _write_object(root, payload)
95
96
def _init_repo(tmp_path: pathlib.Path, domain: str = "code") -> tuple[pathlib.Path, str]:
    """Lay out a minimal repo inside *tmp_path* and return ``(root, repo_id)``.

    Creates the directory returned by ``muse_dir(tmp_path)``, writes ``repo.json``
    and a ``HEAD`` pointing at ``refs/heads/main``, and creates the ``refs/heads``,
    ``snapshots``, ``commits`` and ``objects`` sub-directories.  The returned root
    is *tmp_path* itself.
    """
    meta_dir = muse_dir(tmp_path)
    meta_dir.mkdir()

    repo_id = fake_id("repo")
    repo_config = {
        "repo_id": repo_id,
        "domain": domain,
        "default_branch": "main",
        "created_at": "2025-01-01T00:00:00+00:00",
    }
    (meta_dir / "repo.json").write_text(json.dumps(repo_config), encoding="utf-8")
    (meta_dir / "HEAD").write_text("ref: refs/heads/main", encoding="utf-8")

    (meta_dir / "refs" / "heads").mkdir(parents=True)
    for subdir in ("snapshots", "commits", "objects"):
        (meta_dir / subdir).mkdir()

    return tmp_path, repo_id
114
115
def _make_commit(
    root: pathlib.Path,
    repo_id: str,
    branch: str = "main",
    message: str = "test",
    manifest: Manifest | None = None,
    parent_commit_id: str | None = None,
    parent2_commit_id: str | None = None,
) -> str:
    """Write a snapshot + commit record, advance the branch ref, return commit_id.

    Args:
        root: Repository root.
        repo_id: Accepted for call-site symmetry with ``_init_repo``; not used
            by this helper.
        branch: Branch whose ref file is advanced to the new commit.
        message: Commit message.
        manifest: Path → object-ID mapping to snapshot; ``None`` (or an empty
            mapping) is stored as ``{}``.
        parent_commit_id: First parent.  When ``None`` and the branch ref file
            already exists, the ref's current (non-empty) content is used.
        parent2_commit_id: Optional second parent, for merge commits.

    Returns:
        The ID of the newly written commit.
    """
    from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id
    from muse.core.commits import (
        CommitRecord,
        write_commit,
    )
    from muse.core.snapshots import (
        SnapshotRecord,
        write_snapshot,
    )

    ref_file = ref_path(root, branch)
    if parent_commit_id is None and ref_file.exists():
        # Read with the same encoding the ref is written with below (and that
        # _ref uses), instead of falling back to the locale default.
        parent_commit_id = ref_file.read_text(encoding="utf-8").strip() or None

    m = manifest or {}
    snap_id = compute_snapshot_id(m)
    committed_at = datetime.datetime.now(datetime.timezone.utc)

    # Keep only the parents that are actually set, first parent first.
    parent_ids: list[str] = [
        pid for pid in (parent_commit_id, parent2_commit_id) if pid
    ]
    commit_id = compute_commit_id(
        parent_ids=parent_ids,
        snapshot_id=snap_id,
        message=message,
        committed_at_iso=committed_at.isoformat(),
    )

    write_snapshot(root, SnapshotRecord(snapshot_id=snap_id, manifest=m))
    write_commit(root, CommitRecord(
        commit_id=commit_id,
        branch=branch,
        snapshot_id=snap_id,
        message=message,
        committed_at=committed_at,
        parent_commit_id=parent_commit_id,
        parent2_commit_id=parent2_commit_id,
    ))

    # Advance the branch ref; the parent directory may not exist yet for
    # nested branch names.
    ref_file.parent.mkdir(parents=True, exist_ok=True)
    ref_file.write_text(commit_id, encoding="utf-8")
    return commit_id
167
168
def _ref(root: pathlib.Path, branch: str) -> str:
    """Return the content of *branch*'s ref file, stripped of surrounding whitespace."""
    raw = ref_path(root, branch).read_text(encoding="utf-8")
    return raw.strip()
171
172
def _head_manifest(root: pathlib.Path, branch: str) -> Manifest:
    """Return the snapshot manifest for *branch* HEAD.

    Resolves the branch ref to a commit, then the commit to its snapshot.
    Fails with an ``AssertionError`` if either record cannot be read.
    """
    from muse.core.commits import read_commit
    from muse.core.snapshots import read_snapshot

    commit = read_commit(root, _ref(root, branch))
    assert commit is not None, f"No commit on branch {branch}"
    snap = read_snapshot(root, commit.snapshot_id)
    assert snap is not None, f"No snapshot for {commit.snapshot_id[:8]}"
    return snap.manifest
182
183
184
185 # ---------------------------------------------------------------------------
186 # Category I — Sentinel-value unit tests
187 # ---------------------------------------------------------------------------
188
189
class TestSentinelValuesI:
    """Pin down the dangerous constants that signal a broken merge."""

    def test_I1_compute_snapshot_id_empty_dict_produces_known_sha256(self) -> None:
        """I1: compute_snapshot_id({}) == _SHA256_EMPTY — the data-loss sentinel.

        A committed snapshot_id equal to this value means the snapshot tracks no
        files at all.  The test keeps the constant visible so future readers
        know what to look for.
        """
        from muse.core.ids import hash_snapshot as compute_snapshot_id

        # NOTE(review): _SHA256_EMPTY is itself computed with hash_snapshot({}),
        # so this effectively checks that the hash is stable between calls —
        # confirm whether a hard-coded expected value is wanted instead.
        empty_snapshot_id = compute_snapshot_id({})
        assert empty_snapshot_id == _SHA256_EMPTY

    def test_I2_apply_merge_with_all_empty_inputs_returns_empty(self) -> None:
        """I2: apply_merge({}, {}, {}, ∅, ∅, ∅) → {} — documents the dangerous passthrough."""
        from muse.core.merge_engine import apply_merge

        merged = apply_merge({}, {}, {}, set(), set(), set())
        assert merged == {}

    def test_I3_apply_manifest_with_empty_target_raises_when_prev_non_empty(
        self, tmp_path: pathlib.Path
    ) -> None:
        """I3: apply_manifest(root, prev, {}) raises ValueError when prev is non-empty.

        The guard stops callers from wiping every tracked file by passing an
        unintentionally empty target manifest.
        """
        from muse.core.workdir import apply_manifest

        root, _repo_id = _init_repo(tmp_path)

        # Five tracked files: each is placed both in the object store (written
        # directly at its object path) and in the working tree.
        tracked: dict[str, str] = {}
        for idx in range(5):
            name = f"file_{idx}.py"
            payload = f"file_{idx} = True\n".encode()
            oid = blob_id(payload)
            stored = object_path(root, oid)
            stored.parent.mkdir(parents=True, exist_ok=True)
            stored.write_bytes(payload)
            (root / name).write_bytes(payload)
            tracked[name] = oid

        with pytest.raises(ValueError, match="empty target_manifest"):
            apply_manifest(root, tracked, {})

    def test_I4_diff_snapshots_both_empty_returns_empty_set(self) -> None:
        """I4: diff_snapshots({}, {}) == set() — no phantom changes."""
        from muse.core.merge_engine import diff_snapshots

        changed = diff_snapshots({}, {})
        assert changed == set()

    def test_I5_detect_conflicts_both_empty_returns_empty(self) -> None:
        """I5: detect_conflicts(set(), set(), {}, {}) == set()."""
        from muse.core.merge_engine import detect_conflicts

        conflicts = detect_conflicts(set(), set(), {}, {})
        assert conflicts == set()
242
243
244 # ---------------------------------------------------------------------------
245 # Category II — Store-read-failure guard tests
246 # ---------------------------------------------------------------------------
247
248
class TestStoreReadFailureGuardII:
    """merge.py must abort with an error when any required snapshot is unreadable.

    None of these cases should silently fall back to {} and proceed to delete files.
    """

    @staticmethod
    def _snapshot_file(root: pathlib.Path, commit_id: str) -> pathlib.Path:
        """Return the object-store path of the snapshot referenced by *commit_id*.

        Fails with an ``AssertionError`` if the commit itself cannot be read.
        """
        from muse.core.commits import read_commit

        commit = read_commit(root, commit_id)
        assert commit is not None
        return object_path(root, commit.snapshot_id)

    def _setup_two_branch_repo(
        self, tmp_path: pathlib.Path
    ) -> tuple[pathlib.Path, str, str, str, str]:
        """Set up a simple diverged repo: main and feat both have commits.

        Returns (root, repo_id, main_commit_id, feat_commit_id, base_commit_id).
        """
        root, repo_id = _init_repo(tmp_path)
        f0 = _write_file(root, "base.py = True\n")
        base_c = _make_commit(root, repo_id, "main", "base", {"base.py": f0})
        # Branch feat off the base commit by writing its ref file directly.
        (ref_path(root, "feat")).write_text(base_c)

        f1 = _write_file(root, "main_only.py = True\n")
        main_c = _make_commit(root, repo_id, "main", "main change", {"base.py": f0, "main_only.py": f1})

        f2 = _write_file(root, "feat_only.py = True\n")
        feat_c = _make_commit(root, repo_id, "feat", "feat change", {"base.py": f0, "feat_only.py": f2})

        return root, repo_id, main_c, feat_c, base_c

    def test_II1_missing_ours_snapshot_aborts_merge(self, tmp_path: pathlib.Path) -> None:
        """II1: if ours (main) snapshot file is deleted, merge must abort — not delete all files."""
        root, repo_id, main_c, feat_c, base_c = self._setup_two_branch_repo(tmp_path)

        # Delete the snapshot file.
        self._snapshot_file(root, main_c).unlink()

        # Attempt the merge — must fail, not succeed with empty snapshot.
        code, out = _run_unchecked(root, "merge", "feat")
        assert code != 0, (
            f"REGRESSION: merge succeeded despite ours snapshot being missing. "
            f"Expected abort to prevent data loss.\nOutput: {out}"
        )
        # Verify main HEAD has NOT advanced past main_c.
        assert _ref(root, "main") == main_c, (
            "REGRESSION: main HEAD advanced after a merge that should have aborted."
        )

    def test_II2_missing_theirs_snapshot_aborts_merge(self, tmp_path: pathlib.Path) -> None:
        """II2: if theirs (feat) snapshot file is deleted, merge must abort."""
        root, repo_id, main_c, feat_c, base_c = self._setup_two_branch_repo(tmp_path)

        self._snapshot_file(root, feat_c).unlink()

        code, out = _run_unchecked(root, "merge", "feat")
        assert code != 0, (
            f"REGRESSION: merge succeeded despite theirs snapshot being missing.\nOutput: {out}"
        )
        assert _ref(root, "main") == main_c

    def test_II3_corrupt_ours_snapshot_aborts_merge(self, tmp_path: pathlib.Path) -> None:
        """II3: corrupt ours snapshot (unparseable binary garbage) must abort merge."""
        root, repo_id, main_c, feat_c, base_c = self._setup_two_branch_repo(tmp_path)

        self._snapshot_file(root, main_c).write_bytes(b"\xff\xfe invalid msgpack garbage")

        code, out = _run_unchecked(root, "merge", "feat")
        assert code != 0, (
            f"REGRESSION: merge succeeded with corrupt ours snapshot.\nOutput: {out}"
        )
        assert _ref(root, "main") == main_c

    def test_II4_corrupt_theirs_snapshot_aborts_merge(self, tmp_path: pathlib.Path) -> None:
        """II4: corrupt theirs snapshot must abort merge."""
        root, repo_id, main_c, feat_c, base_c = self._setup_two_branch_repo(tmp_path)

        self._snapshot_file(root, feat_c).write_bytes(b"\x00\x01\x02 also garbage")

        code, out = _run_unchecked(root, "merge", "feat")
        assert code != 0, (
            f"REGRESSION: merge succeeded with corrupt theirs snapshot.\nOutput: {out}"
        )
        assert _ref(root, "main") == main_c

    def test_II5_missing_base_snapshot_aborts_merge(self, tmp_path: pathlib.Path) -> None:
        """II5: if the merge-base snapshot is missing, merge must abort — not treat base as {}."""
        root, repo_id, main_c, feat_c, base_c = self._setup_two_branch_repo(tmp_path)

        # Delete the base snapshot.
        self._snapshot_file(root, base_c).unlink()

        code, out = _run_unchecked(root, "merge", "feat")
        assert code != 0, (
            "REGRESSION: merge succeeded with missing base snapshot — "
            "treating base as {} inflates change-sets and may corrupt the merge.\n"
            f"Output: {out}"
        )
        assert _ref(root, "main") == main_c

    def test_II6_missing_ours_commit_file_aborts_merge(self, tmp_path: pathlib.Path) -> None:
        """II6: if the ours commit file is deleted, merge must abort."""
        root, repo_id, main_c, feat_c, base_c = self._setup_two_branch_repo(tmp_path)

        # The commit record is looked up via object_path, keyed by its commit ID.
        cp = object_path(root, main_c)
        cp.unlink()

        code, out = _run_unchecked(root, "merge", "feat")
        assert code != 0, (
            f"REGRESSION: merge succeeded with missing ours commit file.\nOutput: {out}"
        )
        # Same invariant the sibling tests check: an aborted merge must leave
        # the main ref untouched.
        assert _ref(root, "main") == main_c, (
            "REGRESSION: main HEAD advanced after a merge that should have aborted."
        )

    def test_II7_fast_forward_missing_theirs_snapshot_aborts(self, tmp_path: pathlib.Path) -> None:
        """II7: fast-forward merge with missing theirs snapshot must abort — not delete all files.

        Before the fix: ff_manifest defaults to {}, _restore_from_manifest({}) deletes everything.
        After the fix: abort with an error before touching the working tree.
        """
        root, repo_id = _init_repo(tmp_path)

        # Write real files to the working tree.
        f0 = _write_file(root, "keeper.py = 42\n")
        (root / "keeper.py").write_bytes(b"keeper.py = 42\n")

        base_c = _make_commit(root, repo_id, "main", "base", {"keeper.py": f0})
        (ref_path(root, "feat")).write_text(base_c)

        f1 = _write_file(root, "new.py = True\n")
        feat_c = _make_commit(root, repo_id, "feat", "feat commit", {"keeper.py": f0, "new.py": f1})

        # Delete feat's snapshot — main is behind feat (fast-forward case).
        self._snapshot_file(root, feat_c).unlink()

        code, out = _run_unchecked(root, "merge", "feat")
        assert code != 0, (
            "REGRESSION: fast-forward merge succeeded despite theirs snapshot missing. "
            f"This would have applied apply_manifest({{}}) and deleted keeper.py.\nOutput: {out}"
        )
        # The critical assertion: keeper.py must still exist.
        assert (root / "keeper.py").exists(), (
            "DATA LOSS: keeper.py was deleted when theirs snapshot was missing "
            "during fast-forward. The guard must abort BEFORE apply_manifest."
        )

    def test_II8_headerless_json_snapshot_treated_as_corrupt(
        self, tmp_path: pathlib.Path
    ) -> None:
        """II8: snapshot without muse object header is treated as corrupt → merge aborts.

        Simulates a snapshot file written as plain JSON bytes (no 'snapshot <size>\\0' header).
        The muse object reader cannot parse it, causing read_snapshot to return None.
        Merge must abort rather than proceeding with an empty manifest.
        """
        root, repo_id, main_c, feat_c, base_c = self._setup_two_branch_repo(tmp_path)

        # The commit is read here (rather than via _snapshot_file) because its
        # snapshot_id is also embedded in the replacement payload.
        from muse.core.commits import read_commit
        commit = read_commit(root, main_c)
        assert commit is not None
        snap_path = object_path(root, commit.snapshot_id)

        # Overwrite the object with the equivalent JSON without the muse object header.
        # This simulates an old-format file that cannot be parsed as a muse object.
        old_json = json.dumps({"snapshot_id": commit.snapshot_id, "manifest": {}}).encode()
        snap_path.write_bytes(old_json)

        code, out = _run_unchecked(root, "merge", "feat")
        assert code != 0, (
            "REGRESSION: merge succeeded when ours snapshot was in JSON (old format). "
            "This is the exact scenario that caused the data-loss incident.\n"
            f"Expected: abort. Got: {out}"
        )
        assert _ref(root, "main") == main_c

    def test_II9_missing_ours_snapshot_does_not_delete_working_tree(
        self, tmp_path: pathlib.Path
    ) -> None:
        """II9: working-tree files must survive when ours snapshot is unreadable.

        Belt-and-suspenders: even if the merge somehow proceeds, it must not
        apply an empty manifest to the working tree.
        """
        root, repo_id = _init_repo(tmp_path)

        # Write 10 files to the working tree.
        manifest: Manifest = {}
        for i in range(10):
            content = f"module_{i} = True\n".encode()
            oid = _write_object(root, content)
            (root / f"module_{i}.py").write_bytes(content)
            manifest[f"module_{i}.py"] = oid

        base_c = _make_commit(root, repo_id, "main", "base", manifest)
        (ref_path(root, "feat")).write_text(base_c)

        extra = _write_file(root, "extra.py = True\n")
        feat_c = _make_commit(root, repo_id, "feat", "feat",
                              {**manifest, "extra.py": extra})

        # Advance main past base so this is a three-way merge.
        bump = _write_file(root, "bump.py = True\n")
        _make_commit(root, repo_id, "main", "main advance",
                     {**manifest, "bump.py": bump})

        # Delete main's latest snapshot.
        self._snapshot_file(root, _ref(root, "main")).unlink()

        # The exit code is deliberately ignored: this test only cares that the
        # working tree survives, whatever the merge command reports.
        _run_unchecked(root, "merge", "feat")

        # All 10 original files must still exist.
        for i in range(10):
            assert (root / f"module_{i}.py").exists(), (
                f"DATA LOSS: module_{i}.py deleted when merge should have aborted "
                "due to unreadable ours snapshot."
            )
483
484
485 # ---------------------------------------------------------------------------
486 # Category III — Empty-manifest guard tests
487 # ---------------------------------------------------------------------------
488
489
class TestEmptyManifestGuardIII:
    """A merged manifest that is suspiciously empty given its inputs must never
    be applied to the working tree.
    """

    def test_III1_merge_result_snapshot_id_is_never_sha256_empty(
        self, tmp_path: pathlib.Path
    ) -> None:
        """III1: a successful merge must never produce a commit whose snapshot_id
        equals ``_SHA256_EMPTY``.

        ``_SHA256_EMPTY`` is the ID of an empty snapshot; finding it on a merge
        commit means the merge engine produced an empty manifest.
        """
        from muse.core.commits import read_commit

        root, repo_id = _init_repo(tmp_path)

        a_base = _write_file(root, "a.py = 0\n")
        fork_point = _make_commit(root, repo_id, "main", "base", {"a.py": a_base})
        ref_path(root, "feat").write_text(fork_point)

        # ours rewrites a.py; theirs keeps a.py and adds b.py.
        a_ours = _write_file(root, "a.py = 1\n")
        _make_commit(root, repo_id, "main", "ours", {"a.py": a_ours})

        b_theirs = _write_file(root, "b.py = True\n")
        _make_commit(root, repo_id, "feat", "theirs", {"a.py": a_base, "b.py": b_theirs})

        code, out = _run(root, "merge", "feat")
        assert code == 0, out

        head_commit = read_commit(root, _ref(root, "main"))
        assert head_commit is not None
        assert head_commit.snapshot_id != _SHA256_EMPTY, (
            f"REGRESSION: merge commit has snapshot_id == SHA-256('') == {_SHA256_EMPTY[:16]}…\n"
            "This means the merged manifest was empty and all tracked files were deleted.\n"
            "This is the data-loss sentinel produced by compute_snapshot_id({})."
        )

    def test_III2_merged_manifest_has_at_least_as_many_files_as_base(
        self, tmp_path: pathlib.Path
    ) -> None:
        """III2: clean merge → merged manifest >= base file count.

        Neither side deletes anything here, so the merged manifest must hold at
        least as many entries as the base; fewer means files were silently dropped.
        """
        root, repo_id = _init_repo(tmp_path)

        base_manifest = {}
        for i in range(20):
            base_manifest[f"file_{i}.py"] = _write_file(root, f"x_{i} = {i}\n")
        fork_point = _make_commit(root, repo_id, "main", "base", base_manifest)
        ref_path(root, "feat").write_text(fork_point)

        # ours: modify file_0.py only.
        ours_manifest = dict(base_manifest)
        ours_manifest["file_0.py"] = _write_file(root, "x_0 = 'ours'\n")
        _make_commit(root, repo_id, "main", "ours", ours_manifest)

        # theirs: modify file_1.py only.
        theirs_manifest = dict(base_manifest)
        theirs_manifest["file_1.py"] = _write_file(root, "x_1 = 'theirs'\n")
        _make_commit(root, repo_id, "feat", "theirs", theirs_manifest)

        code, out = _run(root, "merge", "feat")
        assert code == 0, out

        merged = _head_manifest(root, "main")
        assert len(merged) >= len(base_manifest), (
            f"REGRESSION: merged manifest has {len(merged)} files but base had "
            f"{len(base_manifest)}. Files were silently dropped."
        )

    def test_III3_merged_manifest_contains_all_base_files_when_no_deletions(
        self, tmp_path: pathlib.Path
    ) -> None:
        """III3: no-deletion merge — every base file must appear in merged."""
        root, repo_id = _init_repo(tmp_path)

        base_manifest = {}
        for i in range(15):
            base_manifest[f"mod_{i}.py"] = _write_file(root, f"MOD_{i} = True\n")
        fork_point = _make_commit(root, repo_id, "main", "base", base_manifest)
        ref_path(root, "feat").write_text(fork_point)

        # Each side only adds one new file on top of the base.
        ours_manifest = dict(base_manifest)
        ours_manifest["new_main.py"] = _write_file(root, "new_main.py = True\n")
        _make_commit(root, repo_id, "main", "ours", ours_manifest)

        theirs_manifest = dict(base_manifest)
        theirs_manifest["new_feat.py"] = _write_file(root, "new_feat.py = True\n")
        _make_commit(root, repo_id, "feat", "theirs", theirs_manifest)

        code, out = _run(root, "merge", "feat")
        assert code == 0, out

        merged = _head_manifest(root, "main")
        for path in base_manifest:
            assert path in merged, (
                f"REGRESSION: base file '{path}' is missing from merged manifest. "
                "Files are being silently dropped."
            )
584
585
586 # ---------------------------------------------------------------------------
587 # Category IV — Working-tree integrity (full CLI round-trips)
588 # ---------------------------------------------------------------------------
589
590
class TestWorkingTreeIntegrityIV:
    """Full CLI merges must leave the repository in a coherent state.

    Every test drives ``muse merge`` through the CLI runner and then inspects
    the commit / snapshot recorded at main's HEAD.
    """

    def test_IV1_three_way_merge_working_tree_matches_snapshot(
        self, tmp_path: pathlib.Path
    ) -> None:
        """IV1: after a clean merge, the merged snapshot keeps ours' a.py and gains theirs' b.py.

        Only the manifest committed at main's HEAD is inspected; files on disk
        are not checked by this test.
        """
        root, repo_id = _init_repo(tmp_path)

        content_a = b"A = 1\n"
        content_b = b"B = 2\n"
        a_id = _write_object(root, content_a)
        b_id = _write_object(root, content_b)

        base_c = _make_commit(root, repo_id, "main", "base", {"a.py": a_id})
        (ref_path(root, "feat")).write_text(base_c)

        a2_content = b"A = 'ours'\n"
        a2_id = _write_object(root, a2_content)
        _make_commit(root, repo_id, "main", "ours", {"a.py": a2_id})

        _make_commit(root, repo_id, "feat", "theirs", {"a.py": a_id, "b.py": b_id})

        code, out = _run(root, "merge", "feat")
        assert code == 0, out

        # After merge: the snapshot should have a.py (ours version) and b.py (theirs).
        merged = _head_manifest(root, "main")
        assert "b.py" in merged, "theirs-only b.py missing from merged snapshot"
        assert merged.get("a.py") == a2_id, "ours change to a.py not preserved in merged snapshot"

    def test_IV2_fast_forward_file_count_preserved(self, tmp_path: pathlib.Path) -> None:
        """IV2: fast-forward merge preserves ALL files from the target branch."""
        root, repo_id = _init_repo(tmp_path)

        # Write 25 files.
        manifest: Manifest = {}
        for i in range(25):
            oid = _write_file(root, f"x_{i} = {i}\n")
            manifest[f"file_{i:02d}.py"] = oid

        base_c = _make_commit(root, repo_id, "main", "base", {"start.py": _write_file(root, "x=0\n")})
        (ref_path(root, "feat")).write_text(base_c)
        _make_commit(root, repo_id, "feat", "feat: 25 files", manifest)

        # Surface the CLI output on failure, like the other tests in this class.
        code, out = _run(root, "merge", "feat")
        assert code == 0, out

        merged = _head_manifest(root, "main")
        for path in manifest:
            assert path in merged, f"DATA LOSS: {path} missing after fast-forward merge"

    def test_IV3_three_way_merge_both_sides_preserved(self, tmp_path: pathlib.Path) -> None:
        """IV3: ours-only AND theirs-only files both present in merged result."""
        root, repo_id = _init_repo(tmp_path)

        base_id = _write_file(root, "base = True\n")
        base_c = _make_commit(root, repo_id, "main", "base", {"base.py": base_id})
        (ref_path(root, "feat")).write_text(base_c)

        ours_id = _write_file(root, "ours = True\n")
        _make_commit(root, repo_id, "main", "ours", {"base.py": base_id, "ours_only.py": ours_id})

        theirs_id = _write_file(root, "theirs = True\n")
        _make_commit(root, repo_id, "feat", "theirs", {"base.py": base_id, "theirs_only.py": theirs_id})

        code, out = _run(root, "merge", "feat")
        assert code == 0, out

        merged = _head_manifest(root, "main")
        assert "ours_only.py" in merged, "ours-only file was dropped in merge"
        assert "theirs_only.py" in merged, "theirs-only file was dropped in merge"
        assert "base.py" in merged, "base file was dropped in merge"

    def test_IV4_merge_commit_snapshot_not_empty(self, tmp_path: pathlib.Path) -> None:
        """IV4: merge commit snapshot_id must never equal the empty-snapshot sentinel."""
        root, repo_id = _init_repo(tmp_path)

        f0 = _write_file(root, "a = 0\n")
        f1 = _write_file(root, "a = 1\n")
        f2 = _write_file(root, "b = True\n")

        base_c = _make_commit(root, repo_id, "main", "base", {"a.py": f0})
        (ref_path(root, "feat")).write_text(base_c)
        _make_commit(root, repo_id, "main", "ours", {"a.py": f1})
        _make_commit(root, repo_id, "feat", "theirs", {"a.py": f0, "b.py": f2})

        code, out = _run(root, "merge", "feat")
        assert code == 0, out

        from muse.core.commits import read_commit
        mc = read_commit(root, _ref(root, "main"))
        assert mc is not None
        assert mc.snapshot_id != _SHA256_EMPTY, (
            "DATA LOSS: merge commit snapshot_id is SHA-256 of empty bytes. "
            "The merged manifest was empty — all files were or would be deleted."
        )

    def test_IV5_merge_commit_has_two_parents(self, tmp_path: pathlib.Path) -> None:
        """IV5: three-way merge commit must record both parent commit IDs."""
        root, repo_id = _init_repo(tmp_path)

        f0 = _write_file(root, "a = 0\n")
        base_c = _make_commit(root, repo_id, "main", "base", {"a.py": f0})
        (ref_path(root, "feat")).write_text(base_c)

        f1 = _write_file(root, "a = 1\n")
        _make_commit(root, repo_id, "main", "ours", {"a.py": f1})
        f2 = _write_file(root, "b = True\n")
        _make_commit(root, repo_id, "feat", "theirs", {"a.py": f0, "b.py": f2})

        # Check the exit code first: if the merge itself failed, main still
        # points at the single-parent "ours" commit and the assertion below
        # would report a misleading "missing second parent".
        code, out = _run(root, "merge", "feat")
        assert code == 0, out

        from muse.core.commits import read_commit
        mc = read_commit(root, _ref(root, "main"))
        assert mc is not None
        assert mc.parent2_commit_id is not None, (
            "Three-way merge commit missing second parent — history will appear linear."
        )

    def test_IV6_strategy_ours_does_not_delete_theirs_only_files(
        self, tmp_path: pathlib.Path
    ) -> None:
        """IV6: --strategy=ours must not delete theirs-only files from merged manifest."""
        root, repo_id = _init_repo(tmp_path)

        f0 = _write_file(root, "shared = 0\n")
        base_c = _make_commit(root, repo_id, "main", "base", {"shared.py": f0})
        (ref_path(root, "feat")).write_text(base_c)

        # Both sides change shared.py (a conflict); theirs also adds new_feat.py.
        f_ours = _write_file(root, "shared = 'ours'\n")
        theirs_only = _write_file(root, "new_feat = True\n")
        _make_commit(root, repo_id, "main", "ours change", {"shared.py": f_ours})
        f_theirs = _write_file(root, "shared = 'theirs'\n")
        _make_commit(root, repo_id, "feat", "theirs",
                     {"shared.py": f_theirs, "new_feat.py": theirs_only})

        code, out = _run(root, "merge", "--strategy", "ours", "feat")
        assert code == 0, out

        merged = _head_manifest(root, "main")
        assert merged.get("shared.py") == f_ours, "strategy=ours must keep ours version of conflict"
        assert "new_feat.py" in merged, (
            "REGRESSION: --strategy=ours deleted theirs-only new_feat.py. "
            "Non-conflicting theirs additions must still appear in merged."
        )

    def test_IV7_strategy_theirs_does_not_delete_ours_only_files(
        self, tmp_path: pathlib.Path
    ) -> None:
        """IV7: --strategy=theirs must not delete ours-only files from merged manifest."""
        root, repo_id = _init_repo(tmp_path)

        f0 = _write_file(root, "shared = 0\n")
        base_c = _make_commit(root, repo_id, "main", "base", {"shared.py": f0})
        (ref_path(root, "feat")).write_text(base_c)

        # Both sides change shared.py (a conflict); ours also adds ours_new.py.
        ours_only = _write_file(root, "ours_new = True\n")
        f_ours = _write_file(root, "shared = 'ours'\n")
        _make_commit(root, repo_id, "main", "ours",
                     {"shared.py": f_ours, "ours_new.py": ours_only})

        f_theirs = _write_file(root, "shared = 'theirs'\n")
        _make_commit(root, repo_id, "feat", "theirs", {"shared.py": f_theirs})

        code, out = _run(root, "merge", "--strategy", "theirs", "feat")
        assert code == 0, out

        merged = _head_manifest(root, "main")
        assert merged.get("shared.py") == f_theirs, "strategy=theirs must keep theirs version"
        assert "ours_new.py" in merged, (
            "REGRESSION: --strategy=theirs deleted ours-only ours_new.py. "
            "Non-conflicting ours additions must still appear in merged."
        )
765
766
767 # ---------------------------------------------------------------------------
768 # Category V — apply_manifest safety
769 # ---------------------------------------------------------------------------
770
771
772 class TestApplyManifestSafetyV:
773 """apply_manifest layer must be precise and not corrupt the working tree."""
774
def test_V1_apply_manifest_writes_target_files(self, tmp_path: pathlib.Path) -> None:
    """V1: apply_manifest materialises a target file from the object store.

    Starting from an empty previous manifest, applying ``{"hello.py": oid}``
    must leave ``hello.py`` on disk with exactly the stored bytes.
    """
    from muse.core.workdir import apply_manifest

    root, _repo_id = _init_repo(tmp_path)
    payload = b"HELLO = True\n"
    target = {"hello.py": _write_object(root, payload)}

    apply_manifest(root, {}, target)

    restored = (root / "hello.py").read_bytes()
    assert restored == payload
784
785 def test_V2_apply_manifest_removes_files_not_in_target(self, tmp_path: pathlib.Path) -> None:
786 """V2: apply_manifest removes tracked files absent from target."""
787 from muse.core.workdir import apply_manifest
788
789 root, _ = _init_repo(tmp_path)
790 content = b"OLD = True\n"
791 oid = _write_object(root, content)
792 (root / "old.py").write_bytes(content)
793
794 new_content = b"NEW = True\n"
795 new_oid = _write_object(root, new_content)
796 apply_manifest(root, {"old.py": oid}, {"new.py": new_oid})
797
798 assert not (root / "old.py").exists(), "apply_manifest should remove tracked files not in target"
799 assert (root / "new.py").read_bytes() == new_content
800
801 def test_V3_apply_manifest_does_not_delete_muse_dir(self, tmp_path: pathlib.Path) -> None:
802 """V3: apply_manifest must never delete .muse/ regardless of target."""
803 from muse.core.workdir import apply_manifest
804
805 root, _ = _init_repo(tmp_path)
806 assert (muse_dir(root)).exists()
807
808 try:
809 apply_manifest(root, {}, {})
810 except (ValueError, SystemExit):
811 pass # Guard fired correctly.
812
813 assert (muse_dir(root)).exists(), ".muse/ was deleted by apply_manifest — critical failure"
814
815 def test_V4_apply_manifest_does_not_follow_symlinks(self, tmp_path: pathlib.Path) -> None:
816 """V4: symlinked files outside the repo are not deleted by apply_manifest."""
817 from muse.core.workdir import apply_manifest
818
819 repo_dir = tmp_path / "myrepo"
820 repo_dir.mkdir()
821 root, _ = _init_repo(repo_dir)
822
823 external = tmp_path / "external_file.txt"
824 external.write_bytes(b"I am external")
825
826 link = root / "link_to_external.py"
827 link.symlink_to(external)
828
829 try:
830 apply_manifest(root, {}, {})
831 except (ValueError, SystemExit):
832 pass # Guard fired — acceptable.
833
834 assert external.exists(), (
835 "apply_manifest followed a symlink outside the repo and deleted the target."
836 )
837
838
839 # ---------------------------------------------------------------------------
840 # Category VI — The exact regression scenario
841 # ---------------------------------------------------------------------------
842
843
class TestFormatMigrationRegressionVI:
    """Reproduce the exact scenario that caused the data-loss incident.

    When a snapshot object is unreadable (missing muse object header, wrong
    format, or corrupt), the old code returned None and defaulted to {}.
    All manifests resolved to {}, leading to an empty merge and file deletion.
    """

    def test_VI1_regression_snapshot_id_never_equals_sha256_empty_after_merge(
        self, tmp_path: pathlib.Path
    ) -> None:
        """VI1: the data-loss sentinel must never appear in the commit graph.

        Walk every commit reachable from main after a merge and assert that no
        snapshot_id equals e3b0c44… (SHA-256 of empty bytes). A reachable
        commit that cannot be read is itself a failure: skipping it would let
        the check pass without having verified anything.
        """
        from muse.core.commits import read_commit

        root, repo_id = _init_repo(tmp_path)

        # Build a real diverged graph with many files.
        base_manifest = {f"src_{i}.py": _write_file(root, f"v = {i}\n") for i in range(10)}
        base_c = _make_commit(root, repo_id, "main", "base", base_manifest)
        ref_path(root, "feat").write_text(base_c)

        ours_manifest = {**base_manifest, "ours.py": _write_file(root, "OURS = True\n")}
        _make_commit(root, repo_id, "main", "ours", ours_manifest)

        theirs_manifest = {**base_manifest, "theirs.py": _write_file(root, "THEIRS = True\n")}
        _make_commit(root, repo_id, "feat", "theirs", theirs_manifest)

        code, out = _run(root, "merge", "feat")
        assert code == 0, out

        # Walk the entire commit graph (both parents) and check every snapshot_id.
        visited: set[str] = set()
        pending = [_ref(root, "main")]
        while pending:
            cid = pending.pop()
            if cid in visited:
                continue
            visited.add(cid)

            commit = read_commit(root, cid)
            assert commit is not None, (
                f"commit {cid[:8]} is reachable from main but unreadable — "
                "its snapshot_id cannot be verified."
            )
            assert commit.snapshot_id != _SHA256_EMPTY, (
                f"REGRESSION: commit {cid[:8]} has snapshot_id == SHA-256('') — "
                "the data-loss sentinel. This commit has an empty manifest."
            )
            pending.extend(
                parent
                for parent in (commit.parent_commit_id, commit.parent2_commit_id)
                if parent
            )

    def test_VI2_merge_after_simulated_format_migration_aborts_not_deletes(
        self, tmp_path: pathlib.Path
    ) -> None:
        """VI2: when the ours snapshot is in a corrupt/headerless format, merge must abort.

        Simulates the data-loss scenario: a snapshot file written as plain JSON
        (no muse object header) is unreadable, ours snapshot returns None.
        The merge must abort rather than silently empty the working tree.
        """
        from muse.core.commits import read_commit

        root, repo_id = _init_repo(tmp_path)

        # Create 20 files, both in the object store and in the working tree.
        manifest: Manifest = {}
        for i in range(20):
            content = f"module_{i} = True\n".encode()
            oid = _write_object(root, content)
            (root / f"module_{i}.py").write_bytes(content)
            manifest[f"module_{i}.py"] = oid

        base_c = _make_commit(root, repo_id, "main", "base", manifest)
        ref_path(root, "feat").write_text(base_c)

        extra = _write_file(root, "extra = True\n")
        _make_commit(root, repo_id, "feat", "feat adds extra",
                     {**manifest, "extra.py": extra})

        bump = _write_file(root, "bump = True\n")
        main_manifest = {**manifest, "bump.py": bump}
        main_c = _make_commit(root, repo_id, "main", "main adds bump", main_manifest)

        # Simulate format migration: overwrite ours snapshot with old JSON bytes.
        main_commit = read_commit(root, main_c)
        assert main_commit is not None
        snap_path = object_path(root, main_commit.snapshot_id)
        # Write JSON without the muse object header — parser will fail on this.
        old_json_bytes = json.dumps({
            "snapshot_id": main_commit.snapshot_id,
            "manifest": main_manifest,
        }).encode()
        snap_path.write_bytes(old_json_bytes)

        code, _out = _run_unchecked(root, "merge", "feat")

        # Either the merge aborts (code != 0) OR it succeeds with correct content.
        # What is NEVER acceptable: merging with an empty manifest that deletes files.
        if code == 0:
            merged = _head_manifest(root, "main")
            assert merged != {}, (
                "DATA LOSS: merge succeeded with an empty manifest. "
                "All files were deleted because ours snapshot was unreadable (headerless JSON)."
            )
            # Must have non-trivially many files.
            assert len(merged) >= len(manifest), (
                f"DATA LOSS: merged has only {len(merged)} files, expected ≥ {len(manifest)}."
            )
        # If code != 0: correct behaviour (abort).

        # Critical: the working-tree files must still exist in either case.
        for i in range(20):
            assert (root / f"module_{i}.py").exists(), (
                f"DATA LOSS: module_{i}.py was deleted when merge aborted due to unreadable snapshot."
            )

    def test_VI3_merge_commit_snapshot_id_matches_actual_files(
        self, tmp_path: pathlib.Path
    ) -> None:
        """VI3: compute_snapshot_id of the merged manifest must equal the stored snapshot_id."""
        from muse.core.ids import hash_snapshot as compute_snapshot_id
        from muse.core.commits import read_commit
        from muse.core.snapshots import read_snapshot

        root, repo_id = _init_repo(tmp_path)

        f0 = _write_file(root, "a = 0\n")
        f1 = _write_file(root, "a = 1\n")
        f2 = _write_file(root, "b = True\n")

        # ours edits a.py; theirs leaves a.py alone and adds b.py.
        base_c = _make_commit(root, repo_id, "main", "base", {"a.py": f0})
        ref_path(root, "feat").write_text(base_c)
        _make_commit(root, repo_id, "main", "ours", {"a.py": f1})
        _make_commit(root, repo_id, "feat", "theirs", {"a.py": f0, "b.py": f2})

        code, out = _run(root, "merge", "feat")
        assert code == 0, out

        commit = read_commit(root, _ref(root, "main"))
        assert commit is not None
        snap = read_snapshot(root, commit.snapshot_id)
        assert snap is not None

        # The id recorded on the commit must be the hash of the manifest it points at.
        recomputed = compute_snapshot_id(snap.manifest)
        assert recomputed == commit.snapshot_id, (
            "snapshot_id in the commit record doesn't match "
            "compute_snapshot_id(snapshot.manifest). The snapshot is corrupt."
        )
993
994
995 # ---------------------------------------------------------------------------
996 # Category VII — Stress tests
997 # ---------------------------------------------------------------------------
998
999
class TestStressVII:
    """Extreme stress tests: large file counts, repeated merges, complex topologies."""

    def test_VII1_100_file_clean_merge_all_files_preserved(self, tmp_path: pathlib.Path) -> None:
        """VII1: merge with 100 theirs-only file additions — none may be dropped."""
        root, repo_id = _init_repo(tmp_path)

        base_id = _write_file(root, "base = True\n")
        base_c = _make_commit(root, repo_id, "main", "base", {"base.py": base_id})
        ref_path(root, "feat").write_text(base_c)

        # ours: minor bump to base.py.
        bumped = _write_file(root, "base = 2\n")
        _make_commit(root, repo_id, "main", "ours: bump", {"base.py": bumped})

        # theirs: 100 new files.
        theirs_manifest: Manifest = {"base.py": base_id}
        for i in range(100):
            theirs_manifest[f"mod_{i:03d}.py"] = _write_file(root, f"mod_{i:03d} = True\n")
        _make_commit(root, repo_id, "feat", "theirs: 100 mods", theirs_manifest)

        code, out = _run(root, "merge", "feat")
        assert code == 0, out

        merged = _head_manifest(root, "main")
        dropped = [f"mod_{i:03d}.py" for i in range(100) if f"mod_{i:03d}.py" not in merged]
        assert not dropped, (
            f"DATA LOSS: {len(dropped)} of 100 theirs-only files dropped after merge: "
            f"{dropped[:5]}{'...' if len(dropped) > 5 else ''}"
        )

    def test_VII2_repeated_merges_file_count_never_decreases(
        self, tmp_path: pathlib.Path
    ) -> None:
        """VII2: five sequential branch merges — total file count must be monotonically non-decreasing.

        Each wave:
          - Branches from the current main HEAD (inheriting all previously merged files).
          - Adds 5 unique files ON TOP of the current main state.
          - Main is bumped with 1 unique file (true 3-way merge).
          - After merge: main must have all prior files + 5 wave files + 1 bump.

        Expected final count: 1 (base) + 5 waves × 5 files + 5 bumps = 31.
        """
        root, repo_id = _init_repo(tmp_path)

        base_id = _write_file(root, "base = True\n")
        _make_commit(root, repo_id, "main", "base", {"base.py": base_id})
        prev_count = 1

        for wave in range(5):
            branch = f"wave_{wave}"
            # Branch from the current main HEAD — inherits all previously merged files.
            ref_path(root, branch).write_text(_ref(root, "main"))

            # Wave branch: current main state + 5 new unique files.
            wave_manifest = dict(_head_manifest(root, "main"))
            for j in range(5):
                wave_manifest[f"w{wave}_{j}.py"] = _write_file(root, f"wave_{wave}_file_{j} = True\n")
            _make_commit(root, repo_id, branch, f"wave {wave} adds 5 files", wave_manifest)

            # Advance main with 1 unique file so this is a true 3-way merge.
            bump_manifest = dict(_head_manifest(root, "main"))
            bump_manifest[f"main_bump_{wave}.py"] = _write_file(root, f"main_bump_{wave} = True\n")
            _make_commit(root, repo_id, "main", f"main bump {wave}", bump_manifest)

            code, out = _run(root, "merge", branch)
            assert code == 0, f"Wave {wave} merge failed: {out}"

            current_count = len(_head_manifest(root, "main"))
            assert current_count >= prev_count, (
                f"DATA LOSS after wave {wave}: file count decreased "
                f"from {prev_count} to {current_count}."
            )
            prev_count = current_count

        # 1 base + 5 waves × 5 files + 5 bumps = 31.
        assert prev_count >= 31, (
            f"Expected at least 31 files after 5 waves, got {prev_count}."
        )

    def test_VII3_diamond_topology_no_files_lost(self, tmp_path: pathlib.Path) -> None:
        """VII3: diamond merge topology — LCA is correctly found, no data lost.

        Topology:
                    C0 (base: 10 files)
                   /  \\
                 C1    C2
          (ours adds 5)  (theirs adds 5 different)
                   \\  /
                  merge  → must have all 20 files
        """
        root, repo_id = _init_repo(tmp_path)

        base_manifest = {f"base_{i}.py": _write_file(root, f"base_{i} = True\n") for i in range(10)}
        base_c = _make_commit(root, repo_id, "main", "C0", base_manifest)
        ref_path(root, "feat").write_text(base_c)

        # C1: ours adds 5 files.
        c1_manifest = {**base_manifest}
        for i in range(5):
            c1_manifest[f"ours_{i}.py"] = _write_file(root, f"ours_{i} = True\n")
        _make_commit(root, repo_id, "main", "C1: ours adds 5", c1_manifest)

        # C2: theirs adds 5 different files.
        c2_manifest = {**base_manifest}
        for i in range(5):
            c2_manifest[f"theirs_{i}.py"] = _write_file(root, f"theirs_{i} = True\n")
        _make_commit(root, repo_id, "feat", "C2: theirs adds 5", c2_manifest)

        code, out = _run(root, "merge", "feat")
        assert code == 0, out

        merged = _head_manifest(root, "main")
        # Union of both sides' paths, used only to name what went missing.
        expected_paths = set(c1_manifest) | set(c2_manifest)
        assert len(merged) == 20, (
            f"DATA LOSS: expected 20 files after diamond merge, got {len(merged)}. "
            f"Missing: {sorted(expected_paths - set(merged))}"
        )

    def test_VII4_merge_with_deep_history_correct_lca(self, tmp_path: pathlib.Path) -> None:
        """VII4: 50-commit deep history — LCA found correctly, no files lost."""
        root, repo_id = _init_repo(tmp_path)

        # Build 50 commits on main: C0 plus 49 that each add one file.
        f0 = _write_file(root, "anchor = 0\n")
        current_manifest: Manifest = {"anchor.py": f0}
        _make_commit(root, repo_id, "main", "C0", current_manifest)

        for depth in range(49):
            fi = _write_file(root, f"depth_{depth} = True\n")
            current_manifest = {**current_manifest, f"depth_{depth}.py": fi}
            _make_commit(root, repo_id, "main", f"C{depth + 1}", current_manifest)

        # Branch at the VERY END.
        tip_c = _ref(root, "main")
        ref_path(root, "feat").write_text(tip_c)

        # Advance main by 1.
        main_extra = _write_file(root, "main_extra = True\n")
        main_manifest = {**current_manifest, "main_extra.py": main_extra}
        _make_commit(root, repo_id, "main", "main advance", main_manifest)

        # Advance feat by 1.
        feat_extra = _write_file(root, "feat_extra = True\n")
        feat_manifest = {**current_manifest, "feat_extra.py": feat_extra}
        _make_commit(root, repo_id, "feat", "feat advance", feat_manifest)

        code, out = _run(root, "merge", "feat")
        assert code == 0, out

        merged = _head_manifest(root, "main")
        assert "main_extra.py" in merged, "ours-only main_extra.py lost in deep-history merge"
        assert "feat_extra.py" in merged, "theirs-only feat_extra.py lost in deep-history merge"
        # All 49 depth files must still be present.
        for depth in range(49):
            assert f"depth_{depth}.py" in merged, f"depth_{depth}.py lost in deep-history merge"

    def test_VII5_stress_strategy_ours_100_theirs_files_all_preserved(
        self, tmp_path: pathlib.Path
    ) -> None:
        """VII5: --strategy=ours with 100 theirs-only additions — all must appear in merged.

        The old strategy=ours bug took the entire ours manifest verbatim,
        discarding all theirs-only changes. This test ensures 100 theirs-only
        files survive even when strategy=ours is used to resolve conflicts.
        """
        root, repo_id = _init_repo(tmp_path)

        shared_content = _write_file(root, "shared = 'base'\n")
        base_c = _make_commit(root, repo_id, "main", "base", {"shared.py": shared_content})
        ref_path(root, "feat").write_text(base_c)

        ours_shared = _write_file(root, "shared = 'ours'\n")
        _make_commit(root, repo_id, "main", "ours: modify shared", {"shared.py": ours_shared})

        # theirs: conflict on shared.py + 100 theirs-only additions.
        theirs_shared = _write_file(root, "shared = 'theirs'\n")
        theirs_manifest: Manifest = {"shared.py": theirs_shared}
        for i in range(100):
            theirs_manifest[f"extra_{i:03d}.py"] = _write_file(root, f"extra_{i} = True\n")
        _make_commit(root, repo_id, "feat", "theirs: conflict + 100 extras", theirs_manifest)

        code, out = _run(root, "merge", "--strategy", "ours", "feat")
        assert code == 0, out

        merged = _head_manifest(root, "main")
        dropped = [f"extra_{i:03d}.py" for i in range(100) if f"extra_{i:03d}.py" not in merged]
        assert not dropped, (
            f"REGRESSION: --strategy=ours dropped {len(dropped)} theirs-only files: "
            f"{dropped[:5]}{'...' if len(dropped) > 5 else ''}"
        )
        assert merged.get("shared.py") == ours_shared, "--strategy=ours must keep ours version of conflict"

    def test_VII6_interleaved_add_delete_all_correct(self, tmp_path: pathlib.Path) -> None:
        """VII6: interleaved adds and deletes on both sides — final manifest exactly correct.

        No base file is modified, only deleted, so under the three-way rule
        "deleted on one side, unchanged on the other → stays deleted" the
        merged manifest must be exactly: the base files kept by BOTH sides
        (odd-numbered files 11–19) plus ``ours_new.py`` and ``theirs_new.py``.
        """
        root, repo_id = _init_repo(tmp_path)

        # Base: files 0-19.
        base_manifest = {f"f{i:02d}.py": _write_file(root, f"f{i} = {i}\n") for i in range(20)}
        base_c = _make_commit(root, repo_id, "main", "base", base_manifest)
        ref_path(root, "feat").write_text(base_c)

        # ours: delete even files (0,2,4…18), keep odd, add ours_new.
        ours_manifest = {k: v for k, v in base_manifest.items() if int(k[1:3]) % 2 == 1}
        ours_manifest["ours_new.py"] = _write_file(root, "OURS_NEW = True\n")
        _make_commit(root, repo_id, "main", "ours: delete evens, add ours_new", ours_manifest)

        # theirs: delete files 0-9, keep 10-19, add theirs_new.
        theirs_manifest = {k: v for k, v in base_manifest.items() if int(k[1:3]) >= 10}
        theirs_manifest["theirs_new.py"] = _write_file(root, "THEIRS_NEW = True\n")
        _make_commit(root, repo_id, "feat", "theirs: delete f00-f09, add theirs_new", theirs_manifest)

        code, out = _run(root, "merge", "feat")
        assert code == 0, out

        merged = dict(_head_manifest(root, "main"))

        # ours_new and theirs_new must both be present.
        assert "ours_new.py" in merged, "ours_new.py was lost in interleaved merge"
        assert "theirs_new.py" in merged, "theirs_new.py was lost in interleaved merge"

        # Expected result: base files that survive on both sides, plus both additions.
        expected = {
            path: oid
            for path, oid in base_manifest.items()
            if path in ours_manifest and path in theirs_manifest
        }
        expected["ours_new.py"] = ours_manifest["ours_new.py"]
        expected["theirs_new.py"] = theirs_manifest["theirs_new.py"]

        # Anything outside ``expected`` is either a phantom path or a file that
        # one side deleted (and the other left untouched) coming back to life.
        unexpected = sorted(set(merged) - set(expected))
        assert not unexpected, (
            f"Phantom or resurrected files in merged manifest: {unexpected}"
        )
        missing = sorted(set(expected) - set(merged))
        assert not missing, (
            f"DATA LOSS: files kept by both sides are missing after merge: {missing}"
        )
        assert merged == expected, "merged manifest has the right paths but wrong object ids"

    def test_VII7_merge_output_is_deterministic(self, tmp_path: pathlib.Path) -> None:
        """VII7: apply_merge and hash_snapshot are deterministic for identical inputs.

        The same (base, ours, theirs) manifests are merged twice at the
        pure-function level; both runs must yield equal manifests and equal
        snapshot ids. No commit is created here, so commit_id determinism is
        not covered by this test.
        """
        from muse.core.merge_engine import apply_merge, detect_conflicts, diff_snapshots
        from muse.core.ids import hash_snapshot as compute_snapshot_id

        # Build a deterministic merge scenario at the pure-function level.
        base = {"a.py": _h("a-base"), "b.py": _h("b-base")}
        ours = {"a.py": _h("a-ours"), "b.py": _h("b-base")}
        theirs = {"a.py": _h("a-base"), "b.py": _h("b-theirs"), "c.py": _h("c-new")}

        ours_changed = diff_snapshots(base, ours)
        theirs_changed = diff_snapshots(base, theirs)
        conflicts = detect_conflicts(ours_changed, theirs_changed, ours, theirs)
        merged1 = apply_merge(base, ours, theirs, ours_changed, theirs_changed, conflicts)
        merged2 = apply_merge(base, ours, theirs, ours_changed, theirs_changed, conflicts)

        assert merged1 == merged2, "apply_merge is not deterministic"
        assert compute_snapshot_id(merged1) == compute_snapshot_id(merged2), (
            "compute_snapshot_id is not deterministic"
        )

    def test_VII8_dry_run_never_modifies_any_commit(self, tmp_path: pathlib.Path) -> None:
        """VII8: --dry-run must not write any commit or advance any branch ref.

        Checks that both branch refs (main and feat) are unchanged and that
        the set of files under the object store is identical before and after.
        """
        root, repo_id = _init_repo(tmp_path)

        f0 = _write_file(root, "a = 0\n")
        base_c = _make_commit(root, repo_id, "main", "base", {"a.py": f0})
        ref_path(root, "feat").write_text(base_c)

        f1 = _write_file(root, "a = 1\n")
        main_c = _make_commit(root, repo_id, "main", "ours", {"a.py": f1})

        f2 = _write_file(root, "b = True\n")
        _make_commit(root, repo_id, "feat", "theirs", {"a.py": f0, "b.py": f2})
        feat_before = _ref(root, "feat")

        objects_dir = muse_dir(root) / "objects" / "sha256"
        objects_before = {p for p in objects_dir.rglob("*") if p.is_file()}

        code, out = _run(root, "merge", "--dry-run", "feat")
        assert code == 0, out

        objects_after = {p for p in objects_dir.rglob("*") if p.is_file()}

        assert _ref(root, "main") == main_c, "--dry-run must not advance main HEAD"
        assert _ref(root, "feat") == feat_before, "--dry-run must not move the feat ref"
        # Comparing sets (not counts) also names the offending files on failure.
        written = sorted(str(p.relative_to(objects_dir)) for p in objects_after - objects_before)
        removed = sorted(str(p.relative_to(objects_dir)) for p in objects_before - objects_after)
        assert objects_after == objects_before, (
            f"--dry-run must not write any objects (written: {written}, removed: {removed})"
        )

    def test_VII9_no_ff_with_100_files_all_preserved(self, tmp_path: pathlib.Path) -> None:
        """VII9: --no-ff with 100-file fast-forward-eligible merge — all files preserved."""
        from muse.core.commits import read_commit

        root, repo_id = _init_repo(tmp_path)

        base_id = _write_file(root, "anchor = True\n")
        base_c = _make_commit(root, repo_id, "main", "base", {"anchor.py": base_id})
        ref_path(root, "feat").write_text(base_c)

        # Only feat advances, so main could simply fast-forward to it.
        large_manifest: Manifest = {"anchor.py": base_id}
        for i in range(100):
            large_manifest[f"big_{i:03d}.py"] = _write_file(root, f"big_{i:03d} = True\n")
        _make_commit(root, repo_id, "feat", "feat: 100 files", large_manifest)

        # --no-ff forces a three-way merge commit even though this is fast-forwardable.
        code, out = _run(root, "merge", "--no-ff", "feat")
        assert code == 0, out

        merged = _head_manifest(root, "main")
        for i in range(100):
            assert f"big_{i:03d}.py" in merged, f"big_{i:03d}.py missing after --no-ff merge"

        # Must have created a real merge commit (two parents).
        mc = read_commit(root, _ref(root, "main"))
        assert mc is not None
        assert mc.parent2_commit_id is not None, "--no-ff must produce a merge commit with 2 parents"
File History 4 commits
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 21 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 22 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 28 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor 29 days ago