gabriel / muse public
test_workdir_integrity.py python
1,143 lines 43.9 KB
Raw
sha256:1d3f5470f45db58e32047678debc9438fdded1b2c7332cc743d2b8be32fdafc8 fixing more broken tests Human patch 14 days ago
1 """Zero-data-loss workdir integrity tests.
2
3 What these tests cover
4 ----------------------
5 This suite was written after a real incident where the working tree diverged
6 from the committed snapshot. The root cause chain:
7
8 1. ``restore_object`` used ``shutil.copy2(src, dest)`` directly — not
9 atomic. A crash mid-copy could leave a corrupt destination file.
10 2. ``apply_manifest`` ignored the ``False`` return from ``restore_object``
11 when an object was absent from the store. The file was silently left at
12 its old content; no error surfaced.
13 3. ``_checkout_snapshot`` (incremental delta path) printed a warning when
14 an object was missing but continued — same silent data loss.
15 4. No post-operation integrity verification existed to catch any of the
16 above after the fact.
17
18 Fixes applied:
19 * ``restore_object`` — atomic write: temp file → ``os.replace``.
20 * ``apply_manifest`` — raises ``RuntimeError`` listing every missing object.
21 * ``_checkout_snapshot`` — raises ``SystemExit(INTERNAL_ERROR)`` on missing
22 object; never continues with a partial workdir.
23 * ``verify_workdir_integrity`` — new utility: full hash-based post-op audit.
24
25 Test categories
26 ---------------
27 I restore_object atomicity (temp+replace pattern).
28 II apply_manifest — missing object raises, not silently skips.
29 III verify_workdir_integrity — utility correctness.
30 IV checkout → workdir always matches target snapshot.
31 V fast-forward merge → workdir always matches target snapshot.
32 VI checkout aborts hard when an object is missing from the store.
33 VII Editor-cache simulation — status detects stale-cache workdir drift.
34 VIII Stress tests — 500-file repos, deep chains, diamond DAGs.
35 """
36
37 from __future__ import annotations
38
39 import hashlib
40 import json
41 import os
42 import pathlib
43 import shutil
44 import stat
45 import tempfile
46
47 import pytest
48 from tests.cli_test_helper import CliRunner
49
50 from muse.core.object_store import object_path, restore_object, write_object
51 from muse.core.snapshot import walk_workdir
52 from muse.core.workdir import apply_manifest, verify_workdir_integrity
53 from muse.core.types import Manifest, blob_id, content_hash, fake_id, hash_file
54 from muse.core.paths import muse_dir, ref_path
55
# Shape of the environment-override mapping built by ``_env`` and passed to
# ``runner.invoke(..., env=...)``.
type _EnvMap = dict[str, str]

# Single CLI runner instance shared by every test in this module.
runner = CliRunner()
cli = None  # CliRunner ignores this positional
60
61
62 # ---------------------------------------------------------------------------
63 # Shared helpers
64 # ---------------------------------------------------------------------------
65
66
67
68
def _env(root: pathlib.Path) -> _EnvMap:
    """Build the environment overrides that name *root* as ``MUSE_REPO_ROOT``."""
    overrides: _EnvMap = {}
    overrides["MUSE_REPO_ROOT"] = str(root)
    return overrides
71
72
def _run(root: pathlib.Path, *args: str) -> tuple[int, str]:
    """Invoke the CLI against *root* with ``catch_exceptions=False``.

    A ``merge`` invocation gets ``--force`` inserted directly after the
    sub-command unless the caller already supplied it.

    Returns:
        ``(exit_code, output)`` taken from the runner's result object.
    """
    argv = [*args]
    is_merge = bool(argv) and argv[0] == "merge"
    if is_merge and "--force" not in argv:
        argv[1:1] = ["--force"]
    outcome = runner.invoke(cli, argv, env=_env(root), catch_exceptions=False)
    return outcome.exit_code, outcome.output
79
80
def _run_unchecked(root: pathlib.Path, *args: str) -> tuple[int, str]:
    """Invoke the CLI against *root* using the runner's default exception handling.

    Same argument rewriting as ``_run`` (``merge`` gets ``--force`` inserted
    after the sub-command when absent), but ``catch_exceptions`` is not passed,
    so the runner's own default applies.

    Returns:
        ``(exit_code, output)`` taken from the runner's result object.
    """
    argv = [*args]
    is_merge = bool(argv) and argv[0] == "merge"
    if is_merge and "--force" not in argv:
        argv[1:1] = ["--force"]
    outcome = runner.invoke(cli, argv, env=_env(root))
    return outcome.exit_code, outcome.output
87
88
def _store_object(root: pathlib.Path, content: bytes) -> str:
    """Persist *content* in the object store under *root*; return its object ID."""
    object_id = blob_id(content)
    write_object(root, object_id, content)
    return object_id
94
95
def _object_path(root: pathlib.Path, oid: str) -> pathlib.Path:
    """Return whatever ``object_path`` reports for *oid* under *root*."""
    location = object_path(root, oid)
    return location
98
99
def _init_repo(tmp_path: pathlib.Path, domain: str = "code") -> tuple[pathlib.Path, str]:
    """Lay out a bare repository skeleton under *tmp_path*.

    Creates the muse directory with ``repo.json``, ``refs/heads``, ``objects``
    and a ``HEAD`` file pointing at ``refs/heads/main``.

    Returns:
        ``(tmp_path, repo_id)`` where *repo_id* is the identifier written into
        ``repo.json``.
    """
    meta_dir = muse_dir(tmp_path)
    meta_dir.mkdir()

    repo_id = fake_id("repo")
    descriptor = {
        "repo_id": repo_id,
        "domain": domain,
        "version": "1.0.0",
    }
    (meta_dir / "repo.json").write_text(json.dumps(descriptor))

    heads_dir = meta_dir / "refs" / "heads"
    heads_dir.mkdir(parents=True)
    objects_dir = meta_dir / "objects"
    objects_dir.mkdir()

    head_file = meta_dir / "HEAD"
    head_file.write_text("ref: refs/heads/main\n")
    return tmp_path, repo_id
113
114
def _head_manifest(root: pathlib.Path, branch: str) -> Manifest:
    """Return a copy of the manifest recorded for *branch*'s current ref.

    Reads the branch ref file, loads the commit it names, then loads that
    commit's snapshot.  Asserts that both the commit and the snapshot exist.
    """
    from muse.core.commits import read_commit
    from muse.core.snapshots import read_snapshot

    ref_file = ref_path(root, branch)
    commit_id = ref_file.read_text().strip()

    commit = read_commit(root, commit_id)
    assert commit is not None

    snapshot = read_snapshot(root, commit.snapshot_id)
    assert snapshot is not None

    return dict(snapshot.manifest)
124
125
def _write_disk(root: pathlib.Path, rel_path: str, content: bytes) -> str:
    """Write *content* to ``root / rel_path`` and to the object store.

    Parent directories of the on-disk file are created as needed.

    Returns:
        The object ID under which *content* was stored.
    """
    target = root / rel_path
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_bytes(content)

    # Same steps as the module's ``_store_object`` helper, spelled out inline.
    object_id = blob_id(content)
    write_object(root, object_id, content)
    return object_id
132
133
134 # ===========================================================================
135 # I restore_object atomicity
136 # ===========================================================================
137
138
class TestRestoreObjectAtomicityI:
    """restore_object must use atomic writes so a crash mid-copy never
    leaves a partial file at the destination."""

    def test_I1_successful_restore_produces_correct_content(
        self, tmp_path: pathlib.Path
    ) -> None:
        """I1: happy-path restore writes exact bytes to dest."""
        root, _ = _init_repo(tmp_path)
        content = b"hello world\n" * 100
        oid = _store_object(root, content)
        dest = tmp_path / "out.bin"
        assert restore_object(root, oid, dest)
        assert dest.read_bytes() == content

    def test_I2_restore_overwrites_existing_file(self, tmp_path: pathlib.Path) -> None:
        """I2: restore replaces whatever was at dest (no skip-if-exists)."""
        root, _ = _init_repo(tmp_path)
        old = b"old content\n"
        new = b"new content\n"
        dest = tmp_path / "f.txt"
        dest.write_bytes(old)

        oid = _store_object(root, new)
        assert restore_object(root, oid, dest)
        assert dest.read_bytes() == new

    def test_I3_restore_missing_object_returns_false_does_not_touch_dest(
        self, tmp_path: pathlib.Path
    ) -> None:
        """I3: missing object → False, pre-existing dest left intact."""
        root, _ = _init_repo(tmp_path)
        sentinel = b"sentinel\n"
        dest = tmp_path / "existing.txt"
        dest.write_bytes(sentinel)

        # The ID is computed but the content is never written to the store.
        fake_oid = blob_id(b"nonexistent")
        assert not restore_object(root, fake_oid, dest)
        assert dest.read_bytes() == sentinel

    def test_I4_restore_creates_parent_directories(
        self, tmp_path: pathlib.Path
    ) -> None:
        """I4: dest parent dirs are created automatically."""
        root, _ = _init_repo(tmp_path)
        content = b"deep\n"
        oid = _store_object(root, content)
        dest = tmp_path / "a" / "b" / "c" / "deep.txt"
        assert not dest.parent.exists()
        assert restore_object(root, oid, dest)
        assert dest.read_bytes() == content

    def test_I5_atomic_write_leaves_no_tmp_file_on_success(
        self, tmp_path: pathlib.Path
    ) -> None:
        """I5: after a successful restore no .restore-tmp-* file lingers."""
        root, _ = _init_repo(tmp_path)
        content = b"data\n"
        oid = _store_object(root, content)
        dest = tmp_path / "target.txt"
        # The restore must actually succeed; otherwise the "no temp files"
        # check below would pass vacuously on a restore that never ran.
        assert restore_object(root, oid, dest)
        tmps = list(tmp_path.glob(".restore-tmp-*"))
        assert tmps == [], f"Stale tmp files found: {tmps}"

    def test_I6_restore_hash_after_restore_matches_object_id(
        self, tmp_path: pathlib.Path
    ) -> None:
        """I6: the restored file's hash matches the object_id exactly."""
        root, _ = _init_repo(tmp_path)
        content = b"integrity\n" * 1000
        oid = _store_object(root, content)
        dest = tmp_path / "verified.bin"
        # Fail here with a clear cause rather than inside hash_file on a
        # destination that was never written.
        assert restore_object(root, oid, dest)
        actual = hash_file(dest)
        assert actual == oid, f"Hash mismatch after restore: {actual[:8]} ≠ {oid[:8]}"

    def test_I7_restore_large_file_correct_content(
        self, tmp_path: pathlib.Path
    ) -> None:
        """I7: 10 MiB blob survives a round-trip through the object store."""
        root, _ = _init_repo(tmp_path)
        content = os.urandom(10 * 1024 * 1024)
        oid = _store_object(root, content)
        dest = tmp_path / "large.bin"
        assert restore_object(root, oid, dest)
        assert dest.read_bytes() == content
225
226
227 # ===========================================================================
228 # II apply_manifest — missing object must raise, never silently skip
229 # ===========================================================================
230
231
class TestApplyManifestMissingObjectII:
    """apply_manifest must refuse to proceed quietly when an object is absent."""

    def test_II1_missing_single_object_raises_runtime_error(
        self, tmp_path: pathlib.Path
    ) -> None:
        """II1: a lone absent object triggers RuntimeError rather than a skip."""
        repo, _repo_id = _init_repo(tmp_path)
        target = {"ghost.txt": blob_id(b"ghost")}
        with pytest.raises(RuntimeError, match="missing from the local store"):
            apply_manifest(repo, {}, target)

    def test_II2_error_message_names_the_missing_path(
        self, tmp_path: pathlib.Path
    ) -> None:
        """II2: the raised message mentions the path whose object is absent."""
        repo, _repo_id = _init_repo(tmp_path)
        target = {"crucial/file.py": blob_id(b"abc")}
        with pytest.raises(RuntimeError) as raised:
            apply_manifest(repo, {}, target)
        message = str(raised.value)
        assert "crucial/file.py" in message

    def test_II3_partial_manifest_some_missing_raises(
        self, tmp_path: pathlib.Path
    ) -> None:
        """II3: one absent object among several is enough to raise."""
        repo, _repo_id = _init_repo(tmp_path)
        present = _write_disk(repo, "exists.txt", b"ok\n")
        absent = blob_id(b"not stored")
        target = {"exists.txt": present, "missing.txt": absent}
        with pytest.raises(RuntimeError):
            apply_manifest(repo, {}, target)

    def test_II4_all_objects_present_succeeds(
        self, tmp_path: pathlib.Path
    ) -> None:
        """II4: with every object stored, the files are written back to disk."""
        repo, _repo_id = _init_repo(tmp_path)
        first = _write_disk(repo, "a.txt", b"aaa\n")
        second = _write_disk(repo, "b.txt", b"bbb\n")
        # Remove the on-disk copies so only the object store holds the content.
        for name in ("a.txt", "b.txt"):
            (repo / name).unlink()

        apply_manifest(repo, {}, {"a.txt": first, "b.txt": second})

        assert (repo / "a.txt").read_bytes() == b"aaa\n"
        assert (repo / "b.txt").read_bytes() == b"bbb\n"

    def test_II5_multiple_missing_reported_in_error(
        self, tmp_path: pathlib.Path
    ) -> None:
        """II5: the message reports the count when ten objects are absent."""
        repo, _repo_id = _init_repo(tmp_path)
        target = {}
        for index in range(10):
            target[f"f{index}.py"] = blob_id(f"fake{index}".encode())
        with pytest.raises(RuntimeError) as raised:
            apply_manifest(repo, {}, target)
        assert "10 object(s)" in str(raised.value)

    def test_II6_apply_manifest_removes_files_not_in_target(
        self, tmp_path: pathlib.Path
    ) -> None:
        """II6: a previously tracked file missing from the target is removed."""
        repo, _repo_id = _init_repo(tmp_path)
        kept = _write_disk(repo, "keep.txt", b"keep\n")
        dropped = _store_object(repo, b"delete\n")
        doomed = repo / "delete_me.txt"
        doomed.write_bytes(b"delete\n")

        # delete_me.txt appears only in the previous manifest, so it must go.
        previous = {"keep.txt": kept, "delete_me.txt": dropped}
        target = {"keep.txt": kept}
        apply_manifest(repo, previous, target)

        assert not doomed.exists()
        assert (repo / "keep.txt").exists()

    def test_II7_empty_manifest_non_empty_prev_raises_value_error(
        self, tmp_path: pathlib.Path
    ) -> None:
        """II7: data-loss guard — an empty target with a non-empty previous
        manifest raises ValueError."""
        repo, _repo_id = _init_repo(tmp_path)
        stored = _store_object(repo, b"data\n")
        previous = {"file.txt": stored}
        with pytest.raises(ValueError, match="empty target_manifest"):
            apply_manifest(repo, previous, {})
313
314
315 # ===========================================================================
316 # III verify_workdir_integrity — utility correctness
317 # ===========================================================================
318
319
class TestVerifyWorkdirIntegrityIII:
    """verify_workdir_integrity has to surface every kind of workdir drift."""

    def test_III1_clean_workdir_returns_empty_list(
        self, tmp_path: pathlib.Path
    ) -> None:
        """III1: a workdir identical to the manifest yields no mismatches."""
        repo, _repo_id = _init_repo(tmp_path)
        manifest = {"a.py": _write_disk(repo, "a.py", b"x = 1\n")}
        assert verify_workdir_integrity(repo, manifest) == []

    def test_III2_modified_file_detected(self, tmp_path: pathlib.Path) -> None:
        """III2: a file rewritten behind the manifest's back is one mismatch."""
        repo, _repo_id = _init_repo(tmp_path)
        committed = _write_disk(repo, "f.py", b"original\n")

        (repo / "f.py").write_bytes(b"tampered\n")

        report = verify_workdir_integrity(repo, {"f.py": committed})
        assert len(report) == 1
        reported_path, reported_expected, reported_actual = report[0]
        assert reported_path == "f.py"
        assert reported_expected == committed
        assert reported_actual != committed
        assert reported_actual is not None

    def test_III3_missing_file_detected(self, tmp_path: pathlib.Path) -> None:
        """III3: a manifest entry with no file on disk is reported with a
        ``None`` actual value."""
        repo, _repo_id = _init_repo(tmp_path)
        committed = _write_disk(repo, "gone.py", b"gone\n")
        (repo / "gone.py").unlink()

        report = verify_workdir_integrity(repo, {"gone.py": committed})
        hits = [entry for entry in report if entry[0] == "gone.py" and entry[2] is None]
        assert hits

    def test_III4_extra_tracked_file_detected(
        self, tmp_path: pathlib.Path
    ) -> None:
        """III4: a file on disk that the manifest does not list is reported."""
        repo, _repo_id = _init_repo(tmp_path)
        committed = _write_disk(repo, "tracked.py", b"ok\n")
        (repo / "extra.py").write_bytes(b"extra\n")

        report = verify_workdir_integrity(repo, {"tracked.py": committed})
        assert any(entry[0] == "extra.py" for entry in report), "Extra file not reported"

    def test_III5_empty_manifest_empty_workdir_clean(
        self, tmp_path: pathlib.Path
    ) -> None:
        """III5: an empty manifest against an empty workdir is clean."""
        repo, _repo_id = _init_repo(tmp_path)
        report = verify_workdir_integrity(repo, {})
        assert report == []

    def test_III6_multiple_mismatches_all_reported(
        self, tmp_path: pathlib.Path
    ) -> None:
        """III6: every mismatch is returned, not only the first one found."""
        repo, _repo_id = _init_repo(tmp_path)
        manifest: Manifest = {
            f"f{index}.py": _write_disk(repo, f"f{index}.py", f"v={index}\n".encode())
            for index in range(20)
        }

        # Overwrite the even-numbered half of the files.
        for index in range(20):
            if index % 2 == 0:
                (repo / f"f{index}.py").write_bytes(b"tampered\n")

        mismatches = verify_workdir_integrity(repo, manifest)
        assert len(mismatches) == 10, f"Expected 10 mismatches, got {len(mismatches)}"

    def test_III7_correct_content_after_apply_manifest(
        self, tmp_path: pathlib.Path
    ) -> None:
        """III7: a workdir rebuilt by apply_manifest verifies as clean."""
        repo, _repo_id = _init_repo(tmp_path)
        manifest = {
            "a.py": _write_disk(repo, "a.py", b"a = 1\n"),
            "b.py": _write_disk(repo, "b.py", b"b = 2\n"),
        }
        for name in ("a.py", "b.py"):
            (repo / name).unlink()

        apply_manifest(repo, {}, dict(manifest))
        mismatches = verify_workdir_integrity(repo, dict(manifest))
        assert mismatches == [], f"Expected clean after apply_manifest: {mismatches}"
403
404
405 # ===========================================================================
406 # IV checkout → workdir always matches the target snapshot
407 # ===========================================================================
408
409
class TestCheckoutWorkdirIntegrityIV:
    """After muse checkout, the working tree must byte-for-byte match
    the target branch's committed snapshot. No exceptions."""

    def _checkout_and_verify(self, root: pathlib.Path, branch: str) -> None:
        """Check out *branch* and assert the workdir matches its head snapshot.

        Fails with the CLI output if the checkout exits non-zero, and with a
        preview of up to five mismatches if the workdir has drifted.
        """
        code, out = _run(root, "checkout", branch)
        assert code == 0, out
        mismatches = verify_workdir_integrity(root, _head_manifest(root, branch))
        preview = "\n".join(f" {m}" for m in mismatches[:5])
        assert mismatches == [], (
            f"DATA LOSS: {len(mismatches)} mismatch(es) after checkout {branch}:\n"
            f"{preview}"
        )

    def _full_roundtrip(self, tmp_path: pathlib.Path, n_files: int) -> None:
        """Create two branches with different content, checkout between them,
        verify integrity on each switch.

        ``main`` receives *n_files* files under ``src/m*.py`` and ``feat``
        additionally receives *n_files* files under ``src/f*.py``.
        """
        root, _ = _init_repo(tmp_path)
        # NOTE(review): the exit code of ``init`` is not asserted, as in the
        # original test — confirm whether ``init`` is expected to succeed on
        # the skeleton that ``_init_repo`` already laid out.
        _run(root, "init", str(root))

        for i in range(n_files):
            _write_disk(root, f"src/m{i}.py", f"# main {i}\n".encode())
        code, out = _run(root, "commit", "--allow-empty", "-m", "main files")
        assert code == 0, out

        code, out = _run(root, "branch", "feat")
        assert code == 0, out
        code, out = _run(root, "checkout", "feat")
        assert code == 0, out

        for i in range(n_files):
            _write_disk(root, f"src/f{i}.py", f"# feat {i}\n".encode())
        code, out = _run(root, "commit", "-m", "feat files")
        assert code == 0, out

        # Switch back to main, then to feat again — each must verify clean.
        self._checkout_and_verify(root, "main")
        self._checkout_and_verify(root, "feat")

    def test_IV1_checkout_10_files_workdir_matches_snapshot(
        self, tmp_path: pathlib.Path
    ) -> None:
        """IV1: 10-file repo checkout — workdir matches target snapshot."""
        self._full_roundtrip(tmp_path, 10)

    def test_IV2_checkout_50_files_workdir_matches_snapshot(
        self, tmp_path: pathlib.Path
    ) -> None:
        """IV2: 50-file repo checkout — workdir matches target snapshot."""
        self._full_roundtrip(tmp_path, 50)

    def test_IV3_repeated_checkout_workdir_consistent(
        self, tmp_path: pathlib.Path
    ) -> None:
        """IV3: switching back and forth 10 times never corrupts the workdir."""
        root, _ = _init_repo(tmp_path)
        _run(root, "init", str(root))

        _write_disk(root, "f.py", b"version_a\n")
        _run(root, "commit", "--allow-empty", "-m", "main")
        _run(root, "branch", "feat")
        _run(root, "checkout", "feat")
        _write_disk(root, "f.py", b"version_b\n")
        _run(root, "commit", "-m", "feat")

        snapshots = {
            "main": _head_manifest(root, "main"),
            "feat": _head_manifest(root, "feat"),
        }

        for i in range(10):
            branch = "main" if i % 2 == 0 else "feat"
            code, out = _run(root, "checkout", branch)
            # Surface a failed checkout directly instead of as an opaque
            # mismatch list further down.
            assert code == 0, f"Iteration {i}: checkout {branch} failed:\n{out}"
            mismatches = verify_workdir_integrity(root, snapshots[branch])
            assert mismatches == [], (
                f"Iteration {i}: mismatch after checkout {branch}: {mismatches}"
            )

    def test_IV4_checkout_restores_file_modified_between_branches(
        self, tmp_path: pathlib.Path
    ) -> None:
        """IV4: a file modified on one branch is correctly restored when
        switching to a branch that has the original version."""
        root, _ = _init_repo(tmp_path)
        _run(root, "init", str(root))

        _write_disk(root, "shared.py", b"# v1\n")
        _run(root, "commit", "--allow-empty", "-m", "base")
        _run(root, "branch", "feat")

        # Still on main: advance it to v2 while feat stays at v1.
        _write_disk(root, "shared.py", b"# v2\n")
        _run(root, "commit", "-m", "main v2")

        _run(root, "checkout", "feat")
        assert (root / "shared.py").read_bytes() == b"# v1\n", (
            "feat branch should have v1 of shared.py"
        )

        _run(root, "checkout", "main")
        assert (root / "shared.py").read_bytes() == b"# v2\n", (
            "main branch should have v2 of shared.py"
        )

    def test_IV5_checkout_deletes_files_not_in_target_branch(
        self, tmp_path: pathlib.Path
    ) -> None:
        """IV5: files added on one branch are deleted when switching away."""
        root, _ = _init_repo(tmp_path)
        _run(root, "init", str(root))

        _write_disk(root, "base.py", b"base\n")
        _run(root, "commit", "--allow-empty", "-m", "base")
        _run(root, "branch", "feat")
        _run(root, "checkout", "feat")

        _write_disk(root, "feat_only.py", b"feat\n")
        _run(root, "commit", "-m", "feat_only")

        _run(root, "checkout", "main")
        assert not (root / "feat_only.py").exists(), (
            "feat-only file should not exist on main branch"
        )
541
542
543 # ===========================================================================
544 # V fast-forward merge → workdir always matches target snapshot
545 # ===========================================================================
546
547
class TestFFMergeWorkdirIntegrityV:
    """A fast-forward merge must update the working tree to match the
    incoming branch's snapshot — ALL files, not just the delta."""

    def test_V1_ff_merge_all_files_restored(self, tmp_path: pathlib.Path) -> None:
        """V1: after FF merge every file in the target manifest is on disk
        with the correct content."""
        root, _ = _init_repo(tmp_path)
        _run(root, "init", str(root))

        _write_disk(root, "base.py", b"base\n")
        _run(root, "commit", "--allow-empty", "-m", "base")
        _run(root, "branch", "feat")
        _run(root, "checkout", "feat")

        for i in range(30):
            _write_disk(root, f"f{i}.py", f"# feat {i}\n".encode())
        _run(root, "commit", "-m", "feat 30 files")

        _run(root, "checkout", "main")
        code, out = _run(root, "merge", "feat")
        assert code == 0, out

        merged = _head_manifest(root, "main")
        mismatches = verify_workdir_integrity(root, merged)
        preview = "\n".join(f" {m}" for m in mismatches[:5])
        assert mismatches == [], (
            f"DATA LOSS after FF merge: {len(mismatches)} mismatch(es)\n"
            f"{preview}"
        )

    def test_V2_ff_merge_correct_content_not_just_present(
        self, tmp_path: pathlib.Path
    ) -> None:
        """V2: FF merge writes the correct *content*, not just the correct filename."""
        root, _ = _init_repo(tmp_path)
        _run(root, "init", str(root))

        old_content = b"# old version\n"
        new_content = b"# new version\n"

        _write_disk(root, "important.py", old_content)
        _run(root, "commit", "--allow-empty", "-m", "base")
        _run(root, "branch", "feat")
        _run(root, "checkout", "feat")

        _write_disk(root, "important.py", new_content)
        _run(root, "commit", "-m", "update important.py")

        _run(root, "checkout", "main")
        # File on disk is now old_content
        assert (root / "important.py").read_bytes() == old_content

        code, out = _run(root, "merge", "feat")
        # Report a failed merge as such, not as a confusing content mismatch.
        assert code == 0, out
        # File on disk must now be new_content
        actual = (root / "important.py").read_bytes()
        assert actual == new_content, (
            f"FF merge did not restore correct content. "
            f"Expected {new_content!r}, got {actual!r}"
        )

    def test_V3_ff_merge_workdir_matches_snapshot_byte_for_byte(
        self, tmp_path: pathlib.Path
    ) -> None:
        """V3: verify_workdir_integrity confirms zero drift after FF merge."""
        root, _ = _init_repo(tmp_path)
        _run(root, "init", str(root))

        _write_disk(root, "a.py", b"a\n")
        _run(root, "commit", "--allow-empty", "-m", "base")
        _run(root, "branch", "feat")
        _run(root, "checkout", "feat")

        for i in range(20):
            _write_disk(root, f"feat_{i}.py", f"feat{i}\n".encode())
        _run(root, "commit", "-m", "feat 20 files")

        _run(root, "checkout", "main")
        code, out = _run(root, "merge", "feat")
        # If the merge failed, main's head would still be the base snapshot
        # and the untouched workdir would match it, so the integrity check
        # below would pass without a merge ever having happened.
        assert code == 0, out

        merged_snap = _head_manifest(root, "main")
        mismatches = verify_workdir_integrity(root, merged_snap)
        assert mismatches == []
632
633
634 # ===========================================================================
635 # VI checkout aborts hard when an object is missing from the store
636 # ===========================================================================
637
638
class TestCheckoutMissingObjectVI:
    """Checkout must refuse to proceed when an object it needs is absent
    from the local object store. The partial-checkout silent-data-loss
    path is now closed."""

    def _setup_ok(self, root: pathlib.Path, *args: str) -> None:
        """Run a setup CLI step and assert it exits with code 0.

        The tests in this class assert a *non-zero* exit at the end, so every
        preceding step has to be known-good; otherwise an earlier failure
        (for example a branch that was never created) could satisfy the final
        assertion for the wrong reason.
        """
        code, out = _run(root, *args)
        assert code == 0, f"setup step {' '.join(args)!r} failed:\n{out}"

    def test_VI1_checkout_to_branch_with_missing_object_aborts(
        self, tmp_path: pathlib.Path
    ) -> None:
        """VI1: if a required object is purged from the store, checkout
        exits non-zero.

        Only the exit code is asserted here; the state of the workdir after
        the aborted checkout is not inspected.
        """
        root, _ = _init_repo(tmp_path)
        # The exit code of ``init`` is left unchecked, as elsewhere in this module.
        _run(root, "init", str(root))

        _write_disk(root, "base.py", b"base\n")
        self._setup_ok(root, "commit", "--allow-empty", "-m", "base")
        self._setup_ok(root, "branch", "feat")
        self._setup_ok(root, "checkout", "feat")

        content = b"# feat file\n"
        oid = _write_disk(root, "feat.py", content)
        self._setup_ok(root, "commit", "-m", "feat")
        self._setup_ok(root, "checkout", "main")

        # Purge the feat.py object from the store — simulates a corruption
        obj = _object_path(root, oid)
        obj.unlink()

        code, out = _run_unchecked(root, "checkout", "feat")
        assert code != 0, (
            "Checkout should fail when a required object is missing from the store"
        )

    def test_VI2_apply_manifest_raises_on_missing_object_not_silent(
        self, tmp_path: pathlib.Path
    ) -> None:
        """VI2: apply_manifest raises RuntimeError (not returns None or logs
        a warning) when an object is missing."""
        root, _ = _init_repo(tmp_path)
        ghost_oid = blob_id(b"not in store")
        with pytest.raises(RuntimeError) as exc_info:
            apply_manifest(root, {}, {"ghost.py": ghost_oid})
        assert "missing from the local store" in str(exc_info.value)

    def test_VI3_ff_merge_aborts_if_incoming_object_missing(
        self, tmp_path: pathlib.Path
    ) -> None:
        """VI3: FF merge aborts if an object from the target snapshot is
        not in the local store."""
        root, _ = _init_repo(tmp_path)
        # The exit code of ``init`` is left unchecked, as elsewhere in this module.
        _run(root, "init", str(root))

        _write_disk(root, "base.py", b"base\n")
        self._setup_ok(root, "commit", "--allow-empty", "-m", "base")
        self._setup_ok(root, "branch", "feat")
        self._setup_ok(root, "checkout", "feat")

        content = b"# critical\n"
        oid = _write_disk(root, "critical.py", content)
        self._setup_ok(root, "commit", "-m", "critical")
        self._setup_ok(root, "checkout", "main")

        # Purge the object after committing
        _object_path(root, oid).unlink()

        code, out = _run_unchecked(root, "merge", "feat")
        assert code != 0, "FF merge should fail when a target object is missing"
706
707
708 # ===========================================================================
709 # VII Editor-cache simulation — status must detect stale-cache workdir drift
710 # ===========================================================================
711
712
class TestEditorCacheSimulationVII:
    """Simulates the exact incident: the editor had a cached (stale) version
    of a file. After a merge updated the on-disk file, the editor wrote the
    stale version back, corrupting the workdir.

    Muse cannot prevent an editor from writing stale data, but it CAN detect
    the drift via `muse status` (which compares workdir hashes against HEAD)
    and via `verify_workdir_integrity`.
    """

    def test_VII1_status_detects_workdir_drift_after_external_write(
        self, tmp_path: pathlib.Path
    ) -> None:
        """VII1: if a file is externally overwritten to an old version,
        muse status reports it as modified."""
        root, _ = _init_repo(tmp_path)
        _run(root, "init", str(root))

        old_content = b"# version 1\n"
        new_content = b"# version 2\n"

        _write_disk(root, "plugin.py", old_content)
        _run(root, "commit", "--allow-empty", "-m", "v1")
        _run(root, "branch", "feat")
        _run(root, "checkout", "feat")

        _write_disk(root, "plugin.py", new_content)
        _run(root, "commit", "-m", "v2")
        _run(root, "checkout", "main")
        code, out = _run(root, "merge", "feat")
        # The scenario requires HEAD to hold new_content before the stale write.
        assert code == 0, out

        # HEAD now says plugin.py = new_content.
        # Simulate editor writing back the stale old version.
        (root / "plugin.py").write_bytes(old_content)

        code, out = _run(root, "status")
        assert code == 0
        assert "plugin.py" in out, (
            "muse status must report plugin.py as modified after stale write"
        )

    def test_VII2_verify_workdir_integrity_catches_stale_editor_write(
        self, tmp_path: pathlib.Path
    ) -> None:
        """VII2: verify_workdir_integrity spots the stale-editor-cache corruption."""
        root, _ = _init_repo(tmp_path)
        _run(root, "init", str(root))

        old_content = b"# old\n"
        new_content = b"# new\n"

        # The returned object IDs are not needed: the expected state is read
        # back from the committed snapshot further down.
        _write_disk(root, "plugin.py", old_content)
        _run(root, "commit", "--allow-empty", "-m", "base")
        _run(root, "branch", "feat")
        _run(root, "checkout", "feat")

        _write_disk(root, "plugin.py", new_content)
        _run(root, "commit", "-m", "feat")
        _run(root, "checkout", "main")
        code, out = _run(root, "merge", "feat")
        assert code == 0, out

        # Simulate editor stale write
        (root / "plugin.py").write_bytes(old_content)

        head_snap = _head_manifest(root, "main")
        mismatches = verify_workdir_integrity(root, head_snap)
        assert any(m[0] == "plugin.py" for m in mismatches), (
            "verify_workdir_integrity must detect the stale file"
        )

    def test_VII3_correct_state_after_reapplying_manifest(
        self, tmp_path: pathlib.Path
    ) -> None:
        """VII3: after detecting stale-editor drift, re-applying the manifest
        restores correct state and verify_workdir_integrity is clean."""
        root, _ = _init_repo(tmp_path)
        _run(root, "init", str(root))

        _write_disk(root, "plugin.py", b"# old\n")
        _run(root, "commit", "--allow-empty", "-m", "base")
        _run(root, "branch", "feat")
        _run(root, "checkout", "feat")

        _write_disk(root, "plugin.py", b"# new\n")
        _run(root, "commit", "-m", "feat")
        _run(root, "checkout", "main")
        code, out = _run(root, "merge", "feat")
        assert code == 0, out

        # Stale write
        (root / "plugin.py").write_bytes(b"# stale\n")

        head_snap = _head_manifest(root, "main")
        # Repair by re-applying the manifest
        apply_manifest(root, head_snap, head_snap)
        mismatches = verify_workdir_integrity(root, head_snap)
        assert mismatches == [], "After re-applying manifest, workdir must be clean"
809
810
811 # ===========================================================================
812 # VIII Stress tests
813 # ===========================================================================
814
815
class TestStressWorkdirVIII:
    """High-volume, adversarial scenarios to eliminate the entire class of
    workdir corruption bugs.

    Each test builds a repository under ``tmp_path``, performs the operation
    under stress and audits the working tree with
    ``verify_workdir_integrity``.  That audit is only as strong as the
    manifest handed to it, so wherever a test knows which files it committed
    it also asserts that those files appear in the snapshot (or on disk)
    before trusting an empty mismatch list.
    """

    def test_VIII1_500_file_checkout_all_match_snapshot(
        self, tmp_path: pathlib.Path
    ) -> None:
        """VIII1: 500-file repo — every file matches the committed snapshot
        after every checkout."""
        root, _ = _init_repo(tmp_path)
        _run(root, "init", str(root))

        manifest_main: Manifest = {}
        for i in range(500):
            name = f"main_{i:04d}.py"
            manifest_main[name] = _write_disk(root, name, f"# main {i}\n".encode())
        code, out = _run(root, "commit", "--allow-empty", "-m", "main 500")
        assert code == 0, out

        _run(root, "branch", "feat")
        _run(root, "checkout", "feat")

        manifest_feat: Manifest = {}
        for i in range(500):
            name = f"feat_{i:04d}.py"
            manifest_feat[name] = _write_disk(root, name, f"# feat {i}\n".encode())
        code, out = _run(root, "commit", "-m", "feat 500")
        assert code == 0, out

        code, out = _run(root, "checkout", "main")
        assert code == 0, out
        snp = _head_manifest(root, "main")
        # Guard against a vacuous pass: the snapshot being audited must
        # actually contain every file committed on main.
        absent = [p for p, oid in manifest_main.items() if snp.get(p) != oid]
        assert not absent, f"{len(absent)} main file(s) missing from main snapshot"
        mismatches = verify_workdir_integrity(root, snp)
        assert mismatches == [], f"{len(mismatches)} mismatch(es) on main"

        code, out = _run(root, "checkout", "feat")
        assert code == 0, out
        snp = _head_manifest(root, "feat")
        # feat was branched from main after the first commit, so its snapshot
        # is expected to carry both sets of files.
        expected_feat: Manifest = {**manifest_main, **manifest_feat}
        absent = [p for p, oid in expected_feat.items() if snp.get(p) != oid]
        assert not absent, f"{len(absent)} file(s) missing from feat snapshot"
        mismatches = verify_workdir_integrity(root, snp)
        assert mismatches == [], f"{len(mismatches)} mismatch(es) on feat"

    def test_VIII2_ff_merge_500_files_all_correct(
        self, tmp_path: pathlib.Path
    ) -> None:
        """VIII2: FF merge with 500 incoming files — all correct after merge."""
        root, _ = _init_repo(tmp_path)
        _run(root, "init", str(root))

        _write_disk(root, "base.py", b"base\n")
        _run(root, "commit", "--allow-empty", "-m", "base")
        _run(root, "branch", "feat")
        _run(root, "checkout", "feat")

        expected: Manifest = {}
        for i in range(500):
            content = f"# file {i} unique content {content_hash({'i': i})}\n".encode()
            oid = _write_disk(root, f"feat_{i:04d}.py", content)
            expected[f"feat_{i:04d}.py"] = oid
        code, out = _run(root, "commit", "-m", "feat 500")
        assert code == 0, out

        _run(root, "checkout", "main")
        code, out = _run(root, "merge", "feat")
        assert code == 0, out

        merged = _head_manifest(root, "main")
        # The merge must have brought every incoming file into main's
        # snapshot; otherwise the integrity audit below proves nothing.
        absent = [p for p, oid in expected.items() if merged.get(p) != oid]
        assert not absent, (
            f"DATA LOSS: {len(absent)} incoming file(s) absent from merged snapshot"
        )
        mismatches = verify_workdir_integrity(root, merged)
        assert mismatches == [], (
            f"DATA LOSS: {len(mismatches)} file(s) wrong after 500-file FF merge"
        )

    def test_VIII3_alternating_checkout_never_drifts(
        self, tmp_path: pathlib.Path
    ) -> None:
        """VIII3: 20 alternating checkout cycles — workdir never drifts."""
        root, _ = _init_repo(tmp_path)
        _run(root, "init", str(root))

        _write_disk(root, "shared.py", b"# version A\n")
        _run(root, "commit", "--allow-empty", "-m", "main")
        snap_main = _head_manifest(root, "main")

        _run(root, "branch", "feat")
        _run(root, "checkout", "feat")
        _write_disk(root, "shared.py", b"# version B\n")
        _run(root, "commit", "-m", "feat")
        snap_feat = _head_manifest(root, "feat")

        # Per-branch expectations: the snapshot to audit against and the
        # exact bytes shared.py must hold after switching to that branch.
        per_branch = {
            "main": (snap_main, b"# version A\n"),
            "feat": (snap_feat, b"# version B\n"),
        }

        for cycle in range(20):
            branch = "main" if cycle % 2 == 0 else "feat"
            code, out = _run(root, "checkout", branch)
            assert code == 0, f"Cycle {cycle}: checkout {branch} failed"
            expected, expected_bytes = per_branch[branch]
            mismatches = verify_workdir_integrity(root, expected)
            assert mismatches == [], (
                f"Cycle {cycle}, branch {branch}: {len(mismatches)} mismatch(es)"
            )
            # Direct byte check, independent of the manifest contents.
            assert (root / "shared.py").read_bytes() == expected_bytes, (
                f"Cycle {cycle}, branch {branch}: shared.py has wrong content"
            )

    def test_VIII4_deep_chain_checkout_base_restores_correctly(
        self, tmp_path: pathlib.Path
    ) -> None:
        """VIII4: deep commit chain — after 20 successive commits to the same
        file, the workdir holds the final iteration and matches HEAD.

        NOTE(review): despite the test name, no checkout of the base commit
        is performed here; only the state at the tip of the chain is audited.
        """
        root, _ = _init_repo(tmp_path)
        _run(root, "init", str(root))

        # Build a chain of 20 commits, each modifying the same file
        for i in range(20):
            _write_disk(root, "evolving.py", f"# iteration {i}\n".encode())
            code, out = _run(root, "commit", "--allow-empty", "-m", f"iter {i}")
            assert code == 0, f"Commit {i} failed: {out}"

        snap = _head_manifest(root, "main")
        assert "evolving.py" in snap, "evolving.py missing from HEAD snapshot"
        assert (root / "evolving.py").read_bytes() == b"# iteration 19\n"
        mismatches = verify_workdir_integrity(root, snap)
        assert mismatches == []

    def test_VIII5_stress_apply_manifest_100_times_deterministic(
        self, tmp_path: pathlib.Path
    ) -> None:
        """VIII5: apply_manifest called 100 times is deterministic and
        always produces an identical workdir."""
        root, _ = _init_repo(tmp_path)
        manifest: Manifest = {}
        for i in range(50):
            oid = _write_disk(root, f"f{i}.py", f"content {i}\n".encode())
            manifest[f"f{i}.py"] = oid

        # Re-applying the same manifest onto itself must leave the workdir
        # unchanged on every trial.
        for trial in range(100):
            apply_manifest(root, manifest, manifest)
            mismatches = verify_workdir_integrity(root, manifest)
            assert mismatches == [], (
                f"Trial {trial}: {len(mismatches)} mismatch(es) after apply_manifest"
            )

    def test_VIII6_diamond_topology_workdir_clean_after_all_merges(
        self, tmp_path: pathlib.Path
    ) -> None:
        """VIII6: diamond merge topology — verify integrity at every step."""
        root, _ = _init_repo(tmp_path)
        _run(root, "init", str(root))

        # Base
        _write_disk(root, "base.py", b"base\n")
        _run(root, "commit", "--allow-empty", "-m", "base")

        # Left branch
        _run(root, "branch", "left")
        _run(root, "checkout", "left")
        _write_disk(root, "left.py", b"left\n")
        _run(root, "commit", "-m", "left")

        # Right branch (from main)
        _run(root, "checkout", "main")
        _run(root, "branch", "right")
        _run(root, "checkout", "right")
        _write_disk(root, "right.py", b"right\n")
        _run(root, "commit", "-m", "right")

        # Merge left → main
        _run(root, "checkout", "main")
        code, out = _run(root, "merge", "left")
        assert code == 0, out
        snp = _head_manifest(root, "main")
        assert "left.py" in snp, "left.py missing from main after merging left"
        assert verify_workdir_integrity(root, snp) == []

        # Merge right → main
        code, out = _run(root, "merge", "right")
        assert code == 0, out
        snp = _head_manifest(root, "main")
        lost = {"base.py", "left.py", "right.py"} - set(snp)
        assert not lost, f"missing from main after merging right: {sorted(lost)}"
        assert verify_workdir_integrity(root, snp) == []

    def test_VIII7_binary_files_survive_checkout(
        self, tmp_path: pathlib.Path
    ) -> None:
        """VIII7: binary content (random bytes) survives checkout intact."""
        root, _ = _init_repo(tmp_path)
        _run(root, "init", str(root))

        binary = os.urandom(1024 * 512)  # 512 KiB of random bytes
        _write_disk(root, "data.bin", binary)
        _run(root, "commit", "--allow-empty", "-m", "binary")

        _run(root, "branch", "feat")
        _run(root, "checkout", "feat")
        _write_disk(root, "other.py", b"other\n")
        _run(root, "commit", "-m", "other")

        _run(root, "checkout", "main")
        snp = _head_manifest(root, "main")
        mismatches = verify_workdir_integrity(root, snp)
        assert mismatches == []
        assert (root / "data.bin").read_bytes() == binary

    def test_VIII8_unicode_filenames_survive_checkout(
        self, tmp_path: pathlib.Path
    ) -> None:
        """VIII8: files with unicode path components survive checkout."""
        root, _ = _init_repo(tmp_path)
        _run(root, "init", str(root))

        paths = [
            "src/módulo.py",
            "src/données.txt",
            "src/файл.py",
        ]
        for p in paths:
            _write_disk(root, p, f"# {p}\n".encode())
        _run(root, "commit", "--allow-empty", "-m", "unicode paths")

        _run(root, "branch", "feat")
        _run(root, "checkout", "feat")
        _write_disk(root, "extra.py", b"extra\n")
        _run(root, "commit", "-m", "extra")

        _run(root, "checkout", "main")
        snp = _head_manifest(root, "main")
        mismatches = verify_workdir_integrity(root, snp)
        assert mismatches == []
        # Check the bytes on disk directly so that survival does not depend
        # solely on what the snapshot happens to list.
        for p in paths:
            assert (root / p).read_bytes() == f"# {p}\n".encode(), (
                f"{p} did not survive checkout"
            )

    def test_VIII9_no_data_loss_after_100_consecutive_commits(
        self, tmp_path: pathlib.Path
    ) -> None:
        """VIII9: 100 consecutive commits on main — verify final state is correct."""
        root, _ = _init_repo(tmp_path)
        _run(root, "init", str(root))

        for i in range(100):
            _write_disk(root, f"file_{i:03d}.py", f"# commit {i}\n".encode())
            code, out = _run(root, "commit", "--allow-empty", "-m", f"commit {i}")
            assert code == 0, f"Commit {i} failed: {out}"

        snp = _head_manifest(root, "main")
        assert len(snp) == 100, f"Expected 100 files, got {len(snp)}"
        mismatches = verify_workdir_integrity(root, snp)
        assert mismatches == [], f"{len(mismatches)} mismatch(es) after 100 commits"
1047
1048
1049 # ===========================================================================
1050 # IX apply_manifest must not delete untracked files
1051 # ===========================================================================
1052
1053
class TestApplyManifestUntrackedFilesIX:
    """Untracked files must survive apply_manifest regardless of target manifest.

    Root cause of the bug: apply_manifest used walk_workdir(root) to build
    current_files, which returns ALL files on disk — including files the user
    created that were never committed.  The fix: use prev_manifest.keys() as
    the deletion candidate set so only previously-tracked files are candidates.
    """

    def test_IX1_untracked_file_not_deleted_by_apply_manifest(
        self, tmp_path: pathlib.Path
    ) -> None:
        """IX1: a file never in any manifest must survive apply_manifest."""
        root, _ = _init_repo(tmp_path)

        # prev state: file A is tracked
        oid_a = _write_disk(root, "a.py", b"a = 1\n")
        # target state: file B is tracked
        oid_b = _store_object(root, b"b = 2\n")
        # NOTE(review): b.py is pre-written with the target bytes, so the
        # content assertion below cannot by itself prove that apply_manifest
        # restored it — confirm whether that is intentional.
        (root / "b.py").write_bytes(b"b = 2\n")

        # untracked file — never in any manifest
        untracked = root / "notes.txt"
        untracked.write_bytes(b"my personal notes\n")

        apply_manifest(root, {"a.py": oid_a}, {"b.py": oid_b})

        assert untracked.exists(), "untracked file was deleted by apply_manifest — data loss bug"
        assert (root / "b.py").read_bytes() == b"b = 2\n"
        assert not (root / "a.py").exists(), "a.py was tracked then removed — must be deleted"

    def test_IX2_untracked_dotfile_not_deleted(
        self, tmp_path: pathlib.Path
    ) -> None:
        """IX2: untracked dotfiles and untracked docs (e.g. spec docs written
        by Write tool) must survive."""
        root, _ = _init_repo(tmp_path)

        oid_a = _write_disk(root, "main.py", b"x = 1\n")
        oid_b = _store_object(root, b"x = 2\n")
        (root / "main.py").write_bytes(b"x = 2\n")

        # Untracked file inside an untracked directory.
        spec_doc = root / "docs" / "spec.md"
        spec_doc.parent.mkdir()
        spec_doc.write_bytes(b"# spec\n")

        # An actual dotfile, so the test covers what its name promises.
        dotfile = root / ".scratch.md"
        dotfile.write_bytes(b"# scratch\n")

        apply_manifest(root, {"main.py": oid_a}, {"main.py": oid_b})

        assert spec_doc.exists(), "untracked doc file was deleted by apply_manifest"
        assert dotfile.exists(), "untracked dotfile was deleted by apply_manifest"

    def test_IX3_tracked_file_removed_from_target_gets_deleted(
        self, tmp_path: pathlib.Path
    ) -> None:
        """IX3: files in prev_manifest but not in target must still be deleted."""
        root, _ = _init_repo(tmp_path)

        oid_a = _write_disk(root, "gone.py", b"gone\n")
        oid_b = _write_disk(root, "kept.py", b"kept\n")

        apply_manifest(root, {"gone.py": oid_a, "kept.py": oid_b}, {"kept.py": oid_b})

        assert not (root / "gone.py").exists(), "tracked file removed from target must be deleted"
        assert (root / "kept.py").exists()

    def test_IX4_commit_does_not_delete_untracked_file(
        self, tmp_path: pathlib.Path
    ) -> None:
        """IX4: muse commit must not delete untracked files from the working tree."""
        root, _ = _init_repo(tmp_path)
        _run(root, "init", str(root))

        # First commit with one tracked file
        _write_disk(root, "tracked.py", b"x = 1\n")
        code, out = _run(root, "commit", "-m", "initial")
        assert code == 0, out

        # Write an untracked file (simulates Write tool creating a spec doc)
        untracked = root / "docs" / "spec.md"
        untracked.parent.mkdir()
        untracked.write_bytes(b"# my spec\n")
        assert untracked.exists()

        # Modify tracked file and commit
        (root / "tracked.py").write_bytes(b"x = 2\n")
        code, out = _run(root, "commit", "-m", "update")
        assert code == 0, out

        assert untracked.exists(), (
            "muse commit deleted an untracked file — data loss bug. "
            "apply_manifest must not delete files absent from prev_manifest."
        )
File History 1 commit
sha256:1d3f5470f45db58e32047678debc9438fdded1b2c7332cc743d2b8be32fdafc8 fixing more broken tests Human patch 14 days ago