gabriel / muse public
test_workdir_integrity.py python
1,164 lines 44.7 KB
Raw
sha256:1d3f5470f45db58e32047678debc9438fdded1b2c7332cc743d2b8be32fdafc8 fixing more broken tests Human patch 2 days ago
1 """Zero-data-loss workdir integrity tests.
2
3 What these tests cover
4 ----------------------
5 This suite was written after a real incident where the working tree diverged
6 from the committed snapshot. The root cause chain:
7
8 1. ``restore_object`` used ``shutil.copy2(src, dest)`` directly — not
9 atomic. A crash mid-copy could leave a corrupt destination file.
10 2. ``apply_manifest`` ignored the ``False`` return from ``restore_object``
11 when an object was absent from the store. The file was silently left at
12 its old content; no error surfaced.
13 3. ``_checkout_snapshot`` (incremental delta path) printed a warning when
14 an object was missing but continued — same silent data loss.
15 4. No post-operation integrity verification existed to catch any of the
16 above after the fact.
17
18 Fixes applied:
19 * ``restore_object`` — atomic write: temp file → ``os.replace``.
20 * ``apply_manifest`` — raises ``RuntimeError`` listing every missing object.
21 * ``_checkout_snapshot`` — raises ``SystemExit(INTERNAL_ERROR)`` on missing
22 object; never continues with a partial workdir.
23 * ``verify_workdir_integrity`` — new utility: full hash-based post-op audit.
24
25 Test categories
26 ---------------
27 I restore_object atomicity (temp+replace pattern).
28 II apply_manifest — missing object raises, not silently skips.
29 III verify_workdir_integrity — utility correctness.
30 IV checkout → workdir always matches target snapshot.
31 V fast-forward merge → workdir always matches target snapshot.
32 VI checkout aborts hard when an object is missing from the store.
33 VII Editor-cache simulation — status detects stale-cache workdir drift.
34 VIII Stress tests — 500-file repos, deep chains, diamond DAGs.
35 """
36
37 from __future__ import annotations
38
39 import hashlib
40 import json
41 import os
42 import pathlib
43 import shutil
44 import stat
45 import tempfile
46
47 import pytest
48 from tests.cli_test_helper import CliRunner
49
50 from muse.core.object_store import object_path, restore_object, write_object
51 from muse.core.snapshot import walk_workdir
52 from muse.core.workdir import apply_manifest, verify_workdir_integrity
53 from muse.core.types import Manifest, blob_id, content_hash, fake_id, hash_file
54 from muse.core.paths import muse_dir, ref_path
55
# Shape of the environment mapping passed to ``runner.invoke`` via ``env=``.
type _EnvMap = dict[str, str]

# Module-wide CLI runner shared by the ``_run`` / ``_run_unchecked`` helpers.
runner = CliRunner()
cli = None  # CliRunner ignores this positional
60
61
62 # ---------------------------------------------------------------------------
63 # Shared helpers
64 # ---------------------------------------------------------------------------
65
66
67
68
def _env(root: pathlib.Path) -> _EnvMap:
    """Build the environment mapping that points the CLI at the repo *root*."""
    repo_env: _EnvMap = {"MUSE_REPO_ROOT": str(root)}
    return repo_env
71
72
def _with_forced_merge(args: tuple[str, ...]) -> list[str]:
    """Return *args* as a list, adding ``--force`` to ``merge`` invocations.

    ``--force`` is inserted directly after the ``merge`` sub-command unless the
    caller already supplied it. Every other command is passed through
    unchanged. The input tuple is never mutated.
    """
    final = list(args)
    if final and final[0] == "merge" and "--force" not in final:
        final.insert(1, "--force")
    return final


def _run(root: pathlib.Path, *args: str) -> tuple[int, str]:
    """Invoke the CLI against *root* and return ``(exit_code, output)``.

    The runner is called with ``catch_exceptions=False``.
    NOTE(review): assumes the project's ``CliRunner`` follows Click's
    semantics, i.e. exceptions raised by the command propagate into the
    test instead of being folded into the exit code — confirm.
    """
    result = runner.invoke(
        cli, _with_forced_merge(args), env=_env(root), catch_exceptions=False
    )
    return result.exit_code, result.output


def _run_unchecked(root: pathlib.Path, *args: str) -> tuple[int, str]:
    """Invoke the CLI against *root* with the runner's default exception handling.

    Used by tests that expect the command to fail and only inspect the
    returned ``(exit_code, output)`` pair.
    """
    result = runner.invoke(cli, _with_forced_merge(args), env=_env(root))
    return result.exit_code, result.output
87
88
def _store_object(root: pathlib.Path, content: bytes) -> str:
    """Persist *content* in the object store under *root* and return its ID."""
    object_id = blob_id(content)
    write_object(root, object_id, content)
    return object_id
94
95
def _object_path(root: pathlib.Path, oid: str) -> pathlib.Path:
    """Return where ``object_path`` locates *oid* for the repo at *root*."""
    location = object_path(root, oid)
    return location
98
99
def _init_repo(tmp_path: pathlib.Path, domain: str = "code") -> tuple[pathlib.Path, str]:
    """Lay out a minimal repository skeleton directly on disk.

    Creates the muse directory with ``repo.json``, ``refs/heads``, an empty
    ``objects`` directory and a ``HEAD`` pointing at ``main``.

    Returns:
        ``(tmp_path, repo_id)`` — the repo root and the generated repo id.
    """
    meta_dir = muse_dir(tmp_path)
    meta_dir.mkdir()

    repo_id = fake_id("repo")
    metadata = {
        "repo_id": repo_id,
        "domain": domain,
        "version": "1.0.0",
    }
    (meta_dir / "repo.json").write_text(json.dumps(metadata))

    heads_dir = meta_dir / "refs" / "heads"
    heads_dir.mkdir(parents=True)
    (meta_dir / "objects").mkdir()
    (meta_dir / "HEAD").write_text("ref: refs/heads/main\n")

    return tmp_path, repo_id
113
114
def _head_manifest(root: pathlib.Path, branch: str) -> Manifest:
    """Return a copy of the manifest of the snapshot at the tip of *branch*.

    Follows the branch ref to its commit and then to that commit's snapshot.
    Fails via ``assert`` if either record cannot be read.
    """
    from muse.core.commits import read_commit
    from muse.core.snapshots import read_snapshot

    commit_id = ref_path(root, branch).read_text().strip()

    commit = read_commit(root, commit_id)
    assert commit is not None

    snapshot = read_snapshot(root, commit.snapshot_id)
    assert snapshot is not None

    return dict(snapshot.manifest)
124
125
def _write_disk(root: pathlib.Path, rel_path: str, content: bytes) -> str:
    """Write *content* to ``root / rel_path`` and to the object store.

    Missing parent directories are created. Returns the object id.
    """
    target = root / rel_path
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_bytes(content)
    object_id = _store_object(root, content)
    return object_id
132
133
134 # ===========================================================================
135 # I restore_object atomicity
136 # ===========================================================================
137
138
class TestRestoreObjectAtomicityI:
    """restore_object must use atomic writes so a crash mid-copy never
    leaves a partial file at the destination."""

    def test_I1_successful_restore_produces_correct_content(
        self, tmp_path: pathlib.Path
    ) -> None:
        """I1: happy-path restore writes exact bytes to dest."""
        root, _ = _init_repo(tmp_path)
        content = b"hello world\n" * 100
        oid = _store_object(root, content)
        dest = tmp_path / "out.bin"
        assert restore_object(root, oid, dest)
        assert dest.read_bytes() == content

    def test_I2_restore_overwrites_existing_file(self, tmp_path: pathlib.Path) -> None:
        """I2: restore replaces whatever was at dest (no skip-if-exists)."""
        root, _ = _init_repo(tmp_path)
        old = b"old content\n"
        new = b"new content\n"
        dest = tmp_path / "f.txt"
        dest.write_bytes(old)

        oid = _store_object(root, new)
        assert restore_object(root, oid, dest)
        assert dest.read_bytes() == new

    def test_I3_restore_missing_object_returns_false_does_not_touch_dest(
        self, tmp_path: pathlib.Path
    ) -> None:
        """I3: missing object → False, pre-existing dest left intact."""
        root, _ = _init_repo(tmp_path)
        sentinel = b"sentinel\n"
        dest = tmp_path / "existing.txt"
        dest.write_bytes(sentinel)

        # The id is computed but the content is never written to the store.
        fake_oid = blob_id(b"nonexistent")
        assert not restore_object(root, fake_oid, dest)
        assert dest.read_bytes() == sentinel

    def test_I4_restore_creates_parent_directories(
        self, tmp_path: pathlib.Path
    ) -> None:
        """I4: dest parent dirs are created automatically."""
        root, _ = _init_repo(tmp_path)
        content = b"deep\n"
        oid = _store_object(root, content)
        dest = tmp_path / "a" / "b" / "c" / "deep.txt"
        assert not dest.parent.exists()
        assert restore_object(root, oid, dest)
        assert dest.read_bytes() == content

    def test_I5_atomic_write_leaves_no_tmp_file_on_success(
        self, tmp_path: pathlib.Path
    ) -> None:
        """I5: after a successful restore no .restore-tmp-* file lingers."""
        root, _ = _init_repo(tmp_path)
        content = b"data\n"
        oid = _store_object(root, content)
        dest = tmp_path / "target.txt"
        # Without this assertion a failed restore (nothing written at all)
        # would trivially satisfy the "no tmp file" check below.
        assert restore_object(root, oid, dest), "restore_object reported failure"
        assert dest.read_bytes() == content
        tmps = list(tmp_path.glob(".restore-tmp-*"))
        assert tmps == [], f"Stale tmp files found: {tmps}"

    def test_I6_restore_hash_after_restore_matches_object_id(
        self, tmp_path: pathlib.Path
    ) -> None:
        """I6: the restored file's SHA-256 matches the object_id exactly."""
        root, _ = _init_repo(tmp_path)
        content = b"integrity\n" * 1000
        oid = _store_object(root, content)
        dest = tmp_path / "verified.bin"
        # Fail here with a clear message rather than inside hash_file.
        assert restore_object(root, oid, dest), "restore_object reported failure"
        actual = hash_file(dest)
        assert actual == oid, f"Hash mismatch after restore: {actual[:8]} ≠ {oid[:8]}"

    def test_I7_restore_large_file_correct_content(
        self, tmp_path: pathlib.Path
    ) -> None:
        """I7: 10 MiB blob survives a round-trip through the object store."""
        root, _ = _init_repo(tmp_path)
        content = os.urandom(10 * 1024 * 1024)
        oid = _store_object(root, content)
        dest = tmp_path / "large.bin"
        assert restore_object(root, oid, dest)
        assert dest.read_bytes() == content
225
226
227 # ===========================================================================
228 # II apply_manifest — missing object must raise, never silently skip
229 # ===========================================================================
230
231
class TestApplyManifestMissingObjectII:
    """apply_manifest must never paper over an object absent from the store."""

    def test_II1_missing_single_object_raises_runtime_error(
        self, tmp_path: pathlib.Path
    ) -> None:
        """II1: a lone absent object produces RuntimeError rather than a skip."""
        repo_root, _ = _init_repo(tmp_path)
        target = {"ghost.txt": blob_id(b"ghost")}
        with pytest.raises(RuntimeError, match="missing from the local store"):
            apply_manifest(repo_root, {}, target)

    def test_II2_error_message_names_the_missing_path(
        self, tmp_path: pathlib.Path
    ) -> None:
        """II2: the raised error mentions the path whose object is absent."""
        repo_root, _ = _init_repo(tmp_path)
        target = {"crucial/file.py": blob_id(b"abc")}
        with pytest.raises(RuntimeError) as raised:
            apply_manifest(repo_root, {}, target)
        assert "crucial/file.py" in str(raised.value)

    def test_II3_partial_manifest_some_missing_raises(
        self, tmp_path: pathlib.Path
    ) -> None:
        """II3: a single absent object among several makes the call raise."""
        repo_root, _ = _init_repo(tmp_path)
        present = _write_disk(repo_root, "exists.txt", b"ok\n")
        absent = blob_id(b"not stored")
        target = {"exists.txt": present, "missing.txt": absent}
        with pytest.raises(RuntimeError):
            apply_manifest(repo_root, {}, target)

    def test_II4_all_objects_present_succeeds(
        self, tmp_path: pathlib.Path
    ) -> None:
        """II4: with every object stored, the files are written back to disk."""
        repo_root, _ = _init_repo(tmp_path)
        payloads = {"a.txt": b"aaa\n", "b.txt": b"bbb\n"}
        target = {
            name: _write_disk(repo_root, name, data)
            for name, data in payloads.items()
        }
        # Remove the on-disk copies so only the object store can supply them.
        for name in payloads:
            (repo_root / name).unlink()

        apply_manifest(repo_root, {}, target)

        for name, data in payloads.items():
            assert (repo_root / name).read_bytes() == data

    def test_II5_multiple_missing_reported_in_error(
        self, tmp_path: pathlib.Path
    ) -> None:
        """II5: the error reports the count of all absent objects."""
        repo_root, _ = _init_repo(tmp_path)
        target = {}
        for i in range(10):
            target[f"f{i}.py"] = blob_id(f"fake{i}".encode())
        with pytest.raises(RuntimeError) as raised:
            apply_manifest(repo_root, {}, target)
        message = str(raised.value)
        assert "10 object(s)" in message

    def test_II6_apply_manifest_removes_files_not_in_target(
        self, tmp_path: pathlib.Path
    ) -> None:
        """II6: a previously tracked file missing from the target is removed."""
        repo_root, _ = _init_repo(tmp_path)
        kept = _write_disk(repo_root, "keep.txt", b"keep\n")
        doomed = _store_object(repo_root, b"delete\n")
        doomed_path = repo_root / "delete_me.txt"
        doomed_path.write_bytes(b"delete\n")

        # delete_me.txt appears only in the previous manifest, so it must go.
        previous = {"keep.txt": kept, "delete_me.txt": doomed}
        target = {"keep.txt": kept}
        apply_manifest(repo_root, previous, target)

        assert not doomed_path.exists()
        assert (repo_root / "keep.txt").exists()

    def test_II7_empty_manifest_non_empty_prev_raises_value_error(
        self, tmp_path: pathlib.Path
    ) -> None:
        """II7: an empty target with a non-empty previous manifest is rejected."""
        repo_root, _ = _init_repo(tmp_path)
        previous = {"file.txt": _store_object(repo_root, b"data\n")}
        with pytest.raises(ValueError, match="empty target_manifest"):
            apply_manifest(repo_root, previous, {})
313
314
315 # ===========================================================================
316 # III verify_workdir_integrity — utility correctness
317 # ===========================================================================
318
319
class TestVerifyWorkdirIntegrityIII:
    """verify_workdir_integrity has to surface each kind of workdir drift."""

    def test_III1_clean_workdir_returns_empty_list(
        self, tmp_path: pathlib.Path
    ) -> None:
        """III1: a workdir identical to the manifest yields no mismatches."""
        repo_root, _ = _init_repo(tmp_path)
        manifest = {"a.py": _write_disk(repo_root, "a.py", b"x = 1\n")}
        assert verify_workdir_integrity(repo_root, manifest) == []

    def test_III2_modified_file_detected(self, tmp_path: pathlib.Path) -> None:
        """III2: a file changed behind muse's back is reported once."""
        repo_root, _ = _init_repo(tmp_path)
        tracked_oid = _write_disk(repo_root, "f.py", b"original\n")

        (repo_root / "f.py").write_bytes(b"tampered\n")

        report = verify_workdir_integrity(repo_root, {"f.py": tracked_oid})
        assert len(report) == 1
        reported_path, expected_oid, actual_oid = report[0]
        assert reported_path == "f.py"
        assert expected_oid == tracked_oid
        assert actual_oid != tracked_oid
        assert actual_oid is not None

    def test_III3_missing_file_detected(self, tmp_path: pathlib.Path) -> None:
        """III3: a manifest entry with no file on disk is a mismatch."""
        repo_root, _ = _init_repo(tmp_path)
        tracked_oid = _write_disk(repo_root, "gone.py", b"gone\n")
        (repo_root / "gone.py").unlink()

        report = verify_workdir_integrity(repo_root, {"gone.py": tracked_oid})
        hits = [
            entry for entry in report
            if entry[0] == "gone.py" and entry[2] is None
        ]
        assert hits

    def test_III4_extra_tracked_file_detected(
        self, tmp_path: pathlib.Path
    ) -> None:
        """III4: a file on disk that the manifest does not list is reported."""
        repo_root, _ = _init_repo(tmp_path)
        tracked_oid = _write_disk(repo_root, "tracked.py", b"ok\n")
        (repo_root / "extra.py").write_bytes(b"extra\n")

        report = verify_workdir_integrity(repo_root, {"tracked.py": tracked_oid})
        extras = [entry for entry in report if entry[0] == "extra.py"]
        assert extras, "Extra file not reported"

    def test_III5_empty_manifest_empty_workdir_clean(
        self, tmp_path: pathlib.Path
    ) -> None:
        """III5: nothing tracked and nothing on disk counts as clean."""
        repo_root, _ = _init_repo(tmp_path)
        report = verify_workdir_integrity(repo_root, {})
        assert report == []

    def test_III6_multiple_mismatches_all_reported(
        self, tmp_path: pathlib.Path
    ) -> None:
        """III6: every mismatch is returned, not only the first one found."""
        repo_root, _ = _init_repo(tmp_path)
        manifest: Manifest = {}
        for index in range(20):
            name = f"f{index}.py"
            manifest[name] = _write_disk(repo_root, name, f"v={index}\n".encode())

        # Overwrite the even-numbered half of the files.
        for index in range(0, 20, 2):
            (repo_root / f"f{index}.py").write_bytes(b"tampered\n")

        mismatches = verify_workdir_integrity(repo_root, manifest)
        assert len(mismatches) == 10, f"Expected 10 mismatches, got {len(mismatches)}"

    def test_III7_correct_content_after_apply_manifest(
        self, tmp_path: pathlib.Path
    ) -> None:
        """III7: a workdir rebuilt by apply_manifest verifies as clean."""
        repo_root, _ = _init_repo(tmp_path)
        manifest = {
            "a.py": _write_disk(repo_root, "a.py", b"a = 1\n"),
            "b.py": _write_disk(repo_root, "b.py", b"b = 2\n"),
        }
        for name in ("a.py", "b.py"):
            (repo_root / name).unlink()

        apply_manifest(repo_root, {}, manifest)

        mismatches = verify_workdir_integrity(repo_root, dict(manifest))
        assert mismatches == [], f"Expected clean after apply_manifest: {mismatches}"
403
404
405 # ===========================================================================
406 # IV checkout → workdir always matches the target snapshot
407 # ===========================================================================
408
409
class TestCheckoutWorkdirIntegrityIV:
    """After muse checkout, the working tree must byte-for-byte match
    the target branch's committed snapshot. No exceptions."""

    @staticmethod
    def _ok(root: pathlib.Path, *args: str) -> str:
        """Run a CLI command that is expected to succeed; return its output.

        Asserting on setup steps keeps a broken commit/branch/checkout from
        surfacing later as a confusing content mismatch.
        """
        code, out = _run(root, *args)
        assert code == 0, f"muse {' '.join(args)} failed (exit {code}):\n{out}"
        return out

    @staticmethod
    def _assert_matches_head(root: pathlib.Path, branch: str) -> None:
        """Assert the workdir equals the snapshot at the tip of *branch*."""
        mismatches = verify_workdir_integrity(root, _head_manifest(root, branch))
        sample = "\n".join(f" {m}" for m in mismatches[:5])
        assert mismatches == [], (
            f"DATA LOSS: {len(mismatches)} mismatch(es) after checkout {branch}:\n"
            f"{sample}"
        )

    def _full_roundtrip(self, tmp_path: pathlib.Path, n_files: int) -> None:
        """Create two branches with different content, checkout between them,
        verify integrity on each switch."""
        root, _ = _init_repo(tmp_path)
        # The exit code of ``init`` is not asserted (it never was): the repo
        # layout has already been written by ``_init_repo``.
        _run(root, "init", str(root))

        for i in range(n_files):
            _write_disk(root, f"src/m{i}.py", f"# main {i}\n".encode())
        self._ok(root, "commit", "--allow-empty", "-m", "main files")

        self._ok(root, "branch", "feat")
        self._ok(root, "checkout", "feat")

        for i in range(n_files):
            _write_disk(root, f"src/f{i}.py", f"# feat {i}\n".encode())
        _run(root, "code", "add", ".")
        self._ok(root, "commit", "-m", "feat files")

        # Switch back to main — verify clean
        self._ok(root, "checkout", "main")
        self._assert_matches_head(root, "main")

        # Switch to feat — verify clean
        self._ok(root, "checkout", "feat")
        self._assert_matches_head(root, "feat")

    def test_IV1_checkout_10_files_workdir_matches_snapshot(
        self, tmp_path: pathlib.Path
    ) -> None:
        """IV1: 10-file repo checkout — workdir matches target snapshot."""
        self._full_roundtrip(tmp_path, 10)

    def test_IV2_checkout_50_files_workdir_matches_snapshot(
        self, tmp_path: pathlib.Path
    ) -> None:
        """IV2: 50-file repo checkout — workdir matches target snapshot."""
        self._full_roundtrip(tmp_path, 50)

    def test_IV3_repeated_checkout_workdir_consistent(
        self, tmp_path: pathlib.Path
    ) -> None:
        """IV3: switching back and forth 10 times never corrupts the workdir."""
        root, _ = _init_repo(tmp_path)
        _run(root, "init", str(root))

        _write_disk(root, "f.py", b"version_a\n")
        self._ok(root, "commit", "--allow-empty", "-m", "main")
        self._ok(root, "branch", "feat")
        self._ok(root, "checkout", "feat")
        _write_disk(root, "f.py", b"version_b\n")
        _run(root, "code", "add", ".")
        self._ok(root, "commit", "-m", "feat")

        # Both snapshots are fixed from here on; read them once.
        snapshots = {
            "main": _head_manifest(root, "main"),
            "feat": _head_manifest(root, "feat"),
        }

        for i in range(10):
            branch = "main" if i % 2 == 0 else "feat"
            self._ok(root, "checkout", branch)
            mismatches = verify_workdir_integrity(root, snapshots[branch])
            assert mismatches == [], (
                f"Iteration {i}: mismatch after checkout {branch}: {mismatches}"
            )

    def test_IV4_checkout_restores_file_modified_between_branches(
        self, tmp_path: pathlib.Path
    ) -> None:
        """IV4: a file modified on one branch is correctly restored when
        switching to a branch that has the original version."""
        root, _ = _init_repo(tmp_path)
        _run(root, "init", str(root))

        _write_disk(root, "shared.py", b"# v1\n")
        self._ok(root, "commit", "--allow-empty", "-m", "base")
        self._ok(root, "branch", "feat")

        # Still on main: feat keeps v1 while main moves on to v2.
        _write_disk(root, "shared.py", b"# v2\n")
        _run(root, "code", "add", ".")
        self._ok(root, "commit", "-m", "main v2")

        self._ok(root, "checkout", "feat")
        assert (root / "shared.py").read_bytes() == b"# v1\n", (
            "feat branch should have v1 of shared.py"
        )

        self._ok(root, "checkout", "main")
        assert (root / "shared.py").read_bytes() == b"# v2\n", (
            "main branch should have v2 of shared.py"
        )

    def test_IV5_checkout_deletes_files_not_in_target_branch(
        self, tmp_path: pathlib.Path
    ) -> None:
        """IV5: files added on one branch are deleted when switching away."""
        root, _ = _init_repo(tmp_path)
        _run(root, "init", str(root))

        _write_disk(root, "base.py", b"base\n")
        self._ok(root, "commit", "--allow-empty", "-m", "base")
        self._ok(root, "branch", "feat")
        self._ok(root, "checkout", "feat")

        _write_disk(root, "feat_only.py", b"feat\n")
        _run(root, "code", "add", ".")
        self._ok(root, "commit", "-m", "feat_only")

        self._ok(root, "checkout", "main")
        assert not (root / "feat_only.py").exists(), (
            "feat-only file should not exist on main branch"
        )
545
546
547 # ===========================================================================
548 # V fast-forward merge → workdir always matches target snapshot
549 # ===========================================================================
550
551
class TestFFMergeWorkdirIntegrityV:
    """A fast-forward merge must update the working tree to match the
    incoming branch's snapshot — ALL files, not just the delta."""

    @staticmethod
    def _ok(root: pathlib.Path, *args: str) -> str:
        """Run a CLI command that is expected to succeed; return its output."""
        code, out = _run(root, *args)
        assert code == 0, f"muse {' '.join(args)} failed (exit {code}):\n{out}"
        return out

    def _base_then_feat(self, tmp_path: pathlib.Path, name: str, data: bytes) -> pathlib.Path:
        """Commit one base file on main, then create and check out ``feat``.

        Returns the repo root with ``feat`` checked out.
        """
        root, _ = _init_repo(tmp_path)
        # ``init`` exit code is intentionally left unasserted, as before.
        _run(root, "init", str(root))
        _write_disk(root, name, data)
        self._ok(root, "commit", "--allow-empty", "-m", "base")
        self._ok(root, "branch", "feat")
        self._ok(root, "checkout", "feat")
        return root

    def test_V1_ff_merge_all_files_restored(self, tmp_path: pathlib.Path) -> None:
        """V1: after FF merge every file in the target manifest is on disk
        with the correct content."""
        root = self._base_then_feat(tmp_path, "base.py", b"base\n")

        feat_paths = [f"f{i}.py" for i in range(30)]
        for i, path in enumerate(feat_paths):
            _write_disk(root, path, f"# feat {i}\n".encode())
        _run(root, "code", "add", ".")
        self._ok(root, "commit", "-m", "feat 30 files")

        self._ok(root, "checkout", "main")
        self._ok(root, "merge", "feat")

        merged = _head_manifest(root, "main")
        # Guard against a merge that "succeeds" without moving main: the
        # integrity check below would be clean against the old snapshot too.
        absent = [p for p in feat_paths if p not in merged]
        assert not absent, f"merged snapshot lacks feat files: {absent[:5]}"

        mismatches = verify_workdir_integrity(root, merged)
        sample = "\n".join(f" {m}" for m in mismatches[:5])
        assert mismatches == [], (
            f"DATA LOSS after FF merge: {len(mismatches)} mismatch(es)\n"
            f"{sample}"
        )

    def test_V2_ff_merge_correct_content_not_just_present(
        self, tmp_path: pathlib.Path
    ) -> None:
        """V2: FF merge writes the correct *content*, not just the correct filename."""
        old_content = b"# old version\n"
        new_content = b"# new version\n"
        root = self._base_then_feat(tmp_path, "important.py", old_content)

        _write_disk(root, "important.py", new_content)
        _run(root, "code", "add", ".")
        self._ok(root, "commit", "-m", "update important.py")

        self._ok(root, "checkout", "main")
        # File on disk is now old_content
        assert (root / "important.py").read_bytes() == old_content

        self._ok(root, "merge", "feat")
        # File on disk must now be new_content
        actual = (root / "important.py").read_bytes()
        assert actual == new_content, (
            f"FF merge did not restore correct content. "
            f"Expected {new_content!r}, got {actual!r}"
        )

    def test_V3_ff_merge_workdir_matches_snapshot_byte_for_byte(
        self, tmp_path: pathlib.Path
    ) -> None:
        """V3: verify_workdir_integrity confirms zero drift after FF merge."""
        root = self._base_then_feat(tmp_path, "a.py", b"a\n")

        feat_paths = [f"feat_{i}.py" for i in range(20)]
        for i, path in enumerate(feat_paths):
            _write_disk(root, path, f"feat{i}\n".encode())
        _run(root, "code", "add", ".")
        self._ok(root, "commit", "-m", "feat 20 files")

        self._ok(root, "checkout", "main")
        self._ok(root, "merge", "feat")

        merged_snap = _head_manifest(root, "main")
        # Without this, a no-op merge leaves workdir == old main snapshot and
        # the drift check passes vacuously.
        absent = [p for p in feat_paths if p not in merged_snap]
        assert not absent, f"merged snapshot lacks feat files: {absent[:5]}"

        mismatches = verify_workdir_integrity(root, merged_snap)
        assert mismatches == []
639
640
641 # ===========================================================================
642 # VI checkout aborts hard when an object is missing from the store
643 # ===========================================================================
644
645
class TestCheckoutMissingObjectVI:
    """Checkout must refuse to proceed when an object it needs is absent
    from the local object store. The partial-checkout silent-data-loss
    path is now closed."""

    @staticmethod
    def _ok(root: pathlib.Path, *args: str) -> str:
        """Run a CLI command that is expected to succeed; return its output."""
        code, out = _run(root, *args)
        assert code == 0, f"muse {' '.join(args)} failed (exit {code}):\n{out}"
        return out

    def _feat_with_purged_object(
        self, tmp_path: pathlib.Path, name: str, content: bytes, message: str
    ) -> pathlib.Path:
        """Build main + feat, commit *name* on feat, return to main, then
        delete that file's object from the store.

        Every setup command is asserted to succeed. Otherwise the later
        "exit code != 0" checks could pass merely because ``feat`` was never
        created or committed, not because the object is missing.
        """
        root, _ = _init_repo(tmp_path)
        # ``init`` exit code is left unasserted, as in the rest of the suite.
        _run(root, "init", str(root))

        _write_disk(root, "base.py", b"base\n")
        self._ok(root, "commit", "--allow-empty", "-m", "base")
        self._ok(root, "branch", "feat")
        self._ok(root, "checkout", "feat")

        oid = _write_disk(root, name, content)
        _run(root, "code", "add", ".")
        self._ok(root, "commit", "-m", message)
        self._ok(root, "checkout", "main")

        # Purge the object after committing — simulates store corruption.
        # ``unlink`` raises if the object is not there, which is itself a
        # useful setup failure.
        _object_path(root, oid).unlink()
        return root

    def test_VI1_checkout_to_branch_with_missing_object_aborts(
        self, tmp_path: pathlib.Path
    ) -> None:
        """VI1: if a required object is purged from the store, checkout
        exits non-zero and does NOT silently leave the workdir in a
        partially restored state."""
        root = self._feat_with_purged_object(
            tmp_path, "feat.py", b"# feat file\n", "feat"
        )

        code, out = _run_unchecked(root, "checkout", "feat")
        assert code != 0, (
            "Checkout should fail when a required object is missing from the store"
        )

    def test_VI2_apply_manifest_raises_on_missing_object_not_silent(
        self, tmp_path: pathlib.Path
    ) -> None:
        """VI2: apply_manifest raises RuntimeError (not returns None or logs
        a warning) when an object is missing."""
        root, _ = _init_repo(tmp_path)
        ghost_oid = blob_id(b"not in store")
        with pytest.raises(RuntimeError) as exc_info:
            apply_manifest(root, {}, {"ghost.py": ghost_oid})
        assert "missing from the local store" in str(exc_info.value)

    def test_VI3_ff_merge_aborts_if_incoming_object_missing(
        self, tmp_path: pathlib.Path
    ) -> None:
        """VI3: FF merge aborts if an object from the target snapshot is
        not in the local store."""
        root = self._feat_with_purged_object(
            tmp_path, "critical.py", b"# critical\n", "critical"
        )

        code, out = _run_unchecked(root, "merge", "feat")
        assert code != 0, "FF merge should fail when a target object is missing"
715
716
717 # ===========================================================================
718 # VII Editor-cache simulation — status must detect stale-cache workdir drift
719 # ===========================================================================
720
721
class TestEditorCacheSimulationVII:
    """Simulates the exact incident: the editor had a cached (stale) version
    of a file. After a merge updated the on-disk file, the editor wrote the
    stale version back, corrupting the workdir.

    Muse cannot prevent an editor from writing stale data, but it CAN detect
    the drift via `muse status` (which compares workdir hashes against HEAD)
    and via `verify_workdir_integrity`.
    """

    @staticmethod
    def _ok(root: pathlib.Path, *args: str) -> str:
        """Run a CLI command that is expected to succeed; return its output."""
        code, out = _run(root, *args)
        assert code == 0, f"muse {' '.join(args)} failed (exit {code}):\n{out}"
        return out

    def _merged_plugin_repo(
        self,
        tmp_path: pathlib.Path,
        old_content: bytes,
        new_content: bytes,
        base_msg: str = "base",
        feat_msg: str = "feat",
    ) -> pathlib.Path:
        """Commit ``plugin.py`` as *old_content* on main, update it to
        *new_content* on feat, then merge feat into main.

        Returns the repo root with main checked out after the merge. The
        merge is asserted to succeed so the tests below really start from
        "HEAD says plugin.py == new_content".
        """
        root, _ = _init_repo(tmp_path)
        # ``init`` exit code is left unasserted, as in the rest of the suite.
        _run(root, "init", str(root))

        _write_disk(root, "plugin.py", old_content)
        self._ok(root, "commit", "--allow-empty", "-m", base_msg)
        self._ok(root, "branch", "feat")
        self._ok(root, "checkout", "feat")

        _write_disk(root, "plugin.py", new_content)
        _run(root, "code", "add", ".")
        self._ok(root, "commit", "-m", feat_msg)
        self._ok(root, "checkout", "main")
        self._ok(root, "merge", "feat")
        return root

    def test_VII1_status_detects_workdir_drift_after_external_write(
        self, tmp_path: pathlib.Path
    ) -> None:
        """VII1: if a file is externally overwritten to an old version,
        muse status reports it as modified."""
        old_content = b"# version 1\n"
        new_content = b"# version 2\n"
        root = self._merged_plugin_repo(
            tmp_path, old_content, new_content, base_msg="v1", feat_msg="v2"
        )

        # HEAD now says plugin.py = new_content.
        # Simulate editor writing back the stale old version.
        (root / "plugin.py").write_bytes(old_content)

        code, out = _run(root, "status")
        assert code == 0, out
        assert "plugin.py" in out, (
            "muse status must report plugin.py as modified after stale write"
        )

    def test_VII2_verify_workdir_integrity_catches_stale_editor_write(
        self, tmp_path: pathlib.Path
    ) -> None:
        """VII2: verify_workdir_integrity spots the stale-editor-cache corruption."""
        old_content = b"# old\n"
        new_content = b"# new\n"
        root = self._merged_plugin_repo(tmp_path, old_content, new_content)

        # Simulate editor stale write
        (root / "plugin.py").write_bytes(old_content)

        head_snap = _head_manifest(root, "main")
        mismatches = verify_workdir_integrity(root, head_snap)
        assert any(m[0] == "plugin.py" for m in mismatches), (
            "verify_workdir_integrity must detect the stale file"
        )

    def test_VII3_correct_state_after_reapplying_manifest(
        self, tmp_path: pathlib.Path
    ) -> None:
        """VII3: after detecting stale-editor drift, re-applying the manifest
        restores correct state and verify_workdir_integrity is clean."""
        new_content = b"# new\n"
        root = self._merged_plugin_repo(tmp_path, b"# old\n", new_content)

        # Stale write
        (root / "plugin.py").write_bytes(b"# stale\n")

        head_snap = _head_manifest(root, "main")
        # Repair by re-applying the manifest
        apply_manifest(root, head_snap, head_snap)
        mismatches = verify_workdir_integrity(root, head_snap)
        assert mismatches == [], "After re-applying manifest, workdir must be clean"
        # The repair must bring back the merged version, not merely any
        # content that happens to agree with the manifest being checked.
        assert (root / "plugin.py").read_bytes() == new_content
821
822
823 # ===========================================================================
824 # VIII Stress tests
825 # ===========================================================================
826
827
class TestStressWorkdirVIII:
    """High-volume, adversarial scenarios to eliminate the entire class of
    workdir corruption bugs."""

    @staticmethod
    def _ok(root: pathlib.Path, *args: str) -> str:
        """Run a CLI command that is expected to succeed; return its output."""
        code, out = _run(root, *args)
        assert code == 0, f"muse {' '.join(args)} failed (exit {code}):\n{out}"
        return out

    def test_VIII1_500_file_checkout_all_match_snapshot(
        self, tmp_path: pathlib.Path
    ) -> None:
        """VIII1: 500-file repo — every file matches the committed snapshot
        after every checkout."""
        root, _ = _init_repo(tmp_path)
        # ``init`` exit code is left unasserted, as in the rest of the suite.
        _run(root, "init", str(root))

        for i in range(500):
            _write_disk(root, f"main_{i:04d}.py", f"# main {i}\n".encode())
        self._ok(root, "commit", "--allow-empty", "-m", "main 500")

        self._ok(root, "branch", "feat")
        self._ok(root, "checkout", "feat")

        for i in range(500):
            _write_disk(root, f"feat_{i:04d}.py", f"# feat {i}\n".encode())
        _run(root, "code", "add", ".")
        self._ok(root, "commit", "-m", "feat 500")

        for branch in ("main", "feat"):
            self._ok(root, "checkout", branch)
            snp = _head_manifest(root, branch)
            mismatches = verify_workdir_integrity(root, snp)
            assert mismatches == [], f"{len(mismatches)} mismatch(es) on {branch}"

    def test_VIII2_ff_merge_500_files_all_correct(
        self, tmp_path: pathlib.Path
    ) -> None:
        """VIII2: FF merge with 500 incoming files — all correct after merge."""
        root, _ = _init_repo(tmp_path)
        _run(root, "init", str(root))

        _write_disk(root, "base.py", b"base\n")
        self._ok(root, "commit", "--allow-empty", "-m", "base")
        self._ok(root, "branch", "feat")
        self._ok(root, "checkout", "feat")

        feat_paths: list[str] = []
        for i in range(500):
            content = f"# file {i} unique content {content_hash({'i': i})}\n".encode()
            path = f"feat_{i:04d}.py"
            _write_disk(root, path, content)
            feat_paths.append(path)
        _run(root, "code", "add", ".")
        self._ok(root, "commit", "-m", "feat 500")

        self._ok(root, "checkout", "main")
        self._ok(root, "merge", "feat")

        merged = _head_manifest(root, "main")
        # The incoming files must actually be part of the merged snapshot;
        # otherwise the integrity check below proves nothing about them.
        absent = [p for p in feat_paths if p not in merged]
        assert not absent, (
            f"{len(absent)} feat file(s) missing from merged snapshot, "
            f"e.g. {absent[:5]}"
        )

        mismatches = verify_workdir_integrity(root, merged)
        assert mismatches == [], (
            f"DATA LOSS: {len(mismatches)} file(s) wrong after 500-file FF merge"
        )
897
898 def test_VIII3_alternating_checkout_never_drifts(
899 self, tmp_path: pathlib.Path
900 ) -> None:
901 """VIII3: 20 alternating checkout cycles — workdir never drifts."""
902 root, repo_id = _init_repo(tmp_path)
903 _run(root, "init", str(root))
904
905 oid_a = _write_disk(root, "shared.py", b"# version A\n")
906 _run(root, "commit", "--allow-empty", "-m", "main")
907 snap_main = _head_manifest(root, "main")
908
909 _run(root, "branch", "feat")
910 _run(root, "checkout", "feat")
911 oid_b = _write_disk(root, "shared.py", b"# version B\n")
912 _run(root, "code", "add", ".")
913 _run(root, "commit", "-m", "feat")
914 snap_feat = _head_manifest(root, "feat")
915
916 for cycle in range(20):
917 branch = "main" if cycle % 2 == 0 else "feat"
918 code, out = _run(root, "checkout", branch)
919 assert code == 0, f"Cycle {cycle}: checkout {branch} failed"
920 expected = snap_main if branch == "main" else snap_feat
921 mismatches = verify_workdir_integrity(root, expected)
922 assert mismatches == [], (
923 f"Cycle {cycle}, branch {branch}: {len(mismatches)} mismatch(es)"
924 )
925
926 def test_VIII4_deep_chain_checkout_base_restores_correctly(
927 self, tmp_path: pathlib.Path
928 ) -> None:
929 """VIII4: deep commit chain — checkout of base commit restores correctly."""
930 root, repo_id = _init_repo(tmp_path)
931 _run(root, "init", str(root))
932
933 # Build a chain of 20 commits, each modifying the same file
934 for i in range(20):
935 _write_disk(root, "evolving.py", f"# iteration {i}\n".encode())
936 _run(root, "commit", "--allow-empty", "-m", f"iter {i}")
937
938 snap = _head_manifest(root, "main")
939 assert (root / "evolving.py").read_bytes() == b"# iteration 19\n"
940 mismatches = verify_workdir_integrity(root, snap)
941 assert mismatches == []
942
943 def test_VIII5_stress_apply_manifest_100_times_deterministic(
944 self, tmp_path: pathlib.Path
945 ) -> None:
946 """VIII5: apply_manifest called 100 times is deterministic and
947 always produces an identical workdir."""
948 root, _ = _init_repo(tmp_path)
949 manifest: Manifest = {}
950 for i in range(50):
951 oid = _write_disk(root, f"f{i}.py", f"content {i}\n".encode())
952 manifest[f"f{i}.py"] = oid
953
954 for trial in range(100):
955 apply_manifest(root, manifest, manifest)
956 mismatches = verify_workdir_integrity(root, manifest)
957 assert mismatches == [], (
958 f"Trial {trial}: {len(mismatches)} mismatch(es) after apply_manifest"
959 )
960
961 def test_VIII6_diamond_topology_workdir_clean_after_all_merges(
962 self, tmp_path: pathlib.Path
963 ) -> None:
964 """VIII6: diamond merge topology — verify integrity at every step."""
965 root, repo_id = _init_repo(tmp_path)
966 _run(root, "init", str(root))
967
968 # Base
969 _write_disk(root, "base.py", b"base\n")
970 _run(root, "commit", "--allow-empty", "-m", "base")
971
972 # Left branch
973 _run(root, "branch", "left")
974 _run(root, "checkout", "left")
975 _write_disk(root, "left.py", b"left\n")
976 _run(root, "code", "add", ".")
977 _run(root, "commit", "-m", "left")
978
979 # Right branch (from main)
980 _run(root, "checkout", "main")
981 _run(root, "branch", "right")
982 _run(root, "checkout", "right")
983 _write_disk(root, "right.py", b"right\n")
984 _run(root, "code", "add", ".")
985 _run(root, "commit", "-m", "right")
986
987 # Merge left → main
988 _run(root, "checkout", "main")
989 code, out = _run(root, "merge", "left")
990 assert code == 0, out
991 snp = _head_manifest(root, "main")
992 assert verify_workdir_integrity(root, snp) == []
993
994 # Merge right → main
995 code, out = _run(root, "merge", "right")
996 assert code == 0, out
997 snp = _head_manifest(root, "main")
998 assert verify_workdir_integrity(root, snp) == []
999
1000 def test_VIII7_binary_files_survive_checkout(
1001 self, tmp_path: pathlib.Path
1002 ) -> None:
1003 """VIII7: binary content (random bytes) survives checkout intact."""
1004 root, repo_id = _init_repo(tmp_path)
1005 _run(root, "init", str(root))
1006
1007 binary = os.urandom(1024 * 512) # 512 KiB of random bytes
1008 oid = _write_disk(root, "data.bin", binary)
1009 _run(root, "commit", "--allow-empty", "-m", "binary")
1010
1011 _run(root, "branch", "feat")
1012 _run(root, "checkout", "feat")
1013 _write_disk(root, "other.py", b"other\n")
1014 _run(root, "code", "add", ".")
1015 _run(root, "commit", "-m", "other")
1016
1017 _run(root, "checkout", "main")
1018 snp = _head_manifest(root, "main")
1019 mismatches = verify_workdir_integrity(root, snp)
1020 assert mismatches == []
1021 assert (root / "data.bin").read_bytes() == binary
1022
1023 def test_VIII8_unicode_filenames_survive_checkout(
1024 self, tmp_path: pathlib.Path
1025 ) -> None:
1026 """VIII8: files with unicode path components survive checkout."""
1027 root, repo_id = _init_repo(tmp_path)
1028 _run(root, "init", str(root))
1029
1030 paths = [
1031 "src/módulo.py",
1032 "src/données.txt",
1033 "src/файл.py",
1034 ]
1035 for p in paths:
1036 _write_disk(root, p, f"# {p}\n".encode())
1037 _run(root, "commit", "--allow-empty", "-m", "unicode paths")
1038
1039 _run(root, "branch", "feat")
1040 _run(root, "checkout", "feat")
1041 _write_disk(root, "extra.py", b"extra\n")
1042 _run(root, "code", "add", ".")
1043 _run(root, "commit", "-m", "extra")
1044
1045 _run(root, "checkout", "main")
1046 snp = _head_manifest(root, "main")
1047 mismatches = verify_workdir_integrity(root, snp)
1048 assert mismatches == []
1049
1050 def test_VIII9_no_data_loss_after_100_consecutive_commits(
1051 self, tmp_path: pathlib.Path
1052 ) -> None:
1053 """VIII9: 100 consecutive commits on main — verify final state is correct."""
1054 root, repo_id = _init_repo(tmp_path)
1055 _run(root, "init", str(root))
1056
1057 for i in range(100):
1058 _write_disk(root, f"file_{i:03d}.py", f"# commit {i}\n".encode())
1059 code, out = _run(root, "commit", "--allow-empty", "-m", f"commit {i}")
1060 assert code == 0, f"Commit {i} failed: {out}"
1061
1062 snp = _head_manifest(root, "main")
1063 assert len(snp) == 100, f"Expected 100 files, got {len(snp)}"
1064 mismatches = verify_workdir_integrity(root, snp)
1065 assert mismatches == [], f"{len(mismatches)} mismatch(es) after 100 commits"
1066
1067
1068 # ===========================================================================
1069 # IX apply_manifest must not delete untracked files
1070 # ===========================================================================
1071
1072
class TestApplyManifestUntrackedFilesIX:
    """Untracked files must survive apply_manifest regardless of target manifest.

    Root cause of the bug: apply_manifest used walk_workdir(root) to build
    current_files, which returns ALL files on disk — including files the user
    created that were never committed.  The fix: use prev_manifest.keys() as
    the deletion candidate set so only previously-tracked files are candidates.

    In these tests ``apply_manifest`` is called as
    ``apply_manifest(root, prev_manifest, target_manifest)``.  Survival checks
    compare file bytes, not just existence, so a truncated or rewritten
    untracked file is also reported as data loss.
    """

    def test_IX1_untracked_file_not_deleted_by_apply_manifest(
        self, tmp_path: pathlib.Path
    ) -> None:
        """IX1: a file never in any manifest must survive apply_manifest."""
        root, _ = _init_repo(tmp_path)

        # prev state: file A is tracked
        oid_a = _write_disk(root, "a.py", b"a = 1\n")
        # target state: file B is tracked
        oid_b = _store_object(root, b"b = 2\n")
        (root / "b.py").write_bytes(b"b = 2\n")

        # untracked file — never in any manifest
        untracked = root / "notes.txt"
        untracked.write_bytes(b"my personal notes\n")

        apply_manifest(root, {"a.py": oid_a}, {"b.py": oid_b})

        assert untracked.exists(), "untracked file was deleted by apply_manifest — data loss bug"
        assert untracked.read_bytes() == b"my personal notes\n", (
            "untracked file content was altered by apply_manifest"
        )
        assert (root / "b.py").read_bytes() == b"b = 2\n"
        assert not (root / "a.py").exists(), "a.py was tracked then removed — must be deleted"

    def test_IX2_untracked_dotfile_not_deleted(
        self, tmp_path: pathlib.Path
    ) -> None:
        """IX2: untracked dotfiles and nested docs (e.g. spec docs written by
        Write tool) must survive."""
        root, _ = _init_repo(tmp_path)

        oid_a = _write_disk(root, "main.py", b"x = 1\n")
        oid_b = _store_object(root, b"x = 2\n")
        (root / "main.py").write_bytes(b"x = 2\n")

        # Untracked file inside an untracked subdirectory.
        spec_doc = root / "docs" / "spec.md"
        spec_doc.parent.mkdir()
        spec_doc.write_bytes(b"# spec\n")

        # An actual dotfile, so the test covers what its name promises.
        dotfile = root / ".scratch-notes"
        dotfile.write_bytes(b"scratch\n")

        apply_manifest(root, {"main.py": oid_a}, {"main.py": oid_b})

        assert spec_doc.exists(), "untracked doc file was deleted by apply_manifest"
        assert spec_doc.read_bytes() == b"# spec\n", (
            "untracked doc file content was altered by apply_manifest"
        )
        assert dotfile.exists(), "untracked dotfile was deleted by apply_manifest"
        assert dotfile.read_bytes() == b"scratch\n", (
            "untracked dotfile content was altered by apply_manifest"
        )

    def test_IX3_tracked_file_removed_from_target_gets_deleted(
        self, tmp_path: pathlib.Path
    ) -> None:
        """IX3: files in prev_manifest but not in target must still be deleted."""
        root, _ = _init_repo(tmp_path)

        oid_a = _write_disk(root, "gone.py", b"gone\n")
        oid_b = _write_disk(root, "kept.py", b"kept\n")

        apply_manifest(root, {"gone.py": oid_a, "kept.py": oid_b}, {"kept.py": oid_b})

        assert not (root / "gone.py").exists(), "tracked file removed from target must be deleted"
        assert (root / "kept.py").exists()
        # The surviving file is in both manifests with the same object id,
        # so its bytes must be untouched.
        assert (root / "kept.py").read_bytes() == b"kept\n"

    def test_IX4_commit_does_not_delete_untracked_file(
        self, tmp_path: pathlib.Path
    ) -> None:
        """IX4: muse commit must not delete untracked files from the working tree."""
        root, _ = _init_repo(tmp_path)
        _run(root, "init", str(root))

        # First commit with one tracked file
        _write_disk(root, "tracked.py", b"x = 1\n")
        _run(root, "code", "add", ".")
        code, out = _run(root, "commit", "-m", "initial")
        assert code == 0, out

        # Write an untracked file (simulates Write tool creating a spec doc)
        untracked = root / "docs" / "spec.md"
        untracked.parent.mkdir()
        untracked.write_bytes(b"# my spec\n")
        assert untracked.exists()

        # Modify tracked file and commit; only tracked.py is staged, so
        # docs/spec.md stays untracked across this commit.
        (root / "tracked.py").write_bytes(b"x = 2\n")
        _run(root, "code", "add", "tracked.py")
        code, out = _run(root, "commit", "-m", "update")
        assert code == 0, out

        assert untracked.exists(), (
            "muse commit deleted an untracked file — data loss bug. "
            "apply_manifest must not delete files absent from prev_manifest."
        )
        assert untracked.read_bytes() == b"# my spec\n", (
            "muse commit altered the content of an untracked file"
        )
File History 1 commit
sha256:1d3f5470f45db58e32047678debc9438fdded1b2c7332cc743d2b8be32fdafc8 fixing more broken tests Human patch 2 days ago