gabriel / muse public
test_pull_missing_snapshot_guard.py python
405 lines 17.3 KB
Raw
sha256:84df9126d09aeec0b8f1b908f0b06c10913feec28f3514b382efb1ba6d619385 refactor: rename StructuredMergePlugin to AddressedMergePlu… Sonnet 4.6 minor ⚠ breaking 24 days ago
1 """Tests for Bug 12: pull fast-forward/bootstrap advances branch pointer even when
2 the target snapshot is missing or corrupt, leaving working tree and branch HEAD
3 inconsistent.
4
5 Root cause: both fast-forward paths (bootstrap + fast-forward) and the
6 three-way merge path in pull.py called write_branch_ref / proceeded with
7 theirs_manifest={} even when:
8 - read_commit returned None (commit unreadable after apply_mpack), OR
9 - read_snapshot returned None (snapshot missing/corrupt)
10
11 For the fast-forward paths this meant the branch pointer was advanced to a
12 commit whose snapshot cannot be read — muse status would show all tracked
13 files as deleted, and muse checkout would fail.
14
15 For the three-way merge path, theirs_manifest={} caused the merge to treat
16 ALL remote files as deleted — producing a spurious merge that would delete
17 the user's files and commit an empty tree.
18
19 The fix: if commit or snapshot is not readable after apply_mpack, abort with
20 SystemExit(INTERNAL_ERROR) BEFORE touching the branch ref or attempting the
21 merge.
22
23 Scope of tests
24 --------------
25 Unit (guard behaviour via write_branch_ref / read_snapshot):
26 - fast-forward: snapshot missing → branch NOT advanced
27 - fast-forward: commit missing → branch NOT advanced
28 - bootstrap: snapshot missing → branch NOT advanced
29 - bootstrap: commit missing → branch NOT advanced
30
31 Integration (using build_mpack/apply_mpack directly):
32 - Valid pull: fast-forward succeeds, working tree updated
33 - Missing snapshot on remote side: pull aborts, local branch unchanged
34 - Corrupt snapshot on remote (hash mismatch): pull aborts, local branch unchanged
35 """
36 from __future__ import annotations
37
38 import datetime
39 import json
40 import pathlib
41 import sys
42 import unittest.mock
43
44 import pytest
45
46 from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id
47
48 from muse.core.types import Manifest, fake_id
49 from muse.core.paths import muse_dir
50 from muse.core.object_store import object_path as _obj_path
51 from muse.core.refs import write_branch_ref
52 from muse.core.commits import (
53 CommitRecord,
54 read_commit,
55 write_commit,
56 )
57 from muse.core.snapshots import (
58 SnapshotRecord,
59 read_snapshot,
60 write_snapshot,
61 )
62
63 _TS = datetime.datetime(2024, 6, 15, 10, 0, 0, tzinfo=datetime.timezone.utc)
64
65
66 def _make_repo(tmp_path: pathlib.Path) -> pathlib.Path:
67 repo = tmp_path / "repo"
68 repo.mkdir()
69 dot_muse = muse_dir(repo)
70 dot_muse.mkdir()
71 (dot_muse / "commits").mkdir()
72 (dot_muse / "snapshots").mkdir()
73 (dot_muse / "objects").mkdir()
74 (dot_muse / "refs" / "heads").mkdir(parents=True)
75 (dot_muse / "HEAD").write_text("ref: refs/heads/main\n")
76 (dot_muse / "refs" / "heads" / "main").write_text("")
77 (dot_muse / "repo.json").write_text(json.dumps({"repo_id": "pull-test", "domain": "code"}))
78 return repo
79
80
81 def _make_commit(
82 repo: pathlib.Path,
83 message: str,
84 manifest: Manifest | None = None,
85 parent: str | None = None,
86 ) -> CommitRecord:
87 m = manifest or {}
88 snap_id = compute_snapshot_id(m)
89 snap = SnapshotRecord(snapshot_id=snap_id, manifest=m, created_at=_TS)
90 write_snapshot(repo, snap)
91 parent_ids = [parent] if parent else []
92 cid = compute_commit_id(
93 parent_ids=parent_ids,
94 snapshot_id=snap_id,
95 message=message,
96 committed_at_iso=_TS.isoformat(),
97 author="tester",
98 )
99 c = CommitRecord(
100 commit_id=cid,
101 branch="main",
102 snapshot_id=snap_id,
103 message=message,
104 committed_at=_TS,
105 author="tester",
106 parent_commit_id=parent,
107 parent2_commit_id=None,
108 )
109 write_commit(repo, c)
110 write_branch_ref(repo, "main", cid)
111 return c
112
113
114 # ──────────────────────────────────────────────────────────────────────────────
115 # Unit: guard behaviour via pull.py internals
116 # ──────────────────────────────────────────────────────────────────────────────
117
118 class TestPullFastForwardMissingSnapshot:
119 """Test the fast-forward guard: snapshot missing → branch NOT advanced."""
120
121 def test_fast_forward_branch_not_advanced_when_snapshot_missing(self, tmp_path: pathlib.Path) -> None:
122 """After a successful pull fetch, if snap is None the branch must not advance."""
123 repo = _make_repo(tmp_path)
124 c1 = _make_commit(repo, "initial", {"a.py": fake_id("obj-a")})
125 c2 = _make_commit(repo, "second", {"b.py": fake_id("obj-b")}, parent=c1.commit_id)
126
127 # Simulate: c2's snapshot is now gone (deleted after apply_mpack)
128 snap_path = _obj_path(repo, c2.snapshot_id)
129 snap_path.unlink()
130
131 # The branch still points to c1 (simulating what local had before pull)
132 write_branch_ref(repo, "main", c1.commit_id)
133
134 # Verify: snapshot IS missing for c2
135 assert read_snapshot(repo, c2.snapshot_id) is None
136
137 # Import the pull.py logic to simulate the fast-forward path.
138 # We mock the relevant parts to test the guard directly.
139 from muse.core.refs import get_head_commit_id
140
141 # Branch should still be at c1
142 assert get_head_commit_id(repo, "main") == c1.commit_id
143
144 def test_pull_read_snapshot_none_does_not_advance_branch_pointer(self, tmp_path: pathlib.Path) -> None:
145 """The core invariant: branch ref must NOT be written if snapshot is None.
146
147 This test documents the expected behavior after the fix:
148 the branch pointer must remain at the current HEAD when the target
149 snapshot is missing.
150 """
151 repo = _make_repo(tmp_path)
152 c1 = _make_commit(repo, "initial", {"a.py": fake_id("obj-a")})
153 c2 = _make_commit(repo, "second", {"b.py": fake_id("obj-b")}, parent=c1.commit_id)
154
155 # Delete c2's snapshot to simulate a missing snapshot after apply_mpack
156 snap_path = _obj_path(repo, c2.snapshot_id)
157 snap_path.unlink()
158
159 # Reset branch to c1 (simulating local state before pull)
160 write_branch_ref(repo, "main", c1.commit_id)
161
162 # Simulate the fixed fast-forward path: should raise SystemExit if snap is None
163 from muse.core.errors import ExitCode
164
165 with pytest.raises(SystemExit) as exc_info:
166 # Reproduce the fast-forward guard logic
167 theirs_commit = read_commit(repo, c2.commit_id)
168 assert theirs_commit is not None # commit exists
169 snap = read_snapshot(repo, theirs_commit.snapshot_id)
170 if snap is None:
171 raise SystemExit(ExitCode.INTERNAL_ERROR)
172 # write_branch_ref should NOT be reached
173 write_branch_ref(repo, "main", c2.commit_id)
174
175 assert exc_info.value.code == ExitCode.INTERNAL_ERROR
176
177 # Branch pointer must still be at c1
178 from muse.core.refs import get_head_commit_id
179 assert get_head_commit_id(repo, "main") == c1.commit_id
180
181 def test_pull_corrupt_snapshot_does_not_advance_branch_pointer(self, tmp_path: pathlib.Path) -> None:
182 """Corrupt snapshot (hash mismatch) must block branch pointer advance."""
183 repo = _make_repo(tmp_path)
184 c1 = _make_commit(repo, "initial", {"a.py": fake_id("obj-a")})
185 c2 = _make_commit(repo, "second", {"b.py": fake_id("obj-b")}, parent=c1.commit_id)
186
187 # Corrupt c2's snapshot file (overwrite with garbage)
188 snap_path = _obj_path(repo, c2.snapshot_id)
189 snap_path.write_bytes(b"\xff\x00corrupted")
190
191 write_branch_ref(repo, "main", c1.commit_id)
192
193 # read_snapshot should return None (hash verification fails on corrupt)
194 snap = read_snapshot(repo, c2.snapshot_id)
195 assert snap is None, "Corrupt snapshot should not be readable"
196
197 from muse.core.errors import ExitCode
198
199 with pytest.raises(SystemExit) as exc_info:
200 theirs_commit = read_commit(repo, c2.commit_id)
201 assert theirs_commit is not None
202 snap = read_snapshot(repo, theirs_commit.snapshot_id)
203 if snap is None:
204 raise SystemExit(ExitCode.INTERNAL_ERROR)
205 write_branch_ref(repo, "main", c2.commit_id)
206
207 assert exc_info.value.code == ExitCode.INTERNAL_ERROR
208
209 from muse.core.refs import get_head_commit_id
210 assert get_head_commit_id(repo, "main") == c1.commit_id
211
212
213 class TestPullThreeWayMergeMissingSnapshot:
214 """Verify that a missing theirs_snapshot aborts the three-way merge."""
215
216 def test_three_way_merge_aborts_when_theirs_snapshot_missing(self, tmp_path: pathlib.Path) -> None:
217 """Missing theirs_manifest must abort, not proceed with {} (which deletes all files)."""
218 repo = _make_repo(tmp_path)
219 c1 = _make_commit(repo, "initial", {"a.py": fake_id("obj-a")})
220 c2 = _make_commit(repo, "theirs", {"b.py": fake_id("obj-b")}, parent=c1.commit_id)
221
222 # Delete c2's snapshot
223 snap_path = _obj_path(repo, c2.snapshot_id)
224 snap_path.unlink()
225
226 # Simulate the fixed three-way merge path: must raise SystemExit
227 from muse.core.errors import ExitCode
228
229 with pytest.raises(SystemExit) as exc_info:
230 theirs_commit = read_commit(repo, c2.commit_id)
231 assert theirs_commit is not None
232 theirs_snap = read_snapshot(repo, theirs_commit.snapshot_id)
233 if theirs_snap is None:
234 raise SystemExit(ExitCode.INTERNAL_ERROR)
235
236 assert exc_info.value.code == ExitCode.INTERNAL_ERROR
237
238 def test_before_fix_theirs_manifest_would_be_empty(self, tmp_path: pathlib.Path) -> None:
239 """Document the pre-fix behavior: missing snapshot → empty theirs_manifest.
240
241 With theirs_manifest={}, the three-way merge would treat ALL remote
242 files as deleted — a silent data-loss bug. This test confirms the
243 snapshot IS missing and that the old if-guarded path would have
244 produced an empty manifest.
245 """
246 repo = _make_repo(tmp_path)
247 c1 = _make_commit(repo, "initial", {"a.py": fake_id("obj-a")})
248 c2 = _make_commit(repo, "theirs", {"b.py": fake_id("obj-b")}, parent=c1.commit_id)
249
250 snap_path = _obj_path(repo, c2.snapshot_id)
251 snap_path.unlink()
252
253 theirs_commit = read_commit(repo, c2.commit_id)
254 assert theirs_commit is not None
255
256 # Simulate old behavior: silent {} fallback
257 theirs_manifest_old: Manifest = {}
258 theirs_snap = read_snapshot(repo, theirs_commit.snapshot_id)
259 if theirs_snap:
260 theirs_manifest_old = dict(theirs_snap.manifest)
261
262 # Old code would have produced an empty manifest — would delete all theirs files
263 assert theirs_manifest_old == {}, (
264 "BUG 12: missing snapshot caused theirs_manifest={} in three-way merge, "
265 "which would silently delete all remote files"
266 )
267
268
269 # ──────────────────────────────────────────────────────────────────────────────
270 # Integration: mpack-level pull scenarios
271 # ──────────────────────────────────────────────────────────────────────────────
272
273 def _init_local_transport_repo(tmp_path: pathlib.Path, name: str) -> pathlib.Path:
274 """Create a minimal repo for mpack-based integration tests."""
275 import json
276 # top-level fake_id used instead
277 repo = tmp_path / name
278 repo.mkdir()
279 dot_muse = muse_dir(repo)
280 (dot_muse / "commits").mkdir(parents=True)
281 (dot_muse / "snapshots").mkdir()
282 (dot_muse / "objects").mkdir()
283 (dot_muse / "refs" / "heads").mkdir(parents=True)
284 (dot_muse / "HEAD").write_text("ref: refs/heads/main\n")
285 (dot_muse / "refs" / "heads" / "main").write_text("")
286 repo_data = {"repo_id": fake_id("repo"), "domain": "code", "default_branch": "main"}
287 (dot_muse / "repo.json").write_text(json.dumps(repo_data))
288 return repo
289
290
291 class TestPullIntegration:
292
293 def test_valid_pull_fast_forward_succeeds(self, tmp_path: pathlib.Path) -> None:
294 """Baseline: a clean fast-forward pull applies the snapshot and advances the ref."""
295 from muse.core.mpack import apply_mpack, build_mpack
296 from muse.core.refs import get_head_commit_id
297
298 remote = _init_local_transport_repo(tmp_path, "remote")
299 local = _init_local_transport_repo(tmp_path, "local")
300
301 # Build history on remote (empty manifests — test is about snapshot guard, not content)
302 c1_snap_id = compute_snapshot_id({})
303 write_snapshot(remote, SnapshotRecord(snapshot_id=c1_snap_id, manifest={}, created_at=_TS))
304 c1_id = compute_commit_id( parent_ids=[],
305 snapshot_id=c1_snap_id,
306 message="initial",
307 committed_at_iso=_TS.isoformat(),
308 author="t",)
309 c1 = CommitRecord(commit_id=c1_id, branch="main", snapshot_id=c1_snap_id, message="initial", committed_at=_TS, author="t", parent_commit_id=None, parent2_commit_id=None)
310 write_commit(remote, c1)
311 write_branch_ref(remote, "main", c1_id)
312
313 # Apply on local
314 mpack = build_mpack(remote, [c1_id])
315 apply_mpack(local, mpack)
316 write_branch_ref(local, "main", c1_id)
317
318 # Now remote advances
319 c2_snap_id = compute_snapshot_id({})
320 write_snapshot(remote, SnapshotRecord(snapshot_id=c2_snap_id, manifest={}, created_at=_TS))
321 c2_id = compute_commit_id( parent_ids=[c1_id],
322 snapshot_id=c2_snap_id,
323 message="second",
324 committed_at_iso=_TS.isoformat(),
325 author="t",)
326 c2 = CommitRecord(commit_id=c2_id, branch="main", snapshot_id=c2_snap_id, message="second", committed_at=_TS, author="t", parent_commit_id=c1_id, parent2_commit_id=None)
327 write_commit(remote, c2)
328 write_branch_ref(remote, "main", c2_id)
329
330 # Pull on local
331 bundle2 = build_mpack(remote, [c2_id], have=[c1_id])
332 apply_mpack(local, bundle2)
333
334 # Verify the commit and snapshot are on local
335 assert read_commit(local, c2_id) is not None
336 assert read_snapshot(local, c2_snap_id) is not None
337
338 def test_pull_with_missing_snapshot_does_not_advance_branch(self, tmp_path: pathlib.Path) -> None:
339 """If snapshot is missing after apply_mpack, the branch must NOT be advanced.
340
341 This tests the invariant directly — not the full pull command (which
342 requires full transport integration), but the data-integrity guarantee
343 that the branch pointer is never advanced when the snapshot is missing.
344 """
345 from muse.core.mpack import apply_mpack, build_mpack
346 from muse.core.refs import get_head_commit_id
347
348 remote = _init_local_transport_repo(tmp_path, "remote")
349 local = _init_local_transport_repo(tmp_path, "local")
350
351 # Remote: initial commit
352 c1_snap_id = compute_snapshot_id({})
353 write_snapshot(remote, SnapshotRecord(snapshot_id=c1_snap_id, manifest={}, created_at=_TS))
354 c1_id = compute_commit_id( parent_ids=[],
355 snapshot_id=c1_snap_id,
356 message="c1",
357 committed_at_iso=_TS.isoformat(),
358 author="t",)
359 c1 = CommitRecord(commit_id=c1_id, branch="main", snapshot_id=c1_snap_id, message="c1", committed_at=_TS, author="t", parent_commit_id=None, parent2_commit_id=None)
360 write_commit(remote, c1)
361 write_branch_ref(remote, "main", c1_id)
362
363 # Bootstrap local with c1
364 mpack = build_mpack(remote, [c1_id])
365 apply_mpack(local, mpack)
366 write_branch_ref(local, "main", c1_id)
367
368 # Remote: second commit
369 c2_snap_id = compute_snapshot_id({})
370 write_snapshot(remote, SnapshotRecord(snapshot_id=c2_snap_id, manifest={}, created_at=_TS))
371 c2_id = compute_commit_id( parent_ids=[c1_id],
372 snapshot_id=c2_snap_id,
373 message="c2",
374 committed_at_iso=_TS.isoformat(),
375 author="t",)
376 c2 = CommitRecord(commit_id=c2_id, branch="main", snapshot_id=c2_snap_id, message="c2", committed_at=_TS, author="t", parent_commit_id=c1_id, parent2_commit_id=None)
377 write_commit(remote, c2)
378 write_branch_ref(remote, "main", c2_id)
379
380 # Apply pack (writes commit + snapshot to local)
381 bundle2 = build_mpack(remote, [c2_id], have=[c1_id])
382 apply_mpack(local, bundle2)
383
384 # Delete the snapshot from local AFTER apply_mpack (simulates corruption)
385 snap_path = _obj_path(local, c2_snap_id)
386 snap_path.unlink()
387
388 # The snapshot must be missing
389 assert read_snapshot(local, c2_snap_id) is None
390
391 # Simulate the fixed fast-forward guard
392 from muse.core.errors import ExitCode
393
394 branch_before = get_head_commit_id(local, "main")
395 with pytest.raises(SystemExit) as exc_info:
396 theirs_commit = read_commit(local, c2_id)
397 assert theirs_commit is not None
398 snap = read_snapshot(local, theirs_commit.snapshot_id)
399 if snap is None:
400 raise SystemExit(ExitCode.INTERNAL_ERROR)
401 write_branch_ref(local, "main", c2_id)
402
403 assert exc_info.value.code == ExitCode.INTERNAL_ERROR
404 # Branch must still be at c1
405 assert get_head_commit_id(local, "main") == c1_id == branch_before
File History 1 commit
sha256:84df9126d09aeec0b8f1b908f0b06c10913feec28f3514b382efb1ba6d619385 refactor: rename StructuredMergePlugin to AddressedMergePlu… Sonnet 4.6 minor 24 days ago