gabriel / muse public
test_mpack_snapshot_integrity.py python
321 lines 13.2 KB
Raw
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 20 days ago
1 """Tests for the missing-snapshot integrity invariant in pack building.
2
3 Root cause
4 ----------
5 ``build_mpack_from_walk`` silently skips a snapshot when its file is absent,
6 but still includes the commit that references it in the mpack. The
7 remote then receives a commit record pointing to a snapshot_id it will never
8 have — a dangling reference that silently corrupts the remote's history.
9
10 Invariant being enforced
11 ------------------------
12 Every commit in a push mpack MUST have its snapshot present in the local
13 store. If any snapshot file is missing, ``build_mpack_from_walk`` raises
14 ``ValueError`` ("Push aborted") rather than sending a commit with a dangling
15 snapshot reference. Behaviour:
16
17 * ``walk_commits`` detects missing snapshots and reports them in
18 ``missing_snapshots``; a WARNING is emitted for each.
19 * ``build_mpack_from_walk`` raises ``ValueError`` if ``missing_snapshots``
20 is non-empty — no partial mpack is ever returned.
21
22 These tests drive the implementation in ``muse/core/mpack.py``.
23 """
24
25 from __future__ import annotations
26
27 import datetime
28 import pathlib
29
30 import pytest
31
32 from muse.core.types import Manifest, blob_id
33 from muse.core.object_store import write_object
34
35 type _FileBytes = dict[str, bytes]
36 from muse.core.mpack import MPack, build_mpack_from_walk, walk_commits
37 from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id
38 from muse.core.commits import (
39 CommitRecord,
40 write_commit,
41 )
42 from muse.core.snapshots import (
43 SnapshotRecord,
44 write_snapshot,
45 )
46 from muse.core.paths import ref_path, muse_dir
47
48 # ---------------------------------------------------------------------------
49 # Helpers
50 # ---------------------------------------------------------------------------
51
# Repository id written into the repo.json of every throwaway test repo.
_REPO_ID = "integrity-test"


def _init_repo(root: pathlib.Path) -> None:
    """Lay out a minimal on-disk repository skeleton for *root*.

    Inside the directory returned by ``muse_dir(root)`` this creates the
    ``commits``, ``snapshots``, ``objects`` and ``refs/heads`` directories,
    writes a ``HEAD`` file pointing at ``refs/heads/main`` and writes a
    ``repo.json`` holding ``_REPO_ID`` and the ``"code"`` domain.
    """
    import json

    meta_dir = muse_dir(root)

    # exist_ok=True makes repeated initialisation of the same root harmless.
    for subdir in ("commits", "snapshots", "objects", "refs/heads"):
        meta_dir.joinpath(subdir).mkdir(parents=True, exist_ok=True)

    meta_dir.joinpath("HEAD").write_text("ref: refs/heads/main", encoding="utf-8")

    repo_config = {"repo_id": _REPO_ID, "domain": "code"}
    meta_dir.joinpath("repo.json").write_text(
        json.dumps(repo_config), encoding="utf-8"
    )
66
67
def _make_commit(
    root: pathlib.Path,
    files: _FileBytes,
    message: str,
    parent_id: str | None = None,
    branch: str = "main",
    write_snap: bool = True,
) -> CommitRecord:
    """Write a commit for *files* under *root* and point *branch* at it.

    Every file's bytes are stored with ``write_object`` and recorded in a
    path -> object-id manifest.  The snapshot id is derived from that
    manifest, and the commit id from the parent list, snapshot id, message
    and current UTC timestamp.  The commit record is always written and the
    branch ref is always overwritten with the new commit id.

    Passing ``write_snap=False`` skips writing the snapshot record while
    still writing the commit that references it, which simulates a corrupted
    store.

    Returns the ``CommitRecord`` that was written.
    """
    manifest = {}
    for rel_path, payload in files.items():
        object_id = blob_id(payload)
        write_object(root, object_id, payload)
        manifest[rel_path] = object_id

    snapshot_id = compute_snapshot_id(manifest)
    timestamp = datetime.datetime.now(datetime.timezone.utc)

    # A falsy parent_id (None or "") means a root commit with no parents.
    parents = []
    if parent_id:
        parents.append(parent_id)

    commit_id = compute_commit_id(
        parent_ids=parents,
        snapshot_id=snapshot_id,
        message=message,
        committed_at_iso=timestamp.isoformat(),
    )

    if write_snap:
        snapshot = SnapshotRecord(snapshot_id=snapshot_id, manifest=manifest)
        write_snapshot(root, snapshot)

    commit = CommitRecord(
        commit_id=commit_id,
        branch=branch,
        snapshot_id=snapshot_id,
        message=message,
        committed_at=timestamp,
        parent_commit_id=parent_id,
    )
    write_commit(root, commit)

    # Move the branch head to the commit that was just written.
    ref_path(root, branch).write_text(commit_id, encoding="utf-8")
    return commit
106
107
108 # ---------------------------------------------------------------------------
109 # I — walk_commits exposes missing_snapshots
110 # ---------------------------------------------------------------------------
111
class TestWalkCommitsMissingSnapshotDetection:
    """walk_commits must list, under ``missing_snapshots``, the snapshot ids
    of walked commits whose snapshot was never written."""

    def test_walk_commits_no_missing_snapshots_when_all_present(
        self, tmp_path: pathlib.Path
    ) -> None:
        """A single commit with its snapshot written reports nothing missing."""
        _init_repo(tmp_path)
        only = _make_commit(tmp_path, {"a.py": b"x"}, "first", write_snap=True)

        missing = walk_commits(tmp_path, [only.commit_id])["missing_snapshots"]

        assert not missing, (
            "No snapshots are missing — missing_snapshots should be empty"
        )

    def test_walk_commits_detects_single_missing_snapshot(
        self, tmp_path: pathlib.Path
    ) -> None:
        """The snapshot id of a commit written without its snapshot is reported."""
        _init_repo(tmp_path)
        intact = _make_commit(tmp_path, {"a.py": b"v1"}, "first", write_snap=True)
        # The child commit is recorded, but its snapshot is never written.
        broken = _make_commit(
            tmp_path,
            {"a.py": b"v2"},
            "second",
            parent_id=intact.commit_id,
            write_snap=False,
        )

        missing = walk_commits(tmp_path, [broken.commit_id])["missing_snapshots"]

        assert broken.snapshot_id in missing, (
            "walk_commits must expose the missing snapshot_id"
        )

    def test_walk_commits_detects_multiple_missing_snapshots_in_chain(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Both gaps in A(ok) -> B(gap) -> C(gap) -> D(ok) are reported, and
        neither intact snapshot is."""
        _init_repo(tmp_path)

        # (file content, message, snapshot written?) for each link, oldest first.
        layout = (
            (b"v1", "A", True),
            (b"v2", "B", False),
            (b"v3", "C", False),
            (b"v4", "D", True),
        )
        chain = []
        for payload, label, has_snapshot in layout:
            chain.append(
                _make_commit(
                    tmp_path,
                    {"f.py": payload},
                    label,
                    parent_id=chain[-1].commit_id if chain else None,
                    write_snap=has_snapshot,
                )
            )
        root_commit, gap_one, gap_two, tip = chain

        missing = walk_commits(tmp_path, [tip.commit_id])["missing_snapshots"]

        for broken in (gap_one, gap_two):
            assert broken.snapshot_id in missing
        for intact in (root_commit, tip):
            assert intact.snapshot_id not in missing

    def test_walk_commits_missing_snapshots_not_in_have_are_excluded(
        self, tmp_path: pathlib.Path
    ) -> None:
        """A missing snapshot belonging to a commit listed in ``have`` must
        not be reported."""
        _init_repo(tmp_path)
        already_remote = _make_commit(
            tmp_path, {"f.py": b"v1"}, "A", write_snap=False
        )
        to_push = _make_commit(
            tmp_path,
            {"f.py": b"v2"},
            "B",
            parent_id=already_remote.commit_id,
            write_snap=True,
        )

        # The parent is declared as already held by the other side, so its
        # missing snapshot is expected to be irrelevant to this walk.
        outcome = walk_commits(
            tmp_path, [to_push.commit_id], have=[already_remote.commit_id]
        )

        assert not outcome["missing_snapshots"], (
            "Commits in have are not walked — their snapshots should not be flagged"
        )
171
172
173 # ---------------------------------------------------------------------------
174 # II — build_mpack_from_walk raises when missing snapshots are present
175 # ---------------------------------------------------------------------------
176
class TestBuildPackExcludesCommitsWithMissingSnapshot:
    """build_mpack_from_walk must raise ValueError when any snapshot is absent.

    Silently skipping would push commits without their snapshots, creating
    dangling references on the remote that can never be healed without
    rewriting history.  The strict raise forces the caller to either repair
    the store (``muse verify``) or exclude the broken commits before pushing.
    """

    def test_pack_raises_when_snapshot_missing(
        self, tmp_path: pathlib.Path
    ) -> None:
        """A tip commit without its snapshot aborts pack building."""
        _init_repo(tmp_path)
        c1 = _make_commit(tmp_path, {"a.py": b"v1"}, "good", write_snap=True)
        c2 = _make_commit(tmp_path, {"a.py": b"v2"}, "broken",
                          parent_id=c1.commit_id, write_snap=False)

        walk = walk_commits(tmp_path, [c2.commit_id])
        with pytest.raises(ValueError, match="Push aborted"):
            build_mpack_from_walk(tmp_path, walk)

    def test_pack_includes_commit_when_snapshot_present(
        self, tmp_path: pathlib.Path
    ) -> None:
        """An intact commit is packed without error and appears in the mpack."""
        _init_repo(tmp_path)
        c1 = _make_commit(tmp_path, {"a.py": b"v1"}, "good", write_snap=True)

        walk = walk_commits(tmp_path, [c1.commit_id])
        mpack = build_mpack_from_walk(tmp_path, walk)

        commit_ids_in_pack = {c["commit_id"] for c in mpack["commits"]}
        assert c1.commit_id in commit_ids_in_pack

    def test_pack_raises_when_any_snapshot_missing_in_chain(
        self, tmp_path: pathlib.Path
    ) -> None:
        """A single missing snapshot in a chain aborts the entire pack."""
        _init_repo(tmp_path)
        c1 = _make_commit(tmp_path, {"f.py": b"v1"}, "A", write_snap=True)
        c2 = _make_commit(tmp_path, {"f.py": b"v2"}, "B",
                          parent_id=c1.commit_id, write_snap=False)
        c3 = _make_commit(tmp_path, {"f.py": b"v3"}, "C",
                          parent_id=c2.commit_id, write_snap=True)

        walk = walk_commits(tmp_path, [c3.commit_id])
        with pytest.raises(ValueError, match="Push aborted"):
            build_mpack_from_walk(tmp_path, walk)

    def test_pack_bundle_snapshot_list_and_commit_list_are_consistent(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Every snapshot_id referenced by a commit in the mpack must be present
        in mpack['snapshots'] — verified on a fully intact chain."""
        _init_repo(tmp_path)
        c1 = _make_commit(tmp_path, {"a.py": b"v1"}, "A", write_snap=True)
        c2 = _make_commit(tmp_path, {"a.py": b"v2"}, "B",
                          parent_id=c1.commit_id, write_snap=True)
        c3 = _make_commit(tmp_path, {"a.py": b"v3"}, "C",
                          parent_id=c2.commit_id, write_snap=True)

        walk = walk_commits(tmp_path, [c3.commit_id])
        mpack = build_mpack_from_walk(tmp_path, walk)

        # Guard against a vacuous pass: the loop below proves nothing if the
        # pack contains no commits, so first require the whole chain to be
        # present.
        commit_ids_in_pack = {c["commit_id"] for c in mpack["commits"]}
        expected_commit_ids = {c1.commit_id, c2.commit_id, c3.commit_id}
        assert expected_commit_ids <= commit_ids_in_pack, (
            "Intact chain commits are missing from the mpack: "
            f"{sorted(cid[:8] for cid in expected_commit_ids - commit_ids_in_pack)}"
        )

        snap_ids_in_bundle = {s["snapshot_id"] for s in mpack["snapshots"]}
        for commit_dict in mpack["commits"]:
            sid = commit_dict["snapshot_id"]
            assert sid in snap_ids_in_bundle, (
                f"Commit {commit_dict['commit_id'][:8]} references snapshot "
                f"{sid[:8]} which is not in the mpack — dangling reference"
            )

    def test_no_warning_when_all_snapshots_present(
        self, tmp_path: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """Walking and packing an intact commit logs no "not found" message."""
        _init_repo(tmp_path)
        c = _make_commit(tmp_path, {"x.py": b"ok"}, "clean", write_snap=True)
        import logging
        with caplog.at_level(logging.WARNING, logger="muse.core.mpack"):
            walk = walk_commits(tmp_path, [c.commit_id])
            build_mpack_from_walk(tmp_path, walk)
        assert "not found" not in caplog.text

    def test_warning_emitted_when_snapshot_missing(
        self, tmp_path: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """A missing snapshot aborts the pack, and the captured log mentions
        the first 8 characters of the missing snapshot id."""
        _init_repo(tmp_path)
        c = _make_commit(tmp_path, {"x.py": b"broken"}, "oops", write_snap=False)
        import logging
        with caplog.at_level(logging.WARNING, logger="muse.core.mpack"):
            walk = walk_commits(tmp_path, [c.commit_id])
            with pytest.raises(ValueError, match="Push aborted"):
                build_mpack_from_walk(tmp_path, walk)
        # Captured records stay available after the at_level context exits.
        assert c.snapshot_id[:8] in caplog.text
270
271
272 # ---------------------------------------------------------------------------
273 # III — regression: the real muse repo's 3 broken commits
274 # ---------------------------------------------------------------------------
275
class TestMissingSnapshotRegressionInvariant:
    """End-to-end check of the push invariant: every reachable commit we try
    to push must have its snapshot present, so build_mpack_from_walk raises
    ValueError instead of emitting a commit with a dangling snapshot ref."""

    def test_pack_aborts_on_chain_with_gaps(
        self, tmp_path: pathlib.Path
    ) -> None:
        """A chain with missing snapshots raises ValueError, not a partial mpack."""
        _init_repo(tmp_path)

        # Chain layout, oldest first: A(good) → B(broken) → C(broken) → D(good)
        layout = (
            (b"a", "A", True),
            (b"b", "B", False),
            (b"c", "C", False),
            (b"d", "D", True),
        )
        chain = []
        for payload, label, has_snapshot in layout:
            chain.append(
                _make_commit(
                    tmp_path,
                    {"f": payload},
                    label,
                    parent_id=chain[-1].commit_id if chain else None,
                    write_snap=has_snapshot,
                )
            )
        tip = chain[-1]

        walk = walk_commits(tmp_path, [tip.commit_id])
        with pytest.raises(ValueError, match="Push aborted"):
            build_mpack_from_walk(tmp_path, walk)

    def test_reachable_commits_with_missing_snapshots_are_reported(
        self, tmp_path: pathlib.Path
    ) -> None:
        """walk_commits must list every missing snapshot_id so callers can
        surface the problem before attempting a push."""
        _init_repo(tmp_path)

        # Three consecutive broken commits sandwiched between two intact ones.
        layout = (
            (b"1", "root", True),
            (b"2", "broken-1", False),
            (b"3", "broken-2", False),
            (b"4", "broken-3", False),
            (b"5", "good", True),
        )
        chain = []
        for payload, label, has_snapshot in layout:
            chain.append(
                _make_commit(
                    tmp_path,
                    {"f": payload},
                    label,
                    parent_id=chain[-1].commit_id if chain else None,
                    write_snap=has_snapshot,
                )
            )
        root_commit, *broken_commits, tip = chain

        missing = walk_commits(tmp_path, [tip.commit_id])["missing_snapshots"]

        for broken in broken_commits:
            assert broken.snapshot_id in missing
        for intact in (root_commit, tip):
            assert intact.snapshot_id not in missing
File History 4 commits
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 20 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 22 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 28 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor 29 days ago