gabriel / muse public

test_pack_missing_snapshot_integrity.py file-level

at sha256:f · View file ↗ · Intel ↗

History
1 files
1 commits
0 hotspots
0 🧊 dead
0 💥 blast risk
sha256:b adding issues docs to bust staging mpack prebuild cache. · gabriel · Jun 20, 2026
1 """Tests for the missing-snapshot integrity invariant in pack building.
2
3 Root cause
4 ----------
5 ``build_pack_from_walk`` silently skips a snapshot when its file is absent,
6 but still includes the commit that references it in the pack mpack. The
7 remote then receives a commit record pointing to a snapshot_id it will never
8 have — a dangling reference that silently corrupts the remote's history.
9
10 Invariant being enforced
11 ------------------------
12 Every commit in a push mpack MUST have its snapshot present in the local
13 store. If any snapshot file is missing, ``build_pack_from_walk`` raises
14 ``ValueError`` ("Push aborted") rather than sending a commit with a dangling
15 snapshot reference. Behaviour:
16
17 * ``walk_commits`` detects missing snapshots and reports them in
18 ``missing_snapshots``; a WARNING is emitted for each.
19 * ``build_pack_from_walk`` raises ``ValueError`` if ``missing_snapshots``
20 is non-empty — no partial mpack is ever returned.
21
22 These tests drive the implementation in ``muse/core/pack.py``.
23 """
24
25 from __future__ import annotations
26
27 import datetime
28 import hashlib
29 import pathlib
30
31 import pytest
32
33 from muse.core.types import Manifest, blob_id
34 from muse.core.object_store import write_object
35
# Maps a path string to the raw bytes stored for it; ``_make_commit`` turns
# this into a manifest of path -> blob id.
type _FileBytes = dict[str, bytes]
37 from muse.core.mpack import MPack as PackBundle, build_mpack_from_walk as build_pack_from_walk, walk_commits
38 from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id
39 from muse.core.commits import (
40 CommitRecord,
41 write_commit,
42 )
43 from muse.core.snapshots import (
44 SnapshotRecord,
45 write_snapshot,
46 )
47 from muse.core.paths import ref_path, muse_dir
48
49 # ---------------------------------------------------------------------------
50 # Helpers
51 # ---------------------------------------------------------------------------
52
# Repo id that ``_init_repo`` writes into ``repo.json`` for every test repo.
_REPO_ID = "integrity-test"
54
55
56
57
def _init_repo(root: pathlib.Path) -> None:
    """Create the minimal on-disk repository layout used by these tests.

    Inside the directory returned by ``muse_dir(root)`` this creates the
    ``commits``, ``snapshots``, ``objects`` and ``refs/heads`` directories,
    writes a ``HEAD`` file pointing at ``refs/heads/main`` and a ``repo.json``
    holding the test repo id and the ``code`` domain.
    """
    import json as _json

    meta_dir = muse_dir(root)

    # Directory skeleton first; exist_ok makes repeated calls harmless.
    for sub in ("commits", "snapshots", "objects", "refs/heads"):
        target = meta_dir / sub
        target.mkdir(parents=True, exist_ok=True)

    head_file = meta_dir / "HEAD"
    head_file.write_text("ref: refs/heads/main", encoding="utf-8")

    repo_meta = {"repo_id": _REPO_ID, "domain": "code"}
    repo_file = meta_dir / "repo.json"
    repo_file.write_text(_json.dumps(repo_meta), encoding="utf-8")
67
68
def _make_commit(
    root: pathlib.Path,
    files: _FileBytes,
    message: str,
    parent_id: str | None = None,
    branch: str = "main",
    write_snap: bool = True,
) -> CommitRecord:
    """Write a commit (and, unless told otherwise, its snapshot) under *root*.

    Args:
        root: Repository root the objects, snapshot and commit are written to.
        files: Path -> content mapping; every content blob is stored and its
            id recorded in the manifest.
        message: Commit message.
        parent_id: Id of the parent commit; a falsy value means no parents.
        branch: Branch recorded on the commit and whose ref file is updated.
        write_snap: When ``False`` the snapshot record is *not* written, which
            simulates a store whose snapshot file has gone missing.

    Returns:
        The ``CommitRecord`` that was written.
    """
    # Store every blob and remember which object id each path maps to.
    manifest = {}
    for rel_path, payload in files.items():
        object_id = blob_id(payload)
        write_object(root, object_id, payload)
        manifest[rel_path] = object_id

    snap_id = compute_snapshot_id(manifest)
    now = datetime.datetime.now(datetime.timezone.utc)

    parents = []
    if parent_id:
        parents.append(parent_id)
    commit_id = compute_commit_id(
        parent_ids=parents,
        snapshot_id=snap_id,
        message=message,
        committed_at_iso=now.isoformat(),
    )

    # Skipping this write is how the tests fabricate a dangling snapshot ref.
    if write_snap:
        snapshot = SnapshotRecord(snapshot_id=snap_id, manifest=manifest)
        write_snapshot(root, snapshot)

    record = CommitRecord(
        commit_id=commit_id,
        branch=branch,
        snapshot_id=snap_id,
        message=message,
        committed_at=now,
        parent_commit_id=parent_id,
    )
    write_commit(root, record)

    # Point the branch ref at the commit just written.
    branch_ref = ref_path(root, branch)
    branch_ref.write_text(commit_id, encoding="utf-8")
    return record
107
108
109 # ---------------------------------------------------------------------------
110 # I — walk_commits exposes missing_snapshots
111 # ---------------------------------------------------------------------------
112
class TestWalkCommitsMissingSnapshotDetection:
    """walk_commits must report commits whose snapshot files are absent.

    Each test builds a small commit chain with ``_make_commit`` (passing
    ``write_snap=False`` to leave a snapshot unwritten) and inspects the
    ``"missing_snapshots"`` entry of the value returned by ``walk_commits``.
    """

    def test_walk_commits_no_missing_snapshots_when_all_present(
        self, tmp_path: pathlib.Path
    ) -> None:
        """A single intact commit yields an empty ``missing_snapshots``."""
        _init_repo(tmp_path)
        c = _make_commit(tmp_path, {"a.py": b"x"}, "first", write_snap=True)
        result = walk_commits(tmp_path, [c.commit_id])
        assert not result["missing_snapshots"], (
            "No snapshots are missing — missing_snapshots should be empty"
        )

    def test_walk_commits_detects_single_missing_snapshot(
        self, tmp_path: pathlib.Path
    ) -> None:
        """The snapshot id of a commit written without its snapshot is reported."""
        _init_repo(tmp_path)
        c1 = _make_commit(tmp_path, {"a.py": b"v1"}, "first", write_snap=True)
        # Second commit: snapshot file deliberately not written
        c2 = _make_commit(tmp_path, {"a.py": b"v2"}, "second",
                          parent_id=c1.commit_id, write_snap=False)

        result = walk_commits(tmp_path, [c2.commit_id])
        assert c2.snapshot_id in result["missing_snapshots"], (
            "walk_commits must expose the missing snapshot_id"
        )

    def test_walk_commits_detects_multiple_missing_snapshots_in_chain(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Only the two broken middle commits of a four-commit chain are flagged."""
        _init_repo(tmp_path)
        c1 = _make_commit(tmp_path, {"f.py": b"v1"}, "A", write_snap=True)
        c2 = _make_commit(tmp_path, {"f.py": b"v2"}, "B",
                          parent_id=c1.commit_id, write_snap=False)
        c3 = _make_commit(tmp_path, {"f.py": b"v3"}, "C",
                          parent_id=c2.commit_id, write_snap=False)
        c4 = _make_commit(tmp_path, {"f.py": b"v4"}, "D",
                          parent_id=c3.commit_id, write_snap=True)

        result = walk_commits(tmp_path, [c4.commit_id])
        assert c2.snapshot_id in result["missing_snapshots"]
        assert c3.snapshot_id in result["missing_snapshots"]
        assert c1.snapshot_id not in result["missing_snapshots"]
        assert c4.snapshot_id not in result["missing_snapshots"]

    def test_walk_commits_missing_snapshots_not_in_have_are_excluded(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Commits in the have-set are never walked so their snapshots don't matter."""
        _init_repo(tmp_path)
        c1 = _make_commit(tmp_path, {"f.py": b"v1"}, "A", write_snap=False)
        c2 = _make_commit(tmp_path, {"f.py": b"v2"}, "B",
                          parent_id=c1.commit_id, write_snap=True)

        # c1 is in have — BFS stops before it; its missing snapshot is irrelevant.
        result = walk_commits(tmp_path, [c2.commit_id], have=[c1.commit_id])
        assert not result["missing_snapshots"], (
            "Commits in have are not walked — their snapshots should not be flagged"
        )
172
173
174 # ---------------------------------------------------------------------------
175 # II — build_pack_from_walk raises when missing snapshots are present
176 # ---------------------------------------------------------------------------
177
class TestBuildPackExcludesCommitsWithMissingSnapshot:
    """build_pack_from_walk must raise ValueError when any snapshot is absent.

    Silently skipping would push commits without their snapshots, creating
    dangling references on the remote that can never be healed without
    rewriting history.  The strict raise forces the caller to either repair
    the store (``muse verify``) or exclude the broken commits before pushing.

    NOTE: the class name still says "Excludes", but the behaviour asserted
    here is a hard ``ValueError`` ("Push aborted"); the name is kept so that
    existing pytest node ids remain stable.
    """

    def test_pack_raises_when_snapshot_missing(
        self, tmp_path: pathlib.Path
    ) -> None:
        """A tip commit without its snapshot aborts pack building."""
        _init_repo(tmp_path)
        c1 = _make_commit(tmp_path, {"a.py": b"v1"}, "good", write_snap=True)
        c2 = _make_commit(tmp_path, {"a.py": b"v2"}, "broken",
                          parent_id=c1.commit_id, write_snap=False)

        walk = walk_commits(tmp_path, [c2.commit_id])
        with pytest.raises(ValueError, match="Push aborted"):
            build_pack_from_walk(tmp_path, walk)

    def test_pack_includes_commit_when_snapshot_present(
        self, tmp_path: pathlib.Path
    ) -> None:
        """An intact commit appears in the ``"commits"`` entry of the pack."""
        _init_repo(tmp_path)
        c1 = _make_commit(tmp_path, {"a.py": b"v1"}, "good", write_snap=True)

        walk = walk_commits(tmp_path, [c1.commit_id])
        mpack = build_pack_from_walk(tmp_path, walk)

        commit_ids_in_pack = {c["commit_id"] for c in mpack["commits"]}
        assert c1.commit_id in commit_ids_in_pack

    def test_pack_raises_when_any_snapshot_missing_in_chain(
        self, tmp_path: pathlib.Path
    ) -> None:
        """A single missing snapshot in a chain aborts the entire pack."""
        _init_repo(tmp_path)
        c1 = _make_commit(tmp_path, {"f.py": b"v1"}, "A", write_snap=True)
        c2 = _make_commit(tmp_path, {"f.py": b"v2"}, "B",
                          parent_id=c1.commit_id, write_snap=False)
        c3 = _make_commit(tmp_path, {"f.py": b"v3"}, "C",
                          parent_id=c2.commit_id, write_snap=True)

        walk = walk_commits(tmp_path, [c3.commit_id])
        with pytest.raises(ValueError, match="Push aborted"):
            build_pack_from_walk(tmp_path, walk)

    def test_pack_bundle_snapshot_list_and_commit_list_are_consistent(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Every snapshot_id referenced by a commit in the mpack must be present
        in mpack['snapshots'] — verified on a fully intact chain."""
        _init_repo(tmp_path)
        c1 = _make_commit(tmp_path, {"a.py": b"v1"}, "A", write_snap=True)
        c2 = _make_commit(tmp_path, {"a.py": b"v2"}, "B",
                          parent_id=c1.commit_id, write_snap=True)
        c3 = _make_commit(tmp_path, {"a.py": b"v3"}, "C",
                          parent_id=c2.commit_id, write_snap=True)

        walk = walk_commits(tmp_path, [c3.commit_id])
        mpack = build_pack_from_walk(tmp_path, walk)

        snap_ids_in_bundle = {s["snapshot_id"] for s in mpack["snapshots"]}
        for commit_dict in mpack["commits"]:
            sid = commit_dict["snapshot_id"]
            assert sid in snap_ids_in_bundle, (
                f"Commit {commit_dict['commit_id'][:8]} references snapshot "
                f"{sid[:8]} which is not in the mpack — dangling reference"
            )

    def test_no_warning_when_all_snapshots_present(
        self, tmp_path: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """No "not found" text is logged when the store is intact."""
        import logging

        _init_repo(tmp_path)
        c = _make_commit(tmp_path, {"x.py": b"ok"}, "clean", write_snap=True)
        # Capture covers both the walk and the pack build.
        with caplog.at_level(logging.WARNING, logger="muse.core.mpack"):
            walk = walk_commits(tmp_path, [c.commit_id])
            build_pack_from_walk(tmp_path, walk)
        assert "not found" not in caplog.text

    def test_warning_emitted_when_snapshot_missing(
        self, tmp_path: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """The captured log mentions the first 8 chars of the missing snapshot id."""
        import logging

        _init_repo(tmp_path)
        c = _make_commit(tmp_path, {"x.py": b"broken"}, "oops", write_snap=False)
        with caplog.at_level(logging.WARNING, logger="muse.core.mpack"):
            walk = walk_commits(tmp_path, [c.commit_id])
            with pytest.raises(ValueError, match="Push aborted"):
                build_pack_from_walk(tmp_path, walk)
        assert c.snapshot_id[:8] in caplog.text
271
272
273 # ---------------------------------------------------------------------------
274 # III — regression: the real muse repo's 3 broken commits
275 # ---------------------------------------------------------------------------
276
class TestMissingSnapshotRegressionInvariant:
    """End-to-end check of the missing-snapshot invariant.

    Every reachable commit we attempt to push must have its snapshot present:
    ``build_pack_from_walk`` raises ``ValueError`` instead of producing a pack
    that contains a commit with a dangling snapshot reference.
    """

    def test_pack_aborts_on_chain_with_gaps(
        self, tmp_path: pathlib.Path
    ) -> None:
        """A chain with missing snapshots raises ValueError, not a partial mpack."""
        _init_repo(tmp_path)

        # Chain under test: A(good) → B(broken) → C(broken) → D(good)
        chain_spec = (
            (b"a", "A", True),
            (b"b", "B", False),
            (b"c", "C", False),
            (b"d", "D", True),
        )
        tip = None
        for payload, label, has_snapshot in chain_spec:
            tip = _make_commit(
                tmp_path,
                {"f": payload},
                label,
                parent_id=tip.commit_id if tip is not None else None,
                write_snap=has_snapshot,
            )

        walk = walk_commits(tmp_path, [tip.commit_id])
        with pytest.raises(ValueError, match="Push aborted"):
            build_pack_from_walk(tmp_path, walk)

    def test_reachable_commits_with_missing_snapshots_are_reported(
        self, tmp_path: pathlib.Path
    ) -> None:
        """walk_commits must expose all missing snapshot_ids so callers can
        surface the issue before attempting a push."""
        _init_repo(tmp_path)

        # One good root, three consecutive broken commits, one good tip.
        chain_spec = (
            (b"1", "root", True),
            (b"2", "broken-1", False),
            (b"3", "broken-2", False),
            (b"4", "broken-3", False),
            (b"5", "good", True),
        )
        records = []
        for payload, label, has_snapshot in chain_spec:
            parent = records[-1].commit_id if records else None
            records.append(
                _make_commit(
                    tmp_path,
                    {"f": payload},
                    label,
                    parent_id=parent,
                    write_snap=has_snapshot,
                )
            )
        root_commit, broken_1, broken_2, broken_3, tip = records

        missing = walk_commits(tmp_path, [tip.commit_id])["missing_snapshots"]
        assert broken_1.snapshot_id in missing
        assert broken_2.snapshot_id in missing
        assert broken_3.snapshot_id in missing
        assert root_commit.snapshot_id not in missing
        assert tip.snapshot_id not in missing