gabriel / muse public
test_mpack_core.py python
478 lines 18.0 KB
Raw
sha256:8860dea10c653956b613a814cc752a6d34cb3986cdf16749a49172affdabf045 fix tests Human minor ⚠ breaking 4 days ago
1 """Tests for muse.core.mpack — MPack build and apply operations."""
2
3 from __future__ import annotations
4
5 import datetime
6 import json
7 import pathlib
8
9 import pytest
10
11 from muse.core.object_store import has_object, read_object, write_object
12 from muse.core.mpack import (
13 BlobPayload,
14 MPack,
15 SnapshotDeltaDict,
16 apply_mpack,
17 build_mpack,
18 )
19 from muse.core.commits import CommitDict
20 from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id
21
22 from muse.core.types import Manifest, blob_id, fake_id
23 from muse.core.commits import (
24 CommitRecord,
25 read_commit,
26 write_commit,
27 )
28 from muse.core.snapshots import (
29 SnapshotRecord,
30 read_snapshot,
31 write_snapshot,
32 )
33 from muse.core.paths import commits_dir, objects_dir, snapshots_dir, muse_dir
34
35
36 # ---------------------------------------------------------------------------
37 # Fixtures
38 # ---------------------------------------------------------------------------
39
40
@pytest.fixture
def repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Lay out a bare-bones ``.muse/`` store under *tmp_path* and return the root.

    The store directory is whatever ``muse_dir(tmp_path)`` resolves to.  Inside
    it the fixture creates ``commits/``, ``snapshots/``, ``objects/`` and
    ``refs/heads/``, then writes a ``repo.json`` with a fixed ``repo_id``, a
    ``HEAD`` that points at ``refs/heads/main``, and an empty
    ``refs/heads/main`` file.
    """
    store = muse_dir(tmp_path)
    heads = store / "refs" / "heads"

    # No exist_ok: mkdir() fails loudly if any of these already exists.
    for directory in (store / "commits", store / "snapshots", store / "objects", heads):
        directory.mkdir(parents=True)

    repo_config = json.dumps({"repo_id": "test-repo"})
    (store / "repo.json").write_text(repo_config)
    (store / "HEAD").write_text("ref: refs/heads/main\n")
    (heads / "main").write_text("")
    return tmp_path
53
54
def _make_object(root: pathlib.Path, content: bytes) -> str:
    """Store *content* in the object store under *root*.

    The object ID is derived from the bytes via ``blob_id`` and handed back
    to the caller.
    """
    object_id = blob_id(content)
    write_object(root, object_id, content)
    return object_id
60
61
def _make_snapshot(root: pathlib.Path, manifest: Manifest) -> str:
    """Persist *manifest* as a snapshot keyed by its computed ID; return that ID.

    The ID comes from ``compute_snapshot_id(manifest)``, so the stored record's
    ``snapshot_id`` always corresponds to the manifest it carries.
    """
    snapshot_id = compute_snapshot_id(manifest)
    record = SnapshotRecord(snapshot_id=snapshot_id, manifest=manifest)
    write_snapshot(root, record)
    return snapshot_id
67
68
def _make_commit(
    root: pathlib.Path,
    snapshot_id: str,
    message: str = "test",
    parent: str | None = None,
) -> str:
    """Write a root or single-parent commit on ``main`` and return its ID.

    The ID is produced by ``compute_commit_id`` from the parent list, the
    snapshot ID, the message and a fixed timestamp (2026-01-01 UTC), and the
    same values are stored in the ``CommitRecord`` that gets written.
    """
    timestamp = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)

    # A falsy parent (None or "") is hashed as "no parents".
    parents: list[str] = []
    if parent:
        parents.append(parent)

    new_id = compute_commit_id(
        parent_ids=parents,
        snapshot_id=snapshot_id,
        message=message,
        committed_at_iso=timestamp.isoformat(),
    )
    record = CommitRecord(
        commit_id=new_id,
        branch="main",
        snapshot_id=snapshot_id,
        message=message,
        committed_at=timestamp,
        parent_commit_id=parent,
    )
    write_commit(root, record)
    return new_id
94
95
96 # ---------------------------------------------------------------------------
97 # build_mpack tests
98 # ---------------------------------------------------------------------------
99
100
class TestBuildPack:
    """Checks on what ``build_mpack`` puts into a pack for a given set of tip commits."""

    def test_single_commit_no_history(self, repo: pathlib.Path) -> None:
        """A lone root commit packs as one commit, one snapshot and one blob."""
        content = b"hello world"
        oid = _make_object(repo, content)
        snap_id = _make_snapshot(repo, {"file.txt": oid})
        c1_id = _make_commit(repo, snap_id)

        mpack = build_mpack(repo, [c1_id])

        assert len(mpack.get("commits") or []) == 1
        assert len(mpack.get("snapshots") or []) == 1
        assert len(mpack.get("blobs") or []) == 1
        assert (mpack.get("blobs") or [{}])[0]["object_id"] == oid

    def test_object_content_is_raw_bytes(self, repo: pathlib.Path) -> None:
        """The blob entry carries the stored bytes unchanged, including NUL bytes."""
        content = b"\x00\x01\x02\x03"
        oid = _make_object(repo, content)
        snap_id = _make_snapshot(repo, {"bin.dat": oid})
        c1_id = _make_commit(repo, snap_id)

        mpack = build_mpack(repo, [c1_id])

        objs = mpack.get("blobs") or []
        assert len(objs) == 1
        assert objs[0]["content"] == content

    def test_multi_commit_chain(self, repo: pathlib.Path) -> None:
        """Packing the tip of a two-commit chain pulls in the parent as well."""
        oid1 = _make_object(repo, b"v1")
        oid2 = _make_object(repo, b"v2")
        snap1_id = _make_snapshot(repo, {"f.txt": oid1})
        snap2_id = _make_snapshot(repo, {"f.txt": oid2})
        c1_id = _make_commit(repo, snap1_id)
        c2_id = _make_commit(repo, snap2_id, parent=c1_id)

        mpack = build_mpack(repo, [c2_id])

        assert len(mpack.get("commits") or []) == 2
        assert len(mpack.get("snapshots") or []) == 2
        assert len(mpack.get("blobs") or []) == 2

    def test_have_excludes_ancestor_commits(self, repo: pathlib.Path) -> None:
        """Commits listed in ``have`` are left out of the pack."""
        oid1 = _make_object(repo, b"v1")
        oid2 = _make_object(repo, b"v2")
        snap1_id = _make_snapshot(repo, {"f.txt": oid1})
        snap2_id = _make_snapshot(repo, {"f.txt": oid2})
        c1_id = _make_commit(repo, snap1_id)
        c2_id = _make_commit(repo, snap2_id, parent=c1_id)

        mpack = build_mpack(repo, [c2_id], have=[c1_id])

        # Only c2 should be in the mpack; c1 is in have.
        commit_ids = [c["commit_id"] for c in (mpack.get("commits") or [])]
        assert c2_id in commit_ids
        assert c1_id not in commit_ids

    def test_deduplicates_shared_objects(self, repo: pathlib.Path) -> None:
        """A blob referenced by two snapshots is packed exactly once."""
        shared_oid = _make_object(repo, b"shared")
        snap1_id = _make_snapshot(repo, {"a.txt": shared_oid})
        snap2_id = _make_snapshot(repo, {"b.txt": shared_oid})
        c1_id = _make_commit(repo, snap1_id)
        c2_id = _make_commit(repo, snap2_id, parent=c1_id)

        mpack = build_mpack(repo, [c2_id])

        # Shared object should appear only once.
        object_ids = [o["object_id"] for o in (mpack.get("blobs") or [])]
        assert object_ids.count(shared_oid) == 1

    def test_empty_commit_ids_returns_empty_bundle(self, repo: pathlib.Path) -> None:
        """With no tip commits requested, the pack has no commits and no blobs."""
        mpack = build_mpack(repo, [])
        assert (mpack.get("commits") or []) == []
        assert (mpack.get("blobs") or []) == []

    def test_missing_commit_skipped_gracefully(self, repo: pathlib.Path) -> None:
        """An ID for which no commit was written yields an empty commit list, not an error."""
        # Should not raise even if a commit_id does not exist.
        mpack = build_mpack(repo, [fake_id("nonexistent")])
        assert (mpack.get("commits") or []) == []

    def test_snapshot_always_included_for_every_commit(self, repo: pathlib.Path) -> None:
        """Every commit in the pack must have its snapshot included.

        This is the data-integrity invariant that prevents the corruption
        pattern where commits arrive on the remote without their snapshots,
        making them permanently unreadable after a local .muse wipe.
        """
        oid = _make_object(repo, b"content")
        snap_id = _make_snapshot(repo, {"a.txt": oid})
        c_id = _make_commit(repo, snap_id)

        mpack = build_mpack(repo, [c_id])

        commit_snap_ids = {c["snapshot_id"] for c in (mpack.get("commits") or [])}
        bundled_snap_ids = {s["snapshot_id"] for s in (mpack.get("snapshots") or [])}

        assert commit_snap_ids == bundled_snap_ids, (
            "Every commit's snapshot_id must appear in the mpack's snapshots list"
        )

    def test_missing_snapshot_raises_not_skips(self, repo: pathlib.Path) -> None:
        """build_mpack must raise ValueError when a commit's snapshot is absent.

        Silently skipping was the root cause of the recurring snapshot
        corruption: commits reached the remote without their snapshots, and
        subsequent pulls restored commits but not snapshots.
        """
        # Write the commit record directly — no snapshot is written.  The
        # module-level ``datetime`` and ``compute_commit_id`` imports are used
        # here, exactly as in the merge-commit test below.
        snap_id = fake_id("ab-missing-snapshot")  # valid prefixed ID, but no snapshot file exists
        committed_at = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
        c_id = compute_commit_id(
            parent_ids=[],
            snapshot_id=snap_id,
            message="orphan",
            committed_at_iso=committed_at.isoformat(),
        )
        write_commit(
            repo,
            CommitRecord(
                commit_id=c_id,
                branch="main",
                snapshot_id=snap_id,
                message="orphan",
                committed_at=committed_at,
            ),
        )

        with pytest.raises(ValueError, match="Push aborted"):
            build_mpack(repo, [c_id])

    def test_merge_commit_includes_both_parents(self, repo: pathlib.Path) -> None:
        """Packing a two-parent merge commit brings in the merge and both parents."""
        oid_a = _make_object(repo, b"branch-a")
        oid_b = _make_object(repo, b"branch-b")
        snap_a_id = _make_snapshot(repo, {"a.txt": oid_a})
        snap_b_id = _make_snapshot(repo, {"b.txt": oid_b})
        snap_m_id = _make_snapshot(repo, {"a.txt": oid_a, "b.txt": oid_b})
        c_a_id = _make_commit(repo, snap_a_id)
        c_b_id = _make_commit(repo, snap_b_id)
        # Merge commit with two parents — compute its ID from both parent hashes.
        committed_at = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
        c_merge_id = compute_commit_id(
            parent_ids=[c_a_id, c_b_id],
            snapshot_id=snap_m_id,
            message="merge",
            committed_at_iso=committed_at.isoformat(),
        )
        c_merge = CommitRecord(
            commit_id=c_merge_id,
            branch="main",
            snapshot_id=snap_m_id,
            message="merge",
            committed_at=committed_at,
            parent_commit_id=c_a_id,
            parent2_commit_id=c_b_id,
        )
        write_commit(repo, c_merge)

        mpack = build_mpack(repo, [c_merge_id])
        commit_ids = {c["commit_id"] for c in (mpack.get("commits") or [])}
        assert {c_merge_id, c_a_id, c_b_id}.issubset(commit_ids)
255
256
257 # ---------------------------------------------------------------------------
258 # apply_mpack tests
259 # ---------------------------------------------------------------------------
260
261
class TestApplyPack:
    """Checks on ``apply_mpack``: what gets written and what the result dict reports."""

    def test_round_trip(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        """A pack built in one repo and applied to an empty store carries every record over."""
        payload = b"round trip"
        blob = _make_object(repo, payload)
        snapshot = _make_snapshot(repo, {"f.txt": blob})
        commit = _make_commit(repo, snapshot, message="initial")

        mpack = build_mpack(repo, [commit])

        # Destination: a store containing nothing but the three directories.
        dest = tmp_path / "dest"
        dest_store = muse_dir(dest)
        for sub in ("commits", "snapshots", "objects"):
            (dest_store / sub).mkdir(parents=True)

        outcome = apply_mpack(dest, mpack)

        assert outcome["blobs_written"] == 1
        assert has_object(dest, blob)
        assert read_object(dest, blob) == payload
        assert read_snapshot(dest, snapshot) is not None
        assert read_commit(dest, commit) is not None

    def test_idempotent_apply(self, repo: pathlib.Path) -> None:
        """Re-applying a pack does not raise and reports ``blobs_written == 0``."""
        blob = _make_object(repo, b"idempotent")
        snapshot = _make_snapshot(repo, {"f.txt": blob})
        commit = _make_commit(repo, snapshot)

        mpack = build_mpack(repo, [commit])
        apply_mpack(repo, mpack)
        second = apply_mpack(repo, mpack)

        # Everything was already present, so nothing new is written.
        assert second["blobs_written"] == 0

    def test_malformed_object_skipped(self, repo: pathlib.Path) -> None:
        """A bad blob entry is skipped without raising and is not counted as written."""
        # NOTE(review): the content here is valid (empty) bytes; presumably the
        # entry is rejected because "abc123" is not a well-formed object ID —
        # confirm against apply_mpack.
        bogus = BlobPayload(object_id="abc123", content=b"")
        mpack: MPack = {"commits": [], "snapshots": [], "blobs": [bogus]}

        outcome = apply_mpack(repo, mpack)

        assert outcome["blobs_written"] == 0

    def test_empty_bundle_is_noop(self, repo: pathlib.Path) -> None:
        """An mpack with no keys at all writes no blobs."""
        nothing: MPack = {}
        outcome = apply_mpack(repo, nothing)
        assert outcome["blobs_written"] == 0

    def test_apply_preserves_commit_metadata(
        self, repo: pathlib.Path, tmp_path: pathlib.Path
    ) -> None:
        """The commit read back from the destination keeps its message and snapshot ID."""
        blob = _make_object(repo, b"data")
        snapshot = _make_snapshot(repo, {"data.bin": blob})
        commit = _make_commit(repo, snapshot, message="preserve me")

        mpack = build_mpack(repo, [commit])

        dest = tmp_path / "d"
        for locate in (commits_dir, snapshots_dir, objects_dir):
            locate(dest).mkdir(parents=True)
        apply_mpack(dest, mpack)

        restored = read_commit(dest, commit)
        assert restored is not None
        assert restored.message == "preserve me"
        assert restored.snapshot_id == snapshot

    def test_apply_returns_new_object_count(
        self, repo: pathlib.Path, tmp_path: pathlib.Path
    ) -> None:
        """``blobs_written`` equals the number of blobs newly stored in the destination."""
        first = _make_object(repo, b"obj1")
        second = _make_object(repo, b"obj2")
        snapshot = _make_snapshot(repo, {"a": first, "b": second})
        commit = _make_commit(repo, snapshot)

        mpack = build_mpack(repo, [commit])

        dest = tmp_path / "d"
        for locate in (commits_dir, snapshots_dir, objects_dir):
            locate(dest).mkdir(parents=True)

        outcome = apply_mpack(dest, mpack)
        assert outcome["blobs_written"] == 2
351
352
353 # ---------------------------------------------------------------------------
354 # Commit-without-snapshot guard — regression for the "snaps=0" pull bug.
355 #
356 # When the server returns a commit but omits its snapshot (e.g. snaps=0 due to
357 # a wire_fetch bug), apply_mpack must NOT write the commit to the local store.
358 # Writing the commit without its snapshot leaves the store in an inconsistent
359 # state: on the next pull the commit is in `have`, the server returns nothing
360 # new, and pull aborts with "snapshot missing" forever.
361 # ---------------------------------------------------------------------------
362
363
def _wire_snap(manifest: Manifest) -> SnapshotDeltaDict:
    """Return the wire form of *manifest* as a parentless snapshot delta.

    ``parent_snapshot_id`` is ``None``, the whole manifest travels in
    ``delta_upsert`` and ``delta_remove`` is empty, so no delta chain is
    involved.
    """
    wire: SnapshotDeltaDict = SnapshotDeltaDict(
        snapshot_id=compute_snapshot_id(manifest),
        parent_snapshot_id=None,
        delta_upsert=manifest,
        delta_remove=[],
    )
    return wire
373
374
def _wire_commit(snap_id: str, message: str = "c", parent: str | None = None) -> CommitDict:
    """Build a minimal wire commit dict whose ``commit_id`` is its own content hash.

    Args:
        snap_id: Snapshot ID the commit points at.
        message: Commit message; also part of the hashed content.
        parent: Optional single parent commit ID.  A falsy value is hashed
            as "no parents" and stored as-is under ``parent_commit_id``.

    Returns:
        A dict on branch ``main`` with a fixed author and timestamp
        (2026-01-01 UTC), no second parent, and empty agent/model/toolchain IDs.
    """
    # ``datetime`` and ``compute_commit_id`` (``hash_commit`` under its
    # module-level alias) are already imported at the top of the file, so no
    # function-local imports are needed.
    committed_at = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
    committed_at_iso = committed_at.isoformat()
    parent_ids = [parent] if parent else []
    author = "gabriel"
    commit_id = compute_commit_id(
        parent_ids=parent_ids,
        snapshot_id=snap_id,
        message=message,
        committed_at_iso=committed_at_iso,
        author=author,
        signer_public_key="",
    )
    return {
        "commit_id": commit_id,
        "branch": "main",
        "snapshot_id": snap_id,
        "message": message,
        "committed_at": committed_at_iso,
        "parent_commit_id": parent,
        "parent2_commit_id": None,
        "author": author,
        "agent_id": "",
        "model_id": "",
        "toolchain_id": "",
    }
403
404
class TestApplyMpackMissingSnapshotGuard:
    """Guard tests: ``apply_mpack`` refuses a commit whose snapshot is neither in
    the mpack nor in the local store (regression for the snaps=0 wire_fetch bug)."""

    def test_commit_not_written_when_snapshot_absent_from_mpack(
        self, repo: pathlib.Path
    ) -> None:
        """Core regression: the mpack carries a commit but not its snapshot,
        so the commit must stay out of the local store."""
        blob = blob_id(b"some content")
        commit = _wire_commit(compute_snapshot_id({"f.txt": blob}))

        # Commit present, snapshot list empty — the snaps=0 shape.
        incoming: MPack = {"commits": [commit], "snapshots": [], "blobs": []}
        outcome = apply_mpack(repo, incoming)

        assert outcome["commits_written"] == 0, (
            "commit must not be written when its snapshot is absent from the mpack"
        )
        assert read_commit(repo, commit["commit_id"]) is None, (
            "commit must not be present in the local store"
        )

    def test_next_pull_can_succeed_after_refused_commit(
        self, repo: pathlib.Path
    ) -> None:
        """A refused commit is not sticky: re-applying it together with its
        snapshot writes both the commit and the snapshot."""
        manifest = {"f.txt": blob_id(b"content")}
        snapshot_id = compute_snapshot_id(manifest)
        commit = _wire_commit(snapshot_id)
        snapshot = _wire_snap(manifest)

        # Round one: no snapshot on the wire, so the commit is turned away.
        without_snapshot: MPack = {"commits": [commit], "snapshots": [], "blobs": []}
        first = apply_mpack(repo, without_snapshot)
        assert first["commits_written"] == 0

        # Round two: same commit, snapshot now included, so it goes through.
        with_snapshot: MPack = {"commits": [commit], "snapshots": [snapshot], "blobs": []}
        second = apply_mpack(repo, with_snapshot)
        assert second["commits_written"] == 1
        assert read_commit(repo, commit["commit_id"]) is not None
        assert read_snapshot(repo, snapshot_id) is not None

    def test_commit_written_when_snapshot_already_in_local_store(
        self, repo: pathlib.Path
    ) -> None:
        """A snapshot that is already stored locally satisfies the guard even
        when the mpack itself carries no snapshots."""
        blob = _make_object(repo, b"pre-existing content")
        stored_snapshot = _make_snapshot(repo, {"f.txt": blob})  # written before the apply
        commit = _wire_commit(stored_snapshot)

        incoming: MPack = {"commits": [commit], "snapshots": [], "blobs": []}
        outcome = apply_mpack(repo, incoming)

        assert outcome["commits_written"] == 1, (
            "commit must be written when its snapshot is already in the local store"
        )
        assert read_commit(repo, commit["commit_id"]) is not None

    def test_commit_written_when_snapshot_in_same_mpack(
        self, repo: pathlib.Path
    ) -> None:
        """Happy path: commit and snapshot arrive in the same mpack, and the
        commit is written."""
        manifest = {"g.txt": blob_id(b"happy path")}
        commit = _wire_commit(compute_snapshot_id(manifest))
        snapshot = _wire_snap(manifest)

        incoming: MPack = {"commits": [commit], "snapshots": [snapshot], "blobs": []}
        outcome = apply_mpack(repo, incoming)

        assert outcome["commits_written"] == 1
        assert read_commit(repo, commit["commit_id"]) is not None
File History 3 commits
sha256:2c59968e5fd34f1740180d630338fddfb8c465b71e150a0965f11dbdcba5dec7 fix: apply_mpack refuses commits when their snapshot is abs… Sonnet 4.6 patch 4 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 11 days ago