gabriel / muse public
test_core_pack.py python
403 lines 15.3 KB
Raw
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 28 days ago
1 """Tests for muse.core.mpack — MPack build and apply operations."""
2
3 from __future__ import annotations
4
5 import datetime
6 import json
7 import pathlib
8
9 import pytest
10
11 from muse.core.object_store import has_object, read_object, write_object
12 from muse.core.mpack import (
13 ObjectPayload,
14 MPack,
15 apply_mpack,
16 build_mpack,
17 )
18 from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id
19
20 from muse.core.types import Manifest, NULL_LONG_ID, long_id
21 from muse.core.store import (
22 CommitRecord,
23 SnapshotRecord,
24 read_commit,
25 read_snapshot,
26 write_commit,
27 write_snapshot,
28 )
29 from muse.core.paths import commits_dir, objects_dir, snapshots_dir, muse_dir
30
31
32 # ---------------------------------------------------------------------------
33 # Fixtures
34 # ---------------------------------------------------------------------------
35
36
@pytest.fixture
def repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Lay out a bare ``.muse/`` tree under *tmp_path* and return the root.

    The tree holds empty ``commits``, ``snapshots``, ``objects`` and
    ``refs/heads`` directories, a ``repo.json`` with a fixed repo id, a
    ``HEAD`` pointing at ``refs/heads/main`` and an empty ``main`` ref file.
    """
    meta_root = muse_dir(tmp_path)
    heads = meta_root / "refs" / "heads"
    for directory in (
        meta_root / "commits",
        meta_root / "snapshots",
        meta_root / "objects",
        heads,
    ):
        directory.mkdir(parents=True)

    repo_config = json.dumps({"repo_id": "test-repo"})
    (meta_root / "repo.json").write_text(repo_config)
    (meta_root / "HEAD").write_text("ref: refs/heads/main\n")
    # The branch ref file is created empty.
    (heads / "main").write_text("")
    return tmp_path
49
50
def _make_object(root: pathlib.Path, content: bytes) -> str:
    """Store *content* in the object store under *root*.

    The object id is derived from the bytes with ``blob_id`` and returned
    to the caller.
    """
    from muse.core.types import blob_id

    object_id = blob_id(content)
    write_object(root, object_id, content)
    return object_id
57
58
def _make_snapshot(root: pathlib.Path, manifest: Manifest) -> str:
    """Persist a snapshot record for *manifest* and return its id.

    The id is computed from the manifest itself via ``compute_snapshot_id``.
    """
    snapshot_id = compute_snapshot_id(manifest)
    record = SnapshotRecord(snapshot_id=snapshot_id, manifest=manifest)
    write_snapshot(root, record)
    return snapshot_id
64
65
def _make_commit(
    root: pathlib.Path,
    snapshot_id: str,
    message: str = "test",
    parent: str | None = None,
) -> str:
    """Persist a commit on branch ``main`` and return its id.

    The id is computed with ``compute_commit_id`` from the parent list, the
    snapshot id, the message and a fixed timestamp (2026-01-01 UTC), so two
    calls with the same arguments produce the same id.
    """
    timestamp = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)

    # A falsy parent (``None`` or an empty string) hashes as "no parents".
    parents: list[str] = []
    if parent:
        parents.append(parent)

    new_id = compute_commit_id(
        parent_ids=parents,
        snapshot_id=snapshot_id,
        message=message,
        committed_at_iso=timestamp.isoformat(),
    )
    write_commit(
        root,
        CommitRecord(
            commit_id=new_id,
            branch="main",
            snapshot_id=snapshot_id,
            message=message,
            committed_at=timestamp,
            parent_commit_id=parent,
        ),
    )
    return new_id
91
92
93 # ---------------------------------------------------------------------------
94 # build_mpack tests
95 # ---------------------------------------------------------------------------
96
97
class TestBuildMPack:
    """Tests for ``build_mpack``: what a pack contains for a given set of tips."""

    def test_single_commit_no_history(self, repo: pathlib.Path) -> None:
        """A lone parentless commit packs one commit, one snapshot, one object."""
        content = b"hello world"
        oid = _make_object(repo, content)
        snap_id = _make_snapshot(repo, {"file.txt": oid})
        c1_id = _make_commit(repo, snap_id)

        mpack = build_mpack(repo, [c1_id])

        assert len(mpack.get("commits") or []) == 1
        assert len(mpack.get("snapshots") or []) == 1
        assert len(mpack.get("objects") or []) == 1
        assert (mpack.get("objects") or [{}])[0]["object_id"] == oid

    def test_object_content_is_raw_bytes(self, repo: pathlib.Path) -> None:
        """Packed object content equals the stored bytes, including NUL bytes."""
        content = b"\x00\x01\x02\x03"
        oid = _make_object(repo, content)
        snap_id = _make_snapshot(repo, {"bin.dat": oid})
        c1_id = _make_commit(repo, snap_id)

        mpack = build_mpack(repo, [c1_id])

        objs = mpack.get("objects") or []
        assert len(objs) == 1
        assert objs[0]["content"] == content

    def test_multi_commit_chain(self, repo: pathlib.Path) -> None:
        """Packing the tip of a two-commit chain brings in both commits."""
        oid1 = _make_object(repo, b"v1")
        oid2 = _make_object(repo, b"v2")
        snap1_id = _make_snapshot(repo, {"f.txt": oid1})
        snap2_id = _make_snapshot(repo, {"f.txt": oid2})
        c1_id = _make_commit(repo, snap1_id)
        c2_id = _make_commit(repo, snap2_id, parent=c1_id)

        mpack = build_mpack(repo, [c2_id])

        assert len(mpack.get("commits") or []) == 2
        assert len(mpack.get("snapshots") or []) == 2
        assert len(mpack.get("objects") or []) == 2

    def test_have_excludes_ancestor_commits(self, repo: pathlib.Path) -> None:
        """Commits listed in ``have`` are left out of the pack."""
        oid1 = _make_object(repo, b"v1")
        oid2 = _make_object(repo, b"v2")
        snap1_id = _make_snapshot(repo, {"f.txt": oid1})
        snap2_id = _make_snapshot(repo, {"f.txt": oid2})
        c1_id = _make_commit(repo, snap1_id)
        c2_id = _make_commit(repo, snap2_id, parent=c1_id)

        mpack = build_mpack(repo, [c2_id], have=[c1_id])

        # Only c2 should be in the mpack; c1 is in have.
        commit_ids = [c["commit_id"] for c in (mpack.get("commits") or [])]
        assert c2_id in commit_ids
        assert c1_id not in commit_ids

    def test_deduplicates_shared_objects(self, repo: pathlib.Path) -> None:
        """An object referenced by two snapshots is packed exactly once."""
        shared_oid = _make_object(repo, b"shared")
        snap1_id = _make_snapshot(repo, {"a.txt": shared_oid})
        snap2_id = _make_snapshot(repo, {"b.txt": shared_oid})
        c1_id = _make_commit(repo, snap1_id)
        c2_id = _make_commit(repo, snap2_id, parent=c1_id)

        mpack = build_mpack(repo, [c2_id])

        # Shared object should appear only once.
        object_ids = [o["object_id"] for o in (mpack.get("objects") or [])]
        assert object_ids.count(shared_oid) == 1

    def test_empty_commit_ids_returns_empty_mpack(self, repo: pathlib.Path) -> None:
        """An empty list of tips produces no commits and no objects."""
        mpack = build_mpack(repo, [])
        assert (mpack.get("commits") or []) == []
        assert (mpack.get("objects") or []) == []

    def test_missing_commit_skipped_gracefully(self, repo: pathlib.Path) -> None:
        """A commit id with no record on disk is skipped rather than raising."""
        # Should not raise even if a commit_id does not exist.
        mpack = build_mpack(repo, [NULL_LONG_ID])
        assert (mpack.get("commits") or []) == []

    def test_snapshot_always_included_for_every_commit(self, repo: pathlib.Path) -> None:
        """Every commit in the mpack must have its snapshot included.

        This is the data-integrity invariant that prevents the corruption
        pattern where commits arrive on the remote without their snapshots,
        making them permanently unreadable after a local .muse wipe.
        """
        oid = _make_object(repo, b"content")
        snap_id = _make_snapshot(repo, {"a.txt": oid})
        c_id = _make_commit(repo, snap_id)

        mpack = build_mpack(repo, [c_id])

        commit_snap_ids = {c["snapshot_id"] for c in (mpack.get("commits") or [])}
        bundled_snap_ids = {s["snapshot_id"] for s in (mpack.get("snapshots") or [])}

        assert commit_snap_ids == bundled_snap_ids, (
            "Every commit's snapshot_id must appear in the mpack's snapshots list"
        )

    def test_missing_snapshot_raises_not_skips(self, repo: pathlib.Path) -> None:
        """build_mpack must raise ValueError when a commit's snapshot is absent.

        Silently skipping was the root cause of the recurring snapshot
        corruption: commits reached the remote without their snapshots, and
        subsequent pulls restored commits but not snapshots.
        """
        # Write the commit record directly; no snapshot is written for it.
        # ``datetime``, ``compute_commit_id`` and ``long_id`` are the
        # module-level imports, so no function-local re-import is needed.
        snap_id = long_id("ab" * 32)  # valid prefixed ID, but no snapshot file exists
        committed_at = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
        c_id = compute_commit_id(
            parent_ids=[],
            snapshot_id=snap_id,
            message="orphan",
            committed_at_iso=committed_at.isoformat(),
        )
        write_commit(
            repo,
            CommitRecord(
                commit_id=c_id,
                branch="main",
                snapshot_id=snap_id,
                message="orphan",
                committed_at=committed_at,
            ),
        )

        with pytest.raises(ValueError, match="Push aborted"):
            build_mpack(repo, [c_id])

    def test_merge_commit_includes_both_parents(self, repo: pathlib.Path) -> None:
        """Packing a merge commit brings in the merge and both of its parents."""
        oid_a = _make_object(repo, b"branch-a")
        oid_b = _make_object(repo, b"branch-b")
        snap_a_id = _make_snapshot(repo, {"a.txt": oid_a})
        snap_b_id = _make_snapshot(repo, {"b.txt": oid_b})
        snap_m_id = _make_snapshot(repo, {"a.txt": oid_a, "b.txt": oid_b})
        c_a_id = _make_commit(repo, snap_a_id)
        c_b_id = _make_commit(repo, snap_b_id)
        # Merge commit with two parents — compute its ID from both parent hashes.
        # ``_make_commit`` only takes a single parent, so the record is built here.
        committed_at = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
        c_merge_id = compute_commit_id(
            parent_ids=[c_a_id, c_b_id],
            snapshot_id=snap_m_id,
            message="merge",
            committed_at_iso=committed_at.isoformat(),
        )
        c_merge = CommitRecord(
            commit_id=c_merge_id,
            branch="main",
            snapshot_id=snap_m_id,
            message="merge",
            committed_at=committed_at,
            parent_commit_id=c_a_id,
            parent2_commit_id=c_b_id,
        )
        write_commit(repo, c_merge)

        mpack = build_mpack(repo, [c_merge_id])
        commit_ids = {c["commit_id"] for c in (mpack.get("commits") or [])}
        assert {c_merge_id, c_a_id, c_b_id}.issubset(commit_ids)
252
253
254 # ---------------------------------------------------------------------------
255 # apply_mpack tests
256 # ---------------------------------------------------------------------------
257
258
class TestApplyMPack:
    """Tests for ``apply_mpack``: writing a pack's contents into a repo."""

    def test_round_trip(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        """A pack built in one repo and applied to a fresh one carries the data over."""
        payload = b"round trip"
        object_id = _make_object(repo, payload)
        snapshot_id = _make_snapshot(repo, {"f.txt": object_id})
        commit_id = _make_commit(repo, snapshot_id, message="initial")

        pack = build_mpack(repo, [commit_id])

        # Destination repo: only the three store directories exist.
        dest = tmp_path / "dest"
        dest_meta = muse_dir(dest)
        for sub in ("commits", "snapshots", "objects"):
            (dest_meta / sub).mkdir(parents=True)

        outcome = apply_mpack(dest, pack)

        assert outcome["objects_written"] == 1
        assert has_object(dest, object_id)
        assert read_object(dest, object_id) == payload
        assert read_snapshot(dest, snapshot_id) is not None
        assert read_commit(dest, commit_id) is not None

    def test_idempotent_apply(self, repo: pathlib.Path) -> None:
        """Re-applying a pack does not raise and reports ``objects_written == 0``."""
        payload = b"idempotent"
        object_id = _make_object(repo, payload)
        snapshot_id = _make_snapshot(repo, {"f.txt": object_id})
        commit_id = _make_commit(repo, snapshot_id)

        pack = build_mpack(repo, [commit_id])
        apply_mpack(repo, pack)
        second = apply_mpack(repo, pack)

        # Everything in the pack is already present in the repo.
        assert second["objects_written"] == 0

    def test_malformed_object_skipped(self, repo: pathlib.Path) -> None:
        """A bad object payload is skipped: nothing is written, nothing raises."""
        # NOTE(review): the payload pairs the id "abc123" with empty bytes
        # (the content *is* of type bytes). Which of the two apply_mpack
        # rejects is not visible from this test — confirm against apply_mpack.
        bad_payload = ObjectPayload(object_id="abc123", content=b"")
        pack: MPack = {
            "commits": [],
            "snapshots": [],
            "objects": [bad_payload],
        }
        outcome = apply_mpack(repo, pack)
        assert outcome["objects_written"] == 0

    def test_empty_mpack_is_noop(self, repo: pathlib.Path) -> None:
        """Applying a pack with no keys at all writes no objects."""
        pack: MPack = {}
        outcome = apply_mpack(repo, pack)
        assert outcome["objects_written"] == 0

    def test_apply_preserves_commit_metadata(
        self, repo: pathlib.Path, tmp_path: pathlib.Path
    ) -> None:
        """The applied commit keeps its message and snapshot id."""
        object_id = _make_object(repo, b"data")
        snapshot_id = _make_snapshot(repo, {"data.bin": object_id})
        commit_id = _make_commit(repo, snapshot_id, message="preserve me")

        pack = build_mpack(repo, [commit_id])

        dest = tmp_path / "d"
        for store_dir in (commits_dir, snapshots_dir, objects_dir):
            store_dir(dest).mkdir(parents=True)
        apply_mpack(dest, pack)

        restored = read_commit(dest, commit_id)
        assert restored is not None
        assert restored.message == "preserve me"
        assert restored.snapshot_id == snapshot_id

    def test_apply_returns_new_object_count(
        self, repo: pathlib.Path, tmp_path: pathlib.Path
    ) -> None:
        """``objects_written`` counts each object newly written to the destination."""
        first_oid = _make_object(repo, b"obj1")
        second_oid = _make_object(repo, b"obj2")
        snapshot_id = _make_snapshot(repo, {"a": first_oid, "b": second_oid})
        commit_id = _make_commit(repo, snapshot_id)

        pack = build_mpack(repo, [commit_id])
        dest = tmp_path / "d"
        for store_dir in (commits_dir, snapshots_dir, objects_dir):
            store_dir(dest).mkdir(parents=True)

        outcome = apply_mpack(dest, pack)
        assert outcome["objects_written"] == 2

    def test_apply_full_manifest_snapshot_from_server(
        self, repo: pathlib.Path, tmp_path: pathlib.Path
    ) -> None:
        """apply_mpack must write snapshots sent in full-manifest format.

        A server fetch response may carry WireSnapshot dicts with a
        ``manifest`` key (full content, no delta encoding), whereas
        _apply_snapshot_deltas only understands the ``delta_add``/
        ``delta_remove`` format produced by build_mpack. Given a
        full-manifest dict such as:

            {"snapshot_id": "sha256:...", "manifest": {"f.txt": "sha256:..."},
             "directories": [], "created_at": ""}

        it sees delta_add={} and delta_remove=[], reconstructs base={}, and
        computes sha256(empty) = "sha256:e3b0c44...", which does not match the
        real snapshot_id. The snapshot is then skipped and the pull aborts
        with a "snapshot referenced by commit" error.

        _apply_snapshot_deltas therefore has to accept both formats:
        - delta format: ``{snapshot_id, parent_snapshot_id, delta_add, delta_remove}``
        - full format:  ``{snapshot_id, manifest, directories, ...}``
        """
        object_id = _make_object(repo, b"stream content")
        snapshot_id = _make_snapshot(repo, {"stream.txt": object_id})
        commit_id = _make_commit(repo, snapshot_id, message="stream commit")

        # Shape that _coerce_snapshot_dict yields for a full-manifest snapshot:
        # a 'manifest' key and neither 'delta_add' nor 'delta_remove'.
        wire_snapshot = {
            "snapshot_id": snapshot_id,
            "manifest": {"stream.txt": object_id},
            "directories": [],
            "created_at": "",
        }
        commit_record = read_commit(repo, commit_id)
        assert commit_record is not None

        dest = tmp_path / "dest"
        for store_dir in (commits_dir, snapshots_dir, objects_dir):
            store_dir(dest).mkdir(parents=True)
        # The object is already present at the destination, so the pack
        # itself carries no objects.
        write_object(dest, object_id, b"stream content")

        pack: MPack = {
            "commits": [commit_record.to_dict()],
            "snapshots": [wire_snapshot],
            "objects": [],
        }
        outcome = apply_mpack(dest, pack)

        assert read_snapshot(dest, snapshot_id) is not None, (
            "Snapshot with full manifest format was not written — "
            "_apply_snapshot_deltas did not handle the 'manifest' key"
        )
        assert outcome["snapshots_written"] == 1
File History 2 commits
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 28 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor 29 days ago