gabriel / muse public
test_core_pack.py python
405 lines 15.3 KB
Raw
1 """Tests for muse.core.mpack — MPack build and apply operations."""
2
3 from __future__ import annotations
4
5 import datetime
6 import json
7 import pathlib
8
9 import pytest
10
11 from muse.core.object_store import has_object, read_object, write_object
12 from muse.core.mpack import (
13 BlobPayload,
14 MPack,
15 apply_mpack,
16 build_mpack,
17 )
18 from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id
19
20 from muse.core.types import Manifest, NULL_LONG_ID, long_id
21 from muse.core.commits import (
22 CommitRecord,
23 read_commit,
24 write_commit,
25 )
26 from muse.core.snapshots import (
27 SnapshotRecord,
28 read_snapshot,
29 write_snapshot,
30 )
31 from muse.core.paths import commits_dir, objects_dir, snapshots_dir, muse_dir
32
33
34 # ---------------------------------------------------------------------------
35 # Fixtures
36 # ---------------------------------------------------------------------------
37
38
@pytest.fixture
def repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Lay out a bare ``.muse/`` store under *tmp_path* and return the repo root.

    The store gets empty ``commits``, ``snapshots``, ``objects`` and
    ``refs/heads`` directories, a ``repo.json`` with a fixed repo id, a
    ``HEAD`` that points at ``refs/heads/main``, and an empty ``main`` ref.
    """
    store = muse_dir(tmp_path)
    heads = store / "refs" / "heads"

    # Same creation order as the individual mkdir calls this replaces.
    for directory in (store / "commits", store / "snapshots", store / "objects", heads):
        directory.mkdir(parents=True)

    (store / "repo.json").write_text(json.dumps({"repo_id": "test-repo"}))
    (store / "HEAD").write_text("ref: refs/heads/main\n")
    (heads / "main").write_text("")
    return tmp_path
51
52
def _make_object(root: pathlib.Path, content: bytes) -> str:
    """Store *content* in the object store under *root* and return its object ID.

    The ID is whatever ``blob_id`` derives from the bytes.
    """
    from muse.core.types import blob_id

    object_id = blob_id(content)
    write_object(root, object_id, content)
    return object_id
59
60
def _make_snapshot(root: pathlib.Path, manifest: Manifest) -> str:
    """Persist a snapshot of *manifest* under *root* and return its snapshot ID.

    The ID is computed from the manifest with ``compute_snapshot_id``.
    """
    snapshot_id = compute_snapshot_id(manifest)
    record = SnapshotRecord(snapshot_id=snapshot_id, manifest=manifest)
    write_snapshot(root, record)
    return snapshot_id
66
67
def _make_commit(
    root: pathlib.Path,
    snapshot_id: str,
    message: str = "test",
    parent: str | None = None,
) -> str:
    """Persist a commit on branch ``main`` under *root* and return its commit ID.

    The ID is computed with ``compute_commit_id`` from the parent list, the
    snapshot ID, the message and a fixed timestamp (2026-01-01 UTC), so the
    same arguments always produce the same ID.
    """
    timestamp = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)

    # A falsy parent (None or "") contributes no entry to the hashed parent list.
    ancestry: list[str] = []
    if parent:
        ancestry.append(parent)

    commit_id = compute_commit_id(
        parent_ids=ancestry,
        snapshot_id=snapshot_id,
        message=message,
        committed_at_iso=timestamp.isoformat(),
    )
    write_commit(
        root,
        CommitRecord(
            commit_id=commit_id,
            branch="main",
            snapshot_id=snapshot_id,
            message=message,
            committed_at=timestamp,
            parent_commit_id=parent,
        ),
    )
    return commit_id
93
94
95 # ---------------------------------------------------------------------------
96 # build_mpack tests
97 # ---------------------------------------------------------------------------
98
99
class TestBuildMPack:
    """Tests for ``build_mpack``: which commits, snapshots and blobs get bundled."""

    def test_single_commit_no_history(self, repo: pathlib.Path) -> None:
        """A lone commit yields one commit, one snapshot and its single blob."""
        content = b"hello world"
        oid = _make_object(repo, content)
        snap_id = _make_snapshot(repo, {"file.txt": oid})
        c1_id = _make_commit(repo, snap_id)

        mpack = build_mpack(repo, [c1_id])

        blobs = mpack.get("blobs") or []
        assert len(mpack.get("commits") or []) == 1
        assert len(mpack.get("snapshots") or []) == 1
        assert len(blobs) == 1
        assert blobs[0]["object_id"] == oid

    def test_object_content_is_raw_bytes(self, repo: pathlib.Path) -> None:
        """Blob content in the mpack equals the bytes that were stored."""
        content = b"\x00\x01\x02\x03"
        oid = _make_object(repo, content)
        snap_id = _make_snapshot(repo, {"bin.dat": oid})
        c1_id = _make_commit(repo, snap_id)

        mpack = build_mpack(repo, [c1_id])

        objs = mpack.get("blobs") or []
        assert len(objs) == 1
        assert objs[0]["content"] == content

    def test_multi_commit_chain(self, repo: pathlib.Path) -> None:
        """Building from the tip of a two-commit chain bundles both commits."""
        oid1 = _make_object(repo, b"v1")
        oid2 = _make_object(repo, b"v2")
        snap1_id = _make_snapshot(repo, {"f.txt": oid1})
        snap2_id = _make_snapshot(repo, {"f.txt": oid2})
        c1_id = _make_commit(repo, snap1_id)
        c2_id = _make_commit(repo, snap2_id, parent=c1_id)

        mpack = build_mpack(repo, [c2_id])

        assert len(mpack.get("commits") or []) == 2
        assert len(mpack.get("snapshots") or []) == 2
        assert len(mpack.get("blobs") or []) == 2

    def test_have_excludes_ancestor_commits(self, repo: pathlib.Path) -> None:
        """Commits listed in ``have`` are left out of the mpack."""
        oid1 = _make_object(repo, b"v1")
        oid2 = _make_object(repo, b"v2")
        snap1_id = _make_snapshot(repo, {"f.txt": oid1})
        snap2_id = _make_snapshot(repo, {"f.txt": oid2})
        c1_id = _make_commit(repo, snap1_id)
        c2_id = _make_commit(repo, snap2_id, parent=c1_id)

        mpack = build_mpack(repo, [c2_id], have=[c1_id])

        # Only c2 should be in the mpack; c1 is in have.
        commit_ids = [c["commit_id"] for c in (mpack.get("commits") or [])]
        assert c2_id in commit_ids
        assert c1_id not in commit_ids

    def test_deduplicates_shared_objects(self, repo: pathlib.Path) -> None:
        """A blob referenced by two snapshots is bundled exactly once."""
        shared_oid = _make_object(repo, b"shared")
        snap1_id = _make_snapshot(repo, {"a.txt": shared_oid})
        snap2_id = _make_snapshot(repo, {"b.txt": shared_oid})
        c1_id = _make_commit(repo, snap1_id)
        c2_id = _make_commit(repo, snap2_id, parent=c1_id)

        mpack = build_mpack(repo, [c2_id])

        # Shared object should appear only once.
        object_ids = [o["object_id"] for o in (mpack.get("blobs") or [])]
        assert object_ids.count(shared_oid) == 1

    def test_empty_commit_ids_returns_empty_mpack(self, repo: pathlib.Path) -> None:
        """No requested commits means no commits and no blobs in the result."""
        mpack = build_mpack(repo, [])
        assert (mpack.get("commits") or []) == []
        assert (mpack.get("blobs") or []) == []

    def test_missing_commit_skipped_gracefully(self, repo: pathlib.Path) -> None:
        """An unknown commit ID does not raise and contributes no commits."""
        # Should not raise even if a commit_id does not exist.
        mpack = build_mpack(repo, [NULL_LONG_ID])
        assert (mpack.get("commits") or []) == []

    def test_snapshot_always_included_for_every_commit(self, repo: pathlib.Path) -> None:
        """Every commit in the mpack must have its snapshot included.

        This is the data-integrity invariant that prevents the corruption
        pattern where commits arrive on the remote without their snapshots,
        making them permanently unreadable after a local .muse wipe.
        """
        oid = _make_object(repo, b"content")
        snap_id = _make_snapshot(repo, {"a.txt": oid})
        c_id = _make_commit(repo, snap_id)

        mpack = build_mpack(repo, [c_id])

        commit_snap_ids = {c["snapshot_id"] for c in (mpack.get("commits") or [])}
        bundled_snap_ids = {s["snapshot_id"] for s in (mpack.get("snapshots") or [])}

        assert commit_snap_ids == bundled_snap_ids, (
            "Every commit's snapshot_id must appear in the mpack's snapshots list"
        )

    def test_missing_snapshot_raises_not_skips(self, repo: pathlib.Path) -> None:
        """build_mpack must raise ValueError when a commit's snapshot is absent.

        Silently skipping was the root cause of the recurring snapshot
        corruption: commits reached the remote without their snapshots, and
        subsequent pulls restored commits but not snapshots.
        """
        # Write the commit record directly; no snapshot is ever written for it.
        # ``datetime``, ``compute_commit_id`` and ``long_id`` come from the
        # module-level imports.
        snap_id = long_id("ab" * 32)  # valid prefixed ID, but no snapshot file exists
        committed_at = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
        c_id = compute_commit_id(
            parent_ids=[],
            snapshot_id=snap_id,
            message="orphan",
            committed_at_iso=committed_at.isoformat(),
        )
        write_commit(
            repo,
            CommitRecord(
                commit_id=c_id,
                branch="main",
                snapshot_id=snap_id,
                message="orphan",
                committed_at=committed_at,
            ),
        )

        with pytest.raises(ValueError, match="Push aborted"):
            build_mpack(repo, [c_id])

    def test_merge_commit_includes_both_parents(self, repo: pathlib.Path) -> None:
        """Building from a merge commit bundles the merge and both of its parents."""
        oid_a = _make_object(repo, b"branch-a")
        oid_b = _make_object(repo, b"branch-b")
        snap_a_id = _make_snapshot(repo, {"a.txt": oid_a})
        snap_b_id = _make_snapshot(repo, {"b.txt": oid_b})
        snap_m_id = _make_snapshot(repo, {"a.txt": oid_a, "b.txt": oid_b})
        c_a_id = _make_commit(repo, snap_a_id)
        c_b_id = _make_commit(repo, snap_b_id)

        # _make_commit only supports one parent, so the merge commit is built
        # by hand with its ID computed from both parent hashes.
        committed_at = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
        c_merge_id = compute_commit_id(
            parent_ids=[c_a_id, c_b_id],
            snapshot_id=snap_m_id,
            message="merge",
            committed_at_iso=committed_at.isoformat(),
        )
        c_merge = CommitRecord(
            commit_id=c_merge_id,
            branch="main",
            snapshot_id=snap_m_id,
            message="merge",
            committed_at=committed_at,
            parent_commit_id=c_a_id,
            parent2_commit_id=c_b_id,
        )
        write_commit(repo, c_merge)

        mpack = build_mpack(repo, [c_merge_id])
        commit_ids = {c["commit_id"] for c in (mpack.get("commits") or [])}
        assert {c_merge_id, c_a_id, c_b_id}.issubset(commit_ids)
254
255
256 # ---------------------------------------------------------------------------
257 # apply_mpack tests
258 # ---------------------------------------------------------------------------
259
260
class TestApplyMPack:
    """Tests for ``apply_mpack``: writing bundled blobs, snapshots and commits."""

    @staticmethod
    def _init_empty_store(dest: pathlib.Path) -> None:
        """Create the commits, snapshots and objects directories for *dest*."""
        commits_dir(dest).mkdir(parents=True)
        snapshots_dir(dest).mkdir(parents=True)
        objects_dir(dest).mkdir(parents=True)

    def test_round_trip(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        """build_mpack → apply_mpack in a fresh repo produces identical data."""
        content = b"round trip"
        oid = _make_object(repo, content)
        snap_id = _make_snapshot(repo, {"f.txt": oid})
        c1_id = _make_commit(repo, snap_id, message="initial")

        mpack = build_mpack(repo, [c1_id])

        # Apply into a fresh repo, laid out by hand under its .muse directory.
        dest = tmp_path / "dest"
        dot_muse = muse_dir(dest)
        for subdir in ("commits", "snapshots", "objects"):
            (dot_muse / subdir).mkdir(parents=True)

        result = apply_mpack(dest, mpack)

        assert result["blobs_written"] == 1
        assert has_object(dest, oid)
        assert read_object(dest, oid) == content
        assert read_snapshot(dest, snap_id) is not None
        assert read_commit(dest, c1_id) is not None

    def test_idempotent_apply(self, repo: pathlib.Path) -> None:
        """Applying the same mpack twice does not raise; the second apply writes 0 blobs."""
        content = b"idempotent"
        oid = _make_object(repo, content)
        snap_id = _make_snapshot(repo, {"f.txt": oid})
        c1_id = _make_commit(repo, snap_id)

        mpack = build_mpack(repo, [c1_id])
        apply_mpack(repo, mpack)
        result = apply_mpack(repo, mpack)

        assert result["blobs_written"] == 0  # All already present.

    def test_malformed_object_skipped(self, repo: pathlib.Path) -> None:
        """A blob payload with an arbitrary object_id is not counted as written."""
        # The content is valid (empty) bytes; only the object_id "abc123" is
        # arbitrary. NOTE(review): presumably apply_mpack skips it because the
        # ID is not a valid content hash for the payload — confirm in apply_mpack.
        mpack: MPack = {
            "commits": [],
            "snapshots": [],
            "blobs": [BlobPayload(object_id="abc123", content=b"")],
        }
        result = apply_mpack(repo, mpack)
        assert result["blobs_written"] == 0

    def test_empty_mpack_is_noop(self, repo: pathlib.Path) -> None:
        """An mpack with no keys at all applies cleanly and writes no blobs."""
        mpack: MPack = {}
        result = apply_mpack(repo, mpack)
        assert result["blobs_written"] == 0

    def test_apply_preserves_commit_metadata(
        self, repo: pathlib.Path, tmp_path: pathlib.Path
    ) -> None:
        """The commit read back from the destination keeps its message and snapshot ID."""
        oid = _make_object(repo, b"data")
        snap_id = _make_snapshot(repo, {"data.bin": oid})
        c1_id = _make_commit(repo, snap_id, message="preserve me")

        mpack = build_mpack(repo, [c1_id])

        dest = tmp_path / "d"
        self._init_empty_store(dest)
        apply_mpack(dest, mpack)

        commit = read_commit(dest, c1_id)
        assert commit is not None
        assert commit.message == "preserve me"
        assert commit.snapshot_id == snap_id

    def test_apply_returns_new_object_count(
        self, repo: pathlib.Path, tmp_path: pathlib.Path
    ) -> None:
        """``blobs_written`` equals the number of blobs new to the destination."""
        oid1 = _make_object(repo, b"obj1")
        oid2 = _make_object(repo, b"obj2")
        snap_id = _make_snapshot(repo, {"a": oid1, "b": oid2})
        c1_id = _make_commit(repo, snap_id)

        mpack = build_mpack(repo, [c1_id])
        dest = tmp_path / "d"
        self._init_empty_store(dest)

        result = apply_mpack(dest, mpack)
        assert result["blobs_written"] == 2

    def test_apply_full_manifest_snapshot_from_server(
        self, repo: pathlib.Path, tmp_path: pathlib.Path
    ) -> None:
        """apply_mpack must write snapshots sent in full-manifest format.

        The server fetch response may include WireSnapshot dicts with a
        ``manifest`` key (full content, no delta encoding).
        _apply_snapshot_deltas only understands the ``delta_upsert``/``delta_remove``
        format used by build_mpack. When it receives a full-manifest dict:

            {"snapshot_id": "sha256:...", "manifest": {"f.txt": "sha256:..."},
             "directories": [], "created_at": ""}

        it finds delta_upsert={} and delta_remove=[], reconstructs base={},
        computes sha256(empty) = "sha256:e3b0c44...", which mismatches the
        real snapshot_id → snapshot skipped → pull aborted with
        "snapshot referenced by commit" error.

        The fix must handle both formats in _apply_snapshot_deltas:
        - delta format: ``{snapshot_id, parent_snapshot_id, delta_upsert, delta_remove}``
        - full format:  ``{snapshot_id, manifest, directories, ...}``
        """
        content = b"stream content"
        oid = _make_object(repo, content)
        snap_id = _make_snapshot(repo, {"stream.txt": oid})
        c_id = _make_commit(repo, snap_id, message="stream commit")

        # Simulate what _coerce_snapshot_dict produces from a full-manifest snapshot:
        # a dict with 'manifest' key, NO 'delta_upsert' or 'delta_remove'.
        full_manifest_snapshot = {
            "snapshot_id": snap_id,
            "manifest": {"stream.txt": oid},
            "directories": [],
            "created_at": "",
        }
        commit_dict = read_commit(repo, c_id)
        assert commit_dict is not None

        dest = tmp_path / "dest"
        self._init_empty_store(dest)
        write_object(dest, oid, content)  # object already present

        mpack: MPack = {
            "commits": [commit_dict.to_dict()],
            "snapshots": [full_manifest_snapshot],
            "blobs": [],
        }
        result = apply_mpack(dest, mpack)

        assert read_snapshot(dest, snap_id) is not None, (
            "Snapshot with full manifest format was not written — "
            "_apply_snapshot_deltas did not handle the 'manifest' key"
        )
        assert result["snapshots_written"] == 1
File History 1 commit