gabriel / muse public
test_mpack_core.py python
348 lines 12.8 KB
Raw
sha256:8860dea10c653956b613a814cc752a6d34cb3986cdf16749a49172affdabf045 fix tests Human minor ⚠ breaking 15 days ago
1 """Tests for muse.core.mpack — MPack build and apply operations."""
2
3 from __future__ import annotations
4
5 import datetime
6 import json
7 import pathlib
8
9 import pytest
10
11 from muse.core.object_store import has_object, read_object, write_object
12 from muse.core.mpack import (
13 BlobPayload,
14 MPack,
15 apply_mpack,
16 build_mpack,
17 )
18 from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id
19
20 from muse.core.types import Manifest, blob_id, fake_id
21 from muse.core.commits import (
22 CommitRecord,
23 read_commit,
24 write_commit,
25 )
26 from muse.core.snapshots import (
27 SnapshotRecord,
28 read_snapshot,
29 write_snapshot,
30 )
31 from muse.core.paths import commits_dir, objects_dir, snapshots_dir, muse_dir
32
33
34 # ---------------------------------------------------------------------------
35 # Fixtures
36 # ---------------------------------------------------------------------------
37
38
@pytest.fixture
def repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Lay out a bare-bones muse store under *tmp_path* and return *tmp_path*.

    Inside the directory given by ``muse_dir(tmp_path)`` this creates the
    ``commits``, ``snapshots``, ``objects`` and ``refs/heads`` directories,
    a ``repo.json`` with a fixed ``repo_id``, a ``HEAD`` pointing at
    ``refs/heads/main``, and an empty ``main`` ref file.
    """
    store = muse_dir(tmp_path)
    heads = store / "refs" / "heads"

    directories = (
        store / "commits",
        store / "snapshots",
        store / "objects",
        heads,
    )
    for directory in directories:
        directory.mkdir(parents=True)

    seed_files = (
        (store / "repo.json", json.dumps({"repo_id": "test-repo"})),
        (store / "HEAD", "ref: refs/heads/main\n"),
        (heads / "main", ""),
    )
    for target, text in seed_files:
        target.write_text(text)

    return tmp_path
51
52
def _make_object(root: pathlib.Path, content: bytes) -> str:
    """Store *content* in the object store under *root*; return its object ID.

    The ID is computed from the bytes with ``blob_id`` before the write.
    """
    object_id = blob_id(content)
    write_object(root, object_id, content)
    return object_id
58
59
def _make_snapshot(root: pathlib.Path, manifest: Manifest) -> str:
    """Persist a snapshot of *manifest* under *root*; return its snapshot ID.

    The ID is whatever ``compute_snapshot_id`` yields for the manifest, so the
    stored record is keyed by a hash of the manifest it carries.
    """
    snapshot_id = compute_snapshot_id(manifest)
    record = SnapshotRecord(snapshot_id=snapshot_id, manifest=manifest)
    write_snapshot(root, record)
    return snapshot_id
65
66
def _make_commit(
    root: pathlib.Path,
    snapshot_id: str,
    message: str = "test",
    parent: str | None = None,
) -> str:
    """Persist a commit on branch ``main`` under *root*; return its commit ID.

    The commit ID is computed from the parent list (empty when *parent* is
    falsy), *snapshot_id*, *message* and a fixed 2026-01-01 UTC timestamp.
    Only the commit record is written; the snapshot itself is not touched.
    """
    timestamp = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
    ancestry = [] if not parent else [parent]
    new_id = compute_commit_id(
        parent_ids=ancestry,
        snapshot_id=snapshot_id,
        message=message,
        committed_at_iso=timestamp.isoformat(),
    )
    write_commit(
        root,
        CommitRecord(
            commit_id=new_id,
            branch="main",
            snapshot_id=snapshot_id,
            message=message,
            committed_at=timestamp,
            parent_commit_id=parent,
        ),
    )
    return new_id
92
93
94 # ---------------------------------------------------------------------------
95 # build_mpack tests
96 # ---------------------------------------------------------------------------
97
98
class TestBuildPack:
    """Tests for ``build_mpack``: which commits, snapshots and blobs get packed."""

    def test_single_commit_no_history(self, repo: pathlib.Path) -> None:
        """A lone commit packs as one commit, one snapshot and its one blob."""
        content = b"hello world"
        oid = _make_object(repo, content)
        snap_id = _make_snapshot(repo, {"file.txt": oid})
        c1_id = _make_commit(repo, snap_id)

        mpack = build_mpack(repo, [c1_id])

        assert len(mpack.get("commits") or []) == 1
        assert len(mpack.get("snapshots") or []) == 1
        assert len(mpack.get("blobs") or []) == 1
        assert (mpack.get("blobs") or [{}])[0]["object_id"] == oid

    def test_object_content_is_raw_bytes(self, repo: pathlib.Path) -> None:
        """Blob content in the pack equals the exact bytes that were stored."""
        content = b"\x00\x01\x02\x03"
        oid = _make_object(repo, content)
        snap_id = _make_snapshot(repo, {"bin.dat": oid})
        c1_id = _make_commit(repo, snap_id)

        mpack = build_mpack(repo, [c1_id])

        objs = mpack.get("blobs") or []
        assert len(objs) == 1
        assert objs[0]["content"] == content

    def test_multi_commit_chain(self, repo: pathlib.Path) -> None:
        """Packing the tip of a two-commit chain also packs its parent."""
        oid1 = _make_object(repo, b"v1")
        oid2 = _make_object(repo, b"v2")
        snap1_id = _make_snapshot(repo, {"f.txt": oid1})
        snap2_id = _make_snapshot(repo, {"f.txt": oid2})
        c1_id = _make_commit(repo, snap1_id)
        c2_id = _make_commit(repo, snap2_id, parent=c1_id)

        mpack = build_mpack(repo, [c2_id])

        assert len(mpack.get("commits") or []) == 2
        assert len(mpack.get("snapshots") or []) == 2
        assert len(mpack.get("blobs") or []) == 2

    def test_have_excludes_ancestor_commits(self, repo: pathlib.Path) -> None:
        """Commits listed in ``have`` are left out of the built pack."""
        oid1 = _make_object(repo, b"v1")
        oid2 = _make_object(repo, b"v2")
        snap1_id = _make_snapshot(repo, {"f.txt": oid1})
        snap2_id = _make_snapshot(repo, {"f.txt": oid2})
        c1_id = _make_commit(repo, snap1_id)
        c2_id = _make_commit(repo, snap2_id, parent=c1_id)

        mpack = build_mpack(repo, [c2_id], have=[c1_id])

        # Only c2 should be in the mpack; c1 is in have.
        commit_ids = [c["commit_id"] for c in (mpack.get("commits") or [])]
        assert c2_id in commit_ids
        assert c1_id not in commit_ids

    def test_deduplicates_shared_objects(self, repo: pathlib.Path) -> None:
        """A blob referenced by two snapshots is packed exactly once."""
        shared_oid = _make_object(repo, b"shared")
        snap1_id = _make_snapshot(repo, {"a.txt": shared_oid})
        snap2_id = _make_snapshot(repo, {"b.txt": shared_oid})
        c1_id = _make_commit(repo, snap1_id)
        c2_id = _make_commit(repo, snap2_id, parent=c1_id)

        mpack = build_mpack(repo, [c2_id])

        # Shared object should appear only once.
        object_ids = [o["object_id"] for o in (mpack.get("blobs") or [])]
        assert object_ids.count(shared_oid) == 1

    def test_empty_commit_ids_returns_empty_bundle(self, repo: pathlib.Path) -> None:
        """No requested commits means no commits and no blobs in the pack."""
        mpack = build_mpack(repo, [])
        assert (mpack.get("commits") or []) == []
        assert (mpack.get("blobs") or []) == []

    def test_missing_commit_skipped_gracefully(self, repo: pathlib.Path) -> None:
        """An unknown commit ID is skipped rather than raising."""
        # Should not raise even if a commit_id does not exist.
        mpack = build_mpack(repo, [fake_id("nonexistent")])
        assert (mpack.get("commits") or []) == []

    def test_snapshot_always_included_for_every_commit(self, repo: pathlib.Path) -> None:
        """Every commit in the pack must have its snapshot included.

        This is the data-integrity invariant that prevents the corruption
        pattern where commits arrive on the remote without their snapshots,
        making them permanently unreadable after a local .muse wipe.
        """
        oid = _make_object(repo, b"content")
        snap_id = _make_snapshot(repo, {"a.txt": oid})
        c_id = _make_commit(repo, snap_id)

        mpack = build_mpack(repo, [c_id])

        commit_snap_ids = {c["snapshot_id"] for c in (mpack.get("commits") or [])}
        bundled_snap_ids = {s["snapshot_id"] for s in (mpack.get("snapshots") or [])}

        assert commit_snap_ids == bundled_snap_ids, (
            "Every commit's snapshot_id must appear in the mpack's snapshots list"
        )

    def test_missing_snapshot_raises_not_skips(self, repo: pathlib.Path) -> None:
        """build_mpack must raise ValueError when a commit's snapshot is absent.

        Silently skipping was the root cause of the recurring snapshot
        corruption: commits reached the remote without their snapshots, and
        subsequent pulls restored commits but not snapshots.
        """
        # Valid prefixed ID, but no snapshot file exists for it.
        snap_id = fake_id("ab-missing-snapshot")
        # _make_commit writes only the commit record, so the referenced
        # snapshot stays absent and the commit is an orphan.
        c_id = _make_commit(repo, snap_id, message="orphan")

        with pytest.raises(ValueError, match="Push aborted"):
            build_mpack(repo, [c_id])

    def test_merge_commit_includes_both_parents(self, repo: pathlib.Path) -> None:
        """Packing a merge commit pulls in the commits on both parent sides."""
        oid_a = _make_object(repo, b"branch-a")
        oid_b = _make_object(repo, b"branch-b")
        snap_a_id = _make_snapshot(repo, {"a.txt": oid_a})
        snap_b_id = _make_snapshot(repo, {"b.txt": oid_b})
        snap_m_id = _make_snapshot(repo, {"a.txt": oid_a, "b.txt": oid_b})
        c_a_id = _make_commit(repo, snap_a_id)
        c_b_id = _make_commit(repo, snap_b_id)
        # Merge commit with two parents — compute its ID from both parent hashes.
        # Built by hand because _make_commit only supports a single parent.
        committed_at = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
        c_merge_id = compute_commit_id(
            parent_ids=[c_a_id, c_b_id],
            snapshot_id=snap_m_id,
            message="merge",
            committed_at_iso=committed_at.isoformat(),
        )
        c_merge = CommitRecord(
            commit_id=c_merge_id,
            branch="main",
            snapshot_id=snap_m_id,
            message="merge",
            committed_at=committed_at,
            parent_commit_id=c_a_id,
            parent2_commit_id=c_b_id,
        )
        write_commit(repo, c_merge)

        mpack = build_mpack(repo, [c_merge_id])
        commit_ids = {c["commit_id"] for c in (mpack.get("commits") or [])}
        assert {c_merge_id, c_a_id, c_b_id}.issubset(commit_ids)
253
254
255 # ---------------------------------------------------------------------------
256 # apply_mpack tests
257 # ---------------------------------------------------------------------------
258
259
class TestApplyPack:
    """Tests for ``apply_mpack``: writing a pack's contents into a store."""

    @staticmethod
    def _init_empty_store(dest: pathlib.Path) -> pathlib.Path:
        """Create empty commits/snapshots/objects directories for *dest*.

        Uses the project's path helpers rather than hard-coded directory
        names, so every destination store in this class is laid out the
        same way. Returns *dest* for convenient chaining.
        """
        for locate in (commits_dir, snapshots_dir, objects_dir):
            locate(dest).mkdir(parents=True)
        return dest

    def test_round_trip(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        """build_mpack → apply_mpack in a fresh repo produces identical data."""
        content = b"round trip"
        oid = _make_object(repo, content)
        snap_id = _make_snapshot(repo, {"f.txt": oid})
        c1_id = _make_commit(repo, snap_id, message="initial")

        mpack = build_mpack(repo, [c1_id])

        # Apply into a fresh repo.
        dest = self._init_empty_store(tmp_path / "dest")

        result = apply_mpack(dest, mpack)

        assert result["blobs_written"] == 1
        assert has_object(dest, oid)
        assert read_object(dest, oid) == content
        assert read_snapshot(dest, snap_id) is not None
        assert read_commit(dest, c1_id) is not None

    def test_idempotent_apply(self, repo: pathlib.Path) -> None:
        """Applying the same mpack twice does not raise; blobs_written is 0."""
        content = b"idempotent"
        oid = _make_object(repo, content)
        snap_id = _make_snapshot(repo, {"f.txt": oid})
        c1_id = _make_commit(repo, snap_id)

        mpack = build_mpack(repo, [c1_id])
        apply_mpack(repo, mpack)
        result = apply_mpack(repo, mpack)

        assert result["blobs_written"] == 0  # All already present.

    def test_malformed_object_skipped(self, repo: pathlib.Path) -> None:
        """A malformed blob payload is not written and does not raise."""
        # NOTE(review): the content here is valid bytes (b""); the malformed
        # part appears to be the object_id, which is not a blob_id()-derived
        # value — confirm against apply_mpack's validation rules.
        mpack: MPack = {
            "commits": [],
            "snapshots": [],
            "blobs": [BlobPayload(object_id="abc123", content=b"")],
        }
        result = apply_mpack(repo, mpack)
        assert result["blobs_written"] == 0

    def test_empty_bundle_is_noop(self, repo: pathlib.Path) -> None:
        """An empty pack applies cleanly and writes no blobs."""
        mpack: MPack = {}
        result = apply_mpack(repo, mpack)
        assert result["blobs_written"] == 0

    def test_apply_preserves_commit_metadata(
        self, repo: pathlib.Path, tmp_path: pathlib.Path
    ) -> None:
        """The applied commit keeps its message and snapshot_id."""
        oid = _make_object(repo, b"data")
        snap_id = _make_snapshot(repo, {"data.bin": oid})
        c1_id = _make_commit(repo, snap_id, message="preserve me")

        mpack = build_mpack(repo, [c1_id])

        dest = self._init_empty_store(tmp_path / "d")
        apply_mpack(dest, mpack)

        commit = read_commit(dest, c1_id)
        assert commit is not None
        assert commit.message == "preserve me"
        assert commit.snapshot_id == snap_id

    def test_apply_returns_new_object_count(
        self, repo: pathlib.Path, tmp_path: pathlib.Path
    ) -> None:
        """blobs_written counts every blob newly written to the destination."""
        oid1 = _make_object(repo, b"obj1")
        oid2 = _make_object(repo, b"obj2")
        snap_id = _make_snapshot(repo, {"a": oid1, "b": oid2})
        c1_id = _make_commit(repo, snap_id)

        mpack = build_mpack(repo, [c1_id])
        dest = self._init_empty_store(tmp_path / "d")

        result = apply_mpack(dest, mpack)
        assert result["blobs_written"] == 2
File History 3 commits
sha256:2c59968e5fd34f1740180d630338fddfb8c465b71e150a0965f11dbdcba5dec7 fix: apply_mpack refuses commits when their snapshot is abs… Sonnet 4.6 patch 15 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 22 days ago