test_core_pack.py
python
sha256:d11a87833d5fad6059b7662844bf5448a8911a17cce7a51811f71ad394f248eb
bump to v0.2.0rc13
Human
patch
6 days ago
| 1 | """Tests for muse.core.mpack — MPack build and apply operations.""" |
| 2 | |
| 3 | from __future__ import annotations |
| 4 | |
| 5 | import datetime |
| 6 | import json |
| 7 | import pathlib |
| 8 | |
| 9 | import pytest |
| 10 | |
| 11 | from muse.core.object_store import has_object, read_object, write_object |
| 12 | from muse.core.mpack import ( |
| 13 | BlobPayload, |
| 14 | MPack, |
| 15 | apply_mpack, |
| 16 | build_mpack, |
| 17 | ) |
| 18 | from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id |
| 19 | |
| 20 | from muse.core.types import Manifest, NULL_LONG_ID, long_id |
| 21 | from muse.core.commits import ( |
| 22 | CommitRecord, |
| 23 | read_commit, |
| 24 | write_commit, |
| 25 | ) |
| 26 | from muse.core.snapshots import ( |
| 27 | SnapshotRecord, |
| 28 | read_snapshot, |
| 29 | write_snapshot, |
| 30 | ) |
| 31 | from muse.core.paths import commits_dir, objects_dir, snapshots_dir, muse_dir |
| 32 | |
| 33 | |
| 34 | # --------------------------------------------------------------------------- |
| 35 | # Fixtures |
| 36 | # --------------------------------------------------------------------------- |
| 37 | |
| 38 | |
@pytest.fixture
def repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Lay out a minimal repository under ``tmp_path`` and return its root.

    The metadata directory is whatever ``muse_dir(tmp_path)`` resolves to.
    Inside it the fixture creates the ``commits``, ``snapshots``, ``objects``
    and ``refs/heads`` directories, a ``repo.json`` carrying a fixed repo ID,
    a ``HEAD`` file pointing at ``refs/heads/main``, and an empty ``main``
    branch ref.
    """
    meta_root = muse_dir(tmp_path)
    heads = meta_root / "refs" / "heads"

    # Storage directories first, then the branch-ref directory.
    for store in ("commits", "snapshots", "objects"):
        (meta_root / store).mkdir(parents=True)
    heads.mkdir(parents=True)

    repo_config = json.dumps({"repo_id": "test-repo"})
    (meta_root / "repo.json").write_text(repo_config)
    (meta_root / "HEAD").write_text("ref: refs/heads/main\n")
    # The branch exists but has no commit recorded yet.
    (heads / "main").write_text("")

    return tmp_path
| 51 | |
| 52 | |
def _make_object(root: pathlib.Path, content: bytes) -> str:
    """Store ``content`` in the object store under ``root``.

    The ID is derived from the bytes via ``blob_id`` and is returned so
    callers can reference the object from a manifest.
    """
    from muse.core.types import blob_id

    object_id = blob_id(content)
    write_object(root, object_id, content)
    return object_id
| 59 | |
| 60 | |
def _make_snapshot(root: pathlib.Path, manifest: Manifest) -> str:
    """Persist a snapshot for ``manifest`` and return its snapshot ID.

    The ID is computed from the manifest with ``compute_snapshot_id`` rather
    than being made up, so the stored record carries a content-derived ID.
    """
    digest = compute_snapshot_id(manifest)
    record = SnapshotRecord(snapshot_id=digest, manifest=manifest)
    write_snapshot(root, record)
    return digest
| 66 | |
| 67 | |
def _make_commit(
    root: pathlib.Path,
    snapshot_id: str,
    message: str = "test",
    parent: str | None = None,
) -> str:
    """Persist a commit on branch ``main`` and return its commit ID.

    The commit ID is computed with ``compute_commit_id`` from the parent list,
    the snapshot ID, the message and a fixed timestamp (2026-01-01 UTC), so
    identical arguments always yield the same ID.

    Args:
        root: Repository root the commit is written into.
        snapshot_id: ID of the snapshot this commit points at.
        message: Commit message.
        parent: ID of the single parent commit, or ``None`` for a root commit.
    """
    stamp = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)

    commit_id = compute_commit_id(
        # A falsy parent (None or empty string) means "no parents".
        parent_ids=[parent] if parent else [],
        snapshot_id=snapshot_id,
        message=message,
        committed_at_iso=stamp.isoformat(),
    )

    write_commit(
        root,
        CommitRecord(
            commit_id=commit_id,
            branch="main",
            snapshot_id=snapshot_id,
            message=message,
            committed_at=stamp,
            parent_commit_id=parent,
        ),
    )
    return commit_id
| 93 | |
| 94 | |
| 95 | # --------------------------------------------------------------------------- |
| 96 | # build_mpack tests |
| 97 | # --------------------------------------------------------------------------- |
| 98 | |
| 99 | |
class TestBuildMPack:
    """Tests for ``build_mpack``: which commits, snapshots and blobs get packed."""

    def test_single_commit_no_history(self, repo: pathlib.Path) -> None:
        """A lone root commit packs one commit, one snapshot and one blob."""
        content = b"hello world"
        oid = _make_object(repo, content)
        snap_id = _make_snapshot(repo, {"file.txt": oid})
        c1_id = _make_commit(repo, snap_id)

        mpack = build_mpack(repo, [c1_id])

        blobs = mpack.get("blobs") or []
        assert len(mpack.get("commits") or []) == 1
        assert len(mpack.get("snapshots") or []) == 1
        # Check the length first so a missing blob fails here, not as a
        # confusing KeyError on the lookup below.
        assert len(blobs) == 1
        assert blobs[0]["object_id"] == oid

    def test_object_content_is_raw_bytes(self, repo: pathlib.Path) -> None:
        """Blob content in the pack equals the bytes that were stored."""
        content = b"\x00\x01\x02\x03"
        oid = _make_object(repo, content)
        snap_id = _make_snapshot(repo, {"bin.dat": oid})
        c1_id = _make_commit(repo, snap_id)

        mpack = build_mpack(repo, [c1_id])

        objs = mpack.get("blobs") or []
        assert len(objs) == 1
        assert objs[0]["content"] == content

    def test_multi_commit_chain(self, repo: pathlib.Path) -> None:
        """Packing the tip of a two-commit chain also packs its parent."""
        oid1 = _make_object(repo, b"v1")
        oid2 = _make_object(repo, b"v2")
        snap1_id = _make_snapshot(repo, {"f.txt": oid1})
        snap2_id = _make_snapshot(repo, {"f.txt": oid2})
        c1_id = _make_commit(repo, snap1_id)
        c2_id = _make_commit(repo, snap2_id, parent=c1_id)

        mpack = build_mpack(repo, [c2_id])

        assert len(mpack.get("commits") or []) == 2
        assert len(mpack.get("snapshots") or []) == 2
        assert len(mpack.get("blobs") or []) == 2

    def test_have_excludes_ancestor_commits(self, repo: pathlib.Path) -> None:
        """Commits listed in ``have`` are left out of the pack."""
        oid1 = _make_object(repo, b"v1")
        oid2 = _make_object(repo, b"v2")
        snap1_id = _make_snapshot(repo, {"f.txt": oid1})
        snap2_id = _make_snapshot(repo, {"f.txt": oid2})
        c1_id = _make_commit(repo, snap1_id)
        c2_id = _make_commit(repo, snap2_id, parent=c1_id)

        mpack = build_mpack(repo, [c2_id], have=[c1_id])

        # Only c2 should be in the mpack; c1 is in have.
        commit_ids = [c["commit_id"] for c in (mpack.get("commits") or [])]
        assert c2_id in commit_ids
        assert c1_id not in commit_ids

    def test_deduplicates_shared_objects(self, repo: pathlib.Path) -> None:
        """A blob referenced by two snapshots appears once in the pack."""
        shared_oid = _make_object(repo, b"shared")
        snap1_id = _make_snapshot(repo, {"a.txt": shared_oid})
        snap2_id = _make_snapshot(repo, {"b.txt": shared_oid})
        c1_id = _make_commit(repo, snap1_id)
        c2_id = _make_commit(repo, snap2_id, parent=c1_id)

        mpack = build_mpack(repo, [c2_id])

        # Shared object should appear only once.
        object_ids = [o["object_id"] for o in (mpack.get("blobs") or [])]
        assert object_ids.count(shared_oid) == 1

    def test_empty_commit_ids_returns_empty_mpack(self, repo: pathlib.Path) -> None:
        """Asking for no commits yields a pack with no commits and no blobs."""
        mpack = build_mpack(repo, [])
        assert (mpack.get("commits") or []) == []
        assert (mpack.get("blobs") or []) == []

    def test_missing_commit_skipped_gracefully(self, repo: pathlib.Path) -> None:
        """An unknown commit ID is ignored rather than raising."""
        # Should not raise even if a commit_id does not exist.
        mpack = build_mpack(repo, [NULL_LONG_ID])
        assert (mpack.get("commits") or []) == []

    def test_snapshot_always_included_for_every_commit(self, repo: pathlib.Path) -> None:
        """Every commit in the mpack must have its snapshot included.

        This is the data-integrity invariant that prevents the corruption
        pattern where commits arrive on the remote without their snapshots,
        making them permanently unreadable after a local .muse wipe.
        """
        oid = _make_object(repo, b"content")
        snap_id = _make_snapshot(repo, {"a.txt": oid})
        c_id = _make_commit(repo, snap_id)

        mpack = build_mpack(repo, [c_id])

        commit_snap_ids = {c["snapshot_id"] for c in (mpack.get("commits") or [])}
        bundled_snap_ids = {s["snapshot_id"] for s in (mpack.get("snapshots") or [])}

        # Guard against a vacuous pass: two empty sets compare equal, so an
        # empty pack would otherwise satisfy the invariant check below.
        assert commit_snap_ids, (
            "build_mpack returned no commits; the snapshot invariant was not exercised"
        )
        assert commit_snap_ids == bundled_snap_ids, (
            "Every commit's snapshot_id must appear in the mpack's snapshots list"
        )

    def test_missing_snapshot_raises_not_skips(self, repo: pathlib.Path) -> None:
        """build_mpack must raise ValueError when a commit's snapshot is absent.

        Silently skipping was the root cause of the recurring snapshot
        corruption: commits reached the remote without their snapshots, and
        subsequent pulls restored commits but not snapshots.
        """
        # Write the commit record directly; no snapshot is ever written
        # under the ID it references.
        snap_id = long_id("ab" * 32)
        committed_at = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
        c_id = compute_commit_id(
            parent_ids=[],
            snapshot_id=snap_id,
            message="orphan",
            committed_at_iso=committed_at.isoformat(),
        )
        write_commit(
            repo,
            CommitRecord(
                commit_id=c_id,
                branch="main",
                snapshot_id=snap_id,
                message="orphan",
                committed_at=committed_at,
            ),
        )

        with pytest.raises(ValueError, match="Push aborted"):
            build_mpack(repo, [c_id])

    def test_merge_commit_includes_both_parents(self, repo: pathlib.Path) -> None:
        """Packing a merge commit pulls in the commits on both parent sides."""
        oid_a = _make_object(repo, b"branch-a")
        oid_b = _make_object(repo, b"branch-b")
        snap_a_id = _make_snapshot(repo, {"a.txt": oid_a})
        snap_b_id = _make_snapshot(repo, {"b.txt": oid_b})
        snap_m_id = _make_snapshot(repo, {"a.txt": oid_a, "b.txt": oid_b})
        c_a_id = _make_commit(repo, snap_a_id)
        c_b_id = _make_commit(repo, snap_b_id)
        # Merge commit with two parents — compute its ID from both parent hashes.
        # _make_commit only supports a single parent, so the record is built by hand.
        committed_at = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
        c_merge_id = compute_commit_id(
            parent_ids=[c_a_id, c_b_id],
            snapshot_id=snap_m_id,
            message="merge",
            committed_at_iso=committed_at.isoformat(),
        )
        c_merge = CommitRecord(
            commit_id=c_merge_id,
            branch="main",
            snapshot_id=snap_m_id,
            message="merge",
            committed_at=committed_at,
            parent_commit_id=c_a_id,
            parent2_commit_id=c_b_id,
        )
        write_commit(repo, c_merge)

        mpack = build_mpack(repo, [c_merge_id])
        commit_ids = {c["commit_id"] for c in (mpack.get("commits") or [])}
        assert {c_merge_id, c_a_id, c_b_id}.issubset(commit_ids)
| 254 | |
| 255 | |
| 256 | # --------------------------------------------------------------------------- |
| 257 | # apply_mpack tests |
| 258 | # --------------------------------------------------------------------------- |
| 259 | |
| 260 | |
class TestApplyMPack:
    """Tests for ``apply_mpack``: writing a pack's contents into a repository."""

    def test_round_trip(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        """A pack built from one repo can be applied to an empty one intact."""
        payload = b"round trip"
        blob = _make_object(repo, payload)
        snapshot = _make_snapshot(repo, {"f.txt": blob})
        commit = _make_commit(repo, snapshot, message="initial")

        pack = build_mpack(repo, [commit])

        # Destination starts with empty storage directories only.
        target = tmp_path / "dest"
        target_meta = muse_dir(target)
        for store in ("commits", "snapshots", "objects"):
            (target_meta / store).mkdir(parents=True)

        outcome = apply_mpack(target, pack)

        assert outcome["blobs_written"] == 1
        assert has_object(target, blob)
        assert read_object(target, blob) == payload
        assert read_snapshot(target, snapshot) is not None
        assert read_commit(target, commit) is not None

    def test_idempotent_apply(self, repo: pathlib.Path) -> None:
        """Re-applying a pack does not raise and reports zero blobs written."""
        payload = b"idempotent"
        blob = _make_object(repo, payload)
        snapshot = _make_snapshot(repo, {"f.txt": blob})
        commit = _make_commit(repo, snapshot)

        pack = build_mpack(repo, [commit])
        apply_mpack(repo, pack)
        second = apply_mpack(repo, pack)

        # Everything in the pack is already stored in this repository.
        assert second["blobs_written"] == 0

    def test_malformed_object_skipped(self, repo: pathlib.Path) -> None:
        """A blob payload with a bogus ID and empty content is not written."""
        # NOTE(review): presumably rejected because "abc123" is not a valid
        # object ID for the (empty) content — confirm against apply_mpack.
        bogus_blob = BlobPayload(object_id="abc123", content=b"")
        pack: MPack = {
            "commits": [],
            "snapshots": [],
            "blobs": [bogus_blob],
        }

        outcome = apply_mpack(repo, pack)

        assert outcome["blobs_written"] == 0

    def test_empty_mpack_is_noop(self, repo: pathlib.Path) -> None:
        """A pack with no keys at all writes no blobs."""
        pack: MPack = {}
        outcome = apply_mpack(repo, pack)
        assert outcome["blobs_written"] == 0

    def test_apply_preserves_commit_metadata(
        self, repo: pathlib.Path, tmp_path: pathlib.Path
    ) -> None:
        """Message and snapshot ID of a commit survive build + apply."""
        blob = _make_object(repo, b"data")
        snapshot = _make_snapshot(repo, {"data.bin": blob})
        commit_id = _make_commit(repo, snapshot, message="preserve me")

        pack = build_mpack(repo, [commit_id])

        target = tmp_path / "d"
        for locate in (commits_dir, snapshots_dir, objects_dir):
            locate(target).mkdir(parents=True)
        apply_mpack(target, pack)

        restored = read_commit(target, commit_id)
        assert restored is not None
        assert restored.message == "preserve me"
        assert restored.snapshot_id == snapshot

    def test_apply_returns_new_object_count(
        self, repo: pathlib.Path, tmp_path: pathlib.Path
    ) -> None:
        """``blobs_written`` counts every blob newly stored in the destination."""
        first_blob = _make_object(repo, b"obj1")
        second_blob = _make_object(repo, b"obj2")
        snapshot = _make_snapshot(repo, {"a": first_blob, "b": second_blob})
        commit_id = _make_commit(repo, snapshot)

        pack = build_mpack(repo, [commit_id])

        target = tmp_path / "d"
        for locate in (commits_dir, snapshots_dir, objects_dir):
            locate(target).mkdir(parents=True)

        outcome = apply_mpack(target, pack)

        assert outcome["blobs_written"] == 2

    def test_apply_full_manifest_snapshot_from_server(
        self, repo: pathlib.Path, tmp_path: pathlib.Path
    ) -> None:
        """apply_mpack must accept snapshots carried in full-manifest form.

        Regression background, as recorded by the original author:

        A server fetch response may contain WireSnapshot dicts that hold a
        complete ``manifest`` (no delta encoding), for example::

            {"snapshot_id": "sha256:...", "manifest": {"f.txt": "sha256:..."},
             "directories": [], "created_at": ""}

        _apply_snapshot_deltas only understood the ``delta_upsert`` /
        ``delta_remove`` shape that build_mpack emits. Given a full-manifest
        dict it saw delta_upsert={} and delta_remove=[], rebuilt base={},
        hashed the empty manifest to "sha256:e3b0c44...", found that this did
        not match the real snapshot_id, skipped the snapshot, and the pull was
        aborted with a "snapshot referenced by commit" error.

        Both shapes therefore have to be handled in _apply_snapshot_deltas:

        - delta format: ``{snapshot_id, parent_snapshot_id, delta_upsert, delta_remove}``
        - full format: ``{snapshot_id, manifest, directories, ...}``
        """
        blob = _make_object(repo, b"stream content")
        snapshot = _make_snapshot(repo, {"stream.txt": blob})
        commit_id = _make_commit(repo, snapshot, message="stream commit")

        # Mimic a full-manifest snapshot as described for _coerce_snapshot_dict:
        # a 'manifest' key is present, 'delta_upsert' and 'delta_remove' are not.
        wire_snapshot = {
            "snapshot_id": snapshot,
            "manifest": {"stream.txt": blob},
            "directories": [],
            "created_at": "",
        }
        commit_record = read_commit(repo, commit_id)
        assert commit_record is not None

        target = tmp_path / "dest"
        for locate in (commits_dir, snapshots_dir, objects_dir):
            locate(target).mkdir(parents=True)
        # The blob is pre-seeded in the destination, so the pack ships none.
        write_object(target, blob, b"stream content")

        pack: MPack = {
            "commits": [commit_record.to_dict()],
            "snapshots": [wire_snapshot],
            "blobs": [],
        }
        outcome = apply_mpack(target, pack)

        assert read_snapshot(target, snapshot) is not None, (
            "Snapshot with full manifest format was not written — "
            "_apply_snapshot_deltas did not handle the 'manifest' key"
        )
        assert outcome["snapshots_written"] == 1
File History
1 commit
sha256:d11a87833d5fad6059b7662844bf5448a8911a17cce7a51811f71ad394f248eb
bump to v0.2.0rc13
Human
patch
6 days ago