test_core_pack.py
python
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a
fix: repair four test failures from post-migration audit
Sonnet 4.6
patch
28 days ago
| 1 | """Tests for muse.core.mpack — MPack build and apply operations.""" |
| 2 | |
| 3 | from __future__ import annotations |
| 4 | |
| 5 | import datetime |
| 6 | import json |
| 7 | import pathlib |
| 8 | |
| 9 | import pytest |
| 10 | |
| 11 | from muse.core.object_store import has_object, read_object, write_object |
| 12 | from muse.core.mpack import ( |
| 13 | ObjectPayload, |
| 14 | MPack, |
| 15 | apply_mpack, |
| 16 | build_mpack, |
| 17 | ) |
| 18 | from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id |
| 19 | |
| 20 | from muse.core.types import Manifest, NULL_LONG_ID, long_id |
| 21 | from muse.core.store import ( |
| 22 | CommitRecord, |
| 23 | SnapshotRecord, |
| 24 | read_commit, |
| 25 | read_snapshot, |
| 26 | write_commit, |
| 27 | write_snapshot, |
| 28 | ) |
| 29 | from muse.core.paths import commits_dir, objects_dir, snapshots_dir, muse_dir |
| 30 | |
| 31 | |
| 32 | # --------------------------------------------------------------------------- |
| 33 | # Fixtures |
| 34 | # --------------------------------------------------------------------------- |
| 35 | |
| 36 | |
@pytest.fixture
def repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Lay out a bare-bones ``.muse/`` tree under *tmp_path* and return the root.

    Creates the ``commits``, ``snapshots``, ``objects`` and ``refs/heads``
    directories, a ``repo.json`` carrying a fixed repo id, a ``HEAD`` file
    pointing at ``refs/heads/main``, and an empty ``main`` ref file.
    """
    meta_root = muse_dir(tmp_path)
    heads = meta_root / "refs" / "heads"

    # Same creation order as before: store directories first, then refs.
    for directory in (
        meta_root / "commits",
        meta_root / "snapshots",
        meta_root / "objects",
        heads,
    ):
        directory.mkdir(parents=True)

    (meta_root / "repo.json").write_text(json.dumps({"repo_id": "test-repo"}))
    (meta_root / "HEAD").write_text("ref: refs/heads/main\n")
    (heads / "main").write_text("")
    return tmp_path
| 49 | |
| 50 | |
def _make_object(root: pathlib.Path, content: bytes) -> str:
    """Store *content* in the object store under *root* and return its ID.

    The ID is derived from the bytes themselves via ``blob_id``.
    """
    from muse.core.types import blob_id

    object_id = blob_id(content)
    write_object(root, object_id, content)
    return object_id
| 57 | |
| 58 | |
def _make_snapshot(root: pathlib.Path, manifest: Manifest) -> str:
    """Persist a snapshot whose ID is computed from *manifest*; return that ID."""
    snapshot_id = compute_snapshot_id(manifest)
    record = SnapshotRecord(snapshot_id=snapshot_id, manifest=manifest)
    write_snapshot(root, record)
    return snapshot_id
| 64 | |
| 65 | |
def _make_commit(
    root: pathlib.Path,
    snapshot_id: str,
    message: str = "test",
    parent: str | None = None,
) -> str:
    """Persist a commit on branch ``main`` and return its computed commit ID.

    The ID is computed from the parent list, *snapshot_id*, *message* and a
    fixed timestamp (2026-01-01 UTC), so identical arguments always produce
    the same ID.  A falsy *parent* means the commit has no parents.
    """
    timestamp = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)

    ancestors = []
    if parent:
        ancestors.append(parent)

    new_id = compute_commit_id(
        parent_ids=ancestors,
        snapshot_id=snapshot_id,
        message=message,
        committed_at_iso=timestamp.isoformat(),
    )
    write_commit(
        root,
        CommitRecord(
            commit_id=new_id,
            branch="main",
            snapshot_id=snapshot_id,
            message=message,
            committed_at=timestamp,
            parent_commit_id=parent,
        ),
    )
    return new_id
| 91 | |
| 92 | |
| 93 | # --------------------------------------------------------------------------- |
| 94 | # build_mpack tests |
| 95 | # --------------------------------------------------------------------------- |
| 96 | |
| 97 | |
class TestBuildMPack:
    """Tests for ``build_mpack``: what ends up in the bundle for given commits."""

    def test_single_commit_no_history(self, repo: pathlib.Path) -> None:
        """A lone commit yields exactly one commit, one snapshot and one object."""
        content = b"hello world"
        oid = _make_object(repo, content)
        snap_id = _make_snapshot(repo, {"file.txt": oid})
        c1_id = _make_commit(repo, snap_id)

        mpack = build_mpack(repo, [c1_id])

        assert len(mpack.get("commits") or []) == 1
        assert len(mpack.get("snapshots") or []) == 1
        objs = mpack.get("objects") or []
        assert len(objs) == 1
        # Length is already pinned to 1, so index the list directly.
        assert objs[0]["object_id"] == oid

    def test_object_content_is_raw_bytes(self, repo: pathlib.Path) -> None:
        """The bundled object's ``content`` equals the bytes that were stored."""
        content = b"\x00\x01\x02\x03"
        oid = _make_object(repo, content)
        snap_id = _make_snapshot(repo, {"bin.dat": oid})
        c1_id = _make_commit(repo, snap_id)

        mpack = build_mpack(repo, [c1_id])

        objs = mpack.get("objects") or []
        assert len(objs) == 1
        assert objs[0]["content"] == content

    def test_multi_commit_chain(self, repo: pathlib.Path) -> None:
        """Requesting the tip of a two-commit chain bundles both commits."""
        oid1 = _make_object(repo, b"v1")
        oid2 = _make_object(repo, b"v2")
        snap1_id = _make_snapshot(repo, {"f.txt": oid1})
        snap2_id = _make_snapshot(repo, {"f.txt": oid2})
        c1_id = _make_commit(repo, snap1_id)
        c2_id = _make_commit(repo, snap2_id, parent=c1_id)

        mpack = build_mpack(repo, [c2_id])

        assert len(mpack.get("commits") or []) == 2
        assert len(mpack.get("snapshots") or []) == 2
        assert len(mpack.get("objects") or []) == 2

    def test_have_excludes_ancestor_commits(self, repo: pathlib.Path) -> None:
        """Commits listed in ``have`` are left out of the bundle."""
        oid1 = _make_object(repo, b"v1")
        oid2 = _make_object(repo, b"v2")
        snap1_id = _make_snapshot(repo, {"f.txt": oid1})
        snap2_id = _make_snapshot(repo, {"f.txt": oid2})
        c1_id = _make_commit(repo, snap1_id)
        c2_id = _make_commit(repo, snap2_id, parent=c1_id)

        mpack = build_mpack(repo, [c2_id], have=[c1_id])

        # Only c2 should be in the mpack; c1 is in have.
        commit_ids = [c["commit_id"] for c in (mpack.get("commits") or [])]
        assert c2_id in commit_ids
        assert c1_id not in commit_ids

    def test_deduplicates_shared_objects(self, repo: pathlib.Path) -> None:
        """An object referenced by two snapshots is bundled only once."""
        shared_oid = _make_object(repo, b"shared")
        snap1_id = _make_snapshot(repo, {"a.txt": shared_oid})
        snap2_id = _make_snapshot(repo, {"b.txt": shared_oid})
        c1_id = _make_commit(repo, snap1_id)
        c2_id = _make_commit(repo, snap2_id, parent=c1_id)

        mpack = build_mpack(repo, [c2_id])

        # Shared object should appear only once.
        object_ids = [o["object_id"] for o in (mpack.get("objects") or [])]
        assert object_ids.count(shared_oid) == 1

    def test_empty_commit_ids_returns_empty_mpack(self, repo: pathlib.Path) -> None:
        """An empty commit list produces no commits and no objects."""
        mpack = build_mpack(repo, [])
        assert (mpack.get("commits") or []) == []
        assert (mpack.get("objects") or []) == []

    def test_missing_commit_skipped_gracefully(self, repo: pathlib.Path) -> None:
        """An unknown commit ID is ignored rather than raising."""
        # Should not raise even if a commit_id does not exist.
        mpack = build_mpack(repo, [NULL_LONG_ID])
        assert (mpack.get("commits") or []) == []

    def test_snapshot_always_included_for_every_commit(self, repo: pathlib.Path) -> None:
        """Every commit in the mpack must have its snapshot included.

        This is the data-integrity invariant that prevents the corruption
        pattern where commits arrive on the remote without their snapshots,
        making them permanently unreadable after a local .muse wipe.
        """
        oid = _make_object(repo, b"content")
        snap_id = _make_snapshot(repo, {"a.txt": oid})
        c_id = _make_commit(repo, snap_id)

        mpack = build_mpack(repo, [c_id])

        commit_snap_ids = {c["snapshot_id"] for c in (mpack.get("commits") or [])}
        bundled_snap_ids = {s["snapshot_id"] for s in (mpack.get("snapshots") or [])}

        assert commit_snap_ids == bundled_snap_ids, (
            "Every commit's snapshot_id must appear in the mpack's snapshots list"
        )

    def test_missing_snapshot_raises_not_skips(self, repo: pathlib.Path) -> None:
        """build_mpack must raise ValueError when a commit's snapshot is absent.

        Silently skipping was the root cause of the recurring snapshot
        corruption: commits reached the remote without their snapshots, and
        subsequent pulls restored commits but not snapshots.
        """
        # Write commit record directly — no snapshot written.
        # ``datetime``, ``compute_commit_id`` and ``long_id`` come from the
        # module-level imports; no function-local re-imports are needed.
        snap_id = long_id("ab" * 32)  # valid prefixed ID, but no snapshot file exists
        committed_at = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
        c_id = compute_commit_id(
            parent_ids=[],
            snapshot_id=snap_id,
            message="orphan",
            committed_at_iso=committed_at.isoformat(),
        )
        write_commit(repo, CommitRecord(
            commit_id=c_id, branch="main",
            snapshot_id=snap_id, message="orphan", committed_at=committed_at,
        ))

        with pytest.raises(ValueError, match="Push aborted"):
            build_mpack(repo, [c_id])

    def test_merge_commit_includes_both_parents(self, repo: pathlib.Path) -> None:
        """Bundling a two-parent merge commit pulls in both parent commits."""
        oid_a = _make_object(repo, b"branch-a")
        oid_b = _make_object(repo, b"branch-b")
        snap_a_id = _make_snapshot(repo, {"a.txt": oid_a})
        snap_b_id = _make_snapshot(repo, {"b.txt": oid_b})
        snap_m_id = _make_snapshot(repo, {"a.txt": oid_a, "b.txt": oid_b})
        c_a_id = _make_commit(repo, snap_a_id)
        c_b_id = _make_commit(repo, snap_b_id)
        # Merge commit with two parents — compute its ID from both parent hashes.
        committed_at = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
        c_merge_id = compute_commit_id(
            parent_ids=[c_a_id, c_b_id],
            snapshot_id=snap_m_id,
            message="merge",
            committed_at_iso=committed_at.isoformat(),
        )
        c_merge = CommitRecord(
            commit_id=c_merge_id,
            branch="main",
            snapshot_id=snap_m_id,
            message="merge",
            committed_at=committed_at,
            parent_commit_id=c_a_id,
            parent2_commit_id=c_b_id,
        )
        write_commit(repo, c_merge)

        mpack = build_mpack(repo, [c_merge_id])
        commit_ids = {c["commit_id"] for c in (mpack.get("commits") or [])}
        assert {c_merge_id, c_a_id, c_b_id}.issubset(commit_ids)
| 252 | |
| 253 | |
| 254 | # --------------------------------------------------------------------------- |
| 255 | # apply_mpack tests |
| 256 | # --------------------------------------------------------------------------- |
| 257 | |
| 258 | |
class TestApplyMPack:
    """Tests for ``apply_mpack``: writing a bundle into a repository."""

    def test_round_trip(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        """A bundle built from one repo can be applied to a fresh one intact."""
        payload = b"round trip"
        blob = _make_object(repo, payload)
        snapshot = _make_snapshot(repo, {"f.txt": blob})
        commit = _make_commit(repo, snapshot, message="initial")

        bundle = build_mpack(repo, [commit])

        # Fresh destination: only the three store directories exist.
        target = tmp_path / "dest"
        target_meta = muse_dir(target)
        for name in ("commits", "snapshots", "objects"):
            (target_meta / name).mkdir(parents=True)

        outcome = apply_mpack(target, bundle)

        assert outcome["objects_written"] == 1
        assert has_object(target, blob)
        assert read_object(target, blob) == payload
        assert read_snapshot(target, snapshot) is not None
        assert read_commit(target, commit) is not None

    def test_idempotent_apply(self, repo: pathlib.Path) -> None:
        """Applying the same mpack twice does not raise; the second apply
        reports ``objects_written == 0``."""
        payload = b"idempotent"
        blob = _make_object(repo, payload)
        snapshot = _make_snapshot(repo, {"f.txt": blob})
        commit = _make_commit(repo, snapshot)

        bundle = build_mpack(repo, [commit])
        apply_mpack(repo, bundle)
        second = apply_mpack(repo, bundle)

        # Everything was already present before the second apply.
        assert second["objects_written"] == 0

    def test_malformed_object_skipped(self, repo: pathlib.Path) -> None:
        """A bad object payload is skipped and not counted as written."""
        # NOTE(review): the payload has a short, unprefixed object_id and
        # empty content — presumably the ID is what makes it malformed;
        # confirm against apply_mpack. Either way nothing may be written.
        bad_payload = ObjectPayload(object_id="abc123", content=b"")
        bundle: MPack = {
            "commits": [],
            "snapshots": [],
            "objects": [bad_payload],
        }
        outcome = apply_mpack(repo, bundle)
        assert outcome["objects_written"] == 0

    def test_empty_mpack_is_noop(self, repo: pathlib.Path) -> None:
        """An mpack with no keys at all writes no objects."""
        bundle: MPack = {}
        outcome = apply_mpack(repo, bundle)
        assert outcome["objects_written"] == 0

    def test_apply_preserves_commit_metadata(
        self, repo: pathlib.Path, tmp_path: pathlib.Path
    ) -> None:
        """Message and snapshot ID survive the build → apply trip."""
        blob = _make_object(repo, b"data")
        snapshot = _make_snapshot(repo, {"data.bin": blob})
        commit_id = _make_commit(repo, snapshot, message="preserve me")

        bundle = build_mpack(repo, [commit_id])

        target = tmp_path / "d"
        for store_dir in (commits_dir, snapshots_dir, objects_dir):
            store_dir(target).mkdir(parents=True)
        apply_mpack(target, bundle)

        restored = read_commit(target, commit_id)
        assert restored is not None
        assert restored.message == "preserve me"
        assert restored.snapshot_id == snapshot

    def test_apply_returns_new_object_count(
        self, repo: pathlib.Path, tmp_path: pathlib.Path
    ) -> None:
        """``objects_written`` counts every object new to the destination."""
        first = _make_object(repo, b"obj1")
        second = _make_object(repo, b"obj2")
        snapshot = _make_snapshot(repo, {"a": first, "b": second})
        commit_id = _make_commit(repo, snapshot)

        bundle = build_mpack(repo, [commit_id])
        target = tmp_path / "d"
        for store_dir in (commits_dir, snapshots_dir, objects_dir):
            store_dir(target).mkdir(parents=True)

        outcome = apply_mpack(target, bundle)
        assert outcome["objects_written"] == 2

    def test_apply_full_manifest_snapshot_from_server(
        self, repo: pathlib.Path, tmp_path: pathlib.Path
    ) -> None:
        """apply_mpack must write snapshots sent in full-manifest format.

        The server fetch response may include WireSnapshot dicts with a
        ``manifest`` key (full content, no delta encoding).
        _apply_snapshot_deltas only understands the ``delta_add``/``delta_remove``
        format used by build_mpack. When it receives a full-manifest dict:

            {"snapshot_id": "sha256:...", "manifest": {"f.txt": "sha256:..."},
             "directories": [], "created_at": ""}

        it finds delta_add={} and delta_remove=[], reconstructs base={},
        computes sha256(empty) = "sha256:e3b0c44...", which mismatches the
        real snapshot_id → snapshot skipped → pull aborted with
        "snapshot referenced by commit" error.

        The fix must handle both formats in _apply_snapshot_deltas:
        - delta format: ``{snapshot_id, parent_snapshot_id, delta_add, delta_remove}``
        - full format:  ``{snapshot_id, manifest, directories, ...}``
        """
        blob = _make_object(repo, b"stream content")
        snapshot = _make_snapshot(repo, {"stream.txt": blob})
        commit_id = _make_commit(repo, snapshot, message="stream commit")

        # Shape of a full-manifest snapshot: it has a 'manifest' key and
        # carries neither 'delta_add' nor 'delta_remove'.
        wire_snapshot = {
            "snapshot_id": snapshot,
            "manifest": {"stream.txt": blob},
            "directories": [],
            "created_at": "",
        }
        commit_record = read_commit(repo, commit_id)
        assert commit_record is not None

        target = tmp_path / "dest"
        for store_dir in (commits_dir, snapshots_dir, objects_dir):
            store_dir(target).mkdir(parents=True)
        # The object is already present at the destination, so the bundle
        # below ships no objects.
        write_object(target, blob, b"stream content")

        bundle: MPack = {
            "commits": [commit_record.to_dict()],
            "snapshots": [wire_snapshot],
            "objects": [],
        }
        outcome = apply_mpack(target, bundle)

        assert read_snapshot(target, snapshot) is not None, (
            "Snapshot with full manifest format was not written — "
            "_apply_snapshot_deltas did not handle the 'manifest' key"
        )
        assert outcome["snapshots_written"] == 1
File History
2 commits
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a
fix: repair four test failures from post-migration audit
Sonnet 4.6
patch
28 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf
fix: unified object store migration — idempotent writes, JS…
Sonnet 4.6
minor
⚠
29 days ago