test_apply_mpack_pack_store.py
python
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2
fix: remove commit_exists filter from have anchors — server…
Sonnet 4.6
patch
21 days ago
| 1 | """TDD — apply_mpack writes pack, not loose objects (issue #70 Phase 2). |
| 2 | |
| 3 | After this change: |
| 4 | - Wire-received objects land in a single .mpack + .idx file pair. |
| 5 | - Zero loose object writes for wire-received blobs. |
| 6 | - Commits and snapshots still go to .muse/commits/ and .muse/snapshots/. |
| 7 | - read_object() still works transparently via the pack store fallthrough. |
| 8 | - All existing safety invariants (dedup, size cap, integrity check, |
| 9 | failed-object propagation) are preserved. |
| 10 | """ |
| 11 | from __future__ import annotations |
| 12 | |
| 13 | import datetime |
| 14 | import json |
| 15 | import pathlib |
| 16 | from unittest.mock import patch |
| 17 | |
| 18 | import pytest |
| 19 | |
| 20 | from muse.core.mpack import MPack, apply_mpack |
| 21 | from muse.core.object_store import has_object, read_object |
| 22 | from muse.core.paths import muse_dir, packs_dir |
| 23 | from muse.core.ids import hash_commit, hash_snapshot |
| 24 | from muse.core.commits import ( |
| 25 | CommitRecord, |
| 26 | read_commit, |
| 27 | ) |
| 28 | from muse.core.snapshots import ( |
| 29 | SnapshotRecord, |
| 30 | read_snapshot, |
| 31 | ) |
| 32 | from muse.core.types import blob_id |
| 33 | |
| 34 | _DT = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc) |
| 35 | |
| 36 | |
| 37 | # --------------------------------------------------------------------------- |
| 38 | # Fixtures |
| 39 | # --------------------------------------------------------------------------- |
| 40 | |
| 41 | |
def _init_repo(root: pathlib.Path) -> pathlib.Path:
    """Lay out an empty repository skeleton under *root* and return *root*.

    Creates the directory reported by ``muse_dir(root)``, the sub-directories
    the tests rely on, and the ``repo.json``, ``HEAD`` and ``config.toml``
    files.  The top-level ``mkdir`` is called without ``exist_ok``, so calling
    this twice on the same *root* raises ``FileExistsError``.
    """
    meta = muse_dir(root)
    meta.mkdir(parents=True)

    for sub in ("commits", "snapshots", "objects", "refs/heads"):
        meta.joinpath(sub).mkdir(parents=True, exist_ok=True)

    # File name -> initial text content.
    seed_files = {
        "repo.json": json.dumps({"repo_id": "ps-test"}),
        "HEAD": "ref: refs/heads/main\n",
        "config.toml": "",
    }
    for name, text in seed_files.items():
        meta.joinpath(name).write_text(text)

    return root
| 51 | |
| 52 | |
def _make_mpack(n_objects: int = 3) -> tuple[MPack, list[tuple[str, bytes]]]:
    """Build a minimal MPack with *n_objects* blobs, one snapshot, one commit.

    Returns the bundle together with the ``(object_id, content)`` pairs that
    went into it, in creation order, so callers can verify each blob after
    applying the bundle.
    """
    payloads = [f"file-content-{n}".encode() * 16 for n in range(n_objects)]
    objects: list[tuple[str, bytes]] = [(blob_id(raw), raw) for raw in payloads]
    # One manifest entry per blob: file_<n>.txt -> object ID.
    manifest: dict[str, str] = {
        f"file_{n}.txt": oid for n, (oid, _) in enumerate(objects)
    }

    message = "test commit"
    sid = hash_snapshot(manifest)
    cid = hash_commit(
        parent_ids=[],
        snapshot_id=sid,
        message=message,
        committed_at_iso=_DT.isoformat(),
    )
    commit = CommitRecord(
        commit_id=cid,
        branch="main",
        snapshot_id=sid,
        message=message,
        committed_at=_DT,
        parent_commit_id=None,
        parent2_commit_id=None,
        author="",
        metadata={},
        structured_delta=None,
        sem_ver_bump="none",
        breaking_changes=[],
        agent_id="",
        model_id="",
        toolchain_id="",
        prompt_hash="",
        signature="",
        signer_key_id="",
    )

    mpack: MPack = {
        "blobs": [{"object_id": oid, "content": raw} for oid, raw in objects],
        "snapshots": [
            {
                "snapshot_id": sid,
                "parent_snapshot_id": None,
                "delta_upsert": manifest,
                "delta_remove": [],
            }
        ],
        "commits": [commit.to_dict()],
        "tags": [],
    }
    return mpack, objects
| 88 | |
| 89 | |
| 90 | # --------------------------------------------------------------------------- |
| 91 | # Core behaviour |
| 92 | # --------------------------------------------------------------------------- |
| 93 | |
| 94 | |
class TestApplyMpackWritesPack:
    """Core behaviour: ``apply_mpack`` stores wire-received blobs in a pack.

    Each test builds a fresh repository skeleton with ``_init_repo`` and
    applies a bundle produced by ``_make_mpack``.
    """

    def test_blobs_go_to_pack_not_loose(self, tmp_path: pathlib.Path) -> None:
        """No applied blob may exist as a loose file under ``objects/sha256``."""
        repo = _init_repo(tmp_path)
        mpack, objects = _make_mpack(5)
        apply_mpack(repo, mpack)
        loose_dir = muse_dir(repo) / "objects" / "sha256"
        # Blobs must go to the pack store — none should appear as loose objects.
        for oid, _ in objects:
            _, hex_part = oid.split(":", 1)
            # NOTE(review): this test assumes a two-character fan-out layout
            # for loose objects — confirm against the object store if that
            # layout ever changes, otherwise this check passes vacuously.
            loose_path = loose_dir / hex_part[:2] / hex_part[2:]
            # Probe the expected path directly instead of walking the whole
            # loose tree; a missing ``loose_dir`` simply yields False here.
            assert not loose_path.is_file(), f"blob {oid} written as loose object"

    def test_writes_exactly_one_pack_and_one_idx(self, tmp_path: pathlib.Path) -> None:
        """One apply produces exactly one ``.mpack`` and one ``.idx`` file."""
        repo = _init_repo(tmp_path)
        mpack, _ = _make_mpack(5)
        apply_mpack(repo, mpack)
        pack_dir = packs_dir(repo)
        mpack_files = list(pack_dir.glob("*.mpack"))
        idx_files = list(pack_dir.glob("*.idx"))
        assert len(mpack_files) == 1, f"expected 1 .mpack, got {len(mpack_files)}"
        assert len(idx_files) == 1, f"expected 1 .idx, got {len(idx_files)}"

    def test_objects_readable_via_read_object(self, tmp_path: pathlib.Path) -> None:
        """``read_object`` returns the original bytes for every applied blob."""
        repo = _init_repo(tmp_path)
        mpack, objects = _make_mpack(5)
        apply_mpack(repo, mpack)
        for oid, content in objects:
            assert read_object(repo, oid) == content

    def test_has_object_finds_packed_objects(self, tmp_path: pathlib.Path) -> None:
        """``has_object`` reports every applied blob as present."""
        repo = _init_repo(tmp_path)
        mpack, objects = _make_mpack(3)
        apply_mpack(repo, mpack)
        for oid, _ in objects:
            assert has_object(repo, oid)

    def test_commits_written_to_unified_object_store(self, tmp_path: pathlib.Path) -> None:
        """The bundled commit can be read back with ``read_commit``."""
        repo = _init_repo(tmp_path)
        mpack, _ = _make_mpack(2)
        cid = mpack["commits"][0]["commit_id"]
        apply_mpack(repo, mpack)
        assert read_commit(repo, cid) is not None

    def test_snapshots_written_to_unified_object_store(self, tmp_path: pathlib.Path) -> None:
        """The bundled snapshot can be read back with ``read_snapshot``."""
        repo = _init_repo(tmp_path)
        mpack, _ = _make_mpack(2)
        sid = mpack["snapshots"][0]["snapshot_id"]
        apply_mpack(repo, mpack)
        assert read_snapshot(repo, sid) is not None

    def test_xl_objects_produce_two_files_not_thousands(self, tmp_path: pathlib.Path) -> None:
        """500 blobs still yield only two entries in the packs directory."""
        repo = _init_repo(tmp_path)
        mpack, _ = _make_mpack(500)
        apply_mpack(repo, mpack)
        pack_dir = packs_dir(repo)
        total_files = len(list(pack_dir.glob("*")))
        assert total_files == 2, f"expected 2 files (1 .mpack + 1 .idx), got {total_files}"

    def test_apply_mpack_result_counts_objects_written(self, tmp_path: pathlib.Path) -> None:
        """A first apply reports every blob as written and none as skipped."""
        repo = _init_repo(tmp_path)
        mpack, _ = _make_mpack(4)
        result = apply_mpack(repo, mpack)
        assert result["blobs_written"] == 4
        assert result["blobs_skipped"] == 0

    def test_apply_mpack_idempotent(self, tmp_path: pathlib.Path) -> None:
        """Re-applying the same bundle writes nothing new."""
        repo = _init_repo(tmp_path)
        mpack, _ = _make_mpack(3)
        apply_mpack(repo, mpack)
        result2 = apply_mpack(repo, mpack)
        # Second apply: all objects already present → all skipped
        assert result2["blobs_written"] == 0
        assert result2["blobs_skipped"] == 3
        # Still exactly one pack file (no duplicate written)
        assert len(list(packs_dir(repo).glob("*.mpack"))) == 1
| 172 | |
| 173 | |
| 174 | # --------------------------------------------------------------------------- |
| 175 | # Safety invariants preserved |
| 176 | # --------------------------------------------------------------------------- |
| 177 | |
| 178 | |
class TestSafetyInvariantsPreserved:
    """Safety checks that must keep holding when blobs are written as a pack."""

    def test_poisoned_object_skips_its_snapshot_and_commit(self, tmp_path: pathlib.Path) -> None:
        """Content/ID mismatch → object, snapshot, and commit all skipped."""
        repo = _init_repo(tmp_path)
        genuine = b"legitimate content"
        oid = blob_id(genuine)
        tampered = b"poisoned content"  # wrong bytes for this oid
        manifest = {"file.txt": oid}
        sid = hash_snapshot(manifest)
        cid = hash_commit(
            parent_ids=[],
            snapshot_id=sid,
            message="poisoned",
            committed_at_iso=_DT.isoformat(),
        )
        commit = CommitRecord(
            commit_id=cid,
            branch="main",
            snapshot_id=sid,
            message="poisoned",
            committed_at=_DT,
            parent_commit_id=None,
            parent2_commit_id=None,
            author="",
            metadata={},
            structured_delta=None,
            sem_ver_bump="none",
            breaking_changes=[],
            agent_id="",
            model_id="",
            toolchain_id="",
            prompt_hash="",
            signature="",
            signer_key_id="",
        )
        mpack: MPack = {
            "blobs": [{"object_id": oid, "content": tampered}],
            "snapshots": [
                {
                    "snapshot_id": sid,
                    "parent_snapshot_id": None,
                    "delta_upsert": manifest,
                    "delta_remove": [],
                }
            ],
            "commits": [commit.to_dict()],
            "tags": [],
        }

        outcome = apply_mpack(repo, mpack)

        assert not has_object(repo, oid)
        assert read_snapshot(repo, sid) is None
        assert read_commit(repo, cid) is None
        assert oid in outcome["failed_blobs"]

    def test_oserror_on_write_pack_aborts_cleanly(self, tmp_path: pathlib.Path) -> None:
        """OSError from write_pack must propagate before any snapshot or commit is written."""
        repo = _init_repo(tmp_path)
        mpack, _ = _make_mpack(1)
        snapshot_id = mpack["snapshots"][0]["snapshot_id"]
        commit_id = mpack["commits"][0]["commit_id"]

        # Make the pack writer fail and require the error to reach the caller.
        failing_writer = patch("muse.core.mpack.write_pack", side_effect=OSError("disk full"))
        with failing_writer, pytest.raises(OSError, match="disk full"):
            apply_mpack(repo, mpack)

        assert read_snapshot(repo, snapshot_id) is None
        assert read_commit(repo, commit_id) is None

    def test_duplicate_object_ids_skipped(self, tmp_path: pathlib.Path) -> None:
        """The same blob listed twice is written once and skipped once."""
        repo = _init_repo(tmp_path)
        payload = b"dedup me"
        oid = blob_id(payload)
        mpack: MPack = {
            "blobs": [
                {"object_id": oid, "content": payload},
                {"object_id": oid, "content": payload},  # duplicate
            ],
            "snapshots": [],
            "commits": [],
            "tags": [],
        }
        outcome = apply_mpack(repo, mpack)
        assert outcome["blobs_written"] == 1
        assert outcome["blobs_skipped"] == 1

    def test_oversized_object_tracked_as_failed(self, tmp_path: pathlib.Path) -> None:
        """A blob one byte over ``MAX_OBJECT_WRITE_BYTES`` is reported failed and not stored."""
        from muse.core.validation import MAX_OBJECT_WRITE_BYTES

        repo = _init_repo(tmp_path)
        oversized = b"x" * (MAX_OBJECT_WRITE_BYTES + 1)
        oid = blob_id(oversized)
        mpack: MPack = {
            "blobs": [{"object_id": oid, "content": oversized}],
            "snapshots": [],
            "commits": [],
            "tags": [],
        }
        outcome = apply_mpack(repo, mpack)
        assert oid in outcome["failed_blobs"]
        assert not has_object(repo, oid)

    def test_empty_objects_list_writes_no_pack(self, tmp_path: pathlib.Path) -> None:
        """An empty bundle leaves no ``.mpack`` file behind."""
        repo = _init_repo(tmp_path)
        empty: MPack = {"blobs": [], "snapshots": [], "commits": [], "tags": []}
        apply_mpack(repo, empty)
        pack_root = packs_dir(repo)
        # Either the packs directory was never created, or it holds no pack.
        assert not (pack_root.exists() and list(pack_root.glob("*.mpack")))
File History
6 commits
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2
fix: remove commit_exists filter from have anchors — server…
Sonnet 4.6
patch
21 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e
fix: rename objects→blobs in push client and all stale test…
Sonnet 4.6
patch
22 days ago
sha256:0313c134f0ef4518a9c3a0ec359ffdc42546dc720010730374edfe0857caf7ef
rename: delta_add → delta_upsert across wire format, source…
Sonnet 4.6
minor
⚠
23 days ago
sha256:fb19dc03703eb3fc11d016ea19f619eebfab7bde2acf247346dc0f032e65ff19
fix(push): step 0 log shows full /refs URL instead of misle…
Sonnet 4.6
patch
23 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a
fix: repair four test failures from post-migration audit
Sonnet 4.6
patch
29 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf
fix: unified object store migration — idempotent writes, JS…
Sonnet 4.6
minor
⚠
29 days ago