gabriel / muse public
test_apply_mpack_pack_store.py python
259 lines 10.4 KB
Raw
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 21 days ago
1 """TDD — apply_mpack writes pack, not loose objects (issue #70 Phase 2).
2
3 After this change:
4 - Wire-received objects land in a single .mpack + .idx file pair.
5 - Zero loose object writes for wire-received blobs.
6 - Commits and snapshots still go to .muse/commits/ and .muse/snapshots/.
7 - read_object() still works transparently via the pack store fallthrough.
8 - All existing safety invariants (dedup, size cap, integrity check,
9 failed-object propagation) are preserved.
10 """
11 from __future__ import annotations
12
13 import datetime
14 import json
15 import pathlib
16 from unittest.mock import patch
17
18 import pytest
19
20 from muse.core.mpack import MPack, apply_mpack
21 from muse.core.object_store import has_object, read_object
22 from muse.core.paths import muse_dir, packs_dir
23 from muse.core.ids import hash_commit, hash_snapshot
24 from muse.core.commits import (
25 CommitRecord,
26 read_commit,
27 )
28 from muse.core.snapshots import (
29 SnapshotRecord,
30 read_snapshot,
31 )
32 from muse.core.types import blob_id
33
34 _DT = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
35
36
37 # ---------------------------------------------------------------------------
38 # Fixtures
39 # ---------------------------------------------------------------------------
40
41
def _init_repo(root: pathlib.Path) -> pathlib.Path:
    """Create a minimal repository skeleton for *root* and return *root*.

    Inside the directory returned by ``muse_dir(root)`` this writes a
    ``repo.json`` with a fixed repo id, creates the ``commits``,
    ``snapshots``, ``objects`` and ``refs/heads`` sub-directories, points
    ``HEAD`` at ``refs/heads/main`` and leaves an empty ``config.toml``.
    """
    meta_dir = muse_dir(root)
    # No ``exist_ok`` on purpose: this raises if the directory already exists.
    meta_dir.mkdir(parents=True)

    repo_info = json.dumps({"repo_id": "ps-test"})
    (meta_dir / "repo.json").write_text(repo_info)

    for subdir in ("commits", "snapshots", "objects", "refs/heads"):
        meta_dir.joinpath(subdir).mkdir(parents=True, exist_ok=True)

    (meta_dir / "HEAD").write_text("ref: refs/heads/main\n")
    (meta_dir / "config.toml").write_text("")
    return root
51
52
def _make_mpack(n_objects: int = 3) -> tuple[MPack, list[tuple[str, bytes]]]:
    """Assemble a small MPack holding *n_objects* blobs, one snapshot, one commit.

    Returns the MPack together with the ``(object_id, content)`` pairs that
    went into it, so callers can compare stored bytes against the originals.
    """
    payloads = [f"file-content-{idx}".encode() * 16 for idx in range(n_objects)]
    objects: list[tuple[str, bytes]] = [(blob_id(data), data) for data in payloads]
    # One manifest entry per blob, in the same order the blobs were generated.
    manifest: dict[str, str] = {
        f"file_{idx}.txt": oid for idx, (oid, _) in enumerate(objects)
    }

    sid = hash_snapshot(manifest)
    cid = hash_commit(
        parent_ids=[],
        snapshot_id=sid,
        message="test commit",
        committed_at_iso=_DT.isoformat(),
    )

    commit = CommitRecord(
        commit_id=cid,
        branch="main",
        snapshot_id=sid,
        message="test commit",
        committed_at=_DT,
        parent_commit_id=None,
        parent2_commit_id=None,
        author="",
        metadata={},
        structured_delta=None,
        sem_ver_bump="none",
        breaking_changes=[],
        agent_id="",
        model_id="",
        toolchain_id="",
        prompt_hash="",
        signature="",
        signer_key_id="",
    )
    snapshot_entry = {
        "snapshot_id": sid,
        "parent_snapshot_id": None,
        "delta_upsert": manifest,
        "delta_remove": [],
    }
    blob_entries = [{"object_id": oid, "content": raw} for oid, raw in objects]

    mpack: MPack = {
        "blobs": blob_entries,
        "snapshots": [snapshot_entry],
        "commits": [commit.to_dict()],
        "tags": [],
    }
    return mpack, objects
88
89
90 # ---------------------------------------------------------------------------
91 # Core behaviour
92 # ---------------------------------------------------------------------------
93
94
class TestApplyMpackWritesPack:
    """Core behaviour: ``apply_mpack`` stores received blobs in a pack file."""

    def test_blobs_go_to_pack_not_loose(self, tmp_path: pathlib.Path) -> None:
        """No received blob may show up under the loose ``objects/sha256`` tree."""
        repo = _init_repo(tmp_path)
        mpack, objects = _make_mpack(5)
        apply_mpack(repo, mpack)
        loose_dir = muse_dir(repo) / "objects" / "sha256"
        # Join the path components of every loose file back into one string so
        # the check holds whatever sharding the loose store uses (``ab/cdef…``,
        # flat, multi-level).  A hard-coded ``hex[:2]/hex[2:]`` path would pass
        # vacuously under any other layout.
        loose_hexes = (
            {
                "".join(path.relative_to(loose_dir).parts)
                for path in loose_dir.rglob("*")
                if path.is_file()
            }
            if loose_dir.exists()
            else set()
        )
        # Blobs must go to the pack store — none should appear as loose objects.
        for oid, _ in objects:
            _, hex_part = oid.split(":", 1)
            assert hex_part not in loose_hexes, f"blob {oid} written as loose object"

    def test_writes_exactly_one_pack_and_one_idx(self, tmp_path: pathlib.Path) -> None:
        """A single apply produces one ``.mpack`` and one ``.idx`` file."""
        repo = _init_repo(tmp_path)
        mpack, _ = _make_mpack(5)
        apply_mpack(repo, mpack)
        pack_dir = packs_dir(repo)
        mpack_files = list(pack_dir.glob("*.mpack"))
        idx_files = list(pack_dir.glob("*.idx"))
        assert len(mpack_files) == 1, f"expected 1 .mpack, got {len(mpack_files)}"
        assert len(idx_files) == 1, f"expected 1 .idx, got {len(idx_files)}"

    def test_objects_readable_via_read_object(self, tmp_path: pathlib.Path) -> None:
        """``read_object`` returns the original bytes for every applied blob."""
        repo = _init_repo(tmp_path)
        mpack, objects = _make_mpack(5)
        apply_mpack(repo, mpack)
        for oid, content in objects:
            assert read_object(repo, oid) == content

    def test_has_object_finds_packed_objects(self, tmp_path: pathlib.Path) -> None:
        """``has_object`` reports every applied blob as present."""
        repo = _init_repo(tmp_path)
        mpack, objects = _make_mpack(3)
        apply_mpack(repo, mpack)
        for oid, _ in objects:
            assert has_object(repo, oid)

    def test_commits_written_to_unified_object_store(self, tmp_path: pathlib.Path) -> None:
        """The commit carried by the MPack is readable after apply."""
        repo = _init_repo(tmp_path)
        mpack, _ = _make_mpack(2)
        cid = mpack["commits"][0]["commit_id"]
        apply_mpack(repo, mpack)
        assert read_commit(repo, cid) is not None

    def test_snapshots_written_to_unified_object_store(self, tmp_path: pathlib.Path) -> None:
        """The snapshot carried by the MPack is readable after apply."""
        repo = _init_repo(tmp_path)
        mpack, _ = _make_mpack(2)
        sid = mpack["snapshots"][0]["snapshot_id"]
        apply_mpack(repo, mpack)
        assert read_snapshot(repo, sid) is not None

    def test_xl_objects_produce_two_files_not_thousands(self, tmp_path: pathlib.Path) -> None:
        """500 blobs still leave exactly two entries in the packs directory."""
        repo = _init_repo(tmp_path)
        mpack, _ = _make_mpack(500)
        apply_mpack(repo, mpack)
        pack_dir = packs_dir(repo)
        total_files = len(list(pack_dir.glob("*")))
        assert total_files == 2, f"expected 2 files (1 .mpack + 1 .idx), got {total_files}"

    def test_apply_mpack_result_counts_objects_written(self, tmp_path: pathlib.Path) -> None:
        """The result dict counts every new blob as written and none as skipped."""
        repo = _init_repo(tmp_path)
        mpack, _ = _make_mpack(4)
        result = apply_mpack(repo, mpack)
        assert result["blobs_written"] == 4
        assert result["blobs_skipped"] == 0

    def test_apply_mpack_idempotent(self, tmp_path: pathlib.Path) -> None:
        """Applying the same MPack twice writes nothing new the second time."""
        repo = _init_repo(tmp_path)
        mpack, _ = _make_mpack(3)
        apply_mpack(repo, mpack)
        result2 = apply_mpack(repo, mpack)
        # Second apply: all objects already present → all skipped
        assert result2["blobs_written"] == 0
        assert result2["blobs_skipped"] == 3
        # Still exactly one pack file (no duplicate written)
        assert len(list(packs_dir(repo).glob("*.mpack"))) == 1
172
173
174 # ---------------------------------------------------------------------------
175 # Safety invariants preserved
176 # ---------------------------------------------------------------------------
177
178
class TestSafetyInvariantsPreserved:
    """Integrity, dedup, size-cap and failure handling around ``apply_mpack``."""

    def test_poisoned_object_skips_its_snapshot_and_commit(self, tmp_path: pathlib.Path) -> None:
        """A blob whose bytes do not match its id takes its snapshot and commit down with it."""
        repo = _init_repo(tmp_path)
        oid = blob_id(b"legitimate content")
        tampered = b"poisoned content"  # bytes that do not hash to ``oid``
        manifest = {"file.txt": oid}
        sid = hash_snapshot(manifest)
        cid = hash_commit(
            parent_ids=[],
            snapshot_id=sid,
            message="poisoned",
            committed_at_iso=_DT.isoformat(),
        )
        commit = CommitRecord(
            commit_id=cid,
            branch="main",
            snapshot_id=sid,
            message="poisoned",
            committed_at=_DT,
            parent_commit_id=None,
            parent2_commit_id=None,
            author="",
            metadata={},
            structured_delta=None,
            sem_ver_bump="none",
            breaking_changes=[],
            agent_id="",
            model_id="",
            toolchain_id="",
            prompt_hash="",
            signature="",
            signer_key_id="",
        )
        mpack: MPack = {
            "blobs": [{"object_id": oid, "content": tampered}],
            "snapshots": [
                {
                    "snapshot_id": sid,
                    "parent_snapshot_id": None,
                    "delta_upsert": manifest,
                    "delta_remove": [],
                }
            ],
            "commits": [commit.to_dict()],
            "tags": [],
        }

        outcome = apply_mpack(repo, mpack)

        assert not has_object(repo, oid)
        assert read_snapshot(repo, sid) is None
        assert read_commit(repo, cid) is None
        assert oid in outcome["failed_blobs"]

    def test_oserror_on_write_pack_aborts_cleanly(self, tmp_path: pathlib.Path) -> None:
        """An ``OSError`` from ``write_pack`` surfaces and leaves no snapshot or commit behind."""
        repo = _init_repo(tmp_path)
        mpack, _ = _make_mpack(1)
        sid = mpack["snapshots"][0]["snapshot_id"]
        cid = mpack["commits"][0]["commit_id"]

        disk_full = OSError("disk full")
        with patch("muse.core.mpack.write_pack", side_effect=disk_full), \
                pytest.raises(OSError, match="disk full"):
            apply_mpack(repo, mpack)

        assert read_snapshot(repo, sid) is None
        assert read_commit(repo, cid) is None

    def test_duplicate_object_ids_skipped(self, tmp_path: pathlib.Path) -> None:
        """The same blob listed twice is written once and skipped once."""
        repo = _init_repo(tmp_path)
        content = b"dedup me"
        oid = blob_id(content)
        # Two separate but identical entries: the second one is the duplicate.
        twin_blobs = [{"object_id": oid, "content": content} for _ in range(2)]
        mpack: MPack = {
            "blobs": twin_blobs,
            "snapshots": [],
            "commits": [],
            "tags": [],
        }

        outcome = apply_mpack(repo, mpack)

        assert outcome["blobs_written"] == 1
        assert outcome["blobs_skipped"] == 1

    def test_oversized_object_tracked_as_failed(self, tmp_path: pathlib.Path) -> None:
        """A blob one byte over ``MAX_OBJECT_WRITE_BYTES`` is reported failed and not stored."""
        from muse.core.validation import MAX_OBJECT_WRITE_BYTES

        repo = _init_repo(tmp_path)
        oversized = b"x" * (MAX_OBJECT_WRITE_BYTES + 1)
        oid = blob_id(oversized)
        mpack: MPack = {
            "blobs": [{"object_id": oid, "content": oversized}],
            "snapshots": [],
            "commits": [],
            "tags": [],
        }

        outcome = apply_mpack(repo, mpack)

        assert oid in outcome["failed_blobs"]
        assert not has_object(repo, oid)

    def test_empty_objects_list_writes_no_pack(self, tmp_path: pathlib.Path) -> None:
        """An MPack without blobs leaves no ``.mpack`` file behind."""
        repo = _init_repo(tmp_path)
        mpack: MPack = {"blobs": [], "snapshots": [], "commits": [], "tags": []}
        apply_mpack(repo, mpack)
        pack_root = packs_dir(repo)
        # Either the packs directory was never created, or it holds no pack.
        assert not (pack_root.exists() and list(pack_root.glob("*.mpack")))
File History 6 commits
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 21 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 22 days ago
sha256:0313c134f0ef4518a9c3a0ec359ffdc42546dc720010730374edfe0857caf7ef rename: delta_add → delta_upsert across wire format, source… Sonnet 4.6 minor 23 days ago
sha256:fb19dc03703eb3fc11d016ea19f619eebfab7bde2acf247346dc0f032e65ff19 fix(push): step 0 log shows full /refs URL instead of misle… Sonnet 4.6 patch 23 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 29 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor 29 days ago