gabriel / muse public
test_apply_mpack_pack_store.py python
257 lines 10.3 KB
Raw
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 29 days ago
1 """TDD — apply_mpack writes pack, not loose objects (issue #70 Phase 2).
2
3 After this change:
4 - Wire-received objects land in a single .mpack + .idx file pair.
5 - Zero loose object writes for wire-received blobs.
6 - Commits and snapshots still go to .muse/commits/ and .muse/snapshots/.
7 - read_object() still works transparently via the pack store fallthrough.
8 - All existing safety invariants (dedup, size cap, integrity check,
9 failed-object propagation) are preserved.
10 """
11 from __future__ import annotations
12
13 import datetime
14 import json
15 import pathlib
16 from unittest.mock import patch
17
18 import pytest
19
20 from muse.core.mpack import MPack, apply_mpack
21 from muse.core.object_store import has_object, read_object
22 from muse.core.paths import muse_dir, packs_dir
23 from muse.core.ids import hash_commit, hash_snapshot
24 from muse.core.store import (
25 CommitRecord,
26 SnapshotRecord,
27 read_commit,
28 read_snapshot,
29 )
30 from muse.core.types import blob_id
31
32 _DT = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
33
34
35 # ---------------------------------------------------------------------------
36 # Fixtures
37 # ---------------------------------------------------------------------------
38
39
def _init_repo(root: pathlib.Path) -> pathlib.Path:
    """Lay out an empty repository skeleton for a test and return *root*.

    The directory returned by ``muse_dir(root)`` is created (it must not
    already exist), populated with the standard sub-directories, and seeded
    with ``repo.json``, ``HEAD`` and an empty ``config.toml``.
    """
    store = muse_dir(root)
    store.mkdir(parents=True)

    for subdir in ("commits", "snapshots", "objects", "refs/heads"):
        store.joinpath(subdir).mkdir(parents=True, exist_ok=True)

    # File name -> initial text content.
    seed_files = {
        "repo.json": json.dumps({"repo_id": "ps-test"}),
        "HEAD": "ref: refs/heads/main\n",
        "config.toml": "",
    }
    for filename, text in seed_files.items():
        store.joinpath(filename).write_text(text)

    return root
49
50
def _make_mpack(n_objects: int = 3) -> tuple[MPack, list[tuple[str, bytes]]]:
    """Assemble a small MPack: *n_objects* blobs, one snapshot, one commit.

    Returns the MPack together with the ``(object_id, content)`` pairs it
    carries, in generation order, so tests can check them after applying.
    """
    payloads = [f"file-content-{index}".encode() * 16 for index in range(n_objects)]
    blobs: list[tuple[str, bytes]] = [(blob_id(data), data) for data in payloads]
    # One manifest entry per blob, keyed by a synthetic file name.
    tree: dict[str, str] = {
        f"file_{index}.txt": oid for index, (oid, _) in enumerate(blobs)
    }

    snapshot_id = hash_snapshot(tree)
    commit_id = hash_commit(
        parent_ids=[],
        snapshot_id=snapshot_id,
        message="test commit",
        committed_at_iso=_DT.isoformat(),
    )
    commit = CommitRecord(
        commit_id=commit_id,
        branch="main",
        snapshot_id=snapshot_id,
        message="test commit",
        committed_at=_DT,
        parent_commit_id=None,
        parent2_commit_id=None,
        author="",
        metadata={},
        structured_delta=None,
        sem_ver_bump="none",
        breaking_changes=[],
        agent_id="",
        model_id="",
        toolchain_id="",
        prompt_hash="",
        signature="",
        signer_key_id="",
    )

    pack: MPack = {
        "objects": [{"object_id": oid, "content": data} for oid, data in blobs],
        "snapshots": [
            {
                "snapshot_id": snapshot_id,
                "parent_snapshot_id": None,
                "delta_add": tree,
                "delta_remove": [],
            }
        ],
        "commits": [commit.to_dict()],
        "tags": [],
    }
    return pack, blobs
86
87
88 # ---------------------------------------------------------------------------
89 # Core behaviour
90 # ---------------------------------------------------------------------------
91
92
class TestApplyMpackWritesPack:
    """Core behaviour: apply_mpack stores blobs in a pack/index pair."""

    def test_blobs_go_to_pack_not_loose(self, tmp_path: pathlib.Path) -> None:
        """None of the applied blobs may exist at its expected loose path."""
        repo = _init_repo(tmp_path)
        mpack, objects = _make_mpack(5)
        apply_mpack(repo, mpack)

        loose_dir = muse_dir(repo) / "objects" / "sha256"
        loose_files: set[pathlib.Path] = set()
        if loose_dir.exists():
            loose_files = {entry for entry in loose_dir.rglob("*") if entry.is_file()}

        # Blobs must go to the pack store — none should appear as loose objects.
        for oid in {blob for blob, _ in objects}:
            _algo, digest = oid.split(":", 1)
            # Path this test expects a loose copy to occupy: <2 hex>/<rest>.
            candidate = loose_dir / digest[:2] / digest[2:]
            assert candidate not in loose_files, f"blob {oid} written as loose object"

    def test_writes_exactly_one_pack_and_one_idx(self, tmp_path: pathlib.Path) -> None:
        """A single apply leaves exactly one ``.mpack`` and one ``.idx`` file."""
        repo = _init_repo(tmp_path)
        mpack, _ = _make_mpack(5)
        apply_mpack(repo, mpack)

        target = packs_dir(repo)
        n_packs = len(list(target.glob("*.mpack")))
        n_indexes = len(list(target.glob("*.idx")))
        assert n_packs == 1, f"expected 1 .mpack, got {n_packs}"
        assert n_indexes == 1, f"expected 1 .idx, got {n_indexes}"

    def test_objects_readable_via_read_object(self, tmp_path: pathlib.Path) -> None:
        """read_object returns the original bytes for every applied blob."""
        repo = _init_repo(tmp_path)
        mpack, objects = _make_mpack(5)
        apply_mpack(repo, mpack)

        for oid, expected in objects:
            assert read_object(repo, oid) == expected

    def test_has_object_finds_packed_objects(self, tmp_path: pathlib.Path) -> None:
        """has_object reports every applied blob as present."""
        repo = _init_repo(tmp_path)
        mpack, objects = _make_mpack(3)
        apply_mpack(repo, mpack)

        for oid, _ in objects:
            assert has_object(repo, oid)

    def test_commits_still_written_to_commits_dir(self, tmp_path: pathlib.Path) -> None:
        """The commit carried by the mpack is readable after apply."""
        repo = _init_repo(tmp_path)
        mpack, _ = _make_mpack(2)
        commit_id = mpack["commits"][0]["commit_id"]
        apply_mpack(repo, mpack)

        assert read_commit(repo, commit_id) is not None

    def test_snapshots_still_written_to_snapshots_dir(self, tmp_path: pathlib.Path) -> None:
        """The snapshot carried by the mpack is readable after apply."""
        repo = _init_repo(tmp_path)
        mpack, _ = _make_mpack(2)
        snapshot_id = mpack["snapshots"][0]["snapshot_id"]
        apply_mpack(repo, mpack)

        assert read_snapshot(repo, snapshot_id) is not None

    def test_xl_objects_produce_two_files_not_thousands(self, tmp_path: pathlib.Path) -> None:
        """500 blobs still result in only two entries in the packs directory."""
        repo = _init_repo(tmp_path)
        mpack, _ = _make_mpack(500)
        apply_mpack(repo, mpack)

        entries = list(packs_dir(repo).glob("*"))
        total_files = len(entries)
        assert total_files == 2, f"expected 2 files (1 .mpack + 1 .idx), got {total_files}"

    def test_apply_mpack_result_counts_objects_written(self, tmp_path: pathlib.Path) -> None:
        """The result reports all four blobs as written and none as skipped."""
        repo = _init_repo(tmp_path)
        mpack, _ = _make_mpack(4)

        outcome = apply_mpack(repo, mpack)

        assert outcome["objects_written"] == 4
        assert outcome["objects_skipped"] == 0

    def test_apply_mpack_idempotent(self, tmp_path: pathlib.Path) -> None:
        """Applying the same mpack twice writes nothing new the second time."""
        repo = _init_repo(tmp_path)
        mpack, _ = _make_mpack(3)
        apply_mpack(repo, mpack)

        second = apply_mpack(repo, mpack)

        # Second apply: all objects already present → all skipped
        assert second["objects_written"] == 0
        assert second["objects_skipped"] == 3
        # Still exactly one pack file (no duplicate written)
        pack_files = list(packs_dir(repo).glob("*.mpack"))
        assert len(pack_files) == 1
170
171
172 # ---------------------------------------------------------------------------
173 # Safety invariants preserved
174 # ---------------------------------------------------------------------------
175
176
class TestSafetyInvariantsPreserved:
    """Integrity, dedup, size-cap and failure handling checks for apply_mpack."""

    def test_poisoned_object_skips_its_snapshot_and_commit(self, tmp_path: pathlib.Path) -> None:
        """Content/ID mismatch → object, snapshot, and commit all skipped."""
        repo = _init_repo(tmp_path)
        genuine = b"legitimate content"
        oid = blob_id(genuine)
        tampered = b"poisoned content"  # wrong bytes for this oid
        tree = {"file.txt": oid}
        snapshot_id = hash_snapshot(tree)
        commit_id = hash_commit(
            parent_ids=[],
            snapshot_id=snapshot_id,
            message="poisoned",
            committed_at_iso=_DT.isoformat(),
        )
        commit = CommitRecord(
            commit_id=commit_id,
            branch="main",
            snapshot_id=snapshot_id,
            message="poisoned",
            committed_at=_DT,
            parent_commit_id=None,
            parent2_commit_id=None,
            author="",
            metadata={},
            structured_delta=None,
            sem_ver_bump="none",
            breaking_changes=[],
            agent_id="",
            model_id="",
            toolchain_id="",
            prompt_hash="",
            signature="",
            signer_key_id="",
        )
        mpack: MPack = {
            "objects": [{"object_id": oid, "content": tampered}],
            "snapshots": [
                {
                    "snapshot_id": snapshot_id,
                    "parent_snapshot_id": None,
                    "delta_add": tree,
                    "delta_remove": [],
                }
            ],
            "commits": [commit.to_dict()],
            "tags": [],
        }

        outcome = apply_mpack(repo, mpack)

        assert not has_object(repo, oid)
        assert read_snapshot(repo, snapshot_id) is None
        assert read_commit(repo, commit_id) is None
        assert oid in outcome["failed_objects"]

    def test_oserror_on_write_pack_aborts_cleanly(self, tmp_path: pathlib.Path) -> None:
        """OSError from write_pack must propagate before any snapshot or commit is written."""
        repo = _init_repo(tmp_path)
        mpack, _ = _make_mpack(1)
        snapshot_id = mpack["snapshots"][0]["snapshot_id"]
        commit_id = mpack["commits"][0]["commit_id"]

        # Make the pack writer fail, and require that the failure surfaces unchanged.
        failing_writer = patch("muse.core.mpack.write_pack", side_effect=OSError("disk full"))
        with failing_writer, pytest.raises(OSError, match="disk full"):
            apply_mpack(repo, mpack)

        assert read_snapshot(repo, snapshot_id) is None
        assert read_commit(repo, commit_id) is None

    def test_duplicate_object_ids_skipped(self, tmp_path: pathlib.Path) -> None:
        """The same object listed twice is written once and skipped once."""
        repo = _init_repo(tmp_path)
        payload = b"dedup me"
        oid = blob_id(payload)
        mpack: MPack = {
            "objects": [
                {"object_id": oid, "content": payload},
                {"object_id": oid, "content": payload},  # duplicate
            ],
            "snapshots": [],
            "commits": [],
            "tags": [],
        }

        outcome = apply_mpack(repo, mpack)

        assert outcome["objects_written"] == 1
        assert outcome["objects_skipped"] == 1

    def test_oversized_object_tracked_as_failed(self, tmp_path: pathlib.Path) -> None:
        """A blob one byte over MAX_OBJECT_WRITE_BYTES is reported failed and not stored."""
        from muse.core.validation import MAX_OBJECT_WRITE_BYTES

        repo = _init_repo(tmp_path)
        oversized = b"x" * (MAX_OBJECT_WRITE_BYTES + 1)
        oid = blob_id(oversized)
        mpack: MPack = {
            "objects": [{"object_id": oid, "content": oversized}],
            "snapshots": [],
            "commits": [],
            "tags": [],
        }

        outcome = apply_mpack(repo, mpack)

        assert oid in outcome["failed_objects"]
        assert not has_object(repo, oid)

    def test_empty_objects_list_writes_no_pack(self, tmp_path: pathlib.Path) -> None:
        """An mpack without objects leaves no ``.mpack`` file behind."""
        repo = _init_repo(tmp_path)
        mpack: MPack = {"objects": [], "snapshots": [], "commits": [], "tags": []}

        apply_mpack(repo, mpack)

        # Either the packs directory was never created, or it holds no pack file.
        assert not packs_dir(repo).exists() or not list(packs_dir(repo).glob("*.mpack"))
File History 2 commits
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 29 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor 29 days ago