gabriel / muse public
test_mpack_delta_format.py python
265 lines 9.5 KB
Raw
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 21 days ago
1 """TDD — MPack snapshot delta format.
2
3 Guiding principle: content-addressing is a proof, not a label.
4 snapshot_id = sha256(sorted path-NUL-oid pairs)
5
6 If we hold snapshot_id and a delta from the parent manifest, we reconstruct
7 the full manifest and hash it. If the hash matches snapshot_id, the delta
8 is correct. No external store needed. The math IS the verification.
9
10 Tests:
11 1. build_mpack emits SnapshotDeltaDict entries (delta_upsert/delta_remove),
12 never a full manifest blob per snapshot after the first one.
13 2. Delta chain reconstruction: apply each delta → hash matches snapshot_id.
14 3. Serialized delta snapshot entries are < 10% of the size of the equivalent
15 full-manifest snapshot entries for a 100-commit chain where each commit changes one file.
16 4. apply_mpack round-trips delta bundles: snapshots written to local store
17 have the correct full manifest.
18 """
19 from __future__ import annotations
20
21 import datetime
22 import hashlib
23 import pathlib
24
25 import pytest
26
27 from muse.core.object_store import write_object
28 from muse.core.mpack import MPack, apply_mpack, build_mpack
29 from muse.core.paths import muse_dir
30 from muse.core.ids import hash_snapshot as compute_snapshot_id
31 from muse.core.refs import write_branch_ref
32 from muse.core.commits import (
33 CommitRecord,
34 write_commit,
35 )
36 from muse.core.snapshots import (
37 SnapshotRecord,
38 read_snapshot,
39 write_snapshot,
40 )
41 from muse.core.types import blob_id
42
# Type aliases shared by the helpers and tests in this module.
_Manifest = dict[str, str] # snapshot manifest: path → blob_id
_ManifestMap = dict[str, _Manifest] # snapshot_id → full manifest
45
46
47 # ---------------------------------------------------------------------------
48 # Helpers
49 # ---------------------------------------------------------------------------
50
def _make_repo(tmp: pathlib.Path) -> pathlib.Path:
    """Create an empty repository skeleton rooted at *tmp* and return *tmp*.

    The directory returned by ``muse_dir(tmp)`` is created fresh (it must not
    already exist) and filled with the ``commits``, ``snapshots``, ``objects``
    and ``refs/heads`` directories plus ``repo.json``, ``HEAD`` and an empty
    ``config.toml``.
    """
    tmp.mkdir(parents=True, exist_ok=True)

    store = muse_dir(tmp)
    # No exist_ok here: a pre-existing store directory is an error.
    store.mkdir()

    for name in ("commits", "snapshots", "objects"):
        (store / name).mkdir()
    (store / "refs" / "heads").mkdir(parents=True)

    seed_files = {
        "repo.json": '{"repo_id":"delta-test","owner":"gabriel"}',
        "HEAD": "ref: refs/heads/main\n",
        "config.toml": "",
    }
    for name, text in seed_files.items():
        (store / name).write_text(text)

    return tmp
62
63
# Dimensions of the synthetic history built by _populate_chain.
_N_BASE_FILES = 50  # base blobs (and manifest paths) written before any commit
_N_COMMITS = 100  # commits, and therefore snapshots, in the chain
_BLOB_SIZE = 256  # number of b"x" filler bytes appended to each blob's tag
67
68
def _make_blob(tag: str) -> tuple[str, bytes]:
    """Build a test blob for *tag* and return ``(blob_id, payload)``.

    The payload is the encoded tag followed by ``_BLOB_SIZE`` bytes of ``x``
    filler; the id is whatever ``blob_id`` computes for that payload.
    """
    payload = b"".join((tag.encode(), b"x" * _BLOB_SIZE))
    return blob_id(payload), payload
72
73
def _populate_chain(repo: pathlib.Path) -> tuple[str, list[str]]:
    """Fill *repo* with a linear ``main`` history of ``_N_COMMITS`` commits.

    ``_N_BASE_FILES`` base blobs are written first.  Each commit's manifest is
    the base manifest with exactly one path (picked round-robin) pointing at a
    freshly written blob.  Relative to its parent, a commit therefore adds one
    new blob id and puts the parent's modified path back on its base blob.

    Returns (head_commit_id, ordered_snapshot_ids_oldest_first).
    """
    base_manifest: _Manifest = {}
    for n in range(_N_BASE_FILES):
        oid, raw = _make_blob(f"base-{n:04d}")
        write_object(repo, oid, raw)
        base_manifest[f"file_{n:04d}.txt"] = oid

    snapshot_ids: list[str] = []
    parent: str | None = None
    start = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)

    for n in range(_N_COMMITS):
        # Commits are spaced one minute apart, starting at `start`.
        committed_at = start + datetime.timedelta(seconds=60 * n)
        message = f"c{n:05d}"

        # One fresh blob per commit, swapped in for a single base path.
        new_oid, new_raw = _make_blob(f"commit-{n:05d}-variant")
        write_object(repo, new_oid, new_raw)
        manifest = {
            **base_manifest,
            f"file_{n % _N_BASE_FILES:04d}.txt": new_oid,
        }

        sid = compute_snapshot_id(manifest)
        write_snapshot(repo, SnapshotRecord(snapshot_id=sid, manifest=manifest))
        snapshot_ids.append(sid)

        cid = _make_commit_id(parent, sid, message, committed_at.isoformat())
        write_commit(
            repo,
            CommitRecord(
                commit_id=cid,
                branch="main",
                snapshot_id=sid,
                message=message,
                committed_at=committed_at,
                parent_commit_id=parent,
                parent2_commit_id=None,
                author="gabriel",
                metadata={},
                structured_delta=None,
                sem_ver_bump="none",
                breaking_changes=[],
                agent_id="", model_id="", toolchain_id="",
                prompt_hash="", signature="", signer_key_id="",
            ),
        )
        parent = cid

    # With no commits at all the branch ref is written as the empty string.
    head = parent or ""
    write_branch_ref(repo, "main", head)
    return head, snapshot_ids
127
128
def _make_commit_id(parent: str | None, sid: str, msg: str, ts: str) -> str:
    """Return the commit id ``hash_commit`` derives for a test commit.

    *parent* is passed as a one-element parent list, or an empty list when it
    is ``None``/empty.  *sid*, *msg* and *ts* are forwarded as the snapshot id,
    message and ISO commit timestamp; the author is always ``"gabriel"``.
    """
    from muse.core.ids import hash_commit

    parents: list[str] = []
    if parent:
        parents.append(parent)

    return hash_commit(
        parent_ids=parents,
        snapshot_id=sid,
        message=msg,
        committed_at_iso=ts,
        author="gabriel",
    )
138
139
def _reconstruct_from_deltas(mpack: MPack) -> _ManifestMap:
    """Replay the snapshot deltas in *mpack* and return {snapshot_id: full_manifest}.

    Entries are processed in the order they appear.  Each one starts from a
    copy of its parent's rebuilt manifest (or from an empty manifest when it
    names no parent, or a parent that has not been rebuilt yet), applies
    ``delta_upsert`` and then drops every path listed in ``delta_remove``.

    Raises AssertionError when a rebuilt manifest does not hash to the entry's
    ``snapshot_id``.
    """
    from muse.core.ids import hash_snapshot

    manifests: _ManifestMap = {}
    for entry in mpack.get("snapshots") or []:
        sid = entry["snapshot_id"]
        parent_sid = entry.get("parent_snapshot_id")
        upserts: dict[str, str] = entry.get("delta_upsert") or {}
        removals: list[str] = entry.get("delta_remove") or []

        parent_manifest: _Manifest = {}
        if parent_sid and parent_sid in manifests:
            parent_manifest = manifests[parent_sid]

        # Build a new dict so the parent's stored manifest is never mutated.
        manifest = {**parent_manifest, **upserts}
        for path in removals:
            manifest.pop(path, None)

        # The hash IS the proof.
        assert hash_snapshot(manifest) == sid, f"hash mismatch for {sid[:16]}"
        manifests[sid] = manifest

    return manifests
159
160
161 # ---------------------------------------------------------------------------
162 # Tests
163 # ---------------------------------------------------------------------------
164
def test_bundle_snapshots_are_deltas(tmp_path: pathlib.Path) -> None:
    """build_mpack emits snapshot deltas, not full manifests.

    Every snapshot entry in the bundle must carry both ``delta_upsert`` and
    ``delta_remove`` and must not carry a ``manifest`` key, and there must be
    one entry per commit in the chain.
    """
    repo = _make_repo(tmp_path / "repo")
    head, _ = _populate_chain(repo)

    mpack = build_mpack(repo, [head], have=[])

    snaps = mpack.get("snapshots") or []
    assert len(snaps) == _N_COMMITS, f"expected {_N_COMMITS} snapshots, got {len(snaps)}"

    for snap in snaps:
        # Both messages name the offending snapshot so a failure is traceable.
        assert "delta_upsert" in snap, f"missing delta_upsert in snapshot {snap.get('snapshot_id', '?')[:16]}"
        assert "delta_remove" in snap, f"missing delta_remove in snapshot {snap.get('snapshot_id', '?')[:16]}"
        assert "manifest" not in snap, "full manifest must not be present — delta format only"
179
180
def test_delta_reconstruction_proves_snapshot_id(tmp_path: pathlib.Path) -> None:
    """Replaying every delta must yield manifests that hash to their snapshot ids."""
    repo = _make_repo(tmp_path / "repo")
    head, snapshot_ids = _populate_chain(repo)

    bundle = build_mpack(repo, [head], have=[])

    # A hash mismatch anywhere in the chain fails inside the helper itself.
    rebuilt = _reconstruct_from_deltas(bundle)

    assert set(rebuilt) == set(snapshot_ids), "not all snapshots resolved"
192
193
def test_only_first_snapshot_has_full_manifest(tmp_path: pathlib.Path) -> None:
    """Only the first snapshot may carry a delta_upsert as large as the manifest."""
    repo = _make_repo(tmp_path / "repo")
    head, _ = _populate_chain(repo)

    bundle = build_mpack(repo, [head], have=[])
    snaps = bundle.get("snapshots") or []

    # The first snapshot has no parent, so its delta_upsert is the whole
    # manifest: one entry per base file.
    root_upserts = snaps[0].get("delta_upsert", {})
    assert len(root_upserts) == _N_BASE_FILES

    # Later snapshots differ from their parent only in the newly changed path
    # and, possibly, the parent's changed path going back to its base blob, so
    # their delta_upsert must stay well below a full manifest copy.
    for snap in snaps[1:]:
        n_upserts = len(snap.get("delta_upsert", {}))
        assert n_upserts < _N_BASE_FILES, (
            f"snapshot {snap['snapshot_id'][:16]} delta_upsert has {n_upserts} entries — "
            "should be a small delta, not a full manifest copy"
        )
213
214
def test_delta_bundle_smaller_than_full_manifest(tmp_path: pathlib.Path) -> None:
    """Packed delta snapshot entries must be < 10% of full-manifest entries.

    Only the ``snapshots`` section of the bundle is compared: each delta entry
    is msgpack-packed and summed, and the total is measured against the same
    snapshots packed as ``{"snapshot_id", "manifest"}`` records read back from
    the repo.  The size of the whole packed bundle is not asserted on; it is
    reported in the failure message for context.
    """
    import msgpack

    repo = _make_repo(tmp_path / "repo")
    head, snapshot_ids = _populate_chain(repo)

    delta_bundle = build_mpack(repo, [head], have=[])
    delta_bytes = len(msgpack.packb(delta_bundle, use_bin_type=True))

    # Baseline: the cost of shipping every snapshot as a full manifest.
    full_snap_size = 0
    for sid in snapshot_ids:
        record = read_snapshot(repo, sid)
        # Substituting an empty manifest for a missing record would quietly
        # shrink the baseline and distort the ratio, so fail loudly instead.
        assert record is not None, f"snapshot {sid[:16]} missing from source repo"
        full_snap_size += len(
            msgpack.packb(
                {"snapshot_id": sid, "manifest": record.manifest},
                use_bin_type=True,
            )
        )

    delta_snap_size = sum(
        len(msgpack.packb(snap, use_bin_type=True))
        for snap in (delta_bundle.get("snapshots") or [])
    )

    ratio = delta_snap_size / full_snap_size
    assert ratio < 0.10, (
        f"Delta snapshots are {ratio:.1%} of full-manifest size — expected < 10%.\n"
        f" delta_snap_bytes={delta_snap_size} full_snap_bytes={full_snap_size}\n"
        f" whole_bundle_bytes={delta_bytes}"
    )
244
245
def test_apply_mpack_reconstructs_snapshots_from_deltas(tmp_path: pathlib.Path) -> None:
    """apply_mpack must turn a delta bundle back into full SnapshotRecords.

    A bundle built from a populated source repo is applied to an empty
    destination repo; every source snapshot must then be readable there with a
    manifest that hashes to its snapshot id.
    """
    src = _make_repo(tmp_path / "src")
    head, snapshot_ids = _populate_chain(src)
    bundle = build_mpack(src, [head], have=[])

    dst = _make_repo(tmp_path / "dst")
    outcome = apply_mpack(dst, bundle)

    assert outcome["snapshots_written"] == _N_COMMITS

    # Every snapshot in dst must have the full correct manifest.
    for sid in snapshot_ids:
        snap = read_snapshot(dst, sid)
        assert snap is not None, f"snapshot {sid[:16]} not written to dst"
        rebuilt_id = compute_snapshot_id(snap.manifest)
        assert rebuilt_id == sid, (
            f"manifest hash mismatch for {sid[:16]}: "
            f"compute_snapshot_id gives {rebuilt_id[:16]}"
        )
File History 5 commits
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 21 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 22 days ago
sha256:fb19dc03703eb3fc11d016ea19f619eebfab7bde2acf247346dc0f032e65ff19 fix(push): step 0 log shows full /refs URL instead of misle… Sonnet 4.6 patch 23 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 28 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor 29 days ago