test_mpack_delta_format.py
python
sha256:0313c134f0ef4518a9c3a0ec359ffdc42546dc720010730374edfe0857caf7ef
rename: delta_add → delta_upsert across wire format, source…
Sonnet 4.6
minor
⚠ breaking
22 days ago
| 1 | """TDD — MPack snapshot delta format. |
| 2 | |
| 3 | Guiding principle: content-addressing is a proof, not a label. |
| 4 | snapshot_id = sha256(sorted path-NUL-oid pairs) |
| 5 | |
| 6 | If we hold snapshot_id and a delta from the parent manifest, we reconstruct |
| 7 | the full manifest and hash it. If the hash matches snapshot_id, the delta |
| 8 | is correct. No external store needed. The math IS the verification. |
| 9 | |
| 10 | Tests: |
| 11 | 1. build_mpack emits SnapshotDeltaDict entries (delta_upsert/delta_remove), |
| 12 | never a full manifest blob per snapshot after the first one. |
| 13 | 2. Delta chain reconstruction: apply each delta → hash matches snapshot_id. |
| 14 | 3. The packed delta snapshot entries are < 10% of the size of the equivalent |
| 15 | full-manifest snapshot entries for a 100-commit chain where each commit changes one file. |
| 16 | 4. apply_mpack round-trips delta bundles: snapshots written to local store |
| 17 | have the correct full manifest. |
| 18 | """ |
| 19 | from __future__ import annotations |
| 20 | |
| 21 | import datetime |
| 22 | import hashlib |
| 23 | import pathlib |
| 24 | |
| 25 | import pytest |
| 26 | |
| 27 | from muse.core.object_store import write_object |
| 28 | from muse.core.mpack import MPack, apply_mpack, build_mpack |
| 29 | from muse.core.paths import muse_dir |
| 30 | from muse.core.ids import hash_snapshot as compute_snapshot_id |
| 31 | from muse.core.refs import write_branch_ref |
| 32 | from muse.core.commits import ( |
| 33 | CommitRecord, |
| 34 | write_commit, |
| 35 | ) |
| 36 | from muse.core.snapshots import ( |
| 37 | SnapshotRecord, |
| 38 | read_snapshot, |
| 39 | write_snapshot, |
| 40 | ) |
| 41 | from muse.core.types import blob_id |
| 42 | |
# Type aliases used by the helpers below.
# A manifest maps a repository path to the blob_id stored at that path.
_Manifest = dict[str, str]  # snapshot manifest: path → blob_id
# Reconstructed manifests keyed by snapshot_id (what _reconstruct_from_deltas returns).
_ManifestMap = dict[str, _Manifest]  # snapshot_id → full manifest
| 45 | |
| 46 | |
| 47 | # --------------------------------------------------------------------------- |
| 48 | # Helpers |
| 49 | # --------------------------------------------------------------------------- |
| 50 | |
def _make_repo(tmp: pathlib.Path) -> pathlib.Path:
    """Lay out an empty repository skeleton under *tmp* and return *tmp*.

    Inside the directory reported by ``muse_dir(tmp)`` this creates the
    ``commits``, ``snapshots``, ``objects`` and ``refs/heads`` directories plus
    ``repo.json``, ``HEAD`` (pointing at ``refs/heads/main``) and an empty
    ``config.toml``.
    """
    tmp.mkdir(parents=True, exist_ok=True)
    root = muse_dir(tmp)
    root.mkdir()

    # Store directories first, then the small metadata files.
    for sub in ("commits", "snapshots", "objects"):
        root.joinpath(sub).mkdir()
    root.joinpath("refs", "heads").mkdir(parents=True)

    seed_files = {
        "repo.json": '{"repo_id":"delta-test","owner":"gabriel"}',
        "HEAD": "ref: refs/heads/main\n",
        "config.toml": "",
    }
    for name, text in seed_files.items():
        root.joinpath(name).write_text(text)
    return tmp
| 62 | |
| 63 | |
# Sizing of the synthetic history built by _populate_chain.
_N_BASE_FILES = 50  # number of paths (file_0000.txt, …) in the base manifest
_N_COMMITS = 100  # number of commits, and therefore snapshots, in the chain
_BLOB_SIZE = 256  # count of b"x" padding bytes appended after each blob's tag
| 67 | |
| 68 | |
def _make_blob(tag: str) -> tuple[str, bytes]:
    """Build the test blob for *tag* and return ``(blob_id, raw_bytes)``.

    The payload is the encoded tag followed by ``_BLOB_SIZE`` bytes of ``x``,
    so distinct tags give distinct payloads.
    """
    payload = b"".join((tag.encode(), b"x" * _BLOB_SIZE))
    return blob_id(payload), payload
| 72 | |
| 73 | |
def _populate_chain(repo: pathlib.Path) -> tuple[str, list[str]]:
    """Write _N_BASE_FILES base blobs and a linear chain of _N_COMMITS commits.

    Each commit's manifest is the *base* manifest with one path
    (``file_{i % _N_BASE_FILES:04d}.txt``) pointed at a fresh blob.  Because
    every manifest is derived from the base rather than from the previous
    commit, two consecutive snapshots differ in up to two paths: the newly
    changed one and the previously changed one, which is back at its base blob.

    Commits are stamped one minute apart starting at 2026-01-01 UTC, and the
    ``main`` branch ref is pointed at the last commit.

    Returns (head_commit_id, ordered_snapshot_ids_oldest_first).
    """
    # Base layer: one blob per path, written to the object store.
    base_manifest: dict[str, str] = {}
    for n in range(_N_BASE_FILES):
        base_oid, base_raw = _make_blob(f"base-{n:04d}")
        write_object(repo, base_oid, base_raw)
        base_manifest[f"file_{n:04d}.txt"] = base_oid

    started = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
    step = datetime.timedelta(seconds=60)
    previous: str | None = None
    head = ""
    chain: list[str] = []

    for n in range(_N_COMMITS):
        when = started + n * step
        label = f"c{n:05d}"

        # Override exactly one base path with a blob unique to this commit.
        variant_oid, variant_raw = _make_blob(f"commit-{n:05d}-variant")
        write_object(repo, variant_oid, variant_raw)
        manifest = {
            **base_manifest,
            f"file_{n % _N_BASE_FILES:04d}.txt": variant_oid,
        }

        sid = compute_snapshot_id(manifest)
        write_snapshot(repo, SnapshotRecord(snapshot_id=sid, manifest=manifest))
        chain.append(sid)

        head = _make_commit_id(previous, sid, label, when.isoformat())
        write_commit(
            repo,
            CommitRecord(
                commit_id=head,
                branch="main",
                snapshot_id=sid,
                message=label,
                committed_at=when,
                parent_commit_id=previous,
                parent2_commit_id=None,
                author="gabriel",
                metadata={},
                structured_delta=None,
                sem_ver_bump="none",
                breaking_changes=[],
                agent_id="",
                model_id="",
                toolchain_id="",
                prompt_hash="",
                signature="",
                signer_key_id="",
            ),
        )
        previous = head

    write_branch_ref(repo, "main", head)
    return head, chain
| 127 | |
| 128 | |
def _make_commit_id(parent: str | None, sid: str, msg: str, ts: str) -> str:
    """Return the id ``hash_commit`` derives for a test commit by "gabriel".

    A falsy *parent* (``None`` or ``""``) is passed on as an empty parent list;
    otherwise the parent list holds just *parent*.
    """
    from muse.core.ids import hash_commit

    parents = [parent] if parent else []
    return hash_commit(
        parent_ids=parents,
        snapshot_id=sid,
        message=msg,
        committed_at_iso=ts,
        author="gabriel",
    )
| 138 | |
| 139 | |
def _reconstruct_from_deltas(mpack: MPack) -> _ManifestMap:
    """Replay the snapshot deltas in *mpack* and return {snapshot_id: full_manifest}.

    Entries are processed in bundle order.  Each one starts from its parent's
    already-rebuilt manifest (or from an empty manifest when it names no parent
    or the parent has not been rebuilt yet), applies ``delta_upsert`` and then
    drops every path in ``delta_remove``.  The rebuilt manifest is hashed and
    must equal the entry's ``snapshot_id``; otherwise an AssertionError is raised.
    """
    from muse.core.ids import hash_snapshot

    manifests: _ManifestMap = {}
    for entry in mpack.get("snapshots") or []:
        sid = entry["snapshot_id"]
        parent_sid = entry.get("parent_snapshot_id")
        upserts: dict[str, str] = entry.get("delta_upsert") or {}
        removals: list[str] = entry.get("delta_remove") or []

        # Upserts are layered over the parent first; removals are applied last.
        if parent_sid and parent_sid in manifests:
            rebuilt = {**manifests[parent_sid], **upserts}
        else:
            rebuilt = dict(upserts)
        for path in removals:
            rebuilt.pop(path, None)

        # The hash IS the proof.
        assert hash_snapshot(rebuilt) == sid, f"hash mismatch for {sid[:16]}"
        manifests[sid] = rebuilt
    return manifests
| 159 | |
| 160 | |
| 161 | # --------------------------------------------------------------------------- |
| 162 | # Tests |
| 163 | # --------------------------------------------------------------------------- |
| 164 | |
def test_bundle_snapshots_are_deltas(tmp_path: pathlib.Path) -> None:
    """build_mpack emits snapshot deltas, not full manifests.

    Builds the full chain with nothing in ``have`` and checks that the bundle
    carries one snapshot entry per commit, each with ``delta_upsert`` and
    ``delta_remove`` keys and no ``manifest`` key.
    """
    repo = _make_repo(tmp_path / "repo")
    head, _ = _populate_chain(repo)

    mpack = build_mpack(repo, [head], have=[])

    snaps = mpack.get("snapshots") or []
    assert len(snaps) == _N_COMMITS, f"expected {_N_COMMITS} snapshots, got {len(snaps)}"

    for snap in snaps:
        # Short id used in failure messages so the offending entry is identifiable.
        short_id = snap.get("snapshot_id", "?")[:16]
        assert "delta_upsert" in snap, f"missing delta_upsert in snapshot {short_id}"
        assert "delta_remove" in snap, f"missing delta_remove in snapshot {short_id}"
        assert "manifest" not in snap, "full manifest must not be present — delta format only"
| 179 | |
| 180 | |
def test_delta_reconstruction_proves_snapshot_id(tmp_path: pathlib.Path) -> None:
    """Replaying every delta and hashing the result must reproduce snapshot_id."""
    repo = _make_repo(tmp_path / "repo")
    head, expected_ids = _populate_chain(repo)

    # The per-snapshot hash check lives inside _reconstruct_from_deltas, which
    # raises AssertionError on the first mismatch.
    rebuilt = _reconstruct_from_deltas(build_mpack(repo, [head], have=[]))

    assert set(rebuilt) == set(expected_ids), "not all snapshots resolved"
| 192 | |
| 193 | |
def test_only_first_snapshot_has_full_manifest(tmp_path: pathlib.Path) -> None:
    """Only the first snapshot may carry a delta_upsert as large as the full manifest."""
    repo = _make_repo(tmp_path / "repo")
    head, _ = _populate_chain(repo)

    bundle = build_mpack(repo, [head], have=[])
    snaps = bundle.get("snapshots") or []
    first, rest = snaps[0], snaps[1:]

    # The first entry has no parent, so its delta_upsert is the whole manifest.
    # NOTE(review): relies on build_mpack listing the root snapshot first — confirm.
    assert len(first.get("delta_upsert", {})) == _N_BASE_FILES

    # Later entries should touch only one or two paths (the newly changed file
    # and possibly the previously changed one going back to its base blob);
    # the check here is the looser "strictly smaller than a full manifest".
    for snap in rest:
        upsert_count = len(snap.get("delta_upsert", {}))
        assert upsert_count < _N_BASE_FILES, (
            f"snapshot {snap['snapshot_id'][:16]} delta_upsert has {upsert_count} entries — "
            f"should be a small delta, not a full manifest copy"
        )
| 213 | |
| 214 | |
def test_delta_bundle_smaller_than_full_manifest(tmp_path: pathlib.Path) -> None:
    """Packed delta snapshot entries must be < 10% of packed full-manifest entries.

    Only the snapshot entries are compared: the msgpack size of each delta
    entry in the bundle versus a synthetic ``{"snapshot_id", "manifest"}``
    entry built from the stored full manifest.
    """
    import msgpack

    def packed_size(payload: object) -> int:
        # Serialized length in bytes, using the same packing options everywhere.
        return len(msgpack.packb(payload, use_bin_type=True))

    repo = _make_repo(tmp_path / "repo")
    head, snapshot_ids = _populate_chain(repo)

    delta_bundle = build_mpack(repo, [head], have=[])
    delta_bytes = packed_size(delta_bundle)

    # Build a synthetic "full manifest" entry per snapshot for size comparison.
    full_snap_size = 0
    for sid in snapshot_ids:
        record = read_snapshot(repo, sid) or SnapshotRecord(snapshot_id=sid, manifest={})
        full_snap_size += packed_size({"snapshot_id": sid, "manifest": record.manifest})

    delta_snap_size = 0
    for snap in delta_bundle.get("snapshots") or []:
        delta_snap_size += packed_size(snap)

    ratio = delta_snap_size / full_snap_size
    assert ratio < 0.10, (
        f"Delta snapshots are {ratio:.1%} of full-manifest size — expected < 10%.\n"
        f" delta_snap_bytes={delta_snap_size} full_snap_bytes={full_snap_size}"
    )
    _ = delta_bytes  # whole-bundle size: measured, not asserted; handy when debugging
| 244 | |
| 245 | |
def test_apply_mpack_reconstructs_snapshots_from_deltas(tmp_path: pathlib.Path) -> None:
    """apply_mpack turns a delta bundle back into full SnapshotRecords.

    A bundle built from a populated source repo is applied to an empty
    destination repo; every snapshot must then be readable from the destination
    with a manifest that hashes to its own snapshot_id.
    """
    src = _make_repo(tmp_path / "src")
    head, snapshot_ids = _populate_chain(src)
    bundle = build_mpack(src, [head], have=[])

    dst = _make_repo(tmp_path / "dst")
    outcome = apply_mpack(dst, bundle)
    assert outcome["snapshots_written"] == _N_COMMITS

    # Every snapshot in dst must have the full correct manifest.
    for sid in snapshot_ids:
        stored = read_snapshot(dst, sid)
        assert stored is not None, f"snapshot {sid[:16]} not written to dst"
        actual = compute_snapshot_id(stored.manifest)
        assert actual == sid, (
            f"manifest hash mismatch for {sid[:16]}: "
            f"compute_snapshot_id gives {actual[:16]}"
        )
File History
4 commits
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e
fix: rename objects→blobs in push client and all stale test…
Sonnet 4.6
patch
22 days ago
sha256:fb19dc03703eb3fc11d016ea19f619eebfab7bde2acf247346dc0f032e65ff19
fix(push): step 0 log shows full /refs URL instead of misle…
Sonnet 4.6
patch
23 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a
fix: repair four test failures from post-migration audit
Sonnet 4.6
patch
28 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf
fix: unified object store migration — idempotent writes, JS…
Sonnet 4.6
minor
⚠
29 days ago