gabriel / musehub public
test_mpack_delta_e2e.py python
297 lines 10.0 KB
Raw
sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595 fix: typing audit — 0 violations, 0 untyped defs across all… Sonnet 4.6 minor ⚠ breaking 22 days ago
1 """TDD — mpack push delta format end-to-end.
2
3 Proves the full path: client builds delta mpack → PUT to MinIO →
4 server reconstructs manifests from deltas → snapshots written correctly.
5
6 Dimensions match the real musehub repo: ~1031 commits, ~700 files per
7 snapshot, ~5 files changed per commit.
8
9 The one principle: snapshot_id = sha256(manifest). The delta chain is
10 the proof. No full manifest blobs on the wire or in PG.
11 """
from __future__ import annotations

import datetime
import hashlib
import pathlib
import time
from collections.abc import AsyncIterator

import httpx
import msgpack
import pytest
import pytest_asyncio
from httpx import AsyncClient, ASGITransport
from sqlalchemy.ext.asyncio import AsyncSession

from musehub.auth.request_signing import MSignContext, require_signed_request, optional_signed_request
from musehub.db.database import get_db
from musehub.main import app

from muse.core.object_store import write_object
from muse.core.mpack import build_mpack
from muse.core.paths import muse_dir
from muse.core.snapshot import compute_commit_id, compute_snapshot_id
from muse.core.commits import CommitRecord, write_commit
from muse.core.refs import write_branch_ref
from muse.core.snapshots import SnapshotRecord, write_snapshot
from muse.core.types import blob_id
38
39
40 # ---------------------------------------------------------------------------
41 # Dimensions — match real musehub repo
42 # ---------------------------------------------------------------------------
43
# Shape of the synthetic history built by _populate().
_N_FILES = 700        # paths present in every snapshot manifest
_N_COMMITS = 1031     # length of the linear commit chain
_FILES_CHANGED = 5    # manifest entries replaced by each commit
_BLOB_SIZE = 512      # padding bytes appended to every blob payload
_GATE_S = 60.0        # time budget in seconds enforced by the e2e push test
49
50
51 # ---------------------------------------------------------------------------
52 # Auth fixtures
53 # ---------------------------------------------------------------------------
54
# Signing context returned by the overridden request-signing dependencies:
# a non-agent admin with handle "gabriel" and an all-zero sha256 identity id.
_AUTH_CTX = MSignContext(
    is_admin=True,
    is_agent=False,
    identity_id=f"sha256:{'0' * 64}",
    handle="gabriel",
)
61
62
@pytest_asyncio.fixture()
async def client(db_session: AsyncSession) -> AsyncIterator[AsyncClient]:
    """Yield an in-process HTTP client for ``app`` with test overrides installed.

    ``get_db`` is overridden to hand out ``db_session`` and both
    request-signing dependencies are overridden to return ``_AUTH_CTX``.
    All dependency overrides are cleared again on teardown.
    """

    async def _override_get_db() -> AsyncIterator[AsyncSession]:
        yield db_session

    app.dependency_overrides[get_db] = _override_get_db
    app.dependency_overrides[require_signed_request] = lambda: _AUTH_CTX
    app.dependency_overrides[optional_signed_request] = lambda: _AUTH_CTX

    try:
        async with AsyncClient(
            transport=ASGITransport(app=app),
            base_url="https://localhost:1337",
        ) as c:
            yield c
    finally:
        # ``app`` is imported from musehub.main and shared by every test in
        # the process: clear the overrides even when closing the client
        # raises, so they cannot leak into later tests.
        app.dependency_overrides.clear()
79
80
@pytest_asyncio.fixture()
async def repo(client: AsyncClient) -> AsyncIterator[str]:
    """Create a public, uninitialized repo via the API and yield its slug.

    The repo is deleted again (by its ``repoId``) once the test is done.
    """
    resp = await client.post(
        "/api/repos",
        json={"owner": "gabriel", "name": "mpack-delta-e2e", "visibility": "public", "initialize": False},
    )
    # Either status is accepted as a successful create.
    assert resp.status_code in (200, 201), resp.text
    data = resp.json()
    yield data["slug"]
    await client.delete(f"/api/repos/{data['repoId']}")
91
92
93 # ---------------------------------------------------------------------------
94 # Local repo builder
95 # ---------------------------------------------------------------------------
96
def _make_repo(tmp: pathlib.Path) -> pathlib.Path:
    """Lay out an empty repo skeleton under *tmp* and return *tmp*.

    Creates the directory returned by ``muse_dir(tmp)`` and fills it with
    ``repo.json``, ``HEAD`` (pointing at ``refs/heads/main``), an empty
    ``config.toml`` and the ``commits``/``snapshots``/``objects``/``refs/heads``
    directories.
    """
    tmp.mkdir(parents=True, exist_ok=True)

    store = muse_dir(tmp)
    store.mkdir()  # no exist_ok: a pre-existing store directory is an error

    plain_files = (
        ("repo.json", '{"repo_id":"delta-e2e","owner":"gabriel"}'),
        ("HEAD", "ref: refs/heads/main\n"),
        ("config.toml", ""),
    )
    for filename, body in plain_files:
        (store / filename).write_text(body)

    for subdir in ("commits", "snapshots", "objects"):
        (store / subdir).mkdir()
    (store / "refs" / "heads").mkdir(parents=True)

    return tmp
108
109
def _populate(repo: pathlib.Path) -> str:
    """Fill *repo* with a linear history on ``main`` and return the tip commit id.

    First writes ``_N_FILES`` base blobs and records them in a base manifest.
    Then writes ``_N_COMMITS`` commits, each with its own snapshot: a copy of
    the base manifest in which ``_FILES_CHANGED`` entries point at freshly
    written blobs.  Changes are applied to the base manifest, not to the
    previous commit's manifest, so they do not accumulate.  Commit timestamps
    start at 2026-01-01 UTC and advance by 60 seconds per commit.
    """
    # Base layer: one blob per path, shared by every snapshot.
    base_manifest: dict[str, str] = {}
    for file_no in range(_N_FILES):
        payload = f"base-{file_no:06d}".encode() + b"x" * _BLOB_SIZE
        oid = blob_id(payload)
        write_object(repo, oid, payload)
        base_manifest[f"src/file_{file_no:04d}.py"] = oid

    previous: str | None = None
    head = ""
    stamp = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
    step = datetime.timedelta(seconds=60)

    for commit_no in range(_N_COMMITS):
        manifest = base_manifest.copy()

        # Rotate through the paths so each commit touches a different window.
        window_start = commit_no * _FILES_CHANGED
        for change_no in range(_FILES_CHANGED):
            slot = (window_start + change_no) % _N_FILES
            payload = f"commit-{commit_no:05d}-file-{change_no}".encode() + b"y" * _BLOB_SIZE
            oid = blob_id(payload)
            write_object(repo, oid, payload)
            manifest[f"src/file_{slot:04d}.py"] = oid

        snapshot_id = compute_snapshot_id(manifest)
        write_snapshot(repo, SnapshotRecord(snapshot_id=snapshot_id, manifest=manifest))

        message = f"commit-{commit_no:05d}"
        commit_id = compute_commit_id(
            parent_ids=[] if not previous else [previous],
            snapshot_id=snapshot_id,
            message=message,
            committed_at_iso=stamp.isoformat(),
            author="gabriel",
        )
        record = CommitRecord(
            commit_id=commit_id,
            branch="main",
            snapshot_id=snapshot_id,
            message=message,
            committed_at=stamp,
            parent_commit_id=previous,
            parent2_commit_id=None,
            author="gabriel",
            metadata={},
            structured_delta=None,
            sem_ver_bump="none",
            breaking_changes=[],
            agent_id="",
            model_id="",
            toolchain_id="",
            prompt_hash="",
            signature="",
            signer_key_id="",
        )
        write_commit(repo, record)

        previous = head = commit_id
        stamp = stamp + step

    write_branch_ref(repo, "main", head)
    return head
168
169
170 # ---------------------------------------------------------------------------
171 # Tests
172 # ---------------------------------------------------------------------------
173
def test_mpack_is_delta_encoded(tmp_path: pathlib.Path) -> None:
    """Client-side: the mpack carries snapshot deltas, never full manifests.

    The first snapshot has no parent, so its delta_upsert is the whole
    manifest.  Every later snapshot must carry only a handful of entries,
    and the packed snapshots must be far smaller than full manifests would be.
    """
    local = _make_repo(tmp_path / "repo")
    tip = _populate(local)

    snaps = build_mpack(local, [tip], have=[]).get("snapshots") or []
    assert len(snaps) == _N_COMMITS

    # Root snapshot: nothing to diff against, so everything is an upsert.
    root = snaps[0]
    assert len(root.get("delta_upsert", {})) == _N_FILES
    assert root.get("parent_snapshot_id") is None

    # Later snapshots: a commit both introduces its own changed files and
    # restores the files its predecessor changed, hence the factor of two.
    ceiling = 2 * _FILES_CHANGED
    for snap in snaps[1:]:
        n = len(snap.get("delta_upsert", {}))
        assert n <= ceiling, (
            f"snapshot {snap['snapshot_id'][:16]} has {n} delta_upsert entries — "
            f"expected ≤ {_FILES_CHANGED * 2}"
        )
        assert "manifest" not in snap, "full manifest must not appear in delta mpack"

    # Wire size: compare against a rough full-manifest estimate of
    # 80 bytes per path+oid entry.
    full_size = 80 * _N_FILES * _N_COMMITS
    delta_size = 0
    for snap in snaps:
        delta_size += len(msgpack.packb(snap, use_bin_type=True))

    ratio = delta_size / full_size
    assert ratio < 0.05, f"delta snapshots are {ratio:.1%} of full — expected < 5%"
210
211
@pytest.mark.skip(reason="muse wire protocol in flux")
@pytest.mark.asyncio
async def test_mpack_push_delta_e2e(
    client: AsyncClient, repo: str, tmp_path: pathlib.Path, db_session: AsyncSession
) -> None:
    """Full path: build delta mpack → presign → PUT to MinIO → unpack-mpack → verify.

    Gate: the presign, unpack and refs round-trips together must take less
    than ``_GATE_S`` seconds.  The raw PUT upload is excluded from that total.

    Verifies the commit/snapshot counts reported by unpack-mpack and that
    ``main`` points at the pushed head.

    NOTE(review): ``db_session`` is requested but never queried, so this test
    does not itself assert that no ``manifest_blob`` is stored — TODO add that
    check or confirm it is covered elsewhere.
    """
    local_repo = _make_repo(tmp_path / "repo")
    head = _populate(local_repo)

    mpack = build_mpack(local_repo, [head], have=[])
    wire_bytes = msgpack.packb(mpack, use_bin_type=True)
    # The upload key is the sha256 of the packed wire bytes.
    mpack_key = "sha256:" + hashlib.sha256(wire_bytes).hexdigest()
    msgpack_headers = {"Content-Type": "application/x-msgpack"}

    t_server = time.perf_counter()

    # presign
    presign_resp = await client.post(
        f"/gabriel/{repo}/push/mpack-presign",
        content=msgpack.packb(
            {"mpack_key": mpack_key, "size_bytes": len(wire_bytes)},
            use_bin_type=True,
        ),
        headers=msgpack_headers,
    )
    assert presign_resp.status_code == 200, presign_resp.text
    # Parse the body once; accept either key spelling for the upload URL.
    presign_body = presign_resp.json()
    upload_url = presign_body.get("upload_url") or presign_body.get("uploadUrl")
    assert upload_url

    t_presign = time.perf_counter()

    # PUT to MinIO — goes over a separate plain client, not the ASGI transport.
    async with httpx.AsyncClient() as raw:
        put_resp = await raw.put(upload_url, content=wire_bytes)
    assert put_resp.status_code in (200, 204)

    t_put = time.perf_counter()

    # unpack
    unpack_resp = await client.post(
        f"/gabriel/{repo}/push/unpack-mpack",
        content=msgpack.packb(
            {"mpack_key": mpack_key, "branch": "main", "head": head},
            use_bin_type=True,
        ),
        headers=msgpack_headers,
    )
    assert unpack_resp.status_code == 200, unpack_resp.text
    result = unpack_resp.json()

    t_unpack = time.perf_counter()

    # verify refs
    refs_resp = await client.get(f"/gabriel/{repo}/refs")
    assert refs_resp.status_code == 200
    assert refs_resp.json().get("branch_heads", {}).get("main") == head

    t_done = time.perf_counter()

    # Only time spent inside app calls counts toward the gate; the span
    # between t_presign and t_put (the upload) is deliberately left out.
    presign_s = t_presign - t_server
    unpack_s = t_unpack - t_put
    refs_s = t_done - t_unpack
    server_ms = (presign_s + unpack_s + refs_s) * 1000

    assert result.get("commits_written") == _N_COMMITS, result
    assert result.get("snapshots_written") == _N_COMMITS, result

    gate_ms = _GATE_S * 1000
    assert server_ms < gate_ms, (
        f"Gate FAIL: {server_ms:.0f}ms > {gate_ms:.0f}ms\n"
        f"  presign={presign_s*1000:.0f}ms  "
        f"unpack={unpack_s*1000:.0f}ms  "
        f"refs={refs_s*1000:.0f}ms"
    )

    print(
        f"\n  {_N_COMMITS} commits × {_N_FILES} files × {_FILES_CHANGED} changed/commit\n"
        f"  mpack wire:   {len(wire_bytes)//1024} KiB\n"
        f"  server total: {server_ms:.0f}ms  (gate {gate_ms:.0f}ms)\n"
        f"  commits:      {result.get('commits_written')}\n"
        f"  snapshots:    {result.get('snapshots_written')}"
    )
File History 1 commit
sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595 fix: typing audit — 0 violations, 0 untyped defs across all… Sonnet 4.6 minor 22 days ago