gabriel / musehub public
test_wire_fetch_step1.py python
449 lines 19.8 KB
Raw
sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595 fix: typing audit — 0 violations, 0 untyped defs across all… Sonnet 4.6 minor ⚠ breaking 22 days ago
1 """TDD — POST /{owner}/{slug}/fetch — Step 1 of the fetch protocol (issue #68).
2
3 The fetch endpoint is the server side of the three-step fetch flow:
4
5 1. POST /{owner}/{slug}/fetch
6 body: {"want": ["sha256:..."], "have": ["sha256:..."]}
7 → {"mpack_url": "...", "mpack_id": "sha256:...", "commit_count": N, "blob_count": N}
8
9 2. Client GETs mpack bytes directly from MinIO via presigned URL.
10 3. Client calls apply_mpack() locally.
11
12 Tests:
13 F01 Happy path — want=[tip], have=[] → 200, mpack_url set, sha256 mpack_id.
14 F02 Up-to-date — want=[tip], have=[tip] → 200, commit_count=0, blob_count=0, mpack_url null.
15 F03 Delta — want=[c2], have=[c1] → only new commit + new object in delta.
16 F04 Missing repo → 404.
17 F05 Empty want → 422.
18 F06 Malformed want entry (not sha256:*) → 422.
19 F07 Unknown want commit_id → 404.
20 F08 Objects not in pack_index → 503 with Retry-After header.
21 F09 Private repo without auth → 404.
22 F10 mpack_id == sha256(mpack_bytes) — content-addressing proof survives end-to-end.
23 """
from __future__ import annotations

import hashlib
import typing
from collections.abc import Coroutine, Mapping
from datetime import datetime, timezone
from unittest.mock import AsyncMock

import msgpack
import pytest
from httpx import AsyncClient
from sqlalchemy import select
from sqlalchemy.dialects.postgresql import insert as pg_insert
from sqlalchemy.ext.asyncio import AsyncSession

from muse.core.mpack import parse_wire_mpack
from muse.core.types import blob_id, fake_id
from musehub.db.musehub_repo_models import (
    MusehubBranch,
    MusehubCommit,
    MusehubCommitGraph,
    MusehubCommitRef,
    MusehubMPackIndex,
    MusehubObject,
    MusehubObjectRef,
    MusehubSnapshot,
    MusehubSnapshotRef,
)
from musehub.types.json_types import StrDict
from tests.factories import create_repo
53
# In-memory stand-in for object storage: maps an object ID or mpack ID to the
# raw bytes written under it (this is what ``_stub_backend`` returns).
type _ByteStore = dict[str, bytes]
55
56
57 # ── helpers ───────────────────────────────────────────────────────────────────
58
def _now() -> datetime:
    """Return the current instant as a timezone-aware UTC ``datetime``."""
    return datetime.now(timezone.utc)
61
62
def _stub_backend(monkeypatch: pytest.MonkeyPatch) -> _ByteStore:
    """Swap the storage backend for an in-memory fake and hand back its store.

    The fake keeps everything in one dict keyed by object ID / mpack ID.
    ``get_backend`` is patched in the three wire service modules so that each
    of them resolves to the same fake instance.  The returned dict is the live
    store, so tests can inspect exactly what was written.
    """
    blobs: dict[str, bytes] = {}

    async def _write_object(oid: str, data: bytes, **_: typing.Any) -> str:
        blobs[oid] = data
        return f"mem://{oid}"

    async def _read_object(oid: str) -> bytes | None:
        return blobs.get(oid)

    async def _has_object(oid: str, **_: typing.Any) -> bool:
        return oid in blobs

    async def _sign_object_url(oid: str, ttl_seconds: int = 3600) -> str:
        return f"https://minio.test/mpacks/{oid}?ttl={ttl_seconds}"

    async def _read_mpack(mpack_id: str) -> bytes | None:
        return blobs.get(mpack_id)

    async def _write_mpack(mpack_id: str, data: bytes) -> str:
        blobs[mpack_id] = data
        return f"mem://mpacks/{mpack_id}"

    async def _sign_mpack_url(mpack_id: str, ttl: int) -> str:
        return f"https://minio.test/mpacks/{mpack_id}?ttl={ttl}"

    fake = AsyncMock()
    # Real coroutine functions replace the auto-generated mock attributes so
    # that reads and writes actually hit ``blobs``.
    overrides = {
        "put": _write_object,
        "get": _read_object,
        "exists": _has_object,
        "presign_get": _sign_object_url,
        "get_mpack": _read_mpack,
        "put_mpack": _write_mpack,
        "presign_mpack_get": _sign_mpack_url,
        "supports_presign": True,
    }
    for attr_name, attr_value in overrides.items():
        setattr(fake, attr_name, attr_value)

    for module_path in (
        "musehub.services.musehub_wire",
        "musehub.services.musehub_wire_fetch",
        "musehub.services.musehub_wire_shared",
    ):
        monkeypatch.setattr(f"{module_path}.get_backend", lambda: fake)
    return blobs
103
104
async def _store_object(
    session: AsyncSession,
    repo_id: str,
    oid: str,
    content: bytes,
    *,
    indexed: bool = True,
    mpack_key: str = "sha256:" + "a" * 64,
) -> None:
    """Insert an object row, link it to the repo, and optionally index it.

    Every insert uses ``ON CONFLICT DO NOTHING``, so calling this again with
    the same IDs does not raise.  When ``indexed`` is false the
    ``MusehubMPackIndex`` row is skipped, leaving the object without a
    pack-index entry.  The session is committed before returning.
    """
    object_insert = (
        pg_insert(MusehubObject)
        .values(
            object_id=oid,
            path="file.dat",
            size_bytes=len(content),
            storage_uri=f"mem://{oid}",
            content_cache=content,
        )
        .on_conflict_do_nothing(index_elements=["object_id"])
    )
    ref_insert = (
        pg_insert(MusehubObjectRef)
        .values(repo_id=repo_id, object_id=oid)
        .on_conflict_do_nothing()
    )
    statements = [object_insert, ref_insert]
    if indexed:
        statements.append(
            pg_insert(MusehubMPackIndex)
            .values(entity_id=oid, mpack_id=mpack_key, entity_type="object")
            .on_conflict_do_nothing()
        )

    # Executed one by one in the same order as written: object, ref, index.
    for statement in statements:
        await session.execute(statement)
    await session.commit()
133
134
async def _make_commit(
    session: AsyncSession,
    repo_id: str,
    *,
    manifest: dict[str, str],
    seed: str,
    parent_ids: list[str] | None = None,
    generation: int = 0,
) -> tuple[MusehubCommit, MusehubSnapshot]:
    """Persist one commit on ``main`` together with its snapshot and graph row.

    Args:
        session: Open async session; it is committed before returning.
        repo_id: Repository the snapshot and commit refs are attached to.
        manifest: Mapping stored msgpack-encoded as the snapshot's
            ``manifest_blob`` (callers in this file pass path -> object ID).
        seed: Suffix fed to ``fake_id`` to derive the snapshot and commit IDs,
            and embedded in the commit message.
        parent_ids: Parent commit IDs; ``None`` is stored as an empty list.
        generation: Value written to the commit-graph row.

    Returns:
        The ``(commit, snapshot)`` ORM objects that were added.

    If a ``main`` branch row exists for ``repo_id`` its ``head_commit_id`` is
    pointed at the new commit; when no such row exists the branch table is
    left untouched.
    """
    snap_id = fake_id(f"snap-f01-{seed}")
    snap = MusehubSnapshot(
        snapshot_id=snap_id,
        directories=[],
        manifest_blob=msgpack.packb(manifest, use_bin_type=True),
        entry_count=len(manifest),
        created_at=_now(),
    )
    session.add(snap)
    await session.execute(
        pg_insert(MusehubSnapshotRef)
        .values(repo_id=repo_id, snapshot_id=snap_id)
        .on_conflict_do_nothing()
    )

    commit_id = fake_id(f"commit-f01-{seed}")
    pids = parent_ids or []
    commit = MusehubCommit(
        commit_id=commit_id,
        branch="main",
        parent_ids=pids,
        message=f"commit {seed}",
        author="gabriel",
        timestamp=_now(),
        snapshot_id=snap_id,
    )
    session.add(commit)
    await session.execute(
        pg_insert(MusehubCommitRef)
        .values(repo_id=repo_id, commit_id=commit_id)
        .on_conflict_do_nothing()
    )
    graph = MusehubCommitGraph(
        commit_id=commit_id,
        parent_ids=pids,
        generation=generation,
        snapshot_id=snap_id,
    )
    session.add(graph)

    # Advance the ``main`` head when the repo has such a branch row.
    branch_q = await session.execute(
        select(MusehubBranch).where(
            MusehubBranch.repo_id == repo_id,
            MusehubBranch.name == "main",
        )
    )
    branch = branch_q.scalar_one_or_none()
    # ``scalar_one_or_none`` signals "no row" with None; test for exactly that.
    if branch is not None:
        branch.head_commit_id = commit_id
    await session.commit()
    return commit, snap
196
197
def _fetch(
    client: AsyncClient,
    owner: str,
    slug: str,
    want: list[str],
    have: list[str],
    headers: dict[str, str] | None = None,
) -> Coroutine[typing.Any, typing.Any, typing.Any]:
    """Build the ``POST /{owner}/{slug}/fetch`` request and return its awaitable.

    The body is the msgpack encoding of ``{"want": ..., "have": ...}``.  The
    ``Content-Type`` header defaults to ``application/x-msgpack``; entries in
    ``headers`` are applied on top and win on key collisions.  Nothing is
    awaited here - the caller awaits the returned coroutine.
    """
    request_headers = {"Content-Type": "application/x-msgpack"}
    if headers:
        request_headers.update(headers)
    body = msgpack.packb({"want": want, "have": have}, use_bin_type=True)
    url = f"/{owner}/{slug}/fetch"
    return client.post(url, content=body, headers=request_headers)
212
213
214 # ══════════════════════════════════════════════════════════════════════════════
215 # F01 — happy path: want=[tip], have=[] → 200, mpack_url, sha256 mpack_id
216 # ══════════════════════════════════════════════════════════════════════════════
217
@pytest.mark.asyncio
async def test_f01_happy_path_returns_presigned_url(
    client: AsyncClient,
    db_session: AsyncSession,
    monkeypatch: pytest.MonkeyPatch,
    wire_headers: StrDict,
) -> None:
    """F01: want=[tip] with an empty have returns 200, a URL and a sha256 mpack_id."""
    _stub_backend(monkeypatch)
    repo = await create_repo(db_session, owner="gabriel", visibility="public")
    payload = b"hello fetch world"
    object_id = blob_id(payload)
    await _store_object(db_session, repo.repo_id, object_id, payload)
    tip, _ = await _make_commit(
        db_session, repo.repo_id, manifest={"f.txt": object_id}, seed="f01"
    )

    response = await _fetch(
        client, "gabriel", repo.slug, [tip.commit_id], [], headers=wire_headers
    )

    assert response.status_code == 200
    body = msgpack.unpackb(response.content, raw=False)
    mpack_id = body["mpack_id"]
    assert mpack_id.startswith("sha256:"), f"bad mpack_id: {mpack_id!r}"
    assert body["mpack_url"], "mpack_url must be non-empty"
    assert body["commit_count"] == 1
    assert body["blob_count"] == 1
239
240
241 # ══════════════════════════════════════════════════════════════════════════════
242 # F02 — up-to-date: want=[tip], have=[tip] → 200, all counts zero, mpack_url null
243 # ══════════════════════════════════════════════════════════════════════════════
244
@pytest.mark.asyncio
async def test_f02_up_to_date_returns_zero_counts(
    client: AsyncClient,
    db_session: AsyncSession,
    monkeypatch: pytest.MonkeyPatch,
    wire_headers: StrDict,
) -> None:
    """F02: when have already contains the wanted tip, counts are zero and no URL is sent."""
    _stub_backend(monkeypatch)
    repo = await create_repo(db_session, owner="gabriel", visibility="public")
    payload = b"already have this"
    object_id = blob_id(payload)
    await _store_object(db_session, repo.repo_id, object_id, payload)
    tip, _ = await _make_commit(
        db_session, repo.repo_id, manifest={"g.txt": object_id}, seed="f02"
    )

    response = await _fetch(
        client,
        "gabriel",
        repo.slug,
        [tip.commit_id],
        [tip.commit_id],
        headers=wire_headers,
    )

    assert response.status_code == 200
    body = msgpack.unpackb(response.content, raw=False)
    assert body["commit_count"] == 0
    assert body["blob_count"] == 0
    assert not body.get("mpack_url"), "mpack_url must be null/empty when nothing to send"
266
267
268 # ══════════════════════════════════════════════════════════════════════════════
269 # F03 — delta: want=[c2], have=[c1] → only new commit + new object
270 # ══════════════════════════════════════════════════════════════════════════════
271
@pytest.mark.asyncio
async def test_f03_delta_excludes_have_side_objects(
    client: AsyncClient,
    db_session: AsyncSession,
    monkeypatch: pytest.MonkeyPatch,
    wire_headers: StrDict,
) -> None:
    """F03: with have=[c1], the mpack holds only c2 and the object c2 introduced."""
    backend_store = _stub_backend(monkeypatch)
    repo = await create_repo(db_session, owner="gabriel", visibility="public")

    # Base commit c1 carries a single object.
    base_bytes = b"base object"
    base_oid = blob_id(base_bytes)
    await _store_object(db_session, repo.repo_id, base_oid, base_bytes)
    base_commit, _ = await _make_commit(
        db_session,
        repo.repo_id,
        manifest={"base.txt": base_oid},
        seed="f03-c1",
        generation=0,
    )

    # Child commit c2 keeps the base object and adds one new object.
    delta_bytes = b"delta object"
    delta_oid = blob_id(delta_bytes)
    await _store_object(db_session, repo.repo_id, delta_oid, delta_bytes)
    tip_commit, _ = await _make_commit(
        db_session,
        repo.repo_id,
        manifest={"base.txt": base_oid, "delta.txt": delta_oid},
        seed="f03-c2",
        parent_ids=[base_commit.commit_id],
        generation=1,
    )

    response = await _fetch(
        client,
        "gabriel",
        repo.slug,
        [tip_commit.commit_id],
        [base_commit.commit_id],
        headers=wire_headers,
    )

    assert response.status_code == 200
    body = msgpack.unpackb(response.content, raw=False)
    assert body["commit_count"] == 1
    assert body["blob_count"] == 1

    # Inspect the bytes the server wrote to the (stubbed) backend.
    packed = parse_wire_mpack(backend_store[body["mpack_id"]])
    packed_commits = {entry["commit_id"] for entry in packed["commits"]}
    assert tip_commit.commit_id in packed_commits
    assert base_commit.commit_id not in packed_commits, "have-side commit must be excluded"

    packed_objects = {entry["object_id"] for entry in packed["blobs"]}
    assert delta_oid in packed_objects
    assert base_oid not in packed_objects, "have-side object must be excluded"
310
311
312 # ══════════════════════════════════════════════════════════════════════════════
313 # F04 — missing repo → 404
314 # ══════════════════════════════════════════════════════════════════════════════
315
@pytest.mark.asyncio
async def test_f04_missing_repo_returns_404(
    client: AsyncClient,
    wire_headers: StrDict,
) -> None:
    """F04: fetching from an owner/slug that was never created answers 404."""
    wanted = ["sha256:" + "a" * 64]
    response = await _fetch(
        client, "nobody", "no-such-repo", wanted, [], headers=wire_headers
    )
    assert response.status_code == 404
324
325
326 # ══════════════════════════════════════════════════════════════════════════════
327 # F05 — empty want → 422
328 # ══════════════════════════════════════════════════════════════════════════════
329
@pytest.mark.asyncio
async def test_f05_empty_want_returns_422(
    client: AsyncClient,
    db_session: AsyncSession,
    wire_headers: StrDict,
) -> None:
    """F05: a fetch request whose want list is empty is rejected with 422."""
    repo = await create_repo(db_session, owner="gabriel", visibility="public")
    no_wants: list[str] = []
    response = await _fetch(
        client, "gabriel", repo.slug, no_wants, [], headers=wire_headers
    )
    assert response.status_code == 422
338
339
340 # ══════════════════════════════════════════════════════════════════════════════
341 # F06 — malformed want entry → 422
342 # ══════════════════════════════════════════════════════════════════════════════
343
@pytest.mark.asyncio
async def test_f06_malformed_want_entry_returns_422(
    client: AsyncClient,
    db_session: AsyncSession,
    wire_headers: StrDict,
) -> None:
    """F06: a want entry that is not of the form sha256:* is rejected with 422."""
    repo = await create_repo(db_session, owner="gabriel", visibility="public")
    malformed = ["not-a-hash"]
    response = await _fetch(
        client, "gabriel", repo.slug, malformed, [], headers=wire_headers
    )
    assert response.status_code == 422
352
353
354 # ══════════════════════════════════════════════════════════════════════════════
355 # F07 — unknown want commit_id → 404
356 # ══════════════════════════════════════════════════════════════════════════════
357
@pytest.mark.asyncio
async def test_f07_unknown_want_commit_returns_404(
    client: AsyncClient,
    db_session: AsyncSession,
    wire_headers: StrDict,
) -> None:
    """F07: a well-formed sha256 ID that was never stored as a commit answers 404."""
    repo = await create_repo(db_session, owner="gabriel", visibility="public")
    unknown_commit = "sha256:" + "b" * 64
    response = await _fetch(
        client, "gabriel", repo.slug, [unknown_commit], [], headers=wire_headers
    )
    assert response.status_code == 404
367
368
369 # ══════════════════════════════════════════════════════════════════════════════
370 # F08 — objects not in pack_index → 503 with Retry-After
371 # ══════════════════════════════════════════════════════════════════════════════
372
@pytest.mark.asyncio
async def test_f08_unindexed_objects_returns_503(
    client: AsyncClient,
    db_session: AsyncSession,
    monkeypatch: pytest.MonkeyPatch,
    wire_headers: StrDict,
) -> None:
    """F08: an object missing from musehub_mpack_index yields 503 plus Retry-After.

    This models the window where unpack-mpack has already returned 200 but the
    mpack.index background job has not finished.  The client is expected to
    retry instead of being handed a partial or corrupt fetch mpack.
    """
    _stub_backend(monkeypatch)
    repo = await create_repo(db_session, owner="gabriel", visibility="public")
    payload = b"not yet indexed"
    object_id = blob_id(payload)
    # indexed=False: the object row and ref exist, the pack_index entry does not.
    await _store_object(db_session, repo.repo_id, object_id, payload, indexed=False)
    tip, _ = await _make_commit(
        db_session, repo.repo_id, manifest={"pending.txt": object_id}, seed="f08"
    )

    response = await _fetch(
        client, "gabriel", repo.slug, [tip.commit_id], [], headers=wire_headers
    )

    assert response.status_code == 503
    has_retry_after = any(
        header_name.lower() == "retry-after" for header_name in response.headers
    )
    assert has_retry_after, (
        "503 response must include Retry-After header"
    )
400
401
402 # ══════════════════════════════════════════════════════════════════════════════
403 # F09 — private repo without auth → 404
404 # ══════════════════════════════════════════════════════════════════════════════
405
@pytest.mark.asyncio
async def test_f09_private_repo_without_auth_returns_404(
    client: AsyncClient,
    db_session: AsyncSession,
) -> None:
    """F09: a private repo answers 404 to a request sent without auth headers.

    Returning 404 (instead of an auth error) avoids revealing that the repo exists.
    """
    repo = await create_repo(db_session, owner="gabriel", visibility="private")
    wanted = ["sha256:" + "c" * 64]
    # No ``headers`` argument: the request carries only the default Content-Type.
    response = await _fetch(client, "gabriel", repo.slug, wanted, [])
    assert response.status_code == 404
415
416
417 # ══════════════════════════════════════════════════════════════════════════════
418 # F10 — mpack_id == sha256(mpack_bytes) end-to-end
419 # ══════════════════════════════════════════════════════════════════════════════
420
@pytest.mark.asyncio
async def test_f10_mpack_id_equals_sha256_of_stored_bytes(
    client: AsyncClient,
    db_session: AsyncSession,
    monkeypatch: pytest.MonkeyPatch,
    wire_headers: StrDict,
) -> None:
    """F10: the returned mpack_id equals ``blob_id`` of the bytes written to storage.

    Content-addressing is the integrity proof, so no secondary verification
    is needed.
    """
    backend_store = _stub_backend(monkeypatch)
    repo = await create_repo(db_session, owner="gabriel", visibility="public")
    payload = b"content-addressing proof"
    object_id = blob_id(payload)
    await _store_object(db_session, repo.repo_id, object_id, payload)
    tip, _ = await _make_commit(
        db_session, repo.repo_id, manifest={"proof.bin": object_id}, seed="f10"
    )

    response = await _fetch(
        client, "gabriel", repo.slug, [tip.commit_id], [], headers=wire_headers
    )

    assert response.status_code == 200
    mpack_id = msgpack.unpackb(response.content, raw=False)["mpack_id"]

    # Re-derive the ID from what the stubbed backend actually holds.
    expected_id = blob_id(backend_store[mpack_id])
    assert mpack_id == expected_id, (
        f"mpack_id {mpack_id!r} != blob_id(stored_bytes) {expected_id!r}"
    )
File History 1 commit
sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595 fix: typing audit — 0 violations, 0 untyped defs across all… Sonnet 4.6 minor 22 days ago