test_wire_fetch_step1.py
python
sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595
fix: typing audit — 0 violations, 0 untyped defs across all…
Sonnet 4.6
minor
⚠ breaking
22 days ago
| 1 | """TDD — POST /{owner}/{slug}/fetch — Step 1 of the fetch protocol (issue #68). |
| 2 | |
| 3 | The fetch endpoint is the server side of the three-step fetch flow: |
| 4 | |
| 5 | 1. POST /{owner}/{slug}/fetch |
| 6 | body: {"want": ["sha256:..."], "have": ["sha256:..."]} |
| 7 | → {"mpack_url": "...", "mpack_id": "sha256:...", "commit_count": N, "blob_count": N} |
| 8 | |
| 9 | 2. Client GETs mpack bytes directly from MinIO via presigned URL. |
| 10 | 3. Client calls apply_mpack() locally. |
| 11 | |
| 12 | Tests: |
| 13 | F01 Happy path — want=[tip], have=[] → 200, mpack_url set, sha256 mpack_id. |
| 14 | F02 Up-to-date — want=[tip], have=[tip] → 200, commit_count=0, blob_count=0, mpack_url null. |
| 15 | F03 Delta — want=[c2], have=[c1] → only new commit + new object in delta. |
| 16 | F04 Missing repo → 404. |
| 17 | F05 Empty want → 422. |
| 18 | F06 Malformed want entry (not sha256:*) → 422. |
| 19 | F07 Unknown want commit_id → 404. |
| 20 | F08 Objects not in pack_index → 503 with Retry-After header. |
| 21 | F09 Private repo without auth → 404. |
| 22 | F10 mpack_id == sha256(mpack_bytes) — content-addressing proof survives end-to-end. |
| 23 | """ |
| 24 | from __future__ import annotations |
| 25 | |
| 26 | import hashlib |
| 27 | import typing |
| 28 | from collections.abc import Coroutine, Mapping |
| 29 | from datetime import datetime, timezone |
| 30 | from unittest.mock import AsyncMock |
| 31 | |
| 32 | import msgpack |
| 33 | import pytest |
| 34 | from httpx import AsyncClient |
| 35 | from sqlalchemy.dialects.postgresql import insert as pg_insert |
| 36 | from sqlalchemy.ext.asyncio import AsyncSession |
| 37 | |
| 38 | from muse.core.mpack import parse_wire_mpack |
| 39 | from muse.core.types import blob_id, fake_id |
| 40 | from musehub.db.musehub_repo_models import ( |
| 41 | MusehubBranch, |
| 42 | MusehubCommit, |
| 43 | MusehubCommitGraph, |
| 44 | MusehubCommitRef, |
| 45 | MusehubMPackIndex, |
| 46 | MusehubObject, |
| 47 | MusehubObjectRef, |
| 48 | MusehubSnapshot, |
| 49 | MusehubSnapshotRef, |
| 50 | ) |
| 51 | from tests.factories import create_repo |
| 52 | from musehub.types.json_types import StrDict |
| 53 | |
| 54 | type _ByteStore = dict[str, bytes] |
| 55 | |
| 56 | |
| 57 | # ── helpers ─────────────────────────────────────────────────────────────────── |
| 58 | |
| 59 | def _now() -> datetime: |
| 60 | return datetime.now(tz=timezone.utc) |
| 61 | |
| 62 | |
def _stub_backend(monkeypatch: pytest.MonkeyPatch) -> _ByteStore:
    """Swap the storage backend for an in-memory fake and return its dict.

    The fake is an ``AsyncMock`` whose storage coroutines read from and write
    to a single plain dict. ``get_backend`` is patched in the three
    ``musehub.services`` wire modules so each of them receives the fake.

    Args:
        monkeypatch: pytest fixture used to install the ``get_backend`` patches.

    Returns:
        The dict that backs the fake, keyed by object id / mpack id.
    """
    blobs: _ByteStore = {}

    async def _put(oid: str, data: bytes, **_: typing.Any) -> str:
        blobs[oid] = data
        return f"mem://{oid}"

    async def _get(oid: str) -> bytes | None:
        return blobs.get(oid)

    async def _exists(oid: str, **_: typing.Any) -> bool:
        return oid in blobs

    async def _presign_get(oid: str, ttl_seconds: int = 3600) -> str:
        return f"https://minio.test/mpacks/{oid}?ttl={ttl_seconds}"

    async def _get_mpack(mpack_id: str) -> bytes | None:
        return blobs.get(mpack_id)

    async def _put_mpack(mpack_id: str, data: bytes) -> str:
        blobs[mpack_id] = data
        return f"mem://mpacks/{mpack_id}"

    async def _presign_mpack_get(mpack_id: str, ttl: int) -> str:
        return f"https://minio.test/mpacks/{mpack_id}?ttl={ttl}"

    fake = AsyncMock()
    overrides = {
        "put": _put,
        "get": _get,
        "exists": _exists,
        "presign_get": _presign_get,
        "get_mpack": _get_mpack,
        "put_mpack": _put_mpack,
        "presign_mpack_get": _presign_mpack_get,
        "supports_presign": True,
    }
    for attr_name, replacement in overrides.items():
        setattr(fake, attr_name, replacement)

    def _get_backend() -> AsyncMock:
        return fake

    # Each wire module looks up get_backend under its own name, so patch all three.
    patch_targets = (
        "musehub.services.musehub_wire.get_backend",
        "musehub.services.musehub_wire_fetch.get_backend",
        "musehub.services.musehub_wire_shared.get_backend",
    )
    for target in patch_targets:
        monkeypatch.setattr(target, _get_backend)
    return blobs
| 103 | |
| 104 | |
async def _store_object(
    session: AsyncSession,
    repo_id: str,
    oid: str,
    content: bytes,
    *,
    indexed: bool = True,
    mpack_key: str = "sha256:" + "a" * 64,
) -> None:
    """Insert an object row, its repo ref, and optionally an mpack-index row.

    Every insert uses ``ON CONFLICT DO NOTHING``; the session is committed
    once all statements have run.

    Args:
        session: Async DB session used to execute and commit the inserts.
        repo_id: Repository that receives the object ref.
        oid: Object id for the object, ref, and index rows.
        content: Raw bytes stored in ``content_cache``; its length becomes
            ``size_bytes``.
        indexed: When false, the ``MusehubMPackIndex`` row is not written.
        mpack_key: ``mpack_id`` recorded on the index row.
    """
    insert_object = (
        pg_insert(MusehubObject)
        .values(
            object_id=oid,
            path="file.dat",
            size_bytes=len(content),
            storage_uri=f"mem://{oid}",
            content_cache=content,
        )
        .on_conflict_do_nothing(index_elements=["object_id"])
    )
    insert_ref = (
        pg_insert(MusehubObjectRef)
        .values(repo_id=repo_id, object_id=oid)
        .on_conflict_do_nothing()
    )
    pending = [insert_object, insert_ref]
    if indexed:
        pending.append(
            pg_insert(MusehubMPackIndex)
            .values(entity_id=oid, mpack_id=mpack_key, entity_type="object")
            .on_conflict_do_nothing()
        )

    # Run in order: object row, then ref, then (optionally) index entry.
    for statement in pending:
        await session.execute(statement)
    await session.commit()
| 133 | |
| 134 | |
async def _make_commit(
    session: AsyncSession,
    repo_id: str,
    *,
    manifest: dict[str, str],
    seed: str,
    parent_ids: list[str] | None = None,
    generation: int = 0,
) -> tuple[MusehubCommit, MusehubSnapshot]:
    """Create a snapshot, a commit on ``main``, and their refs / graph row.

    Ids are derived from ``seed`` via ``fake_id``. If a ``main`` branch row
    exists for the repo, its ``head_commit_id`` is moved to the new commit.
    The session is committed before returning.

    Args:
        session: Async DB session the rows are added to.
        repo_id: Repository the snapshot and commit refs are attached to.
        manifest: Mapping msgpack-encoded into the snapshot's ``manifest_blob``.
        seed: Distinguishing string used in the generated ids and message.
        parent_ids: Parent commit ids; ``None`` or empty means no parents.
        generation: Generation number stored on the commit-graph row.

    Returns:
        The ``(commit, snapshot)`` ORM objects that were persisted.
    """
    from sqlalchemy import select as _select

    snapshot_key = fake_id(f"snap-f01-{seed}")
    snapshot = MusehubSnapshot(
        snapshot_id=snapshot_key,
        directories=[],
        manifest_blob=msgpack.packb(manifest, use_bin_type=True),
        entry_count=len(manifest),
        created_at=_now(),
    )
    session.add(snapshot)
    link_snapshot = (
        pg_insert(MusehubSnapshotRef)
        .values(repo_id=repo_id, snapshot_id=snapshot_key)
        .on_conflict_do_nothing()
    )
    await session.execute(link_snapshot)

    commit_key = fake_id(f"commit-f01-{seed}")
    parents = parent_ids if parent_ids else []
    record = MusehubCommit(
        commit_id=commit_key,
        branch="main",
        parent_ids=parents,
        message=f"commit {seed}",
        author="gabriel",
        timestamp=_now(),
        snapshot_id=snapshot_key,
    )
    session.add(record)
    link_commit = (
        pg_insert(MusehubCommitRef)
        .values(repo_id=repo_id, commit_id=commit_key)
        .on_conflict_do_nothing()
    )
    await session.execute(link_commit)
    session.add(
        MusehubCommitGraph(
            commit_id=commit_key,
            parent_ids=parents,
            generation=generation,
            snapshot_id=snapshot_key,
        )
    )

    # Advance the repo's "main" branch head, when such a branch row exists.
    main_lookup = _select(MusehubBranch).where(
        MusehubBranch.repo_id == repo_id,
        MusehubBranch.name == "main",
    )
    main_branch = (await session.execute(main_lookup)).scalar_one_or_none()
    if main_branch:
        main_branch.head_commit_id = commit_key
    await session.commit()
    return record, snapshot
| 196 | |
| 197 | |
def _fetch(
    client: AsyncClient,
    owner: str,
    slug: str,
    want: list[str],
    have: list[str],
    headers: dict[str, str] | None = None,
) -> Coroutine[typing.Any, typing.Any, typing.Any]:
    """Return the un-awaited POST to ``/{owner}/{slug}/fetch``.

    The body is ``{"want": want, "have": have}`` encoded with msgpack. The
    ``Content-Type`` defaults to ``application/x-msgpack``; entries in
    ``headers`` are applied on top and may override it.
    """
    request_headers: dict[str, str] = {"Content-Type": "application/x-msgpack"}
    if headers:
        request_headers.update(headers)
    payload = msgpack.packb({"want": want, "have": have}, use_bin_type=True)
    return client.post(
        f"/{owner}/{slug}/fetch",
        content=payload,
        headers=request_headers,
    )
| 212 | |
| 213 | |
| 214 | # ══════════════════════════════════════════════════════════════════════════════ |
| 215 | # F01 — happy path: want=[tip], have=[] → 200, mpack_url, sha256 mpack_id |
| 216 | # ══════════════════════════════════════════════════════════════════════════════ |
| 217 | |
@pytest.mark.asyncio
async def test_f01_happy_path_returns_presigned_url(
    client: AsyncClient,
    db_session: AsyncSession,
    monkeypatch: pytest.MonkeyPatch,
    wire_headers: StrDict,
) -> None:
    """F01: fetching the tip with an empty have-list yields a usable mpack.

    Expects 200, a ``sha256:``-prefixed ``mpack_id``, a non-empty
    ``mpack_url``, and exactly one commit and one blob counted.
    """
    _stub_backend(monkeypatch)
    repo = await create_repo(db_session, owner="gabriel", visibility="public")
    payload = b"hello fetch world"
    payload_id = blob_id(payload)
    await _store_object(db_session, repo.repo_id, payload_id, payload)
    tip, _ = await _make_commit(
        db_session,
        repo.repo_id,
        manifest={"f.txt": payload_id},
        seed="f01",
    )

    response = await _fetch(
        client, "gabriel", repo.slug, [tip.commit_id], [], headers=wire_headers
    )

    assert response.status_code == 200
    body = msgpack.unpackb(response.content, raw=False)
    assert body["mpack_id"].startswith("sha256:"), f"bad mpack_id: {body['mpack_id']!r}"
    assert body["mpack_url"], "mpack_url must be non-empty"
    assert body["commit_count"] == 1
    assert body["blob_count"] == 1
| 239 | |
| 240 | |
| 241 | # ══════════════════════════════════════════════════════════════════════════════ |
| 242 | # F02 — up-to-date: want=[tip], have=[tip] → 200, all counts zero, mpack_url null |
| 243 | # ══════════════════════════════════════════════════════════════════════════════ |
| 244 | |
@pytest.mark.asyncio
async def test_f02_up_to_date_returns_zero_counts(
    client: AsyncClient,
    db_session: AsyncSession,
    monkeypatch: pytest.MonkeyPatch,
    wire_headers: StrDict,
) -> None:
    """F02: when want and have are the same tip, nothing is sent.

    Expects 200 with zero commits, zero blobs, and a null/empty ``mpack_url``.
    """
    _stub_backend(monkeypatch)
    repo = await create_repo(db_session, owner="gabriel", visibility="public")
    payload = b"already have this"
    payload_id = blob_id(payload)
    await _store_object(db_session, repo.repo_id, payload_id, payload)
    tip, _ = await _make_commit(
        db_session,
        repo.repo_id,
        manifest={"g.txt": payload_id},
        seed="f02",
    )

    tip_id = tip.commit_id
    response = await _fetch(
        client, "gabriel", repo.slug, [tip_id], [tip_id], headers=wire_headers
    )

    assert response.status_code == 200
    body = msgpack.unpackb(response.content, raw=False)
    assert body["commit_count"] == 0
    assert body["blob_count"] == 0
    assert not body.get("mpack_url"), "mpack_url must be null/empty when nothing to send"
| 266 | |
| 267 | |
| 268 | # ══════════════════════════════════════════════════════════════════════════════ |
| 269 | # F03 — delta: want=[c2], have=[c1] → only new commit + new object |
| 270 | # ══════════════════════════════════════════════════════════════════════════════ |
| 271 | |
@pytest.mark.asyncio
async def test_f03_delta_excludes_have_side_objects(
    client: AsyncClient,
    db_session: AsyncSession,
    monkeypatch: pytest.MonkeyPatch,
    wire_headers: StrDict,
) -> None:
    """F03: with have=[c1], the mpack carries only c2 and its new object.

    The stored mpack is parsed to confirm that c1 and the object it already
    references are absent from the delta.
    """
    backing_store = _stub_backend(monkeypatch)
    repo = await create_repo(db_session, owner="gabriel", visibility="public")

    base_bytes = b"base object"
    base_oid = blob_id(base_bytes)
    await _store_object(db_session, repo.repo_id, base_oid, base_bytes)
    parent, _ = await _make_commit(
        db_session,
        repo.repo_id,
        manifest={"base.txt": base_oid},
        seed="f03-c1",
        generation=0,
    )

    delta_bytes = b"delta object"
    delta_oid = blob_id(delta_bytes)
    await _store_object(db_session, repo.repo_id, delta_oid, delta_bytes)
    child, _ = await _make_commit(
        db_session,
        repo.repo_id,
        manifest={"base.txt": base_oid, "delta.txt": delta_oid},
        seed="f03-c2",
        parent_ids=[parent.commit_id],
        generation=1,
    )

    response = await _fetch(
        client,
        "gabriel",
        repo.slug,
        [child.commit_id],
        [parent.commit_id],
        headers=wire_headers,
    )

    assert response.status_code == 200
    body = msgpack.unpackb(response.content, raw=False)
    assert body["commit_count"] == 1
    assert body["blob_count"] == 1

    parsed = parse_wire_mpack(backing_store[body["mpack_id"]])
    shipped_commits = {entry["commit_id"] for entry in parsed["commits"]}
    assert child.commit_id in shipped_commits
    assert parent.commit_id not in shipped_commits, "have-side commit must be excluded"

    shipped_blobs = {entry["object_id"] for entry in parsed["blobs"]}
    assert delta_oid in shipped_blobs
    assert base_oid not in shipped_blobs, "have-side object must be excluded"
| 310 | |
| 311 | |
| 312 | # ══════════════════════════════════════════════════════════════════════════════ |
| 313 | # F04 — missing repo → 404 |
| 314 | # ══════════════════════════════════════════════════════════════════════════════ |
| 315 | |
@pytest.mark.asyncio
async def test_f04_missing_repo_returns_404(
    client: AsyncClient,
    wire_headers: StrDict,
) -> None:
    """F04: fetching from an owner/slug that was never created returns 404."""
    wanted = ["sha256:" + "a" * 64]
    response = await _fetch(
        client, "nobody", "no-such-repo", wanted, [], headers=wire_headers
    )
    assert response.status_code == 404
| 324 | |
| 325 | |
| 326 | # ══════════════════════════════════════════════════════════════════════════════ |
| 327 | # F05 — empty want → 422 |
| 328 | # ══════════════════════════════════════════════════════════════════════════════ |
| 329 | |
@pytest.mark.asyncio
async def test_f05_empty_want_returns_422(
    client: AsyncClient,
    db_session: AsyncSession,
    wire_headers: StrDict,
) -> None:
    """F05: a fetch request whose want list is empty is rejected with 422."""
    repo = await create_repo(db_session, owner="gabriel", visibility="public")
    nothing_wanted: list[str] = []
    response = await _fetch(
        client, "gabriel", repo.slug, nothing_wanted, [], headers=wire_headers
    )
    assert response.status_code == 422
| 338 | |
| 339 | |
| 340 | # ══════════════════════════════════════════════════════════════════════════════ |
| 341 | # F06 — malformed want entry → 422 |
| 342 | # ══════════════════════════════════════════════════════════════════════════════ |
| 343 | |
@pytest.mark.asyncio
async def test_f06_malformed_want_entry_returns_422(
    client: AsyncClient,
    db_session: AsyncSession,
    wire_headers: StrDict,
) -> None:
    """F06: a want entry that is not of the form ``sha256:*`` is rejected with 422."""
    repo = await create_repo(db_session, owner="gabriel", visibility="public")
    malformed = ["not-a-hash"]
    response = await _fetch(
        client, "gabriel", repo.slug, malformed, [], headers=wire_headers
    )
    assert response.status_code == 422
| 352 | |
| 353 | |
| 354 | # ══════════════════════════════════════════════════════════════════════════════ |
| 355 | # F07 — unknown want commit_id → 404 |
| 356 | # ══════════════════════════════════════════════════════════════════════════════ |
| 357 | |
@pytest.mark.asyncio
async def test_f07_unknown_want_commit_returns_404(
    client: AsyncClient,
    db_session: AsyncSession,
    wire_headers: StrDict,
) -> None:
    """F07: a well-formed sha256 id that no commit was created with returns 404."""
    repo = await create_repo(db_session, owner="gabriel", visibility="public")
    unknown_commit = "sha256:" + "b" * 64
    response = await _fetch(
        client, "gabriel", repo.slug, [unknown_commit], [], headers=wire_headers
    )
    assert response.status_code == 404
| 367 | |
| 368 | |
| 369 | # ══════════════════════════════════════════════════════════════════════════════ |
| 370 | # F08 — objects not in pack_index → 503 with Retry-After |
| 371 | # ══════════════════════════════════════════════════════════════════════════════ |
| 372 | |
@pytest.mark.asyncio
async def test_f08_unindexed_objects_returns_503(
    client: AsyncClient,
    db_session: AsyncSession,
    monkeypatch: pytest.MonkeyPatch,
    wire_headers: StrDict,
) -> None:
    """F08: an object with no musehub_mpack_index row → 503 plus Retry-After.

    This models the window where unpack-mpack has returned 200 but the
    mpack.index background job has not finished. The client is expected to
    retry instead of being handed a partial or corrupt fetch mpack.
    """
    _stub_backend(monkeypatch)
    repo = await create_repo(db_session, owner="gabriel", visibility="public")
    payload = b"not yet indexed"
    payload_id = blob_id(payload)
    # indexed=False: the object and its ref exist, but no pack_index entry does.
    await _store_object(db_session, repo.repo_id, payload_id, payload, indexed=False)
    tip, _ = await _make_commit(
        db_session,
        repo.repo_id,
        manifest={"pending.txt": payload_id},
        seed="f08",
    )

    response = await _fetch(
        client, "gabriel", repo.slug, [tip.commit_id], [], headers=wire_headers
    )

    assert response.status_code == 503
    header_names = {name.lower() for name in response.headers}
    assert "retry-after" in header_names, (
        "503 response must include Retry-After header"
    )
| 400 | |
| 401 | |
| 402 | # ══════════════════════════════════════════════════════════════════════════════ |
| 403 | # F09 — private repo without auth → 404 |
| 404 | # ══════════════════════════════════════════════════════════════════════════════ |
| 405 | |
@pytest.mark.asyncio
async def test_f09_private_repo_without_auth_returns_404(
    client: AsyncClient,
    db_session: AsyncSession,
) -> None:
    """F09: a private repo answers 404 when no auth headers are sent.

    Returning 404 avoids revealing that the repository exists.
    """
    repo = await create_repo(db_session, owner="gabriel", visibility="private")
    wanted = ["sha256:" + "c" * 64]
    # No headers argument: the request carries only the default Content-Type.
    response = await _fetch(client, "gabriel", repo.slug, wanted, [])
    assert response.status_code == 404
| 415 | |
| 416 | |
| 417 | # ══════════════════════════════════════════════════════════════════════════════ |
| 418 | # F10 — mpack_id == sha256(mpack_bytes) end-to-end |
| 419 | # ══════════════════════════════════════════════════════════════════════════════ |
| 420 | |
@pytest.mark.asyncio
async def test_f10_mpack_id_equals_sha256_of_stored_bytes(
    client: AsyncClient,
    db_session: AsyncSession,
    monkeypatch: pytest.MonkeyPatch,
    wire_headers: StrDict,
) -> None:
    """F10: the returned ``mpack_id`` matches the id of the bytes in storage.

    The bytes the stub backend holds under ``mpack_id`` are re-hashed with
    ``blob_id`` and compared with the id in the HTTP response.
    Content-addressing is the integrity proof, so no secondary check is needed.
    """
    backing_store = _stub_backend(monkeypatch)
    repo = await create_repo(db_session, owner="gabriel", visibility="public")
    payload = b"content-addressing proof"
    payload_id = blob_id(payload)
    await _store_object(db_session, repo.repo_id, payload_id, payload)
    tip, _ = await _make_commit(
        db_session,
        repo.repo_id,
        manifest={"proof.bin": payload_id},
        seed="f10",
    )

    response = await _fetch(
        client, "gabriel", repo.slug, [tip.commit_id], [], headers=wire_headers
    )

    assert response.status_code == 200
    body = msgpack.unpackb(response.content, raw=False)
    returned_id = body["mpack_id"]

    persisted = backing_store[returned_id]
    recomputed_id = blob_id(persisted)
    assert returned_id == recomputed_id, (
        f"mpack_id {returned_id!r} != blob_id(stored_bytes) {recomputed_id!r}"
    )
File History
1 commit
sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595
fix: typing audit — 0 violations, 0 untyped defs across all…
Sonnet 4.6
minor
⚠
22 days ago