"""TDD — HttpTransport.fetch_mpack Phase 2: single presigned-URL protocol (issue #68). The Phase 2 server response has exactly four fields — no presign flag, no inline bytes, no pack_urls list. The URL is always the delivery channel. Server response shape: { "mpack_url": str, # presigned GET URL; null/absent → up-to-date "mpack_id": str, # sha256:; null/absent → up-to-date "commit_count": int, "object_count": int, } Client protocol (Step 2 from issue #68 spec): 1. GET mpack_url (direct to MinIO, bypasses Cloudflare) 2. Verify sha256(mpack_bytes).hexdigest() == mpack_id[7:] → abort with TransportError on mismatch (corrupt in transit) 3. apply_mpack() on the verified bytes Tests: FM-1 Normal fetch: server returns mpack_url + mpack_id, client GETs, sha256 matches, FetchMPackResult returned. FM-2 sha256 mismatch on downloaded bytes → TransportError raised immediately. FM-3 Non-200 from GET mpack_url → TransportError. FM-4 Non-200 from POST /fetch → TransportError. FM-5 Server returns mpack_url=null (up-to-date) → empty result, no GET call. FM-6 All objects from the downloaded mpack are present in FetchMPackResult.objects. 
""" from __future__ import annotations from unittest.mock import MagicMock, patch import msgpack import pytest from muse.core.transport import FetchMPackResult, HttpTransport, TransportError from muse.core.types import blob_id, fake_id _URL = "https://staging.musehub.ai/gabriel/muse" # ── helpers ─────────────────────────────────────────────────────────────────── def _make_mpack(*, n_objects: int = 1, n_commits: int = 1) -> tuple[bytes, str]: """Return (mpack_bytes, mpack_id).""" raws = [f"content-{i}".encode() for i in range(n_objects)] oids = [blob_id(r) for r in raws] commit_ids = [fake_id(f"commit-{i}") for i in range(n_commits)] snap_id = fake_id("snap-0") mpack = { "commits": [ { "commit_id": cid, "parent_commit_id": None, "snapshot_id": snap_id, "branch": "main", "message": f"c{i}", "author": "gabriel", } for i, cid in enumerate(commit_ids) ], "snapshots": [{"snapshot_id": snap_id, "manifest": {f"f{i}.bin": oid for i, oid in enumerate(oids)}}], "blobs": [{"object_id": oid, "content": raw} for oid, raw in zip(oids, raws)], } wire = msgpack.packb(mpack, use_bin_type=True) return wire, blob_id(wire) def _fetch_response(*, mpack_url: str | None, mpack_id: str | None, commit_count: int = 1, object_count: int = 1) -> bytes: """Phase 2 server response — no presign flag, no inline bytes.""" return msgpack.packb({ "mpack_url": mpack_url, "mpack_id": mpack_id, "commit_count": commit_count, "object_count": object_count, }, use_bin_type=True) import urllib.error from typing import Protocol class _DoFn(Protocol): calls: list[str] def __call__(self, method: str, url: str, headers: "dict[str, str]", data: "bytes | None" = ..., **kwargs: "str | int | bool") -> bytes: ... 
def _mock_urllib_do(post_body: bytes, get_body: bytes | None = None, get_status: int = 200) -> _DoFn:
    """Return a side_effect for _urllib_do that serves POST then GET responses.

    Args:
        post_body: Bytes returned for every ``POST`` call.
        get_body: Bytes returned for any non-POST call (or exposed as the
            error body when ``get_status`` signals failure).
        get_status: Status for non-POST calls; values >= 400 make the fake
            raise ``urllib.error.HTTPError`` instead of returning.

    Returns:
        The fake callable; its ``calls`` attribute lists the HTTP method of
        each invocation in order.

    Raises:
        AssertionError: (from the returned callable) when a successful GET is
            requested but no ``get_body`` was configured.
    """
    calls: list[str] = []

    def _side_effect(
        method: str,
        url: str,
        headers: dict[str, str],
        data: bytes | None = None,
        **kwargs: str | int | bool,
    ) -> bytes:
        calls.append(method)
        if method == "POST":
            return post_body
        # GET
        if get_status >= 400:
            err = urllib.error.HTTPError(url, get_status, f"HTTP {get_status}", {}, None)
            # No file object was supplied, so attach a read() that yields the
            # configured body in case the code under test inspects it.
            err.read = lambda n=None: get_body or b""
            raise err
        if get_body is None:
            raise AssertionError("GET called but no get_body provided")
        return get_body

    _side_effect.calls = calls
    return _side_effect


def _run_fetch(side_effect: object) -> FetchMPackResult:
    """Call ``HttpTransport().fetch_mpack`` with ``_urllib_do`` patched to *side_effect*."""
    with patch("muse.core.transport._urllib_do", side_effect=side_effect):
        return HttpTransport().fetch_mpack(_URL, None, want=[fake_id("w")], have=[])


# ── FM-1 ──────────────────────────────────────────────────────────────────────


def test_fm1_normal_fetch_returns_result() -> None:
    """Server returns mpack_url + mpack_id; client GETs, verifies sha256, returns result."""
    mpack_bytes, mpack_id = _make_mpack(n_objects=2, n_commits=1)
    mpack_url = f"https://minio.example.com/mpacks/{mpack_id}?sig=fake"
    do = _mock_urllib_do(
        post_body=_fetch_response(mpack_url=mpack_url, mpack_id=mpack_id, commit_count=1, object_count=2),
        get_body=mpack_bytes,
    )

    result = _run_fetch(do)

    assert "GET" in do.calls, "GET for mpack_url must be called"
    assert result["blobs_received"] == 2
    assert len(result["commits"]) == 1


# ── FM-2 ──────────────────────────────────────────────────────────────────────


def test_fm2_sha256_mismatch_raises() -> None:
    """sha256(downloaded_bytes) != mpack_id → TransportError before any apply."""
    mpack_bytes, mpack_id = _make_mpack()
    mpack_url = f"https://minio.example.com/mpacks/{mpack_id}?sig=fake"
    tampered = mpack_bytes + b"\xff"  # one extra byte breaks the hash
    do = _mock_urllib_do(
        post_body=_fetch_response(mpack_url=mpack_url, mpack_id=mpack_id),
        get_body=tampered,
    )

    with pytest.raises(TransportError, match="integrity"):
        _run_fetch(do)

    # The mismatch can only be detected on downloaded bytes, so the GET must
    # have happened; otherwise the error came from somewhere else.
    assert "GET" in do.calls, "GET for mpack_url must be called"


# ── FM-3 ──────────────────────────────────────────────────────────────────────


def test_fm3_non200_get_raises() -> None:
    """Non-200 from GET mpack_url → TransportError."""
    mpack_bytes, mpack_id = _make_mpack()
    mpack_url = f"https://minio.example.com/mpacks/{mpack_id}?sig=fake"
    do = _mock_urllib_do(
        post_body=_fetch_response(mpack_url=mpack_url, mpack_id=mpack_id),
        get_body=b"Access Denied",
        get_status=403,
    )

    with pytest.raises(TransportError):
        _run_fetch(do)

    # Guard against a false pass where the POST step fails before the download.
    assert "GET" in do.calls, "GET for mpack_url must be called"


# ── FM-4 ──────────────────────────────────────────────────────────────────────


def test_fm4_non200_post_raises() -> None:
    """Non-200 from POST /fetch → TransportError, no GET attempted."""
    get_called: list[str] = []

    def _fail_post(
        method: str,
        url: str,
        headers: dict[str, str],
        data: bytes | None = None,
        **kwargs: str | int | bool,
    ) -> bytes:
        if method == "POST":
            raise TransportError("HTTP 404", 404)
        get_called.append(url)
        return b""

    with pytest.raises(TransportError):
        _run_fetch(_fail_post)

    assert not get_called, "GET must not be called when POST fails"


# ── FM-5 ──────────────────────────────────────────────────────────────────────


def test_fm5_null_mpack_url_means_up_to_date() -> None:
    """Server returns mpack_url=null → client already up-to-date, no GET."""
    get_called: list[str] = []

    def _null_url(
        method: str,
        url: str,
        headers: dict[str, str],
        data: bytes | None = None,
        **kwargs: str | int | bool,
    ) -> bytes:
        if method == "POST":
            return _fetch_response(mpack_url=None, mpack_id=None, commit_count=0, object_count=0)
        get_called.append(url)
        return b""

    result = _run_fetch(_null_url)

    assert not get_called, "GET must not be called when mpack_url is null"
    assert result["blobs_received"] == 0
    assert result["commits"] == []


# ── FM-6 ──────────────────────────────────────────────────────────────────────


def test_fm6_all_objects_present_in_result() -> None:
    """All objects from the downloaded mpack appear in the result's ``blobs`` list."""
    mpack_bytes, mpack_id = _make_mpack(n_objects=3)
    mpack_url = f"https://minio.example.com/mpacks/{mpack_id}?sig=fake"
    do = _mock_urllib_do(
        post_body=_fetch_response(mpack_url=mpack_url, mpack_id=mpack_id, object_count=3),
        get_body=mpack_bytes,
    )

    result = _run_fetch(do)

    assert result["blobs_received"] == 3
    assert len(result["blobs"]) == 3
    assert all(isinstance(o, dict) and "object_id" in o for o in result["blobs"])