gabriel / muse public
test_transport_fetch_phase2.py python
219 lines 9.6 KB
Raw
sha256:79ffe87f5fe2ec146e35f05521218bbf54dffdb0440c07f970bad05f16efb89f chore: merge main — carry all urllib/typing/test fixes from dev Sonnet 4.6 minor ⚠ breaking 20 days ago
1 """TDD — HttpTransport.fetch_mpack Phase 2: single presigned-URL protocol (issue #68).
2
3 The Phase 2 server response has exactly four fields — no presign flag, no inline
4 bytes, no pack_urls list. The URL is always the delivery channel.
5
6 Server response shape:
7 {
8 "mpack_url": str, # presigned GET URL; null/absent → up-to-date
9 "mpack_id": str, # sha256:<hex>; null/absent → up-to-date
10 "commit_count": int,
11 "object_count": int,
12 }
13
14 Client protocol (Step 2 from issue #68 spec):
15 1. GET mpack_url (direct to MinIO, bypasses Cloudflare)
16 2. Verify sha256(mpack_bytes).hexdigest() == mpack_id[7:]
17 → abort with TransportError on mismatch (corrupt in transit)
18 3. apply_mpack() on the verified bytes
19
20 Tests:
21 FM-1 Normal fetch: server returns mpack_url + mpack_id, client GETs, sha256
22 matches, FetchMPackResult returned.
23 FM-2 sha256 mismatch on downloaded bytes → TransportError raised immediately.
24 FM-3 Non-200 from GET mpack_url → TransportError.
25 FM-4 Non-200 from POST /fetch → TransportError.
26 FM-5 Server returns mpack_url=null (up-to-date) → empty result, no GET call.
27 FM-6 All blobs from the downloaded mpack are present in FetchMPackResult["blobs"].
28 """
29 from __future__ import annotations
30
31 from unittest.mock import MagicMock, patch
32
33 import msgpack
34 import pytest
35
36 from muse.core.transport import FetchMPackResult, HttpTransport, TransportError
37 from muse.core.types import blob_id, fake_id
38
39 _URL = "https://staging.musehub.ai/gabriel/muse"
40
41
42 # ── helpers ───────────────────────────────────────────────────────────────────
43
def _make_mpack(*, n_objects: int = 1, n_commits: int = 1) -> tuple[bytes, str]:
    """Build a small msgpack-encoded mpack fixture.

    The pack contains ``n_commits`` parentless commits on ``main`` that all
    reference one shared snapshot. That snapshot's manifest maps
    ``f<i>.bin`` to each of the ``n_objects`` blobs, whose contents are
    ``content-<i>``.

    Returns:
        ``(mpack_bytes, mpack_id)`` where ``mpack_id`` is ``blob_id`` applied
        to the encoded bytes.
    """
    payloads = [f"content-{idx}".encode() for idx in range(n_objects)]
    object_ids = [blob_id(payload) for payload in payloads]
    commit_ids = [fake_id(f"commit-{idx}") for idx in range(n_commits)]
    snapshot_id = fake_id("snap-0")

    # Key order inside every dict is kept stable: it determines the encoded
    # bytes and therefore the id derived from them.
    commits = []
    for idx, commit_id in enumerate(commit_ids):
        commits.append({
            "commit_id": commit_id,
            "parent_commit_id": None,
            "snapshot_id": snapshot_id,
            "branch": "main",
            "message": f"c{idx}",
            "author": "gabriel",
        })

    manifest = {}
    for idx, object_id in enumerate(object_ids):
        manifest[f"f{idx}.bin"] = object_id

    blobs = [
        {"object_id": object_id, "content": payload}
        for object_id, payload in zip(object_ids, payloads)
    ]

    pack = {
        "commits": commits,
        "snapshots": [{"snapshot_id": snapshot_id, "manifest": manifest}],
        "blobs": blobs,
    }
    encoded = msgpack.packb(pack, use_bin_type=True)
    return encoded, blob_id(encoded)
64
65
def _fetch_response(*, mpack_url: str | None, mpack_id: str | None,
                    commit_count: int = 1, object_count: int = 1) -> bytes:
    """Encode a Phase 2 ``/fetch`` reply as msgpack bytes.

    The reply carries exactly four fields: ``mpack_url``, ``mpack_id``,
    ``commit_count`` and ``object_count``. Pass ``None`` for both
    ``mpack_url`` and ``mpack_id`` to model an "already up-to-date" reply.
    """
    reply = {
        "mpack_url": mpack_url,
        "mpack_id": mpack_id,
        "commit_count": commit_count,
        "object_count": object_count,
    }
    return msgpack.packb(reply, use_bin_type=True)
75
76
77 import urllib.error
78 from typing import Protocol
79
class _DoFn(Protocol):
    """Shape of the fake ``_urllib_do`` built by ``_mock_urllib_do``.

    A callable that takes the request method, URL, headers and optional body
    and returns the response bytes, and that additionally exposes ``calls``.
    """

    # HTTP methods the fake has been invoked with, in call order.
    calls: list[str]

    def __call__(
        self,
        method: str,
        url: str,
        headers: "dict[str, str]",
        data: "bytes | None" = ...,
        **kwargs: "str | int | bool",
    ) -> bytes:
        """Serve one request and return the response body."""
        ...
83
84
def _mock_urllib_do(post_body: bytes, get_body: bytes | None = None,
                    get_status: int = 200) -> _DoFn:
    """Build a ``side_effect`` callable that stands in for ``_urllib_do``.

    Args:
        post_body: Bytes returned for every ``POST`` request.
        get_body: Bytes returned for any other request (treated as the GET
            of the mpack URL). When ``get_status`` is an error status, this
            is what the raised error's ``read()`` yields instead.
        get_status: Status for the non-POST request; values of 400 and above
            make the fake raise ``urllib.error.HTTPError``.

    Returns:
        The fake callable. Its ``calls`` attribute lists the method of every
        request it received, in order.

    Raises (from the returned callable):
        urllib.error.HTTPError: non-POST request while ``get_status >= 400``.
        AssertionError: successful non-POST request but ``get_body`` is None.
    """
    seen_methods: list[str] = []

    def _respond(method: str, url: str, headers: "dict[str, str]", data: "bytes | None" = None, **kwargs: "str | int | bool") -> bytes:
        seen_methods.append(method)
        if method == "POST":
            return post_body

        # Anything that is not a POST is handled as the GET of the mpack URL.
        if get_status < 400:
            if get_body is None:
                raise AssertionError("GET called but no get_body provided")
            return get_body

        failure = urllib.error.HTTPError(url, get_status, f"HTTP {get_status}", {}, None)
        # No file object is attached to the error, so supply read() directly.
        failure.read = lambda n=None: get_body or b""
        raise failure

    _respond.calls = seen_methods
    return _respond
105
106
107 # ── FM-1 ──────────────────────────────────────────────────────────────────────
108
def test_fm1_normal_fetch_returns_result() -> None:
    """FM-1: a normal fetch downloads the mpack and returns its contents.

    The POST reply advertises ``mpack_url`` and ``mpack_id``; the GET serves
    the matching bytes. The result must report both blobs and the single
    commit, and a GET must have been issued.
    """
    pack_bytes, pack_id = _make_mpack(n_objects=2, n_commits=1)
    presigned = f"https://minio.example.com/mpacks/{pack_id}?sig=fake"
    server_reply = _fetch_response(
        mpack_url=presigned,
        mpack_id=pack_id,
        commit_count=1,
        object_count=2,
    )
    fake_do = _mock_urllib_do(post_body=server_reply, get_body=pack_bytes)

    with patch("muse.core.transport._urllib_do", side_effect=fake_do):
        fetched = HttpTransport().fetch_mpack(_URL, None, want=[fake_id("w")], have=[])

    assert "GET" in fake_do.calls, "GET for mpack_url must be called"
    assert fetched["blobs_received"] == 2
    assert len(fetched["commits"]) == 1
125
126
127 # ── FM-2 ──────────────────────────────────────────────────────────────────────
128
def test_fm2_sha256_mismatch_raises() -> None:
    """FM-2: downloaded bytes that do not hash to ``mpack_id`` are rejected.

    The GET serves the real pack with one extra trailing byte, so the bytes
    no longer correspond to the advertised id. ``fetch_mpack`` must raise a
    ``TransportError`` whose message matches ``integrity``.
    """
    genuine, advertised_id = _make_mpack()
    presigned = f"https://minio.example.com/mpacks/{advertised_id}?sig=fake"

    # A single appended byte is enough to break the hash.
    corrupted = b"".join((genuine, b"\xff"))

    server_reply = _fetch_response(mpack_url=presigned, mpack_id=advertised_id)
    fake_do = _mock_urllib_do(post_body=server_reply, get_body=corrupted)

    with patch("muse.core.transport._urllib_do", side_effect=fake_do), \
            pytest.raises(TransportError, match="integrity"):
        HttpTransport().fetch_mpack(_URL, None, want=[fake_id("w")], have=[])
143
144
145 # ── FM-3 ──────────────────────────────────────────────────────────────────────
146
def test_fm3_non200_get_raises() -> None:
    """FM-3: an error status on the mpack download surfaces as TransportError.

    The fake answers the GET by raising ``urllib.error.HTTPError`` with
    status 403 and body ``Access Denied``; ``fetch_mpack`` is expected to
    raise ``TransportError``.
    """
    # Only the id is needed here; the pack bytes are never served.
    _unused_bytes, pack_id = _make_mpack()
    presigned = f"https://minio.example.com/mpacks/{pack_id}?sig=fake"

    server_reply = _fetch_response(mpack_url=presigned, mpack_id=pack_id)
    fake_do = _mock_urllib_do(
        post_body=server_reply,
        get_body=b"Access Denied",
        get_status=403,
    )

    with patch("muse.core.transport._urllib_do", side_effect=fake_do), \
            pytest.raises(TransportError):
        HttpTransport().fetch_mpack(_URL, None, want=[fake_id("w")], have=[])
160
161
162 # ── FM-4 ──────────────────────────────────────────────────────────────────────
163
def test_fm4_non200_post_raises() -> None:
    """FM-4: a failing POST /fetch raises TransportError and skips the GET.

    The fake raises ``TransportError("HTTP 404", 404)`` for the POST and
    records the URL of any other request, so the test can prove that no
    download was attempted afterwards.
    """
    get_urls: list[str] = []

    def _reject_post(method: str, url: str, headers: "dict[str, str]", data: "bytes | None" = None, **kwargs: "str | int | bool") -> bytes:
        if method != "POST":
            get_urls.append(url)
            return b""
        raise TransportError("HTTP 404", 404)

    with patch("muse.core.transport._urllib_do", side_effect=_reject_post), \
            pytest.raises(TransportError):
        HttpTransport().fetch_mpack(_URL, None, want=[fake_id("w")], have=[])

    assert not get_urls, "GET must not be called when POST fails"
179
180
181 # ── FM-5 ──────────────────────────────────────────────────────────────────────
182
def test_fm5_null_mpack_url_means_up_to_date() -> None:
    """FM-5: a reply with ``mpack_url=None`` means there is nothing to fetch.

    The POST reply has null ``mpack_url``/``mpack_id`` and zero counts. The
    client must return an empty result (no blobs, no commits) without
    issuing any GET.
    """
    get_urls: list[str] = []

    def _up_to_date(method: str, url: str, headers: "dict[str, str]", data: "bytes | None" = None, **kwargs: "str | int | bool") -> bytes:
        if method != "POST":
            get_urls.append(url)
            return b""
        return _fetch_response(
            mpack_url=None,
            mpack_id=None,
            commit_count=0,
            object_count=0,
        )

    with patch("muse.core.transport._urllib_do", side_effect=_up_to_date):
        fetched = HttpTransport().fetch_mpack(_URL, None, want=[fake_id("w")], have=[])

    assert not get_urls, "GET must not be called when mpack_url is null"
    assert fetched["blobs_received"] == 0
    assert fetched["commits"] == []
200
201
202 # ── FM-6 ──────────────────────────────────────────────────────────────────────
203
def test_fm6_all_objects_present_in_result() -> None:
    """FM-6: every blob in the downloaded mpack is surfaced in the result.

    A three-object pack is served; the result must report three received
    blobs and expose three entries under ``"blobs"``, each a dict carrying
    an ``"object_id"`` key.
    """
    pack_bytes, pack_id = _make_mpack(n_objects=3)
    presigned = f"https://minio.example.com/mpacks/{pack_id}?sig=fake"

    server_reply = _fetch_response(mpack_url=presigned, mpack_id=pack_id, object_count=3)
    fake_do = _mock_urllib_do(post_body=server_reply, get_body=pack_bytes)

    with patch("muse.core.transport._urllib_do", side_effect=fake_do):
        fetched = HttpTransport().fetch_mpack(_URL, None, want=[fake_id("w")], have=[])

    assert fetched["blobs_received"] == 3
    assert len(fetched["blobs"]) == 3
    for entry in fetched["blobs"]:
        assert isinstance(entry, dict) and "object_id" in entry
File History 2 commits
sha256:79ffe87f5fe2ec146e35f05521218bbf54dffdb0440c07f970bad05f16efb89f chore: merge main — carry all urllib/typing/test fixes from dev Sonnet 4.6 minor 20 days ago
sha256:0bea7600d1eee83e87950be49933b1006fa9dc2c71e7c4ee748d324f61138156 chore: bump version to 0.2.0rc11; fix typing audit violatio… Sonnet 4.6 minor 20 days ago