gabriel / musehub public
test_cdn_reads_phase5.py python
238 lines 9.2 KB
Raw
sha256:3ff9c9863a9891bdcde71b4a43228f66d0493e38b7cc1d09fe9eb7de774046b2 feat: add repair-commit wire endpoint (API parity with repa… Opus 4.8 minor ⚠ breaking 1 day ago
1 """TDD — Phase 5: CDN-first reads (issue #63).
2
3 CDN-1 When BLOB_STORAGE_CDN_BASE_URL is set, presign_mpack_get returns a URL
4 whose host is the CDN base URL, not the internal storage endpoint.
5
6 CDN-2 put_mpack writes with Cache-Control: public, max-age=31536000, immutable
7 so that Cloudflare caches mpack bytes on first GET and serves from edge
8 on every subsequent clone.
9
10 CDN-3 Regular object presigned URLs (presign_get) are NOT rewritten through
11 the CDN — only mpack GETs use the CDN base URL.
12
13 CDN-4 When BLOB_STORAGE_CDN_BASE_URL is not set, presign_mpack_get behaves
14 exactly as before (no CDN rewrite, no regression).
15
16 CDN-5 wire_fetch_mpack pack_urls use CDN-rewritten URLs end-to-end — the
17 client receives CDN URLs when the setting is active.
18 """
19 from __future__ import annotations
20
21 import datetime
22 import hashlib
23 import unittest.mock
24 from collections.abc import Mapping
25
26 import msgpack
27 import pytest
28 from sqlalchemy.ext.asyncio import AsyncSession
29
30 from muse.core.types import blob_id
31 from musehub.core.genesis import compute_identity_id
32 from musehub.services.musehub_repository import create_repo
33 from musehub.types.json_types import JSONValue
34
35
36 CDN_BASE = "https://cdn.musehub.ai"
37
38
39 # ---------------------------------------------------------------------------
40 # Helpers — reuse push helpers from earlier phases
41 # ---------------------------------------------------------------------------
42
def _make_mpack(objects: Mapping[str, bytes]) -> tuple[bytes, str]:
    """Encode *objects* as an mpack that carries no commits, snapshots or heads.

    Returns a ``(wire_bytes, mpack_key)`` pair where ``mpack_key`` is
    ``"sha256:"`` followed by the hex SHA-256 digest of ``wire_bytes``.
    """
    object_entries = []
    for object_id, content in objects.items():
        object_entries.append({"object_id": object_id, "content": content})

    # Key order is kept as commits / snapshots / objects / branch_heads so the
    # encoded bytes (and therefore the derived key) stay stable.
    envelope = dict(
        commits=[],
        snapshots=[],
        objects=object_entries,
        branch_heads={},
    )
    encoded = msgpack.packb(envelope, use_bin_type=True)
    digest = hashlib.sha256(encoded).hexdigest()
    return encoded, f"sha256:{digest}"
53
54
async def _push_and_index(
    session: AsyncSession,
    repo_id: str,
    objects: dict[str, bytes],
    commits: list[dict] | None = None,
    snapshots: list[dict] | None = None,
    branch_heads: dict[str, str] | None = None,
) -> str:
    """Store an mpack in the storage backend and run its ``mpack.index`` job.

    The mpack is assembled from the arguments (omitted parts default to
    empty), msgpack-encoded, and written via ``put_mpack`` under a key of
    ``"sha256:"`` plus the hex digest of the encoded bytes.  A pending
    ``MusehubBackgroundJob`` row is then added and committed, handed to
    ``process_mpack_index_job``, and the session is committed once more.

    Returns the mpack key.
    """
    import musehub.storage.backends as _backends_mod
    from musehub.core.genesis import compute_job_id
    from musehub.db.musehub_jobs_models import MusehubBackgroundJob
    from musehub.services.musehub_wire import process_mpack_index_job

    commit_list = commits or []
    object_entries = [
        {"object_id": object_id, "content": content}
        for object_id, content in objects.items()
    ]
    wire_bytes = msgpack.packb(
        {
            "commits": commit_list,
            "snapshots": snapshots or [],
            "objects": object_entries,
            "branch_heads": branch_heads or {},
        },
        use_bin_type=True,
    )
    mpack_key = "sha256:" + hashlib.sha256(wire_bytes).hexdigest()

    storage = _backends_mod.get_backend()
    await storage.put_mpack(mpack_key, wire_bytes)

    created = datetime.datetime.now(datetime.timezone.utc)
    job_id = compute_job_id(repo_id, "mpack.index", created.isoformat())

    # "head" is the commit_id of the last commit, or "" when there are none.
    head_commit = commit_list[-1].get("commit_id", "") if commit_list else ""
    job_payload = {
        "mpack_key": mpack_key,
        "branch": "main",
        "head": head_commit,
        "pusher_id": "",
        "declared_objects_count": len(objects),
        "declared_commits_count": len(commit_list),
    }
    job_row = MusehubBackgroundJob(
        job_id=job_id,
        repo_id=repo_id,
        job_type="mpack.index",
        payload=job_payload,
        status="pending",
        created_at=created,
        attempt=0,
    )
    session.add(job_row)
    await session.commit()

    await process_mpack_index_job(session, job_id)
    await session.commit()
    return mpack_key
94
95
def _make_commit_chain(
    n: int, seed: str
) -> tuple[list[dict], list[dict], str | None, dict[str, bytes]]:
    """Build a linear chain of *n* commit dicts on branch ``main``.

    Each commit gets its own snapshot (``parent_snapshot_id`` is always
    ``None``) that upserts a single file ``f{i}.txt`` pointing at one
    freshly generated object.  All IDs come from ``blob_id`` applied to
    byte strings derived from *seed*, the index and — for commits — the
    parent commit ID, so each commit links to its predecessor via
    ``parent_commit_id``.

    Args:
        n: Number of commits to generate.
        seed: Text mixed into every generated ID and object payload.

    Returns:
        ``(commits, snapshots, head_commit_id, objects)`` where ``objects``
        maps object ID to its raw bytes.  ``head_commit_id`` is the ID of
        the last commit, or ``None`` when no commit was generated
        (``n <= 0``).
    """
    objects: dict[str, bytes] = {}
    commits: list[dict] = []
    snapshots: list[dict] = []
    parent_id: str | None = None

    # Every commit shares the same fixed timestamp; compute it once.
    committed_at = datetime.datetime(
        2025, 1, 1, tzinfo=datetime.timezone.utc
    ).isoformat()

    for i in range(n):
        content = f"{seed}-obj-{i}".encode()
        oid = blob_id(content)
        objects[oid] = content

        snap_id = blob_id(f"{seed}-snap-{i}".encode())
        snapshots.append({
            "snapshot_id": snap_id,
            "parent_snapshot_id": None,
            "delta_upsert": {f"f{i}.txt": oid},
            "delta_remove": [],
        })

        # The parent ID is part of the hashed text (literally "None" for the
        # first commit).
        cid = blob_id(f"{seed}-commit-{i}-p={parent_id}".encode())
        commits.append({
            "commit_id": cid, "branch": "main", "message": f"c{i}",
            "author": "gabriel",
            "committed_at": committed_at,
            "parent_commit_id": parent_id, "parent2_commit_id": None,
            "snapshot_id": snap_id, "agent_id": "", "model_id": "", "toolchain_id": "",
            "sem_ver_bump": "none", "breaking_changes": [], "signature": "",
            "signer_key_id": "", "signer_public_key": "", "prompt_hash": "",
        })
        parent_id = cid

    return commits, snapshots, parent_id, objects
119
120
121 # ---------------------------------------------------------------------------
122 # CDN-1
123 # ---------------------------------------------------------------------------
124
@pytest.mark.asyncio
async def test_cdn1_presign_mpack_get_uses_cdn_url() -> None:
    """presign_mpack_get returns a CDN URL when BLOB_STORAGE_CDN_BASE_URL is set.

    The URL is presigned twice for the same key: once with the backend as
    configured and once with ``_cdn_base_url`` patched to ``CDN_BASE``.  The
    patched URL must point at the CDN host and keep the same path as the
    unpatched one.  Skipped when the active backend is not a ``BlobBackend``.
    """
    from urllib.parse import urlparse

    import musehub.storage.backends as _backends_mod
    from musehub.storage.backends import BlobBackend

    backend = _backends_mod.get_backend()
    if not isinstance(backend, BlobBackend):
        pytest.skip("CDN rewrite requires BlobBackend")

    mpack_key = "sha256:" + "a" * 64
    original_url = await backend.presign_mpack_get(mpack_key, ttl_seconds=3600)

    with unittest.mock.patch.object(backend, "_cdn_base_url", CDN_BASE):
        cdn_url = await backend.presign_mpack_get(mpack_key, ttl_seconds=3600)

    assert cdn_url.startswith(CDN_BASE), (
        f"expected CDN URL starting with {CDN_BASE!r}, got {cdn_url!r}"
    )

    # A bare prefix match would also accept a look-alike host such as
    # "https://cdn.musehub.ai.example.org/...", so compare the parsed hostname.
    cdn_parts = urlparse(cdn_url)
    expected_host = urlparse(CDN_BASE).hostname
    assert cdn_parts.hostname == expected_host, (
        f"expected CDN host {expected_host!r}, got {cdn_parts.hostname!r}"
    )

    # Path portion should be preserved
    orig_path = urlparse(original_url).path
    cdn_path = cdn_parts.path
    assert orig_path == cdn_path, (
        f"CDN rewrite changed path: {orig_path!r} → {cdn_path!r}"
    )
151
152
153 # ---------------------------------------------------------------------------
154 # CDN-2
155 # ---------------------------------------------------------------------------
156
@pytest.mark.asyncio
async def test_cdn2_put_mpack_sets_cache_control_immutable() -> None:
    """put_mpack writes Cache-Control: public, max-age=31536000, immutable.

    The storage client's ``put_object`` is wrapped so its keyword arguments
    are recorded while the real call still goes through; the ``CacheControl``
    argument of the first recorded call is then inspected.  Skipped when the
    active backend is not a ``BlobBackend``.
    """
    import musehub.storage.backends as _backends_mod
    from musehub.storage.backends import BlobBackend

    backend = _backends_mod.get_backend()
    if not isinstance(backend, BlobBackend):
        pytest.skip("Cache-Control header requires BlobBackend")

    # Derive the key from the bytes actually stored, the same way the mpack
    # helpers in this file do, so the content-addressed key matches its content.
    data = b"cdn2-test-payload"
    mpack_key = "sha256:" + hashlib.sha256(data).hexdigest()

    # Intercept put_object calls to capture kwargs
    captured: list[dict] = []
    client = backend._get_client()
    original_put = client.put_object  # bound before patching, so it is the real method

    def _capture_put(**kwargs: JSONValue) -> JSONValue:
        captured.append(dict(kwargs))
        return original_put(**kwargs)

    with unittest.mock.patch.object(client, "put_object", side_effect=_capture_put):
        await backend.put_mpack(mpack_key, data)

    assert captured, "put_object was not called"
    call_kwargs = captured[0]
    cc = call_kwargs.get("CacheControl", "")
    assert "immutable" in cc, f"expected 'immutable' in CacheControl, got {cc!r}"
    assert "max-age=31536000" in cc, f"expected 'max-age=31536000' in CacheControl, got {cc!r}"
    assert "public" in cc, f"expected 'public' in CacheControl, got {cc!r}"
188
189
190 # ---------------------------------------------------------------------------
191 # CDN-3
192 # ---------------------------------------------------------------------------
193
@pytest.mark.asyncio
async def test_cdn3_regular_object_presign_not_cdn_rewritten() -> None:
    """presign_get for regular objects does NOT use the CDN base URL.

    An object is stored first, then ``presign_get`` is called with
    ``_cdn_base_url`` patched to ``CDN_BASE``; the resulting URL must not
    start with that base.  Skipped when the backend is not a ``BlobBackend``.
    """
    import musehub.storage.backends as _backends_mod
    from musehub.storage.backends import BlobBackend

    storage = _backends_mod.get_backend()
    if not isinstance(storage, BlobBackend):
        pytest.skip("BlobBackend required")

    # Write a real object so presign_get works
    content = b"cdn3-regular-object"
    object_id = blob_id(content)
    await storage.put(object_id, content)

    cdn_patch = unittest.mock.patch.object(storage, "_cdn_base_url", CDN_BASE)
    with cdn_patch:
        regular_url = await storage.presign_get(object_id, ttl_seconds=3600)

    uses_cdn = regular_url.startswith(CDN_BASE)
    assert not uses_cdn, (
        f"regular object URL should NOT use CDN base URL, got {regular_url!r}"
    )
214
215
216 # ---------------------------------------------------------------------------
217 # CDN-4
218 # ---------------------------------------------------------------------------
219
@pytest.mark.asyncio
async def test_cdn4_no_cdn_setting_no_regression() -> None:
    """presign_mpack_get returns the normal (non-CDN) URL when setting is absent.

    ``_cdn_base_url`` is patched to ``None`` for the call; the result must be
    a non-empty URL that does not start with ``CDN_BASE``.  Skipped when the
    backend is not a ``BlobBackend``.
    """
    import musehub.storage.backends as _backends_mod
    from musehub.storage.backends import BlobBackend

    storage = _backends_mod.get_backend()
    if not isinstance(storage, BlobBackend):
        pytest.skip("BlobBackend required")

    key = "sha256:" + "b" * 64

    no_cdn = unittest.mock.patch.object(storage, "_cdn_base_url", None)
    with no_cdn:
        url = await storage.presign_mpack_get(key, ttl_seconds=3600)

    # Must be a non-empty URL that does not start with the CDN base
    failure = f"expected non-CDN URL when CDN is not configured, got {url!r}"
    assert url, failure
    assert not url.startswith(CDN_BASE), failure
File History 1 commit
sha256:3ff9c9863a9891bdcde71b4a43228f66d0493e38b7cc1d09fe9eb7de774046b2 feat: add repair-commit wire endpoint (API parity with repa… Opus 4.8 minor 1 day ago