test_cdn_reads_phase5.py
file-level
1
files
1
commits
0
hotspots
0
🧊 dead
0
💥 blast risk
| 1 | """TDD — Phase 5: CDN-first reads (issue #63). |
| 2 | |
| 3 | CDN-1 When BLOB_STORAGE_CDN_BASE_URL is set, presign_mpack_get returns a URL |
| 4 | whose host is the CDN base URL, not the internal storage endpoint. |
| 5 | |
| 6 | CDN-2 put_mpack writes with Cache-Control: public, max-age=31536000, immutable |
| 7 | so that Cloudflare caches mpack bytes on first GET and serves from edge |
| 8 | on every subsequent clone. |
| 9 | |
| 10 | CDN-3 Regular object presigned URLs (presign_get) are NOT rewritten through |
| 11 | the CDN — only mpack GETs use the CDN base URL. |
| 12 | |
| 13 | CDN-4 When BLOB_STORAGE_CDN_BASE_URL is not set, presign_mpack_get behaves |
| 14 | exactly as before (no CDN rewrite, no regression). |
| 15 | |
| 16 | CDN-5 wire_fetch_mpack pack_urls use CDN-rewritten URLs end-to-end — the |
| 17 | client receives CDN URLs when the setting is active. |
| 18 | """ |
| 19 | from __future__ import annotations |
| 20 | |
| 21 | import datetime |
| 22 | import hashlib |
| 23 | import unittest.mock |
| 24 | from collections.abc import Mapping |
| 25 | |
| 26 | import msgpack |
| 27 | import pytest |
| 28 | from sqlalchemy.ext.asyncio import AsyncSession |
| 29 | |
| 30 | from muse.core.types import blob_id |
| 31 | from musehub.core.genesis import compute_identity_id |
| 32 | from musehub.services.musehub_repository import create_repo |
| 33 | from musehub.types.json_types import JSONValue |
| 34 | |
| 35 | |
| 36 | CDN_BASE = "https://cdn.musehub.ai" |
| 37 | |
| 38 | |
| 39 | # --------------------------------------------------------------------------- |
| 40 | # Helpers — reuse push helpers from earlier phases |
| 41 | # --------------------------------------------------------------------------- |
| 42 | |
def _make_mpack(objects: Mapping[str, bytes]) -> tuple[bytes, str]:
    """Encode *objects* as an mpack payload and derive its content key.

    The payload has empty ``commits``, ``snapshots`` and ``branch_heads``
    and one ``{"object_id", "content"}`` entry per item of *objects*.

    Returns:
        ``(wire_bytes, mpack_key)`` — the msgpack encoding of the payload and
        ``"sha256:"`` followed by the hex SHA-256 digest of those bytes.
    """
    object_entries = []
    for object_id, content in objects.items():
        object_entries.append({"object_id": object_id, "content": content})

    payload = {
        "commits": [],
        "snapshots": [],
        "objects": object_entries,
        "branch_heads": {},
    }
    packed = msgpack.packb(payload, use_bin_type=True)
    digest = hashlib.sha256(packed).hexdigest()
    return packed, "sha256:" + digest
| 53 | |
| 54 | |
async def _push_and_index(
    session: AsyncSession,
    repo_id: str,
    objects: dict[str, bytes],
    commits: list[dict] | None = None,
    snapshots: list[dict] | None = None,
    branch_heads: dict[str, str] | None = None,
) -> str:
    """Write an mpack to the storage backend and run its index job.

    Builds the payload from the arguments (``None`` collections become
    empty), msgpack-encodes it, stores it via the backend's ``put_mpack``,
    inserts a pending ``mpack.index`` ``MusehubBackgroundJob`` row, commits,
    runs ``process_mpack_index_job`` on that job, and commits again.

    Returns:
        The mpack key: ``"sha256:"`` plus the hex SHA-256 of the encoded bytes.
    """
    import musehub.storage.backends as _backends_mod
    from musehub.core.genesis import compute_job_id
    from musehub.db.musehub_jobs_models import MusehubBackgroundJob
    from musehub.services.musehub_wire import process_mpack_index_job

    commit_list = commits or []
    object_entries = [
        {"object_id": object_id, "content": content}
        for object_id, content in objects.items()
    ]
    payload = {
        "commits": commit_list,
        "snapshots": snapshots or [],
        "objects": object_entries,
        "branch_heads": branch_heads or {},
    }
    packed = msgpack.packb(payload, use_bin_type=True)
    mpack_key = "sha256:" + hashlib.sha256(packed).hexdigest()

    storage = _backends_mod.get_backend()
    await storage.put_mpack(mpack_key, packed)

    created = datetime.datetime.now(datetime.timezone.utc)
    job_id = compute_job_id(repo_id, "mpack.index", created.isoformat())

    # The job's head is the last commit's id, or "" when no commits are pushed.
    head_commit_id = commit_list[-1].get("commit_id", "") if commit_list else ""
    job_payload = {
        "mpack_key": mpack_key,
        "branch": "main",
        "head": head_commit_id,
        "pusher_id": "",
        "declared_objects_count": len(objects),
        "declared_commits_count": len(commit_list),
    }
    job = MusehubBackgroundJob(
        job_id=job_id,
        repo_id=repo_id,
        job_type="mpack.index",
        payload=job_payload,
        status="pending",
        created_at=created,
        attempt=0,
    )
    session.add(job)
    await session.commit()

    await process_mpack_index_job(session, job_id)
    await session.commit()
    return mpack_key
| 94 | |
| 95 | |
def _make_commit_chain(n: int, seed: str) -> tuple[list[dict], list[dict], str, dict[str, bytes]]:
    """Build a linear chain of *n* commits with one object and snapshot each.

    For every index ``i`` the chain gets an object whose content is
    ``f"{seed}-obj-{i}"`` (keyed by its ``blob_id``), a snapshot that upserts
    ``f{i}.txt`` to that object, and a commit on ``main`` whose parent is the
    previous commit (``None`` for the first). Every snapshot is emitted with
    ``parent_snapshot_id`` set to ``None``.

    Args:
        n: Number of commits to generate.
        seed: Text mixed into every generated id and object body.

    Returns:
        ``(commits, snapshots, head_commit_id, objects)``. ``objects`` maps
        object id to raw ``bytes`` content. ``head_commit_id`` is the id of
        the last commit, or ``None`` when ``n`` is 0.
    """
    objects: dict[str, bytes] = {}
    commits: list[dict] = []
    snapshots: list[dict] = []
    parent_id = None
    # All commits share one fixed timestamp, so compute it once.
    committed_at = datetime.datetime(2025, 1, 1, tzinfo=datetime.timezone.utc).isoformat()
    for i in range(n):
        content = f"{seed}-obj-{i}".encode()
        oid = blob_id(content)
        objects[oid] = content
        snap_id = blob_id(f"{seed}-snap-{i}".encode())
        snapshots.append({
            "snapshot_id": snap_id, "parent_snapshot_id": None,
            "delta_upsert": {f"f{i}.txt": oid}, "delta_remove": [],
        })
        # The parent id is part of the hashed text, so ids depend on chain position.
        cid = blob_id(f"{seed}-commit-{i}-p={parent_id}".encode())
        commits.append({
            "commit_id": cid, "branch": "main", "message": f"c{i}",
            "author": "gabriel",
            "committed_at": committed_at,
            "parent_commit_id": parent_id, "parent2_commit_id": None,
            "snapshot_id": snap_id, "agent_id": "", "model_id": "", "toolchain_id": "",
            "sem_ver_bump": "none", "breaking_changes": [], "signature": "",
            "signer_key_id": "", "signer_public_key": "", "prompt_hash": "",
        })
        parent_id = cid
    # The head is declared ``str`` but is None when n == 0, hence the ignore.
    return commits, snapshots, parent_id, objects  # type: ignore[return-value]
| 119 | |
| 120 | |
| 121 | # --------------------------------------------------------------------------- |
| 122 | # CDN-1 |
| 123 | # --------------------------------------------------------------------------- |
| 124 | |
@pytest.mark.asyncio
async def test_cdn1_presign_mpack_get_uses_cdn_url() -> None:
    """presign_mpack_get returns a CDN URL when BLOB_STORAGE_CDN_BASE_URL is set.

    Presigns the same mpack key twice: once as configured, once with the
    backend's ``_cdn_base_url`` patched to ``CDN_BASE``. The patched URL must
    point at the CDN scheme and host and keep the original URL's path.
    Skipped unless the active backend is a ``BlobBackend``.
    """
    from urllib.parse import urlparse

    import musehub.storage.backends as _backends_mod
    from musehub.storage.backends import BlobBackend

    backend = _backends_mod.get_backend()
    if not isinstance(backend, BlobBackend):
        pytest.skip("CDN rewrite requires BlobBackend")

    mpack_key = "sha256:" + "a" * 64
    original_url = await backend.presign_mpack_get(mpack_key, ttl_seconds=3600)

    with unittest.mock.patch.object(backend, "_cdn_base_url", CDN_BASE):
        cdn_url = await backend.presign_mpack_get(mpack_key, ttl_seconds=3600)

    assert cdn_url.startswith(CDN_BASE), (
        f"expected CDN URL starting with {CDN_BASE!r}, got {cdn_url!r}"
    )

    # A prefix match alone would accept a look-alike host such as
    # "https://cdn.musehub.ai.evil.example", so compare scheme and host exactly.
    expected = urlparse(CDN_BASE)
    actual = urlparse(cdn_url)
    assert (actual.scheme, actual.netloc) == (expected.scheme, expected.netloc), (
        f"expected CDN host {expected.netloc!r}, got {actual.netloc!r}"
    )

    # Path portion should be preserved
    orig_path = urlparse(original_url).path
    cdn_path = actual.path
    assert orig_path == cdn_path, (
        f"CDN rewrite changed path: {orig_path!r} → {cdn_path!r}"
    )
| 151 | |
| 152 | |
| 153 | # --------------------------------------------------------------------------- |
| 154 | # CDN-2 |
| 155 | # --------------------------------------------------------------------------- |
| 156 | |
@pytest.mark.asyncio
async def test_cdn2_put_mpack_sets_cache_control_immutable() -> None:
    """put_mpack writes Cache-Control: public, max-age=31536000, immutable.

    Wraps the backend client's ``put_object`` so each call's keyword
    arguments are recorded before being forwarded to the real method, then
    checks the ``CacheControl`` argument of the first recorded call.
    Skipped unless the active backend is a ``BlobBackend``.
    """
    import musehub.storage.backends as _backends_mod
    from musehub.storage.backends import BlobBackend

    backend = _backends_mod.get_backend()
    if not isinstance(backend, BlobBackend):
        pytest.skip("Cache-Control header requires BlobBackend")

    data = b"cdn2-test-payload"
    # Derive the key from the stored bytes so it matches the payload digest,
    # as the mpack helpers in this file do.
    mpack_key = "sha256:" + hashlib.sha256(data).hexdigest()

    # Fetch the client once so the forwarded original and the patched
    # attribute are taken from the same object.
    client = backend._get_client()
    original_put = client.put_object

    # Intercept put_object calls to capture kwargs
    captured: list[dict] = []

    def _capture_put(**kwargs: JSONValue) -> JSONValue:
        captured.append(dict(kwargs))
        return original_put(**kwargs)

    with unittest.mock.patch.object(client, "put_object", side_effect=_capture_put):
        await backend.put_mpack(mpack_key, data)

    assert captured, "put_object was not called"
    call_kwargs = captured[0]
    cc = call_kwargs.get("CacheControl", "")
    assert "immutable" in cc, f"expected 'immutable' in CacheControl, got {cc!r}"
    assert "max-age=31536000" in cc, f"expected 'max-age=31536000' in CacheControl, got {cc!r}"
    assert "public" in cc, f"expected 'public' in CacheControl, got {cc!r}"
| 188 | |
| 189 | |
| 190 | # --------------------------------------------------------------------------- |
| 191 | # CDN-3 |
| 192 | # --------------------------------------------------------------------------- |
| 193 | |
@pytest.mark.asyncio
async def test_cdn3_regular_object_presign_not_cdn_rewritten() -> None:
    """presign_get for regular objects does NOT use the CDN base URL.

    Stores one object, presigns it while the backend's ``_cdn_base_url`` is
    patched to ``CDN_BASE``, and checks the URL does not start with that base.
    Skipped unless the active backend is a ``BlobBackend``.
    """
    import musehub.storage.backends as _backends_mod
    from musehub.storage.backends import BlobBackend

    backend = _backends_mod.get_backend()
    if not isinstance(backend, BlobBackend):
        pytest.skip("BlobBackend required")

    # Store the object first so that presign_get has something to sign.
    object_id = blob_id(b"cdn3-regular-object")
    await backend.put(object_id, b"cdn3-regular-object")

    cdn_enabled = unittest.mock.patch.object(backend, "_cdn_base_url", CDN_BASE)
    with cdn_enabled:
        presigned = await backend.presign_get(object_id, ttl_seconds=3600)

    routed_via_cdn = presigned.startswith(CDN_BASE)
    assert not routed_via_cdn, (
        f"regular object URL should NOT use CDN base URL, got {presigned!r}"
    )
| 214 | |
| 215 | |
| 216 | # --------------------------------------------------------------------------- |
| 217 | # CDN-4 |
| 218 | # --------------------------------------------------------------------------- |
| 219 | |
@pytest.mark.asyncio
async def test_cdn4_no_cdn_setting_no_regression() -> None:
    """presign_mpack_get returns the normal (non-CDN) URL when setting is absent.

    Patches the backend's ``_cdn_base_url`` to ``None`` and checks the
    presigned URL is non-empty and does not start with ``CDN_BASE``.
    Skipped unless the active backend is a ``BlobBackend``.
    """
    import musehub.storage.backends as _backends_mod
    from musehub.storage.backends import BlobBackend

    backend = _backends_mod.get_backend()
    if not isinstance(backend, BlobBackend):
        pytest.skip("BlobBackend required")

    key = "sha256:" + "b" * 64

    cdn_disabled = unittest.mock.patch.object(backend, "_cdn_base_url", None)
    with cdn_disabled:
        presigned = await backend.presign_mpack_get(key, ttl_seconds=3600)

    # The URL must be non-empty and must not start with the CDN base.
    is_plain_url = bool(presigned) and not presigned.startswith(CDN_BASE)
    assert is_plain_url, (
        f"expected non-CDN URL when CDN is not configured, got {presigned!r}"
    )