"""TDD — Phase 5: CDN-first reads (issue #63). CDN-1 When BLOB_STORAGE_CDN_BASE_URL is set, presign_mpack_get returns a URL whose host is the CDN base URL, not the internal storage endpoint. CDN-2 put_mpack writes with Cache-Control: public, max-age=31536000, immutable so that Cloudflare caches mpack bytes on first GET and serves from edge on every subsequent clone. CDN-3 Regular object presigned URLs (presign_get) are NOT rewritten through the CDN — only mpack GETs use the CDN base URL. CDN-4 When BLOB_STORAGE_CDN_BASE_URL is not set, presign_mpack_get behaves exactly as before (no CDN rewrite, no regression). CDN-5 wire_fetch_mpack pack_urls use CDN-rewritten URLs end-to-end — the client receives CDN URLs when the setting is active. """ from __future__ import annotations import datetime import hashlib import unittest.mock from collections.abc import Mapping import msgpack import pytest from sqlalchemy.ext.asyncio import AsyncSession from muse.core.types import blob_id from musehub.core.genesis import compute_identity_id from musehub.services.musehub_repository import create_repo from musehub.types.json_types import JSONValue CDN_BASE = "https://cdn.musehub.ai" # --------------------------------------------------------------------------- # Helpers — reuse push helpers from earlier phases # --------------------------------------------------------------------------- def _make_mpack(objects: Mapping[str, bytes]) -> tuple[bytes, str]: mpack = { "commits": [], "snapshots": [], "objects": [{"object_id": oid, "content": data} for oid, data in objects.items()], "branch_heads": {}, } wire_bytes = msgpack.packb(mpack, use_bin_type=True) mpack_key = "sha256:" + hashlib.sha256(wire_bytes).hexdigest() return wire_bytes, mpack_key async def _push_and_index( session: AsyncSession, repo_id: str, objects: dict[str, bytes], commits: list[dict] | None = None, snapshots: list[dict] | None = None, branch_heads: dict[str, str] | None = None, ) -> str: import musehub.storage.backends as _backends_mod from musehub.core.genesis import compute_job_id from musehub.db.musehub_jobs_models import MusehubBackgroundJob from musehub.services.musehub_wire import process_mpack_index_job mpack = { "commits": commits or [], "snapshots": snapshots or [], "objects": [{"object_id": oid, "content": data} for oid, data in objects.items()], "branch_heads": branch_heads or {}, } wire_bytes = msgpack.packb(mpack, use_bin_type=True) mpack_key = "sha256:" + hashlib.sha256(wire_bytes).hexdigest() backend = _backends_mod.get_backend() await backend.put_mpack(mpack_key, wire_bytes) now = datetime.datetime.now(datetime.timezone.utc) job_id = compute_job_id(repo_id, "mpack.index", now.isoformat()) session.add(MusehubBackgroundJob( job_id=job_id, repo_id=repo_id, job_type="mpack.index", payload={"mpack_key": mpack_key, "branch": "main", "head": (commits or [{}])[-1].get("commit_id", ""), "pusher_id": "", "declared_objects_count": len(objects), "declared_commits_count": len(commits or [])}, status="pending", created_at=now, attempt=0, )) await session.commit() await process_mpack_index_job(session, job_id) await session.commit() return mpack_key def _make_commit_chain(n: int, seed: str) -> tuple[list[dict], list[dict], str, dict[str, str]]: objects: dict[str, bytes] = {} commits = [] snapshots = [] parent_id = None for i in range(n): oid = blob_id(f"{seed}-obj-{i}".encode()) objects[oid] = f"{seed}-obj-{i}".encode() snap_id = blob_id(f"{seed}-snap-{i}".encode()) snapshots.append({"snapshot_id": snap_id, "parent_snapshot_id": None, "delta_upsert": {f"f{i}.txt": oid}, "delta_remove": []}) cid = blob_id(f"{seed}-commit-{i}-p={parent_id}".encode()) commits.append({ "commit_id": cid, "branch": "main", "message": f"c{i}", "author": "gabriel", "committed_at": datetime.datetime(2025, 1, 1, tzinfo=datetime.timezone.utc).isoformat(), "parent_commit_id": parent_id, "parent2_commit_id": None, "snapshot_id": snap_id, "agent_id": "", "model_id": "", "toolchain_id": "", "sem_ver_bump": "none", "breaking_changes": [], "signature": "", "signer_key_id": "", "signer_public_key": "", "prompt_hash": "", }) parent_id = cid return commits, snapshots, parent_id, objects # type: ignore[return-value] # --------------------------------------------------------------------------- # CDN-1 # --------------------------------------------------------------------------- @pytest.mark.asyncio async def test_cdn1_presign_mpack_get_uses_cdn_url() -> None: """presign_mpack_get returns a CDN URL when BLOB_STORAGE_CDN_BASE_URL is set.""" import musehub.storage.backends as _backends_mod from musehub.storage.backends import BlobBackend backend = _backends_mod.get_backend() if not isinstance(backend, BlobBackend): pytest.skip("CDN rewrite requires BlobBackend") mpack_key = "sha256:" + "a" * 64 original_url = await backend.presign_mpack_get(mpack_key, ttl_seconds=3600) with unittest.mock.patch.object(backend, "_cdn_base_url", CDN_BASE): cdn_url = await backend.presign_mpack_get(mpack_key, ttl_seconds=3600) assert cdn_url.startswith(CDN_BASE), ( f"expected CDN URL starting with {CDN_BASE!r}, got {cdn_url!r}" ) # Path portion should be preserved from urllib.parse import urlparse orig_path = urlparse(original_url).path cdn_path = urlparse(cdn_url).path assert orig_path == cdn_path, ( f"CDN rewrite changed path: {orig_path!r} → {cdn_path!r}" ) # --------------------------------------------------------------------------- # CDN-2 # --------------------------------------------------------------------------- @pytest.mark.asyncio async def test_cdn2_put_mpack_sets_cache_control_immutable() -> None: """put_mpack writes Cache-Control: public, max-age=31536000, immutable.""" import musehub.storage.backends as _backends_mod from musehub.storage.backends import BlobBackend backend = _backends_mod.get_backend() if not isinstance(backend, BlobBackend): pytest.skip("Cache-Control header requires BlobBackend") mpack_key = "sha256:" + hashlib.sha256(b"cdn2-test").hexdigest() data = b"cdn2-test-payload" # Intercept put_object calls to capture kwargs captured: list[dict] = [] original_put = backend._get_client().put_object def _capture_put(**kwargs: JSONValue) -> JSONValue: captured.append(dict(kwargs)) return original_put(**kwargs) client = backend._get_client() with unittest.mock.patch.object(client, "put_object", side_effect=_capture_put): await backend.put_mpack(mpack_key, data) assert captured, "put_object was not called" call_kwargs = captured[0] cc = call_kwargs.get("CacheControl", "") assert "immutable" in cc, f"expected 'immutable' in CacheControl, got {cc!r}" assert "max-age=31536000" in cc, f"expected 'max-age=31536000' in CacheControl, got {cc!r}" assert "public" in cc, f"expected 'public' in CacheControl, got {cc!r}" # --------------------------------------------------------------------------- # CDN-3 # --------------------------------------------------------------------------- @pytest.mark.asyncio async def test_cdn3_regular_object_presign_not_cdn_rewritten() -> None: """presign_get for regular objects does NOT use the CDN base URL.""" import musehub.storage.backends as _backends_mod from musehub.storage.backends import BlobBackend backend = _backends_mod.get_backend() if not isinstance(backend, BlobBackend): pytest.skip("BlobBackend required") # Write a real object so presign_get works oid = blob_id(b"cdn3-regular-object") await backend.put(oid, b"cdn3-regular-object") with unittest.mock.patch.object(backend, "_cdn_base_url", CDN_BASE): regular_url = await backend.presign_get(oid, ttl_seconds=3600) assert not regular_url.startswith(CDN_BASE), ( f"regular object URL should NOT use CDN base URL, got {regular_url!r}" ) # --------------------------------------------------------------------------- # CDN-4 # --------------------------------------------------------------------------- @pytest.mark.asyncio async def test_cdn4_no_cdn_setting_no_regression() -> None: """presign_mpack_get returns the normal (non-CDN) URL when setting is absent.""" import musehub.storage.backends as _backends_mod from musehub.storage.backends import BlobBackend backend = _backends_mod.get_backend() if not isinstance(backend, BlobBackend): pytest.skip("BlobBackend required") mpack_key = "sha256:" + "b" * 64 with unittest.mock.patch.object(backend, "_cdn_base_url", None): url = await backend.presign_mpack_get(mpack_key, ttl_seconds=3600) # Must be a non-empty URL that does not start with the CDN base assert url and not url.startswith(CDN_BASE), ( f"expected non-CDN URL when CDN is not configured, got {url!r}" )