"""TDD — Phase 1 mpack push security: size gates. Three gates, zero trust: 1. presign rejects size_bytes above _MAX_BUNDLE_BYTES → HTTP 413 Fires before a presigned URL is issued — client never gets a URL to abuse. 2. unpack-mpack rejects wire_bytes above _MAX_BUNDLE_BYTES → HTTP 422 Defends against a client that bypassed presign and PUT directly to MinIO. 3. unpack-mpack rejects commits_count / objects_count above their caps → HTTP 422 These are logging inputs, not trusted counts, but bounding them prevents absurd allocations and log lines in the background worker. All caps live in musehub.config.Settings so they can be tuned per environment. """ from __future__ import annotations import hashlib from unittest.mock import AsyncMock, patch import msgpack import pytest import pytest_asyncio from httpx import AsyncClient, ASGITransport from sqlalchemy.ext.asyncio import AsyncSession from musehub.auth.request_signing import MSignContext, require_signed_request, optional_signed_request from musehub.db.database import get_db from musehub.main import app _AUTH_CTX = MSignContext( handle="gabriel", identity_id="sha256:" + "0" * 64, is_agent=False, is_admin=True, ) @pytest_asyncio.fixture() async def client(db_session: AsyncSession) -> None: async def _override_get_db() -> None: yield db_session app.dependency_overrides[get_db] = _override_get_db app.dependency_overrides[require_signed_request] = lambda: _AUTH_CTX app.dependency_overrides[optional_signed_request] = lambda: _AUTH_CTX async with AsyncClient( transport=ASGITransport(app=app), base_url="https://localhost:1337", ) as c: yield c app.dependency_overrides.clear() @pytest_asyncio.fixture() async def repo(client: AsyncClient) -> None: resp = await client.post( "/api/repos", json={"owner": "gabriel", "name": "size-gates-test", "visibility": "public", "initialize": False}, ) assert resp.status_code in (200, 201), resp.text data = resp.json() yield data["slug"] await client.delete(f"/api/repos/{data['repoId']}") # ── 
# ── Gate 1: presign rejects oversized mpacks ──────────────────────────────

# Content-Type sent with every msgpack-encoded request body in this module.
_MSGPACK_HEADERS = {"Content-Type": "application/x-msgpack"}


@pytest.mark.asyncio
async def test_presign_rejects_oversized_mpack(
    client: AsyncClient,
    repo: str,
) -> None:
    """POST /push/mpack-presign with size_bytes one above the cap → 413.

    The cap is meant to be enforced before a presigned URL is issued, so the
    client never receives a URL it could abuse.
    """
    from musehub.config import get_settings

    too_big = get_settings().mpack_max_bytes + 1
    payload = {
        "mpack_key": "sha256:" + "a" * 64,
        "size_bytes": too_big,
    }

    response = await client.post(
        f"/gabriel/{repo}/push/mpack-presign",
        content=msgpack.packb(payload, use_bin_type=True),
        headers=_MSGPACK_HEADERS,
    )

    assert response.status_code == 413, (
        f"Expected 413 for mpack size {too_big:,} bytes, got {response.status_code}: {response.text}"
    )


@pytest.mark.skip(reason="muse wire protocol in flux")
@pytest.mark.asyncio
async def test_presign_accepts_mpack_at_limit(
    client: AsyncClient,
    repo: str,
) -> None:
    """POST /push/mpack-presign with size_bytes == cap → 200 (not rejected).

    The cap is inclusive: a size exactly equal to it is still allowed.
    """
    from musehub.config import get_settings

    cap = get_settings().mpack_max_bytes
    payload = {
        "mpack_key": "sha256:" + "b" * 64,
        "size_bytes": cap,
    }

    response = await client.post(
        f"/gabriel/{repo}/push/mpack-presign",
        content=msgpack.packb(payload, use_bin_type=True),
        headers=_MSGPACK_HEADERS,
    )

    assert response.status_code == 200, (
        f"Expected 200 for mpack size == cap ({cap:,}), got {response.status_code}: {response.text}"
    )


# ── Gate 2: unpack-mpack rejects oversized wire bytes ─────────────────────


@pytest.mark.skip(reason="muse wire protocol in flux")
@pytest.mark.asyncio
async def test_unpack_mpack_rejects_oversized_wire_bytes(
    client: AsyncClient,
    repo: str,
) -> None:
    """POST /push/unpack-mpack where the storage backend returns bytes above cap → 422.

    Defends against a client that bypassed the presign check and PUT a giant
    blob directly to MinIO.  The gate is expected to fire after the MinIO GET,
    before the background job is enqueued.
    """
    from musehub.config import get_settings

    # Filler payload one byte over the cap.  It does not need to be a real
    # mpack — the size check is expected to fire before any parsing.
    blob = b"x" * (get_settings().mpack_max_bytes + 1)
    payload = {
        "mpack_key": "sha256:" + hashlib.sha256(blob).hexdigest(),
        "branch": "main",
        "head": "sha256:" + "c" * 64,
        "commits_count": 1,
        "objects_count": 1,
    }

    # Stand-in storage backend whose get_mpack hands back the oversized blob.
    backend = AsyncMock()
    backend.get_mpack = AsyncMock(return_value=blob)

    with patch(
        "musehub.services.musehub_wire.get_backend",
        return_value=backend,
    ):
        response = await client.post(
            f"/gabriel/{repo}/push/unpack-mpack",
            content=msgpack.packb(payload, use_bin_type=True),
            headers=_MSGPACK_HEADERS,
        )

    assert response.status_code == 422, (
        f"Expected 422 for oversized wire bytes ({len(blob):,}), "
        f"got {response.status_code}: {response.text}"
    )


# ── Gate 3: unpack-mpack rejects absurd count values ─────────────────────


@pytest.mark.asyncio
async def test_unpack_mpack_rejects_absurd_commits_count(
    client: AsyncClient,
    repo: str,
) -> None:
    """commits_count one above its cap → 422 (no storage backend is mocked here)."""
    from musehub.config import get_settings

    limits = get_settings()
    payload = {
        "mpack_key": "sha256:" + "d" * 64,
        "branch": "main",
        "head": "sha256:" + "e" * 64,
        "commits_count": limits.mpack_max_commits + 1,
        "objects_count": 1,
    }

    response = await client.post(
        f"/gabriel/{repo}/push/unpack-mpack",
        content=msgpack.packb(payload, use_bin_type=True),
        headers=_MSGPACK_HEADERS,
    )

    assert response.status_code == 422, (
        f"Expected 422 for commits_count above cap, got {response.status_code}: {response.text}"
    )


@pytest.mark.asyncio
async def test_unpack_mpack_rejects_absurd_objects_count(
    client: AsyncClient,
    repo: str,
) -> None:
    """objects_count one above its cap → 422 (no storage backend is mocked here)."""
    from musehub.config import get_settings

    limits = get_settings()
    payload = {
        "mpack_key": "sha256:" + "f" * 64,
        "branch": "main",
        "head": "sha256:" + "a" * 64,
        "commits_count": 1,
        "objects_count": limits.mpack_max_objects + 1,
    }

    response = await client.post(
        f"/gabriel/{repo}/push/unpack-mpack",
        content=msgpack.packb(payload, use_bin_type=True),
        headers=_MSGPACK_HEADERS,
    )

    assert response.status_code == 422, (
        f"Expected 422 for objects_count above cap, got {response.status_code}: {response.text}"
    )


@pytest.mark.skip(reason="muse wire protocol in flux")
@pytest.mark.asyncio
async def test_unpack_mpack_accepts_counts_at_limit(
    client: AsyncClient,
    repo: str,
) -> None:
    """commits_count and objects_count exactly at cap → not rejected by the count gate.

    The mocked backend returns ``None`` for the mpack, so the request is still
    expected to end in a 422 — but the response must not mention either
    count field, which is what shows the count gate itself stayed quiet.
    """
    from musehub.config import get_settings

    limits = get_settings()
    placeholder = b"fake"
    payload = {
        "mpack_key": "sha256:" + hashlib.sha256(placeholder).hexdigest(),
        "branch": "main",
        "head": "sha256:" + "b" * 64,
        "commits_count": limits.mpack_max_commits,
        "objects_count": limits.mpack_max_objects,
    }

    # Stand-in storage backend that reports the mpack as missing.
    backend = AsyncMock()
    backend.get_mpack = AsyncMock(return_value=None)

    with patch(
        "musehub.services.musehub_wire.get_backend",
        return_value=backend,
    ):
        response = await client.post(
            f"/gabriel/{repo}/push/unpack-mpack",
            content=msgpack.packb(payload, use_bin_type=True),
            headers=_MSGPACK_HEADERS,
        )

    # Count gate did not fire. The 422 here is from the missing mpack — correct.
    assert response.status_code == 422, response.text
    body = response.json()
    rendered = str(body).lower()
    assert not ("commits_count" in rendered or "objects_count" in rendered), (
        f"Count gate fired at limit — should only fire above limit. Response: {body}"
    )