"""Tests for GET /{owner}/{repo_slug}/raw/{ref}/{path} endpoint. Covers: raw_file_semantic (ui_tree.py): - 200: file exists in snapshot manifest and object exists in storage - 404: file exists in manifest but object missing from storage - 404: file not in snapshot manifest at ref - 404: ref does not exist - correct Content-Type for text files (.py, .toml, .md) - correct Content-Type for binary files (.png) - Content-Disposition: inline for text, attachment for binary storage.exists() interface: - BlobBackend.exists(object_id) — single argument, no repo_id - BlobBackend satisfies the StorageBackend protocol """ from __future__ import annotations import secrets from datetime import datetime, timezone from unittest.mock import MagicMock, patch import msgpack import pytest from httpx import AsyncClient from sqlalchemy.ext.asyncio import AsyncSession from muse.core.types import fake_id from musehub.core.genesis import compute_branch_id, compute_identity_id, compute_repo_id from musehub.db.musehub_repo_models import MusehubBranch, MusehubCommit, MusehubCommitRef, MusehubObject, MusehubRepo, MusehubSnapshot, MusehubSnapshotRef from musehub.storage.backends import BlobBackend MINIO_ENDPOINT = "http://localhost:9000" MINIO_BUCKET = "muse-objects" MINIO_ACCESS_KEY = "minioadmin" MINIO_SECRET_KEY = "minioadmin" def _uid() -> str: return secrets.token_hex(16) def _minio_backend() -> BlobBackend: return BlobBackend( bucket=MINIO_BUCKET, endpoint_url=MINIO_ENDPOINT, access_key_id=MINIO_ACCESS_KEY, secret_access_key=MINIO_SECRET_KEY, region="us-east-1", ) # ── DB fixtures ─────────────────────────────────────────────────────────────── async def _make_repo( db: AsyncSession, owner: str = "gabriel", slug: str = "muse", ) -> MusehubRepo: created_at = datetime.now(tz=timezone.utc) owner_id = compute_identity_id(owner.encode()) repo_id = compute_repo_id(owner_id, slug, "code", created_at.isoformat()) repo = MusehubRepo( repo_id=repo_id, name=slug, owner=owner, slug=slug, visibility="public", owner_user_id=owner_id, created_at=created_at, updated_at=created_at, ) db.add(repo) await db.flush() return repo async def _make_snapshot( db: AsyncSession, repo_id: str, manifest: dict[str, str], ) -> MusehubSnapshot: snap = MusehubSnapshot( snapshot_id=fake_id(_uid()), manifest_blob=msgpack.packb(manifest, use_bin_type=True), entry_count=len(manifest), created_at=datetime.now(tz=timezone.utc), ) db.add(snap) db.add(MusehubSnapshotRef(repo_id=repo_id, snapshot_id=snap.snapshot_id)) await db.flush() return snap async def _make_branch_at_commit( db: AsyncSession, repo_id: str, branch_name: str, manifest: dict[str, str], ) -> tuple[MusehubCommit, MusehubSnapshot]: snap = await _make_snapshot(db, repo_id, manifest) now = datetime.now(tz=timezone.utc) commit = MusehubCommit( commit_id=fake_id(_uid()), snapshot_id=snap.snapshot_id, message="test commit", author="gabriel", branch=branch_name, parent_ids=[], timestamp=now, created_at=now, ) db.add(commit) db.add(MusehubCommitRef(repo_id=repo_id, commit_id=commit.commit_id)) await db.flush() branch = MusehubBranch( branch_id=compute_branch_id(repo_id, branch_name), repo_id=repo_id, name=branch_name, head_commit_id=commit.commit_id, ) db.add(branch) await db.flush() return commit, snap async def _make_object_in_db( db: AsyncSession, object_id: str, content: bytes, path: str = "file", ) -> MusehubObject: """Insert a MusehubObject row with storage_uri pointing to MinIO.""" obj = MusehubObject( object_id=object_id, path=path, size_bytes=len(content), storage_uri=f"s3://{MINIO_BUCKET}/objects/{object_id}", ) db.add(obj) await db.flush() return obj # ═══════════════════════════════════════════════════════════════════════════════ # StorageBackend interface — exists() takes exactly one argument (object_id) # ═══════════════════════════════════════════════════════════════════════════════ class TestStorageBackendExistsInterface: """Regression: exists() must accept a single object_id, never (repo_id, object_id).""" async def test_blob_backend_exists_single_arg(self) -> None: mock_client = MagicMock() mock_client.head_object.return_value = {} backend = BlobBackend(bucket="test-bucket", region="us-east-1") backend._client = mock_client oid = fake_id("test-object") result = await backend.exists(oid) assert result is True mock_client.head_object.assert_called_once_with( Bucket="test-bucket", Key=f"objects/{oid}" ) async def test_blob_backend_exists_error_returns_false(self) -> None: """BlobBackend.exists() returns False when head_object fails.""" mock_client = MagicMock() mock_client.head_object.side_effect = Exception("not found") backend = BlobBackend(bucket="test-bucket", region="us-east-1") backend._client = mock_client result = await backend.exists(fake_id("test-object")) assert result is False # ═══════════════════════════════════════════════════════════════════════════════ # GET /{owner}/{repo_slug}/raw/{ref}/{path} — endpoint tests # ═══════════════════════════════════════════════════════════════════════════════ class TestRawEndpoint: async def test_returns_200_for_file_in_manifest_and_storage( self, client: AsyncClient, db_session: AsyncSession ) -> None: repo = await _make_repo(db_session) file_content = b"[tool.poetry]\nname = 'muse'\n" oid = fake_id("pyproject-oid-" + _uid()) _, _ = await _make_branch_at_commit( db_session, repo.repo_id, "main", {"pyproject.toml": oid} ) await _make_object_in_db(db_session, oid, file_content, path="pyproject.toml") await db_session.commit() backend = _minio_backend() await backend.put(oid, file_content) with patch("musehub.api.routes.musehub.ui_tree._get_storage_backend", return_value=backend): resp = await client.get(f"/{repo.owner}/{repo.slug}/raw/main/pyproject.toml") assert resp.status_code == 200 assert resp.content == file_content async def test_returns_404_when_file_not_in_manifest( self, client: AsyncClient, db_session: AsyncSession ) -> None: repo = await _make_repo(db_session, slug="muse-raw2-" + _uid()) _, _ = await _make_branch_at_commit( db_session, repo.repo_id, "main", {"README.md": fake_id("readme-oid")} ) await db_session.commit() backend = _minio_backend() with patch("musehub.api.routes.musehub.ui_tree._get_storage_backend", return_value=backend): resp = await client.get(f"/{repo.owner}/{repo.slug}/raw/main/pyproject.toml") assert resp.status_code == 404 assert "pyproject.toml" in resp.json()["detail"] async def test_returns_404_when_object_missing_from_storage( self, client: AsyncClient, db_session: AsyncSession ) -> None: repo = await _make_repo(db_session, slug="muse-raw3-" + _uid()) oid = fake_id("missing-oid-" + _uid()) _, _ = await _make_branch_at_commit( db_session, repo.repo_id, "main", {"pyproject.toml": oid} ) await db_session.commit() # Backend has no object written for this OID — exists() returns False backend = _minio_backend() with patch("musehub.api.routes.musehub.ui_tree._get_storage_backend", return_value=backend): resp = await client.get(f"/{repo.owner}/{repo.slug}/raw/main/pyproject.toml") assert resp.status_code == 404 # Must be distinct from the manifest-miss message so we can tell the two # failure cases apart from logs/responses (critical for staging diagnosis). assert "storage" in resp.json()["detail"].lower() async def test_404_manifest_miss_and_storage_miss_have_distinct_messages( self, client: AsyncClient, db_session: AsyncSession ) -> None: """Regression: the two 404 paths must produce different detail strings. Without this the staging 404 is undiagnosable — we can't tell whether the snapshot manifest has the file or whether the object is missing from R2. """ backend = _minio_backend() # Case A: file not in manifest at all slug_a = "muse-raw3b-" + _uid() repo_a = await _make_repo(db_session, slug=slug_a) _, _ = await _make_branch_at_commit( db_session, repo_a.repo_id, "main", {"README.md": fake_id("readme-oid")} ) await db_session.commit() with patch("musehub.api.routes.musehub.ui_tree._get_storage_backend", return_value=backend): resp_a = await client.get(f"/{repo_a.owner}/{repo_a.slug}/raw/main/pyproject.toml") # Case B: file in manifest, object missing from storage slug_b = "muse-raw3c-" + _uid() repo_b = await _make_repo(db_session, slug=slug_b) _, _ = await _make_branch_at_commit( db_session, repo_b.repo_id, "main", {"pyproject.toml": fake_id("missing-oid-" + _uid())} ) await db_session.commit() with patch("musehub.api.routes.musehub.ui_tree._get_storage_backend", return_value=backend): resp_b = await client.get(f"/{repo_b.owner}/{repo_b.slug}/raw/main/pyproject.toml") assert resp_a.status_code == 404 assert resp_b.status_code == 404 assert resp_a.json()["detail"] != resp_b.json()["detail"], ( "manifest-miss and storage-miss must produce different detail strings" ) async def test_returns_404_for_unknown_ref( self, client: AsyncClient, db_session: AsyncSession ) -> None: repo = await _make_repo(db_session, slug="muse-raw4-" + _uid()) _, _ = await _make_branch_at_commit( db_session, repo.repo_id, "main", {"pyproject.toml": fake_id("oid")} ) await db_session.commit() backend = _minio_backend() with patch("musehub.api.routes.musehub.ui_tree._get_storage_backend", return_value=backend): resp = await client.get(f"/{repo.owner}/{repo.slug}/raw/nonexistent-branch/pyproject.toml") assert resp.status_code == 404 async def test_text_file_served_as_text_plain( self, client: AsyncClient, db_session: AsyncSession ) -> None: repo = await _make_repo(db_session, slug="muse-raw5-" + _uid()) oid = fake_id("py-oid-" + _uid()) content = b"def main(): pass\n" _, _ = await _make_branch_at_commit( db_session, repo.repo_id, "main", {"musehub/main.py": oid} ) await _make_object_in_db(db_session, oid, content, path="musehub/main.py") await db_session.commit() backend = _minio_backend() await backend.put(oid, content) with patch("musehub.api.routes.musehub.ui_tree._get_storage_backend", return_value=backend): resp = await client.get(f"/{repo.owner}/{repo.slug}/raw/main/musehub/main.py") assert resp.status_code == 200 assert "text/plain" in resp.headers["content-type"] async def test_binary_file_served_as_attachment( self, client: AsyncClient, db_session: AsyncSession ) -> None: repo = await _make_repo(db_session, slug="muse-raw6-" + _uid()) oid = fake_id("png-oid-" + _uid()) png_bytes = b"\x89PNG\r\n\x1a\n" _, _ = await _make_branch_at_commit( db_session, repo.repo_id, "main", {"logo.png": oid} ) await _make_object_in_db(db_session, oid, png_bytes, path="logo.png") await db_session.commit() backend = _minio_backend() await backend.put(oid, png_bytes) with patch("musehub.api.routes.musehub.ui_tree._get_storage_backend", return_value=backend): resp = await client.get(f"/{repo.owner}/{repo.slug}/raw/main/logo.png") assert resp.status_code == 200 assert resp.headers["content-type"] == "image/png" assert "attachment" in resp.headers["content-disposition"]