"""SSR tests for the repo home page clone URL. Regression tests for two bugs: 1. Clone URL showed `muse remote add local musehub://owner/slug` — wrong command and wrong scheme. The muse CLI does not understand musehub://. 2. Clone URL was hardcoded to musehub.ai regardless of which host served the request, so staging.musehub.ai always showed the wrong URL. Fixes verified here: - GET /{owner}/{slug} renders `muse clone https://...` (not musehub://) - Clone URL host matches the request host (not hardcoded musehub.ai) - page_json block exposes `clone_url` as a valid https URL """ from __future__ import annotations import json import pytest from httpx import AsyncClient from sqlalchemy.ext.asyncio import AsyncSession from datetime import datetime, timezone from musehub.core.genesis import compute_identity_id, compute_repo_id from musehub.db.musehub_repo_models import MusehubRepo # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- async def _make_repo( db: AsyncSession, owner: str = "gabriel", slug: str = "musehub", ) -> MusehubRepo: created_at = datetime.now(tz=timezone.utc) owner_id = compute_identity_id(owner.encode()) repo = MusehubRepo( repo_id=compute_repo_id(owner_id, slug, "code", created_at.isoformat()), name=slug, owner=owner, slug=slug, visibility="public", owner_user_id=owner_id, created_at=created_at, updated_at=created_at, ) db.add(repo) await db.commit() await db.refresh(repo) return repo # --------------------------------------------------------------------------- # Clone URL tests — Bug 1: wrong command and scheme # --------------------------------------------------------------------------- async def test_repo_home_clone_url_uses_muse_clone_not_remote_add( client: AsyncClient, db_session: AsyncSession, ) -> None: """Repo home page must show `muse clone `, not `muse remote add local musehub://`.""" await _make_repo(db_session, owner="alice", slug="my-repo") resp = await client.get("/alice/my-repo") assert resp.status_code == 200 assert "muse clone" in resp.text, "Sidebar must show 'muse clone '" assert "muse remote add" not in resp.text, ( "muse remote add is not the clone command — remove it from the sidebar" ) async def test_repo_home_clone_url_has_no_musehub_scheme( client: AsyncClient, db_session: AsyncSession, ) -> None: """Clone URL in the repo page must NOT use the musehub:// scheme. The muse CLI does not resolve musehub:// — it only handles http(s)://. A user copying this URL and running `muse clone musehub://...` will get a transport error. """ await _make_repo(db_session, owner="bob", slug="another-repo") resp = await client.get("/bob/another-repo") assert resp.status_code == 200 assert "musehub://" not in resp.text, ( "musehub:// is not a valid muse CLI scheme — remove it from the page" ) async def test_repo_home_clone_url_uses_request_host( client: AsyncClient, db_session: AsyncSession, ) -> None: """Clone URL uses the actual request host, not a hardcoded musehub.ai. On staging.musehub.ai the clone URL must be https://staging.musehub.ai/... not https://musehub.ai/... This test uses the test client's base_url (http://test) and verifies the clone URL contains that host. """ await _make_repo(db_session, owner="carol", slug="test-repo") resp = await client.get("/carol/test-repo") assert resp.status_code == 200 # The httpx test client base_url is http://test — clone URL must reflect that. assert "http://test" in resp.text, ( "Clone URL must be built from request.base_url, not hardcoded to musehub.ai" ) assert "musehub.ai" not in resp.text or "staging.musehub.ai" not in resp.text, ( "Clone URL must not hardcode musehub.ai when served from a different host" ) async def test_repo_home_page_json_clone_url_is_valid_https( client: AsyncClient, db_session: AsyncSession, ) -> None: """The page_json block exposes clone_url as a valid http(s):// URL. The TypeScript initialiser reads clone_url from page_json to populate the clone input. It must be a URL the muse CLI can use directly. """ await _make_repo(db_session, owner="dave", slug="json-repo") resp = await client.get("/dave/json-repo") assert resp.status_code == 200 # Extract the page_json block content text = resp.text start = text.find('id="page-data"') assert start != -1, "page-data script block not found" # Find the content between the script tags content_start = text.find(">", start) + 1 content_end = text.find("", content_start) page_json_raw = text[content_start:content_end].strip() data = json.loads(page_json_raw) clone_url = data.get("clone_url", "") assert clone_url, "page_json must include clone_url" assert clone_url.startswith("http"), ( f"clone_url must be http(s)://, got: {clone_url!r}" ) assert "musehub://" not in clone_url assert "dave" in clone_url assert "json-repo" in clone_url # --------------------------------------------------------------------------- # Host allowlist test — spoofed Host header falls back to public_url # --------------------------------------------------------------------------- async def test_repo_home_clone_url_rejects_spoofed_host( db_session: AsyncSession, ) -> None: """A spoofed Host header must NOT appear in the clone URL. An attacker who can set an arbitrary Host header (e.g. evil.com) must not be able to make the server render a clone URL pointing at their domain. The route validates the host against settings.allowed_hosts and falls back to settings.public_url when the host is not on the allowlist. """ from httpx import AsyncClient, ASGITransport from musehub.main import app await _make_repo(db_session, owner="eve", slug="evil-test") async with AsyncClient( transport=ASGITransport(app=app), base_url="http://evil.com", headers={"Host": "evil.com"}, ) as evil_client: resp = await evil_client.get("/eve/evil-test") assert resp.status_code == 200 # The clone input value must not reflect the spoofed host. # (evil.com may legitimately appear in oEmbed/og tags that encode the request URL.) assert 'value="muse clone http://evil.com' not in resp.text, ( "Spoofed Host header must not appear in the clone input value" ) # --------------------------------------------------------------------------- # nginx config test — Bug 2: fetch endpoint missing from long-timeout block # --------------------------------------------------------------------------- def test_nginx_config_fetch_endpoints_have_long_timeout() -> None: """nginx-cf.conf must give /fetch and /fetch/objects the same 300s timeout as /push. Without this, cloning a large repo times out at 60s (the default location / timeout), producing an HTTP 504 that the muse CLI surfaces as '❌ Fetch objects failed: HTTP 504'. """ import pathlib nginx_conf = pathlib.Path(__file__).parent.parent / "deploy" / "nginx-cf.conf" assert nginx_conf.exists(), f"nginx config not found at {nginx_conf}" text = nginx_conf.read_text() # Must have a location block covering the fetch endpoints assert "fetch" in text, "nginx config must have a location block for fetch endpoints" # The fetch block must have a 300s (or longer) timeout, not just 60s lines = text.splitlines() in_fetch_block = False fetch_block_timeout: str | None = None for line in lines: stripped = line.strip() if "fetch" in stripped and "location" in stripped: in_fetch_block = True if in_fetch_block and "proxy_read_timeout" in stripped: fetch_block_timeout = stripped break if in_fetch_block and stripped == "}": in_fetch_block = False assert fetch_block_timeout is not None, ( "No proxy_read_timeout found in the fetch location block. " "Add: proxy_read_timeout 300s; to the fetch location block." ) # Extract seconds value timeout_val = fetch_block_timeout.split()[-1].rstrip(";").rstrip("s") assert int(timeout_val) >= 300, ( f"fetch location block timeout must be ≥300s, got {timeout_val}s" )