"""Tier 4 — Stress tests for the clone browser (issue #17).

Verifies correctness and response-time bounds under large data volumes.

Timing bounds are measured with ``time.perf_counter`` — no pytest-benchmark
dependency.  All thresholds are conservative multiples of observed median
times so they remain stable under CI load.

Cases:
  S01  List page renders correctly with 500 clusters — response < 500ms
  S02  Detail page renders correctly with 300-member cluster — response < 300ms
  S03  Dashboard clones card query is fast with 10 000 total clusters — < 500ms
  S04  Tier filter over 300 exact + 200 near clusters returns correct subset — < 300ms
  S05  File breakdown with 50 distinct files in one cluster — no truncation
"""
from __future__ import annotations

import json
import time

import pytest
import pytest_asyncio
from httpx import AsyncClient
from sqlalchemy.dialects.postgresql import insert as pg_insert
from sqlalchemy.ext.asyncio import AsyncSession

from musehub.db.musehub_intel_models import MusehubIntelClones
from tests.factories import create_repo
from muse.core.types import long_id

# NOTE(review): ``MusehubRepo`` appears in annotations below but is not among
# the imports above.  Thanks to ``from __future__ import annotations`` the
# annotations are never evaluated at runtime, so nothing breaks, but linters
# and type checkers will flag it — TODO import it under ``TYPE_CHECKING`` from
# its defining module.

# Ref value stamped on every cluster row created by this module.
_REF = long_id("a" * 64)


def _member_record(address: str) -> dict[str, object]:
    """Return one clone-member dict for *address* with fixed placeholder ids.

    Key order matters: the dict is serialised with ``json.dumps`` and the
    resulting string is what gets stored in ``members_json``.
    """
    return {
        "address": address,
        "kind": "function",
        "language": "Python",
        "body_hash": long_id("a" * 64),
        "signature_id": long_id("b" * 64),
        "content_id": long_id("a" * 64),
    }


async def _bulk_insert(
    session: AsyncSession,
    repo_id: str,
    n: int,
    tier: str = "exact",
    member_count: int = 2,
    members_json: str | None = None,
) -> None:
    """Insert *n* cluster rows for *repo_id* and commit.

    Rows are written with multi-row ``INSERT ... ON CONFLICT DO NOTHING``
    statements (a list of dicts passed to ``.values()``), one statement per
    batch.

    Args:
        session: Session used to execute the inserts and the final commit.
        repo_id: Value stored in each row's ``repo_id`` column.
        n: Number of rows to generate.
        tier: Value stored in each row's ``tier`` column; its first character
            also prefixes the string passed to ``long_id`` for the cluster
            hash, so different tiers get different hashes.
        member_count: Stored in ``member_count`` and, when *members_json* is
            not supplied, the number of generated members.
        members_json: Pre-serialised members payload shared by every row.
            When ``None`` a default payload of *member_count* members is built.
    """
    if members_json is None:
        default_members = [
            _member_record(f"src/a.py::fn{idx}") for idx in range(member_count)
        ]
        members_json = json.dumps(default_members)

    rows = [
        {
            "repo_id": repo_id,
            # One tier character + a 63-wide zero-padded index = 64 characters.
            "cluster_hash": long_id(tier[0] + str(seq).zfill(63)),
            "tier": tier,
            "member_count": member_count,
            "members_json": members_json,
            "ref": _REF,
        }
        for seq in range(n)
    ]

    # Insert in batches of 500 to stay within asyncpg parameter limit.
    chunk_size = 500
    for offset in range(0, len(rows), chunk_size):
        chunk = rows[offset : offset + chunk_size]
        stmt = pg_insert(MusehubIntelClones).values(chunk).on_conflict_do_nothing()
        await session.execute(stmt)
    await session.commit()


@pytest_asyncio.fixture
async def repo_500(db_session: AsyncSession) -> MusehubRepo:
    """Repo ``stressuser/stress-500`` holding 300 exact + 200 near clusters."""
    created = await create_repo(db_session, owner="stressuser", slug="stress-500")
    repo_key = str(created.repo_id)
    await _bulk_insert(db_session, repo_key, 300, tier="exact")
    await _bulk_insert(db_session, repo_key, 200, tier="near")
    return created


@pytest_asyncio.fixture
async def repo_big_cluster(db_session: AsyncSession) -> tuple[MusehubRepo, str]:
    """One cluster with 300 members spread across 50 files.

    Returns the repo together with the cluster hash of the inserted row.
    """
    created = await create_repo(db_session, owner="stressuser", slug="stress-big")

    # ``idx // 6`` puts six consecutive members in each module file:
    # 300 members / 6 = 50 distinct files (module_0 .. module_49).
    members = [
        _member_record(f"src/module_{idx // 6}.py::fn_{idx}") for idx in range(300)
    ]
    cluster_hash = long_id("b" * 64)

    stmt = (
        pg_insert(MusehubIntelClones)
        .values(
            repo_id=str(created.repo_id),
            cluster_hash=cluster_hash,
            tier="near",
            member_count=300,
            members_json=json.dumps(members),
            ref=_REF,
        )
        .on_conflict_do_nothing()
    )
    await db_session.execute(stmt)
    await db_session.commit()
    return created, cluster_hash


@pytest_asyncio.fixture
async def repo_10k(db_session: AsyncSession) -> MusehubRepo:
    """Repo ``stressuser/stress-10k`` holding 5000 exact + 5000 near clusters."""
    created = await create_repo(db_session, owner="stressuser", slug="stress-10k")
    repo_key = str(created.repo_id)
    await _bulk_insert(db_session, repo_key, 5000, tier="exact")
    await _bulk_insert(db_session, repo_key, 5000, tier="near")
    return created


class TestClonesStress:
    """Response-time and correctness checks for the clone pages at volume.

    Each timed test wraps only the HTTP request in ``time.perf_counter``
    readings; fixture setup (row insertion) is outside the measured window.
    """

    @pytest.mark.asyncio
    async def test_S01_list_500_clusters_under_500ms(
        self, client: AsyncClient, repo_500: MusehubRepo
    ) -> None:
        """500-cluster list page responds within 500ms."""
        started = time.perf_counter()
        resp = await client.get("/stressuser/stress-500/intel/clones")
        took = time.perf_counter() - started

        assert resp.status_code == 200
        assert b"cl-row" in resp.content
        assert took < 0.5, f"List page too slow: {took:.3f}s"

    @pytest.mark.asyncio
    async def test_S02_detail_300_members_under_300ms(
        self, client: AsyncClient, repo_big_cluster: tuple[MusehubRepo, str]
    ) -> None:
        """300-member cluster detail page responds within 300ms."""
        # Only the cluster hash is needed; the URL hard-codes owner and slug.
        _, cluster_hash = repo_big_cluster

        started = time.perf_counter()
        resp = await client.get(
            f"/stressuser/stress-big/intel/clones/detail?cluster={cluster_hash}"
        )
        took = time.perf_counter() - started

        assert resp.status_code == 200
        assert b"cl-member-row" in resp.content
        assert took < 0.3, f"Detail page too slow: {took:.3f}s"

    @pytest.mark.asyncio
    async def test_S03_dashboard_10k_clusters_under_500ms(
        self, client: AsyncClient, repo_10k: MusehubRepo
    ) -> None:
        """Dashboard clones card with 10 000 rows responds within 500ms."""
        started = time.perf_counter()
        resp = await client.get("/stressuser/stress-10k/intel")
        took = time.perf_counter() - started

        assert resp.status_code == 200
        assert b"CLONES" in resp.content
        assert took < 0.5, f"Dashboard too slow with 10k rows: {took:.3f}s"

    @pytest.mark.asyncio
    async def test_S04_tier_filter_500_each_under_300ms(
        self, client: AsyncClient, repo_500: MusehubRepo
    ) -> None:
        """Tier filter over 300 exact + 200 near responds within 300ms.

        NOTE(review): the test name says "500 each", but the ``repo_500``
        fixture inserts 500 clusters in total (300 exact, 200 near).
        """
        started = time.perf_counter()
        resp = await client.get("/stressuser/stress-500/intel/clones?tier=exact")
        took = time.perf_counter() - started

        assert resp.status_code == 200
        # Only exact-tier badges may be rendered when filtering by tier=exact.
        assert b"cl-badge--exact" in resp.content
        assert b"cl-badge--near" not in resp.content
        assert took < 0.3, f"Tier filter too slow: {took:.3f}s"

    @pytest.mark.asyncio
    async def test_S05_file_breakdown_50_files_no_truncation(
        self, client: AsyncClient, repo_big_cluster: tuple[MusehubRepo, str]
    ) -> None:
        """300-member cluster spanning 50 files shows the full file breakdown."""
        _, cluster_hash = repo_big_cluster

        resp = await client.get(
            f"/stressuser/stress-big/intel/clones/detail?cluster={cluster_hash}"
        )
        assert resp.status_code == 200

        # 300 members / 6 per file = 50 distinct files — all should appear
        file_rows = resp.text.count("cl-file-row")
        assert file_rows == 50, f"Expected 50 file rows, got {file_rows}"