""" Tier 7 — Performance tests for enrich_repo_cards(). These tests establish latency baselines that should hold on the CI database. They are deliberately conservative — failing here signals a query regression, not a slow machine. Test IDs -------- T700 — single-repo enrichment completes in < 100 ms T701 — 10-repo batch completes in < 200 ms (sub-linear scaling) T702 — p95 latency across 20 repeated single-repo calls is < 80 ms T703 — enriching an empty repo (no intel) is faster than one with full data """ from __future__ import annotations import statistics import time from datetime import datetime, timedelta, timezone import pytest from sqlalchemy.ext.asyncio import AsyncSession from musehub.db.musehub_intel_models import MusehubIntelBreakageMeta, MusehubIntelDead, MusehubSymbolIntel from musehub.services.repo_card_enrichment import enrich_repo_cards from tests.factories import create_commit, create_repo def _utc_now() -> datetime: return datetime.now(tz=timezone.utc) async def _seed_full_repo(db: AsyncSession) -> str: """Seed a repo with commits, symbols, dead rows, and breakage meta.""" repo = await create_repo(db, visibility="public") for i in range(10): await create_commit(db, repo.repo_id, timestamp=_utc_now() - timedelta(days=i)) for i in range(20): db.add(MusehubSymbolIntel( repo_id=repo.repo_id, address=f"src/mod.py::fn_{i}", churn_30d=i, blast=i * 2, )) db.add(MusehubIntelDead( repo_id=repo.repo_id, address="src/old.py::dead_fn", kind="function", confidence="high", ref="main", )) db.add(MusehubIntelBreakageMeta( repo_id=repo.repo_id, total_issues=1, error_count=0, warning_count=1, file_count=1, ref="main", )) await db.commit() return repo.repo_id # --------------------------------------------------------------------------- # T700 — single-repo enrichment < 100 ms # --------------------------------------------------------------------------- @pytest.mark.asyncio async def test_t700_single_repo_under_100ms(db_session: AsyncSession) -> None: """T700: enriching one fully-populated repo completes in < 100 ms.""" repo_id = await _seed_full_repo(db_session) t0 = time.monotonic() await enrich_repo_cards(db_session, [repo_id]) elapsed_ms = (time.monotonic() - t0) * 1000 assert elapsed_ms < 100, f"Single-repo enrichment took {elapsed_ms:.1f} ms" # --------------------------------------------------------------------------- # T701 — 10-repo batch < 200 ms # --------------------------------------------------------------------------- @pytest.mark.asyncio async def test_t701_ten_repo_batch_under_200ms(db_session: AsyncSession) -> None: """T701: enriching 10 repos completes in < 200 ms (sub-linear vs T700).""" repo_ids = [await _seed_full_repo(db_session) for _ in range(10)] t0 = time.monotonic() await enrich_repo_cards(db_session, repo_ids) elapsed_ms = (time.monotonic() - t0) * 1000 assert elapsed_ms < 200, f"10-repo batch took {elapsed_ms:.1f} ms" # --------------------------------------------------------------------------- # T702 — p95 latency across 20 calls < 80 ms # --------------------------------------------------------------------------- @pytest.mark.asyncio async def test_t702_p95_single_repo_under_80ms(db_session: AsyncSession) -> None: """T702: p95 latency across 20 repeated single-repo calls is < 80 ms.""" repo_id = await _seed_full_repo(db_session) latencies = [] for _ in range(20): t0 = time.monotonic() await enrich_repo_cards(db_session, [repo_id]) latencies.append((time.monotonic() - t0) * 1000) p95 = statistics.quantiles(latencies, n=20)[18] # 95th percentile assert p95 < 80, f"p95 latency was {p95:.1f} ms — expected < 80 ms" # --------------------------------------------------------------------------- # T703 — empty repo faster than full repo # --------------------------------------------------------------------------- @pytest.mark.asyncio async def test_t703_empty_repo_faster_than_full_repo(db_session: AsyncSession) -> None: """T703: enriching an empty repo is not slower than a fully-populated one.""" full_id = await _seed_full_repo(db_session) empty_repo = await create_repo(db_session, visibility="public") empty_id = empty_repo.repo_id samples_full = [] samples_empty = [] for _ in range(10): t0 = time.monotonic() await enrich_repo_cards(db_session, [full_id]) samples_full.append(time.monotonic() - t0) t0 = time.monotonic() await enrich_repo_cards(db_session, [empty_id]) samples_empty.append(time.monotonic() - t0) median_full = statistics.median(samples_full) * 1000 median_empty = statistics.median(samples_empty) * 1000 # Empty should be no more than 2× slower than full (same 5 queries run) assert median_empty < median_full * 2, ( f"Empty repo ({median_empty:.1f} ms) unexpectedly slower than " f"full repo ({median_full:.1f} ms) by > 2×" )