gabriel / musehub public

test_repo_card_performance.py file-level

at sha256:3 · View file ↗ · Intel ↗

History
1 files
1 commits
0 hotspots
0 🧊 dead
0 💥 blast risk
sha256:0 fix: fall back to any indexed mpack in read_object_bytes when push mpac… · gabriel · Jun 17, 2026
1 """
2 Tier 7 — Performance tests for enrich_repo_cards().
3
4 These tests establish latency baselines that should hold on the CI database.
5 They are deliberately conservative — failing here signals a query regression,
6 not a slow machine.
7
8 Test IDs
9 --------
10 T700 — single-repo enrichment completes in < 100 ms
11 T701 — 10-repo batch completes in < 200 ms (sub-linear scaling)
12 T702 — p95 latency across 20 repeated single-repo calls is < 80 ms
13 T703 — enriching an empty repo (no intel) is at most 2× slower than one with full data
14 """
15 from __future__ import annotations
16
17 import statistics
18 import time
19 from datetime import datetime, timedelta, timezone
20
21 import pytest
22 from sqlalchemy.ext.asyncio import AsyncSession
23
24 from musehub.db.musehub_intel_models import MusehubIntelBreakageMeta, MusehubIntelDead, MusehubSymbolIntel
25 from musehub.services.repo_card_enrichment import enrich_repo_cards
26 from tests.factories import create_commit, create_repo
27
28
29 def _utc_now() -> datetime:
30 return datetime.now(tz=timezone.utc)
31
32
async def _seed_full_repo(db: AsyncSession) -> str:
    """Create a public repo populated with commits and intel rows.

    Inserts, in order: 10 commits (timestamps stepping back one day at a
    time from "now"), 20 ``MusehubSymbolIntel`` rows, one
    ``MusehubIntelDead`` row and one ``MusehubIntelBreakageMeta`` row, then
    commits the session.

    Returns the ``repo_id`` of the newly created repo.
    """
    repo = await create_repo(db, visibility="public")
    rid = repo.repo_id

    # Commit history: one commit per day, newest first.
    for days_back in range(10):
        committed_at = _utc_now() - timedelta(days=days_back)
        await create_commit(db, rid, timestamp=committed_at)

    # Symbol intel: churn and blast grow with the symbol index.
    for idx in range(20):
        symbol = MusehubSymbolIntel(
            repo_id=rid,
            address=f"src/mod.py::fn_{idx}",
            churn_30d=idx,
            blast=idx * 2,
        )
        db.add(symbol)

    dead_row = MusehubIntelDead(
        repo_id=rid,
        address="src/old.py::dead_fn",
        kind="function",
        confidence="high",
        ref="main",
    )
    db.add(dead_row)

    breakage_row = MusehubIntelBreakageMeta(
        repo_id=rid,
        total_issues=1,
        error_count=0,
        warning_count=1,
        file_count=1,
        ref="main",
    )
    db.add(breakage_row)

    await db.commit()
    return rid
62
63
64 # ---------------------------------------------------------------------------
65 # T700 — single-repo enrichment < 100 ms
66 # ---------------------------------------------------------------------------
67
@pytest.mark.asyncio
async def test_t700_single_repo_under_100ms(db_session: AsyncSession) -> None:
    """T700: enriching one fully-populated repo completes in < 100 ms.

    Seeding happens before the clock starts; only the single
    ``enrich_repo_cards`` call is inside the timed region.
    """
    repo_id = await _seed_full_repo(db_session)

    # perf_counter is the stdlib clock intended for measuring short
    # durations; it has the highest available resolution.
    started = time.perf_counter()
    await enrich_repo_cards(db_session, [repo_id])
    elapsed_ms = (time.perf_counter() - started) * 1000

    assert elapsed_ms < 100, f"Single-repo enrichment took {elapsed_ms:.1f} ms"
78
79
80 # ---------------------------------------------------------------------------
81 # T701 — 10-repo batch < 200 ms
82 # ---------------------------------------------------------------------------
83
@pytest.mark.asyncio
async def test_t701_ten_repo_batch_under_200ms(db_session: AsyncSession) -> None:
    """T701: enriching 10 repos completes in < 200 ms (sub-linear vs T700).

    Ten fully-populated repos are seeded up front; only the single batched
    ``enrich_repo_cards`` call is inside the timed region.
    """
    repo_ids = [await _seed_full_repo(db_session) for _ in range(10)]

    # perf_counter is the stdlib clock intended for measuring short
    # durations; it has the highest available resolution.
    started = time.perf_counter()
    await enrich_repo_cards(db_session, repo_ids)
    elapsed_ms = (time.perf_counter() - started) * 1000

    assert elapsed_ms < 200, f"10-repo batch took {elapsed_ms:.1f} ms"
94
95
96 # ---------------------------------------------------------------------------
97 # T702 — p95 latency across 20 calls < 80 ms
98 # ---------------------------------------------------------------------------
99
@pytest.mark.asyncio
async def test_t702_p95_single_repo_under_80ms(db_session: AsyncSession) -> None:
    """T702: p95 latency across 20 repeated single-repo calls is < 80 ms.

    The same repo is enriched 20 times in a row on the same session and each
    call is timed individually, in milliseconds.
    """
    repo_id = await _seed_full_repo(db_session)

    latencies: list[float] = []
    for _ in range(20):
        # perf_counter is the stdlib clock intended for measuring short
        # durations; it has the highest available resolution.
        started = time.perf_counter()
        await enrich_repo_cards(db_session, [repo_id])
        latencies.append((time.perf_counter() - started) * 1000)

    # quantiles(n=20) yields 19 cut points at 5 % steps; index 18 is the
    # 95th percentile. With exactly 20 samples the default "exclusive"
    # method interpolates between the two slowest calls, so one outlier
    # weighs heavily on the result.
    p95 = statistics.quantiles(latencies, n=20)[18]
    assert p95 < 80, f"p95 latency was {p95:.1f} ms — expected < 80 ms"
113
114
115 # ---------------------------------------------------------------------------
116 # T703 — empty repo no more than 2× slower than full repo
117 # ---------------------------------------------------------------------------
118
@pytest.mark.asyncio
async def test_t703_empty_repo_faster_than_full_repo(db_session: AsyncSession) -> None:
    """T703: an empty repo's median latency stays under 2× a full repo's.

    Despite the test name, this does not require the empty repo to be
    strictly faster: it only guards against the no-data path becoming
    disproportionately slow. Full and empty calls are interleaved over 10
    rounds so both sample sets see the same conditions, and medians are
    compared so a single slow call does not decide the outcome.
    """
    full_id = await _seed_full_repo(db_session)
    empty_repo = await create_repo(db_session, visibility="public")
    empty_id = empty_repo.repo_id

    samples_full: list[float] = []
    samples_empty: list[float] = []

    # perf_counter (highest-resolution stdlib clock) is used rather than
    # monotonic: on a coarse clock both medians could read 0.0, and the
    # strict "<" comparison below would then fail spuriously.
    for _ in range(10):
        started = time.perf_counter()
        await enrich_repo_cards(db_session, [full_id])
        samples_full.append(time.perf_counter() - started)

        started = time.perf_counter()
        await enrich_repo_cards(db_session, [empty_id])
        samples_empty.append(time.perf_counter() - started)

    # Samples are in seconds; convert the medians to milliseconds.
    median_full = statistics.median(samples_full) * 1000
    median_empty = statistics.median(samples_empty) * 1000

    # Empty should be no more than 2× slower than full (same 5 queries run)
    assert median_empty < median_full * 2, (
        f"Empty repo ({median_empty:.1f} ms) unexpectedly slower than "
        f"full repo ({median_full:.1f} ms) by > 2×"
    )