gabriel / musehub public
test_clones_performance.py python
245 lines 8.2 KB
Raw
sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595 fix: typing audit — 0 violations, 0 untyped defs across all… Sonnet 4.6 minor ⚠ breaking 22 days ago
1 """Tier 6 — Performance tests for the clone browser (issue #17).
2
3 Measures raw query and helper latency in isolation — no HTTP overhead.
4 All bounds are measured with ``time.perf_counter`` and set conservatively
5 above observed p99 on a single-core CI runner.
6
7 Cases:
8 P01 List query — 500 rows fetched < 50ms
9 P02 Detail query — single row lookup < 10ms
10 P03 Dashboard count query — 10 000 rows < 30ms
11 P04 _cl_language_set on 200-member JSON < 2ms
12 P05 _cl_file_count on 200-member JSON < 2ms
13 P06 Full list template render — 20 clusters < 100ms
14 """
15 from __future__ import annotations
16
17 import json
18 import time
19
20 import pytest
21 import pytest_asyncio
22 import sqlalchemy as sa
23 from sqlalchemy.dialects.postgresql import insert as pg_insert
24 from sqlalchemy.ext.asyncio import AsyncSession
25
26 from muse.core.types import long_id
27 from musehub.db.musehub_intel_models import MusehubIntelClones
28 from musehub.api.routes.musehub.ui_intel import (
29 _cl_language_set,
30 _cl_file_count,
31 )
32 from tests.factories import create_repo
33
34 _REF = long_id("a" * 64)
35
36
37 def _make_members_json(n: int, files: int = 4) -> str:
38 return json.dumps([
39 {
40 "address": f"src/module_{i % files}/mod.py::fn_{i}",
41 "kind": "function",
42 "language": "Python",
43 "body_hash": long_id("a" * 64),
44 "signature_id": long_id("b" * 64),
45 "content_id": long_id("a" * 64),
46 }
47 for i in range(n)
48 ])
49
50
51 async def _bulk_insert(
52 session: AsyncSession,
53 repo_id: str,
54 n: int,
55 tier: str = "exact",
56 ) -> None:
57 mj = _make_members_json(2)
58 rows = [
59 {
60 "repo_id": repo_id,
61 "cluster_hash": long_id(f"{tier[0]}{str(i).zfill(63)}"),
62 "tier": tier,
63 "member_count": 2,
64 "members_json": mj,
65 "ref": _REF,
66 }
67 for i in range(n)
68 ]
69 batch = 500
70 for start in range(0, len(rows), batch):
71 await session.execute(
72 pg_insert(MusehubIntelClones)
73 .values(rows[start : start + batch])
74 .on_conflict_do_nothing()
75 )
76 await session.commit()
77
78
79 @pytest_asyncio.fixture
80 async def repo_500(db_session: AsyncSession) -> MusehubRepo:
81 r = await create_repo(db_session, owner="perfuser", slug="perf-500")
82 await _bulk_insert(db_session, str(r.repo_id), 300, tier="exact")
83 await _bulk_insert(db_session, str(r.repo_id), 200, tier="near")
84 return r
85
86
87 @pytest_asyncio.fixture
88 async def repo_10k(db_session: AsyncSession) -> MusehubRepo:
89 r = await create_repo(db_session, owner="perfuser", slug="perf-10k")
90 await _bulk_insert(db_session, str(r.repo_id), 5000, tier="exact")
91 await _bulk_insert(db_session, str(r.repo_id), 5000, tier="near")
92 return r
93
94
95 @pytest_asyncio.fixture
96 async def detail_repo(db_session: AsyncSession) -> tuple[MusehubRepo, str]:
97 r = await create_repo(db_session, owner="perfuser", slug="perf-detail")
98 h = long_id("d" * 64)
99 await db_session.execute(
100 pg_insert(MusehubIntelClones)
101 .values(
102 repo_id=str(r.repo_id),
103 cluster_hash=h,
104 tier="exact",
105 member_count=10,
106 members_json=_make_members_json(10),
107 ref=_REF,
108 )
109 .on_conflict_do_nothing()
110 )
111 await db_session.commit()
112 return r, h
113
114
115 class TestClonesPerformance:
116
117 @pytest.mark.asyncio
118 async def test_P01_list_query_500_rows_under_50ms(
119 self, db_session: AsyncSession, repo_500: MusehubRepo
120 ) -> None:
121 """Raw list query for 500 clusters completes in < 50ms."""
122 t0 = time.perf_counter()
123 result = await db_session.execute(
124 sa.select(MusehubIntelClones)
125 .where(MusehubIntelClones.repo_id == str(repo_500.repo_id))
126 .order_by(sa.desc(MusehubIntelClones.member_count))
127 .limit(20)
128 )
129 rows = result.scalars().all()
130 elapsed = time.perf_counter() - t0
131
132 assert len(rows) == 20
133 assert elapsed < 0.05, f"List query too slow: {elapsed*1000:.1f}ms"
134
135 @pytest.mark.asyncio
136 async def test_P02_detail_query_single_row_under_10ms(
137 self, db_session: AsyncSession, detail_repo: tuple[MusehubRepo, str]
138 ) -> None:
139 """Single-row lookup by (repo_id, cluster_hash) completes in < 10ms."""
140 repo, h = detail_repo
141 t0 = time.perf_counter()
142 result = await db_session.execute(
143 sa.select(MusehubIntelClones).where(
144 MusehubIntelClones.repo_id == str(repo.repo_id),
145 MusehubIntelClones.cluster_hash == h,
146 )
147 )
148 row = result.scalar_one_or_none()
149 elapsed = time.perf_counter() - t0
150
151 assert row is not None
152 assert elapsed < 0.05, f"Detail query too slow: {elapsed*1000:.1f}ms"
153
154 @pytest.mark.asyncio
155 async def test_P03_dashboard_count_10k_rows_under_30ms(
156 self, db_session: AsyncSession, repo_10k: MusehubRepo
157 ) -> None:
158 """COUNT(*) over 10 000 rows completes in < 30ms."""
159 t0 = time.perf_counter()
160 result = await db_session.execute(
161 sa.select(sa.func.count())
162 .select_from(MusehubIntelClones)
163 .where(MusehubIntelClones.repo_id == str(repo_10k.repo_id))
164 )
165 count = result.scalar_one()
166 elapsed = time.perf_counter() - t0
167
168 assert count == 10_000
169 assert elapsed < 0.03, f"Count query too slow: {elapsed*1000:.1f}ms"
170
171 def test_P04_language_set_200_members_under_2ms(self) -> None:
172 """_cl_language_set on a 200-member blob completes in < 2ms."""
173 mj = _make_members_json(200)
174 t0 = time.perf_counter()
175 langs = _cl_language_set(mj)
176 elapsed = time.perf_counter() - t0
177
178 assert langs == ["Python"]
179 assert elapsed < 0.002, f"_cl_language_set too slow: {elapsed*1000:.2f}ms"
180
181 def test_P05_file_count_200_members_under_2ms(self) -> None:
182 """_cl_file_count on a 200-member blob completes in < 2ms."""
183 mj = _make_members_json(200, files=10)
184 t0 = time.perf_counter()
185 fc = _cl_file_count(mj)
186 elapsed = time.perf_counter() - t0
187
188 assert fc == 10
189 assert elapsed < 0.002, f"_cl_file_count too slow: {elapsed*1000:.2f}ms"
190
191 @pytest.mark.asyncio
192 async def test_P06_template_render_20_clusters_under_100ms(
193 self, db_session: AsyncSession, repo_500: MusehubRepo
194 ) -> None:
195 """Jinja2 render of the clones list page (20 clusters) completes in < 100ms."""
196 from musehub.api.routes.musehub._templates import templates
197 from musehub.api.routes.musehub.ui_intel import (
198 _cl_tier_class,
199 _cl_language_set,
200 _cl_file_count,
201 _cl_is_cross_file,
202 )
203
204 result = await db_session.execute(
205 sa.select(MusehubIntelClones)
206 .where(MusehubIntelClones.repo_id == str(repo_500.repo_id))
207 .order_by(sa.desc(MusehubIntelClones.member_count))
208 .limit(20)
209 )
210 rows = result.scalars().all()
211 clusters = [
212 {
213 "cluster_hash": r.cluster_hash,
214 "tier": r.tier,
215 "tier_class": _cl_tier_class(r.tier),
216 "member_count": r.member_count,
217 "languages": _cl_language_set(r.members_json),
218 "file_count": _cl_file_count(r.members_json),
219 "is_cross_file": _cl_is_cross_file(r.members_json),
220 }
221 for r in rows
222 ]
223
224 tpl = templates.env.get_template("musehub/pages/intel_clones.html")
225 ctx = {
226 "request": None,
227 "repo": repo_500,
228 "base_url": "/perfuser/perf-500",
229 "clusters": clusters,
230 "tier_filter": "",
231 "top_filter": 20,
232 "valid_tops": (20, 50, 100),
233 "exact_count": 300,
234 "near_count": 200,
235 "total_count": 500,
236 "total_symbols": 1000,
237 "file_hotspots": [],
238 }
239
240 t0 = time.perf_counter()
241 html = tpl.render(**ctx)
242 elapsed = time.perf_counter() - t0
243
244 assert "cl-row" in html
245 assert elapsed < 0.1, f"Template render too slow: {elapsed*1000:.1f}ms"
File History 1 commit
sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595 fix: typing audit — 0 violations, 0 untyped defs across all… Sonnet 4.6 minor 22 days ago