tests/test_repo_card_stress.py · gabriel/musehub

test_repo_card_stress.py python

202 lines 7.5 KB

sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595 fix: typing audit — 0 violations, 0 untyped defs across all… Sonnet 4.6 minor ⚠ breaking 20 days ago

1	"""
2	Tier 4 — Stress tests for enrich_repo_cards() under load.
3
4	All tests run against the test database (not mocks) to catch real query
5	behaviour: N+1 regressions, batch overflows, and degenerate data patterns
6	that would silently misbehave in production.
7
8	Test IDs
9	--------
10	T400 — enriching 50 repos issues a small, fixed number of SQL queries (nominally 5, asserted ≤ 7; no N+1)
11	T401 — enriching 100 repos completes in < 2 s (performance floor)
12	T402 — repos with 1000 commits each produce correct pulse buckets
13	T403 — 100 symbols per repo returns the correct hottest without full-scan
14	T404 — mixed batch: some repos with data, some without — no cross-contamination
15	T405 — passing duplicate repo_ids is idempotent (no doubled rows)
16	"""
17	from __future__ import annotations
18
19	import time
20	from datetime import datetime, timedelta, timezone
21
22	import typing
23
24	import pytest
25	from sqlalchemy import Executable
26	from sqlalchemy.engine import CursorResult
27	from sqlalchemy.ext.asyncio import AsyncSession
28
29	from musehub.services.repo_card_enrichment import (
30	_PULSE_DAYS,
31	enrich_repo_cards,
32	)
33	from tests.factories import create_commit, create_repo
34
35
36	def _utc_now() -> datetime:
37	return datetime.now(tz=timezone.utc)
38
39
def _days_ago(n: int) -> datetime:
    """Return the UTC instant exactly *n* days before the current moment."""
    offset = timedelta(days=n)
    return _utc_now() - offset
42
43
44	# ---------------------------------------------------------------------------
45	# T400 — no N+1: 5 queries regardless of batch size
46	# ---------------------------------------------------------------------------
47
@pytest.mark.asyncio
async def test_t400_no_n_plus_one_queries(db_session: AsyncSession) -> None:
    """T400: enriching 50 repos uses at most 7 queries (no N+1).

    ``db_session.execute`` is temporarily replaced by a counting wrapper so
    that every statement issued while ``enrich_repo_cards`` runs is tallied.
    The original method is always restored, even if enrichment raises, so the
    patched session cannot leak into later use of the fixture.
    """
    repos = [await create_repo(db_session, visibility="public") for _ in range(50)]
    repo_ids = [r.repo_id for r in repos]

    query_count = 0
    original_execute = db_session.execute

    async def counting_execute(
        stmt: Executable, *args: typing.Any, **kwargs: typing.Any
    ) -> CursorResult[typing.Any]:
        """Count the call, then delegate to the real ``execute`` unchanged."""
        nonlocal query_count
        query_count += 1
        # Forward positionals and keywords verbatim: callers may pass only
        # ``stmt``, or add bound parameters / execution options by keyword.
        return await original_execute(stmt, *args, **kwargs)

    db_session.execute = counting_execute  # type: ignore[method-assign]
    try:
        await enrich_repo_cards(db_session, repo_ids)
    finally:
        # Undo the monkeypatch regardless of the outcome above.
        db_session.execute = original_execute  # type: ignore[method-assign]

    # 5 signal queries (pulse, autonomy, hottest, blast, dead+breakage).
    # Some implementations may split dead/breakage — allow up to 7.
    assert query_count <= 7, f"Expected ≤7 queries, got {query_count}"
69
70
71	# ---------------------------------------------------------------------------
72	# T401 — 100-repo batch completes in < 2 s
73	# ---------------------------------------------------------------------------
74
@pytest.mark.asyncio
async def test_t401_hundred_repos_under_two_seconds(db_session: AsyncSession) -> None:
    """T401: a 100-repo enrichment batch must finish within a 2-second budget."""
    batch = []
    for _ in range(100):
        batch.append(await create_repo(db_session, visibility="public"))
    repo_ids = [repo.repo_id for repo in batch]

    # Only the enrichment call is timed; repo creation above is excluded.
    started = time.monotonic()
    await enrich_repo_cards(db_session, repo_ids)
    elapsed = time.monotonic() - started

    assert elapsed < 2.0, f"Enrichment took {elapsed:.2f}s — expected < 2s"
86
87
88	# ---------------------------------------------------------------------------
89	# T402 — 1000 commits produce valid 30-day pulse
90	# ---------------------------------------------------------------------------
91
@pytest.mark.asyncio
async def test_t402_high_volume_commits_correct_pulse(db_session: AsyncSession) -> None:
    """T402: 1000 commits inside the pulse window yield one bucket per day.

    Checks three things on the enrichment result: the bucket count equals
    ``_PULSE_DAYS``, the bucket counts add up to every commit created, and
    the tallest bucket has height ``h == 24``.
    """
    target = await create_repo(db_session, visibility="public")

    # Round-robin the commits over day offsets 0 .. _PULSE_DAYS - 1.
    for seq in range(1000):
        stamp = _days_ago(seq % _PULSE_DAYS)
        await create_commit(db_session, target.repo_id, timestamp=stamp)

    enriched = await enrich_repo_cards(db_session, [target.repo_id])
    card = enriched[target.repo_id]

    assert len(card.pulse_buckets) == _PULSE_DAYS
    # No commit may be dropped or double-counted.
    assert sum(bucket.count for bucket in card.pulse_buckets) == 1000
    # The busiest bucket is expected to be scaled to the maximum height.
    assert max(bucket.h for bucket in card.pulse_buckets) == 24
111
112
113	# ---------------------------------------------------------------------------
114	# T403 — 100 symbols: hottest is still the correct one
115	# ---------------------------------------------------------------------------
116
@pytest.mark.asyncio
async def test_t403_hundred_symbols_hottest_correct(db_session: AsyncSession) -> None:
    """T403: among 100 symbols, the hottest is the one with the largest churn_30d."""
    from musehub.db.musehub_intel_models import MusehubSymbolIntel

    repo = await create_repo(db_session, visibility="public")

    # 99 background symbols with churn 0..98, all far below the winner.
    for i in range(99):
        background = MusehubSymbolIntel(
            repo_id=repo.repo_id,
            address=f"src/mod_{i}.py::fn_{i}",
            churn_30d=i,
            blast=0,
        )
        db_session.add(background)

    # The 100th symbol is the clear winner with churn_30d = 9999.
    winner = MusehubSymbolIntel(
        repo_id=repo.repo_id,
        address="src/winner.py::hottest_fn",
        churn_30d=9999,
        blast=0,
    )
    db_session.add(winner)
    await db_session.commit()

    enriched = await enrich_repo_cards(db_session, [repo.repo_id])
    card = enriched[repo.repo_id]

    assert card.hottest_symbol is not None
    assert card.hottest_symbol.address == "src/winner.py::hottest_fn"
    assert card.hottest_symbol.churn_30d == 9999
146
147
148	# ---------------------------------------------------------------------------
149	# T404 — mixed batch: data isolation
150	# ---------------------------------------------------------------------------
151
@pytest.mark.asyncio
async def test_t404_mixed_batch_no_cross_contamination(db_session: AsyncSession) -> None:
    """T404: in a 25 + 25 batch, repos without symbol data stay empty.

    Half of the repos get one symbol-intel row each; the other half get none.
    After a single enrichment call over all 50, the populated repos must
    report a hottest symbol and the bare repos must report no signals.
    """
    from musehub.db.musehub_intel_models import MusehubSymbolIntel

    repos_with = []
    for _ in range(25):
        repos_with.append(await create_repo(db_session, visibility="public"))
    repos_without = []
    for _ in range(25):
        repos_without.append(await create_repo(db_session, visibility="public"))

    # Seed exactly one symbol row per populated repo.
    for populated in repos_with:
        row = MusehubSymbolIntel(
            repo_id=populated.repo_id,
            address="src/a.py::fn",
            churn_30d=10,
            blast=5,
        )
        db_session.add(row)
    await db_session.commit()

    all_ids = [repo.repo_id for repo in [*repos_with, *repos_without]]
    enriched = await enrich_repo_cards(db_session, all_ids)

    for populated in repos_with:
        assert enriched[populated.repo_id].hottest_symbol is not None

    # Bare repos must not pick up any signal from the populated ones.
    for bare in repos_without:
        card = enriched[bare.repo_id]
        assert card.hottest_symbol is None
        assert card.blast_leader is None
        assert card.dead_count == 0
        assert card.autonomy_pct == 0
181
182
183	# ---------------------------------------------------------------------------
184	# T405 — duplicate repo_ids are idempotent
185	# ---------------------------------------------------------------------------
186
@pytest.mark.asyncio
async def test_t405_duplicate_repo_ids_idempotent(db_session: AsyncSession) -> None:
    """T405: repeating a repo_id in the input produces a single, undoubled entry."""
    repo = await create_repo(db_session, visibility="public")
    await create_commit(db_session, repo.repo_id, timestamp=_utc_now())

    # The same id three times over.
    repeated_ids = [repo.repo_id, repo.repo_id, repo.repo_id]
    enriched = await enrich_repo_cards(db_session, repeated_ids)

    # The result is keyed by repo, so duplicates collapse to one entry.
    assert len(enriched) == 1
    assert repo.repo_id in enriched
    # The single commit must be counted once, not once per duplicate id.
    pulse = enriched[repo.repo_id].pulse_buckets
    assert sum(bucket.count for bucket in pulse) == 1

File History 1 commit

sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595 fix: typing audit — 0 violations, 0 untyped defs across all… Sonnet 4.6 minor ⚠ 20 days ago

patch test_repo_card_stress.py 20 days ago

insert CursorResult 20 days ago

insert Executable 20 days ago

insert typing 20 days ago

replace test_t400_no_n_plus_one_queries 20 days ago

Pathtests/test_repo_card_stress.py

Lines202

Size7.5 KB

LangPython

Refsha256:3ff9c9863a9891bdcde71b4a43228f66d0493e38b7cc1d09fe9eb7de774046b2

Object ID

sha256:724cd1e49d99db11c02d71c49e62c2c9eba026376f1f994295845fc7dfdb9d8f…

Last commit

sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595

fix: typing audit — 0 violations, 0 untyped defs …

20 days ago

Quick links

Blame History