test_repo_card_stress.py
python
sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595
fix: typing audit — 0 violations, 0 untyped defs across all…
Sonnet 4.6
minor
⚠ breaking
20 days ago
| 1 | """ |
| 2 | Tier 4 — Stress tests for enrich_repo_cards() under load. |
| 3 | |
| 4 | All tests run against the test database (not mocks) to catch real query |
| 5 | behaviour: N+1 regressions, batch overflows, and degenerate data patterns |
| 6 | that would silently misbehave in production. |
| 7 | |
| 8 | Test IDs |
| 9 | -------- |
| 10 | T400 — enriching 50 repos issues a small, fixed number of SQL queries (at most 7; no N+1) |
| 11 | T401 — enriching 100 repos completes in < 2 s (performance floor) |
| 12 | T402 — a repo with 1000 commits produces correct pulse buckets |
| 13 | T403 — 100 symbols per repo returns the correct hottest without full-scan |
| 14 | T404 — mixed batch: some repos with data, some without — no cross-contamination |
| 15 | T405 — passing duplicate repo_ids is idempotent (no doubled rows) |
| 16 | """ |
| 17 | from __future__ import annotations |
| 18 | |
| 19 | import time |
| 20 | from datetime import datetime, timedelta, timezone |
| 21 | |
| 22 | import typing |
| 23 | |
| 24 | import pytest |
| 25 | from sqlalchemy import Executable |
| 26 | from sqlalchemy.engine import CursorResult |
| 27 | from sqlalchemy.ext.asyncio import AsyncSession |
| 28 | |
| 29 | from musehub.services.repo_card_enrichment import ( |
| 30 | _PULSE_DAYS, |
| 31 | enrich_repo_cards, |
| 32 | ) |
| 33 | from tests.factories import create_commit, create_repo |
| 34 | |
| 35 | |
| 36 | def _utc_now() -> datetime: |
| 37 | return datetime.now(tz=timezone.utc) |
| 38 | |
| 39 | |
def _days_ago(n: int) -> datetime:
    """Return the timezone-aware UTC instant *n* days before the current time."""
    offset = timedelta(days=n)
    return _utc_now() - offset
| 42 | |
| 43 | |
| 44 | # --------------------------------------------------------------------------- |
| 45 | # T400 — no N+1: at most 7 queries regardless of batch size |
| 46 | # --------------------------------------------------------------------------- |
| 47 | |
@pytest.mark.asyncio
async def test_t400_no_n_plus_one_queries(db_session: AsyncSession) -> None:
    """T400: enriching 50 repos uses a bounded number of queries (at most 7).

    ``db_session.execute`` is temporarily replaced by a counting wrapper so
    each statement sent through it while ``enrich_repo_cards`` runs is
    tallied.  The bound does not scale with the batch: one query per repo
    would already mean 50 statements and fail the assertion.
    """
    repos = [await create_repo(db_session, visibility="public") for _ in range(50)]
    repo_ids = [r.repo_id for r in repos]

    query_count = 0
    original_execute = db_session.execute

    async def counting_execute(
        stmt: Executable, *args: typing.Any, **kwargs: typing.Any
    ) -> CursorResult[typing.Any]:
        # Count the call, then delegate unchanged to the real execute().
        nonlocal query_count
        query_count += 1
        return await original_execute(stmt, *args, **kwargs)

    db_session.execute = counting_execute  # type: ignore[method-assign]
    try:
        await enrich_repo_cards(db_session, repo_ids)
    finally:
        # Restore the real method even when enrichment raises, so the
        # counting wrapper never outlives this measurement.
        db_session.execute = original_execute  # type: ignore[method-assign]

    # 5 signal queries (pulse, autonomy, hottest, blast, dead+breakage).
    # Some implementations may split dead/breakage — allow up to 7.
    assert query_count <= 7, f"Expected ≤7 queries, got {query_count}"
| 69 | |
| 70 | |
| 71 | # --------------------------------------------------------------------------- |
| 72 | # T401 — 100-repo batch completes in < 2 s |
| 73 | # --------------------------------------------------------------------------- |
| 74 | |
@pytest.mark.asyncio
async def test_t401_hundred_repos_under_two_seconds(db_session: AsyncSession) -> None:
    """T401: a 100-repo enrich_repo_cards call must take less than 2 seconds."""
    repos = []
    for _ in range(100):
        repos.append(await create_repo(db_session, visibility="public"))
    repo_ids = [repo.repo_id for repo in repos]

    # Only the enrichment call is timed; the repo creation above is excluded.
    started = time.monotonic()
    await enrich_repo_cards(db_session, repo_ids)
    elapsed = time.monotonic() - started

    assert elapsed < 2.0, f"Enrichment took {elapsed:.2f}s — expected < 2s"
| 86 | |
| 87 | |
| 88 | # --------------------------------------------------------------------------- |
| 89 | # T402 — 1000 commits produce valid 30-day pulse |
| 90 | # --------------------------------------------------------------------------- |
| 91 | |
@pytest.mark.asyncio
async def test_t402_high_volume_commits_correct_pulse(db_session: AsyncSession) -> None:
    """T402: 1000 commits on one repo yield _PULSE_DAYS buckets that sum to 1000."""
    repo = await create_repo(db_session, visibility="public")

    # Round-robin the 1000 commits over day offsets 0 .. _PULSE_DAYS - 1.
    for commit_index in range(1000):
        await create_commit(
            db_session,
            repo.repo_id,
            timestamp=_days_ago(commit_index % _PULSE_DAYS),
        )

    enriched = await enrich_repo_cards(db_session, [repo.repo_id])
    card = enriched[repo.repo_id]

    assert len(card.pulse_buckets) == _PULSE_DAYS
    assert sum(bucket.count for bucket in card.pulse_buckets) == 1000
    # Busiest bucket is normalised to h=24
    assert max(bucket.h for bucket in card.pulse_buckets) == 24
| 111 | |
| 112 | |
| 113 | # --------------------------------------------------------------------------- |
| 114 | # T403 — 100 symbols: hottest is still the correct one |
| 115 | # --------------------------------------------------------------------------- |
| 116 | |
@pytest.mark.asyncio
async def test_t403_hundred_symbols_hottest_correct(db_session: AsyncSession) -> None:
    """T403: among 100 symbols, the one with the largest churn_30d is the hottest."""
    from musehub.db.musehub_intel_models import MusehubSymbolIntel

    repo = await create_repo(db_session, visibility="public")

    # 99 background symbols with churn_30d 0..98 ...
    symbols = [
        MusehubSymbolIntel(
            repo_id=repo.repo_id,
            address=f"src/mod_{n}.py::fn_{n}",
            churn_30d=n,
            blast=0,
        )
        for n in range(99)
    ]
    # ... followed by the winner: churn_30d = 9999
    symbols.append(
        MusehubSymbolIntel(
            repo_id=repo.repo_id,
            address="src/winner.py::hottest_fn",
            churn_30d=9999,
            blast=0,
        )
    )
    for symbol in symbols:
        db_session.add(symbol)
    await db_session.commit()

    enriched = await enrich_repo_cards(db_session, [repo.repo_id])
    card = enriched[repo.repo_id]

    assert card.hottest_symbol is not None
    assert card.hottest_symbol.address == "src/winner.py::hottest_fn"
    assert card.hottest_symbol.churn_30d == 9999
| 146 | |
| 147 | |
| 148 | # --------------------------------------------------------------------------- |
| 149 | # T404 — mixed batch: data isolation |
| 150 | # --------------------------------------------------------------------------- |
| 151 | |
@pytest.mark.asyncio
async def test_t404_mixed_batch_no_cross_contamination(db_session: AsyncSession) -> None:
    """T404: 25 repos with a symbol row + 25 without — the empty ones stay empty."""
    from musehub.db.musehub_intel_models import MusehubSymbolIntel

    populated = [await create_repo(db_session, visibility="public") for _ in range(25)]
    empty = [await create_repo(db_session, visibility="public") for _ in range(25)]

    # Give every "populated" repo exactly one symbol row; "empty" repos get none.
    for repo in populated:
        symbol = MusehubSymbolIntel(
            repo_id=repo.repo_id,
            address="src/a.py::fn",
            churn_30d=10,
            blast=5,
        )
        db_session.add(symbol)
    await db_session.commit()

    # Enrich both groups in a single call so any leakage would show up.
    batch_ids = [repo.repo_id for repo in (*populated, *empty)]
    cards = await enrich_repo_cards(db_session, batch_ids)

    for repo in populated:
        assert cards[repo.repo_id].hottest_symbol is not None

    for repo in empty:
        card = cards[repo.repo_id]
        assert card.hottest_symbol is None
        assert card.blast_leader is None
        assert card.dead_count == 0
        assert card.autonomy_pct == 0
| 181 | |
| 182 | |
| 183 | # --------------------------------------------------------------------------- |
| 184 | # T405 — duplicate repo_ids are idempotent |
| 185 | # --------------------------------------------------------------------------- |
| 186 | |
@pytest.mark.asyncio
async def test_t405_duplicate_repo_ids_idempotent(db_session: AsyncSession) -> None:
    """T405: a repo_id repeated three times in the input yields one result entry."""
    repo = await create_repo(db_session, visibility="public")
    await create_commit(db_session, repo.repo_id, timestamp=_utc_now())

    duplicated_ids = [repo.repo_id] * 3
    cards = await enrich_repo_cards(db_session, duplicated_ids)

    # Only one entry regardless of duplicates in input
    assert len(cards) == 1
    assert repo.repo_id in cards
    # Pulse should not double-count due to deduplication
    commit_total = sum(bucket.count for bucket in cards[repo.repo_id].pulse_buckets)
    assert commit_total == 1
File History
1 commit
sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595
fix: typing audit — 0 violations, 0 untyped defs across all…
Sonnet 4.6
minor
⚠
20 days ago