gabriel / musehub public
test_repo_card_enrichment_integration.py python
391 lines 15.3 KB
Raw
sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595 fix: typing audit — 0 violations, 0 untyped defs across all… Sonnet 4.6 minor ⚠ breaking 20 days ago
1 """
2 Tier 2 — Integration tests for enrich_repo_cards() against a real test database.
3
4 These tests exercise the full service call — SQL queries run against the test
5 postgres instance populated via factory helpers and direct ORM inserts.
6
7 Test IDs
8 --------
9 T200 — commits on known days (3 today, 2 yesterday) produce the correct daily counts in pulse_buckets
10 T201 — autonomy_pct is 100 when all commits carry a non-empty agent_id
11 T202 — autonomy_pct is 0 when no commits carry an agent_id
12 T203 — autonomy_pct is rounded correctly for a mixed repo (e.g. 3/4 = 75%)
13 T204 — hottest_symbol matches the symbol with the highest churn_30d
14 T205 — blast_leader matches the symbol with the highest blast score
15 T206 — dead_count counts only high-confidence dead symbols (medium/low excluded)
16 T207 — health_status is 'risk' when breakage_meta has error_count > 0
17 T208 — health_status is 'warn' when dead_count > 0, error_count == 0
18 T209 — health_status is 'clean' when no dead symbols and no breakage meta row
19 T210 — enrich_repo_cards batches two repos correctly in a single call
20 T211 — repos with no intel rows return safe zero-value enrichment (no crash)
21 T212 — pulse_buckets always has exactly 30 entries regardless of commit pattern
22 T213 — commits older than 30 days do not appear in pulse_buckets
23 T214 — hottest_symbol is None when symbol_intel has no rows for the repo
24 T215 — blast_leader is None when all blast scores are zero
25 """
26 from __future__ import annotations
27
28 import secrets
29 from datetime import datetime, timedelta, timezone
30
31 import pytest
32 import pytest_asyncio
33 from sqlalchemy.ext.asyncio import AsyncSession
34
35 from musehub.db.musehub_intel_models import MusehubIntelBreakageMeta, MusehubIntelDead, MusehubSymbolIntel
36 from musehub.db.musehub_repo_models import MusehubCommit
37 from musehub.services.repo_card_enrichment import (
38 _PULSE_DAYS,
39 enrich_repo_cards,
40 )
41 from tests.factories import create_commit, create_repo
42
43 # ---------------------------------------------------------------------------
44 # Helpers
45 # ---------------------------------------------------------------------------
46
47 def _utc_now() -> datetime:
48 return datetime.now(tz=timezone.utc)
49
50
def _days_ago(n: int) -> datetime:
    """Return the UTC timestamp exactly *n* days before the current moment."""
    offset = timedelta(days=n)
    return _utc_now() - offset
53
54
55 def _commit_id() -> str:
56 return f"sha256:{secrets.token_hex(32)}"
57
58
async def _insert_symbol_intel(
    session: AsyncSession,
    repo_id: str,
    address: str,
    churn_30d: int = 0,
    blast: int = 0,
) -> MusehubSymbolIntel:
    """Persist one ``MusehubSymbolIntel`` row and hand it back.

    Args:
        session: Session the row is added to; it is committed before returning.
        repo_id: Repo the symbol row belongs to.
        address: Symbol address stored on the row.
        churn_30d: Value for the row's ``churn_30d`` column (defaults to 0).
        blast: Value for the row's ``blast`` column (defaults to 0).

    Returns:
        The ``MusehubSymbolIntel`` instance that was added and committed.
    """
    symbol_row = MusehubSymbolIntel(
        repo_id=repo_id,
        address=address,
        churn_30d=churn_30d,
        blast=blast,
    )
    session.add(symbol_row)
    await session.commit()
    return symbol_row
76
77
async def _insert_dead(
    session: AsyncSession,
    repo_id: str,
    address: str,
    confidence: str = "high",
) -> MusehubIntelDead:
    """Persist one ``MusehubIntelDead`` row and hand it back.

    The row is always written with ``kind="function"`` and ``ref="main"``;
    only the repo, address and confidence vary per call.

    Args:
        session: Session the row is added to; it is committed before returning.
        repo_id: Repo the dead-symbol row belongs to.
        address: Symbol address stored on the row.
        confidence: Confidence label stored on the row (defaults to ``"high"``).

    Returns:
        The ``MusehubIntelDead`` instance that was added and committed.
    """
    dead_row = MusehubIntelDead(
        repo_id=repo_id,
        address=address,
        kind="function",
        confidence=confidence,
        ref="main",
    )
    session.add(dead_row)
    await session.commit()
    return dead_row
95
96
async def _insert_breakage_meta(
    session: AsyncSession,
    repo_id: str,
    error_count: int = 0,
    warning_count: int = 0,
) -> MusehubIntelBreakageMeta:
    """Persist one ``MusehubIntelBreakageMeta`` row and hand it back.

    ``total_issues`` is derived as ``error_count + warning_count``;
    ``file_count`` is fixed at 1 and ``ref`` at ``"main"``.

    Args:
        session: Session the row is added to; it is committed before returning.
        repo_id: Repo the breakage summary belongs to.
        error_count: Number of errors recorded on the row (defaults to 0).
        warning_count: Number of warnings recorded on the row (defaults to 0).

    Returns:
        The ``MusehubIntelBreakageMeta`` instance that was added and committed.
    """
    issue_total = error_count + warning_count
    meta_row = MusehubIntelBreakageMeta(
        repo_id=repo_id,
        total_issues=issue_total,
        error_count=error_count,
        warning_count=warning_count,
        file_count=1,
        ref="main",
    )
    session.add(meta_row)
    await session.commit()
    return meta_row
115
116
async def _add_agent_commit(
    session: AsyncSession,
    repo_id: str,
    timestamp: datetime | None = None,
    agent_id: str = "claude-code",
) -> MusehubCommit:
    """Create a commit via the factory, then stamp *agent_id* onto its row.

    Args:
        session: Session used for the factory insert and the follow-up UPDATE.
        repo_id: Repo the commit is created for.
        timestamp: Commit timestamp; the current UTC time is used when omitted.
        agent_id: Value written to ``musehub_commits.agent_id``.

    Returns:
        The commit object produced by ``create_commit``.
        NOTE(review): the agent id is written with raw SQL, so whether this
        object's own ``agent_id`` attribute shows the new value depends on the
        session's expiry settings — confirm before reading it from the result.
    """
    from sqlalchemy import text

    when = timestamp if timestamp is not None else _utc_now()
    commit = await create_commit(session, repo_id, timestamp=when)

    # CommitFactory has no agent_id field, so set the column with a direct UPDATE.
    stamp_agent = text("UPDATE musehub_commits SET agent_id = :aid WHERE commit_id = :cid")
    bind_values = {"aid": agent_id, "cid": commit.commit_id}
    await session.execute(stamp_agent, bind_values)
    await session.commit()
    return commit
133
134
async def _add_human_commit(
    session: AsyncSession,
    repo_id: str,
    timestamp: datetime | None = None,
) -> MusehubCommit:
    """Create a commit via the factory without assigning any agent_id.

    No agent id is passed or written here; presumably the factory leaves the
    column empty, which is what marks the commit as human — verify against
    ``tests.factories.create_commit``.

    Args:
        session: Session handed to the commit factory.
        repo_id: Repo the commit is created for.
        timestamp: Commit timestamp; the current UTC time is used when omitted.

    Returns:
        The commit object produced by ``create_commit``.
    """
    when = timestamp if timestamp is not None else _utc_now()
    commit = await create_commit(session, repo_id, timestamp=when)
    return commit
142
143
144 # ---------------------------------------------------------------------------
145 # T200: correct daily pulse counts
146 # ---------------------------------------------------------------------------
147
@pytest.mark.asyncio
async def test_t200_pulse_correct_daily_counts(db_session: AsyncSession) -> None:
    """T200: pulse_buckets reports 3 commits for today and 2 for yesterday."""
    repo = await create_repo(db_session, visibility="public")
    # Both timestamps are pinned to 12:00 UTC of their calendar day.
    noon_today = _utc_now().replace(hour=12, minute=0, second=0, microsecond=0)
    noon_yesterday = noon_today - timedelta(days=1)

    # Seed 3 commits today, then 2 commits yesterday.
    for stamp, how_many in ((noon_today, 3), (noon_yesterday, 2)):
        for _ in range(how_many):
            await create_commit(db_session, repo.repo_id, timestamp=stamp)

    enrichment_by_repo = await enrich_repo_cards(db_session, [repo.repo_id])
    buckets = enrichment_by_repo[repo.repo_id].pulse_buckets

    # Buckets are matched by their ISO date string.
    today_iso = noon_today.date().isoformat()
    yesterday_iso = noon_yesterday.date().isoformat()
    today_bucket = next(bucket for bucket in buckets if bucket.date == today_iso)
    yesterday_bucket = next(bucket for bucket in buckets if bucket.date == yesterday_iso)

    assert today_bucket.count == 3
    assert yesterday_bucket.count == 2
171
172
173 # ---------------------------------------------------------------------------
174 # T201–T203: autonomy_pct
175 # ---------------------------------------------------------------------------
176
@pytest.mark.asyncio
async def test_t201_autonomy_pct_100_all_agent(db_session: AsyncSession) -> None:
    """T201: a repo whose four commits all carry an agent_id reports autonomy_pct == 100."""
    repo = await create_repo(db_session, visibility="public")
    agent_commit_total = 4
    for _ in range(agent_commit_total):
        await _add_agent_commit(db_session, repo.repo_id)

    enrichment_by_repo = await enrich_repo_cards(db_session, [repo.repo_id])
    enrichment = enrichment_by_repo[repo.repo_id]
    assert enrichment.autonomy_pct == 100
186
187
@pytest.mark.asyncio
async def test_t202_autonomy_pct_0_all_human(db_session: AsyncSession) -> None:
    """T202: a repo with three commits created via the human helper reports autonomy_pct == 0."""
    repo = await create_repo(db_session, visibility="public")
    human_commit_total = 3
    for _ in range(human_commit_total):
        await _add_human_commit(db_session, repo.repo_id)

    enrichment_by_repo = await enrich_repo_cards(db_session, [repo.repo_id])
    enrichment = enrichment_by_repo[repo.repo_id]
    assert enrichment.autonomy_pct == 0
197
198
@pytest.mark.asyncio
async def test_t203_autonomy_pct_mixed(db_session: AsyncSession) -> None:
    """T203: three agent commits plus one human commit give autonomy_pct == 75."""
    repo = await create_repo(db_session, visibility="public")
    target_repo_id = repo.repo_id

    # Agent commits are inserted first, followed by the single human commit.
    agent_commit_total = 3
    for _ in range(agent_commit_total):
        await _add_agent_commit(db_session, target_repo_id)
    await _add_human_commit(db_session, target_repo_id)

    enrichment_by_repo = await enrich_repo_cards(db_session, [target_repo_id])
    assert enrichment_by_repo[target_repo_id].autonomy_pct == 75
209
210
211 # ---------------------------------------------------------------------------
212 # T204–T205: hottest_symbol and blast_leader
213 # ---------------------------------------------------------------------------
214
@pytest.mark.asyncio
async def test_t204_hottest_symbol_highest_churn(db_session: AsyncSession) -> None:
    """T204: among three symbols, hottest_symbol is the one with the largest churn_30d."""
    repo = await create_repo(db_session, visibility="public")

    # (address, churn_30d) pairs; the middle entry has the highest churn.
    churn_by_address = (
        ("src/a.py::slow_fn", 2),
        ("src/b.py::hot_fn", 9),
        ("src/c.py::mid_fn", 5),
    )
    for address, churn in churn_by_address:
        await _insert_symbol_intel(db_session, repo.repo_id, address, churn_30d=churn)

    enrichment_by_repo = await enrich_repo_cards(db_session, [repo.repo_id])
    hottest = enrichment_by_repo[repo.repo_id].hottest_symbol

    assert hottest is not None
    assert hottest.address == "src/b.py::hot_fn"
    assert hottest.churn_30d == 9
229
230
@pytest.mark.asyncio
async def test_t205_blast_leader_highest_blast(db_session: AsyncSession) -> None:
    """T205: of two symbols, blast_leader is the one with the larger blast score."""
    repo = await create_repo(db_session, visibility="public")

    # (address, blast) pairs; the second entry has the higher score.
    blast_by_address = (
        ("src/a.py::small", 10),
        ("src/b.py::large", 847),
    )
    for address, score in blast_by_address:
        await _insert_symbol_intel(db_session, repo.repo_id, address, blast=score)

    enrichment_by_repo = await enrich_repo_cards(db_session, [repo.repo_id])
    leader = enrichment_by_repo[repo.repo_id].blast_leader

    assert leader is not None
    assert leader.address == "src/b.py::large"
    assert leader.blast == 847
244
245
246 # ---------------------------------------------------------------------------
247 # T206: dead_count confidence filtering
248 # ---------------------------------------------------------------------------
249
@pytest.mark.asyncio
async def test_t206_dead_count_only_high_confidence(db_session: AsyncSession) -> None:
    """T206: with one high, one medium and one low dead row, dead_count is 1."""
    repo = await create_repo(db_session, visibility="public")

    # One dead-symbol row per confidence level.
    dead_rows = (
        ("src/a.py::fn_high", "high"),
        ("src/b.py::fn_medium", "medium"),
        ("src/c.py::fn_low", "low"),
    )
    for address, level in dead_rows:
        await _insert_dead(db_session, repo.repo_id, address, confidence=level)

    enrichment_by_repo = await enrich_repo_cards(db_session, [repo.repo_id])
    assert enrichment_by_repo[repo.repo_id].dead_count == 1
260
261
262 # ---------------------------------------------------------------------------
263 # T207–T209: health_status via breakage + dead data
264 # ---------------------------------------------------------------------------
265
@pytest.mark.asyncio
async def test_t207_health_risk_when_breakage_errors(db_session: AsyncSession) -> None:
    """T207: a breakage meta row with 2 errors and 1 warning yields health_status 'risk'."""
    repo = await create_repo(db_session, visibility="public")
    target_repo_id = repo.repo_id
    await _insert_breakage_meta(db_session, target_repo_id, error_count=2, warning_count=1)

    enrichment_by_repo = await enrich_repo_cards(db_session, [target_repo_id])
    enrichment = enrichment_by_repo[target_repo_id]
    assert enrichment.health_status == "risk"
274
275
@pytest.mark.asyncio
async def test_t208_health_warn_when_dead_no_errors(db_session: AsyncSession) -> None:
    """T208: one high-confidence dead row and no breakage row yields health_status 'warn'."""
    repo = await create_repo(db_session, visibility="public")
    target_repo_id = repo.repo_id
    await _insert_dead(db_session, target_repo_id, "src/a.py::old_fn", confidence="high")

    enrichment_by_repo = await enrich_repo_cards(db_session, [target_repo_id])
    enrichment = enrichment_by_repo[target_repo_id]
    assert enrichment.health_status == "warn"
284
285
@pytest.mark.asyncio
async def test_t209_health_clean_no_data(db_session: AsyncSession) -> None:
    """T209: a freshly created repo with no intel rows inserted has health_status 'clean'."""
    repo = await create_repo(db_session, visibility="public")
    target_repo_id = repo.repo_id

    # Nothing is seeded: the repo is enriched exactly as created.
    enrichment_by_repo = await enrich_repo_cards(db_session, [target_repo_id])
    enrichment = enrichment_by_repo[target_repo_id]
    assert enrichment.health_status == "clean"
293
294
295 # ---------------------------------------------------------------------------
296 # T210: batching multiple repos
297 # ---------------------------------------------------------------------------
298
@pytest.mark.asyncio
async def test_t210_batches_multiple_repos(db_session: AsyncSession) -> None:
    """T210: one enrich_repo_cards call over two repos keeps each repo's data separate."""
    repo_a = await create_repo(db_session, visibility="public")
    repo_b = await create_repo(db_session, visibility="public")
    id_a, id_b = repo_a.repo_id, repo_b.repo_id

    # Repo A gets only an agent commit; repo B gets only a dead symbol.
    await _add_agent_commit(db_session, id_a)
    await _insert_dead(db_session, id_b, "src/b.py::fn", confidence="high")

    enrichment_by_repo = await enrich_repo_cards(db_session, [id_a, id_b])
    enrichment_a = enrichment_by_repo[id_a]
    enrichment_b = enrichment_by_repo[id_b]

    assert enrichment_a.autonomy_pct == 100
    assert enrichment_b.dead_count == 1
    # Data seeded for one repo must not show up on the other.
    assert enrichment_a.dead_count == 0
    assert enrichment_b.autonomy_pct == 0
315
316
317 # ---------------------------------------------------------------------------
318 # T211: safe zero-value for repos with no intel data
319 # ---------------------------------------------------------------------------
320
@pytest.mark.asyncio
async def test_t211_safe_zero_value_no_intel(db_session: AsyncSession) -> None:
    """T211: enriching a repo with nothing seeded returns zero/None fields and a full pulse."""
    repo = await create_repo(db_session, visibility="public")
    target_repo_id = repo.repo_id

    enrichment_by_repo = await enrich_repo_cards(db_session, [target_repo_id])
    enrichment = enrichment_by_repo[target_repo_id]

    assert enrichment.autonomy_pct == 0
    assert enrichment.hottest_symbol is None
    assert enrichment.blast_leader is None
    assert enrichment.dead_count == 0
    assert enrichment.error_count == 0
    assert enrichment.warning_count == 0
    # The pulse still contains one bucket per day of the window.
    assert len(enrichment.pulse_buckets) == _PULSE_DAYS
336
337
338 # ---------------------------------------------------------------------------
339 # T212: pulse always 30 buckets
340 # ---------------------------------------------------------------------------
341
@pytest.mark.asyncio
async def test_t212_pulse_always_30_buckets(db_session: AsyncSession) -> None:
    """T212: a sparse commit pattern still produces exactly _PULSE_DAYS pulse buckets."""
    repo = await create_repo(db_session, visibility="public")

    # One commit every fifth day: 0, 5, 10, 15, 20 and 25 days ago.
    for offset_days in range(0, 30, 5):
        await create_commit(db_session, repo.repo_id, timestamp=_days_ago(offset_days))

    enrichment_by_repo = await enrich_repo_cards(db_session, [repo.repo_id])
    buckets = enrichment_by_repo[repo.repo_id].pulse_buckets
    assert len(buckets) == _PULSE_DAYS
352
353
354 # ---------------------------------------------------------------------------
355 # T213: old commits excluded from pulse
356 # ---------------------------------------------------------------------------
357
@pytest.mark.asyncio
async def test_t213_commits_older_than_30d_excluded_from_pulse(db_session: AsyncSession) -> None:
    """T213: commits dated 31 and 60 days ago contribute nothing to pulse_buckets."""
    repo = await create_repo(db_session, visibility="public")

    # Both commits are dated more than 30 days before now.
    for stale_days in (31, 60):
        await create_commit(db_session, repo.repo_id, timestamp=_days_ago(stale_days))

    enrichment_by_repo = await enrich_repo_cards(db_session, [repo.repo_id])
    buckets = enrichment_by_repo[repo.repo_id].pulse_buckets

    pulse_total = sum(bucket.count for bucket in buckets)
    assert pulse_total == 0
369
370
371 # ---------------------------------------------------------------------------
372 # T214–T215: None when no qualifying intel rows
373 # ---------------------------------------------------------------------------
374
@pytest.mark.asyncio
async def test_t214_hottest_symbol_none_when_no_symbol_intel(db_session: AsyncSession) -> None:
    """T214: with no symbol intel rows inserted for the repo, hottest_symbol is None."""
    repo = await create_repo(db_session, visibility="public")
    target_repo_id = repo.repo_id

    enrichment_by_repo = await enrich_repo_cards(db_session, [target_repo_id])
    enrichment = enrichment_by_repo[target_repo_id]
    assert enrichment.hottest_symbol is None
382
383
@pytest.mark.asyncio
async def test_t215_blast_leader_none_when_all_blast_zero(db_session: AsyncSession) -> None:
    """T215: a lone symbol with churn but blast == 0 leaves blast_leader as None."""
    repo = await create_repo(db_session, visibility="public")
    target_repo_id = repo.repo_id

    # The only symbol row has non-zero churn and a zero blast score.
    await _insert_symbol_intel(db_session, target_repo_id, "src/a.py::fn", churn_30d=5, blast=0)

    enrichment_by_repo = await enrich_repo_cards(db_session, [target_repo_id])
    enrichment = enrichment_by_repo[target_repo_id]
    assert enrichment.blast_leader is None
File History 1 commit
sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595 fix: typing audit — 0 violations, 0 untyped defs across all… Sonnet 4.6 minor 20 days ago