gabriel / musehub public
test_coupling_provider.py python
1,022 lines 45.0 KB
Raw
sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595 fix: typing audit — 0 violations, 0 untyped defs across all… Sonnet 4.6 minor ⚠ breaking 20 days ago
1 """TDD spec for CouplingProvider — issue #15, Phase 5.
2
3 Verifies that CouplingProvider reproduces the same file co-change analysis
4 as ``muse code coupling``: file derivation from symbol addresses, bare-path
5 handling, mass-commit exclusion, canonical pair ordering, MAX_PAIRS cap,
6 and strict repo isolation.
7
8 Seven test tiers (49 cases)
9 ----------------------------
10 Unit CP_01 – CP_08 file derivation, heat modifier, pair canonicalisation
11 Integration CP_09 – CP_18 provider upserts, re-runs, counts
12 E2E CP_19 – CP_25 full seeded scenarios
13 Performance CP_26 – CP_32 timing bounds
14 State CP_33 – CP_38 idempotency, stale-row purge, incremental updates
15 Security CP_39 – CP_44 injection strings, repo isolation
16 Stress CP_45 – CP_49 MAX_PAIRS cap, mass-commit exclusion, BFS cap
17 """
18 from __future__ import annotations
19
20 import secrets
21 import time
22 from collections import defaultdict
23 from datetime import datetime, timezone
24
25 import pytest
26 import pytest_asyncio
27 import sqlalchemy as sa
28 from sqlalchemy.dialects.postgresql import insert as pg_insert
29 from sqlalchemy.ext.asyncio import AsyncSession
30
31 from muse.core.types import fake_id, long_id
32 from musehub.db.musehub_intel_models import MusehubIntelCoupling, MusehubSymbolHistoryEntry
33 from musehub.db.musehub_repo_models import MusehubCommit, MusehubCommitRef, MusehubRepo
34 from musehub.services.musehub_intel_providers import CouplingProvider
35 from musehub.types.json_types import JSONObject
36 from musehub.api.routes.musehub.ui_intel import _cp_heat, _cp_short
37 from tests.factories import create_repo
38
39
40 # ─────────────────────────────────────────────────────────────────────────────
41 # Helpers
42 # ─────────────────────────────────────────────────────────────────────────────
43
def _cid() -> str:
    """Return a fresh random commit id for seeding.

    64 random hex characters are generated and passed through ``long_id``.
    """
    random_hex = secrets.token_hex(32)
    return long_id(random_hex)
46
47
async def _seed_commit(
    session: AsyncSession,
    repo_id: str,
    commit_id: str,
    parent_ids: list[str] | None = None,
) -> None:
    """Insert one commit row and the ref row tying it to ``repo_id``.

    Both inserts skip silently on conflict. A missing or empty ``parent_ids``
    is stored as an empty list. Nothing is committed here; the caller owns
    the transaction.
    """
    commit_row = {
        "commit_id": commit_id,
        "message": "test commit",
        "author": "test",
        "branch": "dev",
        "parent_ids": parent_ids or [],
        "snapshot_id": None,
        "timestamp": datetime.now(timezone.utc),
    }
    insert_commit = pg_insert(MusehubCommit).values(**commit_row)
    await session.execute(insert_commit.on_conflict_do_nothing())

    insert_ref = pg_insert(MusehubCommitRef).values(
        repo_id=repo_id, commit_id=commit_id
    )
    await session.execute(insert_ref.on_conflict_do_nothing())
74
75
async def _seed_history(
    session: AsyncSession,
    repo_id: str,
    commit_id: str,
    addresses: list[str],
) -> None:
    """Record a ``"modify"`` history entry at ``commit_id`` for every address.

    One insert is issued per address, in list order, each stamped with the
    current UTC time and skipped on conflict. Nothing is committed here.
    """
    for address in addresses:
        history_row = {
            "repo_id": repo_id,
            "address": address,
            "commit_id": commit_id,
            "committed_at": datetime.now(timezone.utc),
            "op": "modify",
        }
        insert_entry = pg_insert(MusehubSymbolHistoryEntry).values(**history_row)
        await session.execute(insert_entry.on_conflict_do_nothing())
95
96
async def _run(session: AsyncSession, repo_id: str, ref: str) -> list[tuple[str, JSONObject]]:
    """Run a fresh ``CouplingProvider`` for ``repo_id`` at ``ref``.

    The fourth ``compute`` argument is always an empty dict. Returns whatever
    ``compute`` returns.
    """
    provider = CouplingProvider()
    return await provider.compute(session, repo_id, ref, {})
99
100
async def _fetch(session: AsyncSession, repo_id: str) -> list[MusehubIntelCoupling]:
    """Load the coupling rows stored for ``repo_id``, highest ``co_changes`` first."""
    query = sa.select(MusehubIntelCoupling)
    query = query.where(MusehubIntelCoupling.repo_id == repo_id)
    query = query.order_by(sa.desc(MusehubIntelCoupling.co_changes))
    rows = (await session.execute(query)).scalars().all()
    return list(rows)
108
109
110 # ─────────────────────────────────────────────────────────────────────────────
111 # Fixtures
112 # ─────────────────────────────────────────────────────────────────────────────
113
@pytest_asyncio.fixture
async def repo(db_session: AsyncSession) -> MusehubRepo:
    """Create the single repo (``testuser/couplingprovider``) most tests run against."""
    created = await create_repo(db_session, owner="testuser", slug="couplingprovider")
    return created
117
118
@pytest_asyncio.fixture
async def two_repos(db_session: AsyncSession) -> tuple[MusehubRepo, MusehubRepo]:
    """Create two repos under ``testuser`` for the isolation tests.

    ``cp-repo-1`` is created first, then ``cp-repo-2``; they are returned in
    that order.
    """
    first = await create_repo(db_session, owner="testuser", slug="cp-repo-1")
    second = await create_repo(db_session, owner="testuser", slug="cp-repo-2")
    return (first, second)
124
125
126 # ─────────────────────────────────────────────────────────────────────────────
127 # Tier 1 — Unit: file derivation, heat modifier, pair canonicalisation
128 # ─────────────────────────────────────────────────────────────────────────────
129
class TestCouplingUnit:
    """Pure-function checks; none of these tests request a database fixture."""

    @staticmethod
    def _file_of(address: str) -> str:
        """Return the text before ``::``, or the whole address when there is no ``::``."""
        if "::" not in address:
            return address
        return address.split("::")[0]

    def test_CP_01_file_from_symbol_address(self) -> None:
        """The file is the part of a symbol address before '::'."""
        assert self._file_of("src/billing.py::charge") == "src/billing.py"

    def test_CP_02_bare_path_is_file(self) -> None:
        """An address without '::' is itself the filename."""
        assert self._file_of("cloudflare") == "cloudflare"

    def test_CP_03_pair_key_canonical_a_lt_b(self) -> None:
        """Sorting a pair yields (a, b) with a < b lexicographically."""
        ordered = ["src/z.py", "src/a.py"]
        ordered.sort()
        assert tuple(ordered) == ("src/a.py", "src/z.py")

    def test_CP_04_same_file_pair_excluded(self) -> None:
        """Two symbols of one file derive the same file, so there is no pair to form."""
        symbols = ("src/billing.py::charge", "src/billing.py::refund")
        owning_files = {symbol.split("::")[0] for symbol in symbols}
        assert len(owning_files) == 1

    def test_CP_05_heat_low(self) -> None:
        """co_changes < 10 → empty modifier (accent fill)."""
        for co_changes in (1, 9):
            assert _cp_heat(co_changes) == ""

    def test_CP_06_heat_medium(self) -> None:
        """co_changes 10–19 → 'medium' modifier (warning fill)."""
        for co_changes in (10, 19):
            assert _cp_heat(co_changes) == "medium"

    def test_CP_07_heat_high(self) -> None:
        """co_changes >= 20 → 'high' modifier (danger fill)."""
        for co_changes in (20, 99):
            assert _cp_heat(co_changes) == "high"

    def test_CP_08_min_co_changes_constant(self) -> None:
        """The provider's _MIN_CO_CHANGES threshold is pinned at 2."""
        expected_threshold = 2
        assert CouplingProvider._MIN_CO_CHANGES == expected_threshold
177
178
179 # ─────────────────────────────────────────────────────────────────────────────
180 # Tier 2 — Integration: provider upserts, counts, re-runs
181 # ─────────────────────────────────────────────────────────────────────────────
182
class TestCouplingIntegration:
    """Provider runs against seeded commits and history: stored rows, counts, payload keys."""

    @staticmethod
    async def _seed_chain(
        session: AsyncSession,
        repo_id: str,
        length: int,
        addresses: list[str],
    ) -> list[str]:
        """Seed a linear chain of ``length`` commits that all touch ``addresses``.

        Commits are inserted oldest-first, each one after the first naming its
        predecessor as its only parent. History rows are then written for
        every commit and the session is committed. Returns the commit ids
        oldest-first, so ``[-1]`` is the head.
        """
        commits = [_cid() for _ in range(length)]
        parent: str | None = None
        for cid in commits:
            await _seed_commit(session, repo_id, cid, [parent] if parent else [])
            parent = cid
        for cid in commits:
            await _seed_history(session, repo_id, cid, addresses)
        await session.commit()
        return commits

    @pytest.mark.asyncio
    async def test_CP_09_empty_repo_returns_empty(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Provider on a repo with no commits returns [] and stores no rows."""
        result = await _run(db_session, repo.repo_id, _cid())
        assert result == []
        assert await _fetch(db_session, repo.repo_id) == []

    @pytest.mark.asyncio
    async def test_CP_10_no_history_entries_returns_empty(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Commits exist but no history entries → no pairs stored."""
        # An empty address list seeds the commit without any history rows.
        (head,) = await self._seed_chain(db_session, repo.repo_id, 1, [])
        result = await _run(db_session, repo.repo_id, head)
        assert result == []

    @pytest.mark.asyncio
    async def test_CP_11_single_co_change_below_threshold(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """One co-change commit → co_changes=1, below _MIN_CO_CHANGES=2, no row."""
        (head,) = await self._seed_chain(
            db_session, repo.repo_id, 1, ["src/a.py::fn_a", "src/b.py::fn_b"]
        )
        await _run(db_session, repo.repo_id, head)
        assert await _fetch(db_session, repo.repo_id) == []

    @pytest.mark.asyncio
    async def test_CP_12_two_co_changes_produces_one_pair(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Exactly 2 co-change commits → 1 pair with co_changes=2."""
        commits = await self._seed_chain(
            db_session, repo.repo_id, 2, ["src/a.py::fn_a", "src/b.py::fn_b"]
        )
        await _run(db_session, repo.repo_id, commits[-1])
        pairs = await _fetch(db_session, repo.repo_id)
        assert len(pairs) == 1
        assert pairs[0].co_changes == 2

    @pytest.mark.asyncio
    async def test_CP_13_three_files_produces_three_pairs(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Three files in a commit → 3 cross-file pairs (A↔B, A↔C, B↔C)."""
        commits = await self._seed_chain(
            db_session,
            repo.repo_id,
            2,
            ["src/a.py::fn", "src/b.py::fn", "src/c.py::fn"],
        )
        await _run(db_session, repo.repo_id, commits[-1])
        pairs = await _fetch(db_session, repo.repo_id)
        assert len(pairs) == 3

    @pytest.mark.asyncio
    async def test_CP_14_same_file_symbols_no_pair(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Two symbols from the same file never produce a pair."""
        commits = await self._seed_chain(
            db_session,
            repo.repo_id,
            2,
            ["src/billing.py::charge", "src/billing.py::refund"],
        )
        await _run(db_session, repo.repo_id, commits[-1])
        assert await _fetch(db_session, repo.repo_id) == []

    @pytest.mark.asyncio
    async def test_CP_15_pair_stored_canonical_a_lt_b(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Stored pair always has file_a <= file_b lexicographically."""
        # Addresses are deliberately seeded in reverse lexicographic order.
        commits = await self._seed_chain(
            db_session, repo.repo_id, 2, ["src/z.py::zfn", "src/a.py::afn"]
        )
        await _run(db_session, repo.repo_id, commits[-1])
        pairs = await _fetch(db_session, repo.repo_id)
        assert len(pairs) == 1
        assert pairs[0].file_a <= pairs[0].file_b

    @pytest.mark.asyncio
    async def test_CP_16_ref_column_populated(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """ref column on each row matches the HEAD ref passed to compute()."""
        commits = await self._seed_chain(
            db_session, repo.repo_id, 2, ["src/a.py::fn", "src/b.py::fn"]
        )
        head = commits[-1]
        await _run(db_session, repo.repo_id, head)
        pairs = await _fetch(db_session, repo.repo_id)
        assert all(p.ref == head for p in pairs)

    @pytest.mark.asyncio
    async def test_CP_17_co_changes_count_exact(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """co_changes is the exact number of commits where both files appeared."""
        commits = await self._seed_chain(
            db_session, repo.repo_id, 4, ["src/a.py::fn", "src/b.py::fn"]
        )
        await _run(db_session, repo.repo_id, commits[-1])
        pairs = await _fetch(db_session, repo.repo_id)
        assert pairs[0].co_changes == 4

    @pytest.mark.asyncio
    async def test_CP_18_result_key_correct(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Provider returns result tuple with key 'intel.code.coupling'."""
        commits = await self._seed_chain(
            db_session, repo.repo_id, 2, ["src/a.py::fn", "src/b.py::fn"]
        )
        result = await _run(db_session, repo.repo_id, commits[-1])
        assert len(result) == 1
        key, payload = result[0]
        assert key == "intel.code.coupling"
        assert "count" in payload
        assert "commits_analysed" in payload
        assert "truncated" in payload
339
340
341 # ─────────────────────────────────────────────────────────────────────────────
342 # Tier 3 — E2E: full seeded scenarios
343 # ─────────────────────────────────────────────────────────────────────────────
344
class TestCouplingE2E:
    """Full seeded scenarios: ranking, payload metadata, and route-style queries."""

    @staticmethod
    async def _seed_chain(
        session: AsyncSession,
        repo_id: str,
        length: int,
        addresses: list[str],
        parent: str | None = None,
    ) -> list[str]:
        """Seed a linear chain of ``length`` commits that all touch ``addresses``.

        Commits are inserted oldest-first. The first one hangs off ``parent``
        (or is a root when ``parent`` is None) and every later one names its
        predecessor as its only parent. History rows are then written for
        every commit and the session is committed. Returns the commit ids
        oldest-first, so ``[-1]`` is the head.
        """
        commits = [_cid() for _ in range(length)]
        for cid in commits:
            await _seed_commit(session, repo_id, cid, [parent] if parent else [])
            parent = cid
        for cid in commits:
            await _seed_history(session, repo_id, cid, addresses)
        await session.commit()
        return commits

    @pytest.mark.asyncio
    async def test_CP_19_three_files_correct_ranking(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """A↔B co-changes more than A↔C → A↔B ranked first."""
        # A and B change together in all 5 commits.
        commits = await self._seed_chain(
            db_session, repo.repo_id, 5, ["src/a.py::fn", "src/b.py::fn"]
        )
        # C joins them only in the two oldest commits.
        for cid in commits[:2]:
            await _seed_history(db_session, repo.repo_id, cid, ["src/c.py::fn"])
        await db_session.commit()
        await _run(db_session, repo.repo_id, commits[-1])
        top = (await _fetch(db_session, repo.repo_id))[0]
        assert top.co_changes == 5
        assert top.file_a in ("src/a.py", "src/b.py")
        assert top.file_b in ("src/a.py", "src/b.py")

    @pytest.mark.asyncio
    async def test_CP_20_result_count_matches_stored_rows(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """metadata 'count' equals the number of rows actually stored."""
        commits = await self._seed_chain(
            db_session,
            repo.repo_id,
            3,
            ["src/a.py::fn", "src/b.py::fn", "src/c.py::fn"],
        )
        result = await _run(db_session, repo.repo_id, commits[-1])
        _, payload = result[0]
        pairs = await _fetch(db_session, repo.repo_id)
        assert payload["count"] == len(pairs)

    @pytest.mark.asyncio
    async def test_CP_21_truncated_true_over_max_pairs(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """truncated=True when raw pair count exceeds MAX_PAIRS."""
        # 21 files → 210 pairs, exceeds MAX_PAIRS=200
        addrs = [f"src/file_{i}.py::fn" for i in range(21)]
        commits = await self._seed_chain(db_session, repo.repo_id, 3, addrs)
        result = await _run(db_session, repo.repo_id, commits[-1])
        _, payload = result[0]
        assert payload["truncated"] is True

    @pytest.mark.asyncio
    async def test_CP_22_min_co_filter_in_route_helpers(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Pairs with co_changes below min_co are excluded from route results."""
        # Build: A↔B = 5, A↔C = 2 → with min_co=3 only A↔B appears.
        # The A↔C commits continue the same chain on top of the A↔B head.
        commits_ab = await self._seed_chain(
            db_session, repo.repo_id, 5, ["src/a.py::fn", "src/b.py::fn"]
        )
        commits_ac = await self._seed_chain(
            db_session,
            repo.repo_id,
            2,
            ["src/a.py::fn", "src/c.py::fn"],
            parent=commits_ab[-1],
        )
        await _run(db_session, repo.repo_id, commits_ac[-1])
        # Simulate route min_co=3 filter
        result = await db_session.execute(
            sa.select(MusehubIntelCoupling)
            .where(
                MusehubIntelCoupling.repo_id == repo.repo_id,
                MusehubIntelCoupling.co_changes >= 3,
            )
            .order_by(sa.desc(MusehubIntelCoupling.co_changes))
        )
        filtered = result.scalars().all()
        assert all(p.co_changes >= 3 for p in filtered)
        assert len(filtered) == 1
        assert filtered[0].co_changes == 5

    @pytest.mark.asyncio
    async def test_CP_23_top_limit_respected(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """SQL LIMIT top correctly caps the number of rows returned."""
        # 10 files → 45 pairs
        addrs = [f"src/f{i}.py::fn" for i in range(10)]
        commits = await self._seed_chain(db_session, repo.repo_id, 3, addrs)
        await _run(db_session, repo.repo_id, commits[-1])
        result = await db_session.execute(
            sa.select(MusehubIntelCoupling)
            .where(MusehubIntelCoupling.repo_id == repo.repo_id)
            .order_by(sa.desc(MusehubIntelCoupling.co_changes))
            .limit(5)
        )
        assert len(result.scalars().all()) <= 5

    @pytest.mark.asyncio
    async def test_CP_24_heat_high_on_stored_pairs(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """_cp_heat returns 'high' for pairs with co_changes >= 20."""
        commits = await self._seed_chain(
            db_session, repo.repo_id, 22, ["src/a.py::fn", "src/b.py::fn"]
        )
        await _run(db_session, repo.repo_id, commits[-1])
        top = (await _fetch(db_session, repo.repo_id))[0]
        assert top.co_changes >= 20
        assert _cp_heat(top.co_changes) == "high"

    @pytest.mark.asyncio
    async def test_CP_25_bar_pct_100_for_top_pair(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Top pair always gets bar_pct=100 (it is the normalisation anchor)."""
        commits = await self._seed_chain(
            db_session, repo.repo_id, 5, ["src/a.py::fn", "src/b.py::fn"]
        )
        await _run(db_session, repo.repo_id, commits[-1])
        pairs = await _fetch(db_session, repo.repo_id)
        # The bar formula is reproduced inline here, normalised against the top row.
        max_co = pairs[0].co_changes
        bar_pct = round((pairs[0].co_changes / max_co) * 100)
        assert bar_pct == 100
514
515
516 # ─────────────────────────────────────────────────────────────────────────────
517 # Tier 4 — Performance: timing bounds
518 # ─────────────────────────────────────────────────────────────────────────────
519
class TestCouplingPerformance:
    """Wall-clock bounds on provider runs and on the follow-up read queries.

    Durations are measured with ``time.perf_counter()``, the clock Python
    documents as having the highest available resolution for short intervals.
    Several bounds here are 10–50 ms.
    """

    @staticmethod
    async def _seed_chain(
        session: AsyncSession,
        repo_id: str,
        length: int,
        addresses: list[str],
    ) -> list[str]:
        """Seed a linear chain of ``length`` commits that all touch ``addresses``.

        Commits are inserted oldest-first, each one after the first naming its
        predecessor as its only parent. History rows are then written for
        every commit and the session is committed. Returns the commit ids
        oldest-first, so ``[-1]`` is the head. Seeding is never timed.
        """
        commits = [_cid() for _ in range(length)]
        parent: str | None = None
        for cid in commits:
            await _seed_commit(session, repo_id, cid, [parent] if parent else [])
            parent = cid
        for cid in commits:
            await _seed_history(session, repo_id, cid, addresses)
        await session.commit()
        return commits

    @pytest.mark.asyncio
    async def test_CP_26_ten_commits_ten_files_under_500ms(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """10 commits × 10 files completes in under 500 ms."""
        addrs = [f"src/f{i}.py::fn" for i in range(10)]
        commits = await self._seed_chain(db_session, repo.repo_id, 10, addrs)
        started = time.perf_counter()
        await _run(db_session, repo.repo_id, commits[-1])
        assert time.perf_counter() - started < 0.5

    @pytest.mark.asyncio
    async def test_CP_27_100_commits_20_files_under_2s(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """100 commits × 20 files completes in under 2 s."""
        addrs = [f"src/f{i}.py::fn" for i in range(20)]
        commits = await self._seed_chain(db_session, repo.repo_id, 100, addrs)
        started = time.perf_counter()
        await _run(db_session, repo.repo_id, commits[-1])
        assert time.perf_counter() - started < 2.0

    @pytest.mark.asyncio
    async def test_CP_28_empty_repo_fast_path_under_50ms(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Empty repo fast-path exits under 50 ms."""
        started = time.perf_counter()
        await _run(db_session, repo.repo_id, _cid())
        assert time.perf_counter() - started < 0.05

    @pytest.mark.asyncio
    async def test_CP_29_rerun_not_5x_slower(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Second run is not more than 5× slower than the first."""
        commits = await self._seed_chain(
            db_session, repo.repo_id, 2, ["src/a.py::fn", "src/b.py::fn"]
        )
        head = commits[-1]

        started = time.perf_counter()
        await _run(db_session, repo.repo_id, head)
        first_run = time.perf_counter() - started

        started = time.perf_counter()
        await _run(db_session, repo.repo_id, head)
        second_run = time.perf_counter() - started

        # The 0.5 s floor keeps a very fast first run from making the bound unmeetable.
        assert second_run < max(first_run * 5, 0.5)

    @pytest.mark.asyncio
    async def test_CP_30_point_lookup_under_10ms(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Fetching pairs for a repo is sub-10 ms after the provider run."""
        commits = await self._seed_chain(
            db_session, repo.repo_id, 2, ["src/a.py::fn", "src/b.py::fn"]
        )
        await _run(db_session, repo.repo_id, commits[-1])
        started = time.perf_counter()
        await _fetch(db_session, repo.repo_id)
        assert time.perf_counter() - started < 0.01

    @pytest.mark.asyncio
    async def test_CP_31_200_pairs_query_fast(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Fetching full 200-pair leaderboard is sub-50 ms."""
        # 21 files → 210 pairs → stored as 200 (MAX_PAIRS)
        addrs = [f"src/f{i}.py::fn" for i in range(21)]
        commits = await self._seed_chain(db_session, repo.repo_id, 3, addrs)
        await _run(db_session, repo.repo_id, commits[-1])
        started = time.perf_counter()
        await _fetch(db_session, repo.repo_id)
        assert time.perf_counter() - started < 0.05

    @pytest.mark.asyncio
    async def test_CP_32_dashboard_preview_query_fast(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Dashboard preview (top 3, LIMIT query) completes under 20 ms."""
        addrs = [f"src/f{i}.py::fn" for i in range(6)]
        commits = await self._seed_chain(db_session, repo.repo_id, 3, addrs)
        await _run(db_session, repo.repo_id, commits[-1])
        started = time.perf_counter()
        await db_session.execute(
            sa.select(MusehubIntelCoupling)
            .where(MusehubIntelCoupling.repo_id == repo.repo_id)
            .order_by(sa.desc(MusehubIntelCoupling.co_changes))
            .limit(3)
        )
        assert time.perf_counter() - started < 0.02
647
648
649 # ─────────────────────────────────────────────────────────────────────────────
650 # Tier 5 — State: idempotency, stale-row purge, incremental updates
651 # ─────────────────────────────────────────────────────────────────────────────
652
class TestCouplingState:
    """Re-run behaviour: idempotency, row counts across runs, growth after new commits."""

    @staticmethod
    async def _seed_chain(
        session: AsyncSession,
        repo_id: str,
        length: int,
        addresses: list[str],
        parent: str | None = None,
    ) -> list[str]:
        """Seed a linear chain of ``length`` commits that all touch ``addresses``.

        Commits are inserted oldest-first. The first one hangs off ``parent``
        (or is a root when ``parent`` is None) and every later one names its
        predecessor as its only parent. History rows are then written for
        every commit and the session is committed. Returns the commit ids
        oldest-first, so ``[-1]`` is the head.
        """
        commits = [_cid() for _ in range(length)]
        for cid in commits:
            await _seed_commit(session, repo_id, cid, [parent] if parent else [])
            parent = cid
        for cid in commits:
            await _seed_history(session, repo_id, cid, addresses)
        await session.commit()
        return commits

    @pytest.mark.asyncio
    async def test_CP_33_idempotent_two_runs(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Running the provider twice produces identical rows."""
        commits = await self._seed_chain(
            db_session, repo.repo_id, 2, ["src/a.py::fn", "src/b.py::fn"]
        )
        head = commits[-1]
        await _run(db_session, repo.repo_id, head)
        first = {(p.file_a, p.file_b, p.co_changes)
                 for p in await _fetch(db_session, repo.repo_id)}
        await _run(db_session, repo.repo_id, head)
        second = {(p.file_a, p.file_b, p.co_changes)
                  for p in await _fetch(db_session, repo.repo_id)}
        assert first == second

    @pytest.mark.asyncio
    async def test_CP_34_stale_rows_purged_on_rerun(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """The row count for the repo is unchanged by a second run on the same ref."""
        commits = await self._seed_chain(
            db_session, repo.repo_id, 2, ["src/a.py::fn", "src/b.py::fn"]
        )
        head = commits[-1]
        # The same COUNT(*) statement is executed after each run.
        count_rows = (
            sa.select(sa.func.count())
            .select_from(MusehubIntelCoupling)
            .where(MusehubIntelCoupling.repo_id == repo.repo_id)
        )
        await _run(db_session, repo.repo_id, head)
        count_after_first = (await db_session.execute(count_rows)).scalar_one()
        await _run(db_session, repo.repo_id, head)
        count_after_second = (await db_session.execute(count_rows)).scalar_one()
        assert count_after_first == count_after_second

    @pytest.mark.asyncio
    async def test_CP_35_incremental_new_pair_appears(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """After adding commits, a new pair materialises on re-run."""
        older = await self._seed_chain(
            db_session, repo.repo_id, 2, ["src/a.py::fn", "src/b.py::fn"]
        )
        await _run(db_session, repo.repo_id, older[-1])
        before = len(await _fetch(db_session, repo.repo_id))

        # Two more commits on top of the old head couple a different file pair.
        newer = await self._seed_chain(
            db_session,
            repo.repo_id,
            2,
            ["src/c.py::fn", "src/d.py::fn"],
            parent=older[-1],
        )
        await _run(db_session, repo.repo_id, newer[-1])
        after = len(await _fetch(db_session, repo.repo_id))
        assert after > before

    @pytest.mark.asyncio
    async def test_CP_36_no_duplicate_pairs_after_3_runs(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """No duplicate (file_a, file_b) rows after 3 consecutive runs."""
        commits = await self._seed_chain(
            db_session, repo.repo_id, 2, ["src/a.py::fn", "src/b.py::fn"]
        )
        for _ in range(3):
            await _run(db_session, repo.repo_id, commits[-1])
        pairs = await _fetch(db_session, repo.repo_id)
        keys = [(p.file_a, p.file_b) for p in pairs]
        assert len(keys) == len(set(keys))

    @pytest.mark.asyncio
    async def test_CP_37_co_changes_increases_with_new_commits(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """co_changes increases when more co-change commits are added."""
        older = await self._seed_chain(
            db_session, repo.repo_id, 2, ["src/a.py::fn", "src/b.py::fn"]
        )
        await _run(db_session, repo.repo_id, older[-1])
        before = (await _fetch(db_session, repo.repo_id))[0].co_changes

        # One further commit touching the same two files extends the chain.
        (new_head,) = await self._seed_chain(
            db_session,
            repo.repo_id,
            1,
            ["src/a.py::fn", "src/b.py::fn"],
            parent=older[-1],
        )
        await _run(db_session, repo.repo_id, new_head)
        after = (await _fetch(db_session, repo.repo_id))[0].co_changes
        assert after > before

    @pytest.mark.asyncio
    async def test_CP_38_truncated_false_when_under_cap(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """truncated=False when pair count is within MAX_PAIRS."""
        commits = await self._seed_chain(
            db_session, repo.repo_id, 2, ["src/a.py::fn", "src/b.py::fn"]
        )
        result = await _run(db_session, repo.repo_id, commits[-1])
        _, payload = result[0]
        assert payload["truncated"] is False
782
783
784 # ─────────────────────────────────────────────────────────────────────────────
785 # Tier 6 — Security: injection, isolation, unicode
786 # ─────────────────────────────────────────────────────────────────────────────
787
788 class TestCouplingSecurity:
789
@pytest.mark.asyncio
async def test_CP_39_sql_injection_stored_verbatim(
    self, db_session: AsyncSession, repo: MusehubRepo
) -> None:
    """SQL injection text in a symbol address is inert; the pair is still stored.

    The payload sits after ``::``, so the file derived from the address is
    plain ``src/a.py``. The coupling table must still be queryable afterwards
    and hold the one expected pair.
    """
    inject = "src/a.py::fn'; DROP TABLE musehub_intel_coupling; --"
    c1, c2 = _cid(), _cid()
    await _seed_commit(db_session, repo.repo_id, c1)
    await _seed_commit(db_session, repo.repo_id, c2, [c1])
    for cid in (c1, c2):
        await _seed_history(db_session, repo.repo_id, cid,
                            [inject, "src/b.py::fn"])
    await db_session.commit()
    await _run(db_session, repo.repo_id, c2)
    pairs = await _fetch(db_session, repo.repo_id)
    # _fetch always returns a list, so an isinstance check could never fail;
    # assert on the stored row instead.
    assert len(pairs) == 1
    assert pairs[0].co_changes == 2
    assert {pairs[0].file_a, pairs[0].file_b} == {"src/a.py", "src/b.py"}
806
@pytest.mark.asyncio
async def test_CP_40_xss_payload_stored_safely(
    self, db_session: AsyncSession, repo: MusehubRepo
) -> None:
    """An XSS payload in a file path is stored as plain text, unmodified.

    Here the markup is in the part before ``::``, so it is part of the file
    name that ends up in the stored pair.
    """
    xss_file = "src/<script>alert(1)</script>.py"
    xss = f"{xss_file}::fn"
    c1, c2 = _cid(), _cid()
    await _seed_commit(db_session, repo.repo_id, c1)
    await _seed_commit(db_session, repo.repo_id, c2, [c1])
    for cid in (c1, c2):
        await _seed_history(db_session, repo.repo_id, cid,
                            [xss, "src/b.py::fn"])
    await db_session.commit()
    await _run(db_session, repo.repo_id, c2)
    pairs = await _fetch(db_session, repo.repo_id)
    # _fetch always returns a list, so an isinstance check could never fail;
    # assert that the path round-trips byte-for-byte instead.
    assert len(pairs) == 1
    assert {pairs[0].file_a, pairs[0].file_b} == {xss_file, "src/b.py"}
823
@pytest.mark.asyncio
async def test_CP_41_repo_isolation_strict(
    self, db_session: AsyncSession, two_repos: tuple[MusehubRepo, MusehubRepo]
) -> None:
    """Running the provider for the first repo leaves the second repo with no rows."""
    seeded, untouched = two_repos
    older, head = _cid(), _cid()
    await _seed_commit(db_session, seeded.repo_id, older)
    await _seed_commit(db_session, seeded.repo_id, head, [older])
    for commit_id in (older, head):
        await _seed_history(
            db_session, seeded.repo_id, commit_id, ["src/a.py::fn", "src/b.py::fn"]
        )
    await db_session.commit()
    await _run(db_session, seeded.repo_id, head)
    rows_for_other_repo = await _fetch(db_session, untouched.repo_id)
    assert rows_for_other_repo == []
839
840 @pytest.mark.asyncio
async def test_CP_42_two_repos_independent_pairs(
    self, db_session: AsyncSession, two_repos: tuple[MusehubRepo, MusehubRepo]
) -> None:
    """Each repo ends up with exactly one pair, tagged with its own repo_id.

    The same two-commit, two-file scenario is seeded and run separately
    for both repos before the stored rows are compared.
    """
    first_repo, second_repo = two_repos
    addresses = ["src/a.py::fn", "src/b.py::fn"]

    for current in (first_repo, second_repo):
        parent_id = _cid()
        head_id = _cid()
        await _seed_commit(db_session, current.repo_id, parent_id)
        await _seed_commit(db_session, current.repo_id, head_id, [parent_id])
        for commit_id in (parent_id, head_id):
            await _seed_history(
                db_session, current.repo_id, commit_id, addresses
            )
        await db_session.commit()
        await _run(db_session, current.repo_id, head_id)

    first_pairs = await _fetch(db_session, first_repo.repo_id)
    second_pairs = await _fetch(db_session, second_repo.repo_id)

    assert len(first_pairs) == 1
    assert first_pairs[0].repo_id == first_repo.repo_id
    assert len(second_pairs) == 1
    assert second_pairs[0].repo_id == second_repo.repo_id
859
860 @pytest.mark.asyncio
async def test_CP_43_rerun_updates_ref_column(
    self, db_session: AsyncSession, repo: MusehubRepo
) -> None:
    """Running again at a newer ref leaves every stored row on that ref.

    A three-commit chain is seeded; the provider runs at the middle commit
    and then at the tip, after which all rows must carry the tip as ``ref``.
    """
    addresses = ["src/a.py::fn", "src/b.py::fn"]
    root_id = _cid()
    middle_id = _cid()
    tip_id = _cid()

    # Linear chain: root_id <- middle_id <- tip_id.
    await _seed_commit(db_session, repo.repo_id, root_id)
    await _seed_commit(db_session, repo.repo_id, middle_id, [root_id])
    await _seed_commit(db_session, repo.repo_id, tip_id, [middle_id])
    for commit_id in (root_id, middle_id, tip_id):
        await _seed_history(db_session, repo.repo_id, commit_id, addresses)
    await db_session.commit()

    await _run(db_session, repo.repo_id, middle_id)
    await _run(db_session, repo.repo_id, tip_id)

    stored = await _fetch(db_session, repo.repo_id)
    assert all(row.ref == tip_id for row in stored)
877
878 @pytest.mark.asyncio
async def test_CP_44_unicode_in_path_handled(
    self, db_session: AsyncSession, repo: MusehubRepo
) -> None:
    """Non-ASCII characters in an address must not crash the provider.

    Only checks that the run completes and that the follow-up fetch of
    the repo's coupling rows returns a list.
    """
    addresses = ["src/música.py::canción", "src/b.py::fn"]
    parent_id = _cid()
    head_id = _cid()

    # Two-commit chain: parent_id <- head_id.
    await _seed_commit(db_session, repo.repo_id, parent_id)
    await _seed_commit(db_session, repo.repo_id, head_id, [parent_id])
    for commit_id in (parent_id, head_id):
        await _seed_history(db_session, repo.repo_id, commit_id, addresses)
    await db_session.commit()

    await _run(db_session, repo.repo_id, head_id)
    stored = await _fetch(db_session, repo.repo_id)
    assert isinstance(stored, list)
892
893
894 # ─────────────────────────────────────────────────────────────────────────────
895 # Tier 7 — Stress: MAX_PAIRS cap, mass-commit exclusion, BFS cap
896 # ─────────────────────────────────────────────────────────────────────────────
897
class TestCouplingStress:
    """Tier 7 stress cases (CP_45 – CP_49).

    Each test seeds a linear commit chain plus per-commit symbol history,
    runs the provider at the chain tip and inspects either the stored
    coupling rows or the payload returned by the run.
    """

    @pytest.mark.asyncio
    async def test_CP_45_max_pairs_cap_respected(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Stored pair count never exceeds MAX_PAIRS."""
        provider = CouplingProvider()
        commits = [_cid() for _ in range(3)]
        prev = None
        for cid in commits:
            await _seed_commit(db_session, repo.repo_id, cid,
                               [prev] if prev else [])
            prev = cid
        # 21 files → C(21, 2) = 210 candidate pairs, meant to exceed the cap.
        # NOTE(review): this assumes _MAX_PAIRS is 200 — confirm against
        # CouplingProvider if the constant ever changes.
        addrs = [f"src/file_{i}.py::fn" for i in range(21)]
        for cid in commits:
            await _seed_history(db_session, repo.repo_id, cid, addrs)
        await db_session.commit()
        await _run(db_session, repo.repo_id, commits[-1])
        pairs = await _fetch(db_session, repo.repo_id)
        assert len(pairs) <= provider._MAX_PAIRS

    @pytest.mark.asyncio
    async def test_CP_46_mass_commit_excluded(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Commits touching > MAX_FILES_PER_COMMIT files are skipped.

        Two ordinary commits co-change ``src/a.py`` and ``src/b.py``; a third
        commit touches more files than the per-commit limit.  The a/b pair
        must survive, and none of the oversized commit's files may appear in
        any stored pair.
        """
        provider = CouplingProvider()
        c_good1, c_good2, c_mass = _cid(), _cid(), _cid()
        await _seed_commit(db_session, repo.repo_id, c_good1)
        await _seed_commit(db_session, repo.repo_id, c_good2, [c_good1])
        await _seed_commit(db_session, repo.repo_id, c_mass, [c_good2])
        for cid in [c_good1, c_good2]:
            await _seed_history(db_session, repo.repo_id, cid,
                                ["src/a.py::fn", "src/b.py::fn"])
        # Mass commit: 50 distinct files more than the per-commit limit.
        big_addrs = [f"src/gen_{i}.py::fn"
                     for i in range(provider._MAX_FILES_PER_COMMIT + 50)]
        await _seed_history(db_session, repo.repo_id, c_mass, big_addrs)
        await db_session.commit()
        await _run(db_session, repo.repo_id, c_mass)
        pairs = await _fetch(db_session, repo.repo_id)
        # The A↔B pair from good commits must still be present
        assert any(
            "src/a.py" in (p.file_a, p.file_b) for p in pairs
        )
        # ...and the mass commit must contribute nothing.  Without this check
        # the test would pass even if the oversized commit were analysed.
        mass_files = {addr.split("::", 1)[0] for addr in big_addrs}
        assert not any(
            p.file_a in mass_files or p.file_b in mass_files for p in pairs
        )

    @pytest.mark.asyncio
    async def test_CP_47_500_commits_completes(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """500 commits × 5 files completes without error."""
        commits = [_cid() for _ in range(500)]
        prev = None
        for cid in commits:
            await _seed_commit(db_session, repo.repo_id, cid,
                               [prev] if prev else [])
            prev = cid
        addrs = [f"src/f{i}.py::fn" for i in range(5)]
        for cid in commits:
            await _seed_history(db_session, repo.repo_id, cid, addrs)
        await db_session.commit()
        result = await _run(db_session, repo.repo_id, commits[-1])
        # A truthy result means the run produced at least one entry.
        assert result

    @pytest.mark.asyncio
    async def test_CP_48_result_count_matches_stored(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """metadata 'count' always equals len(stored rows)."""
        commits = [_cid() for _ in range(4)]
        prev = None
        for cid in commits:
            await _seed_commit(db_session, repo.repo_id, cid,
                               [prev] if prev else [])
            prev = cid
        addrs = [f"src/f{i}.py::fn" for i in range(6)]
        for cid in commits:
            await _seed_history(db_session, repo.repo_id, cid, addrs)
        await db_session.commit()
        result = await _run(db_session, repo.repo_id, commits[-1])
        # Each result entry is a (key, payload) pair; only the payload matters.
        _, payload = result[0]
        stored = await _fetch(db_session, repo.repo_id)
        assert payload["count"] == len(stored)

    @pytest.mark.asyncio
    async def test_CP_49_bfs_walk_cap(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """commits_analysed never exceeds MAX_WALK.

        Only the oldest of the 50 chained commits has symbol history.  The
        bound is checked only when the run returns a non-empty result.
        """
        provider = CouplingProvider()
        commits = [_cid() for _ in range(50)]
        prev = None
        for cid in commits:
            await _seed_commit(db_session, repo.repo_id, cid,
                               [prev] if prev else [])
            prev = cid
        await _seed_history(db_session, repo.repo_id, commits[0],
                            ["src/a.py::fn", "src/b.py::fn"])
        await db_session.commit()
        result = await _run(db_session, repo.repo_id, commits[-1])
        if result:
            _, payload = result[0]
            assert payload["commits_analysed"] <= provider._MAX_WALK
1003
1004
1005 # ─────────────────────────────────────────────────────────────────────────────
1006 # Helpers — _cp_short correctness
1007 # ─────────────────────────────────────────────────────────────────────────────
1008
class TestCpShort:
    """Checks how ``_cp_short`` abbreviates slash-separated paths for display."""

    def test_deep_path_truncated_to_two_parts(self) -> None:
        """A three-component path keeps only its last two components."""
        shortened = _cp_short("musehub/services/musehub_wire.py")
        assert shortened == "services/musehub_wire.py"

    def test_single_component_unchanged(self) -> None:
        """A bare file name comes back as-is."""
        bare_name = "musehub_wire.py"
        assert _cp_short(bare_name) == bare_name

    def test_two_components_unchanged(self) -> None:
        """A path that already has two components comes back as-is."""
        two_part = "services/musehub_wire.py"
        assert _cp_short(two_part) == two_part

    def test_very_deep_path(self) -> None:
        """A five-component path is cut down to its last two components."""
        shortened = _cp_short("a/b/c/d/e.py")
        assert shortened == "d/e.py"
File History 1 commit
sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595 fix: typing audit — 0 violations, 0 untyped defs across all… Sonnet 4.6 minor 20 days ago