test_coupling_provider.py
python
sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595
fix: typing audit — 0 violations, 0 untyped defs across all…
Sonnet 4.6
minor
⚠ breaking
20 days ago
| 1 | """TDD spec for CouplingProvider — issue #15, Phase 5. |
| 2 | |
| 3 | Verifies that CouplingProvider reproduces the same file co-change analysis |
| 4 | as ``muse code coupling``: file derivation from symbol addresses, bare-path |
| 5 | handling, mass-commit exclusion, canonical pair ordering, MAX_PAIRS cap, |
| 6 | and strict repo isolation. |
| 7 | |
| 8 | Seven test tiers (49 cases) |
| 9 | ---------------------------- |
| 10 | Unit CP_01 – CP_08 file derivation, heat modifier, pair canonicalisation |
| 11 | Integration CP_09 – CP_18 provider upserts, re-runs, counts |
| 12 | E2E CP_19 – CP_25 full seeded scenarios |
| 13 | Performance CP_26 – CP_32 timing bounds |
| 14 | State CP_33 – CP_38 idempotency, stale-row purge, incremental updates |
| 15 | Security CP_39 – CP_44 injection strings, repo isolation |
| 16 | Stress CP_45 – CP_49 MAX_PAIRS cap, mass-commit exclusion, BFS cap |
| 17 | """ |
| 18 | from __future__ import annotations |
| 19 | |
| 20 | import secrets |
| 21 | import time |
| 22 | from collections import defaultdict |
| 23 | from datetime import datetime, timezone |
| 24 | |
| 25 | import pytest |
| 26 | import pytest_asyncio |
| 27 | import sqlalchemy as sa |
| 28 | from sqlalchemy.dialects.postgresql import insert as pg_insert |
| 29 | from sqlalchemy.ext.asyncio import AsyncSession |
| 30 | |
| 31 | from muse.core.types import fake_id, long_id |
| 32 | from musehub.db.musehub_intel_models import MusehubIntelCoupling, MusehubSymbolHistoryEntry |
| 33 | from musehub.db.musehub_repo_models import MusehubCommit, MusehubCommitRef, MusehubRepo |
| 34 | from musehub.services.musehub_intel_providers import CouplingProvider |
| 35 | from musehub.types.json_types import JSONObject |
| 36 | from musehub.api.routes.musehub.ui_intel import _cp_heat, _cp_short |
| 37 | from tests.factories import create_repo |
| 38 | |
| 39 | |
| 40 | # ───────────────────────────────────────────────────────────────────────────── |
| 41 | # Helpers |
| 42 | # ───────────────────────────────────────────────────────────────────────────── |
| 43 | |
def _cid() -> str:
    """Return a new random commit id for seeding.

    32 random bytes are rendered as hex by ``secrets.token_hex`` and the
    resulting string is passed through ``long_id``.
    """
    random_hex = secrets.token_hex(32)
    return long_id(random_hex)
| 46 | |
| 47 | |
async def _seed_commit(
    session: AsyncSession,
    repo_id: str,
    commit_id: str,
    parent_ids: list[str] | None = None,
) -> None:
    """Insert one commit row plus the ref row that attaches it to *repo_id*.

    Both statements are PostgreSQL inserts with ``ON CONFLICT DO NOTHING``,
    so a conflicting row that already exists is left untouched.  The session
    is not committed here.

    Args:
        session: Async session the two statements are executed on.
        repo_id: Repository the commit ref row points at.
        commit_id: Identifier written to both rows.
        parent_ids: Parent commit ids; ``None`` or an empty list is stored
            as ``[]``.
    """
    commit_values = {
        "commit_id": commit_id,
        "message": "test commit",
        "author": "test",
        "branch": "dev",
        "parent_ids": parent_ids or [],
        "snapshot_id": None,
        "timestamp": datetime.now(timezone.utc),
    }
    insert_commit = pg_insert(MusehubCommit).values(**commit_values)
    await session.execute(insert_commit.on_conflict_do_nothing())

    insert_ref = pg_insert(MusehubCommitRef).values(
        repo_id=repo_id, commit_id=commit_id
    )
    await session.execute(insert_ref.on_conflict_do_nothing())
| 74 | |
| 75 | |
async def _seed_history(
    session: AsyncSession,
    repo_id: str,
    commit_id: str,
    addresses: list[str],
) -> None:
    """Record a ``modify`` history entry for each address under *commit_id*.

    One PostgreSQL insert with ``ON CONFLICT DO NOTHING`` is executed per
    address, each carrying its own ``committed_at`` timestamp.  An empty
    *addresses* list executes nothing.  The session is not committed here.

    Args:
        session: Async session the inserts are executed on.
        repo_id: Repository the entries belong to.
        commit_id: Commit the entries are attributed to.
        addresses: Address strings, one history row each.
    """
    for address in addresses:
        entry = {
            "repo_id": repo_id,
            "address": address,
            "commit_id": commit_id,
            "committed_at": datetime.now(timezone.utc),
            "op": "modify",
        }
        insert_entry = pg_insert(MusehubSymbolHistoryEntry).values(**entry)
        await session.execute(insert_entry.on_conflict_do_nothing())
| 95 | |
| 96 | |
async def _run(session: AsyncSession, repo_id: str, ref: str) -> list[tuple[str, JSONObject]]:
    """Run a freshly constructed ``CouplingProvider`` and return its result.

    ``compute`` is awaited with *session*, *repo_id*, *ref* and an empty
    dict as its fourth argument.
    """
    provider = CouplingProvider()
    outcome = await provider.compute(session, repo_id, ref, {})
    return outcome
| 99 | |
| 100 | |
async def _fetch(session: AsyncSession, repo_id: str) -> list[MusehubIntelCoupling]:
    """Load every coupling row of *repo_id*, highest ``co_changes`` first."""
    by_co_changes_desc = sa.desc(MusehubIntelCoupling.co_changes)
    query = (
        sa.select(MusehubIntelCoupling)
        .where(MusehubIntelCoupling.repo_id == repo_id)
        .order_by(by_co_changes_desc)
    )
    rows = await session.execute(query)
    return list(rows.scalars().all())
| 108 | |
| 109 | |
| 110 | # ───────────────────────────────────────────────────────────────────────────── |
| 111 | # Fixtures |
| 112 | # ───────────────────────────────────────────────────────────────────────────── |
| 113 | |
@pytest_asyncio.fixture
async def repo(db_session: AsyncSession) -> MusehubRepo:
    """Provide one repo created through ``create_repo`` for ``testuser``."""
    created = await create_repo(
        db_session,
        owner="testuser",
        slug="couplingprovider",
    )
    return created
| 117 | |
| 118 | |
@pytest_asyncio.fixture
async def two_repos(db_session: AsyncSession) -> tuple[MusehubRepo, MusehubRepo]:
    """Provide two repos of the same owner, created in slug order 1 then 2."""
    first = await create_repo(db_session, owner="testuser", slug="cp-repo-1")
    second = await create_repo(db_session, owner="testuser", slug="cp-repo-2")
    return first, second
| 124 | |
| 125 | |
| 126 | # ───────────────────────────────────────────────────────────────────────────── |
| 127 | # Tier 1 — Unit: file derivation, heat modifier, pair canonicalisation |
| 128 | # ───────────────────────────────────────────────────────────────────────────── |
| 129 | |
class TestCouplingUnit:
    """Tier 1 — checks that need no database session."""

    def test_CP_01_file_from_symbol_address(self) -> None:
        """The part before ``::`` in a symbol address is the file."""
        address = "src/billing.py::charge"
        if "::" in address:
            derived = address.split("::")[0]
        else:
            derived = address
        assert derived == "src/billing.py"

    def test_CP_02_bare_path_is_file(self) -> None:
        """An address without ``::`` is used as the file name unchanged."""
        address = "cloudflare"
        if "::" in address:
            derived = address.split("::")[0]
        else:
            derived = address
        assert derived == "cloudflare"

    def test_CP_03_pair_key_canonical_a_lt_b(self) -> None:
        """Sorting two files puts the lexicographically smaller one first."""
        unordered = ["src/z.py", "src/a.py"]
        lower, upper = sorted(unordered)
        assert (lower, upper) == ("src/a.py", "src/z.py")

    def test_CP_04_same_file_pair_excluded(self) -> None:
        """Two symbols of one file derive the same file, so no cross-file pair."""
        addresses = ("src/billing.py::charge", "src/billing.py::refund")
        first_file, second_file = (addr.split("::")[0] for addr in addresses)
        assert first_file == second_file

    def test_CP_05_heat_low(self) -> None:
        """co_changes below 10 map to the empty modifier (accent fill)."""
        for co_changes in (1, 9):
            assert _cp_heat(co_changes) == ""

    def test_CP_06_heat_medium(self) -> None:
        """co_changes 10–19 map to the 'medium' modifier (warning fill)."""
        for co_changes in (10, 19):
            assert _cp_heat(co_changes) == "medium"

    def test_CP_07_heat_high(self) -> None:
        """co_changes of 20 or more map to the 'high' modifier (danger fill)."""
        for co_changes in (20, 99):
            assert _cp_heat(co_changes) == "high"

    def test_CP_08_min_co_changes_constant(self) -> None:
        """``_MIN_CO_CHANGES`` is pinned at 2; pairs below it count as noise."""
        threshold = CouplingProvider._MIN_CO_CHANGES
        assert threshold == 2
| 177 | |
| 178 | |
| 179 | # ───────────────────────────────────────────────────────────────────────────── |
| 180 | # Tier 2 — Integration: provider upserts, counts, re-runs |
| 181 | # ───────────────────────────────────────────────────────────────────────────── |
| 182 | |
class TestCouplingIntegration:
    """Tier 2 — run the provider on seeded rows and inspect what it stored."""

    @staticmethod
    async def _chain(session: AsyncSession, repo_id: str, length: int) -> list[str]:
        """Seed *length* commits as one parent chain; return ids oldest-first."""
        commit_ids = [_cid() for _ in range(length)]
        parent = None
        for commit_id in commit_ids:
            await _seed_commit(session, repo_id, commit_id, [parent] if parent else [])
            parent = commit_id
        return commit_ids

    @staticmethod
    async def _touch(
        session: AsyncSession,
        repo_id: str,
        commit_ids: list[str],
        addresses: list[str],
    ) -> None:
        """Seed a history entry for every address in each of *commit_ids*."""
        for commit_id in commit_ids:
            await _seed_history(session, repo_id, commit_id, addresses)

    @pytest.mark.asyncio
    async def test_CP_09_empty_repo_returns_empty(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """With nothing seeded, compute() returns [] and no rows are stored."""
        outcome = await _run(db_session, repo.repo_id, _cid())
        assert outcome == []
        stored = await _fetch(db_session, repo.repo_id)
        assert stored == []

    @pytest.mark.asyncio
    async def test_CP_10_no_history_entries_returns_empty(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """A commit without any history entries yields an empty result."""
        repo_id = repo.repo_id
        (head,) = await self._chain(db_session, repo_id, 1)
        await db_session.commit()
        outcome = await _run(db_session, repo_id, head)
        assert outcome == []

    @pytest.mark.asyncio
    async def test_CP_11_single_co_change_below_threshold(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """A single co-change commit (below _MIN_CO_CHANGES=2) stores no row."""
        repo_id = repo.repo_id
        (head,) = await self._chain(db_session, repo_id, 1)
        await self._touch(
            db_session, repo_id, [head], ["src/a.py::fn_a", "src/b.py::fn_b"]
        )
        await db_session.commit()
        await _run(db_session, repo_id, head)
        assert await _fetch(db_session, repo_id) == []

    @pytest.mark.asyncio
    async def test_CP_12_two_co_changes_produces_one_pair(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Two commits touching the same two files give one pair, co_changes=2."""
        repo_id = repo.repo_id
        chain = await self._chain(db_session, repo_id, 2)
        await self._touch(
            db_session, repo_id, chain, ["src/a.py::fn_a", "src/b.py::fn_b"]
        )
        await db_session.commit()
        await _run(db_session, repo_id, chain[-1])
        stored = await _fetch(db_session, repo_id)
        assert len(stored) == 1
        assert stored[0].co_changes == 2

    @pytest.mark.asyncio
    async def test_CP_13_three_files_produces_three_pairs(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Three co-changing files give three pairs (A↔B, A↔C, B↔C)."""
        repo_id = repo.repo_id
        chain = await self._chain(db_session, repo_id, 2)
        await self._touch(
            db_session,
            repo_id,
            chain,
            ["src/a.py::fn", "src/b.py::fn", "src/c.py::fn"],
        )
        await db_session.commit()
        await _run(db_session, repo_id, chain[-1])
        stored = await _fetch(db_session, repo_id)
        assert len(stored) == 3

    @pytest.mark.asyncio
    async def test_CP_14_same_file_symbols_no_pair(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Symbols that all live in one file never produce a stored pair."""
        repo_id = repo.repo_id
        chain = await self._chain(db_session, repo_id, 2)
        await self._touch(
            db_session,
            repo_id,
            chain,
            ["src/billing.py::charge", "src/billing.py::refund"],
        )
        await db_session.commit()
        await _run(db_session, repo_id, chain[-1])
        assert await _fetch(db_session, repo_id) == []

    @pytest.mark.asyncio
    async def test_CP_15_pair_stored_canonical_a_lt_b(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """The stored pair satisfies file_a <= file_b even when seeded z-first."""
        repo_id = repo.repo_id
        chain = await self._chain(db_session, repo_id, 2)
        await self._touch(
            db_session, repo_id, chain, ["src/z.py::zfn", "src/a.py::afn"]
        )
        await db_session.commit()
        await _run(db_session, repo_id, chain[-1])
        stored = await _fetch(db_session, repo_id)
        assert len(stored) == 1
        only = stored[0]
        assert only.file_a <= only.file_b

    @pytest.mark.asyncio
    async def test_CP_16_ref_column_populated(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Every stored row carries the ref that was passed to compute()."""
        repo_id = repo.repo_id
        chain = await self._chain(db_session, repo_id, 2)
        head = chain[-1]
        await self._touch(db_session, repo_id, chain, ["src/a.py::fn", "src/b.py::fn"])
        await db_session.commit()
        await _run(db_session, repo_id, head)
        stored = await _fetch(db_session, repo_id)
        assert all(row.ref == head for row in stored)

    @pytest.mark.asyncio
    async def test_CP_17_co_changes_count_exact(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Four commits touching both files give co_changes == 4."""
        repo_id = repo.repo_id
        chain = await self._chain(db_session, repo_id, 4)
        await self._touch(db_session, repo_id, chain, ["src/a.py::fn", "src/b.py::fn"])
        await db_session.commit()
        await _run(db_session, repo_id, chain[-1])
        stored = await _fetch(db_session, repo_id)
        assert stored[0].co_changes == 4

    @pytest.mark.asyncio
    async def test_CP_18_result_key_correct(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """compute() returns one ('intel.code.coupling', payload) tuple."""
        repo_id = repo.repo_id
        chain = await self._chain(db_session, repo_id, 2)
        await self._touch(db_session, repo_id, chain, ["src/a.py::fn", "src/b.py::fn"])
        await db_session.commit()
        outcome = await _run(db_session, repo_id, chain[-1])
        assert len(outcome) == 1
        key, payload = outcome[0]
        assert key == "intel.code.coupling"
        for field in ("count", "commits_analysed", "truncated"):
            assert field in payload
| 339 | |
| 340 | |
| 341 | # ───────────────────────────────────────────────────────────────────────────── |
| 342 | # Tier 3 — E2E: full seeded scenarios |
| 343 | # ───────────────────────────────────────────────────────────────────────────── |
| 344 | |
class TestCouplingE2E:
    """Tier 3 — seed a full scenario, run the provider, check the stored rows."""

    @staticmethod
    async def _chain(session: AsyncSession, repo_id: str, length: int) -> list[str]:
        """Seed *length* commits as one parent chain; return ids oldest-first."""
        commit_ids = [_cid() for _ in range(length)]
        parent = None
        for commit_id in commit_ids:
            await _seed_commit(session, repo_id, commit_id, [parent] if parent else [])
            parent = commit_id
        return commit_ids

    @staticmethod
    async def _touch(
        session: AsyncSession,
        repo_id: str,
        commit_ids: list[str],
        addresses: list[str],
    ) -> None:
        """Seed a history entry for every address in each of *commit_ids*."""
        for commit_id in commit_ids:
            await _seed_history(session, repo_id, commit_id, addresses)

    @pytest.mark.asyncio
    async def test_CP_19_three_files_correct_ranking(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """A↔B co-changes more often than A↔C, so A↔B is the first row."""
        repo_id = repo.repo_id
        chain = await self._chain(db_session, repo_id, 5)
        # a.py and b.py change in all five commits ...
        await self._touch(db_session, repo_id, chain, ["src/a.py::fn", "src/b.py::fn"])
        # ... while c.py joins them only in the first two.
        await self._touch(db_session, repo_id, chain[:2], ["src/c.py::fn"])
        await db_session.commit()
        await _run(db_session, repo_id, chain[-1])
        top = (await _fetch(db_session, repo_id))[0]
        assert top.co_changes == 5
        assert top.file_a in ("src/a.py", "src/b.py")
        assert top.file_b in ("src/a.py", "src/b.py")

    @pytest.mark.asyncio
    async def test_CP_20_result_count_matches_stored_rows(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """The payload's 'count' equals the number of rows actually stored."""
        repo_id = repo.repo_id
        chain = await self._chain(db_session, repo_id, 3)
        await self._touch(
            db_session,
            repo_id,
            chain,
            ["src/a.py::fn", "src/b.py::fn", "src/c.py::fn"],
        )
        await db_session.commit()
        outcome = await _run(db_session, repo_id, chain[-1])
        _, payload = outcome[0]
        stored = await _fetch(db_session, repo_id)
        assert payload["count"] == len(stored)

    @pytest.mark.asyncio
    async def test_CP_21_truncated_true_over_max_pairs(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """truncated is True once the raw pair count exceeds MAX_PAIRS."""
        repo_id = repo.repo_id
        chain = await self._chain(db_session, repo_id, 3)
        # 21 files -> 210 pairs, exceeds MAX_PAIRS=200
        addresses = [f"src/file_{i}.py::fn" for i in range(21)]
        await self._touch(db_session, repo_id, chain, addresses)
        await db_session.commit()
        outcome = await _run(db_session, repo_id, chain[-1])
        _, payload = outcome[0]
        assert payload["truncated"] is True

    @pytest.mark.asyncio
    async def test_CP_22_min_co_filter_in_route_helpers(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Rows with co_changes below min_co drop out of a route-style query."""
        repo_id = repo.repo_id
        # Build: A↔B = 5, A↔C = 2 -> with min_co=3 only A↔B appears.
        chain = await self._chain(db_session, repo_id, 7)
        commits_ab, commits_ac = chain[:5], chain[5:]
        await self._touch(
            db_session, repo_id, commits_ab, ["src/a.py::fn", "src/b.py::fn"]
        )
        await self._touch(
            db_session, repo_id, commits_ac, ["src/a.py::fn", "src/c.py::fn"]
        )
        await db_session.commit()
        await _run(db_session, repo_id, chain[-1])
        # Simulate the route's min_co=3 filter directly in SQL.
        threshold_query = (
            sa.select(MusehubIntelCoupling)
            .where(
                MusehubIntelCoupling.repo_id == repo_id,
                MusehubIntelCoupling.co_changes >= 3,
            )
            .order_by(sa.desc(MusehubIntelCoupling.co_changes))
        )
        filtered = (await db_session.execute(threshold_query)).scalars().all()
        assert all(row.co_changes >= 3 for row in filtered)
        assert len(filtered) == 1
        assert filtered[0].co_changes == 5

    @pytest.mark.asyncio
    async def test_CP_23_top_limit_respected(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """A SQL LIMIT of 5 never returns more than five rows."""
        repo_id = repo.repo_id
        chain = await self._chain(db_session, repo_id, 3)
        # 10 files -> 45 pairs
        addresses = [f"src/f{i}.py::fn" for i in range(10)]
        await self._touch(db_session, repo_id, chain, addresses)
        await db_session.commit()
        await _run(db_session, repo_id, chain[-1])
        limited_query = (
            sa.select(MusehubIntelCoupling)
            .where(MusehubIntelCoupling.repo_id == repo_id)
            .order_by(sa.desc(MusehubIntelCoupling.co_changes))
            .limit(5)
        )
        limited = (await db_session.execute(limited_query)).scalars().all()
        assert len(limited) <= 5

    @pytest.mark.asyncio
    async def test_CP_24_heat_high_on_stored_pairs(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """A stored pair with co_changes >= 20 gets the 'high' heat modifier."""
        repo_id = repo.repo_id
        chain = await self._chain(db_session, repo_id, 22)
        await self._touch(db_session, repo_id, chain, ["src/a.py::fn", "src/b.py::fn"])
        await db_session.commit()
        await _run(db_session, repo_id, chain[-1])
        top = (await _fetch(db_session, repo_id))[0]
        assert top.co_changes >= 20
        assert _cp_heat(top.co_changes) == "high"

    @pytest.mark.asyncio
    async def test_CP_25_bar_pct_100_for_top_pair(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """The top pair is the normalisation anchor, so its bar_pct is 100.

        NOTE(review): ``max_co`` is read from the same row it normalises, so
        the assertion only exercises the percentage formula on that row.
        """
        repo_id = repo.repo_id
        chain = await self._chain(db_session, repo_id, 5)
        await self._touch(db_session, repo_id, chain, ["src/a.py::fn", "src/b.py::fn"])
        await db_session.commit()
        await _run(db_session, repo_id, chain[-1])
        top = (await _fetch(db_session, repo_id))[0]
        max_co = top.co_changes
        bar_pct = round((top.co_changes / max_co) * 100)
        assert bar_pct == 100
| 514 | |
| 515 | |
| 516 | # ───────────────────────────────────────────────────────────────────────────── |
| 517 | # Tier 4 — Performance: timing bounds |
| 518 | # ───────────────────────────────────────────────────────────────────────────── |
| 519 | |
class TestCouplingPerformance:
    """Tier 4 — wall-clock bounds measured with ``time.monotonic``."""

    @staticmethod
    async def _chain(session: AsyncSession, repo_id: str, length: int) -> list[str]:
        """Seed *length* commits as one parent chain; return ids oldest-first."""
        commit_ids = [_cid() for _ in range(length)]
        parent = None
        for commit_id in commit_ids:
            await _seed_commit(session, repo_id, commit_id, [parent] if parent else [])
            parent = commit_id
        return commit_ids

    @staticmethod
    async def _touch(
        session: AsyncSession,
        repo_id: str,
        commit_ids: list[str],
        addresses: list[str],
    ) -> None:
        """Seed a history entry for every address in each of *commit_ids*."""
        for commit_id in commit_ids:
            await _seed_history(session, repo_id, commit_id, addresses)

    @pytest.mark.asyncio
    async def test_CP_26_ten_commits_ten_files_under_500ms(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """A provider run over 10 commits × 10 files takes under 500 ms."""
        chain = await self._chain(db_session, repo.repo_id, 10)
        addresses = [f"src/f{i}.py::fn" for i in range(10)]
        await self._touch(db_session, repo.repo_id, chain, addresses)
        await db_session.commit()
        started = time.monotonic()
        await _run(db_session, repo.repo_id, chain[-1])
        assert time.monotonic() - started < 0.5

    @pytest.mark.asyncio
    async def test_CP_27_100_commits_20_files_under_2s(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """A provider run over 100 commits × 20 files takes under 2 s."""
        chain = await self._chain(db_session, repo.repo_id, 100)
        addresses = [f"src/f{i}.py::fn" for i in range(20)]
        await self._touch(db_session, repo.repo_id, chain, addresses)
        await db_session.commit()
        started = time.monotonic()
        await _run(db_session, repo.repo_id, chain[-1])
        assert time.monotonic() - started < 2.0

    @pytest.mark.asyncio
    async def test_CP_28_empty_repo_fast_path_under_50ms(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """A provider run on a repo with nothing seeded takes under 50 ms."""
        started = time.monotonic()
        await _run(db_session, repo.repo_id, _cid())
        assert time.monotonic() - started < 0.05

    @pytest.mark.asyncio
    async def test_CP_29_rerun_not_5x_slower(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """The second run stays below max(5 × first run, 0.5 s)."""
        chain = await self._chain(db_session, repo.repo_id, 2)
        head = chain[-1]
        await self._touch(
            db_session, repo.repo_id, chain, ["src/a.py::fn", "src/b.py::fn"]
        )
        await db_session.commit()

        first_start = time.monotonic()
        await _run(db_session, repo.repo_id, head)
        first_duration = time.monotonic() - first_start

        second_start = time.monotonic()
        await _run(db_session, repo.repo_id, head)
        second_duration = time.monotonic() - second_start

        assert second_duration < max(first_duration * 5, 0.5)

    @pytest.mark.asyncio
    async def test_CP_30_point_lookup_under_10ms(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Fetching a repo's pairs after a provider run takes under 10 ms."""
        chain = await self._chain(db_session, repo.repo_id, 2)
        await self._touch(
            db_session, repo.repo_id, chain, ["src/a.py::fn", "src/b.py::fn"]
        )
        await db_session.commit()
        await _run(db_session, repo.repo_id, chain[-1])
        started = time.monotonic()
        await _fetch(db_session, repo.repo_id)
        assert time.monotonic() - started < 0.01

    @pytest.mark.asyncio
    async def test_CP_31_200_pairs_query_fast(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Fetching the full 200-pair leaderboard takes under 50 ms."""
        chain = await self._chain(db_session, repo.repo_id, 3)
        # 21 files -> 210 pairs -> stored as 200 (MAX_PAIRS)
        addresses = [f"src/f{i}.py::fn" for i in range(21)]
        await self._touch(db_session, repo.repo_id, chain, addresses)
        await db_session.commit()
        await _run(db_session, repo.repo_id, chain[-1])
        started = time.monotonic()
        await _fetch(db_session, repo.repo_id)
        assert time.monotonic() - started < 0.05

    @pytest.mark.asyncio
    async def test_CP_32_dashboard_preview_query_fast(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """The dashboard preview query (top 3 via LIMIT) takes under 20 ms."""
        chain = await self._chain(db_session, repo.repo_id, 3)
        addresses = [f"src/f{i}.py::fn" for i in range(6)]
        await self._touch(db_session, repo.repo_id, chain, addresses)
        await db_session.commit()
        await _run(db_session, repo.repo_id, chain[-1])
        started = time.monotonic()
        # The statement is built inside the timed window, as it would be per request.
        preview_query = (
            sa.select(MusehubIntelCoupling)
            .where(MusehubIntelCoupling.repo_id == repo.repo_id)
            .order_by(sa.desc(MusehubIntelCoupling.co_changes))
            .limit(3)
        )
        await db_session.execute(preview_query)
        assert time.monotonic() - started < 0.02
| 647 | |
| 648 | |
| 649 | # ───────────────────────────────────────────────────────────────────────────── |
| 650 | # Tier 5 — State: idempotency, stale-row purge, incremental updates |
| 651 | # ───────────────────────────────────────────────────────────────────────────── |
| 652 | |
| 653 | class TestCouplingState: |
| 654 | |
@pytest.mark.asyncio
async def test_CP_33_idempotent_two_runs(
    self, db_session: AsyncSession, repo: MusehubRepo
) -> None:
    """Two consecutive runs leave the same (file_a, file_b, co_changes) set."""
    repo_id = repo.repo_id
    parent, head = _cid(), _cid()
    await _seed_commit(db_session, repo_id, parent)
    await _seed_commit(db_session, repo_id, head, [parent])
    for commit_id in (parent, head):
        await _seed_history(
            db_session, repo_id, commit_id, ["src/a.py::fn", "src/b.py::fn"]
        )
    await db_session.commit()

    snapshots = []
    for _ in range(2):
        await _run(db_session, repo_id, head)
        rows = await _fetch(db_session, repo_id)
        snapshots.append({(row.file_a, row.file_b, row.co_changes) for row in rows})
    first, second = snapshots
    assert first == second
| 674 | |
@pytest.mark.asyncio
async def test_CP_34_stale_rows_purged_on_rerun(
    self, db_session: AsyncSession, repo: MusehubRepo
) -> None:
    """A re-run leaves the repo's coupling row count unchanged."""
    repo_id = repo.repo_id
    parent, head = _cid(), _cid()
    await _seed_commit(db_session, repo_id, parent)
    await _seed_commit(db_session, repo_id, head, [parent])
    for commit_id in (parent, head):
        await _seed_history(
            db_session, repo_id, commit_id, ["src/a.py::fn", "src/b.py::fn"]
        )
    await db_session.commit()

    # Same COUNT(*) statement is executed after each run.
    row_count_query = (
        sa.select(sa.func.count())
        .select_from(MusehubIntelCoupling)
        .where(MusehubIntelCoupling.repo_id == repo_id)
    )
    await _run(db_session, repo_id, head)
    count_after_first = (await db_session.execute(row_count_query)).scalar_one()
    await _run(db_session, repo_id, head)
    count_after_second = (await db_session.execute(row_count_query)).scalar_one()
    assert count_after_first == count_after_second
| 698 | |
@pytest.mark.asyncio
async def test_CP_35_incremental_new_pair_appears(
    self, db_session: AsyncSession, repo: MusehubRepo
) -> None:
    """Adding two commits that co-change new files adds rows on re-run."""
    repo_id = repo.repo_id

    # Round one: a.py and b.py co-change in two chained commits.
    first, second = _cid(), _cid()
    await _seed_commit(db_session, repo_id, first)
    await _seed_commit(db_session, repo_id, second, [first])
    for commit_id in (first, second):
        await _seed_history(
            db_session, repo_id, commit_id, ["src/a.py::fn", "src/b.py::fn"]
        )
    await db_session.commit()
    await _run(db_session, repo_id, second)
    rows_before = await _fetch(db_session, repo_id)
    before = len(rows_before)

    # Round two: extend the chain with commits where c.py and d.py co-change.
    third, fourth = _cid(), _cid()
    await _seed_commit(db_session, repo_id, third, [second])
    await _seed_commit(db_session, repo_id, fourth, [third])
    for commit_id in (third, fourth):
        await _seed_history(
            db_session, repo_id, commit_id, ["src/c.py::fn", "src/d.py::fn"]
        )
    await db_session.commit()
    await _run(db_session, repo_id, fourth)
    rows_after = await _fetch(db_session, repo_id)
    after = len(rows_after)

    assert after > before
| 724 | |
@pytest.mark.asyncio
async def test_CP_36_no_duplicate_pairs_after_3_runs(
    self, db_session: AsyncSession, repo: MusehubRepo
) -> None:
    """Three consecutive runs leave no repeated (file_a, file_b) key."""
    repo_id = repo.repo_id
    parent, head = _cid(), _cid()
    await _seed_commit(db_session, repo_id, parent)
    await _seed_commit(db_session, repo_id, head, [parent])
    for commit_id in (parent, head):
        await _seed_history(
            db_session, repo_id, commit_id, ["src/a.py::fn", "src/b.py::fn"]
        )
    await db_session.commit()

    runs_left = 3
    while runs_left:
        await _run(db_session, repo_id, head)
        runs_left -= 1

    rows = await _fetch(db_session, repo_id)
    pair_keys = [(row.file_a, row.file_b) for row in rows]
    assert len(pair_keys) == len(set(pair_keys))
| 742 | |
@pytest.mark.asyncio
async def test_CP_37_co_changes_increases_with_new_commits(
    self, db_session: AsyncSession, repo: MusehubRepo
) -> None:
    """``co_changes`` on the first fetched row rises after one more a/b commit."""
    repo_id = repo.repo_id
    shared_addrs = ["src/a.py::fn", "src/b.py::fn"]

    # Baseline: two commits that both touch a.py and b.py.
    first, second = _cid(), _cid()
    await _seed_commit(db_session, repo_id, first)
    await _seed_commit(db_session, repo_id, second, [first])
    for commit_id in (first, second):
        await _seed_history(db_session, repo_id, commit_id, list(shared_addrs))
    await db_session.commit()
    await _run(db_session, repo_id, second)
    baseline_rows = await _fetch(db_session, repo_id)
    co_changes_before = baseline_rows[0].co_changes

    # One further commit touching the same two files.
    third = _cid()
    await _seed_commit(db_session, repo_id, third, [second])
    await _seed_history(db_session, repo_id, third, list(shared_addrs))
    await db_session.commit()
    await _run(db_session, repo_id, third)
    updated_rows = await _fetch(db_session, repo_id)
    co_changes_after = updated_rows[0].co_changes

    assert co_changes_after > co_changes_before
| 766 | |
@pytest.mark.asyncio
async def test_CP_38_truncated_false_when_under_cap(
    self, db_session: AsyncSession, repo: MusehubRepo
) -> None:
    """The first result payload reports ``truncated`` as ``False`` for a single a/b pair."""
    repo_id = repo.repo_id
    older, newer = _cid(), _cid()
    await _seed_commit(db_session, repo_id, older)
    await _seed_commit(db_session, repo_id, newer, [older])
    for commit_id in (older, newer):
        await _seed_history(
            db_session, repo_id, commit_id, ["src/a.py::fn", "src/b.py::fn"]
        )
    await db_session.commit()

    outcome = await _run(db_session, repo_id, newer)
    # Each result entry is a (key, payload) pair; only the payload matters here.
    _, payload = outcome[0]
    assert payload["truncated"] is False
| 782 | |
| 783 | |
| 784 | # ───────────────────────────────────────────────────────────────────────────── |
| 785 | # Tier 6 — Security: injection, isolation, unicode |
| 786 | # ───────────────────────────────────────────────────────────────────────────── |
| 787 | |
class TestCouplingSecurity:
    """Tier 6: hostile-looking addresses, non-ASCII paths, and per-repo isolation."""

    @pytest.mark.asyncio
    async def test_CP_39_sql_injection_stored_verbatim(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """A SQL-injection string in a symbol address does not break run or fetch."""
        repo_id = repo.repo_id
        hostile_addr = "src/a.py::fn'; DROP TABLE musehub_intel_coupling; --"
        older, newer = _cid(), _cid()
        await _seed_commit(db_session, repo_id, older)
        await _seed_commit(db_session, repo_id, newer, [older])
        for commit_id in (older, newer):
            await _seed_history(
                db_session, repo_id, commit_id, [hostile_addr, "src/b.py::fn"]
            )
        await db_session.commit()
        await _run(db_session, repo_id, newer)
        rows = await _fetch(db_session, repo_id)
        # NOTE(review): this only checks the fetch returns a list; presumably the
        # fetch would raise if the table had been dropped — confirm, and consider
        # asserting on the stored paths as well.
        assert isinstance(rows, list)

    @pytest.mark.asyncio
    async def test_CP_40_xss_payload_stored_safely(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """A ``<script>`` tag inside a file path does not break run or fetch."""
        repo_id = repo.repo_id
        script_addr = "src/<script>alert(1)</script>.py::fn"
        older, newer = _cid(), _cid()
        await _seed_commit(db_session, repo_id, older)
        await _seed_commit(db_session, repo_id, newer, [older])
        for commit_id in (older, newer):
            await _seed_history(
                db_session, repo_id, commit_id, [script_addr, "src/b.py::fn"]
            )
        await db_session.commit()
        await _run(db_session, repo_id, newer)
        rows = await _fetch(db_session, repo_id)
        # NOTE(review): same weak check as CP_39 — only the return type is asserted.
        assert isinstance(rows, list)

    @pytest.mark.asyncio
    async def test_CP_41_repo_isolation_strict(
        self, db_session: AsyncSession, two_repos: tuple[MusehubRepo, MusehubRepo]
    ) -> None:
        """Running the provider for the first repo leaves the second repo's fetch empty."""
        seeded_repo, untouched_repo = two_repos
        older, newer = _cid(), _cid()
        await _seed_commit(db_session, seeded_repo.repo_id, older)
        await _seed_commit(db_session, seeded_repo.repo_id, newer, [older])
        for commit_id in (older, newer):
            await _seed_history(
                db_session,
                seeded_repo.repo_id,
                commit_id,
                ["src/a.py::fn", "src/b.py::fn"],
            )
        await db_session.commit()
        await _run(db_session, seeded_repo.repo_id, newer)

        leaked_rows = await _fetch(db_session, untouched_repo.repo_id)
        assert leaked_rows == []

    @pytest.mark.asyncio
    async def test_CP_42_two_repos_independent_pairs(
        self, db_session: AsyncSession, two_repos: tuple[MusehubRepo, MusehubRepo]
    ) -> None:
        """Identical histories in two repos give each repo exactly one row of its own."""
        r1, r2 = two_repos
        # Seed and run each repo in turn with the same a/b history.
        for target in (r1, r2):
            older, newer = _cid(), _cid()
            await _seed_commit(db_session, target.repo_id, older)
            await _seed_commit(db_session, target.repo_id, newer, [older])
            for commit_id in (older, newer):
                await _seed_history(
                    db_session,
                    target.repo_id,
                    commit_id,
                    ["src/a.py::fn", "src/b.py::fn"],
                )
            await db_session.commit()
            await _run(db_session, target.repo_id, newer)

        rows_r1 = await _fetch(db_session, r1.repo_id)
        rows_r2 = await _fetch(db_session, r2.repo_id)
        assert len(rows_r1) == 1
        assert rows_r1[0].repo_id == r1.repo_id
        assert len(rows_r2) == 1
        assert rows_r2[0].repo_id == r2.repo_id

    @pytest.mark.asyncio
    async def test_CP_43_rerun_updates_ref_column(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """After runs at the second and then third commit, every row's ``ref`` is the third."""
        repo_id = repo.repo_id
        first, second, third = _cid(), _cid(), _cid()
        await _seed_commit(db_session, repo_id, first)
        await _seed_commit(db_session, repo_id, second, [first])
        await _seed_commit(db_session, repo_id, third, [second])
        for commit_id in (first, second, third):
            await _seed_history(
                db_session, repo_id, commit_id, ["src/a.py::fn", "src/b.py::fn"]
            )
        await db_session.commit()

        # Run at the older ref first, then at the newest one.
        await _run(db_session, repo_id, second)
        await _run(db_session, repo_id, third)

        rows = await _fetch(db_session, repo_id)
        assert all(row.ref == third for row in rows)

    @pytest.mark.asyncio
    async def test_CP_44_unicode_in_path_handled(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Non-ASCII characters in a path and symbol name do not break run or fetch."""
        repo_id = repo.repo_id
        older, newer = _cid(), _cid()
        await _seed_commit(db_session, repo_id, older)
        await _seed_commit(db_session, repo_id, newer, [older])
        for commit_id in (older, newer):
            await _seed_history(
                db_session,
                repo_id,
                commit_id,
                ["src/música.py::canción", "src/b.py::fn"],
            )
        await db_session.commit()
        await _run(db_session, repo_id, newer)
        rows = await _fetch(db_session, repo_id)
        assert isinstance(rows, list)
| 892 | |
| 893 | |
| 894 | # ───────────────────────────────────────────────────────────────────────────── |
| 895 | # Tier 7 — Stress: MAX_PAIRS cap, mass-commit exclusion, BFS cap |
| 896 | # ───────────────────────────────────────────────────────────────────────────── |
| 897 | |
class TestCouplingStress:
    """Tier 7: provider limits under load (pair cap, mass-commit skip, walk cap)."""

    @pytest.mark.asyncio
    async def test_CP_45_max_pairs_cap_respected(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Stored pair count never exceeds ``_MAX_PAIRS``."""
        provider = CouplingProvider()
        commits = [_cid() for _ in range(3)]
        prev = None
        for cid in commits:
            # First commit gets an empty list; later ones get the previous id.
            await _seed_commit(db_session, repo.repo_id, cid,
                               [prev] if prev else [])
            prev = cid
        # 21 files → C(21, 2) = 210 candidate pairs, intended to exceed the cap
        # (200 according to the original author's note).
        addrs = [f"src/file_{i}.py::fn" for i in range(21)]
        for cid in commits:
            await _seed_history(db_session, repo.repo_id, cid, addrs)
        await db_session.commit()
        await _run(db_session, repo.repo_id, commits[-1])
        pairs = await _fetch(db_session, repo.repo_id)
        assert len(pairs) <= provider._MAX_PAIRS

    @pytest.mark.asyncio
    async def test_CP_46_mass_commit_excluded(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Commits touching > ``_MAX_FILES_PER_COMMIT`` files are skipped.

        Two ordinary commits touch ``src/a.py`` and ``src/b.py``; a third
        commit touches ``_MAX_FILES_PER_COMMIT + 50`` generated files.  The
        a/b pair must be stored and none of the generated files may appear
        in any stored pair.
        """
        provider = CouplingProvider()
        c_good1, c_good2, c_mass = _cid(), _cid(), _cid()
        await _seed_commit(db_session, repo.repo_id, c_good1)
        await _seed_commit(db_session, repo.repo_id, c_good2, [c_good1])
        await _seed_commit(db_session, repo.repo_id, c_mass, [c_good2])
        for cid in [c_good1, c_good2]:
            await _seed_history(db_session, repo.repo_id, cid,
                                ["src/a.py::fn", "src/b.py::fn"])
        # Mass commit: 50 more distinct files than the per-commit limit.
        big_addrs = [f"src/gen_{i}.py::fn"
                     for i in range(provider._MAX_FILES_PER_COMMIT + 50)]
        await _seed_history(db_session, repo.repo_id, c_mass, big_addrs)
        await db_session.commit()
        await _run(db_session, repo.repo_id, c_mass)
        pairs = await _fetch(db_session, repo.repo_id)
        stored_files = {path for p in pairs for path in (p.file_a, p.file_b)}
        # The A↔B pair from good commits must still be present
        assert "src/a.py" in stored_files
        # The generated files occur only in the mass commit, so a stored pair
        # naming one would mean that commit was analysed instead of skipped.
        assert not any(path.startswith("src/gen_") for path in stored_files)

    @pytest.mark.asyncio
    async def test_CP_47_500_commits_completes(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """500 commits × 5 files completes without error and returns a truthy result."""
        commits = [_cid() for _ in range(500)]
        prev = None
        for cid in commits:
            await _seed_commit(db_session, repo.repo_id, cid,
                               [prev] if prev else [])
            prev = cid
        addrs = [f"src/f{i}.py::fn" for i in range(5)]
        for cid in commits:
            await _seed_history(db_session, repo.repo_id, cid, addrs)
        await db_session.commit()
        result = await _run(db_session, repo.repo_id, commits[-1])
        assert result

    @pytest.mark.asyncio
    async def test_CP_48_result_count_matches_stored(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Payload ``count`` equals the number of fetched rows."""
        commits = [_cid() for _ in range(4)]
        prev = None
        for cid in commits:
            await _seed_commit(db_session, repo.repo_id, cid,
                               [prev] if prev else [])
            prev = cid
        addrs = [f"src/f{i}.py::fn" for i in range(6)]
        for cid in commits:
            await _seed_history(db_session, repo.repo_id, cid, addrs)
        await db_session.commit()
        result = await _run(db_session, repo.repo_id, commits[-1])
        # Each result entry is a (key, payload) pair; the key is not needed.
        _, payload = result[0]
        stored = await _fetch(db_session, repo.repo_id)
        assert payload["count"] == len(stored)

    @pytest.mark.asyncio
    async def test_CP_49_bfs_walk_cap(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """``commits_analysed`` never exceeds ``_MAX_WALK``."""
        provider = CouplingProvider()
        commits = [_cid() for _ in range(50)]
        prev = None
        for cid in commits:
            await _seed_commit(db_session, repo.repo_id, cid,
                               [prev] if prev else [])
            prev = cid
        # Only the oldest commit carries history.
        await _seed_history(db_session, repo.repo_id, commits[0],
                            ["src/a.py::fn", "src/b.py::fn"])
        await db_session.commit()
        result = await _run(db_session, repo.repo_id, commits[-1])
        # NOTE(review): with this guard the test passes vacuously when the
        # provider returns an empty result — confirm whether that can happen.
        if result:
            _, payload = result[0]
            assert payload["commits_analysed"] <= provider._MAX_WALK
| 1003 | |
| 1004 | |
| 1005 | # ───────────────────────────────────────────────────────────────────────────── |
| 1006 | # Helpers — _cp_short correctness |
| 1007 | # ───────────────────────────────────────────────────────────────────────────── |
| 1008 | |
class TestCpShort:
    """Checks on how ``_cp_short`` abbreviates slash-separated paths for display."""

    def test_deep_path_truncated_to_two_parts(self) -> None:
        """A three-component path keeps only its last two components."""
        shortened = _cp_short("musehub/services/musehub_wire.py")
        assert shortened == "services/musehub_wire.py"

    def test_single_component_unchanged(self) -> None:
        """A bare file name comes back as-is."""
        bare_name = "musehub_wire.py"
        assert _cp_short("musehub_wire.py") == bare_name

    def test_two_components_unchanged(self) -> None:
        """A two-component path comes back as-is."""
        two_part = "services/musehub_wire.py"
        assert _cp_short("services/musehub_wire.py") == two_part

    def test_very_deep_path(self) -> None:
        """A five-component path is reduced to its last two components."""
        shortened = _cp_short("a/b/c/d/e.py")
        assert shortened == "d/e.py"
File History
1 commit
sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595
fix: typing audit — 0 violations, 0 untyped defs across all…
Sonnet 4.6
minor
⚠
20 days ago