test_intel_fidelity.py
python
sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595
fix: typing audit — 0 violations, 0 untyped defs across all…
Sonnet 4.6
minor
⚠ breaking
20 days ago
| 1 | """Fidelity tests — providers and routes must match CLI data shape. |
| 2 | |
| 3 | Verifies three gaps found in the CLI-vs-DB sweep: |
| 4 | |
| 5 | 1. StableProvider — days_stable must be calendar days, not commit-walk index |
| 6 | 2. EntangleProvider — co_change_rate must use Jaccard (co / |union|), not min |
| 7 | 3. Hotspots route — must read MusehubSymbolIntel, not the legacy snapshot blob |
| 8 | |
| 9 | Cases: |
| 10 | F01 StableProvider: symbol untouched for 30 calendar days → days_stable ≈ 30 |
| 11 | F02 StableProvider: symbol changed today → days_stable = 0 |
| 12 | F03 EntangleProvider: rate = co_changes / |union(commits_a, commits_b)| |
| 13 | F04 EntangleProvider: commits_both_active stores union cardinality |
| 14 | F05 Hotspots route: 200 with no legacy snapshot row (no longer depends on it) |
| 15 | F06 Hotspots route: symbols ranked by churn_30d descending |
| 16 | F07 Hotspots route: address and change_count present in HTML body |
| 17 | """ |
| 18 | from __future__ import annotations |
| 19 | |
| 20 | from datetime import datetime, timedelta, timezone |
| 21 | |
| 22 | import pytest |
| 23 | import pytest_asyncio |
| 24 | import sqlalchemy as sa |
| 25 | from httpx import AsyncClient |
| 26 | from sqlalchemy.dialects.postgresql import insert as pg_insert |
| 27 | from sqlalchemy.ext.asyncio import AsyncSession |
| 28 | |
| 29 | from musehub.db.musehub_intel_models import MusehubIntelEntangle, MusehubIntelStable, MusehubSymbolHistoryEntry, MusehubSymbolIntel |
| 30 | from musehub.db.musehub_repo_models import MusehubCommit, MusehubCommitRef, MusehubRepo |
| 31 | from musehub.services.musehub_intel_providers import EntangleProvider, StableProvider |
| 32 | from tests.factories import create_repo |
| 33 | |
# Reference instant for the whole module: captured once at import time so that
# every offset produced by _ts() is measured from the same "now".
_NOW = datetime.now(timezone.utc)


# ---------------------------------------------------------------------------
# Shared helpers
# ---------------------------------------------------------------------------

def _ts(days_ago: int) -> datetime:
    """Return the timezone-aware instant ``days_ago`` days before ``_NOW``."""
    offset = timedelta(days=days_ago)
    return _NOW - offset
| 43 | |
| 44 | |
async def _insert_symbol_intel(
    session: AsyncSession,
    repo_id: str,
    address: str,
    churn_30d: int = 0,
    last_changed: datetime | None = None,
) -> None:
    """Upsert one ``MusehubSymbolIntel`` row keyed on ``(repo_id, address)``.

    ``churn`` is written with the same value as ``churn_30d``; the remaining
    columns receive fixed placeholder values. The statement is executed on
    ``session`` but not committed — the caller owns the transaction.

    Args:
        session: Async session used to execute the statement.
        repo_id: Repository the symbol belongs to.
        address: Symbol address, e.g. ``"src/a.py::fn_a"``.
        churn_30d: Value stored in both ``churn`` and ``churn_30d``.
        last_changed: Timestamp of the symbol's last change, or ``None``.
    """
    stmt = pg_insert(MusehubSymbolIntel).values(
        repo_id=repo_id,
        address=address,
        churn=churn_30d,
        churn_30d=churn_30d,
        churn_90d=0,
        blast=0,
        blast_direct=0,
        blast_cross=0,
        blast_top=[],
        last_changed=last_changed,
        author_count=1,
        gravity=0.0,
        weekly=[],
    )
    await session.execute(
        stmt.on_conflict_do_update(
            index_elements=["repo_id", "address"],
            # Keep ``churn`` in step with ``churn_30d`` on the update path too,
            # exactly as the insert path does; otherwise a re-upsert would
            # leave a stale ``churn`` next to a fresh ``churn_30d``.
            set_={
                "churn": churn_30d,
                "churn_30d": churn_30d,
                "last_changed": last_changed,
            },
        )
    )
| 74 | |
| 75 | |
async def _insert_history_entry(
    session: AsyncSession,
    repo_id: str,
    address: str,
    commit_id: str,
    committed_at: datetime,
    op: str = "modify",
) -> None:
    """Insert one ``MusehubSymbolHistoryEntry`` row, doing nothing on conflict.

    The statement is executed on ``session`` but not committed.
    """
    row = {
        "repo_id": repo_id,
        "address": address,
        "commit_id": commit_id,
        "committed_at": committed_at,
        "op": op,
    }
    stmt = pg_insert(MusehubSymbolHistoryEntry).values(**row)
    await session.execute(stmt.on_conflict_do_nothing())
| 95 | |
| 96 | |
| 97 | # --------------------------------------------------------------------------- |
| 98 | # F01 / F02 — StableProvider: calendar days, not commit-walk index |
| 99 | # --------------------------------------------------------------------------- |
| 100 | |
@pytest_asyncio.fixture
async def stable_repo(db_session: AsyncSession) -> tuple[MusehubRepo, str]:
    """Create a repo with a two-commit chain (30 days ago → today) and two symbols.

    ``src/a.py::fn_a`` was last touched by today's commit and
    ``src/b.py::fn_b`` by the 30-day-old one. Returns ``(repo, head_commit_id)``
    where the head is today's commit.
    """
    from muse.core.types import blob_id

    repo = await create_repo(db_session, owner="fid", slug="stable-fid")
    repo_id = str(repo.repo_id)

    old_commit_id = blob_id(b"commit-30d")
    head_commit_id = blob_id(b"commit-today")

    # Chain: today's commit has the 30-day-old commit as its only parent.
    commits = [
        MusehubCommit(
            commit_id=old_commit_id,
            message="old",
            author="a",
            branch="main",
            parent_ids=[],
            timestamp=_ts(30),
        ),
        MusehubCommit(
            commit_id=head_commit_id,
            message="new",
            author="a",
            branch="main",
            parent_ids=[old_commit_id],
            timestamp=_ts(0),
        ),
    ]
    db_session.add_all(commits)
    await db_session.flush()

    refs = [
        MusehubCommitRef(repo_id=repo_id, commit_id=cid)
        for cid in (old_commit_id, head_commit_id)
    ]
    db_session.add_all(refs)
    await db_session.flush()

    # (address, commit that last touched it, when that happened)
    symbols = (
        ("src/a.py::fn_a", head_commit_id, _ts(0)),
        ("src/b.py::fn_b", old_commit_id, _ts(30)),
    )
    for address, cid, touched_at in symbols:
        await _insert_history_entry(db_session, repo_id, address, cid, touched_at)

    # Both symbols must exist in MusehubSymbolIntel (provider reads current symbols from here)
    for address, _cid, touched_at in symbols:
        await _insert_symbol_intel(db_session, repo_id, address, last_changed=touched_at)

    await db_session.commit()
    return repo, head_commit_id
| 147 | |
| 148 | |
class TestStableCalendarDays:
    """F01 / F02 — ``days_stable`` is measured in calendar days."""

    @staticmethod
    async def _stable_row(
        session: AsyncSession,
        fixture: tuple[MusehubRepo, str],
        address: str,
    ) -> MusehubIntelStable | None:
        """Run ``StableProvider`` at the fixture's head and fetch the row for ``address``."""
        repo, head = fixture
        repo_id = str(repo.repo_id)
        await StableProvider().compute(session, repo_id, head, {})

        query = sa.select(MusehubIntelStable).where(
            MusehubIntelStable.repo_id == repo_id,
            MusehubIntelStable.address == address,
        )
        return await session.scalar(query)

    @pytest.mark.asyncio
    async def test_F01_symbol_30d_stale_has_days_stable_approx_30(
        self, db_session: AsyncSession, stable_repo: tuple[MusehubRepo, str]
    ) -> None:
        """Symbol last touched 30 calendar days ago → days_stable ≈ 30, not 1."""
        row = await self._stable_row(db_session, stable_repo, "src/b.py::fn_b")
        assert row is not None

        # Must be close to 30 calendar days — definitely not the commit index (1)
        days = row.days_stable
        assert days >= 28, f"Expected ~30, got {days}"
        assert days <= 32, f"Expected ~30, got {days}"

    @pytest.mark.asyncio
    async def test_F02_symbol_changed_today_has_days_stable_zero(
        self, db_session: AsyncSession, stable_repo: tuple[MusehubRepo, str]
    ) -> None:
        """Symbol changed today → days_stable = 0, not the commit index."""
        row = await self._stable_row(db_session, stable_repo, "src/a.py::fn_a")
        assert row is not None

        days = row.days_stable
        assert days == 0, f"Expected 0, got {days}"
| 188 | |
| 189 | |
| 190 | # --------------------------------------------------------------------------- |
| 191 | # F03 / F04 — EntangleProvider: Jaccard co_change_rate |
| 192 | # --------------------------------------------------------------------------- |
| 193 | |
@pytest_asyncio.fixture
async def entangle_repo(db_session: AsyncSession) -> tuple[MusehubRepo, str]:
    """Repo whose commit graph gives a clear Jaccard vs min distinction.

    symbol_a touched in: c1, c2, c3, c4, c5  → 5 commits
    symbol_b touched in: c3, c4, c5, c6, c7  → 5 commits
    co_changes = 3 (c3, c4, c5)
    union      = 7 (c1..c7)
    Jaccard rate = 3/7 ≈ 0.4286
    min rate     = 3/5 = 0.6   (the wrong answer)

    Each history entry carries the timestamp of the commit it belongs to,
    so the symbol history agrees with the commit rows.

    Returns ``(repo, head_commit_id)`` where the head is c7.
    """
    from muse.core.types import blob_id

    repo = await create_repo(db_session, owner="fid", slug="entangle-fid")
    repo_id = str(repo.repo_id)

    # Build a linear chain c1 → c2 → … → c7 (c7 = HEAD); c1 is 7 days old,
    # c7 is 1 day old.
    commit_ids = [blob_id(f"entangle-c{i}".encode()) for i in range(1, 8)]
    committed_at = {cid: _ts(7 - i) for i, cid in enumerate(commit_ids)}

    for i, cid in enumerate(commit_ids):
        parent = [commit_ids[i - 1]] if i > 0 else []
        db_session.add(MusehubCommit(
            commit_id=cid,
            message=f"c{i+1}",
            author="a",
            branch="main",
            parent_ids=parent,
            timestamp=committed_at[cid],
        ))
    await db_session.flush()
    db_session.add_all([
        MusehubCommitRef(repo_id=repo_id, commit_id=cid) for cid in commit_ids
    ])
    await db_session.flush()

    # symbol_a in c1–c5, symbol_b in c3–c7
    sym_a = "src/a.py::fn_a"
    sym_b = "src/b.py::fn_b"

    for cid in commit_ids[:5]:   # c1-c5 → symbol_a
        await _insert_history_entry(db_session, repo_id, sym_a, cid, committed_at[cid])
    for cid in commit_ids[2:]:   # c3-c7 → symbol_b
        await _insert_history_entry(db_session, repo_id, sym_b, cid, committed_at[cid])

    await db_session.commit()
    return repo, commit_ids[-1]  # HEAD = c7
| 240 | |
| 241 | |
class TestEntangleJaccard:
    """F03 / F04 — ``EntangleProvider`` stores a Jaccard rate and the union size."""

    @staticmethod
    async def _entangle_row(
        session: AsyncSession, fixture: tuple[MusehubRepo, str]
    ) -> MusehubIntelEntangle | None:
        """Run ``EntangleProvider`` at the fixture's head and fetch the repo's entangle row."""
        repo, head = fixture
        repo_id = str(repo.repo_id)
        await EntangleProvider().compute(session, repo_id, head, {})

        query = sa.select(MusehubIntelEntangle).where(
            MusehubIntelEntangle.repo_id == repo_id,
        )
        return await session.scalar(query)

    @pytest.mark.asyncio
    async def test_F03_co_change_rate_is_jaccard(
        self, db_session: AsyncSession, entangle_repo: tuple[MusehubRepo, str]
    ) -> None:
        """co_change_rate = co_changes / |union| (Jaccard), not co / min."""
        row = await self._entangle_row(db_session, entangle_repo)
        assert row is not None, "Expected one entangle pair to be stored"

        expected_jaccard = 3 / 7
        expected_min_rate = 3 / 5

        deviation = abs(row.co_change_rate - expected_jaccard)
        assert deviation < 0.001, (
            f"Rate {row.co_change_rate:.4f} looks like min ({expected_min_rate}) "
            f"not Jaccard ({expected_jaccard:.4f})"
        )

    @pytest.mark.asyncio
    async def test_F04_commits_both_active_is_union_cardinality(
        self, db_session: AsyncSession, entangle_repo: tuple[MusehubRepo, str]
    ) -> None:
        """commits_both_active stores |union(commits_a, commits_b)| = 7."""
        row = await self._entangle_row(db_session, entangle_repo)
        assert row is not None

        union_size = row.commits_both_active
        assert union_size == 7, (
            f"Expected union cardinality 7, got {union_size}"
        )
| 286 | |
| 287 | |
| 288 | # --------------------------------------------------------------------------- |
| 289 | # F05 / F06 / F07 — Hotspots route: reads MusehubSymbolIntel, not snapshot |
| 290 | # --------------------------------------------------------------------------- |
| 291 | |
@pytest_asyncio.fixture
async def hotspots_repo(db_session: AsyncSession) -> MusehubRepo:
    """Create a repo that has symbol intel rows but NO legacy snapshot."""
    repo = await create_repo(db_session, owner="fid", slug="hotspots-fid")
    repo_id = str(repo.repo_id)

    # Three symbols with different churn_30d values, hottest first.
    churn_by_address = {
        "src/hot.py::fn_hot": 42,
        "src/med.py::fn_med": 15,
        "src/cold.py::fn_cold": 3,
    }
    for address, churn in churn_by_address.items():
        await _insert_symbol_intel(db_session, repo_id, address, churn_30d=churn)

    await db_session.commit()
    return repo
| 308 | |
| 309 | |
class TestHotspotsRoute:
    """F05 / F06 / F07 — hotspots page for the ``hotspots_repo`` fixture."""

    # Route under test; owner and slug match the ``hotspots_repo`` fixture.
    _URL = "/fid/hotspots-fid/intel/hotspots"

    @pytest.mark.asyncio
    async def test_F05_hotspots_returns_200_without_legacy_snapshot(
        self, client: AsyncClient, hotspots_repo: MusehubRepo
    ) -> None:
        """Route must not 500 when there is no legacy snapshot row."""
        response = await client.get(self._URL)
        assert response.status_code == 200

    @pytest.mark.asyncio
    async def test_F06_hotspots_ranked_by_churn_30d_descending(
        self, client: AsyncClient, hotspots_repo: MusehubRepo
    ) -> None:
        """Symbols appear highest-churn first (42, 15, 3)."""
        response = await client.get(self._URL)
        assert response.status_code == 200

        html = response.text
        # First occurrence of each symbol name, in expected rank order.
        positions = [html.find(name) for name in ("fn_hot", "fn_med", "fn_cold")]
        assert -1 not in positions, (
            "Not all symbols found in response"
        )
        hot_at, med_at, cold_at = positions
        assert hot_at < med_at and med_at < cold_at, (
            "Symbols not in churn-descending order"
        )

    @pytest.mark.asyncio
    async def test_F07_hotspots_renders_address_and_change_count(
        self, client: AsyncClient, hotspots_repo: MusehubRepo
    ) -> None:
        """Address and change count appear in the rendered HTML."""
        response = await client.get(self._URL)
        assert response.status_code == 200

        html = response.text
        assert "src/hot.py::fn_hot" in html
        assert "42" in html
File History
1 commit
sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595
fix: typing audit — 0 violations, 0 untyped defs across all…
Sonnet 4.6
minor
⚠
20 days ago