gabriel / musehub public
test_intel_fidelity.py python
346 lines 12.4 KB
Raw
sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595 fix: typing audit — 0 violations, 0 untyped defs across all… Sonnet 4.6 minor ⚠ breaking 20 days ago
1 """Fidelity tests — providers and routes must match CLI data shape.
2
3 Verifies three gaps found in the CLI-vs-DB sweep:
4
5 1. StableProvider — days_stable must be calendar days, not commit-walk index
6 2. EntangleProvider — co_change_rate must use Jaccard (co / |union|), not min
7 3. Hotspots route — must read MusehubSymbolIntel, not the legacy snapshot blob
8
9 Cases:
10 F01 StableProvider: symbol untouched for 30 calendar days → days_stable ≈ 30
11 F02 StableProvider: symbol changed today → days_stable = 0
12 F03 EntangleProvider: rate = co_changes / |union(commits_a, commits_b)|
13 F04 EntangleProvider: commits_both_active stores union cardinality
14 F05 Hotspots route: 200 with no legacy snapshot row (no longer depends on it)
15 F06 Hotspots route: symbols ranked by churn_30d descending
16 F07 Hotspots route: address and change_count present in HTML body
17 """
18 from __future__ import annotations
19
20 from datetime import datetime, timedelta, timezone
21
22 import pytest
23 import pytest_asyncio
24 import sqlalchemy as sa
25 from httpx import AsyncClient
26 from sqlalchemy.dialects.postgresql import insert as pg_insert
27 from sqlalchemy.ext.asyncio import AsyncSession
28
29 from musehub.db.musehub_intel_models import MusehubIntelEntangle, MusehubIntelStable, MusehubSymbolHistoryEntry, MusehubSymbolIntel
30 from musehub.db.musehub_repo_models import MusehubCommit, MusehubCommitRef, MusehubRepo
31 from musehub.services.musehub_intel_providers import EntangleProvider, StableProvider
32 from tests.factories import create_repo
33
# Captured once at import time so every timestamp built in this module is
# derived from the same instant.
_NOW = datetime.now(timezone.utc)


# ---------------------------------------------------------------------------
# Shared helpers
# ---------------------------------------------------------------------------

def _ts(days_ago: int) -> datetime:
    """Return the module reference time ``_NOW`` shifted back by *days_ago* days."""
    offset = timedelta(days=days_ago)
    return _NOW - offset
43
44
async def _insert_symbol_intel(
    session: AsyncSession,
    repo_id: str,
    address: str,
    churn_30d: int = 0,
    last_changed: datetime | None = None,
) -> None:
    """Upsert one ``MusehubSymbolIntel`` row keyed on ``(repo_id, address)``.

    A fresh row gets ``churn`` and ``churn_30d`` both set to *churn_30d*,
    ``author_count`` of 1, and zero/empty values for the remaining metrics.
    When a row for the same key already exists, only ``churn_30d`` and
    ``last_changed`` are overwritten; every other column keeps its value.

    The statement is executed on *session* but not committed here.
    """
    new_row = {
        "repo_id": repo_id,
        "address": address,
        "churn": churn_30d,
        "churn_30d": churn_30d,
        "churn_90d": 0,
        "blast": 0,
        "blast_direct": 0,
        "blast_cross": 0,
        "blast_top": [],
        "last_changed": last_changed,
        "author_count": 1,
        "gravity": 0.0,
        "weekly": [],
    }
    # Columns refreshed when the (repo_id, address) pair is already present.
    refreshed = {"churn_30d": churn_30d, "last_changed": last_changed}

    upsert = (
        pg_insert(MusehubSymbolIntel)
        .values(**new_row)
        .on_conflict_do_update(
            index_elements=["repo_id", "address"],
            set_=refreshed,
        )
    )
    await session.execute(upsert)
74
75
async def _insert_history_entry(
    session: AsyncSession,
    repo_id: str,
    address: str,
    commit_id: str,
    committed_at: datetime,
    op: str = "modify",
) -> None:
    """Insert one ``MusehubSymbolHistoryEntry`` row, ignoring conflicts.

    Records that *address* was touched by *commit_id* at *committed_at* with
    operation *op*.  A conflicting existing row is left as-is (the insert
    becomes a no-op).  The statement is executed on *session* but not
    committed here.
    """
    entry = {
        "repo_id": repo_id,
        "address": address,
        "commit_id": commit_id,
        "committed_at": committed_at,
        "op": op,
    }
    insert_stmt = pg_insert(MusehubSymbolHistoryEntry).values(**entry)
    await session.execute(insert_stmt.on_conflict_do_nothing())
95
96
97 # ---------------------------------------------------------------------------
98 # F01 / F02 — StableProvider: calendar days, not commit-walk index
99 # ---------------------------------------------------------------------------
100
@pytest_asyncio.fixture
async def stable_repo(db_session: AsyncSession) -> tuple[MusehubRepo, str]:
    """Build a repo with two chained commits (30 days ago, today) and two symbols.

    ``src/a.py::fn_a`` is recorded as touched by today's commit and
    ``src/b.py::fn_b`` by the 30-day-old commit.  Returns the repo together
    with the id of the newest commit.
    """
    from muse.core.types import blob_id

    repo = await create_repo(db_session, owner="fid", slug="stable-fid")
    repo_id = str(repo.repo_id)

    old_id = blob_id(b"commit-30d")
    head_id = blob_id(b"commit-today")

    # (commit_id, message, parent_ids, age in days) — oldest first, so the
    # newest commit's parent is the 30-day-old commit.
    commit_specs = [
        (old_id, "old", [], 30),
        (head_id, "new", [old_id], 0),
    ]
    db_session.add_all([
        MusehubCommit(
            commit_id=cid,
            message=message,
            author="a",
            branch="main",
            parent_ids=parents,
            timestamp=_ts(age),
        )
        for cid, message, parents, age in commit_specs
    ])
    await db_session.flush()
    db_session.add_all([
        MusehubCommitRef(repo_id=repo_id, commit_id=cid)
        for cid, *_ in commit_specs
    ])
    await db_session.flush()

    # (address, commit that last touched it, age of that touch in days)
    touches = [
        ("src/a.py::fn_a", head_id, 0),
        ("src/b.py::fn_b", old_id, 30),
    ]
    for address, cid, age in touches:
        await _insert_history_entry(db_session, repo_id, address, cid, _ts(age))

    # Both symbols must also exist in MusehubSymbolIntel
    # (the provider reads the current symbols from there).
    for address, _cid, age in touches:
        await _insert_symbol_intel(db_session, repo_id, address, last_changed=_ts(age))

    await db_session.commit()
    return repo, head_id
147
148
class TestStableCalendarDays:
    """F01 / F02 — ``days_stable`` is measured in calendar days."""

    @pytest.mark.asyncio
    async def test_F01_symbol_30d_stale_has_days_stable_approx_30(
        self, db_session: AsyncSession, stable_repo: tuple[MusehubRepo, str]
    ) -> None:
        """Symbol last touched 30 calendar days ago → days_stable ≈ 30, not 1."""
        repo, head = stable_repo
        repo_id = str(repo.repo_id)
        await StableProvider().compute(db_session, repo_id, head, {})

        lookup = sa.select(MusehubIntelStable).where(
            MusehubIntelStable.repo_id == repo_id,
            MusehubIntelStable.address == "src/b.py::fn_b",
        )
        row = await db_session.scalar(lookup)
        assert row is not None
        # Roughly 30 calendar days is expected — a commit-walk index would give 1.
        assert 28 <= row.days_stable <= 32, f"Expected ~30, got {row.days_stable}"

    @pytest.mark.asyncio
    async def test_F02_symbol_changed_today_has_days_stable_zero(
        self, db_session: AsyncSession, stable_repo: tuple[MusehubRepo, str]
    ) -> None:
        """Symbol changed today → days_stable = 0, not the commit index."""
        repo, head = stable_repo
        repo_id = str(repo.repo_id)
        await StableProvider().compute(db_session, repo_id, head, {})

        lookup = sa.select(MusehubIntelStable).where(
            MusehubIntelStable.repo_id == repo_id,
            MusehubIntelStable.address == "src/a.py::fn_a",
        )
        row = await db_session.scalar(lookup)
        assert row is not None
        assert row.days_stable == 0, f"Expected 0, got {row.days_stable}"
188
189
190 # ---------------------------------------------------------------------------
191 # F03 / F04 — EntangleProvider: Jaccard co_change_rate
192 # ---------------------------------------------------------------------------
193
@pytest_asyncio.fixture
async def entangle_repo(db_session: AsyncSession) -> tuple[MusehubRepo, str]:
    """Build a repo whose history separates Jaccard from the min-based rate.

    symbol_a touched in: c1, c2, c3, c4, c5  → 5 commits
    symbol_b touched in: c3, c4, c5, c6, c7  → 5 commits
    co_changes   = 3  (c3, c4, c5)
    union        = 7  (c1..c7)
    Jaccard rate = 3/7 ≈ 0.4286
    min rate     = 3/5 = 0.6   (the wrong answer)

    Returns the repo together with the id of c7 (HEAD).
    """
    from muse.core.types import blob_id

    repo = await create_repo(db_session, owner="fid", slug="entangle-fid")
    repo_id = str(repo.repo_id)

    # Linear chain c1 → c2 → … → c7; c1 has no parent, c7 is HEAD.
    commit_ids = [blob_id(f"entangle-c{n}".encode()) for n in range(1, 8)]
    parent_lists = [[], *([prev] for prev in commit_ids[:-1])]
    db_session.add_all([
        MusehubCommit(
            commit_id=cid,
            message=f"c{n}",
            author="a",
            branch="main",
            parent_ids=parents,
            timestamp=_ts(8 - n),  # c1 is 7 days old, c7 is 1 day old
        )
        for n, (cid, parents) in enumerate(zip(commit_ids, parent_lists), start=1)
    ])
    await db_session.flush()
    db_session.add_all([
        MusehubCommitRef(repo_id=repo_id, commit_id=cid) for cid in commit_ids
    ])
    await db_session.flush()

    # Every history entry shares one committed_at value.
    touched_at = _ts(1)
    touched_in = (
        ("src/a.py::fn_a", commit_ids[:5]),  # c1-c5
        ("src/b.py::fn_b", commit_ids[2:]),  # c3-c7
    )
    for address, cids in touched_in:
        for cid in cids:
            await _insert_history_entry(db_session, repo_id, address, cid, touched_at)

    await db_session.commit()
    return repo, commit_ids[-1]  # HEAD = c7
240
241
class TestEntangleJaccard:
    """F03 / F04 — EntangleProvider must store Jaccard-based co-change data."""

    @pytest.mark.asyncio
    async def test_F03_co_change_rate_is_jaccard(
        self, db_session: AsyncSession, entangle_repo: tuple[MusehubRepo, str]
    ) -> None:
        """co_change_rate = co_changes / |union| (Jaccard), not co / min."""
        repo, head = entangle_repo
        provider = EntangleProvider()
        await provider.compute(db_session, str(repo.repo_id), head, {})

        row = await db_session.scalar(
            sa.select(MusehubIntelEntangle).where(
                MusehubIntelEntangle.repo_id == str(repo.repo_id),
            )
        )
        assert row is not None, "Expected one entangle pair to be stored"

        expected_jaccard = 3 / 7
        expected_min_rate = 3 / 5
        tolerance = 0.001
        rate = row.co_change_rate

        # Only blame the min-based formula when the value actually matches it;
        # any other wrong value is reported as a plain expected/actual mismatch.
        hint = (
            f" — this matches the co/min formula ({expected_min_rate})"
            if abs(rate - expected_min_rate) < tolerance
            else ""
        )
        assert abs(rate - expected_jaccard) < tolerance, (
            f"Expected Jaccard rate {expected_jaccard:.4f}, got {rate:.4f}{hint}"
        )

    @pytest.mark.asyncio
    async def test_F04_commits_both_active_is_union_cardinality(
        self, db_session: AsyncSession, entangle_repo: tuple[MusehubRepo, str]
    ) -> None:
        """commits_both_active stores |union(commits_a, commits_b)| = 7."""
        repo, head = entangle_repo
        provider = EntangleProvider()
        await provider.compute(db_session, str(repo.repo_id), head, {})

        row = await db_session.scalar(
            sa.select(MusehubIntelEntangle).where(
                MusehubIntelEntangle.repo_id == str(repo.repo_id),
            )
        )
        assert row is not None
        assert row.commits_both_active == 7, (
            f"Expected union cardinality 7, got {row.commits_both_active}"
        )
286
287
288 # ---------------------------------------------------------------------------
289 # F05 / F06 / F07 — Hotspots route: reads MusehubSymbolIntel, not snapshot
290 # ---------------------------------------------------------------------------
291
@pytest_asyncio.fixture
async def hotspots_repo(db_session: AsyncSession) -> MusehubRepo:
    """Build a repo that has symbol-intel rows but NO legacy snapshot row."""
    repo = await create_repo(db_session, owner="fid", slug="hotspots-fid")
    repo_id = str(repo.repo_id)

    # Three symbols, each with a distinct churn_30d value.
    churn_by_address = {
        "src/hot.py::fn_hot": 42,
        "src/med.py::fn_med": 15,
        "src/cold.py::fn_cold": 3,
    }
    for address, churn in churn_by_address.items():
        await _insert_symbol_intel(db_session, repo_id, address, churn_30d=churn)

    await db_session.commit()
    return repo
308
309
class TestHotspotsRoute:
    """F05 / F06 / F07 — hotspots page for the ``hotspots_repo`` fixture."""

    # Route under test; owner and slug match the ``hotspots_repo`` fixture.
    _URL = "/fid/hotspots-fid/intel/hotspots"

    @pytest.mark.asyncio
    async def test_F05_hotspots_returns_200_without_legacy_snapshot(
        self, client: AsyncClient, hotspots_repo: MusehubRepo
    ) -> None:
        """Route must not 500 when there is no legacy snapshot row."""
        response = await client.get(self._URL)
        assert response.status_code == 200

    @pytest.mark.asyncio
    async def test_F06_hotspots_ranked_by_churn_30d_descending(
        self, client: AsyncClient, hotspots_repo: MusehubRepo
    ) -> None:
        """Symbols appear highest-churn first (42, 15, 3)."""
        response = await client.get(self._URL)
        assert response.status_code == 200

        page = response.text
        # First occurrence of each symbol name, listed in expected rank order.
        positions = [page.find(name) for name in ("fn_hot", "fn_med", "fn_cold")]
        assert -1 not in positions, (
            "Not all symbols found in response"
        )
        hot_at, med_at, cold_at = positions
        assert hot_at < med_at < cold_at, (
            "Symbols not in churn-descending order"
        )

    @pytest.mark.asyncio
    async def test_F07_hotspots_renders_address_and_change_count(
        self, client: AsyncClient, hotspots_repo: MusehubRepo
    ) -> None:
        """Address and change count appear in the rendered HTML."""
        response = await client.get(self._URL)
        assert response.status_code == 200
        page = response.text
        assert "src/hot.py::fn_hot" in page
        assert "42" in page
File History 1 commit
sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595 fix: typing audit — 0 violations, 0 untyped defs across all… Sonnet 4.6 minor 20 days ago