gabriel / musehub public

test_phase1_intel_schema.py file-level

at sha256:3 · View file ↗ · Intel ↗

History
1 files
1 commits
0 hotspots
0 🧊 dead
0 💥 blast risk
sha256:0 fix: fall back to any indexed mpack in read_object_bytes when push mpac… · gabriel · Jun 17, 2026
1 """TDD spec for Phase 1 — intel indexing schema expansion (issue #8).
2
3 New normalized tables replace JSON blobs for 11 new muse code intel types,
4 plus two new columns on musehub_symbol_intel.
5
6 New tables:
7 musehub_intel_coupling — co-changing file pairs
8 musehub_intel_entangle — symbol entanglement pairs
9 musehub_intel_dead — dead-code candidates
10 musehub_intel_blast_risk — composite pre-release risk per symbol
11 musehub_intel_stable — long-stable symbols
12 musehub_intel_velocity — module growth velocity
13 musehub_intel_clones — duplicate code clusters
14 musehub_intel_type — per-symbol type health
15 musehub_intel_api_surface — public API surface entries
16 musehub_intel_languages — language breakdown per push
17 musehub_intel_refactor_events — detected refactoring events
18
19 Extended columns on musehub_symbol_intel:
20 last_commit_id VARCHAR(128) — most recent commit that touched this symbol
21 op VARCHAR(16) — latest op (add/modify/delete)
22
23 Layers:
24 1. Schema — ORM model shape, column types, PK, indexes, FK cascade
25 2. Write — upsert helpers insert and overwrite correctly
26 3. Cascade — deleting repo removes all intel rows
27 4. DB-level — new tables and the extended musehub_symbol_intel columns exist in Postgres (not just ORM)
28 """
29 from __future__ import annotations
30
31 import secrets
32 from datetime import datetime, timezone
33
34 import pytest
35 from sqlalchemy import inspect, select, text
36 from sqlalchemy.ext.asyncio import AsyncSession
37
38 from muse.core.types import fake_id
39 from tests.factories import create_repo
40
41
def _uid() -> str:
    """Return a new identifier: a random 32-character hex token passed through ``fake_id``."""
    token = secrets.token_hex(16)
    return fake_id(token)
44
45
46 def _now() -> datetime:
47 return datetime.now(tz=timezone.utc)
48
49
50 # ─────────────────────────────────────────────────────────────────────────────
51 # Layer 1 — Schema: ORM models exist, correct tablename, correct columns
52 # ─────────────────────────────────────────────────────────────────────────────
53
class TestPhase1SchemaModels:
    """Layer 1: each Phase 1 ORM model imports and declares the expected columns.

    These tests only inspect ``__tablename__`` and ``__table__.columns`` on the
    model classes; none of them takes a database session.  Column checks are
    subset checks, so a model may declare more columns than the ones listed.
    """

    def test_P1_01_coupling_model_importable(self) -> None:
        """``MusehubIntelCoupling`` maps to ``musehub_intel_coupling``."""
        from musehub.db.musehub_intel_models import MusehubIntelCoupling as model

        assert model.__tablename__ == "musehub_intel_coupling"

    def test_P1_02_coupling_columns(self) -> None:
        """The coupling model declares at least the required columns."""
        from musehub.db.musehub_intel_models import MusehubIntelCoupling as model

        required = {"repo_id", "file_a", "file_b", "co_changes", "ref"}
        declared = {column.name for column in model.__table__.columns}
        assert required.issubset(declared)

    def test_P1_03_entangle_model_importable(self) -> None:
        """``MusehubIntelEntangle`` maps to ``musehub_intel_entangle``."""
        from musehub.db.musehub_intel_models import MusehubIntelEntangle as model

        assert model.__tablename__ == "musehub_intel_entangle"

    def test_P1_04_entangle_columns(self) -> None:
        """The entangle model declares at least the required columns."""
        from musehub.db.musehub_intel_models import MusehubIntelEntangle as model

        required = {
            "repo_id", "symbol_a", "symbol_b",
            "co_change_rate", "co_changes", "structurally_linked", "ref",
        }
        declared = {column.name for column in model.__table__.columns}
        assert required.issubset(declared)

    def test_P1_05_dead_model_importable(self) -> None:
        """``MusehubIntelDead`` maps to ``musehub_intel_dead``."""
        from musehub.db.musehub_intel_models import MusehubIntelDead as model

        assert model.__tablename__ == "musehub_intel_dead"

    def test_P1_06_dead_columns(self) -> None:
        """The dead-code model declares at least the required columns."""
        from musehub.db.musehub_intel_models import MusehubIntelDead as model

        required = {"repo_id", "address", "kind", "confidence", "reason", "ref"}
        declared = {column.name for column in model.__table__.columns}
        assert required.issubset(declared)

    def test_P1_07_blast_risk_model_importable(self) -> None:
        """``MusehubIntelBlastRisk`` maps to ``musehub_intel_blast_risk``."""
        from musehub.db.musehub_intel_models import MusehubIntelBlastRisk as model

        assert model.__tablename__ == "musehub_intel_blast_risk"

    def test_P1_08_blast_risk_columns(self) -> None:
        """The blast-risk model declares at least the required columns."""
        from musehub.db.musehub_intel_models import MusehubIntelBlastRisk as model

        required = {
            "repo_id", "address", "kind", "risk", "risk_score",
            "impact_score", "churn_score", "test_gap_score", "coupling_score", "ref",
        }
        declared = {column.name for column in model.__table__.columns}
        assert required.issubset(declared)

    def test_P1_09_stable_model_importable(self) -> None:
        """``MusehubIntelStable`` maps to ``musehub_intel_stable``."""
        from musehub.db.musehub_intel_models import MusehubIntelStable as model

        assert model.__tablename__ == "musehub_intel_stable"

    def test_P1_10_stable_columns(self) -> None:
        """The stable-symbol model declares at least the required columns."""
        from musehub.db.musehub_intel_models import MusehubIntelStable as model

        required = {"repo_id", "address", "days_stable", "since_start", "ref"}
        declared = {column.name for column in model.__table__.columns}
        assert required.issubset(declared)

    def test_P1_11_velocity_model_importable(self) -> None:
        """``MusehubIntelVelocity`` maps to ``musehub_intel_velocity``."""
        from musehub.db.musehub_intel_models import MusehubIntelVelocity as model

        assert model.__tablename__ == "musehub_intel_velocity"

    def test_P1_12_velocity_columns(self) -> None:
        """The velocity model declares at least the required columns."""
        from musehub.db.musehub_intel_models import MusehubIntelVelocity as model

        required = {
            "repo_id", "module", "added", "removed", "net",
            "modified", "active_commits", "acceleration", "ref",
        }
        declared = {column.name for column in model.__table__.columns}
        assert required.issubset(declared)

    def test_P1_13_clones_model_importable(self) -> None:
        """``MusehubIntelClones`` maps to ``musehub_intel_clones``."""
        from musehub.db.musehub_intel_models import MusehubIntelClones as model

        assert model.__tablename__ == "musehub_intel_clones"

    def test_P1_14_clones_columns(self) -> None:
        """The clones model declares at least the required columns."""
        from musehub.db.musehub_intel_models import MusehubIntelClones as model

        required = {"repo_id", "cluster_hash", "tier", "member_count", "members_json", "ref"}
        declared = {column.name for column in model.__table__.columns}
        assert required.issubset(declared)

    def test_P1_15_type_model_importable(self) -> None:
        """``MusehubIntelType`` maps to ``musehub_intel_type``."""
        from musehub.db.musehub_intel_models import MusehubIntelType as model

        assert model.__tablename__ == "musehub_intel_type"

    def test_P1_16_type_columns(self) -> None:
        """The type-health model declares at least the required columns."""
        from musehub.db.musehub_intel_models import MusehubIntelType as model

        required = {
            "repo_id", "address", "kind", "type_score",
            "params_total", "params_annotated", "params_with_any", "ref",
        }
        declared = {column.name for column in model.__table__.columns}
        assert required.issubset(declared)

    def test_P1_17_api_surface_model_importable(self) -> None:
        """``MusehubIntelApiSurface`` maps to ``musehub_intel_api_surface``."""
        from musehub.db.musehub_intel_models import MusehubIntelApiSurface as model

        assert model.__tablename__ == "musehub_intel_api_surface"

    def test_P1_18_api_surface_columns(self) -> None:
        """The API-surface model declares at least the required columns."""
        from musehub.db.musehub_intel_models import MusehubIntelApiSurface as model

        required = {"repo_id", "address", "kind", "signature_id", "visibility", "ref"}
        declared = {column.name for column in model.__table__.columns}
        assert required.issubset(declared)

    def test_P1_19_languages_model_importable(self) -> None:
        """``MusehubIntelLanguages`` maps to ``musehub_intel_languages``."""
        from musehub.db.musehub_intel_models import MusehubIntelLanguages as model

        assert model.__tablename__ == "musehub_intel_languages"

    def test_P1_20_languages_columns(self) -> None:
        """The languages model declares at least the required columns."""
        from musehub.db.musehub_intel_models import MusehubIntelLanguages as model

        required = {"repo_id", "language", "symbol_count", "file_count", "pct", "ref"}
        declared = {column.name for column in model.__table__.columns}
        assert required.issubset(declared)

    def test_P1_21_refactor_events_model_importable(self) -> None:
        """``MusehubIntelRefactorEvent`` maps to ``musehub_intel_refactor_events``."""
        from musehub.db.musehub_intel_models import MusehubIntelRefactorEvent as model

        assert model.__tablename__ == "musehub_intel_refactor_events"

    def test_P1_22_refactor_events_columns(self) -> None:
        """The refactor-event model declares at least the required columns."""
        from musehub.db.musehub_intel_models import MusehubIntelRefactorEvent as model

        required = {
            "repo_id", "event_id", "kind",
            "address", "detail", "commit_id", "committed_at",
        }
        declared = {column.name for column in model.__table__.columns}
        assert required.issubset(declared)

    def test_P1_23_symbol_intel_extended_columns(self) -> None:
        """``MusehubSymbolIntel`` declares the ``last_commit_id`` and ``op`` columns."""
        from musehub.db.musehub_intel_models import MusehubSymbolIntel as model

        declared = {column.name for column in model.__table__.columns}
        assert "last_commit_id" in declared, "missing last_commit_id on musehub_symbol_intel"
        assert "op" in declared, "missing op on musehub_symbol_intel"
175
176
177 # ─────────────────────────────────────────────────────────────────────────────
178 # Layer 2 — Write: rows insert and upsert correctly
179 # ─────────────────────────────────────────────────────────────────────────────
180
class TestPhase1Write:
    """Layer 2: one row of each new intel model can be inserted and read back.

    Every test follows the same steps: create a repo with the ``create_repo``
    factory, add a single ORM row carrying that repo's ``repo_id``, flush the
    session, then run a ``SELECT`` filtered on ``repo_id`` and assert that a
    row comes back.  Only the final test checks individual column values.
    """

    @pytest.mark.asyncio
    async def test_P1_24_coupling_insert(self, db_session: AsyncSession) -> None:
        """A ``MusehubIntelCoupling`` row can be flushed and selected by repo."""
        from musehub.db import musehub_intel_models as db

        repo = await create_repo(db_session)
        row = db.MusehubIntelCoupling(
            repo_id=repo.repo_id,
            file_a="musehub/services/musehub_jobs.py",
            file_b="musehub/services/musehub_wire.py",
            co_changes=12,
            ref=_uid(),
        )
        db_session.add(row)
        await db_session.flush()
        result = await db_session.execute(
            select(db.MusehubIntelCoupling).where(
                db.MusehubIntelCoupling.repo_id == repo.repo_id
            )
        )
        assert result.scalars().first() is not None

    @pytest.mark.asyncio
    async def test_P1_25_entangle_insert(self, db_session: AsyncSession) -> None:
        """A ``MusehubIntelEntangle`` row can be flushed and selected by repo."""
        from musehub.db import musehub_intel_models as db

        repo = await create_repo(db_session)
        row = db.MusehubIntelEntangle(
            repo_id=repo.repo_id,
            symbol_a="services/jobs.py::enqueue_push_intel",
            symbol_b="services/wire.py::wire_push_unpack_mpack",
            co_change_rate=0.85,
            co_changes=17,
            structurally_linked=False,
            ref=_uid(),
        )
        db_session.add(row)
        await db_session.flush()
        result = await db_session.execute(
            select(db.MusehubIntelEntangle).where(
                db.MusehubIntelEntangle.repo_id == repo.repo_id
            )
        )
        assert result.scalars().first() is not None

    @pytest.mark.asyncio
    async def test_P1_26_dead_insert(self, db_session: AsyncSession) -> None:
        """A ``MusehubIntelDead`` row can be flushed and selected by repo."""
        from musehub.db import musehub_intel_models as db

        repo = await create_repo(db_session)
        row = db.MusehubIntelDead(
            repo_id=repo.repo_id,
            address="musehub/utils/legacy.py::old_helper",
            kind="function",
            confidence="high",
            reason="no callers found",
            ref=_uid(),
        )
        db_session.add(row)
        await db_session.flush()
        result = await db_session.execute(
            select(db.MusehubIntelDead).where(db.MusehubIntelDead.repo_id == repo.repo_id)
        )
        assert result.scalars().first() is not None

    @pytest.mark.asyncio
    async def test_P1_27_blast_risk_insert(self, db_session: AsyncSession) -> None:
        """A ``MusehubIntelBlastRisk`` row can be flushed and selected by repo."""
        from musehub.db import musehub_intel_models as db

        repo = await create_repo(db_session)
        row = db.MusehubIntelBlastRisk(
            repo_id=repo.repo_id,
            address="musehub/services/musehub_jobs.py::enqueue_push_intel",
            kind="function",
            risk="high",
            risk_score=87,
            impact_score=0.9,
            churn_score=0.7,
            test_gap_score=0.5,
            coupling_score=0.6,
            ref=_uid(),
        )
        db_session.add(row)
        await db_session.flush()
        result = await db_session.execute(
            select(db.MusehubIntelBlastRisk).where(
                db.MusehubIntelBlastRisk.repo_id == repo.repo_id
            )
        )
        assert result.scalars().first() is not None

    @pytest.mark.asyncio
    async def test_P1_28_stable_insert(self, db_session: AsyncSession) -> None:
        """A ``MusehubIntelStable`` row can be flushed and selected by repo."""
        from musehub.db import musehub_intel_models as db

        repo = await create_repo(db_session)
        row = db.MusehubIntelStable(
            repo_id=repo.repo_id,
            address="musehub/core/genesis.py::compute_identity_id",
            days_stable=90,
            since_start=False,
            ref=_uid(),
        )
        db_session.add(row)
        await db_session.flush()
        result = await db_session.execute(
            select(db.MusehubIntelStable).where(
                db.MusehubIntelStable.repo_id == repo.repo_id
            )
        )
        assert result.scalars().first() is not None

    @pytest.mark.asyncio
    async def test_P1_29_velocity_insert(self, db_session: AsyncSession) -> None:
        """A ``MusehubIntelVelocity`` row can be flushed and selected by repo."""
        from musehub.db import musehub_intel_models as db

        repo = await create_repo(db_session)
        row = db.MusehubIntelVelocity(
            repo_id=repo.repo_id,
            module="musehub/services",
            added=5,
            removed=1,
            net=4,
            modified=3,
            active_commits=10,
            prior_added=3,
            prior_net=2,
            acceleration=1.5,
            stagnant_commits=0,
            ref=_uid(),
        )
        db_session.add(row)
        await db_session.flush()
        result = await db_session.execute(
            select(db.MusehubIntelVelocity).where(
                db.MusehubIntelVelocity.repo_id == repo.repo_id
            )
        )
        assert result.scalars().first() is not None

    @pytest.mark.asyncio
    async def test_P1_30_clones_insert(self, db_session: AsyncSession) -> None:
        """A ``MusehubIntelClones`` row can be flushed and selected by repo."""
        from musehub.db import musehub_intel_models as db

        repo = await create_repo(db_session)
        row = db.MusehubIntelClones(
            repo_id=repo.repo_id,
            cluster_hash=_uid(),
            tier="exact",
            member_count=3,
            members_json='["a.py::fn", "b.py::fn", "c.py::fn"]',
            ref=_uid(),
        )
        db_session.add(row)
        await db_session.flush()
        result = await db_session.execute(
            select(db.MusehubIntelClones).where(
                db.MusehubIntelClones.repo_id == repo.repo_id
            )
        )
        assert result.scalars().first() is not None

    @pytest.mark.asyncio
    async def test_P1_31_type_insert(self, db_session: AsyncSession) -> None:
        """A ``MusehubIntelType`` row can be flushed and selected by repo."""
        from musehub.db import musehub_intel_models as db

        repo = await create_repo(db_session)
        row = db.MusehubIntelType(
            repo_id=repo.repo_id,
            address="musehub/services/musehub_jobs.py::enqueue_push_intel",
            kind="function",
            return_is_any=False,
            params_total=4,
            params_annotated=4,
            params_with_any=0,
            type_score=1.0,
            ref=_uid(),
        )
        db_session.add(row)
        await db_session.flush()
        result = await db_session.execute(
            select(db.MusehubIntelType).where(
                db.MusehubIntelType.repo_id == repo.repo_id
            )
        )
        assert result.scalars().first() is not None

    @pytest.mark.asyncio
    async def test_P1_32_api_surface_insert(self, db_session: AsyncSession) -> None:
        """A ``MusehubIntelApiSurface`` row can be flushed and selected by repo."""
        from musehub.db import musehub_intel_models as db

        repo = await create_repo(db_session)
        row = db.MusehubIntelApiSurface(
            repo_id=repo.repo_id,
            address="musehub/api/routes/musehub/ui_commits.py::commits_page",
            kind="function",
            signature_id=_uid(),
            visibility="public",
            ref=_uid(),
        )
        db_session.add(row)
        await db_session.flush()
        result = await db_session.execute(
            select(db.MusehubIntelApiSurface).where(
                db.MusehubIntelApiSurface.repo_id == repo.repo_id
            )
        )
        assert result.scalars().first() is not None

    @pytest.mark.asyncio
    async def test_P1_33_languages_insert(self, db_session: AsyncSession) -> None:
        """A ``MusehubIntelLanguages`` row can be flushed and selected by repo."""
        from musehub.db import musehub_intel_models as db

        repo = await create_repo(db_session)
        row = db.MusehubIntelLanguages(
            repo_id=repo.repo_id,
            language="Python",
            symbol_count=1240,
            file_count=88,
            pct=97.5,
            ref=_uid(),
        )
        db_session.add(row)
        await db_session.flush()
        result = await db_session.execute(
            select(db.MusehubIntelLanguages).where(
                db.MusehubIntelLanguages.repo_id == repo.repo_id
            )
        )
        assert result.scalars().first() is not None

    @pytest.mark.asyncio
    async def test_P1_34_refactor_event_insert(self, db_session: AsyncSession) -> None:
        """A ``MusehubIntelRefactorEvent`` row can be flushed and selected by repo."""
        from musehub.db import musehub_intel_models as db

        repo = await create_repo(db_session)
        commit_id = _uid()
        row = db.MusehubIntelRefactorEvent(
            event_id=_uid(),
            repo_id=repo.repo_id,
            kind="rename",
            address="musehub/services/jobs.py::enqueue",
            detail="→ enqueue_push_intel",  # leading char is U+2192 RIGHTWARDS ARROW
            commit_id=commit_id,
            committed_at=_now(),
        )
        db_session.add(row)
        await db_session.flush()
        result = await db_session.execute(
            select(db.MusehubIntelRefactorEvent).where(
                db.MusehubIntelRefactorEvent.repo_id == repo.repo_id
            )
        )
        assert result.scalars().first() is not None

    @pytest.mark.asyncio
    async def test_P1_35_symbol_intel_extended_columns_write(self, db_session: AsyncSession) -> None:
        """``last_commit_id`` and ``op`` on ``MusehubSymbolIntel`` round-trip their values."""
        from musehub.db import musehub_intel_models as db

        repo = await create_repo(db_session)
        commit_id = _uid()
        row = db.MusehubSymbolIntel(
            repo_id=repo.repo_id,
            address="musehub/services/musehub_jobs.py::enqueue_push_intel",
            last_commit_id=commit_id,
            op="modify",
        )
        db_session.add(row)
        await db_session.flush()
        result = await db_session.execute(
            select(db.MusehubSymbolIntel).where(
                db.MusehubSymbolIntel.repo_id == repo.repo_id
            )
        )
        row_back = result.scalars().first()
        assert row_back is not None
        assert row_back.last_commit_id == commit_id
        assert row_back.op == "modify"
448
449
450 # ─────────────────────────────────────────────────────────────────────────────
451 # Layer 3 — Cascade: deleting repo removes all intel rows
452 # ─────────────────────────────────────────────────────────────────────────────
453
class TestPhase1Cascade:
    """Layer 3: after a repo is deleted, no intel rows remain for its ``repo_id``.

    Each test inserts one intel row for a freshly created repo, deletes the
    repo through the session, flushes, and asserts that a ``SELECT`` filtered
    on the repo's id returns nothing.  The tests do not distinguish how the
    rows are removed (ORM-level cascade vs. database foreign-key cascade).
    """

    @pytest.mark.asyncio
    async def test_P1_36_cascade_delete_removes_coupling(self, db_session: AsyncSession) -> None:
        """Deleting the repo removes its ``MusehubIntelCoupling`` rows."""
        from musehub.db import musehub_intel_models as db

        repo = await create_repo(db_session)
        # Capture the id before deleting: the later SELECT should not rely on
        # reading attributes from an instance the session has already deleted.
        repo_id = repo.repo_id
        db_session.add(db.MusehubIntelCoupling(
            repo_id=repo_id, file_a="a.py", file_b="b.py", co_changes=5, ref=_uid(),
        ))
        await db_session.flush()
        await db_session.delete(repo)
        await db_session.flush()
        result = await db_session.execute(
            select(db.MusehubIntelCoupling).where(
                db.MusehubIntelCoupling.repo_id == repo_id
            )
        )
        assert result.scalars().first() is None

    @pytest.mark.asyncio
    async def test_P1_37_cascade_delete_removes_dead(self, db_session: AsyncSession) -> None:
        """Deleting the repo removes its ``MusehubIntelDead`` rows."""
        from musehub.db import musehub_intel_models as db

        repo = await create_repo(db_session)
        # Capture the id before deleting (see the query below).
        repo_id = repo.repo_id
        db_session.add(db.MusehubIntelDead(
            repo_id=repo_id,
            address="a.py::fn",
            kind="function",
            confidence="high",
            reason="no callers",
            ref=_uid(),
        ))
        await db_session.flush()
        await db_session.delete(repo)
        await db_session.flush()
        result = await db_session.execute(
            select(db.MusehubIntelDead).where(
                db.MusehubIntelDead.repo_id == repo_id
            )
        )
        assert result.scalars().first() is None

    @pytest.mark.asyncio
    async def test_P1_38_cascade_delete_removes_refactor_events(self, db_session: AsyncSession) -> None:
        """Deleting the repo removes its ``MusehubIntelRefactorEvent`` rows."""
        from musehub.db import musehub_intel_models as db

        repo = await create_repo(db_session)
        # Capture the id before deleting (see the query below).
        repo_id = repo.repo_id
        db_session.add(db.MusehubIntelRefactorEvent(
            event_id=_uid(),
            repo_id=repo_id,
            kind="rename",
            address="a.py::old",
            detail="→ new",  # leading char is U+2192 RIGHTWARDS ARROW
            commit_id=_uid(),
            committed_at=_now(),
        ))
        await db_session.flush()
        await db_session.delete(repo)
        await db_session.flush()
        result = await db_session.execute(
            select(db.MusehubIntelRefactorEvent).where(
                db.MusehubIntelRefactorEvent.repo_id == repo_id
            )
        )
        assert result.scalars().first() is None
518
519
520 # ─────────────────────────────────────────────────────────────────────────────
521 # Layer 4 — DB-level: tables exist in Postgres (not just ORM)
522 # ─────────────────────────────────────────────────────────────────────────────
523
class TestPhase1DatabaseTables:
    """Layer 4: the schema exists in the connected database, not only in the ORM.

    Both tests issue raw SQL against catalog views (``pg_tables`` and
    ``information_schema.columns``) through the ``db_session`` fixture.
    """

    @pytest.mark.asyncio
    async def test_P1_39_all_new_tables_exist_in_db(self, db_session: AsyncSession) -> None:
        """All eleven new intel tables are listed in ``pg_tables`` for schema ``public``."""
        catalog_query = text("SELECT tablename FROM pg_tables WHERE schemaname = 'public'")
        rows = await db_session.execute(catalog_query)
        existing = {record[0] for record in rows}

        expected = {
            "musehub_intel_coupling",
            "musehub_intel_entangle",
            "musehub_intel_dead",
            "musehub_intel_blast_risk",
            "musehub_intel_stable",
            "musehub_intel_velocity",
            "musehub_intel_clones",
            "musehub_intel_type",
            "musehub_intel_api_surface",
            "musehub_intel_languages",
            "musehub_intel_refactor_events",
        }
        missing = expected.difference(existing)
        assert not missing, f"Tables missing from DB: {missing}"

    @pytest.mark.asyncio
    async def test_P1_40_symbol_intel_extended_columns_in_db(self, db_session: AsyncSession) -> None:
        """``musehub_symbol_intel`` has ``last_commit_id`` and ``op`` columns in the database."""
        column_query = text("""
            SELECT column_name FROM information_schema.columns
            WHERE table_name = 'musehub_symbol_intel'
            AND column_name IN ('last_commit_id', 'op')
        """)
        rows = await db_session.execute(column_query)
        found = {record[0] for record in rows}
        assert "last_commit_id" in found, "last_commit_id missing from musehub_symbol_intel"
        assert "op" in found, "op missing from musehub_symbol_intel"
559 assert "op" in found, "op missing from musehub_symbol_intel"