gabriel / musehub public
test_phase3_gravity_schema.py python
529 lines 23.3 KB
Raw
sha256:0997d6250ae6476362f6fe2025af7789f46d03df3e9f34356d5e8ee79b201923 fix(issues): use issue number as pagination cursor, not cre… Sonnet 4.6 patch 9 days ago
1 """TDD spec for Phase 3, Part 1 — gravity schema extension (issue #9).
2
3 Extends musehub_symbol_intel with 6 new columns that power /intel/gravity:
4
5 gravity_pct FLOAT — gravity_pct from muse code gravity
6 gravity_direct_dependents INTEGER — direct_dependents count
7 gravity_transitive_dependents INTEGER — transitive_dependents count
8 gravity_max_depth SMALLINT — deepest dependency chain
9 gravity_depth_distribution JSONB — {depth_level: count} for the sparkline
10 symbol_kind VARCHAR(64) — method/function/class/async_method
11
12 New index: (repo_id, gravity_pct DESC) — primary sort key for the page.
13
14 Layers:
15 1. Schema — column types, nullability, ORM model
16 2. Index — (repo_id, gravity_pct DESC) exists in DB metadata
17 3. Write — insert and read back all 6 new fields
18 4. JSONB — depth_dist round-trips as a Python dict
19 5. Upsert — gravity update leaves churn/blast columns untouched
20 6. Null-safe — existing rows without gravity data remain valid
21 7. Ordering — rows ordered by gravity_pct DESC via SQL
22 8. Kind — all four symbol_kind values round-trip correctly
23 """
24 from __future__ import annotations
25
26 import secrets
27 from datetime import datetime, timezone
28
29 import pytest
30 from sqlalchemy import inspect, select, text
31 from sqlalchemy.ext.asyncio import AsyncSession
32
33 from muse.core.types import fake_id
34 from tests.factories import create_repo
35
36
def _uid() -> str:
    """Return a new identifier: ``fake_id`` applied to a random 32-character hex token."""
    # secrets.token_hex(16) yields 16 random bytes rendered as 32 hex digits.
    seed = secrets.token_hex(16)
    return fake_id(seed)
39
40
def _now() -> datetime:
    """Return the current moment as a timezone-aware ``datetime`` in UTC."""
    utc = timezone.utc
    return datetime.now(tz=utc)
43
44
45 # ─────────────────────────────────────────────────────────────────────────────
46 # Layer 1 — Schema: 6 new columns on MusehubSymbolIntel
47 # ─────────────────────────────────────────────────────────────────────────────
48
class TestGravitySchemaColumns:
    """Layer 1 — the ORM table declares the six gravity columns.

    Every check inspects ``MusehubSymbolIntel.__table__`` only; no database
    connection is involved. The model is imported lazily inside the helper so
    that importing this test module does not import the model module.
    """

    @staticmethod
    def _table():
        """Return the ``__table__`` object of ``MusehubSymbolIntel``."""
        from musehub.db.musehub_intel_models import MusehubSymbolIntel

        return MusehubSymbolIntel.__table__

    def _column_names(self) -> set[str]:
        """Return the set of column names declared on the table."""
        return {column.name for column in self._table().columns}

    def test_P3_01_gravity_pct_column_exists(self) -> None:
        assert "gravity_pct" in self._column_names()

    def test_P3_02_gravity_direct_dependents_column_exists(self) -> None:
        assert "gravity_direct_dependents" in self._column_names()

    def test_P3_03_gravity_transitive_dependents_column_exists(self) -> None:
        assert "gravity_transitive_dependents" in self._column_names()

    def test_P3_04_gravity_max_depth_column_exists(self) -> None:
        assert "gravity_max_depth" in self._column_names()

    def test_P3_05_gravity_depth_distribution_column_exists(self) -> None:
        assert "gravity_depth_distribution" in self._column_names()

    def test_P3_06_symbol_kind_column_exists(self) -> None:
        assert "symbol_kind" in self._column_names()

    def test_P3_07_all_six_columns_present(self) -> None:
        """All six new columns are present together (extra columns are allowed)."""
        expected = {
            "gravity_pct",
            "gravity_direct_dependents",
            "gravity_transitive_dependents",
            "gravity_max_depth",
            "gravity_depth_distribution",
            "symbol_kind",
        }
        missing = expected - self._column_names()
        assert not missing

    def test_P3_08_gravity_pct_is_nullable(self) -> None:
        column = self._table().c.gravity_pct
        assert column.nullable is True, "gravity_pct must be nullable — existing rows have no gravity data"

    def test_P3_09_gravity_depth_distribution_is_nullable(self) -> None:
        column = self._table().c.gravity_depth_distribution
        assert column.nullable is True, "gravity_depth_distribution must be nullable"

    def test_P3_10_symbol_kind_is_nullable(self) -> None:
        column = self._table().c.symbol_kind
        assert column.nullable is True, "symbol_kind must be nullable — backfilled on next push"
107
108
109 # ─────────────────────────────────────────────────────────────────────────────
110 # Layer 2 — Index: (repo_id, gravity_pct DESC) in table args
111 # ─────────────────────────────────────────────────────────────────────────────
112
class TestGravityIndex:
    """Layer 2 — the table metadata carries an index named after gravity_pct."""

    def test_P3_11_gravity_pct_index_defined(self) -> None:
        """At least one index declared on the table has ``gravity_pct`` in its name.

        Only index *names* are inspected; the indexed columns and their sort
        direction are not checked here.
        """
        from musehub.db.musehub_intel_models import MusehubSymbolIntel

        table = MusehubSymbolIntel.__table__
        index_names = {index.name for index in table.indexes}

        found = False
        for name in index_names:
            if "gravity_pct" in name:
                found = True
                break

        assert found, f"Expected an index on gravity_pct, got: {index_names}"
121
122
123 # ─────────────────────────────────────────────────────────────────────────────
124 # Layer 3 — Write: all 6 new fields insert and read back
125 # ─────────────────────────────────────────────────────────────────────────────
126
class TestGravityWrite:
    """Layer 3 — rows carrying the gravity fields can be inserted and read back.

    Both tests take a ``db_session`` fixture (an ``AsyncSession``; presumably
    supplied by the project's conftest — confirm there) and create their own
    repo via ``create_repo`` so the ``repo_id`` filter isolates their row.
    """

    @pytest.mark.asyncio
    async def test_P3_12_gravity_full_write(self, db_session: AsyncSession) -> None:
        """Insert a row with all six gravity fields set and verify each one on read-back."""
        from musehub.db import musehub_intel_models as db

        repo = await create_repo(db_session)
        dist = {"1": 11, "2": 484, "3": 197, "4": 35, "5": 5, "6": 1}
        row = db.MusehubSymbolIntel(
            repo_id=repo.repo_id,
            address="musehub/storage/backends.py::S3Backend._key",
            gravity_pct=38.9,
            gravity_direct_dependents=11,
            gravity_transitive_dependents=733,
            gravity_max_depth=6,
            gravity_depth_distribution=dist,
            symbol_kind="method",
        )
        db_session.add(row)
        await db_session.flush()

        result = await db_session.execute(
            select(db.MusehubSymbolIntel).where(
                db.MusehubSymbolIntel.repo_id == repo.repo_id
            )
        )
        back = result.scalars().first()
        assert back is not None
        assert back.gravity_pct == pytest.approx(38.9)
        assert back.gravity_direct_dependents == 11
        assert back.gravity_transitive_dependents == 733
        assert back.gravity_max_depth == 6
        # The depth distribution is one of the six fields written above, so it
        # is verified alongside the scalar columns.
        assert back.gravity_depth_distribution == dist
        assert back.symbol_kind == "method"

    @pytest.mark.asyncio
    async def test_P3_13_gravity_write_without_optional_fields(self, db_session: AsyncSession) -> None:
        """Insert a row with only repo_id and address; the nullable gravity fields read back as None."""
        from musehub.db import musehub_intel_models as db

        repo = await create_repo(db_session)
        row = db.MusehubSymbolIntel(
            repo_id=repo.repo_id,
            address="musehub/services/musehub_jobs.py::enqueue_push_intel",
        )
        db_session.add(row)
        await db_session.flush()

        result = await db_session.execute(
            select(db.MusehubSymbolIntel).where(
                db.MusehubSymbolIntel.repo_id == repo.repo_id
            )
        )
        back = result.scalars().first()
        assert back is not None
        assert back.gravity_pct is None
        assert back.gravity_depth_distribution is None
        assert back.symbol_kind is None
179
180
181 # ─────────────────────────────────────────────────────────────────────────────
182 # Layer 4 — JSONB: depth_distribution round-trips as a Python dict
183 # ─────────────────────────────────────────────────────────────────────────────
184
class TestGravityDepthDistJsonb:
    """Layer 4 — ``gravity_depth_distribution`` round-trips as a Python dict.

    Each test stores a ``{depth_level: count}`` mapping, flushes, refreshes
    the instance from the session, and compares the reloaded attribute with
    the dict that was written.
    """

    @staticmethod
    async def _store_and_reload(db_session: AsyncSession, address: str, dist: dict[str, int]):
        """Persist one row holding *dist* in a fresh repo and return the refreshed attribute."""
        from musehub.db import musehub_intel_models as db

        repo = await create_repo(db_session)
        row = db.MusehubSymbolIntel(
            repo_id=repo.repo_id,
            address=address,
            gravity_depth_distribution=dist,
        )
        db_session.add(row)
        await db_session.flush()
        # refresh() reloads the attributes from the session's connection, so
        # the returned value is what came back rather than what was assigned.
        await db_session.refresh(row)
        return row.gravity_depth_distribution

    @pytest.mark.asyncio
    async def test_P3_14_depth_dist_shallow_broad(self, db_session: AsyncSession) -> None:
        dist = {"1": 424, "2": 206, "3": 46, "4": 5, "5": 1}
        stored = await self._store_and_reload(
            db_session, "musehub/storage/backends.py::StorageBackend.get", dist
        )
        assert stored == dist

    @pytest.mark.asyncio
    async def test_P3_15_depth_dist_deep_narrow(self, db_session: AsyncSession) -> None:
        dist = {"1": 1, "2": 17, "3": 27, "4": 406, "5": 189, "6": 42, "7": 5, "8": 1}
        stored = await self._store_and_reload(
            db_session, "musehub/storage/backends.py::BlobBackend", dist
        )
        assert stored == dist

    @pytest.mark.asyncio
    async def test_P3_16_depth_dist_single_depth(self, db_session: AsyncSession) -> None:
        dist = {"1": 3}
        stored = await self._store_and_reload(
            db_session, "musehub/services/some_leaf.py::leaf_fn", dist
        )
        assert stored == dist

    @pytest.mark.asyncio
    async def test_P3_17_depth_dist_nine_levels(self, db_session: AsyncSession) -> None:
        dist = {"1": 1, "2": 8, "3": 155, "4": 71, "5": 11, "6": 4, "7": 8, "8": 5, "9": 3}
        stored = await self._store_and_reload(
            db_session, "musehub/models/musehub.py::RepoResponse", dist
        )
        assert stored == dist
        assert len(stored) == 9
247
248
249 # ─────────────────────────────────────────────────────────────────────────────
250 # Layer 5 — Upsert: gravity update does not touch churn/blast columns
251 # ─────────────────────────────────────────────────────────────────────────────
252
class TestGravityUpsert:
    """Layer 5 — upserting gravity fields leaves other columns alone and never duplicates rows.

    Both tests use the PostgreSQL ``INSERT ... ON CONFLICT DO UPDATE`` construct
    keyed on ``(repo_id, address)``.
    """

    @pytest.mark.asyncio
    async def test_P3_18_upsert_gravity_preserves_churn(self, db_session: AsyncSession) -> None:
        """A gravity-only upsert on an existing row keeps the previously stored churn."""
        from sqlalchemy.dialects.postgresql import insert as pg_insert
        from musehub.db import musehub_intel_models as db

        repo = await create_repo(db_session)
        address = "musehub/storage/backends.py::S3Backend._key"
        conflict_key = ["repo_id", "address"]

        # Step 1: seed the row with churn data and no gravity breakdown.
        seed_stmt = pg_insert(db.MusehubSymbolIntel).values(
            repo_id=repo.repo_id,
            address=address,
            churn=42,
            gravity=0.0,
            blast_top=[],
            weekly=[],
        ).on_conflict_do_update(
            index_elements=conflict_key,
            set_={"churn": 42},
        )
        await db_session.execute(seed_stmt)
        await db_session.flush()

        # Step 2: upsert the same key; on conflict only the gravity columns are set.
        gravity_fields = {
            "gravity_pct": 38.9,
            "gravity_direct_dependents": 11,
            "gravity_transitive_dependents": 733,
            "gravity_max_depth": 6,
            "gravity_depth_distribution": {"1": 11, "2": 484, "3": 197, "4": 35, "5": 5, "6": 1},
            "symbol_kind": "method",
        }
        gravity_stmt = pg_insert(db.MusehubSymbolIntel).values(
            repo_id=repo.repo_id,
            address=address,
            blast_top=[],
            weekly=[],
            **gravity_fields,
        ).on_conflict_do_update(
            index_elements=conflict_key,
            set_=gravity_fields,
        )
        await db_session.execute(gravity_stmt)
        await db_session.flush()

        lookup = select(db.MusehubSymbolIntel).where(
            db.MusehubSymbolIntel.repo_id == repo.repo_id,
            db.MusehubSymbolIntel.address == address,
        )
        back = (await db_session.execute(lookup)).scalars().first()
        assert back is not None
        assert back.churn == 42, "churn must be preserved after gravity upsert"
        assert back.gravity_pct == pytest.approx(38.9)
        assert back.gravity_direct_dependents == 11

    @pytest.mark.asyncio
    async def test_P3_19_upsert_gravity_idempotent(self, db_session: AsyncSession) -> None:
        """Running the identical gravity upsert three times leaves exactly one row."""
        from sqlalchemy.dialects.postgresql import insert as pg_insert
        from musehub.db import musehub_intel_models as db

        repo = await create_repo(db_session)
        address = "musehub/storage/backends.py::get_backend"
        dist = {"1": 17, "2": 27, "3": 406, "4": 189, "5": 42, "6": 5, "7": 1}
        gravity_fields = {
            "gravity_pct": 36.5,
            "gravity_direct_dependents": 17,
            "gravity_transitive_dependents": 687,
            "gravity_max_depth": 7,
            "gravity_depth_distribution": dist,
            "symbol_kind": "function",
        }
        upsert_stmt = pg_insert(db.MusehubSymbolIntel).values(
            repo_id=repo.repo_id,
            address=address,
            blast_top=[],
            weekly=[],
            **gravity_fields,
        ).on_conflict_do_update(
            index_elements=["repo_id", "address"],
            set_=gravity_fields,
        )

        attempts = 3
        while attempts:
            await db_session.execute(upsert_stmt)
            await db_session.flush()
            attempts -= 1

        lookup = select(db.MusehubSymbolIntel).where(
            db.MusehubSymbolIntel.repo_id == repo.repo_id
        )
        rows = (await db_session.execute(lookup)).scalars().all()
        assert len(rows) == 1, "idempotent upsert must not create duplicate rows"
        assert rows[0].gravity_pct == pytest.approx(36.5)
360
361
362 # ─────────────────────────────────────────────────────────────────────────────
363 # Layer 6 — Null-safe: rows without gravity data are valid
364 # ─────────────────────────────────────────────────────────────────────────────
365
class TestGravityNullSafe:
    """Layer 6 — rows that carry no gravity data are still valid and filterable."""

    @pytest.mark.asyncio
    async def test_P3_20_churn_only_row_valid(self, db_session: AsyncSession) -> None:
        """A row written with only churn/blast reads back with the gravity fields as None."""
        from musehub.db import musehub_intel_models as db

        repo = await create_repo(db_session)
        churn_only = db.MusehubSymbolIntel(
            repo_id=repo.repo_id,
            address="musehub/services/musehub_jobs.py::some_fn",
            churn=5,
            blast=2,
        )
        db_session.add(churn_only)
        await db_session.flush()

        query = select(db.MusehubSymbolIntel).where(
            db.MusehubSymbolIntel.repo_id == repo.repo_id
        )
        back = (await db_session.execute(query)).scalars().first()
        assert back is not None
        assert back.gravity_pct is None
        assert back.gravity_depth_distribution is None
        assert back.symbol_kind is None
        assert back.churn == 5

    @pytest.mark.asyncio
    async def test_P3_21_gravity_pct_filter_excludes_nulls(self, db_session: AsyncSession) -> None:
        """Filtering on ``gravity_pct IS NOT NULL`` returns only the row that has a value."""
        from musehub.db import musehub_intel_models as db

        repo = await create_repo(db_session)

        # Two rows in the same repo: the first has a gravity_pct, the second does not.
        with_gravity = db.MusehubSymbolIntel(
            repo_id=repo.repo_id,
            address="backends.py::S3Backend._key",
            gravity_pct=38.9,
        )
        without_gravity = db.MusehubSymbolIntel(
            repo_id=repo.repo_id,
            address="backends.py::some_utility",
        )
        for pending in (with_gravity, without_gravity):
            db_session.add(pending)
        await db_session.flush()

        query = select(db.MusehubSymbolIntel).where(
            db.MusehubSymbolIntel.repo_id == repo.repo_id,
            db.MusehubSymbolIntel.gravity_pct.is_not(None),
        )
        rows = (await db_session.execute(query)).scalars().all()
        assert len(rows) == 1
        assert rows[0].address == "backends.py::S3Backend._key"
418
419
420 # ─────────────────────────────────────────────────────────────────────────────
421 # Layer 7 — Ordering: gravity_pct DESC gives correct rank order
422 # ─────────────────────────────────────────────────────────────────────────────
423
class TestGravityOrdering:
    """Layer 7 — ``ORDER BY gravity_pct DESC`` yields the expected rank order.

    Each test creates its own repo via ``create_repo`` and filters on that
    ``repo_id``, so the expected row counts refer only to rows added here.
    """

    @pytest.mark.asyncio
    async def test_P3_22_ordered_by_gravity_pct_desc(self, db_session: AsyncSession) -> None:
        """Four rows come back in strictly descending gravity_pct order."""
        from musehub.db import musehub_intel_models as db

        repo = await create_repo(db_session)

        # Listed in the expected (descending) rank order.
        symbols = [
            ("backends.py::_key", 38.9),
            ("backends.py::_get_client", 38.8),
            ("backends.py::get_backend", 36.5),
            ("models.py::RepoResponse", 14.1),
        ]
        for address, pct in symbols:
            db_session.add(db.MusehubSymbolIntel(
                repo_id=repo.repo_id,
                address=address,
                gravity_pct=pct,
            ))
        await db_session.flush()

        result = await db_session.execute(
            select(db.MusehubSymbolIntel)
            .where(
                db.MusehubSymbolIntel.repo_id == repo.repo_id,
                db.MusehubSymbolIntel.gravity_pct.is_not(None),
            )
            .order_by(db.MusehubSymbolIntel.gravity_pct.desc())
        )
        rows = result.scalars().all()

        # Comparing the result only with its own sorted copy would also pass
        # for a truncated result, so pin the count and the exact rank order.
        assert len(rows) == len(symbols)
        assert [r.address for r in rows] == [address for address, _ in symbols]

        pcts = [r.gravity_pct for r in rows]
        assert pcts == sorted(pcts, reverse=True)
        assert pcts[0] == pytest.approx(38.9)

    @pytest.mark.asyncio
    async def test_P3_23_top_n_query(self, db_session: AsyncSession) -> None:
        """``LIMIT 3`` over ten rows with gravity_pct 0.0–9.0 returns 9.0, 8.0, 7.0."""
        from musehub.db import musehub_intel_models as db

        repo = await create_repo(db_session)

        for i in range(10):
            db_session.add(db.MusehubSymbolIntel(
                repo_id=repo.repo_id,
                address=f"backends.py::sym_{i:02d}",
                gravity_pct=float(i),
            ))
        await db_session.flush()

        result = await db_session.execute(
            select(db.MusehubSymbolIntel)
            .where(
                db.MusehubSymbolIntel.repo_id == repo.repo_id,
                db.MusehubSymbolIntel.gravity_pct.is_not(None),
            )
            .order_by(db.MusehubSymbolIntel.gravity_pct.desc())
            .limit(3)
        )
        rows = result.scalars().all()
        assert len(rows) == 3
        assert rows[0].gravity_pct == pytest.approx(9.0)
        # Check every returned rank, not just the first.
        assert [r.gravity_pct for r in rows] == pytest.approx([9.0, 8.0, 7.0])
478 .limit(3)
479 )
480 rows = result.scalars().all()
481 assert len(rows) == 3
482 assert rows[0].gravity_pct == pytest.approx(9.0)
483
484
485 # ─────────────────────────────────────────────────────────────────────────────
486 # Layer 8 — Kind: all four symbol_kind values round-trip
487 # ─────────────────────────────────────────────────────────────────────────────
488
class TestGravitySymbolKind:
    """Layer 8 — ``symbol_kind`` values round-trip and can be used as a filter."""

    @pytest.mark.asyncio
    @pytest.mark.parametrize("kind", ["method", "function", "class", "async_method"])
    async def test_P3_24_symbol_kind_roundtrip(self, db_session: AsyncSession, kind: str) -> None:
        """Each of the four kinds is stored and comes back unchanged after a refresh."""
        from musehub.db import musehub_intel_models as db

        repo = await create_repo(db_session)
        symbol = db.MusehubSymbolIntel(
            repo_id=repo.repo_id,
            address=f"backends.py::sym_for_{kind}",
            symbol_kind=kind,
            gravity_pct=10.0,
        )
        db_session.add(symbol)
        await db_session.flush()
        await db_session.refresh(symbol)
        assert symbol.symbol_kind == kind

    @pytest.mark.asyncio
    async def test_P3_28_kind_filter_query(self, db_session: AsyncSession) -> None:
        """With one row per kind in a repo, filtering on "method" returns exactly that row."""
        from musehub.db import musehub_intel_models as db

        repo = await create_repo(db_session)

        all_kinds = ("method", "function", "class", "async_method")
        pending = [
            db.MusehubSymbolIntel(
                repo_id=repo.repo_id,
                address=f"backends.py::sym_{kind}",
                symbol_kind=kind,
                gravity_pct=10.0,
            )
            for kind in all_kinds
        ]
        for symbol in pending:
            db_session.add(symbol)
        await db_session.flush()

        query = select(db.MusehubSymbolIntel).where(
            db.MusehubSymbolIntel.repo_id == repo.repo_id,
            db.MusehubSymbolIntel.symbol_kind == "method",
        )
        rows = (await db_session.execute(query)).scalars().all()
        assert len(rows) == 1
        assert rows[0].symbol_kind == "method"
File History 1 commit
sha256:0997d6250ae6476362f6fe2025af7789f46d03df3e9f34356d5e8ee79b201923 fix(issues): use issue number as pagination cursor, not cre… Sonnet 4.6 patch 9 days ago