gabriel / musehub public
test_phase1_dead_provider.py python
418 lines 14.8 KB
Raw
sha256:3ff9c9863a9891bdcde71b4a43228f66d0493e38b7cc1d09fe9eb7de774046b2 feat: add repair-commit wire endpoint (API parity with repa… Opus 4.8 minor ⚠ breaking 1 day ago
1 """TDD spec for Phase 1 — SQL-derived DeadProvider (issue #10).
2
3 DeadProvider replaces the muse-CLI subprocess approach with a pure SQL
4 derivation from `musehub_symbol_intel` blast and churn columns.
5
6 Confidence formula:
7 HIGH → symbol_kind IN tracked_kinds AND blast == 0 AND churn == 1
8 MEDIUM → symbol_kind IN tracked_kinds AND blast == 0 AND churn > 1
9 AND churn_30d == 0
10 LOW → symbol_kind IN tracked_kinds AND blast == 0 AND churn_30d > 0
11
12 tracked_kinds = {function, async_function, method, async_method, class}
13
14 Reason strings:
15 HIGH: "Added once, never modified. Zero blast radius in full history."
16 MEDIUM: "Modified in past but zero blast radius for ≥ 30 days."
17 LOW: "Zero blast radius. Recently active — verify before deleting."
18
19 Dismiss preservation:
20 On upsert, dismissed=True is NEVER overwritten by a re-run.
21 New rows always start with dismissed=False.
22
23 Layers:
24 1. Registry — "intel.code.dead" in _PROVIDER_REGISTRY
25 2. Protocol — satisfies IntelProvider
26 3. Dispatch — job_types_for_push("code") includes "intel.code.dead"
27 job_types_for_push("midi") excludes "intel.code.dead"
28 4. High conf — blast=0, churn=1 → confidence="high"
29 5. Medium conf — blast=0, churn>1, churn_30d=0 → confidence="medium"
30 6. Low conf — blast=0, churn_30d>0 → confidence="low"
31 7. Excluded — blast>0 → not a candidate
32 8. Kind filter — kind="import" (untracked) → excluded
33 9. Reasons — reason string correct per tier
34 10. Dismissed — new rows get dismissed=False; existing dismissed=True preserved
35 11. Empty — no symbol_intel rows → returns []
36 12. Idempotent — run twice, one row per address
37 13. Return type — returns [("intel.code.dead", {"count": N})]
38 14. No subprocess — compute() never calls asyncio.create_subprocess_exec
39 """
40 from __future__ import annotations
41
42 import secrets
43 from unittest.mock import patch
44
45 import pytest
46 import pytest_asyncio
47 from sqlalchemy import select
48 from sqlalchemy.dialects.postgresql import insert as pg_insert
49 from sqlalchemy.ext.asyncio import AsyncSession
50
51 from muse.core.types import fake_id
52 from musehub.db.musehub_intel_models import MusehubIntelDead, MusehubSymbolIntel
53 from musehub.types.json_types import JSONObject
54 from tests.factories import create_repo
55
56
def _uid() -> str:
    """Return a new identifier: ``fake_id`` applied to 32 random hex characters."""
    random_hex = secrets.token_hex(16)
    return fake_id(random_hex)
59
60
61 _TRACKED_KINDS = ("function", "async_function", "method", "async_method", "class")
62
63
64 # ---------------------------------------------------------------------------
65 # Helpers
66 # ---------------------------------------------------------------------------
67
async def _seed_symbol(
    session: AsyncSession,
    repo_id: str,
    *,
    address: str,
    kind: str = "function",
    blast: int = 0,
    blast_direct: int = 0,
    blast_cross: int = 0,
    churn: int = 1,
    churn_30d: int = 0,
    churn_90d: int = 0,
) -> None:
    """Upsert a single ``MusehubSymbolIntel`` row keyed by ``(repo_id, address)``.

    A new row is inserted with the given kind/blast/churn values plus fixed
    filler for the remaining columns (``author_count=1``, ``gravity=0.0``,
    twelve zeroed ``weekly`` buckets, empty ``blast_top``).  If a row with the
    same ``(repo_id, address)`` already exists, only the kind/blast/churn
    columns are overwritten.  The session is flushed before returning.
    """
    # Columns written on insert AND refreshed when the row already exists.
    refreshed_columns = {
        "symbol_kind": kind,
        "blast": blast,
        "blast_direct": blast_direct,
        "blast_cross": blast_cross,
        "churn": churn,
        "churn_30d": churn_30d,
        "churn_90d": churn_90d,
    }
    upsert = (
        pg_insert(MusehubSymbolIntel)
        .values(
            repo_id=repo_id,
            address=address,
            author_count=1,
            gravity=0.0,
            weekly=[0] * 12,
            blast_top=[],
            **refreshed_columns,
        )
        .on_conflict_do_update(
            index_elements=["repo_id", "address"],
            set_=refreshed_columns,
        )
    )
    await session.execute(upsert)
    await session.flush()
113
114
async def _get_dead(
    session: AsyncSession, repo_id: str, address: str
) -> MusehubIntelDead | None:
    """Load the ``MusehubIntelDead`` row for ``(repo_id, address)``.

    Returns ``None`` when no such row exists.
    """
    query = select(MusehubIntelDead).where(
        MusehubIntelDead.repo_id == repo_id,
        MusehubIntelDead.address == address,
    )
    rows = await session.execute(query)
    return rows.scalar_one_or_none()
125
126
async def _run_provider(session: AsyncSession, repo_id: str) -> list[tuple[str, JSONObject]]:
    """Run a fresh ``DeadProvider`` against ``repo_id`` and return what ``compute`` returns."""
    # Imported at call time rather than at module import.
    from musehub.services.musehub_intel_providers import DeadProvider

    return await DeadProvider().compute(session, repo_id, "—", {})
131
132
133 # ---------------------------------------------------------------------------
134 # Layer 1 — Registry
135 # ---------------------------------------------------------------------------
136
class TestDeadProviderRegistry:
    """Registry membership and protocol conformance of the dead-code provider."""

    def test_P1_01_dead_in_provider_registry(self) -> None:
        """``_PROVIDER_REGISTRY`` contains the ``"intel.code.dead"`` key."""
        from musehub.services.musehub_intel_providers import _PROVIDER_REGISTRY

        registered = "intel.code.dead" in _PROVIDER_REGISTRY
        assert registered

    def test_P1_02_dead_satisfies_intel_provider_protocol(self) -> None:
        """The registered entry passes an ``isinstance`` check against ``IntelProvider``."""
        from musehub.services.musehub_intel_providers import _PROVIDER_REGISTRY, IntelProvider

        dead_provider = _PROVIDER_REGISTRY["intel.code.dead"]
        assert isinstance(dead_provider, IntelProvider)
147
148
149 # ---------------------------------------------------------------------------
150 # Layer 2 — Dispatch
151 # ---------------------------------------------------------------------------
152
class TestDeadProviderDispatch:
    """Which push domains schedule the ``intel.code.dead`` job."""

    def test_P1_03_job_types_for_push_code_includes_dead(self) -> None:
        """A "code" push schedules ``intel.code.dead``."""
        from musehub.services.musehub_intel_providers import job_types_for_push

        code_jobs = job_types_for_push("code")
        assert "intel.code.dead" in code_jobs

    def test_P1_04_job_types_for_push_midi_excludes_dead(self) -> None:
        """A "midi" push does not schedule ``intel.code.dead``."""
        from musehub.services.musehub_intel_providers import job_types_for_push

        midi_jobs = job_types_for_push("midi")
        assert "intel.code.dead" not in midi_jobs
162
163
164 # ---------------------------------------------------------------------------
165 # Layer 3 — Confidence tiers
166 # ---------------------------------------------------------------------------
167
class TestDeadProviderConfidence:
    """Zero-blast symbols are tiered high / medium / low by their churn profile."""

    @staticmethod
    async def _confidence_of(
        db_session: AsyncSession, address: str, *, churn: int, churn_30d: int
    ) -> str:
        """Seed one zero-blast symbol in a new repo, run the provider, return its confidence.

        Asserts that the provider produced a dead-code row for ``address``.
        """
        repo = await create_repo(db_session)
        await _seed_symbol(
            db_session, repo.repo_id,
            address=address,
            blast=0, churn=churn, churn_30d=churn_30d,
        )
        await _run_provider(db_session, repo.repo_id)
        dead = await _get_dead(db_session, repo.repo_id, address)
        assert dead is not None
        return dead.confidence

    @pytest.mark.asyncio
    async def test_P1_05_blast0_churn1_yields_high(
        self, db_session: AsyncSession
    ) -> None:
        """blast=0, churn=1 → "high"."""
        tier = await self._confidence_of(
            db_session, "pkg/a.py::only_added", churn=1, churn_30d=0
        )
        assert tier == "high"

    @pytest.mark.asyncio
    async def test_P1_06_blast0_churn_gt1_churn30d0_yields_medium(
        self, db_session: AsyncSession
    ) -> None:
        """blast=0, churn>1, churn_30d=0 → "medium"."""
        tier = await self._confidence_of(
            db_session, "pkg/b.py::touched_but_quiet", churn=5, churn_30d=0
        )
        assert tier == "medium"

    @pytest.mark.asyncio
    async def test_P1_07_blast0_churn30d_gt0_yields_low(
        self, db_session: AsyncSession
    ) -> None:
        """blast=0, churn_30d>0 → "low"."""
        tier = await self._confidence_of(
            db_session, "pkg/c.py::recently_active", churn=3, churn_30d=2
        )
        assert tier == "low"
214
215
216 # ---------------------------------------------------------------------------
217 # Layer 4 — Exclusion rules
218 # ---------------------------------------------------------------------------
219
class TestDeadProviderExclusion:
    """Symbols that must never be recorded as dead-code candidates."""

    @pytest.mark.asyncio
    async def test_P1_08_blast_gt0_excluded(
        self, db_session: AsyncSession
    ) -> None:
        """A symbol with a non-zero blast radius produces no dead-code row."""
        address = "pkg/d.py::has_dependents"
        repo = await create_repo(db_session)
        await _seed_symbol(
            db_session, repo.repo_id,
            address=address,
            blast=3, churn=1, churn_30d=0,
        )
        await _run_provider(db_session, repo.repo_id)
        candidate = await _get_dead(db_session, repo.repo_id, address)
        assert candidate is None

    @pytest.mark.asyncio
    async def test_P1_09_untracked_kind_excluded(
        self, db_session: AsyncSession
    ) -> None:
        """A symbol of kind "import" produces no dead-code row even with blast=0."""
        address = "pkg/e.py::some_import"
        repo = await create_repo(db_session)
        await _seed_symbol(
            db_session, repo.repo_id,
            address=address,
            kind="import",
            blast=0, churn=1, churn_30d=0,
        )
        await _run_provider(db_session, repo.repo_id)
        candidate = await _get_dead(db_session, repo.repo_id, address)
        assert candidate is None
250
251
252 # ---------------------------------------------------------------------------
253 # Layer 5 — Reason strings
254 # ---------------------------------------------------------------------------
255
class TestDeadProviderReasons:
    """Each confidence tier carries its exact human-readable reason string."""

    @staticmethod
    async def _reason_of(
        db_session: AsyncSession, address: str, *, churn: int, churn_30d: int
    ) -> str:
        """Seed one zero-blast symbol in a new repo, run the provider, return its reason.

        Asserts that the provider produced a dead-code row for ``address``.
        """
        repo = await create_repo(db_session)
        await _seed_symbol(
            db_session, repo.repo_id,
            address=address,
            blast=0, churn=churn, churn_30d=churn_30d,
        )
        await _run_provider(db_session, repo.repo_id)
        dead = await _get_dead(db_session, repo.repo_id, address)
        assert dead is not None
        return dead.reason

    @pytest.mark.asyncio
    async def test_P1_10_high_reason_string(
        self, db_session: AsyncSession
    ) -> None:
        """churn=1, churn_30d=0 yields the "added once" reason."""
        reason = await self._reason_of(
            db_session, "pkg/f.py::high_sym", churn=1, churn_30d=0
        )
        assert reason == "Added once, never modified. Zero blast radius in full history."

    @pytest.mark.asyncio
    async def test_P1_10b_medium_reason_string(
        self, db_session: AsyncSession
    ) -> None:
        """churn=3, churn_30d=0 yields the "modified in past" reason."""
        reason = await self._reason_of(
            db_session, "pkg/g.py::medium_sym", churn=3, churn_30d=0
        )
        assert reason == "Modified in past but zero blast radius for ≥ 30 days."

    @pytest.mark.asyncio
    async def test_P1_10c_low_reason_string(
        self, db_session: AsyncSession
    ) -> None:
        """churn=2, churn_30d=1 yields the "recently active" reason."""
        reason = await self._reason_of(
            db_session, "pkg/h.py::low_sym", churn=2, churn_30d=1
        )
        assert reason == "Zero blast radius. Recently active — verify before deleting."
302
303
304 # ---------------------------------------------------------------------------
305 # Layer 6 — Dismiss preservation
306 # ---------------------------------------------------------------------------
307
class TestDeadProviderDismiss:
    """The ``dismissed`` flag starts False and survives provider re-runs."""

    @pytest.mark.asyncio
    async def test_P1_11_new_rows_start_not_dismissed(
        self, db_session: AsyncSession
    ) -> None:
        """A row created by the first provider run has ``dismissed`` set to False."""
        address = "pkg/i.py::new_sym"
        repo = await create_repo(db_session)
        await _seed_symbol(
            db_session, repo.repo_id,
            address=address,
            blast=0, churn=1, churn_30d=0,
        )
        await _run_provider(db_session, repo.repo_id)
        row = await _get_dead(db_session, repo.repo_id, address)
        assert row is not None
        assert row.dismissed is False

    @pytest.mark.asyncio
    async def test_P1_11b_existing_dismissed_preserved_on_rerun(
        self, db_session: AsyncSession
    ) -> None:
        """A row dismissed between runs is still dismissed in the database afterwards."""
        address = "pkg/j.py::dismissed_sym"
        repo = await create_repo(db_session)
        await _seed_symbol(
            db_session, repo.repo_id,
            address=address,
            blast=0, churn=1, churn_30d=0,
        )
        # First run — creates the row
        await _run_provider(db_session, repo.repo_id)
        # Manually dismiss it
        row = await _get_dead(db_session, repo.repo_id, address)
        # Fail with a clear assertion (not AttributeError) if the first run
        # produced no row.
        assert row is not None
        row.dismissed = True
        await db_session.flush()
        # Second run — must NOT reset dismissed to False
        await _run_provider(db_session, repo.repo_id)
        # Read the column directly instead of re-selecting the entity: an
        # entity SELECT would return the instance already held in the
        # session's identity map, whose in-memory dismissed=True is not
        # overwritten by the query, so a reset done in the database could
        # go unnoticed.
        dismissed = (await db_session.execute(
            select(MusehubIntelDead.dismissed).where(
                MusehubIntelDead.repo_id == repo.repo_id,
                MusehubIntelDead.address == address,
            )
        )).scalar_one_or_none()
        # None here means the row disappeared on re-run, which is also a failure.
        assert dismissed is True
346
347
348 # ---------------------------------------------------------------------------
349 # Layer 7 — Edge cases
350 # ---------------------------------------------------------------------------
351
class TestDeadProviderEdgeCases:
    """Empty input, repeated runs, and the shape of the provider's return value."""

    @pytest.mark.asyncio
    async def test_P1_12_empty_repo_returns_empty_list(
        self, db_session: AsyncSession
    ) -> None:
        """With no symbol rows seeded, the provider returns an empty list."""
        repo = await create_repo(db_session)
        outcome = await _run_provider(db_session, repo.repo_id)
        assert outcome == []

    @pytest.mark.asyncio
    async def test_P1_13_idempotent_run_twice_one_row(
        self, db_session: AsyncSession
    ) -> None:
        """Running the provider twice leaves exactly one row for the address."""
        from sqlalchemy import func

        address = "pkg/k.py::stable_sym"
        repo = await create_repo(db_session)
        await _seed_symbol(
            db_session, repo.repo_id,
            address=address,
            blast=0, churn=1, churn_30d=0,
        )
        await _run_provider(db_session, repo.repo_id)
        await _run_provider(db_session, repo.repo_id)
        count_query = select(func.count()).select_from(MusehubIntelDead).where(
            MusehubIntelDead.repo_id == repo.repo_id,
            MusehubIntelDead.address == address,
        )
        counted = await db_session.execute(count_query)
        assert counted.scalar_one() == 1

    @pytest.mark.asyncio
    async def test_P1_14_return_type(
        self, db_session: AsyncSession
    ) -> None:
        """One candidate yields a single ``("intel.code.dead", {"count": 1, ...})`` entry."""
        repo = await create_repo(db_session)
        await _seed_symbol(
            db_session, repo.repo_id,
            address="pkg/l.py::ret_sym",
            blast=0, churn=1, churn_30d=0,
        )
        outcome = await _run_provider(db_session, repo.repo_id)
        assert len(outcome) == 1
        intel_type, payload = outcome[0]
        assert intel_type == "intel.code.dead"
        assert payload["count"] == 1
398
399
400 # ---------------------------------------------------------------------------
401 # Layer 8 — No subprocess
402 # ---------------------------------------------------------------------------
403
class TestDeadProviderNoSubprocess:
    """The provider must not spawn a subprocess while computing."""

    @pytest.mark.asyncio
    async def test_P1_15_no_subprocess_spawned(
        self, db_session: AsyncSession
    ) -> None:
        """``asyncio.create_subprocess_exec`` is never called during ``compute``."""
        repo = await create_repo(db_session)
        await _seed_symbol(
            db_session, repo.repo_id,
            address="pkg/m.py::no_proc_sym",
            blast=0, churn=1, churn_30d=0,
        )
        # Replace the spawn function for the duration of the run, then verify
        # after the patch is lifted that it was never invoked.
        with patch("asyncio.create_subprocess_exec") as spawn:
            await _run_provider(db_session, repo.repo_id)
        spawn.assert_not_called()
File History 1 commit
sha256:3ff9c9863a9891bdcde71b4a43228f66d0493e38b7cc1d09fe9eb7de774046b2 feat: add repair-commit wire endpoint (API parity with repa… Opus 4.8 minor 1 day ago