gabriel / musehub public

test_phase2_intel_providers.py file-level

at dev · View file ↗ · Intel ↗

History
1 files
1 commits
0 hotspots
0 🧊 dead
0 💥 blast risk
sha256:9 Merge 'fix/assignee-sigil-inline' into 'dev' — proposal: Assignee sigil… · gabriel · Jun 7, 2026
1 """TDD spec for Phase 2 — worker intel providers (issue #8).
2
3 11 new ``IntelProvider`` subclasses, one per normalized intel table, each
4 wrapping a ``muse code <command> --json`` subprocess call and upserting rows
5 into the corresponding DB table.
6
7 New job types (all in the ``intel.code.*`` namespace):
8 intel.code.coupling → MusehubIntelCoupling
9 intel.code.entangle → MusehubIntelEntangle
10 intel.code.dead → MusehubIntelDead
11 intel.code.blast_risk → MusehubIntelBlastRisk
12 intel.code.stable → MusehubIntelStable
13 intel.code.velocity → MusehubIntelVelocity
14 intel.code.clones → MusehubIntelClones
15 intel.code.type → MusehubIntelType
16 intel.code.api_surface → MusehubIntelApiSurface
17 intel.code.languages → MusehubIntelLanguages
18 intel.code.detect_refactor → MusehubIntelRefactorEvent
19
20 Contract each provider must satisfy:
21 1. Registered under its job-type key in ``_PROVIDER_REGISTRY``.
22 2. ``compute()`` calls ``muse -C <repo_root> code <cmd> --json`` (or the
23 equivalent runner) and upserts result rows.
24 3. ``compute()`` returns a non-empty ``IntelResults`` list on success.
25 4. ``compute()`` returns ``[]`` gracefully when the muse command yields no
26 results (empty repo, no symbols, etc.).
27 5. ``compute()`` returns ``[]`` gracefully when the subprocess exits non-zero.
28
29 Layers:
30 1. Registry — job types present in _PROVIDER_REGISTRY
31 2. Dispatch — job_types_for_push("code") includes all 11 new types
32 3. Coupling — provider upserts MusehubIntelCoupling rows
33 4. Entangle — provider upserts MusehubIntelEntangle rows
34 5. Dead — provider upserts MusehubIntelDead rows
35 6. BlastRisk — provider upserts MusehubIntelBlastRisk rows
36 7. Stable — provider upserts MusehubIntelStable rows
37 8. Velocity — provider upserts MusehubIntelVelocity rows
38 9. Clones — provider upserts MusehubIntelClones rows
39 10. Type — provider upserts MusehubIntelType rows
40 11. ApiSurface — provider upserts MusehubIntelApiSurface rows
41 12. Languages — provider upserts MusehubIntelLanguages rows
42 13. Refactor — provider upserts MusehubIntelRefactorEvent rows
43 14. Empty — all providers handle empty muse output gracefully
44 15. Error — all providers handle non-zero exit gracefully
45 """
46 from __future__ import annotations
47
48 import json
49 import secrets
50 from datetime import datetime, timezone
51 from unittest.mock import AsyncMock, MagicMock, patch
52
53 import msgpack
54 import pytest
55 from sqlalchemy import select
56 from sqlalchemy.ext.asyncio import AsyncSession
57
58 from muse.core.types import fake_id
59 from tests.factories import create_repo
60
61 type _ContentMap = dict[str, bytes]
62
63 _ALL_PHASE2_JOB_TYPES = [
64 "intel.code.coupling",
65 "intel.code.entangle",
66 "intel.code.dead",
67 "intel.code.blast_risk",
68 "intel.code.stable",
69 "intel.code.velocity",
70 "intel.code.clones",
71 "intel.code.type",
72 "intel.code.api_surface",
73 "intel.code.languages",
74 "intel.code.detect_refactor",
75 ]
76
77
def _uid() -> str:
    """Return a fresh id: ``fake_id`` applied to 32 random hex characters.

    Used wherever a test needs a distinct commit, snapshot or ref id.
    """
    return fake_id(secrets.token_hex(16))
80
81
def _now() -> datetime:
    """Return the current time as a timezone-aware UTC ``datetime``."""
    return datetime.now(tz=timezone.utc)
84
85
def _mock_process(stdout: str, returncode: int = 0) -> AsyncMock:
    """Return an ``asyncio.subprocess.Process`` mock.

    Args:
        stdout: Text that ``communicate()`` reports as the process's stdout.
            It is encoded to bytes; stderr is always empty.
        returncode: Exit status exposed on the mock (``0`` by default).

    Returns:
        An ``AsyncMock`` whose awaited ``communicate()`` yields
        ``(stdout_bytes, b"")``.
    """
    proc = AsyncMock()
    proc.returncode = returncode
    proc.communicate = AsyncMock(return_value=(stdout.encode(), b""))
    return proc
92
93
async def _make_commit_and_snapshot(
    session: AsyncSession,
    repo_id: str,
    manifest: dict[str, str],
    parent_ids: list[str] | None = None,
) -> tuple[str, str]:
    """Insert MusehubSnapshot + MusehubCommit + MusehubObject rows; return (commit_id, snapshot_id).

    Args:
        session: Session the rows are added to; it is flushed before returning.
        repo_id: Repository the snapshot and commit refs are attached to.
        manifest: Mapping of file path -> object id. It is msgpack-encoded into
            the snapshot's ``manifest_blob`` and also drives the object rows.
        parent_ids: Parent commit ids for the new commit; ``None`` means none.

    Returns:
        ``(commit_id, snapshot_id)`` of the freshly generated rows.
    """
    from musehub.db.musehub_repo_models import (
        MusehubCommit, MusehubCommitRef, MusehubObject, MusehubSnapshot, MusehubSnapshotRef,
    )
    from sqlalchemy.dialects.postgresql import insert as pg_insert

    snap_id = _uid()
    commit_id = _uid()

    session.add(MusehubSnapshot(
        snapshot_id=snap_id,
        manifest_blob=msgpack.packb(manifest, use_bin_type=True),
    ))
    session.add(MusehubSnapshotRef(repo_id=repo_id, snapshot_id=snap_id))
    session.add(MusehubCommit(
        commit_id=commit_id,
        branch="main",
        message="test",
        author="tester",
        timestamp=_now(),
        snapshot_id=snap_id,
        parent_ids=parent_ids or [],
    ))
    session.add(MusehubCommitRef(repo_id=repo_id, commit_id=commit_id))

    # Seed MusehubObject rows so providers can find objects via session.get(MusehubObject, oid)
    for path, oid in manifest.items():
        # ON CONFLICT DO NOTHING: an object id that already has a row is left
        # untouched rather than raising.
        await session.execute(
            pg_insert(MusehubObject)
            .values(object_id=oid, path=path, size_bytes=32, storage_uri=f"mem://{oid}")
            .on_conflict_do_nothing(index_elements=["object_id"])
        )

    await session.flush()
    return commit_id, snap_id
131
132
def _mock_backend(content_map: _ContentMap) -> AsyncMock:
    """Return a storage-backend mock whose async ``get`` reads from *content_map*.

    Awaiting ``backend.get(oid, **kwargs)`` yields ``content_map[oid]``, or
    ``None`` for an unknown object id. Extra keyword arguments are accepted
    and ignored.
    """
    def _lookup(oid: str, **_: object) -> bytes | None:
        return content_map.get(oid)

    backend = AsyncMock()
    backend.get = AsyncMock(side_effect=_lookup)
    return backend
137
138
139 # ─────────────────────────────────────────────────────────────────────────────
140 # Layer 1 — Registry: all 11 providers registered
141 # ─────────────────────────────────────────────────────────────────────────────
142
class TestPhase2Registry:
    """Every Phase 2 job type is registered and backed by a real provider."""

    def test_P2_01_all_phase2_job_types_in_registry(self) -> None:
        """Each Phase 2 job type is a key of ``_PROVIDER_REGISTRY``."""
        from musehub.services.musehub_intel_providers import _PROVIDER_REGISTRY
        missing = [jt for jt in _ALL_PHASE2_JOB_TYPES if jt not in _PROVIDER_REGISTRY]
        assert not missing, f"Missing from _PROVIDER_REGISTRY: {missing}"

    def test_P2_02_registry_providers_satisfy_protocol(self) -> None:
        """Each registered Phase 2 provider passes ``isinstance(..., IntelProvider)``."""
        from musehub.services.musehub_intel_providers import _PROVIDER_REGISTRY, IntelProvider
        for jt in _ALL_PHASE2_JOB_TYPES:
            provider = _PROVIDER_REGISTRY[jt]
            assert isinstance(provider, IntelProvider), (
                f"{jt} provider does not satisfy IntelProvider protocol"
            )
157
158
159 # ─────────────────────────────────────────────────────────────────────────────
160 # Layer 2 — Dispatch: job_types_for_push includes all 11 new types
161 # ─────────────────────────────────────────────────────────────────────────────
162
class TestPhase2Dispatch:
    """``job_types_for_push`` schedules Phase 2 jobs for code pushes only."""

    def test_P2_03_job_types_for_push_code_includes_all_phase2_types(self) -> None:
        """A ``"code"`` push includes every Phase 2 job type."""
        from musehub.services.musehub_intel_providers import job_types_for_push
        types = job_types_for_push("code")
        missing = [jt for jt in _ALL_PHASE2_JOB_TYPES if jt not in types]
        assert not missing, f"Missing from job_types_for_push('code'): {missing}"

    def test_P2_04_job_types_for_push_code_still_includes_legacy_types(self) -> None:
        """A ``"code"`` push still includes the pre-existing job types."""
        from musehub.services.musehub_intel_providers import job_types_for_push
        types = job_types_for_push("code")
        assert "intel.structural" in types
        assert "intel.code" in types
        assert "gc" in types

    def test_P2_05_job_types_for_push_midi_excludes_phase2_types(self) -> None:
        """A ``"midi"`` push includes none of the Phase 2 job types."""
        from musehub.services.musehub_intel_providers import job_types_for_push
        types = job_types_for_push("midi")
        for jt in _ALL_PHASE2_JOB_TYPES:
            assert jt not in types, f"{jt} should not run for midi repos"
183
184
185 # ─────────────────────────────────────────────────────────────────────────────
186 # Layer 10 — TypeProvider
187 # ─────────────────────────────────────────────────────────────────────────────
188
class TestPhase2TypeProvider:
    """The ``intel.code.type`` provider writes ``MusehubIntelType`` rows."""

    @pytest.mark.asyncio
    async def test_P2_14_type_upserts_rows(self, db_session: AsyncSession) -> None:
        """One fully annotated function yields one row with ``type_score`` ~ 1.0."""
        from musehub.db import musehub_intel_models as db
        from musehub.services.musehub_intel_providers import _PROVIDER_REGISTRY
        repo = await create_repo(db_session)

        py_src = b"def fn(x: int, y: str) -> bool:\n pass\n"
        obj_id = "obj-type-test"
        backend = _mock_backend({obj_id: py_src})

        commit_id, _ = await _make_commit_and_snapshot(
            db_session, repo.repo_id, {"a.py": obj_id}
        )

        # Storage is stubbed at every patched lookup point so the provider
        # reads ``py_src`` regardless of which one it uses.
        with (
            patch("musehub.storage.backends.get_backend", return_value=backend),
            patch("musehub.services.musehub_intel_providers.get_backend", return_value=backend),
            patch("musehub.storage.backends.read_object_bytes", new=AsyncMock(return_value=py_src)),
        ):
            results = await _PROVIDER_REGISTRY["intel.code.type"].compute(
                db_session, repo.repo_id, commit_id,
                {"head": commit_id, "owner": repo.owner, "slug": repo.slug},
            )

        assert results
        rows = (await db_session.execute(
            select(db.MusehubIntelType).where(db.MusehubIntelType.repo_id == repo.repo_id)
        )).scalars().all()
        assert len(rows) == 1
        assert rows[0].type_score == pytest.approx(1.0)
        assert rows[0].return_is_any is False
220
221
222 # ─────────────────────────────────────────────────────────────────────────────
223 # Layer 11 — ApiSurfaceProvider
224 # ─────────────────────────────────────────────────────────────────────────────
225
class TestPhase2ApiSurfaceProvider:
    """The ``intel.code.api_surface`` provider writes ``MusehubIntelApiSurface`` rows."""

    @pytest.mark.asyncio
    async def test_P2_15_api_surface_upserts_rows(self, db_session: AsyncSession) -> None:
        """One module-level function yields one ``"public"`` row with a signature id."""
        from musehub.db import musehub_intel_models as db
        from musehub.services.musehub_intel_providers import _PROVIDER_REGISTRY
        repo = await create_repo(db_session)

        py_src = b"def get_repo(repo_id: str) -> dict:\n pass\n"
        obj_id = "obj-api-test"
        backend = _mock_backend({obj_id: py_src})

        commit_id, _ = await _make_commit_and_snapshot(
            db_session, repo.repo_id, {"api/routes.py": obj_id}
        )

        with (
            patch("musehub.services.musehub_intel_providers.get_backend", return_value=backend),
            patch("musehub.storage.backends.read_object_bytes", new=AsyncMock(return_value=py_src)),
        ):
            results = await _PROVIDER_REGISTRY["intel.code.api_surface"].compute(
                db_session, repo.repo_id, commit_id,
                {"head": commit_id, "owner": repo.owner, "slug": repo.slug},
            )

        assert results
        rows = (await db_session.execute(
            select(db.MusehubIntelApiSurface).where(db.MusehubIntelApiSurface.repo_id == repo.repo_id)
        )).scalars().all()
        assert len(rows) == 1
        assert rows[0].signature_id is not None
        assert rows[0].visibility == "public"
256
257
258 # ─────────────────────────────────────────────────────────────────────────────
259 # Layer 12 — LanguagesProvider
260 # ─────────────────────────────────────────────────────────────────────────────
261
class TestPhase2LanguagesProvider:
    """The ``intel.code.languages`` provider writes ``MusehubIntelLanguages`` rows."""

    @pytest.mark.asyncio
    async def test_P2_16_languages_upserts_rows(self, db_session: AsyncSession) -> None:
        """A snapshot with one ``.py`` and one ``.toml`` file yields two language rows."""
        from musehub.db import musehub_intel_models as db
        from musehub.services.musehub_intel_providers import _PROVIDER_REGISTRY
        repo = await create_repo(db_session)

        py_src = b"def fn(x: int) -> bool:\n pass\n"
        py_oid = "obj-lang-py"
        toml_src = b"[workspace]\nversion = 1\n"
        toml_oid = "obj-lang-toml"
        backend = _mock_backend({py_oid: py_src, toml_oid: toml_src})

        commit_id, _ = await _make_commit_and_snapshot(
            db_session, repo.repo_id, {"src/main.py": py_oid, "pyproject.toml": toml_oid}
        )

        with patch("musehub.services.musehub_intel_providers.get_backend", return_value=backend):
            results = await _PROVIDER_REGISTRY["intel.code.languages"].compute(
                db_session, repo.repo_id, commit_id,
                {"head": commit_id, "owner": repo.owner, "slug": repo.slug},
            )

        assert results
        rows = (await db_session.execute(
            select(db.MusehubIntelLanguages).where(db.MusehubIntelLanguages.repo_id == repo.repo_id)
        )).scalars().all()
        assert len(rows) == 2
        # The Python row must count the single file and its single function.
        py = next(r for r in rows if r.language == "Python")
        assert py.file_count == 1
        assert py.symbol_count == 1
294
295
296 # ─────────────────────────────────────────────────────────────────────────────
297 # Layer 13 — DetectRefactorProvider
298 # ─────────────────────────────────────────────────────────────────────────────
299
class TestPhase2DetectRefactorProvider:
    """The ``intel.code.detect_refactor`` provider writes ``MusehubIntelRefactorEvent`` rows."""

    @pytest.mark.asyncio
    async def test_P2_17_detect_refactor_upserts_rows(self, db_session: AsyncSession) -> None:
        """Renaming a function between parent and head yields one ``rename`` event."""
        from musehub.db import musehub_intel_models as db
        from musehub.services.musehub_intel_providers import _PROVIDER_REGISTRY
        repo = await create_repo(db_session)

        # parent snapshot: a.py has old_name
        parent_src = b"def old_name():\n pass\n"
        parent_oid = "obj-refactor-parent"
        # head snapshot: a.py has new_name (same body → rename)
        head_src = b"def new_name():\n pass\n"
        head_oid = "obj-refactor-head"
        backend = _mock_backend({parent_oid: parent_src, head_oid: head_src})

        parent_commit_id, _ = await _make_commit_and_snapshot(
            db_session, repo.repo_id, {"a.py": parent_oid}
        )
        head_commit_id, _ = await _make_commit_and_snapshot(
            db_session, repo.repo_id, {"a.py": head_oid},
            parent_ids=[parent_commit_id],
        )

        with patch("musehub.services.musehub_intel_providers.get_backend", return_value=backend):
            results = await _PROVIDER_REGISTRY["intel.code.detect_refactor"].compute(
                db_session, repo.repo_id, head_commit_id,
                {"head": head_commit_id, "owner": repo.owner, "slug": repo.slug},
            )

        assert results
        rows = (await db_session.execute(
            select(db.MusehubIntelRefactorEvent).where(
                db.MusehubIntelRefactorEvent.repo_id == repo.repo_id
            )
        )).scalars().all()
        assert len(rows) == 1
        assert rows[0].kind == "rename"
        assert rows[0].address == "a.py::old_name"

    @pytest.mark.asyncio
    async def test_P2_18_detect_refactor_deduplicates_by_event_id(self, db_session: AsyncSession) -> None:
        """Running the provider twice on the same head leaves a single event row."""
        from musehub.db import musehub_intel_models as db
        from musehub.services.musehub_intel_providers import _PROVIDER_REGISTRY
        repo = await create_repo(db_session)

        parent_src = b"def old_name():\n pass\n"
        parent_oid = "obj-dedup-parent"
        head_src = b"def new_name():\n pass\n"
        head_oid = "obj-dedup-head"
        backend = _mock_backend({parent_oid: parent_src, head_oid: head_src})

        parent_commit_id, _ = await _make_commit_and_snapshot(
            db_session, repo.repo_id, {"a.py": parent_oid}
        )
        head_commit_id, _ = await _make_commit_and_snapshot(
            db_session, repo.repo_id, {"a.py": head_oid},
            parent_ids=[parent_commit_id],
        )

        # Two identical runs: the second must upsert, not insert a duplicate.
        with patch("musehub.services.musehub_intel_providers.get_backend", return_value=backend):
            for _ in range(2):
                await _PROVIDER_REGISTRY["intel.code.detect_refactor"].compute(
                    db_session, repo.repo_id, head_commit_id,
                    {"head": head_commit_id, "owner": repo.owner, "slug": repo.slug},
                )

        rows = (await db_session.execute(
            select(db.MusehubIntelRefactorEvent).where(
                db.MusehubIntelRefactorEvent.repo_id == repo.repo_id
            )
        )).scalars().all()
        assert len(rows) == 1, "duplicate event inserted — event_id upsert is broken"
377
378
379 # ─────────────────────────────────────────────────────────────────────────────
380 # Layer 14 — Empty output: all providers return [] gracefully
381 # ─────────────────────────────────────────────────────────────────────────────
382
class TestPhase2EmptyOutput:
    """Providers return ``[]`` when the muse command reports no results."""

    @pytest.mark.asyncio
    @pytest.mark.parametrize("job_type,empty_key", [
        ("intel.code.coupling", '{"pairs": []}'),
        ("intel.code.entangle", '{"pairs": []}'),
        ("intel.code.dead", '{"candidates": []}'),
        ("intel.code.blast_risk", '{"symbols": []}'),
        ("intel.code.stable", '{"symbols": []}'),
        ("intel.code.velocity", '{"modules": []}'),
        ("intel.code.clones", '{"clusters": []}'),
        ("intel.code.type", '{"symbols": []}'),
        ("intel.code.api_surface", '{"symbols": []}'),
        ("intel.code.languages", '{"languages": []}'),
        ("intel.code.detect_refactor", '{"events": []}'),
    ])
    async def test_P2_19_empty_muse_output_returns_empty_list(
        self, job_type: str, empty_key: str, db_session: AsyncSession
    ) -> None:
        """``compute()`` returns ``[]`` when the subprocess exits 0 with an empty payload.

        ``empty_key`` is the complete JSON document the stubbed process prints
        on stdout, i.e. the provider's result key mapped to an empty list.
        """
        from musehub.services.musehub_intel_providers import _PROVIDER_REGISTRY
        repo = await create_repo(db_session)
        ref = _uid()
        with patch("asyncio.create_subprocess_exec", return_value=_mock_process(empty_key)):
            results = await _PROVIDER_REGISTRY[job_type].compute(
                db_session, repo.repo_id, ref, {"head": ref, "owner": repo.owner, "slug": repo.slug}
            )
        assert results == []
410
411
412 # ─────────────────────────────────────────────────────────────────────────────
413 # Layer 15 — Non-zero exit: all providers return [] gracefully
414 # ─────────────────────────────────────────────────────────────────────────────
415
class TestPhase2ErrorHandling:
    """Providers return ``[]`` when the muse subprocess fails."""

    @pytest.mark.asyncio
    @pytest.mark.parametrize("job_type", _ALL_PHASE2_JOB_TYPES)
    async def test_P2_20_nonzero_exit_returns_empty_list(
        self, job_type: str, db_session: AsyncSession
    ) -> None:
        """``compute()`` returns ``[]`` when the subprocess exits 1 with empty stdout."""
        from musehub.services.musehub_intel_providers import _PROVIDER_REGISTRY
        repo = await create_repo(db_session)
        ref = _uid()
        with patch("asyncio.create_subprocess_exec", return_value=_mock_process("", returncode=1)):
            results = await _PROVIDER_REGISTRY[job_type].compute(
                db_session, repo.repo_id, ref, {"head": ref, "owner": repo.owner, "slug": repo.slug}
            )
        assert results == []