test_phase2_intel_providers.py
file-level
1
files
1
commits
0
hotspots
0
π§ dead
0
π₯ blast risk
"""TDD spec for Phase 2 — worker intel providers (issue #8).

11 new ``IntelProvider`` subclasses, one per normalized intel table, each
wrapping a ``muse code <command> --json`` subprocess call and upserting rows
into the corresponding DB table.

New job types (all in the ``intel.code.*`` namespace):
  intel.code.coupling         → MusehubIntelCoupling
  intel.code.entangle         → MusehubIntelEntangle
  intel.code.dead             → MusehubIntelDead
  intel.code.blast_risk       → MusehubIntelBlastRisk
  intel.code.stable           → MusehubIntelStable
  intel.code.velocity         → MusehubIntelVelocity
  intel.code.clones           → MusehubIntelClones
  intel.code.type             → MusehubIntelType
  intel.code.api_surface      → MusehubIntelApiSurface
  intel.code.languages        → MusehubIntelLanguages
  intel.code.detect_refactor  → MusehubIntelRefactorEvent

Contract each provider must satisfy:
  1. Registered under its job-type key in ``_PROVIDER_REGISTRY``.
  2. ``compute()`` calls ``muse -C <repo_root> code <cmd> --json`` (or the
     equivalent runner) and upserts result rows.
  3. ``compute()`` returns a non-empty ``IntelResults`` list on success.
  4. ``compute()`` returns ``[]`` gracefully when the muse command yields no
     results (empty repo, no symbols, etc.).
  5. ``compute()`` returns ``[]`` gracefully when the subprocess exits non-zero.

Layers:
   1. Registry   — job types present in _PROVIDER_REGISTRY
   2. Dispatch   — job_types_for_push("code") includes all 11 new types
   3. Coupling   — provider upserts MusehubIntelCoupling rows
   4. Entangle   — provider upserts MusehubIntelEntangle rows
   5. Dead       — provider upserts MusehubIntelDead rows
   6. BlastRisk  — provider upserts MusehubIntelBlastRisk rows
   7. Stable     — provider upserts MusehubIntelStable rows
   8. Velocity   — provider upserts MusehubIntelVelocity rows
   9. Clones     — provider upserts MusehubIntelClones rows
  10. Type       — provider upserts MusehubIntelType rows
  11. ApiSurface — provider upserts MusehubIntelApiSurface rows
  12. Languages  — provider upserts MusehubIntelLanguages rows
  13. Refactor   — provider upserts MusehubIntelRefactorEvent rows
  14. Empty      — all providers handle empty muse output gracefully
  15. Error      — all providers handle non-zero exit gracefully
"""
| 46 | from __future__ import annotations |
| 47 | |
| 48 | import json |
| 49 | import secrets |
| 50 | from datetime import datetime, timezone |
| 51 | from unittest.mock import AsyncMock, MagicMock, patch |
| 52 | |
| 53 | import msgpack |
| 54 | import pytest |
| 55 | from sqlalchemy import select |
| 56 | from sqlalchemy.ext.asyncio import AsyncSession |
| 57 | |
| 58 | from muse.core.types import fake_id |
| 59 | from tests.factories import create_repo |
| 60 | |
| 61 | type _ContentMap = dict[str, bytes] |
| 62 | |
| 63 | _ALL_PHASE2_JOB_TYPES = [ |
| 64 | "intel.code.coupling", |
| 65 | "intel.code.entangle", |
| 66 | "intel.code.dead", |
| 67 | "intel.code.blast_risk", |
| 68 | "intel.code.stable", |
| 69 | "intel.code.velocity", |
| 70 | "intel.code.clones", |
| 71 | "intel.code.type", |
| 72 | "intel.code.api_surface", |
| 73 | "intel.code.languages", |
| 74 | "intel.code.detect_refactor", |
| 75 | ] |
| 76 | |
| 77 | |
def _uid() -> str:
    """Return a new identifier: ``fake_id`` applied to a random 32-character hex token."""
    random_hex = secrets.token_hex(16)
    return fake_id(random_hex)
| 80 | |
| 81 | |
| 82 | def _now() -> datetime: |
| 83 | return datetime.now(tz=timezone.utc) |
| 84 | |
| 85 | |
| 86 | def _mock_process(stdout: str, returncode: int = 0) -> AsyncMock: |
| 87 | """Return an asyncio.subprocess.Process mock.""" |
| 88 | proc = AsyncMock() |
| 89 | proc.returncode = returncode |
| 90 | proc.communicate = AsyncMock(return_value=(stdout.encode(), b"")) |
| 91 | return proc |
| 92 | |
| 93 | |
async def _make_commit_and_snapshot(
    session: AsyncSession,
    repo_id: str,
    manifest: dict[str, str],
    parent_ids: list[str] | None = None,
) -> tuple[str, str]:
    """Seed one snapshot and one commit on ``main`` for a repo.

    Adds ``MusehubSnapshot``, ``MusehubSnapshotRef``, ``MusehubCommit`` and
    ``MusehubCommitRef`` rows, inserts one ``MusehubObject`` row per manifest
    entry, then flushes the session.

    Args:
        session: Session the rows are written through.
        repo_id: Repository the snapshot and commit refs point at.
        manifest: Mapping of file path to object id; stored msgpack-encoded on
            the snapshot.
        parent_ids: Parent commit ids; ``None`` (or empty) stores ``[]``.

    Returns:
        ``(commit_id, snapshot_id)`` of the freshly created rows.
    """
    from musehub.db.musehub_repo_models import (
        MusehubCommit,
        MusehubCommitRef,
        MusehubObject,
        MusehubSnapshot,
        MusehubSnapshotRef,
    )
    from sqlalchemy.dialects.postgresql import insert as pg_insert

    snapshot_id = _uid()
    commit_id = _uid()

    session.add_all([
        MusehubSnapshot(
            snapshot_id=snapshot_id,
            manifest_blob=msgpack.packb(manifest, use_bin_type=True),
        ),
        MusehubSnapshotRef(repo_id=repo_id, snapshot_id=snapshot_id),
        MusehubCommit(
            commit_id=commit_id,
            branch="main",
            message="test",
            author="tester",
            timestamp=_now(),
            snapshot_id=snapshot_id,
            parent_ids=parent_ids if parent_ids else [],
        ),
        MusehubCommitRef(repo_id=repo_id, commit_id=commit_id),
    ])

    # Seed MusehubObject rows so providers can find objects via
    # session.get(MusehubObject, oid). An object id that already exists is
    # left untouched instead of raising a conflict.
    for file_path, object_id in manifest.items():
        seed_stmt = (
            pg_insert(MusehubObject)
            .values(
                object_id=object_id,
                path=file_path,
                size_bytes=32,
                storage_uri=f"mem://{object_id}",
            )
            .on_conflict_do_nothing(index_elements=["object_id"])
        )
        await session.execute(seed_stmt)

    await session.flush()
    return commit_id, snapshot_id
| 131 | |
| 132 | |
| 133 | def _mock_backend(content_map: _ContentMap) -> AsyncMock: |
| 134 | backend = AsyncMock() |
| 135 | backend.get = AsyncMock(side_effect=lambda oid, **_: content_map.get(oid)) |
| 136 | return backend |
| 137 | |
| 138 | |
# ─────────────────────────────────────────────────────────────────────────────
# Layer 1 — Registry: all 11 providers registered
# ─────────────────────────────────────────────────────────────────────────────
| 142 | |
class TestPhase2Registry:
    """Layer 1: every Phase 2 job type has an entry in ``_PROVIDER_REGISTRY``."""

    def test_P2_01_all_phase2_job_types_in_registry(self) -> None:
        """No Phase 2 job type may be absent from the registry."""
        from musehub.services.musehub_intel_providers import _PROVIDER_REGISTRY

        missing = [
            job_type
            for job_type in _ALL_PHASE2_JOB_TYPES
            if job_type not in _PROVIDER_REGISTRY
        ]
        assert not missing, f"Missing from _PROVIDER_REGISTRY: {missing}"

    def test_P2_02_registry_providers_satisfy_protocol(self) -> None:
        """Each registered Phase 2 provider passes ``isinstance(..., IntelProvider)``."""
        from musehub.services.musehub_intel_providers import _PROVIDER_REGISTRY, IntelProvider

        for job_type in _ALL_PHASE2_JOB_TYPES:
            candidate = _PROVIDER_REGISTRY[job_type]
            failure = f"{job_type} provider does not satisfy IntelProvider protocol"
            assert isinstance(candidate, IntelProvider), failure
| 157 | |
| 158 | |
# ─────────────────────────────────────────────────────────────────────────────
# Layer 2 — Dispatch: job_types_for_push includes all 11 new types
# ─────────────────────────────────────────────────────────────────────────────
| 162 | |
class TestPhase2Dispatch:
    """Layer 2: ``job_types_for_push`` schedules Phase 2 jobs for code pushes only."""

    def test_P2_03_job_types_for_push_code_includes_all_phase2_types(self) -> None:
        """A ``"code"`` push schedules every Phase 2 job type."""
        from musehub.services.musehub_intel_providers import job_types_for_push

        scheduled = job_types_for_push("code")
        missing = [
            job_type for job_type in _ALL_PHASE2_JOB_TYPES if job_type not in scheduled
        ]
        assert not missing, f"Missing from job_types_for_push('code'): {missing}"

    def test_P2_04_job_types_for_push_code_still_includes_legacy_types(self) -> None:
        """The job types that predate Phase 2 are still scheduled for ``"code"``."""
        from musehub.services.musehub_intel_providers import job_types_for_push

        scheduled = job_types_for_push("code")
        for legacy_type in ("intel.structural", "intel.code", "gc"):
            assert legacy_type in scheduled

    def test_P2_05_job_types_for_push_midi_excludes_phase2_types(self) -> None:
        """A ``"midi"`` push schedules none of the Phase 2 job types."""
        from musehub.services.musehub_intel_providers import job_types_for_push

        scheduled = job_types_for_push("midi")
        for job_type in _ALL_PHASE2_JOB_TYPES:
            assert job_type not in scheduled, f"{job_type} should not run for midi repos"
| 183 | |
| 184 | |
# ─────────────────────────────────────────────────────────────────────────────
# Layer 10 — TypeProvider
# ─────────────────────────────────────────────────────────────────────────────
| 188 | |
class TestPhase2TypeProvider:
    """Layer 10: the ``intel.code.type`` provider writes ``MusehubIntelType`` rows."""

    @pytest.mark.asyncio
    async def test_P2_14_type_upserts_rows(self, db_session: AsyncSession) -> None:
        """One fully annotated function produces exactly one row scored 1.0."""
        from musehub.db import musehub_intel_models as db
        from musehub.services.musehub_intel_providers import _PROVIDER_REGISTRY

        repo = await create_repo(db_session)

        source_bytes = b"def fn(x: int, y: str) -> bool:\n pass\n"
        object_id = "obj-type-test"
        backend = _mock_backend({object_id: source_bytes})

        commit_id, _ = await _make_commit_and_snapshot(
            db_session, repo.repo_id, {"a.py": object_id}
        )
        payload = {"head": commit_id, "owner": repo.owner, "slug": repo.slug}

        # Both lookup paths for the backend are patched, plus the byte reader,
        # so the provider never touches real storage.
        with (
            patch("musehub.storage.backends.get_backend", return_value=backend),
            patch("musehub.services.musehub_intel_providers.get_backend", return_value=backend),
            patch(
                "musehub.storage.backends.read_object_bytes",
                new=AsyncMock(return_value=source_bytes),
            ),
        ):
            provider = _PROVIDER_REGISTRY["intel.code.type"]
            results = await provider.compute(db_session, repo.repo_id, commit_id, payload)

        assert results

        stmt = select(db.MusehubIntelType).where(
            db.MusehubIntelType.repo_id == repo.repo_id
        )
        rows = (await db_session.execute(stmt)).scalars().all()
        assert len(rows) == 1
        assert rows[0].type_score == pytest.approx(1.0)
        assert rows[0].return_is_any is False
| 220 | |
| 221 | |
# ─────────────────────────────────────────────────────────────────────────────
# Layer 11 — ApiSurfaceProvider
# ─────────────────────────────────────────────────────────────────────────────
| 225 | |
class TestPhase2ApiSurfaceProvider:
    """Layer 11: ``intel.code.api_surface`` writes ``MusehubIntelApiSurface`` rows."""

    @pytest.mark.asyncio
    async def test_P2_15_api_surface_upserts_rows(self, db_session: AsyncSession) -> None:
        """One top-level function produces one public row carrying a signature id."""
        from musehub.db import musehub_intel_models as db
        from musehub.services.musehub_intel_providers import _PROVIDER_REGISTRY

        repo = await create_repo(db_session)

        source_bytes = b"def get_repo(repo_id: str) -> dict:\n pass\n"
        object_id = "obj-api-test"
        backend = _mock_backend({object_id: source_bytes})

        commit_id, _ = await _make_commit_and_snapshot(
            db_session, repo.repo_id, {"api/routes.py": object_id}
        )
        payload = {"head": commit_id, "owner": repo.owner, "slug": repo.slug}

        with (
            patch("musehub.services.musehub_intel_providers.get_backend", return_value=backend),
            patch(
                "musehub.storage.backends.read_object_bytes",
                new=AsyncMock(return_value=source_bytes),
            ),
        ):
            provider = _PROVIDER_REGISTRY["intel.code.api_surface"]
            results = await provider.compute(db_session, repo.repo_id, commit_id, payload)

        assert results

        stmt = select(db.MusehubIntelApiSurface).where(
            db.MusehubIntelApiSurface.repo_id == repo.repo_id
        )
        rows = (await db_session.execute(stmt)).scalars().all()
        assert len(rows) == 1
        assert rows[0].signature_id is not None
        assert rows[0].visibility == "public"
| 256 | |
| 257 | |
# ─────────────────────────────────────────────────────────────────────────────
# Layer 12 — LanguagesProvider
# ─────────────────────────────────────────────────────────────────────────────
| 261 | |
class TestPhase2LanguagesProvider:
    """Layer 12: ``intel.code.languages`` writes ``MusehubIntelLanguages`` rows."""

    @pytest.mark.asyncio
    async def test_P2_16_languages_upserts_rows(self, db_session: AsyncSession) -> None:
        """A Python file plus a TOML file yield two rows; Python has 1 file, 1 symbol."""
        from musehub.db import musehub_intel_models as db
        from musehub.services.musehub_intel_providers import _PROVIDER_REGISTRY

        repo = await create_repo(db_session)

        python_oid = "obj-lang-py"
        toml_oid = "obj-lang-toml"
        backend = _mock_backend({
            python_oid: b"def fn(x: int) -> bool:\n pass\n",
            toml_oid: b"[workspace]\nversion = 1\n",
        })

        commit_id, _ = await _make_commit_and_snapshot(
            db_session,
            repo.repo_id,
            {"src/main.py": python_oid, "pyproject.toml": toml_oid},
        )
        payload = {"head": commit_id, "owner": repo.owner, "slug": repo.slug}

        with patch("musehub.services.musehub_intel_providers.get_backend", return_value=backend):
            provider = _PROVIDER_REGISTRY["intel.code.languages"]
            results = await provider.compute(db_session, repo.repo_id, commit_id, payload)

        assert results

        stmt = select(db.MusehubIntelLanguages).where(
            db.MusehubIntelLanguages.repo_id == repo.repo_id
        )
        rows = (await db_session.execute(stmt)).scalars().all()
        assert len(rows) == 2

        python_row = next(row for row in rows if row.language == "Python")
        assert python_row.file_count == 1
        assert python_row.symbol_count == 1
| 294 | |
| 295 | |
# ─────────────────────────────────────────────────────────────────────────────
# Layer 13 — DetectRefactorProvider
# ─────────────────────────────────────────────────────────────────────────────
| 299 | |
class TestPhase2DetectRefactorProvider:
    """Layer 13: ``intel.code.detect_refactor`` writes ``MusehubIntelRefactorEvent`` rows.

    Both tests share one fixture: a parent commit whose ``a.py`` defines
    ``old_name`` and a head commit whose ``a.py`` defines ``new_name`` with
    the same body.
    """

    _JOB_TYPE = "intel.code.detect_refactor"

    @staticmethod
    async def _seed_rename_history(
        db_session: AsyncSession, repo_id: str, oid_prefix: str
    ) -> tuple[str, AsyncMock]:
        """Create the parent/head commit pair and a backend serving both blobs.

        Args:
            db_session: Session the commits and snapshots are written through.
            repo_id: Repository that owns the two commits.
            oid_prefix: Prefix for the two object ids (``<prefix>-parent`` and
                ``<prefix>-head``), so each test uses its own objects.

        Returns:
            ``(head_commit_id, backend)``.
        """
        parent_oid = f"{oid_prefix}-parent"
        head_oid = f"{oid_prefix}-head"
        backend = _mock_backend({
            # parent snapshot: a.py has old_name
            parent_oid: b"def old_name():\n pass\n",
            # head snapshot: a.py has new_name (same body, so a rename)
            head_oid: b"def new_name():\n pass\n",
        })

        parent_commit_id, _ = await _make_commit_and_snapshot(
            db_session, repo_id, {"a.py": parent_oid}
        )
        head_commit_id, _ = await _make_commit_and_snapshot(
            db_session, repo_id, {"a.py": head_oid},
            parent_ids=[parent_commit_id],
        )
        return head_commit_id, backend

    @staticmethod
    async def _refactor_events(db_session: AsyncSession, repo_id: str) -> list:
        """Return every ``MusehubIntelRefactorEvent`` row stored for ``repo_id``."""
        from musehub.db import musehub_intel_models as db

        stmt = select(db.MusehubIntelRefactorEvent).where(
            db.MusehubIntelRefactorEvent.repo_id == repo_id
        )
        return (await db_session.execute(stmt)).scalars().all()

    @pytest.mark.asyncio
    async def test_P2_17_detect_refactor_upserts_rows(self, db_session: AsyncSession) -> None:
        """The rename is stored as one ``rename`` event addressed at the old name."""
        from musehub.services.musehub_intel_providers import _PROVIDER_REGISTRY

        repo = await create_repo(db_session)
        head_commit_id, backend = await self._seed_rename_history(
            db_session, repo.repo_id, "obj-refactor"
        )
        payload = {"head": head_commit_id, "owner": repo.owner, "slug": repo.slug}

        with patch("musehub.services.musehub_intel_providers.get_backend", return_value=backend):
            results = await _PROVIDER_REGISTRY[self._JOB_TYPE].compute(
                db_session, repo.repo_id, head_commit_id, payload
            )

        assert results
        rows = await self._refactor_events(db_session, repo.repo_id)
        assert len(rows) == 1
        assert rows[0].kind == "rename"
        assert rows[0].address == "a.py::old_name"

    @pytest.mark.asyncio
    async def test_P2_18_detect_refactor_deduplicates_by_event_id(self, db_session: AsyncSession) -> None:
        """Running the provider twice on the same head commit stores the event once."""
        from musehub.services.musehub_intel_providers import _PROVIDER_REGISTRY

        repo = await create_repo(db_session)
        head_commit_id, backend = await self._seed_rename_history(
            db_session, repo.repo_id, "obj-dedup"
        )
        payload = {"head": head_commit_id, "owner": repo.owner, "slug": repo.slug}

        # Two identical runs under one patch; the second must not add a row.
        with patch("musehub.services.musehub_intel_providers.get_backend", return_value=backend):
            for _ in range(2):
                await _PROVIDER_REGISTRY[self._JOB_TYPE].compute(
                    db_session, repo.repo_id, head_commit_id, payload
                )

        rows = await self._refactor_events(db_session, repo.repo_id)
        assert len(rows) == 1, "duplicate event inserted — event_id upsert is broken"
| 377 | |
| 378 | |
# ─────────────────────────────────────────────────────────────────────────────
# Layer 14 — Empty output: all providers return [] gracefully
# ─────────────────────────────────────────────────────────────────────────────
| 382 | |
class TestPhase2EmptyOutput:
    """Layer 14: a provider given an empty muse payload returns ``[]``."""

    @pytest.mark.asyncio
    @pytest.mark.parametrize(
        "job_type,empty_key",
        [
            # (job-type suffix, top-level JSON key holding the empty list)
            (f"intel.code.{suffix}", json.dumps({top_level_key: []}))
            for suffix, top_level_key in (
                ("coupling", "pairs"),
                ("entangle", "pairs"),
                ("dead", "candidates"),
                ("blast_risk", "symbols"),
                ("stable", "symbols"),
                ("velocity", "modules"),
                ("clones", "clusters"),
                ("type", "symbols"),
                ("api_surface", "symbols"),
                ("languages", "languages"),
                ("detect_refactor", "events"),
            )
        ],
    )
    async def test_P2_19_empty_muse_output_returns_empty_list(
        self, job_type: str, empty_key: str, db_session: AsyncSession
    ) -> None:
        """With the subprocess stubbed to emit ``empty_key`` and exit 0, ``compute`` returns ``[]``."""
        from musehub.services.musehub_intel_providers import _PROVIDER_REGISTRY

        repo = await create_repo(db_session)
        ref = _uid()
        payload = {"head": ref, "owner": repo.owner, "slug": repo.slug}
        fake_proc = _mock_process(empty_key)

        with patch("asyncio.create_subprocess_exec", return_value=fake_proc):
            results = await _PROVIDER_REGISTRY[job_type].compute(
                db_session, repo.repo_id, ref, payload
            )

        assert results == []
| 410 | |
| 411 | |
# ─────────────────────────────────────────────────────────────────────────────
# Layer 15 — Non-zero exit: all providers return [] gracefully
# ─────────────────────────────────────────────────────────────────────────────
| 415 | |
class TestPhase2ErrorHandling:
    """Layer 15: a provider whose subprocess exits non-zero returns ``[]``."""

    @pytest.mark.asyncio
    @pytest.mark.parametrize("job_type", _ALL_PHASE2_JOB_TYPES)
    async def test_P2_20_nonzero_exit_returns_empty_list(
        self, job_type: str, db_session: AsyncSession
    ) -> None:
        """With the subprocess stubbed to exit 1 with no output, ``compute`` returns ``[]``."""
        from musehub.services.musehub_intel_providers import _PROVIDER_REGISTRY

        repo = await create_repo(db_session)
        ref = _uid()
        payload = {"head": ref, "owner": repo.owner, "slug": repo.slug}
        failed_proc = _mock_process("", returncode=1)

        with patch("asyncio.create_subprocess_exec", return_value=failed_proc):
            results = await _PROVIDER_REGISTRY[job_type].compute(
                db_session, repo.repo_id, ref, payload
            )

        assert results == []