"""TDD spec — Phase 4: populate op and last_commit_id on musehub_symbol_intel.

Problem
───────
musehub_symbol_intel.op and last_commit_id are always NULL. The columns exist
on the model and are read by the symbol list route, but _compute_symbol_intel
never computes them and _upsert_symbol_intel never writes them.

Solution
────────
1. Add last_op and last_commit_id to SymbolIntel TypedDict.
2. _compute_symbol_intel: track them alongside last_author (same loop, same
   "is this the newest ts?" guard).
3. _upsert_symbol_intel: include op and last_commit_id in the row dict and in
   the on_conflict set_{} update.
4. backfill_intel_fields(session, repo_id): one UPDATE...FROM that reads the
   most-recent history entry per (repo_id, address) and writes op +
   last_commit_id into intel rows — so existing records are fixed without a
   full re-index.

Tier breakdown
──────────────
I401  _compute_symbol_intel returns last_op per symbol
I402  _compute_symbol_intel returns last_commit_id per symbol
I403  last_op reflects the most recent commit's op, not the first
I404  _upsert_symbol_intel writes op to musehub_symbol_intel
I405  _upsert_symbol_intel writes last_commit_id to musehub_symbol_intel
I406  build_symbol_index populates op end-to-end
I407  build_symbol_index populates last_commit_id end-to-end
I408  backfill_intel_fields fixes NULL op from existing history entries
I409  backfill_intel_fields fixes NULL last_commit_id from existing history
I410  backfill_intel_fields is idempotent (safe to run twice)
"""
from __future__ import annotations

import secrets
from datetime import datetime, timezone, timedelta

import pytest
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession

from musehub.db.musehub_intel_models import MusehubSymbolHistoryEntry, MusehubSymbolIntel
from musehub.db.musehub_repo_models import MusehubCommit, MusehubCommitRef
from muse.core.types import blob_id, long_id
from tests.factories import create_repo

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def _now() -> datetime:
    """Return the current moment as a timezone-aware UTC datetime."""
    return datetime.now(tz=timezone.utc)


def _cid() -> str:
    """Return ``blob_id`` applied to 32 freshly generated random bytes."""
    random_bytes = secrets.token_bytes(32)
    return blob_id(random_bytes)


def _lid() -> str:
    """Return ``long_id`` applied to a freshly generated 32-byte hex token."""
    random_hex = secrets.token_hex(32)
    return long_id(random_hex)


async def _make_commit(
    session: AsyncSession,
    repo_id: str,
    addresses: list[str],
    *,
    parent_id: str | None = None,
    branch: str = "dev",
    message: str = "feat: test",
    op: str = "insert",
    ts: datetime | None = None,
) -> MusehubCommit:
    """Add a commit touching *addresses* to the session and flush it.

    The commit gets a random id from ``_lid()``, the fixed author
    ``"gabriel"`` and a ``structured_delta`` holding one op dict per address,
    each using the same *op* and its own random ``new_content_id``.  A
    ``MusehubCommitRef`` tying the commit to *repo_id* is added as well.

    Args:
        session: Session that receives the commit and its ref.
        repo_id: Repository the commit ref points at.
        addresses: Symbol addresses, one delta op is emitted for each.
        parent_id: Optional parent commit id; falsy means no parents.
        branch: Branch name stored on the commit.
        message: Commit message stored on the commit.
        op: Operation name written into every delta op.
        ts: Commit timestamp; falls back to ``_now()`` when falsy.

    Returns:
        The ``MusehubCommit`` instance after the session flush.
    """
    commit_id = _lid()
    committed_at = ts if ts else _now()

    parents = []
    if parent_id:
        parents.append(parent_id)

    delta_ops = []
    for addr in addresses:
        delta_ops.append({"address": addr, "op": op, "new_content_id": _cid()})

    commit = MusehubCommit(
        commit_id=commit_id,
        message=message,
        author="gabriel",
        branch=branch,
        timestamp=committed_at,
        parent_ids=parents,
        structured_delta={"ops": delta_ops},
    )
    session.add(commit)

    commit_ref = MusehubCommitRef(repo_id=repo_id, commit_id=commit_id)
    session.add(commit_ref)

    await session.flush()
    return commit


# ---------------------------------------------------------------------------
# I401 — _compute_symbol_intel returns last_op per symbol
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_i401_compute_returns_last_op() -> None:
    """A single-entry history must yield an intel entry carrying last_op."""
    from musehub.services.musehub_symbol_indexer import _compute_symbol_intel

    address = "src/a.py::fn"
    entry = {
        "commit_id": _lid(),
        "committed_at": _now().isoformat(),
        "author": "gabriel",
        "op": "insert",
        "op_payload": {},
        "content_id": _cid(),
    }

    result = _compute_symbol_intel({address: [entry]})

    assert address in result
    assert "last_op" in result[address]
    assert result[address]["last_op"] == "insert"
# ---------------------------------------------------------------------------
# I402 — _compute_symbol_intel returns last_commit_id per symbol
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_i402_compute_returns_last_commit_id() -> None:
    """A single-entry history must yield that entry's commit id as last_commit_id."""
    from musehub.services.musehub_symbol_indexer import _compute_symbol_intel

    commit_id = _lid()
    address = "src/b.py::fn"
    entry = {
        "commit_id": commit_id,
        "committed_at": _now().isoformat(),
        "author": "gabriel",
        "op": "replace",
        "op_payload": {},
        "content_id": _cid(),
    }

    result = _compute_symbol_intel({address: [entry]})

    assert result[address]["last_commit_id"] == commit_id


# ---------------------------------------------------------------------------
# I403 — last_op reflects the MOST RECENT commit, not the first
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_i403_last_op_is_most_recent() -> None:
    """With two history entries, last_op and last_commit_id come from the newer one."""
    from musehub.services.musehub_symbol_indexer import _compute_symbol_intel

    older = (_now() - timedelta(days=10)).isoformat()
    newer = _now().isoformat()
    cid_new = _lid()
    address = "src/c.py::fn"

    # The older entry is listed first, so a "first wins" implementation fails.
    old_entry = {
        "commit_id": _lid(),
        "committed_at": older,
        "author": "gabriel",
        "op": "insert",
        "op_payload": {},
        "content_id": _cid(),
    }
    new_entry = {
        "commit_id": cid_new,
        "committed_at": newer,
        "author": "gabriel",
        "op": "replace",
        "op_payload": {},
        "content_id": _cid(),
    }

    result = _compute_symbol_intel({address: [old_entry, new_entry]})

    assert result[address]["last_op"] == "replace"
    assert result[address]["last_commit_id"] == cid_new


# ---------------------------------------------------------------------------
# I404 — _upsert_symbol_intel writes op to the DB
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_i404_upsert_writes_op(db_session: AsyncSession) -> None:
    """Upserting an intel dict with last_op "replace" must store op "replace"."""
    from musehub.services.musehub_symbol_indexer import _upsert_symbol_intel

    repo = await create_repo(db_session, owner="gabriel")
    address = "src/d.py::fn"
    symbol_intel = {
        "churn": 2,
        "churn_30d": 1,
        "churn_90d": 2,
        "blast": 0,
        "blast_direct": 0,
        "blast_cross": 0,
        "blast_top": [],
        "last_changed": _now().isoformat(),
        "last_author": "gabriel",
        "author_count": 1,
        "gravity": 0.1,
        "weekly": [0] * 12,
        "last_op": "replace",
        "last_commit_id": _lid(),
    }

    await _upsert_symbol_intel(db_session, repo.repo_id, {address: symbol_intel})
    await db_session.flush()

    stmt = select(MusehubSymbolIntel).where(
        MusehubSymbolIntel.repo_id == repo.repo_id,
        MusehubSymbolIntel.address == address,
    )
    row = (await db_session.execute(stmt)).scalar_one()

    assert row.op == "replace"


# ---------------------------------------------------------------------------
# I405 — _upsert_symbol_intel writes last_commit_id to the DB
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_i405_upsert_writes_last_commit_id(db_session: AsyncSession) -> None:
    """Upserting an intel dict must store its last_commit_id on the row."""
    from musehub.services.musehub_symbol_indexer import _upsert_symbol_intel

    repo = await create_repo(db_session, owner="gabriel")
    commit_id = _lid()
    address = "src/e.py::fn"
    symbol_intel = {
        "churn": 1,
        "churn_30d": 1,
        "churn_90d": 1,
        "blast": 0,
        "blast_direct": 0,
        "blast_cross": 0,
        "blast_top": [],
        "last_changed": _now().isoformat(),
        "last_author": "gabriel",
        "author_count": 1,
        "gravity": 0.0,
        "weekly": [0] * 12,
        "last_op": "insert",
        "last_commit_id": commit_id,
    }

    await _upsert_symbol_intel(db_session, repo.repo_id, {address: symbol_intel})
    await db_session.flush()

    stmt = select(MusehubSymbolIntel).where(
        MusehubSymbolIntel.repo_id == repo.repo_id,
        MusehubSymbolIntel.address == address,
    )
    row = (await db_session.execute(stmt)).scalar_one()

    assert row.last_commit_id == commit_id
# ---------------------------------------------------------------------------
# I406 — build_symbol_index populates op end-to-end
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_i406_build_index_populates_op(db_session: AsyncSession) -> None:
    """Indexing a commit with one "insert" op must leave op == "insert" on the intel row."""
    from musehub.services.musehub_symbol_indexer import build_symbol_index

    repo = await create_repo(db_session, owner="gabriel")
    address = "src/f.py::my_fn"
    commit = await _make_commit(db_session, repo.repo_id, [address], op="insert")
    await db_session.flush()

    await build_symbol_index(db_session, repo.repo_id, commit.commit_id)
    await db_session.flush()

    stmt = select(MusehubSymbolIntel).where(
        MusehubSymbolIntel.repo_id == repo.repo_id,
        MusehubSymbolIntel.address == address,
    )
    row = (await db_session.execute(stmt)).scalar_one()

    # The explicit NULL check comes first so a missing value is reported as such.
    assert row.op is not None
    assert row.op == "insert"


# ---------------------------------------------------------------------------
# I407 — build_symbol_index populates last_commit_id end-to-end
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_i407_build_index_populates_last_commit_id(db_session: AsyncSession) -> None:
    """Indexing a commit must leave that commit's id in last_commit_id on the intel row."""
    from musehub.services.musehub_symbol_indexer import build_symbol_index

    repo = await create_repo(db_session, owner="gabriel")
    address = "src/g.py::helper"
    commit = await _make_commit(db_session, repo.repo_id, [address], op="insert")
    await db_session.flush()

    await build_symbol_index(db_session, repo.repo_id, commit.commit_id)
    await db_session.flush()

    stmt = select(MusehubSymbolIntel).where(
        MusehubSymbolIntel.repo_id == repo.repo_id,
        MusehubSymbolIntel.address == address,
    )
    row = (await db_session.execute(stmt)).scalar_one()

    # The explicit NULL check comes first so a missing value is reported as such.
    assert row.last_commit_id is not None
    assert row.last_commit_id == commit.commit_id
# ---------------------------------------------------------------------------
# I408 — backfill_intel_fields fixes NULL op from existing history entries
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_i408_backfill_fixes_null_op(db_session: AsyncSession) -> None:
    """After backfill, the intel row's op must equal the seeded history entry's op."""
    from musehub.services.musehub_symbol_indexer import backfill_intel_fields

    repo = await create_repo(db_session, owner="gabriel")
    commit_id = _lid()
    address = "src/h.py::fn"

    # Intel row created without op / last_commit_id values.
    intel_row = MusehubSymbolIntel(
        repo_id=repo.repo_id,
        address=address,
        churn=1,
        churn_30d=1,
        churn_90d=1,
        blast=0,
        blast_direct=0,
        blast_cross=0,
        blast_top=[],
        last_changed=_now(),
        author_count=1,
        gravity=0.0,
        weekly=[0] * 12,
    )
    db_session.add(intel_row)

    # Matching history entry that the backfill should read from.
    history_row = MusehubSymbolHistoryEntry(
        repo_id=repo.repo_id,
        address=address,
        commit_id=commit_id,
        committed_at=_now(),
        author="gabriel",
        op="mutate",
    )
    db_session.add(history_row)
    await db_session.flush()

    await backfill_intel_fields(db_session, repo.repo_id)
    await db_session.flush()

    stmt = select(MusehubSymbolIntel).where(
        MusehubSymbolIntel.repo_id == repo.repo_id,
        MusehubSymbolIntel.address == address,
    )
    row = (await db_session.execute(stmt)).scalar_one()

    assert row.op == "mutate"


# ---------------------------------------------------------------------------
# I409 — backfill_intel_fields fixes NULL last_commit_id
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_i409_backfill_fixes_null_last_commit_id(db_session: AsyncSession) -> None:
    """After backfill, last_commit_id must equal the seeded history entry's commit id."""
    from musehub.services.musehub_symbol_indexer import backfill_intel_fields

    repo = await create_repo(db_session, owner="gabriel")
    commit_id = _lid()
    address = "src/i.py::fn"

    # Intel row created without op / last_commit_id values.
    intel_row = MusehubSymbolIntel(
        repo_id=repo.repo_id,
        address=address,
        churn=1,
        churn_30d=1,
        churn_90d=1,
        blast=0,
        blast_direct=0,
        blast_cross=0,
        blast_top=[],
        last_changed=_now(),
        author_count=1,
        gravity=0.0,
        weekly=[0] * 12,
    )
    db_session.add(intel_row)

    # Matching history entry that the backfill should read from.
    history_row = MusehubSymbolHistoryEntry(
        repo_id=repo.repo_id,
        address=address,
        commit_id=commit_id,
        committed_at=_now(),
        author="gabriel",
        op="replace",
    )
    db_session.add(history_row)
    await db_session.flush()

    await backfill_intel_fields(db_session, repo.repo_id)
    await db_session.flush()

    stmt = select(MusehubSymbolIntel).where(
        MusehubSymbolIntel.repo_id == repo.repo_id,
        MusehubSymbolIntel.address == address,
    )
    row = (await db_session.execute(stmt)).scalar_one()

    assert row.last_commit_id == commit_id


# ---------------------------------------------------------------------------
# I410 — backfill_intel_fields is idempotent
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_i410_backfill_idempotent(db_session: AsyncSession) -> None:
    """Two consecutive backfills must still leave the expected op and last_commit_id."""
    from musehub.services.musehub_symbol_indexer import backfill_intel_fields

    repo = await create_repo(db_session, owner="gabriel")
    commit_id = _lid()
    address = "src/j.py::fn"

    # Intel row created without op / last_commit_id values.
    intel_row = MusehubSymbolIntel(
        repo_id=repo.repo_id,
        address=address,
        churn=1,
        churn_30d=1,
        churn_90d=1,
        blast=0,
        blast_direct=0,
        blast_cross=0,
        blast_top=[],
        last_changed=_now(),
        author_count=1,
        gravity=0.0,
        weekly=[0] * 12,
    )
    db_session.add(intel_row)

    # Matching history entry that the backfill should read from.
    history_row = MusehubSymbolHistoryEntry(
        repo_id=repo.repo_id,
        address=address,
        commit_id=commit_id,
        committed_at=_now(),
        author="gabriel",
        op="patch",
    )
    db_session.add(history_row)
    await db_session.flush()

    # First pass fills the fields; the second pass must not disturb them.
    await backfill_intel_fields(db_session, repo.repo_id)
    await db_session.flush()
    await backfill_intel_fields(db_session, repo.repo_id)
    await db_session.flush()

    stmt = select(MusehubSymbolIntel).where(
        MusehubSymbolIntel.repo_id == repo.repo_id,
        MusehubSymbolIntel.address == address,
    )
    row = (await db_session.execute(stmt)).scalar_one()

    assert row.op == "patch"
    assert row.last_commit_id == commit_id