"""TDD tests for blame page performance fixes. Problem: _snapshot_diff_blame loads every commit in the repo (no LIMIT) and fetches all their snapshot manifests — O(N_commits * manifest_size) per page load. For musehub with 900+ commits this times out. Fix: use musehub_symbol_history_entries as fast path (same index as blob page). Fall back to snapshot scan only when no history entries exist, and cap that scan at a small window (50 commits newest-first, not all commits oldest-first). Test matrix ----------- test_snapshot_diff_blame_skips_manifest_scan_when_history_exists No snapshot manifests fetched when symbol_history_entries has an entry for the file path (bare or prefixed with ::). test_snapshot_diff_blame_returns_entry_from_history_index Returns a SymbolBlameEntry built from the most recent history index entry. test_snapshot_diff_blame_falls_back_when_no_history When no history entries exist, falls back to snapshot scan and returns entry. test_snapshot_diff_blame_caps_fallback_scan Fallback scan fetches at most 50 commits, not all commits in the repo. """ from __future__ import annotations import secrets from contextlib import asynccontextmanager from datetime import datetime, timezone, timedelta from typing import AsyncGenerator from unittest.mock import patch import msgpack import pytest from sqlalchemy.ext.asyncio import AsyncSession from musehub.core.genesis import compute_identity_id, compute_repo_id from musehub.db.musehub_intel_models import MusehubSymbolHistoryEntry from musehub.db.musehub_repo_models import MusehubCommit, MusehubCommitRef, MusehubRepo, MusehubSnapshot, MusehubSnapshotRef from musehub.db import database as _database from musehub.types.json_types import JSONObject, StrDict ManifestBatch = dict[str, StrDict] from muse.core.types import long_id, now_utc_iso # ── Constants ───────────────────────────────────────────────────────────────── _OWNER_ID = compute_identity_id(b"blame-perf-tester") _FILE = ".env.example" _OTHER = "README.md" # ── Helpers ─────────────────────────────────────────────────────────────────── def _uid() -> str: return long_id(secrets.token_hex(32)) def _repo_id() -> str: return compute_repo_id( _OWNER_ID, f"blp-{secrets.token_hex(4)}", "code", now_utc_iso(), ) def _snap_id() -> str: return long_id(secrets.token_hex(32)) def _obj(tag: str) -> str: return long_id(tag.encode().hex().ljust(64, "0")) def _blob(manifest: StrDict) -> bytes: return msgpack.packb(manifest, use_bin_type=True) async def _make_repo(session: AsyncSession) -> str: rid = _repo_id() now = datetime.now(tz=timezone.utc) session.add(MusehubRepo( repo_id=rid, name="blp-test", owner="blp-tester", slug="blp-test", visibility="public", owner_user_id=_OWNER_ID, created_at=now, updated_at=now, )) await session.commit() return rid async def _snap(session: AsyncSession, repo_id: str, manifest: StrDict) -> str: sid = _snap_id() now = datetime.now(tz=timezone.utc) session.add(MusehubSnapshot( snapshot_id=sid, directories=[], manifest_blob=_blob(manifest), entry_count=len(manifest), created_at=now, )) session.add(MusehubSnapshotRef(repo_id=repo_id, snapshot_id=sid, created_at=now)) await session.flush() return sid async def _commit( session: AsyncSession, repo_id: str, snapshot_id: str, offset: int = 0, message: str = "feat: change", ) -> MusehubCommit: cid = _uid() now = datetime.now(tz=timezone.utc) + timedelta(seconds=offset) row = MusehubCommit( commit_id=cid, branch="main", parent_ids=[], message=message, author="tester", timestamp=now, snapshot_id=snapshot_id, ) session.add(row) session.add(MusehubCommitRef(repo_id=repo_id, commit_id=cid)) await session.flush() return row async def _history_entry( session: AsyncSession, repo_id: str, commit_id: str, address: str, offset: int = 0, op: str = "modify", message: str = "feat: change", ) -> None: now = datetime.now(tz=timezone.utc) + timedelta(seconds=offset) session.add(MusehubSymbolHistoryEntry( repo_id=repo_id, address=address, commit_id=commit_id, committed_at=now, author="tester", op=op, message=message, )) await session.flush() @asynccontextmanager async def _fresh_session() -> AsyncGenerator[AsyncSession, None]: async with _database._async_session_factory() as session: yield session # ── Tests ───────────────────────────────────────────────────────────────────── @pytest.mark.anyio async def test_snapshot_diff_blame_skips_manifest_scan_when_history_exists( db_session: AsyncSession, ) -> None: """No snapshot manifests fetched when history index has entries for the file.""" from musehub.api.routes.musehub.ui_blame import _snapshot_diff_blame repo_id = await _make_repo(db_session) s1 = await _snap(db_session, repo_id, {_FILE: _obj("v1")}) c1 = await _commit(db_session, repo_id, s1, offset=0) await _history_entry(db_session, repo_id, c1.commit_id, _FILE, offset=0) await db_session.commit() batch_calls: list[list[str]] = [] async def _spy_batch(session: AsyncSession, ids: list[str]) -> ManifestBatch: batch_calls.append(ids) return {} with patch( "musehub.api.routes.musehub.ui_blame.get_snapshot_manifests_batch", side_effect=_spy_batch, ): async with _fresh_session() as rs: result = await _snapshot_diff_blame(rs, repo_id, _FILE, {c1.commit_id: {"message": "init", "author": "tester"}}) assert batch_calls == [], ( f"get_snapshot_manifests_batch called {len(batch_calls)} time(s) " "even though history index has entries for the file" ) assert len(result) == 1 assert result[0].commit_id == c1.commit_id @pytest.mark.anyio async def test_snapshot_diff_blame_returns_entry_from_history_index( db_session: AsyncSession, ) -> None: """Returns SymbolBlameEntry built from the most recent history index entry.""" from musehub.api.routes.musehub.ui_blame import _snapshot_diff_blame repo_id = await _make_repo(db_session) s1 = await _snap(db_session, repo_id, {_FILE: _obj("v1")}) c1 = await _commit(db_session, repo_id, s1, offset=0, message="init") s2 = await _snap(db_session, repo_id, {_FILE: _obj("v2")}) c2 = await _commit(db_session, repo_id, s2, offset=10, message="chore: update env") await _history_entry(db_session, repo_id, c1.commit_id, _FILE, offset=0) await _history_entry(db_session, repo_id, c2.commit_id, _FILE, offset=10) await db_session.commit() commit_map = { c1.commit_id: {"message": "init", "author": "tester"}, c2.commit_id: {"message": "chore: update env", "author": "tester"}, } async with _fresh_session() as rs: result = await _snapshot_diff_blame(rs, repo_id, _FILE, commit_map) assert len(result) == 1 assert result[0].commit_id == c2.commit_id assert result[0].symbol_name == _FILE.split("/")[-1] @pytest.mark.anyio async def test_snapshot_diff_blame_falls_back_when_no_history( db_session: AsyncSession, ) -> None: """Falls back to snapshot scan and returns entry when no history entries exist.""" from musehub.api.routes.musehub.ui_blame import _snapshot_diff_blame repo_id = await _make_repo(db_session) s1 = await _snap(db_session, repo_id, {_FILE: _obj("v1")}) c1 = await _commit(db_session, repo_id, s1, offset=0) # No history entries. await db_session.commit() async with _fresh_session() as rs: result = await _snapshot_diff_blame(rs, repo_id, _FILE, {c1.commit_id: {"message": "init", "author": "tester"}}) assert len(result) == 1 assert result[0].commit_id == c1.commit_id @pytest.mark.anyio async def test_snapshot_diff_blame_caps_fallback_scan( db_session: AsyncSession, ) -> None: """Fallback scan fetches at most 50 commits, not all commits in the repo.""" from musehub.api.routes.musehub.ui_blame import _snapshot_diff_blame repo_id = await _make_repo(db_session) # Seed 80 commits — more than the 50-commit cap. last_cid = "" for i in range(80): s = await _snap(db_session, repo_id, {_FILE: _obj("v1")}) c = await _commit(db_session, repo_id, s, offset=i) last_cid = c.commit_id await db_session.commit() fetched_total = 0 async def _spy_batch(session: AsyncSession, ids: list[str]) -> ManifestBatch: nonlocal fetched_total fetched_total += len(ids) from musehub.services.musehub_snapshot import get_snapshot_manifests_batch as _real return await _real(session, ids) with patch( "musehub.api.routes.musehub.ui_blame.get_snapshot_manifests_batch", side_effect=_spy_batch, ): async with _fresh_session() as rs: await _snapshot_diff_blame(rs, repo_id, _FILE, {}) assert fetched_total <= 50, ( f"Fallback fetched {fetched_total} manifests — cap should be 50" )