"""TDD tests for _fetch_file_history performance fix. Problem: _fetch_file_history loads up to 300 commits and calls get_snapshot_manifest() once per commit — 300 individual DB queries + 300 full msgpack deserializations per file page view. Fix: batch-fetch all snapshot manifests with a single IN query using get_snapshot_manifests_batch(), then look up the file path in the resulting dict. Covers: _fetch_file_history — query count - test_file_history_does_not_call_per_commit_manifest_fetch - test_file_history_calls_batch_fetch_once _fetch_file_history — correctness - test_file_history_returns_only_commits_where_file_changed - test_file_history_returns_empty_when_file_not_in_head - test_file_history_returns_empty_when_no_commits - test_file_history_respects_limit - test_file_history_unchanged_file_returns_one_entry """ from __future__ import annotations import secrets from datetime import datetime, timezone, timedelta from contextlib import asynccontextmanager from typing import AsyncGenerator import msgpack import pytest from sqlalchemy.ext.asyncio import AsyncSession from muse.core.types import long_id, now_utc_iso from musehub.api.routes.musehub.ui_blob import _fetch_file_history from musehub.core.genesis import compute_identity_id, compute_repo_id from musehub.db import database as _database from musehub.db.musehub_repo_models import MusehubCommit, MusehubCommitRef, MusehubRepo, MusehubSnapshot, MusehubSnapshotRef from musehub.types.json_types import JSONObject, StrDict # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- _OWNER_ID = compute_identity_id(b"perf-tester") _FILE = "musehub/services/billing.py" _OTHER_FILE = "musehub/services/auth.py" def _uid() -> str: return long_id(secrets.token_hex(32)) def _repo_id() -> str: return compute_repo_id(_OWNER_ID, f"perf-test-{secrets.token_hex(4)}", "code", now_utc_iso()) def _snap_id() -> str: return long_id(secrets.token_hex(32)) def _obj_id(tag: str) -> str: return long_id(tag.encode().hex().ljust(64, "0")) def _manifest_blob(path_oid: StrDict) -> bytes: return msgpack.packb(path_oid, use_bin_type=True) async def _make_repo(session: AsyncSession) -> str: rid = _repo_id() now = datetime.now(tz=timezone.utc) session.add(MusehubRepo( repo_id=rid, name="perf-test", owner="perf-tester", slug="perf-test", visibility="public", owner_user_id=_OWNER_ID, created_at=now, updated_at=now, )) await session.commit() return rid async def _add_snapshot(session: AsyncSession, repo_id: str, manifest: StrDict) -> str: sid = _snap_id() session.add(MusehubSnapshot( snapshot_id=sid, directories=[], manifest_blob=_manifest_blob(manifest), entry_count=len(manifest), )) session.add(MusehubSnapshotRef(repo_id=repo_id, snapshot_id=sid)) await session.flush() return sid @asynccontextmanager async def _fresh_session() -> AsyncGenerator[AsyncSession, None]: """Open a fresh session from the (test-overridden) factory. Using the shared db_session for both writes and reads leaves asyncpg in an unexpected state on teardown — this helper avoids that by keeping read calls isolated in their own short-lived session. """ async with _database._async_session_factory() as session: yield session async def _add_commit( session: AsyncSession, repo_id: str, snapshot_id: str, branch: str = "main", ts_offset_seconds: int = 0, message: str = "feat: change", ) -> str: cid = _uid() now = datetime.now(tz=timezone.utc) + timedelta(seconds=ts_offset_seconds) session.add(MusehubCommit( commit_id=cid, branch=branch, parent_ids=[], message=message, author="tester", timestamp=now, snapshot_id=snapshot_id, )) session.add(MusehubCommitRef(repo_id=repo_id, commit_id=cid)) await session.flush() return cid # --------------------------------------------------------------------------- # Query-count tests — these fail until the N+1 is fixed # --------------------------------------------------------------------------- @pytest.mark.anyio async def test_file_history_does_not_call_per_commit_manifest_fetch( db_session: AsyncSession, monkeypatch: pytest.MonkeyPatch, ) -> None: """get_snapshot_manifest must NOT be called per-commit after the fix. The old code called it once per commit in the 300-row loop. The new code must never call the single-snapshot variant inside the loop. """ import musehub.api.routes.musehub.ui_blob as _module calls: list[str] = [] async def _spy_single(session: AsyncSession, snapshot_id: str) -> JSONObject: # type: ignore[override] calls.append(snapshot_id) return {} monkeypatch.setattr(_module, "get_snapshot_manifest", _spy_single) repo_id = await _make_repo(db_session) head_snap = await _add_snapshot(db_session, repo_id, {_FILE: _obj_id("v2")}) head_id = await _add_commit(db_session, repo_id, head_snap, ts_offset_seconds=10) await db_session.commit() async with _fresh_session() as read_session: await _fetch_file_history(read_session, repo_id, _FILE, head_id) assert calls == [], f"get_snapshot_manifest was called {len(calls)} time(s) — N+1 not fixed" @pytest.mark.anyio async def test_file_history_calls_batch_fetch_once( db_session: AsyncSession, monkeypatch: pytest.MonkeyPatch, ) -> None: """get_snapshot_manifests_batch must be called instead of per-commit fetches.""" import musehub.api.routes.musehub.ui_blob as _module from musehub.services import musehub_snapshot as _snap_svc batch_calls: list[list[str]] = [] _real_batch = _snap_svc.get_snapshot_manifests_batch async def _spy_batch(session: AsyncSession, snapshot_ids: list[str]) -> JSONObject: # type: ignore[override] batch_calls.append(list(snapshot_ids)) return await _real_batch(session, snapshot_ids) monkeypatch.setattr(_module, "get_snapshot_manifests_batch", _spy_batch) repo_id = await _make_repo(db_session) head_snap_id = "" head_commit_id = "" for i in range(5): snap_id = await _add_snapshot(db_session, repo_id, {_FILE: _obj_id(f"v{i}")}) cid = await _add_commit(db_session, repo_id, snap_id, ts_offset_seconds=i * 10) if i == 4: head_snap_id = snap_id head_commit_id = cid await db_session.commit() async with _fresh_session() as read_session: await _fetch_file_history(read_session, repo_id, _FILE, head_commit_id) assert len(batch_calls) >= 1, "get_snapshot_manifests_batch was never called" all_fetched = [sid for call in batch_calls for sid in call] assert head_snap_id in all_fetched, "head snapshot_id must be included in batch fetch" # --------------------------------------------------------------------------- # Correctness tests # --------------------------------------------------------------------------- @pytest.mark.anyio async def test_file_history_returns_only_commits_where_file_changed( db_session: AsyncSession, ) -> None: """Only commits where the file's object_id changes between adjacent snapshots are returned.""" repo_id = await _make_repo(db_session) # Commit 1: file = v1 (oldest) s1 = await _add_snapshot(db_session, repo_id, {_FILE: _obj_id("v1")}) c1 = await _add_commit(db_session, repo_id, s1, ts_offset_seconds=0, message="init") # Commit 2: file = v1 (unchanged — should NOT appear in history) s2 = await _add_snapshot(db_session, repo_id, {_FILE: _obj_id("v1")}) c2 = await _add_commit(db_session, repo_id, s2, ts_offset_seconds=10, message="no-op") # Commit 3: file = v2 (changed — should appear) s3 = await _add_snapshot(db_session, repo_id, {_FILE: _obj_id("v2")}) c3 = await _add_commit(db_session, repo_id, s3, ts_offset_seconds=20, message="feat: v2") # Commit 4: file = v3 (changed — HEAD, should appear) s4 = await _add_snapshot(db_session, repo_id, {_FILE: _obj_id("v3")}) c4 = await _add_commit(db_session, repo_id, s4, ts_offset_seconds=30, message="feat: v3") await db_session.commit() async with _fresh_session() as read_session: history = await _fetch_file_history(read_session, repo_id, _FILE, c4) commit_ids = {h["commit_id_full"] for h in history} assert c4 in commit_ids, "HEAD commit (v3) should be in history" assert c3 in commit_ids, "commit that introduced v2 should be in history" # Walking backward: c4(v3)→c3(v2)→c2(v1)→c1(v1) # c2 appears because the file changed from v2→v1 between c3 and c2. # c1 is skipped because c1 and c2 have the same oid — consecutive duplicates are collapsed. assert c1 not in commit_ids, "c1 has same oid as c2 — consecutive duplicate, should be skipped" @pytest.mark.anyio async def test_file_history_returns_empty_when_file_not_in_head( db_session: AsyncSession, ) -> None: """Returns [] when the file path does not exist in the head snapshot.""" repo_id = await _make_repo(db_session) snap = await _add_snapshot(db_session, repo_id, {_OTHER_FILE: _obj_id("v1")}) head_id = await _add_commit(db_session, repo_id, snap) await db_session.commit() async with _fresh_session() as read_session: history = await _fetch_file_history(read_session, repo_id, _FILE, head_id) assert history == [] @pytest.mark.anyio async def test_file_history_returns_empty_when_no_commits( db_session: AsyncSession, ) -> None: """Returns [] when the head commit cannot be found in the DB.""" repo_id = await _make_repo(db_session) await db_session.commit() async with _fresh_session() as read_session: history = await _fetch_file_history(read_session, repo_id, _FILE, _uid()) assert history == [] @pytest.mark.anyio async def test_file_history_respects_limit( db_session: AsyncSession, ) -> None: """History is capped at the requested limit even when more changes exist.""" repo_id = await _make_repo(db_session) head_snap = None head_cid = None for i in range(25): snap = await _add_snapshot(db_session, repo_id, {_FILE: _obj_id(f"v{i}")}) cid = await _add_commit(db_session, repo_id, snap, ts_offset_seconds=i * 10) if i == 24: head_snap = snap head_cid = cid await db_session.commit() async with _fresh_session() as read_session: history = await _fetch_file_history(read_session, repo_id, _FILE, head_cid, limit=5) assert len(history) <= 5 @pytest.mark.anyio async def test_file_history_unchanged_file_returns_one_entry( db_session: AsyncSession, ) -> None: """A file that never changes shows only the initial commit.""" repo_id = await _make_repo(db_session) head_cid = None for i in range(4): snap = await _add_snapshot(db_session, repo_id, {_FILE: _obj_id("v1")}) # always v1 cid = await _add_commit(db_session, repo_id, snap, ts_offset_seconds=i * 10) if i == 3: head_cid = cid await db_session.commit() async with _fresh_session() as read_session: history = await _fetch_file_history(read_session, repo_id, _FILE, head_cid) assert len(history) == 1