test_blame_page_perf.py
file-level
1
files
1
commits
0
hotspots
0
π§ dead
0
π₯ blast risk
| 1 | """TDD tests for blame page performance fixes. |
| 2 | |
| 3 | Problem: _snapshot_diff_blame loads every commit in the repo (no LIMIT) and |
| 4 | fetches all their snapshot manifests — O(N_commits * manifest_size) per page load. |
| 5 | For musehub with 900+ commits this times out. |
| 6 | |
| 7 | Fix: use musehub_symbol_history_entries as fast path (same index as blob page). |
| 8 | Fall back to snapshot scan only when no history entries exist, and cap that scan |
| 9 | at a small window (50 commits newest-first, not all commits oldest-first). |
| 10 | |
| 11 | Test matrix |
| 12 | ----------- |
| 13 | test_snapshot_diff_blame_skips_manifest_scan_when_history_exists |
| 14 | No snapshot manifests fetched when symbol_history_entries has an entry for |
| 15 | the file path (bare or prefixed with ::). |
| 16 | |
| 17 | test_snapshot_diff_blame_returns_entry_from_history_index |
| 18 | Returns a SymbolBlameEntry built from the most recent history index entry. |
| 19 | |
| 20 | test_snapshot_diff_blame_falls_back_when_no_history |
| 21 | When no history entries exist, falls back to snapshot scan and returns entry. |
| 22 | |
| 23 | test_snapshot_diff_blame_caps_fallback_scan |
| 24 | Fallback scan fetches at most 50 commits, not all commits in the repo. |
| 25 | """ |
| 26 | from __future__ import annotations |
| 27 | |
| 28 | import secrets |
| 29 | from contextlib import asynccontextmanager |
| 30 | from datetime import datetime, timezone, timedelta |
| 31 | from typing import AsyncGenerator |
| 32 | from unittest.mock import patch |
| 33 | |
| 34 | import msgpack |
| 35 | import pytest |
| 36 | from sqlalchemy.ext.asyncio import AsyncSession |
| 37 | |
| 38 | from musehub.core.genesis import compute_identity_id, compute_repo_id |
| 39 | from musehub.db.musehub_intel_models import MusehubSymbolHistoryEntry |
| 40 | from musehub.db.musehub_repo_models import MusehubCommit, MusehubCommitRef, MusehubRepo, MusehubSnapshot, MusehubSnapshotRef |
| 41 | from musehub.db import database as _database |
| 42 | from musehub.types.json_types import JSONObject, StrDict |
| 43 | |
| 44 | ManifestBatch = dict[str, StrDict] |
| 45 | from muse.core.types import long_id, now_utc_iso |
| 46 | |
| 47 | # ── Constants ──────────────────────────────────────────────────────────────── |
| 48 | |
# Owner identity computed from a fixed seed; used for every repo created here.
_OWNER_ID = compute_identity_id(b"blame-perf-tester")
# File path whose blame is requested by the tests in this module.
_FILE: str = ".env.example"
# Second path constant; not referenced by the code visible in this module.
_OTHER: str = "README.md"
| 52 | |
| 53 | |
| 54 | # ── Helpers ────────────────────────────────────────────────────────────────── |
| 55 | |
def _uid() -> str:
    """Return ``long_id`` applied to 64 freshly generated random hex characters."""
    random_hex = secrets.token_hex(32)
    return long_id(random_hex)
| 58 | |
| 59 | |
def _repo_id() -> str:
    """Compute a repo id for ``_OWNER_ID`` with a random ``blp-`` label.

    The remaining inputs to ``compute_repo_id`` are the literal ``"code"`` and
    the value returned by ``now_utc_iso()`` at call time.
    """
    # Evaluate in the same order as the call arguments: label first, then time.
    random_label = f"blp-{secrets.token_hex(4)}"
    stamp = now_utc_iso()
    return compute_repo_id(_OWNER_ID, random_label, "code", stamp)
| 64 | |
| 65 | |
def _snap_id() -> str:
    """Return a random snapshot id: ``long_id`` of 32 random bytes in hex."""
    return long_id(secrets.token_hex(nbytes=32))
| 68 | |
| 69 | |
def _obj(tag: str) -> str:
    """Return a reproducible object id derived from *tag*.

    The tag's encoded bytes are rendered as hex and right-padded with ``"0"``
    to at least 64 characters before being passed to ``long_id``.
    """
    tag_hex = tag.encode().hex()
    # "0<64": fill with '0', left-align, minimum width 64 (same as ljust).
    return long_id(f"{tag_hex:0<64}")
| 72 | |
| 73 | |
def _blob(manifest: StrDict) -> bytes:
    """Pack *manifest* with msgpack (``use_bin_type=True``) and return the bytes."""
    packed: bytes = msgpack.packb(manifest, use_bin_type=True)
    return packed
| 76 | |
| 77 | |
async def _make_repo(session: AsyncSession) -> str:
    """Insert a public ``blp-test`` repo row, commit it, and return its repo id."""
    repo_id = _repo_id()
    created = datetime.now(tz=timezone.utc)
    repo_row = MusehubRepo(
        repo_id=repo_id,
        name="blp-test",
        owner="blp-tester",
        slug="blp-test",
        visibility="public",
        owner_user_id=_OWNER_ID,
        created_at=created,
        updated_at=created,
    )
    session.add(repo_row)
    await session.commit()
    return repo_id
| 88 | |
| 89 | |
async def _snap(session: AsyncSession, repo_id: str, manifest: StrDict) -> str:
    """Add a snapshot for *manifest* plus its repo reference; return the snapshot id.

    The rows are flushed but not committed; the caller commits.
    """
    snapshot_id = _snap_id()
    created = datetime.now(tz=timezone.utc)
    snapshot_row = MusehubSnapshot(
        snapshot_id=snapshot_id,
        directories=[],
        manifest_blob=_blob(manifest),
        entry_count=len(manifest),
        created_at=created,
    )
    ref_row = MusehubSnapshotRef(
        repo_id=repo_id, snapshot_id=snapshot_id, created_at=created,
    )
    session.add(snapshot_row)
    session.add(ref_row)
    await session.flush()
    return snapshot_id
| 101 | |
| 102 | |
async def _commit(
    session: AsyncSession,
    repo_id: str,
    snapshot_id: str,
    offset: int = 0,
    message: str = "feat: change",
) -> MusehubCommit:
    """Add a parentless ``main``-branch commit and its repo reference.

    Args:
        session: Session the rows are added to (flushed, not committed).
        repo_id: Repo the commit reference is attached to.
        snapshot_id: Snapshot the commit points at.
        offset: Seconds added to the current UTC time for the commit timestamp.
        message: Commit message.

    Returns:
        The ``MusehubCommit`` row that was added.
    """
    commit_id = _uid()
    stamp = datetime.now(tz=timezone.utc) + timedelta(seconds=offset)
    commit_row = MusehubCommit(
        commit_id=commit_id,
        branch="main",
        parent_ids=[],
        message=message,
        author="tester",
        timestamp=stamp,
        snapshot_id=snapshot_id,
    )
    ref_row = MusehubCommitRef(repo_id=repo_id, commit_id=commit_id)
    session.add(commit_row)
    session.add(ref_row)
    await session.flush()
    return commit_row
| 121 | |
| 122 | |
async def _history_entry(
    session: AsyncSession,
    repo_id: str,
    commit_id: str,
    address: str,
    offset: int = 0,
    op: str = "modify",
    message: str = "feat: change",
) -> None:
    """Add one symbol-history row for *address* at *commit_id* and flush it.

    ``committed_at`` is the current UTC time shifted by *offset* seconds; the
    author is always ``"tester"``. The row is flushed but not committed.
    """
    committed_at = datetime.now(tz=timezone.utc) + timedelta(seconds=offset)
    entry = MusehubSymbolHistoryEntry(
        repo_id=repo_id,
        address=address,
        commit_id=commit_id,
        committed_at=committed_at,
        author="tester",
        op=op,
        message=message,
    )
    session.add(entry)
    await session.flush()
| 143 | |
| 144 | |
@asynccontextmanager
async def _fresh_session() -> AsyncGenerator[AsyncSession, None]:
    """Yield a new session obtained from ``_database._async_session_factory``.

    The factory's own async context manager is exited when this context ends.
    """
    factory = _database._async_session_factory
    async with factory() as fresh:
        yield fresh
| 149 | |
| 150 | |
| 151 | # ── Tests ──────────────────────────────────────────────────────────────────── |
| 152 | |
| 153 | |
@pytest.mark.anyio
async def test_snapshot_diff_blame_skips_manifest_scan_when_history_exists(
    db_session: AsyncSession,
) -> None:
    """With a history-index row for the file, blame must not fetch any manifests."""
    from musehub.api.routes.musehub.ui_blame import _snapshot_diff_blame

    # Arrange: one commit containing the file plus a matching history entry.
    repo_id = await _make_repo(db_session)
    snapshot_id = await _snap(db_session, repo_id, {_FILE: _obj("v1")})
    commit = await _commit(db_session, repo_id, snapshot_id, offset=0)
    await _history_entry(db_session, repo_id, commit.commit_id, _FILE, offset=0)
    await db_session.commit()

    commit_map = {commit.commit_id: {"message": "init", "author": "tester"}}
    batch_calls: list[list[str]] = []

    async def _record_batch(session: AsyncSession, ids: list[str]) -> ManifestBatch:
        # Record every requested id list; return no manifests.
        batch_calls.append(ids)
        return {}

    # Act: run blame on a separate session with the batch fetch replaced.
    target = "musehub.api.routes.musehub.ui_blame.get_snapshot_manifests_batch"
    with patch(target, side_effect=_record_batch):
        async with _fresh_session() as read_session:
            result = await _snapshot_diff_blame(
                read_session, repo_id, _FILE, commit_map,
            )

    # Assert: no manifest batch was requested and the single entry is our commit.
    assert not batch_calls, (
        f"get_snapshot_manifests_batch called {len(batch_calls)} time(s) "
        "even though history index has entries for the file"
    )
    assert len(result) == 1
    assert result[0].commit_id == commit.commit_id
| 186 | |
| 187 | |
@pytest.mark.anyio
async def test_snapshot_diff_blame_returns_entry_from_history_index(
    db_session: AsyncSession,
) -> None:
    """Blame yields one entry, taken from the newest history-index row."""
    from musehub.api.routes.musehub.ui_blame import _snapshot_diff_blame

    # Arrange: two commits touching the file, ten seconds apart, both indexed.
    repo_id = await _make_repo(db_session)
    older_snap = await _snap(db_session, repo_id, {_FILE: _obj("v1")})
    older = await _commit(db_session, repo_id, older_snap, offset=0, message="init")
    newer_snap = await _snap(db_session, repo_id, {_FILE: _obj("v2")})
    newer = await _commit(
        db_session, repo_id, newer_snap, offset=10, message="chore: update env",
    )

    await _history_entry(db_session, repo_id, older.commit_id, _FILE, offset=0)
    await _history_entry(db_session, repo_id, newer.commit_id, _FILE, offset=10)
    await db_session.commit()

    commit_map = {
        older.commit_id: {"message": "init", "author": "tester"},
        newer.commit_id: {"message": "chore: update env", "author": "tester"},
    }

    # Act
    async with _fresh_session() as read_session:
        result = await _snapshot_diff_blame(read_session, repo_id, _FILE, commit_map)

    # Assert: only the later commit is reported, named after the file's basename.
    expected_name = _FILE.rsplit("/", 1)[-1]
    assert len(result) == 1
    assert result[0].commit_id == newer.commit_id
    assert result[0].symbol_name == expected_name
| 216 | |
| 217 | |
@pytest.mark.anyio
async def test_snapshot_diff_blame_falls_back_when_no_history(
    db_session: AsyncSession,
) -> None:
    """Without any history-index rows, blame still returns the commit's entry."""
    from musehub.api.routes.musehub.ui_blame import _snapshot_diff_blame

    # Arrange: a single commit containing the file and deliberately no
    # history entries, so only the snapshot data can produce a result.
    repo_id = await _make_repo(db_session)
    snapshot_id = await _snap(db_session, repo_id, {_FILE: _obj("v1")})
    commit = await _commit(db_session, repo_id, snapshot_id, offset=0)
    await db_session.commit()

    commit_map = {commit.commit_id: {"message": "init", "author": "tester"}}

    # Act
    async with _fresh_session() as read_session:
        result = await _snapshot_diff_blame(read_session, repo_id, _FILE, commit_map)

    # Assert
    assert len(result) == 1
    assert result[0].commit_id == commit.commit_id
| 236 | |
| 237 | |
@pytest.mark.anyio
async def test_snapshot_diff_blame_caps_fallback_scan(
    db_session: AsyncSession,
) -> None:
    """Fallback scan fetches at most 50 manifests, not one per commit in the repo.

    Seeds 80 commits with no history-index rows, wraps the manifest batch
    fetch in a counting spy that delegates to the real implementation, and
    checks that the total number of snapshot ids requested stays within the cap.
    """
    from musehub.api.routes.musehub.ui_blame import _snapshot_diff_blame
    # Resolved once up front instead of on every spy invocation.
    from musehub.services.musehub_snapshot import (
        get_snapshot_manifests_batch as _real_batch,
    )

    scan_cap = 50
    seeded_commits = 80

    repo_id = await _make_repo(db_session)

    # Seed 80 commits — more than the 50-commit cap.
    for i in range(seeded_commits):
        snapshot_id = await _snap(db_session, repo_id, {_FILE: _obj("v1")})
        await _commit(db_session, repo_id, snapshot_id, offset=i)
    await db_session.commit()

    fetched_total = 0

    async def _spy_batch(session: AsyncSession, ids: list[str]) -> ManifestBatch:
        # Count the requested ids, then hand off to the real fetch so the
        # code under test sees genuine manifests.
        nonlocal fetched_total
        fetched_total += len(ids)
        return await _real_batch(session, ids)

    with patch(
        "musehub.api.routes.musehub.ui_blame.get_snapshot_manifests_batch",
        side_effect=_spy_batch,
    ):
        async with _fresh_session() as rs:
            await _snapshot_diff_blame(rs, repo_id, _FILE, {})

    # NOTE(review): this bound also holds when the spy is never invoked
    # (fetched_total == 0); consider additionally asserting that the fallback
    # actually ran once that is confirmed against the implementation.
    assert fetched_total <= scan_cap, (
        f"Fallback fetched {fetched_total} manifests — cap should be {scan_cap}"
    )