test_blob_page_perf.py
python
sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595
fix: typing audit — 0 violations, 0 untyped defs across all…
Sonnet 4.6
minor
⚠ breaking
21 days ago
| 1 | """TDD tests for blob page performance fixes. |
| 2 | |
| 3 | Problem 1: _fetch_file_history scans 300 snapshot manifests every page load. |
| 4 | Fix: use musehub_symbol_history_entries as fast path (same index as |
| 5 | get_last_commit_for_file). Fall back to snapshot scan only when no entries exist. |
| 6 | |
| 7 | Problem 2: _fetch_file_symbols and _fetch_file_intel each call load_symbol_history |
| 8 | independently — two identical DB queries per page load. |
| 9 | Fix: call load_symbol_history once in blob_page, pass result to both phases. |
| 10 | |
| 11 | Test matrix |
| 12 | ----------- |
| 13 | test_fetch_file_history_skips_snapshot_scan_when_history_exists |
| 14 | No snapshot manifests fetched when symbol_history_entries has entries for the file. |
| 15 | |
| 16 | test_fetch_file_history_returns_commits_from_history_index |
| 17 | Returns correct commits ordered newest-first from the index. |
| 18 | |
| 19 | test_fetch_file_history_falls_back_when_no_history_entries |
| 20 | When no history entries exist, falls back to snapshot scan. |
| 21 | |
| 22 | test_fetch_file_history_deduplicates_commits |
| 23 | Multiple addresses for the same file (path::SymA, path::SymB) in the same |
| 24 | commit produce one history entry, not one per symbol. |
| 25 | |
| 26 | test_blob_page_calls_load_symbol_history_once |
| 27 | load_symbol_history is called at most once per blob_page request, never twice. |
| 28 | """ |
| 29 | from __future__ import annotations |
| 30 | |
| 31 | import secrets |
| 32 | from contextlib import asynccontextmanager |
| 33 | from datetime import datetime, timezone, timedelta |
| 34 | from typing import AsyncGenerator |
| 35 | from unittest.mock import AsyncMock, patch, call |
| 36 | |
| 37 | import msgpack |
| 38 | import pytest |
| 39 | from sqlalchemy.ext.asyncio import AsyncSession |
| 40 | |
| 41 | from musehub.core.genesis import compute_identity_id, compute_repo_id |
| 42 | from musehub.db.musehub_intel_models import MusehubSymbolHistoryEntry |
| 43 | from musehub.db.musehub_repo_models import MusehubCommit, MusehubCommitRef, MusehubRepo, MusehubSnapshot, MusehubSnapshotRef |
| 44 | from musehub.db import database as _database |
| 45 | from musehub.types.json_types import JSONObject, StrDict |
| 46 | from muse.core.types import long_id, now_utc_iso |
| 47 | |
# Return shape of the ``get_snapshot_manifests_batch`` spy defined in the tests below.
ManifestBatch = dict[str, StrDict]

# ── Constants ─────────────────────────────────────────────────────────────────

# Identity id of the test owner; stored as ``owner_user_id`` on created repos
# and passed to ``compute_repo_id`` when deriving repo ids.
_OWNER_ID = compute_identity_id(b"blob-perf-tester")
# File path used in snapshot manifests and history entries by the tests.
_FILE = "src/billing.py"
# A second file path; not referenced by the helpers or tests visible in this file.
_OTHER = "src/auth.py"
| 55 | |
| 56 | |
| 57 | # ── Helpers ─────────────────────────────────────────────────────────────────── |
| 58 | |
def _uid() -> str:
    """Return a fresh identifier: ``long_id`` applied to 32 random bytes of hex."""
    random_hex = secrets.token_hex(32)
    return long_id(random_hex)
| 61 | |
| 62 | |
def _repo_id() -> str:
    """Compute a repo id for ``_OWNER_ID`` using a randomised ``bp-<hex>`` name.

    The random suffix keeps ids distinct between calls; the current UTC
    timestamp from ``now_utc_iso`` is passed as the final argument.
    """
    unique_name = f"bp-{secrets.token_hex(4)}"
    created_at = now_utc_iso()
    return compute_repo_id(_OWNER_ID, unique_name, "code", created_at)
| 67 | |
| 68 | |
def _snap_id() -> str:
    """Return a fresh snapshot identifier built from 32 random bytes of hex."""
    token = secrets.token_hex(32)
    return long_id(token)
| 71 | |
| 72 | |
def _obj(tag: str) -> str:
    """Derive a deterministic object id from *tag*.

    The tag is encoded to bytes, hex-encoded, and right-padded with ``"0"``
    to at least 64 characters before being handed to ``long_id``.
    """
    padded_hex = tag.encode().hex().ljust(64, "0")
    return long_id(padded_hex)
| 75 | |
| 76 | |
def _blob(manifest: StrDict) -> bytes:
    """Serialise *manifest* with msgpack (``use_bin_type=True``) and return the bytes."""
    packed = msgpack.packb(manifest, use_bin_type=True)
    return packed
| 79 | |
| 80 | |
async def _make_repo(session: AsyncSession) -> str:
    """Insert a public ``bp-test`` repo row, commit it, and return its repo id."""
    new_repo_id = _repo_id()
    stamp = datetime.now(tz=timezone.utc)
    repo_row = MusehubRepo(
        repo_id=new_repo_id,
        name="bp-test",
        owner="bp-tester",
        slug="bp-test",
        visibility="public",
        owner_user_id=_OWNER_ID,
        created_at=stamp,
        updated_at=stamp,
    )
    session.add(repo_row)
    await session.commit()
    return new_repo_id
| 91 | |
| 92 | |
async def _snap(session: AsyncSession, repo_id: str, manifest: StrDict) -> str:
    """Add a snapshot holding *manifest* plus its repo link, flush, and return the snapshot id.

    The manifest is stored msgpack-encoded in ``manifest_blob`` and its
    length is recorded as ``entry_count``. Only a flush is issued; the
    caller is responsible for committing.
    """
    snapshot_id = _snap_id()
    stamp = datetime.now(tz=timezone.utc)

    snapshot_row = MusehubSnapshot(
        snapshot_id=snapshot_id,
        directories=[],
        manifest_blob=_blob(manifest),
        entry_count=len(manifest),
        created_at=stamp,
    )
    session.add(snapshot_row)

    ref_row = MusehubSnapshotRef(
        repo_id=repo_id,
        snapshot_id=snapshot_id,
        created_at=stamp,
    )
    session.add(ref_row)

    await session.flush()
    return snapshot_id
| 104 | |
| 105 | |
async def _commit(
    session: AsyncSession,
    repo_id: str,
    snapshot_id: str,
    offset: int = 0,
    message: str = "feat: change",
    author: str = "tester",
) -> MusehubCommit:
    """Add a parentless commit on ``main`` plus its repo link, flush, and return the row.

    Args:
        session: Session the rows are added to (flushed, not committed).
        repo_id: Repo the commit is linked to via ``MusehubCommitRef``.
        snapshot_id: Snapshot the commit points at.
        offset: Seconds added to the current UTC time to form the timestamp.
        message: Commit message.
        author: Commit author.
    """
    commit_id = _uid()
    committed_at = datetime.now(tz=timezone.utc) + timedelta(seconds=offset)

    commit_row = MusehubCommit(
        commit_id=commit_id,
        branch="main",
        parent_ids=[],
        message=message,
        author=author,
        timestamp=committed_at,
        snapshot_id=snapshot_id,
    )
    session.add(commit_row)

    ref_row = MusehubCommitRef(repo_id=repo_id, commit_id=commit_id)
    session.add(ref_row)

    await session.flush()
    return commit_row
| 125 | |
| 126 | |
async def _history_entry(
    session: AsyncSession,
    repo_id: str,
    commit_id: str,
    address: str,
    offset: int = 0,
    op: str = "modify",
    message: str = "feat: change",
) -> None:
    """Add one symbol-history row authored by ``"tester"`` and flush the session.

    Args:
        session: Session the row is added to (flushed, not committed).
        repo_id: Repo the entry belongs to.
        commit_id: Commit the entry is attributed to.
        address: Address recorded on the entry (a file path, or ``path::Symbol``).
        offset: Seconds added to the current UTC time to form ``committed_at``.
        op: Operation recorded on the entry.
        message: Message recorded on the entry.
    """
    committed_at = datetime.now(tz=timezone.utc) + timedelta(seconds=offset)
    entry = MusehubSymbolHistoryEntry(
        repo_id=repo_id,
        address=address,
        commit_id=commit_id,
        committed_at=committed_at,
        author="tester",
        op=op,
        message=message,
    )
    session.add(entry)
    await session.flush()
| 147 | |
| 148 | |
@asynccontextmanager
async def _fresh_session() -> AsyncGenerator[AsyncSession, None]:
    """Yield a new session obtained from the database module's session factory."""
    session_factory = _database._async_session_factory
    async with session_factory() as fresh:
        yield fresh
| 153 | |
| 154 | |
| 155 | # ── Tests: _fetch_file_history fast path ────────────────────────────────────── |
| 156 | |
| 157 | |
@pytest.mark.anyio
async def test_fetch_file_history_skips_snapshot_scan_when_history_exists(
    db_session: AsyncSession,
) -> None:
    """The snapshot-manifest batch loader is never invoked when history rows exist."""
    import musehub.api.routes.musehub.ui_blob as _blob_mod

    # Arrange: one commit for the file, with a matching history entry.
    repo_id = await _make_repo(db_session)
    snapshot_id = await _snap(db_session, repo_id, {_FILE: _obj("v1")})
    commit = await _commit(db_session, repo_id, snapshot_id, offset=0)
    await _history_entry(db_session, repo_id, commit.commit_id, _FILE, offset=0)
    await db_session.commit()

    # Every invocation of the patched batch loader records its id list here.
    batch_calls: list[list[str]] = []

    async def _spy_batch(session: AsyncSession, ids: list[str]) -> ManifestBatch:
        batch_calls.append(ids)
        return {}

    spy_patch = patch(
        "musehub.api.routes.musehub.ui_blob.get_snapshot_manifests_batch",
        side_effect=_spy_batch,
    )
    with spy_patch:
        async with _fresh_session() as rs:
            result = await _blob_mod._fetch_file_history(
                rs, repo_id, _FILE, commit.commit_id
            )

    assert not batch_calls, (
        f"get_snapshot_manifests_batch called {len(batch_calls)} time(s) "
        "even though symbol_history_entries has entries"
    )
    assert len(result) == 1
    first = result[0]
    assert first["commit_id"] == commit.commit_id
| 192 | |
| 193 | |
@pytest.mark.anyio
async def test_fetch_file_history_returns_commits_from_history_index(
    db_session: AsyncSession,
) -> None:
    """History built from the index lists commits newest-first."""
    import musehub.api.routes.musehub.ui_blob as _blob_mod

    repo_id = await _make_repo(db_session)

    # (object tag, timestamp offset in seconds, commit message), oldest first.
    versions = [
        ("v1", 0, "init"),
        ("v2", 10, "feat: v2"),
        ("v3", 20, "feat: v3"),
    ]

    # Create all snapshots/commits first, then the history entries.
    commits: list[MusehubCommit] = []
    for tag, offset, message in versions:
        snapshot_id = await _snap(db_session, repo_id, {_FILE: _obj(tag)})
        commits.append(
            await _commit(db_session, repo_id, snapshot_id, offset=offset, message=message)
        )

    for (_tag, offset, _message), commit in zip(versions, commits):
        await _history_entry(db_session, repo_id, commit.commit_id, _FILE, offset=offset)
    await db_session.commit()

    newest = commits[-1]
    async with _fresh_session() as rs:
        result = await _blob_mod._fetch_file_history(
            rs, repo_id, _FILE, newest.commit_id
        )

    expected_ids = [commit.commit_id for commit in reversed(commits)]
    actual_ids = [row["commit_id"] for row in result]
    assert actual_ids == expected_ids
| 220 | |
| 221 | |
@pytest.mark.anyio
async def test_fetch_file_history_falls_back_when_no_history_entries(
    db_session: AsyncSession,
) -> None:
    """With an empty history index the commit is still found for the file."""
    import musehub.api.routes.musehub.ui_blob as _blob_mod

    repo_id = await _make_repo(db_session)
    snapshot_id = await _snap(db_session, repo_id, {_FILE: _obj("v1")})
    commit = await _commit(db_session, repo_id, snapshot_id, offset=0)
    # Deliberately no _history_entry() call: the index stays empty for this repo.
    await db_session.commit()

    async with _fresh_session() as rs:
        result = await _blob_mod._fetch_file_history(
            rs, repo_id, _FILE, commit.commit_id
        )

    assert len(result) == 1
    only = result[0]
    assert only["commit_id"] == commit.commit_id
| 242 | |
| 243 | |
@pytest.mark.anyio
async def test_fetch_file_history_deduplicates_commits(
    db_session: AsyncSession,
) -> None:
    """Several addresses recorded for one commit collapse into a single history row."""
    import musehub.api.routes.musehub.ui_blob as _blob_mod

    repo_id = await _make_repo(db_session)
    snapshot_id = await _snap(db_session, repo_id, {_FILE: _obj("v1")})
    commit = await _commit(db_session, repo_id, snapshot_id, offset=0)

    # The file itself plus two of its symbols, all attributed to the same commit.
    addresses = (_FILE, f"{_FILE}::compute", f"{_FILE}::validate")
    for address in addresses:
        await _history_entry(db_session, repo_id, commit.commit_id, address, offset=0)
    await db_session.commit()

    async with _fresh_session() as rs:
        result = await _blob_mod._fetch_file_history(
            rs, repo_id, _FILE, commit.commit_id
        )

    assert len(result) == 1, (
        f"Expected 1 deduplicated entry, got {len(result)}"
    )
    sole = result[0]
    assert sole["commit_id"] == commit.commit_id
| 270 | |
| 271 | |
| 272 | # ── Tests: load_symbol_history called once per request ──────────────────────── |
| 273 | |
| 274 | |
@pytest.mark.anyio
async def test_blob_page_calls_load_symbol_history_once(
    db_session: AsyncSession,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """load_symbol_history is called at most once per blob_page request.

    The module-level ``load_symbol_history`` in ``ui_blob`` is replaced with a
    counting stub. The test calls the stub once itself (standing in for
    ``blob_page``) and passes the result to both ``*_from_history`` phases.
    If either phase still invoked ``load_symbol_history`` on its own, the
    counter would exceed 1.
    """
    import asyncio

    import musehub.api.routes.musehub.ui_blob as _blob_mod

    call_count = 0

    # NOTE(review): the real loader's return type is not visible from this
    # file; JSONObject (already imported here) matches the empty dict the
    # stub returns — confirm against load_symbol_history's signature.
    async def _counting_load(
        session: AsyncSession,
        repo_id: str,
        *,
        file_path: str | None = None,
    ) -> JSONObject:
        nonlocal call_count
        call_count += 1
        return {}

    # raising=False: install the stub even if the attribute is absent on the module.
    monkeypatch.setattr(_blob_mod, "load_symbol_history", _counting_load, raising=False)

    repo_id = await _make_repo(db_session)
    s1 = await _snap(db_session, repo_id, {_FILE: _obj("v1")})
    # The commit row only needs to exist; its handle is not used afterwards.
    await _commit(db_session, repo_id, s1, offset=0)
    await db_session.commit()

    # Call both phases the way blob_page does: load once, share the result.
    async with _fresh_session() as rs:
        sh = await _counting_load(rs, repo_id, file_path=_FILE)
        await asyncio.gather(
            _blob_mod._fetch_file_symbols_from_history(rs, repo_id, _FILE, sh),
            _blob_mod._fetch_file_intel_from_history(rs, repo_id, _FILE, sh),
        )

    assert call_count == 1, (
        f"load_symbol_history called {call_count} time(s), expected 1"
    )
File History
1 commit
sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595
fix: typing audit — 0 violations, 0 untyped defs across all…
Sonnet 4.6
minor
⚠
21 days ago