gabriel / musehub public
test_blob_page_perf.py python
308 lines 10.6 KB
Raw
sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595 fix: typing audit — 0 violations, 0 untyped defs across all… Sonnet 4.6 minor ⚠ breaking 21 days ago
1 """TDD tests for blob page performance fixes.
2
3 Problem 1: _fetch_file_history scans 300 snapshot manifests every page load.
4 Fix: use musehub_symbol_history_entries as fast path (same index as
5 get_last_commit_for_file). Fall back to snapshot scan only when no entries exist.
6
7 Problem 2: _fetch_file_symbols and _fetch_file_intel each call load_symbol_history
8 independently — two identical DB queries per page load.
9 Fix: call load_symbol_history once in blob_page, pass result to both phases.
10
11 Test matrix
12 -----------
13 test_fetch_file_history_skips_snapshot_scan_when_history_exists
14 No snapshot manifests fetched when symbol_history_entries has entries for the file.
15
16 test_fetch_file_history_returns_commits_from_history_index
17 Returns correct commits ordered newest-first from the index.
18
19 test_fetch_file_history_falls_back_when_no_history_entries
20 When no history entries exist, falls back to snapshot scan.
21
22 test_fetch_file_history_deduplicates_commits
23 Multiple addresses for the same file (path::SymA, path::SymB) in the same
24 commit produce one history entry, not one per symbol.
25
26 test_blob_page_calls_load_symbol_history_once
27 load_symbol_history is called at most once per blob_page request, never twice.
28 """
29 from __future__ import annotations
30
31 import secrets
32 from contextlib import asynccontextmanager
33 from datetime import datetime, timezone, timedelta
34 from typing import AsyncGenerator
35 from unittest.mock import AsyncMock, patch, call
36
37 import msgpack
38 import pytest
39 from sqlalchemy.ext.asyncio import AsyncSession
40
41 from musehub.core.genesis import compute_identity_id, compute_repo_id
42 from musehub.db.musehub_intel_models import MusehubSymbolHistoryEntry
43 from musehub.db.musehub_repo_models import MusehubCommit, MusehubCommitRef, MusehubRepo, MusehubSnapshot, MusehubSnapshotRef
44 from musehub.db import database as _database
45 from musehub.types.json_types import JSONObject, StrDict
46 from muse.core.types import long_id, now_utc_iso
47
# Return type of the ``get_snapshot_manifests_batch`` stand-in used by the
# spy in the tests below.
# NOTE(review): presumably keyed by snapshot id — confirm against the real helper.
ManifestBatch = dict[str, StrDict]
49
50 # ── Constants ─────────────────────────────────────────────────────────────────
51
# Identity id used as the owner of every repo these tests create.
_OWNER_ID = compute_identity_id(b"blob-perf-tester")
# Path whose history the tests query.
_FILE = "src/billing.py"
# Second path; not referenced by any test in this file.
_OTHER = "src/auth.py"
55
56
57 # ── Helpers ───────────────────────────────────────────────────────────────────
58
def _uid() -> str:
    """Return a fresh id: ``long_id`` applied to 32 random bytes rendered as hex."""
    random_hex = secrets.token_hex(32)
    return long_id(random_hex)
61
62
def _repo_id() -> str:
    """Return a repo id for the test owner with a random ``bp-`` prefixed name.

    The random suffix keeps ids distinct between calls; the remaining
    arguments are the literal ``"code"`` and the current ``now_utc_iso()`` value.
    """
    unique_name = f"bp-{secrets.token_hex(4)}"
    stamp = now_utc_iso()
    return compute_repo_id(_OWNER_ID, unique_name, "code", stamp)
67
68
def _snap_id() -> str:
    """Return a fresh snapshot id: ``long_id`` applied to 32 random bytes as hex."""
    random_hex = secrets.token_hex(32)
    return long_id(random_hex)
71
72
def _obj(tag: str) -> str:
    """Return a deterministic object id derived from *tag*.

    The tag's encoded bytes are rendered as hex and right-padded with ``"0"``
    to 64 characters before being handed to ``long_id``.
    """
    tag_hex = tag.encode().hex()
    padded_hex = tag_hex.ljust(64, "0")
    return long_id(padded_hex)
75
76
def _blob(manifest: StrDict) -> bytes:
    """Serialize *manifest* with msgpack (``use_bin_type=True``) and return the bytes."""
    packed = msgpack.packb(manifest, use_bin_type=True)
    return packed
79
80
async def _make_repo(session: AsyncSession) -> str:
    """Insert a public ``bp-test`` repo owned by the test identity and commit it.

    Returns the id of the newly created repo.
    """
    created = datetime.now(tz=timezone.utc)
    new_repo_id = _repo_id()
    repo_row = MusehubRepo(
        repo_id=new_repo_id,
        name="bp-test",
        owner="bp-tester",
        slug="bp-test",
        visibility="public",
        owner_user_id=_OWNER_ID,
        created_at=created,
        updated_at=created,
    )
    session.add(repo_row)
    await session.commit()
    return new_repo_id
91
92
async def _snap(session: AsyncSession, repo_id: str, manifest: StrDict) -> str:
    """Stage a snapshot holding *manifest* plus its ref row for *repo_id*.

    The manifest is stored msgpack-encoded. Rows are flushed, not committed;
    the caller commits. Returns the new snapshot id.
    """
    snapshot_id = _snap_id()
    created = datetime.now(tz=timezone.utc)

    snapshot_row = MusehubSnapshot(
        snapshot_id=snapshot_id,
        directories=[],
        manifest_blob=_blob(manifest),
        entry_count=len(manifest),
        created_at=created,
    )
    session.add(snapshot_row)

    ref_row = MusehubSnapshotRef(
        repo_id=repo_id, snapshot_id=snapshot_id, created_at=created,
    )
    session.add(ref_row)

    await session.flush()
    return snapshot_id
104
105
async def _commit(
    session: AsyncSession,
    repo_id: str,
    snapshot_id: str,
    offset: int = 0,
    message: str = "feat: change",
    author: str = "tester",
) -> MusehubCommit:
    """Stage a parentless commit on ``main`` pointing at *snapshot_id*.

    The commit timestamp is the current UTC time shifted by *offset* seconds,
    which lets tests control relative ordering. A ref row linking the commit
    to *repo_id* is staged as well. Rows are flushed, not committed.

    Returns the ``MusehubCommit`` row that was added.
    """
    commit_id = _uid()
    stamped_at = datetime.now(tz=timezone.utc) + timedelta(seconds=offset)

    commit_row = MusehubCommit(
        commit_id=commit_id,
        branch="main",
        parent_ids=[],
        message=message,
        author=author,
        timestamp=stamped_at,
        snapshot_id=snapshot_id,
    )
    session.add(commit_row)

    ref_row = MusehubCommitRef(repo_id=repo_id, commit_id=commit_id)
    session.add(ref_row)

    await session.flush()
    return commit_row
125
126
async def _history_entry(
    session: AsyncSession,
    repo_id: str,
    commit_id: str,
    address: str,
    offset: int = 0,
    op: str = "modify",
    message: str = "feat: change",
) -> None:
    """Stage one symbol-history row for *address* at *commit_id*.

    ``committed_at`` is the current UTC time shifted by *offset* seconds and
    the author is always ``"tester"``. The row is flushed, not committed.
    """
    committed_at = datetime.now(tz=timezone.utc) + timedelta(seconds=offset)
    entry = MusehubSymbolHistoryEntry(
        repo_id=repo_id,
        address=address,
        commit_id=commit_id,
        committed_at=committed_at,
        author="tester",
        op=op,
        message=message,
    )
    session.add(entry)
    await session.flush()
147
148
@asynccontextmanager
async def _fresh_session() -> AsyncGenerator[AsyncSession, None]:
    """Yield a new session obtained from the database module's session factory.

    The factory is looked up on ``_database`` at call time, so whatever
    factory is installed when the test runs is the one used.
    """
    make_session = _database._async_session_factory
    async with make_session() as fresh:
        yield fresh
153
154
155 # ── Tests: _fetch_file_history fast path ──────────────────────────────────────
156
157
@pytest.mark.anyio
async def test_fetch_file_history_skips_snapshot_scan_when_history_exists(
    db_session: AsyncSession,
) -> None:
    """With history rows present, no snapshot manifest batch is requested.

    Seeds one commit and a matching symbol-history row for ``_FILE``, swaps
    ``get_snapshot_manifests_batch`` in ``ui_blob`` for a recording spy, and
    checks that the spy is never invoked while the commit is still returned.
    """
    import musehub.api.routes.musehub.ui_blob as _blob_mod

    # Every id list passed to the patched batch loader lands here.
    recorded_batches: list[list[str]] = []

    async def _record_batch(session: AsyncSession, ids: list[str]) -> ManifestBatch:
        recorded_batches.append(ids)
        return {}

    repo_id = await _make_repo(db_session)
    snapshot_id = await _snap(db_session, repo_id, {_FILE: _obj("v1")})
    commit = await _commit(db_session, repo_id, snapshot_id, offset=0)
    await _history_entry(db_session, repo_id, commit.commit_id, _FILE, offset=0)
    await db_session.commit()

    batch_patch = patch(
        "musehub.api.routes.musehub.ui_blob.get_snapshot_manifests_batch",
        side_effect=_record_batch,
    )
    with batch_patch:
        async with _fresh_session() as rs:
            history = await _blob_mod._fetch_file_history(
                rs, repo_id, _FILE, commit.commit_id
            )

    assert recorded_batches == [], (
        f"get_snapshot_manifests_batch called {len(recorded_batches)} time(s) "
        "even though symbol_history_entries has entries"
    )
    assert len(history) == 1
    assert history[0]["commit_id"] == commit.commit_id
192
193
@pytest.mark.anyio
async def test_fetch_file_history_returns_commits_from_history_index(
    db_session: AsyncSession,
) -> None:
    """History rows come back newest-first.

    Three commits touching ``_FILE`` are seeded 10 seconds apart, each with a
    matching history row; the result must list them in reverse creation order.
    """
    import musehub.api.routes.musehub.ui_blob as _blob_mod

    repo_id = await _make_repo(db_session)

    # (offset in seconds, blob tag, commit message), oldest first.
    revisions = [
        (0, "v1", "init"),
        (10, "v2", "feat: v2"),
        (20, "v3", "feat: v3"),
    ]

    commits: list[MusehubCommit] = []
    for offset, tag, message in revisions:
        snapshot_id = await _snap(db_session, repo_id, {_FILE: _obj(tag)})
        commits.append(
            await _commit(db_session, repo_id, snapshot_id, offset=offset, message=message)
        )

    # History rows are added only after all commits exist, oldest first.
    for (offset, _tag, _message), commit in zip(revisions, commits):
        await _history_entry(db_session, repo_id, commit.commit_id, _FILE, offset=offset)
    await db_session.commit()

    newest = commits[-1]
    async with _fresh_session() as rs:
        history = await _blob_mod._fetch_file_history(
            rs, repo_id, _FILE, newest.commit_id
        )

    expected_order = [commit.commit_id for commit in reversed(commits)]
    assert [row["commit_id"] for row in history] == expected_order
220
221
@pytest.mark.anyio
async def test_fetch_file_history_falls_back_when_no_history_entries(
    db_session: AsyncSession,
) -> None:
    """Without any history rows the commit is still found via the fallback path."""
    import musehub.api.routes.musehub.ui_blob as _blob_mod

    repo_id = await _make_repo(db_session)
    snapshot_id = await _snap(db_session, repo_id, {_FILE: _obj("v1")})
    commit = await _commit(db_session, repo_id, snapshot_id, offset=0)
    # Deliberately seed no symbol-history rows for this commit.
    await db_session.commit()

    async with _fresh_session() as rs:
        history = await _blob_mod._fetch_file_history(
            rs, repo_id, _FILE, commit.commit_id
        )

    assert len(history) == 1
    assert history[0]["commit_id"] == commit.commit_id
242
243
@pytest.mark.anyio
async def test_fetch_file_history_deduplicates_commits(
    db_session: AsyncSession,
) -> None:
    """Several history rows for one commit collapse into a single result entry."""
    import musehub.api.routes.musehub.ui_blob as _blob_mod

    repo_id = await _make_repo(db_session)
    snapshot_id = await _snap(db_session, repo_id, {_FILE: _obj("v1")})
    commit = await _commit(db_session, repo_id, snapshot_id, offset=0)

    # The file itself plus two symbols inside it, all recorded on one commit.
    addresses = (_FILE, f"{_FILE}::compute", f"{_FILE}::validate")
    for address in addresses:
        await _history_entry(db_session, repo_id, commit.commit_id, address, offset=0)
    await db_session.commit()

    async with _fresh_session() as rs:
        history = await _blob_mod._fetch_file_history(
            rs, repo_id, _FILE, commit.commit_id
        )

    assert len(history) == 1, (
        f"Expected 1 deduplicated entry, got {len(history)}"
    )
    assert history[0]["commit_id"] == commit.commit_id
270
271
272 # ── Tests: load_symbol_history called once per request ────────────────────────
273
274
@pytest.mark.anyio
async def test_blob_page_calls_load_symbol_history_once(
    db_session: AsyncSession,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """load_symbol_history is called at most once per blob_page request.

    The test does not drive ``blob_page`` itself. It mirrors the call pattern:
    load the symbol history once, then run the symbols and intel phases
    concurrently with that preloaded value. A counting stand-in is installed
    as ``ui_blob.load_symbol_history``, so any extra load performed by either
    phase through that module attribute raises the count above 1.
    """
    import asyncio

    import musehub.api.routes.musehub.ui_blob as _blob_mod

    call_count = 0

    # NOTE(review): JSONObject is assumed to match what the real
    # load_symbol_history returns — confirm. The stub always returns an empty
    # dict, which the previous ``-> None`` annotation contradicted.
    async def _counting_load(
        session: AsyncSession, repo_id: str, *, file_path: str | None = None
    ) -> JSONObject:
        nonlocal call_count
        call_count += 1
        return {}

    # NOTE(review): raising=False means a missing or renamed attribute on
    # ui_blob is created silently, in which case this test passes vacuously.
    monkeypatch.setattr(_blob_mod, "load_symbol_history", _counting_load, raising=False)

    repo_id = await _make_repo(db_session)
    s1 = await _snap(db_session, repo_id, {_FILE: _obj("v1")})
    # Only the persisted commit row is needed; the returned object is unused.
    await _commit(db_session, repo_id, s1, offset=0)
    await db_session.commit()

    # Call both phases the way blob_page does.
    async with _fresh_session() as rs:
        sh = await _counting_load(rs, repo_id, file_path=_FILE)
        await asyncio.gather(
            _blob_mod._fetch_file_symbols_from_history(rs, repo_id, _FILE, sh),
            _blob_mod._fetch_file_intel_from_history(rs, repo_id, _FILE, sh),
        )

    assert call_count == 1, (
        f"load_symbol_history called {call_count} time(s), expected 1"
    )
File History 1 commit
sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595 fix: typing audit — 0 violations, 0 untyped defs across all… Sonnet 4.6 minor 21 days ago