gabriel / musehub public
test_blame_page_perf.py python
272 lines 9.4 KB
Raw
sha256:3ff9c9863a9891bdcde71b4a43228f66d0493e38b7cc1d09fe9eb7de774046b2 feat: add repair-commit wire endpoint (API parity with repa… Opus 4.8 minor ⚠ breaking 1 day ago
1 """TDD tests for blame page performance fixes.
2
3 Problem: _snapshot_diff_blame loads every commit in the repo (no LIMIT) and
4 fetches all their snapshot manifests — O(N_commits * manifest_size) per page load.
5 For musehub with 900+ commits this times out.
6
7 Fix: use musehub_symbol_history_entries as fast path (same index as blob page).
8 Fall back to snapshot scan only when no history entries exist, and cap that scan
9 at a small window (50 commits newest-first, not all commits oldest-first).
10
11 Test matrix
12 -----------
13 test_snapshot_diff_blame_skips_manifest_scan_when_history_exists
14 No snapshot manifests fetched when symbol_history_entries has an entry for
15 the file path (bare or prefixed with ::).
16
17 test_snapshot_diff_blame_returns_entry_from_history_index
18 Returns a SymbolBlameEntry built from the most recent history index entry.
19
20 test_snapshot_diff_blame_falls_back_when_no_history
21 When no history entries exist, falls back to snapshot scan and returns entry.
22
23 test_snapshot_diff_blame_caps_fallback_scan
24 Fallback scan fetches at most 50 commits, not all commits in the repo.
25 """
26 from __future__ import annotations
27
28 import secrets
29 from contextlib import asynccontextmanager
30 from datetime import datetime, timezone, timedelta
31 from typing import AsyncGenerator
32 from unittest.mock import patch
33
34 import msgpack
35 import pytest
36 from sqlalchemy.ext.asyncio import AsyncSession
37
38 from musehub.core.genesis import compute_identity_id, compute_repo_id
39 from musehub.db.musehub_intel_models import MusehubSymbolHistoryEntry
40 from musehub.db.musehub_repo_models import MusehubCommit, MusehubCommitRef, MusehubRepo, MusehubSnapshot, MusehubSnapshotRef
41 from musehub.db import database as _database
42 from musehub.types.json_types import JSONObject, StrDict
43
44 ManifestBatch = dict[str, StrDict]
45 from muse.core.types import long_id, now_utc_iso
46
47 # ── Constants ─────────────────────────────────────────────────────────────────
48
49 _OWNER_ID = compute_identity_id(b"blame-perf-tester")
50 _FILE = ".env.example"
51 _OTHER = "README.md"
52
53
54 # ── Helpers ───────────────────────────────────────────────────────────────────
55
def _uid() -> str:
    """Return a fresh random id: 32 random bytes as hex, passed through ``long_id``."""
    random_hex = secrets.token_hex(32)
    return long_id(random_hex)
58
59
def _repo_id() -> str:
    """Compute a repo id for a randomly named ``blp-…`` repo owned by ``_OWNER_ID``.

    The random suffix keeps the name distinct between calls; the current UTC
    ISO timestamp and the literal ``"code"`` are passed through to
    ``compute_repo_id`` unchanged.
    """
    unique_name = f"blp-{secrets.token_hex(4)}"
    timestamp_iso = now_utc_iso()
    return compute_repo_id(_OWNER_ID, unique_name, "code", timestamp_iso)
64
65
def _snap_id() -> str:
    """Return a fresh random snapshot id: 32 random bytes as hex, via ``long_id``."""
    token = secrets.token_hex(nbytes=32)
    return long_id(token)
68
69
def _obj(tag: str) -> str:
    """Derive a deterministic object id from *tag*.

    The tag's encoded bytes are hex-encoded, right-padded with ``"0"`` to at
    least 64 characters, and handed to ``long_id``.
    """
    hex_tag = tag.encode().hex()
    padded_hex = hex_tag.ljust(64, "0")
    return long_id(padded_hex)
72
73
def _blob(manifest: StrDict) -> bytes:
    """Serialize *manifest* to msgpack bytes (``use_bin_type=True``)."""
    packed = msgpack.packb(manifest, use_bin_type=True)
    return packed
76
77
async def _make_repo(session: AsyncSession) -> str:
    """Insert a public ``blp-test`` repo row, commit it, and return its repo id."""
    new_repo_id = _repo_id()
    created = datetime.now(tz=timezone.utc)
    repo_row = MusehubRepo(
        repo_id=new_repo_id,
        name="blp-test",
        owner="blp-tester",
        slug="blp-test",
        visibility="public",
        owner_user_id=_OWNER_ID,
        created_at=created,
        updated_at=created,
    )
    session.add(repo_row)
    await session.commit()
    return new_repo_id
88
89
async def _snap(session: AsyncSession, repo_id: str, manifest: StrDict) -> str:
    """Add a snapshot holding *manifest* plus its repo ref; flush and return the id.

    The manifest is stored msgpack-encoded in ``manifest_blob`` and
    ``entry_count`` is set to the number of manifest entries. The rows are
    flushed, not committed — the caller commits.
    """
    snapshot_id = _snap_id()
    stamp = datetime.now(tz=timezone.utc)

    snapshot_row = MusehubSnapshot(
        snapshot_id=snapshot_id,
        directories=[],
        manifest_blob=_blob(manifest),
        entry_count=len(manifest),
        created_at=stamp,
    )
    session.add(snapshot_row)

    ref_row = MusehubSnapshotRef(
        repo_id=repo_id,
        snapshot_id=snapshot_id,
        created_at=stamp,
    )
    session.add(ref_row)

    await session.flush()
    return snapshot_id
101
102
async def _commit(
    session: AsyncSession,
    repo_id: str,
    snapshot_id: str,
    offset: int = 0,
    message: str = "feat: change",
) -> MusehubCommit:
    """Add a parentless commit on ``main`` pointing at *snapshot_id*; flush and return it.

    The commit timestamp is the current UTC time shifted by *offset* seconds,
    which lets callers order commits deterministically. A ``MusehubCommitRef``
    linking the commit to *repo_id* is added alongside. The rows are flushed,
    not committed — the caller commits.
    """
    commit_id = _uid()
    committed_at = datetime.now(tz=timezone.utc) + timedelta(seconds=offset)

    commit_row = MusehubCommit(
        commit_id=commit_id,
        branch="main",
        parent_ids=[],
        message=message,
        author="tester",
        timestamp=committed_at,
        snapshot_id=snapshot_id,
    )
    session.add(commit_row)

    ref_row = MusehubCommitRef(repo_id=repo_id, commit_id=commit_id)
    session.add(ref_row)

    await session.flush()
    return commit_row
121
122
async def _history_entry(
    session: AsyncSession,
    repo_id: str,
    commit_id: str,
    address: str,
    offset: int = 0,
    op: str = "modify",
    message: str = "feat: change",
) -> None:
    """Add one symbol-history index row for *address* at *commit_id* and flush.

    ``committed_at`` is the current UTC time shifted by *offset* seconds; the
    author is always ``"tester"``. The row is flushed, not committed — the
    caller commits.
    """
    committed_at = datetime.now(tz=timezone.utc) + timedelta(seconds=offset)
    entry = MusehubSymbolHistoryEntry(
        repo_id=repo_id,
        address=address,
        commit_id=commit_id,
        committed_at=committed_at,
        author="tester",
        op=op,
        message=message,
    )
    session.add(entry)
    await session.flush()
143
144
@asynccontextmanager
async def _fresh_session() -> AsyncGenerator[AsyncSession, None]:
    """Yield a new session from the database module's async session factory.

    The factory is looked up on ``_database`` at call time, and the session is
    exited when the ``async with`` block at the call site ends.
    """
    session_factory = _database._async_session_factory
    async with session_factory() as fresh:
        yield fresh
149
150
151 # ── Tests ─────────────────────────────────────────────────────────────────────
152
153
@pytest.mark.anyio
async def test_snapshot_diff_blame_skips_manifest_scan_when_history_exists(
    db_session: AsyncSession,
) -> None:
    """With a history-index entry for the file, no snapshot manifests are fetched."""
    from musehub.api.routes.musehub.ui_blame import _snapshot_diff_blame

    # Arrange: one commit touching _FILE, plus a matching history-index row.
    repo_id = await _make_repo(db_session)
    snapshot_id = await _snap(db_session, repo_id, {_FILE: _obj("v1")})
    commit = await _commit(db_session, repo_id, snapshot_id, offset=0)
    await _history_entry(db_session, repo_id, commit.commit_id, _FILE, offset=0)
    await db_session.commit()

    commit_map = {commit.commit_id: {"message": "init", "author": "tester"}}
    recorded_batches: list[list[str]] = []

    async def _record_batch(session: AsyncSession, ids: list[str]) -> ManifestBatch:
        # Record every requested id list; return no manifests.
        recorded_batches.append(ids)
        return {}

    manifest_patch = patch(
        "musehub.api.routes.musehub.ui_blame.get_snapshot_manifests_batch",
        side_effect=_record_batch,
    )

    # Act: run blame on a separate session with the manifest loader spied on.
    with manifest_patch:
        async with _fresh_session() as read_session:
            entries = await _snapshot_diff_blame(
                read_session, repo_id, _FILE, commit_map,
            )

    # Assert: the spy was never invoked and the single entry is our commit.
    assert recorded_batches == [], (
        f"get_snapshot_manifests_batch called {len(recorded_batches)} time(s) "
        "even though history index has entries for the file"
    )
    assert len(entries) == 1
    assert entries[0].commit_id == commit.commit_id
186
187
@pytest.mark.anyio
async def test_snapshot_diff_blame_returns_entry_from_history_index(
    db_session: AsyncSession,
) -> None:
    """The single blame entry is built from the newest history-index row."""
    from musehub.api.routes.musehub.ui_blame import _snapshot_diff_blame

    # Arrange: two commits touching _FILE, the second one 10 seconds later.
    repo_id = await _make_repo(db_session)
    first_snapshot = await _snap(db_session, repo_id, {_FILE: _obj("v1")})
    first_commit = await _commit(
        db_session, repo_id, first_snapshot, offset=0, message="init",
    )
    second_snapshot = await _snap(db_session, repo_id, {_FILE: _obj("v2")})
    second_commit = await _commit(
        db_session, repo_id, second_snapshot, offset=10, message="chore: update env",
    )

    await _history_entry(db_session, repo_id, first_commit.commit_id, _FILE, offset=0)
    await _history_entry(db_session, repo_id, second_commit.commit_id, _FILE, offset=10)
    await db_session.commit()

    commit_map = {
        first_commit.commit_id: {"message": "init", "author": "tester"},
        second_commit.commit_id: {"message": "chore: update env", "author": "tester"},
    }

    # Act
    async with _fresh_session() as read_session:
        entries = await _snapshot_diff_blame(read_session, repo_id, _FILE, commit_map)

    # Assert: one entry, attributed to the later commit, named after the file.
    expected_name = _FILE.rsplit("/", 1)[-1]
    assert len(entries) == 1
    assert entries[0].commit_id == second_commit.commit_id
    assert entries[0].symbol_name == expected_name
216
217
@pytest.mark.anyio
async def test_snapshot_diff_blame_falls_back_when_no_history(
    db_session: AsyncSession,
) -> None:
    """Without history-index rows, blame still returns an entry for the commit."""
    from musehub.api.routes.musehub.ui_blame import _snapshot_diff_blame

    # Arrange: a single commit touching _FILE and deliberately no history rows.
    repo_id = await _make_repo(db_session)
    snapshot_id = await _snap(db_session, repo_id, {_FILE: _obj("v1")})
    commit = await _commit(db_session, repo_id, snapshot_id, offset=0)
    await db_session.commit()

    commit_map = {commit.commit_id: {"message": "init", "author": "tester"}}

    # Act
    async with _fresh_session() as read_session:
        entries = await _snapshot_diff_blame(read_session, repo_id, _FILE, commit_map)

    # Assert
    assert len(entries) == 1
    assert entries[0].commit_id == commit.commit_id
236
237
@pytest.mark.anyio
async def test_snapshot_diff_blame_caps_fallback_scan(
    db_session: AsyncSession,
) -> None:
    """Fallback scan fetches at most 50 commits, not all commits in the repo."""
    from musehub.api.routes.musehub.ui_blame import _snapshot_diff_blame
    # Resolve the real implementation once, instead of on every spy call. The
    # patch below replaces the name inside ui_blame only, so this import still
    # yields the unpatched function.
    from musehub.services.musehub_snapshot import (
        get_snapshot_manifests_batch as _real_batch,
    )

    seeded_commits = 80
    scan_cap = 50

    repo_id = await _make_repo(db_session)

    # Seed 80 commits — more than the 50-commit cap. No history-index rows are
    # written, so the call below has no history entries to use.
    for offset in range(seeded_commits):
        snapshot_id = await _snap(db_session, repo_id, {_FILE: _obj("v1")})
        await _commit(db_session, repo_id, snapshot_id, offset=offset)
    await db_session.commit()

    fetched_total = 0

    async def _spy_batch(session: AsyncSession, ids: list[str]) -> ManifestBatch:
        # Count every requested snapshot id, then delegate to the real loader.
        nonlocal fetched_total
        fetched_total += len(ids)
        return await _real_batch(session, ids)

    with patch(
        "musehub.api.routes.musehub.ui_blame.get_snapshot_manifests_batch",
        side_effect=_spy_batch,
    ):
        async with _fresh_session() as rs:
            await _snapshot_diff_blame(rs, repo_id, _FILE, {})

    # NOTE(review): this passes vacuously if the fallback never goes through
    # get_snapshot_manifests_batch at all — consider also asserting a lower
    # bound once the fallback's loading path is confirmed.
    assert fetched_total <= scan_cap, (
        f"Fallback fetched {fetched_total} manifests — cap should be {scan_cap}"
    )
File History 1 commit
sha256:3ff9c9863a9891bdcde71b4a43228f66d0493e38b7cc1d09fe9eb7de774046b2 feat: add repair-commit wire endpoint (API parity with repa… Opus 4.8 minor 1 day ago