test_file_history_performance.py
python
sha256:3ff9c9863a9891bdcde71b4a43228f66d0493e38b7cc1d09fe9eb7de774046b2
feat: add repair-commit wire endpoint (API parity with repa…
Opus 4.8
minor
⚠ breaking
1 day ago
| 1 | """TDD tests for _fetch_file_history performance fix. |
| 2 | |
| 3 | Problem: _fetch_file_history loads up to 300 commits and calls |
| 4 | get_snapshot_manifest() once per commit — 300 individual DB queries + |
| 5 | 300 full msgpack deserializations per file page view. |
| 6 | |
| 7 | Fix: batch-fetch all snapshot manifests with a single IN query using |
| 8 | get_snapshot_manifests_batch(), then look up the file path in the |
| 9 | resulting dict. |
| 10 | |
| 11 | Covers: |
| 12 | _fetch_file_history — query count |
| 13 | - test_file_history_does_not_call_per_commit_manifest_fetch |
| 14 | - test_file_history_calls_batch_fetch_once |
| 15 | |
| 16 | _fetch_file_history — correctness |
| 17 | - test_file_history_returns_only_commits_where_file_changed |
| 18 | - test_file_history_returns_empty_when_file_not_in_head |
| 19 | - test_file_history_returns_empty_when_no_commits |
| 20 | - test_file_history_respects_limit |
| 21 | - test_file_history_unchanged_file_returns_one_entry |
| 22 | """ |
| 23 | from __future__ import annotations |
| 24 | |
| 25 | import secrets |
| 26 | from datetime import datetime, timezone, timedelta |
| 27 | from contextlib import asynccontextmanager |
| 28 | from typing import AsyncGenerator |
| 29 | |
| 30 | import msgpack |
| 31 | import pytest |
| 32 | from sqlalchemy.ext.asyncio import AsyncSession |
| 33 | |
| 34 | from muse.core.types import long_id, now_utc_iso |
| 35 | from musehub.api.routes.musehub.ui_blob import _fetch_file_history |
| 36 | from musehub.core.genesis import compute_identity_id, compute_repo_id |
| 37 | from musehub.db import database as _database |
| 38 | from musehub.db.musehub_repo_models import MusehubCommit, MusehubCommitRef, MusehubRepo, MusehubSnapshot, MusehubSnapshotRef |
| 39 | from musehub.types.json_types import JSONObject, StrDict |
| 40 | |
| 41 | # --------------------------------------------------------------------------- |
| 42 | # Helpers |
| 43 | # --------------------------------------------------------------------------- |
| 44 | |
| 45 | _OWNER_ID = compute_identity_id(b"perf-tester") |
| 46 | _FILE = "musehub/services/billing.py" |
| 47 | _OTHER_FILE = "musehub/services/auth.py" |
| 48 | |
| 49 | |
| 50 | def _uid() -> str: |
| 51 | return long_id(secrets.token_hex(32)) |
| 52 | |
| 53 | |
| 54 | def _repo_id() -> str: |
| 55 | return compute_repo_id(_OWNER_ID, f"perf-test-{secrets.token_hex(4)}", "code", |
| 56 | now_utc_iso()) |
| 57 | |
| 58 | |
| 59 | def _snap_id() -> str: |
| 60 | return long_id(secrets.token_hex(32)) |
| 61 | |
| 62 | |
| 63 | def _obj_id(tag: str) -> str: |
| 64 | return long_id(tag.encode().hex().ljust(64, "0")) |
| 65 | |
| 66 | |
| 67 | def _manifest_blob(path_oid: StrDict) -> bytes: |
| 68 | return msgpack.packb(path_oid, use_bin_type=True) |
| 69 | |
| 70 | |
| 71 | async def _make_repo(session: AsyncSession) -> str: |
| 72 | rid = _repo_id() |
| 73 | now = datetime.now(tz=timezone.utc) |
| 74 | session.add(MusehubRepo( |
| 75 | repo_id=rid, |
| 76 | name="perf-test", |
| 77 | owner="perf-tester", |
| 78 | slug="perf-test", |
| 79 | visibility="public", |
| 80 | owner_user_id=_OWNER_ID, |
| 81 | created_at=now, |
| 82 | updated_at=now, |
| 83 | )) |
| 84 | await session.commit() |
| 85 | return rid |
| 86 | |
| 87 | |
| 88 | async def _add_snapshot(session: AsyncSession, repo_id: str, manifest: StrDict) -> str: |
| 89 | sid = _snap_id() |
| 90 | session.add(MusehubSnapshot( |
| 91 | snapshot_id=sid, |
| 92 | directories=[], |
| 93 | manifest_blob=_manifest_blob(manifest), |
| 94 | entry_count=len(manifest), |
| 95 | )) |
| 96 | session.add(MusehubSnapshotRef(repo_id=repo_id, snapshot_id=sid)) |
| 97 | await session.flush() |
| 98 | return sid |
| 99 | |
| 100 | |
| 101 | @asynccontextmanager |
| 102 | async def _fresh_session() -> AsyncGenerator[AsyncSession, None]: |
| 103 | """Open a fresh session from the (test-overridden) factory. |
| 104 | |
| 105 | Using the shared db_session for both writes and reads leaves asyncpg in |
| 106 | an unexpected state on teardown — this helper avoids that by keeping |
| 107 | read calls isolated in their own short-lived session. |
| 108 | """ |
| 109 | async with _database._async_session_factory() as session: |
| 110 | yield session |
| 111 | |
| 112 | |
| 113 | async def _add_commit( |
| 114 | session: AsyncSession, |
| 115 | repo_id: str, |
| 116 | snapshot_id: str, |
| 117 | branch: str = "main", |
| 118 | ts_offset_seconds: int = 0, |
| 119 | message: str = "feat: change", |
| 120 | ) -> str: |
| 121 | cid = _uid() |
| 122 | now = datetime.now(tz=timezone.utc) + timedelta(seconds=ts_offset_seconds) |
| 123 | session.add(MusehubCommit( |
| 124 | commit_id=cid, |
| 125 | branch=branch, |
| 126 | parent_ids=[], |
| 127 | message=message, |
| 128 | author="tester", |
| 129 | timestamp=now, |
| 130 | snapshot_id=snapshot_id, |
| 131 | )) |
| 132 | session.add(MusehubCommitRef(repo_id=repo_id, commit_id=cid)) |
| 133 | await session.flush() |
| 134 | return cid |
| 135 | |
| 136 | |
| 137 | # --------------------------------------------------------------------------- |
| 138 | # Query-count tests — these fail until the N+1 is fixed |
| 139 | # --------------------------------------------------------------------------- |
| 140 | |
| 141 | |
| 142 | @pytest.mark.anyio |
| 143 | async def test_file_history_does_not_call_per_commit_manifest_fetch( |
| 144 | db_session: AsyncSession, |
| 145 | monkeypatch: pytest.MonkeyPatch, |
| 146 | ) -> None: |
| 147 | """get_snapshot_manifest must NOT be called per-commit after the fix. |
| 148 | |
| 149 | The old code called it once per commit in the 300-row loop. |
| 150 | The new code must never call the single-snapshot variant inside the loop. |
| 151 | """ |
| 152 | import musehub.api.routes.musehub.ui_blob as _module |
| 153 | |
| 154 | calls: list[str] = [] |
| 155 | |
| 156 | async def _spy_single(session: AsyncSession, snapshot_id: str) -> JSONObject: # type: ignore[override] |
| 157 | calls.append(snapshot_id) |
| 158 | return {} |
| 159 | |
| 160 | monkeypatch.setattr(_module, "get_snapshot_manifest", _spy_single) |
| 161 | |
| 162 | repo_id = await _make_repo(db_session) |
| 163 | head_snap = await _add_snapshot(db_session, repo_id, {_FILE: _obj_id("v2")}) |
| 164 | head_id = await _add_commit(db_session, repo_id, head_snap, ts_offset_seconds=10) |
| 165 | await db_session.commit() |
| 166 | |
| 167 | async with _fresh_session() as read_session: |
| 168 | await _fetch_file_history(read_session, repo_id, _FILE, head_id) |
| 169 | |
| 170 | assert calls == [], f"get_snapshot_manifest was called {len(calls)} time(s) — N+1 not fixed" |
| 171 | |
| 172 | |
| 173 | @pytest.mark.anyio |
| 174 | async def test_file_history_calls_batch_fetch_once( |
| 175 | db_session: AsyncSession, |
| 176 | monkeypatch: pytest.MonkeyPatch, |
| 177 | ) -> None: |
| 178 | """get_snapshot_manifests_batch must be called instead of per-commit fetches.""" |
| 179 | import musehub.api.routes.musehub.ui_blob as _module |
| 180 | from musehub.services import musehub_snapshot as _snap_svc |
| 181 | |
| 182 | batch_calls: list[list[str]] = [] |
| 183 | _real_batch = _snap_svc.get_snapshot_manifests_batch |
| 184 | |
| 185 | async def _spy_batch(session: AsyncSession, snapshot_ids: list[str]) -> JSONObject: # type: ignore[override] |
| 186 | batch_calls.append(list(snapshot_ids)) |
| 187 | return await _real_batch(session, snapshot_ids) |
| 188 | |
| 189 | monkeypatch.setattr(_module, "get_snapshot_manifests_batch", _spy_batch) |
| 190 | |
| 191 | repo_id = await _make_repo(db_session) |
| 192 | head_snap_id = "" |
| 193 | head_commit_id = "" |
| 194 | for i in range(5): |
| 195 | snap_id = await _add_snapshot(db_session, repo_id, {_FILE: _obj_id(f"v{i}")}) |
| 196 | cid = await _add_commit(db_session, repo_id, snap_id, ts_offset_seconds=i * 10) |
| 197 | if i == 4: |
| 198 | head_snap_id = snap_id |
| 199 | head_commit_id = cid |
| 200 | |
| 201 | await db_session.commit() |
| 202 | |
| 203 | async with _fresh_session() as read_session: |
| 204 | await _fetch_file_history(read_session, repo_id, _FILE, head_commit_id) |
| 205 | |
| 206 | assert len(batch_calls) >= 1, "get_snapshot_manifests_batch was never called" |
| 207 | all_fetched = [sid for call in batch_calls for sid in call] |
| 208 | assert head_snap_id in all_fetched, "head snapshot_id must be included in batch fetch" |
| 209 | |
| 210 | |
| 211 | # --------------------------------------------------------------------------- |
| 212 | # Correctness tests |
| 213 | # --------------------------------------------------------------------------- |
| 214 | |
| 215 | |
| 216 | @pytest.mark.anyio |
| 217 | async def test_file_history_returns_only_commits_where_file_changed( |
| 218 | db_session: AsyncSession, |
| 219 | ) -> None: |
| 220 | """Only commits where the file's object_id changes between adjacent snapshots are returned.""" |
| 221 | repo_id = await _make_repo(db_session) |
| 222 | |
| 223 | # Commit 1: file = v1 (oldest) |
| 224 | s1 = await _add_snapshot(db_session, repo_id, {_FILE: _obj_id("v1")}) |
| 225 | c1 = await _add_commit(db_session, repo_id, s1, ts_offset_seconds=0, message="init") |
| 226 | |
| 227 | # Commit 2: file = v1 (unchanged — should NOT appear in history) |
| 228 | s2 = await _add_snapshot(db_session, repo_id, {_FILE: _obj_id("v1")}) |
| 229 | c2 = await _add_commit(db_session, repo_id, s2, ts_offset_seconds=10, message="no-op") |
| 230 | |
| 231 | # Commit 3: file = v2 (changed — should appear) |
| 232 | s3 = await _add_snapshot(db_session, repo_id, {_FILE: _obj_id("v2")}) |
| 233 | c3 = await _add_commit(db_session, repo_id, s3, ts_offset_seconds=20, message="feat: v2") |
| 234 | |
| 235 | # Commit 4: file = v3 (changed — HEAD, should appear) |
| 236 | s4 = await _add_snapshot(db_session, repo_id, {_FILE: _obj_id("v3")}) |
| 237 | c4 = await _add_commit(db_session, repo_id, s4, ts_offset_seconds=30, message="feat: v3") |
| 238 | |
| 239 | await db_session.commit() |
| 240 | |
| 241 | async with _fresh_session() as read_session: |
| 242 | history = await _fetch_file_history(read_session, repo_id, _FILE, c4) |
| 243 | |
| 244 | commit_ids = {h["commit_id_full"] for h in history} |
| 245 | assert c4 in commit_ids, "HEAD commit (v3) should be in history" |
| 246 | assert c3 in commit_ids, "commit that introduced v2 should be in history" |
| 247 | # Walking backward: c4(v3)→c3(v2)→c2(v1)→c1(v1) |
| 248 | # c2 appears because the file changed from v2→v1 between c3 and c2. |
| 249 | # c1 is skipped because c1 and c2 have the same oid — consecutive duplicates are collapsed. |
| 250 | assert c1 not in commit_ids, "c1 has same oid as c2 — consecutive duplicate, should be skipped" |
| 251 | |
| 252 | |
| 253 | @pytest.mark.anyio |
| 254 | async def test_file_history_returns_empty_when_file_not_in_head( |
| 255 | db_session: AsyncSession, |
| 256 | ) -> None: |
| 257 | """Returns [] when the file path does not exist in the head snapshot.""" |
| 258 | repo_id = await _make_repo(db_session) |
| 259 | snap = await _add_snapshot(db_session, repo_id, {_OTHER_FILE: _obj_id("v1")}) |
| 260 | head_id = await _add_commit(db_session, repo_id, snap) |
| 261 | await db_session.commit() |
| 262 | |
| 263 | async with _fresh_session() as read_session: |
| 264 | history = await _fetch_file_history(read_session, repo_id, _FILE, head_id) |
| 265 | assert history == [] |
| 266 | |
| 267 | |
| 268 | @pytest.mark.anyio |
| 269 | async def test_file_history_returns_empty_when_no_commits( |
| 270 | db_session: AsyncSession, |
| 271 | ) -> None: |
| 272 | """Returns [] when the head commit cannot be found in the DB.""" |
| 273 | repo_id = await _make_repo(db_session) |
| 274 | await db_session.commit() |
| 275 | |
| 276 | async with _fresh_session() as read_session: |
| 277 | history = await _fetch_file_history(read_session, repo_id, _FILE, _uid()) |
| 278 | assert history == [] |
| 279 | |
| 280 | |
| 281 | @pytest.mark.anyio |
| 282 | async def test_file_history_respects_limit( |
| 283 | db_session: AsyncSession, |
| 284 | ) -> None: |
| 285 | """History is capped at the requested limit even when more changes exist.""" |
| 286 | repo_id = await _make_repo(db_session) |
| 287 | |
| 288 | head_snap = None |
| 289 | head_cid = None |
| 290 | for i in range(25): |
| 291 | snap = await _add_snapshot(db_session, repo_id, {_FILE: _obj_id(f"v{i}")}) |
| 292 | cid = await _add_commit(db_session, repo_id, snap, ts_offset_seconds=i * 10) |
| 293 | if i == 24: |
| 294 | head_snap = snap |
| 295 | head_cid = cid |
| 296 | |
| 297 | await db_session.commit() |
| 298 | |
| 299 | async with _fresh_session() as read_session: |
| 300 | history = await _fetch_file_history(read_session, repo_id, _FILE, head_cid, limit=5) |
| 301 | assert len(history) <= 5 |
| 302 | |
| 303 | |
| 304 | @pytest.mark.anyio |
| 305 | async def test_file_history_unchanged_file_returns_one_entry( |
| 306 | db_session: AsyncSession, |
| 307 | ) -> None: |
| 308 | """A file that never changes shows only the initial commit.""" |
| 309 | repo_id = await _make_repo(db_session) |
| 310 | |
| 311 | head_cid = None |
| 312 | for i in range(4): |
| 313 | snap = await _add_snapshot(db_session, repo_id, {_FILE: _obj_id("v1")}) # always v1 |
| 314 | cid = await _add_commit(db_session, repo_id, snap, ts_offset_seconds=i * 10) |
| 315 | if i == 3: |
| 316 | head_cid = cid |
| 317 | |
| 318 | await db_session.commit() |
| 319 | |
| 320 | async with _fresh_session() as read_session: |
| 321 | history = await _fetch_file_history(read_session, repo_id, _FILE, head_cid) |
| 322 | assert len(history) == 1 |
File History
1 commit
sha256:3ff9c9863a9891bdcde71b4a43228f66d0493e38b7cc1d09fe9eb7de774046b2
feat: add repair-commit wire endpoint (API parity with repa…
Opus 4.8
minor
⚠
1 day ago