test_last_commit_for_file_performance.py
python
sha256:3c58668648c7323bb9f5c6881cfe6a3f14fc93fcb73b537d253732952a5bf8bf
chore: bump version to 0.2.0rc12
Sonnet 4.6
patch
8 days ago
| 1 | """TDD tests for get_last_commit_for_file performance fix + blob_page parallelism. |
| 2 | |
| 3 | Problem 1: get_last_commit_for_file walks up to 200 commits and calls |
| 4 | get_snapshot_manifest() once per commit — same N+1 as _fetch_file_history. |
| 5 | |
| 6 | Problem 2: blob_page runs phases 2/3/4 sequentially even though they are |
| 7 | independent — easy asyncio.gather win. |
| 8 | |
| 9 | Fix 1: batch-fetch all snapshot manifests with one IN query. |
| 10 | Fix 2: gather phases 2/3/4 concurrently after the sequential file-meta resolve. |
| 11 | |
| 12 | Covers: |
| 13 | get_last_commit_for_file — query count |
| 14 | - test_last_commit_does_not_call_per_commit_manifest_fetch |
| 15 | - test_last_commit_uses_batch_fetch |
| 16 | |
| 17 | get_last_commit_for_file — correctness |
| 18 | - test_last_commit_returns_commit_that_introduced_current_version |
| 19 | - test_last_commit_returns_head_when_file_changed_in_head |
| 20 | - test_last_commit_returns_none_when_file_missing_from_head |
| 21 | - test_last_commit_returns_none_when_commit_not_found |
| 22 | |
| 23 | blob_page phases — parallelism |
| 24 | - test_blob_page_phases_run_concurrently |
| 25 | """ |
| 26 | from __future__ import annotations |
| 27 | |
| 28 | import asyncio |
| 29 | import secrets |
| 30 | from contextlib import asynccontextmanager |
| 31 | from datetime import datetime, timezone, timedelta |
| 32 | from typing import AsyncGenerator |
| 33 | |
| 34 | import msgpack |
| 35 | import pytest |
| 36 | from sqlalchemy.ext.asyncio import AsyncSession |
| 37 | |
| 38 | from musehub.core.genesis import compute_identity_id, compute_repo_id |
| 39 | from musehub.db import database as _database |
| 40 | from musehub.db.musehub_repo_models import MusehubCommit, MusehubCommitRef, MusehubRepo, MusehubSnapshot, MusehubSnapshotRef |
| 41 | from musehub.services.musehub_repository import get_last_commit_for_file |
| 42 | from musehub.types.json_types import JSONObject, StrDict |
| 43 | from muse.core.types import long_id, now_utc_iso |
| 44 | |
| 45 | # --------------------------------------------------------------------------- |
| 46 | # Shared helpers (mirrors test_file_history_performance.py) |
| 47 | # --------------------------------------------------------------------------- |
| 48 | |
| 49 | _OWNER_ID = compute_identity_id(b"lcf-tester") |
| 50 | _FILE = "musehub/core/billing.py" |
| 51 | _OTHER = "musehub/core/auth.py" |
| 52 | |
| 53 | |
| 54 | def _uid() -> str: |
| 55 | return long_id(secrets.token_hex(32)) |
| 56 | |
| 57 | |
| 58 | def _repo_id() -> str: |
| 59 | return compute_repo_id( |
| 60 | _OWNER_ID, f"lcf-{secrets.token_hex(4)}", "code", |
| 61 | now_utc_iso(), |
| 62 | ) |
| 63 | |
| 64 | |
| 65 | def _snap_id() -> str: |
| 66 | return long_id(secrets.token_hex(32)) |
| 67 | |
| 68 | |
| 69 | def _obj(tag: str) -> str: |
| 70 | return long_id(tag.encode().hex().ljust(64, "0")) |
| 71 | |
| 72 | |
| 73 | def _blob(manifest: StrDict) -> bytes: |
| 74 | return msgpack.packb(manifest, use_bin_type=True) |
| 75 | |
| 76 | |
| 77 | async def _make_repo(session: AsyncSession) -> str: |
| 78 | rid = _repo_id() |
| 79 | now = datetime.now(tz=timezone.utc) |
| 80 | session.add(MusehubRepo( |
| 81 | repo_id=rid, name="lcf-test", owner="lcf-tester", slug="lcf-test", |
| 82 | visibility="public", owner_user_id=_OWNER_ID, |
| 83 | created_at=now, updated_at=now, |
| 84 | )) |
| 85 | await session.commit() |
| 86 | return rid |
| 87 | |
| 88 | |
| 89 | async def _snap(session: AsyncSession, repo_id: str, manifest: StrDict) -> str: |
| 90 | sid = _snap_id() |
| 91 | session.add(MusehubSnapshot( |
| 92 | snapshot_id=sid, directories=[], |
| 93 | manifest_blob=_blob(manifest), entry_count=len(manifest), |
| 94 | )) |
| 95 | session.add(MusehubSnapshotRef(repo_id=repo_id, snapshot_id=sid)) |
| 96 | await session.flush() |
| 97 | return sid |
| 98 | |
| 99 | |
| 100 | async def _commit( |
| 101 | session: AsyncSession, |
| 102 | repo_id: str, |
| 103 | snapshot_id: str, |
| 104 | branch: str = "main", |
| 105 | offset: int = 0, |
| 106 | message: str = "feat: change", |
| 107 | ) -> str: |
| 108 | cid = _uid() |
| 109 | now = datetime.now(tz=timezone.utc) + timedelta(seconds=offset) |
| 110 | session.add(MusehubCommit( |
| 111 | commit_id=cid, branch=branch, parent_ids=[], |
| 112 | message=message, author="tester", timestamp=now, |
| 113 | snapshot_id=snapshot_id, |
| 114 | )) |
| 115 | session.add(MusehubCommitRef(repo_id=repo_id, commit_id=cid)) |
| 116 | await session.flush() |
| 117 | return cid |
| 118 | |
| 119 | |
| 120 | @asynccontextmanager |
| 121 | async def _fresh_session() -> AsyncGenerator[AsyncSession, None]: |
| 122 | async with _database._async_session_factory() as session: |
| 123 | yield session |
| 124 | |
| 125 | |
| 126 | # --------------------------------------------------------------------------- |
| 127 | # get_last_commit_for_file — query-count tests (RED until N+1 fixed) |
| 128 | # --------------------------------------------------------------------------- |
| 129 | |
| 130 | |
| 131 | @pytest.mark.anyio |
| 132 | async def test_last_commit_does_not_call_per_commit_manifest_fetch( |
| 133 | db_session: AsyncSession, |
| 134 | monkeypatch: pytest.MonkeyPatch, |
| 135 | ) -> None: |
| 136 | """get_snapshot_manifest must NOT be called inside the commit-walk loop.""" |
| 137 | import musehub.services.musehub_repository as _repo_svc |
| 138 | |
| 139 | calls: list[str] = [] |
| 140 | |
| 141 | async def _spy(session: AsyncSession, snapshot_id: str) -> JSONObject: # type: ignore[override] |
| 142 | calls.append(snapshot_id) |
| 143 | return {} |
| 144 | |
| 145 | monkeypatch.setattr(_repo_svc, "get_snapshot_manifest", _spy, raising=False) |
| 146 | |
| 147 | repo_id = await _make_repo(db_session) |
| 148 | s1 = await _snap(db_session, repo_id, {_FILE: _obj("v1")}) |
| 149 | c1 = await _commit(db_session, repo_id, s1, offset=0) |
| 150 | await db_session.commit() |
| 151 | |
| 152 | async with _fresh_session() as rs: |
| 153 | await get_last_commit_for_file(rs, repo_id, _FILE, c1) |
| 154 | |
| 155 | assert calls == [], ( |
| 156 | f"get_snapshot_manifest called {len(calls)} time(s) — N+1 still present" |
| 157 | ) |
| 158 | |
| 159 | |
| 160 | @pytest.mark.anyio |
| 161 | async def test_last_commit_uses_batch_fetch( |
| 162 | db_session: AsyncSession, |
| 163 | monkeypatch: pytest.MonkeyPatch, |
| 164 | ) -> None: |
| 165 | """get_snapshot_manifests_batch must be used instead of per-commit fetches.""" |
| 166 | import musehub.services.musehub_repository as _repo_svc |
| 167 | from musehub.services import musehub_snapshot as _snap_svc |
| 168 | |
| 169 | batch_calls: list[list[str]] = [] |
| 170 | _real = _snap_svc.get_snapshot_manifests_batch |
| 171 | |
| 172 | async def _spy_batch(session: AsyncSession, ids: list[str]) -> JSONObject: # type: ignore[override] |
| 173 | batch_calls.append(list(ids)) |
| 174 | return await _real(session, ids) |
| 175 | |
| 176 | monkeypatch.setattr(_repo_svc, "get_snapshot_manifests_batch", _spy_batch, raising=False) |
| 177 | |
| 178 | repo_id = await _make_repo(db_session) |
| 179 | head_snap = head_cid = "" |
| 180 | for i in range(4): |
| 181 | s = await _snap(db_session, repo_id, {_FILE: _obj(f"v{i}")}) |
| 182 | c = await _commit(db_session, repo_id, s, offset=i * 10) |
| 183 | if i == 3: |
| 184 | head_snap, head_cid = s, c |
| 185 | await db_session.commit() |
| 186 | |
| 187 | async with _fresh_session() as rs: |
| 188 | await get_last_commit_for_file(rs, repo_id, _FILE, head_cid) |
| 189 | |
| 190 | assert len(batch_calls) >= 1, "get_snapshot_manifests_batch never called" |
| 191 | fetched = {sid for call in batch_calls for sid in call} |
| 192 | assert head_snap in fetched, "head snapshot must be in batch" |
| 193 | |
| 194 | |
| 195 | # --------------------------------------------------------------------------- |
| 196 | # get_last_commit_for_file — correctness |
| 197 | # --------------------------------------------------------------------------- |
| 198 | |
| 199 | |
| 200 | @pytest.mark.anyio |
| 201 | async def test_last_commit_returns_commit_that_introduced_current_version( |
| 202 | db_session: AsyncSession, |
| 203 | ) -> None: |
| 204 | """Returns the oldest commit that still has the same object_id as head.""" |
| 205 | repo_id = await _make_repo(db_session) |
| 206 | |
| 207 | # c1: v1 — first version (oldest) |
| 208 | s1 = await _snap(db_session, repo_id, {_FILE: _obj("v1")}) |
| 209 | c1 = await _commit(db_session, repo_id, s1, offset=0, message="init") |
| 210 | |
| 211 | # c2: v1 — same as c1 (file unchanged) |
| 212 | s2 = await _snap(db_session, repo_id, {_FILE: _obj("v1")}) |
| 213 | c2 = await _commit(db_session, repo_id, s2, offset=10, message="unrelated") |
| 214 | |
| 215 | # c3: v2 — file changed (HEAD) |
| 216 | s3 = await _snap(db_session, repo_id, {_FILE: _obj("v2")}) |
| 217 | c3 = await _commit(db_session, repo_id, s3, offset=20, message="feat: v2") |
| 218 | |
| 219 | await db_session.commit() |
| 220 | |
| 221 | async with _fresh_session() as rs: |
| 222 | result = await get_last_commit_for_file(rs, repo_id, _FILE, c3) |
| 223 | |
| 224 | # c3 introduced v2 — it's the commit that changed the file |
| 225 | assert result is not None |
| 226 | assert result.commit_id == c3 |
| 227 | |
| 228 | |
| 229 | @pytest.mark.anyio |
| 230 | async def test_last_commit_returns_oldest_unbroken_run( |
| 231 | db_session: AsyncSession, |
| 232 | ) -> None: |
| 233 | """When the file has the same oid across multiple commits, returns the earliest.""" |
| 234 | repo_id = await _make_repo(db_session) |
| 235 | |
| 236 | # c1: v1 |
| 237 | s1 = await _snap(db_session, repo_id, {_FILE: _obj("v1")}) |
| 238 | c1 = await _commit(db_session, repo_id, s1, offset=0) |
| 239 | |
| 240 | # c2: v2 |
| 241 | s2 = await _snap(db_session, repo_id, {_FILE: _obj("v2")}) |
| 242 | c2 = await _commit(db_session, repo_id, s2, offset=10) |
| 243 | |
| 244 | # c3: v2 (same as c2) |
| 245 | s3 = await _snap(db_session, repo_id, {_FILE: _obj("v2")}) |
| 246 | c3 = await _commit(db_session, repo_id, s3, offset=20) |
| 247 | |
| 248 | # c4: v2 (same — HEAD) |
| 249 | s4 = await _snap(db_session, repo_id, {_FILE: _obj("v2")}) |
| 250 | c4 = await _commit(db_session, repo_id, s4, offset=30) |
| 251 | |
| 252 | await db_session.commit() |
| 253 | |
| 254 | async with _fresh_session() as rs: |
| 255 | result = await get_last_commit_for_file(rs, repo_id, _FILE, c4) |
| 256 | |
| 257 | # c2 is the oldest commit that has v2 — that's the one that introduced it |
| 258 | assert result is not None |
| 259 | assert result.commit_id == c2 |
| 260 | |
| 261 | |
| 262 | @pytest.mark.anyio |
| 263 | async def test_last_commit_returns_none_when_file_missing_from_head( |
| 264 | db_session: AsyncSession, |
| 265 | ) -> None: |
| 266 | """Returns None when the file doesn't exist in the head snapshot.""" |
| 267 | repo_id = await _make_repo(db_session) |
| 268 | s = await _snap(db_session, repo_id, {_OTHER: _obj("v1")}) |
| 269 | c = await _commit(db_session, repo_id, s) |
| 270 | await db_session.commit() |
| 271 | |
| 272 | async with _fresh_session() as rs: |
| 273 | result = await get_last_commit_for_file(rs, repo_id, _FILE, c) |
| 274 | |
| 275 | assert result is None |
| 276 | |
| 277 | |
| 278 | @pytest.mark.anyio |
| 279 | async def test_last_commit_returns_none_when_commit_not_found( |
| 280 | db_session: AsyncSession, |
| 281 | ) -> None: |
| 282 | """Returns None (or the missing commit itself) for an unknown commit ID.""" |
| 283 | repo_id = await _make_repo(db_session) |
| 284 | await db_session.commit() |
| 285 | |
| 286 | async with _fresh_session() as rs: |
| 287 | result = await get_last_commit_for_file(rs, repo_id, _FILE, _uid()) |
| 288 | |
| 289 | assert result is None |
| 290 | |
| 291 | |
| 292 | # --------------------------------------------------------------------------- |
| 293 | # blob_page parallelism — phases 2/3/4 must not block each other |
| 294 | # --------------------------------------------------------------------------- |
| 295 | |
| 296 | |
| 297 | @pytest.mark.anyio |
| 298 | async def test_blob_page_phases_run_concurrently( |
| 299 | monkeypatch: pytest.MonkeyPatch, |
| 300 | ) -> None: |
| 301 | """Phases 2, 3, and 4 must overlap in time, not run sequentially. |
| 302 | |
| 303 | Each phase is replaced with a 50ms sleep. Sequential execution would take |
| 304 | ≥150ms; concurrent execution takes ~50ms. |
| 305 | """ |
| 306 | import musehub.api.routes.musehub.ui_blob as _blob_mod |
| 307 | |
| 308 | order: list[str] = [] |
| 309 | start_times: dict[str, float] = {} |
| 310 | |
| 311 | async def _phase(name: str, delay: float) -> None: |
| 312 | import time |
| 313 | start_times[name] = time.monotonic() |
| 314 | await asyncio.sleep(delay) |
| 315 | order.append(name) |
| 316 | |
| 317 | async def _fake_symbols(session: AsyncSession, repo_id: str, path: str) -> list[JSONObject]: |
| 318 | await _phase("symbols", 0.05) |
| 319 | return [] |
| 320 | |
| 321 | async def _fake_history( |
| 322 | session: AsyncSession, repo_id: str, path: str, head_cid: str, limit: int = 20 |
| 323 | ) -> list[JSONObject]: |
| 324 | await _phase("history", 0.05) |
| 325 | return [] |
| 326 | |
| 327 | async def _fake_intel(session: AsyncSession, repo_id: str, path: str) -> JSONObject: |
| 328 | await _phase("intel", 0.05) |
| 329 | return { |
| 330 | "is_hotspot": False, "hotspot_count": 0, |
| 331 | "has_dead": False, "dead_count": 0, |
| 332 | "blast_risk": False, "blast_count": 0, |
| 333 | "health_score": 100, "health_label": "Excellent", |
| 334 | } |
| 335 | |
| 336 | monkeypatch.setattr(_blob_mod, "_fetch_file_symbols", _fake_symbols) |
| 337 | monkeypatch.setattr(_blob_mod, "_fetch_file_history", _fake_history) |
| 338 | monkeypatch.setattr(_blob_mod, "_fetch_file_intel", _fake_intel) |
| 339 | |
| 340 | # Run the three phases the way blob_page should after the fix |
| 341 | import time |
| 342 | t0 = time.monotonic() |
| 343 | await asyncio.gather( |
| 344 | _fake_symbols(None, "", ""), # type: ignore[arg-type] |
| 345 | _fake_history(None, "", "", ""), # type: ignore[arg-type] |
| 346 | _fake_intel(None, "", ""), # type: ignore[arg-type] |
| 347 | ) |
| 348 | elapsed = time.monotonic() - t0 |
| 349 | |
| 350 | # Concurrent: ~50ms. Sequential: ~150ms. |
| 351 | assert elapsed < 0.12, ( |
| 352 | f"Phases took {elapsed:.3f}s — expected ~0.05s if concurrent, " |
| 353 | f"got {elapsed:.3f}s suggesting sequential execution" |
| 354 | ) |
| 355 | |
| 356 | # All three must have started before any finished |
| 357 | assert len(start_times) == 3 |
| 358 | earliest_finish = min(start_times.values()) + 0.05 |
| 359 | assert all(t < earliest_finish + 0.01 for t in start_times.values()), ( |
| 360 | "Not all phases started before the first one finished — not truly concurrent" |
| 361 | ) |
File History
1 commit
sha256:8b0fb5814ab41a08af1f212c956bd08fe74190c2818ba5c503848fda6e33e216
chore: bump version to 0.2.0rc12
Sonnet 4.6
patch
8 days ago