test_last_commit_index_lookup.py
python
sha256:5601f81903b6c70ddd11bd88a5a257ee6dfd38aa3b85b19746c100c030657f1e
chore: update smoke_muse.sh comment to reference rc9
Sonnet 4.6
minor
⚠ breaking
21 days ago
| 1 | """TDD tests for get_last_commit_for_file index-lookup optimization. |
| 2 | |
| 3 | Problem: get_last_commit_for_file scans up to 200 snapshot manifests to find |
| 4 | which commit last touched a file — O(N) per page load. |
| 5 | |
| 6 | Fix: query musehub_symbol_history_entries WHERE repo_id=? AND address=? |
| 7 | (or address LIKE 'path::%') ordered by committed_at DESC LIMIT 1. The index |
| 8 | ix_symbol_history_repo_address makes this O(1). Fall back to the snapshot |
| 9 | scan only when no history entries exist for the file. |
| 10 | |
| 11 | Test matrix |
| 12 | ----------- |
| 13 | test_get_last_commit_skips_snapshot_scan_when_history_exists |
| 14 | When symbol_history_entries has entries for the file, no snapshot manifests |
| 15 | must be fetched at all. |
| 16 | |
| 17 | test_get_last_commit_returns_most_recent_history_entry |
| 18 | Returns the commit whose committed_at is latest among file-level entries. |
| 19 | |
| 20 | test_get_last_commit_falls_back_to_snapshot_scan_when_no_history |
| 21 | When no history entries exist, falls back to snapshot scan (existing |
| 22 | behaviour preserved). |
| 23 | |
| 24 | test_get_last_commit_matches_symbol_entries_for_file |
| 25 | Symbol-level addresses (path::Symbol) for the same file also count — |
| 26 | if the file path appears as a prefix, the most recent matching entry |
| 27 | is used. |
| 28 | |
| 29 | test_get_last_commit_ignores_entries_for_other_files |
| 30 | Entries for a different file path don't pollute the result. |
| 31 | """ |
| 32 | from __future__ import annotations |
| 33 | |
| 34 | import secrets |
| 35 | from contextlib import asynccontextmanager |
| 36 | from datetime import datetime, timezone, timedelta |
| 37 | from typing import AsyncGenerator |
| 38 | from unittest.mock import AsyncMock, patch |
| 39 | |
| 40 | import msgpack |
| 41 | import pytest |
| 42 | from sqlalchemy.ext.asyncio import AsyncSession |
| 43 | |
| 44 | from musehub.core.genesis import compute_identity_id, compute_repo_id |
| 45 | from musehub.db import database as _database |
| 46 | from musehub.db.musehub_intel_models import MusehubSymbolHistoryEntry |
| 47 | from musehub.db.musehub_repo_models import MusehubCommit, MusehubCommitRef, MusehubRepo, MusehubSnapshot, MusehubSnapshotRef |
| 48 | from musehub.services.musehub_repository import get_last_commit_for_file |
| 49 | from musehub.types.json_types import StrDict |
| 50 | from muse.core.types import long_id, now_utc_iso |
| 51 | |
# Return annotation of the spy coroutines that replace get_snapshot_manifests_batch
# in the tests: StrDict values keyed by str (presumably snapshot ids — TODO confirm).
ManifestBatch = dict[str, StrDict]
| 53 | |
| 54 | # ── Constants ───────────────────────────────────────────────────────────────── |
| 55 | |
# Identity id used as the owner when computing repo ids and as owner_user_id on test repos.
_OWNER_ID = compute_identity_id(b"lcf-index-tester")
# Path of the file whose last commit the tests look up.
_FILE = "musehub/core/billing.py"
# A second path, used for manifest and history entries that must not match _FILE.
_OTHER = "musehub/core/auth.py"
| 59 | |
| 60 | |
| 61 | # ── Helpers ─────────────────────────────────────────────────────────────────── |
| 62 | |
def _uid() -> str:
    """Return a fresh id: ``long_id`` applied to 64 random hex characters."""
    random_hex = secrets.token_hex(32)
    return long_id(random_hex)
| 65 | |
| 66 | |
def _repo_id() -> str:
    """Compute a repo id for ``_OWNER_ID`` under a randomly suffixed repo name.

    The name gets an 8-hex-character random suffix, so repeated calls use
    different names.
    """
    unique_name = f"lcf-idx-{secrets.token_hex(4)}"
    timestamp_iso = now_utc_iso()
    return compute_repo_id(_OWNER_ID, unique_name, "code", timestamp_iso)
| 71 | |
| 72 | |
def _snap_id() -> str:
    """Return a fresh snapshot id: ``long_id`` applied to 64 random hex characters."""
    random_hex = secrets.token_hex(32)
    return long_id(random_hex)
| 75 | |
| 76 | |
def _obj(tag: str) -> str:
    """Derive a repeatable object id from *tag*.

    The tag is encoded to bytes, hex-encoded, right-padded with ``"0"`` to at
    least 64 characters, and handed to ``long_id``.
    """
    hex_digits = tag.encode().hex()
    return long_id(hex_digits.ljust(64, "0"))
| 79 | |
| 80 | |
def _blob(manifest: StrDict) -> bytes:
    """Serialize *manifest* with msgpack (``use_bin_type=True``) and return the bytes."""
    packed = msgpack.packb(manifest, use_bin_type=True)
    return packed
| 83 | |
| 84 | |
async def _make_repo(session: AsyncSession) -> str:
    """Insert a public test repo row, commit the session, and return the repo id."""
    new_repo_id = _repo_id()
    # One timestamp serves as both created_at and updated_at.
    created = datetime.now(tz=timezone.utc)
    repo = MusehubRepo(
        repo_id=new_repo_id,
        name="lcf-idx-test",
        owner="lcf-idx-tester",
        slug="lcf-idx-test",
        visibility="public",
        owner_user_id=_OWNER_ID,
        created_at=created,
        updated_at=created,
    )
    session.add(repo)
    await session.commit()
    return new_repo_id
| 95 | |
| 96 | |
async def _snap(session: AsyncSession, repo_id: str, manifest: StrDict) -> str:
    """Add a snapshot carrying *manifest* and its repo ref, flush, and return the snapshot id."""
    new_snapshot_id = _snap_id()
    snapshot = MusehubSnapshot(
        snapshot_id=new_snapshot_id,
        directories=[],
        manifest_blob=_blob(manifest),
        entry_count=len(manifest),
    )
    session.add(snapshot)
    # The ref row ties the snapshot to the repo.
    repo_link = MusehubSnapshotRef(repo_id=repo_id, snapshot_id=new_snapshot_id)
    session.add(repo_link)
    await session.flush()
    return new_snapshot_id
| 106 | |
| 107 | |
async def _commit(
    session: AsyncSession,
    repo_id: str,
    snapshot_id: str,
    branch: str = "main",
    offset: int = 0,
    message: str = "feat: change",
) -> str:
    """Add a parentless commit pointing at *snapshot_id* plus its repo ref.

    The commit timestamp is the current UTC time shifted by *offset* seconds.
    The session is flushed (not committed) and the new commit id is returned.
    """
    new_commit_id = _uid()
    stamp = datetime.now(tz=timezone.utc) + timedelta(seconds=offset)
    commit_row = MusehubCommit(
        commit_id=new_commit_id,
        branch=branch,
        parent_ids=[],
        message=message,
        author="tester",
        timestamp=stamp,
        snapshot_id=snapshot_id,
    )
    session.add(commit_row)
    repo_link = MusehubCommitRef(repo_id=repo_id, commit_id=new_commit_id)
    session.add(repo_link)
    await session.flush()
    return new_commit_id
| 126 | |
| 127 | |
async def _history_entry(
    session: AsyncSession,
    repo_id: str,
    commit_id: str,
    address: str,
    offset: int = 0,
    op: str = "modify",
    message: str = "feat: change",
) -> None:
    """Add one MusehubSymbolHistoryEntry for *address* and flush the session.

    ``committed_at`` is the current UTC time shifted by *offset* seconds.
    """
    committed_at = datetime.now(tz=timezone.utc) + timedelta(seconds=offset)
    entry = MusehubSymbolHistoryEntry(
        repo_id=repo_id,
        address=address,
        commit_id=commit_id,
        committed_at=committed_at,
        author="tester",
        op=op,
        message=message,
    )
    session.add(entry)
    await session.flush()
| 149 | |
| 150 | |
@asynccontextmanager
async def _fresh_session() -> AsyncGenerator[AsyncSession, None]:
    """Yield a new session from the database module's session factory.

    The session's own async context is exited when the caller's block ends.
    NOTE(review): presumably used so reads do not reuse the ``db_session``
    fixture's in-memory state — confirm.
    """
    make_session = _database._async_session_factory
    async with make_session() as new_session:
        yield new_session
| 155 | |
| 156 | |
| 157 | # ── Tests ───────────────────────────────────────────────────────────────────── |
| 158 | |
| 159 | |
@pytest.mark.anyio
async def test_get_last_commit_skips_snapshot_scan_when_history_exists(
    db_session: AsyncSession,
) -> None:
    """A file-level history entry must keep get_snapshot_manifests_batch from being called."""
    # Seed one snapshot/commit pair and a matching history entry for _FILE.
    repo_id = await _make_repo(db_session)
    snapshot_id = await _snap(db_session, repo_id, {_FILE: _obj("v1")})
    head = await _commit(db_session, repo_id, snapshot_id, offset=0)
    await _history_entry(db_session, repo_id, head, _FILE, offset=0)
    await db_session.commit()

    batch_calls: list[list[str]] = []

    async def _record_batch(session: AsyncSession, ids: list[str]) -> ManifestBatch:
        # Remember every requested id list; hand back an empty batch.
        batch_calls.append(ids)
        return {}

    manifest_patch = patch(
        "musehub.services.musehub_repository.get_snapshot_manifests_batch",
        side_effect=_record_batch,
    )
    with manifest_patch:
        async with _fresh_session() as rs:
            await get_last_commit_for_file(rs, repo_id, _FILE, head)

    assert batch_calls == [], (
        f"get_snapshot_manifests_batch called {len(batch_calls)} time(s) "
        "even though symbol_history_entries has entries for the file"
    )
| 188 | |
| 189 | |
@pytest.mark.anyio
async def test_get_last_commit_returns_most_recent_history_entry(
    db_session: AsyncSession,
) -> None:
    """With three file-level entries, the commit with the newest committed_at is returned."""
    repo_id = await _make_repo(db_session)

    # (object tag, offset in seconds, commit message), oldest first.
    versions = (("v1", 0, "init"), ("v2", 10, "feat: v2"), ("v3", 20, "feat: v3"))
    seeded: list[tuple[str, int]] = []
    for tag, offset, message in versions:
        snapshot_id = await _snap(db_session, repo_id, {_FILE: _obj(tag)})
        commit_id = await _commit(
            db_session, repo_id, snapshot_id, offset=offset, message=message,
        )
        seeded.append((commit_id, offset))

    # History entries are written only after every snapshot/commit pair exists.
    for commit_id, offset in seeded:
        await _history_entry(db_session, repo_id, commit_id, _FILE, offset=offset)
    await db_session.commit()

    c3 = seeded[-1][0]
    async with _fresh_session() as rs:
        result = await get_last_commit_for_file(rs, repo_id, _FILE, c3)

    assert result is not None
    assert result.commit_id == c3, (
        f"Expected most recent commit {c3}, got {result.commit_id}"
    )
| 218 | |
| 219 | |
@pytest.mark.anyio
async def test_get_last_commit_falls_back_to_snapshot_scan_when_no_history(
    db_session: AsyncSession,
) -> None:
    """With no history entries seeded, the lookup still returns the commit for the file."""
    repo_id = await _make_repo(db_session)
    snapshot_id = await _snap(db_session, repo_id, {_FILE: _obj("v1")})
    only_commit = await _commit(
        db_session, repo_id, snapshot_id, offset=0, message="init",
    )
    # Deliberately no history entries: the fallback path has to answer.
    await db_session.commit()

    async with _fresh_session() as rs:
        found = await get_last_commit_for_file(rs, repo_id, _FILE, only_commit)

    assert found is not None
    assert found.commit_id == only_commit
| 236 | |
| 237 | |
@pytest.mark.anyio
async def test_get_last_commit_matches_symbol_entries_for_file(
    db_session: AsyncSession,
) -> None:
    """A ``path::Symbol`` history entry alone is enough to answer without a snapshot scan."""
    repo_id = await _make_repo(db_session)
    first_snapshot = await _snap(db_session, repo_id, {_FILE: _obj("v1")})
    # The first commit is seeded for its rows only; its id is not needed afterwards.
    await _commit(db_session, repo_id, first_snapshot, offset=0)
    second_snapshot = await _snap(db_session, repo_id, {_FILE: _obj("v2")})
    latest = await _commit(
        db_session, repo_id, second_snapshot, offset=10, message="feat: update fn",
    )

    # Seed a symbol-level address only; there is no bare file-level entry.
    symbol_address = f"{_FILE}::compute_total"
    await _history_entry(db_session, repo_id, latest, symbol_address, offset=10)
    await db_session.commit()

    batch_calls: list[list[str]] = []

    async def _record_batch(session: AsyncSession, ids: list[str]) -> ManifestBatch:
        # Remember every requested id list; hand back an empty batch.
        batch_calls.append(ids)
        return {}

    manifest_patch = patch(
        "musehub.services.musehub_repository.get_snapshot_manifests_batch",
        side_effect=_record_batch,
    )
    with manifest_patch:
        async with _fresh_session() as rs:
            result = await get_last_commit_for_file(rs, repo_id, _FILE, latest)

    assert batch_calls == [], "snapshot scan triggered despite symbol history entries"
    assert result is not None
    assert result.commit_id == latest
| 269 | |
| 270 | |
@pytest.mark.anyio
async def test_get_last_commit_ignores_entries_for_other_files(
    db_session: AsyncSession,
) -> None:
    """A history entry for _OTHER must not change the answer for _FILE."""
    repo_id = await _make_repo(db_session)

    # _FILE keeps the same object id in both snapshots; only _OTHER changes.
    unchanged = _obj("v1")
    first_snapshot = await _snap(
        db_session, repo_id, {_FILE: unchanged, _OTHER: _obj("o1")},
    )
    introducing = await _commit(db_session, repo_id, first_snapshot, offset=0)

    second_snapshot = await _snap(
        db_session, repo_id, {_FILE: _obj("v1"), _OTHER: _obj("o2")},
    )
    head = await _commit(db_session, repo_id, second_snapshot, offset=10)

    # History exists for _OTHER only, so _FILE has nothing in the index.
    await _history_entry(db_session, repo_id, head, _OTHER, offset=10)
    await db_session.commit()

    # With no index entry for _FILE the lookup has to use the snapshot scan.
    async with _fresh_session() as rs:
        found = await get_last_commit_for_file(rs, repo_id, _FILE, head)

    # Same oid in both snapshots means the first commit is the one that introduced the file.
    assert found is not None
    assert found.commit_id == introducing
File History
2 commits
sha256:5601f81903b6c70ddd11bd88a5a257ee6dfd38aa3b85b19746c100c030657f1e
chore: update smoke_muse.sh comment to reference rc9
Sonnet 4.6
minor
⚠
21 days ago
sha256:39e9c4e6f2134da0732e6983268a218178973936f8d7ca03c91f2b5ad42133c8
fix: use read_object_bytes in blob viewer; add zstd magic d…
Sonnet 4.6
patch
21 days ago