"""TDD — L-tier fetch regression (issue #60). wire_fetch_mpack returns mpack_id='' for 1000-commit repos. WD-3 confirmed _walk_commit_delta works at 1000 commits. These tests isolate the two remaining unknowns: LF-1 wire_refs returns the correct tip commit ID after 1000-commit chain, AND that ID exists in musehub_commits. LF-2 wire_fetch_mpack(want=branch_heads.values(), have=[]) returns a non-empty mpack for a 1000-commit repo — the exact path the bench uses. If LF-1 fails: branch head mismatch — want != stored commit_id. If LF-2 fails with mpack_id='': _walk_commit_delta returns {} for want from wire_refs. If both pass: the bug is outside the server (bench wiring, URL, repo slug mismatch). """ from __future__ import annotations import msgpack import pytest from datetime import datetime, timezone from unittest.mock import AsyncMock from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy import select from muse.core.types import fake_id, blob_id from musehub.db import musehub_repo_models as db from musehub.db.musehub_repo_models import MusehubMPackIndex from tests.factories import create_repo type _ByteStore = dict[str, bytes] def _now() -> datetime: return datetime.now(tz=timezone.utc) def _stub_backend(monkeypatch: pytest.MonkeyPatch) -> _ByteStore: store: dict[str, bytes] = {} async def _put(oid: str, data: bytes, **_: typing.Any) -> str: store[oid] = data return f"mem://{oid}" async def _get(oid: str) -> bytes | None: return store.get(oid) async def _exists(oid: str, **_: typing.Any) -> bool: return oid in store backend = AsyncMock() backend.put = _put backend.get = _get backend.exists = _exists backend.supports_presign = False backend.presign_get = AsyncMock(return_value="http://fake-minio/mpack") # Phase 1: get_mpack must return None (not an AsyncMock) so the mpack-native # path falls through to the per-object fallback, which reads from `store`. backend.get_mpack = AsyncMock(return_value=None) monkeypatch.setattr("musehub.services.musehub_wire.get_backend", lambda: backend) return store async def _build_chain_with_objects( session: AsyncSession, repo_id: str, n: int, store: _ByteStore ) -> list[str]: """Insert N commits with one object each. Returns [root_id, ..., tip_id].""" commit_ids: list[str] = [] parent: list[str] = [] for i in range(n): raw = f"content-{i}".encode() oid = blob_id(raw) store[oid] = raw snap_id = fake_id(f"snap-lf-{i}") snap = db.MusehubSnapshot( snapshot_id=snap_id, manifest_blob=msgpack.packb({f"file{i}.txt": oid}, use_bin_type=True), directories=[], entry_count=1, created_at=_now(), ) session.add(snap) # Index the object so wire_fetch_mpack step 3 does not raise FetchNotIndexedError. mpack_idx = MusehubMPackIndex( entity_id=oid, mpack_id=fake_id(f"mpack-lf-{i}"), entity_type="object", ) session.add(mpack_idx) cid = fake_id(f"commit-lf-{i}") commit = db.MusehubCommit( commit_id=cid, branch="main", parent_ids=parent, message=f"commit {i}", author="gabriel", timestamp=_now(), snapshot_id=snap_id, ) session.add(commit) # MusehubCommitGraph is required for _walk_commit_delta's generation-bounded fast path graph = db.MusehubCommitGraph( commit_id=cid, parent_ids=parent, generation=i, snapshot_id=snap_id, ) session.add(graph) commit_ids.append(cid) parent = [cid] tip = commit_ids[-1] # Set the branch head branch_row = db.MusehubBranch( branch_id=fake_id(f"branch-lf-{repo_id}"), repo_id=repo_id, name="main", head_commit_id=tip, ) session.add(branch_row) await session.commit() return commit_ids # ── LF-1 ────────────────────────────────────────────────────────────────────── @pytest.mark.asyncio async def test_lf1_wire_refs_returns_correct_tip(db_session: AsyncSession) -> None: """wire_refs must return the tip commit ID and that ID must exist in musehub_commits.""" from musehub.services.musehub_wire import wire_refs repo = await create_repo(db_session, owner="gabriel", visibility="public") store: _ByteStore = {} ids = await _build_chain_with_objects(db_session, repo.repo_id, 1_000, store) tip = ids[-1] result = await wire_refs(db_session, repo.repo_id) assert result is not None, "wire_refs returned None" assert "main" in result.branch_heads, f"branch 'main' missing from branch_heads: {result.branch_heads}" assert result.branch_heads["main"] == tip, ( f"branch_heads['main'] = {result.branch_heads['main']!r} " f"does not match stored tip {tip!r}" ) # Confirm that ID actually exists in musehub_commits row = await db_session.get(db.MusehubCommit, tip) assert row is not None, ( f"tip commit {tip!r} exists in MusehubBranch.head_commit_id " f"but NOT in musehub_commits" ) # ── LF-2 ────────────────────────────────────────────────────────────────────── @pytest.mark.asyncio async def test_lf2_wire_fetch_mpack_1000_commits( db_session: AsyncSession, monkeypatch: pytest.MonkeyPatch ) -> None: """wire_fetch_mpack with want=branch_heads.values() must return a non-empty mpack. This is the exact server-side call the bench makes during clone. """ from musehub.services.musehub_wire import wire_refs, wire_fetch_mpack store = _stub_backend(monkeypatch) repo = await create_repo(db_session, owner="gabriel", visibility="public") await _build_chain_with_objects(db_session, repo.repo_id, 1_000, store) refs = await wire_refs(db_session, repo.repo_id) assert refs is not None want = list(refs.branch_heads.values()) result = await wire_fetch_mpack(db_session, repo.repo_id, want=want, have=[]) assert result["mpack_id"] is not None, ( f"wire_fetch_mpack returned up-to-date (mpack_id=None) for 1000-commit repo. " f"want={want!r}" ) assert result["mpack_id"].startswith("sha256:"), "mpack_id must be sha256-prefixed" assert result["commit_count"] == 1_000, ( f"expected 1000 commits in mpack, got {result['commit_count']}" )