test_ltier_fetch_regression.py
python
sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595
fix: typing audit — 0 violations, 0 untyped defs across all…
Sonnet 4.6
minor
⚠ breaking
23 days ago
| 1 | """TDD — L-tier fetch regression (issue #60). |
| 2 | |
| 3 | wire_fetch_mpack returns mpack_id='' for 1000-commit repos. |
| 4 | WD-3 confirmed _walk_commit_delta works at 1000 commits. |
| 5 | These tests isolate the two remaining unknowns: |
| 6 | |
| 7 | LF-1 wire_refs returns the correct tip commit ID after 1000-commit chain, |
| 8 | AND that ID exists in musehub_commits. |
| 9 | |
| 10 | LF-2 wire_fetch_mpack(want=branch_heads.values(), have=[]) returns a |
| 11 | non-empty mpack for a 1000-commit repo — the exact path the bench uses. |
| 12 | |
| 13 | If LF-1 fails: branch head mismatch — want != stored commit_id. |
| 14 | If LF-2 fails with mpack_id='': _walk_commit_delta returns {} for want from wire_refs. |
| 15 | If both pass: the bug is outside the server (bench wiring, URL, repo slug mismatch). |
| 16 | """ |
| 17 | from __future__ import annotations |
| 18 | |
| 19 | import msgpack |
| 20 | import pytest |
| 21 | from datetime import datetime, timezone |
| 22 | from unittest.mock import AsyncMock |
| 23 | |
| 24 | from sqlalchemy.ext.asyncio import AsyncSession |
| 25 | from sqlalchemy import select |
| 26 | |
| 27 | from muse.core.types import fake_id, blob_id |
| 28 | from musehub.db import musehub_repo_models as db |
| 29 | from musehub.db.musehub_repo_models import MusehubMPackIndex |
| 30 | from tests.factories import create_repo |
| 31 | |
# In-memory object store shared by the helpers below: object ID -> raw bytes.
type _ByteStore = dict[str, bytes]
| 33 | |
| 34 | |
def _now() -> datetime:
    """Return the current instant as a timezone-aware UTC ``datetime``."""
    utc_stamp = datetime.now(timezone.utc)
    return utc_stamp
| 37 | |
| 38 | |
def _stub_backend(monkeypatch: pytest.MonkeyPatch) -> _ByteStore:
    """Install an in-memory storage backend and return its backing dict.

    Patches ``musehub.services.musehub_wire.get_backend`` so that it returns
    an ``AsyncMock`` whose ``put`` / ``get`` / ``exists`` coroutines read and
    write a plain ``dict`` keyed by object ID.  The dict is returned so the
    caller can seed or inspect stored objects directly.

    Args:
        monkeypatch: pytest fixture used to replace ``get_backend``.

    Returns:
        The mutable mapping of object ID to raw bytes behind the stub.
    """
    store: _ByteStore = {}

    # Extra keyword arguments are accepted and ignored.  They are annotated
    # ``object`` instead of ``typing.Any`` because this module never imports
    # ``typing``; the previous annotation was an unresolved name that broke
    # ``typing.get_type_hints`` and static analysis.
    async def _put(oid: str, data: bytes, **_: object) -> str:
        store[oid] = data
        return f"mem://{oid}"

    async def _get(oid: str) -> bytes | None:
        return store.get(oid)

    async def _exists(oid: str, **_: object) -> bool:
        return oid in store

    backend = AsyncMock()
    backend.put = _put
    backend.get = _get
    backend.exists = _exists
    backend.supports_presign = False
    backend.presign_get = AsyncMock(return_value="http://fake-minio/mpack")
    # Phase 1: get_mpack must return None (not an AsyncMock) so the mpack-native
    # path falls through to the per-object fallback, which reads from `store`.
    backend.get_mpack = AsyncMock(return_value=None)
    monkeypatch.setattr("musehub.services.musehub_wire.get_backend", lambda: backend)
    return store
| 63 | |
| 64 | |
async def _build_chain_with_objects(
    session: AsyncSession, repo_id: str, n: int, store: _ByteStore
) -> list[str]:
    """Persist a linear chain of ``n`` commits, one object per commit.

    For every position in the chain this writes the object bytes into
    ``store`` and adds a snapshot, an mpack-index row, a commit and a
    commit-graph row to ``session``.  A ``main`` branch pointing at the last
    commit is added afterwards and the session is committed.

    Returns:
        Commit IDs ordered root first, tip last.
    """
    chain: list[str] = []
    parents: list[str] = []

    for idx in range(n):
        payload = f"content-{idx}".encode()
        object_id = blob_id(payload)
        store[object_id] = payload

        snapshot_id = fake_id(f"snap-lf-{idx}")
        manifest = msgpack.packb({f"file{idx}.txt": object_id}, use_bin_type=True)
        session.add(
            db.MusehubSnapshot(
                snapshot_id=snapshot_id,
                manifest_blob=manifest,
                directories=[],
                entry_count=1,
                created_at=_now(),
            )
        )

        # Index the object so wire_fetch_mpack step 3 does not raise FetchNotIndexedError.
        session.add(
            MusehubMPackIndex(
                entity_id=object_id,
                mpack_id=fake_id(f"mpack-lf-{idx}"),
                entity_type="object",
            )
        )

        commit_id = fake_id(f"commit-lf-{idx}")
        session.add(
            db.MusehubCommit(
                commit_id=commit_id,
                branch="main",
                parent_ids=parents,
                message=f"commit {idx}",
                author="gabriel",
                timestamp=_now(),
                snapshot_id=snapshot_id,
            )
        )

        # MusehubCommitGraph is required for _walk_commit_delta's generation-bounded fast path
        session.add(
            db.MusehubCommitGraph(
                commit_id=commit_id,
                parent_ids=parents,
                generation=idx,
                snapshot_id=snapshot_id,
            )
        )

        chain.append(commit_id)
        # Rebind (never mutate) so rows already added keep their own parent list.
        parents = [commit_id]

    # Point the "main" branch at the newest commit in the chain.
    head = chain[-1]
    session.add(
        db.MusehubBranch(
            branch_id=fake_id(f"branch-lf-{repo_id}"),
            repo_id=repo_id,
            name="main",
            head_commit_id=head,
        )
    )
    await session.commit()
    return chain
| 127 | |
| 128 | |
| 129 | # ── LF-1 ────────────────────────────────────────────────────────────────────── |
| 130 | |
@pytest.mark.asyncio
async def test_lf1_wire_refs_returns_correct_tip(db_session: AsyncSession) -> None:
    """LF-1: wire_refs reports the chain tip for 'main', and that commit row exists."""
    from musehub.services.musehub_wire import wire_refs

    repo = await create_repo(db_session, owner="gabriel", visibility="public")
    # The object store is irrelevant to wire_refs, so a throwaway dict is enough.
    scratch: _ByteStore = {}
    chain = await _build_chain_with_objects(db_session, repo.repo_id, 1_000, scratch)
    expected_tip = chain[-1]

    refs = await wire_refs(db_session, repo.repo_id)
    assert refs is not None, "wire_refs returned None"

    heads = refs.branch_heads
    assert "main" in heads, f"branch 'main' missing from branch_heads: {heads}"

    reported_tip = heads["main"]
    assert reported_tip == expected_tip, (
        f"branch_heads['main'] = {reported_tip!r} "
        f"does not match stored tip {expected_tip!r}"
    )

    # The advertised head must also be a real row in musehub_commits.
    commit_row = await db_session.get(db.MusehubCommit, expected_tip)
    assert commit_row is not None, (
        f"tip commit {expected_tip!r} exists in MusehubBranch.head_commit_id "
        f"but NOT in musehub_commits"
    )
| 156 | |
| 157 | |
| 158 | # ── LF-2 ────────────────────────────────────────────────────────────────────── |
| 159 | |
@pytest.mark.asyncio
async def test_lf2_wire_fetch_mpack_1000_commits(
    db_session: AsyncSession, monkeypatch: pytest.MonkeyPatch
) -> None:
    """wire_fetch_mpack with want=branch_heads.values() must return a non-empty mpack.

    This is the exact server-side call the bench makes during clone.
    """
    from musehub.services.musehub_wire import wire_refs, wire_fetch_mpack

    store = _stub_backend(monkeypatch)
    repo = await create_repo(db_session, owner="gabriel", visibility="public")
    await _build_chain_with_objects(db_session, repo.repo_id, 1_000, store)

    refs = await wire_refs(db_session, repo.repo_id)
    assert refs is not None, "wire_refs returned None"
    want = list(refs.branch_heads.values())

    result = await wire_fetch_mpack(db_session, repo.repo_id, want=want, have=[])

    mpack_id = result["mpack_id"]
    # Issue #60 surfaces as mpack_id='' rather than None, so check truthiness:
    # both "no mpack" shapes then fail here, with the want list in the message.
    assert mpack_id, (
        f"wire_fetch_mpack returned no mpack (mpack_id={mpack_id!r}) for 1000-commit repo. "
        f"want={want!r}"
    )
    assert mpack_id.startswith("sha256:"), "mpack_id must be sha256-prefixed"
    assert result["commit_count"] == 1_000, (
        f"expected 1000 commits in mpack, got {result['commit_count']}"
    )
File History
1 commit
sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595
fix: typing audit — 0 violations, 0 untyped defs across all…
Sonnet 4.6
minor
⚠
23 days ago