gabriel / musehub public
test_ltier_fetch_regression.py python
187 lines 6.7 KB
Raw
sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595 fix: typing audit — 0 violations, 0 untyped defs across all… Sonnet 4.6 minor ⚠ breaking 23 days ago
1 """TDD — L-tier fetch regression (issue #60).
2
3 wire_fetch_mpack returns mpack_id='' for 1000-commit repos.
4 WD-3 confirmed _walk_commit_delta works at 1000 commits.
5 These tests isolate the two remaining unknowns:
6
7 LF-1 wire_refs returns the correct tip commit ID after 1000-commit chain,
8 AND that ID exists in musehub_commits.
9
10 LF-2 wire_fetch_mpack(want=branch_heads.values(), have=[]) returns a
11 non-empty mpack for a 1000-commit repo — the exact path the bench uses.
12
13 If LF-1 fails: branch head mismatch — want != stored commit_id.
14 If LF-2 fails with mpack_id='': _walk_commit_delta returns {} for want from wire_refs.
15 If both pass: the bug is outside the server (bench wiring, URL, repo slug mismatch).
16 """
17 from __future__ import annotations
18
19 import msgpack
20 import pytest
21 from datetime import datetime, timezone
22 from unittest.mock import AsyncMock
23
24 from sqlalchemy.ext.asyncio import AsyncSession
25 from sqlalchemy import select
26
27 from muse.core.types import fake_id, blob_id
28 from musehub.db import musehub_repo_models as db
29 from musehub.db.musehub_repo_models import MusehubMPackIndex
30 from tests.factories import create_repo
31
# In-memory object store used by these tests: maps an object ID to that
# object's raw bytes.
type _ByteStore = dict[str, bytes]
33
34
def _now() -> datetime:
    """Return the current moment as a timezone-aware ``datetime`` in UTC."""
    utc = timezone.utc
    return datetime.now(utc)
37
38
def _stub_backend(monkeypatch: pytest.MonkeyPatch) -> _ByteStore:
    """Patch the wire service's storage backend with an in-memory stub.

    Replaces ``musehub.services.musehub_wire.get_backend`` with a factory that
    returns a stub whose ``put`` / ``get`` / ``exists`` coroutines operate on a
    plain dict.

    Args:
        monkeypatch: pytest fixture used to install the replacement factory.

    Returns:
        The dict backing the stub. Entries added to it by the caller are
        visible through the stub's ``get`` and ``exists``.
    """
    store: _ByteStore = {}

    # Extra keyword arguments are accepted and ignored; ``object`` is the
    # right annotation for values that are never inspected (and, unlike
    # ``typing.Any``, needs no import this module does not have).
    async def _put(oid: str, data: bytes, **_: object) -> str:
        store[oid] = data
        return f"mem://{oid}"

    async def _get(oid: str) -> bytes | None:
        return store.get(oid)

    async def _exists(oid: str, **_: object) -> bool:
        return oid in store

    backend = AsyncMock()
    backend.put = _put
    backend.get = _get
    backend.exists = _exists
    backend.supports_presign = False
    backend.presign_get = AsyncMock(return_value="http://fake-minio/mpack")
    # Phase 1: get_mpack must return None (not an AsyncMock) so the mpack-native
    # path falls through to the per-object fallback, which reads from `store`.
    backend.get_mpack = AsyncMock(return_value=None)
    monkeypatch.setattr("musehub.services.musehub_wire.get_backend", lambda: backend)
    return store
63
64
async def _build_chain_with_objects(
    session: AsyncSession, repo_id: str, n: int, store: _ByteStore
) -> list[str]:
    """Insert a linear chain of ``n`` commits on ``main``, one object each.

    For every commit this adds a snapshot whose manifest maps a single file to
    one blob, an mpack index row for that blob, the commit row and its commit
    graph row, and writes the blob bytes into ``store``. After the loop a
    ``main`` branch row pointing at the last commit is added and the session
    is committed.

    Args:
        session: Async SQLAlchemy session the rows are added to and committed on.
        repo_id: Repository the ``main`` branch row is attached to.
        n: Number of commits to create; must be at least 1.
        store: In-memory object store that receives each blob's raw bytes.

    Returns:
        The commit IDs in chain order: ``[root_id, ..., tip_id]``.

    Raises:
        ValueError: If ``n`` is less than 1 (there would be no tip commit for
            the branch head). Raised before anything is written.
    """
    if n < 1:
        raise ValueError(f"n must be >= 1 to build a commit chain, got {n}")

    commit_ids: list[str] = []
    parent: list[str] = []  # empty for the root commit, then [previous commit id]
    for i in range(n):
        raw = f"content-{i}".encode()
        oid = blob_id(raw)
        store[oid] = raw

        snap_id = fake_id(f"snap-lf-{i}")
        snap = db.MusehubSnapshot(
            snapshot_id=snap_id,
            manifest_blob=msgpack.packb({f"file{i}.txt": oid}, use_bin_type=True),
            directories=[],
            entry_count=1,
            created_at=_now(),
        )
        session.add(snap)

        # Index the object so wire_fetch_mpack step 3 does not raise FetchNotIndexedError.
        mpack_idx = MusehubMPackIndex(
            entity_id=oid,
            mpack_id=fake_id(f"mpack-lf-{i}"),
            entity_type="object",
        )
        session.add(mpack_idx)

        cid = fake_id(f"commit-lf-{i}")
        commit = db.MusehubCommit(
            commit_id=cid,
            branch="main",
            parent_ids=parent,
            message=f"commit {i}",
            author="gabriel",
            timestamp=_now(),
            snapshot_id=snap_id,
        )
        session.add(commit)
        # MusehubCommitGraph is required for _walk_commit_delta's generation-bounded fast path
        graph = db.MusehubCommitGraph(
            commit_id=cid,
            parent_ids=parent,
            generation=i,
            snapshot_id=snap_id,
        )
        session.add(graph)
        commit_ids.append(cid)
        parent = [cid]

    tip = commit_ids[-1]
    # Set the branch head
    branch_row = db.MusehubBranch(
        branch_id=fake_id(f"branch-lf-{repo_id}"),
        repo_id=repo_id,
        name="main",
        head_commit_id=tip,
    )
    session.add(branch_row)
    await session.commit()
    return commit_ids
127
128
129 # ── LF-1 ──────────────────────────────────────────────────────────────────────
130
@pytest.mark.asyncio
async def test_lf1_wire_refs_returns_correct_tip(db_session: AsyncSession) -> None:
    """LF-1: after a 1000-commit chain, wire_refs reports the stored tip for 'main'.

    Also verifies that the reported tip ID resolves to a row in musehub_commits.
    """
    from musehub.services.musehub_wire import wire_refs

    repo = await create_repo(db_session, owner="gabriel", visibility="public")
    object_store: _ByteStore = {}
    chain = await _build_chain_with_objects(
        db_session, repo.repo_id, 1_000, object_store
    )
    expected_tip = chain[-1]

    refs = await wire_refs(db_session, repo.repo_id)
    assert refs is not None, "wire_refs returned None"

    heads = refs.branch_heads
    assert "main" in heads, f"branch 'main' missing from branch_heads: {heads}"

    reported_tip = heads["main"]
    assert reported_tip == expected_tip, (
        f"branch_heads['main'] = {reported_tip!r} "
        f"does not match stored tip {expected_tip!r}"
    )

    # The branch head must point at a commit that is actually stored.
    commit_row = await db_session.get(db.MusehubCommit, expected_tip)
    assert commit_row is not None, (
        f"tip commit {expected_tip!r} exists in MusehubBranch.head_commit_id "
        f"but NOT in musehub_commits"
    )
156
157
158 # ── LF-2 ──────────────────────────────────────────────────────────────────────
159
@pytest.mark.asyncio
async def test_lf2_wire_fetch_mpack_1000_commits(
    db_session: AsyncSession, monkeypatch: pytest.MonkeyPatch
) -> None:
    """wire_fetch_mpack with want=branch_heads.values() must return a non-empty mpack.

    This is the exact server-side call the bench makes during clone.

    The regression under test reports an *empty* ``mpack_id`` (``''``), so the
    first check rejects every falsy value, not just ``None``.
    """
    from musehub.services.musehub_wire import wire_refs, wire_fetch_mpack

    store = _stub_backend(monkeypatch)
    repo = await create_repo(db_session, owner="gabriel", visibility="public")
    await _build_chain_with_objects(db_session, repo.repo_id, 1_000, store)

    refs = await wire_refs(db_session, repo.repo_id)
    assert refs is not None
    want = list(refs.branch_heads.values())
    # An empty want list would make an "up-to-date" answer legitimate; rule it
    # out so a failure below points at the fetch path, not at wire_refs.
    assert want, "wire_refs returned no branch heads to fetch"

    result = await wire_fetch_mpack(db_session, repo.repo_id, want=want, have=[])

    mpack_id = result["mpack_id"]
    # Truthiness check: both None and '' mean no mpack was produced.
    assert mpack_id, (
        f"wire_fetch_mpack returned up-to-date (mpack_id={mpack_id!r}) for 1000-commit repo. "
        f"want={want!r}"
    )
    assert mpack_id.startswith("sha256:"), "mpack_id must be sha256-prefixed"
    assert result["commit_count"] == 1_000, (
        f"expected 1000 commits in mpack, got {result['commit_count']}"
    )
File History 1 commit
sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595 fix: typing audit — 0 violations, 0 untyped defs across all… Sonnet 4.6 minor 23 days ago