gabriel / musehub public
test_object_refs_on_repush.py python
262 lines 9.9 KB
Raw
sha256:0d5e0b2da626326b94809c9538e015ad168ec3f7b9333b7f25d5d5eaf411729f add tests for fixing object refs on repush Human 2 days ago
1 """Phase 1 — Failing tests for Bug A and Bug B (issue #76).
2
3 Bug A: object_refs only written for globally-new objects.
4 When the same objects are pushed to a second repo, _new_oids is empty
5 and _upsert_object_refs is never called for that repo.
6
7 Bug B: mpack_index byte-range entries have the same filter — pre-existing
8 objects get no inline byte_offset/byte_length row for the second repo.
9
10 These tests must be RED before any production fix lands. They become the
11 regression guard once Bug A and Bug B are fixed.
12
13 Tests
14 -----
15 ORP-1 First push to repo A writes object_refs for repo A.
16 ORP-2 Second push of the SAME mpack to repo B writes object_refs for repo B.
17 ORP-3 First push to repo A writes mpack_index byte ranges for the objects.
18 ORP-4 Second push of the SAME mpack to repo B writes mpack_index byte ranges.
19 ORP-5 object_refs count is correct for each repo independently (no cross-contamination).
20 """
21 from __future__ import annotations
22
23 import hashlib
24 import os
25 from unittest.mock import AsyncMock, MagicMock, patch
26
27 import pytest
28 import pytest_asyncio
29 from sqlalchemy import select, func
30 from sqlalchemy.ext.asyncio import AsyncSession
31
32 from muse.core.mpack import build_wire_mpack
33 from muse.core.types import blob_id, fake_id
34 from musehub.core.genesis import compute_identity_id, compute_repo_id
35 from musehub.db.musehub_repo_models import MusehubMPackIndex, MusehubObjectRef, MusehubRepo
36 from musehub.services.musehub_repository import create_repo
37
38 # ---------------------------------------------------------------------------
39 # Test fixtures and shared mpack
40 # ---------------------------------------------------------------------------
41
# Owner handle used for every repo created by this module, and the identity id
# computed from the byte string b"gabriel".
_OWNER: str = "gabriel"
_IDENTITY_ID = compute_identity_id(b"gabriel")

# Payloads of the three distinct blobs. The contents are fixed literals, so the
# object ids derived from them are deterministic.
_BLOB_A_CONTENT: bytes = b"blob-alpha-content-for-repush-test"
_BLOB_B_CONTENT: bytes = b"blob-beta-content-for-repush-test"
_BLOB_C_CONTENT: bytes = b"blob-gamma-content-for-repush-test"
49
50 def _sha256_id(data: bytes) -> str:
51 return f"sha256:{hashlib.sha256(data).hexdigest()}"
52
# Object ids derived from the blob payloads, in A, B, C order.
_BLOB_A_OID, _BLOB_B_OID, _BLOB_C_OID = (
    _sha256_id(payload)
    for payload in (_BLOB_A_CONTENT, _BLOB_B_CONTENT, _BLOB_C_CONTENT)
)

_BLOB_OIDS = [_BLOB_A_OID, _BLOB_B_OID, _BLOB_C_OID]

# A single mpack holding all three blobs (no commits, snapshots or tags).
# The same bytes are pushed to both repos in the tests below.
_SHARED_MPACK_BYTES = build_wire_mpack({
    "blobs": [
        {"object_id": oid, "content": payload}
        for oid, payload in zip(
            _BLOB_OIDS,
            (_BLOB_A_CONTENT, _BLOB_B_CONTENT, _BLOB_C_CONTENT),
        )
    ],
    "commits": [],
    "snapshots": [],
    "tags": [],
})
_SHARED_MPACK_KEY = blob_id(_SHARED_MPACK_BYTES)
71
72
async def _make_repo(session: AsyncSession, name: str) -> MusehubRepo:
    """Create a public repo called *name* for ``_OWNER`` and commit the session.

    The repo is created with ``initialize=False`` and the object returned by
    ``create_repo`` is handed back to the caller.
    """
    repo_options = {
        "name": name,
        "owner": _OWNER,
        "owner_user_id": _IDENTITY_ID,
        "visibility": "public",
        "initialize": False,
    }
    repo = await create_repo(session, **repo_options)
    await session.commit()
    return repo
84
85
86 def _mock_backend(mpack_bytes: bytes) -> MagicMock:
87 backend = MagicMock()
88 backend.get_mpack = AsyncMock(return_value=mpack_bytes)
89 return backend
90
91
async def _push(
    session: AsyncSession,
    repo_id: str,
    mpack_bytes: bytes,
    mpack_key: str,
    *,
    commits_count: int = 0,
    blobs_count: int = 3,
) -> dict:
    """Run ``wire_push_unpack_mpack`` for *repo_id* against a mocked backend.

    ``get_backend`` is patched in the three modules listed below so that each
    returns one mock backend whose ``get_mpack`` yields *mpack_bytes*. The push
    is made as ``_OWNER`` on branch ``"main"`` with ``fake_id("head")`` as the
    head commit id.

    Args:
        session: Database session passed through to ``wire_push_unpack_mpack``.
        repo_id: Id of the repo receiving the push.
        mpack_bytes: Raw mpack served by the mocked backend.
        mpack_key: Key of the mpack, passed to ``wire_push_unpack_mpack``.
        commits_count: Commit count reported for the push. Defaults to 0,
            matching the shared fixture mpack.
        blobs_count: Blob count reported for the push. Defaults to 3,
            matching the shared fixture mpack.

    Returns:
        Whatever ``wire_push_unpack_mpack`` returns.
    """
    # Kept function-local, as in the original helper.
    from contextlib import ExitStack

    from musehub.services.musehub_wire_push import wire_push_unpack_mpack

    backend = _mock_backend(mpack_bytes)
    patch_targets = (
        "musehub.services.musehub_wire.get_backend",
        "musehub.services.musehub_wire_push.get_backend",
        "musehub.storage.backends.get_backend",
    )
    with ExitStack() as stack:
        # Enter the patches in listed order; ExitStack unwinds them in reverse.
        for target in patch_targets:
            stack.enter_context(patch(target, return_value=backend))
        result = await wire_push_unpack_mpack(
            session,
            repo_id,
            mpack_key,
            pusher_id=_OWNER,
            branch="main",
            head_commit_id=fake_id("head"),
            commits_count=commits_count,
            blobs_count=blobs_count,
        )
    return result
113
114
async def _object_ref_count(session: AsyncSession, repo_id: str) -> int:
    """Count the ``MusehubObjectRef`` rows whose ``repo_id`` equals *repo_id*."""
    stmt = select(func.count()).where(MusehubObjectRef.repo_id == repo_id)
    result = await session.execute(stmt)
    return result.scalar_one()
120
121
async def _mpack_index_with_byte_range_count(session: AsyncSession, mpack_id: str) -> int:
    """Count ``MusehubMPackIndex`` object rows of *mpack_id* that have a byte offset.

    Only rows with ``entity_type == "object"`` and a non-NULL ``byte_offset``
    are counted.
    """
    criteria = (
        MusehubMPackIndex.mpack_id == mpack_id,
        MusehubMPackIndex.entity_type == "object",
        MusehubMPackIndex.byte_offset.is_not(None),
    )
    stmt = select(func.count()).where(*criteria)
    result = await session.execute(stmt)
    return result.scalar_one()
131
132
133 # ---------------------------------------------------------------------------
134 # ORP-1 First push to repo A writes object_refs
135 # ---------------------------------------------------------------------------
136
@pytest.mark.asyncio
async def test_ORP1_first_push_writes_object_refs_for_repo_a(
    db_session: AsyncSession,
) -> None:
    """ORP-1 baseline: a first push of the shared mpack gives repo A three object_refs."""
    target_repo = await _make_repo(db_session, "orp-repo-a-1")
    await _push(db_session, target_repo.repo_id, _SHARED_MPACK_BYTES, _SHARED_MPACK_KEY)

    count = await _object_ref_count(db_session, target_repo.repo_id)

    assert count == 3, f"Expected 3 object_refs for repo A after first push, got {count}"
150
151
152 # ---------------------------------------------------------------------------
153 # ORP-2 Second push of the SAME mpack to repo B writes object_refs for repo B
154 # — Bug A: this FAILS before the fix
155 # ---------------------------------------------------------------------------
156
@pytest.mark.asyncio
async def test_ORP2_repush_same_mpack_writes_object_refs_for_repo_b(
    db_session: AsyncSession,
) -> None:
    """ORP-2 (Bug A): re-pushing known objects to a second repo must write its object_refs.

    Per issue #76, wire_push_unpack_mpack only handles _new_oids, i.e. objects
    that are new globally. Once repo A's push has stored the blobs in
    musehub_objects, repo B's push sees an empty _new_oids and
    _upsert_object_refs is never invoked for it.
    """
    first_repo = await _make_repo(db_session, "orp-repo-a-2")
    second_repo = await _make_repo(db_session, "orp-repo-b-2")

    # Repo A goes first, so the objects are globally new for it and already
    # exist by the time the identical mpack reaches repo B.
    for repo in (first_repo, second_repo):
        await _push(db_session, repo.repo_id, _SHARED_MPACK_BYTES, _SHARED_MPACK_KEY)

    count = await _object_ref_count(db_session, second_repo.repo_id)

    assert count == 3, (
        f"Bug A: expected 3 object_refs for repo B after repush of same mpack, got {count}. "
        f"_upsert_object_refs must be called with _cc_oids, not _new_oids."
    )
181
182
183 # ---------------------------------------------------------------------------
184 # ORP-3 First push to repo A writes mpack_index byte ranges
185 # ---------------------------------------------------------------------------
186
@pytest.mark.asyncio
async def test_ORP3_first_push_writes_mpack_index_byte_ranges(
    db_session: AsyncSession,
) -> None:
    """ORP-3 baseline: a first push records a byte-range index row for each of the three blobs."""
    target_repo = await _make_repo(db_session, "orp-repo-a-3")
    await _push(db_session, target_repo.repo_id, _SHARED_MPACK_BYTES, _SHARED_MPACK_KEY)

    count = await _mpack_index_with_byte_range_count(db_session, _SHARED_MPACK_KEY)

    assert count == 3, f"Expected 3 mpack_index rows with byte ranges after first push, got {count}"
200
201
202 # ---------------------------------------------------------------------------
203 # ORP-4 Second push of the SAME mpack to repo B writes mpack_index byte ranges
204 # — Bug B: this FAILS before the fix
205 # ---------------------------------------------------------------------------
206
@pytest.mark.asyncio
async def test_ORP4_repush_same_mpack_writes_mpack_index_byte_ranges(
    db_session: AsyncSession,
) -> None:
    """ORP-4 (Bug B): byte-range index rows must be written even for known objects.

    Per issue #76, _new_oids_for_idx applies the same globally-new filter as
    the object_refs path. The objects already exist after repo A's push, so
    repo B's push adds no mpack_index rows.
    """
    first_repo = await _make_repo(db_session, "orp-repo-a-4")
    second_repo = await _make_repo(db_session, "orp-repo-b-4")

    # First push: objects and their byte ranges are stored for the first time.
    await _push(db_session, first_repo.repo_id, _SHARED_MPACK_BYTES, _SHARED_MPACK_KEY)

    # Remove every index row of the shared mpack so that any rows counted
    # below can only come from the second push.
    wipe_index_rows = MusehubMPackIndex.__table__.delete().where(
        MusehubMPackIndex.mpack_id == _SHARED_MPACK_KEY
    )
    await db_session.execute(wipe_index_rows)
    await db_session.flush()

    # Second push of the identical mpack: byte ranges must be written again.
    await _push(db_session, second_repo.repo_id, _SHARED_MPACK_BYTES, _SHARED_MPACK_KEY)

    count = await _mpack_index_with_byte_range_count(db_session, _SHARED_MPACK_KEY)

    assert count == 3, (
        f"Bug B: expected 3 mpack_index byte-range rows after repush, got {count}. "
        f"Inline byte-range insert must cover _cc_oids not _new_oids."
    )
238
239
240 # ---------------------------------------------------------------------------
241 # ORP-5 object_refs are per-repo — no cross-contamination
242 # ---------------------------------------------------------------------------
243
@pytest.mark.asyncio
async def test_ORP5_object_refs_are_scoped_per_repo(
    db_session: AsyncSession,
) -> None:
    """ORP-5: each repo owns its own object_refs rows.

    After the shared mpack is pushed to both repos, the per-repo count must be
    3 for repo A and 3 for repo B, rather than one set of 3 shared by both.
    """
    first_repo = await _make_repo(db_session, "orp-repo-a-5")
    second_repo = await _make_repo(db_session, "orp-repo-b-5")

    # Same mpack, pushed to repo A and then to repo B.
    for repo in (first_repo, second_repo):
        await _push(db_session, repo.repo_id, _SHARED_MPACK_BYTES, _SHARED_MPACK_KEY)

    count_a = await _object_ref_count(db_session, first_repo.repo_id)
    count_b = await _object_ref_count(db_session, second_repo.repo_id)

    assert count_a == 3, f"repo A should have 3 object_refs, got {count_a}"
    assert count_b == 3, f"repo B should have 3 object_refs, got {count_b}"
File History 1 commit
sha256:0d5e0b2da626326b94809c9538e015ad168ec3f7b9333b7f25d5d5eaf411729f add tests for fixing object refs on repush Human 2 days ago