gabriel / musehub public
test_object_refs_on_repush.py python
263 lines 10.0 KB
Raw
sha256:0d5e0b2da626326b94809c9538e015ad168ec3f7b9333b7f25d5d5eaf411729f add tests for fixing object refs on repush Human 9 days ago
1 """Phase 1 — Failing tests for Bug A and Bug B (issue #76).
2
3 Bug A: object_refs only written for globally-new objects.
4 When the same objects are pushed to a second repo, _new_oids is empty
5 and _upsert_object_refs is never called for that repo.
6
7 Bug B: mpack_index byte-range entries have the same filter — pre-existing
8 objects get no inline byte_offset/byte_length row for the second repo.
9
10 These tests must be RED before any production fix lands. They become the
11 regression guard once Bug A and Bug B are fixed.
12
13 Tests
14 -----
15 ORP-1 First push to repo A writes object_refs for repo A.
16 ORP-2 Second push of the SAME mpack to repo B writes object_refs for repo B.
17 ORP-3 First push to repo A writes mpack_index byte ranges for the objects.
18 ORP-4 Second push of the SAME mpack to repo B writes mpack_index byte ranges.
19 ORP-5 object_refs count is correct for each repo independently (no cross-contamination).
20 """
21 from __future__ import annotations
22
23 import hashlib
24 import os
25 from unittest.mock import AsyncMock, MagicMock, patch
26
27 import pytest
28 import pytest_asyncio
29 from sqlalchemy import select, func
30 from sqlalchemy.ext.asyncio import AsyncSession
31
32 from muse.core.mpack import build_wire_mpack
33 from muse.core.types import blob_id, fake_id
34 from musehub.core.genesis import compute_identity_id, compute_repo_id
35 from musehub.db.musehub_repo_models import MusehubMPackIndex, MusehubObjectRef, MusehubRepo
36 from musehub.services.musehub_repository import create_repo
37 from musehub.types.json_types import JSONObject
38
39 # ---------------------------------------------------------------------------
40 # Test fixtures and shared mpack
41 # ---------------------------------------------------------------------------
42
# Owner handle for every repo created by these tests; the same value is sent
# as the pusher_id when a push is simulated.
_OWNER = "gabriel"
# Identity computed from the raw bytes b"gabriel"; handed to create_repo as
# the owner_user_id of each test repo.
_IDENTITY_ID = compute_identity_id(b"gabriel")
45
# Payloads for three distinct blobs. The bytes are fixed literals, so any ID
# hashed from them is the same on every run.
_BLOB_A_CONTENT, _BLOB_B_CONTENT, _BLOB_C_CONTENT = (
    b"blob-alpha-content-for-repush-test",
    b"blob-beta-content-for-repush-test",
    b"blob-gamma-content-for-repush-test",
)
50
def _sha256_id(data: bytes) -> str:
    """Return the ``sha256:``-prefixed hex digest of *data*."""
    hex_digest = hashlib.sha256(data).hexdigest()
    return "sha256:" + hex_digest
53
# Content-derived object IDs, one per blob payload, in A/B/C order.
_BLOB_A_OID, _BLOB_B_OID, _BLOB_C_OID = (
    _sha256_id(payload)
    for payload in (_BLOB_A_CONTENT, _BLOB_B_CONTENT, _BLOB_C_CONTENT)
)

_BLOB_OIDS = [_BLOB_A_OID, _BLOB_B_OID, _BLOB_C_OID]
59
# A single mpack carrying all three blobs (and no commits, snapshots or tags).
# The very same bytes are pushed to both repos in the repush tests.
_SHARED_MPACK_BYTES = build_wire_mpack({
    "blobs": [
        {"object_id": oid, "content": payload}
        for oid, payload in (
            (_BLOB_A_OID, _BLOB_A_CONTENT),
            (_BLOB_B_OID, _BLOB_B_CONTENT),
            (_BLOB_C_OID, _BLOB_C_CONTENT),
        )
    ],
    "commits": [],
    "snapshots": [],
    "tags": [],
})
# Key the pack is pushed under: blob_id of the packed bytes.
_SHARED_MPACK_KEY = blob_id(_SHARED_MPACK_BYTES)
72
73
async def _make_repo(session: AsyncSession, name: str) -> MusehubRepo:
    """Create a public repo called *name* for ``_OWNER`` and commit it.

    The repo is created with ``initialize=False`` and the session is
    committed before the new ``MusehubRepo`` is returned.
    """
    repo_settings = {
        "name": name,
        "owner": _OWNER,
        "owner_user_id": _IDENTITY_ID,
        "visibility": "public",
        "initialize": False,
    }
    repo = await create_repo(session, **repo_settings)
    await session.commit()
    return repo
85
86
def _mock_backend(mpack_bytes: bytes) -> MagicMock:
    """Build a stand-in storage backend whose awaited ``get_mpack`` yields *mpack_bytes*."""
    # Keyword arguments to MagicMock are set as attributes on the new mock.
    return MagicMock(get_mpack=AsyncMock(return_value=mpack_bytes))
91
92
async def _push(session: AsyncSession, repo_id: str, mpack_bytes: bytes, mpack_key: str) -> JSONObject:
    """Run ``wire_push_unpack_mpack`` for *repo_id* against a mocked backend.

    ``get_backend`` is patched at three import sites so each one returns a
    mock whose ``get_mpack`` serves *mpack_bytes*.  The push is always made
    by ``_OWNER`` to branch ``main`` with a fake head commit, declaring zero
    commits and three blobs.  Returns whatever the push call returns.
    """
    from musehub.services.musehub_wire_push import wire_push_unpack_mpack

    storage = _mock_backend(mpack_bytes)
    with patch("musehub.services.musehub_wire.get_backend", return_value=storage), \
            patch("musehub.services.musehub_wire_push.get_backend", return_value=storage), \
            patch("musehub.storage.backends.get_backend", return_value=storage):
        return await wire_push_unpack_mpack(
            session,
            repo_id,
            mpack_key,
            pusher_id=_OWNER,
            branch="main",
            head_commit_id=fake_id("head"),
            commits_count=0,
            blobs_count=3,
        )
114
115
async def _object_ref_count(session: AsyncSession, repo_id: str) -> int:
    """Count the ``MusehubObjectRef`` rows whose ``repo_id`` equals *repo_id*."""
    count_stmt = select(func.count()).where(MusehubObjectRef.repo_id == repo_id)
    outcome = await session.execute(count_stmt)
    return outcome.scalar_one()
121
122
async def _mpack_index_with_byte_range_count(session: AsyncSession, mpack_id: str) -> int:
    """Count ``MusehubMPackIndex`` object rows of *mpack_id* that carry a byte offset.

    Only rows with ``entity_type == "object"`` and a non-NULL ``byte_offset``
    are counted.
    """
    criteria = (
        MusehubMPackIndex.mpack_id == mpack_id,
        MusehubMPackIndex.entity_type == "object",
        MusehubMPackIndex.byte_offset.is_not(None),
    )
    outcome = await session.execute(select(func.count()).where(*criteria))
    return outcome.scalar_one()
132
133
134 # ---------------------------------------------------------------------------
135 # ORP-1 First push to repo A writes object_refs
136 # ---------------------------------------------------------------------------
137
@pytest.mark.asyncio
async def test_ORP1_first_push_writes_object_refs_for_repo_a(
    db_session: AsyncSession,
) -> None:
    """ORP-1 baseline: a first push of the shared mpack leaves 3 object_refs on repo A."""
    repo = await _make_repo(db_session, "orp-repo-a-1")
    await _push(db_session, repo.repo_id, _SHARED_MPACK_BYTES, _SHARED_MPACK_KEY)

    ref_count = await _object_ref_count(db_session, repo.repo_id)

    failure_hint = f"Expected 3 object_refs for repo A after first push, got {ref_count}"
    assert ref_count == 3, failure_hint
151
152
153 # ---------------------------------------------------------------------------
154 # ORP-2 Second push of the SAME mpack to repo B writes object_refs for repo B
155 # — Bug A: this FAILS before the fix
156 # ---------------------------------------------------------------------------
157
@pytest.mark.asyncio
async def test_ORP2_repush_same_mpack_writes_object_refs_for_repo_b(
    db_session: AsyncSession,
) -> None:
    """Bug A: a repush of already-known objects to a second repo must record refs there.

    The pre-fix behaviour this guards against: wire_push_unpack_mpack only
    considers globally-new objects (_new_oids).  Once repo A's push has
    stored the blobs in musehub_objects, repo B's push sees an empty
    _new_oids and never reaches _upsert_object_refs.
    """
    first_repo = await _make_repo(db_session, "orp-repo-a-2")
    second_repo = await _make_repo(db_session, "orp-repo-b-2")

    # Order matters: repo A receives the objects while they are globally new,
    # so by the time repo B is pushed to they already exist.
    for target in (first_repo, second_repo):
        await _push(db_session, target.repo_id, _SHARED_MPACK_BYTES, _SHARED_MPACK_KEY)

    refs_in_b = await _object_ref_count(db_session, second_repo.repo_id)
    failure_hint = (
        f"Bug A: expected 3 object_refs for repo B after repush of same mpack, got {refs_in_b}. "
        "_upsert_object_refs must be called with _cc_oids, not _new_oids."
    )
    assert refs_in_b == 3, failure_hint
182
183
184 # ---------------------------------------------------------------------------
185 # ORP-3 First push to repo A writes mpack_index byte ranges
186 # ---------------------------------------------------------------------------
187
@pytest.mark.asyncio
async def test_ORP3_first_push_writes_mpack_index_byte_ranges(
    db_session: AsyncSession,
) -> None:
    """ORP-3 baseline: a first push indexes a byte range for each of the 3 blobs."""
    repo = await _make_repo(db_session, "orp-repo-a-3")
    await _push(db_session, repo.repo_id, _SHARED_MPACK_BYTES, _SHARED_MPACK_KEY)

    ranged_rows = await _mpack_index_with_byte_range_count(db_session, _SHARED_MPACK_KEY)

    failure_hint = f"Expected 3 mpack_index rows with byte ranges after first push, got {ranged_rows}"
    assert ranged_rows == 3, failure_hint
201
202
203 # ---------------------------------------------------------------------------
204 # ORP-4 Second push of the SAME mpack to repo B writes mpack_index byte ranges
205 # — Bug B: this FAILS before the fix
206 # ---------------------------------------------------------------------------
207
@pytest.mark.asyncio
async def test_ORP4_repush_same_mpack_writes_mpack_index_byte_ranges(
    db_session: AsyncSession,
) -> None:
    """Bug B: byte-range index rows must be written even when the objects already exist.

    The pre-fix behaviour this guards against: _new_oids_for_idx applies the
    same globally-new filter, so after repo A's push the objects are known
    and repo B's push inserts no mpack_index rows at all.
    """
    first_repo = await _make_repo(db_session, "orp-repo-a-4")
    second_repo = await _make_repo(db_session, "orp-repo-b-4")

    # Initial push: objects and their byte ranges are stored for the first time.
    await _push(db_session, first_repo.repo_id, _SHARED_MPACK_BYTES, _SHARED_MPACK_KEY)

    # Drop this mpack's index rows so the second push starts from an empty
    # index while the objects themselves remain stored.
    wipe_index = MusehubMPackIndex.__table__.delete().where(
        MusehubMPackIndex.mpack_id == _SHARED_MPACK_KEY
    )
    await db_session.execute(wipe_index)
    await db_session.flush()

    # Repush of the identical mpack to repo B: the byte ranges must reappear.
    await _push(db_session, second_repo.repo_id, _SHARED_MPACK_BYTES, _SHARED_MPACK_KEY)

    ranged_rows = await _mpack_index_with_byte_range_count(db_session, _SHARED_MPACK_KEY)
    failure_hint = (
        f"Bug B: expected 3 mpack_index byte-range rows after repush, got {ranged_rows}. "
        "Inline byte-range insert must cover _cc_oids not _new_oids."
    )
    assert ranged_rows == 3, failure_hint
239
240
241 # ---------------------------------------------------------------------------
242 # ORP-5 object_refs are per-repo — no cross-contamination
243 # ---------------------------------------------------------------------------
244
@pytest.mark.asyncio
async def test_ORP5_object_refs_are_scoped_per_repo(
    db_session: AsyncSession,
) -> None:
    """ORP-5: each repo owns its own object_refs rows.

    After the same mpack is pushed to two repos, each repo must report 3 rows
    of its own rather than the two sharing a single set of 3.
    """
    repo_a = await _make_repo(db_session, "orp-repo-a-5")
    repo_b = await _make_repo(db_session, "orp-repo-b-5")

    # Push to A first, then B, using the identical pack bytes and key.
    for target in (repo_a, repo_b):
        await _push(db_session, target.repo_id, _SHARED_MPACK_BYTES, _SHARED_MPACK_KEY)

    # Read both counts before asserting so neither query is skipped on failure.
    count_a = await _object_ref_count(db_session, repo_a.repo_id)
    count_b = await _object_ref_count(db_session, repo_b.repo_id)

    assert count_a == 3, f"repo A should have 3 object_refs, got {count_a}"
    assert count_b == 3, f"repo B should have 3 object_refs, got {count_b}"
File History 1 commit
sha256:0d5e0b2da626326b94809c9538e015ad168ec3f7b9333b7f25d5d5eaf411729f add tests for fixing object refs on repush Human 9 days ago