gabriel / musehub public
test_mpack_index_job_object_refs.py python
161 lines 5.8 KB
Raw
sha256:d8a98ffcade6226a977047fb68396a940db91fa1a2f6590b0e55bb1a0f0d2735 test(mpack-index-job): Phase 3 failing test for Bug D — job… Sonnet 4.6 9 days ago
1 """Phase 3 — Failing test for Bug D (issue #76).
2
3 Bug D: process_mpack_index_job reads the mpack, computes byte offsets, and
4 upserts mpack_index rows and musehub_objects.storage_uri — but never
5 calls _upsert_object_refs. So even after the job completes, the repo's
6 object_refs table stays empty.
7
8 These tests must be RED before the Bug D fix lands. They become the
9 regression guard once Bug D is fixed.
10
11 Tests
12 -----
13 MPIJ-1 process_mpack_index_job writes mpack_index byte ranges (baseline — PASS).
14 MPIJ-2 process_mpack_index_job writes object_refs for the job's repo (Bug D — FAIL).
15 """
16 from __future__ import annotations
17
18 import hashlib
19 from unittest.mock import AsyncMock, MagicMock, patch
20
21 import pytest
22 from sqlalchemy import func, select
23 from sqlalchemy.ext.asyncio import AsyncSession
24
25 from muse.core.mpack import build_wire_mpack
26 from muse.core.types import blob_id, fake_id
27 from musehub.core.genesis import compute_identity_id
28 from musehub.db.musehub_jobs_models import MusehubBackgroundJob
29 from musehub.db.musehub_repo_models import MusehubMPackIndex, MusehubObjectRef
30 from musehub.models.musehub import RepoResponse
31 from musehub.services.musehub_repository import create_repo
32 from musehub.services.musehub_wire_push import process_mpack_index_job
33 from musehub.types.json_types import JSONObject
34
35 # ---------------------------------------------------------------------------
36 # Shared fixtures
37 # ---------------------------------------------------------------------------
38
# Owner handle used for every repo created by this module's helpers.
_OWNER = "gabriel"
# Identity ID computed from the owner handle's bytes (b"gabriel").
_IDENTITY_ID = compute_identity_id(_OWNER.encode("utf-8"))

# Raw contents of the three fixture blobs packed into the test mpack.
_BLOB_A_CONTENT = b"blob-alpha-mpij-test"
_BLOB_B_CONTENT = b"blob-beta-mpij-test"
_BLOB_C_CONTENT = b"blob-gamma-mpij-test"
45
46
def _sha256_id(data: bytes) -> str:
    """Return the ``sha256:``-prefixed hex digest of *data*."""
    hex_digest = hashlib.sha256(data).hexdigest()
    return "sha256:" + hex_digest
49
50
# Object IDs of the fixture blobs, derived from their contents.
_BLOB_A_OID = _sha256_id(_BLOB_A_CONTENT)
_BLOB_B_OID = _sha256_id(_BLOB_B_CONTENT)
_BLOB_C_OID = _sha256_id(_BLOB_C_CONTENT)

# Wire mpack holding exactly the three blobs above and no commits,
# snapshots, or tags; the tests below expect three rows per table.
_MPACK_BYTES = build_wire_mpack({
    "blobs": [
        {"object_id": oid, "content": content}
        for oid, content in (
            (_BLOB_A_OID, _BLOB_A_CONTENT),
            (_BLOB_B_OID, _BLOB_B_CONTENT),
            (_BLOB_C_OID, _BLOB_C_CONTENT),
        )
    ],
    "commits": [],
    "snapshots": [],
    "tags": [],
})
# Key under which the mpack is referenced by the job payload and index rows.
_MPACK_KEY = blob_id(_MPACK_BYTES)
66
67
async def _make_repo(session: AsyncSession, name: str) -> RepoResponse:
    """Create a public repo named *name* for the fixture owner and commit it.

    The repo is created with ``initialize=False``; the session is committed
    before the ``create_repo`` result is returned.
    """
    repo_options = {
        "name": name,
        "owner": _OWNER,
        "owner_user_id": _IDENTITY_ID,
        "visibility": "public",
        "initialize": False,
    }
    created = await create_repo(session, **repo_options)
    await session.commit()
    return created
79
80
async def _make_mpack_index_job(
    session: AsyncSession, repo_id: str, mpack_key: str
) -> MusehubBackgroundJob:
    """Add a ``running`` ``mpack.index`` job row for *repo_id* and flush it.

    The job ID is derived from the first eight characters of *repo_id*, and
    the payload points at *mpack_key* on branch ``main``. The row is flushed
    (not committed) so it is visible within the same session.
    """
    new_job_id = fake_id(f"job-{repo_id[:8]}")
    job_payload = {"mpack_key": mpack_key, "head": fake_id("head"), "branch": "main"}
    pending_job = MusehubBackgroundJob(
        job_id=new_job_id,
        repo_id=repo_id,
        job_type="mpack.index",
        status="running",
        payload=job_payload,
    )
    session.add(pending_job)
    await session.flush()
    return pending_job
94
95
async def _run_mpack_index_job(
    session: AsyncSession, job_id: str, mpack_bytes: bytes
) -> JSONObject:
    """Run ``process_mpack_index_job`` against a stubbed storage backend.

    ``musehub.storage.backends.get_backend`` is patched to return a mock whose
    async ``get_mpack`` yields *mpack_bytes*, so no real storage is touched.
    """
    stub_backend = MagicMock(get_mpack=AsyncMock(return_value=mpack_bytes))
    backend_patch = patch(
        "musehub.storage.backends.get_backend", return_value=stub_backend
    )
    with backend_patch:
        job_result = await process_mpack_index_job(session, job_id)
    return job_result
103
104
async def _object_ref_count(session: AsyncSession, repo_id: str) -> int:
    """Return the number of ``MusehubObjectRef`` rows whose repo is *repo_id*."""
    count_stmt = select(func.count()).where(MusehubObjectRef.repo_id == repo_id)
    result = await session.execute(count_stmt)
    return result.scalar_one()
109
110
async def _mpack_index_byte_range_count(session: AsyncSession, mpack_id: str) -> int:
    """Count ``object`` index rows of *mpack_id* that have a byte offset set."""
    row_filters = (
        MusehubMPackIndex.mpack_id == mpack_id,
        MusehubMPackIndex.entity_type == "object",
        MusehubMPackIndex.byte_offset.is_not(None),
    )
    count_stmt = select(func.count()).where(*row_filters)
    result = await session.execute(count_stmt)
    return result.scalar_one()
119
120
121 # ---------------------------------------------------------------------------
122 # MPIJ-1 Baseline: job writes mpack_index byte ranges
123 # ---------------------------------------------------------------------------
124
@pytest.mark.asyncio
async def test_MPIJ1_job_writes_mpack_index_byte_ranges(db_session: AsyncSession) -> None:
    """MPIJ-1 baseline: the job leaves one byte-range index row per fixture blob.

    The fixture mpack holds three blobs, so three ``object`` rows with a
    non-null ``byte_offset`` are expected under ``_MPACK_KEY``.
    """
    created_repo = await _make_repo(db_session, "mpij-repo-1")
    index_job = await _make_mpack_index_job(
        db_session, created_repo.repo_id, _MPACK_KEY
    )
    await _run_mpack_index_job(db_session, index_job.job_id, _MPACK_BYTES)

    count = await _mpack_index_byte_range_count(db_session, _MPACK_KEY)
    assert count == 3, f"Expected 3 mpack_index byte-range rows from job, got {count}"
135
136
137 # ---------------------------------------------------------------------------
138 # MPIJ-2 Bug D: job does NOT write object_refs for the repo
139 # ---------------------------------------------------------------------------
140
@pytest.mark.asyncio
async def test_MPIJ2_job_writes_object_refs_for_repo(db_session: AsyncSession) -> None:
    """MPIJ-2 (Bug D): the job must leave object_refs rows for the job's repo.

    Per issue #76, process_mpack_index_job upserts mpack_index byte ranges and
    musehub_objects.storage_uri but never calls _upsert_object_refs, so the
    repo's object_refs table stays empty and wire fetch has no
    object-to-mpack mapping for it.

    Expected fix: call _upsert_object_refs(session, repo_id, all_blob_oids)
    in process_mpack_index_job after the mpack_index upsert step. The fixture
    mpack holds three blobs, so three object_refs rows are expected.
    """
    created_repo = await _make_repo(db_session, "mpij-repo-2")
    target_repo_id = created_repo.repo_id
    index_job = await _make_mpack_index_job(db_session, target_repo_id, _MPACK_KEY)
    await _run_mpack_index_job(db_session, index_job.job_id, _MPACK_BYTES)

    count = await _object_ref_count(db_session, target_repo_id)
    assert count == 3, (
        f"Bug D: expected 3 object_refs for repo after mpack.index job, got {count}. "
        f"process_mpack_index_job must call _upsert_object_refs(session, repo_id, all_blob_oids)."
    )
File History 1 commit
sha256:d8a98ffcade6226a977047fb68396a940db91fa1a2f6590b0e55bb1a0f0d2735 test(mpack-index-job): Phase 3 failing test for Bug D — job… Sonnet 4.6 9 days ago