test_mpack_index_job_object_refs.py
python
sha256:d8a98ffcade6226a977047fb68396a940db91fa1a2f6590b0e55bb1a0f0d2735
test(mpack-index-job): Phase 3 failing test for Bug D — job…
Sonnet 4.6
3 days ago
| 1 | """Phase 3 — Failing test for Bug D (issue #76). |
| 2 | |
| 3 | Bug D: process_mpack_index_job reads the mpack, computes byte offsets, and |
| 4 | upserts mpack_index rows and musehub_objects.storage_uri — but never |
| 5 | calls _upsert_object_refs. So even after the job completes, the repo's |
| 6 | object_refs table stays empty. |
| 7 | |
| 8 | MPIJ-2 must be RED before the Bug D fix lands (MPIJ-1 is a passing baseline). |
| 9 | Both tests become the regression guard once Bug D is fixed. |
| 10 | |
| 11 | Tests |
| 12 | ----- |
| 13 | MPIJ-1 process_mpack_index_job writes mpack_index byte ranges (baseline — PASS). |
| 14 | MPIJ-2 process_mpack_index_job writes object_refs for the job's repo (Bug D — FAIL). |
| 15 | """ |
| 16 | from __future__ import annotations |
| 17 | |
| 18 | import hashlib |
| 19 | from unittest.mock import AsyncMock, MagicMock, patch |
| 20 | |
| 21 | import pytest |
| 22 | from sqlalchemy import func, select |
| 23 | from sqlalchemy.ext.asyncio import AsyncSession |
| 24 | |
| 25 | from muse.core.mpack import build_wire_mpack |
| 26 | from muse.core.types import blob_id, fake_id |
| 27 | from musehub.core.genesis import compute_identity_id |
| 28 | from musehub.db.musehub_jobs_models import MusehubBackgroundJob |
| 29 | from musehub.db.musehub_repo_models import MusehubMPackIndex, MusehubObjectRef |
| 30 | from musehub.services.musehub_repository import create_repo |
| 31 | from musehub.services.musehub_wire_push import process_mpack_index_job |
| 32 | |
| 33 | # --------------------------------------------------------------------------- |
| 34 | # Shared fixtures |
| 35 | # --------------------------------------------------------------------------- |
| 36 | |
# Owner handle used for every repo created in this module, and the identity
# id computed from that same handle.
_OWNER = "gabriel"
_IDENTITY_ID = compute_identity_id(_OWNER.encode())

# Raw payloads of the three blobs packed into the test mpack.
_BLOB_A_CONTENT = b"blob-alpha-mpij-test"
_BLOB_B_CONTENT = b"blob-beta-mpij-test"
_BLOB_C_CONTENT = b"blob-gamma-mpij-test"
| 43 | |
| 44 | |
def _sha256_id(data: bytes) -> str:
    """Return the ``sha256:<hexdigest>`` identifier for *data*."""
    digest = hashlib.sha256(data).hexdigest()
    return f"sha256:{digest}"
| 47 | |
| 48 | |
# sha256-based object ids for each blob payload.
_BLOB_A_OID = _sha256_id(_BLOB_A_CONTENT)
_BLOB_B_OID = _sha256_id(_BLOB_B_CONTENT)
_BLOB_C_OID = _sha256_id(_BLOB_C_CONTENT)

# Wire mpack carrying only the three blobs; commits, snapshots and tags are
# left empty.
_MPACK_BYTES = build_wire_mpack({
    "blobs": [
        {"object_id": _BLOB_A_OID, "content": _BLOB_A_CONTENT},
        {"object_id": _BLOB_B_OID, "content": _BLOB_B_CONTENT},
        {"object_id": _BLOB_C_OID, "content": _BLOB_C_CONTENT},
    ],
    "commits": [],
    "snapshots": [],
    "tags": [],
})
# Used both as the job payload's "mpack_key" and as the mpack_id the
# mpack_index assertions filter on.
_MPACK_KEY = blob_id(_MPACK_BYTES)
| 64 | |
| 65 | |
async def _make_repo(session: AsyncSession, name: str):
    """Create a public, uninitialised repo owned by ``_OWNER`` and commit it.

    Args:
        session: Async session the repo is created and committed through.
        name: Repo name forwarded to ``create_repo``.

    Returns:
        The value returned by ``create_repo``; callers read ``.repo_id`` from it.
    """
    repo = await create_repo(
        session,
        name=name,
        owner=_OWNER,
        owner_user_id=_IDENTITY_ID,
        visibility="public",
        initialize=False,
    )
    await session.commit()
    return repo
| 77 | |
| 78 | |
async def _make_mpack_index_job(
    session: AsyncSession, repo_id: str, mpack_key: str
) -> MusehubBackgroundJob:
    """Add a ``running`` ``mpack.index`` job row for *repo_id* and flush it.

    The job id is built from the first eight characters of *repo_id*. The
    payload carries *mpack_key* plus a fake head id and the branch ``"main"``.
    The row is flushed but not committed.

    Args:
        session: Async session the job row is added to.
        repo_id: Repo the job belongs to.
        mpack_key: Stored in the payload under ``"mpack_key"``.

    Returns:
        The flushed ``MusehubBackgroundJob`` instance.
    """
    payload = {
        "mpack_key": mpack_key,
        "head": fake_id("head"),
        "branch": "main",
    }
    job = MusehubBackgroundJob(
        job_id=fake_id(f"job-{repo_id[:8]}"),
        repo_id=repo_id,
        job_type="mpack.index",
        status="running",
        payload=payload,
    )
    session.add(job)
    await session.flush()
    return job
| 92 | |
| 93 | |
async def _run_mpack_index_job(
    session: AsyncSession, job_id: str, mpack_bytes: bytes
) -> dict:
    """Run ``process_mpack_index_job`` against a mocked storage backend.

    The mock backend's ``get_mpack`` coroutine returns *mpack_bytes*, so no
    real storage is touched.

    Args:
        session: Async session handed to the job.
        job_id: Id of the job row to process.
        mpack_bytes: Bytes the mocked ``get_mpack`` returns.

    Returns:
        Whatever ``process_mpack_index_job`` returns.
    """
    backend = MagicMock()
    backend.get_mpack = AsyncMock(return_value=mpack_bytes)
    # NOTE(review): this patches the name on musehub.storage.backends; it only
    # takes effect if the job looks get_backend up through that module at call
    # time — confirm against musehub_wire_push.
    with patch("musehub.storage.backends.get_backend", return_value=backend):
        return await process_mpack_index_job(session, job_id)
| 101 | |
| 102 | |
async def _object_ref_count(session: AsyncSession, repo_id: str) -> int:
    """Return the number of ``MusehubObjectRef`` rows whose repo_id is *repo_id*."""
    stmt = select(func.count()).where(MusehubObjectRef.repo_id == repo_id)
    result = await session.execute(stmt)
    return result.scalar_one()
| 107 | |
| 108 | |
async def _mpack_index_byte_range_count(session: AsyncSession, mpack_id: str) -> int:
    """Count ``object`` rows of *mpack_id* in mpack_index that have a byte_offset.

    Only rows with ``entity_type == "object"`` and a non-NULL ``byte_offset``
    are counted.
    """
    stmt = select(func.count()).where(
        MusehubMPackIndex.mpack_id == mpack_id,
        MusehubMPackIndex.entity_type == "object",
        MusehubMPackIndex.byte_offset.is_not(None),
    )
    result = await session.execute(stmt)
    return result.scalar_one()
| 117 | |
| 118 | |
| 119 | # --------------------------------------------------------------------------- |
| 120 | # MPIJ-1 Baseline: job writes mpack_index byte ranges |
| 121 | # --------------------------------------------------------------------------- |
| 122 | |
@pytest.mark.asyncio
async def test_MPIJ1_job_writes_mpack_index_byte_ranges(db_session: AsyncSession) -> None:
    """Baseline: process_mpack_index_job writes a byte range for all three blobs.

    Runs the job for a fresh repo, then expects three ``object`` rows with a
    non-NULL byte_offset in mpack_index for ``_MPACK_KEY``.
    """
    repo = await _make_repo(db_session, "mpij-repo-1")
    job = await _make_mpack_index_job(db_session, repo.repo_id, _MPACK_KEY)

    await _run_mpack_index_job(db_session, job.job_id, _MPACK_BYTES)

    count = await _mpack_index_byte_range_count(db_session, _MPACK_KEY)
    assert count == 3, f"Expected 3 mpack_index byte-range rows from job, got {count}"
| 133 | |
| 134 | |
| 135 | # --------------------------------------------------------------------------- |
| 136 | # MPIJ-2 Bug D: job does NOT write object_refs for the repo |
| 137 | # --------------------------------------------------------------------------- |
| 138 | |
@pytest.mark.asyncio
async def test_MPIJ2_job_writes_object_refs_for_repo(db_session: AsyncSession) -> None:
    """Bug D: process_mpack_index_job must write object_refs for job_row.repo_id.

    The job upserts mpack_index byte ranges and musehub_objects.storage_uri —
    but never calls _upsert_object_refs. With no object_refs, wire fetch
    has no object-to-mpack mapping for this repo.

    Fix: add _upsert_object_refs(session, repo_id, all_blob_oids) to
    process_mpack_index_job after the mpack_index upsert step.

    The test runs the job for a fresh repo and expects one object_refs row
    per blob in the mpack (three in total).
    """
    repo = await _make_repo(db_session, "mpij-repo-2")
    job = await _make_mpack_index_job(db_session, repo.repo_id, _MPACK_KEY)

    await _run_mpack_index_job(db_session, job.job_id, _MPACK_BYTES)

    count = await _object_ref_count(db_session, repo.repo_id)
    assert count == 3, (
        f"Bug D: expected 3 object_refs for repo after mpack.index job, got {count}. "
        f"process_mpack_index_job must call _upsert_object_refs(session, repo_id, all_blob_oids)."
    )
File History
1 commit
sha256:d8a98ffcade6226a977047fb68396a940db91fa1a2f6590b0e55bb1a0f0d2735
test(mpack-index-job): Phase 3 failing test for Bug D — job…
Sonnet 4.6
3 days ago