"""TDD — mpack.index must always be enqueued after every push. Root cause: mpack.index was missing from job_types_for_push, so the byte-range index was never populated. Without byte ranges, every blob fetch downloads the entire covering mpack (potentially hundreds of MB) instead of a targeted byte-range GET. Tests: MIE-1 mpack.index in job_types_for_push for code repos MIE-2 mpack.index in job_types_for_push for midi repos (domain-agnostic) MIE-3 mpack.index in job_types_for_push when domain_id is None MIE-4 enqueue_push_intel actually enqueues mpack.index after a push """ from __future__ import annotations import datetime import pytest from sqlalchemy.ext.asyncio import AsyncSession from muse.core.types import fake_id from musehub.core.genesis import compute_identity_id, compute_repo_id from musehub.db.musehub_jobs_models import MusehubBackgroundJob from musehub.db.musehub_repo_models import MusehubRepo # --------------------------------------------------------------------------- # MIE-1 mpack.index in code job types # --------------------------------------------------------------------------- def test_MIE1_mpack_index_in_code_push_jobs() -> None: from musehub.services.musehub_intel_providers import job_types_for_push types = job_types_for_push("code") assert "mpack.index" in types, ( "mpack.index must be in job_types_for_push('code') — " "without it blob byte ranges are never indexed and every page " "load fetches the entire mpack" ) # --------------------------------------------------------------------------- # MIE-2 mpack.index in midi job types (domain-agnostic) # --------------------------------------------------------------------------- def test_MIE2_mpack_index_in_midi_push_jobs() -> None: from musehub.services.musehub_intel_providers import job_types_for_push types = job_types_for_push("midi") assert "mpack.index" in types, ( "mpack.index must run for all domains — blob byte ranges are needed " "regardless of domain" ) # --------------------------------------------------------------------------- # MIE-3 mpack.index when domain_id is None # --------------------------------------------------------------------------- def test_MIE3_mpack_index_when_no_domain() -> None: from musehub.services.musehub_intel_providers import job_types_for_push types = job_types_for_push(None) assert "mpack.index" in types # --------------------------------------------------------------------------- # MIE-4 enqueue_push_intel creates a mpack.index job row # --------------------------------------------------------------------------- @pytest.mark.asyncio async def test_MIE4_enqueue_push_intel_creates_mpack_index_job( db_session: AsyncSession, ) -> None: from sqlalchemy import select from musehub.services.musehub_jobs import enqueue_push_intel now = datetime.datetime.now(tz=datetime.timezone.utc) owner_id = compute_identity_id(b"gabriel") repo_id = compute_repo_id(owner_id, "mie-test", "code", now.isoformat()) db_session.add(MusehubRepo( repo_id=repo_id, name="mie-test", owner="gabriel", slug="mie-test", visibility="public", owner_user_id=owner_id, created_at=now, updated_at=now, )) await db_session.flush() head = fake_id("mie-head") await enqueue_push_intel(db_session, repo_id, head, domain_id=None, branch="main") await db_session.flush() jobs = (await db_session.execute( select(MusehubBackgroundJob.job_type).where( MusehubBackgroundJob.repo_id == repo_id, MusehubBackgroundJob.status == "pending", ) )).scalars().all() assert "mpack.index" in jobs, ( f"enqueue_push_intel must enqueue mpack.index. Got: {sorted(jobs)}" ) # --------------------------------------------------------------------------- # MIE-5 mpack.index job payload contains mpack_key # --------------------------------------------------------------------------- @pytest.mark.asyncio async def test_MIE5_mpack_index_payload_contains_mpack_key( db_session: AsyncSession, ) -> None: """The mpack.index job payload must include mpack_key so the worker knows which mpack to index. Without it the worker raises ValueError and fails.""" from sqlalchemy import select from musehub.services.musehub_jobs import enqueue_push_intel now = datetime.datetime.now(tz=datetime.timezone.utc) owner_id = compute_identity_id(b"gabriel") repo_id = compute_repo_id(owner_id, f"mie-payload-{now.timestamp()}", "code", now.isoformat()) db_session.add(MusehubRepo( repo_id=repo_id, name="mie-payload", owner="gabriel", slug="mie-payload", visibility="public", owner_user_id=owner_id, created_at=now, updated_at=now, )) await db_session.flush() head = fake_id("mie-head-payload") mpack_key = fake_id("mpack-key-abc") await enqueue_push_intel( db_session, repo_id, head, domain_id=None, branch="main", mpack_key=mpack_key ) await db_session.flush() job = (await db_session.execute( select(MusehubBackgroundJob).where( MusehubBackgroundJob.repo_id == repo_id, MusehubBackgroundJob.job_type == "mpack.index", MusehubBackgroundJob.status == "pending", ) )).scalar_one_or_none() assert job is not None, "mpack.index job must be enqueued" assert job.payload.get("mpack_key") == mpack_key, ( f"mpack.index payload must contain mpack_key={mpack_key!r}. " f"Got payload: {job.payload}. " "The worker raises ValueError without it." )