gabriel / musehub public
test_mpack_index_always_enqueued.py python
143 lines 5.5 KB
Raw
sha256:94ef169c149a452bff7c604ded8b280b19bd477c2dabcb56972780b0b784c7aa Merge 'fix/assignee-sigil-inline' into 'dev' — proposal: As… Human 2 days ago
1 """TDD — mpack.index must always be enqueued after every push.
2
3 Root cause: mpack.index was missing from job_types_for_push, so the
4 byte-range index was never populated. Without byte ranges, every blob
5 fetch downloads the entire covering mpack (potentially hundreds of MB)
6 instead of a targeted byte-range GET.
7
8 Tests:
9 MIE-1 mpack.index in job_types_for_push for code repos
10 MIE-2 mpack.index in job_types_for_push for midi repos (domain-agnostic)
11 MIE-3 mpack.index in job_types_for_push when domain_id is None
12 MIE-4 enqueue_push_intel actually enqueues mpack.index after a push
 MIE-5 mpack.index job payload contains mpack_key
13 """
14 from __future__ import annotations
15
16 import datetime
17 import pytest
18 from sqlalchemy.ext.asyncio import AsyncSession
19
20 from muse.core.types import fake_id
21 from musehub.core.genesis import compute_identity_id, compute_repo_id
22 from musehub.db.musehub_jobs_models import MusehubBackgroundJob
23 from musehub.db.musehub_repo_models import MusehubRepo
24
25
26 # ---------------------------------------------------------------------------
27 # MIE-1 mpack.index in code job types
28 # ---------------------------------------------------------------------------
29
def test_MIE1_mpack_index_in_code_push_jobs() -> None:
    """MIE-1: ``job_types_for_push("code")`` must include ``mpack.index``."""
    from musehub.services.musehub_intel_providers import job_types_for_push

    code_job_types = job_types_for_push("code")
    failure_hint = (
        "mpack.index must be in job_types_for_push('code') — "
        "without it blob byte ranges are never indexed and every page "
        "load fetches the entire mpack"
    )
    assert "mpack.index" in code_job_types, failure_hint
38
39
40 # ---------------------------------------------------------------------------
41 # MIE-2 mpack.index in midi job types (domain-agnostic)
42 # ---------------------------------------------------------------------------
43
def test_MIE2_mpack_index_in_midi_push_jobs() -> None:
    """MIE-2: ``job_types_for_push("midi")`` must include ``mpack.index``."""
    from musehub.services.musehub_intel_providers import job_types_for_push

    midi_job_types = job_types_for_push("midi")
    failure_hint = (
        "mpack.index must run for all domains — blob byte ranges are needed "
        "regardless of domain"
    )
    assert "mpack.index" in midi_job_types, failure_hint
51
52
53 # ---------------------------------------------------------------------------
54 # MIE-3 mpack.index when domain_id is None
55 # ---------------------------------------------------------------------------
56
def test_MIE3_mpack_index_when_no_domain() -> None:
    """MIE-3: ``job_types_for_push(None)`` must include ``mpack.index``."""
    from musehub.services.musehub_intel_providers import job_types_for_push

    domainless_job_types = job_types_for_push(None)
    assert "mpack.index" in domainless_job_types
61
62
63 # ---------------------------------------------------------------------------
64 # MIE-4 enqueue_push_intel creates a mpack.index job row
65 # ---------------------------------------------------------------------------
66
@pytest.mark.asyncio
async def test_MIE4_enqueue_push_intel_creates_mpack_index_job(
    db_session: AsyncSession,
) -> None:
    """MIE-4: ``enqueue_push_intel`` leaves a pending ``mpack.index`` job row.

    Inserts a fresh repo row, calls ``enqueue_push_intel`` with
    ``domain_id=None``, then checks the job types of the repo's pending
    ``MusehubBackgroundJob`` rows.
    """
    from sqlalchemy import select
    from musehub.services.musehub_jobs import enqueue_push_intel

    # Insert the repo row first and flush it before enqueueing anything.
    created = datetime.datetime.now(tz=datetime.timezone.utc)
    gabriel_id = compute_identity_id(b"gabriel")
    repo_id = compute_repo_id(gabriel_id, "mie-test", "code", created.isoformat())
    repo_row = MusehubRepo(
        repo_id=repo_id,
        name="mie-test",
        owner="gabriel",
        slug="mie-test",
        visibility="public",
        owner_user_id=gabriel_id,
        created_at=created,
        updated_at=created,
    )
    db_session.add(repo_row)
    await db_session.flush()

    head_commit = fake_id("mie-head")
    await enqueue_push_intel(
        db_session, repo_id, head_commit, domain_id=None, branch="main"
    )
    await db_session.flush()

    # Collect only the job_type column of this repo's pending jobs.
    pending_types_query = select(MusehubBackgroundJob.job_type).where(
        MusehubBackgroundJob.repo_id == repo_id,
        MusehubBackgroundJob.status == "pending",
    )
    query_result = await db_session.execute(pending_types_query)
    pending_types = query_result.scalars().all()

    assert "mpack.index" in pending_types, (
        f"enqueue_push_intel must enqueue mpack.index. Got: {sorted(pending_types)}"
    )
98
99
100 # ---------------------------------------------------------------------------
101 # MIE-5 mpack.index job payload contains mpack_key
102 # ---------------------------------------------------------------------------
103
@pytest.mark.asyncio
async def test_MIE5_mpack_index_payload_contains_mpack_key(
    db_session: AsyncSession,
) -> None:
    """MIE-5: the pending ``mpack.index`` job carries ``mpack_key`` in its payload.

    The worker needs ``mpack_key`` to know which mpack to index; without it
    the worker raises ValueError and fails.
    """
    from sqlalchemy import select
    from musehub.services.musehub_jobs import enqueue_push_intel

    # Insert the repo row first and flush it before enqueueing anything.
    created = datetime.datetime.now(tz=datetime.timezone.utc)
    gabriel_id = compute_identity_id(b"gabriel")
    repo_id = compute_repo_id(
        gabriel_id,
        f"mie-payload-{created.timestamp()}",
        "code",
        created.isoformat(),
    )
    repo_row = MusehubRepo(
        repo_id=repo_id,
        name="mie-payload",
        owner="gabriel",
        slug="mie-payload",
        visibility="public",
        owner_user_id=gabriel_id,
        created_at=created,
        updated_at=created,
    )
    db_session.add(repo_row)
    await db_session.flush()

    head_commit = fake_id("mie-head-payload")
    expected_key = fake_id("mpack-key-abc")
    await enqueue_push_intel(
        db_session,
        repo_id,
        head_commit,
        domain_id=None,
        branch="main",
        mpack_key=expected_key,
    )
    await db_session.flush()

    # Look up the pending mpack.index job for this repo.
    index_job_query = select(MusehubBackgroundJob).where(
        MusehubBackgroundJob.repo_id == repo_id,
        MusehubBackgroundJob.job_type == "mpack.index",
        MusehubBackgroundJob.status == "pending",
    )
    query_result = await db_session.execute(index_job_query)
    index_job = query_result.scalar_one_or_none()

    assert index_job is not None, "mpack.index job must be enqueued"
    assert index_job.payload.get("mpack_key") == expected_key, (
        f"mpack.index payload must contain mpack_key={expected_key!r}. "
        f"Got payload: {index_job.payload}. "
        "The worker raises ValueError without it."
    )
File History 1 commit
sha256:94ef169c149a452bff7c604ded8b280b19bd477c2dabcb56972780b0b784c7aa Merge 'fix/assignee-sigil-inline' into 'dev' — proposal: As… Human 2 days ago