gabriel / musehub public

test_mist_phase8_smoke.py file-level

at sha256:3 · View file ↗ · Intel ↗

History
1 files
1 commits
0 hotspots
0 🧊 dead
0 💥 blast risk
sha256:0 fix: fall back to any indexed mpack in read_object_bytes when push mpac… · gabriel · Jun 17, 2026
1 """Phase 8 TDD: End-to-end Mist domain smoke test.
2
3 Exercises the full path for a mist-domain repo in one integration test:
4
5 seed repo + artifacts
6 → job_types_for_push dispatches intel.mist
7 → MistProvider.compute runs build_mist_anchor_index
8 → symbol anchors persisted to musehub_symbol_history_entries + musehub_symbol_intel
9 → persist_intel_results writes mist.anchor_index to musehub_intel_results
10 → profile activity canvas includes a "mist" domain grid with total >= 1
11 → GET /api/mists/explore returns 200
12 → GET /api/{owner}/mists returns 200
13 → GET /muse/mists returns 200
14 → GET /api/openapi.json lists /api/mists paths
15
16 No mocks — all assertions run against the real PostgreSQL test DB and the
17 live FastAPI app instance (same fixtures as phases 1–7).
18 """
19 from __future__ import annotations
20
21 import secrets
22 from datetime import datetime, timezone
23
24 import msgpack
25 import pytest
26 from httpx import AsyncClient
27 from muse.core.types import blob_id
28 from sqlalchemy import func, select
29 from sqlalchemy.ext.asyncio import AsyncSession
30
31 from musehub.core.genesis import compute_identity_id, compute_repo_id
32 from musehub.db.musehub_intel_models import MusehubIntelResult, MusehubSymbolHistoryEntry, MusehubSymbolIntel
33 from musehub.db.musehub_repo_models import MusehubCommit, MusehubCommitRef, MusehubObject, MusehubRepo, MusehubSnapshot, MusehubSnapshotRef
34 from musehub.types.json_types import StrDict
35
36
37 # ---------------------------------------------------------------------------
38 # Seed helper (inline — no cross-test import)
39 # ---------------------------------------------------------------------------
40
41 def _now() -> datetime:
42 return datetime.now(tz=timezone.utc)
43
44
def _oid(content: bytes) -> str:
    """Return ``blob_id(content)``, used here as the object ID for an artifact's bytes."""
    object_id = blob_id(content)
    return object_id
47
48
def _manifest_blob(manifest: StrDict) -> bytes:
    """Serialize *manifest* to msgpack bytes (packed with ``use_bin_type=True``)."""
    packed = msgpack.packb(manifest, use_bin_type=True)
    return packed
51
52
def _commit_id() -> str:
    """Return a fresh commit ID: ``blob_id`` applied to 16 random bytes."""
    entropy = secrets.token_bytes(16)
    return blob_id(entropy)
55
56
def _snap_id(manifest: StrDict) -> str:
    """Return a snapshot ID derived from the entries of *manifest*.

    The ``(key, value)`` pairs are sorted before msgpack encoding, so the
    resulting ID does not depend on the dict's insertion order.
    """
    ordered_entries = sorted(manifest.items())
    encoded = msgpack.packb(ordered_entries, use_bin_type=True)
    return blob_id(encoded)
59
60
async def _seed_mist_repo(
    session: AsyncSession,
    owner: str,
    artifacts: dict[str, bytes],
) -> tuple[MusehubRepo, MusehubCommit]:
    """Seed a public ``mist``-domain repo holding *artifacts* under one commit.

    Rows are added and flushed in stages: the repo, then one object per
    artifact, then the snapshot plus its repo ref, then a parentless commit on
    ``main`` plus its repo ref.  An object or snapshot is only added when
    ``session.get`` does not already return a row for its ID.

    Args:
        session: Async session that receives the rows; it is flushed here but
            never committed.
        owner: Owner name; also used as the commit author and, encoded, as
            the input to ``compute_identity_id``.
        artifacts: Mapping of filename to raw file bytes.

    Returns:
        The ``(repo, commit)`` pair that was added to the session.
    """
    identity = compute_identity_id(owner.encode())
    repo_slug = f"smoke-{secrets.token_hex(4)}"
    repo_created = _now()
    new_repo_id = compute_repo_id(
        identity, repo_slug, "mist", repo_created.isoformat()
    )

    # Stage 1: the repo row itself.
    repo = MusehubRepo(
        repo_id=new_repo_id,
        name=repo_slug,
        owner=owner,
        slug=repo_slug,
        visibility="public",
        owner_user_id=identity,
        domain_id="mist",
        description="smoke-test mist repo",
        tags=[],
        created_at=repo_created,
    )
    session.add(repo)
    await session.flush()

    # Stage 2: one object row per artifact, recording filename -> object ID.
    file_to_object: dict[str, str] = {}
    for filename, payload in artifacts.items():
        object_id = _oid(payload)
        file_to_object[filename] = object_id
        already_stored = await session.get(MusehubObject, object_id)
        if already_stored is None:
            stored_object = MusehubObject(
                object_id=object_id,
                path=filename,
                size_bytes=len(payload),
                content_cache=payload,
            )
            session.add(stored_object)
    await session.flush()

    # Stage 3: the snapshot (if not already present) and its link to the repo.
    snapshot_id = _snap_id(file_to_object)
    known_snapshot = await session.get(MusehubSnapshot, snapshot_id)
    if known_snapshot is None:
        snapshot = MusehubSnapshot(
            snapshot_id=snapshot_id,
            entry_count=len(file_to_object),
            manifest_blob=_manifest_blob(file_to_object),
        )
        session.add(snapshot)
    snapshot_ref = MusehubSnapshotRef(repo_id=new_repo_id, snapshot_id=snapshot_id)
    session.add(snapshot_ref)
    await session.flush()

    # Stage 4: a root commit on "main" pointing at the snapshot.
    new_commit_id = _commit_id()
    commit = MusehubCommit(
        commit_id=new_commit_id,
        message="smoke: initial mist",
        author=owner,
        branch="main",
        parent_ids=[],
        snapshot_id=snapshot_id,
        timestamp=_now(),
    )
    session.add(commit)
    commit_ref = MusehubCommitRef(repo_id=new_repo_id, commit_id=new_commit_id)
    session.add(commit_ref)
    await session.flush()

    return repo, commit
124
125
126 # ---------------------------------------------------------------------------
127 # Fixtures
128 # ---------------------------------------------------------------------------
129
130 _ARTIFACTS: dict[str, bytes] = {
131 "utils.py": b"def helper_one(): pass\ndef helper_two(): pass\n",
132 "schema.json": b'{"type": "object", "properties": {"id": {"type": "string"}}}',
133 "README.md": b"# Smoke test mist\nContent-addressed artifact share.\n",
134 }
135
136
137 # ---------------------------------------------------------------------------
138 # Phase 8 — smoke test
139 # ---------------------------------------------------------------------------
140
class TestMistDomainEndToEnd:
    """Phase 8 smoke tests covering the mist domain end to end.

    The tests exercise job dispatch, anchor indexing, intel persistence, the
    profile activity canvas, push-manifest validation, and the mist HTTP
    routes.  Database tests take the ``db_session`` fixture; HTTP tests take
    the ``client`` fixture.  Service modules are imported inside each test.
    """

    @pytest.mark.asyncio
    async def test_intel_mist_dispatched_for_mist_domain(self) -> None:
        """job_types_for_push('mist') must include 'intel.mist'."""
        from musehub.services.musehub_intel_providers import job_types_for_push

        types = job_types_for_push("mist")
        assert "intel.mist" in types, (
            f"'intel.mist' must be dispatched for mist repos; got {types}"
        )

    @pytest.mark.asyncio
    async def test_anchors_persisted_after_indexing(
        self, db_session: AsyncSession
    ) -> None:
        """After build_mist_anchor_index runs, symbol history entries must exist."""
        from musehub.services.musehub_mist_indexer import build_mist_anchor_index

        owner = f"smoke_{secrets.token_hex(4)}"
        repo, commit = await _seed_mist_repo(db_session, owner, _ARTIFACTS)

        # Only the persisted rows are checked, so the return value is not kept.
        await build_mist_anchor_index(db_session, repo.repo_id, commit.commit_id)

        history_count = (await db_session.execute(
            select(func.count()).where(
                MusehubSymbolHistoryEntry.repo_id == repo.repo_id
            )
        )).scalar_one()
        assert history_count >= 1, (
            f"Expected at least 1 symbol history entry after indexing; got {history_count}"
        )

    @pytest.mark.asyncio
    async def test_intel_results_written_by_mist_provider(
        self, db_session: AsyncSession
    ) -> None:
        """MistProvider.compute + persist_intel_results must write mist.anchor_index."""
        from musehub.services.musehub_intel_providers import MistProvider, persist_intel_results

        owner = f"smoke_{secrets.token_hex(4)}"
        repo, commit = await _seed_mist_repo(db_session, owner, _ARTIFACTS)

        provider = MistProvider()
        results = await provider.compute(
            db_session, repo.repo_id, commit.commit_id, {}
        )

        # Must return at least the anchor_index result.
        result_types = [r[0] for r in results]
        assert "mist.anchor_index" in result_types, (
            f"MistProvider.compute must return 'mist.anchor_index'; got {result_types}"
        )

        await persist_intel_results(
            db_session, repo.repo_id, commit.commit_id, results
        )
        await db_session.flush()

        row = (await db_session.execute(
            select(MusehubIntelResult).where(
                MusehubIntelResult.repo_id == repo.repo_id,
                MusehubIntelResult.intel_type == "mist.anchor_index",
            )
        )).scalar_one_or_none()
        assert row is not None, (
            "persist_intel_results must write a 'mist.anchor_index' row to musehub_intel_results"
        )

    @pytest.mark.asyncio
    async def test_symbol_intel_rows_written(
        self, db_session: AsyncSession
    ) -> None:
        """Symbol intel rows must be upserted for each anchor extracted."""
        from musehub.services.musehub_mist_indexer import build_mist_anchor_index

        owner = f"smoke_{secrets.token_hex(4)}"
        repo, commit = await _seed_mist_repo(db_session, owner, _ARTIFACTS)

        await build_mist_anchor_index(db_session, repo.repo_id, commit.commit_id)

        intel_count = (await db_session.execute(
            select(func.count()).where(
                MusehubSymbolIntel.repo_id == repo.repo_id
            )
        )).scalar_one()
        assert intel_count >= 1, (
            f"Expected at least 1 symbol intel row after indexing; got {intel_count}"
        )

    @pytest.mark.asyncio
    async def test_profile_canvas_has_mist_grid(
        self, db_session: AsyncSession
    ) -> None:
        """After seeding a mist repo with commits, profile canvas includes 'mist' domain."""
        from musehub.services.musehub_profile import build_activity_canvas

        owner = f"smoke_{secrets.token_hex(4)}"
        await _seed_mist_repo(db_session, owner, _ARTIFACTS)

        domains = await build_activity_canvas(db_session, owner)
        domain_names = [d.domain for d in domains]
        assert "mist" in domain_names, (
            f"Profile canvas must include 'mist' domain; got {domain_names}"
        )

        mist = next(d for d in domains if d.domain == "mist")
        assert mist.total >= 0  # zero is fine for snapshot-only push; non-crash matters

    @pytest.mark.asyncio
    async def test_push_validator_rejects_path_traversal(self) -> None:
        """validate_mist_manifest must reject path traversal filenames."""
        from musehub.services.musehub_mist_push_validator import validate_mist_manifest

        result = validate_mist_manifest({"../evil.py": "sha256:abc"})
        assert not result.valid
        assert len(result.errors) >= 1

    @pytest.mark.asyncio
    async def test_explore_endpoint_returns_200(self, client: AsyncClient) -> None:
        """GET /api/mists/explore must return 200."""
        r = await client.get("/api/mists/explore")
        assert r.status_code == 200, (
            f"GET /api/mists/explore returned {r.status_code}"
        )

    @pytest.mark.asyncio
    async def test_owner_mists_endpoint_returns_200(
        self, client: AsyncClient
    ) -> None:
        """GET /api/{owner}/mists must return 200 (empty list for unknown owner is fine)."""
        r = await client.get("/api/gabriel/mists")
        assert r.status_code == 200, (
            f"GET /api/gabriel/mists returned {r.status_code}"
        )

    @pytest.mark.asyncio
    async def test_docs_mists_page_returns_200(self, client: AsyncClient) -> None:
        """GET /muse/mists must return 200 with HTML content."""
        r = await client.get("/muse/mists")
        assert r.status_code == 200, (
            f"GET /muse/mists returned {r.status_code}"
        )
        content_type = r.headers.get("content-type", "")
        assert "text/html" in content_type, (
            f"GET /muse/mists must serve HTML; got content-type {content_type!r}"
        )

    @pytest.mark.asyncio
    async def test_openapi_schema_lists_mists_paths(
        self, client: AsyncClient
    ) -> None:
        """GET /api/openapi.json must list /api/mists paths."""
        r = await client.get("/api/openapi.json")
        assert r.status_code == 200, (
            f"GET /api/openapi.json returned {r.status_code}"
        )
        paths = r.json().get("paths", {})
        mist_paths = [p for p in paths if "/mists" in p]
        assert len(mist_paths) > 0, (
            f"No /mists paths in OpenAPI schema; sample: {list(paths.keys())[:20]}"
        )

    @pytest.mark.asyncio
    async def test_full_pipeline_anchor_count_positive(
        self, db_session: AsyncSession
    ) -> None:
        """Full pipeline: index → intel → confirm anchor_count > 0 in result data."""
        import json
        from musehub.services.musehub_intel_providers import MistProvider, persist_intel_results

        owner = f"smoke_{secrets.token_hex(4)}"
        # utils.py has two functions → at least 2 anchors
        repo, commit = await _seed_mist_repo(
            db_session, owner,
            {"utils.py": b"def alpha(): pass\ndef beta(): pass\n"}
        )

        provider = MistProvider()
        results = await provider.compute(
            db_session, repo.repo_id, commit.commit_id, {}
        )
        await persist_intel_results(
            db_session, repo.repo_id, commit.commit_id, results
        )
        await db_session.flush()

        row = (await db_session.execute(
            select(MusehubIntelResult).where(
                MusehubIntelResult.repo_id == repo.repo_id,
                MusehubIntelResult.intel_type == "mist.anchor_index",
            )
        )).scalar_one_or_none()
        assert row is not None
        data = json.loads(row.data_json)
        assert data.get("anchor_count", 0) >= 2, (
            f"Expected anchor_count >= 2 for utils.py with 2 functions; got {data}"
        )