test_mist_phase8_smoke.py
file-level
1
files
1
commits
0
hotspots
0
🧟 dead
0
💥 blast risk
| 1 | """Phase 8 TDD: End-to-end Mist domain smoke test. |
| 2 | |
| 3 | Exercises the full path for a mist-domain repo in one integration test: |
| 4 | |
| 5 | seed repo + artifacts |
| 6 | → job_types_for_push dispatches intel.mist |
| 7 | → MistProvider.compute runs build_mist_anchor_index |
| 8 | → symbol anchors persisted to musehub_symbol_history_entries + musehub_symbol_intel |
| 9 | → persist_intel_results writes mist.anchor_index to musehub_intel_results |
| 10 | → profile activity canvas includes a "mist" domain grid with total >= 1 |
| 11 | → GET /api/mists/explore returns 200 |
| 12 | → GET /api/{owner}/mists returns 200 |
| 13 | → GET /muse/mists returns 200 |
| 14 | → GET /api/openapi.json lists /api/mists paths |
| 15 | |
| 16 | No mocks — all assertions run against the real PostgreSQL test DB and the |
| 17 | live FastAPI app instance (same fixtures as phases 1–7). |
| 18 | """ |
| 19 | from __future__ import annotations |
| 20 | |
| 21 | import secrets |
| 22 | from datetime import datetime, timezone |
| 23 | |
| 24 | import msgpack |
| 25 | import pytest |
| 26 | from httpx import AsyncClient |
| 27 | from muse.core.types import blob_id |
| 28 | from sqlalchemy import func, select |
| 29 | from sqlalchemy.ext.asyncio import AsyncSession |
| 30 | |
| 31 | from musehub.core.genesis import compute_identity_id, compute_repo_id |
| 32 | from musehub.db.musehub_intel_models import MusehubIntelResult, MusehubSymbolHistoryEntry, MusehubSymbolIntel |
| 33 | from musehub.db.musehub_repo_models import MusehubCommit, MusehubCommitRef, MusehubObject, MusehubRepo, MusehubSnapshot, MusehubSnapshotRef |
| 34 | from musehub.types.json_types import StrDict |
| 35 | |
| 36 | |
| 37 | # --------------------------------------------------------------------------- |
| 38 | # Seed helper (inline — no cross-test import) |
| 39 | # --------------------------------------------------------------------------- |
| 40 | |
def _now() -> datetime:
    """Return the current moment as a timezone-aware UTC ``datetime``."""
    utc = timezone.utc
    return datetime.now(utc)
| 43 | |
| 44 | |
def _oid(content: bytes) -> str:
    """Return ``blob_id(content)``, used in this file as the object ID of a raw artifact."""
    object_id = blob_id(content)
    return object_id
| 47 | |
| 48 | |
def _manifest_blob(manifest: StrDict) -> bytes:
    """Serialize *manifest* to msgpack bytes (packed with ``use_bin_type=True``)."""
    packed = msgpack.packb(manifest, use_bin_type=True)
    return packed
| 51 | |
| 52 | |
def _commit_id() -> str:
    """Return a fresh commit ID: ``blob_id`` applied to 16 random bytes."""
    entropy = secrets.token_bytes(16)
    return blob_id(entropy)
| 55 | |
| 56 | |
def _snap_id(manifest: StrDict) -> str:
    """Derive a snapshot ID from *manifest*.

    The (path, object-id) pairs are sorted before packing, so the resulting ID
    does not depend on the dict's insertion order.
    """
    ordered_entries = sorted(manifest.items())
    payload = msgpack.packb(ordered_entries, use_bin_type=True)
    return blob_id(payload)
| 59 | |
| 60 | |
async def _seed_mist_repo(
    session: AsyncSession,
    owner: str,
    artifacts: dict[str, bytes],
) -> tuple[MusehubRepo, MusehubCommit]:
    """Insert a public mist-domain repo with one snapshot and one commit.

    Args:
        session: Async DB session; rows are added and flushed, never committed here.
        owner: Owner name; also used as the commit author.
        artifacts: Mapping of filename to raw file bytes to store as objects.

    Returns:
        The ``(repo, commit)`` pair that was added to the session.
    """
    identity = compute_identity_id(owner.encode())
    repo_slug = f"smoke-{secrets.token_hex(4)}"
    born = _now()
    rid = compute_repo_id(identity, repo_slug, "mist", born.isoformat())

    # 1. The repo row itself, flushed before anything references it.
    repo_row = MusehubRepo(
        repo_id=rid,
        name=repo_slug,
        owner=owner,
        slug=repo_slug,
        visibility="public",
        owner_user_id=identity,
        domain_id="mist",
        description="smoke-test mist repo",
        tags=[],
        created_at=born,
    )
    session.add(repo_row)
    await session.flush()

    # 2. One object row per artifact; an object ID that already exists is reused.
    path_to_oid: dict[str, str] = {}
    for path, payload in artifacts.items():
        object_id = _oid(payload)
        path_to_oid[path] = object_id
        existing = await session.get(MusehubObject, object_id)
        if existing is not None:
            continue
        new_object = MusehubObject(
            object_id=object_id,
            path=path,
            size_bytes=len(payload),
            content_cache=payload,
        )
        session.add(new_object)
    await session.flush()

    # 3. The snapshot (created only if absent) plus its per-repo reference.
    snapshot_key = _snap_id(path_to_oid)
    snapshot_missing = await session.get(MusehubSnapshot, snapshot_key) is None
    if snapshot_missing:
        new_snapshot = MusehubSnapshot(
            snapshot_id=snapshot_key,
            entry_count=len(path_to_oid),
            manifest_blob=_manifest_blob(path_to_oid),
        )
        session.add(new_snapshot)
    session.add(MusehubSnapshotRef(repo_id=rid, snapshot_id=snapshot_key))
    await session.flush()

    # 4. A parentless commit on "main" pointing at the snapshot, plus its ref.
    commit_key = _commit_id()
    commit_row = MusehubCommit(
        commit_id=commit_key,
        message="smoke: initial mist",
        author=owner,
        branch="main",
        parent_ids=[],
        snapshot_id=snapshot_key,
        timestamp=_now(),
    )
    session.add(commit_row)
    session.add(MusehubCommitRef(repo_id=rid, commit_id=commit_key))
    await session.flush()

    return repo_row, commit_row
| 124 | |
| 125 | |
| 126 | # --------------------------------------------------------------------------- |
| 127 | # Fixtures |
| 128 | # --------------------------------------------------------------------------- |
| 129 | |
# Default artifact set used by most tests below: one Python module, one JSON
# schema and one Markdown README, keyed by filename with raw file bytes as values.
_ARTIFACTS: dict[str, bytes] = {
    "utils.py": (
        b"def helper_one(): pass\n"
        b"def helper_two(): pass\n"
    ),
    "schema.json": (
        b'{"type": "object", '
        b'"properties": {"id": {"type": "string"}}}'
    ),
    "README.md": (
        b"# Smoke test mist\n"
        b"Content-addressed artifact share.\n"
    ),
}
| 135 | |
| 136 | |
| 137 | # --------------------------------------------------------------------------- |
| 138 | # Phase 8 — smoke test |
| 139 | # --------------------------------------------------------------------------- |
| 140 | |
class TestMistDomainEndToEnd:
    """Phase 8 smoke tests covering the mist domain from job dispatch to HTTP.

    Each test is independent: the DB-backed tests seed their own repo under a
    randomly suffixed owner name, and the HTTP tests only check status codes,
    content type, or the OpenAPI path list.
    """

    @pytest.mark.asyncio
    async def test_intel_mist_dispatched_for_mist_domain(self) -> None:
        """job_types_for_push('mist') must include 'intel.mist'."""
        from musehub.services.musehub_intel_providers import job_types_for_push

        types = job_types_for_push("mist")
        assert "intel.mist" in types, (
            f"'intel.mist' must be dispatched for mist repos; got {types}"
        )

    @pytest.mark.asyncio
    async def test_anchors_persisted_after_indexing(
        self, db_session: AsyncSession
    ) -> None:
        """After build_mist_anchor_index runs, symbol history entries must exist."""
        from musehub.services.musehub_mist_indexer import build_mist_anchor_index

        owner = f"smoke_{secrets.token_hex(4)}"
        repo, commit = await _seed_mist_repo(db_session, owner, _ARTIFACTS)

        # Only the persisted rows are checked, so the return value is not kept.
        await build_mist_anchor_index(db_session, repo.repo_id, commit.commit_id)

        history_count = (await db_session.execute(
            select(func.count())
            .select_from(MusehubSymbolHistoryEntry)
            .where(MusehubSymbolHistoryEntry.repo_id == repo.repo_id)
        )).scalar_one()
        assert history_count >= 1, (
            f"Expected at least 1 symbol history entry after indexing; got {history_count}"
        )

    @pytest.mark.asyncio
    async def test_intel_results_written_by_mist_provider(
        self, db_session: AsyncSession
    ) -> None:
        """MistProvider.compute + persist_intel_results must write mist.anchor_index."""
        from musehub.services.musehub_intel_providers import MistProvider, persist_intel_results

        owner = f"smoke_{secrets.token_hex(4)}"
        repo, commit = await _seed_mist_repo(db_session, owner, _ARTIFACTS)

        provider = MistProvider()
        results = await provider.compute(
            db_session, repo.repo_id, commit.commit_id, {}
        )

        # Must return at least the anchor_index result.
        result_types = [r[0] for r in results]
        assert "mist.anchor_index" in result_types, (
            f"MistProvider.compute must return 'mist.anchor_index'; got {result_types}"
        )

        await persist_intel_results(
            db_session, repo.repo_id, commit.commit_id, results
        )
        await db_session.flush()

        row = (await db_session.execute(
            select(MusehubIntelResult).where(
                MusehubIntelResult.repo_id == repo.repo_id,
                MusehubIntelResult.intel_type == "mist.anchor_index",
            )
        )).scalar_one_or_none()
        assert row is not None, (
            "persist_intel_results must write a 'mist.anchor_index' row to musehub_intel_results"
        )

    @pytest.mark.asyncio
    async def test_symbol_intel_rows_written(
        self, db_session: AsyncSession
    ) -> None:
        """Symbol intel rows must be upserted for each anchor extracted."""
        from musehub.services.musehub_mist_indexer import build_mist_anchor_index

        owner = f"smoke_{secrets.token_hex(4)}"
        repo, commit = await _seed_mist_repo(db_session, owner, _ARTIFACTS)

        await build_mist_anchor_index(db_session, repo.repo_id, commit.commit_id)

        intel_count = (await db_session.execute(
            select(func.count())
            .select_from(MusehubSymbolIntel)
            .where(MusehubSymbolIntel.repo_id == repo.repo_id)
        )).scalar_one()
        assert intel_count >= 1, (
            f"Expected at least 1 symbol intel row after indexing; got {intel_count}"
        )

    @pytest.mark.asyncio
    async def test_profile_canvas_has_mist_grid(
        self, db_session: AsyncSession
    ) -> None:
        """After seeding a mist repo with commits, profile canvas includes 'mist' domain."""
        from musehub.services.musehub_profile import build_activity_canvas

        owner = f"smoke_{secrets.token_hex(4)}"
        await _seed_mist_repo(db_session, owner, _ARTIFACTS)

        domains = await build_activity_canvas(db_session, owner)
        domain_names = [d.domain for d in domains]
        assert "mist" in domain_names, (
            f"Profile canvas must include 'mist' domain; got {domain_names}"
        )

        mist = next(d for d in domains if d.domain == "mist")
        assert mist.total >= 0  # zero is fine for snapshot-only push; non-crash matters

    @pytest.mark.asyncio
    async def test_push_validator_rejects_path_traversal(self) -> None:
        """validate_mist_manifest must reject path traversal filenames."""
        from musehub.services.musehub_mist_push_validator import validate_mist_manifest

        result = validate_mist_manifest({"../evil.py": "sha256:abc"})
        assert not result.valid
        assert len(result.errors) >= 1

    @pytest.mark.asyncio
    async def test_explore_endpoint_returns_200(self, client: AsyncClient) -> None:
        """GET /api/mists/explore must return 200."""
        r = await client.get("/api/mists/explore")
        assert r.status_code == 200, (
            f"GET /api/mists/explore returned {r.status_code}"
        )

    @pytest.mark.asyncio
    async def test_owner_mists_endpoint_returns_200(
        self, client: AsyncClient
    ) -> None:
        """GET /api/{owner}/mists must return 200 (empty list for unknown owner is fine)."""
        r = await client.get("/api/gabriel/mists")
        assert r.status_code == 200, (
            f"GET /api/gabriel/mists returned {r.status_code}"
        )

    @pytest.mark.asyncio
    async def test_docs_mists_page_returns_200(self, client: AsyncClient) -> None:
        """GET /muse/mists must return 200 with HTML content."""
        r = await client.get("/muse/mists")
        assert r.status_code == 200
        assert "text/html" in r.headers.get("content-type", "")

    @pytest.mark.asyncio
    async def test_openapi_schema_lists_mists_paths(
        self, client: AsyncClient
    ) -> None:
        """GET /api/openapi.json must list /api/mists paths."""
        r = await client.get("/api/openapi.json")
        assert r.status_code == 200
        paths = r.json().get("paths", {})
        mist_paths = [p for p in paths if "/mists" in p]
        assert len(mist_paths) > 0, (
            f"No /mists paths in OpenAPI schema; sample: {list(paths.keys())[:20]}"
        )

    @pytest.mark.asyncio
    async def test_full_pipeline_anchor_count_positive(
        self, db_session: AsyncSession
    ) -> None:
        """Full pipeline: index → intel → confirm anchor_count >= 2 in result data."""
        import json
        from musehub.services.musehub_intel_providers import MistProvider, persist_intel_results

        owner = f"smoke_{secrets.token_hex(4)}"
        # utils.py has two functions → at least 2 anchors
        repo, commit = await _seed_mist_repo(
            db_session, owner,
            {"utils.py": b"def alpha(): pass\ndef beta(): pass\n"}
        )

        provider = MistProvider()
        results = await provider.compute(
            db_session, repo.repo_id, commit.commit_id, {}
        )
        await persist_intel_results(
            db_session, repo.repo_id, commit.commit_id, results
        )
        await db_session.flush()

        row = (await db_session.execute(
            select(MusehubIntelResult).where(
                MusehubIntelResult.repo_id == repo.repo_id,
                MusehubIntelResult.intel_type == "mist.anchor_index",
            )
        )).scalar_one_or_none()
        assert row is not None
        data = json.loads(row.data_json)
        assert data.get("anchor_count", 0) >= 2, (
            f"Expected anchor_count >= 2 for utils.py with 2 functions; got {data}"
        )