gabriel / musehub public
conftest.py python
926 lines 36.7 KB
Raw
sha256:77fc45e703f90c0d603ecb1a0ce21ff21095728ca7dd0e146eb5e966c8f9fcc9 more passing tests from full test suite fun Human patch 23 hours ago
1 """Pytest configuration and fixtures."""
2 from __future__ import annotations
3
4 from pathlib import Path
5
6 import logging
7 import os
8 import typing
9 from collections.abc import AsyncGenerator, Generator
10
# Default MUSE_ENV to "test" when it is unset or empty.  This runs before the
# musehub imports below — presumably so those modules see the test
# environment at import time (TODO confirm which modules read MUSE_ENV).
if not os.environ.get("MUSE_ENV"):
    os.environ["MUSE_ENV"] = "test"
13
14 import pytest
15 import pytest_asyncio
16 from httpx import AsyncClient, ASGITransport
17 from sqlalchemy.ext.asyncio import (
18 AsyncSession,
19 async_sessionmaker,
20 create_async_engine,
21 )
22 from sqlalchemy.pool import NullPool
23
24 from musehub.core.genesis import compute_identity_id
25 from musehub.db import database
26 from musehub.db.database import Base, get_db
27 from musehub.db.musehub_identity_models import MusehubIdentity
28 from musehub.types.json_types import JSONValue
29 # Force all ORM models into Base.metadata before any create_all/drop_all.
30 # muse_cli_models is only imported inside init_db() in production; without
31 # this explicit import, Base.metadata is non-deterministic in tests (depends
32 # on import order), causing drop_all to miss tables that create_all later
33 # tries to create — resulting in duplicate-key errors on pg_type.
34 import musehub.db.muse_cli_models as _muse_cli_models # noqa: F401
35 from musehub.auth.request_signing import MSignContext, optional_signed_request, require_signed_request
36 from musehub.main import app
37 from musehub.rate_limits import limiter
38
# Flat payload mapping: string keys to scalar (str / int / bool) or None values.
# NOTE(review): no use of this alias is visible in the reviewed portion of
# this file — confirm it is still needed.
type _JobPayload = dict[str, str | int | bool | None]
40 import musehub.auth.failure_limiter as _failure_limiter
41
42
@pytest.fixture()
def _stub_push_background_tasks(monkeypatch: pytest.MonkeyPatch) -> None:
    """Swap ``enqueue_push_intel`` for a recording spy (opt-in).

    Request this fixture explicitly in tests that only need to assert that a
    push-intel job *would* have been enqueued.  Tests covering the enqueue
    logic itself should not use it; they should run the real implementation
    and check the resulting DB state.

    Each call to the spy appends ``(repo_id, "enqueue_push_intel", payload)``
    to ``musehub_jobs._test_enqueued_calls``, which is emptied when the
    fixture is set up.
    """
    import musehub.services.musehub_jobs as _jobs
    import musehub.services.musehub_wire as _wire

    # Start from an empty call log so assertions only see this test's calls.
    _jobs._test_enqueued_calls.clear()

    async def _spy_enqueue(
        session: AsyncSession,
        repo_id: str,
        head: str,
        domain_id: str | None = None,
        branch: str = "",
        owner: str | None = None,
    ) -> None:
        # ``session`` and ``owner`` are accepted for signature compatibility
        # only; they are not recorded.
        payload = {"head": head, "domain_id": domain_id, "branch": branch}
        _jobs._test_enqueued_calls.append((repo_id, "enqueue_push_intel", payload))

    # Patch both the defining module and the wire module's own reference.
    for patched_module in (_jobs, _wire):
        monkeypatch.setattr(patched_module, "enqueue_push_intel", _spy_enqueue)
63
64
@pytest.fixture(autouse=True)
def _tmp_objects_dir(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
    """Isolate object storage and on-disk paths for every test.

    * Every ``get_backend`` reference used by the wire / GC services, the wire
      route and (when importable) ``deploy.repair_objects`` is patched to
      return one fresh ``MemoryBackend`` per test, so no external object
      store is required and tests cannot see each other's objects.
    * ``settings.musehub_repos_dir`` is pointed at ``tmp_path``.
    * The ``releases`` StaticFiles mount is redirected to
      ``tmp_path/releases`` so requests under ``/releases/*`` do not fail
      because ``/data/releases`` does not exist locally.

    All changes go through ``monkeypatch`` and are undone at teardown.
    """
    import importlib

    import musehub.storage.backends as _backends_mod
    import musehub.services.musehub_wire as _wire_svc
    import musehub.services.musehub_wire_fetch as _wire_fetch_svc
    import musehub.services.musehub_wire_push as _wire_push_svc
    import musehub.services.musehub_wire_shared as _wire_shared_svc
    import musehub.services.musehub_gc as _gc_svc
    import musehub.api.routes.wire as _wire_route
    from musehub.config import settings
    from musehub.storage.backends import MemoryBackend

    test_backend = MemoryBackend()

    def _backend_fn(*_args: object, **_kwargs: object) -> MemoryBackend:
        """Return the per-test backend regardless of the arguments passed."""
        return test_backend

    # Each module holds its own ``get_backend`` reference (``from ... import``),
    # so every one of them has to be patched individually.
    for _module in (
        _backends_mod,
        _wire_svc,
        _wire_fetch_svc,
        _wire_push_svc,
        _wire_shared_svc,
        _gc_svc,
        _wire_route,
    ):
        monkeypatch.setattr(_module, "get_backend", _backend_fn)

    # The deploy package is optional; patch it only when it can be imported.
    try:
        _repair_mod = importlib.import_module("deploy.repair_objects")
    except ModuleNotFoundError:
        pass
    else:
        monkeypatch.setattr(_repair_mod, "get_backend", _backend_fn)

    monkeypatch.setattr(settings, "musehub_repos_dir", str(tmp_path))

    # Redirect the /releases StaticFiles mount to a temp dir so tests that
    # hit /releases/* don't fail because /data/releases doesn't exist locally.
    releases_dir = tmp_path / "releases"
    releases_dir.mkdir(exist_ok=True)
    from musehub.main import app as _app
    for _route in _app.routes:
        if getattr(_route, "name", None) == "releases":
            _static = _route.app  # type: ignore[attr-defined]
            monkeypatch.setattr(_static, "directory", str(releases_dir))
            # Force the mount to re-validate its configuration with the new dir.
            monkeypatch.setattr(_static, "config_checked", False)
            break
114
115
def pytest_configure(config: pytest.Config) -> None:
    """Default ``asyncio_mode`` to ``auto`` and quieten chatty library loggers.

    The mode is only filled in when the option exists and is still ``None``
    (e.g. in Docker when pyproject is not in cwd), so an explicit setting is
    never overridden.
    """
    options = config.option
    # A missing attribute yields the non-None placeholder and is left alone.
    if getattr(options, "asyncio_mode", "<missing>") is None:
        options.asyncio_mode = "auto"

    # Raise these loggers to WARNING so DEBUG lines do not flood test output.
    noisy_loggers = ("httpcore", "httpx", "sqlalchemy", "asyncio", "faker")
    for logger_name in noisy_loggers:
        logging.getLogger(logger_name).setLevel(logging.WARNING)
123
124
@pytest.fixture(autouse=True)
def reset_rate_limiter() -> Generator[None, None, None]:
    """Reset in-memory rate-limit counters before every test.

    Without this, the shared MemoryStorage accumulates hits across all tests
    in a session. Auth endpoints cap at 20/minute; running 30+ auth tests
    back-to-back exhausts that budget and causes 429s for legitimate calls.

    The auth failure limiter's ``_failures`` store is emptied as well.
    Nothing runs after the ``yield``: the reset happens at setup only.
    """
    limiter.reset()
    # Also drop recorded auth failures so earlier tests cannot affect this one.
    _failure_limiter._failures.clear()
    yield
136
137
@pytest.fixture
def anyio_backend() -> str:
    """Return ``"asyncio"`` as the backend name for anyio-driven tests.

    NOTE(review): presumably consumed by the anyio pytest plugin, which looks
    up a fixture with this name — confirm anyio-marked tests exist.
    """
    return "asyncio"
141
142
# Fake non-agent, non-admin signing context that the ``wire_headers`` fixture
# injects in place of the real signed-request dependencies.
_WIRE_CONTEXT = MSignContext(
    handle="test-user-wire",
    identity_id="wire-test-user-id",
    is_agent=False,
    is_admin=False,
)
149
150
@pytest.fixture
def wire_headers() -> Generator[dict[str, str], None, None]:
    """Authenticate wire-protocol tests as ``_WIRE_CONTEXT`` and yield msgpack headers.

    While the fixture is active both signed-request dependencies resolve to
    the fake context; the overrides are removed again at teardown.
    """
    auth_deps = (require_signed_request, optional_signed_request)

    def _fake_context() -> MSignContext:
        return _WIRE_CONTEXT

    for dep in auth_deps:
        app.dependency_overrides[dep] = _fake_context

    msgpack = "application/x-msgpack"
    yield {"Content-Type": msgpack, "Accept": msgpack}

    for dep in auth_deps:
        app.dependency_overrides.pop(dep, None)
162
163
@pytest.fixture(autouse=True)
def _reset_variation_store() -> Generator[None, None, None]:
    """Clear the singleton VariationStore after each test.

    Runs at teardown so state written by one test cannot leak into the next.
    If the variation module cannot be imported (it may have been removed in
    the MuseHub extraction) the fixture silently does nothing.
    """
    from contextlib import suppress

    yield

    # Import lazily and tolerate the module being absent.
    with suppress(ModuleNotFoundError):
        from musehub.variation.storage.variation_store import reset_variation_store

        reset_variation_store()
176
177
# Async (asyncpg) URL of the test database; override with TEST_DATABASE_URL.
_TEST_DATABASE_URL = os.environ.get(
    "TEST_DATABASE_URL",
    "postgresql+asyncpg://musehub:musehub@localhost:5434/musehub_test",
)

# Same URL without the asyncpg driver suffix — the sync (psycopg2) form used
# by the session-scoped schema fixture.
_TEST_DATABASE_URL_SYNC = _TEST_DATABASE_URL.replace("+asyncpg", "")

# One async engine and session factory shared by the whole test session.
# NullPool means connections are never reused between tests; the engine object
# itself is cheap, so it is built once here.
_TEST_ENGINE = create_async_engine(_TEST_DATABASE_URL, poolclass=NullPool)
_TEST_SESSION_FACTORY = async_sessionmaker(
    bind=_TEST_ENGINE,
    class_=AsyncSession,
    expire_on_commit=False,
)

# TRUNCATE statement covering every mapped table, built once at import time.
# Tables are listed in reverse dependency order (dependents first).
_TRUNCATE_SQL = (
    f"TRUNCATE {', '.join(t.name for t in reversed(Base.metadata.sorted_tables))}"
    " RESTART IDENTITY CASCADE"
)
201
202
@pytest.fixture(scope="session", autouse=True)
def _db_schema() -> Generator[None, None, None]:
    """Create the test schema once per test session using a sync psycopg2 engine.

    This replaces per-test drop_all/create_all (which took ~3 s per test on
    PostgreSQL) with a single DDL pass at session start and end. Individual
    tests get a clean slate via TRUNCATE in the db_session fixture instead.

    Setup: terminate other backends, drop all tables, create all tables, seed
    the attestation claim types.  Teardown: terminate other backends, drop
    all tables.  Every step uses its own short-lived engine, which is always
    disposed, even when the step raises.
    """
    import contextlib

    from sqlalchemy import create_engine as _create_engine
    from sqlalchemy import text as _text

    @contextlib.contextmanager
    def _short_lived_engine() -> Generator[typing.Any, None, None]:
        """Yield a fresh sync engine and dispose of it afterwards."""
        # connect_timeout=10: if postgres is unreachable or still starting
        # (e.g. Docker container not ready), fail fast instead of blocking in
        # C forever.  Without this, Ctrl+C cannot kill the process because
        # psycopg2's socket read is a non-interruptible C-level call.
        engine = _create_engine(
            _TEST_DATABASE_URL_SYNC,
            connect_args={"connect_timeout": 10},
        )
        try:
            yield engine
        finally:
            engine.dispose()

    def _terminate_other_backends() -> None:
        """Kill every other connection to the test database.

        If a previous pytest session was killed with SIGQUIT (Ctrl+\\) it
        leaves postgres backends idle-in-transaction holding locks on the
        test tables.  drop_all then waits forever for those locks, which
        makes the next test run freeze with Ctrl+C unresponsive.
        """
        with _short_lived_engine() as engine, engine.connect() as conn:
            conn.execute(_text(
                "SELECT pg_terminate_backend(pid) FROM pg_stat_activity "
                "WHERE datname = current_database() AND pid != pg_backend_pid()"
            ))
            conn.commit()

    # A fresh engine per step: pg_terminate_backend may have killed pooled
    # connections, so nothing is reused across steps.
    _terminate_other_backends()
    with _short_lived_engine() as engine:
        Base.metadata.drop_all(engine)
    with _short_lived_engine() as engine:
        Base.metadata.create_all(engine)
        # Seed claim types (mirrors alembic/versions/0043 seed logic)
        from musehub.services.musehub_attestations import _CLAIM_TYPES
        seed_stmt = _text(
            "INSERT INTO musehub_attestation_claim_types "
            "(type_key, category, label, description, valid_scopes, introduced_at) "
            "VALUES (:key, :cat, :label, :desc, :scopes, NOW()) "
            "ON CONFLICT (type_key) DO NOTHING"
        )
        with engine.connect() as conn:
            for ct in _CLAIM_TYPES.values():
                conn.execute(seed_stmt, {
                    "key": ct["type_key"],
                    "cat": ct["category"],
                    "label": ct["label"],
                    "desc": ct["description"],
                    "scopes": ct["valid_scopes"],
                })
            conn.commit()

    yield

    _terminate_other_backends()
    with _short_lived_engine() as engine:
        Base.metadata.drop_all(engine)
279
280
@pytest_asyncio.fixture
async def db_session(_db_schema: None) -> AsyncGenerator[AsyncSession, None]:
    """Provide a clean DB session for each test.

    Tables are truncated (not dropped/recreated) between tests — a single
    TRUNCATE … CASCADE is ~100× faster than drop_all + create_all on
    PostgreSQL, cutting per-test overhead from ~3 s to ~30 ms.

    While the fixture is active, ``database._engine`` and
    ``database._async_session_factory`` point at the test engine/factory and
    the app's ``get_db`` dependency hands out a fresh session per request.
    At teardown all dependency overrides are cleared and the module-level
    engine/factory are restored; both happen in ``finally`` blocks.
    """
    from sqlalchemy import text as _text

    async with _TEST_ENGINE.begin() as conn:
        # Terminate ALL other backends before TRUNCATE.  A failed test can
        # leave a connection in any state (idle in transaction, idle in
        # transaction (aborted), active) — filtering by state misses some
        # cases and causes deadlocks when TRUNCATE races the stale transaction.
        await conn.execute(_text(
            "SELECT pg_terminate_backend(pid) FROM pg_stat_activity "
            "WHERE datname = current_database() AND pid != pg_backend_pid()"
        ))
        await conn.execute(_text(_TRUNCATE_SQL))
        # Re-seed reference tables that are wiped by TRUNCATE CASCADE.
        from musehub.services.musehub_attestations import _CLAIM_TYPES
        seed_stmt = _text(
            "INSERT INTO musehub_attestation_claim_types "
            "(type_key, category, label, description, valid_scopes, introduced_at) "
            "VALUES (:key, :cat, :label, :desc, :scopes, NOW()) "
            "ON CONFLICT (type_key) DO NOTHING"
        )
        for _ct in _CLAIM_TYPES.values():
            await conn.execute(seed_stmt, {
                "key": _ct["type_key"],
                "cat": _ct["category"],
                "label": _ct["label"],
                "desc": _ct["description"],
                "scopes": _ct["valid_scopes"],
            })

    async def override_get_db() -> AsyncGenerator[AsyncSession, None]:
        # Each request gets its own session so concurrent requests
        # (e.g. stress tests) don't share a single connection and
        # raise "concurrent operations are not permitted".
        # All test setup data is committed, so independent sessions
        # see it without needing to share the test session.
        async with _TEST_SESSION_FACTORY() as req_session:
            yield req_session

    old_engine = database._engine
    old_factory = database._async_session_factory
    database._engine = _TEST_ENGINE
    database._async_session_factory = _TEST_SESSION_FACTORY
    try:
        async with _TEST_SESSION_FACTORY() as session:
            app.dependency_overrides[get_db] = override_get_db
            try:
                yield session
            finally:
                # Clear overrides even if teardown is entered via an exception,
                # so a fake get_db never outlives the restored engine globals.
                app.dependency_overrides.clear()
    finally:
        database._engine = old_engine
        database._async_session_factory = old_factory
332
333
@pytest_asyncio.fixture
async def session_factory(_db_schema: None) -> async_sessionmaker[AsyncSession]:
    """Expose the test session factory for tests needing multiple concurrent sessions.

    Returns the module-level ``_TEST_SESSION_FACTORY``.  Depends on
    ``_db_schema`` so the schema exists before sessions are opened; unlike
    ``db_session`` this fixture does not truncate any tables itself.
    """
    return _TEST_SESSION_FACTORY
338
339
class _Asgi24Wrapper:
    """ASGI middleware that stamps ``spec_version='2.4'`` on HTTP scopes.

    ASGI 2.4 tells Starlette to skip listen_for_disconnect on streaming
    responses, which is required for correct behaviour with async body reads.
    Scopes of any other type are passed through unchanged.
    """

    def __init__(self, app: typing.Any) -> None:
        self._app = app

    async def __call__(self, scope: typing.MutableMapping[str, typing.Any], receive: typing.Any, send: typing.Any) -> None:
        is_http = scope.get("type") == "http"
        if is_http:
            # Reuse an existing "asgi" mapping when present, otherwise create one.
            asgi_info = scope.setdefault("asgi", {})
            asgi_info["spec_version"] = "2.4"
        await self._app(scope, receive, send)
354
355
@pytest_asyncio.fixture
async def client(db_session: AsyncSession) -> AsyncGenerator[AsyncClient, None]:
    """Yield an in-process ``AsyncClient`` bound to the app.

    Requests go through ``_Asgi24Wrapper``.  The ``db_session`` dependency
    ensures the app talks to the test database (e.g. for the auth revocation
    check) before any request is issued.
    """
    wrapped_app = _Asgi24Wrapper(app)
    asgi_transport = ASGITransport(app=wrapped_app)
    async with AsyncClient(transport=asgi_transport, base_url="http://test") as http_client:
        yield http_client
362
363
364 # -----------------------------------------------------------------------------
365 # Auth fixtures for API contract and integration tests
366 # Uses dependency_overrides to inject a fake MSignContext so tests don't need
367 # real Ed25519 key pairs. Only active for tests that request auth_headers.
368 # -----------------------------------------------------------------------------
369
# Identity id derived from the bytes b"testuser"; shared by the ``test_user``
# DB row and the fake signing context below so the two agree.
_TEST_IDENTITY_ID = compute_identity_id(b"testuser")
_TEST_HANDLE = "testuser"

# Non-agent, non-admin context injected by the ``auth_headers`` fixture.
_TEST_CONTEXT = MSignContext(
    handle=_TEST_HANDLE,
    identity_id=_TEST_IDENTITY_ID,
    is_agent=False,
    is_admin=False,
)
379
380
@pytest_asyncio.fixture
async def test_user(db_session: AsyncSession) -> MusehubIdentity:
    """Persist and return the human test identity used by authenticated-route tests."""
    user_row = MusehubIdentity(
        identity_id=_TEST_IDENTITY_ID,
        handle=_TEST_HANDLE,
        display_name="Test User",
        identity_type="human",
    )
    db_session.add(user_row)
    await db_session.commit()

    # Reload the row after the commit.
    await db_session.refresh(user_row)

    # refresh() autobegins a new transaction; commit again to close it so
    # subsequent test-body commits don't hit "another operation is in progress".
    await db_session.commit()
    return user_row
397
398
@pytest.fixture
def auth_headers(test_user: MusehubIdentity) -> Generator[dict[str, str], None, None]:
    """Authenticate every request in the test as ``_TEST_CONTEXT`` and yield JSON headers.

    The overrides are installed on the app itself, not tied to the returned
    headers: while this fixture is active, *any* request made in the same test
    function is treated as authenticated, whether or not ``auth_headers`` is
    passed to the client call.  Tests that need to check 401 behaviour for
    unauthenticated requests, or to mix authed and unauthed flows, should
    manipulate ``app.dependency_overrides`` directly or be split into two
    test functions.
    """
    auth_deps = (require_signed_request, optional_signed_request)

    def _fake_context() -> MSignContext:
        return _TEST_CONTEXT

    for dep in auth_deps:
        app.dependency_overrides[dep] = _fake_context

    yield {"Content-Type": "application/json"}

    for dep in auth_deps:
        app.dependency_overrides.pop(dep, None)
415
416
417 # ---------------------------------------------------------------------------
418 # Symbol-detail fixtures
419 # Used by test_symbol_detail_phase1.py (T2–T7 tiers).
420 # ---------------------------------------------------------------------------
421
422 import datetime as _dt
423 import contextlib as _contextlib
424 import time as _time
425 from muse.core.types import blob_id
426
427
def _utc_now() -> _dt.datetime:
    """Return the current moment as a timezone-aware UTC datetime."""
    utc = _dt.timezone.utc
    return _dt.datetime.now(utc)
430
431
async def _make_repo_row(session: AsyncSession, owner: str, slug: str) -> "MusehubRepo":
    """Insert and commit a public repo row for *owner*/*slug* and return it.

    The owner's identity id is derived from the encoded handle; the repo id is
    derived from that identity id, the slug, the domain ``"code"`` and the
    creation timestamp.
    """
    from musehub.core.genesis import compute_identity_id, compute_repo_id
    from musehub.db.musehub_repo_models import MusehubRepo

    owner_id = compute_identity_id(owner.encode())
    now = _utc_now()
    new_repo = MusehubRepo(
        repo_id=compute_repo_id(owner_id, slug, "code", now.isoformat()),
        name=slug,
        owner=owner,
        slug=slug,
        visibility="public",
        owner_user_id=owner_id,
        description="",
        tags=[],
        created_at=now,
    )
    session.add(new_repo)
    await session.commit()
    return new_repo
452
453
async def _make_commit_row(session: AsyncSession, repo_id: str, commit_id: str, **kwargs: typing.Any) -> None:
    """Insert a ``MusehubCommit`` plus its ``MusehubCommitRef`` link and commit.

    Args:
        session: Session used for the inserts; it is committed before returning.
        repo_id: Repo the commit is linked to through ``MusehubCommitRef``.
        commit_id: Id of the new commit; also seeds the default snapshot id.
        **kwargs: Column overrides for the ``MusehubCommit`` row (for example
            ``message`` or ``timestamp``).  Values are not limited to JSON
            types — callers in this file pass ``datetime`` timestamps.
    """
    from musehub.db.musehub_repo_models import MusehubCommit, MusehubCommitRef

    columns: dict[str, typing.Any] = {
        "commit_id": commit_id,
        "branch": "dev",
        "parent_ids": [],
        "message": "feat: test",
        "author": "gabriel",
        "timestamp": _utc_now(),
        "snapshot_id": blob_id(f"snap-{commit_id}".encode()),
        "agent_id": "claude-code",
        "model_id": "claude-sonnet-4-6",
        "commit_branch": "task/test",
        "signature": "",
    }
    # Caller-supplied values win over the defaults above.
    columns.update(kwargs)

    session.add(MusehubCommit(**columns))
    # Flush so the commit row is written before the ref row that points at it.
    await session.flush()
    session.add(MusehubCommitRef(repo_id=repo_id, commit_id=commit_id))
    await session.commit()
474
475
async def _make_history_entry(
    session: AsyncSession, repo_id: str, address: str, commit_id: str,
    op: str = "add", content_id: str | None = None,
    committed_at: _dt.datetime | None = None,
    message: str | None = None,
) -> None:
    """Insert and commit one ``MusehubSymbolHistoryEntry`` authored by ``gabriel``.

    A falsy ``committed_at`` is replaced by the current UTC time, and a falsy
    ``content_id`` by an id derived from the address and commit id.
    """
    from musehub.db.musehub_intel_models import MusehubSymbolHistoryEntry

    when = committed_at or _utc_now()
    body_id = content_id or blob_id(f"body-{address}-{commit_id}".encode())
    entry = MusehubSymbolHistoryEntry(
        repo_id=repo_id,
        address=address,
        commit_id=commit_id,
        committed_at=when,
        author="gabriel",
        op=op,
        content_id=body_id,
        message=message,
    )
    session.add(entry)
    await session.commit()
494
495
@pytest_asyncio.fixture
async def repo_fixture(db_session: AsyncSession) -> tuple[str, str]:
    """Seed an empty repo (no symbol history) and return ``(owner, slug)``."""
    owner = "gabriel"
    row = await _make_repo_row(db_session, owner, "test-repo")
    return owner, row.slug
501
502
@pytest_asyncio.fixture
async def seed_symbol(db_session: AsyncSession) -> tuple[str, str, str]:
    """Seed a repo holding one commit and one history entry for a single symbol.

    Returns ``(owner, slug, address)``.
    """
    owner = "gabriel"
    slug = "seed-repo"
    address = "src/core.py::compute"

    seeded_repo = await _make_repo_row(db_session, owner, slug)
    seeded_commit = blob_id(f"commit-seed-{slug}".encode())
    await _make_commit_row(db_session, seeded_repo.repo_id, seeded_commit)
    await _make_history_entry(db_session, seeded_repo.repo_id, address, seeded_commit)
    return owner, slug, address
513
514
@pytest_asyncio.fixture
async def seed_type_intel(db_session: AsyncSession, seed_symbol: tuple[str, str, str]) -> None:
    """Attach a ``MusehubIntelType`` row (fully annotated function) to the seeded symbol."""
    from sqlalchemy import select

    from musehub.db.musehub_intel_models import MusehubIntelType
    from musehub.db.musehub_repo_models import MusehubRepo

    owner, slug, address = seed_symbol
    # Look the repo up again: seed_symbol only hands back (owner, slug, address).
    lookup = select(MusehubRepo).where(MusehubRepo.owner == owner, MusehubRepo.slug == slug)
    seeded_repo = (await db_session.execute(lookup)).scalar_one()

    type_row = MusehubIntelType(
        repo_id=seeded_repo.repo_id,
        address=address,
        kind="function",
        return_is_any=False,
        params_total=2,
        params_annotated=2,
        params_with_any=0,
        type_score=0.95,
        ref="dev",
    )
    db_session.add(type_row)
    await db_session.commit()
536
537
@pytest_asyncio.fixture
async def seed_sym_intel(db_session: AsyncSession, seed_symbol: tuple[str, str, str]) -> None:
    """Attach a ``MusehubSymbolIntel`` row (churn / blast / gravity metrics) to the seeded symbol."""
    from sqlalchemy import select

    from musehub.db.musehub_intel_models import MusehubSymbolIntel
    from musehub.db.musehub_repo_models import MusehubRepo

    owner, slug, address = seed_symbol
    # Look the repo up again: seed_symbol only hands back (owner, slug, address).
    lookup = select(MusehubRepo).where(MusehubRepo.owner == owner, MusehubRepo.slug == slug)
    seeded_repo = (await db_session.execute(lookup)).scalar_one()

    intel_row = MusehubSymbolIntel(
        repo_id=seeded_repo.repo_id,
        address=address,
        churn=5,
        churn_30d=2,
        churn_90d=4,
        blast=3,
        blast_direct=2,
        blast_cross=1,
        blast_top=[],
        author_count=1,
        gravity=0.1,
        weekly=[],
        gravity_pct=10.0,
        gravity_direct_dependents=2,
        gravity_transitive_dependents=3,
        gravity_max_depth=2,
    )
    db_session.add(intel_row)
    await db_session.commit()
566
567
@pytest_asyncio.fixture
async def seed_api_intel(db_session: AsyncSession, seed_symbol: tuple[str, str, str]) -> None:
    """Attach a public-function ``MusehubIntelApiSurface`` row to the seeded symbol."""
    from sqlalchemy import select

    from musehub.db.musehub_intel_models import MusehubIntelApiSurface
    from musehub.db.musehub_repo_models import MusehubRepo

    owner, slug, address = seed_symbol
    # Look the repo up again: seed_symbol only hands back (owner, slug, address).
    lookup = select(MusehubRepo).where(MusehubRepo.owner == owner, MusehubRepo.slug == slug)
    seeded_repo = (await db_session.execute(lookup)).scalar_one()

    surface_row = MusehubIntelApiSurface(
        repo_id=seeded_repo.repo_id,
        address=address,
        kind="function",
        visibility="public",
        ref="dev",
    )
    db_session.add(surface_row)
    await db_session.commit()
585
586
@pytest_asyncio.fixture
async def seed_many_refactor_events(db_session: AsyncSession, seed_symbol: tuple[str, str, str]) -> None:
    """Attach 25 ``implementation`` refactor events to the seeded symbol in one commit."""
    from sqlalchemy import select

    from musehub.db.musehub_intel_models import MusehubIntelRefactorEvent
    from musehub.db.musehub_repo_models import MusehubRepo

    owner, slug, address = seed_symbol
    # Look the repo up again: seed_symbol only hands back (owner, slug, address).
    lookup = select(MusehubRepo).where(MusehubRepo.owner == owner, MusehubRepo.slug == slug)
    seeded_repo = (await db_session.execute(lookup)).scalar_one()

    events = [
        MusehubIntelRefactorEvent(
            event_id=blob_id(f"refactor-{n}-{slug}".encode()),
            repo_id=seeded_repo.repo_id,
            kind="implementation",
            address=address,
            detail=f"refactor event {n}",
            commit_id=blob_id(f"rc-{n}".encode()),
            committed_at=_utc_now(),
        )
        for n in range(25)
    ]
    db_session.add_all(events)
    await db_session.commit()
607
608
@pytest_asyncio.fixture
async def seed_refactor_event(db_session: AsyncSession, seed_symbol: tuple[str, str, str]) -> None:
    """Attach a single ``implementation`` refactor event to the seeded symbol."""
    from sqlalchemy import select

    from musehub.db.musehub_intel_models import MusehubIntelRefactorEvent
    from musehub.db.musehub_repo_models import MusehubRepo

    owner, slug, address = seed_symbol
    # Look the repo up again: seed_symbol only hands back (owner, slug, address).
    lookup = select(MusehubRepo).where(MusehubRepo.owner == owner, MusehubRepo.slug == slug)
    seeded_repo = (await db_session.execute(lookup)).scalar_one()

    event = MusehubIntelRefactorEvent(
        event_id=blob_id(f"refactor-single-{slug}".encode()),
        repo_id=seeded_repo.repo_id,
        kind="implementation",
        address=address,
        detail="extracted helper",
        commit_id=blob_id(f"rc-single-{slug}".encode()),
        committed_at=_utc_now(),
    )
    db_session.add(event)
    await db_session.commit()
628
629
@pytest_asyncio.fixture
async def seed_refactor_event_with_xss(db_session: AsyncSession, seed_symbol: tuple[str, str, str]) -> None:
    """Attach a refactor event whose ``detail`` carries an HTML/XSS payload."""
    from sqlalchemy import select

    from musehub.db.musehub_intel_models import MusehubIntelRefactorEvent
    from musehub.db.musehub_repo_models import MusehubRepo

    owner, slug, address = seed_symbol
    # Look the repo up again: seed_symbol only hands back (owner, slug, address).
    lookup = select(MusehubRepo).where(MusehubRepo.owner == owner, MusehubRepo.slug == slug)
    seeded_repo = (await db_session.execute(lookup)).scalar_one()

    hostile_event = MusehubIntelRefactorEvent(
        event_id=blob_id(f"refactor-xss-{slug}".encode()),
        repo_id=seeded_repo.repo_id,
        kind="implementation",
        address=address,
        detail='<img src=x onerror=alert(1)>',
        commit_id=blob_id(f"rc-xss-{slug}".encode()),
        committed_at=_utc_now(),
    )
    db_session.add(hostile_event)
    await db_session.commit()
649
650
@pytest_asyncio.fixture
async def seed_symbol_with_xss_commit(db_session: AsyncSession) -> tuple[str, str, str]:
    """Seed a symbol whose only commit has an HTML/XSS payload in its message.

    Returns ``(owner, slug, address)``.
    """
    owner = "gabriel"
    slug = "xss-repo"
    address = "src/evil.py::fn"

    seeded_repo = await _make_repo_row(db_session, owner, slug)
    hostile_commit = blob_id(f"commit-xss-{slug}".encode())
    hostile_message = '<img src=x onerror=alert(1)> feat: xss test'
    await _make_commit_row(db_session, seeded_repo.repo_id, hostile_commit, message=hostile_message)
    await _make_history_entry(db_session, seeded_repo.repo_id, address, hostile_commit)
    return owner, slug, address
664
665
@pytest_asyncio.fixture
async def seed_symbol_with_large_history(db_session: AsyncSession) -> tuple[str, str, str]:
    """Seed a symbol with 200 ``modify`` history entries, one commit each.

    200 rather than 10k keeps the stress fixture fast.
    Returns ``(owner, slug, address)``.
    """
    owner = "gabriel"
    slug = "large-history-repo"
    address = "src/big.py::process"

    seeded_repo = await _make_repo_row(db_session, owner, slug)
    commit_ids = (blob_id(f"commit-large-{n}-{slug}".encode()) for n in range(200))
    for commit_id in commit_ids:
        await _make_commit_row(db_session, seeded_repo.repo_id, commit_id)
        await _make_history_entry(db_session, seeded_repo.repo_id, address, commit_id, op="modify")
    return owner, slug, address
677
678
@pytest_asyncio.fixture
async def seed_symbol_high_coupling(db_session: AsyncSession) -> tuple[str, str, str]:
    """Seed a symbol that shares its single commit with 25 partner symbols.

    Returns ``(owner, slug, address)``.
    """
    from musehub.db.musehub_intel_models import MusehubSymbolHistoryEntry

    owner = "gabriel"
    slug = "coupling-repo"
    address = "src/hub.py::dispatch"

    seeded_repo = await _make_repo_row(db_session, owner, slug)
    shared_commit = blob_id(f"commit-coupling-{slug}".encode())
    await _make_commit_row(db_session, seeded_repo.repo_id, shared_commit)
    await _make_history_entry(db_session, seeded_repo.repo_id, address, shared_commit)

    # 25 co-changing partners, all recorded against the same commit.
    partner_entries = [
        MusehubSymbolHistoryEntry(
            repo_id=seeded_repo.repo_id,
            address=f"src/partner_{n}.py::fn",
            commit_id=shared_commit,
            committed_at=_utc_now(),
            author="gabriel",
            op="modify",
            content_id=blob_id(f"body-partner-{n}".encode()),
        )
        for n in range(25)
    ]
    db_session.add_all(partner_entries)
    await db_session.commit()
    return owner, slug, address
703
704
@pytest_asyncio.fixture
async def seed_symbol_with_clones(db_session: AsyncSession) -> tuple[str, str, str]:
    """Seed a symbol whose content id also occurs at a second address (a clone).

    Returns ``(owner, slug, address)`` of the original symbol.
    """
    from musehub.db.musehub_intel_models import MusehubHashOccurrenceEntry

    owner = "gabriel"
    slug = "clones-repo"
    address = "src/original.py::fn"

    seeded_repo = await _make_repo_row(db_session, owner, slug)
    commit_id = blob_id(f"commit-clone-{slug}".encode())
    shared_content = blob_id(f"shared-body-{slug}".encode())
    await _make_commit_row(db_session, seeded_repo.repo_id, commit_id)
    await _make_history_entry(
        db_session, seeded_repo.repo_id, address, commit_id, content_id=shared_content,
    )

    # Two occurrences of the same content id: the copy first, then the original.
    occurrences = [
        MusehubHashOccurrenceEntry(
            repo_id=seeded_repo.repo_id,
            content_id=shared_content,
            address=where,
        )
        for where in ("src/copy.py::fn", address)
    ]
    db_session.add_all(occurrences)
    await db_session.commit()
    return owner, slug, address
729
730
@pytest.fixture
def benchmark_timer() -> typing.Callable[[float], typing.ContextManager[None]]:
    """Return a factory for context managers that enforce a wall-time budget.

    ``with benchmark_timer(max_ms): ...`` fails with an ``AssertionError`` when
    the body takes ``max_ms`` milliseconds or longer.  If the body itself
    raises, that exception propagates and the timing check is skipped.
    """
    @_contextlib.contextmanager
    def _timer(max_ms: float) -> Generator[None, None, None]:
        started = _time.monotonic()
        yield
        finished = _time.monotonic()
        spent_ms = (finished - started) * 1000
        assert spent_ms < max_ms, f"took {spent_ms:.0f}ms, limit {max_ms}ms"

    return _timer
741
742
743 # ---------------------------------------------------------------------------
744 # Pagination fixtures
745 # Used by test_symbol_detail_pagination.py.
746 # ---------------------------------------------------------------------------
747
async def _seed_history_entries(
    db_session: AsyncSession,
    owner: str,
    slug: str,
    count: int,
) -> tuple[str, str, str]:
    """Create a repo + symbol with *count* history entries spaced 1 hour apart.

    Commit messages are ``entry-{i}`` for i in 0..count-1.
    entry-0 is the oldest, entry-(count-1) is the newest.
    Timestamps start at 2026-01-01T00:00:00Z; each entry gets its own commit
    with the same message and timestamp.

    Args:
        db_session: Session used for all inserts (committed by the helpers).
        owner: Owner handle of the repo to create.
        slug: Slug of the repo to create; also mixed into the commit ids.
        count: Number of commit/history pairs to create (0 creates only the repo).

    Returns:
        ``(owner, slug, address)``.
    """
    address = "src/core.py::paginate_fn"
    repo = await _make_repo_row(db_session, owner, slug)
    base_ts = _dt.datetime(2026, 1, 1, 0, 0, 0, tzinfo=_dt.timezone.utc)
    for i in range(count):
        committed_at = base_ts + _dt.timedelta(hours=i)
        commit_id = blob_id(f"commit-hist-{i}-{slug}".encode())
        message = f"entry-{i}"
        await _make_commit_row(
            db_session, repo.repo_id, commit_id,
            message=message,
            timestamp=committed_at,
        )
        await _make_history_entry(
            db_session, repo.repo_id, address, commit_id,
            op="modify", committed_at=committed_at,
            message=message,
        )
    return (owner, slug, address)
778
779
async def _seed_coupling_partners(
    db_session: AsyncSession,
    owner: str,
    slug: str,
    partner_count: int,
) -> tuple[str, str, str]:
    """Seed a repo whose target symbol has *partner_count* coupling partners.

    One commit is created per partner, spaced an hour apart from
    2026-02-01T00:00:00Z.  The target symbol is touched by every commit, while
    partner ``i`` is touched by commits ``i..partner_count-1``; the
    pre-computed coupling row for partner ``i`` therefore records
    ``shared_commits = partner_count - i`` (partner_0 has the most).

    Returns ``(owner, slug, address)``.
    """
    from musehub.db.musehub_intel_models import MusehubSymbolHistoryEntry

    address = "src/hub.py::dispatch"
    seeded_repo = await _make_repo_row(db_session, owner, slug)
    first_ts = _dt.datetime(2026, 2, 1, 0, 0, 0, tzinfo=_dt.timezone.utc)

    for commit_no in range(partner_count):
        when = first_ts + _dt.timedelta(hours=commit_no)
        commit_id = blob_id(f"commit-coup-{commit_no}-{slug}".encode())
        await _make_commit_row(
            db_session, seeded_repo.repo_id, commit_id,
            message=f"coupling-commit-{commit_no}",
            timestamp=when,
        )

        # The target symbol is part of every commit ...
        target_entry = MusehubSymbolHistoryEntry(
            repo_id=seeded_repo.repo_id,
            address=address,
            commit_id=commit_id,
            committed_at=when,
            author="gabriel",
            op="modify",
            content_id=blob_id(f"body-target-{commit_no}-{slug}".encode()),
        )
        # ... while partner p joins only commits numbered p or later.
        partner_entries = [
            MusehubSymbolHistoryEntry(
                repo_id=seeded_repo.repo_id,
                address=f"src/partner_{p}.py::fn_{p}",
                commit_id=commit_id,
                committed_at=when,
                author="gabriel",
                op="modify",
                content_id=blob_id(f"body-partner-{p}-{commit_no}-{slug}".encode()),
            )
            for p in range(commit_no + 1)
        ]
        db_session.add_all([target_entry, *partner_entries])
        await db_session.commit()

    # Pre-computed coupling rows, one per partner.
    from musehub.db.musehub_intel_models import MusehubSymbolCoupling

    coupling_rows = [
        MusehubSymbolCoupling(
            repo_id=seeded_repo.repo_id,
            address=address,
            co_address=f"src/partner_{p}.py::fn_{p}",
            shared_commits=partner_count - p,
        )
        for p in range(partner_count)
    ]
    db_session.add_all(coupling_rows)
    await db_session.commit()
    return owner, slug, address
839
840
@pytest_asyncio.fixture
async def seed_symbol_with_26_history(db_session: AsyncSession) -> tuple[str, str, str]:
    """Seed ``gabriel/hist26-repo`` with 26 history entries for one symbol.

    Yields the ``(owner, slug, address)`` triple from ``_seed_history_entries``.
    """
    seeded = await _seed_history_entries(
        db_session,
        owner="gabriel",
        slug="hist26-repo",
        count=26,
    )
    return seeded
844
845
@pytest_asyncio.fixture
async def seed_symbol_with_exactly_10_history(db_session: AsyncSession) -> tuple[str, str, str]:
    """Seed ``gabriel/hist10-repo`` with exactly 10 history entries for one symbol.

    Yields the ``(owner, slug, address)`` triple from ``_seed_history_entries``.
    """
    seeded = await _seed_history_entries(
        db_session,
        owner="gabriel",
        slug="hist10-repo",
        count=10,
    )
    return seeded
849
850
@pytest_asyncio.fixture
async def seed_symbol_with_11_history(db_session: AsyncSession) -> tuple[str, str, str]:
    """Seed ``gabriel/hist11-repo`` with 11 history entries for one symbol.

    Yields the ``(owner, slug, address)`` triple from ``_seed_history_entries``.
    """
    seeded = await _seed_history_entries(
        db_session,
        owner="gabriel",
        slug="hist11-repo",
        count=11,
    )
    return seeded
854
855
@pytest_asyncio.fixture
async def seed_symbol_high_coupling_40(db_session: AsyncSession) -> tuple[str, str, str]:
    """Seed ``gabriel/coup40-repo`` with a symbol that has 40 coupling partners.

    Yields the ``(owner, slug, address)`` triple from ``_seed_coupling_partners``.
    """
    seeded = await _seed_coupling_partners(
        db_session,
        owner="gabriel",
        slug="coup40-repo",
        partner_count=40,
    )
    return seeded
859
860
@pytest_asyncio.fixture
async def seed_symbol_with_exactly_15_coupling(db_session: AsyncSession) -> tuple[str, str, str]:
    """Seed ``gabriel/coup15-repo`` with a symbol that has exactly 15 coupling partners.

    Yields the ``(owner, slug, address)`` triple from ``_seed_coupling_partners``.
    """
    seeded = await _seed_coupling_partners(
        db_session,
        owner="gabriel",
        slug="coup15-repo",
        partner_count=15,
    )
    return seeded
864
865
@pytest_asyncio.fixture
async def seed_symbol_with_16_coupling(db_session: AsyncSession) -> tuple[str, str, str]:
    """Seed ``gabriel/coup16-repo`` with a symbol that has 16 coupling partners.

    Yields the ``(owner, slug, address)`` triple from ``_seed_coupling_partners``.
    """
    seeded = await _seed_coupling_partners(
        db_session,
        owner="gabriel",
        slug="coup16-repo",
        partner_count=16,
    )
    return seeded
869
870
@pytest_asyncio.fixture
async def seed_symbol_with_26_history_and_40_coupling(
    db_session: AsyncSession,
) -> tuple[str, str, str]:
    """Seed one symbol with 26 history entries and 26 coupling partners.

    Despite the ``40`` in the fixture name, the partners live in the same 26
    commits as the history entries. The target is touched by every commit,
    and partner_j is touched by commits j..25, so its shared count is
    ``26 - j`` (descending). That gives 26 partners with positive shared
    counts: 2 coupling pages (15 + 11) and 3 history pages (10 + 10 + 6).
    Reusing the history commits avoids inflating the target's change_count
    with extra coupling-only commits.

    Yields ``(owner, slug, address)``.
    """
    from musehub.db.musehub_intel_models import (
        MusehubSymbolCoupling,
        MusehubSymbolHistoryEntry,
    )

    total = 26  # number of commits == number of partners
    owner = "gabriel"
    slug = "hist26-coup26-repo"
    address = "src/core.py::paginate_fn"
    repo = await _make_repo_row(db_session, owner, slug)
    start = _dt.datetime(2026, 1, 1, 0, 0, 0, tzinfo=_dt.timezone.utc)

    for commit_no in range(total):
        when = start + _dt.timedelta(hours=commit_no)
        commit_id = blob_id(f"commit-combo-{commit_no}-{slug}".encode())
        label = f"entry-{commit_no}"
        await _make_commit_row(
            db_session,
            repo.repo_id,
            commit_id,
            message=label,
            timestamp=when,
        )
        # History entry for the target symbol itself.
        await _make_history_entry(
            db_session,
            repo.repo_id,
            address,
            commit_id,
            op="modify",
            committed_at=when,
            message=label,
        )
        # Partners 0..commit_no ride along on this commit.
        db_session.add_all(
            [
                MusehubSymbolHistoryEntry(
                    repo_id=repo.repo_id,
                    address=f"src/partner_{partner_no}.py::fn_{partner_no}",
                    commit_id=commit_id,
                    committed_at=when,
                    author="gabriel",
                    op="modify",
                    content_id=blob_id(
                        f"body-combo-partner-{partner_no}-{commit_no}-{slug}".encode()
                    ),
                )
                for partner_no in range(commit_no + 1)
            ]
        )
    await db_session.commit()

    # Pre-computed coupling rows: partner_j shares total - j commits.
    db_session.add_all(
        [
            MusehubSymbolCoupling(
                repo_id=repo.repo_id,
                address=address,
                co_address=f"src/partner_{partner_no}.py::fn_{partner_no}",
                shared_commits=total - partner_no,
            )
            for partner_no in range(total)
        ]
    )
    await db_session.commit()
    return owner, slug, address
File History 2 commits
sha256:77fc45e703f90c0d603ecb1a0ce21ff21095728ca7dd0e146eb5e966c8f9fcc9 more passing tests from full test suite fun Human patch 23 hours ago
sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595 fix: typing audit — 0 violations, 0 untyped defs across all… Sonnet 4.6 minor 15 days ago