gabriel / musehub public
conftest.py python
932 lines 36.9 KB
Raw
sha256:77fc45e703f90c0d603ecb1a0ce21ff21095728ca7dd0e146eb5e966c8f9fcc9 more passing tests from full test suite fun Human patch 7 days ago
1 """Pytest configuration and fixtures."""
2 from __future__ import annotations
3
4 from pathlib import Path
5
6 import logging
7 import os
8 import typing
9 from collections.abc import AsyncGenerator, Generator
10
# Default MUSE_ENV to "test" when it is unset or empty.  This sits above the
# musehub imports on purpose of ordering: the variable is already set by the
# time those modules load (NOTE(review): presumably they read it at import
# time — confirm).
if not os.environ.get("MUSE_ENV"):
    os.environ["MUSE_ENV"] = "test"
13
14 import pytest
15 import pytest_asyncio
16 from httpx import AsyncClient, ASGITransport
17 from sqlalchemy.ext.asyncio import (
18 AsyncSession,
19 async_sessionmaker,
20 create_async_engine,
21 )
22 from sqlalchemy.pool import NullPool
23
24 from musehub.core.genesis import compute_identity_id
25 from musehub.db import database
26 from musehub.db.database import Base, get_db
27 from musehub.db.musehub_identity_models import MusehubIdentity
28 from musehub.types.json_types import JSONValue
29 # Force all ORM models into Base.metadata before any create_all/drop_all.
30 # muse_cli_models is only imported inside init_db() in production; without
31 # this explicit import, Base.metadata is non-deterministic in tests (depends
32 # on import order), causing drop_all to miss tables that create_all later
33 # tries to create — resulting in duplicate-key errors on pg_type.
34 import musehub.db.muse_cli_models as _muse_cli_models # noqa: F401
35 from musehub.auth.request_signing import MSignContext, optional_signed_request, require_signed_request
36 from musehub.main import app
37 from musehub.rate_limits import limiter
38
# Alias for a flat payload mapping: string keys to str / int / bool / None values.
type _JobPayload = dict[str, str | int | bool | None]
40 import musehub.auth.failure_limiter as _failure_limiter
41
42
@pytest.fixture()
def _stub_push_background_tasks(monkeypatch: pytest.MonkeyPatch) -> None:
    """Swap ``enqueue_push_intel`` for a recording spy (opt-in fixture).

    Request this fixture explicitly in tests that only need to assert that an
    enqueue happened.  Tests exercising the enqueue logic itself should NOT
    use it; they should run the real implementation and verify DB state.

    Each call to the spy appends ``(repo_id, "enqueue_push_intel", payload)``
    to ``musehub_jobs._test_enqueued_calls``; that list is emptied when the
    fixture is set up.
    """
    import musehub.services.musehub_jobs as jobs_mod
    import musehub.services.musehub_wire as wire_mod

    # Start every test from an empty call log.
    jobs_mod._test_enqueued_calls.clear()

    async def _spy_enqueue(
        session: AsyncSession,
        repo_id: str,
        head: str,
        domain_id: str | None = None,
        branch: str = "",
        owner: str | None = None,
    ) -> None:
        # Record the call instead of enqueueing; session and owner are ignored.
        payload = {"head": head, "domain_id": domain_id, "branch": branch}
        jobs_mod._test_enqueued_calls.append((repo_id, "enqueue_push_intel", payload))

    # Patch both modules: each holds its own reference to the function.
    for patched_module in (jobs_mod, wire_mod):
        monkeypatch.setattr(patched_module, "enqueue_push_intel", _spy_enqueue)
63
64
@pytest.fixture(autouse=True)
def _tmp_objects_dir(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
    """Route object storage to local MinIO and sandbox on-disk paths per test.

    A single ``BlobBackend`` (boto3 + S3-compatible, the same class used in
    staging/prod) pointed at MinIO on localhost:9000 is returned by every
    patched ``get_backend``.  Content-addressing gives natural isolation:
    tests cannot read each other's objects unless they wrote identical bytes.

    Also redirects ``settings.musehub_repos_dir`` and the ``releases`` static
    mount to directories under ``tmp_path``.

    Requires MinIO to be running: docker compose up minio createbuckets -d
    """
    import importlib

    import musehub.storage.backends as backends_mod
    import musehub.services.musehub_wire as wire_svc
    import musehub.services.musehub_wire_fetch as wire_fetch_svc
    import musehub.services.musehub_wire_push as wire_push_svc
    import musehub.services.musehub_wire_shared as wire_shared_svc
    import musehub.services.musehub_gc as gc_svc
    import musehub.api.routes.wire as wire_route
    from musehub.config import settings
    from musehub.storage.backends import BlobBackend

    minio_backend = BlobBackend(
        bucket="muse-objects",
        endpoint_url="http://localhost:9000",
        access_key_id="minioadmin",
        secret_access_key="minioadmin",
        region="us-east-1",
    )

    def _use_minio_backend(*_args, **_kwargs):
        # Accept and ignore any arguments; always hand back the shared backend.
        return minio_backend

    # Each module keeps its own ``get_backend`` reference, so patch them all.
    modules_with_backend = (
        backends_mod,
        wire_svc,
        wire_fetch_svc,
        wire_push_svc,
        wire_shared_svc,
        gc_svc,
        wire_route,
    )
    for module in modules_with_backend:
        monkeypatch.setattr(module, "get_backend", _use_minio_backend)

    # The deploy package is optional; patch it only when it can be imported.
    try:
        repair_mod = importlib.import_module("deploy.repair_objects")
        monkeypatch.setattr(repair_mod, "get_backend", _use_minio_backend)
    except ModuleNotFoundError:
        pass

    monkeypatch.setattr(settings, "musehub_repos_dir", str(tmp_path))

    # Redirect the /releases StaticFiles mount to a temp dir so tests that
    # hit /releases/* don't fail because /data/releases doesn't exist locally.
    releases_dir = f"{tmp_path}/releases"
    os.makedirs(releases_dir, exist_ok=True)
    from musehub.main import app as _app

    releases_route = next(
        (route for route in _app.routes if getattr(route, "name", None) == "releases"),
        None,
    )
    if releases_route is not None:
        static_app = releases_route.app  # type: ignore[attr-defined]
        static_app.directory = releases_dir
        static_app.config_checked = False  # force re-check with new dir
120
121
def pytest_configure(config: pytest.Config) -> None:
    """Default ``asyncio_mode`` to ``"auto"`` and quiet noisy library loggers.

    The mode is only set when the option exists and is still ``None``, so an
    explicit setting is left alone.  This keeps async fixtures working when
    the project config is not picked up (e.g. in Docker when pyproject is
    not in cwd).
    """
    options = config.option
    # A missing attribute yields False, which is not None, so nothing is set.
    if getattr(options, "asyncio_mode", False) is None:
        options.asyncio_mode = "auto"

    # Suppress verbose library loggers that flood the test output with DEBUG lines.
    noisy_loggers = ("httpcore", "httpx", "sqlalchemy", "asyncio", "faker")
    for logger_name in noisy_loggers:
        logging.getLogger(logger_name).setLevel(logging.WARNING)
129
130
@pytest.fixture(autouse=True)
def reset_rate_limiter() -> Generator[None, None, None]:
    """Wipe in-memory rate-limit state before each test.

    The limiter's storage is shared for the whole session, so hits would
    otherwise accumulate across tests.  Auth endpoints cap at 20/minute;
    running 30+ auth tests back-to-back exhausts that budget and produces
    429s for legitimate calls.  The failure limiter's tracked failures are
    cleared as well.
    """
    # Setup-only fixture: nothing runs after the test.
    limiter.reset()
    tracked_failures = _failure_limiter._failures
    tracked_failures.clear()
    yield None
142
143
@pytest.fixture
def anyio_backend() -> str:
    """Return the async backend name, always ``"asyncio"``.

    NOTE(review): the fixture name matches the anyio pytest plugin's
    convention; presumably that plugin consumes it — confirm.
    """
    backend_name = "asyncio"
    return backend_name
147
148
# Fake signed-request context (non-agent, non-admin) that the ``wire_headers``
# fixture injects through dependency overrides for wire-protocol tests.
_WIRE_CONTEXT = MSignContext(
    handle="test-user-wire",
    identity_id="wire-test-user-id",
    is_agent=False,
    is_admin=False,
)
155
156
@pytest.fixture
def wire_headers() -> Generator[dict[str, str], None, None]:
    """Authenticate wire-protocol tests as ``_WIRE_CONTEXT`` and yield msgpack headers.

    Both signed-request dependencies are overridden on the app while the
    fixture is active and removed again on teardown.
    """
    signed_request_deps = (require_signed_request, optional_signed_request)
    for dependency in signed_request_deps:
        app.dependency_overrides[dependency] = lambda: _WIRE_CONTEXT

    msgpack_type = "application/x-msgpack"
    yield {"Content-Type": msgpack_type, "Accept": msgpack_type}

    # Teardown: drop the overrides; tolerate them having been cleared already.
    for dependency in signed_request_deps:
        app.dependency_overrides.pop(dependency, None)
168
169
@pytest.fixture(autouse=True)
def _reset_variation_store() -> Generator[None, None, None]:
    """Reset the singleton VariationStore after each test to prevent cross-test pollution.

    Teardown-only.  If the variation module cannot be found (it may have been
    removed by the MuseHub extraction) the reset is silently skipped.
    """
    yield  # everything below runs after the test body
    try:
        from musehub.variation.storage.variation_store import (
            reset_variation_store as _reset_store,
        )
        _reset_store()
    except ModuleNotFoundError:
        # Variation package absent — nothing to reset.
        return
182
183
# Async test-database URL; override with the TEST_DATABASE_URL environment
# variable, otherwise the local musehub_test database on port 5434 is used.
_TEST_DATABASE_URL = os.environ.get(
    "TEST_DATABASE_URL",
    "postgresql+asyncpg://musehub:musehub@localhost:5434/musehub_test",
)

# Sync URL for psycopg2 — used by the session-scoped schema fixture.
# Derived by stripping the "+asyncpg" driver suffix; a URL without that
# suffix is passed through unchanged.
_TEST_DATABASE_URL_SYNC = _TEST_DATABASE_URL.replace("+asyncpg", "")

# Shared async engine for the whole test session (NullPool = no connection
# reuse between tests, but engine object creation is cheap so we create it
# once and share it).
_TEST_ENGINE = create_async_engine(_TEST_DATABASE_URL, poolclass=NullPool)
# Session factory bound to the shared engine.  expire_on_commit=False keeps
# attributes on ORM objects loaded after a commit.
_TEST_SESSION_FACTORY = async_sessionmaker(
    bind=_TEST_ENGINE,
    class_=AsyncSession,
    expire_on_commit=False,
)

# Pre-compute the TRUNCATE statement for all tables so we don't rebuild it
# each test. Reversed sorted_tables respects FK dependency order.
# NOTE: built at import time, so it only names tables already registered on
# Base.metadata when this module is loaded.
_TRUNCATE_SQL = "TRUNCATE {} RESTART IDENTITY CASCADE".format(
    ", ".join(t.name for t in reversed(Base.metadata.sorted_tables))
)
207
208
@pytest.fixture(scope="session", autouse=True)
def _db_schema() -> Generator[None, None, None]:
    """Create the test schema once per test session using a sync psycopg2 engine.

    This replaces per-test drop_all/create_all (which took ~3 s per test on
    PostgreSQL) with a single DDL pass at session start and end. Individual
    tests get a clean slate via TRUNCATE in the db_session fixture instead.

    Setup:    terminate stale backends -> drop_all -> create_all -> seed claim types.
    Teardown: terminate stale backends -> drop_all.

    Every step runs on a freshly created engine that is always disposed, even
    when the step raises, so a failing DDL pass cannot leave connections open.
    """
    from contextlib import contextmanager

    from sqlalchemy import create_engine as _create_engine
    from sqlalchemy import text as _text

    @contextmanager
    def _fresh_engine() -> Generator[typing.Any, None, None]:
        """Yield a new sync engine and dispose it on exit, error or not."""
        # connect_timeout=10: if postgres is unreachable or still starting (e.g.
        # Docker container not ready), fail fast instead of blocking in C forever.
        # Without this, Ctrl+C cannot kill the process because psycopg2's socket
        # read is a non-interruptible C-level call.
        engine = _create_engine(
            _TEST_DATABASE_URL_SYNC,
            connect_args={"connect_timeout": 10},
        )
        try:
            yield engine
        finally:
            engine.dispose()

    def _terminate_other_backends() -> None:
        """Kill every other connection to the current database."""
        # If a previous pytest session was killed with SIGQUIT (Ctrl+\) it
        # leaves postgres backends idle-in-transaction holding locks on the
        # test tables.  drop_all then waits forever for those locks, which
        # makes the next test run freeze with Ctrl+C unresponsive.
        # A dedicated engine is used because the terminate call may also kill
        # pooled connections; later steps must start from fresh ones.
        with _fresh_engine() as engine:
            with engine.connect() as conn:
                conn.execute(_text(
                    "SELECT pg_terminate_backend(pid) FROM pg_stat_activity "
                    "WHERE datname = current_database() AND pid != pg_backend_pid()"
                ))
                conn.commit()

    def _seed_claim_types(engine: typing.Any) -> None:
        """Insert the attestation claim types (mirrors alembic/versions/0043 seed logic)."""
        from musehub.services.musehub_attestations import _CLAIM_TYPES
        with engine.connect() as conn:
            for ct in _CLAIM_TYPES.values():
                conn.execute(_text(
                    "INSERT INTO musehub_attestation_claim_types "
                    "(type_key, category, label, description, valid_scopes, introduced_at) "
                    "VALUES (:key, :cat, :label, :desc, :scopes, NOW()) "
                    "ON CONFLICT (type_key) DO NOTHING"
                ), {"key": ct["type_key"], "cat": ct["category"], "label": ct["label"],
                    "desc": ct["description"], "scopes": ct["valid_scopes"]})
            conn.commit()

    # ---- session setup ----
    _terminate_other_backends()
    with _fresh_engine() as engine:
        Base.metadata.drop_all(engine)
    with _fresh_engine() as engine:
        Base.metadata.create_all(engine)
        _seed_claim_types(engine)

    yield

    # ---- session teardown ----
    _terminate_other_backends()
    with _fresh_engine() as engine:
        Base.metadata.drop_all(engine)
285
286
@pytest_asyncio.fixture
async def db_session(_db_schema: None) -> AsyncGenerator[AsyncSession, None]:
    """Provide a clean DB session for each test.

    Tables are truncated (not dropped/recreated) between tests — a single
    TRUNCATE … CASCADE is ~100× faster than drop_all + create_all on
    PostgreSQL, cutting per-test overhead from ~3 s to ~30 ms.

    While the fixture is active the application's engine, session factory and
    ``get_db`` dependency all point at the test database.  All of that global
    state is undone in one ``finally`` block, so the dependency overrides are
    cleared even if the generator is closed or an exception is thrown at the
    yield.

    Yields:
        An ``AsyncSession`` for the test body.  Request handlers get their
        own independent sessions through the ``get_db`` override.
    """
    from sqlalchemy import text as _text

    async with _TEST_ENGINE.begin() as conn:
        # Terminate ALL other backends before TRUNCATE.  A failed test can
        # leave a connection in any state (idle in transaction, idle in
        # transaction (aborted), active) — filtering by state misses some
        # cases and causes deadlocks when TRUNCATE races the stale transaction.
        await conn.execute(_text(
            "SELECT pg_terminate_backend(pid) FROM pg_stat_activity "
            "WHERE datname = current_database() AND pid != pg_backend_pid()"
        ))
        await conn.execute(_text(_TRUNCATE_SQL))
        # Re-seed reference tables that are wiped by TRUNCATE CASCADE.
        from musehub.services.musehub_attestations import _CLAIM_TYPES
        for _ct in _CLAIM_TYPES.values():
            await conn.execute(_text(
                "INSERT INTO musehub_attestation_claim_types "
                "(type_key, category, label, description, valid_scopes, introduced_at) "
                "VALUES (:key, :cat, :label, :desc, :scopes, NOW()) "
                "ON CONFLICT (type_key) DO NOTHING"
            ), {"key": _ct["type_key"], "cat": _ct["category"], "label": _ct["label"],
                "desc": _ct["description"], "scopes": _ct["valid_scopes"]})

    async def override_get_db() -> AsyncGenerator[AsyncSession, None]:
        # Each request gets its own session so concurrent requests
        # (e.g. stress tests) don't share a single connection and
        # raise "concurrent operations are not permitted".
        # All test setup data is committed, so independent sessions
        # see it without needing to share the test session.
        async with _TEST_SESSION_FACTORY() as req_session:
            yield req_session

    old_engine = database._engine
    old_factory = database._async_session_factory
    database._engine = _TEST_ENGINE
    database._async_session_factory = _TEST_SESSION_FACTORY
    try:
        async with _TEST_SESSION_FACTORY() as session:
            app.dependency_overrides[get_db] = override_get_db
            yield session
    finally:
        # Undo every piece of global state together so no exit path can leave
        # the app half-restored (engine reset but get_db still overridden).
        app.dependency_overrides.clear()
        database._engine = old_engine
        database._async_session_factory = old_factory
338
339
@pytest_asyncio.fixture
async def session_factory(_db_schema: None) -> async_sessionmaker:
    """Return the shared test session factory.

    For tests that need to open several sessions of their own, e.g.
    concurrently.  Depends on ``_db_schema`` so the schema exists first.
    """
    shared_factory = _TEST_SESSION_FACTORY
    return shared_factory
344
345
class _Asgi24Wrapper:
    """ASGI wrapper that stamps ``spec_version='2.4'`` onto every HTTP scope.

    ASGI 2.4 tells Starlette to skip listen_for_disconnect on streaming
    responses, which is required for correct behaviour with async body reads.
    Non-HTTP scopes are passed through untouched.
    """

    def __init__(self, app: typing.Any) -> None:
        # The wrapped ASGI application every call is delegated to.
        self._app = app

    async def __call__(
        self,
        scope: typing.MutableMapping[str, typing.Any],
        receive: typing.Any,
        send: typing.Any,
    ) -> None:
        is_http = scope.get("type") == "http"
        if is_http:
            # Reuse an existing "asgi" mapping or create one, then set the version.
            asgi_info = scope.setdefault("asgi", {})
            asgi_info["spec_version"] = "2.4"
        await self._app(scope, receive, send)
360
361
@pytest_asyncio.fixture
async def client(db_session: AsyncSession) -> AsyncGenerator[AsyncClient, None]:
    """Yield an httpx ``AsyncClient`` that talks to the app in-process.

    Depends on ``db_session`` so the auth revocation check uses the test DB.
    The app is wrapped in ``_Asgi24Wrapper`` before being handed to the
    ASGI transport.
    """
    wrapped_app = _Asgi24Wrapper(app)
    asgi_transport = ASGITransport(app=wrapped_app)
    async with AsyncClient(transport=asgi_transport, base_url="http://test") as http_client:
        yield http_client
368
369
370 # -----------------------------------------------------------------------------
371 # Auth fixtures for API contract and integration tests
372 # Uses dependency_overrides to inject a fake MSignContext so tests don't need
373 # real Ed25519 key pairs. Only active for tests that request auth_headers.
374 # -----------------------------------------------------------------------------
375
# Identity id derived from the bytes b"testuser"; shared by the fake auth
# context below and the ``test_user`` fixture's DB row.
_TEST_IDENTITY_ID = compute_identity_id(b"testuser")
_TEST_HANDLE = "testuser"

# Fake signed-request context (non-agent, non-admin) that ``auth_headers``
# injects through dependency overrides.
_TEST_CONTEXT = MSignContext(
    handle=_TEST_HANDLE,
    identity_id=_TEST_IDENTITY_ID,
    is_agent=False,
    is_admin=False,
)
385
386
@pytest_asyncio.fixture
async def test_user(db_session: AsyncSession) -> MusehubIdentity:
    """Insert and return the human test identity used by authenticated route tests.

    The row uses ``_TEST_IDENTITY_ID`` / ``_TEST_HANDLE`` so it matches the
    fake auth context.
    """
    identity_fields = {
        "identity_id": _TEST_IDENTITY_ID,
        "handle": _TEST_HANDLE,
        "display_name": "Test User",
        "identity_type": "human",
    }
    user_row = MusehubIdentity(**identity_fields)
    db_session.add(user_row)
    await db_session.commit()
    await db_session.refresh(user_row)
    # Close the autobegin transaction started by refresh() so subsequent
    # test-body commits don't hit "another operation is in progress".
    await db_session.commit()
    return user_row
403
404
@pytest.fixture
def auth_headers(test_user: MusehubIdentity) -> Generator[dict[str, str], None, None]:
    """Authenticate every request as ``_TEST_CONTEXT`` and yield JSON headers.

    The auth dependencies are overridden on the app itself, so the effect is
    global while the fixture is active: any request made within the same test
    function is treated as authenticated, whether or not the yielded headers
    are passed.  To verify 401 behaviour for *unauthenticated* requests, or to
    mix authed/unauthed flows in one function, manipulate
    ``app.dependency_overrides`` directly or split into two test functions.
    """
    signed_request_deps = (require_signed_request, optional_signed_request)
    for dependency in signed_request_deps:
        app.dependency_overrides[dependency] = lambda: _TEST_CONTEXT

    json_headers = {"Content-Type": "application/json"}
    yield json_headers

    # Teardown: drop the overrides; tolerate them having been cleared already.
    for dependency in signed_request_deps:
        app.dependency_overrides.pop(dependency, None)
421
422
423 # ---------------------------------------------------------------------------
424 # Symbol-detail fixtures
425 # Used by test_symbol_detail_phase1.py (T2–T7 tiers).
426 # ---------------------------------------------------------------------------
427
428 import datetime as _dt
429 import contextlib as _contextlib
430 import time as _time
431 from muse.core.types import blob_id
432
433
def _utc_now() -> _dt.datetime:
    """Return the current time as a timezone-aware UTC datetime."""
    utc = _dt.timezone.utc
    return _dt.datetime.now(utc)
436
437
async def _make_repo_row(session: AsyncSession, owner: str, slug: str) -> "MusehubRepo":
    """Insert and commit a public repo row, then return it.

    The owner's identity id is derived from the encoded ``owner`` string.  The
    repo id is derived from that id, ``slug``, the literal ``"code"`` and the
    creation timestamp in ISO format.  ``slug`` is used as the repo name too.
    """
    from musehub.db.musehub_repo_models import MusehubRepo
    from musehub.core.genesis import compute_identity_id, compute_repo_id

    owner_identity = compute_identity_id(owner.encode())
    now = _utc_now()
    new_repo = MusehubRepo(
        repo_id=compute_repo_id(owner_identity, slug, "code", now.isoformat()),
        name=slug,
        owner=owner,
        slug=slug,
        visibility="public",
        owner_user_id=owner_identity,
        description="",
        tags=[],
        created_at=now,
    )
    session.add(new_repo)
    await session.commit()
    return new_repo
458
459
async def _make_commit_row(session: AsyncSession, repo_id: str, commit_id: str, **kwargs: JSONValue) -> None:
    """Insert a commit with test defaults and link it to ``repo_id``.

    Any keyword argument overrides the matching default column value.  The
    commit is flushed before its ``MusehubCommitRef`` is added, and both are
    committed together.
    """
    from musehub.db.musehub_repo_models import MusehubCommit, MusehubCommitRef

    # Defaults first; caller-supplied kwargs win on key collisions.
    commit_fields = {
        "commit_id": commit_id,
        "branch": "dev",
        "parent_ids": [],
        "message": "feat: test",
        "author": "gabriel",
        "timestamp": _utc_now(),
        "snapshot_id": blob_id(f"snap-{commit_id}".encode()),
        "agent_id": "claude-code",
        "model_id": "claude-sonnet-4-6",
        "commit_branch": "task/test",
        "signature": "",
        **kwargs,
    }
    session.add(MusehubCommit(**commit_fields))
    await session.flush()
    commit_ref = MusehubCommitRef(repo_id=repo_id, commit_id=commit_id)
    session.add(commit_ref)
    await session.commit()
480
481
async def _make_history_entry(
    session: AsyncSession,
    repo_id: str,
    address: str,
    commit_id: str,
    op: str = "add",
    content_id: str | None = None,
    committed_at: _dt.datetime | None = None,
    message: str | None = None,
) -> None:
    """Insert and commit one symbol-history entry authored by "gabriel".

    A falsy ``committed_at`` is replaced by the current UTC time.  A falsy
    ``content_id`` is replaced by a blob id derived from ``address`` and
    ``commit_id``.
    """
    from musehub.db.musehub_intel_models import MusehubSymbolHistoryEntry

    effective_time = committed_at or _utc_now()
    effective_content = content_id or blob_id(f"body-{address}-{commit_id}".encode())
    history_row = MusehubSymbolHistoryEntry(
        repo_id=repo_id,
        address=address,
        commit_id=commit_id,
        committed_at=effective_time,
        author="gabriel",
        op=op,
        content_id=effective_content,
        message=message,
    )
    session.add(history_row)
    await session.commit()
500
501
@pytest_asyncio.fixture
async def repo_fixture(db_session: AsyncSession) -> tuple[str, str]:
    """Create a bare repo with no symbol history and return ``(owner, slug)``."""
    owner = "gabriel"
    created = await _make_repo_row(db_session, owner, "test-repo")
    return (owner, created.slug)
507
508
@pytest_asyncio.fixture
async def seed_symbol(db_session: AsyncSession) -> tuple[str, str, str]:
    """Seed a repo holding one commit and one history entry for a single symbol.

    Returns ``(owner, slug, address)``.
    """
    owner = "gabriel"
    slug = "seed-repo"
    address = "src/core.py::compute"

    repo_row = await _make_repo_row(db_session, owner, slug)
    seed_commit = blob_id(f"commit-seed-{slug}".encode())
    await _make_commit_row(db_session, repo_row.repo_id, seed_commit)
    await _make_history_entry(db_session, repo_row.repo_id, address, seed_commit)
    return (owner, slug, address)
519
520
@pytest_asyncio.fixture
async def seed_type_intel(db_session: AsyncSession, seed_symbol: tuple[str, str, str]) -> None:
    """Attach a ``MusehubIntelType`` row to the symbol created by ``seed_symbol``."""
    from musehub.db.musehub_intel_models import MusehubIntelType
    from musehub.db.musehub_repo_models import MusehubRepo
    from sqlalchemy import select

    owner, slug, address = seed_symbol
    # Look the repo up again to obtain its repo_id.
    repo_query = select(MusehubRepo).where(MusehubRepo.owner == owner, MusehubRepo.slug == slug)
    repo = (await db_session.execute(repo_query)).scalar_one()

    type_row = MusehubIntelType(
        repo_id=repo.repo_id,
        address=address,
        kind="function",
        return_is_any=False,
        params_total=2,
        params_annotated=2,
        params_with_any=0,
        type_score=0.95,
        ref="dev",
    )
    db_session.add(type_row)
    await db_session.commit()
542
543
@pytest_asyncio.fixture
async def seed_sym_intel(db_session: AsyncSession, seed_symbol: tuple[str, str, str]) -> None:
    """Attach a ``MusehubSymbolIntel`` row (churn / blast / gravity) to the seeded symbol."""
    from musehub.db.musehub_intel_models import MusehubSymbolIntel
    from musehub.db.musehub_repo_models import MusehubRepo
    from sqlalchemy import select

    owner, slug, address = seed_symbol
    # Look the repo up again to obtain its repo_id.
    repo_query = select(MusehubRepo).where(MusehubRepo.owner == owner, MusehubRepo.slug == slug)
    repo = (await db_session.execute(repo_query)).scalar_one()

    intel_row = MusehubSymbolIntel(
        repo_id=repo.repo_id,
        address=address,
        churn=5,
        churn_30d=2,
        churn_90d=4,
        blast=3,
        blast_direct=2,
        blast_cross=1,
        blast_top=[],
        author_count=1,
        gravity=0.1,
        weekly=[],
        gravity_pct=10.0,
        gravity_direct_dependents=2,
        gravity_transitive_dependents=3,
        gravity_max_depth=2,
    )
    db_session.add(intel_row)
    await db_session.commit()
572
573
@pytest_asyncio.fixture
async def seed_api_intel(db_session: AsyncSession, seed_symbol: tuple[str, str, str]) -> None:
    """Attach a public-function ``MusehubIntelApiSurface`` row to the seeded symbol."""
    from musehub.db.musehub_intel_models import MusehubIntelApiSurface
    from musehub.db.musehub_repo_models import MusehubRepo
    from sqlalchemy import select

    owner, slug, address = seed_symbol
    # Look the repo up again to obtain its repo_id.
    repo_query = select(MusehubRepo).where(MusehubRepo.owner == owner, MusehubRepo.slug == slug)
    repo = (await db_session.execute(repo_query)).scalar_one()

    surface_row = MusehubIntelApiSurface(
        repo_id=repo.repo_id,
        address=address,
        kind="function",
        visibility="public",
        ref="dev",
    )
    db_session.add(surface_row)
    await db_session.commit()
591
592
@pytest_asyncio.fixture
async def seed_many_refactor_events(db_session: AsyncSession, seed_symbol: tuple[str, str, str]) -> None:
    """Attach 25 ``implementation`` refactor events to the seeded symbol in one commit."""
    from musehub.db.musehub_intel_models import MusehubIntelRefactorEvent
    from musehub.db.musehub_repo_models import MusehubRepo
    from sqlalchemy import select

    owner, slug, address = seed_symbol
    # Look the repo up again to obtain its repo_id.
    repo_query = select(MusehubRepo).where(MusehubRepo.owner == owner, MusehubRepo.slug == slug)
    repo = (await db_session.execute(repo_query)).scalar_one()

    # Event and commit ids are derived from the index so each row is distinct.
    events = [
        MusehubIntelRefactorEvent(
            event_id=blob_id(f"refactor-{index}-{slug}".encode()),
            repo_id=repo.repo_id,
            kind="implementation",
            address=address,
            detail=f"refactor event {index}",
            commit_id=blob_id(f"rc-{index}".encode()),
            committed_at=_utc_now(),
        )
        for index in range(25)
    ]
    db_session.add_all(events)
    await db_session.commit()
613
614
@pytest_asyncio.fixture
async def seed_refactor_event(db_session: AsyncSession, seed_symbol: tuple[str, str, str]) -> None:
    """Attach a single ``implementation`` refactor event to the seeded symbol."""
    from musehub.db.musehub_intel_models import MusehubIntelRefactorEvent
    from musehub.db.musehub_repo_models import MusehubRepo
    from sqlalchemy import select

    owner, slug, address = seed_symbol
    # Look the repo up again to obtain its repo_id.
    repo_query = select(MusehubRepo).where(MusehubRepo.owner == owner, MusehubRepo.slug == slug)
    repo = (await db_session.execute(repo_query)).scalar_one()

    event_row = MusehubIntelRefactorEvent(
        event_id=blob_id(f"refactor-single-{slug}".encode()),
        repo_id=repo.repo_id,
        kind="implementation",
        address=address,
        detail="extracted helper",
        commit_id=blob_id(f"rc-single-{slug}".encode()),
        committed_at=_utc_now(),
    )
    db_session.add(event_row)
    await db_session.commit()
634
635
@pytest_asyncio.fixture
async def seed_refactor_event_with_xss(db_session: AsyncSession, seed_symbol: tuple[str, str, str]) -> None:
    """Attach a refactor event whose ``detail`` field carries an XSS payload."""
    from musehub.db.musehub_intel_models import MusehubIntelRefactorEvent
    from musehub.db.musehub_repo_models import MusehubRepo
    from sqlalchemy import select

    owner, slug, address = seed_symbol
    # Look the repo up again to obtain its repo_id.
    repo_query = select(MusehubRepo).where(MusehubRepo.owner == owner, MusehubRepo.slug == slug)
    repo = (await db_session.execute(repo_query)).scalar_one()

    hostile_event = MusehubIntelRefactorEvent(
        event_id=blob_id(f"refactor-xss-{slug}".encode()),
        repo_id=repo.repo_id,
        kind="implementation",
        address=address,
        detail='<img src=x onerror=alert(1)>',
        commit_id=blob_id(f"rc-xss-{slug}".encode()),
        committed_at=_utc_now(),
    )
    db_session.add(hostile_event)
    await db_session.commit()
655
656
@pytest_asyncio.fixture
async def seed_symbol_with_xss_commit(db_session: AsyncSession) -> tuple[str, str, str]:
    """Seed a symbol whose commit message carries an XSS payload.

    Returns ``(owner, slug, address)``.
    """
    owner = "gabriel"
    slug = "xss-repo"
    address = "src/evil.py::fn"

    repo_row = await _make_repo_row(db_session, owner, slug)
    hostile_commit = blob_id(f"commit-xss-{slug}".encode())
    hostile_message = '<img src=x onerror=alert(1)> feat: xss test'
    await _make_commit_row(db_session, repo_row.repo_id, hostile_commit, message=hostile_message)
    await _make_history_entry(db_session, repo_row.repo_id, address, hostile_commit)
    return (owner, slug, address)
670
671
@pytest_asyncio.fixture
async def seed_symbol_with_large_history(db_session: AsyncSession) -> tuple[str, str, str]:
    """Seed a symbol with 200 ``modify`` history entries, one commit each.

    A stress fixture deliberately kept at 200 (not 10k) so the test stays
    fast.  Returns ``(owner, slug, address)``.
    """
    owner = "gabriel"
    slug = "large-history-repo"
    address = "src/big.py::process"

    repo_row = await _make_repo_row(db_session, owner, slug)
    entry_count = 200
    for index in range(entry_count):
        commit_id = blob_id(f"commit-large-{index}-{slug}".encode())
        await _make_commit_row(db_session, repo_row.repo_id, commit_id)
        await _make_history_entry(db_session, repo_row.repo_id, address, commit_id, op="modify")
    return (owner, slug, address)
683
684
@pytest_asyncio.fixture
async def seed_symbol_high_coupling(db_session: AsyncSession) -> tuple[str, str, str]:
    """Seed a symbol that shares one commit with 25 other symbols.

    Returns ``(owner, slug, address)``.
    """
    from musehub.db.musehub_intel_models import MusehubSymbolHistoryEntry

    owner = "gabriel"
    slug = "coupling-repo"
    address = "src/hub.py::dispatch"

    repo_row = await _make_repo_row(db_session, owner, slug)
    shared_commit = blob_id(f"commit-coupling-{slug}".encode())
    await _make_commit_row(db_session, repo_row.repo_id, shared_commit)
    await _make_history_entry(db_session, repo_row.repo_id, address, shared_commit)

    # 25 co-changing partners in the same commit
    partner_entries = [
        MusehubSymbolHistoryEntry(
            repo_id=repo_row.repo_id,
            address=f"src/partner_{index}.py::fn",
            commit_id=shared_commit,
            committed_at=_utc_now(),
            author="gabriel",
            op="modify",
            content_id=blob_id(f"body-partner-{index}".encode()),
        )
        for index in range(25)
    ]
    db_session.add_all(partner_entries)
    await db_session.commit()
    return (owner, slug, address)
709
710
@pytest_asyncio.fixture
async def seed_symbol_with_clones(db_session: AsyncSession) -> tuple[str, str, str]:
    """Seed a symbol whose body hash also occurs at a second address.

    Two hash-occurrence rows share one ``content_id``: the copy at
    ``src/copy.py::fn`` and the original.  Returns ``(owner, slug, address)``.
    """
    from musehub.db.musehub_intel_models import MusehubHashOccurrenceEntry

    owner = "gabriel"
    slug = "clones-repo"
    address = "src/original.py::fn"

    repo_row = await _make_repo_row(db_session, owner, slug)
    commit_id = blob_id(f"commit-clone-{slug}".encode())
    shared_content = blob_id(f"shared-body-{slug}".encode())
    await _make_commit_row(db_session, repo_row.repo_id, commit_id)
    await _make_history_entry(
        db_session, repo_row.repo_id, address, commit_id, content_id=shared_content,
    )

    # Clone: same content_id, different address (copy first, then the original).
    occurrences = [
        MusehubHashOccurrenceEntry(
            repo_id=repo_row.repo_id,
            content_id=shared_content,
            address=occurrence_address,
        )
        for occurrence_address in ("src/copy.py::fn", address)
    ]
    db_session.add_all(occurrences)
    await db_session.commit()
    return (owner, slug, address)
735
736
@pytest.fixture
def benchmark_timer() -> typing.Callable[[float], typing.ContextManager[None]]:
    """Return a factory for context managers that enforce a time budget.

    Usage: ``with benchmark_timer(50): ...`` fails the test with an
    ``AssertionError`` when the ``with`` body takes 50 ms or longer.  If the
    body itself raises, that exception propagates and no timing assertion is
    made.
    """
    @_contextlib.contextmanager
    def _timer(max_ms: float) -> Generator[None, None, None]:
        # perf_counter is the highest-resolution clock for short durations;
        # monotonic can be too coarse for millisecond budgets on some platforms.
        start = _time.perf_counter()
        yield
        elapsed_ms = (_time.perf_counter() - start) * 1000
        assert elapsed_ms < max_ms, f"took {elapsed_ms:.0f}ms, limit {max_ms}ms"
    return _timer
747
748
749 # ---------------------------------------------------------------------------
750 # Pagination fixtures
751 # Used by test_symbol_detail_pagination.py.
752 # ---------------------------------------------------------------------------
753
async def _seed_history_entries(
    db_session: AsyncSession,
    owner: str,
    slug: str,
    count: int,
) -> tuple[str, str, str]:
    """Create a repo + symbol with *count* history entries spaced 1 hour apart.

    Commit messages are ``entry-{i}`` for i in 0..count-1.
    entry-0 is the oldest, entry-(count-1) is the newest.
    Timestamps start at 2026-01-01T00:00:00Z; each commit and its history
    entry share the same timestamp and message.
    Returns (owner, slug, address).
    """
    address = "src/core.py::paginate_fn"
    repo = await _make_repo_row(db_session, owner, slug)
    base_ts = _dt.datetime(2026, 1, 1, 0, 0, 0, tzinfo=_dt.timezone.utc)
    for i in range(count):
        committed_at = base_ts + _dt.timedelta(hours=i)
        commit_id = blob_id(f"commit-hist-{i}-{slug}".encode())
        message = f"entry-{i}"
        await _make_commit_row(
            db_session, repo.repo_id, commit_id,
            message=message,
            timestamp=committed_at,
        )
        await _make_history_entry(
            db_session, repo.repo_id, address, commit_id,
            op="modify", committed_at=committed_at,
            message=message,
        )
    return (owner, slug, address)
784
785
async def _seed_coupling_partners(
    db_session: AsyncSession,
    owner: str,
    slug: str,
    partner_count: int,
) -> tuple[str, str, str]:
    """Create a repo + symbol with *partner_count* coupling partners.

    One commit is created per index ``j`` (hourly from 2026-02-01T00:00:00Z).
    The target symbol is in every commit; partner ``i`` is in commits
    ``i..partner_count-1``, so it shares ``partner_count - i`` commits with
    the target (partner_0 has the most).  Matching pre-computed
    ``MusehubSymbolCoupling`` rows are inserted at the end.
    Returns (owner, slug, address).
    """
    from musehub.db.musehub_intel_models import MusehubSymbolHistoryEntry

    address = "src/hub.py::dispatch"
    repo_row = await _make_repo_row(db_session, owner, slug)
    first_commit_time = _dt.datetime(2026, 2, 1, 0, 0, 0, tzinfo=_dt.timezone.utc)

    for commit_index in range(partner_count):
        committed_at = first_commit_time + _dt.timedelta(hours=commit_index)
        commit_id = blob_id(f"commit-coup-{commit_index}-{slug}".encode())
        await _make_commit_row(
            db_session, repo_row.repo_id, commit_id,
            message=f"coupling-commit-{commit_index}",
            timestamp=committed_at,
        )
        # Target symbol in every commit
        target_entry = MusehubSymbolHistoryEntry(
            repo_id=repo_row.repo_id,
            address=address,
            commit_id=commit_id,
            committed_at=committed_at,
            author="gabriel",
            op="modify",
            content_id=blob_id(f"body-target-{commit_index}-{slug}".encode()),
        )
        # Partner i appears in commit j only when i <= j
        partner_entries = [
            MusehubSymbolHistoryEntry(
                repo_id=repo_row.repo_id,
                address=f"src/partner_{partner_index}.py::fn_{partner_index}",
                commit_id=commit_id,
                committed_at=committed_at,
                author="gabriel",
                op="modify",
                content_id=blob_id(
                    f"body-partner-{partner_index}-{commit_index}-{slug}".encode()
                ),
            )
            for partner_index in range(commit_index + 1)
        ]
        db_session.add_all([target_entry, *partner_entries])
        await db_session.commit()

    # Insert pre-computed coupling rows: partner_i appears in partner_count-i commits.
    from musehub.db.musehub_intel_models import MusehubSymbolCoupling

    coupling_rows = [
        MusehubSymbolCoupling(
            repo_id=repo_row.repo_id,
            address=address,
            co_address=f"src/partner_{partner_index}.py::fn_{partner_index}",
            shared_commits=partner_count - partner_index,
        )
        for partner_index in range(partner_count)
    ]
    db_session.add_all(coupling_rows)
    await db_session.commit()
    return (owner, slug, address)
845
846
@pytest_asyncio.fixture
async def seed_symbol_with_26_history(db_session: AsyncSession) -> tuple[str, str, str]:
    """Seed repo ``gabriel/hist26-repo`` with 26 history entries for one symbol.

    Returns (owner, slug, address) as produced by ``_seed_history_entries``.
    """
    seeded = await _seed_history_entries(
        db_session,
        owner="gabriel",
        slug="hist26-repo",
        count=26,
    )
    return seeded
850
851
@pytest_asyncio.fixture
async def seed_symbol_with_exactly_10_history(db_session: AsyncSession) -> tuple[str, str, str]:
    """Seed repo ``gabriel/hist10-repo`` with 10 history entries for one symbol.

    Returns (owner, slug, address) as produced by ``_seed_history_entries``.
    """
    seeded = await _seed_history_entries(
        db_session,
        owner="gabriel",
        slug="hist10-repo",
        count=10,
    )
    return seeded
855
856
@pytest_asyncio.fixture
async def seed_symbol_with_11_history(db_session: AsyncSession) -> tuple[str, str, str]:
    """Seed repo ``gabriel/hist11-repo`` with 11 history entries for one symbol.

    Returns (owner, slug, address) as produced by ``_seed_history_entries``.
    """
    seeded = await _seed_history_entries(
        db_session,
        owner="gabriel",
        slug="hist11-repo",
        count=11,
    )
    return seeded
860
861
@pytest_asyncio.fixture
async def seed_symbol_high_coupling_40(db_session: AsyncSession) -> tuple[str, str, str]:
    """Seed repo ``gabriel/coup40-repo`` with 40 coupling partners for one symbol.

    Returns (owner, slug, address) as produced by ``_seed_coupling_partners``.
    """
    seeded = await _seed_coupling_partners(
        db_session,
        owner="gabriel",
        slug="coup40-repo",
        partner_count=40,
    )
    return seeded
865
866
@pytest_asyncio.fixture
async def seed_symbol_with_exactly_15_coupling(db_session: AsyncSession) -> tuple[str, str, str]:
    """Seed repo ``gabriel/coup15-repo`` with 15 coupling partners for one symbol.

    Returns (owner, slug, address) as produced by ``_seed_coupling_partners``.
    """
    seeded = await _seed_coupling_partners(
        db_session,
        owner="gabriel",
        slug="coup15-repo",
        partner_count=15,
    )
    return seeded
870
871
@pytest_asyncio.fixture
async def seed_symbol_with_16_coupling(db_session: AsyncSession) -> tuple[str, str, str]:
    """Seed repo ``gabriel/coup16-repo`` with 16 coupling partners for one symbol.

    Returns (owner, slug, address) as produced by ``_seed_coupling_partners``.
    """
    seeded = await _seed_coupling_partners(
        db_session,
        owner="gabriel",
        slug="coup16-repo",
        partner_count=16,
    )
    return seeded
875
876
@pytest_asyncio.fixture
async def seed_symbol_with_26_history_and_40_coupling(
    db_session: AsyncSession,
) -> tuple[str, str, str]:
    """Seed one symbol with 26 history entries and 26 coupling partners.

    Despite the ``40`` in the fixture name, 26 partners are created: they
    are drawn from the same 26 commits that form the target's history, so
    no coupling-only commits are added on top of those 26.

    The target appears in all 26 commits (messages ``entry-0`` ..
    ``entry-25``, one hour apart from 2026-01-01T00:00:00Z). Partner_p
    appears in commits p..25, giving shared_commits = 26 - p (descending).
    The original note expects this to paginate as 2 coupling pages
    (15 + 11) and 3 history pages (10 + 10 + 6); those page sizes belong to
    the code under test and are not defined here.
    Returns (owner, slug, address).
    """
    from musehub.db.musehub_intel_models import (
        MusehubSymbolCoupling,
        MusehubSymbolHistoryEntry,
    )

    total = 26  # commits, history entries and coupling partners alike
    owner = "gabriel"
    slug = "hist26-coup26-repo"
    target = "src/core.py::paginate_fn"
    repo_row = await _make_repo_row(db_session, owner, slug)
    first_ts = _dt.datetime(2026, 1, 1, 0, 0, 0, tzinfo=_dt.timezone.utc)

    for commit_no in range(total):
        when = first_ts + _dt.timedelta(hours=commit_no)
        label = f"entry-{commit_no}"
        cid = blob_id(f"commit-combo-{commit_no}-{slug}".encode())
        await _make_commit_row(
            db_session,
            repo_row.repo_id,
            cid,
            message=label,
            timestamp=when,
        )
        await _make_history_entry(
            db_session,
            repo_row.repo_id,
            target,
            cid,
            op="modify",
            committed_at=when,
            message=label,
        )
        # Every partner whose index is <= commit_no also changes in this commit.
        db_session.add_all(
            MusehubSymbolHistoryEntry(
                repo_id=repo_row.repo_id,
                address=f"src/partner_{p}.py::fn_{p}",
                commit_id=cid,
                committed_at=when,
                author="gabriel",
                op="modify",
                content_id=blob_id(f"body-combo-partner-{p}-{commit_no}-{slug}".encode()),
            )
            for p in range(commit_no + 1)
        )
    await db_session.commit()

    # Pre-computed coupling rows: partner_p appears in total - p commits.
    db_session.add_all(
        MusehubSymbolCoupling(
            repo_id=repo_row.repo_id,
            address=target,
            co_address=f"src/partner_{p}.py::fn_{p}",
            shared_commits=total - p,
        )
        for p in range(total)
    )
    await db_session.commit()
    return owner, slug, target
File History 2 commits
sha256:77fc45e703f90c0d603ecb1a0ce21ff21095728ca7dd0e146eb5e966c8f9fcc9 more passing tests from full test suite fun Human patch 7 days ago
sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595 fix: typing audit — 0 violations, 0 untyped defs across all… Sonnet 4.6 minor 21 days ago