gabriel / musehub public
test_intel_code_scale.py python
151 lines 5.9 KB
Raw
sha256:65f2fd8d910e1eeb00b7bc8740d3cbf1b2e14dad83b2eb999fbbbc44e97cd936 getting intel jobs to run properly Human minor ⚠ breaking 13 days ago
1 """TDD — intel.code job scales cleanly at 1, 5, 50, 500 commits.
2
3 Each test creates a repo with N commits (each with structured_delta),
4 runs build_symbol_index, and asserts:
5 1. Completes without error
6 2. Returns code.intel_summary and code.intel_snapshot
7 3. Writes rows to musehub_symbol_intel
8
9 Scale ladder: 1 → 5 → 50 → 500
10 The first failing test tells us exactly where the ceiling is.
11 """
12 from __future__ import annotations
13
14 import datetime
15 import hashlib
16 import time
17
18 import pytest
19 from sqlalchemy import select
20 from sqlalchemy.ext.asyncio import AsyncSession
21
22 from muse.core.types import fake_id
23 from musehub.db.musehub_intel_models import MusehubSymbolHistoryEntry, MusehubSymbolIntel
24 from musehub.db.musehub_repo_models import MusehubCommit, MusehubCommitRef
25 from musehub.types.json_types import JSONObject
26 from tests.factories import create_repo
27
28 # ---------------------------------------------------------------------------
29 # Helpers
30 # ---------------------------------------------------------------------------
31
32 def _structured_delta(n_symbols: int, seed: str = "") -> JSONObject:
33 """Produce a realistic structured_delta for N symbols."""
34 return {
35 "ops": [
36 {
37 "op": "insert" if i % 3 != 1 else "replace",
38 "address": f"musehub/services/svc_{seed}_{i}.py::func_{i}",
39 "new_content_id": fake_id(f"{seed}-{i}-content"),
40 "symbol_kind": "function",
41 }
42 for i in range(n_symbols)
43 ]
44 }
45
46
async def _populate_repo(
    db: AsyncSession,
    repo_id: str,
    n_commits: int,
    symbols_per_commit: int,
) -> str:
    """Stage a linear chain of ``n_commits`` commits on ``main`` and return the tip id.

    Each commit gets a ``structured_delta`` of ``symbols_per_commit`` ops
    (seeded ``c<i>`` per commit index) and is tied to ``repo_id`` through a
    ``MusehubCommitRef`` row. The session is flushed before returning.
    Returns ``""`` when ``n_commits`` is 0.
    """
    tip = ""
    for seq in range(n_commits):
        commit_id = fake_id(f"{repo_id}-commit-{seq}")
        # The first commit is a root; each later one points at the previous tip.
        parents = [tip] if tip else []
        stamp = datetime.datetime.now(tz=datetime.timezone.utc)
        delta = _structured_delta(symbols_per_commit, seed=f"c{seq}")
        db.add(
            MusehubCommit(
                commit_id=commit_id,
                branch="main",
                parent_ids=parents,
                message=f"commit {seq}",
                author="gabriel",
                timestamp=stamp,
                structured_delta=delta,
            )
        )
        db.add(MusehubCommitRef(repo_id=repo_id, commit_id=commit_id))
        tip = commit_id
    await db.flush()
    return tip
73
74
75 def _assert_fast(elapsed: float, limit: float, label: str) -> None:
76 assert elapsed < limit, (
77 f"{label} took {elapsed:.2f}s — limit is {limit:.1f}s. "
78 f"Bottleneck is in build_symbol_index; use the [intel.code] timing logs to pinpoint."
79 )
80
81
82 # ---------------------------------------------------------------------------
83 # Scale tests
84 # ---------------------------------------------------------------------------
85
@pytest.mark.asyncio
async def test_intel_code_1_commit(db_session: AsyncSession) -> None:
    """Baseline: 1 commit, 5 symbols — must complete in <2s.

    Also requires a ``code.intel_summary`` entry in the result and exactly
    five ``MusehubSymbolIntel`` rows for the repo.
    """
    from musehub.services.musehub_symbol_indexer import build_symbol_index

    repo = await create_repo(db_session, name="intel-scale-1", owner="gabriel", visibility="public")
    tip = await _populate_repo(db_session, repo.repo_id, n_commits=1, symbols_per_commit=5)
    await db_session.flush()

    # Only the indexer call itself counts against the time budget.
    started = time.monotonic()
    result = await build_symbol_index(db_session, repo.repo_id, tip)
    elapsed = time.monotonic() - started

    assert "code.intel_summary" in (kind for kind, _ in result), "must return code.intel_summary"

    intel_query = select(MusehubSymbolIntel).where(MusehubSymbolIntel.repo_id == repo.repo_id)
    outcome = await db_session.execute(intel_query)
    rows = outcome.scalars().all()
    assert len(rows) == 5, f"expected 5 symbol_intel rows, got {len(rows)}"

    _assert_fast(elapsed, 2.0, "1 commit / 5 symbols")
104
105
@pytest.mark.asyncio
async def test_intel_code_5_commits(db_session: AsyncSession) -> None:
    """5 commits × 10 symbols each — must complete in <5s.

    Also requires a ``code.intel_summary`` entry in the result and at least
    one ``MusehubSymbolIntel`` row persisted for the repo.
    """
    from musehub.services.musehub_symbol_indexer import build_symbol_index

    repo = await create_repo(db_session, name="intel-scale-5", owner="gabriel", visibility="public")
    tip = await _populate_repo(db_session, repo.repo_id, n_commits=5, symbols_per_commit=10)
    await db_session.flush()

    # Only the indexer call itself counts against the time budget.
    t0 = time.monotonic()
    result = await build_symbol_index(db_session, repo.repo_id, tip)
    elapsed = time.monotonic() - t0

    assert any(t == "code.intel_summary" for t, _ in result), "must return code.intel_summary"

    # The module docstring promises every scale test checks that rows were
    # written; LIMIT 1 keeps the check cheap regardless of repo size.
    first_row = (await db_session.execute(
        select(MusehubSymbolIntel)
        .where(MusehubSymbolIntel.repo_id == repo.repo_id)
        .limit(1)
    )).scalars().first()
    assert first_row is not None, "expected symbol_intel rows to be written"

    _assert_fast(elapsed, 5.0, "5 commits / 10 symbols each")
120
121
@pytest.mark.asyncio
async def test_intel_code_50_commits(db_session: AsyncSession) -> None:
    """50 commits × 20 symbols each — must complete in <15s.

    Also requires a ``code.intel_summary`` entry in the result and at least
    one ``MusehubSymbolIntel`` row persisted for the repo.
    """
    from musehub.services.musehub_symbol_indexer import build_symbol_index

    repo = await create_repo(db_session, name="intel-scale-50", owner="gabriel", visibility="public")
    tip = await _populate_repo(db_session, repo.repo_id, n_commits=50, symbols_per_commit=20)
    await db_session.flush()

    # Only the indexer call itself counts against the time budget.
    t0 = time.monotonic()
    result = await build_symbol_index(db_session, repo.repo_id, tip)
    elapsed = time.monotonic() - t0

    assert any(t == "code.intel_summary" for t, _ in result), "must return code.intel_summary"

    # The module docstring promises every scale test checks that rows were
    # written; LIMIT 1 keeps the check cheap regardless of repo size.
    first_row = (await db_session.execute(
        select(MusehubSymbolIntel)
        .where(MusehubSymbolIntel.repo_id == repo.repo_id)
        .limit(1)
    )).scalars().first()
    assert first_row is not None, "expected symbol_intel rows to be written"

    _assert_fast(elapsed, 15.0, "50 commits / 20 symbols each")
136
137
@pytest.mark.asyncio
async def test_intel_code_500_commits(db_session: AsyncSession) -> None:
    """500 commits × 30 symbols each — must complete in <60s (production SLA).

    Also requires a ``code.intel_summary`` entry in the result and at least
    one ``MusehubSymbolIntel`` row persisted for the repo.
    """
    from musehub.services.musehub_symbol_indexer import build_symbol_index

    repo = await create_repo(db_session, name="intel-scale-500", owner="gabriel", visibility="public")
    tip = await _populate_repo(db_session, repo.repo_id, n_commits=500, symbols_per_commit=30)
    await db_session.flush()

    # Only the indexer call itself counts against the time budget.
    t0 = time.monotonic()
    result = await build_symbol_index(db_session, repo.repo_id, tip)
    elapsed = time.monotonic() - t0

    assert any(t == "code.intel_summary" for t, _ in result), "must return code.intel_summary"

    # The module docstring promises every scale test checks that rows were
    # written; LIMIT 1 avoids loading thousands of ORM objects just to
    # confirm that something was persisted.
    first_row = (await db_session.execute(
        select(MusehubSymbolIntel)
        .where(MusehubSymbolIntel.repo_id == repo.repo_id)
        .limit(1)
    )).scalars().first()
    assert first_row is not None, "expected symbol_intel rows to be written"

    _assert_fast(elapsed, 60.0, "500 commits / 30 symbols each")
File History 1 commit
sha256:65f2fd8d910e1eeb00b7bc8740d3cbf1b2e14dad83b2eb999fbbbc44e97cd936 getting intel jobs to run properly Human minor 13 days ago