gabriel / musehub public
test_velocity_provider.py python
941 lines 41.4 KB
Raw
sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595 fix: typing audit — 0 violations, 0 untyped defs across all… Sonnet 4.6 minor ⚠ breaking 20 days ago
1 """TDD spec for VelocityProvider — issue #16, Phase 5.
2
3 Verifies that VelocityProvider reproduces module growth velocity from the
4 symbol history store without subprocess calls: module derivation from symbol
5 addresses, op categorisation (add/delete/modify), two-window BFS analysis
6 (current vs prior), acceleration, stagnant-commit detection, extended columns
7 (prior_modified, prior_active_commits, window_size, commits_analysed), TOP cap,
8 and strict repo isolation.
9
10 Seven test tiers (50 cases)
11 ----------------------------
12 Unit VL_01 – VL_08 module derivation, accel helpers, constants
13 Integration VL_09 – VL_18 provider upserts, new columns, op categorisation
14 E2E VL_19 – VL_25 full seeded scenarios, window semantics
15 Performance VL_26 – VL_32 timing bounds
16 State VL_33 – VL_38 idempotency, stale-row purge, incremental updates
17 Security VL_39 – VL_44 injection strings, repo isolation, unicode
18 Stress VL_45 – VL_50 TOP cap, BFS cap, extended-column completeness
19 """
20 from __future__ import annotations
21
22 import secrets
23 import time
24 from datetime import datetime, timezone
25
26 import pytest
27 import pytest_asyncio
28 import sqlalchemy as sa
29 from sqlalchemy.dialects.postgresql import insert as pg_insert
30 from sqlalchemy.ext.asyncio import AsyncSession
31
32 from muse.core.types import long_id
33 from musehub.db.musehub_intel_models import MusehubIntelVelocity, MusehubSymbolHistoryEntry
34 from musehub.db.musehub_repo_models import MusehubCommit, MusehubCommitRef, MusehubRepo
35 from musehub.services.musehub_intel_providers import VelocityProvider
36 from musehub.types.json_types import JSONObject
37 from musehub.api.routes.musehub.ui_intel import _vel_accel_class, _vel_accel_fmt
38 from tests.factories import create_repo
39
40
41 # ─────────────────────────────────────────────────────────────────────────────
42 # Helpers
43 # ─────────────────────────────────────────────────────────────────────────────
44
def _cid() -> str:
    """Return a fresh commit id: 32 random bytes as hex, passed through ``long_id``."""
    random_hex = secrets.token_hex(32)
    return long_id(random_hex)
47
48
async def _seed_commit(
    session: AsyncSession,
    repo_id: str,
    commit_id: str,
    parent_ids: list[str] | None = None,
) -> None:
    """Insert one commit plus its repo reference, ignoring rows that already exist.

    Two statements are executed on ``session``: a ``MusehubCommit`` insert and
    a ``MusehubCommitRef`` insert tying ``commit_id`` to ``repo_id``.  Both use
    ``ON CONFLICT DO NOTHING``.  The session is not committed here.

    Args:
        session: Async session the inserts are executed on.
        repo_id: Repository the commit reference is attached to.
        commit_id: Identifier of the commit row.
        parent_ids: Parent commit ids; ``None`` (or empty) is stored as ``[]``.
    """
    commit_fields = {
        "commit_id": commit_id,
        "message": "test commit",
        "author": "test",
        "branch": "dev",
        "parent_ids": parent_ids or [],
        "snapshot_id": None,
        "timestamp": datetime.now(timezone.utc),
    }
    insert_commit = (
        pg_insert(MusehubCommit).values(**commit_fields).on_conflict_do_nothing()
    )
    await session.execute(insert_commit)

    insert_ref = (
        pg_insert(MusehubCommitRef)
        .values(repo_id=repo_id, commit_id=commit_id)
        .on_conflict_do_nothing()
    )
    await session.execute(insert_ref)
74
75
async def _seed_history(
    session: AsyncSession,
    repo_id: str,
    commit_id: str,
    addresses: list[str],
    op: str = "modify",
) -> None:
    """Insert one symbol-history entry per address, all sharing the same op code.

    Each entry is written with its own ``ON CONFLICT DO NOTHING`` insert and is
    stamped with the current UTC time.  The session is not committed here.

    Args:
        session: Async session the inserts are executed on.
        repo_id: Repository the history entries belong to.
        commit_id: Commit the entries are recorded against.
        addresses: Symbol addresses to record; one row is attempted per item.
        op: Operation code stored on every row (defaults to ``"modify"``).
    """
    for address in addresses:
        insert_entry = (
            pg_insert(MusehubSymbolHistoryEntry)
            .values(
                repo_id=repo_id,
                address=address,
                commit_id=commit_id,
                committed_at=datetime.now(timezone.utc),
                op=op,
            )
            .on_conflict_do_nothing()
        )
        await session.execute(insert_entry)
96
97
async def _run(session: AsyncSession, repo_id: str, ref: str) -> list[tuple[str, JSONObject]]:
    """Run a fresh ``VelocityProvider`` for ``repo_id`` at ``ref`` and return its result.

    ``compute`` is called with an empty dict as its fourth argument.
    """
    provider = VelocityProvider()
    return await provider.compute(session, repo_id, ref, {})
100
101
async def _fetch(session: AsyncSession, repo_id: str) -> list[MusehubIntelVelocity]:
    """Load every velocity row stored for ``repo_id``, highest ``active_commits`` first."""
    query = (
        sa.select(MusehubIntelVelocity)
        .where(MusehubIntelVelocity.repo_id == repo_id)
        .order_by(sa.desc(MusehubIntelVelocity.active_commits))
    )
    outcome = await session.execute(query)
    velocity_rows = outcome.scalars().all()
    return list(velocity_rows)
109
110
def _module(addr: str) -> str:
    """Replicate VelocityProvider._module() for unit tests.

    The file part is everything before the first ``::`` (the whole address when
    there is none).  The module is that file's directory with a trailing ``/``;
    a file part without any ``/`` maps to ``"<file>/"``.
    """
    file_part, _, _ = addr.partition("::")
    directory, slash, _ = file_part.rpartition("/")
    # rpartition reports an empty separator when the file part has no slash.
    return f"{directory}/" if slash else f"{file_part}/"
117
118
119 # ─────────────────────────────────────────────────────────────────────────────
120 # Fixtures
121 # ─────────────────────────────────────────────────────────────────────────────
122
@pytest_asyncio.fixture
async def repo(db_session: AsyncSession) -> MusehubRepo:
    """Provide one repo created via ``create_repo`` (owner ``testuser``, slug ``velocityprovider``)."""
    created = await create_repo(db_session, owner="testuser", slug="velocityprovider")
    return created
126
127
@pytest_asyncio.fixture
async def two_repos(db_session: AsyncSession) -> tuple[MusehubRepo, MusehubRepo]:
    """Provide two repos for ``testuser``: ``vel-repo-1`` then ``vel-repo-2``, in that order."""
    first = await create_repo(db_session, owner="testuser", slug="vel-repo-1")
    second = await create_repo(db_session, owner="testuser", slug="vel-repo-2")
    return first, second
133
134
135 # ─────────────────────────────────────────────────────────────────────────────
136 # Tier 1 — Unit: module derivation, accel helpers, constants
137 # ─────────────────────────────────────────────────────────────────────────────
138
class TestVelocityUnit:
    """Database-free checks of module derivation and the acceleration helpers."""

    def test_VL_01_module_from_deep_symbol_address(self) -> None:
        """A symbol in a nested file maps to that file's directory."""
        derived = _module("musehub/services/musehub_wire.py::MyClass")
        assert derived == "musehub/services/"

    def test_VL_02_module_from_shallow_symbol_address(self) -> None:
        """A symbol one directory deep maps to that single directory."""
        derived = _module("src/billing.py::charge")
        assert derived == "src/"

    def test_VL_03_module_from_bare_file_no_slash(self) -> None:
        """A root-level file (no '/') maps to '<filename>/'."""
        derived = _module("billing.py")
        assert derived == "billing.py/"

    def test_VL_04_module_from_bare_path_with_slash(self) -> None:
        """A path without '::' still yields its directory as the module."""
        derived = _module("musehub/services/foo.py")
        assert derived == "musehub/services/"

    def test_VL_05_accel_class_positive(self) -> None:
        """Any positive acceleration is classified as 'up'."""
        for accel in (5.0, 0.1):
            assert _vel_accel_class(accel) == "up"

    def test_VL_06_accel_class_negative(self) -> None:
        """Any negative acceleration is classified as 'down'."""
        for accel in (-3.0, -0.1):
            assert _vel_accel_class(accel) == "down"

    def test_VL_07_accel_class_zero(self) -> None:
        """Exactly zero acceleration is classified as 'flat'."""
        label = _vel_accel_class(0.0)
        assert label == "flat"

    def test_VL_08_accel_fmt_positive_negative_zero(self) -> None:
        """accel_fmt renders '+4' for 4.0, '-3' for -3.0 and '0' for 0.0."""
        cases = ((4.0, "+4"), (-3.0, "-3"), (0.0, "0"))
        for accel, rendered in cases:
            assert _vel_accel_fmt(accel) == rendered
177
178
179 # ─────────────────────────────────────────────────────────────────────────────
180 # Tier 2 — Integration: provider upserts, op categorisation, new columns
181 # ─────────────────────────────────────────────────────────────────────────────
182
class TestVelocityIntegration:
    """Provider runs against seeded rows: op counters, extended columns, result shape."""

    @staticmethod
    async def _seed_chain(session: AsyncSession, repo_id: str, length: int) -> list[str]:
        """Seed ``length`` commits where each one's parent is the previous; return ids in order."""
        chain = [_cid() for _ in range(length)]
        parent = None
        for commit_id in chain:
            await _seed_commit(session, repo_id, commit_id, [parent] if parent else [])
            parent = commit_id
        return chain

    @pytest.mark.asyncio
    async def test_VL_09_empty_repo_returns_empty(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """With nothing seeded, the provider returns [] and no rows are stored."""
        outcome = await _run(db_session, repo.repo_id, _cid())
        assert outcome == []
        stored = await _fetch(db_session, repo.repo_id)
        assert stored == []

    @pytest.mark.asyncio
    async def test_VL_10_no_history_entries_returns_empty(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """A commit without any history entries yields an empty result."""
        head = _cid()
        await _seed_commit(db_session, repo.repo_id, head)
        await db_session.commit()
        outcome = await _run(db_session, repo.repo_id, head)
        assert outcome == []

    @pytest.mark.asyncio
    async def test_VL_11_add_op_counted_as_added(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """A single op='add' entry gives added=1, removed=0, modified=0."""
        head = _cid()
        await _seed_commit(db_session, repo.repo_id, head)
        await _seed_history(
            db_session, repo.repo_id, head, ["src/billing.py::charge"], op="add"
        )
        await db_session.commit()
        await _run(db_session, repo.repo_id, head)
        stored = await _fetch(db_session, repo.repo_id)
        assert len(stored) == 1
        only = stored[0]
        assert only.added == 1
        assert only.removed == 0
        assert only.modified == 0

    @pytest.mark.asyncio
    async def test_VL_12_delete_op_counted_as_removed(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """A single op='delete' entry gives removed=1 and added=0."""
        head = _cid()
        await _seed_commit(db_session, repo.repo_id, head)
        await _seed_history(
            db_session, repo.repo_id, head, ["src/billing.py::charge"], op="delete"
        )
        await db_session.commit()
        await _run(db_session, repo.repo_id, head)
        stored = await _fetch(db_session, repo.repo_id)
        top = stored[0]
        assert top.removed == 1
        assert top.added == 0

    @pytest.mark.asyncio
    async def test_VL_13_modify_op_counted_as_modified(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """A single op='modify' entry gives modified=1 and added=0."""
        head = _cid()
        await _seed_commit(db_session, repo.repo_id, head)
        await _seed_history(
            db_session, repo.repo_id, head, ["src/billing.py::charge"], op="modify"
        )
        await db_session.commit()
        await _run(db_session, repo.repo_id, head)
        stored = await _fetch(db_session, repo.repo_id)
        top = stored[0]
        assert top.modified == 1
        assert top.added == 0

    @pytest.mark.asyncio
    async def test_VL_14_net_equals_added_minus_removed(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """The stored net equals stored added minus stored removed."""
        head = _cid()
        await _seed_commit(db_session, repo.repo_id, head)
        await _seed_history(
            db_session,
            repo.repo_id,
            head,
            ["src/billing.py::a", "src/billing.py::b"],
            op="add",
        )
        await _seed_history(
            db_session, repo.repo_id, head, ["src/billing.py::c"], op="delete"
        )
        await db_session.commit()
        await _run(db_session, repo.repo_id, head)
        stored = await _fetch(db_session, repo.repo_id)
        top = stored[0]
        assert top.net == top.added - top.removed

    @pytest.mark.asyncio
    async def test_VL_15_active_commits_counts_distinct_commits(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Three chained commits touching one module give active_commits=3."""
        chain = await self._seed_chain(db_session, repo.repo_id, 3)
        for commit_id in chain:
            await _seed_history(
                db_session, repo.repo_id, commit_id, ["src/billing.py::fn"]
            )
        await db_session.commit()
        await _run(db_session, repo.repo_id, chain[-1])
        stored = await _fetch(db_session, repo.repo_id)
        assert stored[0].active_commits == 3

    @pytest.mark.asyncio
    async def test_VL_16_window_size_column_populated(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """The window_size column equals VelocityProvider._WINDOW."""
        head = _cid()
        await _seed_commit(db_session, repo.repo_id, head)
        await _seed_history(db_session, repo.repo_id, head, ["src/billing.py::fn"])
        await db_session.commit()
        await _run(db_session, repo.repo_id, head)
        stored = await _fetch(db_session, repo.repo_id)
        assert stored[0].window_size == VelocityProvider._WINDOW

    @pytest.mark.asyncio
    async def test_VL_17_commits_analysed_column_populated(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """A five-commit chain walked from its tip gives commits_analysed=5."""
        chain = await self._seed_chain(db_session, repo.repo_id, 5)
        # Only the oldest commit carries history.
        await _seed_history(db_session, repo.repo_id, chain[0], ["src/billing.py::fn"])
        await db_session.commit()
        await _run(db_session, repo.repo_id, chain[-1])
        stored = await _fetch(db_session, repo.repo_id)
        assert stored[0].commits_analysed == 5

    @pytest.mark.asyncio
    async def test_VL_18_result_key_correct(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """The provider returns one ('intel.code.velocity', payload) tuple with summary fields."""
        head = _cid()
        await _seed_commit(db_session, repo.repo_id, head)
        await _seed_history(db_session, repo.repo_id, head, ["src/billing.py::fn"])
        await db_session.commit()
        outcome = await _run(db_session, repo.repo_id, head)
        assert len(outcome) == 1
        key, payload = outcome[0]
        assert key == "intel.code.velocity"
        for field in ("count", "commits_analysed", "truncated"):
            assert field in payload
336
337
338 # ─────────────────────────────────────────────────────────────────────────────
339 # Tier 3 — E2E: full seeded scenarios, window semantics
340 # ─────────────────────────────────────────────────────────────────────────────
341
class TestVelocityE2E:
    """Full seeded scenarios: ranking, stagnation, and current-vs-prior window behaviour."""

    @staticmethod
    async def _seed_chain(session: AsyncSession, repo_id: str, length: int) -> list[str]:
        """Seed ``length`` commits where each one's parent is the previous; return ids in order."""
        chain = [_cid() for _ in range(length)]
        parent = None
        for commit_id in chain:
            await _seed_commit(session, repo_id, commit_id, [parent] if parent else [])
            parent = commit_id
        return chain

    @pytest.mark.asyncio
    async def test_VL_19_hottest_module_ranked_first(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """The module touched by more commits comes first when ordered by active_commits."""
        chain = await self._seed_chain(db_session, repo.repo_id, 5)
        # services/ is touched by all five commits, tests/ by the first two only.
        for commit_id in chain:
            await _seed_history(
                db_session, repo.repo_id, commit_id, ["musehub/services/foo.py::fn"]
            )
        for commit_id in chain[:2]:
            await _seed_history(
                db_session, repo.repo_id, commit_id, ["tests/test_foo.py::test_fn"]
            )
        await db_session.commit()
        await _run(db_session, repo.repo_id, chain[-1])
        stored = await _fetch(db_session, repo.repo_id)
        assert stored[0].module == "musehub/services/"

    @pytest.mark.asyncio
    async def test_VL_20_two_modules_produce_two_rows(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Symbols under src/ and tests/ each get their own velocity row."""
        head = _cid()
        await _seed_commit(db_session, repo.repo_id, head)
        await _seed_history(
            db_session,
            repo.repo_id,
            head,
            ["src/a.py::fn", "tests/test_a.py::test_fn"],
        )
        await db_session.commit()
        await _run(db_session, repo.repo_id, head)
        stored = await _fetch(db_session, repo.repo_id)
        seen_modules = {row.module for row in stored}
        assert {"src/", "tests/"} <= seen_modules

    @pytest.mark.asyncio
    async def test_VL_21_stagnant_commit_detected(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """One add plus one delete in the same module and commit gives stagnant_commits=1."""
        head = _cid()
        await _seed_commit(db_session, repo.repo_id, head)
        # Equal adds and deletes within one commit: net is zero for the module.
        await _seed_history(
            db_session, repo.repo_id, head, ["src/billing.py::new_fn"], op="add"
        )
        await _seed_history(
            db_session, repo.repo_id, head, ["src/billing.py::old_fn"], op="delete"
        )
        await db_session.commit()
        await _run(db_session, repo.repo_id, head)
        stored = await _fetch(db_session, repo.repo_id)
        assert stored[0].stagnant_commits == 1

    @pytest.mark.asyncio
    async def test_VL_22_non_stagnant_commit_not_counted(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """A commit with a lone add leaves stagnant_commits at 0."""
        head = _cid()
        await _seed_commit(db_session, repo.repo_id, head)
        await _seed_history(
            db_session, repo.repo_id, head, ["src/billing.py::fn"], op="add"
        )
        await db_session.commit()
        await _run(db_session, repo.repo_id, head)
        stored = await _fetch(db_session, repo.repo_id)
        assert stored[0].stagnant_commits == 0

    @pytest.mark.asyncio
    async def test_VL_23_prior_window_populates_prior_fields(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """With _WINDOW + 3 chained commits, prior_active_commits is greater than zero."""
        window = VelocityProvider()._WINDOW
        chain = await self._seed_chain(db_session, repo.repo_id, window + 3)
        # Every commit touches src/, so the chain is longer than one window.
        for commit_id in chain:
            await _seed_history(
                db_session, repo.repo_id, commit_id, ["src/billing.py::fn"], op="add"
            )
        await db_session.commit()
        await _run(db_session, repo.repo_id, chain[-1])
        stored = await _fetch(db_session, repo.repo_id)
        top = stored[0]
        assert top.prior_active_commits > 0

    @pytest.mark.asyncio
    async def test_VL_24_positive_acceleration_when_current_more_active(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """acceleration > 0 when the newer commits add three symbols each vs one each before."""
        window = VelocityProvider()._WINDOW
        chain = await self._seed_chain(db_session, repo.repo_id, 2 * window)
        older_commits, newer_commits = chain[:window], chain[window:]
        # Older half: one add per commit.
        for commit_id in older_commits:
            await _seed_history(
                db_session, repo.repo_id, commit_id, ["src/billing.py::fn1"], op="add"
            )
        # Newer half: three adds per commit, inserted one symbol at a time.
        newer_symbols = (
            "src/billing.py::fn1",
            "src/billing.py::fn2",
            "src/billing.py::fn3",
        )
        for commit_id in newer_commits:
            for symbol in newer_symbols:
                await _seed_history(
                    db_session, repo.repo_id, commit_id, [symbol], op="add"
                )
        await db_session.commit()
        await _run(db_session, repo.repo_id, chain[-1])
        stored = await _fetch(db_session, repo.repo_id)
        src_row = next(row for row in stored if row.module == "src/")
        assert src_row.acceleration > 0

    @pytest.mark.asyncio
    async def test_VL_25_module_only_in_current_has_zero_prior(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """A module seen in a single commit has prior_active_commits=0 and prior_net=0."""
        head = _cid()
        await _seed_commit(db_session, repo.repo_id, head)
        await _seed_history(db_session, repo.repo_id, head, ["src/billing.py::fn"])
        await db_session.commit()
        await _run(db_session, repo.repo_id, head)
        stored = await _fetch(db_session, repo.repo_id)
        top = stored[0]
        assert top.prior_active_commits == 0
        assert top.prior_net == 0
480
481
482 # ─────────────────────────────────────────────────────────────────────────────
483 # Tier 4 — Performance: timing bounds
484 # ─────────────────────────────────────────────────────────────────────────────
485
class TestVelocityPerformance:
    """Wall-clock bounds for provider runs and for reads of the velocity table.

    Intervals are measured with ``time.perf_counter()``, the clock the standard
    library provides for timing short durations, so the millisecond-scale
    bounds below are not limited by a coarser clock resolution.
    """

    @staticmethod
    async def _seed_chain(session: AsyncSession, repo_id: str, length: int) -> list[str]:
        """Seed ``length`` commits where each one's parent is the previous; return ids in order."""
        chain = [_cid() for _ in range(length)]
        parent = None
        for commit_id in chain:
            await _seed_commit(session, repo_id, commit_id, [parent] if parent else [])
            parent = commit_id
        return chain

    @staticmethod
    async def _touch_modules(
        session: AsyncSession, repo_id: str, commit_ids: list[str], module_count: int
    ) -> None:
        """Record one history entry per commit for each of ``mod0/`` .. ``mod{module_count-1}/``."""
        for commit_id in commit_ids:
            for index in range(module_count):
                await _seed_history(
                    session, repo_id, commit_id, [f"mod{index}/file.py::fn"]
                )

    @staticmethod
    def _leaderboard_query(repo_id: str, limit: int) -> sa.Select:
        """Build the top-``limit`` query over a repo's velocity rows by active_commits."""
        return (
            sa.select(MusehubIntelVelocity)
            .where(MusehubIntelVelocity.repo_id == repo_id)
            .order_by(sa.desc(MusehubIntelVelocity.active_commits))
            .limit(limit)
        )

    @pytest.mark.asyncio
    async def test_VL_26_ten_commits_five_modules_under_500ms(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """10 commits × 5 modules completes in under 500 ms."""
        chain = await self._seed_chain(db_session, repo.repo_id, 10)
        await self._touch_modules(db_session, repo.repo_id, chain, 5)
        await db_session.commit()
        started = time.perf_counter()
        await _run(db_session, repo.repo_id, chain[-1])
        assert time.perf_counter() - started < 0.5

    @pytest.mark.asyncio
    async def test_VL_27_forty_commits_ten_modules_under_2s(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """40 commits × 10 modules completes in under 2 s."""
        chain = await self._seed_chain(db_session, repo.repo_id, 40)
        await self._touch_modules(db_session, repo.repo_id, chain, 10)
        await db_session.commit()
        started = time.perf_counter()
        await _run(db_session, repo.repo_id, chain[-1])
        assert time.perf_counter() - started < 2.0

    @pytest.mark.asyncio
    async def test_VL_28_empty_repo_fast_path_under_50ms(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """A run against a repo with nothing seeded finishes in under 50 ms."""
        started = time.perf_counter()
        await _run(db_session, repo.repo_id, _cid())
        assert time.perf_counter() - started < 0.05

    @pytest.mark.asyncio
    async def test_VL_29_rerun_not_5x_slower(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Second run is not more than 5× slower than the first (with a 0.5 s floor)."""
        head = _cid()
        await _seed_commit(db_session, repo.repo_id, head)
        await _seed_history(db_session, repo.repo_id, head, ["src/a.py::fn"])
        await db_session.commit()

        first_start = time.perf_counter()
        await _run(db_session, repo.repo_id, head)
        first_elapsed = time.perf_counter() - first_start

        second_start = time.perf_counter()
        await _run(db_session, repo.repo_id, head)
        second_elapsed = time.perf_counter() - second_start

        # The 0.5 s floor keeps a very fast first run from making the bound unreachable.
        assert second_elapsed < max(first_elapsed * 5, 0.5)

    @pytest.mark.asyncio
    async def test_VL_30_point_lookup_under_10ms(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Fetching velocity rows for a repo is sub-10 ms after a provider run."""
        head = _cid()
        await _seed_commit(db_session, repo.repo_id, head)
        await _seed_history(db_session, repo.repo_id, head, ["src/a.py::fn"])
        await db_session.commit()
        await _run(db_session, repo.repo_id, head)
        started = time.perf_counter()
        await _fetch(db_session, repo.repo_id)
        assert time.perf_counter() - started < 0.01

    @pytest.mark.asyncio
    async def test_VL_31_top20_leaderboard_query_fast(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Fetching the top-20 leaderboard from the table is sub-50 ms."""
        chain = await self._seed_chain(db_session, repo.repo_id, 5)
        await self._touch_modules(db_session, repo.repo_id, chain, 20)
        await db_session.commit()
        await _run(db_session, repo.repo_id, chain[-1])
        started = time.perf_counter()
        await db_session.execute(self._leaderboard_query(repo.repo_id, 20))
        assert time.perf_counter() - started < 0.05

    @pytest.mark.asyncio
    async def test_VL_32_dashboard_preview_query_fast(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Dashboard preview (top 5, LIMIT query) completes under 20 ms."""
        head = _cid()
        await _seed_commit(db_session, repo.repo_id, head)
        await self._touch_modules(db_session, repo.repo_id, [head], 5)
        await db_session.commit()
        await _run(db_session, repo.repo_id, head)
        started = time.perf_counter()
        await db_session.execute(self._leaderboard_query(repo.repo_id, 5))
        assert time.perf_counter() - started < 0.02
610
611
612 # ─────────────────────────────────────────────────────────────────────────────
613 # Tier 5 — State: idempotency, stale-row purge, incremental updates
614 # ─────────────────────────────────────────────────────────────────────────────
615
class TestVelocityState:
    """Behaviour across repeated runs: stable output, stable row counts, incremental growth."""

    @staticmethod
    async def _seed_single(session: AsyncSession, repo_id: str) -> str:
        """Seed and commit one parentless commit touching ``src/a.py::fn``; return its id."""
        commit_id = _cid()
        await _seed_commit(session, repo_id, commit_id)
        await _seed_history(session, repo_id, commit_id, ["src/a.py::fn"])
        await session.commit()
        return commit_id

    @staticmethod
    async def _row_count(session: AsyncSession, repo_id: str) -> int:
        """Count the velocity rows currently stored for ``repo_id``."""
        count_query = (
            sa.select(sa.func.count())
            .select_from(MusehubIntelVelocity)
            .where(MusehubIntelVelocity.repo_id == repo_id)
        )
        return (await session.execute(count_query)).scalar_one()

    @pytest.mark.asyncio
    async def test_VL_33_idempotent_two_runs(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Two runs on the same ref give the same (module, active_commits, net) set."""
        head = await self._seed_single(db_session, repo.repo_id)
        snapshots = []
        for _ in range(2):
            await _run(db_session, repo.repo_id, head)
            stored = await _fetch(db_session, repo.repo_id)
            snapshots.append({(row.module, row.active_commits, row.net) for row in stored})
        assert snapshots[0] == snapshots[1]

    @pytest.mark.asyncio
    async def test_VL_34_stale_rows_purged_on_rerun(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """The stored row count is the same after a second run as after the first."""
        head = await self._seed_single(db_session, repo.repo_id)
        await _run(db_session, repo.repo_id, head)
        count_after_first = await self._row_count(db_session, repo.repo_id)
        await _run(db_session, repo.repo_id, head)
        count_after_second = await self._row_count(db_session, repo.repo_id)
        assert count_after_first == count_after_second

    @pytest.mark.asyncio
    async def test_VL_35_incremental_new_module_appears(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """A child commit touching a new module increases the number of stored modules."""
        base = await self._seed_single(db_session, repo.repo_id)
        await _run(db_session, repo.repo_id, base)
        modules_before = {row.module for row in await _fetch(db_session, repo.repo_id)}

        child = _cid()
        await _seed_commit(db_session, repo.repo_id, child, [base])
        await _seed_history(db_session, repo.repo_id, child, ["tests/test_a.py::test_fn"])
        await db_session.commit()
        await _run(db_session, repo.repo_id, child)
        modules_after = {row.module for row in await _fetch(db_session, repo.repo_id)}
        assert len(modules_after) > len(modules_before)

    @pytest.mark.asyncio
    async def test_VL_36_no_duplicate_modules_after_three_runs(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Three consecutive runs leave at most one row per module."""
        head = await self._seed_single(db_session, repo.repo_id)
        for _ in range(3):
            await _run(db_session, repo.repo_id, head)
        stored = await _fetch(db_session, repo.repo_id)
        module_names = [row.module for row in stored]
        assert len(module_names) == len(set(module_names))

    @pytest.mark.asyncio
    async def test_VL_37_active_commits_increases_with_new_commits(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """active_commits grows once a second commit touches the same module."""
        base = await self._seed_single(db_session, repo.repo_id)
        await _run(db_session, repo.repo_id, base)
        before = (await _fetch(db_session, repo.repo_id))[0].active_commits

        child = _cid()
        await _seed_commit(db_session, repo.repo_id, child, [base])
        await _seed_history(db_session, repo.repo_id, child, ["src/a.py::fn"])
        await db_session.commit()
        await _run(db_session, repo.repo_id, child)
        after = (await _fetch(db_session, repo.repo_id))[0].active_commits
        assert after > before

    @pytest.mark.asyncio
    async def test_VL_38_truncated_false_when_under_cap(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """With a single module the payload reports truncated=False."""
        head = await self._seed_single(db_session, repo.repo_id)
        outcome = await _run(db_session, repo.repo_id, head)
        _key, payload = outcome[0]
        assert payload["truncated"] is False
722
723
724 # ─────────────────────────────────────────────────────────────────────────────
725 # Tier 6 — Security: injection, isolation, unicode
726 # ─────────────────────────────────────────────────────────────────────────────
727
class TestVelocitySecurity:
    """Hostile or unusual symbol addresses, and separation of rows between repos."""

    @staticmethod
    async def _seed_and_run(session: AsyncSession, repo_id: str, address: str) -> None:
        """Seed one commit touching ``address``, commit the session, then run the provider."""
        commit_id = _cid()
        await _seed_commit(session, repo_id, commit_id)
        await _seed_history(session, repo_id, commit_id, [address])
        await session.commit()
        await _run(session, repo_id, commit_id)

    @pytest.mark.asyncio
    async def test_VL_39_sql_injection_stored_verbatim(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """An address carrying a DROP TABLE payload leaves the velocity table queryable."""
        payload = "src/a.py::fn'; DROP TABLE musehub_intel_velocity; --"
        await self._seed_and_run(db_session, repo.repo_id, payload)
        stored = await _fetch(db_session, repo.repo_id)
        assert isinstance(stored, list)

    @pytest.mark.asyncio
    async def test_VL_40_xss_payload_stored_safely(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """An address carrying a <script> payload is processed and rows remain fetchable."""
        payload = "src/<script>alert(1)</script>.py::fn"
        await self._seed_and_run(db_session, repo.repo_id, payload)
        stored = await _fetch(db_session, repo.repo_id)
        assert isinstance(stored, list)

    @pytest.mark.asyncio
    async def test_VL_41_repo_isolation_strict(
        self, db_session: AsyncSession, two_repos: tuple[MusehubRepo, MusehubRepo]
    ) -> None:
        """Running the provider for repo A stores nothing under repo B."""
        seeded_repo, untouched_repo = two_repos
        await self._seed_and_run(db_session, seeded_repo.repo_id, "src/a.py::fn")
        leaked = await _fetch(db_session, untouched_repo.repo_id)
        assert leaked == []

    @pytest.mark.asyncio
    async def test_VL_42_two_repos_independent_rows(
        self, db_session: AsyncSession, two_repos: tuple[MusehubRepo, MusehubRepo]
    ) -> None:
        """Rows fetched for each repo carry only that repo's id."""
        for target in two_repos:
            await self._seed_and_run(db_session, target.repo_id, "src/a.py::fn")
        for target in two_repos:
            stored = await _fetch(db_session, target.repo_id)
            assert all(row.repo_id == target.repo_id for row in stored)

    @pytest.mark.asyncio
    async def test_VL_43_rerun_updates_ref_column(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """After running for c1 then c2, every stored row carries ref == c2."""
        older, newer = _cid(), _cid()
        await _seed_commit(db_session, repo.repo_id, older)
        await _seed_commit(db_session, repo.repo_id, newer, [older])
        for commit_id in (older, newer):
            await _seed_history(db_session, repo.repo_id, commit_id, ["src/a.py::fn"])
        await db_session.commit()
        for ref in (older, newer):
            await _run(db_session, repo.repo_id, ref)
        stored = await _fetch(db_session, repo.repo_id)
        assert all(row.ref == newer for row in stored)

    @pytest.mark.asyncio
    async def test_VL_44_unicode_in_path_handled(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Non-ASCII characters in a symbol path do not crash the provider."""
        await self._seed_and_run(db_session, repo.repo_id, "src/música.py::canción")
        stored = await _fetch(db_session, repo.repo_id)
        assert isinstance(stored, list)
816
817
818 # ─────────────────────────────────────────────────────────────────────────────
819 # Tier 7 — Stress: TOP cap, BFS cap, extended-column completeness
820 # ─────────────────────────────────────────────────────────────────────────────
821
class TestVelocityStress:
    """Stress tier (VL_45 – VL_50): TOP cap, BFS walk cap, extended columns."""

    @pytest.mark.asyncio
    async def test_VL_45_top_cap_respected(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """The number of stored rows stays at or below ``_TOP``."""
        velocity = VelocityProvider()
        head = _cid()
        await _seed_commit(db_session, repo.repo_id, head)
        # Five more distinct modules than the cap.
        addresses = [
            f"mod{n:03d}/file.py::fn" for n in range(velocity._TOP + 5)
        ]
        for address in addresses:
            await _seed_history(db_session, repo.repo_id, head, [address])
        await db_session.commit()
        await _run(db_session, repo.repo_id, head)
        stored = await _fetch(db_session, repo.repo_id)
        assert len(stored) <= velocity._TOP

    @pytest.mark.asyncio
    async def test_VL_46_truncated_true_over_top_cap(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """The payload reports ``truncated`` as True with ``_TOP + 1`` modules."""
        velocity = VelocityProvider()
        head = _cid()
        await _seed_commit(db_session, repo.repo_id, head)
        # One module more than the cap.
        addresses = [
            f"mod{n:03d}/file.py::fn" for n in range(velocity._TOP + 1)
        ]
        for address in addresses:
            await _seed_history(db_session, repo.repo_id, head, [address])
        await db_session.commit()
        outcome = await _run(db_session, repo.repo_id, head)
        _, payload = outcome[0]
        assert payload["truncated"] is True

    @pytest.mark.asyncio
    async def test_VL_47_500_commits_completes_without_error(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """A 500-commit linear chain touching 3 modules yields a truthy result."""
        chain = [_cid() for _ in range(500)]
        # Pair every commit with its predecessor; the first one has no parent.
        for parent, commit_id in zip([None, *chain], chain):
            await _seed_commit(
                db_session, repo.repo_id, commit_id, [parent] if parent else []
            )
        addresses = [f"mod{n}/file.py::fn" for n in range(3)]
        for commit_id in chain:
            for address in addresses:
                await _seed_history(
                    db_session, repo.repo_id, commit_id, [address]
                )
        await db_session.commit()
        outcome = await _run(db_session, repo.repo_id, chain[-1])
        assert outcome

    @pytest.mark.asyncio
    async def test_VL_48_result_count_matches_stored_rows(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """The payload's ``count`` equals the number of rows fetched afterwards."""
        chain = [_cid() for _ in range(4)]
        # Pair every commit with its predecessor; the first one has no parent.
        for parent, commit_id in zip([None, *chain], chain):
            await _seed_commit(
                db_session, repo.repo_id, commit_id, [parent] if parent else []
            )
        addresses = [f"mod{n}/file.py::fn" for n in range(3)]
        for commit_id in chain:
            for address in addresses:
                await _seed_history(
                    db_session, repo.repo_id, commit_id, [address]
                )
        await db_session.commit()
        outcome = await _run(db_session, repo.repo_id, chain[-1])
        _, payload = outcome[0]
        stored = await _fetch(db_session, repo.repo_id)
        assert payload["count"] == len(stored)

    @pytest.mark.asyncio
    async def test_VL_49_bfs_walk_cap_never_exceeded(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """``commits_analysed`` stays at or below ``_MAX_WALK`` on a 50-commit chain."""
        velocity = VelocityProvider()
        chain = [_cid() for _ in range(50)]
        # Pair every commit with its predecessor; the first one has no parent.
        for parent, commit_id in zip([None, *chain], chain):
            await _seed_commit(
                db_session, repo.repo_id, commit_id, [parent] if parent else []
            )
        # Only the root commit carries symbol history.
        await _seed_history(db_session, repo.repo_id, chain[0], ["src/a.py::fn"])
        await db_session.commit()
        outcome = await _run(db_session, repo.repo_id, chain[-1])
        if not outcome:
            # An empty result has no payload to check against the cap.
            return
        _, payload = outcome[0]
        assert payload["commits_analysed"] <= velocity._MAX_WALK

    @pytest.mark.asyncio
    async def test_VL_50_all_extended_columns_non_null(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Each stored row has all four extended columns populated (not None)."""
        velocity = VelocityProvider()
        # Three commits more than one window.
        chain = [_cid() for _ in range(velocity._WINDOW + 3)]
        # Pair every commit with its predecessor; the first one has no parent.
        for parent, commit_id in zip([None, *chain], chain):
            await _seed_commit(
                db_session, repo.repo_id, commit_id, [parent] if parent else []
            )
        for commit_id in chain:
            await _seed_history(
                db_session, repo.repo_id, commit_id, ["src/a.py::fn"]
            )
        await db_session.commit()
        await _run(db_session, repo.repo_id, chain[-1])
        stored = await _fetch(db_session, repo.repo_id)
        assert stored, "expected at least one velocity row"
        extended_columns = (
            "prior_modified",
            "prior_active_commits",
            "window_size",
            "commits_analysed",
        )
        for row in stored:
            for column in extended_columns:
                assert getattr(row, column) is not None
File History 1 commit
sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595 fix: typing audit — 0 violations, 0 untyped defs across all… Sonnet 4.6 minor 20 days ago