"""TDD spec for VelocityProvider — issue #16, Phase 5. Verifies that VelocityProvider reproduces module growth velocity from the symbol history store without subprocess calls: module derivation from symbol addresses, op categorisation (add/delete/modify), two-window BFS analysis (current vs prior), acceleration, stagnant-commit detection, extended columns (prior_modified, prior_active_commits, window_size, commits_analysed), TOP cap, and strict repo isolation. Seven test tiers (50 cases) ---------------------------- Unit VL_01 – VL_08 module derivation, accel helpers, constants Integration VL_09 – VL_18 provider upserts, new columns, op categorisation E2E VL_19 – VL_25 full seeded scenarios, window semantics Performance VL_26 – VL_32 timing bounds State VL_33 – VL_38 idempotency, stale-row purge, incremental updates Security VL_39 – VL_44 injection strings, repo isolation, unicode Stress VL_45 – VL_50 TOP cap, BFS cap, extended-column completeness """ from __future__ import annotations import secrets import time from datetime import datetime, timezone import pytest import pytest_asyncio import sqlalchemy as sa from sqlalchemy.dialects.postgresql import insert as pg_insert from sqlalchemy.ext.asyncio import AsyncSession from muse.core.types import long_id from musehub.db.musehub_intel_models import MusehubIntelVelocity, MusehubSymbolHistoryEntry from musehub.db.musehub_repo_models import MusehubCommit, MusehubCommitRef, MusehubRepo from musehub.services.musehub_intel_providers import VelocityProvider from musehub.types.json_types import JSONObject from musehub.api.routes.musehub.ui_intel import _vel_accel_class, _vel_accel_fmt from tests.factories import create_repo # ───────────────────────────────────────────────────────────────────────────── # Helpers # ───────────────────────────────────────────────────────────────────────────── def _cid() -> str: return long_id(secrets.token_hex(32)) async def _seed_commit( session: AsyncSession, repo_id: str, commit_id: str, 
parent_ids: list[str] | None = None, ) -> None: """Insert a commit row; silently skip on conflict.""" await session.execute( pg_insert(MusehubCommit) .values( commit_id=commit_id, message="test commit", author="test", branch="dev", parent_ids=parent_ids or [], snapshot_id=None, timestamp=datetime.now(timezone.utc), ) .on_conflict_do_nothing() ) await session.execute( pg_insert(MusehubCommitRef) .values(repo_id=repo_id, commit_id=commit_id) .on_conflict_do_nothing() ) async def _seed_history( session: AsyncSession, repo_id: str, commit_id: str, addresses: list[str], op: str = "modify", ) -> None: """Insert symbol history entries with a given op code.""" for addr in addresses: await session.execute( pg_insert(MusehubSymbolHistoryEntry) .values( repo_id=repo_id, address=addr, commit_id=commit_id, committed_at=datetime.now(timezone.utc), op=op, ) .on_conflict_do_nothing() ) async def _run(session: AsyncSession, repo_id: str, ref: str) -> list[tuple[str, JSONObject]]: return await VelocityProvider().compute(session, repo_id, ref, {}) async def _fetch(session: AsyncSession, repo_id: str) -> list[MusehubIntelVelocity]: result = await session.execute( sa.select(MusehubIntelVelocity) .where(MusehubIntelVelocity.repo_id == repo_id) .order_by(sa.desc(MusehubIntelVelocity.active_commits)) ) return list(result.scalars().all()) def _module(addr: str) -> str: """Replicate VelocityProvider._module() for unit tests.""" file = addr.split("::")[0] if "::" in addr else addr if "/" in file: return f"{file.rsplit('/', 1)[0]}/" return f"{file}/" # ───────────────────────────────────────────────────────────────────────────── # Fixtures # ───────────────────────────────────────────────────────────────────────────── @pytest_asyncio.fixture async def repo(db_session: AsyncSession) -> MusehubRepo: return await create_repo(db_session, owner="testuser", slug="velocityprovider") @pytest_asyncio.fixture async def two_repos(db_session: AsyncSession) -> tuple[MusehubRepo, MusehubRepo]: r1 = await 
create_repo(db_session, owner="testuser", slug="vel-repo-1") r2 = await create_repo(db_session, owner="testuser", slug="vel-repo-2") return r1, r2 # ───────────────────────────────────────────────────────────────────────────── # Tier 1 — Unit: module derivation, accel helpers, constants # ───────────────────────────────────────────────────────────────────────────── class TestVelocityUnit: """Pure-function tests — no database required.""" def test_VL_01_module_from_deep_symbol_address(self) -> None: """Module extracted as directory of file component of a deep address.""" assert _module("musehub/services/musehub_wire.py::MyClass") == "musehub/services/" def test_VL_02_module_from_shallow_symbol_address(self) -> None: """Shallow one-directory file extracts its directory.""" assert _module("src/billing.py::charge") == "src/" def test_VL_03_module_from_bare_file_no_slash(self) -> None: """Root-level file (no '/') maps to '/'.""" assert _module("billing.py") == "billing.py/" def test_VL_04_module_from_bare_path_with_slash(self) -> None: """Bare path with slash (no '::') derives module correctly.""" assert _module("musehub/services/foo.py") == "musehub/services/" def test_VL_05_accel_class_positive(self) -> None: """Positive acceleration → 'up' class.""" assert _vel_accel_class(5.0) == "up" assert _vel_accel_class(0.1) == "up" def test_VL_06_accel_class_negative(self) -> None: """Negative acceleration → 'down' class.""" assert _vel_accel_class(-3.0) == "down" assert _vel_accel_class(-0.1) == "down" def test_VL_07_accel_class_zero(self) -> None: """Zero acceleration → 'flat' class.""" assert _vel_accel_class(0.0) == "flat" def test_VL_08_accel_fmt_positive_negative_zero(self) -> None: """accel_fmt prefixes '+' for positive, keeps '-' for negative, '0' for zero.""" assert _vel_accel_fmt(4.0) == "+4" assert _vel_accel_fmt(-3.0) == "-3" assert _vel_accel_fmt(0.0) == "0" # ───────────────────────────────────────────────────────────────────────────── # Tier 2 — Integration: 
# provider upserts, op categorisation, new columns
# ─────────────────────────────────────────────────────────────────────────────


class TestVelocityIntegration:
    """Provider runs against seeded rows: upserts, op categorisation, new columns."""

    @pytest.mark.asyncio
    async def test_VL_09_empty_repo_returns_empty(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Provider on a repo with no commits returns [] and stores no rows."""
        result = await _run(db_session, repo.repo_id, _cid())
        assert result == []
        assert await _fetch(db_session, repo.repo_id) == []

    @pytest.mark.asyncio
    async def test_VL_10_no_history_entries_returns_empty(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Commits exist but no history entries → no rows stored."""
        c1 = _cid()
        await _seed_commit(db_session, repo.repo_id, c1)
        await db_session.commit()
        result = await _run(db_session, repo.repo_id, c1)
        assert result == []

    @pytest.mark.asyncio
    async def test_VL_11_add_op_counted_as_added(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """History entries with op='add' increment the added counter."""
        c1 = _cid()
        await _seed_commit(db_session, repo.repo_id, c1)
        await _seed_history(db_session, repo.repo_id, c1, ["src/billing.py::charge"], op="add")
        await db_session.commit()
        await _run(db_session, repo.repo_id, c1)
        rows = await _fetch(db_session, repo.repo_id)
        assert len(rows) == 1
        assert rows[0].added == 1
        assert rows[0].removed == 0
        assert rows[0].modified == 0

    @pytest.mark.asyncio
    async def test_VL_12_delete_op_counted_as_removed(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """History entries with op='delete' increment the removed counter."""
        c1 = _cid()
        await _seed_commit(db_session, repo.repo_id, c1)
        await _seed_history(db_session, repo.repo_id, c1, ["src/billing.py::charge"], op="delete")
        await db_session.commit()
        await _run(db_session, repo.repo_id, c1)
        rows = await _fetch(db_session, repo.repo_id)
        assert rows[0].removed == 1
        assert rows[0].added == 0

    @pytest.mark.asyncio
    async def test_VL_13_modify_op_counted_as_modified(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """History entries with op='modify' increment the modified counter."""
        c1 = _cid()
        await _seed_commit(db_session, repo.repo_id, c1)
        await _seed_history(db_session, repo.repo_id, c1, ["src/billing.py::charge"], op="modify")
        await db_session.commit()
        await _run(db_session, repo.repo_id, c1)
        rows = await _fetch(db_session, repo.repo_id)
        assert rows[0].modified == 1
        assert rows[0].added == 0

    @pytest.mark.asyncio
    async def test_VL_14_net_equals_added_minus_removed(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """net = added - removed for the current window."""
        c1 = _cid()
        await _seed_commit(db_session, repo.repo_id, c1)
        await _seed_history(
            db_session, repo.repo_id, c1, ["src/billing.py::a", "src/billing.py::b"], op="add"
        )
        await _seed_history(db_session, repo.repo_id, c1, ["src/billing.py::c"], op="delete")
        await db_session.commit()
        await _run(db_session, repo.repo_id, c1)
        rows = await _fetch(db_session, repo.repo_id)
        assert rows[0].net == rows[0].added - rows[0].removed

    @pytest.mark.asyncio
    async def test_VL_15_active_commits_counts_distinct_commits(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """active_commits equals the number of distinct commits that touched the module."""
        commits = [_cid() for _ in range(3)]
        # Linear chain: each commit's only parent is the one seeded before it.
        prev = None
        for cid in commits:
            await _seed_commit(db_session, repo.repo_id, cid, [prev] if prev else [])
            prev = cid
        for cid in commits:
            await _seed_history(db_session, repo.repo_id, cid, ["src/billing.py::fn"])
        await db_session.commit()
        await _run(db_session, repo.repo_id, commits[-1])
        rows = await _fetch(db_session, repo.repo_id)
        assert rows[0].active_commits == 3

    @pytest.mark.asyncio
    async def test_VL_16_window_size_column_populated(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """window_size column reflects VelocityProvider._WINDOW."""
        c1 = _cid()
        await _seed_commit(db_session, repo.repo_id, c1)
        await _seed_history(db_session, repo.repo_id, c1, ["src/billing.py::fn"])
        await db_session.commit()
        await _run(db_session, repo.repo_id, c1)
        rows = await _fetch(db_session, repo.repo_id)
        assert rows[0].window_size == VelocityProvider._WINDOW

    @pytest.mark.asyncio
    async def test_VL_17_commits_analysed_column_populated(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """commits_analysed column reflects the BFS walk length."""
        commits = [_cid() for _ in range(5)]
        prev = None
        for cid in commits:
            await _seed_commit(db_session, repo.repo_id, cid, [prev] if prev else [])
            prev = cid
        # Only the root commit carries history; the walk still covers all 5.
        await _seed_history(db_session, repo.repo_id, commits[0], ["src/billing.py::fn"])
        await db_session.commit()
        await _run(db_session, repo.repo_id, commits[-1])
        rows = await _fetch(db_session, repo.repo_id)
        assert rows[0].commits_analysed == 5

    @pytest.mark.asyncio
    async def test_VL_18_result_key_correct(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Provider returns result tuple with key 'intel.code.velocity'."""
        c1 = _cid()
        await _seed_commit(db_session, repo.repo_id, c1)
        await _seed_history(db_session, repo.repo_id, c1, ["src/billing.py::fn"])
        await db_session.commit()
        result = await _run(db_session, repo.repo_id, c1)
        assert len(result) == 1
        key, payload = result[0]
        assert key == "intel.code.velocity"
        assert "count" in payload
        assert "commits_analysed" in payload
        assert "truncated" in payload


# ─────────────────────────────────────────────────────────────────────────────
# Tier 3 — E2E: full seeded scenarios, window semantics
# ─────────────────────────────────────────────────────────────────────────────


class TestVelocityE2E:
    """Full seeded scenarios covering ranking, stagnation and window semantics."""

    @pytest.mark.asyncio
    async def test_VL_19_hottest_module_ranked_first(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Module with more active commits is ranked first by active_commits."""
        commits = [_cid() for _ in range(5)]
        prev = None
        for cid in commits:
            await _seed_commit(db_session, repo.repo_id, cid, [prev] if prev else [])
            prev = cid
        # services/ in all 5; tests/ in only 2
        for cid in commits:
            await _seed_history(db_session, repo.repo_id, cid, ["musehub/services/foo.py::fn"])
        for cid in commits[:2]:
            await _seed_history(db_session, repo.repo_id, cid, ["tests/test_foo.py::test_fn"])
        await db_session.commit()
        await _run(db_session, repo.repo_id, commits[-1])
        rows = await _fetch(db_session, repo.repo_id)
        assert rows[0].module == "musehub/services/"

    @pytest.mark.asyncio
    async def test_VL_20_two_modules_produce_two_rows(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Symbols from two distinct modules produce two velocity rows."""
        c1 = _cid()
        await _seed_commit(db_session, repo.repo_id, c1)
        await _seed_history(
            db_session, repo.repo_id, c1, ["src/a.py::fn", "tests/test_a.py::test_fn"]
        )
        await db_session.commit()
        await _run(db_session, repo.repo_id, c1)
        rows = await _fetch(db_session, repo.repo_id)
        modules = {r.module for r in rows}
        assert "src/" in modules
        assert "tests/" in modules

    @pytest.mark.asyncio
    async def test_VL_21_stagnant_commit_detected(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """A commit where added==removed for a module increments stagnant_commits."""
        c1 = _cid()
        await _seed_commit(db_session, repo.repo_id, c1)
        # One add + one delete in same module + same commit → net=0 → stagnant
        await _seed_history(db_session, repo.repo_id, c1, ["src/billing.py::new_fn"], op="add")
        await _seed_history(db_session, repo.repo_id, c1, ["src/billing.py::old_fn"], op="delete")
        await db_session.commit()
        await _run(db_session, repo.repo_id, c1)
        rows = await _fetch(db_session, repo.repo_id)
        assert rows[0].stagnant_commits == 1

    @pytest.mark.asyncio
    async def test_VL_22_non_stagnant_commit_not_counted(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """A commit with net != 0 does not increment stagnant_commits."""
        c1 = _cid()
        await _seed_commit(db_session, repo.repo_id, c1)
        await _seed_history(db_session, repo.repo_id, c1, ["src/billing.py::fn"], op="add")
        await db_session.commit()
        await _run(db_session, repo.repo_id, c1)
        rows = await _fetch(db_session, repo.repo_id)
        assert rows[0].stagnant_commits == 0

    @pytest.mark.asyncio
    async def test_VL_23_prior_window_populates_prior_fields(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Commits beyond _WINDOW land in the prior window and set prior_* fields."""
        provider = VelocityProvider()
        n = provider._WINDOW + 3
        commits = [_cid() for _ in range(n)]
        prev = None
        for cid in commits:
            await _seed_commit(db_session, repo.repo_id, cid, [prev] if prev else [])
            prev = cid
        # touch src/ in all commits → first _WINDOW go to current, rest to prior
        for cid in commits:
            await _seed_history(db_session, repo.repo_id, cid, ["src/billing.py::fn"], op="add")
        await db_session.commit()
        await _run(db_session, repo.repo_id, commits[-1])
        rows = await _fetch(db_session, repo.repo_id)
        row = rows[0]
        assert row.prior_active_commits > 0

    @pytest.mark.asyncio
    async def test_VL_24_positive_acceleration_when_current_more_active(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """acceleration > 0 when current window has higher net than prior."""
        provider = VelocityProvider()
        # prior window: 1 add per commit; current window: 3 adds per commit
        prior_commits = [_cid() for _ in range(provider._WINDOW)]
        current_commits = [_cid() for _ in range(provider._WINDOW)]
        all_commits = prior_commits + current_commits
        prev = None
        for cid in all_commits:
            await _seed_commit(db_session, repo.repo_id, cid, [prev] if prev else [])
            prev = cid
        for cid in prior_commits:
            await _seed_history(db_session, repo.repo_id, cid, ["src/billing.py::fn1"], op="add")
        for cid in current_commits:
            for sym in ["src/billing.py::fn1", "src/billing.py::fn2", "src/billing.py::fn3"]:
                await _seed_history(db_session, repo.repo_id, cid, [sym], op="add")
        await db_session.commit()
        await _run(db_session, repo.repo_id, all_commits[-1])
        rows = await _fetch(db_session, repo.repo_id)
        src_row = next(r for r in rows if r.module == "src/")
        assert src_row.acceleration > 0

    @pytest.mark.asyncio
    async def test_VL_25_module_only_in_current_has_zero_prior(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """A module only touched in the current window has prior_active_commits=0."""
        c1 = _cid()
        await _seed_commit(db_session, repo.repo_id, c1)
        await _seed_history(db_session, repo.repo_id, c1, ["src/billing.py::fn"])
        await db_session.commit()
        await _run(db_session, repo.repo_id, c1)
        rows = await _fetch(db_session, repo.repo_id)
        assert rows[0].prior_active_commits == 0
        assert rows[0].prior_net == 0


# ─────────────────────────────────────────────────────────────────────────────
# Tier 4 — Performance: timing bounds
# ─────────────────────────────────────────────────────────────────────────────


class TestVelocityPerformance:
    """Wall-clock bounds on provider runs and on reads of the velocity table.

    Durations are taken with ``time.perf_counter()``, the clock documented for
    measuring short intervals, because some bounds are as small as 10 ms.
    """

    @pytest.mark.asyncio
    async def test_VL_26_ten_commits_five_modules_under_500ms(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """10 commits × 5 modules completes in under 500 ms."""
        commits = [_cid() for _ in range(10)]
        prev = None
        for cid in commits:
            await _seed_commit(db_session, repo.repo_id, cid, [prev] if prev else [])
            prev = cid
        for cid in commits:
            for i in range(5):
                await _seed_history(db_session, repo.repo_id, cid, [f"mod{i}/file.py::fn"])
        await db_session.commit()
        t0 = time.perf_counter()
        await _run(db_session, repo.repo_id, commits[-1])
        assert time.perf_counter() - t0 < 0.5

    @pytest.mark.asyncio
    async def test_VL_27_forty_commits_ten_modules_under_2s(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """40 commits × 10 modules completes in under 2 s."""
        commits = [_cid() for _ in range(40)]
        prev = None
        for cid in commits:
            await _seed_commit(db_session, repo.repo_id, cid, [prev] if prev else [])
            prev = cid
        for cid in commits:
            for i in range(10):
                await _seed_history(db_session, repo.repo_id, cid, [f"mod{i}/file.py::fn"])
        await db_session.commit()
        t0 = time.perf_counter()
        await _run(db_session, repo.repo_id, commits[-1])
        assert time.perf_counter() - t0 < 2.0

    @pytest.mark.asyncio
    async def test_VL_28_empty_repo_fast_path_under_50ms(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Empty repo fast-path exits under 50 ms."""
        t0 = time.perf_counter()
        await _run(db_session, repo.repo_id, _cid())
        assert time.perf_counter() - t0 < 0.05

    @pytest.mark.asyncio
    async def test_VL_29_rerun_not_5x_slower(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Second run is not more than 5× slower than the first."""
        c1 = _cid()
        await _seed_commit(db_session, repo.repo_id, c1)
        await _seed_history(db_session, repo.repo_id, c1, ["src/a.py::fn"])
        await db_session.commit()

        t1 = time.perf_counter()
        await _run(db_session, repo.repo_id, c1)
        d1 = time.perf_counter() - t1

        t2 = time.perf_counter()
        await _run(db_session, repo.repo_id, c1)
        d2 = time.perf_counter() - t2

        # The 0.5 s floor keeps a very fast first run from making the
        # relative bound impossibly tight.
        assert d2 < max(d1 * 5, 0.5)

    @pytest.mark.asyncio
    async def test_VL_30_point_lookup_under_10ms(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Fetching velocity rows for a repo is sub-10 ms after provider run."""
        c1 = _cid()
        await _seed_commit(db_session, repo.repo_id, c1)
        await _seed_history(db_session, repo.repo_id, c1, ["src/a.py::fn"])
        await db_session.commit()
        await _run(db_session, repo.repo_id, c1)
        t0 = time.perf_counter()
        await _fetch(db_session, repo.repo_id)
        assert time.perf_counter() - t0 < 0.01

    @pytest.mark.asyncio
    async def test_VL_31_top20_leaderboard_query_fast(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Fetching top-20 leaderboard from the table is sub-50 ms."""
        commits = [_cid() for _ in range(5)]
        prev = None
        for cid in commits:
            await _seed_commit(db_session, repo.repo_id, cid, [prev] if prev else [])
            prev = cid
        for cid in commits:
            for i in range(20):
                await _seed_history(db_session, repo.repo_id, cid, [f"mod{i}/file.py::fn"])
        await db_session.commit()
        await _run(db_session, repo.repo_id, commits[-1])
        t0 = time.perf_counter()
        await db_session.execute(
            sa.select(MusehubIntelVelocity)
            .where(MusehubIntelVelocity.repo_id == repo.repo_id)
            .order_by(sa.desc(MusehubIntelVelocity.active_commits))
            .limit(20)
        )
        assert time.perf_counter() - t0 < 0.05

    @pytest.mark.asyncio
    async def test_VL_32_dashboard_preview_query_fast(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Dashboard preview (top 5, LIMIT query) completes under 20 ms."""
        c1 = _cid()
        await _seed_commit(db_session, repo.repo_id, c1)
        for i in range(5):
            await _seed_history(db_session, repo.repo_id, c1, [f"mod{i}/file.py::fn"])
        await db_session.commit()
        await _run(db_session, repo.repo_id, c1)
        t0 = time.perf_counter()
        await db_session.execute(
            sa.select(MusehubIntelVelocity)
            .where(MusehubIntelVelocity.repo_id == repo.repo_id)
            .order_by(sa.desc(MusehubIntelVelocity.active_commits))
            .limit(5)
        )
        assert time.perf_counter() - t0 < 0.02


# ─────────────────────────────────────────────────────────────────────────────
# Tier 5 — State: idempotency, stale-row purge, incremental updates
# ─────────────────────────────────────────────────────────────────────────────


class TestVelocityState:
    """Repeated and incremental provider runs against the same repo."""

    @pytest.mark.asyncio
    async def test_VL_33_idempotent_two_runs(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Running the provider twice produces identical rows."""
        c1 = _cid()
        await _seed_commit(db_session, repo.repo_id, c1)
        await _seed_history(db_session, repo.repo_id, c1, ["src/a.py::fn"])
        await db_session.commit()
        await _run(db_session, repo.repo_id, c1)
        first = {
            (r.module, r.active_commits, r.net) for r in await _fetch(db_session, repo.repo_id)
        }
        await _run(db_session, repo.repo_id, c1)
        second = {
            (r.module, r.active_commits, r.net) for r in await _fetch(db_session, repo.repo_id)
        }
        assert first == second

    @pytest.mark.asyncio
    async def test_VL_34_stale_rows_purged_on_rerun(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Re-run deletes all old rows before inserting fresh set."""
        c1 = _cid()
        await _seed_commit(db_session, repo.repo_id, c1)
        await _seed_history(db_session, repo.repo_id, c1, ["src/a.py::fn"])
        await db_session.commit()
        await _run(db_session, repo.repo_id, c1)
        count_first = (
            await db_session.execute(
                sa.select(sa.func.count())
                .select_from(MusehubIntelVelocity)
                .where(MusehubIntelVelocity.repo_id == repo.repo_id)
            )
        ).scalar_one()
        await _run(db_session, repo.repo_id, c1)
        count_second = (
            await db_session.execute(
                sa.select(sa.func.count())
                .select_from(MusehubIntelVelocity)
                .where(MusehubIntelVelocity.repo_id == repo.repo_id)
            )
        ).scalar_one()
        # An unchanged row count across runs means old rows did not pile up.
        assert count_first == count_second

    @pytest.mark.asyncio
    async def test_VL_35_incremental_new_module_appears(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """After adding commits to a new module, it materialises on re-run."""
        c1 = _cid()
        await _seed_commit(db_session, repo.repo_id, c1)
        await _seed_history(db_session, repo.repo_id, c1, ["src/a.py::fn"])
        await db_session.commit()
        await _run(db_session, repo.repo_id, c1)
        modules_before = {r.module for r in await _fetch(db_session, repo.repo_id)}

        c2 = _cid()
        await _seed_commit(db_session, repo.repo_id, c2, [c1])
        await _seed_history(db_session, repo.repo_id, c2, ["tests/test_a.py::test_fn"])
        await db_session.commit()
        await _run(db_session, repo.repo_id, c2)
        modules_after = {r.module for r in await _fetch(db_session, repo.repo_id)}
        assert len(modules_after) > len(modules_before)

    @pytest.mark.asyncio
    async def test_VL_36_no_duplicate_modules_after_three_runs(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """No duplicate module rows after 3 consecutive runs."""
        c1 = _cid()
        await _seed_commit(db_session, repo.repo_id, c1)
        await _seed_history(db_session, repo.repo_id, c1, ["src/a.py::fn"])
        await db_session.commit()
        for _ in range(3):
            await _run(db_session, repo.repo_id, c1)
        rows = await _fetch(db_session, repo.repo_id)
        modules = [r.module for r in rows]
        assert len(modules) == len(set(modules))

    @pytest.mark.asyncio
    async def test_VL_37_active_commits_increases_with_new_commits(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """active_commits increases when more commits touch the module."""
        c1, c2 = _cid(), _cid()
        await _seed_commit(db_session, repo.repo_id, c1)
        await _seed_history(db_session, repo.repo_id, c1, ["src/a.py::fn"])
        await db_session.commit()
        await _run(db_session, repo.repo_id, c1)
        before = (await _fetch(db_session, repo.repo_id))[0].active_commits

        await _seed_commit(db_session, repo.repo_id, c2, [c1])
        await _seed_history(db_session, repo.repo_id, c2, ["src/a.py::fn"])
        await db_session.commit()
        await _run(db_session, repo.repo_id, c2)
        after = (await _fetch(db_session, repo.repo_id))[0].active_commits
        assert after > before

    @pytest.mark.asyncio
    async def test_VL_38_truncated_false_when_under_cap(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """truncated=False when module count is within _TOP."""
        c1 = _cid()
        await _seed_commit(db_session, repo.repo_id, c1)
        await _seed_history(db_session, repo.repo_id, c1, ["src/a.py::fn"])
        await db_session.commit()
        result = await _run(db_session, repo.repo_id, c1)
        _, payload = result[0]
        assert payload["truncated"] is False


# ─────────────────────────────────────────────────────────────────────────────
# Tier 6 — Security: injection, isolation, unicode
# ─────────────────────────────────────────────────────────────────────────────


class TestVelocitySecurity:
    """Hostile or unusual symbol addresses, and isolation between repos."""

    @pytest.mark.asyncio
    async def test_VL_39_sql_injection_stored_verbatim(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """SQL injection in symbol address stored as-is; table survives."""
        inject = "src/a.py::fn'; DROP TABLE musehub_intel_velocity; --"
        c1 = _cid()
        await _seed_commit(db_session, repo.repo_id, c1)
        await _seed_history(db_session, repo.repo_id, c1, [inject])
        await db_session.commit()
        await _run(db_session, repo.repo_id, c1)
        # The select would raise if the velocity table had been dropped.
        assert isinstance(await _fetch(db_session, repo.repo_id), list)

    @pytest.mark.asyncio
    async def test_VL_40_xss_payload_stored_safely(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """XSS payload in symbol address stored without execution."""
        # Script-tag payload embedded in the file component of the address, so
        # the markup actually flows through the provider.
        xss = "src/<script>alert(1)</script>.py::fn"
        c1 = _cid()
        await _seed_commit(db_session, repo.repo_id, c1)
        await _seed_history(db_session, repo.repo_id, c1, [xss])
        await db_session.commit()
        await _run(db_session, repo.repo_id, c1)
        assert isinstance(await _fetch(db_session, repo.repo_id), list)

    @pytest.mark.asyncio
    async def test_VL_41_repo_isolation_strict(
        self, db_session: AsyncSession, two_repos: tuple[MusehubRepo, MusehubRepo]
    ) -> None:
        """Velocity rows from repo A are never visible when querying repo B."""
        r1, r2 = two_repos
        c1 = _cid()
        await _seed_commit(db_session, r1.repo_id, c1)
        await _seed_history(db_session, r1.repo_id, c1, ["src/a.py::fn"])
        await db_session.commit()
        await _run(db_session, r1.repo_id, c1)
        assert await _fetch(db_session, r2.repo_id) == []

    @pytest.mark.asyncio
    async def test_VL_42_two_repos_independent_rows(
        self, db_session: AsyncSession, two_repos: tuple[MusehubRepo, MusehubRepo]
    ) -> None:
        """Two repos each produce their own independent velocity rows."""
        r1, r2 = two_repos
        for target in (r1, r2):
            c1 = _cid()
            await _seed_commit(db_session, target.repo_id, c1)
            await _seed_history(db_session, target.repo_id, c1, ["src/a.py::fn"])
            await db_session.commit()
            await _run(db_session, target.repo_id, c1)
        rows1 = await _fetch(db_session, r1.repo_id)
        rows2 = await _fetch(db_session, r2.repo_id)
        assert all(r.repo_id == r1.repo_id for r in rows1)
        assert all(r.repo_id == r2.repo_id for r in rows2)

    @pytest.mark.asyncio
    async def test_VL_43_rerun_updates_ref_column(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Re-run for a new ref updates the ref column on all rows."""
        c1, c2 = _cid(), _cid()
        await _seed_commit(db_session, repo.repo_id, c1)
        await _seed_commit(db_session, repo.repo_id, c2, [c1])
        for cid in [c1, c2]:
            await _seed_history(db_session, repo.repo_id, cid, ["src/a.py::fn"])
        await db_session.commit()
        await _run(db_session, repo.repo_id, c1)
        await _run(db_session, repo.repo_id, c2)
        rows = await _fetch(db_session, repo.repo_id)
        assert all(r.ref == c2 for r in rows)

    @pytest.mark.asyncio
    async def test_VL_44_unicode_in_path_handled(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Unicode characters in symbol paths do not crash the provider."""
        c1 = _cid()
        await _seed_commit(db_session, repo.repo_id, c1)
        await _seed_history(db_session, repo.repo_id, c1, ["src/música.py::canción"])
        await db_session.commit()
        await _run(db_session, repo.repo_id, c1)
        assert isinstance(await _fetch(db_session, repo.repo_id), list)


# ─────────────────────────────────────────────────────────────────────────────
# Tier 7 — Stress: TOP cap, BFS cap, extended-column completeness
# ─────────────────────────────────────────────────────────────────────────────


class TestVelocityStress:
    """Caps on stored modules and walked commits, plus extended-column checks."""

    @pytest.mark.asyncio
    async def test_VL_45_top_cap_respected(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Stored module count never exceeds _TOP."""
        provider = VelocityProvider()
        c1 = _cid()
        await _seed_commit(db_session, repo.repo_id, c1)
        # _TOP + 5 distinct modules
        for i in range(provider._TOP + 5):
            await _seed_history(db_session, repo.repo_id, c1, [f"mod{i:03d}/file.py::fn"])
        await db_session.commit()
        await _run(db_session, repo.repo_id, c1)
        rows = await _fetch(db_session, repo.repo_id)
        assert len(rows) <= provider._TOP

    @pytest.mark.asyncio
    async def test_VL_46_truncated_true_over_top_cap(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """truncated=True when distinct module count exceeds _TOP."""
        provider = VelocityProvider()
        c1 = _cid()
        await _seed_commit(db_session, repo.repo_id, c1)
        for i in range(provider._TOP + 1):
            await _seed_history(db_session, repo.repo_id, c1, [f"mod{i:03d}/file.py::fn"])
        await db_session.commit()
        result = await _run(db_session, repo.repo_id, c1)
        _, payload = result[0]
        assert payload["truncated"] is True

    @pytest.mark.asyncio
    async def test_VL_47_500_commits_completes_without_error(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """500 commits × 3 modules completes without error."""
        commits = [_cid() for _ in range(500)]
        prev = None
        for cid in commits:
            await _seed_commit(db_session, repo.repo_id, cid, [prev] if prev else [])
            prev = cid
        for cid in commits:
            for i in range(3):
                await _seed_history(db_session, repo.repo_id, cid, [f"mod{i}/file.py::fn"])
        await db_session.commit()
        result = await _run(db_session, repo.repo_id, commits[-1])
        assert result

    @pytest.mark.asyncio
    async def test_VL_48_result_count_matches_stored_rows(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """metadata 'count' always equals len(stored rows)."""
        commits = [_cid() for _ in range(4)]
        prev = None
        for cid in commits:
            await _seed_commit(db_session, repo.repo_id, cid, [prev] if prev else [])
            prev = cid
        for cid in commits:
            for i in range(3):
                await _seed_history(db_session, repo.repo_id, cid, [f"mod{i}/file.py::fn"])
        await db_session.commit()
        result = await _run(db_session, repo.repo_id, commits[-1])
        _, payload = result[0]
        rows = await _fetch(db_session, repo.repo_id)
        assert payload["count"] == len(rows)

    @pytest.mark.asyncio
    async def test_VL_49_bfs_walk_cap_never_exceeded(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """commits_analysed never exceeds _MAX_WALK."""
        provider = VelocityProvider()
        commits = [_cid() for _ in range(50)]
        prev = None
        for cid in commits:
            await _seed_commit(db_session, repo.repo_id, cid, [prev] if prev else [])
            prev = cid
        await _seed_history(db_session, repo.repo_id, commits[0], ["src/a.py::fn"])
        await db_session.commit()
        result = await _run(db_session, repo.repo_id, commits[-1])
        # NOTE(review): the bound is only checked when the provider returns a
        # payload, so this test passes vacuously on an empty result. The only
        # history entry sits on the root commit, 49 parents away from the ref —
        # confirm whether an empty result is expected here.
        if result:
            _, payload = result[0]
            assert payload["commits_analysed"] <= provider._MAX_WALK

    @pytest.mark.asyncio
    async def test_VL_50_all_extended_columns_non_null(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Every stored row has non-null values for all four extended columns."""
        provider = VelocityProvider()
        n = provider._WINDOW + 3
        commits = [_cid() for _ in range(n)]
        prev = None
        for cid in commits:
            await _seed_commit(db_session, repo.repo_id, cid, [prev] if prev else [])
            prev = cid
        for cid in commits:
            await _seed_history(db_session, repo.repo_id, cid, ["src/a.py::fn"])
        await db_session.commit()
        await _run(db_session, repo.repo_id, commits[-1])
        rows = await _fetch(db_session, repo.repo_id)
        assert rows, "expected at least one velocity row"
        for r in rows:
            assert r.prior_modified is not None
            assert r.prior_active_commits is not None
            assert r.window_size is not None
            assert r.commits_analysed is not None