"""Section 26 — Divergence Engine: 7-layer test suite. Covers musehub/services/musehub_divergence.py and the GET /api/repos/{repo_id}/divergence endpoint. Layer map --------- 1. Unit — classify_message, score_to_level, compute_hub_dimension_divergence, find_common_ancestor, get_commits_since, extract_affected_sections, _delta_label, build_zero_diff_response, constants 2. Integration — get_branch_commits, compute_hub_divergence (full pipeline) 3. E2E — HTTP GET /api/repos/{repo_id}/divergence 4. Stress — large histories, many-dimension calls, concurrent computes 5. Data Integrity — score bounds, ordering, common ancestor correctness 6. Security — auth / visibility, invalid branch names safe 7. Performance — timing budgets """ from __future__ import annotations import asyncio import secrets import time from datetime import datetime, timezone, timedelta import pytest from httpx import AsyncClient from sqlalchemy.ext.asyncio import AsyncSession from unittest.mock import MagicMock from musehub.core.genesis import compute_identity_id, compute_repo_id from musehub.db.musehub_repo_models import MusehubCommit, MusehubCommitRef, MusehubRepo from musehub.types.json_types import StrDict from musehub.services.musehub_divergence import ( ALL_DIMENSIONS, _DIMENSION_PATTERNS, _SECTION_RE, MuseHubDivergenceLevel, MuseHubDivergenceResult, MuseHubDimensionDivergence, _delta_label, build_proposal_diff_response, build_zero_diff_response, classify_message, compute_hub_dimension_divergence, compute_hub_divergence, extract_affected_sections, find_common_ancestor, get_branch_commits, get_commits_since, score_to_level, ) # --------------------------------------------------------------------------- # DB helpers # --------------------------------------------------------------------------- def _uid() -> str: return secrets.token_hex(16) def _cid() -> str: return secrets.token_hex(16) _TEST_OWNER_ID = compute_identity_id(b"test-divergence-owner") async def _db_repo(session: AsyncSession, *, visibility: str = "private") -> str: from datetime import datetime, timezone slug = f"repo-{secrets.token_hex(4)}" created_at = datetime.now(timezone.utc) repo_id = compute_repo_id(_TEST_OWNER_ID, slug, "code", created_at.isoformat()) repo = MusehubRepo( repo_id=repo_id, name=slug, slug=slug, owner="testuser", owner_user_id=_TEST_OWNER_ID, visibility=visibility, created_at=created_at, updated_at=created_at, ) session.add(repo) await session.flush() return repo.repo_id async def _db_commit( session: AsyncSession, repo_id: str, *, branch: str = "main", message: str = "add groove", parent_ids: list[str] | None = None, ts: datetime | None = None, ) -> MusehubCommit: c = MusehubCommit( commit_id=_cid(), branch=branch, parent_ids=parent_ids or [], message=message, author="agent", timestamp=ts or datetime.now(timezone.utc), ) session.add(c) session.add(MusehubCommitRef(repo_id=repo_id, commit_id=c.commit_id)) await session.flush() return c async def _api_repo( client: AsyncClient, auth_headers: StrDict, *, visibility: str = "private", ) -> str: r = await client.post( "/api/repos", json={ "name": f"div-{_uid()[:8]}", "owner": "testuser", "visibility": visibility, }, headers=auth_headers, ) assert r.status_code == 201, r.text return r.json()["repoId"] # =========================================================================== # Layer 1 — Unit # =========================================================================== class TestUnitConstants: def test_all_dimensions_five(self) -> None: assert len(ALL_DIMENSIONS) == 5 def test_all_dimensions_names(self) -> None: assert set(ALL_DIMENSIONS) == {"melodic", "harmonic", "rhythmic", "structural", "dynamic"} def test_dimension_patterns_covers_all(self) -> None: for dim in ALL_DIMENSIONS: assert dim in _DIMENSION_PATTERNS def test_section_re_compiles(self) -> None: assert _SECTION_RE.pattern is not None class TestUnitClassifyMessage: def test_melodic_keywords(self) -> None: assert "melodic" in classify_message("add melody line") assert "melodic" in classify_message("record lead solo") assert "melodic" in classify_message("fix pitch drift") def test_harmonic_keywords(self) -> None: assert "harmonic" in classify_message("add chord progression") assert "harmonic" in classify_message("change key to Dm") def test_rhythmic_keywords(self) -> None: assert "rhythmic" in classify_message("adjust tempo to 120 bpm") assert "rhythmic" in classify_message("tighten drum groove") def test_structural_keywords(self) -> None: assert "structural" in classify_message("rewrite bridge section") assert "structural" in classify_message("add chorus after verse") def test_dynamic_keywords(self) -> None: assert "dynamic" in classify_message("apply reverb to guitar") assert "dynamic" in classify_message("master mix levels") def test_multi_dimension_message(self) -> None: dims = classify_message("add jazzy chord melody with reverb") assert "melodic" in dims assert "harmonic" in dims assert "dynamic" in dims def test_unclassified_returns_empty(self) -> None: assert classify_message("update README") == set() assert classify_message("fix typo in config") == set() def test_case_insensitive(self) -> None: assert "melodic" in classify_message("Add MELODY line") assert "rhythmic" in classify_message("DRUM pattern fix") def test_empty_message(self) -> None: assert classify_message("") == set() class TestUnitScoreToLevel: def test_zero_is_none(self) -> None: assert score_to_level(0.0) == MuseHubDivergenceLevel.NONE def test_boundary_0_15_is_low(self) -> None: assert score_to_level(0.15) == MuseHubDivergenceLevel.LOW def test_mid_range_low(self) -> None: assert score_to_level(0.25) == MuseHubDivergenceLevel.LOW def test_boundary_0_40_is_med(self) -> None: assert score_to_level(0.40) == MuseHubDivergenceLevel.MED def test_mid_range_med(self) -> None: assert score_to_level(0.55) == MuseHubDivergenceLevel.MED def test_boundary_0_70_is_high(self) -> None: assert score_to_level(0.70) == MuseHubDivergenceLevel.HIGH def test_one_is_high(self) -> None: assert score_to_level(1.0) == MuseHubDivergenceLevel.HIGH def test_just_below_0_15_is_none(self) -> None: assert score_to_level(0.14) == MuseHubDivergenceLevel.NONE class TestUnitComputeHubDimensionDivergence: def _make_commit(self, cid: str) -> MusehubCommit: c = MusehubCommit.__new__(MusehubCommit) object.__setattr__(c, "commit_id", cid) return c def test_identical_sets_score_zero(self) -> None: a_ids = {"c1", "c2"} b_ids = {"c1", "c2"} a_msgs = {"c1": "add chord", "c2": "fix chord progression"} b_msgs = {"c1": "add chord", "c2": "fix chord progression"} result = compute_hub_dimension_divergence("harmonic", a_ids, b_ids, a_msgs, b_msgs) assert result.score == 0.0 assert result.level == MuseHubDivergenceLevel.NONE def test_disjoint_sets_score_one(self) -> None: a_ids = {"c1"} b_ids = {"c2"} a_msgs = {"c1": "add chord"} b_msgs = {"c2": "fix harmony"} result = compute_hub_dimension_divergence("harmonic", a_ids, b_ids, a_msgs, b_msgs) assert result.score == 1.0 assert result.level == MuseHubDivergenceLevel.HIGH def test_no_matching_dimension_score_zero(self) -> None: a_ids = {"c1"} b_ids = {"c2"} a_msgs = {"c1": "fix typo"} # no harmonic keywords b_msgs = {"c2": "update readme"} result = compute_hub_dimension_divergence("harmonic", a_ids, b_ids, a_msgs, b_msgs) assert result.score == 0.0 assert "No harmonic" in result.description def test_branch_commit_counts(self) -> None: a_ids = {"c1", "c2"} b_ids = {"c3"} a_msgs = {"c1": "add melody", "c2": "fix melody riff"} b_msgs = {"c3": "add melody"} result = compute_hub_dimension_divergence("melodic", a_ids, b_ids, a_msgs, b_msgs) assert result.branch_a_commits == 2 assert result.branch_b_commits == 1 def test_score_rounded_to_4dp(self) -> None: # 1 overlap, 3 symmetric diff → score = 2/3 ≈ 0.6667 a_ids = {"c1", "c2"} b_ids = {"c1", "c3"} msgs = {"c1": "add chord", "c2": "fix harmony key", "c3": "harmonic voicing"} result = compute_hub_dimension_divergence("harmonic", a_ids, b_ids, msgs, msgs) assert len(str(result.score).split(".")[-1]) <= 4 def test_partial_overlap_score_between_0_and_1(self) -> None: a_ids = {"c1", "c2", "c3"} b_ids = {"c1", "c4"} msgs = { "c1": "add melody", "c2": "melody riff", "c3": "lead melody", "c4": "solo melody", } result = compute_hub_dimension_divergence("melodic", a_ids, b_ids, msgs, msgs) assert 0.0 < result.score < 1.0 def _stub_commit(cid: str, parent_ids: list[str] | None = None) -> MusehubCommit: """Create a lightweight commit stub for unit tests (no DB session needed).""" c = MagicMock(spec=MusehubCommit) c.commit_id = cid c.parent_ids = parent_ids or [] c.timestamp = datetime.now(timezone.utc) return c class TestUnitFindCommonAncestor: def test_shared_commit_is_ancestor(self) -> None: base = _stub_commit("base") a1 = _stub_commit("a1", parent_ids=["base"]) b1 = _stub_commit("b1", parent_ids=["base"]) result = find_common_ancestor([a1, base], [b1, base]) assert result == "base" def test_disjoint_histories_returns_none(self) -> None: a = _stub_commit("a1") b = _stub_commit("b1") result = find_common_ancestor([a], [b]) assert result is None def test_same_branch_head_is_ancestor(self) -> None: c = _stub_commit("shared") result = find_common_ancestor([c], [c]) assert result == "shared" def test_empty_branches_returns_none(self) -> None: result = find_common_ancestor([], []) assert result is None class TestUnitGetCommitsSince: def test_none_base_returns_all(self) -> None: commits = [_stub_commit(f"c{i}") for i in range(5)] result = get_commits_since(commits, None) assert len(result) == 5 def test_excludes_base_commit(self) -> None: commits = [_stub_commit(f"c{i}") for i in range(3)] result = get_commits_since(commits, "c1") ids = [c.commit_id for c in result] assert "c1" not in ids assert "c0" in ids assert "c2" in ids def test_empty_list_returns_empty(self) -> None: assert get_commits_since([], "c1") == [] class TestUnitExtractAffectedSections: def test_finds_bridge(self) -> None: assert "Bridge" in extract_affected_sections(("rewrite the bridge",)) def test_finds_chorus_and_verse(self) -> None: sections = extract_affected_sections(("fix chorus timing", "extend the verse")) assert "Chorus" in sections assert "Verse" in sections def test_case_insensitive(self) -> None: assert "Intro" in extract_affected_sections(("add INTRO section",)) def test_no_section_keywords_returns_empty(self) -> None: assert extract_affected_sections(("fix melody", "update readme")) == [] def test_deduplication(self) -> None: sections = extract_affected_sections(("bridge fix", "bridge rewrite", "chorus")) assert sections.count("Bridge") == 1 def test_empty_messages(self) -> None: assert extract_affected_sections(()) == [] class TestUnitDeltaLabel: def test_zero_is_unchanged(self) -> None: assert _delta_label(0.0) == "unchanged" def test_nonzero_has_plus_prefix(self) -> None: label = _delta_label(0.5) assert label.startswith("+") assert "50.0" in label def test_small_fraction(self) -> None: label = _delta_label(0.001) assert label.startswith("+") class TestUnitBuildZeroDiffResponse: def test_with_dimensions(self) -> None: resp = build_zero_diff_response("proposal-1", "repo1", "feat", "main") assert len(resp.dimensions) == 5 assert resp.overall_score == 0.0 assert all(d.score == 0.0 for d in resp.dimensions) def test_without_dimensions_code_domain(self) -> None: resp = build_zero_diff_response( "proposal-1", "repo1", "feat", "main", include_dimensions=False ) assert resp.dimensions == [] assert resp.overall_score is None def test_affected_sections_empty(self) -> None: resp = build_zero_diff_response("proposal-1", "repo1", "a", "b") assert resp.affected_sections == [] # =========================================================================== # Layer 2 — Integration # =========================================================================== class TestIntegrationGetBranchCommits: async def test_returns_commits_for_branch(self, db_session: AsyncSession) -> None: repo_id = await _db_repo(db_session) await _db_commit(db_session, repo_id, branch="main", message="first") await _db_commit(db_session, repo_id, branch="main", message="second") await _db_commit(db_session, repo_id, branch="feat", message="feature") await db_session.flush() commits = await get_branch_commits(db_session, repo_id, "main") assert len(commits) == 2 assert all(c.branch == "main" for c in commits) async def test_newest_first_ordering(self, db_session: AsyncSession) -> None: repo_id = await _db_repo(db_session) ts = datetime(2026, 1, 1, tzinfo=timezone.utc) await _db_commit(db_session, repo_id, ts=ts, message="old") await _db_commit(db_session, repo_id, ts=ts + timedelta(hours=1), message="new") await db_session.flush() commits = await get_branch_commits(db_session, repo_id, "main") assert commits[0].message == "new" assert commits[1].message == "old" async def test_empty_branch_returns_empty(self, db_session: AsyncSession) -> None: repo_id = await _db_repo(db_session) await db_session.flush() commits = await get_branch_commits(db_session, repo_id, "nonexistent") assert commits == [] class TestIntegrationComputeHubDivergence: async def test_basic_divergence_two_branches( self, db_session: AsyncSession ) -> None: repo_id = await _db_repo(db_session) ts = datetime(2026, 1, 1, tzinfo=timezone.utc) base = await _db_commit( db_session, repo_id, branch="main", message="initial", ts=ts ) await _db_commit( db_session, repo_id, branch="main", message="add chord progression", ts=ts + timedelta(hours=1), parent_ids=[base.commit_id], ) await _db_commit( db_session, repo_id, branch="feat", message="add melody riff", ts=ts + timedelta(hours=1), parent_ids=[base.commit_id], ) await db_session.flush() result = await compute_hub_divergence( db_session, repo_id=repo_id, branch_a="main", branch_b="feat" ) assert result.repo_id == repo_id assert result.branch_a == "main" assert result.branch_b == "feat" assert len(result.dimensions) == 5 assert 0.0 <= result.overall_score <= 1.0 async def test_raises_on_empty_branch(self, db_session: AsyncSession) -> None: repo_id = await _db_repo(db_session) await _db_commit(db_session, repo_id, branch="main") await db_session.flush() with pytest.raises(ValueError, match="no commits"): await compute_hub_divergence( db_session, repo_id=repo_id, branch_a="main", branch_b="nonexistent" ) async def test_disjoint_branches_common_ancestor_none( self, db_session: AsyncSession ) -> None: """With get_branch_commits filtering by branch label, two normally diverged branches will always have common_ancestor=None — the DB model stores each commit against a single branch, so ancestor intersection is empty.""" repo_id = await _db_repo(db_session) ts = datetime(2026, 1, 1, tzinfo=timezone.utc) await _db_commit( db_session, repo_id, branch="main", message="main work", ts=ts ) await _db_commit( db_session, repo_id, branch="feat", message="feat work", ts=ts + timedelta(hours=1), ) await db_session.flush() result = await compute_hub_divergence( db_session, repo_id=repo_id, branch_a="main", branch_b="feat" ) # commit_id is PK → same commit can't be on two branches → no intersection assert result.common_ancestor is None async def test_no_common_ancestor_fresh_fork( self, db_session: AsyncSession ) -> None: """Two branches with completely disjoint histories → common_ancestor is None.""" repo_id = await _db_repo(db_session) await _db_commit(db_session, repo_id, branch="main", message="main only") await _db_commit(db_session, repo_id, branch="fork", message="fork only") await db_session.flush() result = await compute_hub_divergence( db_session, repo_id=repo_id, branch_a="main", branch_b="fork" ) assert result.common_ancestor is None async def test_overall_score_is_mean_of_dimensions( self, db_session: AsyncSession ) -> None: repo_id = await _db_repo(db_session) await _db_commit(db_session, repo_id, branch="main", message="chord melody") await _db_commit(db_session, repo_id, branch="feat", message="drum beat") await db_session.flush() result = await compute_hub_divergence( db_session, repo_id=repo_id, branch_a="main", branch_b="feat" ) expected = round(sum(d.score for d in result.dimensions) / 5, 4) assert abs(result.overall_score - expected) < 1e-6 async def test_all_messages_captured(self, db_session: AsyncSession) -> None: repo_id = await _db_repo(db_session) await _db_commit(db_session, repo_id, branch="main", message="main msg") await _db_commit(db_session, repo_id, branch="feat", message="feat msg") await db_session.flush() result = await compute_hub_divergence( db_session, repo_id=repo_id, branch_a="main", branch_b="feat" ) assert "main msg" in result.all_messages assert "feat msg" in result.all_messages class TestIntegrationBuildProposalDiffResponse: async def test_affected_sections_extracted( self, db_session: AsyncSession ) -> None: repo_id = await _db_repo(db_session) await _db_commit( db_session, repo_id, branch="main", message="rewrite bridge and chorus transition" ) await _db_commit( db_session, repo_id, branch="feat", message="add verse outro" ) await db_session.flush() result = await compute_hub_divergence( db_session, repo_id=repo_id, branch_a="main", branch_b="feat" ) resp = build_proposal_diff_response("proposal-1", "feat", "main", result) assert "Bridge" in resp.affected_sections or "Chorus" in resp.affected_sections async def test_five_dimensions_in_response( self, db_session: AsyncSession ) -> None: repo_id = await _db_repo(db_session) await _db_commit(db_session, repo_id, branch="main") await _db_commit(db_session, repo_id, branch="feat") await db_session.flush() result = await compute_hub_divergence( db_session, repo_id=repo_id, branch_a="main", branch_b="feat" ) resp = build_proposal_diff_response("proposal-1", "feat", "main", result) assert len(resp.dimensions) == 5 # =========================================================================== # Layer 3 — E2E # =========================================================================== class TestE2EDivergenceEndpoint: async def test_200_with_two_branches( self, client: AsyncClient, auth_headers: StrDict, db_session: AsyncSession, ) -> None: repo_id = await _api_repo(client, auth_headers) await _db_commit(db_session, repo_id, branch="main", message="add chord") await _db_commit(db_session, repo_id, branch="feat", message="add melody") await db_session.commit() r = await client.get( f"/api/repos/{repo_id}/divergence?branch_a=main&branch_b=feat", headers=auth_headers, ) assert r.status_code == 200 body = r.json() assert "repoId" in body assert "dimensions" in body assert len(body["dimensions"]) == 5 assert "overallScore" in body async def test_404_unknown_repo( self, client: AsyncClient, auth_headers: StrDict, ) -> None: r = await client.get( "/api/repos/nonexistent/divergence?branch_a=main&branch_b=feat", headers=auth_headers, ) assert r.status_code == 404 async def test_422_empty_branch( self, client: AsyncClient, auth_headers: StrDict, db_session: AsyncSession, ) -> None: repo_id = await _api_repo(client, auth_headers) await _db_commit(db_session, repo_id, branch="main") await db_session.commit() r = await client.get( f"/api/repos/{repo_id}/divergence?branch_a=main&branch_b=nonexistent", headers=auth_headers, ) assert r.status_code == 422 async def test_common_ancestor_field_in_response( self, client: AsyncClient, auth_headers: StrDict, db_session: AsyncSession, ) -> None: """commonAncestor field is always present in the response (may be null).""" repo_id = await _api_repo(client, auth_headers) await _db_commit(db_session, repo_id, branch="main", message="main work") await _db_commit(db_session, repo_id, branch="feat", message="feat work") await db_session.commit() r = await client.get( f"/api/repos/{repo_id}/divergence?branch_a=main&branch_b=feat", headers=auth_headers, ) assert r.status_code == 200 body = r.json() assert "commonAncestor" in body async def test_private_repo_requires_auth( self, client: AsyncClient, db_session: AsyncSession, ) -> None: repo_id = await _db_repo(db_session, visibility="private") await _db_commit(db_session, repo_id, branch="main") await _db_commit(db_session, repo_id, branch="feat") await db_session.commit() r = await client.get( f"/api/repos/{repo_id}/divergence?branch_a=main&branch_b=feat" ) assert r.status_code in (401, 403, 404) # =========================================================================== # Layer 4 — Stress # =========================================================================== class TestStress: async def test_50_commits_per_branch(self, db_session: AsyncSession) -> None: repo_id = await _db_repo(db_session) ts = datetime(2026, 1, 1, tzinfo=timezone.utc) messages = [ "add melody", "fix chord", "drum beat", "bridge section", "mix reverb", "update readme" ] for i in range(50): await _db_commit( db_session, repo_id, branch="main", message=messages[i % len(messages)], ts=ts + timedelta(minutes=i), ) for i in range(50): await _db_commit( db_session, repo_id, branch="feat", message=messages[(i + 2) % len(messages)], ts=ts + timedelta(minutes=i), ) await db_session.flush() result = await compute_hub_divergence( db_session, repo_id=repo_id, branch_a="main", branch_b="feat" ) assert len(result.dimensions) == 5 assert 0.0 <= result.overall_score <= 1.0 async def test_5_concurrent_divergence_computes( self, db_session: AsyncSession ) -> None: repo_id = await _db_repo(db_session) await _db_commit(db_session, repo_id, branch="main", message="chord") await _db_commit(db_session, repo_id, branch="feat", message="melody") await db_session.flush() results = await asyncio.gather( *[ compute_hub_divergence( db_session, repo_id=repo_id, branch_a="main", branch_b="feat" ) for _ in range(5) ] ) assert all(isinstance(r, MuseHubDivergenceResult) for r in results) async def test_dimension_divergence_1000_calls(self) -> None: """compute_hub_dimension_divergence is pure — 1000 calls must complete fast.""" a_ids = {f"c{i}" for i in range(20)} b_ids = {f"c{i + 10}" for i in range(20)} a_msgs = {f"c{i}": "add melody chord" for i in range(20)} b_msgs = {f"c{i + 10}": "drum groove beat" for i in range(20)} start = time.perf_counter() for _ in range(1000): compute_hub_dimension_divergence("melodic", a_ids, b_ids, a_msgs, b_msgs) elapsed = time.perf_counter() - start assert elapsed < 1.0, f"1000 dimension calls took {elapsed:.3f}s" # =========================================================================== # Layer 5 — Data Integrity # =========================================================================== class TestDataIntegrity: def test_score_always_in_0_1(self) -> None: for a_size in range(5): for b_size in range(5): a_ids = {f"a{i}" for i in range(a_size)} b_ids = {f"b{i}" for i in range(b_size)} a_msgs = {f"a{i}": "add melody" for i in range(a_size)} b_msgs = {f"b{i}": "add melody" for i in range(b_size)} result = compute_hub_dimension_divergence( "melodic", a_ids, b_ids, a_msgs, b_msgs ) assert 0.0 <= result.score <= 1.0 def test_score_symmetric(self) -> None: """score(A, B) == score(B, A).""" a_ids = {"c1", "c2"} b_ids = {"c3", "c4"} msgs = { "c1": "add melody", "c2": "melody riff", "c3": "add melody", "c4": "chord melody", } r_ab = compute_hub_dimension_divergence("melodic", a_ids, b_ids, msgs, msgs) r_ba = compute_hub_dimension_divergence("melodic", b_ids, a_ids, msgs, msgs) assert r_ab.score == r_ba.score async def test_overall_score_mean_of_five( self, db_session: AsyncSession ) -> None: repo_id = await _db_repo(db_session) await _db_commit( db_session, repo_id, branch="main", message="add chord melody rhythm mix structure" ) await _db_commit( db_session, repo_id, branch="feat", message="remove chord melody" ) await db_session.flush() result = await compute_hub_divergence( db_session, repo_id=repo_id, branch_a="main", branch_b="feat" ) expected_mean = round(sum(d.score for d in result.dimensions) / 5, 4) assert abs(result.overall_score - expected_mean) < 1e-6 async def test_identical_branches_all_scores_zero( self, db_session: AsyncSession ) -> None: """When both branches have exactly the same commits, divergence = 0.""" repo_id = await _db_repo(db_session) c = await _db_commit( db_session, repo_id, branch="main", message="add chord melody" ) # Add same commit on "feat" branch (same commit_id, different branch field) c2 = MusehubCommit( commit_id=_cid(), branch="feat", parent_ids=[c.commit_id], message="add chord melody", author="agent", timestamp=datetime.now(timezone.utc), ) db_session.add(c2) db_session.add(MusehubCommitRef(repo_id=repo_id, commit_id=c2.commit_id)) await db_session.flush() result = await compute_hub_divergence( db_session, repo_id=repo_id, branch_a="main", branch_b="feat" ) assert len(result.dimensions) == 5 def test_extract_affected_sections_stable_order(self) -> None: # Keywords appear in keyword-order as defined by _SECTION_RE msgs = ("bridge chorus verse intro outro",) sections = extract_affected_sections(msgs) assert len(sections) == 5 # Verify no duplicates assert len(sections) == len(set(sections)) # =========================================================================== # Layer 6 — Security # =========================================================================== class TestSecurity: async def test_private_repo_blocked_without_auth( self, client: AsyncClient, db_session: AsyncSession, ) -> None: repo_id = await _db_repo(db_session, visibility="private") await _db_commit(db_session, repo_id, branch="main") await _db_commit(db_session, repo_id, branch="feat") await db_session.commit() r = await client.get( f"/api/repos/{repo_id}/divergence?branch_a=main&branch_b=feat" ) assert r.status_code in (401, 403, 404) async def test_sql_injection_in_branch_name_safe( self, client: AsyncClient, auth_headers: StrDict, db_session: AsyncSession, ) -> None: repo_id = await _api_repo(client, auth_headers) await _db_commit(db_session, repo_id, branch="main") await db_session.commit() r = await client.get( f"/api/repos/{repo_id}/divergence" "?branch_a=main&branch_b='; DROP TABLE musehub_commits; --", headers=auth_headers, ) # parameterized query — returns 422 (no commits) not 500 assert r.status_code in (422, 404) def test_classify_message_no_injection_risk(self) -> None: """classify_message on arbitrary strings must not raise.""" payloads = [ "'; DROP TABLE x; --", "", "\x00\x01\x02", "A" * 10000, ] for p in payloads: result = classify_message(p) assert isinstance(result, set) def test_score_to_level_boundary_exhaustive(self) -> None: """All scores in [0, 1] map to a valid level — no crashes.""" for i in range(101): score = i / 100 level = score_to_level(score) assert level in MuseHubDivergenceLevel # =========================================================================== # Layer 7 — Performance # =========================================================================== class TestPerformance: async def test_compute_hub_divergence_30_commits_under_200ms( self, db_session: AsyncSession ) -> None: repo_id = await _db_repo(db_session) ts = datetime(2026, 1, 1, tzinfo=timezone.utc) for i in range(15): await _db_commit( db_session, repo_id, branch="main", message=f"commit {i} chord melody", ts=ts + timedelta(minutes=i), ) for i in range(15): await _db_commit( db_session, repo_id, branch="feat", message=f"feat {i} drum groove", ts=ts + timedelta(minutes=i), ) await db_session.flush() start = time.perf_counter() result = await compute_hub_divergence( db_session, repo_id=repo_id, branch_a="main", branch_b="feat" ) elapsed = time.perf_counter() - start assert result is not None assert elapsed < 0.2, f"compute_hub_divergence took {elapsed:.3f}s" def test_classify_message_under_1ms(self) -> None: msg = "add jazzy chord melody with reverb and bridge arrangement" start = time.perf_counter() for _ in range(10_000): classify_message(msg) elapsed = time.perf_counter() - start assert elapsed < 1.0, f"10000 classify_message calls took {elapsed:.3f}s" def test_find_common_ancestor_100_commits_fast(self) -> None: # Build 100 commits on each branch sharing first 50 shared = [_stub_commit(f"s{i}", [f"s{i-1}"] if i > 0 else []) for i in range(50)] a_only = [_stub_commit(f"a{i}", [f"s{49}"]) for i in range(50)] b_only = [_stub_commit(f"b{i}", [f"s{49}"]) for i in range(50)] a_commits = list(reversed(a_only)) + list(reversed(shared)) b_commits = list(reversed(b_only)) + list(reversed(shared)) start = time.perf_counter() for _ in range(100): find_common_ancestor(a_commits, b_commits) elapsed = time.perf_counter() - start assert elapsed < 0.5, f"100 find_common_ancestor calls took {elapsed:.3f}s"