gabriel / musehub public

test_divergence.py file-level

at sha256:3 · View file ↗ · Intel ↗

History
1 files
1 commits
0 hotspots
0 🧊 dead
0 💥 blast risk
sha256:0 fix: fall back to any indexed mpack in read_object_bytes when push mpac… · gabriel · Jun 17, 2026
1 """Section 26 — Divergence Engine: 7-layer test suite.
2
3 Covers musehub/services/musehub_divergence.py and the
4 GET /api/repos/{repo_id}/divergence endpoint.
5
6 Layer map
7 ---------
8 1. Unit — classify_message, score_to_level, compute_hub_dimension_divergence,
9 find_common_ancestor, get_commits_since, extract_affected_sections,
10 _delta_label, build_zero_diff_response, constants
11 2. Integration — get_branch_commits, compute_hub_divergence (full pipeline)
12 3. E2E — HTTP GET /api/repos/{repo_id}/divergence
13 4. Stress — large histories, many-dimension calls, concurrent computes
14 5. Data Integrity — score bounds, ordering, common ancestor correctness
15 6. Security — auth / visibility, invalid branch names safe
16 7. Performance — timing budgets
17 """
18 from __future__ import annotations
19
20 import asyncio
21 import secrets
22 import time
23 from datetime import datetime, timezone, timedelta
24
25 import pytest
26 from httpx import AsyncClient
27 from sqlalchemy.ext.asyncio import AsyncSession
28 from unittest.mock import MagicMock
29
30 from musehub.core.genesis import compute_identity_id, compute_repo_id
31 from musehub.db.musehub_repo_models import MusehubCommit, MusehubCommitRef, MusehubRepo
32 from musehub.types.json_types import StrDict
33 from musehub.services.musehub_divergence import (
34 ALL_DIMENSIONS,
35 _DIMENSION_PATTERNS,
36 _SECTION_RE,
37 MuseHubDivergenceLevel,
38 MuseHubDivergenceResult,
39 MuseHubDimensionDivergence,
40 _delta_label,
41 build_proposal_diff_response,
42 build_zero_diff_response,
43 classify_message,
44 compute_hub_dimension_divergence,
45 compute_hub_divergence,
46 extract_affected_sections,
47 find_common_ancestor,
48 get_branch_commits,
49 get_commits_since,
50 score_to_level,
51 )
52
53
54 # ---------------------------------------------------------------------------
55 # DB helpers
56 # ---------------------------------------------------------------------------
57
58
def _uid() -> str:
    """Return a fresh random identifier: 16 random bytes as 32 hex characters."""
    return secrets.token_bytes(16).hex()
61
62
def _cid() -> str:
    """Return a fresh random commit id (32 lowercase hex characters)."""
    n_bytes = 16
    return secrets.token_hex(n_bytes)
65
66
# Owner identity computed once at import time; _db_repo uses it both as the
# repo owner and as an input to compute_repo_id.
_TEST_OWNER_ID = compute_identity_id(b"test-divergence-owner")
68
69
async def _db_repo(session: AsyncSession, *, visibility: str = "private") -> str:
    """Insert a test repo with a random slug and return its ``repo_id``.

    The repo is owned by ``_TEST_OWNER_ID``; its id comes from
    ``compute_repo_id`` applied to the owner id, the slug, the string
    ``"code"`` and the ISO-formatted creation timestamp.

    Args:
        session: Session the repo row is added to.
        visibility: Value stored in the repo's ``visibility`` column.

    Returns:
        The ``repo_id`` of the newly added row.

    The row is flushed but not committed, so callers control the transaction.
    """
    # ``datetime``/``timezone`` come from the module-level import; the
    # duplicate function-scope import that used to live here was redundant.
    slug = f"repo-{secrets.token_hex(4)}"
    created_at = datetime.now(timezone.utc)
    repo_id = compute_repo_id(_TEST_OWNER_ID, slug, "code", created_at.isoformat())
    repo = MusehubRepo(
        repo_id=repo_id,
        name=slug,
        slug=slug,
        owner="testuser",
        owner_user_id=_TEST_OWNER_ID,
        visibility=visibility,
        created_at=created_at,
        updated_at=created_at,
    )
    session.add(repo)
    await session.flush()
    return repo.repo_id
88
89
async def _db_commit(
    session: AsyncSession,
    repo_id: str,
    *,
    branch: str = "main",
    message: str = "add groove",
    parent_ids: list[str] | None = None,
    ts: datetime | None = None,
) -> MusehubCommit:
    """Add a commit (and its repo link row) to the session and flush.

    Args:
        session: Session receiving the new rows.
        repo_id: Repo the commit is linked to through ``MusehubCommitRef``.
        branch: Branch label stored on the commit.
        message: Commit message.
        parent_ids: Parent commit ids; ``None`` or empty means no parents.
        ts: Commit timestamp; defaults to the current UTC time.

    Returns:
        The ``MusehubCommit`` instance that was added.
    """
    parents = parent_ids if parent_ids else []
    stamped_at = ts if ts else datetime.now(timezone.utc)
    commit = MusehubCommit(
        commit_id=_cid(),
        branch=branch,
        parent_ids=parents,
        message=message,
        author="agent",
        timestamp=stamped_at,
    )
    session.add(commit)
    link = MusehubCommitRef(repo_id=repo_id, commit_id=commit.commit_id)
    session.add(link)
    await session.flush()
    return commit
111
112
async def _api_repo(
    client: AsyncClient,
    auth_headers: StrDict,
    *,
    visibility: str = "private",
) -> str:
    """Create a repo through ``POST /api/repos`` and return its ``repoId``.

    Fails the calling test immediately (with the response text) when the
    endpoint does not answer 201.
    """
    payload = {
        "name": f"div-{_uid()[:8]}",
        "owner": "testuser",
        "visibility": visibility,
    }
    response = await client.post("/api/repos", json=payload, headers=auth_headers)
    assert response.status_code == 201, response.text
    created = response.json()
    return created["repoId"]
130
131
132 # ===========================================================================
133 # Layer 1 — Unit
134 # ===========================================================================
135
136
class TestUnitConstants:
    """Sanity checks on the module-level constants of the divergence service."""

    def test_all_dimensions_five(self) -> None:
        dimension_count = len(ALL_DIMENSIONS)
        assert dimension_count == 5

    def test_all_dimensions_names(self) -> None:
        expected_names = {"melodic", "harmonic", "rhythmic", "structural", "dynamic"}
        assert set(ALL_DIMENSIONS) == expected_names

    def test_dimension_patterns_covers_all(self) -> None:
        # Every declared dimension needs an entry in the pattern table.
        uncovered = [dim for dim in ALL_DIMENSIONS if dim not in _DIMENSION_PATTERNS]
        assert not uncovered

    def test_section_re_compiles(self) -> None:
        pattern_text = _SECTION_RE.pattern
        assert pattern_text is not None
150
151
class TestUnitClassifyMessage:
    """classify_message maps commit messages onto the dimensions they mention."""

    def test_melodic_keywords(self) -> None:
        for text in ("add melody line", "record lead solo", "fix pitch drift"):
            assert "melodic" in classify_message(text)

    def test_harmonic_keywords(self) -> None:
        for text in ("add chord progression", "change key to Dm"):
            assert "harmonic" in classify_message(text)

    def test_rhythmic_keywords(self) -> None:
        for text in ("adjust tempo to 120 bpm", "tighten drum groove"):
            assert "rhythmic" in classify_message(text)

    def test_structural_keywords(self) -> None:
        for text in ("rewrite bridge section", "add chorus after verse"):
            assert "structural" in classify_message(text)

    def test_dynamic_keywords(self) -> None:
        for text in ("apply reverb to guitar", "master mix levels"):
            assert "dynamic" in classify_message(text)

    def test_multi_dimension_message(self) -> None:
        # One message may hit several dimensions at once.
        found = classify_message("add jazzy chord melody with reverb")
        for expected in ("melodic", "harmonic", "dynamic"):
            assert expected in found

    def test_unclassified_returns_empty(self) -> None:
        for text in ("update README", "fix typo in config"):
            assert classify_message(text) == set()

    def test_case_insensitive(self) -> None:
        cases = (
            ("melodic", "Add MELODY line"),
            ("rhythmic", "DRUM pattern fix"),
        )
        for dimension, text in cases:
            assert dimension in classify_message(text)

    def test_empty_message(self) -> None:
        nothing = classify_message("")
        assert nothing == set()
190
191
class TestUnitScoreToLevel:
    """score_to_level thresholds: boundaries at 0.15, 0.40 and 0.70."""

    def test_zero_is_none(self) -> None:
        level = score_to_level(0.0)
        assert level == MuseHubDivergenceLevel.NONE

    def test_boundary_0_15_is_low(self) -> None:
        level = score_to_level(0.15)
        assert level == MuseHubDivergenceLevel.LOW

    def test_mid_range_low(self) -> None:
        level = score_to_level(0.25)
        assert level == MuseHubDivergenceLevel.LOW

    def test_boundary_0_40_is_med(self) -> None:
        level = score_to_level(0.40)
        assert level == MuseHubDivergenceLevel.MED

    def test_mid_range_med(self) -> None:
        level = score_to_level(0.55)
        assert level == MuseHubDivergenceLevel.MED

    def test_boundary_0_70_is_high(self) -> None:
        level = score_to_level(0.70)
        assert level == MuseHubDivergenceLevel.HIGH

    def test_one_is_high(self) -> None:
        level = score_to_level(1.0)
        assert level == MuseHubDivergenceLevel.HIGH

    def test_just_below_0_15_is_none(self) -> None:
        level = score_to_level(0.14)
        assert level == MuseHubDivergenceLevel.NONE
216
217
class TestUnitComputeHubDimensionDivergence:
    """Unit tests for compute_hub_dimension_divergence on plain id/message inputs.

    Each test passes two sets of commit ids plus id-to-message mappings and
    inspects the score, level, description or per-branch counts of the result.
    """

    def test_identical_sets_score_zero(self) -> None:
        a_ids = {"c1", "c2"}
        b_ids = {"c1", "c2"}
        a_msgs = {"c1": "add chord", "c2": "fix chord progression"}
        b_msgs = {"c1": "add chord", "c2": "fix chord progression"}
        result = compute_hub_dimension_divergence("harmonic", a_ids, b_ids, a_msgs, b_msgs)
        assert result.score == 0.0
        assert result.level == MuseHubDivergenceLevel.NONE

    def test_disjoint_sets_score_one(self) -> None:
        a_ids = {"c1"}
        b_ids = {"c2"}
        a_msgs = {"c1": "add chord"}
        b_msgs = {"c2": "fix harmony"}
        result = compute_hub_dimension_divergence("harmonic", a_ids, b_ids, a_msgs, b_msgs)
        assert result.score == 1.0
        assert result.level == MuseHubDivergenceLevel.HIGH

    def test_no_matching_dimension_score_zero(self) -> None:
        a_ids = {"c1"}
        b_ids = {"c2"}
        a_msgs = {"c1": "fix typo"}  # no harmonic keywords
        b_msgs = {"c2": "update readme"}
        result = compute_hub_dimension_divergence("harmonic", a_ids, b_ids, a_msgs, b_msgs)
        assert result.score == 0.0
        assert "No harmonic" in result.description

    def test_branch_commit_counts(self) -> None:
        a_ids = {"c1", "c2"}
        b_ids = {"c3"}
        a_msgs = {"c1": "add melody", "c2": "fix melody riff"}
        b_msgs = {"c3": "add melody"}
        result = compute_hub_dimension_divergence("melodic", a_ids, b_ids, a_msgs, b_msgs)
        assert result.branch_a_commits == 2
        assert result.branch_b_commits == 1

    def test_score_rounded_to_4dp(self) -> None:
        # c1 is shared; c2 and c3 each live on one side only, so the symmetric
        # difference has 2 ids and the union 3. The expected score is about
        # 0.6667 (presumably |A ^ B| / |A | B| -- confirm against the service).
        a_ids = {"c1", "c2"}
        b_ids = {"c1", "c3"}
        msgs = {"c1": "add chord", "c2": "fix harmony key", "c3": "harmonic voicing"}
        result = compute_hub_dimension_divergence("harmonic", a_ids, b_ids, msgs, msgs)
        # Compare numerically instead of parsing str(score): the string form
        # switches to scientific notation for very small floats, which would
        # make a digit count after "." meaningless.
        assert result.score == round(result.score, 4)

    def test_partial_overlap_score_between_0_and_1(self) -> None:
        a_ids = {"c1", "c2", "c3"}
        b_ids = {"c1", "c4"}
        msgs = {
            "c1": "add melody",
            "c2": "melody riff",
            "c3": "lead melody",
            "c4": "solo melody",
        }
        result = compute_hub_dimension_divergence("melodic", a_ids, b_ids, msgs, msgs)
        assert 0.0 < result.score < 1.0
279
280
def _stub_commit(cid: str, parent_ids: list[str] | None = None) -> MusehubCommit:
    """Build a ``MagicMock`` shaped like ``MusehubCommit`` for DB-free unit tests.

    Only ``commit_id``, ``parent_ids`` and ``timestamp`` (current UTC time)
    are populated; a missing or empty ``parent_ids`` becomes ``[]``.
    """
    stub = MagicMock(spec=MusehubCommit)
    stub.commit_id = cid
    stub.parent_ids = parent_ids if parent_ids else []
    stub.timestamp = datetime.now(timezone.utc)
    return stub
288
289
class TestUnitFindCommonAncestor:
    """find_common_ancestor exercised on in-memory commit stubs."""

    def test_shared_commit_is_ancestor(self) -> None:
        root = _stub_commit("base")
        tip_a = _stub_commit("a1", parent_ids=["base"])
        tip_b = _stub_commit("b1", parent_ids=["base"])
        ancestor = find_common_ancestor([tip_a, root], [tip_b, root])
        assert ancestor == "base"

    def test_disjoint_histories_returns_none(self) -> None:
        only_a = [_stub_commit("a1")]
        only_b = [_stub_commit("b1")]
        assert find_common_ancestor(only_a, only_b) is None

    def test_same_branch_head_is_ancestor(self) -> None:
        head = _stub_commit("shared")
        ancestor = find_common_ancestor([head], [head])
        assert ancestor == "shared"

    def test_empty_branches_returns_none(self) -> None:
        assert find_common_ancestor([], []) is None
312
313
class TestUnitGetCommitsSince:
    """get_commits_since filters a commit list relative to a base commit id."""

    def test_none_base_returns_all(self) -> None:
        history = [_stub_commit(f"c{i}") for i in range(5)]
        kept = get_commits_since(history, None)
        assert len(kept) == 5

    def test_excludes_base_commit(self) -> None:
        history = [_stub_commit(f"c{i}") for i in range(3)]
        kept_ids = [commit.commit_id for commit in get_commits_since(history, "c1")]
        # The base itself is dropped, its neighbours survive.
        assert "c1" not in kept_ids
        for survivor in ("c0", "c2"):
            assert survivor in kept_ids

    def test_empty_list_returns_empty(self) -> None:
        kept = get_commits_since([], "c1")
        assert kept == []
330
331
class TestUnitExtractAffectedSections:
    """extract_affected_sections pulls capitalised section names from messages."""

    def test_finds_bridge(self) -> None:
        found = extract_affected_sections(("rewrite the bridge",))
        assert "Bridge" in found

    def test_finds_chorus_and_verse(self) -> None:
        found = extract_affected_sections(("fix chorus timing", "extend the verse"))
        for name in ("Chorus", "Verse"):
            assert name in found

    def test_case_insensitive(self) -> None:
        found = extract_affected_sections(("add INTRO section",))
        assert "Intro" in found

    def test_no_section_keywords_returns_empty(self) -> None:
        found = extract_affected_sections(("fix melody", "update readme"))
        assert found == []

    def test_deduplication(self) -> None:
        # "bridge" occurs in two messages but must be reported once.
        found = extract_affected_sections(("bridge fix", "bridge rewrite", "chorus"))
        assert found.count("Bridge") == 1

    def test_empty_messages(self) -> None:
        found = extract_affected_sections(())
        assert found == []
353
354
class TestUnitDeltaLabel:
    """_delta_label renders a score as a human-readable delta string."""

    def test_zero_is_unchanged(self) -> None:
        text = _delta_label(0.0)
        assert text == "unchanged"

    def test_nonzero_has_plus_prefix(self) -> None:
        text = _delta_label(0.5)
        assert text.startswith("+")
        assert "50.0" in text

    def test_small_fraction(self) -> None:
        text = _delta_label(0.001)
        assert text.startswith("+")
367
368
class TestUnitBuildZeroDiffResponse:
    """build_zero_diff_response yields an all-zero (or dimension-less) response."""

    def test_with_dimensions(self) -> None:
        response = build_zero_diff_response("proposal-1", "repo1", "feat", "main")
        assert len(response.dimensions) == 5
        assert response.overall_score == 0.0
        nonzero = [dim for dim in response.dimensions if not dim.score == 0.0]
        assert not nonzero

    def test_without_dimensions_code_domain(self) -> None:
        response = build_zero_diff_response(
            "proposal-1",
            "repo1",
            "feat",
            "main",
            include_dimensions=False,
        )
        assert response.dimensions == []
        assert response.overall_score is None

    def test_affected_sections_empty(self) -> None:
        response = build_zero_diff_response("proposal-1", "repo1", "a", "b")
        assert response.affected_sections == []
386
387
388 # ===========================================================================
389 # Layer 2 — Integration
390 # ===========================================================================
391
392
class TestIntegrationGetBranchCommits:
    """get_branch_commits against a real session: filtering and ordering."""

    async def test_returns_commits_for_branch(self, db_session: AsyncSession) -> None:
        repo_id = await _db_repo(db_session)
        seeded = (("main", "first"), ("main", "second"), ("feat", "feature"))
        for branch_name, text in seeded:
            await _db_commit(db_session, repo_id, branch=branch_name, message=text)
        await db_session.flush()

        on_main = await get_branch_commits(db_session, repo_id, "main")
        assert len(on_main) == 2
        assert all(commit.branch == "main" for commit in on_main)

    async def test_newest_first_ordering(self, db_session: AsyncSession) -> None:
        repo_id = await _db_repo(db_session)
        earlier = datetime(2026, 1, 1, tzinfo=timezone.utc)
        later = earlier + timedelta(hours=1)
        await _db_commit(db_session, repo_id, ts=earlier, message="old")
        await _db_commit(db_session, repo_id, ts=later, message="new")
        await db_session.flush()

        on_main = await get_branch_commits(db_session, repo_id, "main")
        newest, oldest = on_main[0], on_main[1]
        assert newest.message == "new"
        assert oldest.message == "old"

    async def test_empty_branch_returns_empty(self, db_session: AsyncSession) -> None:
        repo_id = await _db_repo(db_session)
        await db_session.flush()

        found = await get_branch_commits(db_session, repo_id, "nonexistent")
        assert found == []
422
423
class TestIntegrationComputeHubDivergence:
    """Full-pipeline checks of compute_hub_divergence on seeded branches."""

    async def test_basic_divergence_two_branches(
        self, db_session: AsyncSession
    ) -> None:
        repo_id = await _db_repo(db_session)
        t0 = datetime(2026, 1, 1, tzinfo=timezone.utc)
        t1 = t0 + timedelta(hours=1)
        root = await _db_commit(
            db_session, repo_id, branch="main", message="initial", ts=t0
        )
        # Both children hang off the same root, one per branch.
        children = (("main", "add chord progression"), ("feat", "add melody riff"))
        for branch_name, text in children:
            await _db_commit(
                db_session,
                repo_id,
                branch=branch_name,
                message=text,
                ts=t1,
                parent_ids=[root.commit_id],
            )
        await db_session.flush()

        outcome = await compute_hub_divergence(
            db_session, repo_id=repo_id, branch_a="main", branch_b="feat"
        )
        assert outcome.repo_id == repo_id
        assert outcome.branch_a == "main"
        assert outcome.branch_b == "feat"
        assert len(outcome.dimensions) == 5
        assert 0.0 <= outcome.overall_score <= 1.0

    async def test_raises_on_empty_branch(self, db_session: AsyncSession) -> None:
        repo_id = await _db_repo(db_session)
        await _db_commit(db_session, repo_id, branch="main")
        await db_session.flush()

        with pytest.raises(ValueError, match="no commits"):
            await compute_hub_divergence(
                db_session, repo_id=repo_id, branch_a="main", branch_b="nonexistent"
            )

    async def test_disjoint_branches_common_ancestor_none(
        self, db_session: AsyncSession
    ) -> None:
        """Branch-label filtering leaves diverged branches without a shared commit.

        Each commit row is stored against a single branch, so the two commit
        lists fetched per branch never intersect and common_ancestor is None.
        """
        repo_id = await _db_repo(db_session)
        t0 = datetime(2026, 1, 1, tzinfo=timezone.utc)
        await _db_commit(
            db_session, repo_id, branch="main", message="main work", ts=t0
        )
        await _db_commit(
            db_session,
            repo_id,
            branch="feat",
            message="feat work",
            ts=t0 + timedelta(hours=1),
        )
        await db_session.flush()

        outcome = await compute_hub_divergence(
            db_session, repo_id=repo_id, branch_a="main", branch_b="feat"
        )
        # commit_id is the primary key, so one commit cannot sit on two branches.
        assert outcome.common_ancestor is None

    async def test_no_common_ancestor_fresh_fork(
        self, db_session: AsyncSession
    ) -> None:
        """Completely unrelated histories report no common ancestor."""
        repo_id = await _db_repo(db_session)
        for branch_name, text in (("main", "main only"), ("fork", "fork only")):
            await _db_commit(db_session, repo_id, branch=branch_name, message=text)
        await db_session.flush()

        outcome = await compute_hub_divergence(
            db_session, repo_id=repo_id, branch_a="main", branch_b="fork"
        )
        assert outcome.common_ancestor is None

    async def test_overall_score_is_mean_of_dimensions(
        self, db_session: AsyncSession
    ) -> None:
        repo_id = await _db_repo(db_session)
        for branch_name, text in (("main", "chord melody"), ("feat", "drum beat")):
            await _db_commit(db_session, repo_id, branch=branch_name, message=text)
        await db_session.flush()

        outcome = await compute_hub_divergence(
            db_session, repo_id=repo_id, branch_a="main", branch_b="feat"
        )
        score_total = sum(dim.score for dim in outcome.dimensions)
        mean_score = round(score_total / 5, 4)
        assert abs(outcome.overall_score - mean_score) < 1e-6

    async def test_all_messages_captured(self, db_session: AsyncSession) -> None:
        repo_id = await _db_repo(db_session)
        for branch_name, text in (("main", "main msg"), ("feat", "feat msg")):
            await _db_commit(db_session, repo_id, branch=branch_name, message=text)
        await db_session.flush()

        outcome = await compute_hub_divergence(
            db_session, repo_id=repo_id, branch_a="main", branch_b="feat"
        )
        for text in ("main msg", "feat msg"):
            assert text in outcome.all_messages
526
527
class TestIntegrationBuildProposalDiffResponse:
    """build_proposal_diff_response fed with a real compute_hub_divergence result."""

    async def test_affected_sections_extracted(
        self, db_session: AsyncSession
    ) -> None:
        repo_id = await _db_repo(db_session)
        seeded = (
            ("main", "rewrite bridge and chorus transition"),
            ("feat", "add verse outro"),
        )
        for branch_name, text in seeded:
            await _db_commit(db_session, repo_id, branch=branch_name, message=text)
        await db_session.flush()

        divergence = await compute_hub_divergence(
            db_session, repo_id=repo_id, branch_a="main", branch_b="feat"
        )
        response = build_proposal_diff_response("proposal-1", "feat", "main", divergence)
        sections = response.affected_sections
        assert "Bridge" in sections or "Chorus" in sections

    async def test_five_dimensions_in_response(
        self, db_session: AsyncSession
    ) -> None:
        repo_id = await _db_repo(db_session)
        for branch_name in ("main", "feat"):
            await _db_commit(db_session, repo_id, branch=branch_name)
        await db_session.flush()

        divergence = await compute_hub_divergence(
            db_session, repo_id=repo_id, branch_a="main", branch_b="feat"
        )
        response = build_proposal_diff_response("proposal-1", "feat", "main", divergence)
        assert len(response.dimensions) == 5
562
563
564 # ===========================================================================
565 # Layer 3 — E2E
566 # ===========================================================================
567
568
class TestE2EDivergenceEndpoint:
    """HTTP-level tests for GET /api/repos/{repo_id}/divergence."""

    async def test_200_with_two_branches(
        self,
        client: AsyncClient,
        auth_headers: StrDict,
        db_session: AsyncSession,
    ) -> None:
        repo_id = await _api_repo(client, auth_headers)
        for branch_name, text in (("main", "add chord"), ("feat", "add melody")):
            await _db_commit(db_session, repo_id, branch=branch_name, message=text)
        await db_session.commit()

        url = f"/api/repos/{repo_id}/divergence?branch_a=main&branch_b=feat"
        response = await client.get(url, headers=auth_headers)
        assert response.status_code == 200
        payload = response.json()
        assert "repoId" in payload
        assert "dimensions" in payload
        assert len(payload["dimensions"]) == 5
        assert "overallScore" in payload

    async def test_404_unknown_repo(
        self,
        client: AsyncClient,
        auth_headers: StrDict,
    ) -> None:
        url = "/api/repos/nonexistent/divergence?branch_a=main&branch_b=feat"
        response = await client.get(url, headers=auth_headers)
        assert response.status_code == 404

    async def test_422_empty_branch(
        self,
        client: AsyncClient,
        auth_headers: StrDict,
        db_session: AsyncSession,
    ) -> None:
        repo_id = await _api_repo(client, auth_headers)
        await _db_commit(db_session, repo_id, branch="main")
        await db_session.commit()

        # branch_b has no commits at all.
        url = f"/api/repos/{repo_id}/divergence?branch_a=main&branch_b=nonexistent"
        response = await client.get(url, headers=auth_headers)
        assert response.status_code == 422

    async def test_common_ancestor_field_in_response(
        self,
        client: AsyncClient,
        auth_headers: StrDict,
        db_session: AsyncSession,
    ) -> None:
        """The commonAncestor key must be present even when its value is null."""
        repo_id = await _api_repo(client, auth_headers)
        for branch_name, text in (("main", "main work"), ("feat", "feat work")):
            await _db_commit(db_session, repo_id, branch=branch_name, message=text)
        await db_session.commit()

        url = f"/api/repos/{repo_id}/divergence?branch_a=main&branch_b=feat"
        response = await client.get(url, headers=auth_headers)
        assert response.status_code == 200
        payload = response.json()
        assert "commonAncestor" in payload

    async def test_private_repo_requires_auth(
        self,
        client: AsyncClient,
        db_session: AsyncSession,
    ) -> None:
        repo_id = await _db_repo(db_session, visibility="private")
        for branch_name in ("main", "feat"):
            await _db_commit(db_session, repo_id, branch=branch_name)
        await db_session.commit()

        # No auth headers on purpose.
        url = f"/api/repos/{repo_id}/divergence?branch_a=main&branch_b=feat"
        response = await client.get(url)
        assert response.status_code in (401, 403, 404)
653
654
655 # ===========================================================================
656 # Layer 4 — Stress
657 # ===========================================================================
658
659
class TestStress:
    """Larger inputs and repeated/concurrent invocations."""

    async def test_50_commits_per_branch(self, db_session: AsyncSession) -> None:
        repo_id = await _db_repo(db_session)
        t0 = datetime(2026, 1, 1, tzinfo=timezone.utc)
        messages = [
            "add melody", "fix chord", "drum beat", "bridge section",
            "mix reverb", "update readme"
        ]
        # main cycles through the messages from index 0, feat from index 2.
        for branch_name, shift in (("main", 0), ("feat", 2)):
            for i in range(50):
                await _db_commit(
                    db_session,
                    repo_id,
                    branch=branch_name,
                    message=messages[(i + shift) % len(messages)],
                    ts=t0 + timedelta(minutes=i),
                )
        await db_session.flush()

        outcome = await compute_hub_divergence(
            db_session, repo_id=repo_id, branch_a="main", branch_b="feat"
        )
        assert len(outcome.dimensions) == 5
        assert 0.0 <= outcome.overall_score <= 1.0

    async def test_5_concurrent_divergence_computes(
        self, db_session: AsyncSession
    ) -> None:
        repo_id = await _db_repo(db_session)
        for branch_name, text in (("main", "chord"), ("feat", "melody")):
            await _db_commit(db_session, repo_id, branch=branch_name, message=text)
        await db_session.flush()

        # NOTE(review): all five coroutines share one AsyncSession; SQLAlchemy
        # documents AsyncSession as not safe for concurrent tasks -- confirm
        # the fixture/driver tolerates this.
        pending = []
        for _ in range(5):
            pending.append(
                compute_hub_divergence(
                    db_session, repo_id=repo_id, branch_a="main", branch_b="feat"
                )
            )
        outcomes = await asyncio.gather(*pending)
        assert all(isinstance(item, MuseHubDivergenceResult) for item in outcomes)

    async def test_dimension_divergence_1000_calls(self) -> None:
        """1000 calls of compute_hub_dimension_divergence must finish within 1s."""
        a_ids = {f"c{i}" for i in range(20)}
        b_ids = {f"c{i + 10}" for i in range(20)}
        a_msgs = {f"c{i}": "add melody chord" for i in range(20)}
        b_msgs = {f"c{i + 10}": "drum groove beat" for i in range(20)}

        began = time.perf_counter()
        for _ in range(1000):
            compute_hub_dimension_divergence("melodic", a_ids, b_ids, a_msgs, b_msgs)
        elapsed = time.perf_counter() - began
        assert elapsed < 1.0, f"1000 dimension calls took {elapsed:.3f}s"
718
719
720 # ===========================================================================
721 # Layer 5 — Data Integrity
722 # ===========================================================================
723
724
class TestDataIntegrity:
    """Invariants of the divergence results: bounds, symmetry, mean, ordering."""

    def test_score_always_in_0_1(self) -> None:
        for a_size in range(5):
            for b_size in range(5):
                a_ids = {f"a{i}" for i in range(a_size)}
                b_ids = {f"b{i}" for i in range(b_size)}
                a_msgs = {f"a{i}": "add melody" for i in range(a_size)}
                b_msgs = {f"b{i}": "add melody" for i in range(b_size)}
                result = compute_hub_dimension_divergence(
                    "melodic", a_ids, b_ids, a_msgs, b_msgs
                )
                assert 0.0 <= result.score <= 1.0

    def test_score_symmetric(self) -> None:
        """score(A, B) == score(B, A)."""
        a_ids = {"c1", "c2"}
        b_ids = {"c3", "c4"}
        msgs = {
            "c1": "add melody",
            "c2": "melody riff",
            "c3": "add melody",
            "c4": "chord melody",
        }
        r_ab = compute_hub_dimension_divergence("melodic", a_ids, b_ids, msgs, msgs)
        r_ba = compute_hub_dimension_divergence("melodic", b_ids, a_ids, msgs, msgs)
        assert r_ab.score == r_ba.score

    async def test_overall_score_mean_of_five(
        self, db_session: AsyncSession
    ) -> None:
        repo_id = await _db_repo(db_session)
        await _db_commit(
            db_session, repo_id, branch="main",
            message="add chord melody rhythm mix structure"
        )
        await _db_commit(
            db_session, repo_id, branch="feat",
            message="remove chord melody"
        )
        await db_session.flush()

        result = await compute_hub_divergence(
            db_session, repo_id=repo_id, branch_a="main", branch_b="feat"
        )
        expected_mean = round(sum(d.score for d in result.dimensions) / 5, 4)
        assert abs(result.overall_score - expected_mean) < 1e-6

    async def test_identical_branches_all_scores_zero(
        self, db_session: AsyncSession
    ) -> None:
        """Two branches carrying the same message still produce five dimensions.

        NOTE: despite the test name, no zero score is asserted here. The
        "feat" commit receives a fresh commit_id (it is a child of the
        "main" commit, not the same commit), so the two branches do not hold
        identical commits.
        """
        repo_id = await _db_repo(db_session)
        main_commit = await _db_commit(
            db_session, repo_id, branch="main", message="add chord melody"
        )
        # Same message on "feat", as a distinct commit parented on the main
        # commit; built through the shared helper instead of by hand.
        await _db_commit(
            db_session,
            repo_id,
            branch="feat",
            message="add chord melody",
            parent_ids=[main_commit.commit_id],
        )
        await db_session.flush()

        result = await compute_hub_divergence(
            db_session, repo_id=repo_id, branch_a="main", branch_b="feat"
        )
        assert len(result.dimensions) == 5

    def test_extract_affected_sections_stable_order(self) -> None:
        msgs = ("bridge chorus verse intro outro",)
        sections = extract_affected_sections(msgs)
        assert len(sections) == 5
        # Verify no duplicates
        assert len(sections) == len(set(sections))
        # Stability: the same input must give the same ordering on a second call.
        assert extract_affected_sections(msgs) == sections
805
806
807 # ===========================================================================
808 # Layer 6 — Security
809 # ===========================================================================
810
811
class TestSecurity:
    """Auth/visibility enforcement and safe handling of hostile input."""

    async def test_private_repo_blocked_without_auth(
        self,
        client: AsyncClient,
        db_session: AsyncSession,
    ) -> None:
        repo_id = await _db_repo(db_session, visibility="private")
        await _db_commit(db_session, repo_id, branch="main")
        await _db_commit(db_session, repo_id, branch="feat")
        await db_session.commit()

        # Deliberately sent without auth headers.
        r = await client.get(
            f"/api/repos/{repo_id}/divergence?branch_a=main&branch_b=feat"
        )
        assert r.status_code in (401, 403, 404)

    async def test_sql_injection_in_branch_name_safe(
        self,
        client: AsyncClient,
        auth_headers: StrDict,
        db_session: AsyncSession,
    ) -> None:
        repo_id = await _api_repo(client, auth_headers)
        await _db_commit(db_session, repo_id, branch="main")
        await db_session.commit()

        # Pass the payload through ``params`` so httpx percent-encodes it and
        # the handler receives the exact string (quote, spaces and ";"
        # included) instead of relying on how a raw URL gets normalised.
        r = await client.get(
            f"/api/repos/{repo_id}/divergence",
            params={
                "branch_a": "main",
                "branch_b": "'; DROP TABLE musehub_commits; --",
            },
            headers=auth_headers,
        )
        # parameterized query — returns 422 (no commits) not 500
        assert r.status_code in (422, 404)

    def test_classify_message_no_injection_risk(self) -> None:
        """classify_message on arbitrary strings must not raise."""
        payloads = [
            "'; DROP TABLE x; --",
            "<script>alert(1)</script>",
            "\x00\x01\x02",
            "A" * 10000,
        ]
        for p in payloads:
            result = classify_message(p)
            assert isinstance(result, set)

    def test_score_to_level_boundary_exhaustive(self) -> None:
        """All scores in [0, 1] map to a valid level — no crashes."""
        for i in range(101):
            score = i / 100
            level = score_to_level(score)
            assert level in MuseHubDivergenceLevel
864
865
866 # ===========================================================================
867 # Layer 7 — Performance
868 # ===========================================================================
869
870
class TestPerformance:
    """Wall-clock budgets for the hot paths of the divergence service."""

    async def test_compute_hub_divergence_30_commits_under_200ms(
        self, db_session: AsyncSession
    ) -> None:
        repo_id = await _db_repo(db_session)
        t0 = datetime(2026, 1, 1, tzinfo=timezone.utc)
        # 15 commits on main first, then 15 on feat.
        for i in range(15):
            await _db_commit(
                db_session,
                repo_id,
                branch="main",
                message=f"commit {i} chord melody",
                ts=t0 + timedelta(minutes=i),
            )
        for i in range(15):
            await _db_commit(
                db_session,
                repo_id,
                branch="feat",
                message=f"feat {i} drum groove",
                ts=t0 + timedelta(minutes=i),
            )
        await db_session.flush()

        began = time.perf_counter()
        outcome = await compute_hub_divergence(
            db_session, repo_id=repo_id, branch_a="main", branch_b="feat"
        )
        elapsed = time.perf_counter() - began

        assert outcome is not None
        assert elapsed < 0.2, f"compute_hub_divergence took {elapsed:.3f}s"

    def test_classify_message_under_1ms(self) -> None:
        # Budget is 1s for 10,000 calls in total.
        text = "add jazzy chord melody with reverb and bridge arrangement"
        began = time.perf_counter()
        for _ in range(10_000):
            classify_message(text)
        elapsed = time.perf_counter() - began
        assert elapsed < 1.0, f"10000 classify_message calls took {elapsed:.3f}s"

    def test_find_common_ancestor_100_commits_fast(self) -> None:
        # 50 shared commits chained s0 <- s1 <- ... <- s49, plus 50 commits
        # per branch that each name s49 as their parent.
        trunk = [_stub_commit(f"s{i}", [f"s{i-1}"] if i > 0 else []) for i in range(50)]
        side_a = [_stub_commit(f"a{i}", [f"s{49}"]) for i in range(50)]
        side_b = [_stub_commit(f"b{i}", [f"s{49}"]) for i in range(50)]

        # Reverse each list so the highest index comes first.
        a_commits = side_a[::-1] + trunk[::-1]
        b_commits = side_b[::-1] + trunk[::-1]

        began = time.perf_counter()
        for _ in range(100):
            find_common_ancestor(a_commits, b_commits)
        elapsed = time.perf_counter() - began
        assert elapsed < 0.5, f"100 find_common_ancestor calls took {elapsed:.3f}s"