gabriel / musehub public
test_clones_state_integrity.py python
232 lines 8.0 KB
Raw
sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595 fix: typing audit — 0 violations, 0 untyped defs across all… Sonnet 4.6 minor ⚠ breaking 21 days ago
1 """Tier 5 — State integrity tests for musehub_intel_clones (issue #17).
2
3 Validates that the DB table invariants hold under normal and pathological
4 conditions: JSON parseability, tier enum, count/content agreement, upsert
5 idempotency, stale-row update, and CASCADE delete behavior.
6
7 Cases:
8 SI01 All DB rows have parseable members_json
9 SI02 All tier values are exactly "exact" or "near"
10 SI03 member_count matches len(json.loads(members_json)) for each row
11 SI04 Upsert is idempotent — running twice = same row count (issued via the local _insert helper; ClonesProvider is not invoked)
12 SI05 Upsert updates the existing row on re-run with a new count (same _insert helper)
13 SI06 CASCADE delete — deleting repo removes all clones rows
14 """
15 from __future__ import annotations
16
17 import json
18 from unittest.mock import AsyncMock, patch
19
20 import pytest
21 import pytest_asyncio
22 import sqlalchemy as sa
23 from sqlalchemy.dialects.postgresql import insert as pg_insert
24 from sqlalchemy.ext.asyncio import AsyncSession
25
26 from musehub.db.musehub_intel_models import MusehubIntelClones
27 from musehub.db.musehub_repo_models import MusehubRepo
28 from muse.core.types import long_id
29 from tests.factories import create_repo
30
31 _REF = long_id("a" * 64)
32
33
def _make_members(n: int, single_file: bool = False) -> str:
    """Serialise ``n`` synthetic clone-member records as a JSON array string.

    Record ``i`` describes a function named ``fn_<i>``.  With ``single_file``
    set, every address points at ``src/a.py``; otherwise the file stem
    rotates through ``a``..``d`` according to ``i % 4``.
    """

    def _member(index: int) -> dict[str, object]:
        # 97 == ord("a"), so the stem cycles a, b, c, d unless pinned to "a".
        stem = "a" if single_file else chr(97 + index % 4)
        return {
            "address": f"src/{stem}.py::fn_{index}",
            "kind": "function",
            "language": "Python",
            "body_hash": long_id("a" * 64),
            "signature_id": long_id("b" * 64),
            "content_id": long_id("a" * 64),
        }

    return json.dumps([_member(index) for index in range(n)])
46
47
async def _insert(
    session: AsyncSession,
    repo_id: str,
    cluster_hash: str,
    tier: str = "exact",
    member_count: int = 2,
    members_json: str | None = None,
) -> None:
    """Upsert a single ``MusehubIntelClones`` row, then commit the session.

    When ``members_json`` is omitted, a payload with ``member_count`` entries
    is generated by ``_make_members``.  A conflict on
    ``(repo_id, cluster_hash)`` overwrites ``tier``, ``member_count`` and
    ``members_json``; ``ref`` is not part of the update set.
    """
    if members_json is None:
        payload = _make_members(member_count)
    else:
        payload = members_json

    insert_stmt = pg_insert(MusehubIntelClones).values(
        repo_id=repo_id,
        cluster_hash=cluster_hash,
        tier=tier,
        member_count=member_count,
        members_json=payload,
        ref=_REF,
    )
    upsert_stmt = insert_stmt.on_conflict_do_update(
        index_elements=["repo_id", "cluster_hash"],
        set_={
            "tier": tier,
            "member_count": member_count,
            "members_json": payload,
        },
    )

    await session.execute(upsert_stmt)
    await session.commit()
73
74
@pytest_asyncio.fixture
async def repo(db_session: AsyncSession) -> MusehubRepo:
    """Create the repo (owner ``siuser``, slug ``state-integrity``) used by each test."""
    created = await create_repo(
        db_session,
        owner="siuser",
        slug="state-integrity",
    )
    return created
78
79
class TestClonesStateIntegrity:
    """State-integrity cases SI01-SI06 for the ``MusehubIntelClones`` table.

    Every case seeds rows through the module-level ``_insert`` upsert helper
    and reads them back through the same session.  Each test reads
    ``repo.repo_id`` exactly once, up front, so later queries never touch the
    ORM instance again after a commit (or, in SI06, after the row is deleted).
    """

    @pytest.mark.asyncio
    async def test_SI01_all_rows_parseable_members_json(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Every members_json stored in the DB must deserialise without error."""
        repo_id = str(repo.repo_id)
        seeded = 5
        for i in range(seeded):
            await _insert(
                db_session, repo_id,
                cluster_hash=f"sha256:si01{str(i).zfill(60)}",
                member_count=i + 2,
                members_json=_make_members(i + 2),
            )

        result = await db_session.execute(
            sa.select(MusehubIntelClones).where(
                MusehubIntelClones.repo_id == repo_id
            )
        )
        rows = result.scalars().all()
        # Without this the loop below would pass vacuously on an empty result.
        assert len(rows) == seeded
        for row in rows:
            try:
                parsed = json.loads(row.members_json)
            except json.JSONDecodeError as exc:
                pytest.fail(f"Unparseable members_json for {row.cluster_hash}: {exc}")
            else:
                assert isinstance(parsed, list)

    @pytest.mark.asyncio
    async def test_SI02_tier_values_in_valid_set(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """All tier values must be exactly 'exact' or 'near'."""
        repo_id = str(repo.repo_id)
        tiers = ("exact", "near", "exact")
        # Key each row by its position rather than id(tier): id() is a memory
        # address (different every run), and both "exact" entries can share
        # one, which would silently upsert them into a single row.
        for i, tier in enumerate(tiers):
            await _insert(
                db_session, repo_id,
                cluster_hash=f"sha256:si02{str(i).zfill(60)}",
                tier=tier,
            )

        result = await db_session.execute(
            sa.select(MusehubIntelClones.tier).where(
                MusehubIntelClones.repo_id == repo_id
            )
        )
        stored = [tier for (tier,) in result.all()]
        # One row per seeded tier; also rules out a vacuous pass.
        assert len(stored) == len(tiers)
        for tier in stored:
            assert tier in ("exact", "near"), f"Unexpected tier value: {tier!r}"

    @pytest.mark.asyncio
    async def test_SI03_member_count_matches_json_length(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """member_count must equal the number of entries in members_json."""
        repo_id = str(repo.repo_id)
        sizes = (2, 5, 10)
        for n in sizes:
            await _insert(
                db_session, repo_id,
                # Fixed-width suffix: always 64 characters after "sha256:",
                # the same shape SI01 and SI06 use, whatever the digits in n.
                cluster_hash=f"sha256:si03{str(n).zfill(60)}",
                member_count=n,
                members_json=_make_members(n),
            )

        result = await db_session.execute(
            sa.select(MusehubIntelClones).where(
                MusehubIntelClones.repo_id == repo_id
            )
        )
        rows = result.scalars().all()
        # Without this the loop below would pass vacuously on an empty result.
        assert len(rows) == len(sizes)
        for row in rows:
            actual = len(json.loads(row.members_json))
            assert actual == row.member_count, (
                f"{row.cluster_hash}: member_count={row.member_count} "
                f"but members_json has {actual} entries"
            )

    @pytest.mark.asyncio
    async def test_SI04_upsert_is_idempotent(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Inserting the same cluster twice leaves exactly one row."""
        repo_id = str(repo.repo_id)
        h = long_id("4" * 64)
        mj = _make_members(3)
        for _ in range(2):
            await _insert(
                db_session, repo_id,
                cluster_hash=h, member_count=3, members_json=mj,
            )

        count_result = await db_session.execute(
            sa.select(sa.func.count())
            .select_from(MusehubIntelClones)
            .where(
                MusehubIntelClones.repo_id == repo_id,
                MusehubIntelClones.cluster_hash == h,
            )
        )
        assert count_result.scalar_one() == 1

    @pytest.mark.asyncio
    async def test_SI05_upsert_updates_existing_row(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Re-running with a new member_count updates the existing row."""
        repo_id = str(repo.repo_id)
        h = long_id("5" * 64)
        await _insert(db_session, repo_id, cluster_hash=h, member_count=2)
        new_mj = _make_members(7)
        await _insert(
            db_session, repo_id,
            cluster_hash=h, member_count=7, members_json=new_mj,
        )

        result = await db_session.execute(
            sa.select(MusehubIntelClones).where(
                MusehubIntelClones.repo_id == repo_id,
                MusehubIntelClones.cluster_hash == h,
            )
        )
        # scalar_one() also enforces that exactly one row matches.
        row = result.scalar_one()
        assert row.member_count == 7
        assert len(json.loads(row.members_json)) == 7

    @pytest.mark.asyncio
    async def test_SI06_cascade_delete_removes_clones(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Deleting the repo cascades and removes all associated clone rows."""
        # Read the id before anything is deleted: after the DELETE below the
        # ORM instance refers to a row that no longer exists.
        repo_id = str(repo.repo_id)
        for i in range(3):
            await _insert(
                db_session, repo_id,
                cluster_hash=f"sha256:si06{str(i).zfill(60)}",
            )

        clone_count = (
            sa.select(sa.func.count())
            .select_from(MusehubIntelClones)
            .where(MusehubIntelClones.repo_id == repo_id)
        )

        # Verify rows exist
        before = await db_session.execute(clone_count)
        assert before.scalar_one() == 3

        # Delete the repo
        await db_session.execute(
            sa.delete(MusehubRepo).where(MusehubRepo.repo_id == repo_id)
        )
        await db_session.commit()

        # Clones rows must be gone
        after = await db_session.execute(clone_count)
        assert after.scalar_one() == 0
File History 1 commit
sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595 fix: typing audit — 0 violations, 0 untyped defs across all… Sonnet 4.6 minor 21 days ago