test_clones_state_integrity.py
python
sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595
fix: typing audit — 0 violations, 0 untyped defs across all…
Sonnet 4.6
minor
⚠ breaking
21 days ago
| 1 | """Tier 5 — State integrity tests for musehub_intel_clones (issue #17). |
| 2 | |
| 3 | Validates that the DB table invariants hold under normal and pathological |
| 4 | conditions: JSON parseability, tier enum, count/content agreement, upsert |
| 5 | idempotency, stale-row update, and CASCADE delete behavior. |
| 6 | |
| 7 | Cases: |
| 8 | SI01 All DB rows have parseable members_json |
| 9 | SI02 All tier values are exactly "exact" or "near" |
| 10 | SI03 member_count matches len(json.loads(members_json)) for each row |
| 11 | SI04 ClonesProvider upsert is idempotent — running twice = same row count |
| 12 | SI05 ClonesProvider upsert updates existing row on re-run with new count |
| 13 | SI06 CASCADE delete — deleting repo removes all clones rows |
| 14 | """ |
| 15 | from __future__ import annotations |
| 16 | |
| 17 | import json |
| 18 | from unittest.mock import AsyncMock, patch |
| 19 | |
| 20 | import pytest |
| 21 | import pytest_asyncio |
| 22 | import sqlalchemy as sa |
| 23 | from sqlalchemy.dialects.postgresql import insert as pg_insert |
| 24 | from sqlalchemy.ext.asyncio import AsyncSession |
| 25 | |
| 26 | from musehub.db.musehub_intel_models import MusehubIntelClones |
| 27 | from musehub.db.musehub_repo_models import MusehubRepo |
| 28 | from muse.core.types import long_id |
| 29 | from tests.factories import create_repo |
| 30 | |
# Fixed ref value written to the ``ref`` column of every row created by
# ``_insert``; built from a constant 64-character string so it is stable
# across tests.
_REF = long_id(64 * "a")
| 32 | |
| 33 | |
def _make_members(n: int, single_file: bool = False) -> str:
    """Serialise *n* synthetic clone members to a JSON array string.

    Each member is a dict with the keys ``address``, ``kind``, ``language``,
    ``body_hash``, ``signature_id`` and ``content_id``. Member ``i`` is
    addressed as ``src/<stem>.py::fn_<i>``, where the stem cycles through
    ``a``..``d`` unless *single_file* is true, in which case it is always
    ``a``.

    Args:
        n: Number of member entries to generate.
        single_file: Place every member in ``src/a.py`` when true.

    Returns:
        The ``json.dumps`` encoding of the member list (``"[]"`` for n == 0).
    """
    members = []
    for index in range(n):
        # ord("a") == 97; index % 4 spreads members over a.py .. d.py.
        stem = "a" if single_file else chr(ord("a") + index % 4)
        entry = {
            "address": f"src/{stem}.py::fn_{index}",
            "kind": "function",
            "language": "Python",
            "body_hash": long_id("a" * 64),
            "signature_id": long_id("b" * 64),
            "content_id": long_id("a" * 64),
        }
        members.append(entry)
    return json.dumps(members)
| 46 | |
| 47 | |
async def _insert(
    session: AsyncSession,
    repo_id: str,
    cluster_hash: str,
    tier: str = "exact",
    member_count: int = 2,
    members_json: str | None = None,
) -> None:
    """Upsert a single ``MusehubIntelClones`` row and commit the session.

    A row is inserted with ``ref`` set to ``_REF``. If a row with the same
    ``(repo_id, cluster_hash)`` already exists, its ``tier``,
    ``member_count`` and ``members_json`` are overwritten; ``ref`` is not
    part of the update set.

    Args:
        session: Async session used to execute and commit the statement.
        repo_id: Value for the ``repo_id`` column.
        cluster_hash: Value for the ``cluster_hash`` column.
        tier: Value for the ``tier`` column.
        member_count: Value for the ``member_count`` column.
        members_json: Explicit JSON payload; when ``None`` a payload with
            ``member_count`` entries is generated via ``_make_members``.
    """
    if members_json is None:
        payload = _make_members(member_count)
    else:
        payload = members_json

    row_values = {
        "repo_id": repo_id,
        "cluster_hash": cluster_hash,
        "tier": tier,
        "member_count": member_count,
        "members_json": payload,
        "ref": _REF,
    }
    # Columns rewritten when (repo_id, cluster_hash) already exists.
    on_conflict_values = {
        "tier": tier,
        "member_count": member_count,
        "members_json": payload,
    }

    insert_stmt = pg_insert(MusehubIntelClones).values(**row_values)
    upsert_stmt = insert_stmt.on_conflict_do_update(
        index_elements=["repo_id", "cluster_hash"],
        set_=on_conflict_values,
    )

    await session.execute(upsert_stmt)
    await session.commit()
| 73 | |
| 74 | |
@pytest_asyncio.fixture
async def repo(db_session: AsyncSession) -> MusehubRepo:
    """Provide a repo created through the ``create_repo`` test factory.

    The repo is created with owner ``siuser`` and slug ``state-integrity``
    using the ``db_session`` fixture.
    """
    created_repo = await create_repo(
        db_session,
        owner="siuser",
        slug="state-integrity",
    )
    return created_repo
| 78 | |
| 79 | |
class TestClonesStateIntegrity:
    """State-integrity cases SI01-SI06 for the ``MusehubIntelClones`` table.

    Every test captures ``str(repo.repo_id)`` once up front so the ORM
    instance is never re-read after a commit (or, in SI06, after its row has
    been deleted).
    """

    @pytest.mark.asyncio
    async def test_SI01_all_rows_parseable_members_json(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Every members_json stored in the DB must deserialise without error."""
        repo_id = str(repo.repo_id)
        expected_rows = 5
        for i in range(expected_rows):
            await _insert(
                db_session, repo_id,
                cluster_hash=f"sha256:si01{str(i).zfill(60)}",
                member_count=i + 2,
                members_json=_make_members(i + 2),
            )

        result = await db_session.execute(
            sa.select(MusehubIntelClones).where(
                MusehubIntelClones.repo_id == repo_id
            )
        )
        rows = result.scalars().all()
        # Without this the loop below passes vacuously if nothing was stored.
        assert len(rows) == expected_rows
        for row in rows:
            # Keep the try body to the one call that can raise JSONDecodeError.
            try:
                parsed = json.loads(row.members_json)
            except json.JSONDecodeError as exc:
                pytest.fail(f"Unparseable members_json for {row.cluster_hash}: {exc}")
            assert isinstance(parsed, list)

    @pytest.mark.asyncio
    async def test_SI02_tier_values_in_valid_set(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """All tier values must be exactly 'exact' or 'near'."""
        repo_id = str(repo.repo_id)
        tiers = ("exact", "near", "exact")
        for i, tier in enumerate(tiers):
            # Use the loop index for uniqueness: id(tier) varies between runs
            # and is identical for the two "exact" entries, which would make
            # them collide on (repo_id, cluster_hash) and upsert into one row.
            await _insert(
                db_session, repo_id,
                cluster_hash=f"sha256:si02{tier[:1]}{str(i).zfill(59)}",
                tier=tier,
            )

        result = await db_session.execute(
            sa.select(MusehubIntelClones.tier).where(
                MusehubIntelClones.repo_id == repo_id
            )
        )
        stored = result.all()
        # One row per inserted tier; also guards against a vacuous pass.
        assert len(stored) == len(tiers)
        for (stored_tier,) in stored:
            assert stored_tier in ("exact", "near"), (
                f"Unexpected tier value: {stored_tier!r}"
            )

    @pytest.mark.asyncio
    async def test_SI03_member_count_matches_json_length(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """member_count must equal the number of entries in members_json."""
        repo_id = str(repo.repo_id)
        counts = (2, 5, 10)
        for n in counts:
            # Fixed-width padding keeps the digest part at 64 characters for
            # every n (5-char tag + 59 digits), matching the other cases.
            await _insert(
                db_session, repo_id,
                cluster_hash=f"sha256:si03n{str(n).zfill(59)}",
                member_count=n,
                members_json=_make_members(n),
            )

        result = await db_session.execute(
            sa.select(MusehubIntelClones).where(
                MusehubIntelClones.repo_id == repo_id
            )
        )
        rows = result.scalars().all()
        # Guard against a vacuous pass when no rows come back.
        assert len(rows) == len(counts)
        for row in rows:
            actual = len(json.loads(row.members_json))
            assert actual == row.member_count, (
                f"{row.cluster_hash}: member_count={row.member_count} "
                f"but members_json has {actual} entries"
            )

    @pytest.mark.asyncio
    async def test_SI04_upsert_is_idempotent(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Inserting the same cluster twice leaves exactly one row."""
        repo_id = str(repo.repo_id)
        h = long_id("4" * 64)
        mj = _make_members(3)
        for _ in range(2):
            await _insert(
                db_session, repo_id,
                cluster_hash=h, member_count=3, members_json=mj,
            )

        count_result = await db_session.execute(
            sa.select(sa.func.count())
            .select_from(MusehubIntelClones)
            .where(
                MusehubIntelClones.repo_id == repo_id,
                MusehubIntelClones.cluster_hash == h,
            )
        )
        assert count_result.scalar_one() == 1

    @pytest.mark.asyncio
    async def test_SI05_upsert_updates_existing_row(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Re-running with a new member_count updates the existing row."""
        repo_id = str(repo.repo_id)
        h = long_id("5" * 64)
        await _insert(db_session, repo_id, cluster_hash=h, member_count=2)
        new_mj = _make_members(7)
        await _insert(
            db_session, repo_id,
            cluster_hash=h, member_count=7, members_json=new_mj,
        )

        result = await db_session.execute(
            sa.select(MusehubIntelClones).where(
                MusehubIntelClones.repo_id == repo_id,
                MusehubIntelClones.cluster_hash == h,
            )
        )
        # scalar_one() also asserts the upsert did not create a second row.
        row = result.scalar_one()
        assert row.member_count == 7
        assert len(json.loads(row.members_json)) == 7

    @pytest.mark.asyncio
    async def test_SI06_cascade_delete_removes_clones(
        self, db_session: AsyncSession, repo: MusehubRepo
    ) -> None:
        """Deleting the repo cascades and removes all associated clone rows."""
        # Read the id before the delete so the ORM instance is never touched
        # once its backing row is gone.
        repo_id = str(repo.repo_id)
        for i in range(3):
            await _insert(
                db_session, repo_id,
                cluster_hash=f"sha256:si06{str(i).zfill(60)}",
            )

        # Verify rows exist
        before = await db_session.execute(
            sa.select(sa.func.count())
            .select_from(MusehubIntelClones)
            .where(MusehubIntelClones.repo_id == repo_id)
        )
        assert before.scalar_one() == 3

        # Delete the repo
        await db_session.execute(
            sa.delete(MusehubRepo).where(MusehubRepo.repo_id == repo_id)
        )
        await db_session.commit()

        # Clones rows must be gone
        after = await db_session.execute(
            sa.select(sa.func.count())
            .select_from(MusehubIntelClones)
            .where(MusehubIntelClones.repo_id == repo_id)
        )
        assert after.scalar_one() == 0
File History
1 commit
sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595
fix: typing audit — 0 violations, 0 untyped defs across all…
Sonnet 4.6
minor
⚠
21 days ago