test_data_integrity.py
file-level
1
files
1
commits
0
hotspots
0
🧟 dead
0
💥 blast risk
| 1 | """Tests for checklist section 5.1 — Database integrity. |
| 2 | |
| 3 | Covers: |
| 4 | - updated_at present on critical tables |
| 5 | - FK constraints enforced (PostgreSQL) |
| 6 | - Orphan object scan (scan and delete) |
| 7 | """ |
| 8 | from __future__ import annotations |
| 9 | |
| 10 | import pytest |
| 11 | from muse.core.types import fake_id |
| 12 | from sqlalchemy.ext.asyncio import AsyncSession |
| 13 | |
| 14 | from tests.factories import create_repo |
| 15 | |
| 16 | |
| 17 | # ── updated_at on critical tables ───────────────────────────────────────────── |
| 18 | |
def test_musehub_repo_has_updated_at() -> None:
    """The MusehubRepo table definition must include an ``updated_at`` column."""
    from musehub.db.musehub_repo_models import MusehubRepo

    # Inspect the mapped table's column names; no database access is needed.
    column_names = [column.name for column in MusehubRepo.__table__.columns]
    assert "updated_at" in column_names, "MusehubRepo is missing updated_at column"
| 23 | |
| 24 | |
def test_musehub_proposal_has_updated_at() -> None:
    """The MusehubProposal table definition must include an ``updated_at`` column."""
    from musehub.db.musehub_social_models import MusehubProposal

    # Inspect the mapped table's column names; no database access is needed.
    column_names = [column.name for column in MusehubProposal.__table__.columns]
    assert "updated_at" in column_names, "MusehubProposal is missing updated_at column"
| 29 | |
| 30 | |
def test_musehub_webhook_has_updated_at() -> None:
    """The MusehubWebhook table definition must include an ``updated_at`` column."""
    from musehub.db.musehub_webhook_models import MusehubWebhook

    # Inspect the mapped table's column names; no database access is needed.
    column_names = [column.name for column in MusehubWebhook.__table__.columns]
    assert "updated_at" in column_names, "MusehubWebhook is missing updated_at column"
| 35 | |
| 36 | |
def test_musehub_release_has_updated_at() -> None:
    """The MusehubRelease table definition must include an ``updated_at`` column."""
    from musehub.db.musehub_release_models import MusehubRelease

    # Inspect the mapped table's column names; no database access is needed.
    column_names = [column.name for column in MusehubRelease.__table__.columns]
    assert "updated_at" in column_names, "MusehubRelease is missing updated_at column"
| 41 | |
| 42 | |
def test_existing_critical_tables_have_updated_at() -> None:
    """Spot-check that pre-existing updated_at columns are still present.

    Checks the MusehubIssue and MusehubIssueComment table definitions; the
    failure message names whichever model lacks the column.
    """
    from musehub.db.musehub_social_models import MusehubIssue, MusehubIssueComment

    spot_checked = (MusehubIssue, MusehubIssueComment)
    for model in spot_checked:
        column_names = [column.name for column in model.__table__.columns]
        assert "updated_at" in column_names, f"{model.__name__} is missing updated_at"
| 49 | |
| 50 | |
async def test_repo_updated_at_is_populated_on_create(
    db_session: AsyncSession,
) -> None:
    """A repo returned by ``create_repo`` must carry a non-null ``updated_at``."""
    created = await create_repo(db_session, slug="updated-at-test", owner="testuser")
    stamp = created.updated_at
    assert stamp is not None, "updated_at must be set on repo creation"
| 57 | |
| 58 | |
| 59 | # ── Orphan object scan ──────────────────────────────────────────────────────── |
| 60 | |
async def test_orphan_scan_returns_empty_when_no_orphans(
    db_session: AsyncSession,
) -> None:
    """scan_orphan_objects must report no orphans when the stored object has a ref.

    Persists one MusehubObject together with a MusehubObjectRef that points at
    it, then expects the scan result to be ``ok`` with a ``count`` of zero.
    """
    from musehub.maintenance.orphan_scan import scan_orphan_objects
    from musehub.db.musehub_repo_models import MusehubObject, MusehubObjectRef

    owning_repo = await create_repo(
        db_session, slug="orphan-scan-clean", owner="testuser"
    )
    object_id = fake_id("clean-object")

    referenced = MusehubObject(
        object_id=object_id,
        path="test.bin",
        size_bytes=4,
        storage_uri=f"s3://muse-objects/objects/{object_id}",
    )
    # The ref row is what keeps the object from counting as an orphan.
    ref_row = MusehubObjectRef(repo_id=owning_repo.repo_id, object_id=object_id)

    db_session.add(referenced)
    db_session.add(ref_row)
    await db_session.commit()

    scan = await scan_orphan_objects(db_session)
    assert scan.ok
    assert scan.count == 0
| 84 | |
| 85 | |
async def test_orphan_scan_detects_objects_with_no_refs(
    db_session: AsyncSession,
) -> None:
    """scan_orphan_objects must find objects that have no row in musehub_object_refs.

    Persists a single MusehubObject without any ref row and expects the scan
    result to be not ``ok`` and to list that object's id.
    """
    from musehub.maintenance.orphan_scan import scan_orphan_objects
    from musehub.db.musehub_repo_models import MusehubObject

    unreferenced_id = fake_id("orphan-object")

    # Deliberately no MusehubObjectRef: this object is the orphan under test.
    db_session.add(
        MusehubObject(
            object_id=unreferenced_id,
            path="orphan.bin",
            size_bytes=4,
            storage_uri=f"s3://muse-objects/objects/{unreferenced_id}",
        )
    )
    await db_session.commit()

    scan = await scan_orphan_objects(db_session)
    assert not scan.ok, "Orphan scan should detect the unreferenced object"
    assert unreferenced_id in scan.orphaned_object_ids
| 108 | |
| 109 | |
| 110 | # ── ingest_push parent validation (Phase 8 / invariant 8 parity) ────────────── |
| 111 | |
| 112 | |
async def test_ingest_push_rejects_missing_external_parent(
    db_session: AsyncSession,
) -> None:
    """ingest_push() must raise ValueError when a parent commit is not in DB.

    A client pushing a commit that references a fabricated or missing parent_id
    must be rejected.  Without this guard, history becomes unreadable because
    muse log walks off the end when it tries to fetch the non-existent parent.
    """
    # Only CommitInput is needed from the models module: the push below
    # carries no objects, so no object input type is imported.
    from musehub.models.musehub import CommitInput
    from musehub.services.musehub_sync import ingest_push
    from tests.factories import create_repo
    from muse.core.types import blob_id

    repo = await create_repo(db_session, slug="parent-val-test", owner="testuser")
    repo_id = str(repo.repo_id)

    # The parent id is derived from arbitrary bytes and is never pushed in
    # this test, neither beforehand nor as part of the same push.
    ghost_parent_id = blob_id(b"nonexistent parent")
    commit_id = blob_id(b"orphan commit")

    commits = [
        CommitInput(
            commit_id=commit_id,
            branch="main",
            parent_ids=[ghost_parent_id],
            message="commit with bogus parent",
            author="tester",
            timestamp="2026-01-01T00:00:00Z",
            snapshot_id=None,
        )
    ]

    # The error text must mention "missing_parent_commits" (regex search).
    with pytest.raises(ValueError, match="missing_parent_commits"):
        await ingest_push(
            db_session,
            repo_id=repo_id,
            branch="main",
            head_commit_id=commit_id,
            commits=commits,
            snapshots=[],
            objects=[],
            force=False,
            author="tester",
        )
| 157 | |
| 158 | |
async def test_ingest_push_accepts_parent_in_same_mpack(
    db_session: AsyncSession,
) -> None:
    """ingest_push() must accept a commit whose parent is in the same push mpack.

    Pushes a genesis commit and its child together in one call and expects the
    result to be ``ok`` with ``remote_head`` equal to the child commit id.
    """
    # Only CommitInput is needed from the models module: the push below
    # carries no objects, so no object input type is imported.
    from musehub.models.musehub import CommitInput
    from musehub.services.musehub_sync import ingest_push
    from tests.factories import create_repo
    from muse.core.types import blob_id

    repo = await create_repo(db_session, slug="mpack-parent-test", owner="testuser")
    repo_id = str(repo.repo_id)

    first_id = blob_id(b"first commit")
    second_id = blob_id(b"second commit")

    commits = [
        CommitInput(
            commit_id=first_id,
            branch="main",
            parent_ids=[],
            message="genesis",
            author="tester",
            timestamp="2026-01-01T00:00:00Z",
            snapshot_id=None,
        ),
        # The child's parent exists only in this same push, not in the DB yet.
        CommitInput(
            commit_id=second_id,
            branch="main",
            parent_ids=[first_id],
            message="second",
            author="tester",
            timestamp="2026-01-01T00:01:00Z",
            snapshot_id=None,
        ),
    ]

    result = await ingest_push(
        db_session,
        repo_id=repo_id,
        branch="main",
        head_commit_id=second_id,
        commits=commits,
        snapshots=[],
        objects=[],
        force=False,
        author="tester",
    )
    assert result.ok
    assert result.remote_head == second_id
| 208 | |
| 209 | |
async def test_ingest_push_accepts_parent_already_in_db(
    db_session: AsyncSession,
) -> None:
    """ingest_push() must accept a commit whose parent is already stored in the DB.

    Performs two consecutive pushes to the same repo: a genesis commit, then a
    child commit that names the genesis commit as its parent.  The second push
    must report ``ok`` with ``remote_head`` equal to the child commit id.
    """
    from musehub.models.musehub import CommitInput
    from musehub.services.musehub_sync import ingest_push
    from tests.factories import create_repo
    from muse.core.types import blob_id

    target_repo = await create_repo(db_session, slug="db-parent-test", owner="testuser")
    repo_key = str(target_repo.repo_id)

    # Push 1: store the genesis commit so it exists before the child arrives.
    genesis_id = blob_id(b"db-stored first commit")
    genesis = CommitInput(
        commit_id=genesis_id,
        branch="main",
        parent_ids=[],
        message="genesis",
        author="tester",
        timestamp="2026-01-01T00:00:00Z",
        snapshot_id=None,
    )
    await ingest_push(
        db_session,
        repo_id=repo_key,
        branch="main",
        head_commit_id=genesis_id,
        commits=[genesis],
        snapshots=[],
        objects=[],
        force=False,
        author="tester",
    )

    # Push 2: the child alone; its parent is not part of this push.
    child_id = blob_id(b"db-stored second commit")
    child = CommitInput(
        commit_id=child_id,
        branch="main",
        parent_ids=[genesis_id],
        message="incremental push",
        author="tester",
        timestamp="2026-01-01T00:01:00Z",
        snapshot_id=None,
    )
    outcome = await ingest_push(
        db_session,
        repo_id=repo_key,
        branch="main",
        head_commit_id=child_id,
        commits=[child],
        snapshots=[],
        objects=[],
        force=False,
        author="tester",
    )
    assert outcome.ok
    assert outcome.remote_head == child_id
| 272 | |
| 273 | |
async def test_ingest_push_genesis_commit_no_parent_accepted(
    db_session: AsyncSession,
) -> None:
    """ingest_push() must accept a genesis commit with an empty parent_ids list.

    Pushes a single parentless commit and expects the result to be ``ok`` with
    ``remote_head`` equal to that commit's id.
    """
    from musehub.models.musehub import CommitInput
    from musehub.services.musehub_sync import ingest_push
    from tests.factories import create_repo
    from muse.core.types import blob_id

    target_repo = await create_repo(db_session, slug="genesis-test", owner="testuser")
    root_id = blob_id(b"genesis commit fresh")

    # An empty parent_ids list marks this commit as the root of history.
    root_commit = CommitInput(
        commit_id=root_id,
        branch="main",
        parent_ids=[],
        message="initial commit",
        author="tester",
        timestamp="2026-01-01T00:00:00Z",
        snapshot_id=None,
    )

    outcome = await ingest_push(
        db_session,
        repo_id=str(target_repo.repo_id),
        branch="main",
        head_commit_id=root_id,
        commits=[root_commit],
        snapshots=[],
        objects=[],
        force=False,
        author="tester",
    )
    assert outcome.ok
    assert outcome.remote_head == root_id