gabriel / musehub public
test_data_integrity.py python
308 lines 10.1 KB
Raw
sha256:0997d6250ae6476362f6fe2025af7789f46d03df3e9f34356d5e8ee79b201923 fix(issues): use issue number as pagination cursor, not cre… Sonnet 4.6 patch 9 days ago
1 """Tests for checklist section 5.1 — Database integrity.
2
Covers:
- updated_at present on critical tables
- FK constraints enforced (PostgreSQL)
- Orphan object scan (scan and delete)
- ingest_push parent-commit validation (missing, same-push, DB-stored, genesis)
7 """
8 from __future__ import annotations
9
10 import pytest
11 from muse.core.types import fake_id
12 from sqlalchemy.ext.asyncio import AsyncSession
13
14 from tests.factories import create_repo
15
16
17 # ── updated_at on critical tables ─────────────────────────────────────────────
18
def test_musehub_repo_has_updated_at() -> None:
    """MusehubRepo's table definition must include an ``updated_at`` column."""
    from musehub.db.musehub_repo_models import MusehubRepo

    column_names = [column.name for column in MusehubRepo.__table__.columns]
    assert "updated_at" in column_names, "MusehubRepo is missing updated_at column"
23
24
def test_musehub_proposal_has_updated_at() -> None:
    """MusehubProposal's table definition must include an ``updated_at`` column."""
    from musehub.db.musehub_social_models import MusehubProposal

    column_names = [column.name for column in MusehubProposal.__table__.columns]
    assert "updated_at" in column_names, "MusehubProposal is missing updated_at column"
29
30
def test_musehub_webhook_has_updated_at() -> None:
    """MusehubWebhook's table definition must include an ``updated_at`` column."""
    from musehub.db.musehub_webhook_models import MusehubWebhook

    column_names = [column.name for column in MusehubWebhook.__table__.columns]
    assert "updated_at" in column_names, "MusehubWebhook is missing updated_at column"
35
36
def test_musehub_release_has_updated_at() -> None:
    """MusehubRelease's table definition must include an ``updated_at`` column."""
    from musehub.db.musehub_release_models import MusehubRelease

    column_names = [column.name for column in MusehubRelease.__table__.columns]
    assert "updated_at" in column_names, "MusehubRelease is missing updated_at column"
41
42
def test_existing_critical_tables_have_updated_at() -> None:
    """Spot-check that the issue and issue-comment models still declare updated_at."""
    from musehub.db.musehub_social_models import MusehubIssue, MusehubIssueComment

    for model in (MusehubIssue, MusehubIssueComment):
        has_updated_at = any(
            column.name == "updated_at" for column in model.__table__.columns
        )
        assert has_updated_at, f"{model.__name__} is missing updated_at"
49
50
async def test_repo_updated_at_is_populated_on_create(
    db_session: AsyncSession,
) -> None:
    """A repo built by the ``create_repo`` factory must have ``updated_at`` set."""
    created = await create_repo(db_session, slug="updated-at-test", owner="testuser")
    assert created.updated_at is not None, "updated_at must be set on repo creation"
57
58
59 # ── Orphan object scan ─────────────────────────────────────────────────────────
60
async def test_orphan_scan_returns_empty_when_no_orphans(
    db_session: AsyncSession,
) -> None:
    """scan_orphan_objects must report zero orphans when every object has a ref."""
    from musehub.maintenance.orphan_scan import scan_orphan_objects
    from musehub.db.musehub_repo_models import MusehubObject, MusehubObjectRef

    owning_repo = await create_repo(
        db_session, slug="orphan-scan-clean", owner="testuser"
    )
    object_id = fake_id("clean-object")

    # Persist one object together with a ref row pointing at it, so the
    # object is referenced and should not be reported.
    referenced_object = MusehubObject(
        object_id=object_id,
        path="test.bin",
        size_bytes=4,
        storage_uri=f"s3://muse-objects/objects/{object_id}",
    )
    db_session.add(referenced_object)
    object_ref = MusehubObjectRef(repo_id=owning_repo.repo_id, object_id=object_id)
    db_session.add(object_ref)
    await db_session.commit()

    scan = await scan_orphan_objects(db_session)
    assert scan.ok
    assert scan.count == 0
84
85
async def test_orphan_scan_detects_objects_with_no_refs(
    db_session: AsyncSession,
) -> None:
    """scan_orphan_objects must flag an object that has no musehub_object_refs row."""
    from musehub.maintenance.orphan_scan import scan_orphan_objects
    from musehub.db.musehub_repo_models import MusehubObject

    unreferenced_id = fake_id("orphan-object")

    # Deliberately persist the object alone: no ref row is created for it.
    unreferenced_object = MusehubObject(
        object_id=unreferenced_id,
        path="orphan.bin",
        size_bytes=4,
        storage_uri=f"s3://muse-objects/objects/{unreferenced_id}",
    )
    db_session.add(unreferenced_object)
    await db_session.commit()

    scan = await scan_orphan_objects(db_session)
    assert not scan.ok, "Orphan scan should detect the unreferenced object"
    assert unreferenced_id in scan.orphaned_object_ids
108
109
110 # ── ingest_push parent validation (Phase 8 / invariant 8 parity) ──────────────
111
112
async def test_ingest_push_rejects_missing_external_parent(
    db_session: AsyncSession,
) -> None:
    """ingest_push() must raise ValueError when a parent commit is not in DB.

    A client pushing a commit that references a fabricated or missing parent_id
    must be rejected.  Without this guard, history becomes unreadable because
    muse log walks off the end when it tries to fetch the non-existent parent.
    """
    # Only the names this test uses are imported locally; ``create_repo``
    # comes from the module-level import shared with the other tests.
    from musehub.models.musehub import CommitInput
    from musehub.services.musehub_sync import ingest_push
    from muse.core.types import blob_id

    repo = await create_repo(db_session, slug="parent-val-test", owner="testuser")
    repo_id = str(repo.repo_id)

    # The parent id is derived from arbitrary bytes and never pushed, so it
    # cannot be found in either the push payload or the database.
    ghost_parent_id = blob_id(b"nonexistent parent")
    commit_id = blob_id(b"orphan commit")

    commits = [
        CommitInput(
            commit_id=commit_id,
            branch="main",
            parent_ids=[ghost_parent_id],
            message="commit with bogus parent",
            author="tester",
            timestamp="2026-01-01T00:00:00Z",
            snapshot_id=None,
        )
    ]

    with pytest.raises(ValueError, match="missing_parent_commits"):
        await ingest_push(
            db_session,
            repo_id=repo_id,
            branch="main",
            head_commit_id=commit_id,
            commits=commits,
            snapshots=[],
            objects=[],
            force=False,
            author="tester",
        )
157
158
async def test_ingest_push_accepts_parent_in_same_mpack(
    db_session: AsyncSession,
) -> None:
    """ingest_push() must accept a commit whose parent is in the same push mpack."""
    # Only the names this test uses are imported locally; ``create_repo``
    # comes from the module-level import shared with the other tests.
    from musehub.models.musehub import CommitInput
    from musehub.services.musehub_sync import ingest_push
    from muse.core.types import blob_id

    repo = await create_repo(db_session, slug="mpack-parent-test", owner="testuser")
    repo_id = str(repo.repo_id)

    first_id = blob_id(b"first commit")
    second_id = blob_id(b"second commit")

    # Both commits travel in one push; the second names the first as parent.
    commits = [
        CommitInput(
            commit_id=first_id,
            branch="main",
            parent_ids=[],
            message="genesis",
            author="tester",
            timestamp="2026-01-01T00:00:00Z",
            snapshot_id=None,
        ),
        CommitInput(
            commit_id=second_id,
            branch="main",
            parent_ids=[first_id],
            message="second",
            author="tester",
            timestamp="2026-01-01T00:01:00Z",
            snapshot_id=None,
        ),
    ]

    result = await ingest_push(
        db_session,
        repo_id=repo_id,
        branch="main",
        head_commit_id=second_id,
        commits=commits,
        snapshots=[],
        objects=[],
        force=False,
        author="tester",
    )
    assert result.ok
    assert result.remote_head == second_id
208
209
async def test_ingest_push_accepts_parent_already_in_db(
    db_session: AsyncSession,
) -> None:
    """ingest_push() must accept a commit whose parent is already stored in the DB."""
    # ``create_repo`` comes from the module-level import shared with the
    # other tests, so it is not re-imported here.
    from musehub.models.musehub import CommitInput
    from musehub.services.musehub_sync import ingest_push
    from muse.core.types import blob_id

    repo = await create_repo(db_session, slug="db-parent-test", owner="testuser")
    repo_id = str(repo.repo_id)

    first_id = blob_id(b"db-stored first commit")

    # Push the first commit to establish it in DB.
    seed_result = await ingest_push(
        db_session,
        repo_id=repo_id,
        branch="main",
        head_commit_id=first_id,
        commits=[
            CommitInput(
                commit_id=first_id,
                branch="main",
                parent_ids=[],
                message="genesis",
                author="tester",
                timestamp="2026-01-01T00:00:00Z",
                snapshot_id=None,
            )
        ],
        snapshots=[],
        objects=[],
        force=False,
        author="tester",
    )
    # Fail here, not on the second push, if the setup push did not succeed.
    assert seed_result.ok, "setup push of the parent commit must succeed"

    # Now push a second commit that references the DB-stored first commit.
    second_id = blob_id(b"db-stored second commit")
    result = await ingest_push(
        db_session,
        repo_id=repo_id,
        branch="main",
        head_commit_id=second_id,
        commits=[
            CommitInput(
                commit_id=second_id,
                branch="main",
                parent_ids=[first_id],
                message="incremental push",
                author="tester",
                timestamp="2026-01-01T00:01:00Z",
                snapshot_id=None,
            )
        ],
        snapshots=[],
        objects=[],
        force=False,
        author="tester",
    )
    assert result.ok
    assert result.remote_head == second_id
272
273
async def test_ingest_push_genesis_commit_no_parent_accepted(
    db_session: AsyncSession,
) -> None:
    """ingest_push() must accept a genesis commit with an empty parent_ids list."""
    # ``create_repo`` comes from the module-level import shared with the
    # other tests, so it is not re-imported here.
    from musehub.models.musehub import CommitInput
    from musehub.services.musehub_sync import ingest_push
    from muse.core.types import blob_id

    repo = await create_repo(db_session, slug="genesis-test", owner="testuser")
    genesis_id = blob_id(b"genesis commit fresh")

    result = await ingest_push(
        db_session,
        repo_id=str(repo.repo_id),
        branch="main",
        head_commit_id=genesis_id,
        commits=[
            CommitInput(
                commit_id=genesis_id,
                branch="main",
                parent_ids=[],  # a genesis commit has no parents to validate
                message="initial commit",
                author="tester",
                timestamp="2026-01-01T00:00:00Z",
                snapshot_id=None,
            )
        ],
        snapshots=[],
        objects=[],
        force=False,
        author="tester",
    )
    assert result.ok
    assert result.remote_head == genesis_id
File History 1 commit
sha256:0997d6250ae6476362f6fe2025af7789f46d03df3e9f34356d5e8ee79b201923 fix(issues): use issue number as pagination cursor, not cre… Sonnet 4.6 patch 9 days ago