gabriel / musehub public
test_gc.py python
719 lines 28.7 KB
Raw
sha256:3ff9c9863a9891bdcde71b4a43228f66d0493e38b7cc1d09fe9eb7de774046b2 feat: add repair-commit wire endpoint (API parity with repa… Opus 4.8 minor ⚠ breaking 1 day ago
1 """Section 32 — Garbage Collector: 7-layer test suite.
2
3 Covers:
4 musehub/services/musehub_gc.py — GCResult, run_gc
5 musehub/api/routes/wire.py — _run_gc_async (fire-and-forget background task; covered only indirectly — these tests call run_gc directly)
6
7 Key behaviour:
8 - run_gc collects all branch head commit IDs then BFS through parent_ids
9 - Commits reachable from any branch head are preserved
10 - Orphaned commits (not reachable from any branch) are deleted
11 - Snapshots referenced only by orphaned commits are deleted
12 - Snapshots also referenced by a reachable commit are preserved
13 - Repos with no branch heads → GC skips (returns empty result)
14 - Repos already clean → GC is a no-op (returns 0 deletes)
15 - run_gc commits the session itself
16 """
17 from __future__ import annotations
18
19 import secrets
20 from datetime import datetime, timezone
21
22 import pytest
23 from sqlalchemy import select
24 from sqlalchemy.ext.asyncio import AsyncSession
25
26 from musehub.core.genesis import compute_identity_id, compute_repo_id
27 from musehub.db.musehub_repo_models import (
28 MusehubBranch,
29 MusehubCommit,
30 MusehubCommitRef,
31 MusehubRepo,
32 MusehubSnapshot,
33 MusehubSnapshotRef,
34 )
35 from musehub.services.musehub_gc import GCResult, run_gc
36
37
38 # ── helpers ───────────────────────────────────────────────────────────────────
39
40
41 def _uid() -> str:
42 return secrets.token_hex(16)
43
44
45 def _cid() -> str:
46 return secrets.token_hex(8)
47
48
49 def _now() -> datetime:
50 return datetime.now(tz=timezone.utc)
51
52
async def _db_repo(session: AsyncSession) -> MusehubRepo:
    """Stage a private repo owned by ``testuser`` and return its ORM row.

    The slug carries a random 8-hex-character suffix so every call yields a
    distinct repo. The row is added and flushed; committing is left to the
    caller.
    """
    unique_slug = f"gc-repo-{_uid()[:8]}"
    stamp = _now()
    owner_identity = compute_identity_id(b"testuser")
    # The repo ID is computed from the owner, slug, the literal "code" and the
    # ISO-formatted creation time.
    derived_repo_id = compute_repo_id(
        owner_identity, unique_slug, "code", stamp.isoformat()
    )
    row = MusehubRepo(
        repo_id=derived_repo_id,
        name=unique_slug,
        slug=unique_slug,
        owner="testuser",
        owner_user_id=owner_identity,
        visibility="private",
        created_at=stamp,
        updated_at=stamp,
    )
    session.add(row)
    await session.flush()
    return row
70
71
async def _db_branch(
    session: AsyncSession,
    repo_id: str,
    *,
    name: str = "dev",
    head_commit_id: str | None = None,
) -> MusehubBranch:
    """Stage a branch row for *repo_id* and return it.

    ``head_commit_id`` may stay ``None`` to create a branch without a head.
    The row is added and flushed, not committed.
    """
    branch_fields = {
        "branch_id": _uid(),
        "repo_id": repo_id,
        "name": name,
        "head_commit_id": head_commit_id,
    }
    row = MusehubBranch(**branch_fields)
    session.add(row)
    await session.flush()
    return row
88
89
async def _db_commit(
    session: AsyncSession,
    repo_id: str,
    *,
    commit_id: str | None = None,
    parent_ids: list[str] | None = None,
    snapshot_id: str | None = None,
    branch: str = "dev",
) -> MusehubCommit:
    """Stage a commit plus its repo-membership ref row and return the commit.

    A random ID is generated when *commit_id* is not given, and a missing
    *parent_ids* becomes an empty list. Both rows are added and flushed, not
    committed.
    """
    resolved_id = commit_id if commit_id else _cid()
    parents = parent_ids if parent_ids else []
    row = MusehubCommit(
        commit_id=resolved_id,
        branch=branch,
        parent_ids=parents,
        message="test commit",
        author="testuser",
        timestamp=_now(),
        snapshot_id=snapshot_id,
    )
    # The ref row is what associates the commit with repo_id.
    membership = MusehubCommitRef(repo_id=repo_id, commit_id=row.commit_id)
    session.add_all([row, membership])
    await session.flush()
    return row
112
113
async def _db_snapshot(
    session: AsyncSession,
    repo_id: str,
    *,
    snapshot_id: str | None = None,
) -> MusehubSnapshot:
    """Stage a snapshot plus its repo-membership ref row and return the snapshot.

    The snapshot has an empty manifest blob; a random ID is generated when
    *snapshot_id* is not given. Both rows are added and flushed, not committed.
    """
    resolved_id = snapshot_id if snapshot_id else _cid()
    row = MusehubSnapshot(
        snapshot_id=resolved_id,
        manifest_blob=b"",
        created_at=_now(),
    )
    # The ref row is what associates the snapshot with repo_id.
    membership = MusehubSnapshotRef(repo_id=repo_id, snapshot_id=row.snapshot_id)
    session.add_all([row, membership])
    await session.flush()
    return row
129
130
131 # ═══════════════════════════════════════════════════════════════════════════════
132 # Layer 1 — Unit
133 # ═══════════════════════════════════════════════════════════════════════════════
134
135
class TestUnitGC:
    """Layer 1: database-free checks of GCResult and the reachability walk.

    The reachability tests do not call run_gc; they run a local stand-in for
    the breadth-first walk over parent IDs that the module docstring
    describes, via the shared ``_bfs_reachable`` helper.
    """

    @staticmethod
    def _bfs_reachable(
        graph: dict[str, list[str]], heads: list[str]
    ) -> set[str]:
        """Return every commit ID reachable from *heads* by following parents.

        Args:
            graph: Maps each known commit ID to its list of parent IDs.
            heads: Commit IDs to start the walk from.

        Returns:
            The set of visited commit IDs. IDs missing from *graph* (dangling
            references) are skipped, and already-visited IDs are never
            re-expanded, so cyclic input terminates.
        """
        from collections import deque

        reachable: set[str] = set()
        # popleft() gives FIFO order, i.e. an actual breadth-first walk.
        queue = deque(heads)
        while queue:
            cid = queue.popleft()
            if cid in reachable or cid not in graph:
                continue
            reachable.add(cid)
            queue.extend(graph[cid])
        return reachable

    def test_gcresult_defaults(self) -> None:
        """A GCResult built from only repo_id has zero counters and no errors."""
        r = GCResult(repo_id="repo-abc")
        assert r.commits_deleted == 0
        assert r.snapshots_deleted == 0
        assert r.reachable_commit_count == 0
        assert r.errors == []

    def test_gcresult_is_dataclass(self) -> None:
        """GCResult is declared as a dataclass."""
        import dataclasses

        assert dataclasses.is_dataclass(GCResult)

    def test_gcresult_with_values(self) -> None:
        """Counters passed to the constructor are stored unchanged."""
        r = GCResult(
            repo_id="abc",
            commits_deleted=5,
            snapshots_deleted=3,
            reachable_commit_count=10,
        )
        assert r.commits_deleted == 5
        assert r.snapshots_deleted == 3
        assert r.reachable_commit_count == 10

    def test_gcresult_errors_is_list(self) -> None:
        """``errors`` is a mutable list that is not shared between instances."""
        r = GCResult(repo_id="x")
        r.errors.append("something failed")
        assert len(r.errors) == 1
        # A second instance must start with its own empty list.
        assert GCResult(repo_id="y").errors == []

    def test_bfs_reachability_logic(self) -> None:
        """A commit that no head leads to is excluded from the reachable set."""
        # Commit graph (arrows point child → parent):
        #   head → c2 → c1 → root
        #   orphan → root      (nothing reachable from "head" points at "orphan")
        all_commits = {
            "head": ["c2"],
            "c2": ["c1"],
            "c1": ["root"],
            "root": [],
            "orphan": ["root"],  # orphan points to root but no branch points to it
        }

        reachable = self._bfs_reachable(all_commits, ["head"])

        assert "head" in reachable
        assert "c2" in reachable
        assert "c1" in reachable
        assert "root" in reachable
        assert "orphan" not in reachable

    def test_bfs_handles_merge_commits(self) -> None:
        """Merge commits have two parents — the walk must traverse both."""
        all_commits = {
            "merge": ["left", "right"],
            "left": ["base"],
            "right": ["base"],
            "base": [],
        }

        reachable = self._bfs_reachable(all_commits, ["merge"])

        assert reachable == {"merge", "left", "right", "base"}

    def test_bfs_handles_cycle_guard(self) -> None:
        """Circular parent references must not infinite-loop (already-visited guard)."""
        all_commits = {"a": ["b"], "b": ["a"]}

        reachable = self._bfs_reachable(all_commits, ["a"])

        assert reachable == {"a", "b"}
230
231
232 # ═══════════════════════════════════════════════════════════════════════════════
233 # Layer 2 — Integration
234 # ═══════════════════════════════════════════════════════════════════════════════
235
236
class TestIntegrationGC:
    """Layer 2: run_gc against a database session, one behaviour per test.

    Each test stages rows through the ``_db_*`` helpers, commits the session,
    runs run_gc, and checks the returned counters and/or the surviving rows.
    """

    async def test_gc_clean_repo_no_deletes(self, db_session: AsyncSession) -> None:
        """A repo whose only commit is the branch head has nothing to collect."""
        repo = await _db_repo(db_session)
        c1 = await _db_commit(db_session, repo.repo_id)
        await _db_branch(db_session, repo.repo_id, head_commit_id=c1.commit_id)
        await db_session.commit()

        result = await run_gc(db_session, repo.repo_id)
        assert result.commits_deleted == 0
        assert result.snapshots_deleted == 0
        assert result.reachable_commit_count == 1

    async def test_gc_no_branch_heads_skips(self, db_session: AsyncSession) -> None:
        """With no branch head set, GC reports nothing and leaves the commit alone."""
        repo = await _db_repo(db_session)
        # Branch with no head_commit_id
        await _db_branch(db_session, repo.repo_id, head_commit_id=None)
        stray = await _db_commit(db_session, repo.repo_id)
        # Capture the ID before committing so the later lookup does not depend
        # on attribute access against a committed ORM object.
        stray_id = stray.commit_id
        await db_session.commit()

        result = await run_gc(db_session, repo.repo_id)
        # No heads → GC skips immediately; nothing deleted
        assert result.commits_deleted == 0
        assert result.snapshots_deleted == 0
        assert result.reachable_commit_count == 0
        # The counters alone do not prove the row survived; check the table too.
        assert await db_session.get(MusehubCommit, stray_id) is not None

    async def test_gc_deletes_orphaned_commit(self, db_session: AsyncSession) -> None:
        """A commit that no branch head reaches is deleted."""
        repo = await _db_repo(db_session)
        live = await _db_commit(db_session, repo.repo_id)
        orphan = await _db_commit(db_session, repo.repo_id)
        await _db_branch(db_session, repo.repo_id, head_commit_id=live.commit_id)
        await db_session.commit()

        result = await run_gc(db_session, repo.repo_id)
        assert result.commits_deleted == 1
        assert result.reachable_commit_count == 1

        # Verify orphan is gone
        row = await db_session.get(MusehubCommit, orphan.commit_id)
        assert row is None

    async def test_gc_preserves_reachable_chain(self, db_session: AsyncSession) -> None:
        """Every commit on the head's parent chain is kept."""
        repo = await _db_repo(db_session)
        root_cid = _cid()
        mid_cid = _cid()
        head_cid = _cid()
        await _db_commit(db_session, repo.repo_id, commit_id=root_cid)
        await _db_commit(
            db_session, repo.repo_id, commit_id=mid_cid, parent_ids=[root_cid]
        )
        await _db_commit(
            db_session, repo.repo_id, commit_id=head_cid, parent_ids=[mid_cid]
        )
        await _db_branch(db_session, repo.repo_id, head_commit_id=head_cid)
        await db_session.commit()

        result = await run_gc(db_session, repo.repo_id)
        assert result.commits_deleted == 0
        assert result.reachable_commit_count == 3

        for cid in [root_cid, mid_cid, head_cid]:
            row = await db_session.get(MusehubCommit, cid)
            assert row is not None

    async def test_gc_deletes_orphaned_snapshot(self, db_session: AsyncSession) -> None:
        """A snapshot referenced only by an orphaned commit is deleted with it."""
        repo = await _db_repo(db_session)
        snap = await _db_snapshot(db_session, repo.repo_id)
        live = await _db_commit(db_session, repo.repo_id)
        # Orphaned commit: the only commit pointing at the snapshot.
        await _db_commit(db_session, repo.repo_id, snapshot_id=snap.snapshot_id)
        await _db_branch(db_session, repo.repo_id, head_commit_id=live.commit_id)
        await db_session.commit()

        result = await run_gc(db_session, repo.repo_id)
        assert result.commits_deleted == 1
        assert result.snapshots_deleted == 1

        snap_row = await db_session.get(MusehubSnapshot, snap.snapshot_id)
        assert snap_row is None

    async def test_gc_preserves_snapshot_referenced_by_live_commit(
        self, db_session: AsyncSession
    ) -> None:
        """A snapshot shared by a live and an orphaned commit is kept."""
        repo = await _db_repo(db_session)
        shared_snap = await _db_snapshot(db_session, repo.repo_id)
        # Both reachable and orphan point to same snapshot
        live_cid = _cid()
        orphan_cid = _cid()
        await _db_commit(
            db_session, repo.repo_id,
            commit_id=live_cid, snapshot_id=shared_snap.snapshot_id
        )
        await _db_commit(
            db_session, repo.repo_id,
            commit_id=orphan_cid, snapshot_id=shared_snap.snapshot_id
        )
        await _db_branch(db_session, repo.repo_id, head_commit_id=live_cid)
        await db_session.commit()

        result = await run_gc(db_session, repo.repo_id)
        assert result.commits_deleted == 1  # orphan commit removed
        assert result.snapshots_deleted == 0  # snapshot still used by live commit

        snap_row = await db_session.get(MusehubSnapshot, shared_snap.snapshot_id)
        assert snap_row is not None

    async def test_gc_multiple_branches_union_of_reachable(
        self, db_session: AsyncSession
    ) -> None:
        """Commits reachable from any branch head count as reachable."""
        repo = await _db_repo(db_session)
        c1 = await _db_commit(db_session, repo.repo_id)
        c2 = await _db_commit(db_session, repo.repo_id)
        await _db_branch(db_session, repo.repo_id, name="dev", head_commit_id=c1.commit_id)
        await _db_branch(db_session, repo.repo_id, name="main", head_commit_id=c2.commit_id)
        await db_session.commit()

        result = await run_gc(db_session, repo.repo_id)
        assert result.commits_deleted == 0
        assert result.reachable_commit_count == 2

    async def test_gc_returns_gcresult(self, db_session: AsyncSession) -> None:
        """run_gc returns a GCResult tagged with the repo it was run for."""
        repo = await _db_repo(db_session)
        c = await _db_commit(db_session, repo.repo_id)
        await _db_branch(db_session, repo.repo_id, head_commit_id=c.commit_id)
        await db_session.commit()

        result = await run_gc(db_session, repo.repo_id)
        assert isinstance(result, GCResult)
        assert result.repo_id == repo.repo_id

    async def test_gc_only_affects_target_repo(self, db_session: AsyncSession) -> None:
        """Running GC on one repo leaves another repo's unreferenced commit alone."""
        repo1 = await _db_repo(db_session)
        repo2 = await _db_repo(db_session)
        live = await _db_commit(db_session, repo1.repo_id)
        r2_orphan = await _db_commit(db_session, repo2.repo_id)
        await _db_branch(db_session, repo1.repo_id, head_commit_id=live.commit_id)
        await _db_branch(db_session, repo2.repo_id, head_commit_id=None)
        await db_session.commit()

        # Run GC on repo1 only
        result = await run_gc(db_session, repo1.repo_id)
        assert result.commits_deleted == 0

        # repo2's commit must still exist
        row = await db_session.get(MusehubCommit, r2_orphan.commit_id)
        assert row is not None
382
383
384 # ═══════════════════════════════════════════════════════════════════════════════
385 # Layer 3 — End-to-End
386 # ═══════════════════════════════════════════════════════════════════════════════
387
388
class TestE2EGC:
    """Layer 3: whole-repo scenarios driven through run_gc.

    GC has no direct HTTP endpoint; _run_gc_async is fire-and-forget after
    push. These tests therefore build a realistic repo state, call run_gc
    directly, and then check that the database is fully consistent.
    """

    async def test_e2e_gc_linear_history(self, db_session: AsyncSession) -> None:
        """Full pipeline: 5-commit linear chain, 2 orphans, run GC, verify state."""
        repo = await _db_repo(db_session)
        commit_ids = [_cid() for _ in range(7)]
        # First five IDs form the reachable chain 0←1←2←3←4; the last two are orphans.
        chain, strays = commit_ids[:5], commit_ids[5:]

        previous: str | None = None
        for current in chain:
            await _db_commit(
                db_session,
                repo.repo_id,
                commit_id=current,
                parent_ids=[] if previous is None else [previous],
            )
            previous = current
        for stray in strays:
            await _db_commit(db_session, repo.repo_id, commit_id=stray)

        await _db_branch(db_session, repo.repo_id, head_commit_id=chain[-1])
        await db_session.commit()

        outcome = await run_gc(db_session, repo.repo_id)

        assert outcome.reachable_commit_count == 5
        assert outcome.commits_deleted == 2

        for kept in chain:
            assert await db_session.get(MusehubCommit, kept) is not None
        for removed in strays:
            assert await db_session.get(MusehubCommit, removed) is None

    async def test_e2e_gc_empty_repo(self, db_session: AsyncSession) -> None:
        """A repo with no branches, commits or snapshots yields zero deletes."""
        repo = await _db_repo(db_session)
        await db_session.commit()

        outcome = await run_gc(db_session, repo.repo_id)
        assert outcome.commits_deleted == 0
        assert outcome.snapshots_deleted == 0

    async def test_e2e_gc_snapshot_lifecycle(self, db_session: AsyncSession) -> None:
        """The live commit's snapshot survives; the orphan's snapshot is removed."""
        repo = await _db_repo(db_session)
        kept_snapshot = await _db_snapshot(db_session, repo.repo_id)
        doomed_snapshot = await _db_snapshot(db_session, repo.repo_id)

        kept_commit_id = _cid()
        doomed_commit_id = _cid()
        await _db_commit(
            db_session,
            repo.repo_id,
            commit_id=kept_commit_id,
            snapshot_id=kept_snapshot.snapshot_id,
        )
        await _db_commit(
            db_session,
            repo.repo_id,
            commit_id=doomed_commit_id,
            snapshot_id=doomed_snapshot.snapshot_id,
        )
        await _db_branch(db_session, repo.repo_id, head_commit_id=kept_commit_id)
        await db_session.commit()

        outcome = await run_gc(db_session, repo.repo_id)
        assert outcome.commits_deleted == 1
        assert outcome.snapshots_deleted == 1

        kept_row = await db_session.get(MusehubSnapshot, kept_snapshot.snapshot_id)
        assert kept_row is not None
        doomed_row = await db_session.get(MusehubSnapshot, doomed_snapshot.snapshot_id)
        assert doomed_row is None
451
452
453 # ═══════════════════════════════════════════════════════════════════════════════
454 # Layer 4 — Stress
455 # ═══════════════════════════════════════════════════════════════════════════════
456
457
class TestStressGC:
    """Layer 4: larger data sets — many orphans, a long chain, repeated runs."""

    async def test_gc_large_orphan_set(self, db_session: AsyncSession) -> None:
        """GC must handle a repo with 200 orphaned commits."""
        orphan_total = 200
        repo = await _db_repo(db_session)
        survivor = await _db_commit(db_session, repo.repo_id)
        await _db_branch(db_session, repo.repo_id, head_commit_id=survivor.commit_id)

        remaining = orphan_total
        while remaining > 0:
            await _db_commit(db_session, repo.repo_id)
            remaining -= 1

        await db_session.commit()

        outcome = await run_gc(db_session, repo.repo_id)
        assert outcome.commits_deleted == orphan_total
        assert outcome.reachable_commit_count == 1

    async def test_gc_deep_chain(self, db_session: AsyncSession) -> None:
        """GC must traverse a 100-commit linear chain without stack overflow."""
        repo = await _db_repo(db_session)
        chain = [_cid() for _ in range(100)]
        # Pair each commit with its predecessor; the first commit has none.
        for parent, current in zip([None, *chain], chain):
            await _db_commit(
                db_session,
                repo.repo_id,
                commit_id=current,
                parent_ids=[] if parent is None else [parent],
            )
        await _db_branch(db_session, repo.repo_id, head_commit_id=chain[-1])
        await db_session.commit()

        outcome = await run_gc(db_session, repo.repo_id)
        assert outcome.commits_deleted == 0
        assert outcome.reachable_commit_count == 100

    async def test_gc_many_orphaned_snapshots(self, db_session: AsyncSession) -> None:
        """50 orphaned commits, each with its own snapshot, are all collected."""
        pair_total = 50
        repo = await _db_repo(db_session)
        survivor = await _db_commit(db_session, repo.repo_id)
        await _db_branch(db_session, repo.repo_id, head_commit_id=survivor.commit_id)

        for _ in range(pair_total):
            orphan_snapshot = await _db_snapshot(db_session, repo.repo_id)
            await _db_commit(
                db_session, repo.repo_id, snapshot_id=orphan_snapshot.snapshot_id
            )

        await db_session.commit()

        outcome = await run_gc(db_session, repo.repo_id)
        assert outcome.commits_deleted == pair_total
        assert outcome.snapshots_deleted == pair_total

    async def test_gc_idempotent_on_clean_repo(self, db_session: AsyncSession) -> None:
        """Running GC twice on an already-clean repo must be a no-op both times."""
        repo = await _db_repo(db_session)
        only_commit = await _db_commit(db_session, repo.repo_id)
        await _db_branch(db_session, repo.repo_id, head_commit_id=only_commit.commit_id)
        await db_session.commit()

        first_pass = await run_gc(db_session, repo.repo_id)
        second_pass = await run_gc(db_session, repo.repo_id)

        assert first_pass.commits_deleted == 0
        assert second_pass.commits_deleted == 0
517
518
519 # ═══════════════════════════════════════════════════════════════════════════════
520 # Layer 5 — Data Integrity
521 # ═══════════════════════════════════════════════════════════════════════════════
522
523
class TestDataIntegrityGC:
    """Layer 5: live data is never removed and reported counts match the tables."""

    async def test_gc_does_not_delete_head_commit(self, db_session: AsyncSession) -> None:
        """The commit a branch head points at is still present after GC."""
        repo = await _db_repo(db_session)
        tip = await _db_commit(db_session, repo.repo_id)
        await _db_branch(db_session, repo.repo_id, head_commit_id=tip.commit_id)
        await db_session.commit()

        await run_gc(db_session, repo.repo_id)
        assert await db_session.get(MusehubCommit, tip.commit_id) is not None

    async def test_gc_does_not_delete_live_snapshot(self, db_session: AsyncSession) -> None:
        """The snapshot of the head commit is still present after GC."""
        repo = await _db_repo(db_session)
        live_snapshot = await _db_snapshot(db_session, repo.repo_id)
        tip = await _db_commit(
            db_session, repo.repo_id, snapshot_id=live_snapshot.snapshot_id
        )
        await _db_branch(db_session, repo.repo_id, head_commit_id=tip.commit_id)
        await db_session.commit()

        await run_gc(db_session, repo.repo_id)
        assert await db_session.get(MusehubSnapshot, live_snapshot.snapshot_id) is not None

    async def test_gc_counts_match_actual_deletes(self, db_session: AsyncSession) -> None:
        """Reported delete counts agree with what is left in the repo's tables."""
        repo = await _db_repo(db_session)
        survivor = await _db_commit(db_session, repo.repo_id)
        doomed_snapshots = []
        for _ in range(3):
            doomed_snapshots.append(await _db_snapshot(db_session, repo.repo_id))
        for doomed in doomed_snapshots:
            await _db_commit(db_session, repo.repo_id, snapshot_id=doomed.snapshot_id)
        await _db_branch(db_session, repo.repo_id, head_commit_id=survivor.commit_id)
        await db_session.commit()

        outcome = await run_gc(db_session, repo.repo_id)
        assert outcome.commits_deleted == 3
        assert outcome.snapshots_deleted == 3

        # Verify actual DB state matches reported counts
        commit_query = (
            select(MusehubCommit)
            .join(MusehubCommitRef, MusehubCommitRef.commit_id == MusehubCommit.commit_id)
            .where(MusehubCommitRef.repo_id == repo.repo_id)
        )
        commit_rows = (await db_session.execute(commit_query)).scalars().all()
        assert len(commit_rows) == 1

        snapshot_query = (
            select(MusehubSnapshot)
            .join(MusehubSnapshotRef, MusehubSnapshotRef.snapshot_id == MusehubSnapshot.snapshot_id)
            .where(MusehubSnapshotRef.repo_id == repo.repo_id)
        )
        snapshot_rows = (await db_session.execute(snapshot_query)).scalars().all()
        assert len(snapshot_rows) == 0

    async def test_gc_merge_commit_both_parents_preserved(
        self, db_session: AsyncSession
    ) -> None:
        """Both sides of a merge, and their common base, are kept."""
        repo = await _db_repo(db_session)
        base_id, left_id, right_id, merge_id = _cid(), _cid(), _cid(), _cid()

        # Diamond: base ← left, base ← right, and merge has both as parents.
        await _db_commit(db_session, repo.repo_id, commit_id=base_id)
        await _db_commit(
            db_session, repo.repo_id, commit_id=left_id, parent_ids=[base_id]
        )
        await _db_commit(
            db_session, repo.repo_id, commit_id=right_id, parent_ids=[base_id]
        )
        await _db_commit(
            db_session,
            repo.repo_id,
            commit_id=merge_id,
            parent_ids=[left_id, right_id],
        )
        await _db_branch(db_session, repo.repo_id, head_commit_id=merge_id)
        await db_session.commit()

        outcome = await run_gc(db_session, repo.repo_id)
        assert outcome.commits_deleted == 0
        assert outcome.reachable_commit_count == 4

        for expected_id in (base_id, left_id, right_id, merge_id):
            assert await db_session.get(MusehubCommit, expected_id) is not None

    async def test_gc_commit_with_no_snapshot_skips_snapshot_delete(
        self, db_session: AsyncSession
    ) -> None:
        """Deleting an orphan that has no snapshot reports zero snapshot deletes."""
        repo = await _db_repo(db_session)
        survivor = await _db_commit(db_session, repo.repo_id)
        # The orphaned commit carries no snapshot at all.
        await _db_commit(db_session, repo.repo_id, snapshot_id=None)
        await _db_branch(db_session, repo.repo_id, head_commit_id=survivor.commit_id)
        await db_session.commit()

        outcome = await run_gc(db_session, repo.repo_id)
        assert outcome.commits_deleted == 1
        assert outcome.snapshots_deleted == 0
617
618
619 # ═══════════════════════════════════════════════════════════════════════════════
620 # Layer 6 — Security
621 # ═══════════════════════════════════════════════════════════════════════════════
622
623
class TestSecurityGC:
    """Layer 6: repo isolation and tolerance of unknown or dangling identifiers."""

    async def test_gc_does_not_cross_repo_boundaries(self, db_session: AsyncSession) -> None:
        """GC for repo1 must never delete commits belonging to repo2."""
        target_repo = await _db_repo(db_session)
        bystander_repo = await _db_repo(db_session)

        # The bystander repo holds a commit that no branch references.
        bystander_commit = await _db_commit(db_session, bystander_repo.repo_id)

        # The target repo holds one live commit.
        target_live = await _db_commit(db_session, target_repo.repo_id)
        await _db_branch(
            db_session, target_repo.repo_id, head_commit_id=target_live.commit_id
        )
        await _db_branch(db_session, bystander_repo.repo_id, head_commit_id=None)
        await db_session.commit()

        await run_gc(db_session, target_repo.repo_id)

        # The bystander's commit must still be there.
        assert await db_session.get(MusehubCommit, bystander_commit.commit_id) is not None

    async def test_gc_nonexistent_repo_returns_empty_result(
        self, db_session: AsyncSession
    ) -> None:
        """Calling GC on a non-existent repo_id must not raise and return empty result."""
        outcome = await run_gc(db_session, "nonexistent-repo-id")
        assert outcome.commits_deleted == 0
        assert outcome.reachable_commit_count == 0

    async def test_gc_with_unknown_parent_ids_does_not_crash(
        self, db_session: AsyncSession
    ) -> None:
        """Commits that reference parent IDs not in the DB (dangling refs) are handled."""
        repo = await _db_repo(db_session)
        tip_id = _cid()
        # The only parent listed is an ID for which no commit row was created.
        await _db_commit(
            db_session,
            repo.repo_id,
            commit_id=tip_id,
            parent_ids=["phantom-commit-id-not-in-db"],
        )
        await _db_branch(db_session, repo.repo_id, head_commit_id=tip_id)
        await db_session.commit()

        # The walk meets the unknown parent and must carry on without raising.
        outcome = await run_gc(db_session, repo.repo_id)
        assert outcome.commits_deleted == 0
        assert outcome.reachable_commit_count == 1
671
672
673 # ═══════════════════════════════════════════════════════════════════════════════
674 # Layer 7 — Performance
675 # ═══════════════════════════════════════════════════════════════════════════════
676
677
class TestPerformanceGC:
    """Layer 7: wall-clock bounds on run_gc for small and mixed workloads."""

    async def test_gc_completes_quickly_small_repo(self, db_session: AsyncSession) -> None:
        """A 20-commit linear chain is processed in under one second."""
        import time

        repo = await _db_repo(db_session)
        chain = [_cid() for _ in range(20)]
        # Pair each commit with its predecessor; the first commit has none.
        for parent, current in zip([None, *chain], chain):
            await _db_commit(
                db_session,
                repo.repo_id,
                commit_id=current,
                parent_ids=[] if parent is None else [parent],
            )
        await _db_branch(db_session, repo.repo_id, head_commit_id=chain[-1])
        await db_session.commit()

        # Only the run_gc call itself is timed, not the setup above.
        started = time.perf_counter()
        outcome = await run_gc(db_session, repo.repo_id)
        elapsed = time.perf_counter() - started

        assert outcome.commits_deleted == 0
        assert elapsed < 1.0

    async def test_gc_with_mixed_load(self, db_session: AsyncSession) -> None:
        """30 live commits plus 50 orphan/snapshot pairs finish in under two seconds."""
        import time

        repo = await _db_repo(db_session)
        live_chain = [_cid() for _ in range(30)]
        for parent, current in zip([None, *live_chain], live_chain):
            await _db_commit(
                db_session,
                repo.repo_id,
                commit_id=current,
                parent_ids=[] if parent is None else [parent],
            )

        for _ in range(50):
            orphan_snapshot = await _db_snapshot(db_session, repo.repo_id)
            await _db_commit(
                db_session, repo.repo_id, snapshot_id=orphan_snapshot.snapshot_id
            )

        await _db_branch(db_session, repo.repo_id, head_commit_id=live_chain[-1])
        await db_session.commit()

        # Only the run_gc call itself is timed, not the setup above.
        started = time.perf_counter()
        outcome = await run_gc(db_session, repo.repo_id)
        elapsed = time.perf_counter() - started

        assert outcome.commits_deleted == 50
        assert outcome.snapshots_deleted == 50
        assert elapsed < 2.0
File History 1 commit
sha256:3ff9c9863a9891bdcde71b4a43228f66d0493e38b7cc1d09fe9eb7de774046b2 feat: add repair-commit wire endpoint (API parity with repa… Opus 4.8 minor 1 day ago