gabriel / musehub public

test_mpack_index_stale_cleanup.py file-level

at sha256:9 · View file ↗ · Intel ↗

History
1 files
1 commits
0 hotspots
0 🧊 dead
0 💥 blast risk
sha256:9 Merge 'fix/assignee-sigil-inline' into 'dev' — proposal: Assignee sigil… · gabriel · Jun 7, 2026
1 """TDD — stale musehub_mpack_index entries must be cleaned up.
2
3 Two kinds of stale entries:
4 A. mpack_id points to a mpack no longer in MinIO (GC deleted it)
5 B. entity_id has no musehub_object_refs (GC deleted the repo membership)
6
Kind-A entries are rows where mpack_id points to an mpack no longer in MinIO.
They were left behind when mpack.gc deleted old mpacks but didn't prune the
index. They're harmless for correctness (objects fall back to s3:// URIs)
but accumulate forever without cleanup. The SC tests listed below target kind A.
11
12 Tests:
13 SC-1 purge_stale_mpack_index_entries removes entries for missing mpacks
14 SC-2 purge leaves entries alone when mpack exists in storage
15 SC-3 purge returns accurate counts of removed vs kept rows
16 """
17 from __future__ import annotations
18
19 import datetime
20 import pytest
21 from sqlalchemy.ext.asyncio import AsyncSession
22 from sqlalchemy import select
23
24 from muse.core.types import fake_id
25 from musehub.db.musehub_repo_models import MusehubMPackIndex
26
27
28 # ---------------------------------------------------------------------------
29 # SC-1 removes stale entries
30 # ---------------------------------------------------------------------------
31
@pytest.mark.asyncio
async def test_SC1_purge_removes_stale_entries(db_session: AsyncSession) -> None:
    """SC-1: index rows whose mpack is missing from storage are deleted.

    Seeds one ``MusehubMPackIndex`` row, patches the storage backend so that
    ``exists_mpack`` always reports ``False``, runs the purge, and then checks
    that no row for the seeded mpack remains and that the result reports at
    least one removal.
    """
    from musehub.services.musehub_wire_push import purge_stale_mpack_index_entries
    from unittest.mock import AsyncMock, MagicMock, patch

    dead_mpack_id = fake_id("dead-mpack")
    oid = fake_id("obj-in-dead-mpack")
    now = datetime.datetime.now(tz=datetime.timezone.utc)

    db_session.add(MusehubMPackIndex(
        entity_id=oid, mpack_id=dead_mpack_id, entity_type="object", created_at=now,
    ))
    await db_session.flush()

    # Backend says the mpack doesn't exist.
    mock_backend = MagicMock()
    mock_backend.exists_mpack = AsyncMock(return_value=False)

    # NOTE(review): this replaces the name on musehub.storage.backends. If
    # musehub_wire_push binds get_backend into its own namespace at import
    # time, the patch target should be that module instead -- confirm.
    with patch("musehub.storage.backends.get_backend", return_value=mock_backend):
        result = await purge_stale_mpack_index_entries(db_session)

    # Flush pending session changes before reading the table back.
    await db_session.flush()
    remaining = (await db_session.execute(
        select(MusehubMPackIndex).where(
            MusehubMPackIndex.mpack_id == dead_mpack_id
        )
    )).scalars().all()

    assert not remaining, "Stale index entries must be deleted"
    # Only a lower bound is asserted on the reported count.
    assert result["removed"] >= 1
64
65
66 # ---------------------------------------------------------------------------
67 # SC-2 keeps entries for live mpacks
68 # ---------------------------------------------------------------------------
69
@pytest.mark.asyncio
async def test_SC2_purge_keeps_live_entries(db_session: AsyncSession) -> None:
    """SC-2: an index row whose mpack is still in storage survives the purge.

    The backend mock answers ``True`` for every ``exists_mpack`` call, so the
    seeded row must still be present afterwards and the result must report
    at least one kept row.
    """
    from musehub.services.musehub_wire_push import purge_stale_mpack_index_entries
    from unittest.mock import AsyncMock, MagicMock, patch

    surviving_mpack = fake_id("live-mpack")

    # Seed a single index row that refers to the surviving mpack.
    seeded_row = MusehubMPackIndex(
        entity_id=fake_id("obj-in-live-mpack"),
        mpack_id=surviving_mpack,
        entity_type="object",
        created_at=datetime.datetime.now(tz=datetime.timezone.utc),
    )
    db_session.add(seeded_row)
    await db_session.flush()

    # Storage backend stand-in: every mpack is reported as present.
    backend_stub = MagicMock(exists_mpack=AsyncMock(return_value=True))

    with patch("musehub.storage.backends.get_backend", return_value=backend_stub):
        outcome = await purge_stale_mpack_index_entries(db_session)

    lookup = select(MusehubMPackIndex).where(
        MusehubMPackIndex.mpack_id == surviving_mpack
    )
    fetched = await db_session.execute(lookup)
    still_indexed = fetched.scalars().all()

    assert still_indexed, "Live index entries must be preserved"
    # Only a lower bound is asserted on the reported count.
    assert outcome["kept"] >= 1
99
100
101 # ---------------------------------------------------------------------------
102 # SC-3 accurate counts
103 # ---------------------------------------------------------------------------
104
@pytest.mark.asyncio
async def test_SC3_purge_returns_accurate_counts(db_session: AsyncSession) -> None:
    """SC-3: the purge result reports both removed and kept rows.

    Three rows are seeded: two for mpacks the backend mock reports as
    missing and one for an mpack it reports as present. The result must
    report at least two removals and at least one kept row; only lower
    bounds are asserted.
    """
    from musehub.services.musehub_wire_push import purge_stale_mpack_index_entries
    from unittest.mock import AsyncMock, MagicMock, patch

    now = datetime.datetime.now(tz=datetime.timezone.utc)
    dead1 = fake_id("dead1")
    dead2 = fake_id("dead2")
    live1 = fake_id("live1")

    # Object label -> mpack the object is indexed under.
    seed_plan = {
        "obj-d1": dead1,
        "obj-d2": dead2,
        "obj-l1": live1,
    }
    for object_label, owning_mpack in seed_plan.items():
        db_session.add(MusehubMPackIndex(
            entity_id=fake_id(object_label),
            mpack_id=owning_mpack,
            entity_type="object",
            created_at=now,
        ))
    await db_session.flush()

    async def _only_live_exists(mpack_id: str) -> bool:
        # Parameter name is kept as-is in case the caller passes it by keyword.
        return mpack_id == live1

    backend_stub = MagicMock(exists_mpack=AsyncMock(side_effect=_only_live_exists))

    with patch("musehub.storage.backends.get_backend", return_value=backend_stub):
        outcome = await purge_stale_mpack_index_entries(db_session)

    assert outcome["removed"] >= 2
    assert outcome["kept"] >= 1