"""TDD — stale musehub_mpack_index entries must be cleaned up. Two kinds of stale entries: A. mpack_id points to a mpack no longer in MinIO (GC deleted it) B. entity_id has no musehub_object_refs (GC deleted the repo membership) Stale entries are rows where mpack_id points to a mpack no longer in MinIO. They were left behind when mpack.gc deleted old mpacks but didn't prune the index. They're harmless for correctness (objects fall back to s3:// URIs) but accumulate forever without cleanup. Tests: SC-1 purge_stale_mpack_index_entries removes entries for missing mpacks SC-2 purge leaves entries alone when mpack exists in storage SC-3 purge returns accurate counts of removed vs kept rows """ from __future__ import annotations import datetime import pytest from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy import select from muse.core.types import fake_id from musehub.db.musehub_repo_models import MusehubMPackIndex # --------------------------------------------------------------------------- # SC-1 removes stale entries # --------------------------------------------------------------------------- @pytest.mark.asyncio async def test_SC1_purge_removes_stale_entries(db_session: AsyncSession) -> None: """Entries pointing to non-existent mpacks are deleted.""" from musehub.services.musehub_wire_push import purge_stale_mpack_index_entries from musehub.storage.backends import get_backend from unittest.mock import AsyncMock, MagicMock, patch dead_mpack_id = fake_id("dead-mpack") oid = fake_id("obj-in-dead-mpack") now = datetime.datetime.now(tz=datetime.timezone.utc) db_session.add(MusehubMPackIndex( entity_id=oid, mpack_id=dead_mpack_id, entity_type="object", created_at=now, )) await db_session.flush() # Backend says the mpack doesn't exist mock_backend = MagicMock() mock_backend.exists_mpack = AsyncMock(return_value=False) with patch("musehub.storage.backends.get_backend", return_value=mock_backend): result = await purge_stale_mpack_index_entries(db_session) await db_session.flush() remaining = (await db_session.execute( select(MusehubMPackIndex).where( MusehubMPackIndex.mpack_id == dead_mpack_id ) )).scalars().all() assert not remaining, "Stale index entries must be deleted" assert result["removed"] >= 1 # --------------------------------------------------------------------------- # SC-2 keeps entries for live mpacks # --------------------------------------------------------------------------- @pytest.mark.asyncio async def test_SC2_purge_keeps_live_entries(db_session: AsyncSession) -> None: """Entries for mpacks that still exist in storage are preserved.""" from musehub.services.musehub_wire_push import purge_stale_mpack_index_entries from unittest.mock import AsyncMock, MagicMock, patch live_mpack_id = fake_id("live-mpack") oid = fake_id("obj-in-live-mpack") now = datetime.datetime.now(tz=datetime.timezone.utc) db_session.add(MusehubMPackIndex( entity_id=oid, mpack_id=live_mpack_id, entity_type="object", created_at=now, )) await db_session.flush() mock_backend = MagicMock() mock_backend.exists_mpack = AsyncMock(return_value=True) with patch("musehub.storage.backends.get_backend", return_value=mock_backend): result = await purge_stale_mpack_index_entries(db_session) remaining = (await db_session.execute( select(MusehubMPackIndex).where( MusehubMPackIndex.mpack_id == live_mpack_id ) )).scalars().all() assert remaining, "Live index entries must be preserved" assert result["kept"] >= 1 # --------------------------------------------------------------------------- # SC-3 accurate counts # --------------------------------------------------------------------------- @pytest.mark.asyncio async def test_SC3_purge_returns_accurate_counts(db_session: AsyncSession) -> None: """removed + kept counts reflect actual rows processed.""" from musehub.services.musehub_wire_push import purge_stale_mpack_index_entries from unittest.mock import AsyncMock, MagicMock, patch now = datetime.datetime.now(tz=datetime.timezone.utc) dead1, dead2 = fake_id("dead1"), fake_id("dead2") live1 = fake_id("live1") for mpack_id, oid in [ (dead1, fake_id("obj-d1")), (dead2, fake_id("obj-d2")), (live1, fake_id("obj-l1")), ]: db_session.add(MusehubMPackIndex( entity_id=oid, mpack_id=mpack_id, entity_type="object", created_at=now, )) await db_session.flush() async def _exists(mpack_id: str) -> bool: return mpack_id == live1 mock_backend = MagicMock() mock_backend.exists_mpack = AsyncMock(side_effect=_exists) with patch("musehub.storage.backends.get_backend", return_value=mock_backend): result = await purge_stale_mpack_index_entries(db_session) assert result["removed"] >= 2 assert result["kept"] >= 1