test_blob_only_object_store.py
python
sha256:94ef169c149a452bff7c604ded8b280b19bd477c2dabcb56972780b0b784c7aa
Merge 'fix/assignee-sigil-inline' into 'dev' — proposal: As…
Human
1 day ago
| 1 | """TDD — S3/MinIO is for blobs only. Commits and snapshots live in the DB. |
| 2 | |
| 3 | Architecture: |
| 4 | - Blobs: MinIO mpack:// (byte-range indexed via musehub_mpack_index) |
| 5 | - Commits: DB only (musehub_commits) — no individual S3 writes |
| 6 | - Snapshots: DB only (musehub_snapshots, manifest_blob cache) — no individual S3 writes |
| 7 | |
| 8 | Phase 2/3 added individual S3 writes for commits and snapshots. This was |
| 9 | wrong — commits/snapshots are structured metadata that belongs in the DB. |
| 10 | Writing them to S3 individually creates triple-storage and S3-read overhead |
| 11 | on the hot serving path when the DB already has the data faster. |
| 12 | |
| 13 | Blobs stay in the covering mpack (mpack:// URI). Commits and snapshots have |
| 14 | storage_uri=NULL (not written to S3 individually). |
| 15 | |
| 16 | Tests: |
| 17 | BOS-1 push → commit storage_uri is NULL (no individual S3 write) |
| 18 | BOS-2 push → snapshot storage_uri is NULL (no individual S3 write) |
| 19 | BOS-3 push → blob storage_uri is mpack:// (still correct) |
| 20 | BOS-4 wire_fetch_mpack serves commits from DB, not S3 backend.get() |
| 21 | BOS-5 wire_fetch_mpack serves snapshots from DB, not S3 backend.get() (TODO: no BOS-5 test exists in this file yet) |
| 22 | BOS-6 merge_proposal → merge commit storage_uri is NULL |
| 23 | """ |
| 24 | from __future__ import annotations |
| 25 | |
| 26 | import datetime |
| 27 | import pytest |
| 28 | from sqlalchemy import select |
| 29 | from sqlalchemy.ext.asyncio import AsyncSession |
| 30 | from unittest.mock import AsyncMock, MagicMock, patch, call |
| 31 | |
| 32 | from muse.core.types import fake_id, blob_id |
| 33 | from musehub.core.genesis import compute_identity_id, compute_repo_id |
| 34 | from musehub.db.musehub_repo_models import ( |
| 35 | MusehubBranch, MusehubCommit, MusehubCommitRef, MusehubCommitGraph, |
| 36 | MusehubObject, MusehubRepo, MusehubSnapshot, MusehubSnapshotRef, |
| 37 | ) |
| 38 | |
| 39 | |
def _uid() -> str:
    """Return a random 12-character lowercase hex token.

    The tests in this module append it to repo slugs so each test creates
    a distinctly named repo.
    """
    from secrets import token_bytes

    # Six random bytes render as twelve hexadecimal digits.
    return token_bytes(6).hex()
| 43 | |
| 44 | |
async def _make_repo(session: AsyncSession, slug: str) -> MusehubRepo:
    """Add a public repo owned by "gabriel" to *session* and flush it.

    Args:
        session: Async SQLAlchemy session that receives the new row.
        slug: Used for both the repo's ``name`` and its ``slug``.

    Returns:
        The ``MusehubRepo`` instance after ``session.flush()`` (the session
        is not committed here).
    """
    created = datetime.datetime.now(tz=datetime.timezone.utc)
    owner_id = compute_identity_id(b"gabriel")
    # The repo id is derived from owner, slug, the "code" domain string and
    # the creation timestamp in ISO format.
    repo_id = compute_repo_id(owner_id, slug, "code", created.isoformat())

    repo = MusehubRepo(
        repo_id=repo_id,
        name=slug,
        owner="gabriel",
        slug=slug,
        visibility="public",
        owner_user_id=owner_id,
        created_at=created,
        updated_at=created,
    )
    session.add(repo)
    await session.flush()
    return repo
| 56 | |
| 57 | |
def _mock_backend() -> MagicMock:
    """Build a stand-in storage backend whose async methods return canned values.

    Returns:
        A ``MagicMock`` with ``put``, ``get``, ``get_mpack``, ``put_mpack`` and
        ``presign_mpack_get`` replaced by ``AsyncMock`` objects, so tests can
        await them and inspect the recorded calls afterwards.
    """
    canned_results = {
        "put": "s3://muse-objects/fake",
        "get": None,
        "get_mpack": None,
        "put_mpack": None,
        "presign_mpack_get": "https://minio.example.com/mpack?sig=x",
    }
    backend = MagicMock()
    for method_name, result in canned_results.items():
        setattr(backend, method_name, AsyncMock(return_value=result))
    return backend
| 66 | |
| 67 | |
| 68 | # --------------------------------------------------------------------------- |
| 69 | # BOS-1 commit storage_uri is NULL after push |
| 70 | # --------------------------------------------------------------------------- |
| 71 | |
@pytest.mark.asyncio
async def test_BOS1_commit_storage_uri_is_null_after_push(
    db_session: AsyncSession,
) -> None:
    """Commits must NOT be written to S3 individually — storage_uri stays NULL.

    Commits one file through ``commit_files_to_repo`` with the storage backend
    patched out, then checks that the stored commit row has no ``storage_uri``
    and that no recorded ``backend.put`` call mentions the commit id.
    """
    from musehub.services.musehub_sync import commit_files_to_repo

    slug = f"bos-commit-{_uid()}"
    repo = await _make_repo(db_session, slug)
    backend = _mock_backend()

    with patch("musehub.storage.backends.get_backend", return_value=backend):
        commit_id = await commit_files_to_repo(
            db_session,
            repo_id=repo.repo_id,
            branch="main",
            files={"a.py": b"x=1"},
            message="test",
            author="gabriel",
        )

    commit_row = await db_session.get(MusehubCommit, commit_id)
    assert commit_row is not None

    assert commit_row.storage_uri is None, (
        f"Commits must NOT be written to S3. Got storage_uri={commit_row.storage_uri!r}. "
        "Commits belong in the DB only — S3 is for blobs."
    )

    # Verify backend.put was NOT called with a commit key: no recorded call's
    # string form may contain the commit id.
    put_calls = [str(recorded) for recorded in backend.put.call_args_list]
    commit_puts = [text for text in put_calls if commit_id in text]
    assert not commit_puts, (
        "backend.put must not be called for commit objects"
    )
| 101 | |
| 102 | |
| 103 | # --------------------------------------------------------------------------- |
| 104 | # BOS-2 snapshot storage_uri is NULL after push |
| 105 | # --------------------------------------------------------------------------- |
| 106 | |
@pytest.mark.asyncio
async def test_BOS2_snapshot_storage_uri_is_null_after_push(
    db_session: AsyncSession,
) -> None:
    """Snapshots must NOT be written to S3 individually — storage_uri stays NULL.

    Commits one file through ``commit_files_to_repo`` with the storage backend
    patched out, follows the new commit row to its snapshot row, and checks
    that the snapshot has no ``storage_uri`` and that no recorded
    ``backend.put`` call mentions the snapshot id.
    """
    from musehub.services.musehub_sync import commit_files_to_repo

    repo = await _make_repo(db_session, f"bos-snap-{_uid()}")
    backend = _mock_backend()

    with patch("musehub.storage.backends.get_backend", return_value=backend):
        commit_id = await commit_files_to_repo(
            db_session, repo_id=repo.repo_id, branch="main",
            files={"b.py": b"y=2"}, message="test", author="gabriel",
        )

    commit_row = await db_session.get(MusehubCommit, commit_id)
    # Fail with a readable assertion instead of an AttributeError on
    # ``None.snapshot_id`` when the commit row is missing.
    assert commit_row is not None, "Commit row must exist after commit_files_to_repo"

    snapshot_id = commit_row.snapshot_id
    snap_row = await db_session.get(MusehubSnapshot, snapshot_id)
    assert snap_row is not None

    assert snap_row.storage_uri is None, (
        f"Snapshots must NOT be written to S3. Got storage_uri={snap_row.storage_uri!r}. "
        "Snapshots belong in the DB only — S3 is for blobs."
    )

    # Same guard BOS-1 applies to commits: a NULL storage_uri is not enough,
    # the backend must also never have been asked to store the snapshot.
    for put_call in backend.put.call_args_list:
        assert snapshot_id not in str(put_call), (
            "backend.put must not be called for snapshot objects"
        )
| 132 | |
| 133 | |
| 134 | # --------------------------------------------------------------------------- |
| 135 | # BOS-3 blob storage_uri is mpack:// after push receive |
| 136 | # --------------------------------------------------------------------------- |
| 137 | |
@pytest.mark.asyncio
async def test_BOS3_blob_storage_uri_is_mpack_after_push(
    db_session: AsyncSession,
) -> None:
    """Blobs must still get mpack:// storage_uri — byte-range served from MinIO.

    Builds a wire mpack holding a single blob, has the mocked backend return
    it from ``get_mpack``, runs ``wire_push_unpack_mpack``, and checks that
    the blob's ``MusehubObject`` row carries an ``mpack://`` storage URI.
    """
    from muse.core.mpack import build_wire_mpack
    from musehub.services.musehub_wire_push import wire_push_unpack_mpack

    repo = await _make_repo(db_session, f"bos-blob-{_uid()}")

    payload = b"blob content for bos test"
    object_id = blob_id(payload)
    wire_bytes = build_wire_mpack({
        "blobs": [{"object_id": object_id, "content": payload}],
        "commits": [],
        "snapshots": [],
    })
    # The test keys the mpack by the blob id of its own wire bytes.
    mpack_key = blob_id(wire_bytes)

    backend = _mock_backend()
    backend.get_mpack = AsyncMock(return_value=wire_bytes)

    with patch("musehub.storage.backends.get_backend", return_value=backend):
        await wire_push_unpack_mpack(
            db_session,
            repo.repo_id,
            mpack_key,
            "gabriel",
            branch="main",
            head_commit_id="",
            commits_count=0,
            blobs_count=1,
        )

    obj_row = await db_session.get(MusehubObject, object_id)
    assert obj_row is not None, "Blob must be in musehub_objects"
    # A missing or empty URI fails the same way as a wrong scheme.
    assert (obj_row.storage_uri or "").startswith("mpack://"), (
        f"Blob storage_uri must be mpack://, got {obj_row.storage_uri!r}"
    )
| 169 | |
| 170 | |
| 171 | # --------------------------------------------------------------------------- |
| 172 | # BOS-4 wire_fetch serves commits from DB, no S3 backend.get() |
| 173 | # --------------------------------------------------------------------------- |
| 174 | |
@pytest.mark.asyncio
async def test_BOS4_commits_served_from_db_not_s3(
    db_session: AsyncSession,
) -> None:
    """wire_fetch_mpack must serve commits from DB rows, not S3 backend.get().

    Seeds one snapshot, one commit (both with ``storage_uri=None``), their
    repo refs and a commit-graph row directly in the DB, runs
    ``wire_fetch_mpack`` with the backend mocked, and checks that no recorded
    ``backend.get`` call mentions the commit id.
    """
    from musehub.services.musehub_wire_fetch import wire_fetch_mpack

    repo = await _make_repo(db_session, f"bos-fetch-commit-{_uid()}")
    now = datetime.datetime.now(tz=datetime.timezone.utc)
    snap_id = fake_id("snap-bos4")
    commit_id = fake_id("commit-bos4")

    db_session.add(MusehubSnapshot(
        snapshot_id=snap_id, manifest_blob=b"\x80", entry_count=0, directories=[],
        storage_uri=None,
    ))
    db_session.add(MusehubSnapshotRef(repo_id=repo.repo_id, snapshot_id=snap_id))
    db_session.add(MusehubCommit(
        commit_id=commit_id, branch="main", parent_ids=[], message="bos4",
        author="gabriel", timestamp=now, snapshot_id=snap_id, storage_uri=None,
    ))
    db_session.add(MusehubCommitRef(repo_id=repo.repo_id, commit_id=commit_id))
    # MusehubCommitGraph comes from the module-level import; no local re-import.
    db_session.add(MusehubCommitGraph(
        commit_id=commit_id, parent_ids=[], generation=0, snapshot_id=snap_id,
    ))
    await db_session.flush()

    backend = _mock_backend()

    # Patch both the name bound in the fetch service module and the one in the
    # storage package, so either lookup path yields the mock.
    fetch_patch = patch(
        "musehub.services.musehub_wire_fetch.get_backend", return_value=backend
    )
    storage_patch = patch(
        "musehub.storage.backends.get_backend", return_value=backend
    )
    with fetch_patch, storage_patch:
        # The return value is deliberately ignored: this test only inspects
        # which backend calls were made while serving the fetch.
        await wire_fetch_mpack(
            db_session, repo.repo_id, want=[commit_id], have=[]
        )

    # backend.get must NOT be called with a commit_id — commits come from DB
    get_calls = [str(c) for c in backend.get.call_args_list]
    commit_s3_reads = [c for c in get_calls if commit_id in c]
    assert not commit_s3_reads, (
        "wire_fetch_mpack must NOT call backend.get() for commits. "
        f"Got S3 reads: {commit_s3_reads}. Commits are served from DB."
    )
| 218 | |
| 219 | |
| 220 | # --------------------------------------------------------------------------- |
| 221 | # BOS-6 merge_proposal commit storage_uri is NULL |
| 222 | # --------------------------------------------------------------------------- |
| 223 | |
@pytest.mark.asyncio
async def test_BOS6_merge_proposal_commit_storage_uri_is_null(
    db_session: AsyncSession,
) -> None:
    """Merge commits created server-side must NOT have individual S3 storage_uri.

    Seeds a ``feat`` branch with one commit and snapshot, opens a proposal from
    ``feat`` into ``main``, merges it with the storage backend patched out, and
    checks that the resulting merge commit row has no ``storage_uri``.
    """
    from musehub.core.genesis import compute_branch_id
    from musehub.services import musehub_proposals

    repo = await _make_repo(db_session, f"bos-merge-{_uid()}")
    now = datetime.datetime.now(tz=datetime.timezone.utc)
    snap_id = fake_id("snap-merge-bos")
    commit_id = fake_id("commit-merge-bos")

    # Register the fixture rows in one call, in the same order as before:
    # snapshot, snapshot ref, commit, commit ref, branch.
    db_session.add_all([
        MusehubSnapshot(
            snapshot_id=snap_id,
            manifest_blob=b"\x80",
            entry_count=0,
            directories=[],
        ),
        MusehubSnapshotRef(repo_id=repo.repo_id, snapshot_id=snap_id),
        MusehubCommit(
            commit_id=commit_id,
            branch="feat",
            parent_ids=[],
            message="feat commit",
            author="gabriel",
            timestamp=now,
            snapshot_id=snap_id,
        ),
        MusehubCommitRef(repo_id=repo.repo_id, commit_id=commit_id),
        MusehubBranch(
            branch_id=compute_branch_id(repo.repo_id, "feat"),
            repo_id=repo.repo_id,
            name="feat",
            head_commit_id=commit_id,
        ),
    ])
    await db_session.flush()

    proposal = await musehub_proposals.create_proposal(
        db_session,
        repo_id=repo.repo_id,
        title="Merge feat",
        from_branch="feat",
        to_branch="main",
    )

    backend = _mock_backend()
    with patch("musehub.storage.backends.get_backend", return_value=backend):
        merged = await musehub_proposals.merge_proposal(
            db_session, repo.repo_id, proposal.proposal_id
        )

    merge_commit = await db_session.get(MusehubCommit, merged.merge_commit_id)
    assert merge_commit is not None

    assert merge_commit.storage_uri is None, (
        f"Merge commit must NOT be written to S3. Got storage_uri={merge_commit.storage_uri!r}. "
        "Commits belong in DB only."
    )
File History
1 commit
sha256:94ef169c149a452bff7c604ded8b280b19bd477c2dabcb56972780b0b784c7aa
Merge 'fix/assignee-sigil-inline' into 'dev' — proposal: As…
Human
1 day ago