count_compressed.py
python
sha256:3ff9c9863a9891bdcde71b4a43228f66d0493e38b7cc1d09fe9eb7de774046b2
feat: add repair-commit wire endpoint (API parity with repa…
Opus 4.8
minor
⚠ breaking
1 day ago
| 1 | """Count how many objects in R2 are still zlib-compressed. |
| 2 | |
| 3 | Prints a progress line every 100 objects and a final summary. |
| 4 | Run: docker exec musehub-blue python3 /app/deploy/count_compressed.py |
| 5 | """ |
| 6 | import sys |
| 7 | |
| 8 | import sqlalchemy as sa |
| 9 | from sqlalchemy import create_engine |
| 10 | from sqlalchemy.orm import Session |
| 11 | |
| 12 | from musehub.config import settings |
| 13 | from musehub.db.musehub_repo_models import MusehubObject |
| 14 | from musehub.storage import get_backend |
| 15 | |
# Two-byte zlib stream headers (CMF=0x78 with each of the FLG bytes the
# standard compression levels produce).
ZLIB_MAGIC = (b"\x78\x01", b"\x78\x9c", b"\x78\xda", b"\x78\x5e")


def check_header(backend, oid: str) -> bool:
    """Return True if the object starts with a zlib magic header.

    Only the first two bytes of the object are requested (HTTP ``Range``
    header), so the check does not download the whole object.

    Args:
        backend: Storage backend exposing ``_get_client()``, ``_key(oid)``
            and ``_bucket``.
        oid: Object ID to look up in the bucket.

    Returns:
        True when the first two bytes match one of ``ZLIB_MAGIC``.
        False otherwise — including when the request fails for any reason;
        in that case an ERROR line is printed and the failure is not raised.
    """
    client = backend._get_client()
    key = backend._key(oid)
    try:
        resp = client.get_object(Bucket=backend._bucket, Key=key, Range="bytes=0-1")
        body = resp["Body"]
        try:
            header = body.read(2)
        finally:
            # Always release the response stream; do not rely on the stream
            # happening to be fully consumed to free the connection.
            body.close()
        return header in ZLIB_MAGIC
    except Exception as e:
        # Best-effort scan: report the failure and carry on with the next object.
        print(f" ERROR {oid[:20]}: {e}", flush=True)
        return False
| 30 | |
| 31 | |
def main() -> None:
    """Scan every live S3-backed object and count zlib-compressed ones.

    Loads the IDs of all ``MusehubObject`` rows whose ``storage_uri`` starts
    with ``s3://`` and whose ``deleted_at`` is NULL, then checks each
    object's first two bytes via ``check_header``. A progress line is
    printed every 100 objects and after the last one, followed by a summary.

    Exits the process with status 1 if at least one compressed object was
    found, otherwise 0.

    NOTE: ``check_header`` returns False when a lookup fails, so objects
    that could not be checked are counted as ``plain``.
    """
    # Sync engine — no asyncio, no threads, no surprises.
    sync_url = settings.database_url.replace("+asyncpg", "").replace("+aiosqlite", "")
    engine = create_engine(sync_url)
    backend = get_backend()

    try:
        with Session(engine) as session:
            rows = session.execute(
                sa.select(MusehubObject.object_id)
                .where(
                    MusehubObject.storage_uri.like("s3://%"),
                    MusehubObject.deleted_at.is_(None),
                )
                .order_by(MusehubObject.object_id)
            ).scalars().all()
    finally:
        # The database is not needed past this point; drop the pooled
        # connection instead of holding it idle for the whole scan.
        engine.dispose()

    total = len(rows)
    print(f"Total objects: {total}", flush=True)

    compressed = 0
    plain = 0

    for i, oid in enumerate(rows, 1):
        if check_header(backend, oid):
            compressed += 1
        else:
            plain += 1

        if i % 100 == 0 or i == total:
            print(f" [{i}/{total}] plain={plain} compressed={compressed}", flush=True)

    print(f"\nDone. plain={plain} compressed={compressed}", flush=True)
    # Non-zero exit status signals that compressed objects remain.
    sys.exit(1 if compressed else 0)


if __name__ == "__main__":
    main()
File History
1 commit
sha256:3ff9c9863a9891bdcde71b4a43228f66d0493e38b7cc1d09fe9eb7de774046b2
feat: add repair-commit wire endpoint (API parity with repa…
Opus 4.8
minor
⚠
1 day ago