gabriel / musehub public
count_compressed.py python
68 lines 2.0 KB
Raw
sha256:3ff9c9863a9891bdcde71b4a43228f66d0493e38b7cc1d09fe9eb7de774046b2 feat: add repair-commit wire endpoint (API parity with repa… Opus 4.8 minor ⚠ breaking 1 day ago
1 """Count how many objects in R2 are still zlib-compressed.
2
3 Prints a progress line every 100 objects and a final summary.
4 Run: docker exec musehub-blue python3 /app/deploy/count_compressed.py
5 """
6 import sys
7
8 import sqlalchemy as sa
9 from sqlalchemy import create_engine
10 from sqlalchemy.orm import Session
11
12 from musehub.config import settings
13 from musehub.db.musehub_repo_models import MusehubObject
14 from musehub.storage import get_backend
15
# Two-byte prefixes treated as "this object is a zlib stream". All share the
# 0x78 first byte; the second byte differs per variant (see RFC 1950 for the
# header layout).
ZLIB_MAGIC = (b"\x78\x01", b"\x78\x9c", b"\x78\xda", b"\x78\x5e")


def check_header(backend, oid: str) -> bool:
    """Return True if the object starts with a zlib magic header.

    Only the first two bytes of the object are requested (ranged GET), so the
    check stays cheap regardless of object size.

    Args:
        backend: Storage backend exposing ``_get_client()``, ``_key(oid)`` and
            ``_bucket``.
        oid: ID of the object to inspect.

    Returns:
        True when the first two bytes equal one of ``ZLIB_MAGIC``. False
        otherwise -- including when the fetch fails, in which case the error
        is printed and the object is reported as not compressed.
    """
    client = backend._get_client()
    key = backend._key(oid)
    try:
        resp = client.get_object(Bucket=backend._bucket, Key=key, Range="bytes=0-1")
        body = resp["Body"]
        try:
            header = body.read(2)
        finally:
            # Release the underlying connection explicitly instead of relying
            # on garbage collection; matters if the read fails or the server
            # returned more than the two requested bytes.
            body.close()
        return header in ZLIB_MAGIC
    except Exception as e:
        # Deliberately best-effort: one unreadable object must not abort the
        # whole scan, so log it and carry on.
        print(f" ERROR {oid[:20]}: {e}", flush=True)
        return False
30
31
def main() -> None:
    """Scan all live S3-backed objects and report how many look zlib-compressed.

    Loads the IDs of every object whose ``storage_uri`` starts with ``s3://``
    and whose ``deleted_at`` is NULL, checks each one with ``check_header``,
    and prints a running tally every 100 objects and after the last one.
    Exits with status 1 if at least one compressed object was found, else 0.
    """
    # Strip the async driver suffix so a plain synchronous engine is created.
    sync_url = settings.database_url
    for async_driver in ("+asyncpg", "+aiosqlite"):
        sync_url = sync_url.replace(async_driver, "")
    engine = create_engine(sync_url)
    backend = get_backend()

    live_s3_objects = (
        sa.select(MusehubObject.object_id)
        .where(
            MusehubObject.storage_uri.like("s3://%"),
            MusehubObject.deleted_at.is_(None),
        )
        .order_by(MusehubObject.object_id)
    )
    # The session is only needed to fetch the ID list.
    with Session(engine) as session:
        rows = session.execute(live_s3_objects).scalars().all()

    total = len(rows)
    print(f"Total objects: {total}", flush=True)

    compressed = 0
    for i, oid in enumerate(rows, 1):
        if check_header(backend, oid):
            compressed += 1

        if i == total or i % 100 == 0:
            # Everything checked so far that was not compressed counts as plain.
            plain = i - compressed
            print(f" [{i}/{total}] plain={plain} compressed={compressed}", flush=True)

    plain = total - compressed
    print(f"\nDone. plain={plain} compressed={compressed}", flush=True)
    sys.exit(1 if compressed else 0)
65
66
# Run the scan only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
File History 1 commit
sha256:3ff9c9863a9891bdcde71b4a43228f66d0493e38b7cc1d09fe9eb7de774046b2 feat: add repair-commit wire endpoint (API parity with repa… Opus 4.8 minor 1 day ago