backfill_raw_ops_from_commits.py
python
sha256:9b711047e27df5ac91681c74aadfb0e31f69ffd4269932ea52f0c113764d8c0a
docs(phase-03): rewrite Domain Protocol — AddressedMergePlu…
Sonnet 4.6
minor
⚠ breaking
23 days ago
| 1 | #!/usr/bin/env python3 |
| 2 | """One-time backfill: re-index stale coarse-op symbol history entries by |
| 3 | reading the original structured_delta from each commit's commit_meta. |
| 4 | |
| 5 | Background |
| 6 | ---------- |
| 7 | Before migration 0018, the indexer collapsed all DomainOp types to four |
| 8 | coarse values: insert→add, replace/patch→modify, directory_rename→rename. |
| 9 | 'move' and 'delete' were already stored verbatim. |
| 10 | |
| 11 | This script corrects every 'add', 'modify', and 'rename' entry by looking up |
| 12 | the raw op type and full payload from the structured_delta stored in the |
| 13 | commit record. Entries where no matching address is found in the delta are |
| 14 | left untouched. |
| 15 | |
| 16 | Usage |
| 17 | ----- |
| 18 | # Dry-run: count rows that would be changed (no writes) |
| 19 | docker exec musehub python3 /app/deploy/backfill_raw_ops_from_commits.py --dry-run |
| 20 | |
| 21 | # Run for all repos |
| 22 | docker exec musehub python3 /app/deploy/backfill_raw_ops_from_commits.py |
| 23 | |
| 24 | # Run for a single repo |
| 25 | docker exec musehub python3 /app/deploy/backfill_raw_ops_from_commits.py --repo-id <repo_id> |
| 26 | |
| 27 | # Quiet (no progress output) |
| 28 | docker exec musehub python3 /app/deploy/backfill_raw_ops_from_commits.py -q |
| 29 | """ |
| 30 | from __future__ import annotations |
| 31 | |
| 32 | import argparse |
| 33 | import asyncio |
| 34 | import sys |
| 35 | import time |
| 36 | |
| 37 | from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine |
| 38 | from sqlalchemy.orm import sessionmaker |
| 39 | |
| 40 | sys.path.insert(0, "/app") |
| 41 | sys.path.insert(0, "/tmp/devpkgs") |
| 42 | |
| 43 | from musehub.db.database import get_database_url |
| 44 | from musehub.services.musehub_symbol_indexer import backfill_raw_ops_from_commits |
| 45 | |
| 46 | |
async def run(dry_run: bool, quiet: bool, repo_id: str | None) -> int:
    """Run the raw-op backfill against the database and print a summary.

    Creates an async engine from ``get_database_url()``, opens one session,
    delegates the actual work to ``backfill_raw_ops_from_commits`` and commits
    the session unless this is a dry run.

    Args:
        dry_run: When true the session is never committed and the summary
            line reads "Would update ..." instead of "Updated ...".
        quiet: Suppress the initial progress banner. The final summary line
            is printed regardless.
        repo_id: Restrict the backfill to this repo; ``None`` means all repos.

    Returns:
        The entry count reported by ``backfill_raw_ops_from_commits``.
    """
    engine = create_async_engine(get_database_url(), echo=False)
    try:
        Session = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)

        async with Session() as session:
            if not quiet:
                scope = f"repo {repo_id}" if repo_id else "all repos"
                mode = "[DRY RUN] " if dry_run else ""
                print(f"{mode}Re-indexing coarse-op entries for {scope} …")

            t0 = time.monotonic()
            count = await backfill_raw_ops_from_commits(session, repo_id=repo_id, dry_run=dry_run)

            if not dry_run:
                await session.commit()

            # Elapsed time deliberately includes the commit, as before.
            elapsed = time.monotonic() - t0

        noun = f"entr{'y' if count == 1 else 'ies'}"
        if dry_run:
            print(f"Would update {count} {noun} ({elapsed:.1f}s)")
        else:
            print(f"Updated {count} {noun} ({elapsed:.1f}s)")
    finally:
        # Dispose the pool even when the backfill or commit raises, so
        # connections are closed while the event loop is still running.
        await engine.dispose()

    return count
| 72 | |
| 73 | |
def main() -> None:
    """Parse command-line flags, run the backfill, and exit with a status code.

    The process exits 0 when the reported count is non-negative, 1 otherwise.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Count rows without writing",
    )
    parser.add_argument(
        "--repo-id",
        metavar="REPO_ID",
        help="Limit to a single repo",
    )
    parser.add_argument(
        "-q",
        "--quiet",
        action="store_true",
        help="Suppress progress output",
    )
    options = parser.parse_args()

    backfill = run(
        dry_run=options.dry_run,
        quiet=options.quiet,
        repo_id=options.repo_id,
    )
    count = asyncio.run(backfill)

    exit_status = 1 if count < 0 else 0
    sys.exit(exit_status)


# Only launch the CLI when executed as a script, not when imported.
if __name__ == "__main__":
    main()
File History
1 commit
sha256:9b711047e27df5ac91681c74aadfb0e31f69ffd4269932ea52f0c113764d8c0a
docs(phase-03): rewrite Domain Protocol — AddressedMergePlu…
Sonnet 4.6
minor
⚠
23 days ago