#!/usr/bin/env python3
"""One-time backfill: re-index stale coarse-op symbol history entries by reading
the original structured_delta from each commit's commit_meta.

Background
----------
Before migration 0018, the indexer collapsed all DomainOp types to four coarse
values: insert→add, replace/patch→modify, directory_rename→rename. 'move' and
'delete' were already stored verbatim. This script corrects every 'add',
'modify', and 'rename' entry by looking up the raw op type and full payload
from the structured_delta stored in the commit record. Entries where no
matching address is found in the delta are left untouched.

Usage
-----
    # Dry-run: count rows that would be changed (no writes)
    docker exec musehub python3 /app/deploy/backfill_raw_ops_from_commits.py --dry-run

    # Run for all repos
    docker exec musehub python3 /app/deploy/backfill_raw_ops_from_commits.py

    # Run for a single repo
    docker exec musehub python3 /app/deploy/backfill_raw_ops_from_commits.py --repo-id REPO_ID

    # Quiet (no progress output)
    docker exec musehub python3 /app/deploy/backfill_raw_ops_from_commits.py -q
"""
from __future__ import annotations

import argparse
import asyncio
import sys
import time

from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
from sqlalchemy.orm import sessionmaker

# These path entries must be in place before the project imports below,
# which is why those imports are not grouped with the ones above.
sys.path.insert(0, "/app")
sys.path.insert(0, "/tmp/devpkgs")

from musehub.db.database import get_database_url  # noqa: E402
from musehub.services.musehub_symbol_indexer import (  # noqa: E402
    backfill_raw_ops_from_commits,
)


async def run(dry_run: bool, quiet: bool, repo_id: str | None) -> int:
    """Run the backfill inside a single async session and report the result.

    Args:
        dry_run: When true, the session is never committed and the summary is
            phrased as "Would update ...".
        quiet: When true, the start-of-run progress line is suppressed. The
            final summary line is always printed.
        repo_id: Restrict the backfill to this repo; ``None`` (or an empty
            string) is reported as "all repos".

    Returns:
        The count returned by ``backfill_raw_ops_from_commits``.
    """
    engine = create_async_engine(get_database_url(), echo=False)
    try:
        Session = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
        async with Session() as session:
            if not quiet:
                scope = f"repo {repo_id}" if repo_id else "all repos"
                mode = "[DRY RUN] " if dry_run else ""
                print(f"{mode}Re-indexing coarse-op entries for {scope} …")

            t0 = time.monotonic()
            count = await backfill_raw_ops_from_commits(
                session, repo_id=repo_id, dry_run=dry_run
            )
            if not dry_run:
                await session.commit()
            elapsed = time.monotonic() - t0

            if dry_run:
                print(f"Would update {count} entr{'y' if count == 1 else 'ies'} ({elapsed:.1f}s)")
            else:
                print(f"Updated {count} entr{'y' if count == 1 else 'ies'} ({elapsed:.1f}s)")
    finally:
        # Release the connection pool even when the backfill or commit raises.
        await engine.dispose()
    return count


def main() -> None:
    """Parse command-line arguments, run the backfill, and exit the process."""
    p = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    p.add_argument("--dry-run", action="store_true", help="Count rows without writing")
    p.add_argument("--repo-id", metavar="REPO_ID", help="Limit to a single repo")
    p.add_argument("-q", "--quiet", action="store_true", help="Suppress progress output")
    args = p.parse_args()

    count = asyncio.run(run(dry_run=args.dry_run, quiet=args.quiet, repo_id=args.repo_id))
    # NOTE(review): exit status is 1 only for a negative count; whether the
    # backfill helper can ever return one is not visible here — confirm.
    sys.exit(0 if count >= 0 else 1)


if __name__ == "__main__":
    main()