"""Backfill symbol history entries inferred from snapshot diffs. For every commit whose files were never indexed via structured_delta, this script diffs adjacent snapshot manifests and creates history rows with op in (insert, replace, delete, move). Usage: docker exec musehub python3 /app/deploy/backfill_history_from_snapshots.py --dry-run docker exec musehub python3 /app/deploy/backfill_history_from_snapshots.py docker exec musehub python3 /app/deploy/backfill_history_from_snapshots.py --repo-id """ from __future__ import annotations import argparse import asyncio import sys import time from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine from sqlalchemy.orm import sessionmaker from musehub.db.database import get_database_url from musehub.services.musehub_symbol_indexer import backfill_history_from_snapshots async def run(repo_id: str | None, dry_run: bool) -> None: engine = create_async_engine(get_database_url(), echo=False) async_session = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False) async with async_session() as session: t0 = time.monotonic() count = await backfill_history_from_snapshots( session, repo_id=repo_id, dry_run=dry_run ) if not dry_run: await session.commit() elapsed = time.monotonic() - t0 verb = "Would create" if dry_run else "Created" scope = f" for repo {repo_id}" if repo_id else " across all repos" print(f"{verb} {count} history entries{scope} in {elapsed:.1f}s") def main() -> None: parser = argparse.ArgumentParser(description=__doc__) parser.add_argument("--dry-run", action="store_true") parser.add_argument("--repo-id", default=None) parser.add_argument("-q", "--quiet", action="store_true") args = parser.parse_args() if args.quiet: import logging logging.disable(logging.CRITICAL) asyncio.run(run(args.repo_id, args.dry_run)) if __name__ == "__main__": main()