gabriel / musehub public
backfill_raw_ops_from_commits.py python
86 lines 3.0 KB
Raw
sha256:9b711047e27df5ac91681c74aadfb0e31f69ffd4269932ea52f0c113764d8c0a docs(phase-03): rewrite Domain Protocol — AddressedMergePlu… Sonnet 4.6 minor ⚠ breaking 23 days ago
1 #!/usr/bin/env python3
2 """One-time backfill: re-index stale coarse-op symbol history entries by
3 reading the original structured_delta from each commit's commit_meta.
4
5 Background
6 ----------
7 Before migration 0018, the indexer collapsed all DomainOp types to four
8 coarse values: insert→add, replace/patch→modify, directory_rename→rename.
9 'move' and 'delete' were already stored verbatim.
10
11 This script corrects every 'add', 'modify', and 'rename' entry by looking up
12 the raw op type and full payload from the structured_delta stored in the
13 commit record. Entries where no matching address is found in the delta are
14 left untouched.
15
16 Usage
17 -----
18 # Dry-run: count rows that would be changed (no writes)
19 docker exec musehub python3 /app/deploy/backfill_raw_ops_from_commits.py --dry-run
20
21 # Run for all repos
22 docker exec musehub python3 /app/deploy/backfill_raw_ops_from_commits.py
23
24 # Run for a single repo
25 docker exec musehub python3 /app/deploy/backfill_raw_ops_from_commits.py --repo-id <repo_id>
26
27 # Quiet (no progress output)
28 docker exec musehub python3 /app/deploy/backfill_raw_ops_from_commits.py -q
29 """
30 from __future__ import annotations
31
32 import argparse
33 import asyncio
34 import sys
35 import time
36
37 from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
38 from sqlalchemy.orm import sessionmaker
39
40 sys.path.insert(0, "/app")
41 sys.path.insert(0, "/tmp/devpkgs")
42
43 from musehub.db.database import get_database_url
44 from musehub.services.musehub_symbol_indexer import backfill_raw_ops_from_commits
45
46
async def run(dry_run: bool, quiet: bool, repo_id: str | None) -> int:
    """Run the raw-op backfill once and report how many entries were affected.

    Creates a throwaway async engine from ``get_database_url()``, opens a
    single session, and delegates the actual work to
    ``backfill_raw_ops_from_commits``.

    Args:
        dry_run: When true, the session is never committed and the summary
            line is phrased as "Would update …".
        quiet: When true, the initial progress line is suppressed. The final
            summary line is still printed.
        repo_id: Forwarded to ``backfill_raw_ops_from_commits``; ``None``
            is reported to the user as "all repos".

    Returns:
        The count returned by ``backfill_raw_ops_from_commits``.
    """
    engine = create_async_engine(get_database_url(), echo=False)
    Session = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)

    try:
        async with Session() as session:
            if not quiet:
                scope = f"repo {repo_id}" if repo_id else "all repos"
                mode = "[DRY RUN] " if dry_run else ""
                print(f"{mode}Re-indexing coarse-op entries for {scope} …")

            t0 = time.monotonic()
            count = await backfill_raw_ops_from_commits(session, repo_id=repo_id, dry_run=dry_run)

            # Only persist on a real run; in dry-run mode nothing is committed.
            if not dry_run:
                await session.commit()

            elapsed = time.monotonic() - t0

        verb = "Would update" if dry_run else "Updated"
        print(f"{verb} {count} entr{'y' if count == 1 else 'ies'} ({elapsed:.1f}s)")
    finally:
        # Dispose the engine even when the backfill or the commit raises, so
        # a failed run does not leave the engine undisposed.
        await engine.dispose()

    return count
72
73
def main() -> None:
    """Command-line entry point: parse flags, run the backfill, then exit.

    The module docstring doubles as the ``--help`` description and is shown
    verbatim (raw formatter). The process exits with status 0 when the count
    returned by ``run`` is non-negative, and 1 otherwise.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("--dry-run", action="store_true", help="Count rows without writing")
    parser.add_argument("--repo-id", metavar="REPO_ID", help="Limit to a single repo")
    parser.add_argument("-q", "--quiet", action="store_true", help="Suppress progress output")
    opts = parser.parse_args()

    updated = asyncio.run(
        run(
            dry_run=opts.dry_run,
            quiet=opts.quiet,
            repo_id=opts.repo_id,
        )
    )

    exit_status = 1 if updated < 0 else 0
    sys.exit(exit_status)
83
84
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
File History 1 commit
sha256:9b711047e27df5ac91681c74aadfb0e31f69ffd4269932ea52f0c113764d8c0a docs(phase-03): rewrite Domain Protocol — AddressedMergePlu… Sonnet 4.6 minor 23 days ago