"""One-shot backfill: enqueue intel jobs for specified repos. Usage: python deploy/backfill_intel_jobs.py gabriel/muse gabriel/musehub """ from __future__ import annotations import asyncio import sys import os # Ensure app root is on path when run inside the container sys.path.insert(0, "/app") from sqlalchemy import select, text from musehub.db.database import AsyncSessionLocal, init_db from musehub.db.musehub_repo_models import MusehubRepo, MusehubCommit, MusehubCommitRef from musehub.services.musehub_jobs import enqueue_job async def backfill(slugs: list[str]) -> None: await init_db() async with AsyncSessionLocal() as session: for slug in slugs: owner, name = slug.split("/", 1) repo_id: str | None = (await session.execute( select(MusehubRepo.repo_id).where( MusehubRepo.owner == owner, MusehubRepo.name == name, ).limit(1) )).scalar_one_or_none() if not repo_id: print(f" ✗ {slug} — not found") continue # Get head commit for every branch rows = (await session.execute( select(MusehubCommit.branch, MusehubCommit.commit_id) .join(MusehubCommitRef, MusehubCommitRef.commit_id == MusehubCommit.commit_id) .where(MusehubCommitRef.repo_id == repo_id) .order_by(MusehubCommit.timestamp.desc()) )).all() if not rows: print(f" ✗ {slug} — no commits") continue # Dedupe: keep newest commit per branch seen: set[str] = set() branches: list[tuple[str, str]] = [] for branch, commit_id in rows: if branch and branch not in seen: seen.add(branch) branches.append((branch, commit_id)) for branch, head in branches: payload = {"head": head, "branch": branch} for job_type in ("intel.structural", "intel.code", "push.file_last_commits"): job_id = await enqueue_job(session, repo_id, job_type, payload) status = job_id[:16] + "…" if job_id else "already pending" print(f" ✓ {slug} [{branch}] {job_type} → {status}") await session.commit() print("\nAll jobs committed — worker will pick them up.") if __name__ == "__main__": targets = sys.argv[1:] or ["gabriel/muse", "gabriel/musehub"] asyncio.run(backfill(targets))