#!/usr/bin/env bash
#
# Runs /app/deploy/backfill_object_store.py inside the "musehub-blue" Docker
# container on a fixed EC2 instance by sending AWS SSM commands, one batch at
# a time, and polls each command until it reaches a terminal status.
#
# Usage: <script> [--batches N] [--limit N]
#   --batches N   number of SSM commands to send sequentially (default: 2)
#   --limit N     value forwarded to the backfill script's --limit (default: 50)
#
# Environment:
#   MAX_POLLS     maximum status polls per batch before the batch is counted
#                 as failed (default: 240)
#
# Exit status: 0 when every batch succeeded; 1 on invalid arguments, when
# sending a command fails, or when at least one batch failed.
set -euo pipefail

readonly INSTANCE_ID="i-07547cd20bee2dea5"
readonly REGION="us-east-1"
# Seconds to wait between two status polls of the same command.
readonly POLL_INTERVAL=30
# Cap on polls per batch so a persistent API failure cannot hang the script.
MAX_POLLS="${MAX_POLLS:-240}"
BATCHES=2
LIMIT=50

# Prints a timestamped message to stdout.
log() {
  echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*"
}

# Prints the message in $1 followed by the usage line to stderr, then exits 1.
usage_error() {
  echo "$1" >&2
  echo "Usage: $0 [--batches N] [--limit N]" >&2
  exit 1
}

# Parses command-line flags into the globals BATCHES and LIMIT.
# Values must be non-negative integers: LIMIT is spliced into a JSON document
# and into a shell command executed with sudo on the instance, and BATCHES is
# used in arithmetic, so anything else is rejected up front.
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --batches | --limit)
        [[ $# -ge 2 ]] || usage_error "Missing value for $1"
        [[ "$2" =~ ^[0-9]+$ ]] \
          || usage_error "Invalid value for $1: '$2' (expected a non-negative integer)"
        # 10# forces base 10 so a value such as "08" is not read as octal.
        if [[ "$1" == "--batches" ]]; then
          BATCHES=$((10#$2))
        else
          LIMIT=$((10#$2))
        fi
        shift 2
        ;;
      *)
        usage_error "Unknown argument: $1"
        ;;
    esac
  done

  if ! [[ "$MAX_POLLS" =~ ^[1-9][0-9]*$ ]]; then
    echo "MAX_POLLS must be a positive integer, got '$MAX_POLLS'" >&2
    exit 1
  fi
  MAX_POLLS=$((10#$MAX_POLLS))
}

# Reads a JSON object on stdin and prints the value stored under key $1
# (an empty string when the key is absent or null).
json_field() {
  python3 -c 'import sys, json; v = json.load(sys.stdin).get(sys.argv[1]); print("" if v is None else v)' "$1"
}

# Sends one backfill command through SSM and prints its CommandId on stdout.
# Returns the aws CLI's exit status.
send_backfill_command() {
  aws ssm send-command \
    --region "$REGION" \
    --instance-ids "$INSTANCE_ID" \
    --document-name AWS-RunShellScript \
    --parameters "{\"commands\":[\"sudo docker exec musehub-blue python3 -u /app/deploy/backfill_object_store.py --limit $LIMIT 2>&1\"]}" \
    --query 'Command.CommandId' \
    --output text \
    --no-cli-pager
}

# Polls the SSM command $1 (belonging to batch number $2) until it finishes.
# Returns 0 when the command reports Success; returns 1 when it reports
# Failed/Cancelled/TimedOut or when MAX_POLLS polls pass without a terminal
# status. Every error is handled explicitly because callers invoke this
# function inside an `if`, where `set -e` is not in effect.
wait_for_command() {
  local command_id=$1
  local batch=$2
  local polls=0
  local result status output remaining details

  while ((polls < MAX_POLLS)); do
    polls=$((polls + 1))
    log "Polling status for command $command_id..."

    # stderr is deliberately NOT merged into the captured text: CLI warnings
    # would corrupt the JSON, and letting them reach the terminal shows why a
    # poll failed.
    if ! result=$(aws ssm get-command-invocation \
      --region "$REGION" \
      --command-id "$command_id" \
      --instance-id "$INSTANCE_ID" \
      --no-cli-pager \
      --query '{Status: Status, Output: StandardOutputContent, Error: StandardErrorContent}' \
      --output json); then
      log "WARNING: get-command-invocation failed (command may not be registered yet), retrying in ${POLL_INTERVAL}s..."
      sleep "$POLL_INTERVAL"
      continue
    fi

    if ! status=$(json_field Status <<<"$result"); then
      log "WARNING: could not parse get-command-invocation response, retrying in ${POLL_INTERVAL}s..."
      sleep "$POLL_INTERVAL"
      continue
    fi
    log "Status: $status"

    case "$status" in
      InProgress | Pending)
        log "Command still running, waiting ${POLL_INTERVAL}s..."
        sleep "$POLL_INTERVAL"
        ;;
      Success)
        output=$(json_field Output <<<"$result") || output=""
        # Surface the progress line(s) printed by the backfill script, if any.
        remaining=$(grep -E "remaining:" <<<"$output" || echo "(no remaining line found)")
        log "Batch $batch SUCCESS"
        log "Remaining: $remaining"
        return 0
        ;;
      Failed | Cancelled | TimedOut)
        # Prefer stderr content, fall back to stdout, then to a placeholder.
        details=$(json_field Error <<<"$result") || details=""
        if [[ -z "$details" ]]; then
          details=$(json_field Output <<<"$result") || details=""
        fi
        [[ -n "$details" ]] || details="(no output)"
        log "Batch $batch FAILED with status=$status"
        log "Error output: $details"
        return 1
        ;;
      *)
        # Any other status is treated as non-terminal and polled again.
        log "Unexpected status '$status', waiting ${POLL_INTERVAL}s..."
        sleep "$POLL_INTERVAL"
        ;;
    esac
  done

  log "Batch $batch FAILED: command $command_id reached no terminal status after $MAX_POLLS polls"
  return 1
}

# Entry point: sends BATCHES commands one after another, tallies the results
# and exits 1 if any batch failed.
main() {
  parse_args "$@"

  local successful_batches=0
  local failed_batches=0
  local batch command_id

  log "Starting backfill loop: batches=$BATCHES, limit=$LIMIT"

  for ((batch = 1; batch <= BATCHES; batch++)); do
    log "=== Batch $batch of $BATCHES ==="
    log "Sending SSM command..."

    if ! command_id=$(send_backfill_command); then
      log "ERROR: send-command failed for batch $batch, aborting"
      exit 1
    fi
    # With --output text a missing field is printed as the literal "None".
    if [[ -z "$command_id" || "$command_id" == "None" ]]; then
      log "ERROR: send-command returned no CommandId for batch $batch, aborting"
      exit 1
    fi
    log "CommandId: $command_id"

    if wait_for_command "$command_id" "$batch"; then
      successful_batches=$((successful_batches + 1))
    else
      failed_batches=$((failed_batches + 1))
    fi

    # Brief pause between batches to avoid hammering the instance.
    if [[ $batch -lt $BATCHES ]]; then
      log "Pausing 5s before next batch..."
      sleep 5
    fi
  done

  log "=== Backfill complete ==="
  log "Successful batches: $successful_batches"
  log "Failed batches: $failed_batches"
  log "Total batches: $BATCHES"

  if [[ $failed_batches -gt 0 ]]; then
    exit 1
  fi
}

main "$@"