gabriel / musehub public
backfill_loop.sh bash
107 lines 3.0 KB
Raw
sha256:94ef169c149a452bff7c604ded8b280b19bd477c2dabcb56972780b0b784c7aa Merge 'fix/assignee-sigil-inline' into 'dev' — proposal: As… Human 1 day ago
1 #!/usr/bin/env bash
2 set -euo pipefail
3
4 INSTANCE_ID="i-07547cd20bee2dea5"
5 REGION="us-east-1"
6 BATCHES=2
7 LIMIT=50
8
9 # Parse arguments
10 while [[ $# -gt 0 ]]; do
11 case "$1" in
12 --batches)
13 BATCHES="$2"
14 shift 2
15 ;;
16 --limit)
17 LIMIT="$2"
18 shift 2
19 ;;
20 *)
21 echo "Unknown argument: $1" >&2
22 echo "Usage: $0 [--batches N] [--limit N]" >&2
23 exit 1
24 ;;
25 esac
26 done
27
28 log() {
29 echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*"
30 }
31
32 log "Starting backfill loop: batches=$BATCHES, limit=$LIMIT"
33
34 successful_batches=0
35 failed_batches=0
36
37 for ((batch=1; batch<=BATCHES; batch++)); do
38 log "=== Batch $batch of $BATCHES ==="
39
40 log "Sending SSM command..."
41 COMMAND_ID=$(aws ssm send-command \
42 --region "$REGION" \
43 --instance-ids "$INSTANCE_ID" \
44 --document-name AWS-RunShellScript \
45 --parameters "{\"commands\":[\"sudo docker exec musehub-blue python3 -u /app/deploy/backfill_object_store.py --limit $LIMIT 2>&1\"]}" \
46 --query 'Command.CommandId' \
47 --output text \
48 --no-cli-pager)
49
50 log "CommandId: $COMMAND_ID"
51
52 # Poll loop
53 while true; do
54 log "Polling status for command $COMMAND_ID..."
55 RESULT=$(aws ssm get-command-invocation \
56 --region "$REGION" \
57 --command-id "$COMMAND_ID" \
58 --instance-id "$INSTANCE_ID" \
59 --no-cli-pager \
60 --query '{Status: Status, Output: StandardOutputContent, Error: StandardErrorContent}' \
61 --output json 2>&1) || {
62 log "WARNING: get-command-invocation failed (command may not be registered yet), retrying in 30s..."
63 sleep 30
64 continue
65 }
66
67 STATUS=$(echo "$RESULT" | python3 -c "import sys,json; print(json.load(sys.stdin)['Status'])")
68 log "Status: $STATUS"
69
70 if [[ "$STATUS" == "InProgress" || "$STATUS" == "Pending" ]]; then
71 log "Command still running, waiting 30s..."
72 sleep 30
73 continue
74 elif [[ "$STATUS" == "Success" ]]; then
75 OUTPUT=$(echo "$RESULT" | python3 -c "import sys,json; print(json.load(sys.stdin)['Output'])")
76 REMAINING=$(echo "$OUTPUT" | grep -E "remaining:" || echo "(no remaining line found)")
77 log "Batch $batch SUCCESS"
78 log "Remaining: $REMAINING"
79 successful_batches=$((successful_batches + 1))
80 break
81 elif [[ "$STATUS" == "Failed" || "$STATUS" == "Cancelled" || "$STATUS" == "TimedOut" ]]; then
82 ERROR=$(echo "$RESULT" | python3 -c "import sys,json; d=json.load(sys.stdin); print(d.get('Error') or d.get('Output') or '(no output)')")
83 log "Batch $batch FAILED with status=$STATUS"
84 log "Error output: $ERROR"
85 failed_batches=$((failed_batches + 1))
86 break
87 else
88 log "Unexpected status '$STATUS', waiting 30s..."
89 sleep 30
90 fi
91 done
92
93 # Brief pause between batches to avoid hammering the instance
94 if [[ $batch -lt $BATCHES ]]; then
95 log "Pausing 5s before next batch..."
96 sleep 5
97 fi
98 done
99
100 log "=== Backfill complete ==="
101 log "Successful batches: $successful_batches"
102 log "Failed batches: $failed_batches"
103 log "Total batches: $BATCHES"
104
105 if [[ $failed_batches -gt 0 ]]; then
106 exit 1
107 fi
File History 1 commit
sha256:94ef169c149a452bff7c604ded8b280b19bd477c2dabcb56972780b0b784c7aa Merge 'fix/assignee-sigil-inline' into 'dev' — proposal: As… Human 1 day ago