gabriel / musehub public
push.sh bash
239 lines 10.2 KB
Raw
sha256:3ff9c9863a9891bdcde71b4a43228f66d0493e38b7cc1d09fe9eb7de774046b2 feat: add repair-commit wire endpoint (API parity with repa… Opus 4.8 minor ⚠ breaking 1 day ago
1 #!/usr/bin/env bash
2 # MuseHub deploy orchestrator — build, push to ECR, trigger blue-green via SSM.
3 #
4 # Usage:
5 # bash deploy/push.sh staging # deploy to staging only
6 # bash deploy/push.sh prod # deploy to prod only
7 # bash deploy/push.sh staging prod # staging first, then prod
8 #
9 # What it does:
10 # 1. Builds a linux/amd64 Docker image from the local repo.
11 # 2. Tags it with <commit-hash>-<timestamp> for traceability.
12 # 3. Pushes the image to ECR (musehub/musehub).
13 # 4. Sends an SSM command to each target instance to run deploy.sh,
14 # which pulls the image and performs a zero-downtime blue-green swap.
15 # 5. Polls SSM until the deploy completes or fails, streaming the output.
16 #
17 # Prerequisites (one-time, already done):
18 # - AWS CLI configured (musehub-infra as default profile)
19 # - Docker Desktop running
20 # - musehub-infra has ecr push permissions (musehub-ecr-push policy)
21 # - musehub-ec2-ssm role has ecr pull permissions (musehub-ecr-pull policy)
22 # - AWS CLI installed on instances (run deploy/bootstrap-instance.sh once)
23 #
24 # Rollback to a previous image:
25 # IMAGE_TAG=<previous-tag> bash deploy/push.sh staging
26 # (skips build+push, triggers SSM with the specified tag directly)
27
# Strict mode: abort on errors, on unset variables, and on failures anywhere
# inside a pipeline.
set -euo pipefail

# Paths derived from this script's own location:
#   SCRIPT_DIR     directory containing this script
#   REPO_DIR       parent of SCRIPT_DIR
#   ECOSYSTEM_DIR  parent of REPO_DIR
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
REPO_DIR=$(dirname "$SCRIPT_DIR")
ECOSYSTEM_DIR=$(dirname "$REPO_DIR")

# AWS region used for every ECR and SSM call in this script.
REGION="us-east-1"

# ECR coordinates; ECR_IMAGE is the untagged "<registry>/<repository>" name.
ECR_REGISTRY="992382692655.dkr.ecr.us-east-1.amazonaws.com"
ECR_REPO="musehub/musehub"
ECR_IMAGE="${ECR_REGISTRY}/${ECR_REPO}"

# EC2 instance IDs that receive the SSM deploy command, one per environment.
STAGING_INSTANCE="i-07547cd20bee2dea5"
PROD_INSTANCE="i-0855d6efe7fa1a49d"
41
42 # ── Parse targets ─────────────────────────────────────────────────────────────
43
# With no arguments, print the usage text to stdout and exit non-zero.
if (( $# == 0 )); then
  printf '%s\n' \
    "Usage: bash deploy/push.sh [staging] [prod]" \
    " staging deploy to staging.musehub.ai" \
    " prod deploy to musehub.ai" \
    " staging prod staging first, then prod" \
    "" \
    "Rollback (skips build+push, redeploys a previous tag):" \
    " IMAGE_TAG=<tag> bash deploy/push.sh staging"
  exit 1
fi

# Collect the requested environments in the order given; anything other than
# "staging" or "prod" aborts the run before any work is done.
TARGETS=()
for arg; do
  if [[ "$arg" == "staging" || "$arg" == "prod" ]]; then
    TARGETS+=("$arg")
  else
    echo "Unknown target: $arg (must be staging or prod)" >&2
    exit 1
  fi
done
62
63 # ── Helpers ───────────────────────────────────────────────────────────────────
64
# Print all arguments as one "[push]"-prefixed line on stdout.
log() {
  printf '[push] %s\n' "$*"
}
# Print a "[push] ERROR:" line built from all arguments on stderr, then
# terminate the script with exit status 1.
die() {
  printf '[push] ERROR: %s\n' "$*" >&2
  exit 1
}
67
68 # ── Image tag ─────────────────────────────────────────────────────────────────
69
# Return 0 when $1 is a syntactically valid Docker image tag: 1-128 characters
# from [A-Za-z0-9_.-], not starting with '.' or '-'.
#
# The tag is later interpolated unquoted into a shell command that runs on the
# target instance, so anything outside this alphabet must be rejected up front.
_valid_image_tag() {
  local tag_re='^[A-Za-z0-9_][A-Za-z0-9_.-]{0,127}$'
  [[ "$1" =~ $tag_re ]]
}

# If IMAGE_TAG is already set in the environment (rollback mode), reuse it and
# skip build+push. Otherwise derive "<commit-hash>-<timestamp>".
if [[ -n "${IMAGE_TAG:-}" ]]; then
  if ! _valid_image_tag "$IMAGE_TAG"; then
    die "Invalid IMAGE_TAG '${IMAGE_TAG}' (allowed: [A-Za-z0-9_.-], max 128 chars, must not start with '.' or '-')"
  fi
  log "Rollback mode — using existing tag: $IMAGE_TAG"
  SKIP_BUILD=true
else
  # First 8 characters of the commit id reported by `muse rev-parse`, with any
  # "sha256:" prefix removed. Best-effort: if muse or python3 fails for any
  # reason, fall back to the literal "local" rather than aborting the deploy.
  COMMIT_HASH=$(muse -C "$REPO_DIR" rev-parse HEAD --json 2>/dev/null \
    | python3 -c "import sys,json; cid=json.load(sys.stdin)['commit_id']; print(cid.removeprefix('sha256:')[:8])" \
    2>/dev/null || echo "local")
  IMAGE_TAG="${COMMIT_HASH}-$(date +%Y%m%d%H%M%S)"
  SKIP_BUILD=false
fi

FULL_IMAGE="${ECR_IMAGE}:${IMAGE_TAG}"
log "Target image: $FULL_IMAGE"
84
85 # ── Build ─────────────────────────────────────────────────────────────────────
86
if [[ "$SKIP_BUILD" == false ]]; then
  # Fail fast if a required CLI is missing, instead of discovering it only
  # after the image build has finished.
  for _tool in docker aws crane; do
    command -v "$_tool" >/dev/null 2>&1 || die "Required tool not found on PATH: $_tool"
  done

  log "[1/3] Building image for linux/amd64..."
  docker build \
    --platform linux/amd64 \
    --tag "$FULL_IMAGE" \
    --tag "${ECR_IMAGE}:latest" \
    -f "$REPO_DIR/Dockerfile" \
    "$ECOSYSTEM_DIR"
  log "Build complete."

  # ── Push to ECR via crane ─────────────────────────────────────────────────
  # crane bypasses Docker Desktop's VPNKit proxy, which drops connections
  # mid-upload on large layer pushes. Never use `docker push` to ECR.

  log "[2/3] Saving image and pushing to ECR via crane..."

  # The tarball lives in a private, unpredictable temp directory. The EXIT
  # trap removes it even when `set -e` aborts the script mid-push.
  CRANE_TMPDIR=$(mktemp -d "${TMPDIR:-/tmp}/musehub-push.XXXXXX")
  trap 'rm -rf -- "$CRANE_TMPDIR"' EXIT
  CRANE_TAR="${CRANE_TMPDIR}/musehub-${IMAGE_TAG}.tar"

  docker save "$FULL_IMAGE" -o "$CRANE_TAR"
  aws ecr get-login-password --region "$REGION" \
    | crane auth login "$ECR_REGISTRY" --username AWS --password-stdin
  crane push "$CRANE_TAR" "$FULL_IMAGE"
  crane push "$CRANE_TAR" "${ECR_IMAGE}:latest"

  # Free the disk space now; the trap is no longer needed after this.
  rm -rf -- "$CRANE_TMPDIR"
  trap - EXIT
  log "Push complete. Tag: $IMAGE_TAG"
else
  log "[1/3] Skipping build (rollback mode)."
  log "[2/3] Skipping push (rollback mode)."
fi
114
115 # ── Trigger deploy via SSM ────────────────────────────────────────────────────
116
log "[3/3] Triggering deploy on: ${TARGETS[*]}"

# Print one top-level field of a get-command-invocation JSON document.
#   $1 - JSON text
#   $2 - field name
#   $3 - value to print when the JSON cannot be parsed or the field is absent
_ssm_field() {
  local json="$1" key="$2" fallback="$3"
  printf '%s' "$json" \
    | python3 -c 'import sys, json; print(json.load(sys.stdin).get(sys.argv[1], sys.argv[2]), end="")' \
        "$key" "$fallback" 2>/dev/null \
    || printf '%s' "$fallback"
}

# Deploy the current IMAGE_TAG to one instance through SSM and stream its output.
#
# Sends an AWS-RunShellScript command that writes deploy.sh, nginx-cf.conf and
# set-active-slot.sh (from SCRIPT_DIR, base64-encoded) onto the instance,
# exports ECR_IMAGE and IMAGE_TAG, and runs deploy.sh. Then polls the
# invocation every 5 seconds, printing stdout lines not shown before.
#
# Globals:   SCRIPT_DIR, REGION, ECR_IMAGE, IMAGE_TAG (read)
# Arguments: $1 - environment label used in log messages
#            $2 - EC2 instance ID to run the command on
# Returns:   0 when SSM reports Success; 1 on Failed/Cancelled/TimedOut/
#            Cancelling. Exits the script via die after 600s of polling.
deploy_to() {
  local env="$1"
  local instance_id="$2"

  log ""
  log "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
  log "→ Deploying to $env ($instance_id)"
  log "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

  # Payloads are base64-encoded on a single line so they can be embedded in
  # the SSM command text without further quoting.
  local deploy_sh_b64 nginx_conf_b64 set_slot_b64
  deploy_sh_b64=$(base64 -i "$SCRIPT_DIR/deploy.sh" | tr -d '\n')
  nginx_conf_b64=$(base64 -i "$SCRIPT_DIR/nginx-cf.conf" | tr -d '\n')
  set_slot_b64=$(base64 -i "$SCRIPT_DIR/set-active-slot.sh" | tr -d '\n')

  # ── Fire the deploy command ───────────────────────────────────────────────
  local deploy_cmd_id
  deploy_cmd_id=$(aws ssm send-command \
    --region "$REGION" \
    --instance-ids "$instance_id" \
    --document-name "AWS-RunShellScript" \
    --parameters "commands=[
      \"echo '${deploy_sh_b64}' | base64 -d > /opt/musehub/deploy/deploy.sh && chmod +x /opt/musehub/deploy/deploy.sh\",
      \"echo '${nginx_conf_b64}' | base64 -d > /opt/musehub/deploy/nginx-cf.conf\",
      \"echo '${set_slot_b64}' | base64 -d > /usr/local/bin/musehub-set-slot && chmod +x /usr/local/bin/musehub-set-slot\",
      \"export ECR_IMAGE=${ECR_IMAGE}\",
      \"export IMAGE_TAG=${IMAGE_TAG}\",
      \"bash /opt/musehub/deploy/deploy.sh\"
    ]" \
    --comment "musehub ${IMAGE_TAG} → ${env}" \
    --timeout-seconds 600 \
    --query "Command.CommandId" \
    --output text)

  log " SSM command: $deploy_cmd_id"

  # ── Stream deploy output live ─────────────────────────────────────────────
  # Poll every 5s, compare against the number of lines already printed, and
  # show only the new ones.
  log " Live output:"
  log ""

  local lines_seen=0
  local elapsed=0
  local final_status=""
  local invocation cmd_status all_stdout all_stderr total_lines

  while true; do
    sleep 5
    elapsed=$((elapsed + 5))

    # A failed lookup is treated as "still pending" with no output, so a
    # not-yet-registered invocation does not abort the poll loop.
    invocation=$(aws ssm get-command-invocation \
      --region "$REGION" \
      --command-id "$deploy_cmd_id" \
      --instance-id "$instance_id" \
      --output json 2>/dev/null \
      || echo '{"Status":"Pending","StandardOutputContent":"","StandardErrorContent":""}')

    cmd_status=$(_ssm_field "$invocation" "Status" "Pending")
    all_stdout=$(_ssm_field "$invocation" "StandardOutputContent" "")
    all_stderr=$(_ssm_field "$invocation" "StandardErrorContent" "")

    # Empty output is zero lines. Piping an empty string through `wc -l` would
    # report 1, which makes the next poll skip the first real line.
    total_lines=0
    if [[ -n "$all_stdout" ]]; then
      total_lines=$(printf '%s\n' "$all_stdout" | wc -l)
      total_lines=$((total_lines))   # strip the padding BSD wc adds
    fi

    # Print any new stdout lines
    if (( total_lines > lines_seen )); then
      printf '%s\n' "$all_stdout" | tail -n +"$((lines_seen + 1))"
      lines_seen=$total_lines
    fi

    case "$cmd_status" in
      Success)
        final_status="success"
        break
        ;;
      Failed|Cancelled|TimedOut|Cancelling)
        final_status="failed"
        if [[ -n "$all_stderr" ]]; then
          echo "STDERR: $all_stderr"
        fi
        break
        ;;
    esac

    if (( elapsed >= 600 )); then
      die "Deploy timed out after 10 min. SSM command: $deploy_cmd_id"
    fi
  done

  echo ""
  if [[ "$final_status" == "success" ]]; then
    log "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
    log "✅ $env deploy succeeded."
    log "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
    return 0
  else
    log "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
    log "❌ $env deploy FAILED. Full output:"
    log "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
    # Best-effort re-print of the full output; a lookup error here must not
    # mask the deploy failure being reported.
    aws ssm get-command-invocation \
      --region "$REGION" \
      --command-id "$deploy_cmd_id" \
      --instance-id "$instance_id" \
      --query "[StandardOutputContent,StandardErrorContent]" \
      --output text 2>/dev/null || true
    return 1
  fi
}
228
# Deploy to each requested environment in the order given on the command
# line. Under `set -e`, a non-zero return from deploy_to stops the script, so
# later targets are not attempted after a failure.
for target in "${TARGETS[@]}"; do
  if [[ "$target" == "staging" ]]; then
    deploy_to "staging" "$STAGING_INSTANCE"
  elif [[ "$target" == "prod" ]]; then
    deploy_to "prod" "$PROD_INSTANCE"
  fi
done

# Final summary: the tag and full image reference that were deployed.
log ""
log "All done."
log " Tag: $IMAGE_TAG"
log " Image: $FULL_IMAGE"
File History 1 commit
sha256:3ff9c9863a9891bdcde71b4a43228f66d0493e38b7cc1d09fe9eb7de774046b2 feat: add repair-commit wire endpoint (API parity with repa… Opus 4.8 minor 1 day ago