#!/usr/bin/env bash
# MuseHub deploy orchestrator — build, push to ECR, trigger blue-green via SSM.
#
# Usage:
#   bash deploy/push.sh staging        # deploy to staging only
#   bash deploy/push.sh prod           # deploy to prod only
#   bash deploy/push.sh staging prod   # staging first, then prod
#
# What it does:
#   1. Builds a linux/amd64 Docker image from the local repo.
#   2. Tags it with <commit>-<timestamp> for traceability.
#   3. Pushes the image to ECR (musehub/musehub).
#   4. Sends an SSM command to each target instance to run deploy.sh,
#      which pulls the image and performs a zero-downtime blue-green swap.
#   5. Polls SSM until the deploy completes or fails, streaming the output.
#
# Prerequisites (one-time, already done):
#   - AWS CLI configured (musehub-infra as default profile)
#   - Docker Desktop running
#   - musehub-infra has ecr push permissions (musehub-ecr-push policy)
#   - musehub-ec2-ssm role has ecr pull permissions (musehub-ecr-pull policy)
#   - AWS CLI installed on instances (run deploy/bootstrap-instance.sh once)
#
# Local tools used by this script: aws, python3 (always); docker, crane
# (build mode only); muse (optional — the tag falls back to "local-<timestamp>").
#
# Rollback to a previous image:
#   IMAGE_TAG=<tag> bash deploy/push.sh staging
#   (skips build+push, triggers SSM with the specified tag directly)

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_DIR="$(dirname "$SCRIPT_DIR")"
ECOSYSTEM_DIR="$(dirname "$REPO_DIR")"

ECR_REGISTRY="992382692655.dkr.ecr.us-east-1.amazonaws.com"
ECR_REPO="musehub/musehub"
ECR_IMAGE="${ECR_REGISTRY}/${ECR_REPO}"
REGION="us-east-1"

STAGING_INSTANCE="i-07547cd20bee2dea5"
PROD_INSTANCE="i-0855d6efe7fa1a49d"

# ── Parse targets ─────────────────────────────────────────────────────────────
if [ $# -eq 0 ]; then
  echo "Usage: bash deploy/push.sh [staging] [prod]"
  echo " staging deploy to staging.musehub.ai"
  echo " prod deploy to musehub.ai"
  echo " staging prod staging first, then prod"
  echo ""
  echo "Rollback (skips build+push, redeploys a previous tag):"
  echo " IMAGE_TAG=<tag> bash deploy/push.sh staging"
  exit 1
fi

TARGETS=()
for arg in "$@"; do
  case "$arg" in
    staging|prod)
      TARGETS+=("$arg")
      ;;
    *)
      echo "Unknown target: $arg (must be staging or prod)" >&2
      exit 1
      ;;
  esac
done

# ── Helpers ───────────────────────────────────────────────────────────────────
# log: print a message to stdout with the "[push]" prefix.
log() { echo "[push] $*"; }

# die: print an error to stderr and abort the whole script with status 1.
die() { echo "[push] ERROR: $*" >&2; exit 1; }

# require_cmd: abort early with a clear message if a needed tool is missing.
#   $1 - command name to look up on PATH
require_cmd() {
  command -v "$1" >/dev/null 2>&1 || die "Required command not found on PATH: $1"
}

# Temp directory for the image tarball; removed on every exit path.
CRANE_TMPDIR=""
cleanup() {
  if [ -n "$CRANE_TMPDIR" ]; then
    rm -rf -- "$CRANE_TMPDIR"
    CRANE_TMPDIR=""
  fi
}
trap cleanup EXIT

# aws drives SSM; python3 parses its JSON. Without python3 every poll would
# silently read as "Pending" until the timeout, so fail up front instead.
require_cmd aws
require_cmd python3

# ── Image tag ─────────────────────────────────────────────────────────────────
# If IMAGE_TAG is already set (rollback mode), skip build+push.
if [ -n "${IMAGE_TAG:-}" ]; then
  # The tag is interpolated into shell commands executed on the instance, so
  # only accept the Docker tag grammar (no quotes, spaces or metacharacters).
  TAG_RE='^[A-Za-z0-9_][A-Za-z0-9_.-]{0,127}$'
  if ! [[ "$IMAGE_TAG" =~ $TAG_RE ]]; then
    die "Invalid IMAGE_TAG '$IMAGE_TAG' (allowed: letters, digits, '_', '.', '-'; max 128 chars)"
  fi
  log "Rollback mode — using existing tag: $IMAGE_TAG"
  SKIP_BUILD=true
else
  # Short commit id from muse; any failure in the pipeline yields "local".
  COMMIT_HASH=$(muse -C "$REPO_DIR" rev-parse HEAD --json 2>/dev/null \
    | python3 -c "import sys,json; cid=json.load(sys.stdin)['commit_id']; print(cid.removeprefix('sha256:')[:8])" \
    2>/dev/null || echo "local")
  IMAGE_TAG="${COMMIT_HASH}-$(date +%Y%m%d%H%M%S)"
  SKIP_BUILD=false
fi

FULL_IMAGE="${ECR_IMAGE}:${IMAGE_TAG}"
log "Target image: $FULL_IMAGE"

# ── Build ─────────────────────────────────────────────────────────────────────
if [ "$SKIP_BUILD" = false ]; then
  require_cmd docker
  require_cmd crane

  log "[1/3] Building image for linux/amd64..."
  docker build \
    --platform linux/amd64 \
    --tag "$FULL_IMAGE" \
    --tag "${ECR_IMAGE}:latest" \
    -f "$REPO_DIR/Dockerfile" \
    "$ECOSYSTEM_DIR"
  log "Build complete."

  # ── Push to ECR via crane ─────────────────────────────────────────────────
  # crane bypasses Docker Desktop's VPNKit proxy, which drops connections
  # mid-upload on large layer pushes. Never use `docker push` to ECR.
  log "[2/3] Saving image and pushing to ECR via crane..."
  # Private, unpredictable directory; the EXIT trap removes it even if a
  # push fails half-way, so a large tarball is never left behind.
  CRANE_TMPDIR="$(mktemp -d "${TMPDIR:-/tmp}/musehub-push.XXXXXX")"
  CRANE_TAR="${CRANE_TMPDIR}/musehub-${IMAGE_TAG}.tar"
  docker save "$FULL_IMAGE" -o "$CRANE_TAR"
  aws ecr get-login-password --region "$REGION" \
    | crane auth login "$ECR_REGISTRY" --username AWS --password-stdin
  crane push "$CRANE_TAR" "$FULL_IMAGE"
  crane push "$CRANE_TAR" "${ECR_IMAGE}:latest"
  cleanup   # free the disk space now rather than at script exit
  log "Push complete. Tag: $IMAGE_TAG"
else
  log "[1/3] Skipping build (rollback mode)."
  log "[2/3] Skipping push (rollback mode)."
fi

# ── Trigger deploy via SSM ────────────────────────────────────────────────────
log "[3/3] Triggering deploy on: ${TARGETS[*]}"

#######################################
# Deploy the current image tag to one instance and stream the remote output.
# Globals:   SCRIPT_DIR, REGION, ECR_IMAGE, IMAGE_TAG (read)
# Arguments: $1 - environment label used in log lines and the SSM comment
#            $2 - EC2 instance id to send the SSM command to
# Outputs:   progress and remote stdout on stdout
# Returns:   0 if the SSM command reports Success, 1 if it reports
#            Failed/Cancelled/TimedOut/Cancelling; exits the script (via die)
#            on local errors or after 10 minutes without a final status.
#######################################
deploy_to() {
  local env="$1"
  local instance_id="$2"
  local -r poll_interval=5
  local -r max_wait=600

  log ""
  log "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
  log "→ Deploying to $env ($instance_id)"
  log "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

  # The three deploy assets are shipped inline (base64) with the SSM command.
  local asset
  for asset in deploy.sh nginx-cf.conf set-active-slot.sh; do
    [ -r "$SCRIPT_DIR/$asset" ] || die "Missing deploy asset: $SCRIPT_DIR/$asset"
  done

  local deploy_sh_b64
  deploy_sh_b64=$(base64 -i "$SCRIPT_DIR/deploy.sh" | tr -d '\n')
  local nginx_conf_b64
  nginx_conf_b64=$(base64 -i "$SCRIPT_DIR/nginx-cf.conf" | tr -d '\n')
  local set_slot_b64
  set_slot_b64=$(base64 -i "$SCRIPT_DIR/set-active-slot.sh" | tr -d '\n')

  # ── Fire the deploy command ───────────────────────────────────────────────
  local deploy_cmd_id
  deploy_cmd_id=$(aws ssm send-command \
    --region "$REGION" \
    --instance-ids "$instance_id" \
    --document-name "AWS-RunShellScript" \
    --parameters "commands=[
      \"echo '${deploy_sh_b64}' | base64 -d > /opt/musehub/deploy/deploy.sh && chmod +x /opt/musehub/deploy/deploy.sh\",
      \"echo '${nginx_conf_b64}' | base64 -d > /opt/musehub/deploy/nginx-cf.conf\",
      \"echo '${set_slot_b64}' | base64 -d > /usr/local/bin/musehub-set-slot && chmod +x /usr/local/bin/musehub-set-slot\",
      \"export ECR_IMAGE=${ECR_IMAGE}\",
      \"export IMAGE_TAG=${IMAGE_TAG}\",
      \"bash /opt/musehub/deploy/deploy.sh\"
    ]" \
    --comment "musehub ${IMAGE_TAG} → ${env}" \
    --timeout-seconds 600 \
    --query "Command.CommandId" \
    --output text)

  [ -n "$deploy_cmd_id" ] || die "SSM send-command returned no command id for $env"
  log "  SSM command: $deploy_cmd_id"

  # ── Stream deploy output live ─────────────────────────────────────────────
  # Poll every few seconds, diff the reported stdout against what has already
  # been printed, and show only the new lines.
  log "  Live output:"
  log ""

  local lines_seen=0
  local total_lines
  local started=$SECONDS
  local final_status=""
  local invocation cmd_status all_stdout all_stderr

  while true; do
    sleep "$poll_interval"

    # Best-effort: a failed lookup (e.g. invocation not registered yet) is
    # treated as "still pending" rather than aborting the deploy watch.
    invocation=$(aws ssm get-command-invocation \
      --region "$REGION" \
      --command-id "$deploy_cmd_id" \
      --instance-id "$instance_id" \
      --output json 2>/dev/null \
      || echo '{"Status":"Pending","StandardOutputContent":"","StandardErrorContent":""}')

    cmd_status=$(echo "$invocation" \
      | python3 -c "import sys,json; d=json.load(sys.stdin); print(d.get('Status','Pending'))" \
      2>/dev/null || echo "Pending")
    all_stdout=$(echo "$invocation" \
      | python3 -c "import sys,json; d=json.load(sys.stdin); print(d.get('StandardOutputContent',''),end='')" \
      2>/dev/null || echo "")
    all_stderr=$(echo "$invocation" \
      | python3 -c "import sys,json; d=json.load(sys.stdin); print(d.get('StandardErrorContent',''),end='')" \
      2>/dev/null || echo "")

    # Print any new stdout lines. Empty output must count as 0 lines:
    # `echo "" | wc -l` is 1, which would mark the (not yet produced) first
    # line as already seen and drop it from the live stream.
    total_lines=0
    if [ -n "$all_stdout" ]; then
      total_lines=$(( $(printf '%s\n' "$all_stdout" | wc -l) ))
    fi
    if [ "$total_lines" -gt "$lines_seen" ]; then
      printf '%s\n' "$all_stdout" | tail -n +"$((lines_seen + 1))"
      lines_seen=$total_lines
    fi

    case "$cmd_status" in
      Success)
        final_status="success"
        break
        ;;
      Failed|Cancelled|TimedOut|Cancelling)
        final_status="failed"
        if [ -n "$all_stderr" ]; then
          echo "STDERR: $all_stderr"
        fi
        break
        ;;
    esac

    # Wall-clock budget (SECONDS includes time spent in the AWS CLI calls,
    # not just the sleeps).
    if [ "$((SECONDS - started))" -ge "$max_wait" ]; then
      die "Deploy timed out after 10 min. SSM command: $deploy_cmd_id"
    fi
  done

  echo ""
  if [ "$final_status" = "success" ]; then
    log "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
    log "✅ $env deploy succeeded."
    log "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
    return 0
  else
    log "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
    log "❌ $env deploy FAILED. Full output:"
    log "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
    # Best-effort dump; the failure is reported through the return code.
    aws ssm get-command-invocation \
      --region "$REGION" \
      --command-id "$deploy_cmd_id" \
      --instance-id "$instance_id" \
      --query "[StandardOutputContent,StandardErrorContent]" \
      --output text 2>/dev/null || true
    return 1
  fi
}

# deploy_to is deliberately called as a bare statement: under `set -e` a
# non-zero return aborts the script, so a failed staging deploy never proceeds
# to prod. Do not wrap these calls in `if`/`||` — that would disable errexit
# inside the function and let failed aws/base64 steps go unnoticed.
for target in "${TARGETS[@]}"; do
  case "$target" in
    staging) deploy_to "staging" "$STAGING_INSTANCE" ;;
    prod)    deploy_to "prod" "$PROD_INSTANCE" ;;
  esac
done

log ""
log "All done."
log "  Tag: $IMAGE_TAG"
log "  Image: $FULL_IMAGE"