deploy.sh
bash
sha256:7d6dd8f4a89e2d1fef2d84f6e65feaff51385d382f466766b7f690a22ec18e32
fix: fall back to DB ancestry check when mpack-only fast-fo…
Sonnet 4.6
patch
6 days ago
| 1 | #!/usr/bin/env bash |
| 2 | # Zero-downtime blue-green deploy for MuseHub. |
| 3 | # |
| 4 | # Strategy: |
| 5 | # Two slots — blue (port 1337) and green (port 1338). |
| 6 | # The active slot serves traffic via nginx. The inactive slot is stopped. |
| 7 | # Deploy: |
| 8 | # 1. Pull the new image from ECR (old slot keeps serving). |
| 9 | # 2. Run migrations against the live DB (before swap — forward-compatible). |
| 10 | # 3. Start the inactive slot with the new image. |
| 11 | # 4. Health-check the new slot. |
| 12 | # 5. Flip nginx to the new slot (nginx -s reload — instant, zero downtime). |
| 13 | # 6. Stop the old slot. |
| 14 | # |
| 15 | # Called by deploy/push.sh via SSM — do not run directly in production. |
| 16 | # For manual use on the instance: |
| 17 | # ECR_IMAGE=992382692655.dkr.ecr.us-east-1.amazonaws.com/musehub/musehub \ |
| 18 | # IMAGE_TAG=<tag> bash deploy/deploy.sh |
| 19 | # |
| 20 | # First-time setup: |
| 21 | # bash deploy/deploy.sh --init |
| 22 | # (Installs the musehub-set-slot helper and unconditionally points nginx at the blue slot.) |
| 23 | |
# Abort on any unhandled failure, unset variable, or failing pipeline stage.
set -euo pipefail

# Fixed locations on the instance.
APP_DIR="/opt/musehub"
DEPLOY_LOG="/tmp/musehub-deploy.log"

# From here on stdout and stderr both flow through tee, which appends them to
# the log file so push.sh can stream it live via a second SSM call.
exec > >(tee -a "$DEPLOY_LOG") 2>&1

# Run separator: appended straight to the log file, so it is not echoed to the
# caller's stdout.
printf '\n=== deploy started at %s ===\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" >> "$DEPLOY_LOG"

# Slot bookkeeping files.
SLOT_FILE="$APP_DIR/.active-slot"
NGINX_PORT_FILE="/etc/nginx/musehub-active-port"

# Image coordinates. ECR_IMAGE and IMAGE_TAG may be supplied by the caller's
# environment; otherwise the defaults below apply.
REGION="us-east-1"
ECR_REGISTRY="992382692655.dkr.ecr.us-east-1.amazonaws.com"
ECR_IMAGE="${ECR_IMAGE:-${ECR_REGISTRY}/musehub/musehub}"
IMAGE_TAG="${IMAGE_TAG:-latest}"
FULL_IMAGE="${ECR_IMAGE}:${IMAGE_TAG}"

# Per-slot health endpoints (blue listens on 1337, green on 1338).
HEALTH_URL_BLUE="http://127.0.0.1:1337/healthz"
HEALTH_URL_GREEN="http://127.0.0.1:1338/healthz"
# Number of health probes; health_check sleeps 2s between probes.
HEALTH_RETRIES=30

cd "$APP_DIR"
| 45 | |
| 46 | # ── Helpers ─────────────────────────────────────────────────────────────────── |
| 47 | |
# Print an informational message to stdout, prefixed with "[deploy] ".
# Arguments: $* - message words (joined with single spaces)
# printf '%s' is used so the message is always emitted verbatim, regardless of
# how the running shell's echo treats backslashes.
log() {
  printf '[deploy] %s\n' "$*"
}

# Print an error message to stderr, prefixed with "[deploy] ERROR: ", and
# terminate the script with exit status 1.
# Arguments: $* - message words (joined with single spaces)
die() {
  printf '[deploy] ERROR: %s\n' "$*" >&2
  exit 1
}
| 50 | |
# Poll a slot's health endpoint until it answers successfully.
# Globals:   HEALTH_RETRIES (read) - maximum number of probes
# Arguments: $1 - health URL to probe
#            $2 - slot name, used only in log messages
# Returns:   0 as soon as one probe succeeds; otherwise calls die, which exits
#            the script with status 1.
health_check() {
  local url="$1"
  local slot="$2"
  local attempt # kept local so the counter never leaks into the caller's scope

  log "Health-checking $slot at $url ..."
  for ((attempt = 1; attempt <= HEALTH_RETRIES; attempt++)); do
    # -f turns HTTP error statuses into a non-zero exit; each probe is capped
    # at 3 seconds.
    if curl -sf --max-time 3 "$url" > /dev/null 2>&1; then
      log "$slot is healthy (attempt $attempt)"
      return 0
    fi
    # Pause between probes only; sleeping after the last one just delays the
    # failure report.
    if ((attempt < HEALTH_RETRIES)); then
      sleep 2
    fi
  done
  die "$slot failed health check after $((HEALTH_RETRIES * 2))s"
}
| 64 | |
# Switch live traffic to the given slot by delegating to the musehub-set-slot
# helper (installed by --init), then record the switch in the deploy output.
# Arguments: $1 - slot name handed to musehub-set-slot
# NOTE(review): the helper's exact effects are not visible in this script;
# presumably it rewrites the active-port file and reloads nginx.
nginx_point_to() {
  local target_slot="$1"

  sudo musehub-set-slot "$target_slot"
  log "nginx now pointing to $target_slot"
}
| 70 | |
# Repair the active-port file when it does not contain a full nginx upstream
# directive (for example a bare port number left by a manual intervention).
# Called once at startup so a botched manual edit cannot be the root cause of
# a new deploy failing.
# Globals:   NGINX_PORT_FILE (read), SLOT_FILE (read)
# Arguments: none
# Returns:   0 when the file is absent or already well-formed; otherwise the
#            result of re-applying the slot through musehub-set-slot.
sanitize_nginx_port_file() {
  [[ -f "$NGINX_PORT_FILE" ]] || return 0

  # grep reads the file directly: piping `echo` into `grep -q` under pipefail
  # can report a false mismatch if grep exits before echo finishes writing.
  if grep -qE '^server 127\.0\.0\.1:[0-9]+;$' -- "$NGINX_PORT_FILE"; then
    return 0 # already correct, nothing to do
  fi

  # Derive the slot from .active-slot; anything missing, unreadable, or
  # unrecognised falls back to blue.
  local slot=""
  if [[ -r "$SLOT_FILE" ]]; then
    slot=$(< "$SLOT_FILE")
  fi
  case "$slot" in
    blue | green) ;;
    *) slot="blue" ;;
  esac

  log "WARNING: $NGINX_PORT_FILE has unexpected content — correcting via musehub-set-slot $slot"
  sudo musehub-set-slot "$slot"
  log "Sanitized active-port file; nginx reloaded."
}
| 92 | |
| 93 | # ── Init mode ───────────────────────────────────────────────────────────────── |
| 94 | |
# First-time setup: install the slot-switching helper from the checked-out
# deploy directory, point nginx at blue, and stop without deploying anything.
case "${1:-}" in
  --init)
    log "Init: installing musehub-set-slot and pointing nginx to blue"
    sudo cp "$APP_DIR/deploy/set-active-slot.sh" /usr/local/bin/musehub-set-slot
    sudo chmod +x /usr/local/bin/musehub-set-slot
    sudo musehub-set-slot blue
    log "Done. Run 'bash deploy/deploy.sh' (with ECR_IMAGE and IMAGE_TAG set) to deploy."
    exit 0
    ;;
esac
| 103 | |
| 104 | # ── Validate required env vars ──────────────────────────────────────────────── |
| 105 | |
# Refuse to continue without image coordinates.
# NOTE(review): ECR_IMAGE and IMAGE_TAG are both given non-empty defaults near
# the top of this script, so as written neither guard can fire; they only take
# effect if those defaults are removed.
if [ -z "${ECR_IMAGE:-}" ]; then
  die "ECR_IMAGE is not set."
fi
if [ -z "${IMAGE_TAG:-}" ]; then
  die "IMAGE_TAG is not set."
fi
| 108 | |
| 109 | # ── Read active slot ────────────────────────────────────────────────────────── |
| 110 | |
if [ ! -f "$SLOT_FILE" ]; then
  die ".active-slot not found. Run: bash deploy/deploy.sh --init"
fi

# Strip all whitespace so a stray newline, space, or CR from a manual edit is
# tolerated.
ACTIVE_SLOT=$(tr -d '[:space:]' < "$SLOT_FILE")

# Deploy to whichever slot is NOT active. The value is validated explicitly:
# guessing on a corrupt file could pick the live slot as the deploy target,
# and step 3 force-removes the target container before starting the new one.
case "$ACTIVE_SLOT" in
  blue)
    NEW_SLOT="green"
    NEW_PORT=1338
    OLD_CONTAINER="musehub-blue"
    NEW_CONTAINER="musehub-green"
    HEALTH_URL="$HEALTH_URL_GREEN"
    ;;
  green)
    NEW_SLOT="blue"
    NEW_PORT=1337
    OLD_CONTAINER="musehub-green"
    NEW_CONTAINER="musehub-blue"
    HEALTH_URL="$HEALTH_URL_BLUE"
    ;;
  *)
    die "$SLOT_FILE contains '$ACTIVE_SLOT' (expected 'blue' or 'green'). Set it to the slot nginx is currently serving and re-run."
    ;;
esac

log "Image: $FULL_IMAGE"
log "Active slot: $ACTIVE_SLOT → deploying to: $NEW_SLOT (port $NEW_PORT)"

# Guard: ensure the nginx upstream file is well-formed before we touch anything.
sanitize_nginx_port_file
| 135 | |
| 136 | # ── Step 0: Apply nginx config if updated ──────────────────────────────────── |
# Re-render deploy/nginx-cf.conf for this host's domain (taken from the
# currently installed config) and install it when the content changed.
# Safe to run on every deploy.

# Print the first name of the first `server_name` directive in nginx config $1.
# Prints nothing and still returns 0 when no such directive exists, so callers
# running under `set -e`/pipefail can test for an empty result. Only lines
# whose first word is exactly `server_name` count, so commented-out directives
# are ignored.
_nginx_conf_domain() {
  awk '$1 == "server_name" { sub(/;$/, "", $2); print $2; exit }' "$1"
}

# Print template file $1 with every DOMAIN_PLACEHOLDER replaced by $2.
# The replacement is inserted verbatim (quoted expansion), so characters such
# as '/', '&' or '\' in $2 cannot alter the substitution.
_render_nginx_conf() {
  local template rendered
  template=$(< "$1") || return
  rendered=${template//DOMAIN_PLACEHOLDER/"$2"}
  printf '%s\n' "$rendered"
}

NGINX_CONF_SRC="$APP_DIR/deploy/nginx-cf.conf"
NGINX_CONF_DEST="/etc/nginx/sites-available/musehub-staging"
NGINX_CONF_DEST_PROD="/etc/nginx/sites-available/musehub"

if [ -f "$NGINX_CONF_SRC" ]; then
  # Detect which installed config exists (staging is checked before prod).
  if [ -f "$NGINX_CONF_DEST" ]; then
    NGINX_CONF_INSTALLED="$NGINX_CONF_DEST"
  elif [ -f "$NGINX_CONF_DEST_PROD" ]; then
    NGINX_CONF_INSTALLED="$NGINX_CONF_DEST_PROD"
  else
    NGINX_CONF_INSTALLED=""
  fi

  if [ -n "$NGINX_CONF_INSTALLED" ]; then
    DOMAIN=$(_nginx_conf_domain "$NGINX_CONF_INSTALLED")
    if [ -n "$DOMAIN" ]; then
      NEW_CONF=$(_render_nginx_conf "$NGINX_CONF_SRC" "$DOMAIN")
      CURRENT_CONF=$(cat "$NGINX_CONF_INSTALLED")
      if [ "$NEW_CONF" != "$CURRENT_CONF" ]; then
        log "[0/6] nginx config changed — applying update for $DOMAIN..."
        printf '%s\n' "$NEW_CONF" | sudo tee "$NGINX_CONF_INSTALLED" > /dev/null
        # Validate before reloading; on failure restore the previous content so
        # the on-disk config stays loadable, and carry on with the deploy.
        if sudo nginx -t 2>&1; then
          sudo nginx -s reload
          log "nginx config updated and reloaded."
        else
          log "WARNING: new nginx config failed validation — reverting."
          printf '%s\n' "$CURRENT_CONF" | sudo tee "$NGINX_CONF_INSTALLED" > /dev/null
        fi
      else
        log "[0/6] nginx config unchanged — skipping reload."
      fi
    else
      log "[0/6] no server_name found in $NGINX_CONF_INSTALLED — skipping nginx config update."
    fi
  fi
fi
| 176 | |
| 177 | # ── Step 1: Login to ECR and pull new image ─────────────────────────────────── |
| 178 | |
log "[1/6] Pulling image from ECR..."
# Authenticate Docker against the registry: the short-lived ECR password goes
# over stdin so it never appears in the process argument list.
aws ecr get-login-password --region "$REGION" \
  | sudo docker login --username AWS --password-stdin "$ECR_REGISTRY"
# Fetch the image for this deploy.
sudo docker pull "$FULL_IMAGE"
log "Pull complete."
| 184 | |
| 185 | # ── Step 2: Run migrations against the live DB ──────────────────────────────── |
| 186 | |
log "[2/6] Running migrations..."

# Run a one-off command in a throwaway container of the new image, attached to
# the application's Docker network, with the app's env file and
# SKIP_MIGRATIONS=0.
# Arguments: $@ - command (and its arguments) to execute inside the container
_alembic() {
  local -a one_off_args=(
    --rm
    --network musehub_musehub-internal
    --env-file "$APP_DIR/.env"
    --env SKIP_MIGRATIONS=0
  )
  sudo docker run "${one_off_args[@]}" "$FULL_IMAGE" "$@"
}

# A failed `upgrade head` (e.g. a stale revision ID after a migration history
# reset) is handled by stamping the current head to re-anchor Alembic's
# tracking, then upgrading again. The second upgrade is a no-op when the
# schema already matches head.
# NOTE(review): the stamp happens after ANY upgrade failure, including a
# genuinely broken migration; the schema gate below looks like the safety net
# for that case — confirm that is the intent.
if ! _alembic alembic upgrade head; then
  log "upgrade head failed — re-anchoring Alembic revision to head and retrying..."
  _alembic alembic stamp --purge head
  _alembic alembic upgrade head
fi
log "Migrations complete."

# Schema parity gate — hard fail. Uses the same benign-diff filter as the S2
# test (alembic_version table, semantically-equivalent server_default variants,
# column comments) so spurious false positives never block a deploy.
if ! _alembic python -m musehub.db.schema_gate; then
  die "Schema gate failed — ORM drift detected. Write a migration (alembic revision --autogenerate) before deploying."
fi
| 212 | |
| 213 | # ── Step 3: Start the new slot ──────────────────────────────────────────────── |
| 214 | |
log "[3/6] Starting $NEW_SLOT on port $NEW_PORT..."

# A failed previous deploy may have left a container with this name behind;
# remove it (best effort) so the name is free.
sudo docker rm -f "$NEW_CONTAINER" 2>/dev/null || true

# The new slot listens on loopback only, runs with migrations skipped (they
# were applied in step 2), and ships its logs to CloudWatch.
NEW_SLOT_RUN_ARGS=(
  --detach
  --name "$NEW_CONTAINER"
  --network musehub_musehub-internal
  --env-file "$APP_DIR/.env"
  --env SKIP_MIGRATIONS=1
  --volume musehub_data:/data
  --publish "127.0.0.1:${NEW_PORT}:1337"
  --restart unless-stopped
  --log-driver awslogs
  --log-opt awslogs-region=us-east-1
  --log-opt awslogs-group=/musehub/staging
  --log-opt awslogs-stream="$NEW_CONTAINER"
  --log-opt awslogs-create-group=true
)
sudo docker run "${NEW_SLOT_RUN_ARGS[@]}" "$FULL_IMAGE"
| 234 | |
| 235 | # ── Step 4: Health-check the new slot ──────────────────────────────────────── |
| 236 | |
# Abort the deploy (via die inside health_check) unless the new slot answers
# its health endpoint; nginx keeps serving the old slot in that case.
health_check "$HEALTH_URL" "$NEW_SLOT"

# ── Step 5: Flip nginx to the new slot (instant, zero downtime) ───────────────

log "[5/6] Switching nginx to $NEW_SLOT (port $NEW_PORT)..."
nginx_point_to "$NEW_SLOT"

# ── Step 6: Stop the old slot ────────────────────────────────────────────────

log "[6/6] Stopping old slot ($ACTIVE_SLOT)..."
# Stop gracefully first: `docker stop` sends SIGTERM and only escalates to
# SIGKILL after the grace period, giving requests that were already in flight
# on the old slot time to finish. A bare `docker rm -f` kills the container
# immediately. The grace period can be overridden via OLD_SLOT_STOP_TIMEOUT.
sudo docker stop -t "${OLD_SLOT_STOP_TIMEOUT:-30}" "$OLD_CONTAINER" > /dev/null 2>&1 || true
# Best effort: the old container may not exist (e.g. on the first deploy).
sudo docker rm -f "$OLD_CONTAINER" 2>/dev/null || true
| 248 | |
| 249 | # ── Step 7: Restart the background worker ──────────────────────────────────── |
| 250 | |
log "[7/7] Restarting background worker..."

# Replace any existing worker container (best effort if none exists) with one
# running the freshly deployed image.
sudo docker rm -f musehub-worker 2>/dev/null || true

# Same network, env file and data volume as the web slots, but no published
# port and no container health check; the command is the worker module.
WORKER_RUN_ARGS=(
  --detach
  --name musehub-worker
  --network musehub_musehub-internal
  --env-file "$APP_DIR/.env"
  --env SKIP_MIGRATIONS=1
  --volume musehub_data:/data
  --restart unless-stopped
  --no-healthcheck
  --log-driver awslogs
  --log-opt awslogs-region=us-east-1
  --log-opt awslogs-group=/musehub/staging
  --log-opt awslogs-stream=musehub-worker
  --log-opt awslogs-create-group=true
)
sudo docker run "${WORKER_RUN_ARGS[@]}" "$FULL_IMAGE" python -m musehub.worker
log "Worker started."
| 268 | |
| 269 | # ── Step 8: Prune old images (keep last 3) ─────────────────────────────────── |
| 270 | |
KEEP_IMAGES=3
log "[8/8] Pruning old images (keeping last $KEEP_IMAGES)..."

# Pruning is housekeeping that runs after traffic has already moved, so every
# docker call here is best effort: a failure must not turn a completed deploy
# into a non-zero exit.

# Full ID of the image that was just deployed. It is excluded from pruning so
# that rolling back to an older tag never targets the image the live
# containers were started from. Empty if the lookup fails.
CURRENT_ID=$(sudo docker image inspect --format '{{.Id}}' "$FULL_IMAGE" 2>/dev/null || true)

# Unique image IDs for the repository in the order docker lists them, minus
# the first KEEP_IMAGES and minus the deployed image. --no-trunc makes the IDs
# comparable with the inspect output above.
OLD_IDS=$(
  sudo docker images --no-trunc --format '{{.ID}}' "$ECR_IMAGE" 2>/dev/null \
    | awk -v keep="$KEEP_IMAGES" -v current="$CURRENT_ID" \
      '!seen[$0]++ && ++n > keep && $0 != current' \
    || true
)

if [ -n "$OLD_IDS" ]; then
  printf '%s\n' "$OLD_IDS" | xargs sudo docker rmi -f 2>/dev/null || true
  log "Image prune complete."
else
  log "No old images to prune."
fi

log ""
log "Deploy complete. Active slot: $NEW_SLOT (port $NEW_PORT)"
log "Image: $FULL_IMAGE"
File History
1 commit
sha256:7d6dd8f4a89e2d1fef2d84f6e65feaff51385d382f466766b7f690a22ec18e32
fix: fall back to DB ancestry check when mpack-only fast-fo…
Sonnet 4.6
patch
6 days ago