gabriel / musehub public
test_deployment.py python
318 lines 15.0 KB
Raw
sha256:25d96102cb2d69a038356dff26f4633156da2f1faf98fe0d0e4438ff3f367f12 refactor: rename 0054/0055 migrations to standard convention Sonnet 4.6 minor ⚠ breaking 21 days ago
1 """Section 7.3 — Deployment readiness tests.
2
3 Covers:
4 Zero-downtime : deploy.sh is blue-green (two slots, nginx flip); health
5 check URLs point to /healthz not a UI page.
6 /healthz : returns 200 when DB + storage healthy; 503 with JSON body
7 when either is down; exempt from auth; fast.
8 Non-root user : Dockerfile uses USER instruction (non-root); container
9 runs as the 'musehub' system user.
10 Read-only FS : docker-compose.yml sets read_only: true on musehub service;
11 /tmp is mounted as tmpfs; /data is a named volume.
12 Resource limits: CPU and memory limits set on musehub, postgres, runner.
13 """
14 from __future__ import annotations
15
16 import re
17 from pathlib import Path
18 from unittest.mock import AsyncMock, MagicMock, patch
19
20 import pytest
21 from httpx import AsyncClient
22
# Repository root: the parent of the directory that contains this test module.
_ROOT = Path(__file__).resolve().parents[1]

# Deployment artefacts that the static checks in this module read as text.
_DOCKERFILE = _ROOT.joinpath("Dockerfile")
_COMPOSE = _ROOT.joinpath("docker-compose.yml")
_DEPLOY_SH = _ROOT.joinpath("deploy", "deploy.sh")
27
28
29 # ═══════════════════════════════════════════════════════════════════════════════
30 # Zero-downtime deploy
31 # ═══════════════════════════════════════════════════════════════════════════════
32
class TestZeroDowntimeDeploy:
    """Static checks that deploy.sh performs a health-gated blue-green deploy.

    deploy.sh and the Dockerfile are inspected as text only; nothing is
    executed.  Ordering checks compare the position of the first occurrence
    of each marker in the script.
    """

    # Read once at class-definition time; explicit UTF-8 so the result does
    # not depend on the locale of the machine running the tests.
    _src = _DEPLOY_SH.read_text(encoding="utf-8")

    @staticmethod
    def _healthcheck_instructions(dockerfile: str) -> list[str]:
        """Return every HEALTHCHECK instruction as one logical line.

        Backslash line continuations are joined first, so a probe URL that
        sits on a continuation line is still part of its instruction, and
        the container's ordinary ``CMD`` is never mistaken for a probe.
        """
        logical = re.sub(r"\\[ \t]*\r?\n", " ", dockerfile)
        return [
            text
            for text in (raw.strip() for raw in logical.splitlines())
            if text.upper().startswith("HEALTHCHECK")
        ]

    def test_two_slots_defined(self) -> None:
        """deploy.sh must define both blue and green slots."""
        assert "blue" in self._src and "green" in self._src

    def test_nginx_flip_present(self) -> None:
        """deploy.sh must reload nginx after health check passes (atomic flip)."""
        assert "nginx -s reload" in self._src or "nginx_point_to" in self._src

    def test_health_check_before_nginx_flip(self) -> None:
        """Health check must happen before the nginx flip — never flip a sick slot."""
        src = self._src
        health_pos = src.find("health_check")
        nginx_pos = src.find("nginx_point_to")
        assert health_pos != -1, "health_check function not found in deploy.sh"
        assert nginx_pos != -1, "nginx_point_to not found in deploy.sh"
        assert health_pos < nginx_pos, (
            "nginx flip happens before health check — would route to an unhealthy slot"
        )

    def test_health_urls_point_to_healthz(self) -> None:
        """deploy.sh must use /healthz not a UI page as the readiness signal."""
        assert "/healthz" in self._src, (
            "deploy.sh does not use /healthz — health check may pass even when "
            "DB or storage is down (UI pages don't probe dependencies)"
        )
        assert "/explore" not in self._src, (
            "deploy.sh still references /explore as a health URL — update to /healthz"
        )

    def test_old_slot_stopped_after_flip(self) -> None:
        """deploy.sh must stop the old slot after the nginx flip to free resources."""
        src = self._src
        nginx_pos = src.find("nginx_point_to")
        # Without this guard a missing marker (-1) would make the search below
        # start at the last character and report a misleading failure.
        assert nginx_pos != -1, "nginx_point_to not found in deploy.sh"
        stop_pos = src.find("docker rm -f", nginx_pos)
        assert stop_pos != -1, (
            "deploy.sh does not stop the old slot after the nginx flip"
        )

    def test_dockerfile_healthcheck_uses_healthz(self) -> None:
        """Dockerfile HEALTHCHECK must probe /healthz."""
        instructions = self._healthcheck_instructions(
            _DOCKERFILE.read_text(encoding="utf-8")
        )
        assert instructions, "Dockerfile has no HEALTHCHECK instruction"
        # When several HEALTHCHECK instructions are listed only the last one
        # takes effect, so that is the one that has to probe /healthz.
        assert "/healthz" in instructions[-1], (
            "Dockerfile HEALTHCHECK does not probe /healthz — "
            "docker will report 'healthy' even when DB is down"
        )
84
85 # ═══════════════════════════════════════════════════════════════════════════════
86 # /healthz endpoint
87 # ═══════════════════════════════════════════════════════════════════════════════
88
class TestHealthzEndpoint:
    """Behavioural checks for the /healthz readiness endpoint.

    The async tests receive the ``client`` fixture and issue requests to
    ``/healthz``; the failure-mode tests patch the DB or storage layer
    before making the request.
    """

    async def test_healthz_returns_200_when_healthy(self, client: AsyncClient) -> None:
        """A healthy app answers 200 with status "ok" and both probes True."""
        response = await client.get("/healthz")
        assert response.status_code == 200
        payload = response.json()
        assert payload["status"] == "ok"
        assert payload["db"] is True
        assert payload["storage"] is True

    async def test_healthz_returns_json(self, client: AsyncClient) -> None:
        """The response must declare a JSON content type."""
        response = await client.get("/healthz")
        content_type = response.headers["content-type"]
        assert content_type.startswith("application/json")

    async def test_healthz_no_auth_required(self, client: AsyncClient) -> None:
        """A request without an Authorization header must not be rejected."""
        resp = await client.get("/healthz")
        # Neither 401 nor 403 is acceptable for an unauthenticated probe.
        assert resp.status_code != 401 and resp.status_code != 403, (
            f"/healthz returned {resp.status_code} — health check must be unauthenticated"
        )

    async def test_healthz_503_when_db_down(self, client: AsyncClient) -> None:
        """A failing DB probe must produce 503 with db reported as False."""
        from sqlalchemy.exc import OperationalError

        # Every AsyncSession.execute call raises, simulating a broken connection.
        db_failure = OperationalError("connection refused", None, None)
        broken_execute = patch(
            "musehub.main.AsyncSession.execute",
            new_callable=AsyncMock,
            side_effect=db_failure,
        )
        with broken_execute:
            response = await client.get("/healthz")

        assert response.status_code == 503
        payload = response.json()
        assert payload["status"] == "unhealthy"
        assert payload["db"] is False

    async def test_healthz_503_when_storage_down(self, client: AsyncClient) -> None:
        """A failing storage probe must produce 503 with storage reported as False."""
        from musehub.storage.backends import BlobBackend

        # The endpoint URL points at a port with nothing listening, so any
        # request the backend makes to it is expected to fail.
        unreachable = BlobBackend(
            bucket="muse-objects",
            endpoint_url="http://127.0.0.1:19999",
            access_key_id="x",
            secret_access_key="x",
            region="us-east-1",
        )
        swapped_backend = patch(
            "musehub.storage.backends.get_backend", return_value=unreachable
        )
        with swapped_backend:
            response = await client.get("/healthz")

        assert response.status_code == 503
        payload = response.json()
        assert payload["status"] == "unhealthy"
        assert payload["storage"] is False

    async def test_healthz_body_has_db_and_storage_keys(self, client: AsyncClient) -> None:
        """The body must carry both the db and the storage key for monitoring."""
        response = await client.get("/healthz")
        payload = response.json()
        for key in ("db", "storage"):
            assert key in payload, f"healthz response missing '{key}' key"

    async def test_healthz_fast(self, client: AsyncClient) -> None:
        """The probe must answer in under 2 s (load balancer timeout is typically 5 s)."""
        import time

        started = time.monotonic()
        await client.get("/healthz")
        elapsed = time.monotonic() - started
        assert elapsed < 2.0, f"/healthz took {elapsed:.2f}s — too slow for a probe"

    def test_healthz_route_registered(self) -> None:
        """The FastAPI app must have a route whose path is /healthz."""
        from musehub.main import app

        registered = {route.path for route in app.routes}
        assert "/healthz" in registered, "/healthz route not registered in app"
170
171 # ═══════════════════════════════════════════════════════════════════════════════
172 # Non-root container user
173 # ═══════════════════════════════════════════════════════════════════════════════
174
class TestNonRootUser:
    """Static checks that the Dockerfile drops root before the app runs.

    The Dockerfile is inspected as text; no image is built.
    """

    # Read once at class-definition time; explicit UTF-8 so the result does
    # not depend on the locale of the machine running the tests.
    _src = _DOCKERFILE.read_text(encoding="utf-8")

    @staticmethod
    def _user_instructions(dockerfile: str) -> list[tuple[int, str]]:
        """Return ``(line index, user)`` for every USER instruction.

        The user is lower-cased and any ``:group`` suffix is removed, so
        ``USER root:root`` and ``USER 0:0`` are reported as ``root`` and
        ``0``.  Any whitespace (not only a single space) may separate the
        keyword from its argument.
        """
        found: list[tuple[int, str]] = []
        for index, raw in enumerate(dockerfile.splitlines()):
            match = re.match(r"\s*USER\s+(\S+)", raw, flags=re.IGNORECASE)
            if match:
                user = match.group(1).split(":", 1)[0].lower()
                found.append((index, user))
        return found

    def test_dockerfile_has_user_instruction(self) -> None:
        """Dockerfile must have a USER instruction."""
        assert self._user_instructions(self._src), (
            "Dockerfile has no USER instruction — container runs as root"
        )

    def test_dockerfile_user_is_not_root(self) -> None:
        """Dockerfile USER must not be root or UID 0."""
        for _, user in self._user_instructions(self._src):
            # Numeric UIDs are compared by value so "00" is caught as well.
            is_root = user == "root" or (user.isdigit() and int(user) == 0)
            assert not is_root, (
                f"Dockerfile sets USER to {user!r} — container must run as non-root"
            )

    def test_dockerfile_creates_system_user(self) -> None:
        """Dockerfile must create a dedicated system user (groupadd + useradd)."""
        assert "groupadd" in self._src and "useradd" in self._src, (
            "Dockerfile does not create a dedicated system user"
        )

    def test_dockerfile_user_applied_after_installs(self) -> None:
        """USER instruction must come after RUN pip install (installs need root).

        Compares the first USER instruction with the last line that mentions
        ``pip install``.
        """
        users = self._user_instructions(self._src)
        pip_lines = [
            index
            for index, raw in enumerate(self._src.splitlines())
            if "pip install" in raw
        ]
        assert users, "Dockerfile has no USER instruction"
        assert pip_lines, "Dockerfile has no 'pip install' line"
        first_user_idx = users[0][0]
        last_pip_idx = pip_lines[-1]
        assert first_user_idx > last_pip_idx, (
            "USER instruction appears before pip install — "
            "package installation would fail without root"
        )
215
216 # ═══════════════════════════════════════════════════════════════════════════════
217 # Read-only filesystem
218 # ═══════════════════════════════════════════════════════════════════════════════
219
class TestReadOnlyFilesystem:
    """Static checks that the musehub service can run on a read-only root FS.

    docker-compose.yml is inspected as text.  Every check is scoped to the
    ``musehub`` entry under ``services:`` and ignores comments, so settings
    on another service or in a commented-out line cannot satisfy it.
    """

    def _parse_yaml(self) -> dict:
        """Parse docker-compose.yml and return its top-level mapping.

        PyYAML is imported lazily so the module can be imported without it.
        The text-based checks in this class do not call this helper.
        """
        import yaml

        document = yaml.safe_load(_COMPOSE.read_text(encoding="utf-8"))
        assert isinstance(document, dict), "docker-compose.yml is not a YAML mapping"
        return document

    @staticmethod
    def _service_block(compose_text: str, service_name: str) -> list[str]:
        """Return the lines of ``services.<service_name>``, header included.

        The indentation of service keys is taken from the first entry under
        ``services:`` instead of being hard-coded.  Blank lines and comments
        are dropped, and trailing `` # ...`` comments are cut off.
        """
        in_services = False
        indent: str | None = None
        collecting = False
        block: list[str] = []
        for raw in compose_text.splitlines():
            stripped = raw.strip()
            if not stripped or stripped.startswith("#"):
                continue
            line = raw.split(" #", 1)[0].rstrip()
            if not line[0].isspace():
                # A new top-level key ends any service being collected.
                if collecting:
                    break
                in_services = line == "services:"
                continue
            if not in_services:
                continue
            lead = line[: len(line) - len(line.lstrip())]
            if indent is None:
                indent = lead
            if lead == indent:
                # A key at service level: either our header or a sibling.
                if collecting:
                    break
                collecting = line.strip().startswith(f"{service_name}:")
            if collecting:
                block.append(line)
        return block

    def _musehub_lines(self) -> list[str]:
        """Return the musehub service block, failing if the service is absent."""
        block = self._service_block(_COMPOSE.read_text(encoding="utf-8"), "musehub")
        assert block, "docker-compose.yml has no musehub service under services:"
        return block

    def test_musehub_service_read_only(self) -> None:
        """musehub service must have read_only: true."""
        body = self._musehub_lines()[1:]

        def lead(line: str) -> str:
            return line[: len(line) - len(line.lstrip())]

        # Only direct keys of the service count; a nested ``read_only`` (for
        # example on a long-syntax volume entry) is a different setting.
        key_indent = lead(body[0]) if body else None
        assert any(
            lead(line) == key_indent
            and re.fullmatch(r"read_only:\s*true", line.strip(), re.IGNORECASE)
            for line in body
        ), "musehub service in docker-compose.yml does not have read_only: true"

    def test_tmp_is_tmpfs_or_volume(self) -> None:
        """/tmp must be writable (tmpfs or volume) so uvicorn can write temp files."""
        block = self._musehub_lines()
        assert any("tmpfs" in line or "/tmp" in line for line in block), (
            "No tmpfs mount for /tmp — uvicorn and Python will fail to write temp files "
            "when the root filesystem is read-only"
        )

    def test_data_volume_is_explicit(self) -> None:
        """/data object store must be an explicit named volume (not read-only)."""
        block = self._musehub_lines()
        assert any("musehub_data:/data" in line for line in block), (
            "/data is not mounted as an explicit volume — objects cannot be written "
            "when the root filesystem is read-only"
        )
258
259 # ═══════════════════════════════════════════════════════════════════════════════
260 # Resource limits
261 # ═══════════════════════════════════════════════════════════════════════════════
262
class TestResourceLimits:
    """Static checks that every compose service declares CPU/memory limits.

    docker-compose.yml is inspected as text, one service block at a time.
    """

    # Read once at class-definition time; explicit UTF-8 so the result does
    # not depend on the locale of the machine running the tests.
    _src = _COMPOSE.read_text(encoding="utf-8")

    # Multipliers that convert a compose byte value to MiB, keyed by the
    # upper-cased unit letter ("" means plain bytes).
    _TO_MIB = {"": 1 / (1024 * 1024), "K": 1 / 1024, "M": 1, "G": 1024}

    def _service_limits(self, service_name: str) -> str:
        """Extract the text block for a given service.

        The indentation of service keys is taken from the first entry under
        ``services:`` instead of being hard-coded, and the block ends at the
        next service or at the next top-level key.  Blank lines and
        comment-only lines are dropped so a commented-out setting cannot
        satisfy a check.  Returns ``""`` when the service is not found.
        """
        in_services = False
        indent: str | None = None
        collecting = False
        block_lines: list[str] = []
        for line in self._src.splitlines():
            stripped = line.strip()
            if not stripped or stripped.startswith("#"):
                continue
            if not line[0].isspace():
                # A new top-level key ends any service being collected.
                if collecting:
                    break
                in_services = stripped.split("#", 1)[0].rstrip() == "services:"
                continue
            if not in_services:
                continue
            lead = line[: len(line) - len(line.lstrip())]
            if indent is None:
                indent = lead
            if lead == indent:
                # A key at service level: either our header or a sibling.
                if collecting:
                    break
                collecting = stripped.startswith(f"{service_name}:")
            if collecting:
                block_lines.append(line)
        return "\n".join(block_lines)

    def test_musehub_has_cpu_limit(self) -> None:
        """musehub service block must contain a cpus: key."""
        block = self._service_limits("musehub")
        assert "cpus:" in block, "musehub service has no CPU limit"

    def test_musehub_has_memory_limit(self) -> None:
        """musehub service block must contain a memory: key."""
        block = self._service_limits("musehub")
        assert "memory:" in block, "musehub service has no memory limit"

    def test_musehub_memory_limit_sane(self) -> None:
        """musehub memory limit must be ≥ 256 MiB (app needs headroom).

        Accepts plain byte counts and the b/k/m/g (optionally kb/mb/gb)
        suffixes, quoted or not, with an optional decimal part.  A value
        that cannot be parsed fails the test instead of passing silently.
        """
        block = self._service_limits("musehub")
        # Prefer the value that follows ``limits:`` so a reservation that is
        # listed first is not mistaken for the limit.
        scope = block.partition("limits:")[2] or block
        m = re.search(
            r"memory:\s*[\"']?(\d+(?:\.\d+)?)\s*([KkMmGg]?)[Bb]?(?![\w.])", scope
        )
        assert m is not None, (
            "musehub memory limit is missing or is not a recognisable byte value"
        )
        mb = float(m.group(1)) * self._TO_MIB[m.group(2).upper()]
        assert mb >= 256, f"musehub memory limit {mb:g}M is below 256M minimum"

    def test_postgres_has_cpu_limit(self) -> None:
        """postgres service block must contain a cpus: key."""
        block = self._service_limits("postgres")
        assert "cpus:" in block, "postgres service has no CPU limit"

    def test_postgres_has_memory_limit(self) -> None:
        """postgres service block must contain a memory: key."""
        block = self._service_limits("postgres")
        assert "memory:" in block, "postgres service has no memory limit"

    def test_runner_has_cpu_limit(self) -> None:
        """musehub-runner service block must contain a cpus: key."""
        block = self._service_limits("musehub-runner")
        assert "cpus:" in block, "musehub-runner service has no CPU limit"

    def test_runner_has_memory_limit(self) -> None:
        """musehub-runner service block must contain a memory: key."""
        block = self._service_limits("musehub-runner")
        assert "memory:" in block, "musehub-runner service has no memory limit"

    def test_all_services_have_deploy_block(self) -> None:
        """All three services must have a deploy: block (where limits live)."""
        for svc in ("musehub", "postgres", "musehub-runner"):
            block = self._service_limits(svc)
            assert "deploy:" in block, f"{svc} service has no deploy: block"
File History 2 commits
sha256:25d96102cb2d69a038356dff26f4633156da2f1faf98fe0d0e4438ff3f367f12 refactor: rename 0054/0055 migrations to standard convention Sonnet 4.6 minor 21 days ago
sha256:4aed3d8601c8dd3ed37074de35f11f4a9699a0a4b99d43727048fd3f8e6fd13d chore: doc sweep, ignore wrangler build state, misc fixes Sonnet 4.6 minor 23 days ago