"""Section 7.4 — Logging & alerting tests. Covers: Structured JSON logs : JsonFormatter emits timestamp, level, logger, message, request_id, user_id; optional method/path/status/duration_ms. PII scrubbing : PiiFilter redacts Bearer tokens, passwords, secrets from message strings before they reach any handler. Access log middleware: Injects request_id + user_id into contextvars; emits per-request access line; logs /healthz at DEBUG; injects X-Request-Id response header. No PII in access log : Raw Authorization token never logged — only the MSign handle. CloudWatch script : Thresholds match checklist (5xx >1%, p99 >2 s, disk >80%, DB conns >90%); 30-day hot retention; SNS wired up. On-call runbook : docs/on-call-runbook.md exists and references gabriel. main.py wiring : configure_logging used (not basicConfig); AccessLogMiddleware registered. """ from __future__ import annotations import json import logging import re from pathlib import Path from unittest.mock import AsyncMock, MagicMock, patch import pytest from httpx import AsyncClient from musehub.types.json_types import JSONObject _ROOT = Path(__file__).resolve().parents[1] _MAIN_PY = _ROOT / "musehub" / "main.py" _LOGGING_CONFIG = _ROOT / "musehub" / "logging_config.py" _ACCESS_LOG_MW = _ROOT / "musehub" / "middleware" / "access_log.py" _CW_SCRIPT = _ROOT / "deploy" / "cloudwatch-alerts.sh" _ON_CALL_DOC = _ROOT / "docs" / "on-call-runbook.md" # ═══════════════════════════════════════════════════════════════════════════════ # JSON formatter # ═══════════════════════════════════════════════════════════════════════════════ class TestJsonFormatter: def _make_record(self, msg: str = "hello", level: int = logging.INFO) -> logging.LogRecord: record = logging.LogRecord( name="musehub.test", level=level, pathname="test.py", lineno=1, msg=msg, args=(), exc_info=None, ) return record def _format(self, record: logging.LogRecord) -> JSONObject: from musehub.logging_config import JsonFormatter return json.loads(JsonFormatter().format(record)) def test_has_timestamp(self) -> None: doc = self._format(self._make_record()) assert "timestamp" in doc # ISO-8601 UTC — ends with +00:00 assert "+00:00" in doc["timestamp"] or doc["timestamp"].endswith("Z") def test_has_level(self) -> None: doc = self._format(self._make_record(level=logging.WARNING)) assert doc["level"] == "WARNING" def test_has_logger(self) -> None: doc = self._format(self._make_record()) assert doc["logger"] == "musehub.test" def test_has_message(self) -> None: doc = self._format(self._make_record("hello world")) assert doc["message"] == "hello world" def test_has_request_id(self) -> None: from musehub.logging_config import request_id_var token = request_id_var.set("test-req-id-123") try: doc = self._format(self._make_record()) assert doc["request_id"] == "test-req-id-123" finally: request_id_var.reset(token) def test_has_user_id(self) -> None: from musehub.logging_config import user_id_var token = user_id_var.set("gabriel") try: doc = self._format(self._make_record()) assert doc["user_id"] == "gabriel" finally: user_id_var.reset(token) def test_request_id_defaults_to_empty_string(self) -> None: from musehub.logging_config import request_id_var # Reset to default. request_id_var.set("") doc = self._format(self._make_record()) assert doc["request_id"] == "" def test_access_fields_included_when_present(self) -> None: from musehub.logging_config import JsonFormatter record = self._make_record("GET /api/v1 200") record.__dict__.update( method="GET", path="/api/v1", status=200, duration_ms=12.3 ) doc = json.loads(JsonFormatter().format(record)) assert doc["method"] == "GET" assert doc["path"] == "/api/v1" assert doc["status"] == 200 assert doc["duration_ms"] == 12.3 def test_access_fields_absent_when_not_set(self) -> None: doc = self._format(self._make_record()) assert "method" not in doc assert "path" not in doc assert "status" not in doc assert "duration_ms" not in doc def test_exc_info_included(self) -> None: from musehub.logging_config import JsonFormatter try: raise ValueError("boom") except ValueError: import sys record = logging.LogRecord( name="musehub.test", level=logging.ERROR, pathname="test.py", lineno=1, msg="error", args=(), exc_info=sys.exc_info(), ) doc = json.loads(JsonFormatter().format(record)) assert "exc_info" in doc assert "ValueError" in doc["exc_info"] def test_output_is_valid_json(self) -> None: raw = self._format(self._make_record()) assert isinstance(raw, dict) # _format already parses — just check no exception # ═══════════════════════════════════════════════════════════════════════════════ # PII filter # ═══════════════════════════════════════════════════════════════════════════════ class TestPiiFilter: def _apply(self, msg: str) -> str: from musehub.logging_config import PiiFilter record = logging.LogRecord( name="test", level=logging.INFO, pathname="test.py", lineno=1, msg=msg, args=(), exc_info=None, ) f = PiiFilter() f.filter(record) return record.getMessage() def test_bearer_token_scrubbed(self) -> None: result = self._apply("Authorization: Bearer eyJhbGciOiJSUzI1NiJ9.payload.sig") assert "eyJhbGciOiJSUzI1NiJ9" not in result assert "Bearer ***" in result def test_short_bearer_not_scrubbed(self) -> None: # Strings < 8 chars after Bearer are unlikely to be real tokens. result = self._apply("Bearer short") # No crash — passes through assert result is not None def test_password_scrubbed_equals(self) -> None: result = self._apply("password=supersecret123") assert "supersecret123" not in result assert "password=" in result assert "***" in result def test_password_scrubbed_colon(self) -> None: result = self._apply("password: mysecret") assert "mysecret" not in result assert "***" in result def test_token_scrubbed(self) -> None: result = self._apply("token=abc123def456ghi") assert "abc123def456ghi" not in result assert "***" in result def test_secret_scrubbed(self) -> None: result = self._apply("secret=webhook_key_abc123") assert "webhook_key_abc123" not in result assert "***" in result def test_normal_message_unchanged(self) -> None: result = self._apply("User gabriel pushed 3 commits to dev") assert result == "User gabriel pushed 3 commits to dev" def test_args_expanded_and_scrubbed(self) -> None: from musehub.logging_config import PiiFilter record = logging.LogRecord( name="test", level=logging.INFO, pathname="test.py", lineno=1, msg="auth header: %s", args=("Bearer supersecrettoken123",), exc_info=None, ) f = PiiFilter() f.filter(record) result = record.getMessage() assert "supersecrettoken123" not in result def test_filter_returns_true(self) -> None: from musehub.logging_config import PiiFilter record = logging.LogRecord( name="test", level=logging.INFO, pathname="test.py", lineno=1, msg="ok", args=(), exc_info=None, ) assert PiiFilter().filter(record) is True # ═══════════════════════════════════════════════════════════════════════════════ # configure_logging # ═══════════════════════════════════════════════════════════════════════════════ class TestConfigureLogging: def test_removes_existing_handlers(self) -> None: from musehub.logging_config import configure_logging, JsonFormatter root = logging.getLogger() # Pre-install a plain handler to simulate basicConfig. root.addHandler(logging.StreamHandler()) configure_logging(debug=False) # After configure_logging there should be exactly one handler. assert len(root.handlers) == 1 assert isinstance(root.handlers[0].formatter, JsonFormatter) def test_debug_sets_debug_level(self) -> None: from musehub.logging_config import configure_logging configure_logging(debug=True) assert logging.getLogger().level == logging.DEBUG def test_non_debug_sets_info_level(self) -> None: from musehub.logging_config import configure_logging configure_logging(debug=False) assert logging.getLogger().level == logging.INFO def test_handler_has_pii_filter(self) -> None: from musehub.logging_config import configure_logging, PiiFilter configure_logging(debug=False) root = logging.getLogger() filters = [f for h in root.handlers for f in h.filters] assert any(isinstance(f, PiiFilter) for f in filters), ( "No PiiFilter found on root logger handlers" ) # ═══════════════════════════════════════════════════════════════════════════════ # AccessLogMiddleware # ═══════════════════════════════════════════════════════════════════════════════ class TestAccessLogMiddleware: async def test_request_id_injected_into_response_header(self, client: AsyncClient) -> None: resp = await client.get("/healthz") assert "x-request-id" in resp.headers, ( "X-Request-Id header missing — cannot correlate client errors with log entries" ) async def test_request_id_format(self, client: AsyncClient) -> None: resp = await client.get("/healthz") rid = resp.headers.get("x-request-id", "") assert re.match( r"^[0-9a-f]{32}$", rid, ), f"X-Request-Id has unexpected format: {rid!r}" async def test_different_requests_get_different_ids(self, client: AsyncClient) -> None: r1 = await client.get("/healthz") r2 = await client.get("/healthz") assert r1.headers["x-request-id"] != r2.headers["x-request-id"] async def test_healthz_logged_at_debug(self, client: AsyncClient) -> None: """Verify /healthz access records are emitted at DEBUG (not INFO).""" from musehub.middleware.access_log import AccessLogMiddleware import musehub.middleware.access_log as al_mod log_calls: list[tuple] = [] original_log = al_mod.logger.log def capture_log(level: int, *args: str, **kwargs: str) -> None: log_calls.append((level,) + args) return original_log(level, *args, **kwargs) with patch.object(al_mod.logger, "log", side_effect=capture_log): await client.get("/healthz") healthz_calls = [c for c in log_calls if c[0] == logging.DEBUG and "/healthz" in str(c)] assert healthz_calls, "/healthz request was not logged at DEBUG" async def test_non_healthz_logged_at_info(self, client: AsyncClient) -> None: """Verify normal routes are logged at INFO.""" import musehub.middleware.access_log as al_mod log_calls: list[tuple] = [] original_log = al_mod.logger.log def capture_log(level: int, *args: str, **kwargs: str) -> None: log_calls.append((level,) + args) return original_log(level, *args, **kwargs) with patch.object(al_mod.logger, "log", side_effect=capture_log): await client.get("/healthz") # one call to prime, then await client.get("/api/v1/repos") info_calls = [c for c in log_calls if c[0] == logging.INFO and "/api/v1/repos" in str(c)] assert info_calls, "/api/v1/repos was not logged at INFO" def test_msign_handle_extracted_no_raw_token_logged(self) -> None: """MSign Authorization header: only the handle is extracted, never the raw token.""" src = _ACCESS_LOG_MW.read_text() # The middleware must use _MSIGN_HANDLE_RE to extract the handle. assert "_MSIGN_HANDLE_RE" in src or "MSIGN_HANDLE" in src, ( "AccessLogMiddleware does not extract the MSign handle" ) # The raw authorization value must NOT be logged. assert "auth_header" not in src.split("user_id")[1].split("logger.log")[0] if "logger.log" in src else True, ( "Raw Authorization value may be logged" ) def test_raw_authorization_not_in_log_call(self) -> None: """The raw auth header value must never be passed to logger.log.""" src = _ACCESS_LOG_MW.read_text() # logger.log call must not reference auth_header or raw_val log_section = src[src.find("logger.log"):] assert "auth_header" not in log_section, ( "auth_header (raw token value) passed to logger.log" ) # ═══════════════════════════════════════════════════════════════════════════════ # main.py wiring # ═══════════════════════════════════════════════════════════════════════════════ class TestMainPyLoggingWiring: _src = _MAIN_PY.read_text() def test_configure_logging_used_not_basicconfig(self) -> None: """main.py must call configure_logging(), not logging.basicConfig().""" assert "configure_logging" in self._src, ( "main.py does not call configure_logging() from musehub.logging_config" ) # basicConfig must be gone assert "logging.basicConfig" not in self._src, ( "main.py still calls logging.basicConfig — replace with configure_logging()" ) def test_access_log_middleware_imported(self) -> None: assert "AccessLogMiddleware" in self._src, ( "AccessLogMiddleware not imported in main.py" ) def test_access_log_middleware_added(self) -> None: assert "add_middleware(AccessLogMiddleware)" in self._src, ( "AccessLogMiddleware not registered with app.add_middleware()" ) def test_access_log_middleware_is_outermost(self) -> None: """AccessLogMiddleware must be added AFTER CORSMiddleware (= outermost layer).""" cors_pos = self._src.find("add_middleware(\n CORSMiddleware") if cors_pos == -1: cors_pos = self._src.find("add_middleware(CORSMiddleware") access_pos = self._src.find("add_middleware(AccessLogMiddleware)") assert cors_pos != -1, "CORSMiddleware not found in main.py" assert access_pos != -1, "AccessLogMiddleware not found in main.py" assert access_pos > cors_pos, ( "AccessLogMiddleware is not outermost — it must be add_middleware'd AFTER CORSMiddleware" ) # ═══════════════════════════════════════════════════════════════════════════════ # CloudWatch alerts script # ═══════════════════════════════════════════════════════════════════════════════ class TestCloudWatchAlertsScript: _src = _CW_SCRIPT.read_text() def test_script_exists(self) -> None: assert _CW_SCRIPT.exists(), "deploy/cloudwatch-alerts.sh not found" def test_log_group_defined(self) -> None: assert "/musehub/app" in self._src, ( "CloudWatch log group /musehub/app not defined in cloudwatch-alerts.sh" ) def test_30_day_hot_retention(self) -> None: assert "30" in self._src and "retention" in self._src.lower(), ( "30-day hot retention not set in cloudwatch-alerts.sh" ) # The actual AWS CLI call assert "put-retention-policy" in self._src, ( "aws logs put-retention-policy not called in cloudwatch-alerts.sh" ) assert "retention-in-days" in self._src def test_5xx_alarm_threshold_1_percent(self) -> None: assert 'THRESHOLD_5XX_RATE="1"' in self._src or "THRESHOLD_5XX_RATE=1" in self._src, ( "5xx rate alarm threshold must be 1% — change THRESHOLD_5XX_RATE" ) def test_5xx_metric_filter_defined(self) -> None: assert "5xx" in self._src.lower() and "put-metric-filter" in self._src, ( "No metric filter for 5xx responses in cloudwatch-alerts.sh" ) def test_p99_latency_alarm_threshold_2000ms(self) -> None: assert 'THRESHOLD_P99_LATENCY_MS="2000"' in self._src or "2000" in self._src, ( "p99 latency alarm threshold must be 2000ms" ) assert "p99" in self._src.lower() or "P99" in self._src, ( "p99 statistic not used for latency alarm" ) def test_duration_metric_filter_defined(self) -> None: assert "duration_ms" in self._src or "RequestDuration" in self._src, ( "No metric filter for request duration in cloudwatch-alerts.sh" ) def test_disk_alarm_threshold_80_percent(self) -> None: assert 'THRESHOLD_DISK_PCT="80"' in self._src or "THRESHOLD_DISK_PCT=80" in self._src, ( "Disk alarm threshold must be 80%" ) assert "disk" in self._src.lower(), ( "No disk alarm in cloudwatch-alerts.sh" ) def test_db_connections_alarm_threshold_90_percent(self) -> None: assert 'THRESHOLD_DB_CONN_PCT="90"' in self._src or "THRESHOLD_DB_CONN_PCT=90" in self._src, ( "DB connections alarm threshold must be 90%" ) assert "DatabaseConnections" in self._src or "db_connections" in self._src.lower(), ( "No DB connections alarm in cloudwatch-alerts.sh" ) def test_sns_topic_created(self) -> None: assert "sns create-topic" in self._src, ( "SNS topic not created in cloudwatch-alerts.sh" ) def test_sns_sms_subscription(self) -> None: assert "protocol sms" in self._src or "--protocol sms" in self._src, ( "No SMS subscription in cloudwatch-alerts.sh" ) def test_sns_email_subscription(self) -> None: assert "protocol email" in self._src or "--protocol email" in self._src, ( "No email subscription in cloudwatch-alerts.sh" ) def test_alarm_actions_use_sns(self) -> None: assert "alarm-actions" in self._src and "SNS_ARN" in self._src, ( "CloudWatch alarms must route to the SNS topic" ) # ═══════════════════════════════════════════════════════════════════════════════ # On-call runbook # ═══════════════════════════════════════════════════════════════════════════════ class TestOnCallRunbook: _src = _ON_CALL_DOC.read_text() def test_runbook_exists(self) -> None: assert _ON_CALL_DOC.exists(), "docs/on-call-runbook.md not found" def test_references_gabriel(self) -> None: assert "gabriel" in self._src.lower(), ( "On-call runbook does not name gabriel as the on-call contact" ) def test_references_sms_or_phone(self) -> None: assert "sms" in self._src.lower() or "phone" in self._src.lower(), ( "On-call runbook does not mention SMS / phone alerting" ) def test_references_cloudwatch_log_group(self) -> None: assert "/musehub/app" in self._src, ( "On-call runbook does not reference CloudWatch log group /musehub/app" ) def test_documents_cold_log_retention(self) -> None: assert "365" in self._src or "1-year" in self._src or "one year" in self._src.lower(), ( "On-call runbook does not document 1-year cold log retention" ) def test_all_four_alarms_documented(self) -> None: for alarm in ("5xx", "p99", "disk", "connections"): assert alarm.lower() in self._src.lower(), ( f"On-call runbook does not document the '{alarm}' alarm" )