"""TDD — ORM models and alembic migrations must stay in sync. Problem ------- Migrations run locally when the dev server starts. They run on staging only at deploy time. If someone adds a column to the ORM model and forgets to write a migration, the local create_all-based test DB matches the model but staging (migration-based) does not. The result is exactly the class of drift that caused the created_at NOT NULL failure in production. Fix --- Two automated gates: S1 The alembic migration chain is unbroken — every revision has a parent and they form a single DAG from the initial revision to head. A broken chain means a migration was merged without updating down_revision. S2 Applying all alembic migrations to a fresh database produces a schema that matches Base.metadata exactly (zero compare_metadata diff). If an ORM model change has no corresponding migration, S2 fails loudly. How S2 works ------------ 1. Creates a disposable PostgreSQL database: musehub_parity_test. 2. Applies all migrations via alembic upgrade head. 3. Runs alembic's compare_metadata() against that schema. 4. Asserts the diff list is empty. 5. Drops the parity database. This test must run against a real PostgreSQL instance. It is skipped if the admin connection to postgres fails (e.g. in environments without Postgres). Ignored diff types ------------------ compare_metadata sometimes reports benign differences that reflect alembic internals rather than real model drift: - The alembic_version table itself. - server_default expression string differences when the SQL text is semantically identical (e.g. "now()" vs "CURRENT_TIMESTAMP"). These are filtered before the assertion. """ from __future__ import annotations import os from pathlib import Path import pytest # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- _REPO_ROOT = Path(__file__).parent.parent _TEST_DB_URL = os.environ.get( "TEST_DATABASE_URL", "postgresql+asyncpg://musehub:musehub@localhost:5434/musehub_test", ) _PARITY_DBNAME = "musehub_parity_test" # Derive URLs for admin connection (postgres DB) and parity DB. _BASE_URL = _TEST_DB_URL.replace("+asyncpg", "").rsplit("/", 1)[0] _ADMIN_URL = f"{_BASE_URL}/postgres" _PARITY_URL_SYNC = f"{_BASE_URL}/{_PARITY_DBNAME}" _PARITY_URL_ASYNC = f"{_TEST_DB_URL.rsplit('/', 1)[0]}/{_PARITY_DBNAME}" def _is_benign_diff(diff_item: tuple[str, ...] | list[tuple[str, ...]]) -> bool: """Return True for known-harmless compare_metadata entries to filter out.""" try: # alembic wraps some diff items in an extra list, e.g. [('modify_comment', ...)] item = diff_item[0] if isinstance(diff_item, list) and len(diff_item) == 1 and isinstance(diff_item[0], tuple) else diff_item diff_type = item[0] if isinstance(item, (list, tuple)) else None if diff_type == "add_table": table = item[1] if hasattr(table, "name") and table.name == "alembic_version": return True if diff_type == "modify_comment": # Column comments are cosmetic metadata — they don't affect behaviour # and may be added directly in SQL without a formal migration. return True if diff_type == "modify_default": # server_default text differences that are semantically identical # (e.g. alembic normalises "now()" differently across pg versions) existing = str(item[3]) if len(item) > 3 else "" generated = str(item[4]) if len(item) > 4 else "" _NOW_VARIANTS = {"now()", "CURRENT_TIMESTAMP", "current_timestamp"} if existing in _NOW_VARIANTS and generated in _NOW_VARIANTS: return True except (IndexError, TypeError): pass return False # --------------------------------------------------------------------------- # S1 — migration chain is unbroken # --------------------------------------------------------------------------- def test_s1_migration_chain_is_unbroken() -> None: """Every alembic revision must have a resolvable parent forming a single DAG. A broken chain (e.g. two revisions claiming the same down_revision) causes alembic upgrade to fail with a confusing error and staging deploys to halt. This test catches the break before it ships. """ from alembic.config import Config from alembic.script import ScriptDirectory cfg = Config(str(_REPO_ROOT / "alembic.ini")) scripts = ScriptDirectory.from_config(cfg) # Collect all revisions and verify the chain can be walked to head. # walk_revisions() raises if there are gaps or multiple heads (unless # explicit merge points exist). revisions = list(scripts.walk_revisions()) assert len(revisions) > 0, "No alembic revisions found — run alembic init" # Verify there is exactly one head (or explicit merge point at the top). heads = scripts.get_heads() assert len(heads) == 1, ( f"Multiple alembic heads found: {heads}.\n" "Create a merge migration: alembic merge -m 'merge heads' " ) # --------------------------------------------------------------------------- # S2 — migration-applied schema matches ORM models (zero compare_metadata diff) # --------------------------------------------------------------------------- @pytest.fixture(scope="module") def parity_db_url() -> str: # type: ignore[return] """Create a fresh DB, apply all migrations, yield its sync URL, then drop it.""" try: import psycopg2 # type: ignore[import] except ImportError: pytest.skip("psycopg2 not available — skipping schema parity test") # Create the parity database. try: admin_conn = psycopg2.connect(_ADMIN_URL, connect_timeout=5) except Exception as exc: pytest.skip(f"Cannot connect to postgres admin DB ({exc}) — skipping S2") admin_conn.autocommit = True cur = admin_conn.cursor() # Terminate any stale connections then drop + recreate. cur.execute( "SELECT pg_terminate_backend(pid) FROM pg_stat_activity " f"WHERE datname = '{_PARITY_DBNAME}'" ) cur.execute(f"DROP DATABASE IF EXISTS {_PARITY_DBNAME}") cur.execute(f"CREATE DATABASE {_PARITY_DBNAME}") cur.close() admin_conn.close() # Apply all migrations. from alembic.config import Config from alembic import command as alembic_command import musehub.config as _mhconfig original_db_url = _mhconfig.settings.database_url _mhconfig.settings.database_url = _PARITY_URL_ASYNC try: cfg = Config(str(_REPO_ROOT / "alembic.ini")) # Also set it on the config object in case env.py reads from there. cfg.set_main_option("sqlalchemy.url", _PARITY_URL_SYNC) alembic_command.upgrade(cfg, "head") finally: _mhconfig.settings.database_url = original_db_url yield _PARITY_URL_SYNC # Teardown: drop the parity database. try: teardown_conn = psycopg2.connect(_ADMIN_URL, connect_timeout=5) teardown_conn.autocommit = True td_cur = teardown_conn.cursor() td_cur.execute( "SELECT pg_terminate_backend(pid) FROM pg_stat_activity " f"WHERE datname = '{_PARITY_DBNAME}'" ) td_cur.execute(f"DROP DATABASE IF EXISTS {_PARITY_DBNAME}") td_cur.close() teardown_conn.close() except Exception: pass # Best effort — don't fail the test suite on teardown def test_s3_alembic_check_exits_clean(parity_db_url: str) -> None: """alembic check must exit 0 (no autogenerate diffs) against a fresh migration-applied DB. This is the exact gate the deploy script uses: alembic upgrade head && alembic check. A failure here means the ORM has a change with no corresponding migration — the deploy would fail on its alembic check step. """ from alembic.config import Config from alembic import command as alembic_command from alembic.util.exc import AutogenerateDiffsDetected import musehub.config as _mhconfig original_db_url = _mhconfig.settings.database_url _mhconfig.settings.database_url = _PARITY_URL_ASYNC try: cfg = Config(str(_REPO_ROOT / "alembic.ini")) cfg.set_main_option("sqlalchemy.url", parity_db_url) try: alembic_command.check(cfg) except AutogenerateDiffsDetected as exc: pytest.fail( "alembic check detected ORM↔migration drift — add a migration:\n" + str(exc) ) finally: _mhconfig.settings.database_url = original_db_url def test_s2_orm_models_match_migrations(parity_db_url: str) -> None: """Applying all alembic migrations to a fresh DB must produce zero drift from Base.metadata. Fails if an ORM model has a column, index, or constraint that no migration creates. This is the automated equivalent of running: alembic upgrade head && alembic check against a clean database. """ from alembic.autogenerate import compare_metadata from alembic.runtime.migration import MigrationContext from sqlalchemy import create_engine import musehub.db.musehub_repo_models # noqa: F401 import musehub.db.musehub_social_models # noqa: F401 import musehub.db.musehub_release_models # noqa: F401 import musehub.db.musehub_webhook_models # noqa: F401 import musehub.db.musehub_intel_models # noqa: F401 import musehub.db.musehub_identity_models # noqa: F401 import musehub.db.musehub_abuse_models # noqa: F401 import musehub.db.musehub_jobs_models # noqa: F401 import musehub.db.muse_cli_models # noqa: F401 — register CLI models from musehub.db.database import Base engine = create_engine(parity_db_url, connect_args={"connect_timeout": 10}) try: with engine.connect() as conn: ctx = MigrationContext.configure(conn, opts={"compare_type": True}) raw_diff = compare_metadata(ctx, Base.metadata) finally: engine.dispose() meaningful_diff = [item for item in raw_diff if not _is_benign_diff(item)] assert meaningful_diff == [], ( "Schema drift detected: ORM models have changes not captured in migrations.\n\n" "Diff items:\n" + "\n".join(f" {item}" for item in meaningful_diff) + "\n\n" "Fix: run alembic revision --autogenerate -m 'describe the change' " "and commit the generated migration file." )