test_schema_parity.py
python
sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595
fix: typing audit — 0 violations, 0 untyped defs across all…
Sonnet 4.6
minor
⚠ breaking
20 days ago
| 1 | """TDD — ORM models and alembic migrations must stay in sync. |
| 2 | |
| 3 | Problem |
| 4 | ------- |
| 5 | Migrations run locally when the dev server starts. They run on staging only |
| 6 | at deploy time. If someone adds a column to the ORM model and forgets to |
| 7 | write a migration, the local create_all-based test DB matches the model but |
| 8 | staging (migration-based) does not. The result is exactly the class of drift |
| 9 | that caused the created_at NOT NULL failure in production. |
| 10 | |
| 11 | Fix |
| 12 | --- |
| 13 | Two automated gates: |
| 14 | |
| 15 | S1 The alembic migration chain is unbroken — every revision has a parent |
| 16 | and they form a single DAG from the initial revision to head. A broken |
| 17 | chain means a migration was merged without updating down_revision. |
| 18 | |
| 19 | S2 Applying all alembic migrations to a fresh database produces a schema |
| 20 | that matches Base.metadata exactly (zero compare_metadata diff). If an |
| 21 | ORM model change has no corresponding migration, S2 fails loudly. |
| 22 | |
| 23 | How S2 works |
| 24 | ------------ |
| 25 | 1. Creates a disposable PostgreSQL database: musehub_parity_test. |
| 26 | 2. Applies all migrations via alembic upgrade head. |
| 27 | 3. Runs alembic's compare_metadata() against that schema. |
| 28 | 4. Asserts the diff list is empty. |
| 29 | 5. Drops the parity database. |
| 30 | |
| 31 | This test must run against a real PostgreSQL instance. It is skipped if the |
| 32 | admin connection to postgres fails (e.g. in environments without Postgres). |
| 33 | |
| 34 | Ignored diff types |
| 35 | ------------------ |
| 36 | compare_metadata sometimes reports benign differences that reflect alembic |
| 37 | internals rather than real model drift: |
| 38 | - The alembic_version table itself. |
| 39 | - server_default expression string differences when the SQL text is |
| 40 | semantically identical (e.g. "now()" vs "CURRENT_TIMESTAMP"). |
| 41 | |
| 42 | These are filtered before the assertion. |
| 43 | """ |
| 44 | from __future__ import annotations |
| 45 | |
| 46 | import os |
| 47 | from pathlib import Path |
| 48 | |
| 49 | import pytest |
| 50 | |
| 51 | |
| 52 | # --------------------------------------------------------------------------- |
| 53 | # Helpers |
| 54 | # --------------------------------------------------------------------------- |
| 55 | |
| 56 | _REPO_ROOT = Path(__file__).parent.parent |
| 57 | _TEST_DB_URL = os.environ.get( |
| 58 | "TEST_DATABASE_URL", |
| 59 | "postgresql+asyncpg://musehub:musehub@localhost:5434/musehub_test", |
| 60 | ) |
| 61 | _PARITY_DBNAME = "musehub_parity_test" |
| 62 | |
| 63 | # Derive URLs for admin connection (postgres DB) and parity DB. |
| 64 | _BASE_URL = _TEST_DB_URL.replace("+asyncpg", "").rsplit("/", 1)[0] |
| 65 | _ADMIN_URL = f"{_BASE_URL}/postgres" |
| 66 | _PARITY_URL_SYNC = f"{_BASE_URL}/{_PARITY_DBNAME}" |
| 67 | _PARITY_URL_ASYNC = f"{_TEST_DB_URL.rsplit('/', 1)[0]}/{_PARITY_DBNAME}" |
| 68 | |
| 69 | |
| 70 | def _is_benign_diff(diff_item: tuple[str, ...] | list[tuple[str, ...]]) -> bool: |
| 71 | """Return True for known-harmless compare_metadata entries to filter out.""" |
| 72 | try: |
| 73 | # alembic wraps some diff items in an extra list, e.g. [('modify_comment', ...)] |
| 74 | item = diff_item[0] if isinstance(diff_item, list) and len(diff_item) == 1 and isinstance(diff_item[0], tuple) else diff_item |
| 75 | diff_type = item[0] if isinstance(item, (list, tuple)) else None |
| 76 | if diff_type == "add_table": |
| 77 | table = item[1] |
| 78 | if hasattr(table, "name") and table.name == "alembic_version": |
| 79 | return True |
| 80 | if diff_type == "modify_comment": |
| 81 | # Column comments are cosmetic metadata — they don't affect behaviour |
| 82 | # and may be added directly in SQL without a formal migration. |
| 83 | return True |
| 84 | if diff_type == "modify_default": |
| 85 | # server_default text differences that are semantically identical |
| 86 | # (e.g. alembic normalises "now()" differently across pg versions) |
| 87 | existing = str(item[3]) if len(item) > 3 else "" |
| 88 | generated = str(item[4]) if len(item) > 4 else "" |
| 89 | _NOW_VARIANTS = {"now()", "CURRENT_TIMESTAMP", "current_timestamp"} |
| 90 | if existing in _NOW_VARIANTS and generated in _NOW_VARIANTS: |
| 91 | return True |
| 92 | except (IndexError, TypeError): |
| 93 | pass |
| 94 | return False |
| 95 | |
| 96 | |
| 97 | # --------------------------------------------------------------------------- |
| 98 | # S1 — migration chain is unbroken |
| 99 | # --------------------------------------------------------------------------- |
| 100 | |
| 101 | def test_s1_migration_chain_is_unbroken() -> None: |
| 102 | """Every alembic revision must have a resolvable parent forming a single DAG. |
| 103 | |
| 104 | A broken chain (e.g. two revisions claiming the same down_revision) causes |
| 105 | alembic upgrade to fail with a confusing error and staging deploys to halt. |
| 106 | This test catches the break before it ships. |
| 107 | """ |
| 108 | from alembic.config import Config |
| 109 | from alembic.script import ScriptDirectory |
| 110 | |
| 111 | cfg = Config(str(_REPO_ROOT / "alembic.ini")) |
| 112 | scripts = ScriptDirectory.from_config(cfg) |
| 113 | |
| 114 | # Collect all revisions and verify the chain can be walked to head. |
| 115 | # walk_revisions() raises if there are gaps or multiple heads (unless |
| 116 | # explicit merge points exist). |
| 117 | revisions = list(scripts.walk_revisions()) |
| 118 | assert len(revisions) > 0, "No alembic revisions found — run alembic init" |
| 119 | |
| 120 | # Verify there is exactly one head (or explicit merge point at the top). |
| 121 | heads = scripts.get_heads() |
| 122 | assert len(heads) == 1, ( |
| 123 | f"Multiple alembic heads found: {heads}.\n" |
| 124 | "Create a merge migration: alembic merge -m 'merge heads' <rev1> <rev2>" |
| 125 | ) |
| 126 | |
| 127 | |
| 128 | # --------------------------------------------------------------------------- |
| 129 | # S2 — migration-applied schema matches ORM models (zero compare_metadata diff) |
| 130 | # --------------------------------------------------------------------------- |
| 131 | |
| 132 | @pytest.fixture(scope="module") |
| 133 | def parity_db_url() -> str: # type: ignore[return] |
| 134 | """Create a fresh DB, apply all migrations, yield its sync URL, then drop it.""" |
| 135 | try: |
| 136 | import psycopg2 # type: ignore[import] |
| 137 | except ImportError: |
| 138 | pytest.skip("psycopg2 not available — skipping schema parity test") |
| 139 | |
| 140 | # Create the parity database. |
| 141 | try: |
| 142 | admin_conn = psycopg2.connect(_ADMIN_URL, connect_timeout=5) |
| 143 | except Exception as exc: |
| 144 | pytest.skip(f"Cannot connect to postgres admin DB ({exc}) — skipping S2") |
| 145 | |
| 146 | admin_conn.autocommit = True |
| 147 | cur = admin_conn.cursor() |
| 148 | # Terminate any stale connections then drop + recreate. |
| 149 | cur.execute( |
| 150 | "SELECT pg_terminate_backend(pid) FROM pg_stat_activity " |
| 151 | f"WHERE datname = '{_PARITY_DBNAME}'" |
| 152 | ) |
| 153 | cur.execute(f"DROP DATABASE IF EXISTS {_PARITY_DBNAME}") |
| 154 | cur.execute(f"CREATE DATABASE {_PARITY_DBNAME}") |
| 155 | cur.close() |
| 156 | admin_conn.close() |
| 157 | |
| 158 | # Apply all migrations. |
| 159 | from alembic.config import Config |
| 160 | from alembic import command as alembic_command |
| 161 | import musehub.config as _mhconfig |
| 162 | |
| 163 | original_db_url = _mhconfig.settings.database_url |
| 164 | _mhconfig.settings.database_url = _PARITY_URL_ASYNC |
| 165 | try: |
| 166 | cfg = Config(str(_REPO_ROOT / "alembic.ini")) |
| 167 | # Also set it on the config object in case env.py reads from there. |
| 168 | cfg.set_main_option("sqlalchemy.url", _PARITY_URL_SYNC) |
| 169 | alembic_command.upgrade(cfg, "head") |
| 170 | finally: |
| 171 | _mhconfig.settings.database_url = original_db_url |
| 172 | |
| 173 | yield _PARITY_URL_SYNC |
| 174 | |
| 175 | # Teardown: drop the parity database. |
| 176 | try: |
| 177 | teardown_conn = psycopg2.connect(_ADMIN_URL, connect_timeout=5) |
| 178 | teardown_conn.autocommit = True |
| 179 | td_cur = teardown_conn.cursor() |
| 180 | td_cur.execute( |
| 181 | "SELECT pg_terminate_backend(pid) FROM pg_stat_activity " |
| 182 | f"WHERE datname = '{_PARITY_DBNAME}'" |
| 183 | ) |
| 184 | td_cur.execute(f"DROP DATABASE IF EXISTS {_PARITY_DBNAME}") |
| 185 | td_cur.close() |
| 186 | teardown_conn.close() |
| 187 | except Exception: |
| 188 | pass # Best effort — don't fail the test suite on teardown |
| 189 | |
| 190 | |
| 191 | def test_s3_alembic_check_exits_clean(parity_db_url: str) -> None: |
| 192 | """alembic check must exit 0 (no autogenerate diffs) against a fresh migration-applied DB. |
| 193 | |
| 194 | This is the exact gate the deploy script uses: alembic upgrade head && alembic check. |
| 195 | A failure here means the ORM has a change with no corresponding migration — the deploy |
| 196 | would fail on its alembic check step. |
| 197 | """ |
| 198 | from alembic.config import Config |
| 199 | from alembic import command as alembic_command |
| 200 | from alembic.util.exc import AutogenerateDiffsDetected |
| 201 | import musehub.config as _mhconfig |
| 202 | |
| 203 | original_db_url = _mhconfig.settings.database_url |
| 204 | _mhconfig.settings.database_url = _PARITY_URL_ASYNC |
| 205 | try: |
| 206 | cfg = Config(str(_REPO_ROOT / "alembic.ini")) |
| 207 | cfg.set_main_option("sqlalchemy.url", parity_db_url) |
| 208 | try: |
| 209 | alembic_command.check(cfg) |
| 210 | except AutogenerateDiffsDetected as exc: |
| 211 | pytest.fail( |
| 212 | "alembic check detected ORM↔migration drift — add a migration:\n" |
| 213 | + str(exc) |
| 214 | ) |
| 215 | finally: |
| 216 | _mhconfig.settings.database_url = original_db_url |
| 217 | |
| 218 | |
| 219 | def test_s2_orm_models_match_migrations(parity_db_url: str) -> None: |
| 220 | """Applying all alembic migrations to a fresh DB must produce zero drift |
| 221 | from Base.metadata. |
| 222 | |
| 223 | Fails if an ORM model has a column, index, or constraint that no migration |
| 224 | creates. This is the automated equivalent of running: |
| 225 | |
| 226 | alembic upgrade head && alembic check |
| 227 | |
| 228 | against a clean database. |
| 229 | """ |
| 230 | from alembic.autogenerate import compare_metadata |
| 231 | from alembic.runtime.migration import MigrationContext |
| 232 | from sqlalchemy import create_engine |
| 233 | |
| 234 | import musehub.db.musehub_repo_models # noqa: F401 |
| 235 | import musehub.db.musehub_social_models # noqa: F401 |
| 236 | import musehub.db.musehub_release_models # noqa: F401 |
| 237 | import musehub.db.musehub_webhook_models # noqa: F401 |
| 238 | import musehub.db.musehub_intel_models # noqa: F401 |
| 239 | import musehub.db.musehub_identity_models # noqa: F401 |
| 240 | import musehub.db.musehub_abuse_models # noqa: F401 |
| 241 | import musehub.db.musehub_jobs_models # noqa: F401 |
| 242 | import musehub.db.muse_cli_models # noqa: F401 — register CLI models |
| 243 | from musehub.db.database import Base |
| 244 | |
| 245 | engine = create_engine(parity_db_url, connect_args={"connect_timeout": 10}) |
| 246 | try: |
| 247 | with engine.connect() as conn: |
| 248 | ctx = MigrationContext.configure(conn, opts={"compare_type": True}) |
| 249 | raw_diff = compare_metadata(ctx, Base.metadata) |
| 250 | finally: |
| 251 | engine.dispose() |
| 252 | |
| 253 | meaningful_diff = [item for item in raw_diff if not _is_benign_diff(item)] |
| 254 | |
| 255 | assert meaningful_diff == [], ( |
| 256 | "Schema drift detected: ORM models have changes not captured in migrations.\n\n" |
| 257 | "Diff items:\n" |
| 258 | + "\n".join(f" {item}" for item in meaningful_diff) |
| 259 | + "\n\n" |
| 260 | "Fix: run alembic revision --autogenerate -m 'describe the change' " |
| 261 | "and commit the generated migration file." |
| 262 | ) |
File History
1 commit
sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595
fix: typing audit — 0 violations, 0 untyped defs across all…
Sonnet 4.6
minor
⚠
20 days ago