gabriel / musehub public
test_schema_parity.py python
262 lines 10.4 KB
Raw
sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595 fix: typing audit — 0 violations, 0 untyped defs across all… Sonnet 4.6 minor ⚠ breaking 20 days ago
1 """TDD — ORM models and alembic migrations must stay in sync.
2
3 Problem
4 -------
5 Migrations run locally when the dev server starts. They run on staging only
6 at deploy time. If someone adds a column to the ORM model and forgets to
7 write a migration, the local create_all-based test DB matches the model but
8 staging (migration-based) does not. The result is exactly the class of drift
9 that caused the created_at NOT NULL failure in production.
10
11 Fix
12 ---
13 Two automated gates:
14
15 S1 The alembic migration chain is unbroken — every revision has a parent
16 and they form a single DAG from the initial revision to head. A broken
17 chain means a migration was merged without updating down_revision.
18
19 S2 Applying all alembic migrations to a fresh database produces a schema
20 that matches Base.metadata exactly (zero compare_metadata diff). If an
21 ORM model change has no corresponding migration, S2 fails loudly.
22
23 How S2 works
24 ------------
25 1. Creates a disposable PostgreSQL database: musehub_parity_test.
26 2. Applies all migrations via alembic upgrade head.
27 3. Runs alembic's compare_metadata() against that schema.
28 4. Asserts the diff list is empty.
29 5. Drops the parity database.
30
31 This test must run against a real PostgreSQL instance. It is skipped if the
32 admin connection to postgres fails (e.g. in environments without Postgres).
33
34 Ignored diff types
35 ------------------
36 compare_metadata sometimes reports benign differences that reflect alembic
37 internals rather than real model drift:
38 - The alembic_version table itself.
39 - server_default expression string differences when the SQL text is
40 semantically identical (e.g. "now()" vs "CURRENT_TIMESTAMP").
41
42 These are filtered before the assertion.
43 """
44 from __future__ import annotations
45
46 import os
47 from pathlib import Path
48
49 import pytest
50
51
52 # ---------------------------------------------------------------------------
53 # Helpers
54 # ---------------------------------------------------------------------------
55
56 _REPO_ROOT = Path(__file__).parent.parent
57 _TEST_DB_URL = os.environ.get(
58 "TEST_DATABASE_URL",
59 "postgresql+asyncpg://musehub:musehub@localhost:5434/musehub_test",
60 )
61 _PARITY_DBNAME = "musehub_parity_test"
62
63 # Derive URLs for admin connection (postgres DB) and parity DB.
64 _BASE_URL = _TEST_DB_URL.replace("+asyncpg", "").rsplit("/", 1)[0]
65 _ADMIN_URL = f"{_BASE_URL}/postgres"
66 _PARITY_URL_SYNC = f"{_BASE_URL}/{_PARITY_DBNAME}"
67 _PARITY_URL_ASYNC = f"{_TEST_DB_URL.rsplit('/', 1)[0]}/{_PARITY_DBNAME}"
68
69
70 def _is_benign_diff(diff_item: tuple[str, ...] | list[tuple[str, ...]]) -> bool:
71 """Return True for known-harmless compare_metadata entries to filter out."""
72 try:
73 # alembic wraps some diff items in an extra list, e.g. [('modify_comment', ...)]
74 item = diff_item[0] if isinstance(diff_item, list) and len(diff_item) == 1 and isinstance(diff_item[0], tuple) else diff_item
75 diff_type = item[0] if isinstance(item, (list, tuple)) else None
76 if diff_type == "add_table":
77 table = item[1]
78 if hasattr(table, "name") and table.name == "alembic_version":
79 return True
80 if diff_type == "modify_comment":
81 # Column comments are cosmetic metadata — they don't affect behaviour
82 # and may be added directly in SQL without a formal migration.
83 return True
84 if diff_type == "modify_default":
85 # server_default text differences that are semantically identical
86 # (e.g. alembic normalises "now()" differently across pg versions)
87 existing = str(item[3]) if len(item) > 3 else ""
88 generated = str(item[4]) if len(item) > 4 else ""
89 _NOW_VARIANTS = {"now()", "CURRENT_TIMESTAMP", "current_timestamp"}
90 if existing in _NOW_VARIANTS and generated in _NOW_VARIANTS:
91 return True
92 except (IndexError, TypeError):
93 pass
94 return False
95
96
97 # ---------------------------------------------------------------------------
98 # S1 — migration chain is unbroken
99 # ---------------------------------------------------------------------------
100
101 def test_s1_migration_chain_is_unbroken() -> None:
102 """Every alembic revision must have a resolvable parent forming a single DAG.
103
104 A broken chain (e.g. two revisions claiming the same down_revision) causes
105 alembic upgrade to fail with a confusing error and staging deploys to halt.
106 This test catches the break before it ships.
107 """
108 from alembic.config import Config
109 from alembic.script import ScriptDirectory
110
111 cfg = Config(str(_REPO_ROOT / "alembic.ini"))
112 scripts = ScriptDirectory.from_config(cfg)
113
114 # Collect all revisions and verify the chain can be walked to head.
115 # walk_revisions() raises if there are gaps or multiple heads (unless
116 # explicit merge points exist).
117 revisions = list(scripts.walk_revisions())
118 assert len(revisions) > 0, "No alembic revisions found — run alembic init"
119
120 # Verify there is exactly one head (or explicit merge point at the top).
121 heads = scripts.get_heads()
122 assert len(heads) == 1, (
123 f"Multiple alembic heads found: {heads}.\n"
124 "Create a merge migration: alembic merge -m 'merge heads' <rev1> <rev2>"
125 )
126
127
128 # ---------------------------------------------------------------------------
129 # S2 — migration-applied schema matches ORM models (zero compare_metadata diff)
130 # ---------------------------------------------------------------------------
131
132 @pytest.fixture(scope="module")
133 def parity_db_url() -> str: # type: ignore[return]
134 """Create a fresh DB, apply all migrations, yield its sync URL, then drop it."""
135 try:
136 import psycopg2 # type: ignore[import]
137 except ImportError:
138 pytest.skip("psycopg2 not available — skipping schema parity test")
139
140 # Create the parity database.
141 try:
142 admin_conn = psycopg2.connect(_ADMIN_URL, connect_timeout=5)
143 except Exception as exc:
144 pytest.skip(f"Cannot connect to postgres admin DB ({exc}) — skipping S2")
145
146 admin_conn.autocommit = True
147 cur = admin_conn.cursor()
148 # Terminate any stale connections then drop + recreate.
149 cur.execute(
150 "SELECT pg_terminate_backend(pid) FROM pg_stat_activity "
151 f"WHERE datname = '{_PARITY_DBNAME}'"
152 )
153 cur.execute(f"DROP DATABASE IF EXISTS {_PARITY_DBNAME}")
154 cur.execute(f"CREATE DATABASE {_PARITY_DBNAME}")
155 cur.close()
156 admin_conn.close()
157
158 # Apply all migrations.
159 from alembic.config import Config
160 from alembic import command as alembic_command
161 import musehub.config as _mhconfig
162
163 original_db_url = _mhconfig.settings.database_url
164 _mhconfig.settings.database_url = _PARITY_URL_ASYNC
165 try:
166 cfg = Config(str(_REPO_ROOT / "alembic.ini"))
167 # Also set it on the config object in case env.py reads from there.
168 cfg.set_main_option("sqlalchemy.url", _PARITY_URL_SYNC)
169 alembic_command.upgrade(cfg, "head")
170 finally:
171 _mhconfig.settings.database_url = original_db_url
172
173 yield _PARITY_URL_SYNC
174
175 # Teardown: drop the parity database.
176 try:
177 teardown_conn = psycopg2.connect(_ADMIN_URL, connect_timeout=5)
178 teardown_conn.autocommit = True
179 td_cur = teardown_conn.cursor()
180 td_cur.execute(
181 "SELECT pg_terminate_backend(pid) FROM pg_stat_activity "
182 f"WHERE datname = '{_PARITY_DBNAME}'"
183 )
184 td_cur.execute(f"DROP DATABASE IF EXISTS {_PARITY_DBNAME}")
185 td_cur.close()
186 teardown_conn.close()
187 except Exception:
188 pass # Best effort — don't fail the test suite on teardown
189
190
191 def test_s3_alembic_check_exits_clean(parity_db_url: str) -> None:
192 """alembic check must exit 0 (no autogenerate diffs) against a fresh migration-applied DB.
193
194 This is the exact gate the deploy script uses: alembic upgrade head && alembic check.
195 A failure here means the ORM has a change with no corresponding migration — the deploy
196 would fail on its alembic check step.
197 """
198 from alembic.config import Config
199 from alembic import command as alembic_command
200 from alembic.util.exc import AutogenerateDiffsDetected
201 import musehub.config as _mhconfig
202
203 original_db_url = _mhconfig.settings.database_url
204 _mhconfig.settings.database_url = _PARITY_URL_ASYNC
205 try:
206 cfg = Config(str(_REPO_ROOT / "alembic.ini"))
207 cfg.set_main_option("sqlalchemy.url", parity_db_url)
208 try:
209 alembic_command.check(cfg)
210 except AutogenerateDiffsDetected as exc:
211 pytest.fail(
212 "alembic check detected ORM↔migration drift — add a migration:\n"
213 + str(exc)
214 )
215 finally:
216 _mhconfig.settings.database_url = original_db_url
217
218
219 def test_s2_orm_models_match_migrations(parity_db_url: str) -> None:
220 """Applying all alembic migrations to a fresh DB must produce zero drift
221 from Base.metadata.
222
223 Fails if an ORM model has a column, index, or constraint that no migration
224 creates. This is the automated equivalent of running:
225
226 alembic upgrade head && alembic check
227
228 against a clean database.
229 """
230 from alembic.autogenerate import compare_metadata
231 from alembic.runtime.migration import MigrationContext
232 from sqlalchemy import create_engine
233
234 import musehub.db.musehub_repo_models # noqa: F401
235 import musehub.db.musehub_social_models # noqa: F401
236 import musehub.db.musehub_release_models # noqa: F401
237 import musehub.db.musehub_webhook_models # noqa: F401
238 import musehub.db.musehub_intel_models # noqa: F401
239 import musehub.db.musehub_identity_models # noqa: F401
240 import musehub.db.musehub_abuse_models # noqa: F401
241 import musehub.db.musehub_jobs_models # noqa: F401
242 import musehub.db.muse_cli_models # noqa: F401 — register CLI models
243 from musehub.db.database import Base
244
245 engine = create_engine(parity_db_url, connect_args={"connect_timeout": 10})
246 try:
247 with engine.connect() as conn:
248 ctx = MigrationContext.configure(conn, opts={"compare_type": True})
249 raw_diff = compare_metadata(ctx, Base.metadata)
250 finally:
251 engine.dispose()
252
253 meaningful_diff = [item for item in raw_diff if not _is_benign_diff(item)]
254
255 assert meaningful_diff == [], (
256 "Schema drift detected: ORM models have changes not captured in migrations.\n\n"
257 "Diff items:\n"
258 + "\n".join(f" {item}" for item in meaningful_diff)
259 + "\n\n"
260 "Fix: run alembic revision --autogenerate -m 'describe the change' "
261 "and commit the generated migration file."
262 )
File History 1 commit
sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595 fix: typing audit — 0 violations, 0 untyped defs across all… Sonnet 4.6 minor 20 days ago