"""TDD — ORM models and alembic migrations must stay in sync.

Problem
-------
Migrations run locally when the dev server starts.  They run on staging only
at deploy time.  If someone adds a column to the ORM model and forgets to
write a migration, the local create_all-based test DB matches the model but
staging (migration-based) does not.  The result is exactly the class of drift
that caused the created_at NOT NULL failure in production.

Fix
---
Two automated gates:

  S1  The alembic migration chain is unbroken — every revision has a parent
      and they form a single DAG from the initial revision to head.  A broken
      chain means a migration was merged without updating down_revision.

  S2  Applying all alembic migrations to a fresh database produces a schema
      that matches Base.metadata exactly (zero compare_metadata diff).  If an
      ORM model change has no corresponding migration, S2 fails loudly.

How S2 works
------------
1. Creates a disposable PostgreSQL database: musehub_parity_test.
2. Applies all migrations via alembic upgrade head.
3. Runs alembic's compare_metadata() against that schema.
4. Asserts the diff list is empty.
5. Drops the parity database.

This test must run against a real PostgreSQL instance.  It is skipped if the
admin connection to postgres fails (e.g. in environments without Postgres).

Ignored diff types
------------------
compare_metadata sometimes reports benign differences that reflect alembic
internals rather than real model drift:
  - The alembic_version table itself.
  - server_default expression string differences when the SQL text is
    semantically identical (e.g. "now()" vs "CURRENT_TIMESTAMP").

These are filtered before the assertion.
"""
from __future__ import annotations

import os
from pathlib import Path

import pytest


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

_REPO_ROOT = Path(__file__).parent.parent
_TEST_DB_URL = os.environ.get(
    "TEST_DATABASE_URL",
    "postgresql+asyncpg://musehub:musehub@localhost:5434/musehub_test",
)
_PARITY_DBNAME = "musehub_parity_test"

# Derive URLs for admin connection (postgres DB) and parity DB.
_BASE_URL = _TEST_DB_URL.replace("+asyncpg", "").rsplit("/", 1)[0]
_ADMIN_URL = f"{_BASE_URL}/postgres"
_PARITY_URL_SYNC = f"{_BASE_URL}/{_PARITY_DBNAME}"
_PARITY_URL_ASYNC = f"{_TEST_DB_URL.rsplit('/', 1)[0]}/{_PARITY_DBNAME}"


def _is_benign_diff(diff_item: tuple[str, ...] | list[tuple[str, ...]]) -> bool:
    """Return True for known-harmless compare_metadata entries to filter out."""
    try:
        # alembic wraps some diff items in an extra list, e.g. [('modify_comment', ...)]
        item = diff_item[0] if isinstance(diff_item, list) and len(diff_item) == 1 and isinstance(diff_item[0], tuple) else diff_item
        diff_type = item[0] if isinstance(item, (list, tuple)) else None
        if diff_type == "add_table":
            table = item[1]
            if hasattr(table, "name") and table.name == "alembic_version":
                return True
        if diff_type == "modify_comment":
            # Column comments are cosmetic metadata — they don't affect behaviour
            # and may be added directly in SQL without a formal migration.
            return True
        if diff_type == "modify_default":
            # server_default text differences that are semantically identical
            # (e.g. alembic normalises "now()" differently across pg versions)
            existing = str(item[3]) if len(item) > 3 else ""
            generated = str(item[4]) if len(item) > 4 else ""
            _NOW_VARIANTS = {"now()", "CURRENT_TIMESTAMP", "current_timestamp"}
            if existing in _NOW_VARIANTS and generated in _NOW_VARIANTS:
                return True
    except (IndexError, TypeError):
        pass
    return False


# ---------------------------------------------------------------------------
# S1 — migration chain is unbroken
# ---------------------------------------------------------------------------

def test_s1_migration_chain_is_unbroken() -> None:
    """Every alembic revision must have a resolvable parent forming a single DAG.

    A broken chain (e.g. two revisions claiming the same down_revision) causes
    alembic upgrade to fail with a confusing error and staging deploys to halt.
    This test catches the break before it ships.
    """
    from alembic.config import Config
    from alembic.script import ScriptDirectory

    cfg = Config(str(_REPO_ROOT / "alembic.ini"))
    scripts = ScriptDirectory.from_config(cfg)

    # Collect all revisions and verify the chain can be walked to head.
    # walk_revisions() raises if there are gaps or multiple heads (unless
    # explicit merge points exist).
    revisions = list(scripts.walk_revisions())
    assert len(revisions) > 0, "No alembic revisions found — run alembic init"

    # Verify there is exactly one head (or explicit merge point at the top).
    heads = scripts.get_heads()
    assert len(heads) == 1, (
        f"Multiple alembic heads found: {heads}.\n"
        "Create a merge migration: alembic merge -m 'merge heads' <rev1> <rev2>"
    )


# ---------------------------------------------------------------------------
# S2 — migration-applied schema matches ORM models (zero compare_metadata diff)
# ---------------------------------------------------------------------------

@pytest.fixture(scope="module")
def parity_db_url() -> str:  # type: ignore[return]
    """Create a fresh DB, apply all migrations, yield its sync URL, then drop it."""
    try:
        import psycopg2  # type: ignore[import]
    except ImportError:
        pytest.skip("psycopg2 not available — skipping schema parity test")

    # Create the parity database.
    try:
        admin_conn = psycopg2.connect(_ADMIN_URL, connect_timeout=5)
    except Exception as exc:
        pytest.skip(f"Cannot connect to postgres admin DB ({exc}) — skipping S2")

    admin_conn.autocommit = True
    cur = admin_conn.cursor()
    # Terminate any stale connections then drop + recreate.
    cur.execute(
        "SELECT pg_terminate_backend(pid) FROM pg_stat_activity "
        f"WHERE datname = '{_PARITY_DBNAME}'"
    )
    cur.execute(f"DROP DATABASE IF EXISTS {_PARITY_DBNAME}")
    cur.execute(f"CREATE DATABASE {_PARITY_DBNAME}")
    cur.close()
    admin_conn.close()

    # Apply all migrations.
    from alembic.config import Config
    from alembic import command as alembic_command
    import musehub.config as _mhconfig

    original_db_url = _mhconfig.settings.database_url
    _mhconfig.settings.database_url = _PARITY_URL_ASYNC
    try:
        cfg = Config(str(_REPO_ROOT / "alembic.ini"))
        # Also set it on the config object in case env.py reads from there.
        cfg.set_main_option("sqlalchemy.url", _PARITY_URL_SYNC)
        alembic_command.upgrade(cfg, "head")
    finally:
        _mhconfig.settings.database_url = original_db_url

    yield _PARITY_URL_SYNC

    # Teardown: drop the parity database.
    try:
        teardown_conn = psycopg2.connect(_ADMIN_URL, connect_timeout=5)
        teardown_conn.autocommit = True
        td_cur = teardown_conn.cursor()
        td_cur.execute(
            "SELECT pg_terminate_backend(pid) FROM pg_stat_activity "
            f"WHERE datname = '{_PARITY_DBNAME}'"
        )
        td_cur.execute(f"DROP DATABASE IF EXISTS {_PARITY_DBNAME}")
        td_cur.close()
        teardown_conn.close()
    except Exception:
        pass  # Best effort — don't fail the test suite on teardown


def test_s3_alembic_check_exits_clean(parity_db_url: str) -> None:
    """alembic check must exit 0 (no autogenerate diffs) against a fresh migration-applied DB.

    This is the exact gate the deploy script uses: alembic upgrade head && alembic check.
    A failure here means the ORM has a change with no corresponding migration — the deploy
    would fail on its alembic check step.
    """
    from alembic.config import Config
    from alembic import command as alembic_command
    from alembic.util.exc import AutogenerateDiffsDetected
    import musehub.config as _mhconfig

    original_db_url = _mhconfig.settings.database_url
    _mhconfig.settings.database_url = _PARITY_URL_ASYNC
    try:
        cfg = Config(str(_REPO_ROOT / "alembic.ini"))
        cfg.set_main_option("sqlalchemy.url", parity_db_url)
        try:
            alembic_command.check(cfg)
        except AutogenerateDiffsDetected as exc:
            pytest.fail(
                "alembic check detected ORM↔migration drift — add a migration:\n"
                + str(exc)
            )
    finally:
        _mhconfig.settings.database_url = original_db_url


def test_s2_orm_models_match_migrations(parity_db_url: str) -> None:
    """Applying all alembic migrations to a fresh DB must produce zero drift
    from Base.metadata.

    Fails if an ORM model has a column, index, or constraint that no migration
    creates.  This is the automated equivalent of running:

        alembic upgrade head && alembic check

    against a clean database.
    """
    from alembic.autogenerate import compare_metadata
    from alembic.runtime.migration import MigrationContext
    from sqlalchemy import create_engine

    import musehub.db.musehub_repo_models  # noqa: F401
    import musehub.db.musehub_social_models  # noqa: F401
    import musehub.db.musehub_release_models  # noqa: F401
    import musehub.db.musehub_webhook_models  # noqa: F401
    import musehub.db.musehub_intel_models  # noqa: F401
    import musehub.db.musehub_identity_models  # noqa: F401
    import musehub.db.musehub_abuse_models  # noqa: F401
    import musehub.db.musehub_jobs_models  # noqa: F401
    import musehub.db.muse_cli_models  # noqa: F401 — register CLI models
    from musehub.db.database import Base

    engine = create_engine(parity_db_url, connect_args={"connect_timeout": 10})
    try:
        with engine.connect() as conn:
            ctx = MigrationContext.configure(conn, opts={"compare_type": True})
            raw_diff = compare_metadata(ctx, Base.metadata)
    finally:
        engine.dispose()

    meaningful_diff = [item for item in raw_diff if not _is_benign_diff(item)]

    assert meaningful_diff == [], (
        "Schema drift detected: ORM models have changes not captured in migrations.\n\n"
        "Diff items:\n"
        + "\n".join(f"  {item}" for item in meaningful_diff)
        + "\n\n"
        "Fix: run  alembic revision --autogenerate -m 'describe the change'  "
        "and commit the generated migration file."
    )