"""Migrate all JSON columns to JSONB or PostgreSQL ARRAY. Two changes: 1. Homogeneous list[str] columns: JSON → ARRAY (text[] or varchar(128)[]). ARRAY is type-safe, GIN-indexable, and has no JSON parsing overhead. 2. Heterogeneous blob columns: JSON → JSONB. JSONB is binary-stored, supports GIN indexing, and is faster for lookups. PostgreSQL allows ALTER COLUMN type with USING for JSON→JSONB and JSON→ARRAY casts. The USING clause handles the coercion transparently. Revision ID: 0024 Revises: 0023 """ from __future__ import annotations from alembic import op import sqlalchemy as sa from sqlalchemy.dialects.postgresql import JSONB revision = "0024" down_revision = "0023" branch_labels = None depends_on = None # --------------------------------------------------------------------------- # Columns to convert to ARRAY(TEXT) # --------------------------------------------------------------------------- _TEXT_ARRAY_COLS: list[tuple[str, str]] = [ ("musehub_repos", "tags"), ("musehub_snapshots", "directories"), ("musehub_identities", "agent_capabilities"), ("musehub_identities", "scope"), ("musehub_identities", "org_members"), ("musehub_issues", "labels"), ("musehub_issues", "symbol_anchors"), ("musehub_proposals", "touched_symbols"), ("musehub_webhooks", "events"), ("musehub_sessions", "participants"), ("musehub_mists", "tags"), ("musehub_mists", "symbol_anchors"), ] # Columns to convert to ARRAY(VARCHAR(128)) _VARCHAR_ARRAY_COLS: list[tuple[str, str]] = [ ("musehub_identities", "pinned_repo_ids"), ("musehub_issues", "commit_anchors"), ("musehub_sessions", "commits"), ("musehub_coord_tasks", "depends_on"), ] # Columns to convert from JSON → JSONB (heterogeneous blobs) _JSONB_COLS: list[tuple[str, str]] = [ ("musehub_repos", "domain_meta"), ("musehub_repos", "settings"), ("musehub_issue_events", "payload"), ("musehub_proposals", "domain_diff"), ("musehub_proposal_comments", "dimension_ref"), ("musehub_releases", "download_urls"), ("musehub_background_jobs", "payload"), ("musehub_symbol_history_entries", "op_payload"), # table renamed in dataclass refactor ("musehub_symbol_intel", "gravity_depth_distribution"), ("musehub_intel_codemap_meta", "cycles_json"), ("musehub_coord_records", "payload"), ("musehub_coord_tasks", "payload"), ("muse_snapshots", "manifest"), ("muse_commits", "commit_metadata"), ("musehub_domains", "capabilities"), ] def upgrade() -> None: conn = op.get_bind() # PostgreSQL forbids subqueries in ALTER TABLE ... USING. # A helper function sidesteps the restriction — functions are callable # in USING clauses even when they contain subqueries internally. conn.execute(sa.text(""" CREATE OR REPLACE FUNCTION _muse_json_to_text_array(v text) RETURNS text[] LANGUAGE sql IMMUTABLE AS $$ SELECT CASE WHEN v IS NULL THEN NULL ELSE ARRAY(SELECT jsonb_array_elements_text(v::jsonb)) END $$ """)) for table, col in _TEXT_ARRAY_COLS: conn.execute(sa.text( f"ALTER TABLE {table} ALTER COLUMN {col} TYPE text[] " f"USING _muse_json_to_text_array({col}::text)" )) for table, col in _VARCHAR_ARRAY_COLS: conn.execute(sa.text( f"ALTER TABLE {table} ALTER COLUMN {col} TYPE varchar(128)[] " f"USING _muse_json_to_text_array({col}::text)::varchar(128)[]" )) conn.execute(sa.text("DROP FUNCTION _muse_json_to_text_array(text)")) for table, col in _JSONB_COLS: conn.execute(sa.text( f"ALTER TABLE {table} ALTER COLUMN {col} TYPE jsonb " f"USING {col}::jsonb" )) def downgrade() -> None: conn = op.get_bind() def _has_column(table: str, column: str) -> bool: r = conn.execute(sa.text( "SELECT 1 FROM information_schema.columns " "WHERE table_schema='public' AND table_name=:t AND column_name=:c" ), {"t": table, "c": column}) return r.fetchone() is not None for table, col in _TEXT_ARRAY_COLS + _VARCHAR_ARRAY_COLS: if _has_column(table, col): conn.execute(sa.text( f"ALTER TABLE {table} ALTER COLUMN {col} TYPE json " f"USING to_json({col})" )) for table, col in _JSONB_COLS: if _has_column(table, col): conn.execute(sa.text( f"ALTER TABLE {table} ALTER COLUMN {col} TYPE json " f"USING {col}::json" ))