0024_json_to_jsonb_and_array.py
python
sha256:94ef169c149a452bff7c604ded8b280b19bd477c2dabcb56972780b0b784c7aa
Merge 'fix/assignee-sigil-inline' into 'dev' — proposal: As…
Human
2 days ago
| 1 | """Migrate all JSON columns to JSONB or PostgreSQL ARRAY. |
| 2 | |
| 3 | Two changes: |
| 4 | 1. Homogeneous list[str] columns: JSON → ARRAY (text[] or varchar(128)[]). |
| 5 | ARRAY is type-safe, GIN-indexable, and has no JSON parsing overhead. |
| 6 | |
| 7 | 2. Heterogeneous blob columns: JSON → JSONB. |
| 8 | JSONB is binary-stored, supports GIN indexing, and is faster for lookups. |
| 9 | |
| 10 | PostgreSQL allows ALTER COLUMN type with USING for JSON→JSONB and JSON→ARRAY |
| 11 | casts. The USING clause handles the coercion transparently. |
| 12 | |
| 13 | Revision ID: 0024 |
| 14 | Revises: 0023 |
| 15 | """ |
| 16 | from __future__ import annotations |
| 17 | |
| 18 | from alembic import op |
| 19 | import sqlalchemy as sa |
| 20 | from sqlalchemy.dialects.postgresql import JSONB |
| 21 | |
| 22 | revision = "0024" |
| 23 | down_revision = "0023" |
| 24 | branch_labels = None |
| 25 | depends_on = None |
| 26 | |
| 27 | |
| 28 | # --------------------------------------------------------------------------- |
| 29 | # Columns to convert to ARRAY(TEXT) |
| 30 | # --------------------------------------------------------------------------- |
| 31 | _TEXT_ARRAY_COLS: list[tuple[str, str]] = [ |
| 32 | ("musehub_repos", "tags"), |
| 33 | ("musehub_snapshots", "directories"), |
| 34 | ("musehub_identities", "agent_capabilities"), |
| 35 | ("musehub_identities", "scope"), |
| 36 | ("musehub_identities", "org_members"), |
| 37 | ("musehub_issues", "labels"), |
| 38 | ("musehub_issues", "symbol_anchors"), |
| 39 | ("musehub_proposals", "touched_symbols"), |
| 40 | ("musehub_webhooks", "events"), |
| 41 | ("musehub_sessions", "participants"), |
| 42 | ("musehub_mists", "tags"), |
| 43 | ("musehub_mists", "symbol_anchors"), |
| 44 | ] |
| 45 | |
| 46 | # Columns to convert to ARRAY(VARCHAR(128)) |
| 47 | _VARCHAR_ARRAY_COLS: list[tuple[str, str]] = [ |
| 48 | ("musehub_identities", "pinned_repo_ids"), |
| 49 | ("musehub_issues", "commit_anchors"), |
| 50 | ("musehub_sessions", "commits"), |
| 51 | ("musehub_coord_tasks", "depends_on"), |
| 52 | ] |
| 53 | |
| 54 | # Columns to convert from JSON → JSONB (heterogeneous blobs) |
| 55 | _JSONB_COLS: list[tuple[str, str]] = [ |
| 56 | ("musehub_repos", "domain_meta"), |
| 57 | ("musehub_repos", "settings"), |
| 58 | ("musehub_issue_events", "payload"), |
| 59 | ("musehub_proposals", "domain_diff"), |
| 60 | ("musehub_proposal_comments", "dimension_ref"), |
| 61 | ("musehub_releases", "download_urls"), |
| 62 | ("musehub_background_jobs", "payload"), |
| 63 | ("musehub_symbol_history_entries", "op_payload"), # table renamed in dataclass refactor |
| 64 | ("musehub_symbol_intel", "gravity_depth_distribution"), |
| 65 | ("musehub_intel_codemap_meta", "cycles_json"), |
| 66 | ("musehub_coord_records", "payload"), |
| 67 | ("musehub_coord_tasks", "payload"), |
| 68 | ("muse_snapshots", "manifest"), |
| 69 | ("muse_commits", "commit_metadata"), |
| 70 | ("musehub_domains", "capabilities"), |
| 71 | ] |
| 72 | |
| 73 | |
| 74 | def upgrade() -> None: |
| 75 | conn = op.get_bind() |
| 76 | |
| 77 | # PostgreSQL forbids subqueries in ALTER TABLE ... USING. |
| 78 | # A helper function sidesteps the restriction — functions are callable |
| 79 | # in USING clauses even when they contain subqueries internally. |
| 80 | conn.execute(sa.text(""" |
| 81 | CREATE OR REPLACE FUNCTION _muse_json_to_text_array(v text) |
| 82 | RETURNS text[] |
| 83 | LANGUAGE sql IMMUTABLE AS $$ |
| 84 | SELECT CASE WHEN v IS NULL THEN NULL |
| 85 | ELSE ARRAY(SELECT jsonb_array_elements_text(v::jsonb)) |
| 86 | END |
| 87 | $$ |
| 88 | """)) |
| 89 | |
| 90 | for table, col in _TEXT_ARRAY_COLS: |
| 91 | conn.execute(sa.text( |
| 92 | f"ALTER TABLE {table} ALTER COLUMN {col} TYPE text[] " |
| 93 | f"USING _muse_json_to_text_array({col}::text)" |
| 94 | )) |
| 95 | |
| 96 | for table, col in _VARCHAR_ARRAY_COLS: |
| 97 | conn.execute(sa.text( |
| 98 | f"ALTER TABLE {table} ALTER COLUMN {col} TYPE varchar(128)[] " |
| 99 | f"USING _muse_json_to_text_array({col}::text)::varchar(128)[]" |
| 100 | )) |
| 101 | |
| 102 | conn.execute(sa.text("DROP FUNCTION _muse_json_to_text_array(text)")) |
| 103 | |
| 104 | for table, col in _JSONB_COLS: |
| 105 | conn.execute(sa.text( |
| 106 | f"ALTER TABLE {table} ALTER COLUMN {col} TYPE jsonb " |
| 107 | f"USING {col}::jsonb" |
| 108 | )) |
| 109 | |
| 110 | |
| 111 | def downgrade() -> None: |
| 112 | conn = op.get_bind() |
| 113 | |
| 114 | def _has_column(table: str, column: str) -> bool: |
| 115 | r = conn.execute(sa.text( |
| 116 | "SELECT 1 FROM information_schema.columns " |
| 117 | "WHERE table_schema='public' AND table_name=:t AND column_name=:c" |
| 118 | ), {"t": table, "c": column}) |
| 119 | return r.fetchone() is not None |
| 120 | |
| 121 | for table, col in _TEXT_ARRAY_COLS + _VARCHAR_ARRAY_COLS: |
| 122 | if _has_column(table, col): |
| 123 | conn.execute(sa.text( |
| 124 | f"ALTER TABLE {table} ALTER COLUMN {col} TYPE json " |
| 125 | f"USING to_json({col})" |
| 126 | )) |
| 127 | |
| 128 | for table, col in _JSONB_COLS: |
| 129 | if _has_column(table, col): |
| 130 | conn.execute(sa.text( |
| 131 | f"ALTER TABLE {table} ALTER COLUMN {col} TYPE json " |
| 132 | f"USING {col}::json" |
| 133 | )) |
File History
3 commits
sha256:94ef169c149a452bff7c604ded8b280b19bd477c2dabcb56972780b0b784c7aa
Merge 'fix/assignee-sigil-inline' into 'dev' — proposal: As…
Human
2 days ago
sha256:6b1949fc2797ca4c1936a637a4cbfec828ef56cf52398a2e74ca3c4f494e728f
fix: use wire_bytes not mpack_bytes_raw in compute_object_b…
Sonnet 4.6
patch
10 days ago
sha256:4aed3d8601c8dd3ed37074de35f11f4a9699a0a4b99d43727048fd3f8e6fd13d
chore: doc sweep, ignore wrangler build state, misc fixes
Sonnet 4.6
minor
⚠
13 days ago