gabriel / musehub public
0024_json_to_jsonb_and_array.py python
133 lines 4.5 KB
Raw
sha256:94ef169c149a452bff7c604ded8b280b19bd477c2dabcb56972780b0b784c7aa Merge 'fix/assignee-sigil-inline' into 'dev' — proposal: As… Human 2 days ago
1 """Migrate all JSON columns to JSONB or PostgreSQL ARRAY.
2
3 Two changes:
4 1. Homogeneous list[str] columns: JSON → ARRAY (text[] or varchar(128)[]).
5 ARRAY is type-safe, GIN-indexable, and has no JSON parsing overhead.
6
7 2. Heterogeneous blob columns: JSON → JSONB.
8 JSONB is binary-stored, supports GIN indexing, and is faster for lookups.
9
10 PostgreSQL allows ALTER COLUMN type with USING for JSON→JSONB and JSON→ARRAY
11 casts. The USING clause handles the coercion transparently.
12
13 Revision ID: 0024
14 Revises: 0023
15 """
16 from __future__ import annotations
17
18 from alembic import op
19 import sqlalchemy as sa
20 from sqlalchemy.dialects.postgresql import JSONB
21
22 revision = "0024"
23 down_revision = "0023"
24 branch_labels = None
25 depends_on = None
26
27
28 # ---------------------------------------------------------------------------
29 # Columns to convert to ARRAY(TEXT)
30 # ---------------------------------------------------------------------------
31 _TEXT_ARRAY_COLS: list[tuple[str, str]] = [
32 ("musehub_repos", "tags"),
33 ("musehub_snapshots", "directories"),
34 ("musehub_identities", "agent_capabilities"),
35 ("musehub_identities", "scope"),
36 ("musehub_identities", "org_members"),
37 ("musehub_issues", "labels"),
38 ("musehub_issues", "symbol_anchors"),
39 ("musehub_proposals", "touched_symbols"),
40 ("musehub_webhooks", "events"),
41 ("musehub_sessions", "participants"),
42 ("musehub_mists", "tags"),
43 ("musehub_mists", "symbol_anchors"),
44 ]
45
46 # Columns to convert to ARRAY(VARCHAR(128))
47 _VARCHAR_ARRAY_COLS: list[tuple[str, str]] = [
48 ("musehub_identities", "pinned_repo_ids"),
49 ("musehub_issues", "commit_anchors"),
50 ("musehub_sessions", "commits"),
51 ("musehub_coord_tasks", "depends_on"),
52 ]
53
54 # Columns to convert from JSON → JSONB (heterogeneous blobs)
55 _JSONB_COLS: list[tuple[str, str]] = [
56 ("musehub_repos", "domain_meta"),
57 ("musehub_repos", "settings"),
58 ("musehub_issue_events", "payload"),
59 ("musehub_proposals", "domain_diff"),
60 ("musehub_proposal_comments", "dimension_ref"),
61 ("musehub_releases", "download_urls"),
62 ("musehub_background_jobs", "payload"),
63 ("musehub_symbol_history_entries", "op_payload"), # table renamed in dataclass refactor
64 ("musehub_symbol_intel", "gravity_depth_distribution"),
65 ("musehub_intel_codemap_meta", "cycles_json"),
66 ("musehub_coord_records", "payload"),
67 ("musehub_coord_tasks", "payload"),
68 ("muse_snapshots", "manifest"),
69 ("muse_commits", "commit_metadata"),
70 ("musehub_domains", "capabilities"),
71 ]
72
73
74 def upgrade() -> None:
75 conn = op.get_bind()
76
77 # PostgreSQL forbids subqueries in ALTER TABLE ... USING.
78 # A helper function sidesteps the restriction — functions are callable
79 # in USING clauses even when they contain subqueries internally.
80 conn.execute(sa.text("""
81 CREATE OR REPLACE FUNCTION _muse_json_to_text_array(v text)
82 RETURNS text[]
83 LANGUAGE sql IMMUTABLE AS $$
84 SELECT CASE WHEN v IS NULL THEN NULL
85 ELSE ARRAY(SELECT jsonb_array_elements_text(v::jsonb))
86 END
87 $$
88 """))
89
90 for table, col in _TEXT_ARRAY_COLS:
91 conn.execute(sa.text(
92 f"ALTER TABLE {table} ALTER COLUMN {col} TYPE text[] "
93 f"USING _muse_json_to_text_array({col}::text)"
94 ))
95
96 for table, col in _VARCHAR_ARRAY_COLS:
97 conn.execute(sa.text(
98 f"ALTER TABLE {table} ALTER COLUMN {col} TYPE varchar(128)[] "
99 f"USING _muse_json_to_text_array({col}::text)::varchar(128)[]"
100 ))
101
102 conn.execute(sa.text("DROP FUNCTION _muse_json_to_text_array(text)"))
103
104 for table, col in _JSONB_COLS:
105 conn.execute(sa.text(
106 f"ALTER TABLE {table} ALTER COLUMN {col} TYPE jsonb "
107 f"USING {col}::jsonb"
108 ))
109
110
111 def downgrade() -> None:
112 conn = op.get_bind()
113
114 def _has_column(table: str, column: str) -> bool:
115 r = conn.execute(sa.text(
116 "SELECT 1 FROM information_schema.columns "
117 "WHERE table_schema='public' AND table_name=:t AND column_name=:c"
118 ), {"t": table, "c": column})
119 return r.fetchone() is not None
120
121 for table, col in _TEXT_ARRAY_COLS + _VARCHAR_ARRAY_COLS:
122 if _has_column(table, col):
123 conn.execute(sa.text(
124 f"ALTER TABLE {table} ALTER COLUMN {col} TYPE json "
125 f"USING to_json({col})"
126 ))
127
128 for table, col in _JSONB_COLS:
129 if _has_column(table, col):
130 conn.execute(sa.text(
131 f"ALTER TABLE {table} ALTER COLUMN {col} TYPE json "
132 f"USING {col}::json"
133 ))
File History 3 commits
sha256:94ef169c149a452bff7c604ded8b280b19bd477c2dabcb56972780b0b784c7aa Merge 'fix/assignee-sigil-inline' into 'dev' — proposal: As… Human 2 days ago
sha256:6b1949fc2797ca4c1936a637a4cbfec828ef56cf52398a2e74ca3c4f494e728f fix: use wire_bytes not mpack_bytes_raw in compute_object_b… Sonnet 4.6 patch 10 days ago
sha256:4aed3d8601c8dd3ed37074de35f11f4a9699a0a4b99d43727048fd3f8e6fd13d chore: doc sweep, ignore wrangler build state, misc fixes Sonnet 4.6 minor 13 days ago