gabriel / muse public
schema.py python
209 lines 8.2 KB
Raw
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 23 days ago
1 """Domain schema declaration types.
2
3 A plugin implements :meth:`~muse.domain.MuseDomainPlugin.schema` returning a
4 :class:`DomainSchema` to declare the structural shape of its data. The core
5 engine uses this declaration to:
6
7 1. Select the correct diff algorithm for each dimension via
8 :func:`~muse.core.diff_algorithms.diff_by_schema`.
9 2. Provide informed conflict messages (citing dimension names) during address-keyed merge.
10 3. Route to CRDT convergent join when ``merge_mode`` is ``"crdt"``.
11
12 Every schema type is a ``TypedDict`` — JSON-serialisable, zero-``Any``, and
13 verifiable by mypy in strict mode.
14
15 CRDT dimension spec
16 -------------------
17 :class:`CRDTDimensionSpec` declares which CRDT primitive a dimension uses when
18 ``DomainSchema.merge_mode`` is ``"crdt"``. Plugins that mix three-way and
19 CRDT semantics per-dimension use :class:`CRDTDimensionSpec` for their CRDT
20 dimensions and :class:`DimensionSpec` for their three-way dimensions; both are
21 listed in :class:`DomainSchema`.
22
23 Design note on ``MapSchema.value_schema``
24 -----------------------------------------
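``MapSchema.value_schema`` carries the type ``ElementSchema``, which is
defined *after* ``MapSchema`` in this file, so the annotation is a forward
reference. A forward reference is only safe when it is not evaluated at
class-definition time: it must be written as a string, or ``from __future__
import annotations`` must be in effect, or the interpreter must defer
annotation evaluation (Python 3.14+). Note that this module does not
currently import ``from __future__ import annotations``.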
29 """
30
31 from typing import Literal, TypedDict
32
33 # ---------------------------------------------------------------------------
34 # Element schema types — one per structural primitive
35 # ---------------------------------------------------------------------------
36
37 class SequenceSchema(TypedDict):
38 """Ordered sequence of homogeneous elements (LCS-diffable).
39
40 Use for any domain data that is fundamentally a list: note events in a
41 MIDI track, nucleotides in a DNA strand, frames in an animation.
42
43 ``diff_algorithm`` selects the variant of LCS:
44 - ``"lcs"`` — classic O(nm) LCS, minimal insertions and deletions.
45 - ``"myers"`` — O(nd) Myers algorithm, same semantics, faster for low
46 edit distance (this is what Git uses).
47 - ``"patience"`` — patience-sort variant, produces more human-readable
48 diffs for sequences with many repeated elements.
49 """
50
51 kind: Literal["sequence"]
52 element_type: str
53 identity: Literal["by_id", "by_position", "by_content"]
54 diff_algorithm: Literal["lcs", "myers", "patience"]
55 alphabet: list[str] | None
56
class TreeSchema(TypedDict):
    """Schema for a hierarchical, labeled, ordered tree (tree-edit diff).

    Suits domain data built on parent-child relationships, for example scene
    graphs, XML / AST nodes, or track hierarchies in a DAW.

    The ``diff_algorithm`` key picks the tree edit algorithm:

    - ``"zhang_shasha"`` — Zhang-Shasha 1989 O(n²m) minimum edit distance.
    - ``"gumtree"`` — GumTree heuristic; better for large ASTs.
    """

    # Literal tag marking this declaration as a tree schema.
    kind: Literal["tree"]
    # Name of the node type the tree is made of.
    node_type: str
    # Tree edit algorithm (described in the class docstring).
    diff_algorithm: Literal["zhang_shasha", "gumtree"]
71
class TensorSchema(TypedDict):
    """Schema for an N-dimensional numerical array (sparse numerical diff).

    Suits simulation state, velocity curves, weight matrices, and voxel
    grids. Floating-point drift smaller than ``epsilon`` is *not* treated as
    a change.

    The ``diff_mode`` key sets the granularity of the emitted ops:

    - ``"sparse"`` — one ``ReplaceOp`` per changed element.
    - ``"block"`` — adjacent changes are grouped into contiguous range ops.
    - ``"full"`` — a single ``ReplaceOp`` for the whole array if anything
      changed.
    """

    # Literal tag marking this declaration as a tensor schema.
    kind: Literal["tensor"]
    # Element data type; limited to the listed float and int widths.
    dtype: Literal["float32", "float64", "int8", "int16", "int32", "int64"]
    # NOTE(review): presumably the number of array dimensions — confirm.
    rank: int
    # Threshold below which floating-point drift is not a change.
    epsilon: float
    # Output granularity (described in the class docstring).
    diff_mode: Literal["sparse", "block", "full"]
89
class SetSchema(TypedDict):
    """Schema for an unordered collection of unique elements (set algebra).

    Suits collections where ordering carries no meaning, for example a set
    of files, a set of annotations, or a set of material IDs in a 3D scene.

    The ``identity`` key decides when two elements count as "the same":

    - ``"by_content"`` — SHA-256 of content (structural equality).
    - ``"by_id"`` — stable element ID (e.g. content-addressed ID).
    """

    # Literal tag marking this declaration as a set schema.
    kind: Literal["set"]
    # Name of the element type held by the set.
    element_type: str
    # Element identity rule (described in the class docstring).
    identity: Literal["by_content", "by_id"]
104
class MapSchema(TypedDict):
    """Key-value map with known or dynamic keys.

    Use for dictionaries where both key and value structure matter: a map of
    chromosome name → nucleotide sequence, or annotation key → quality scores.

    ``value_schema`` is itself an ``ElementSchema``, allowing recursive
    declarations (e.g. a map of sequences, a map of trees).
    """

    # Literal tag marking this declaration as a map schema.
    kind: Literal["map"]
    # Name of the key type.
    key_type: str
    # ``ElementSchema`` is only bound further down the module (it is the
    # union that includes ``MapSchema`` itself), so the annotation is quoted.
    # An unquoted name would be looked up while this class body executes and
    # raise ``NameError`` on interpreters that evaluate annotations eagerly.
    value_schema: "ElementSchema"
    # Entries of a map are identified by their key; no other option exists.
    identity: Literal["by_key"]
119
#: Union of every element schema type declared in this module.
#: ``DimensionSpec.schema`` and ``DomainSchema.top_level`` are annotated with it.
ElementSchema = (
    SequenceSchema
    | TreeSchema
    | TensorSchema
    | MapSchema
    | SetSchema
)
123
124 # ---------------------------------------------------------------------------
125 # Dimension spec — a named semantic sub-dimension
126 # ---------------------------------------------------------------------------
127
class DimensionSpec(TypedDict):
    """Declaration of one named semantic sub-dimension of a domain's state.

    Domains are multi-dimensional. MIDI has notes, pitch_bend, cc_volume, and
    track_structure dimensions. Genomics has coding regions, regulatory
    elements, and metadata dimensions. 3D spatial design has geometry,
    materials, lighting, and animation dimensions.

    Every dimension may use its own element schema and diff algorithm. The
    merge engine processes independent dimensions in parallel, without one
    blocking on another.

    ``independent_merge`` — when ``True``, a conflict in this dimension does
    not block merging of the other dimensions. When ``False`` (e.g.
    track_structure changes in a DAW session), all dimensions must wait for
    this one to be resolved.
    """

    # Dimension name.
    name: str
    # Free-text description of the dimension.
    description: str
    # Structural shape of this dimension's data.
    schema: ElementSchema
    # Whether conflicts here leave other dimensions free to merge.
    independent_merge: bool
149
150 # ---------------------------------------------------------------------------
151 # CRDT per-dimension schema
152 # ---------------------------------------------------------------------------
153
#: Names of the CRDT primitives a dimension may declare.
CRDTPrimitive = Literal[
    "lww_register",
    "or_set",
    "rga",
    "aw_map",
    "g_counter",
]
156
class CRDTDimensionSpec(TypedDict):
    """Declaration of a dimension merged with CRDT convergent semantics.

    A plugin supplies one ``CRDTDimensionSpec`` per dimension that the core
    engine should merge through :meth:`~muse.domain.CRDTPlugin.join` instead
    of the three-way merge path.

    The ``crdt_type`` key picks the primitive:

    - ``"lww_register"`` — scalar, last-write-wins (timestamps).
    - ``"or_set"`` — unordered set, adds win over concurrent removes.
    - ``"rga"`` — ordered sequence (collaborative text / note editing).
    - ``"aw_map"`` — key-value map, adds win.
    - ``"g_counter"`` — monotonically increasing integer counter.

    ``independent_merge`` mirrors :class:`DimensionSpec`: when ``True``,
    conflicts in other dimensions do not block this one.
    """

    # Dimension name.
    name: str
    # Free-text description of the dimension.
    description: str
    # CRDT primitive backing this dimension (described in the class docstring).
    crdt_type: CRDTPrimitive
    # Whether this dimension merges regardless of conflicts elsewhere.
    independent_merge: bool
180
181 # ---------------------------------------------------------------------------
182 # Top-level domain schema
183 # ---------------------------------------------------------------------------
184
class DomainSchema(TypedDict):
    """Full structural declaration of a domain plugin.

    This is what :meth:`~muse.domain.MuseDomainPlugin.schema` returns. The
    core engine reads it once, at plugin registration time.

    ``top_level`` describes the primary collection structure (e.g. a set of
    files for music, a map of chromosome sequences for genomics).

    ``dimensions`` lists the semantic sub-dimensions. The merge engine uses
    them to determine which changes can be merged independently.

    ``merge_mode`` selects the merge strategy:

    - ``"three_way"`` — standard three-way merge (Phases 1–3).
    - ``"crdt"`` — convergent CRDT join.

    ``schema_version`` is the Muse package version (read from
    ``muse._version``).
    """

    # Domain name.
    domain: str
    # Free-text description of the domain.
    description: str
    # NOTE(review): the module docstring says CRDT dimension specs are also
    # listed in this schema, but the element type here is ``DimensionSpec``
    # only — confirm which is intended.
    dimensions: list[DimensionSpec]
    # Primary collection structure of the domain's data.
    top_level: ElementSchema
    # Merge strategy (described in the class docstring).
    merge_mode: Literal["three_way", "crdt"]
    # Muse package version string.
    schema_version: str
File History 5 commits
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 23 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 25 days ago
sha256:be3641f35bdbcc094677776a77b9aa6a5dab891f8fab201dc162d03c2bab5aea fix(read): strip position:null from structured_delta ops in… Sonnet 4.6 patch 26 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 31 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor 31 days ago