gabriel / muse public
test_schema_supercharge.py python
460 lines 20.6 KB
Raw
sha256:f6cd81bc71702f5c1c6890bd39aaba994fe58c75f019d7c03934724fa2739bb4 fix: carry dev changes harmony dropped in merge — detached … Sonnet 4.6 minor ⚠ breaking 16 days ago
1 """Seven-tier tests for ``muse/core/schema.py``.
2
3 All types are TypedDicts — no runtime logic, so the tiers focus on:
4
5 Unit — field presence/types on every TypedDict, Literal constraints.
6 Integration — ElementSchema union membership, MapSchema recursive nesting,
7 DomainSchema round-trips through json.dumps / json.loads.
8 End-to-end — public names importable from ``muse.core.schema``; schema dicts consumable by ``json.dumps``.
9 Stress — 10 000 construction cycles; deeply-nested MapSchema.
10 Data integrity — field values survive JSON round-trip unchanged.
11 Security — hostile strings in str fields do not cause crashes.
12 Performance — 10 000 constructions under 1 s; 10 000 JSON round-trips under 2 s.
13 """
14
15 from __future__ import annotations
16
17 import json
18 import time
19 import typing
20 from collections.abc import Mapping
21
22 import pytest
23
24 type _KwVal = str | int | float | bool | None
25
26
27 # ──────────────────────────────────────────────────────────────────────────────
28 # Helpers
29 # ──────────────────────────────────────────────────────────────────────────────
30
31
def _seq(**kw: _KwVal) -> Mapping[str, object]:
    """Return a sequence-schema dict; keyword arguments override the defaults."""
    defaults: dict[str, object] = {
        "kind": "sequence",
        "element_type": "note",
        "identity": "by_position",
        "diff_algorithm": "lcs",
        "alphabet": None,
    }
    # Later entries win, so caller-supplied values replace the defaults.
    return {**defaults, **kw}
42
43
def _tree(**kw: _KwVal) -> Mapping[str, object]:
    """Return a tree-schema dict; keyword arguments override the defaults."""
    defaults: dict[str, object] = {
        "kind": "tree",
        "node_type": "ast_node",
        "diff_algorithm": "zhang_shasha",
    }
    return {**defaults, **kw}
48
49
def _tensor(**kw: _KwVal) -> Mapping[str, object]:
    """Return a tensor-schema dict; keyword arguments override the defaults."""
    defaults: dict[str, object] = {
        "kind": "tensor",
        "dtype": "float32",
        "rank": 2,
        "epsilon": 1e-6,
        "diff_mode": "sparse",
    }
    return {**defaults, **kw}
56
57
def _set(**kw: _KwVal) -> Mapping[str, object]:
    """Return a set-schema dict; keyword arguments override the defaults."""
    defaults: dict[str, object] = {
        "kind": "set",
        "element_type": "file_id",
        "identity": "by_id",
    }
    return {**defaults, **kw}
62
63
def _map(value_schema: Mapping[str, object] | None = None, **kw: _KwVal) -> Mapping[str, object]:
    """Return a map-schema dict; keyword arguments override the defaults.

    Args:
        value_schema: Mapping stored under the ``"value_schema"`` key.  When
            omitted (``None``), the default sequence schema from ``_seq()`` is
            used.
        **kw: Overrides applied on top of the default fields.
    """
    # Compare against None explicitly: a truthiness test would discard an
    # empty mapping that the caller passed on purpose.
    if value_schema is None:
        value_schema = _seq()
    base: dict[str, object] = {
        "kind": "map",
        "key_type": "str",
        "value_schema": value_schema,
        "identity": "by_key",
    }
    base.update(kw)
    return base
73
74
def _dim(**kw: _KwVal) -> Mapping[str, object]:
    """Return a dimension-spec dict; keyword arguments override the defaults."""
    defaults: dict[str, object] = {
        "name": "notes",
        "description": "MIDI note events",
        "schema": _seq(),
        "independent_merge": True,
    }
    return {**defaults, **kw}
84
85
def _crdt_dim(**kw: _KwVal) -> Mapping[str, object]:
    """Return a CRDT dimension-spec dict; keyword arguments override the defaults."""
    defaults: dict[str, object] = {
        "name": "tempo",
        "description": "BPM value",
        "crdt_type": "lww_register",
        "independent_merge": True,
    }
    return {**defaults, **kw}
95
96
def _domain(**kw: _KwVal) -> Mapping[str, object]:
    """Return a domain-schema dict; keyword arguments override the defaults."""
    defaults: dict[str, object] = {
        "domain": "midi",
        "description": "MIDI music domain",
        "dimensions": [_dim()],
        "top_level": _set(),
        "merge_mode": "three_way",
        "schema_version": "0.1.0",
    }
    return {**defaults, **kw}
108
109
110 # ──────────────────────────────────────────────────────────────────────────────
111 # Unit — SequenceSchema
112 # ──────────────────────────────────────────────────────────────────────────────
113
114
class TestSequenceSchema:
    """Unit checks on the type hints declared by ``SequenceSchema``."""

    def test_required_keys(self) -> None:
        """All expected field names appear in the resolved type hints."""
        from muse.core.schema import SequenceSchema

        expected = {"kind", "element_type", "identity", "diff_algorithm", "alphabet"}
        assert expected.issubset(typing.get_type_hints(SequenceSchema))

    def test_kind_literal_is_sequence(self) -> None:
        """``kind`` admits the value ``"sequence"``."""
        from muse.core.schema import SequenceSchema

        kind_hint = typing.get_type_hints(SequenceSchema)["kind"]
        assert "sequence" in typing.get_args(kind_hint)

    def test_valid_diff_algorithms(self) -> None:
        """``diff_algorithm`` admits exactly the three expected names."""
        from muse.core.schema import SequenceSchema

        algo_hint = typing.get_type_hints(SequenceSchema)["diff_algorithm"]
        assert set(typing.get_args(algo_hint)) == {"lcs", "myers", "patience"}

    def test_valid_identity_values(self) -> None:
        """``identity`` admits exactly the three expected strategies."""
        from muse.core.schema import SequenceSchema

        identity_hint = typing.get_type_hints(SequenceSchema)["identity"]
        assert set(typing.get_args(identity_hint)) == {"by_id", "by_position", "by_content"}

    def test_alphabet_is_optional_list(self) -> None:
        """``alphabet`` may be ``None`` (expected shape: ``list[str] | None``)."""
        from muse.core.schema import SequenceSchema

        alphabet_hint = typing.get_type_hints(SequenceSchema)["alphabet"]
        # Only the optional part is verified: NoneType must be a union member.
        assert type(None) in typing.get_args(alphabet_hint)
145
146
147 # ──────────────────────────────────────────────────────────────────────────────
148 # Unit — TreeSchema
149 # ──────────────────────────────────────────────────────────────────────────────
150
151
class TestTreeSchema:
    """Unit checks on the type hints declared by ``TreeSchema``."""

    def test_required_keys(self) -> None:
        """All expected field names appear in the resolved type hints."""
        from muse.core.schema import TreeSchema

        expected = {"kind", "node_type", "diff_algorithm"}
        assert expected.issubset(typing.get_type_hints(TreeSchema))

    def test_valid_diff_algorithms(self) -> None:
        """``diff_algorithm`` admits exactly the two expected names."""
        from muse.core.schema import TreeSchema

        algo_hint = typing.get_type_hints(TreeSchema)["diff_algorithm"]
        assert set(typing.get_args(algo_hint)) == {"zhang_shasha", "gumtree"}
163
164
165 # ──────────────────────────────────────────────────────────────────────────────
166 # Unit — TensorSchema
167 # ──────────────────────────────────────────────────────────────────────────────
168
169
class TestTensorSchema:
    """Unit checks on the type hints declared by ``TensorSchema``."""

    def test_required_keys(self) -> None:
        """All expected field names appear in the resolved type hints."""
        from muse.core.schema import TensorSchema

        expected = {"kind", "dtype", "rank", "epsilon", "diff_mode"}
        assert expected.issubset(typing.get_type_hints(TensorSchema))

    def test_valid_dtypes(self) -> None:
        """``dtype`` admits exactly the six expected names."""
        from muse.core.schema import TensorSchema

        dtype_hint = typing.get_type_hints(TensorSchema)["dtype"]
        assert set(typing.get_args(dtype_hint)) == {
            "float32", "float64", "int8", "int16", "int32", "int64",
        }

    def test_valid_diff_modes(self) -> None:
        """``diff_mode`` admits exactly the three expected names."""
        from muse.core.schema import TensorSchema

        mode_hint = typing.get_type_hints(TensorSchema)["diff_mode"]
        assert set(typing.get_args(mode_hint)) == {"sparse", "block", "full"}
187
188
189 # ──────────────────────────────────────────────────────────────────────────────
190 # Unit — SetSchema
191 # ──────────────────────────────────────────────────────────────────────────────
192
193
class TestSetSchema:
    """Unit checks on the type hints declared by ``SetSchema``."""

    def test_required_keys(self) -> None:
        """All expected field names appear in the resolved type hints."""
        from muse.core.schema import SetSchema

        expected = {"kind", "element_type", "identity"}
        assert expected.issubset(typing.get_type_hints(SetSchema))

    def test_valid_identity_values(self) -> None:
        """``identity`` admits exactly the two expected strategies."""
        from muse.core.schema import SetSchema

        identity_hint = typing.get_type_hints(SetSchema)["identity"]
        assert set(typing.get_args(identity_hint)) == {"by_content", "by_id"}
205
206
207 # ──────────────────────────────────────────────────────────────────────────────
208 # Unit — MapSchema
209 # ──────────────────────────────────────────────────────────────────────────────
210
211
class TestMapSchema:
    """Unit checks on the type hints declared by ``MapSchema``."""

    def test_required_keys(self) -> None:
        """All expected field names appear in the resolved type hints."""
        from muse.core.schema import MapSchema

        expected = {"kind", "key_type", "value_schema", "identity"}
        assert expected.issubset(typing.get_type_hints(MapSchema))

    def test_identity_is_by_key(self) -> None:
        """``identity`` admits the value ``"by_key"``."""
        from muse.core.schema import MapSchema

        identity_hint = typing.get_type_hints(MapSchema)["identity"]
        assert "by_key" in typing.get_args(identity_hint)
223
224
225 # ──────────────────────────────────────────────────────────────────────────────
226 # Unit — DimensionSpec
227 # ──────────────────────────────────────────────────────────────────────────────
228
229
class TestDimensionSpec:
    """Unit checks on the type hints declared by ``DimensionSpec``."""

    def test_required_keys(self) -> None:
        """All expected field names appear in the resolved type hints."""
        from muse.core.schema import DimensionSpec

        expected = {"name", "description", "schema", "independent_merge"}
        assert expected.issubset(typing.get_type_hints(DimensionSpec))

    def test_independent_merge_is_bool(self) -> None:
        """``independent_merge`` is annotated as plain ``bool``."""
        from muse.core.schema import DimensionSpec

        merge_hint = typing.get_type_hints(DimensionSpec)["independent_merge"]
        assert merge_hint is bool
240
241
242 # ──────────────────────────────────────────────────────────────────────────────
243 # Unit — CRDTDimensionSpec
244 # ──────────────────────────────────────────────────────────────────────────────
245
246
class TestCRDTDimensionSpec:
    """Unit checks on ``CRDTDimensionSpec`` and the ``CRDTPrimitive`` alias."""

    def test_required_keys(self) -> None:
        """All expected field names appear in the resolved type hints."""
        from muse.core.schema import CRDTDimensionSpec

        expected = {"name", "description", "crdt_type", "independent_merge"}
        assert expected.issubset(typing.get_type_hints(CRDTDimensionSpec))

    def test_valid_crdt_types(self) -> None:
        """``CRDTPrimitive`` admits exactly the five expected names."""
        from muse.core.schema import CRDTPrimitive

        primitives = set(typing.get_args(CRDTPrimitive))
        assert primitives == {"lww_register", "or_set", "rga", "aw_map", "g_counter"}
257
258
259 # ──────────────────────────────────────────────────────────────────────────────
260 # Unit — DomainSchema
261 # ──────────────────────────────────────────────────────────────────────────────
262
263
class TestDomainSchema:
    """Unit checks on the type hints declared by ``DomainSchema``."""

    def test_required_keys(self) -> None:
        """All expected field names appear in the resolved type hints."""
        from muse.core.schema import DomainSchema

        expected = {
            "domain",
            "description",
            "dimensions",
            "top_level",
            "merge_mode",
            "schema_version",
        }
        assert expected.issubset(typing.get_type_hints(DomainSchema))

    def test_valid_merge_modes(self) -> None:
        """``merge_mode`` admits exactly the two expected names."""
        from muse.core.schema import DomainSchema

        mode_hint = typing.get_type_hints(DomainSchema)["merge_mode"]
        assert set(typing.get_args(mode_hint)) == {"three_way", "crdt"}
275
276
277 # ──────────────────────────────────────────────────────────────────────────────
278 # Integration — ElementSchema union, recursive nesting, JSON round-trip
279 # ──────────────────────────────────────────────────────────────────────────────
280
281
class TestIntegration:
    """Integration checks: union membership, nesting, and JSON round-trips."""

    def test_element_schema_includes_all_five_types(self) -> None:
        """Each of the five schema types is a member of ``ElementSchema``."""
        from muse.core.schema import (
            ElementSchema,
            MapSchema,
            SequenceSchema,
            SetSchema,
            TensorSchema,
            TreeSchema,
        )

        union_members = typing.get_args(ElementSchema)
        for schema_type in (SequenceSchema, TreeSchema, TensorSchema, SetSchema, MapSchema):
            assert schema_type in union_members

    def test_map_schema_recursive_nesting(self) -> None:
        """A map schema whose ``value_schema`` is itself a map serialises to JSON."""
        nested = _map(value_schema=_map(value_schema=_seq()))
        # Passing means json.dumps raised nothing; the output is not inspected.
        json.dumps(nested)

    def test_domain_schema_json_round_trip(self) -> None:
        """A domain dict decoded from its own JSON equals the original."""
        original = _domain()
        decoded = json.loads(json.dumps(original))
        assert decoded == original

    def test_dimension_spec_json_round_trip(self) -> None:
        """A dimension dict decoded from its own JSON equals the original."""
        original = _dim()
        decoded = json.loads(json.dumps(original))
        assert decoded == original

    def test_crdt_dimension_spec_json_round_trip(self) -> None:
        """A CRDT dimension dict decoded from its own JSON equals the original."""
        original = _crdt_dim()
        decoded = json.loads(json.dumps(original))
        assert decoded == original

    def test_all_element_schema_types_json_serialisable(self) -> None:
        """Every helper-built element schema can be passed to ``json.dumps``."""
        samples = [_seq(), _tree(), _tensor(), _set(), _map()]
        for sample in samples:
            json.dumps(sample)  # must not raise

    def test_domain_with_crdt_merge_mode(self) -> None:
        """``merge_mode="crdt"`` is preserved through a JSON round-trip."""
        decoded = json.loads(json.dumps(_domain(merge_mode="crdt")))
        assert decoded["merge_mode"] == "crdt"

    def test_multiple_dimensions_in_domain(self) -> None:
        """Two dimensions in, two dimensions out after a JSON round-trip."""
        two_dims = [_dim(name="notes"), _dim(name="tempo")]
        decoded = json.loads(json.dumps(_domain(dimensions=two_dims)))
        assert len(decoded["dimensions"]) == 2
329
330
331 # ──────────────────────────────────────────────────────────────────────────────
332 # End-to-end — schema used as plugin contract
333 # ──────────────────────────────────────────────────────────────────────────────
334
335
class TestEndToEnd:
    """End-to-end checks: public names import and schema dicts serialise."""

    def test_schema_importable_from_public_path(self) -> None:
        """``DomainSchema`` can be imported from ``muse.core.schema``."""
        from muse.core.schema import DomainSchema  # noqa: F401

    def test_element_schema_importable(self) -> None:
        """``ElementSchema`` can be imported from ``muse.core.schema``."""
        from muse.core.schema import ElementSchema  # noqa: F401

    def test_crdt_primitive_importable(self) -> None:
        """``CRDTPrimitive`` can be imported from ``muse.core.schema``."""
        from muse.core.schema import CRDTPrimitive  # noqa: F401

    def test_domain_schema_dict_passable_to_json_dumps(self) -> None:
        """The serialised default domain contains its ``domain`` entry."""
        encoded = json.dumps(_domain(), sort_keys=True)
        assert '"domain": "midi"' in encoded

    def test_sequence_schema_with_alphabet(self) -> None:
        """A list-valued ``alphabet`` is preserved through a JSON round-trip."""
        decoded = json.loads(json.dumps(_seq(alphabet=["C", "D", "E", "F", "G", "A", "B"])))
        assert decoded["alphabet"] == ["C", "D", "E", "F", "G", "A", "B"]
354
355
356 # ──────────────────────────────────────────────────────────────────────────────
357 # Stress
358 # ──────────────────────────────────────────────────────────────────────────────
359
360
class TestStress:
    """Stress checks: many constructions, deep nesting, wide domains."""

    def test_10000_domain_schema_constructions(self) -> None:
        """Each of 10 000 constructed domains carries its own ``domain`` value."""
        for index in range(10_000):
            built = _domain(domain=f"domain_{index}", schema_version=f"0.{index}.0")
            assert built["domain"] == f"domain_{index}"

    def test_deeply_nested_map_schema(self) -> None:
        """MapSchema.value_schema is recursive — 50 levels deep must not crash."""
        nested = _seq()
        depth = 50
        for _level in range(depth):
            nested = _map(value_schema=nested)
        # Passing means json.dumps raised nothing at this depth.
        json.dumps(nested)

    def test_domain_with_100_dimensions(self) -> None:
        """A domain with 100 dimensions keeps all of them through JSON."""
        hundred_dims = [_dim(name=f"dim_{index}") for index in range(100)]
        decoded = json.loads(json.dumps(_domain(dimensions=hundred_dims)))
        assert len(decoded["dimensions"]) == 100
380
381
382 # ──────────────────────────────────────────────────────────────────────────────
383 # Data integrity
384 # ──────────────────────────────────────────────────────────────────────────────
385
386
class TestDataIntegrity:
    """Data-integrity checks: field values are preserved by a JSON round-trip."""

    def test_tensor_epsilon_survives_json_round_trip(self) -> None:
        """A small float ``epsilon`` comes back within 1e-20 of its input."""
        decoded = json.loads(json.dumps(_tensor(epsilon=1e-9)))
        drift = abs(decoded["epsilon"] - 1e-9)
        assert drift < 1e-20

    def test_tensor_rank_survives_json_round_trip(self) -> None:
        """An integer ``rank`` comes back unchanged."""
        decoded = json.loads(json.dumps(_tensor(rank=4)))
        assert decoded["rank"] == 4

    def test_independent_merge_bool_survives_round_trip(self) -> None:
        """``False`` comes back as the ``False`` singleton, not a falsy stand-in."""
        decoded = json.loads(json.dumps(_dim(independent_merge=False)))
        assert decoded["independent_merge"] is False

    def test_domain_schema_version_string_preserved(self) -> None:
        """A ``schema_version`` string comes back unchanged."""
        decoded = json.loads(json.dumps(_domain(schema_version="1.2.3")))
        assert decoded["schema_version"] == "1.2.3"

    def test_set_element_type_preserved(self) -> None:
        """A set schema's ``element_type`` comes back unchanged."""
        decoded = json.loads(json.dumps(_set(element_type="track_id")))
        assert decoded["element_type"] == "track_id"
410
411
412 # ──────────────────────────────────────────────────────────────────────────────
413 # Security
414 # ──────────────────────────────────────────────────────────────────────────────
415
416
class TestSecurity:
    """Security checks: hostile or unusual strings pass through JSON intact."""

    def test_hostile_string_in_domain_name_survives_json(self) -> None:
        """An SQL-injection-style string is returned verbatim."""
        payload = '"; DROP TABLE domains; --'
        decoded = json.loads(json.dumps(_domain(domain=payload)))
        assert decoded["domain"] == payload

    def test_ansi_in_description_survives_json(self) -> None:
        """A string containing ANSI escape sequences is returned verbatim."""
        payload = "\x1b[31mmalicious\x1b[0m"
        decoded = json.loads(json.dumps(_domain(description=payload)))
        assert decoded["description"] == payload

    def test_null_byte_in_element_type_survives_json(self) -> None:
        """A string containing a NUL byte is returned verbatim."""
        decoded = json.loads(json.dumps(_seq(element_type="note\x00malicious")))
        assert decoded["element_type"] == "note\x00malicious"

    def test_unicode_in_dimension_name_survives_json(self) -> None:
        """A non-ASCII dimension name is returned verbatim."""
        decoded = json.loads(json.dumps(_dim(name="音符")))
        assert decoded["name"] == "音符"
439
440
441 # ──────────────────────────────────────────────────────────────────────────────
442 # Performance
443 # ──────────────────────────────────────────────────────────────────────────────
444
445
class TestPerformance:
    """Performance checks: wall-clock budgets for construction and JSON."""

    def test_10000_constructions_under_1s(self) -> None:
        """10 000 domain constructions finish in under one second."""
        began = time.perf_counter()
        for index in range(10_000):
            _domain(schema_version=f"0.{index}.0")
        duration = time.perf_counter() - began
        assert duration < 1.0

    def test_json_round_trip_10000_times_under_2s(self) -> None:
        """10 000 JSON round-trips of one domain finish in under two seconds."""
        sample = _domain()
        began = time.perf_counter()
        for _round in range(10_000):
            json.loads(json.dumps(sample))
        duration = time.perf_counter() - began
        assert duration < 2.0
File History 2 commits
sha256:fb67fed5a4d3e40de84bdd163de94ef1386570bef1dd1a020a732c8a038962ce Merge branch 'dev' into main Human 20 days ago