gabriel / muse public
plugin.py python
1,235 lines 48.2 KB
Raw
sha256:ff478cfdcdd4b7fd6de89cb68896601a981f945634463275ec333bd20ca36402 Merge branch 'dev' into main Human 22 days ago
1 """MIDI domain plugin — reference implementation of :class:`MuseDomainPlugin`.
2
3 This plugin implements the six Muse domain interfaces for MIDI state:
4 notes, velocities, controller events (CC), pitch bends, and aftertouch.
5
6 It is the domain that proved the abstraction. Every other domain — scientific
7 simulation, genomics, 3D spatial design — is a new plugin that implements
8 the same six interfaces.
9
10 Live State
11 ----------
12 For the MIDI domain, ``LiveState`` is either:
13
14 1. A ``pathlib.Path`` pointing to the repository root (the working tree) — the
15 MIDI files live on disk and are managed by ``muse commit / checkout``.
16 2. A dict snapshot previously captured by :meth:`snapshot` — used when
17 constructing merges and diffs in memory.
18
19 Both forms are supported. The plugin detects which form it received by
20 checking for ``pathlib.Path`` vs ``dict``.
21
22 Snapshot Format
23 ---------------
24 A music snapshot is a JSON-serialisable dict:
25
26 .. code-block:: json
27
28 {
29 "files": {
30 "tracks/drums.mid": "<sha256>",
31 "tracks/bass.mid": "<sha256>"
32 },
33 "domain": "midi"
34 }
35
36 The ``files`` key maps POSIX paths (relative to the repository root) to their
37 SHA-256 content digests.
38
39 Delta Format
40 ----------------------
41 ``diff()`` returns a ``StructuredDelta`` with typed ``DomainOp`` entries:
42
43 - ``InsertOp`` — a file was added (``content_id`` = its SHA-256 hash).
44 - ``DeleteOp`` — a file was removed.
45 - ``ReplaceOp`` — a non-MIDI file's content changed.
46 - ``PatchOp`` — a ``.mid`` file changed; ``child_ops`` contains note-level
47 ``InsertOp`` / ``DeleteOp`` entries from the Myers LCS diff.
48
49 When ``repo_root`` is available, MIDI files are loaded from the object store
50 and diffed at note level. Without it, modified ``.mid`` files fall back to
51 ``ReplaceOp``.
52 """
53
54 import json
55 import logging
56 import os
57 import pathlib
58 import stat as _stat
59
60 from muse._version import __version__
61 from muse.core.types import blob_id, short_id
62 from muse.core.schema import (
63 DimensionSpec,
64 DomainSchema,
65 SequenceSchema,
66 SetSchema,
67 TensorSchema,
68 TreeSchema,
69 )
70 from muse.domain import (
71 DeleteOp,
72 DomainOp,
73 DriftReport,
74 InsertOp,
75 LiveState,
76 MergeResult,
77 MuseDomainPlugin,
78 PatchOp,
79 ReplaceOp,
80 SnapshotManifest,
81 StateDelta,
82 StateSnapshot,
83 StructuredDelta,
84 AddressedMergePlugin,
85 )
86 from muse.core.stat_cache import load_cache
87 from muse.core.types import Manifest
88 from muse.plugins.midi.midi_diff import NoteKey
89
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)

# PEP 695 type aliases for the plain-dict shapes passed around in this module.
type MidiFileMap = dict[str, str] # file_path → content hash (midi manifest)
type AppliedStrategies = dict[str, str] # file_path → strategy name
type DimensionReport = dict[str, str] # dimension → report string
type DimensionReports = dict[str, DimensionReport] # file → dimension report
type PatchOpMap = dict[str, PatchOp] # address → patch op
type NoteIdMap = dict[str, NoteKey] # content_id → note key

# Domain identifier stamped on manifests and deltas built by this plugin and
# passed to the ignore / attributes loaders.
_DOMAIN_TAG = "midi"
100
101 class MidiPlugin:
102 """MIDI domain plugin for the Muse VCS.
103
104 Implements :class:`~muse.domain.MuseDomainPlugin` (six core interfaces)
105 and :class:`~muse.domain.AddressedMergePlugin` (operation-level merge)
106 for MIDI state stored as files in the working tree.
107
108 At the file level, MIDI uses address-keyed merge — two agents adding or
109 removing different ``.mid`` files commute automatically. Within a file,
110 concurrent note insertions are ordered by the MidiRGA voice-lane CRDT
111 (bass → tenor → alto → soprano, then by op-id), preventing voice crossings
112 without human intervention.
113
114 This is the reference implementation for sequence-CRDT sub-file merge.
115 The :class:`~muse.domain.AddressedMergePlugin` extension is optional but
116 strongly recommended for domains that produce sub-file diffs.
117 """
118
119 # ------------------------------------------------------------------
120 # 1. snapshot — capture live state as a content-addressed dict
121 # ------------------------------------------------------------------
122
def snapshot(self, live_state: LiveState) -> StateSnapshot:
    """Capture the current working tree as a snapshot dict.

    Args:
        live_state: A ``pathlib.Path`` pointing to the repository root
            (working tree) or an existing snapshot dict (returned as-is).

    Returns:
        For a path: a ``SnapshotManifest`` with ``files`` mapping POSIX
        relative paths to the value returned by the stat cache (presumably
        the SHA-256 content digest — confirm against ``stat_cache``),
        ``domain="midi"`` and an empty ``directories`` list.
        For a dict: the same object that was passed in.

    Ignore rules
    ------------
    When *live_state* is a ``pathlib.Path``, the plugin reads the ignore
    configuration from the repository root and excludes any matching paths
    from the snapshot. Dotfiles and dot-directories are always excluded
    regardless of the ignore configuration. Symlinks and other non-regular
    files are skipped.

    Side effects:
        The stat cache loaded for the working tree is pruned to the set of
        files found and saved.
    """
    if not isinstance(live_state, pathlib.Path):
        # Already a snapshot dict — hand it back untouched.
        return live_state

    from muse.core.ignore import is_ignored, load_ignore_config, resolve_patterns

    workdir = live_state
    patterns = resolve_patterns(load_ignore_config(workdir), _DOMAIN_TAG)
    cache = load_cache(workdir)
    files: MidiFileMap = {}
    root_str = str(workdir)
    # Number of leading characters to strip from an absolute path to obtain
    # the repo-relative path. A filesystem or drive root ("/", "C:\\")
    # already ends with the separator, so adding one more would chop the
    # first character off every relative path.
    if root_str.endswith(os.sep):
        prefix_len = len(root_str)
    else:
        prefix_len = len(root_str) + 1

    for dirpath, dirnames, filenames in os.walk(root_str, followlinks=False):
        # In-place assignment prunes hidden directories from the walk and
        # makes the traversal order deterministic.
        dirnames[:] = sorted(d for d in dirnames if not d.startswith("."))
        for fname in sorted(filenames):
            if fname.startswith("."):
                continue
            abs_str = os.path.join(dirpath, fname)
            try:
                # lstat: do not follow symlinks.
                st = os.lstat(abs_str)
            except OSError:
                # File vanished or is unreadable between listing and stat.
                continue
            if not _stat.S_ISREG(st.st_mode):
                continue
            rel = abs_str[prefix_len:]
            if os.sep != "/":
                # Manifest keys are always POSIX-style.
                rel = rel.replace(os.sep, "/")
            if is_ignored(rel, patterns):
                continue
            files[rel] = cache.get_cached(
                rel, abs_str, st.st_mtime, st.st_size, st.st_ino
            )

    cache.prune(set(files))
    cache.save()
    return SnapshotManifest(files=files, domain=_DOMAIN_TAG, directories=[])
175
176 # ------------------------------------------------------------------
177 # 2. diff — compute the structured delta between two snapshots
178 # ------------------------------------------------------------------
179
def diff(
    self,
    base: StateSnapshot,
    target: StateSnapshot,
    *,
    repo_root: pathlib.Path | None = None,
) -> StateDelta:
    """Build the ``StructuredDelta`` that turns *base* into *target*.

    Ops are emitted in three groups, each sorted by path:

    1. Paths only in *target*: whatever ``_new_file_patch`` returns, or a
       plain ``InsertOp`` when it returns ``None``.
    2. Paths only in *base*: a ``DeleteOp``.
    3. Paths in both whose content IDs differ: the op returned by
       ``_diff_modified_file``.

    Args:
        base: The ancestor snapshot.
        target: The later snapshot.
        repo_root: Repository root directory, forwarded to the helpers
            that build per-file ops. May be ``None``.

    Returns:
        A ``StructuredDelta`` tagged with the MIDI domain, carrying the op
        list and the summary produced by ``_summarise_ops``.
    """
    old_files = base["files"]
    new_files = target["files"]
    old_paths = set(old_files)
    new_paths = set(new_files)

    ops: list[DomainOp] = []

    # Additions: prefer the richer op from the helper when it offers one.
    for added in sorted(new_paths - old_paths):
        added_id = new_files[added]
        rich_op = _new_file_patch(
            path=added,
            content_id=added_id,
            repo_root=repo_root,
        )
        if rich_op is not None:
            ops.append(rich_op)
            continue
        ops.append(
            InsertOp(
                op="insert",
                address=added,
                position=None,
                content_id=added_id,
                content_summary=f"new file: {added}",
            )
        )

    # Removals.
    ops.extend(
        DeleteOp(
            op="delete",
            address=gone,
            position=None,
            content_id=old_files[gone],
            content_summary=f"deleted: {gone}",
        )
        for gone in sorted(old_paths - new_paths)
    )

    # Modifications: same path on both sides, different content ID.
    changed = sorted(
        p for p in old_paths & new_paths if old_files[p] != new_files[p]
    )
    ops.extend(
        _diff_modified_file(
            path=p,
            old_hash=old_files[p],
            new_hash=new_files[p],
            repo_root=repo_root,
        )
        for p in changed
    )

    return StructuredDelta(
        domain=_DOMAIN_TAG,
        ops=ops,
        summary=_summarise_ops(ops),
    )
262
263 # ------------------------------------------------------------------
264 # 3. merge — three-way reconciliation
265 # ------------------------------------------------------------------
266
def merge(
    self,
    base: StateSnapshot,
    left: StateSnapshot,
    right: StateSnapshot,
    *,
    repo_root: pathlib.Path | None = None,
) -> MergeResult:
    """Three-way merge two divergent music state lines against a common base.

    A file is auto-merged when only one side changed it, or when both
    sides made the *same* change (both deleted it, or both ended up with
    the same content hash). When both sides changed the same file in
    different ways, the merge proceeds in stages:

    1. **File-level strategy** — if ``.museattributes`` contains an
       ``ours`` or ``theirs`` rule matching the path (dimension ``"*"``),
       the rule is applied and the file is removed from the conflict list.

    2. **Dimension-level merge** — for ``.mid`` files that survive the
       file-level check and exist in base, left and right, the three
       blobs are handed to ``merge_midi_dimensions``. If it returns a
       result, the merged bytes are written to the object store and the
       path is recorded with strategy ``"dimension-merge"``. A
       ``ValueError`` or ``None`` result leaves the path conflicted.

    3. **Manual override** — a ``manual`` strategy in ``.museattributes``
       forces an otherwise auto-resolved path (except a consensus
       deletion) into the conflict list and restores its base version.

    Args:
        base: Snapshot at the common ancestor commit.
        left: Snapshot for the *ours* (current) branch.
        right: Snapshot for the *theirs* (incoming) branch.
        repo_root: Path to the repository root so ``.museattributes`` and
            the object store can be located. ``None`` disables attribute
            loading and MIDI reconstruction (all conflicts become hard).

    Returns:
        A :class:`~muse.domain.MergeResult` whose ``merged`` manifest holds
        the reconciled files (conflicted paths keep their base version, or
        stay absent when base did not have them), ``conflicts`` lists the
        unresolved paths in sorted order, ``applied_strategies`` records
        which strategy settled each path, and ``dimension_reports`` holds
        the per-file report from the dimension merge.
    """
    from muse.core.attributes import load_attributes, resolve_strategy
    from muse.core.object_store import read_object, write_object
    from muse.plugins.midi.midi_merge import merge_midi_dimensions

    base_files = base["files"]
    left_files = left["files"]
    right_files = right["files"]

    attrs = load_attributes(repo_root, domain=_DOMAIN_TAG) if repo_root is not None else []

    left_changed: set[str] = _changed_paths(base_files, left_files)
    right_changed: set[str] = _changed_paths(base_files, right_files)
    both_changed: set[str] = left_changed & right_changed

    merged: MidiFileMap = dict(base_files)

    def adopt(side_files: MidiFileMap, path: str) -> None:
        """Make *merged* agree with one side for *path* (copy or delete)."""
        if path in side_files:
            merged[path] = side_files[path]
        else:
            merged.pop(path, None)

    # Clean single-side changes first.
    for path in left_changed - both_changed:
        adopt(left_files, path)
    for path in right_changed - both_changed:
        adopt(right_files, path)

    # Convergent changes: both sides deleted the file, or both arrived at
    # the same content hash. Neither is a conflict — there is nothing to
    # choose between.
    convergent: set[str] = {
        p for p in both_changed if left_files.get(p) == right_files.get(p)
    }
    for path in convergent:
        adopt(left_files, path)
    consensus_deleted: set[str] = {p for p in convergent if p not in left_files}

    real_conflicts: set[str] = both_changed - convergent

    applied_strategies: AppliedStrategies = {}
    dimension_reports: DimensionReports = {}
    final_conflicts: list[str] = []

    for path in sorted(real_conflicts):
        file_strategy = resolve_strategy(attrs, path, "*")

        if file_strategy in ("ours", "theirs"):
            adopt(left_files if file_strategy == "ours" else right_files, path)
            applied_strategies[path] = file_strategy
            continue

        # Dimension-level merge needs the object store and all three
        # versions of a MIDI file.
        if (
            repo_root is not None
            and path.lower().endswith(".mid")
            and path in left_files
            and path in right_files
            and path in base_files
        ):
            base_obj = read_object(repo_root, base_files[path])
            left_obj = read_object(repo_root, left_files[path])
            right_obj = read_object(repo_root, right_files[path])

            if base_obj is not None and left_obj is not None and right_obj is not None:
                try:
                    dim_result = merge_midi_dimensions(
                        base_obj, left_obj, right_obj,
                        attrs,
                        path,
                    )
                except ValueError:
                    dim_result = None

                if dim_result is not None:
                    merged_bytes, dim_report = dim_result
                    new_hash = blob_id(merged_bytes)
                    write_object(repo_root, new_hash, merged_bytes)
                    merged[path] = new_hash
                    applied_strategies[path] = "dimension-merge"
                    dimension_reports[path] = dim_report
                    continue

        # Unresolved: merged[path] is left at its base value.
        final_conflicts.append(path)

    # "manual" overrides auto-resolution for every changed path that was
    # not a real conflict, except consensus deletions.
    for path in sorted((left_changed | right_changed) - real_conflicts):
        if path in consensus_deleted:
            continue
        if resolve_strategy(attrs, path, "*") == "manual":
            final_conflicts.append(path)
            applied_strategies[path] = "manual"
            adopt(base_files, path)

    return MergeResult(
        merged=SnapshotManifest(files=merged, domain=_DOMAIN_TAG, directories=[]),
        conflicts=sorted(final_conflicts),
        applied_strategies=applied_strategies,
        dimension_reports=dimension_reports,
    )
423
424 # ------------------------------------------------------------------
425 # 4. drift — compare committed state vs live state
426 # ------------------------------------------------------------------
427
def drift(
    self,
    committed: StateSnapshot,
    live: LiveState,
) -> DriftReport:
    """Report how the live state differs from the *committed* snapshot.

    The live state is first normalised through :meth:`snapshot`, then
    compared with :meth:`diff` (called without a ``repo_root``). Ops are
    tallied by kind: ``insert`` counts as added, ``delete`` as removed,
    ``replace`` and ``patch`` as modified.

    Args:
        committed: The last committed snapshot.
        live: Either a ``pathlib.Path`` (repository root) or a snapshot
            dict representing current live state.

    Returns:
        A :class:`~muse.domain.DriftReport` with ``has_drift``, a
        comma-separated ``summary`` (``"working tree clean"`` when nothing
        changed) and the underlying ``delta``.
    """
    delta = self.diff(committed, self.snapshot(live))

    added = removed = changed = 0
    for op in delta["ops"]:
        kind = op["op"]
        if kind == "insert":
            added += 1
        elif kind == "delete":
            removed += 1
        elif kind in ("replace", "patch"):
            changed += 1

    tallies = ((added, "added"), (removed, "removed"), (changed, "modified"))
    parts: list[str] = [f"{count} {label}" for count, label in tallies if count]

    if parts:
        summary = ", ".join(parts)
    else:
        summary = "working tree clean"
    # A non-empty parts list is exactly "at least one counter is non-zero".
    return DriftReport(has_drift=bool(parts), summary=summary, delta=delta)
462
463 # ------------------------------------------------------------------
464 # 5. apply — execute a delta against live state (checkout)
465 # ------------------------------------------------------------------
466
def apply(self, delta: StateDelta, live_state: LiveState) -> LiveState:
    """Produce the live state that results from applying *delta*.

    When ``live_state`` is a ``pathlib.Path`` the delta is not consulted:
    the directory is simply rescanned via :meth:`snapshot`. The original
    author's note is that ``muse checkout`` has already restored the
    files on disk by the time this runs.

    When ``live_state`` is a snapshot dict, file-level ops are replayed
    onto a copy of its ``files`` mapping:

    - ``delete`` removes the address (missing addresses are ignored),
    - ``replace`` stores ``new_content_id`` at the address,
    - ``insert`` stores ``content_id`` at the address.

    Any other op kind (e.g. ``patch``) is skipped; callers that need
    those applied must pass the workdir path instead.

    Args:
        delta: A ``StructuredDelta`` produced by :meth:`diff`.
        live_state: The workdir path (preferred) or a snapshot dict.

    Returns:
        The updated live state as a ``SnapshotManifest``.
    """
    if isinstance(live_state, pathlib.Path):
        return self.snapshot(live_state)

    # Work on a copy so the caller's snapshot is not mutated.
    files_after = dict(live_state["files"])

    for op in delta["ops"]:
        if op["op"] == "insert":
            files_after[op["address"]] = op["content_id"]
            continue
        if op["op"] == "replace":
            files_after[op["address"]] = op["new_content_id"]
            continue
        if op["op"] == "delete":
            files_after.pop(op["address"], None)
        # Everything else is skipped in memory.

    return SnapshotManifest(
        files=files_after,
        domain=_DOMAIN_TAG,
        directories=[],
    )
504
505 # ------------------------------------------------------------------
506 # 6. schema — declare structural schema for the algorithm library
507 # ------------------------------------------------------------------
508
def schema(self) -> DomainSchema:
    """Return the structural schema for the MIDI domain.

    The top level is a ``SetSchema`` of ``audio_file`` elements identified
    by content. Below it, 21 dimensions are declared in a fixed order, each
    bound to one of four element schemas built here:

    - a note sequence (LCS, by position) for ``notes``;
    - a rank-1 sparse ``float32`` tensor for pitch bend, both pressure
      kinds and every ``cc_*`` controller dimension;
    - a meta-event sequence (LCS, by position) for ``program_change``,
      ``tempo_map``, ``time_signatures``, ``key_signatures`` and
      ``markers``;
    - a tree (Zhang-Shasha) for ``track_structure``.

    Eighteen dimensions are flagged ``independent_merge=True``. The three
    that are not — ``tempo_map``, ``time_signatures`` and
    ``track_structure`` — are described by the original author as blocking
    the merge of all other dimensions when they conflict.

    Returns:
        A ``DomainSchema`` with ``merge_mode="three_way"`` and
        ``schema_version`` set to the package ``__version__``.
    """
    note_stream = SequenceSchema(
        kind="sequence",
        element_type="note_event",
        identity="by_position",
        diff_algorithm="lcs",
        alphabet=None,
    )
    controller_curve = TensorSchema(
        kind="tensor",
        dtype="float32",
        rank=1,
        epsilon=0.5,
        diff_mode="sparse",
    )
    track_tree = TreeSchema(
        kind="tree",
        node_type="track_node",
        diff_algorithm="zhang_shasha",
    )
    meta_stream = SequenceSchema(
        kind="sequence",
        element_type="meta_event",
        identity="by_position",
        diff_algorithm="lcs",
        alphabet=None,
    )

    def dim(
        name: str,
        description: str,
        shape: SequenceSchema | TensorSchema | TreeSchema,
        *,
        independent: bool = True,
    ) -> DimensionSpec:
        """Shorthand for one ``DimensionSpec`` entry."""
        return DimensionSpec(
            name=name,
            description=description,
            schema=shape,
            independent_merge=independent,
        )

    dimensions = [
        # --- Expressive note content ---
        dim(
            "notes",
            "Note pitches, durations, and timing (melodic + rhythmic)",
            note_stream,
        ),
        dim(
            "pitch_bend",
            "Pitchwheel controller — expressive pitch deviation",
            controller_curve,
        ),
        dim(
            "channel_pressure",
            "Monophonic aftertouch — channel-wide pressure",
            controller_curve,
        ),
        dim(
            "poly_pressure",
            "Polyphonic aftertouch — per-note pressure",
            controller_curve,
        ),
        # --- Named CC controllers ---
        dim("cc_modulation", "CC 1 — modulation wheel", controller_curve),
        dim("cc_volume", "CC 7 — channel volume", controller_curve),
        dim("cc_pan", "CC 10 — stereo pan position", controller_curve),
        dim("cc_expression", "CC 11 — expression controller", controller_curve),
        dim("cc_sustain", "CC 64 — damper / sustain pedal", controller_curve),
        dim("cc_portamento", "CC 65 — portamento on/off", controller_curve),
        dim("cc_sostenuto", "CC 66 — sostenuto pedal", controller_curve),
        dim("cc_soft_pedal", "CC 67 — soft pedal (una corda)", controller_curve),
        dim("cc_reverb", "CC 91 — reverb send level", controller_curve),
        dim("cc_chorus", "CC 93 — chorus send level", controller_curve),
        dim("cc_other", "All other numbered CC controllers", controller_curve),
        # --- Patch / program selection ---
        dim(
            "program_change",
            "Instrument / patch selection events",
            meta_stream,
        ),
        # --- Non-independent timeline metadata ---
        dim(
            "tempo_map",
            (
                "Tempo (BPM) changes — non-independent: a conflict "
                "blocks merging all other dimensions"
            ),
            meta_stream,
            independent=False,
        ),
        dim(
            "time_signatures",
            (
                "Time signature changes — non-independent: affects "
                "bar structure for all other dimensions"
            ),
            meta_stream,
            independent=False,
        ),
        # --- Tonal and annotation metadata ---
        dim("key_signatures", "Key signature events", meta_stream),
        dim(
            "markers",
            "Section markers, cue points, text, lyrics, copyright",
            meta_stream,
        ),
        # --- Track structure (non-independent) ---
        dim(
            "track_structure",
            (
                "Track name, instrument name, sysex, unknown meta — "
                "non-independent: routing changes affect all tracks"
            ),
            track_tree,
            independent=False,
        ),
    ]

    workspace = SetSchema(
        kind="set",
        element_type="audio_file",
        identity="by_content",
    )
    return DomainSchema(
        domain=_DOMAIN_TAG,
        description=(
            "MIDI and audio file versioning with note-level diff and "
            "21-dimension independent merge"
        ),
        top_level=workspace,
        dimensions=dimensions,
        merge_mode="three_way",
        schema_version=__version__,
    )
736
737 # ------------------------------------------------------------------
738 # 7. merge_ops — address-keyed merge (AddressedMergePlugin)
739 # ------------------------------------------------------------------
740
def merge_ops(
    self,
    base: StateSnapshot,
    ours_snap: StateSnapshot,
    theirs_snap: StateSnapshot,
    ours_ops: list[DomainOp],
    theirs_ops: list[DomainOp],
    *,
    repo_root: pathlib.Path | None = None,
) -> MergeResult:
    """Operation-level three-way merge using address-keyed map semantics.

    Extends the file-level ``merge()`` method with sub-file granularity:
    two changes to non-overlapping notes in the same MIDI file need not
    produce a conflict.

    Algorithm
    ---------
    1. Run :func:`~muse.core.op_merge.merge_op_lists` on the flat op
       lists to split them into cleanly merged ops and conflicting
       (ours, theirs) pairs.
    2. Start from the *base* manifest and apply every clean op whose
       address is not involved in a conflict. ``insert`` / ``replace``
       ops supply their content ID directly. For ``patch`` ops the final
       file hash is taken from *ours_snap* or *theirs_snap* when only one
       side patched the file. When both sides did, ``_merge_patch_ops``
       is asked to reconstruct a merged file — once per address, with the
       result reused for any further ``PatchOp`` on that address. A
       ``None`` result turns the address into a conflict.
    3. For conflicting pairs, consult ``.museattributes``. Strategies
       ``"ours"`` and ``"theirs"`` are applied automatically; everything
       else enters ``MergeResult.conflicts``.

    Args:
        base: Common ancestor snapshot.
        ours_snap: Final snapshot of our branch.
        theirs_snap: Final snapshot of their branch.
        ours_ops: Operations from our branch delta (base → ours).
        theirs_ops: Operations from their branch delta (base → theirs).
        repo_root: Repository root for object store and attributes.
            ``None`` disables attribute loading.

    Returns:
        A :class:`~muse.domain.MergeResult` with the reconciled manifest
        (unresolved addresses keep their base entry), the sorted,
        de-duplicated conflict addresses, the strategies applied, and the
        cleanly merged ops as ``op_log``.
    """
    from muse.core.attributes import load_attributes, resolve_strategy
    from muse.core.op_merge import merge_op_lists

    attrs = load_attributes(repo_root, domain=_DOMAIN_TAG) if repo_root is not None else []

    # Commutativity classification: find commuting and conflicting op pairs.
    merge_result = merge_op_lists([], ours_ops, theirs_ops)

    base_files = base["files"]
    ours_files = ours_snap["files"]
    theirs_files = theirs_snap["files"]

    # Build the merged manifest starting from base.
    merged_files: MidiFileMap = dict(base_files)
    applied_strategies: AppliedStrategies = {}
    final_conflicts: list[str] = []
    op_log: list[DomainOp] = list(merge_result.merged_ops)

    def adopt(side_files: MidiFileMap, addr: str) -> None:
        """Make *merged_files* agree with one side for *addr*."""
        if addr in side_files:
            merged_files[addr] = side_files[addr]
        else:
            merged_files.pop(addr, None)

    # Group PatchOps by address so we can detect same-file note merges.
    ours_patches: PatchOpMap = {}
    theirs_patches: PatchOpMap = {}
    for op in ours_ops:
        if op["op"] == "patch":
            ours_patches[op["address"]] = op
    for op in theirs_ops:
        if op["op"] == "patch":
            theirs_patches[op["address"]] = op

    # Track which addresses are involved in a conflict.
    conflicting_addresses: set[str] = {
        our_op["address"] for our_op, _ in merge_result.conflict_ops
    }

    # Result of the both-sides reconstruction, per address. Prevents the
    # reads, reconstruction and write from being repeated when
    # merged_ops carries a PatchOp from each side for the same file.
    patch_merge_cache: dict[str, str | None] = {}

    # --- Apply clean merged ops ---
    for op in merge_result.merged_ops:
        addr = op["address"]
        if addr in conflicting_addresses:
            continue  # handled in conflict resolution below

        if op["op"] == "insert":
            merged_files[addr] = op["content_id"]

        elif op["op"] == "delete":
            merged_files.pop(addr, None)

        elif op["op"] == "replace":
            merged_files[addr] = op["new_content_id"]

        elif op["op"] == "patch":
            # PatchOp: determine which side(s) patched this file.
            has_ours = addr in ours_patches
            has_theirs = addr in theirs_patches

            if has_ours and not has_theirs:
                # Only our side changed this file — take our version.
                adopt(ours_files, addr)

            elif has_theirs and not has_ours:
                # Only their side changed this file — take their version.
                adopt(theirs_files, addr)

            else:
                # Both sides patched the same file with commuting note
                # ops. Attempt note-level MIDI reconstruction (once).
                if addr not in patch_merge_cache:
                    patch_merge_cache[addr] = _merge_patch_ops(
                        addr=addr,
                        ours_patch=ours_patches[addr],
                        theirs_patch=theirs_patches[addr],
                        base_files=dict(base_files),
                        ours_snap_files=dict(ours_files),
                        theirs_snap_files=dict(theirs_files),
                        repo_root=repo_root,
                    )
                merged_content_id = patch_merge_cache[addr]
                if merged_content_id is not None:
                    merged_files[addr] = merged_content_id
                else:
                    # Reconstruction failed — treat as manual conflict.
                    final_conflicts.append(addr)

    # --- Resolve conflicts ---
    for our_op, their_op in merge_result.conflict_ops:
        addr = our_op["address"]
        strategy = resolve_strategy(attrs, addr, "*")

        if strategy == "ours":
            adopt(ours_files, addr)
            applied_strategies[addr] = "ours"

        elif strategy == "theirs":
            adopt(theirs_files, addr)
            applied_strategies[addr] = "theirs"

        else:
            # Strategy "manual" or "auto" without a clear resolution.
            final_conflicts.append(addr)

    return MergeResult(
        merged=SnapshotManifest(files=merged_files, domain=_DOMAIN_TAG, directories=[]),
        conflicts=sorted(set(final_conflicts)),
        applied_strategies=applied_strategies,
        op_log=op_log,
    )
896
897 # ---------------------------------------------------------------------------
898 # Module-level helpers
899 # ---------------------------------------------------------------------------
900
def _merge_patch_ops(
    *,
    addr: str,
    ours_patch: PatchOp,
    theirs_patch: PatchOp,
    base_files: Manifest,
    ours_snap_files: Manifest,
    theirs_snap_files: Manifest,
    repo_root: pathlib.Path | None,
) -> str | None:
    """Attempt a note-level MIDI merge for two ``PatchOp`` entries on one file.

    Runs :func:`muse.core.op_merge.merge_op_lists` on the ``child_ops`` of
    both patches. When the result is clean, the merged MIDI is rebuilt by:

    1. Loading base, ours, and theirs MIDI bytes from the object store.
    2. Extracting note sequences from all three versions.
    3. Building ``content_id → NoteKey`` look-ups so that ``InsertOp``
       content IDs can be resolved back to real notes.
    4. Applying the merged note ops (deletions, then insertions) to the base
       note sequence.
    5. Calling ``reconstruct_midi`` and writing the resulting bytes to the
       object store.

    Returns ``None`` (so the caller can fall back to a file-level conflict)
    when:

    - *repo_root* is ``None`` or *addr* is absent from *base_files*.
    - The note-level merge reports conflicts.
    - The base, ours, or theirs revision cannot be read from the object
      store. All three are required: without a branch's bytes its inserted
      notes cannot be resolved and would silently vanish from the result.
    - Any step of extraction / reconstruction / storage raises.

    An insert op whose content ID still cannot be resolved after consulting
    ours, theirs, and base is logged at debug level and skipped.

    Args:
        addr: Workspace-relative MIDI file path.
        ours_patch: Our PatchOp for this file.
        theirs_patch: Their PatchOp for this file.
        base_files: Content-ID map for the common ancestor snapshot.
        ours_snap_files: Content-ID map for our branch's final snapshot.
        theirs_snap_files: Content-ID map for their branch's final snapshot.
        repo_root: Repository root for object store access.

    Returns:
        Content-ID of the merged MIDI (already written to the object store),
        or ``None`` on failure.
    """
    if repo_root is None or addr not in base_files:
        return None

    # Imported lazily, as in the rest of this module's helpers.
    from muse.core.object_store import read_object, write_object
    from muse.core.op_merge import merge_op_lists
    from muse.plugins.midi.midi_diff import NoteKey, extract_notes, reconstruct_midi

    # Classify the note-level ops first; conflicts make reconstruction moot.
    note_result = merge_op_lists([], ours_patch["child_ops"], theirs_patch["child_ops"])
    if not note_result.is_clean:
        logger.debug(
            "⚠️ Note-level conflict in %r: %d pair(s) — falling back to file conflict",
            addr,
            len(note_result.conflict_ops),
        )
        return None

    try:
        base_bytes = read_object(repo_root, base_files[addr])
        if base_bytes is None:
            return None

        ours_hash = ours_snap_files.get(addr)
        theirs_hash = theirs_snap_files.get(addr)
        ours_bytes = read_object(repo_root, ours_hash) if ours_hash else None
        theirs_bytes = read_object(repo_root, theirs_hash) if theirs_hash else None
        if ours_bytes is None or theirs_bytes is None:
            # Without a branch revision its InsertOps are unresolvable, and
            # merging anyway would drop those notes from a "clean" result.
            return None

        base_notes, ticks_per_beat = extract_notes(base_bytes)
        ours_notes, _ = extract_notes(ours_bytes)
        theirs_notes, _ = extract_notes(theirs_bytes)

        # Hash every base note exactly once; the pairs feed both the deletion
        # filter and the last-resort lookup below.
        base_keyed: list[tuple[str, NoteKey]] = [
            (_note_content_id(n), n) for n in base_notes
        ]

        ours_by_id: NoteIdMap = {_note_content_id(n): n for n in ours_notes}
        theirs_by_id: NoteIdMap = {_note_content_id(n): n for n in theirs_notes}
        base_by_id: NoteIdMap = dict(base_keyed)

        # Apply deletions to the base note list.
        delete_ids: set[str] = {
            op["content_id"] for op in note_result.merged_ops if op["op"] == "delete"
        }
        surviving: list[NoteKey] = [
            n for cid, n in base_keyed if cid not in delete_ids
        ]

        # Resolve insertions: ours first, then theirs, then base as a
        # fallback (base should not normally contain an inserted note).
        lookup_order = (ours_by_id, theirs_by_id, base_by_id)
        inserted: list[NoteKey] = []
        for op in note_result.merged_ops:
            if op["op"] != "insert":
                continue
            cid = op["content_id"]
            note = next(
                (table[cid] for table in lookup_order if cid in table), None
            )
            if note is None:
                logger.debug(
                    "⚠️ Cannot resolve note content_id %s for %r — skipping",
                    short_id(cid, strip=True),
                    addr,
                )
                continue
            inserted.append(note)

        merged_notes = surviving + inserted
        merged_bytes = reconstruct_midi(merged_notes, ticks_per_beat=ticks_per_beat)

        merged_hash = blob_id(merged_bytes)
        write_object(repo_root, merged_hash, merged_bytes)

        logger.info(
            "✅ Note-level MIDI merge for %r: %d ops clean, %d notes in result",
            addr,
            len(note_result.merged_ops),
            len(merged_notes),
        )
        return merged_hash

    except Exception as exc:  # noqa: BLE001 intentional broad catch
        # Best-effort: any failure degrades to a file-level conflict.
        logger.debug("⚠️ MIDI note-level reconstruction failed for %r: %s", addr, exc)
        return None
1031
def _note_content_id(note: NoteKey) -> str:
    """Compute the content ID of a single note.

    The ID is ``blob_id`` applied to the UTF-8 encoding of the note's
    ``pitch``, ``velocity``, ``start_tick``, ``duration_ticks`` and
    ``channel`` values joined by ``:`` in that order.

    This must stay in sync with the scheme used by
    :mod:`muse.plugins.midi.midi_diff`, so that IDs computed here match the
    ``content_id`` carried by note-level ``InsertOp`` / ``DeleteOp`` entries.
    """
    field_order = ("pitch", "velocity", "start_tick", "duration_ticks", "channel")
    serialised = ":".join(f"{note[field]}" for field in field_order)
    return blob_id(serialised.encode())
1044
def _new_file_patch(
    *,
    path: str,
    content_id: str,
    repo_root: pathlib.Path | None,
) -> PatchOp | None:
    """Build a ``PatchOp`` listing the symbols found in a newly-added file.

    Content is read from the object store first; if the object is absent the
    on-disk file under ``repo_root`` is used instead, so uncommitted
    working-tree files are handled too. Bytes are decoded as UTF-8 with
    undecodable sequences replaced.

    Supported symbol extraction (line-based, no real parsing):

    - ``.md`` / ``.markdown`` — every line starting with ``#``; the address
      is the line with leading ``#`` characters and surrounding whitespace
      removed.
    - ``.py`` — every line that, after stripping indentation, starts with
      ``def `` or ``class `` (so nested definitions and methods are included).
      The address is the bare identifier: anything from the first ``(``,
      ``:`` or ``[`` onward is dropped, so ``class Foo:`` yields ``Foo``
      rather than ``Foo:``.

    Args:
        path: Workspace-relative path of the added file.
        content_id: Object-store ID of the file's content.
        repo_root: Repository root; ``None`` disables extraction.

    Returns:
        A ``PatchOp`` whose ``child_ops`` are one ``InsertOp`` per symbol, or
        ``None`` when ``repo_root`` is ``None``, the extension is unsupported,
        the content cannot be read, or no symbols were found.
    """
    if repo_root is None:
        return None

    lower = path.lower()
    is_md = lower.endswith((".md", ".markdown"))
    is_py = lower.endswith(".py")
    if not (is_md or is_py):
        return None

    # Prefer object store; fall through to disk for uncommitted files.
    from muse.core.object_store import read_object

    content: bytes | None = read_object(repo_root, content_id)
    if content is None:
        disk = repo_root / path
        if disk.is_file():
            content = disk.read_bytes()
    if content is None:
        return None

    text = content.decode("utf-8", errors="replace")

    # (address, content_summary) for every symbol, in file order.
    symbols: list[tuple[str, str]] = []
    for lineno, line in enumerate(text.splitlines(), 1):
        if is_md:
            if not line.startswith("#"):
                continue
            heading = line.lstrip("#").strip()
            if heading:
                symbols.append((heading, f"{heading} L{lineno}–{lineno}"))
            continue

        # Python source (exactly one of is_md / is_py is true at this point).
        stripped = line.strip()
        if not stripped.startswith(("def ", "class ")):
            continue
        tokens = stripped.split("(", 1)[0].split()
        if len(tokens) < 2:
            continue
        kind = tokens[0]
        # A paren-less header ("class Foo:") or a type-parameter list
        # ("def f[T](...)") leaves punctuation glued to the name; cut it off.
        name = tokens[1].split(":", 1)[0].split("[", 1)[0]
        if name:
            symbols.append((name, f"{kind} {name} L{lineno}–{lineno}"))

    if not symbols:
        return None

    child_ops: list[DomainOp] = [
        InsertOp(
            op="insert",
            address=address,
            position=None,
            content_id="",
            content_summary=summary,
        )
        for address, summary in symbols
    ]

    return PatchOp(
        op="patch",
        address=path,
        child_ops=child_ops,
        child_domain=_DOMAIN_TAG,
        child_summary=f"new file: {path} ({len(child_ops)} symbol(s))",
    )
1125
def _diff_modified_file(
    *,
    path: str,
    old_hash: str,
    new_hash: str,
    repo_root: pathlib.Path | None,
) -> DomainOp:
    """Produce the richest available operation for a modified file.

    For ``.mid`` files where both content revisions are readable from the
    object store, runs ``diff_midi_notes`` and wraps its ops, domain and
    summary in a ``PatchOp``. Falls back to a ``ReplaceOp`` (opaque
    before/after hash pair) when the file is not a ``.mid`` file,
    ``repo_root`` is ``None``, either revision cannot be read, or the deep
    diff raises.

    Args:
        path: Workspace-relative POSIX path of the modified file.
        old_hash: Content ID of the base revision in the object store.
        new_hash: Content ID of the current revision in the object store.
        repo_root: Repository root for object store access. ``None`` forces
            immediate fallback to ``ReplaceOp``.

    Returns:
        A ``PatchOp`` with note-level child ops when the deep diff succeeds,
        otherwise a ``ReplaceOp`` with the before/after content hashes.
    """
    if repo_root is not None and path.lower().endswith(".mid"):
        from muse.core.object_store import read_object
        from muse.plugins.midi.midi_diff import diff_midi_notes

        base_bytes = read_object(repo_root, old_hash)
        target_bytes = read_object(repo_root, new_hash)

        if base_bytes is not None and target_bytes is not None:
            try:
                child_delta = diff_midi_notes(
                    base_bytes, target_bytes, file_path=path
                )
                # Built inside the ``try`` on purpose: a malformed delta
                # degrades to ReplaceOp just like a parse failure does.
                return PatchOp(
                    op="patch",
                    address=path,
                    child_ops=child_delta["ops"],
                    child_domain=child_delta["domain"],
                    child_summary=child_delta["summary"],
                )
            except Exception as exc:  # noqa: BLE001 best-effort deep diff
                # ``ValueError`` is already an ``Exception``; listing both was
                # redundant. Any failure falls through to the opaque op.
                logger.debug("⚠️ MIDI deep diff failed for %r: %s", path, exc)

    return ReplaceOp(
        op="replace",
        address=path,
        position=None,
        old_content_id=old_hash,
        new_content_id=new_hash,
        old_summary=f"{path} (previous)",
        new_summary=f"{path} (updated)",
    )
1184
def _summarise_ops(ops: list[DomainOp]) -> str:
    """Render a one-line, human-readable tally of file-level domain ops.

    Ops are counted by their ``"op"`` field. Kinds are reported in the fixed
    order insert, delete, replace, patch; kinds with a zero count are
    omitted and unknown kinds are ignored. Returns ``"no changes"`` when
    nothing was counted.
    """
    # Insertion order of this mapping fixes the order of the output phrases.
    verb_for_kind = {
        "insert": "added",
        "delete": "removed",
        "replace": "modified",
        "patch": "patched",
    }
    tally = dict.fromkeys(verb_for_kind, 0)

    for entry in ops:
        op_kind = entry["op"]
        if op_kind in tally:
            tally[op_kind] += 1

    phrases = [
        f"{count} file{'s' if count != 1 else ''} {verb_for_kind[op_kind]}"
        for op_kind, count in tally.items()
        if count
    ]
    return ", ".join(phrases) or "no changes"
1214
def _changed_paths(
    base: Manifest, other: Manifest
) -> set[str]:
    """Return every path whose entry differs between *base* and *other*.

    A path is included when it exists in only one of the two manifests
    (added or deleted) or exists in both with different values (modified).
    """
    base_keys, other_keys = set(base), set(other)
    # Present on exactly one side: additions and deletions together.
    one_sided = base_keys ^ other_keys
    # Present on both sides but mapped to different content.
    rewritten = {
        path for path in base_keys & other_keys if base[path] != other[path]
    }
    return one_sided | rewritten
1226
#: Module-level singleton — import and use directly.
plugin = MidiPlugin()

# Import-time conformance checks: importing this module fails with an
# AssertionError if the singleton does not pass ``isinstance`` against either
# protocol. NOTE(review): ``isinstance`` against a Protocol only works when
# the protocol is runtime-checkable — presumably both are; confirm at their
# definitions. These checks are skipped when Python runs with ``-O``.
assert isinstance(plugin, MuseDomainPlugin), (
    "MidiPlugin does not satisfy the MuseDomainPlugin protocol"
)
assert isinstance(plugin, AddressedMergePlugin), (
    "MidiPlugin does not satisfy the AddressedMergePlugin protocol"
)
File History 2 commits
sha256:ff478cfdcdd4b7fd6de89cb68896601a981f945634463275ec333bd20ca36402 Merge branch 'dev' into main Human 22 days ago
sha256:1c4b3e3a9a1f300774c3ee662b572a698d5fd405bf765a71e6011a2e9c3eaaaa feat: Muse — version control for the agent era Human 74 days ago