gabriel / muse public
manifest.py python
319 lines 10.9 KB
Raw
sha256:18b983389ee1b55900fcd799bfbb496552d2e3ecded9d18cefbfef188947a12e chore: remove blob-debug test marker file Sonnet 4.6 1 day ago
1 """Hierarchical chunk manifests for the Muse MIDI plugin.
2
3 Evolves the flat ``{"files": {"track.mid": "<sha256>"}}`` snapshot beyond
4 a single content hash per file to a rich, per-bar, per-track manifest that
5 enables:
6
7 - **Partial diff** — compare only the bars that changed.
8 - **Query acceleration** — answer note queries without reading full MIDI blobs.
9 - **Targeted merge** — attempt to merge only the bars with conflicts.
10 - **Historical analytics** — aggregate statistics over commit history without
11 re-parsing MIDI bytes on every query.
12
13 Backward compatibility
14 ----------------------
15 The ``MusicManifest.files`` field is identical to the standard
16 ``SnapshotManifest.files`` so that the core Muse engine and all existing
17 commands continue to work unchanged. The ``tracks`` field is additive
18 metadata stored as a sidecar under ``.muse/music_manifests/`` — never
19 replacing the canonical flat manifest.
20
21 Storage layout::
22
23 .muse/music_manifests/
24 <snapshot_id>.json — full MusicManifest for this snapshot
25 (rebuildable from history; add to .museignore [domain.midi] in CI)
26
27 Public API
28 ----------
29 - :class:`BarChunk` — per-bar chunk descriptor.
30 - :class:`TrackManifest` — rich metadata for one MIDI track.
31 - :class:`MusicManifest` — top-level sidecar manifest.
32 - :func:`build_bar_chunk` — build a :class:`BarChunk` from a bar's notes.
33 - :func:`build_track_manifest` — build a :class:`TrackManifest`.
34 - :func:`build_music_manifest` — build the full :class:`MusicManifest`.
35 - :func:`write_music_manifest` — persist to ``.muse/music_manifests/``.
36 - :func:`read_music_manifest` — load from the sidecar store.
37 """
38
39 import json
40 import logging
41 import pathlib
42 from typing import Literal, TypedDict
43
44 from muse._version import __version__
45 from muse.core.paths import music_manifests_dir
46 from muse.core.types import Manifest, blob_id, load_json_file
47 from muse.plugins.midi._query import (
48 NoteInfo,
49 detect_chord,
50 key_signature_guess,
51 notes_by_bar,
52 )
53 from muse.plugins.midi.midi_diff import extract_notes
54
55 logger = logging.getLogger(__name__)
56
# Bar number rendered as a string (JSON object keys are strings) -> BarChunk.
type _BarMap = dict[str, "BarChunk"]
# Track file path -> TrackManifest.
type _TrackMap = dict[str, "TrackManifest"]
# Track file path -> changed bar numbers; ``[-1]`` is the whole-track sentinel
# used by ``diff_manifests_by_bar``.
type _ChangeMap = dict[str, list[int]]
60
61 # ---------------------------------------------------------------------------
62 # Types
63 # ---------------------------------------------------------------------------
64
class BarChunk(TypedDict):
    """Summary of the note events that fall inside a single bar of a MIDI track.

    Keys:
        bar: Bar number, counted from 1 (4/4 time is assumed).
        chunk_hash: ``sha256:``-prefixed ID computed over the canonical JSON
            of every note in the bar. Lets callers detect per-bar changes
            without re-parsing MIDI.
        note_count: How many notes the bar contains.
        chord: Best-guess chord name for the bar (e.g. ``"Cmaj"``).
        pitch_range: ``[min_pitch, max_pitch]`` as MIDI pitch values.
    """

    bar: int
    chunk_hash: str
    note_count: int
    chord: str
    pitch_range: list[int]
81
class TrackManifest(TypedDict):
    """Rich metadata descriptor for one MIDI track at a specific snapshot.

    Keys:
        track_id: Identifier derived by hashing the track's file path.
            Because it depends only on the path, it **changes when the file
            is renamed**. Use entity IDs in the entity index when continuity
            across renames is required.
        file_path: Workspace-relative MIDI file path.
        content_hash: SHA-256 of the full MIDI file bytes (same as the flat
            manifest entry — the canonical content address).
        bars: Mapping from ``str(bar_number)`` to :class:`BarChunk`.
            JSON keys are always strings; callers convert to ``int``.
        ticks_per_beat: MIDI ticks per quarter note for this file.
        note_count: Total note count across all bars.
        key_guess: Krumhansl-Schmuckler key estimate (e.g. ``"G major"``).
        bar_count: Number of bars with at least one note.
    """

    track_id: str
    file_path: str
    content_hash: str
    bars: _BarMap
    ticks_per_beat: int
    note_count: int
    key_guess: str
    bar_count: int
108
class MusicManifest(TypedDict):
    """Top-level hierarchical manifest describing one music snapshot.

    Acts as the sidecar companion to the standard
    :class:`~muse.domain.SnapshotManifest`. ``files`` mirrors the flat
    manifest, and is the only part the core engine reads for content
    addressing; ``tracks`` layers music-domain detail on top for queries,
    diff, and merge.

    Keys:
        domain: Always the literal ``"midi"``.
        schema_version: The Muse package version (read from ``muse._version``).
        snapshot_id: The snapshot this manifest belongs to.
        files: Standard flat ``{path: sha256}`` manifest (compat layer).
        tracks: ``{path: TrackManifest}`` for each MIDI file.
    """

    domain: Literal["midi"]
    schema_version: str
    snapshot_id: str
    files: Manifest
    tracks: _TrackMap
128
129 # ---------------------------------------------------------------------------
130 # Builders
131 # ---------------------------------------------------------------------------
132
def _bar_chunk_hash(notes: list[NoteInfo]) -> str:
    """Return a ``sha256:``-prefixed ID of the canonical JSON of a bar's notes.

    The notes are put into a total order before serialization so the result
    depends only on the *set* of notes in the bar, never on the order in
    which the caller happened to supply them.

    Args:
        notes: Notes belonging to a single bar, in any order.

    Returns:
        The ``blob_id`` of the compact, key-sorted JSON encoding of the notes.
    """
    # Sorting on (start_tick, pitch) alone leaves ties — e.g. the same pitch
    # struck at the same tick on two channels — in input order, which would
    # make the "canonical" hash order-dependent. Every serialized field takes
    # part in the key so ties are broken deterministically. Bars without such
    # ties serialize exactly as before.
    ordered = sorted(
        notes,
        key=lambda n: (
            n.start_tick,
            n.pitch,
            n.channel,
            n.duration_ticks,
            n.velocity,
        ),
    )
    payload = json.dumps(
        [
            {
                "pitch": n.pitch,
                "velocity": n.velocity,
                "start_tick": n.start_tick,
                "duration_ticks": n.duration_ticks,
                "channel": n.channel,
            }
            for n in ordered
        ],
        sort_keys=True,
        separators=(",", ":"),
    ).encode()
    return blob_id(payload)
150
def build_bar_chunk(bar_num: int, notes: list[NoteInfo]) -> BarChunk:
    """Assemble the :class:`BarChunk` descriptor for bar *bar_num*.

    Args:
        bar_num: 1-indexed bar number.
        notes: Every note that falls in this bar.

    Returns:
        A :class:`BarChunk` carrying the bar's hash, note count, detected
        chord, and pitch range. An empty *notes* list yields a pitch range
        of ``[0, 0]``.
    """
    pitch_classes = frozenset(note.pitch_class for note in notes)
    chord_name = detect_chord(pitch_classes)

    note_pitches = [note.pitch for note in notes]
    if note_pitches:
        span: list[int] = [min(note_pitches), max(note_pitches)]
    else:
        # No notes: fall back to a placeholder range instead of failing in min/max.
        span = [0, 0]

    return BarChunk(
        bar=bar_num,
        chunk_hash=_bar_chunk_hash(notes),
        note_count=len(notes),
        chord=chord_name,
        pitch_range=span,
    )
172
def build_track_manifest(
    notes: list[NoteInfo],
    file_path: str,
    content_hash: str,
    ticks_per_beat: int,
) -> TrackManifest:
    """Assemble a :class:`TrackManifest` from an already-parsed note list.

    Args:
        notes: All notes extracted from the MIDI file.
        file_path: Workspace-relative MIDI file path.
        content_hash: SHA-256 of the MIDI file bytes (from the flat manifest).
        ticks_per_beat: MIDI timing resolution.

    Returns:
        A populated :class:`TrackManifest` whose ``bars`` mapping is keyed by
        the bar number rendered as a string, inserted in ascending bar order.
    """
    # The track ID is the blob ID of the encoded path, so it follows the path.
    path_id = blob_id(file_path.encode())

    grouped = notes_by_bar(notes)
    chunks: _BarMap = {
        str(number): build_bar_chunk(number, grouped[number])
        for number in sorted(grouped)
    }

    estimated_key = key_signature_guess(notes)

    return TrackManifest(
        track_id=path_id,
        file_path=file_path,
        content_hash=content_hash,
        bars=chunks,
        ticks_per_beat=ticks_per_beat,
        note_count=len(notes),
        key_guess=estimated_key,
        bar_count=len(chunks),
    )
208
209 # ---------------------------------------------------------------------------
210 # Persistence
211 # ---------------------------------------------------------------------------
212
def _manifest_path(repo_root: pathlib.Path, snapshot_id: str) -> pathlib.Path:
    """Return the sidecar path ``<music manifests dir>/<snapshot_id>.json``."""
    sidecar_dir = music_manifests_dir(repo_root)
    filename = f"{snapshot_id}.json"
    return sidecar_dir / filename
215
def write_music_manifest(
    repo_root: pathlib.Path,
    manifest: MusicManifest,
) -> pathlib.Path:
    """Write *manifest* to ``.muse/music_manifests/<snapshot_id>.json``.

    The parent directory is created on demand. The file holds the manifest
    as 2-space-indented JSON followed by a trailing newline.

    Args:
        repo_root: Repository root.
        manifest: The manifest to write.

    Returns:
        Path to the written file.

    Raises:
        ValueError: If ``manifest["snapshot_id"]`` is missing or empty.
    """
    snapshot_id = manifest.get("snapshot_id", "")
    if not snapshot_id:
        raise ValueError("MusicManifest.snapshot_id must be non-empty")

    destination = _manifest_path(repo_root, snapshot_id)
    destination.parent.mkdir(parents=True, exist_ok=True)

    serialized = json.dumps(manifest, indent=2)
    destination.write_text(f"{serialized}\n")

    track_total = len(manifest["tracks"])
    logger.debug(
        "✅ Music manifest written: %s (%d tracks)",
        snapshot_id[:8],
        track_total,
    )
    return destination
241
def read_music_manifest(
    repo_root: pathlib.Path,
    snapshot_id: str,
) -> MusicManifest | None:
    """Load the sidecar music manifest stored for *snapshot_id*.

    Args:
        repo_root: Repository root.
        snapshot_id: Snapshot ID.

    Returns:
        The :class:`MusicManifest`, or ``None`` when the sidecar file does
        not exist or ``load_json_file`` could not produce a value from it
        (the latter case is logged as a warning).
    """
    sidecar = _manifest_path(repo_root, snapshot_id)
    if sidecar.exists():
        loaded: MusicManifest | None = load_json_file(sidecar)
        if loaded is None:
            logger.warning("⚠️ Corrupt music manifest %s: unreadable or invalid JSON", sidecar)
        return loaded
    return None
264
265 # ---------------------------------------------------------------------------
266 # Partial diff helper
267 # ---------------------------------------------------------------------------
268
def diff_manifests_by_bar(
    base: MusicManifest,
    target: MusicManifest,
) -> _ChangeMap:
    """Report, track by track, which bars differ between two manifests.

    Only the stored ``content_hash`` and per-bar ``chunk_hash`` values are
    compared, so no MIDI bytes are loaded.

    Args:
        base: Ancestor manifest.
        target: Newer manifest.

    Returns:
        ``{track_path: [changed_bar_numbers]}`` with bar numbers in ascending
        order, covering every track in which at least one bar differs. A
        track present in only one manifest maps to ``[-1]``, a sentinel
        meaning the whole track changed. Tracks whose ``content_hash``
        matches, or whose bars all match, are omitted.
    """
    base_tracks = base["tracks"]
    target_tracks = target["tracks"]
    result: _ChangeMap = {}

    for path in sorted(set(base_tracks) | set(target_tracks)):
        before = base_tracks.get(path)
        after = target_tracks.get(path)

        # Track added or removed: flag it wholesale with the sentinel.
        if before is None or after is None:
            result[path] = [-1]
            continue

        # Identical file bytes cannot contain a changed bar.
        if before["content_hash"] == after["content_hash"]:
            continue

        before_bars = before["bars"]
        after_bars = after["bars"]

        differing: list[int] = []
        # Keys are strings; order numerically so "10" sorts after "2".
        for key in sorted(set(before_bars) | set(after_bars), key=int):
            old_chunk = before_bars.get(key)
            new_chunk = after_bars.get(key)
            unchanged = (
                old_chunk is not None
                and new_chunk is not None
                and old_chunk["chunk_hash"] == new_chunk["chunk_hash"]
            )
            if not unchanged:
                differing.append(int(key))

        if differing:
            result[path] = differing

    return result
File History 7 commits
sha256:18b983389ee1b55900fcd799bfbb496552d2e3ecded9d18cefbfef188947a12e chore: remove blob-debug test marker file Sonnet 4.6 1 day ago
sha256:e452ad9a6ace6ccc6d875a35e06caf9da5576a970c1c36133b69a891ce5fefa8 chore: prebuild timing test Sonnet 4.6 9 days ago
sha256:0008ab6695e3e064b3e236b24fd19e538fef6a588eb0d211622f4466d919c0b1 merge: pull staging/dev — advance to 0.2.0rc12 Sonnet 4.6 patch 11 days ago
sha256:9c33d61749fff814c5226d5386aa2af7064c2c02788594a25fdd709358132eea fix: _PROPOSAL_PREFIX_RESOLVE_LIMIT 200 → 100 to match hub … Sonnet 4.6 22 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 25 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 31 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor 31 days ago