gabriel / musehub public
musehub_repo_models.py python
728 lines 33.8 KB
Raw
sha256:5dfc96524e3921eb9acb8372241b6bec70b5f3e6598f79099a0ead16ff7cbb75 feat(phase1): add musehub_fetch_mpack_cache table (issue #9… Sonnet 4.6 patch 8 days ago
1 """ORM models for core repo and VCS objects.
2
3 Tables:
4 - musehub_repos: Remote repos (one per project, across any Muse domain)
5 - musehub_branches: Named branch pointers inside a repo
6 - musehub_commits: Content-addressed commit records — globally shared, no repo ownership
7 - musehub_commit_refs: Materialized reachability index — (repo_id, commit_id) membership
8 - musehub_objects: Content-addressed binary artifact storage — globally shared
9 - musehub_object_refs: Materialized reachability index — (repo_id, object_id) membership
10 - musehub_snapshots: Content-addressed file-tree records — globally shared, no repo ownership
11 - musehub_snapshot_refs: Materialized reachability index — (repo_id, snapshot_id) membership
12 - musehub_snapshot_entries: Normalized per-file rows within a snapshot
13 - musehub_sessions: Recording session records pushed from the CLI
14 - musehub_wire_tags: Lightweight semantic tags pushed via wire protocol
15 - musehub_bridge_mirrors: Git mirror registrations for a MuseHub repo
16 - musehub_mists: Content-addressed, signed, forkable single-artifact shares
17 - musehub_mpack_index: MPack index — maps every pushed object to its mpack in MinIO
18 - musehub_commit_graph: Precomputed commit reachability for O(frontier) DAG walks
19 """
20
21 from __future__ import annotations
22
23 from datetime import datetime, timezone
24
25 import sqlalchemy as sa
26 from sqlalchemy import ARRAY, Boolean, DateTime, ForeignKey, Index, Integer, String, Text, UniqueConstraint
27 from sqlalchemy.orm import Mapped, MappedAsDataclass, mapped_column, relationship
28 from sqlalchemy.dialects.postgresql import JSONB
29
30 from musehub.db.database import Base
31 from musehub.types.json_types import JSONObject, JSONValue # JSONValue needed for ForwardRef resolution in Mapped[]
32
33
def _utc_now() -> datetime:
    """Return the current instant as a timezone-aware ``datetime`` in UTC.

    Used as the Python-side default / ``onupdate`` callable for the timestamp
    columns declared in this module.
    """
    utc = timezone.utc
    return datetime.now(utc)
36
37
class MusehubRepo(MappedAsDataclass, Base):
    """A remote Muse repository — the hub-side equivalent of a Git remote.

    ``owner`` is the URL-visible username (e.g. "gabriel") and ``slug`` is the
    URL-safe repo name auto-generated from ``name`` (e.g. "neo-soul-experiment").
    Together they form the canonical /{owner}/{slug} URL scheme. The internal
    ``repo_id`` sha256 genesis hash remains the primary key — external URLs never expose it.

    ``domain_id`` links this repo to a registered Muse domain plugin
    (e.g. ``@gabriel/code``). ``domain_meta`` is a free-form JSON object for
    domain-specific metadata declared by that plugin.
    Tags are free-form strings that make repos discoverable on the explore page.

    This is a dataclass-style mapping (``MappedAsDataclass``): the generated
    ``__init__`` follows the field declaration order below, which is why the
    fields without defaults are listed first and why every relationship is
    declared with ``init=False``.
    """

    __tablename__ = "musehub_repos"
    __table_args__ = (
        # One repo per /{owner}/{slug} URL.
        UniqueConstraint("owner", "slug", name="uq_musehub_repos_owner_slug"),
        # Explore page: list public repos by owner
        Index("ix_musehub_repos_owner_visibility", "owner", "visibility"),
    )

    # --- Required fields (no default) — must precede optional fields ---
    repo_id: Mapped[str] = mapped_column(String(128), primary_key=True)
    name: Mapped[str] = mapped_column(String(255), nullable=False)
    # URL-visible owner username, e.g. "gabriel" — forms the /{owner}/{slug} path
    owner: Mapped[str] = mapped_column(String(64), nullable=False, index=True)
    # URL-safe slug auto-generated from name, e.g. "neo-soul-experiment"
    slug: Mapped[str] = mapped_column(String(64), nullable=False, index=True)
    # Presumably the internal user identifier behind ``owner`` — verify against callers.
    owner_user_id: Mapped[str] = mapped_column(String(128), nullable=False, index=True)

    # --- Optional fields with Python-side defaults ---
    visibility: Mapped[str] = mapped_column(String(20), nullable=False, default="public", server_default="public")
    description: Mapped[str] = mapped_column(Text, nullable=False, default="")
    # list of free-form tag strings for discovery
    tags: Mapped[list[str]] = mapped_column(ARRAY(Text), nullable=False, default_factory=list)
    # FK to musehub_domains — null means a legacy row predating this default.
    # New repos always have domain_id set; "code" is the default when none is specified.
    # NOTE(review): no ForeignKey is declared on this column in this model —
    # confirm whether the constraint is intentionally absent or lives elsewhere.
    domain_id: Mapped[str | None] = mapped_column(String(128), nullable=True, index=True, default="code")
    # Domain-specific metadata blob declared by the domain plugin
    domain_meta: Mapped[JSONObject] = mapped_column(JSONB, nullable=False, default_factory=dict)
    # Feature-flag settings not covered by dedicated columns (JSON blob).
    settings: Mapped[JSONObject | None] = mapped_column(JSONB, nullable=True, default=None)
    # Default branch name (updated on each push to match CLI intent)
    default_branch: Mapped[str] = mapped_column(String(255), nullable=False, default="main", server_default="main")
    # Last push timestamp — used for trending sort
    pushed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True, default=None)
    # Timestamps carry both a Python-side default (ORM inserts) and a server
    # default (rows inserted outside the ORM).
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, default_factory=_utc_now, server_default=sa.func.now()
    )
    # ``onupdate`` refreshes the value whenever the ORM emits an UPDATE for the row.
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, default_factory=_utc_now, server_default=sa.func.now(), onupdate=_utc_now
    )
    # Compliance: opt-out of AI training data pipelines for this repo.
    training_opt_out: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False, server_default=sa.false())
    # DMCA / legal hold — set True by POST /api/admin/takedown.
    # When True, pushes to this repo are blocked and objects are quarantined.
    dmca_hold: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False, server_default=sa.false())

    # --- Relationships — excluded from __init__ ---
    # Every collection uses cascade="all, delete-orphan": children are
    # persisted/deleted together with the repo, and a child removed from the
    # collection is deleted.  Targets named only as strings are not defined in
    # this module and are resolved by name from the declarative registry.
    branches: Mapped[list[MusehubBranch]] = relationship(
        "MusehubBranch", back_populates="repo", cascade="all, delete-orphan",
        init=False, default_factory=list,
    )
    commit_refs: Mapped[list["MusehubCommitRef"]] = relationship(
        "MusehubCommitRef", back_populates="repo", cascade="all, delete-orphan",
        init=False, default_factory=list,
    )
    issues: Mapped[list["MusehubIssue"]] = relationship(
        "MusehubIssue", back_populates="repo", cascade="all, delete-orphan",
        init=False, default_factory=list,
    )
    proposals: Mapped[list["MusehubProposal"]] = relationship(
        "MusehubProposal", back_populates="repo", cascade="all, delete-orphan",
        init=False, default_factory=list,
    )
    releases: Mapped[list["MusehubRelease"]] = relationship(
        "MusehubRelease", back_populates="repo", cascade="all, delete-orphan",
        init=False, default_factory=list,
    )
    sessions: Mapped[list[MusehubSession]] = relationship(
        "MusehubSession", back_populates="repo", cascade="all, delete-orphan",
        init=False, default_factory=list,
    )
    webhooks: Mapped[list["MusehubWebhook"]] = relationship(
        "MusehubWebhook", back_populates="repo", cascade="all, delete-orphan",
        init=False, default_factory=list,
    )
    wire_tags: Mapped[list[MusehubWireTag]] = relationship(
        "MusehubWireTag", back_populates="repo", cascade="all, delete-orphan",
        init=False, default_factory=list,
    )
129
130
class MusehubBranch(Base):
    """A named branch pointer inside a MuseHub repo.

    Each row associates a branch ``name`` within one repo with the commit ID
    currently at its head.  Rows are removed together with their repo through
    the ``ON DELETE CASCADE`` foreign key on ``repo_id``.
    """

    __tablename__ = "musehub_branches"
    __table_args__ = (
        # Branch name lookup: WHERE repo_id = ? AND name = ? (get_branch_head_commit_id)
        # Also covers list_branches_with_detail ORDER BY name — no in-memory sort needed.
        # NOTE(review): this index is not unique, so the schema alone does not
        # prevent two rows with the same (repo_id, name) — confirm uniqueness is
        # enforced elsewhere.
        Index("ix_musehub_branches_repo_name", "repo_id", "name"),
    )

    branch_id: Mapped[str] = mapped_column(String(128), primary_key=True)
    repo_id: Mapped[str] = mapped_column(
        String(128),
        ForeignKey("musehub_repos.repo_id", ondelete="CASCADE"),
        nullable=False,
    )
    name: Mapped[str] = mapped_column(String(255), nullable=False)
    # Null until the first push sets the head.
    # Stored as a plain string; no foreign key to musehub_commits is declared.
    head_commit_id: Mapped[str | None] = mapped_column(String(128), nullable=True)

    # Owning repo; inverse side of MusehubRepo.branches.
    repo: Mapped[MusehubRepo] = relationship("MusehubRepo", back_populates="branches")
152
153
class MusehubCommit(MappedAsDataclass, Base):
    """A content-addressed commit record — globally shared across all repos.

    Commits are immutable objects identified solely by their SHA-256 hash.
    They do not belong to any specific repo; repo membership is tracked in
    MusehubCommitRef (the materialized reachability index, mirroring
    MusehubObjectRef for blobs).

    ``parent_ids`` is an ARRAY column (not JSON) so merge commits can carry
    two parents.

    Nullability
    -----------
    The provenance, signing, semantic-versioning and review columns are
    declared ``nullable=True``: the schema permits NULL in them even though
    instances built through ``__init__`` receive a non-null Python-side
    default.  Their annotations are therefore ``... | None`` so readers and
    type checkers account for NULL.  The explicit ``nullable=`` argument on
    each column is unchanged, so the generated DDL is identical.
    """

    __tablename__ = "musehub_commits"

    # --- Required fields ---
    commit_id: Mapped[str] = mapped_column(String(128), primary_key=True)
    branch: Mapped[str] = mapped_column(String(255), nullable=False, index=True)
    message: Mapped[str] = mapped_column(Text, nullable=False)
    author: Mapped[str] = mapped_column(String(255), nullable=False)
    timestamp: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False, index=True)

    # --- Optional fields with Python-side defaults ---
    # Array of parent commit IDs; two entries for merge commits.
    parent_ids: Mapped[list[str]] = mapped_column(ARRAY(String(128)), nullable=False, default_factory=list)
    snapshot_id: Mapped[str | None] = mapped_column(String(128), nullable=True, default=None)
    # Provenance
    agent_id: Mapped[str | None] = mapped_column(String(255), nullable=True, default="")
    model_id: Mapped[str | None] = mapped_column(String(255), nullable=True, default="")
    toolchain_id: Mapped[str | None] = mapped_column(String(255), nullable=True, default="")
    # Original author branch (WireCommit.branch), distinct from push-target branch
    commit_branch: Mapped[str | None] = mapped_column(String(255), nullable=True, default=None)
    # Signing
    signature: Mapped[str | None] = mapped_column(Text, nullable=True, default="")
    signer_public_key: Mapped[str | None] = mapped_column(Text, nullable=True, default="")
    signer_key_id: Mapped[str | None] = mapped_column(String(255), nullable=True, default="")
    # Semantic versioning
    sem_ver_bump: Mapped[str | None] = mapped_column(String(10), nullable=True, default="none")
    breaking_changes: Mapped[list[str] | None] = mapped_column(ARRAY(Text), nullable=True, default_factory=list)
    # Review / CI
    reviewed_by: Mapped[list[str] | None] = mapped_column(ARRAY(Text), nullable=True, default_factory=list)
    test_runs: Mapped[int | None] = mapped_column(Integer, nullable=True, default=0)
    prompt_hash: Mapped[str | None] = mapped_column(String(255), nullable=True, default="")
    # Symbol-level delta blob sent by the Muse CLI on push.
    structured_delta: Mapped[JSONObject | None] = mapped_column(JSONB, nullable=True, default=None)
    # S3 URI of the canonical `commit <size>\0<json>` object. NULL until Phase 5 backfill.
    storage_uri: Mapped[str | None] = mapped_column(String(2048), nullable=True, default=None)
    # Python-side default for ORM inserts, server default for rows written outside the ORM.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, default_factory=_utc_now, server_default=sa.func.now()
    )
202
203
class MusehubCommitRef(Base):
    """Materialized reachability index — which repos reference which commits.

    Every repo that pushes (or pulls via fork) a commit gets one row here.
    Mirrors MusehubObjectRef exactly: content-addressed objects are global;
    per-repo membership is tracked separately for GC and listing.

    Write rules:
    - Push path: upsert (repo_id, commit_id) ON CONFLICT DO NOTHING.
    - Fork path: copy ref rows from source repo to forked repo.
    - GC path: delete ref rows for commits no longer reachable from any
      branch head; then delete musehub_commits rows with no remaining refs.

    The composite PK (repo_id, commit_id) makes all push upserts idempotent.
    """

    __tablename__ = "musehub_commit_refs"
    __table_args__ = (
        # List commits for a repo: WHERE repo_id = X
        # NOTE(review): repo_id is also the first column of the composite
        # primary key, so this index may be redundant — confirm before dropping.
        Index("ix_musehub_commit_refs_repo_id", "repo_id"),
    )

    # Both key columns cascade on delete: removing either the repo or the
    # commit row removes the membership row.
    repo_id: Mapped[str] = mapped_column(
        String(128),
        ForeignKey("musehub_repos.repo_id", ondelete="CASCADE"),
        primary_key=True,
    )
    commit_id: Mapped[str] = mapped_column(
        String(128),
        ForeignKey("musehub_commits.commit_id", ondelete="CASCADE"),
        primary_key=True,
    )
    # When the ref row was written; defaulted both in Python and by the server.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, default=_utc_now,
        server_default=sa.text("now()"),
    )

    # Owning repo; inverse side of MusehubRepo.commit_refs.
    repo: Mapped[MusehubRepo] = relationship("MusehubRepo", back_populates="commit_refs")
242
243
class MusehubObject(Base):
    """A globally content-addressed binary artifact stored in MuseHub.

    Objects are owned by no single repo. ``object_id`` (bare SHA-256 hex) is
    the primary key; identical bytes pushed by any number of repos share one
    row. Per-repo membership is tracked in ``MusehubObjectRef`` (the
    materialized reachability index).

    ``storage_uri`` lifecycle:
      ``"pending"`` — upload in progress; raw bytes in ``content_cache``.
      ``"s3://…"`` — bytes live in R2/MinIO; ``content_cache`` is NULL.

    ``content_cache`` is a transient BYTEA column written at push time and
    cleared to NULL by the background upload task. It lets the fetch path
    serve bytes immediately without waiting for the blob store.
    """

    __tablename__ = "musehub_objects"

    # Content-addressed ID — bare SHA-256 hex, e.g. "abc123..."
    object_id: Mapped[str] = mapped_column(String(128), primary_key=True)
    # Relative path hint from the first repo to push this object.
    # Not authoritative — per-repo path context lives in the snapshot manifest.
    path: Mapped[str] = mapped_column(String(1024), nullable=False)
    size_bytes: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
    # See the lifecycle in the class docstring.  The column itself is nullable.
    storage_uri: Mapped[str | None] = mapped_column(String(2048), nullable=True)
    content_cache: Mapped[bytes | None] = mapped_column(
        sa.LargeBinary, nullable=True, default=None
    )
    # Python-side default only — no server default is declared for this column.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, default=_utc_now
    )
    # NULL by default.  Presumably a soft-delete marker — verify against the GC code.
    deleted_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True), nullable=True, default=None
    )

    # Per-repo membership rows; inverse side of MusehubObjectRef.object.
    refs: Mapped[list[MusehubObjectRef]] = relationship(
        "MusehubObjectRef", back_populates="object", cascade="all, delete-orphan"
    )
283
284
class MusehubObjectRef(Base):
    """Materialized reachability index — which repos reference which objects.

    Every repo that pushes (or pulls via fork) an object gets one row here.
    The table is the authoritative answer to "what objects does repo X own?"
    and drives all per-repo operations:

    - Storage quota: SUM(o.size_bytes) JOIN object_refs WHERE repo_id = X
    - Targeted repair / decompress: JOIN object_refs WHERE repo_id = X
    - GC eligibility: DELETE objects WHERE NOT EXISTS (SELECT 1 FROM object_refs)

    Write rules:
    - Push path: upsert (repo_id, object_id) ON CONFLICT DO NOTHING.
    - Fork path: copy ref rows from source repo to forked repo.
    - GC path: delete ref rows for objects no longer reachable from any
      branch head; then delete musehub_objects rows with no remaining refs.

    The composite PK (repo_id, object_id) makes all push upserts idempotent —
    re-pushing the same object to the same repo is always O(1) and safe.
    """

    __tablename__ = "musehub_object_refs"

    # Composite primary key; both halves cascade on delete of their parent row.
    repo_id: Mapped[str] = mapped_column(
        String(128),
        ForeignKey("musehub_repos.repo_id", ondelete="CASCADE"),
        primary_key=True,
    )
    object_id: Mapped[str] = mapped_column(
        String(128),
        ForeignKey("musehub_objects.object_id", ondelete="CASCADE"),
        primary_key=True,
    )

    # One-directional: no back_populates is declared on the repo side.
    repo: Mapped[MusehubRepo] = relationship("MusehubRepo")
    # Referenced object; inverse side of MusehubObject.refs.
    object: Mapped[MusehubObject] = relationship("MusehubObject", back_populates="refs")
321
322
class MusehubSnapshot(MappedAsDataclass, Base):
    """Content-addressed file-tree record — globally shared across all repos.

    A snapshot captures the full state of a repo at a point in time.
    snapshot_id = sha256(manifest_bytes) — the hash IS the identity.
    Snapshots do not belong to any specific repo; repo membership is tracked
    in MusehubSnapshotRef (mirrors MusehubObjectRef).

    SOURCE OF TRUTH (issue #63): the canonical object is stored in S3 as
    ``snapshot <size>\\0<json>`` at the URI in ``storage_uri``.  Wire fetch
    reads from S3 first, falling back to DB only when storage_uri is null
    (pre-backfill rows).

    ``manifest_blob`` is a msgpack DB cache of the manifest for fast queries
    (intel providers, GC, governance).  It is NOT the source of truth.
    ``delta_blob`` stores the push-receive delta for efficient reconstruction;
    it is NOT used in the wire-serve path after Phase 4.

    ``entry_count`` mirrors ``len(manifest)`` at write time for O(1) counts.

    This is a dataclass-style mapping: ``snapshot_id`` and ``manifest_blob``
    have no default and are therefore required ``__init__`` arguments.
    """

    __tablename__ = "musehub_snapshots"

    # --- Required fields ---
    snapshot_id: Mapped[str] = mapped_column(String(128), primary_key=True)
    # DB cache only — msgpack-serialized {path: object_id} manifest for fast reads
    # (intel providers, GC, governance). S3 object at storage_uri is canonical.
    # NULL for delta-only push snapshots; use _reconstruct_manifest() for those.
    manifest_blob: Mapped[bytes | None] = mapped_column(sa.LargeBinary, nullable=True)

    # Delta compression — populated at push time from the mpack wire format.
    # parent_snapshot_id: the snapshot this delta was applied on top of.
    # delta_blob: msgpack-serialized {path: object_id} of files added/changed.
    # When set, wire_fetch_mpack sends the delta instead of the full manifest,
    # reducing clone wire size from O(commits × files) to O(commits × delta).
    # NOTE(review): the class docstring says delta_blob is NOT used in the
    # wire-serve path after Phase 4, which conflicts with the sentence above —
    # confirm which statement is current.
    parent_snapshot_id: Mapped[str | None] = mapped_column(String(128), nullable=True, default=None)
    delta_blob: Mapped[bytes | None] = mapped_column(sa.LargeBinary, nullable=True, default=None)

    # --- Optional fields with Python-side defaults ---
    # Sorted list of workspace-relative directory paths tracked at snapshot time.
    directories: Mapped[list[str]] = mapped_column(ARRAY(Text), nullable=False, default_factory=list)
    # Number of tracked files in this snapshot. Denormalised from len(manifest).
    entry_count: Mapped[int] = mapped_column(Integer(), nullable=False, default=0)
    # S3 URI of the canonical `snapshot <size>\0<json>` object. NULL until Phase 5 backfill.
    storage_uri: Mapped[str | None] = mapped_column(String(2048), nullable=True, default=None)
    # Python-side default only — no server default is declared for this column.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, default_factory=_utc_now
    )
371
372
class MusehubSnapshotRef(Base):
    """Materialized reachability index — which repos reference which snapshots.

    Mirrors MusehubObjectRef and MusehubCommitRef exactly.  Snapshots are
    content-addressed and globally shared; per-repo membership is tracked here.

    Write rules:
    - Push path: upsert (repo_id, snapshot_id) ON CONFLICT DO NOTHING.
    - GC path: delete snapshots with no remaining refs.
    """

    __tablename__ = "musehub_snapshot_refs"
    __table_args__ = (
        # NOTE(review): repo_id is also the first column of the composite
        # primary key, so this index may be redundant — confirm before dropping.
        Index("ix_musehub_snapshot_refs_repo_id", "repo_id"),
    )

    # Composite primary key; both halves cascade on delete of their parent row.
    repo_id: Mapped[str] = mapped_column(
        String(128),
        ForeignKey("musehub_repos.repo_id", ondelete="CASCADE"),
        primary_key=True,
    )
    snapshot_id: Mapped[str] = mapped_column(
        String(128),
        ForeignKey("musehub_snapshots.snapshot_id", ondelete="CASCADE"),
        primary_key=True,
    )
    # When the ref row was written; defaulted both in Python and by the server.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, default=_utc_now,
        server_default=sa.text("now()"),
    )
403
404
class MusehubSnapshotEntry(Base):
    """Normalized per-file row within a snapshot.

    Stores the flattened file tree as individual rows keyed by
    (snapshot_id, path) rather than as a msgpack blob.  Enables
    efficient diff queries without decoding the full manifest.
    """

    __tablename__ = "musehub_snapshot_entries"

    # Composite primary key: (snapshot_id, path).  Entry rows are deleted with
    # their snapshot via ON DELETE CASCADE.
    snapshot_id: Mapped[str] = mapped_column(
        String(128),
        ForeignKey("musehub_snapshots.snapshot_id", ondelete="CASCADE"),
        primary_key=True,
    )
    path: Mapped[str] = mapped_column(String(4096), primary_key=True)
    # Stored as a plain string; no foreign key to musehub_objects is declared.
    object_id: Mapped[str] = mapped_column(String(128), nullable=False)
    size_bytes: Mapped[int] = mapped_column(Integer(), nullable=False, default=0)
423
424
class MusehubSession(MappedAsDataclass, Base):
    """A recording session record pushed to MuseHub from the CLI.

    Sessions capture the creative context of a recording period: who was
    present, where they recorded, what they intended to create, which commits
    were made, and any closing notes.  Maps to ``muse session show`` locally.

    ``commits`` is an array of Muse commit IDs associated with the session.
    ``participants`` is an array of participant name strings.
    Both are stored as ARRAY columns, not JSON.

    This is a dataclass-style mapping: ``session_id``, ``repo_id`` and
    ``started_at`` are required ``__init__`` arguments; everything else has a
    Python-side default.
    """

    __tablename__ = "musehub_sessions"

    # --- Required fields ---
    session_id: Mapped[str] = mapped_column(String(128), primary_key=True)
    repo_id: Mapped[str] = mapped_column(
        String(128),
        ForeignKey("musehub_repos.repo_id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    started_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, index=True
    )

    # --- Optional fields ---
    schema_version: Mapped[str] = mapped_column(String(10), nullable=False, default="1", server_default="1")
    # NULL by default.
    ended_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True, default=None)
    participants: Mapped[list[str]] = mapped_column(ARRAY(Text), nullable=False, default_factory=list)
    location: Mapped[str] = mapped_column(String(500), nullable=False, default="")
    intent: Mapped[str] = mapped_column(Text, nullable=False, default="")
    # Commit IDs only; no foreign key to musehub_commits is declared.
    commits: Mapped[list[str]] = mapped_column(ARRAY(String(128)), nullable=False, default_factory=list)
    notes: Mapped[str] = mapped_column(Text, nullable=False, default="")
    is_active: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False, index=True, server_default=sa.false())
    # Python-side default only — no server default is declared for this column.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, default_factory=_utc_now
    )

    # Owning repo; inverse side of MusehubRepo.sessions.  Excluded from __init__.
    repo: Mapped[MusehubRepo] = relationship("MusehubRepo", back_populates="sessions", init=False)
464
465
class MusehubWireTag(Base):
    """A lightweight tag pushed from a Muse CLI client via the wire protocol.

    Wire tags are distinct from version releases: they carry semantic labels
    such as ``emotion:joyful`` or ``section:verse`` that annotate commits
    without implying a versioned release.  The ``tag`` field is the raw
    string pushed by the client (e.g. ``emotion:joyful``).

    The ``(repo_id, tag)`` pair is unique — a second push of the same tag
    for the same commit is a no-op (upsert at the service layer).
    """

    __tablename__ = "musehub_wire_tags"
    # Enforces the one-row-per-(repo, tag) rule described above at the DB level.
    __table_args__ = (UniqueConstraint("repo_id", "tag", name="uq_musehub_wire_tags_repo_tag"),)

    tag_id: Mapped[str] = mapped_column(String(128), primary_key=True)
    repo_id: Mapped[str] = mapped_column(
        String(128),
        ForeignKey("musehub_repos.repo_id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    # The commit this tag points to.
    # Stored as a plain string; no foreign key to musehub_commits is declared.
    commit_id: Mapped[str] = mapped_column(String(128), nullable=False)
    # Raw tag label, e.g. "emotion:joyful", "section:verse", "v1.0-wip".
    tag: Mapped[str] = mapped_column(String(500), nullable=False, index=True)
    # Python-side default only — no server default is declared for this column.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, default=_utc_now
    )

    # Owning repo; inverse side of MusehubRepo.wire_tags.
    repo: Mapped[MusehubRepo] = relationship("MusehubRepo", back_populates="wire_tags")
497
498
class MusehubBridgeMirror(Base):
    """A Git mirror registration for a MuseHub repo.

    Each row links a MuseHub repo to a remote Git repository URL and records
    the state of the last export (Muse→Git) and import (Git→Muse) operations.

    ``direction`` controls which sync directions are active:
      ``"export"`` — Muse commits are pushed to Git only
      ``"import"`` — Git commits are pulled into Muse only
      ``"bidirectional"`` — both directions are synced

    The unique constraint on ``(repo_id, git_remote_url)`` means a given Git
    URL can only be registered once per Muse repo.
    """

    __tablename__ = "musehub_bridge_mirrors"
    __table_args__ = (
        UniqueConstraint("repo_id", "git_remote_url", name="uq_bridge_mirror_repo_url"),
    )

    id: Mapped[str] = mapped_column(String(128), primary_key=True)
    repo_id: Mapped[str] = mapped_column(
        String(128),
        ForeignKey("musehub_repos.repo_id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    git_remote_url: Mapped[str] = mapped_column(String(2048), nullable=False)
    git_branch: Mapped[str] = mapped_column(String(255), nullable=False, default="muse-mirror", server_default="muse-mirror")
    # "export" | "import" | "bidirectional"
    # Plain String(20) with no default and no check constraint — the allowed
    # values are not enforced by this model.
    direction: Mapped[str] = mapped_column(String(20), nullable=False)
    # State of the most recent export (Muse→Git); all NULL-able.
    last_export_muse_commit_id: Mapped[str | None] = mapped_column(String(128), nullable=True)
    last_export_git_sha: Mapped[str | None] = mapped_column(String(128), nullable=True)
    last_export_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
    # State of the most recent import (Git→Muse); all NULL-able.
    last_import_git_sha: Mapped[str | None] = mapped_column(String(128), nullable=True)
    last_import_muse_commit_id: Mapped[str | None] = mapped_column(String(128), nullable=True)
    last_import_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
    auto_export: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False, server_default=sa.false())
    created_by: Mapped[str] = mapped_column(String(255), nullable=False)
    # Python-side default only — no server default is declared for this column.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, default=_utc_now
    )

    # One-directional: no back_populates is declared on the repo side.
    repo: Mapped[MusehubRepo] = relationship("MusehubRepo")
543
544
class MusehubMist(MappedAsDataclass, Base):
    """A Muse Mist — content-addressed, signed, forkable single-artifact share.

    A Mist is the Muse answer to GitHub gists: a single artifact (code, MIDI,
    prose, JSON Schema, ABI, or any binary blob) stored in the Muse object
    store, identified by a content-derived 12-character base-58 mist_id, signed
    with the author's Ed25519 key, and version-controlled via a Muse repo with
    ``domain="mist"``.

    Unlike a gist, a Mist:
    - Has a Muse repo behind it (full VCS: branches, commits, proposals).
    - Is domain-typed via ``artifact_type`` — the hub knows whether the
      content is code, MIDI, prose, a JSON Schema, or a Solidity ABI.
    - Carries agent provenance (``agent_id``, ``model_id``) for AI-authored mists.
    - Has semantic intelligence for code mists (symbol anchors cached in the
      ``symbol_anchors`` text-array column).
    - Is forkable via ``fork_parent_id`` self-referential FK.
    - Is embeddable via ``/{owner}/mists/{mist_id}/embed``.
    - Is MCP-accessible as ``muse:///{owner}/mists/{mist_id}``.

    The ``mist_id`` is the first 12 characters of the base-58 encoding of the
    SHA-256 digest of the initial artifact bytes — globally unique, stable
    across renames, and collision-resistant (~70 bits of entropy).

    Fork depth
    ----------
    ``fork_depth`` is 0 for originals and increments by 1 per fork tier.
    The API enforces a hard limit of 5 fork tiers to prevent fork chains
    from growing unboundedly.

    Visibility
    ----------
    ``"public"`` mists appear in the explore feed and handle list.
    ``"secret"`` mists are accessible only via direct URL — they are not
    indexed and do not appear in any listing endpoint.

    Counters
    --------
    ``view_count``, ``fork_count``, and ``embed_count`` are denormalized and
    updated atomically (``UPDATE ... SET col = col + 1``) to avoid SELECT +
    UPDATE races.
    """

    __tablename__ = "musehub_mists"
    __table_args__ = (
        # List page: owner's public mists, newest first
        Index("ix_musehub_mists_owner_visibility", "owner", "visibility"),
        # Pagination: owner's mists sorted by created_at
        Index("ix_musehub_mists_owner_created_at", "owner", "created_at"),
        # Fork graph: find all forks of a given mist
        Index("ix_musehub_mists_fork_parent_id", "fork_parent_id"),
        # Explore page: public mists by type, newest first
        Index("ix_musehub_mists_artifact_type_created_at", "artifact_type", "created_at"),
    )

    # --- Required fields ---
    # Primary key — content-addressed 12-character base-58 string (globally unique)
    # NOTE(review): the column is String(128) although the ID is described as
    # 12 characters — confirm the extra width is intentional.
    mist_id: Mapped[str] = mapped_column(String(128), primary_key=True)
    repo_id: Mapped[str] = mapped_column(
        String(128),
        ForeignKey("musehub_repos.repo_id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    owner: Mapped[str] = mapped_column(String(64), nullable=False, index=True)
    filename: Mapped[str] = mapped_column(String(255), nullable=False)
    content: Mapped[str] = mapped_column(Text, nullable=False)

    # --- Optional fields ---
    artifact_type: Mapped[str] = mapped_column(
        String(20), nullable=False, default="unknown", index=True
    )
    language: Mapped[str] = mapped_column(String(64), nullable=False, default="")
    title: Mapped[str] = mapped_column(String(500), nullable=False, default="")
    description: Mapped[str] = mapped_column(Text, nullable=False, default="")
    size_bytes: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
    # Plain strings; no foreign keys to musehub_commits / musehub_snapshots are declared.
    commit_id: Mapped[str | None] = mapped_column(String(128), nullable=True, default=None)
    snapshot_id: Mapped[str | None] = mapped_column(String(128), nullable=True, default=None)
    version: Mapped[int] = mapped_column(Integer, nullable=False, default=1)
    # Agent provenance for AI-authored mists; empty string when not set.
    agent_id: Mapped[str] = mapped_column(String(255), nullable=False, default="")
    model_id: Mapped[str] = mapped_column(String(255), nullable=False, default="")
    # NOTE(review): the class docstring describes Ed25519 signing while this
    # column is named gpg_signature — confirm which signature scheme is stored.
    gpg_signature: Mapped[str | None] = mapped_column(Text, nullable=True, default=None)
    # Self-referential FK; ON DELETE SET NULL means forks outlive a deleted parent.
    fork_parent_id: Mapped[str | None] = mapped_column(
        String(128),
        ForeignKey("musehub_mists.mist_id", ondelete="SET NULL"),
        nullable=True,
        default=None,
    )
    fork_depth: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
    # Denormalized counters — see "Counters" in the class docstring.
    fork_count: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
    view_count: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
    embed_count: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
    visibility: Mapped[str] = mapped_column(String(10), nullable=False, default="public", server_default="public")
    tags: Mapped[list[str]] = mapped_column(ARRAY(Text), nullable=False, default_factory=list)
    symbol_anchors: Mapped[list[str]] = mapped_column(ARRAY(Text), nullable=False, default_factory=list)
    # Python-side defaults only — no server defaults are declared for the timestamps.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, default_factory=_utc_now
    )
    # ``onupdate`` refreshes the value whenever the ORM emits an UPDATE for the row.
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, default_factory=_utc_now, onupdate=_utc_now
    )

    # --- Relationships — excluded from __init__ ---
    # Many-to-one side of the self-referential pair: ``remote_side`` marks
    # mist_id as the parent-side column.
    fork_parent: Mapped["MusehubMist | None"] = relationship(
        "MusehubMist",
        remote_side="MusehubMist.mist_id",
        foreign_keys=[fork_parent_id],
        back_populates="forks",
        init=False,
        default=None,
    )
    # One-to-many side.  The cascade omits "delete", so the ORM does not delete
    # forks when their parent is deleted.
    forks: Mapped[list["MusehubMist"]] = relationship(
        "MusehubMist",
        foreign_keys=[fork_parent_id],
        back_populates="fork_parent",
        cascade="save-update, merge",
        init=False,
        default_factory=list,
    )
    # One-directional: no back_populates is declared on the repo side.
    repo: Mapped[MusehubRepo] = relationship("MusehubRepo", init=False)
664
665
class MusehubMPackIndex(Base):
    """MPack index — maps every commit, snapshot, and object to the mpack containing it.

    Written by ``process_mpack_index_job`` after every push.  Enables the
    fetch path to locate covering mpacks for any entity_id without O(N)
    individual GET calls.

    Entries are content-addressed globally — no repo_id.
    Primary key is (entity_id, mpack_id).  ``on_conflict_do_nothing`` is idempotent.
    entity_type is one of: "object", "commit", "snapshot".
    """

    __tablename__ = "musehub_mpack_index"
    __table_args__ = (
        # NOTE(review): entity_id is also the first column of the composite
        # primary key, so this index may be redundant — confirm before dropping.
        Index("ix_musehub_mpack_index_entity_id", "entity_id"),
    )

    entity_id: Mapped[str] = mapped_column(String(128), primary_key=True)
    mpack_id: Mapped[str] = mapped_column(String(128), primary_key=True)
    # Server-side default only: an insert that omits entity_type gets "object"
    # from the database.  The allowed values are not enforced by this model.
    entity_type: Mapped[str] = mapped_column(
        String(16), nullable=False, server_default="object"
    )
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, default=_utc_now,
        server_default=sa.text("now()"),
    )
    # Both nullable.  Presumably the entity's byte range inside the mpack —
    # verify against the indexing job.
    byte_offset: Mapped[int | None] = mapped_column(sa.BigInteger(), nullable=True)
    byte_length: Mapped[int | None] = mapped_column(sa.Integer(), nullable=True)
694
695
# Backwards-compatible alias — remove once all call sites are updated.
# Plain name binding: both names refer to the same mapped class, so no second
# table or mapper is created by this assignment.
MusehubPackIndex = MusehubMPackIndex
698
699
class MusehubCommitGraph(Base):
    """Precomputed commit graph — enables O(frontier) BFS instead of O(N commits).

    Commits are content-addressed (like snapshots) so this table is global —
    no repo_id.  ``_walk_commit_delta`` queries in bulk (one query per BFS
    frontier) instead of one ``session.get(MusehubCommit, cid)`` per commit.

    ``generation`` is the topological depth from the root: root=0, each commit
    is max(parent generations) + 1.  The index on generation lets GC and stats
    queries run forward scans instead of full-table scans.
    """

    __tablename__ = "musehub_commit_graph"
    __table_args__ = (
        Index("ix_musehub_commit_graph_generation", "generation"),
    )

    # Stored as a plain string; no foreign key to musehub_commits is declared.
    commit_id: Mapped[str] = mapped_column(String(128), primary_key=True)
    # Empty array by default, both in Python (``list``) and on the server ("{}").
    # Declared as ARRAY(Text) here, whereas MusehubCommit.parent_ids uses
    # ARRAY(String(128)).
    parent_ids: Mapped[list[str]] = mapped_column(
        ARRAY(Text), nullable=False, default=list, server_default="{}"
    )
    generation: Mapped[int] = mapped_column(
        sa.BigInteger, nullable=False, default=0, server_default="0"
    )
    snapshot_id: Mapped[str | None] = mapped_column(String(128), nullable=True, default=None)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, default=_utc_now,
        server_default=sa.text("now()"),
    )
File History 5 commits
sha256:5dfc96524e3921eb9acb8372241b6bec70b5f3e6598f79099a0ead16ff7cbb75 feat(phase1): add musehub_fetch_mpack_cache table (issue #9… Sonnet 4.6 patch 8 days ago
sha256:400438cf8bc700a611f1ba798aa9def68290f487dc19f7dbf317985ad17050c9 chore: delete muse/prose domain — hallucinated, never existed Sonnet 4.6 minor 8 days ago
sha256:e35be48854f182f7bf02dc6cc0f58d22b3de3a544b570c0e2bc53f9e75a3607d feat(phase6): remove delta_blob path, dead imports, add fal… Sonnet 4.6 minor 22 days ago
sha256:e3296c39859814a0ae8b688be26ec1a24b3895fec467d9bdaefb7431e5ae3a93 test(phase1): failing tests + DB schema for object store in… Sonnet 4.6 minor 22 days ago
sha256:302574ddba13c9a20694c0fb051176eef4896f943b63bc458df886633b1bfcd6 feat: mpack byte-range index — store byte_offset/byte_lengt… Sonnet 4.6 minor 22 days ago