wire.py
python
sha256:5601f81903b6c70ddd11bd88a5a257ee6dfd38aa3b85b19746c100c030657f1e
chore: update smoke_muse.sh comment to reference rc9
Sonnet 4.6
minor
⚠ breaking
21 days ago
| 1 | """Wire protocol Pydantic models — Muse CLI native format (msgpack). |
| 2 | |
| 3 | These models match the Muse CLI ``HttpTransport`` wire format exactly. |
| 4 | All fields are snake_case to match Muse's internal CommitDict/SnapshotDict/ |
| 5 | BlobPayload TypedDicts. |
| 6 | |
| 7 | The wire protocol is intentionally separate from the REST API's CamelModel: |
| 8 | Wire protocol /{owner}/{slug}/push|fetch|refs ← Muse CLI speaks here (msgpack) |
| 9 | REST API /api/repos/{id}/ ← agents and integrations speak here |
| 10 | MCP /mcp ← agents speak here too |
| 11 | |
| 12 | Encoding |
| 13 | -------- |
| 14 | All wire endpoints accept and return ``application/x-msgpack`` binary. |
| 15 | Objects are transported as raw ``bytes`` under the ``content`` key — no |
| 16 | base64 encoding overhead. |
| 17 | |
| 18 | Denial-of-Service limits |
| 19 | ------------------------ |
| 20 | All list fields that arrive over the network are capped so a single large |
| 21 | request cannot exhaust memory or DB connections: |
| 22 | |
| 23 | MAX_COMMITS_PER_PUSH = 10 000 — one push should carry at most 10k commits |
| 24 | MAX_OBJECTS_PER_PUSH = 1 000 — ditto for binary blobs per chunk |
| 25 | MAX_SNAPSHOTS_PER_PUSH = 10 000 — ditto for snapshot manifests |
| 26 | MAX_WANT_PER_FETCH = 1 000 — fetch want/have lists |
| 27 | MAX_OBJECT_BYTES = 38_000_000 — ~38 MB raw; objects above this limit are rejected |
| 28 | """ |
| 29 | |
| 30 | import re |
| 31 | |
| 32 | from pydantic import BaseModel, Field, field_validator, model_validator |
| 33 | |
| 34 | from musehub.types.json_types import JSONObject, StrDict |
| 35 | from musehub.types.pydantic_types import PydanticJson |
| 36 | |
# Alias for a ``str`` -> ``int`` mapping.  No use of it is visible in this
# part of the module.
type _SizeMap = dict[str, int]

# ── Per-request DoS limits ────────────────────────────────────────────────────
# Upper bounds passed to ``Field(max_length=...)`` on the push payload lists.
MAX_COMMITS_PER_PUSH: int = 10_000  # cap on WireMPack.commits
MAX_OBJECTS_PER_PUSH: int = 1_000  # cap on WireMPack.objects
| 42 | |
# ── Object ID validation ──────────────────────────────────────────────────────
# object_id values arrive from untrusted clients and are used to construct
# storage keys (S3/R2 object paths).  A malicious value containing '/' or '..'
# could escape the objects/ key namespace and overwrite arbitrary R2 keys.
#
# Valid format:  <algo>:<lowercase hex digest>
#   - algo  : lower-case alphanumeric only (e.g. "sha256", "blake3") — no slashes
#   - digest: lowercase hex, at least 32 chars (128-bit minimum)
# Raw hex (no prefix) is rejected — the algo: prefix is mandatory everywhere.
# The pattern is intentionally algo-agnostic so future hash upgrades (blake3,
# sha3-256, …) require no validator change.  The hex-only digest ensures no
# path-traversal characters ('.', '/') can appear in the storage key.
#
# The pattern ends with \Z rather than $.  In Python, $ also matches just
# before a trailing newline, so "<valid id>\n" would pass a ``.match()`` call
# and a newline could reach the storage key.  \Z matches only at the very end.
_OBJECT_ID_RE: re.Pattern[str] = re.compile(r"^[a-z][a-z0-9]*:[0-9a-f]{32,}\Z")
_OBJECT_ID_MAX_LEN: int = 200  # generous cap; algo(<=16) + ":" + digest(<=128) = 145


def _validate_object_ids(ids: list[str]) -> list[str]:
    """Validate a list of object/commit IDs received from an untrusted client.

    Args:
        ids: Candidate IDs, each expected to look like ``<algo>:<hex digest>``.

    Returns:
        The same list object, unchanged, when every entry is valid.

    Raises:
        ValueError: If an entry does not match ``_OBJECT_ID_RE`` in full, or is
            longer than ``_OBJECT_ID_MAX_LEN`` characters.
    """
    for oid in ids:
        # fullmatch (not match) so nothing may follow the digest — in
        # particular not a trailing newline.
        if not _OBJECT_ID_RE.fullmatch(oid):
            raise ValueError(
                f"invalid object_id {oid!r}: expected '<algo>:<lowercase hex digest>' "
                "with a digest of at least 32 hex characters"
            )
        if len(oid) > _OBJECT_ID_MAX_LEN:
            raise ValueError(
                f"object_id exceeds maximum length ({_OBJECT_ID_MAX_LEN}): {oid[:40]!r}…"
            )
    return ids
# Remaining per-request DoS limits, used as ``Field(max_length=...)`` bounds.
MAX_SNAPSHOTS_PER_PUSH: int = 10_000  # cap on WireMPack.snapshots
MAX_WANT_PER_FETCH: int = 1_000  # cap on both WireFetchRequest.want and .have
# Raw bytes limit per object — objects above this are rejected at the wire layer.
MAX_OBJECT_BYTES: int = 38_000_000  # enforced on WireObject.content
| 74 | |
class WireCommit(BaseModel):
    """Muse native commit record — mirrors CommitDict from muse.core.store.

    Field names match CommitDict exactly so both sides of the wire use the
    same vocabulary.  ``branch`` is the branch where the author made the
    commit; it is distinct from the push-target branch in the push request body.

    Validation
    ----------
    ``commit_id``, ``snapshot_id``, ``parent_commit_id``, ``parent2_commit_id``
    and ``prompt_hash`` must match ``_OBJECT_ID_RE`` in full and be at most
    ``_OBJECT_ID_MAX_LEN`` characters long.  The optional IDs may be ``None``
    and ``prompt_hash`` may be empty.  ``fullmatch`` is used instead of
    ``match`` so that nothing (for example a trailing newline) may follow the
    digest.  Keys not declared here are ignored.
    """

    commit_id: str
    repo_id: str = ""
    branch: str = ""  # author's branch (CommitDict.branch)
    snapshot_id: str | None = None
    message: str = ""
    committed_at: str = ""  # ISO-8601 UTC string
    parent_commit_id: str | None = None  # first parent (linear history)
    parent2_commit_id: str | None = None  # second parent (merge commits)
    author: str = ""
    metadata: StrDict = Field(default_factory=dict)
    structured_delta: PydanticJson | None = None  # domain-specific delta blob
    sem_ver_bump: str = "none"  # "none" | "patch" | "minor" | "major"
    breaking_changes: list[str] = Field(default_factory=list)
    agent_id: str = ""
    model_id: str = ""
    toolchain_id: str = ""
    prompt_hash: str = ""
    signature: str = ""
    signer_public_key: str = ""
    signer_key_id: str = ""
    format_version: int = 7
    reviewed_by: list[str] = Field(default_factory=list)
    test_runs: int = 0

    model_config = {"extra": "ignore"}  # tolerate future Muse fields gracefully

    # NOTE(review): the messages below name sha256 with 64 hex chars, but the
    # pattern accepts any '<algo>:<hex>' ID with 32+ hex chars.  The message
    # text is left as-is in case clients match on it.

    @field_validator("commit_id")
    @classmethod
    def _check_commit_id(cls, v: str) -> str:
        """Reject a malformed or over-long ``commit_id``."""
        if len(v) > _OBJECT_ID_MAX_LEN or not _OBJECT_ID_RE.fullmatch(v):
            raise ValueError(
                f"invalid commit_id {v!r}: must be 'sha256:<64 lowercase hex chars>'"
            )
        return v

    @field_validator("snapshot_id")
    @classmethod
    def _check_snapshot_id(cls, v: str | None) -> str | None:
        """Reject a malformed or over-long ``snapshot_id``; ``None`` passes."""
        if v is not None and (
            len(v) > _OBJECT_ID_MAX_LEN or not _OBJECT_ID_RE.fullmatch(v)
        ):
            raise ValueError(
                f"invalid snapshot_id {v!r}: must be 'sha256:<64 lowercase hex chars>'"
            )
        return v

    @field_validator("parent_commit_id")
    @classmethod
    def _check_parent_commit_id(cls, v: str | None) -> str | None:
        """Reject a malformed or over-long first-parent ID; ``None`` passes."""
        if v is not None and (
            len(v) > _OBJECT_ID_MAX_LEN or not _OBJECT_ID_RE.fullmatch(v)
        ):
            raise ValueError(
                f"invalid parent_commit_id {v!r}: must be 'sha256:<64 lowercase hex chars>'"
            )
        return v

    @field_validator("parent2_commit_id")
    @classmethod
    def _check_parent2_commit_id(cls, v: str | None) -> str | None:
        """Reject a malformed or over-long second-parent ID; ``None`` passes.

        Applies the same rule as ``parent_commit_id`` so the second parent of
        a merge commit cannot carry an unchecked, client-supplied value.
        """
        if v is not None and (
            len(v) > _OBJECT_ID_MAX_LEN or not _OBJECT_ID_RE.fullmatch(v)
        ):
            raise ValueError(
                f"invalid parent2_commit_id {v!r}: must be 'sha256:<64 lowercase hex chars>'"
            )
        return v

    @field_validator("prompt_hash")
    @classmethod
    def _check_prompt_hash(cls, v: str) -> str:
        """Reject a non-empty ``prompt_hash`` that is malformed or over-long."""
        if v and (len(v) > _OBJECT_ID_MAX_LEN or not _OBJECT_ID_RE.fullmatch(v)):
            raise ValueError(
                f"invalid prompt_hash {v!r}: must be empty or 'sha256:<64 lowercase hex chars>'"
            )
        return v
| 144 | |
class WireSnapshot(BaseModel):
    """Unified snapshot wire format — same shape in both push and fetch directions.

    Both the client (push) and server (fetch) use delta encoding:
    - ``delta_upsert``       — files added or changed relative to parent ({path: oid})
    - ``delta_remove``       — paths removed relative to parent
    - ``parent_snapshot_id`` — None for the root snapshot of a push chain

    The root snapshot of a new repo has no parent; its ``delta_upsert`` equals the
    full manifest.  All other snapshots carry only the diff.

    ``directories`` is the sorted list of workspace-relative directory paths
    tracked at snapshot time.  It is included in the snapshot_id hash.

    The client's ``apply_mpack`` already handles this format.  ``manifest`` is
    accepted for backward compatibility but never produced by the server.
    (No ``manifest`` field is declared here, so with ``extra="ignore"`` such a
    key is dropped during validation.)

    Validation
    ----------
    ``snapshot_id``, ``parent_snapshot_id`` (when not ``None``) and every value
    of ``delta_upsert`` must match ``_OBJECT_ID_RE`` in full and be at most
    ``_OBJECT_ID_MAX_LEN`` characters long.  ``fullmatch`` is used instead of
    ``match`` so that nothing (for example a trailing newline) may follow the
    digest.  The three collection fields hold at most 10 000 entries each.
    """

    snapshot_id: str
    parent_snapshot_id: str | None = None
    delta_upsert: StrDict = Field(default_factory=dict, max_length=10_000)
    delta_remove: list[str] = Field(default_factory=list, max_length=10_000)
    directories: list[str] = Field(default_factory=list, max_length=10_000)
    created_at: str = ""

    model_config = {"extra": "ignore"}

    @field_validator("snapshot_id")
    @classmethod
    def _check_snapshot_id(cls, v: str) -> str:
        """Reject a malformed or over-long ``snapshot_id``."""
        if len(v) > _OBJECT_ID_MAX_LEN or not _OBJECT_ID_RE.fullmatch(v):
            raise ValueError(
                f"invalid snapshot_id {v!r}: must be 'sha256:<64 lowercase hex chars>'"
            )
        return v

    @field_validator("parent_snapshot_id")
    @classmethod
    def _check_parent_snapshot_id(cls, v: str | None) -> str | None:
        """Reject a malformed or over-long parent ID; ``None`` (root) passes."""
        if v is not None and (
            len(v) > _OBJECT_ID_MAX_LEN or not _OBJECT_ID_RE.fullmatch(v)
        ):
            raise ValueError(
                f"invalid parent_snapshot_id {v!r}: must be 'sha256:<64 lowercase hex chars>'"
            )
        return v

    @field_validator("delta_upsert")
    @classmethod
    def _check_delta_upsert_values(cls, v: StrDict) -> StrDict:
        """Validate every object ID on the value side of ``delta_upsert``.

        Only the values are checked; the path keys are returned untouched.
        """
        for path, oid in v.items():
            if not _OBJECT_ID_RE.fullmatch(oid):
                raise ValueError(
                    f"delta_upsert entry {path!r} has invalid object_id {oid!r}: "
                    "must be 'sha256:<64 lowercase hex chars>'"
                )
            if len(oid) > _OBJECT_ID_MAX_LEN:
                raise ValueError(
                    f"delta_upsert entry {path!r} object_id exceeds maximum length: {oid[:40]!r}…"
                )
        return v
| 195 | |
class WireObject(BaseModel):
    """Content-addressed blob payload — mirrors BlobPayload from muse.core.mpack.

    ``content`` is raw bytes (msgpack bin type) — no base64 overhead.

    Encoding field controls how the server interprets ``content``:
      ``"raw"``        — plain bytes; store as-is after hash verification.
      ``"zlib"``       — zlib-compressed; decompress then verify hash.
      ``"delta+zlib"`` — delta-encoded relative to ``base_id``, then zlib-compressed;
                         fetch base, apply delta, then verify hash.

    Validation
    ----------
    ``object_id`` and ``base_id`` (when not ``None``) must match
    ``_OBJECT_ID_RE`` in full and be at most ``_OBJECT_ID_MAX_LEN`` characters
    long.  ``base_id`` is checked because it names an object to be fetched and
    is client-supplied just like ``object_id``.  ``fullmatch`` is used instead
    of ``match`` so that nothing (for example a trailing newline) may follow
    the digest.  ``content`` may hold at most ``MAX_OBJECT_BYTES`` bytes.
    """

    object_id: str
    content: bytes = Field(max_length=MAX_OBJECT_BYTES)
    path: str = Field(default="", max_length=4096)
    # NOTE(review): ``encoding`` is not restricted to the three values listed in
    # the docstring, and "delta+zlib" is not required to come with a base_id
    # here — presumably the push handler enforces both; confirm.
    encoding: str = Field(default="raw")
    base_id: str | None = Field(default=None)

    model_config = {"extra": "ignore"}

    @field_validator("object_id")
    @classmethod
    def _check_object_id(cls, v: str) -> str:
        """Reject a malformed or over-long ``object_id``."""
        if len(v) > _OBJECT_ID_MAX_LEN or not _OBJECT_ID_RE.fullmatch(v):
            raise ValueError(
                f"invalid object_id {v!r}: must be 'sha256:<64 lowercase hex chars>'"
            )
        return v

    @field_validator("base_id")
    @classmethod
    def _check_base_id(cls, v: str | None) -> str | None:
        """Reject a malformed or over-long ``base_id``; ``None`` passes."""
        if v is not None and (
            len(v) > _OBJECT_ID_MAX_LEN or not _OBJECT_ID_RE.fullmatch(v)
        ):
            raise ValueError(
                f"invalid base_id {v!r}: must be 'sha256:<64 lowercase hex chars>'"
            )
        return v

    @field_validator("content")
    @classmethod
    def _check_content_size(cls, v: bytes) -> bytes:
        """Reject ``content`` longer than ``MAX_OBJECT_BYTES``.

        This repeats the ``max_length`` bound declared on the field.
        """
        if len(v) > MAX_OBJECT_BYTES:
            raise ValueError(
                f"content exceeds maximum size ({MAX_OBJECT_BYTES} bytes)."
            )
        return v
| 233 | |
class WireMPack(BaseModel):
    """Push-request payload: the commits, snapshots and objects being uploaded.

    Mirrors MPack from muse.core.mpack.  Every field defaults to an empty
    container, so a minimal push may carry commits only, with no new objects.

    The three list fields are length-capped by the module-level ``MAX_*``
    constants to prevent DoS via an oversized single request.
    """

    commits: list[WireCommit] = Field(
        default_factory=list,
        max_length=MAX_COMMITS_PER_PUSH,
    )
    snapshots: list[WireSnapshot] = Field(
        default_factory=list,
        max_length=MAX_SNAPSHOTS_PER_PUSH,
    )
    objects: list[WireObject] = Field(
        default_factory=list,
        max_length=MAX_OBJECTS_PER_PUSH,
    )
    # NOTE(review): unlike the lists above, this mapping has no size cap.
    branch_heads: StrDict = Field(default_factory=dict)
| 248 | |
class WireFetchRequest(BaseModel):
    """Body for ``POST /wire/repos/{repo_id}/fetch``.

    Matches HttpTransport.fetch_mpack() payload:
        ``{"want": [...sha...], "have": [...sha...]}``

    ``want``  — commit SHAs the client wants.
    ``have``  — commit SHAs the client already has (exclusion list).
    ``depth`` — optional; when given it must be an integer >= 1.
                (Presumably limits how much history is returned — confirm
                against the fetch handler.)

    ``want`` and ``have`` each hold at most ``MAX_WANT_PER_FETCH`` entries, and
    every entry is checked by ``_validate_object_ids``.
    """

    want: list[str] = Field(
        default_factory=list,
        max_length=MAX_WANT_PER_FETCH,
    )
    have: list[str] = Field(
        default_factory=list,
        max_length=MAX_WANT_PER_FETCH,
    )
    depth: int | None = Field(default=None, ge=1)

    @field_validator("want", "have")
    @classmethod
    def _check_commit_ids(cls, v: list[str]) -> list[str]:
        """Run each ID in the list through the shared object-ID check."""
        checked = _validate_object_ids(v)
        return checked
| 267 | |
class WireRefsResponse(BaseModel):
    """Response for ``GET /wire/repos/{repo_id}/refs``.

    Parsed by HttpTransport._parse_remote_info() into RemoteInfo.

    All four fields are required; none has a default, and no validators are
    attached to them in this model.
    """

    repo_id: str
    domain: str  # NOTE(review): meaning not shown in this file — confirm with the client
    default_branch: str
    branch_heads: StrDict  # presumably branch name -> head commit ID; verify against caller
| 278 |
File History
2 commits
sha256:5601f81903b6c70ddd11bd88a5a257ee6dfd38aa3b85b19746c100c030657f1e
chore: update smoke_muse.sh comment to reference rc9
Sonnet 4.6
minor
⚠
21 days ago
sha256:39e9c4e6f2134da0732e6983268a218178973936f8d7ca03c91f2b5ad42133c8
fix: use read_object_bytes in blob viewer; add zstd magic d…
Sonnet 4.6
patch
21 days ago