gabriel / musehub public
wire.py python
278 lines 11.2 KB
Raw
sha256:5601f81903b6c70ddd11bd88a5a257ee6dfd38aa3b85b19746c100c030657f1e chore: update smoke_muse.sh comment to reference rc9 Sonnet 4.6 minor ⚠ breaking 21 days ago
1 """Wire protocol Pydantic models — Muse CLI native format (msgpack).
2
3 These models match the Muse CLI ``HttpTransport`` wire format exactly.
4 All fields are snake_case to match Muse's internal CommitDict/SnapshotDict/
5 BlobPayload TypedDicts.
6
7 The wire protocol is intentionally separate from the REST API's CamelModel:
8 Wire protocol /{owner}/{slug}/push|fetch|refs ← Muse CLI speaks here (msgpack)
9 REST API /api/repos/{id}/ ← agents and integrations speak here
10 MCP /mcp ← agents speak here too
11
12 Encoding
13 --------
14 All wire endpoints accept and return ``application/x-msgpack`` binary.
15 Objects are transported as raw ``bytes`` under the ``content`` key — no
16 base64 encoding overhead.
17
18 Denial-of-Service limits
19 ------------------------
20 All list fields that arrive over the network are capped so a single large
21 request cannot exhaust memory or DB connections:
22
23 MAX_COMMITS_PER_PUSH = 10 000 — one push should carry at most 10k commits
24 MAX_OBJECTS_PER_PUSH = 1 000 — ditto for binary blobs per chunk
25 MAX_SNAPSHOTS_PER_PUSH = 10 000 — ditto for snapshot manifests
26 MAX_WANT_PER_FETCH = 1 000 — fetch want/have lists
27 MAX_OBJECT_BYTES = 38_000_000 — ~38 MB raw; objects above this limit are rejected
28 """
29
30 import re
31
32 from pydantic import BaseModel, Field, field_validator, model_validator
33
34 from musehub.types.json_types import JSONObject, StrDict
35 from musehub.types.pydantic_types import PydanticJson
36
37 type _SizeMap = dict[str, int]
38
# ── Per-request DoS limits ────────────────────────────────────────────────────
# Cap on the number of commits in one push (``WireMPack.commits``).
MAX_COMMITS_PER_PUSH: int = 10_000
# Cap on the number of binary objects in one push (``WireMPack.objects``).
MAX_OBJECTS_PER_PUSH: int = 1_000
42
43 # ── Object ID validation ──────────────────────────────────────────────────────
44 # object_id values arrive from untrusted clients and are used to construct
45 # storage keys (S3/R2 object paths). A malicious value containing '/' or '..'
46 # could escape the objects/ key namespace and overwrite arbitrary R2 keys.
47 #
48 # Valid format: <algo>:<lowercase hex digest>
49 # - algo : lower-case alphanumeric only (e.g. "sha256", "blake3") — no slashes
50 # - digest: lowercase hex, at least 32 chars (128-bit minimum)
51 # Raw hex (no prefix) is rejected — the algo: prefix is mandatory everywhere.
52 # The pattern is intentionally algo-agnostic so future hash upgrades (blake3,
53 # sha3-256, …) require no validator change. The hex-only digest ensures no
54 # path-traversal characters ('.', '/') can appear in the storage key.
# ``\Z`` (not ``$``) anchors the end of the pattern: ``$`` also matches just
# before a trailing newline, so with ``.match()`` a value such as
# "sha256:<hex>\n" would pass validation and reach the storage key.
_OBJECT_ID_RE: re.Pattern[str] = re.compile(r"^[a-z][a-z0-9]*:[0-9a-f]{32,}\Z")
_OBJECT_ID_MAX_LEN: int = 200  # generous cap; algo(<=16) + ":" + digest(<=128) = 145


def _validate_object_ids(ids: list[str]) -> list[str]:
    """Validate every entry of *ids* as a well-formed object ID.

    An ID is accepted when it is at most ``_OBJECT_ID_MAX_LEN`` characters
    long and matches ``_OBJECT_ID_RE`` (``<algo>:<lowercase hex digest>``,
    digest of at least 32 hex characters).

    Args:
        ids: Object IDs received from the client.

    Returns:
        The same list object, unchanged, when every entry is valid.

    Raises:
        ValueError: On the first entry that is too long or malformed.
    """
    for oid in ids:
        # Length first: it is the cheap check, and it keeps an oversized value
        # from being echoed in full inside the "invalid" error message.
        if len(oid) > _OBJECT_ID_MAX_LEN:
            raise ValueError(
                f"object_id exceeds maximum length ({_OBJECT_ID_MAX_LEN}): {oid[:40]!r}…"
            )
        if not _OBJECT_ID_RE.match(oid):
            raise ValueError(
                f"invalid object_id {oid!r}: must be '<algo>:<lowercase hex digest>' "
                "with a digest of at least 32 hex characters"
            )
    return ids
# Cap on the number of snapshot manifests in one push (``WireMPack.snapshots``).
MAX_SNAPSHOTS_PER_PUSH: int = 10_000
# Cap on the length of each of ``want`` and ``have`` in a fetch request.
MAX_WANT_PER_FETCH: int = 1_000
# Raw bytes limit per object — objects above this are rejected at the wire layer.
MAX_OBJECT_BYTES: int = 38_000_000
74
class WireCommit(BaseModel):
    """Muse native commit record — mirrors CommitDict from muse.core.store.

    Field names match CommitDict exactly so both sides of the wire use the
    same vocabulary.  ``branch`` is the branch where the author made the
    commit; it is distinct from the push-target branch in the push request body.

    Validation
    ----------
    ``commit_id`` is mandatory and must match ``_OBJECT_ID_RE``.
    ``snapshot_id``, ``parent_commit_id`` and ``parent2_commit_id`` are checked
    against the same pattern whenever they are not ``None``; ``prompt_hash`` is
    checked whenever it is non-empty.  Every check uses ``fullmatch`` so a
    value carrying a trailing newline is rejected.  Unknown fields are ignored.
    """

    commit_id: str
    repo_id: str = ""
    branch: str = ""  # author's branch (CommitDict.branch)
    snapshot_id: str | None = None
    message: str = ""
    committed_at: str = ""  # ISO-8601 UTC string
    parent_commit_id: str | None = None  # first parent (linear history)
    parent2_commit_id: str | None = None  # second parent (merge commits)
    author: str = ""
    metadata: StrDict = Field(default_factory=dict)
    structured_delta: PydanticJson | None = None  # domain-specific delta blob
    sem_ver_bump: str = "none"  # "none" | "patch" | "minor" | "major"
    breaking_changes: list[str] = Field(default_factory=list)
    agent_id: str = ""
    model_id: str = ""
    toolchain_id: str = ""
    prompt_hash: str = ""
    signature: str = ""
    signer_public_key: str = ""
    signer_key_id: str = ""
    format_version: int = 7
    reviewed_by: list[str] = Field(default_factory=list)
    test_runs: int = 0

    model_config = {"extra": "ignore"}  # tolerate future Muse fields gracefully

    @field_validator("commit_id")
    @classmethod
    def _check_commit_id(cls, v: str) -> str:
        """Reject a ``commit_id`` that is not a well-formed object ID."""
        # fullmatch, not match: with match() a trailing "\n" slips past ``$``.
        if not _OBJECT_ID_RE.fullmatch(v):
            raise ValueError(
                f"invalid commit_id {v!r}: must be 'sha256:<64 lowercase hex chars>'"
            )
        return v

    @field_validator("snapshot_id")
    @classmethod
    def _check_snapshot_id(cls, v: str | None) -> str | None:
        """Reject a non-``None`` ``snapshot_id`` that is not a well-formed ID."""
        if v is not None and not _OBJECT_ID_RE.fullmatch(v):
            raise ValueError(
                f"invalid snapshot_id {v!r}: must be 'sha256:<64 lowercase hex chars>'"
            )
        return v

    @field_validator("parent_commit_id")
    @classmethod
    def _check_parent_commit_id(cls, v: str | None) -> str | None:
        """Reject a non-``None`` first-parent ID that is not a well-formed ID."""
        if v is not None and not _OBJECT_ID_RE.fullmatch(v):
            raise ValueError(
                f"invalid parent_commit_id {v!r}: must be 'sha256:<64 lowercase hex chars>'"
            )
        return v

    @field_validator("parent2_commit_id")
    @classmethod
    def _check_parent2_commit_id(cls, v: str | None) -> str | None:
        """Reject a non-``None`` second-parent ID that is not a well-formed ID.

        Mirrors the first-parent check so a merge commit cannot carry an
        unvalidated ID in its second parent slot.
        """
        if v is not None and not _OBJECT_ID_RE.fullmatch(v):
            raise ValueError(
                f"invalid parent2_commit_id {v!r}: must be 'sha256:<64 lowercase hex chars>'"
            )
        return v

    @field_validator("prompt_hash")
    @classmethod
    def _check_prompt_hash(cls, v: str) -> str:
        """Accept an empty ``prompt_hash``; otherwise require a well-formed ID."""
        if v and not _OBJECT_ID_RE.fullmatch(v):
            raise ValueError(
                f"invalid prompt_hash {v!r}: must be empty or 'sha256:<64 lowercase hex chars>'"
            )
        return v
144
class WireSnapshot(BaseModel):
    """Unified snapshot wire format — same shape in both push and fetch directions.

    Both the client (push) and server (fetch) use delta encoding:
      - ``delta_upsert`` — files added or changed relative to parent ({path: oid})
      - ``delta_remove`` — paths removed relative to parent
      - ``parent_snapshot_id`` — None for the root snapshot of a push chain

    The root snapshot of a new repo has no parent; its ``delta_upsert`` equals the
    full manifest.  All other snapshots carry only the diff.

    ``directories`` is the sorted list of workspace-relative directory paths
    tracked at snapshot time.  It is included in the snapshot_id hash.

    The client's ``apply_mpack`` already handles this format.  ``manifest`` is
    accepted for backward compatibility but never produced by the server.

    Validation
    ----------
    ``snapshot_id`` must match ``_OBJECT_ID_RE``; ``parent_snapshot_id`` must
    match it whenever it is not ``None``.  Every ``delta_upsert`` value must be
    at most ``_OBJECT_ID_MAX_LEN`` characters and match the same pattern.
    Checks use ``fullmatch`` so a trailing newline is rejected.  The three
    collection fields are each capped at 10 000 entries.
    """

    snapshot_id: str
    parent_snapshot_id: str | None = None
    delta_upsert: StrDict = Field(default_factory=dict, max_length=10_000)
    delta_remove: list[str] = Field(default_factory=list, max_length=10_000)
    directories: list[str] = Field(default_factory=list, max_length=10_000)
    created_at: str = ""

    model_config = {"extra": "ignore"}

    @field_validator("snapshot_id")
    @classmethod
    def _check_snapshot_id(cls, v: str) -> str:
        """Reject a ``snapshot_id`` that is not a well-formed object ID."""
        # fullmatch, not match: with match() a trailing "\n" slips past ``$``.
        if not _OBJECT_ID_RE.fullmatch(v):
            raise ValueError(
                f"invalid snapshot_id {v!r}: must be 'sha256:<64 lowercase hex chars>'"
            )
        return v

    @field_validator("parent_snapshot_id")
    @classmethod
    def _check_parent_snapshot_id(cls, v: str | None) -> str | None:
        """Reject a non-``None`` ``parent_snapshot_id`` that is not a well-formed ID."""
        if v is not None and not _OBJECT_ID_RE.fullmatch(v):
            raise ValueError(
                f"invalid parent_snapshot_id {v!r}: must be 'sha256:<64 lowercase hex chars>'"
            )
        return v

    @field_validator("delta_upsert")
    @classmethod
    def _check_delta_upsert_values(cls, v: StrDict) -> StrDict:
        """Require every ``delta_upsert`` value to be a bounded, well-formed object ID.

        Raises:
            ValueError: On the first entry whose object ID is too long or malformed.
        """
        for path, oid in v.items():
            # Length first so an oversized ID is never echoed in full below.
            if len(oid) > _OBJECT_ID_MAX_LEN:
                raise ValueError(
                    f"delta_upsert entry {path!r} object_id exceeds maximum length: {oid[:40]!r}…"
                )
            if not _OBJECT_ID_RE.fullmatch(oid):
                raise ValueError(
                    f"delta_upsert entry {path!r} has invalid object_id {oid!r}: "
                    "must be 'sha256:<64 lowercase hex chars>'"
                )
        return v
195
class WireObject(BaseModel):
    """Content-addressed blob payload — mirrors BlobPayload from muse.core.mpack.

    ``content`` is raw bytes (msgpack bin type) — no base64 overhead.

    Encoding field controls how the server interprets ``content``:
      ``"raw"``        — plain bytes; store as-is after hash verification.
      ``"zlib"``       — zlib-compressed; decompress then verify hash.
      ``"delta+zlib"`` — delta-encoded relative to ``base_id``, then zlib-compressed;
                         fetch base, apply delta, then verify hash.

    Validation
    ----------
    ``object_id`` must be at most ``_OBJECT_ID_MAX_LEN`` characters and match
    ``_OBJECT_ID_RE``.  ``base_id`` names another object, so it gets the same
    checks whenever it is non-empty; ``None`` and ``""`` pass through unchanged.
    Checks use ``fullmatch`` so a trailing newline is rejected.  ``content`` is
    limited to ``MAX_OBJECT_BYTES`` and ``path`` to 4096 characters.
    """

    object_id: str
    content: bytes = Field(max_length=MAX_OBJECT_BYTES)
    path: str = Field(default="", max_length=4096)
    encoding: str = Field(default="raw")
    base_id: str | None = Field(default=None)

    model_config = {"extra": "ignore"}

    @field_validator("object_id")
    @classmethod
    def _check_object_id(cls, v: str) -> str:
        """Reject an ``object_id`` that is too long or not a well-formed object ID."""
        # Length first so an oversized ID is never echoed in full below.
        if len(v) > _OBJECT_ID_MAX_LEN:
            raise ValueError(
                f"object_id exceeds maximum length ({_OBJECT_ID_MAX_LEN}): {v[:40]!r}…"
            )
        # fullmatch, not match: with match() a trailing "\n" slips past ``$``.
        if not _OBJECT_ID_RE.fullmatch(v):
            raise ValueError(
                f"invalid object_id {v!r}: must be 'sha256:<64 lowercase hex chars>'"
            )
        return v

    @field_validator("base_id")
    @classmethod
    def _check_base_id(cls, v: str | None) -> str | None:
        """Apply the ``object_id`` checks to a non-empty ``base_id``.

        ``None`` and the empty string are returned unchanged so payloads that
        carry no delta base keep validating.
        """
        if not v:
            return v
        if len(v) > _OBJECT_ID_MAX_LEN:
            raise ValueError(
                f"base_id exceeds maximum length ({_OBJECT_ID_MAX_LEN}): {v[:40]!r}…"
            )
        if not _OBJECT_ID_RE.fullmatch(v):
            raise ValueError(
                f"invalid base_id {v!r}: must be 'sha256:<64 lowercase hex chars>'"
            )
        return v

    @field_validator("content")
    @classmethod
    def _check_content_size(cls, v: bytes) -> bytes:
        """Reject ``content`` longer than ``MAX_OBJECT_BYTES``.

        Repeats the bound already declared via ``Field(max_length=...)``.
        """
        if len(v) > MAX_OBJECT_BYTES:
            raise ValueError(
                f"content exceeds maximum size ({MAX_OBJECT_BYTES} bytes)."
            )
        return v
233
class WireMPack(BaseModel):
    """Push-request mpack payload — the wire counterpart of MPack in muse.core.mpack.

    Every field defaults to an empty container, so a minimal push can carry
    commits alone with no new objects.

    The three list fields are length-capped by the module-level ``MAX_*``
    constants, so a single oversized request fails validation instead of
    being processed.
    """

    commits: list[WireCommit] = Field(
        max_length=MAX_COMMITS_PER_PUSH,
        default_factory=list,
    )
    snapshots: list[WireSnapshot] = Field(
        max_length=MAX_SNAPSHOTS_PER_PUSH,
        default_factory=list,
    )
    objects: list[WireObject] = Field(
        max_length=MAX_OBJECTS_PER_PUSH,
        default_factory=list,
    )
    branch_heads: StrDict = Field(default_factory=dict)
248
class WireFetchRequest(BaseModel):
    """Request body for ``POST /wire/repos/{repo_id}/fetch``.

    Shape follows the HttpTransport.fetch_mpack() payload:
        ``{"want": [...sha...], "have": [...sha...]}``

    Fields:
        ``want``  — commit SHAs the client is asking for.
        ``have``  — commit SHAs the client already holds (exclusion list).
        ``depth`` — optional; must be >= 1 when supplied.

    ``want`` and ``have`` are each limited to ``MAX_WANT_PER_FETCH`` entries,
    and every entry is checked by ``_validate_object_ids``.
    """

    want: list[str] = Field(
        max_length=MAX_WANT_PER_FETCH,
        default_factory=list,
    )
    have: list[str] = Field(
        max_length=MAX_WANT_PER_FETCH,
        default_factory=list,
    )
    depth: int | None = Field(ge=1, default=None)

    @field_validator("want", "have")
    @classmethod
    def _check_commit_ids(cls, ids: list[str]) -> list[str]:
        """Run the shared object-ID check over a ``want`` or ``have`` list."""
        checked = _validate_object_ids(ids)
        return checked
267
class WireRefsResponse(BaseModel):
    """Response for ``GET /wire/repos/{repo_id}/refs``.

    Parsed by HttpTransport._parse_remote_info() into RemoteInfo.

    All four fields are required; none has a default.
    """

    repo_id: str
    domain: str
    default_branch: str
    # NOTE(review): presumably branch name → head commit ID; confirm against
    # the refs handler.
    branch_heads: StrDict
278
File History 2 commits
sha256:5601f81903b6c70ddd11bd88a5a257ee6dfd38aa3b85b19746c100c030657f1e chore: update smoke_muse.sh comment to reference rc9 Sonnet 4.6 minor 21 days ago
sha256:39e9c4e6f2134da0732e6983268a218178973936f8d7ca03c91f2b5ad42133c8 fix: use read_object_bytes in blob viewer; add zstd magic d… Sonnet 4.6 patch 21 days ago