transport.py
python
sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b
fix: try fetch/presign before fetch/mpack to avoid Cloudfla…
Sonnet 4.6
patch
23 hours ago
| 1 | """Muse transport layer — presign + mpack remote communication. |
| 2 | |
| 3 | The :class:`MuseTransport` Protocol defines the interface between the Muse CLI |
| 4 | and a remote host. Use :func:`make_transport` instead of constructing |
| 5 | :class:`HttpTransport` directly — it always returns an ``HttpTransport``. |
| 6 | |
| 7 | Push flow |
| 8 | --------- |
| 9 | 1. ``POST {url}/push/mpack-presign`` — get presigned PUT URL for the mpack. |
| 10 | 2. ``PUT <presigned_url>`` — upload mpack to R2/MinIO directly. |
| 11 | 3. ``POST {url}/push/unpack-mpack`` — notify server to index the uploaded mpack. |
| 12 | |
| 13 | Fetch flow |
| 14 | ---------- |
| 15 | 1. ``POST {url}/fetch`` — receive presigned GET URL for the fetch mpack. |
| 16 | 2. ``GET <mpack_url>`` — download mpack from R2/MinIO directly. |
| 17 | 3. Verify ``sha256(bytes) == mpack_id`` before applying. |
| 18 | |
| 19 | Authentication |
| 20 | -------------- |
| 21 | All endpoints accept ``Authorization: MSign handle="<handle>" ts=<unix> |
| 22 | sig="<b64url>"`` for Ed25519 per-request signing. The signing identity is |
| 23 | never written to any log line. |
| 24 | |
| 25 | Error codes |
| 26 | ----------- |
| 27 | 401 Unauthorized — invalid or missing signature |
| 28 | 404 Not found — repo does not exist on the remote |
| 29 | 409 Conflict — push rejected (non-fast-forward without ``--force``) |
| 30 | 5xx Server error |
| 31 | |
| 32 | Security model |
| 33 | -------------- |
| 34 | - Refuses HTTP redirects by default (prevents credential leakage to other hosts); ``fetch_mpack`` explicitly opts in to following them. |
| 35 | - Rejects non-HTTPS URLs when signing credentials are present. |
| 36 | - Caps response bodies at ``MAX_RESPONSE_BYTES`` (64 MiB) to prevent OOM. |
| 37 | """ |
| 38 | |
| 39 | import collections.abc |
| 40 | import contextlib |
| 41 | import json |
| 42 | import logging |
| 43 | import pathlib |
| 44 | import signal |
| 45 | import threading |
| 46 | import time as _time_mod |
| 47 | import urllib.parse |
| 48 | from typing import TYPE_CHECKING, NamedTuple, Protocol, TypedDict |
| 49 | |
| 50 | if TYPE_CHECKING: |
| 51 | import ssl |
| 52 | from cryptography.hazmat.primitives.asymmetric.ed25519 import Ed25519PrivateKey |
| 53 | |
class SigningIdentity(NamedTuple):
    """Ed25519 signing identity used to build MSign ``Authorization`` headers.

    Fields:
        handle: Hub handle of the signer, e.g. ``"gabriel"``.
        private_key: Ed25519 private key used to sign each request.
    """

    handle: str  # hub handle, e.g. "gabriel"
    # Quoted on purpose: ``Ed25519PrivateKey`` is imported only under
    # ``TYPE_CHECKING``, so an eagerly evaluated annotation would raise
    # ``NameError`` at import time on interpreters without lazy annotations.
    private_key: "Ed25519PrivateKey"
| 59 | |
| 60 | import msgpack |
| 61 | |
| 62 | from muse.core.mpack import ( |
| 63 | BlobPayload, |
| 64 | MPack, |
| 65 | PushResult, |
| 66 | RemoteInfo, |
| 67 | WireTag, |
| 68 | ) |
| 69 | from muse.core.semver import ChangelogEntry, SemVerTag |
| 70 | from muse.core.io import safe_unpackb |
| 71 | from muse.core.types import ( |
| 72 | BranchHeads, |
| 73 | Manifest, |
| 74 | Metadata, |
| 75 | MsgpackDict, |
| 76 | ) |
| 77 | from muse.core.commits import CommitDict |
| 78 | from muse.core.snapshots import SnapshotDict |
| 79 | from muse.core.releases import ReleaseDict |
| 80 | from muse.core.validation import ( |
| 81 | MAX_RESPONSE_BYTES, |
| 82 | sanitize_display, |
| 83 | ) |
| 84 | from muse.core.types import SemVerBump, blob_id |
| 85 | |
| 86 | logger = logging.getLogger(__name__) |
| 87 | |
| 88 | import ssl as _ssl_mod |
| 89 | import urllib.error |
| 90 | import urllib.request |
| 91 | |
| 92 | _TIMEOUT_SECONDS = 300 |
| 93 | |
| 94 | def _mkcert_ca() -> "pathlib.Path | None": |
| 95 | """Return the mkcert root CA path, or None if mkcert is not installed. |
| 96 | |
| 97 | Tries ``mkcert -CAROOT`` first (cross-platform), then falls back to the |
| 98 | well-known platform paths. |
| 99 | """ |
| 100 | import subprocess |
| 101 | try: |
| 102 | r = subprocess.run(["mkcert", "-CAROOT"], capture_output=True, text=True, timeout=3) |
| 103 | if r.returncode == 0: |
| 104 | ca = pathlib.Path(r.stdout.strip()) / "rootCA.pem" |
| 105 | if ca.exists(): |
| 106 | return ca |
| 107 | except (FileNotFoundError, subprocess.TimeoutExpired): |
| 108 | pass |
| 109 | # Platform fallbacks |
| 110 | candidates = [ |
| 111 | pathlib.Path.home() / "Library" / "Application Support" / "mkcert" / "rootCA.pem", # macOS |
| 112 | pathlib.Path.home() / ".local" / "share" / "mkcert" / "rootCA.pem", # Linux |
| 113 | ] |
| 114 | for p in candidates: |
| 115 | if p.exists(): |
| 116 | return p |
| 117 | return None |
| 118 | |
| 119 | def _make_ssl_ctx(url: str) -> "_ssl_mod.SSLContext": |
| 120 | """Return an SSLContext appropriate for *url*. |
| 121 | |
| 122 | - localhost HTTPS → context loaded from the mkcert root CA |
| 123 | - everything else → default system CA context |
| 124 | """ |
| 125 | if url.startswith("https://localhost") or url.startswith("https://127.0.0.1"): |
| 126 | ca = _mkcert_ca() |
| 127 | if ca is not None: |
| 128 | return _ssl_mod.create_default_context(cafile=str(ca)) |
| 129 | return _ssl_mod.create_default_context() |
| 130 | |
| 131 | class _Request(NamedTuple): |
| 132 | """Minimal HTTP request value passed between ``_build_request`` and ``_execute``.""" |
| 133 | |
| 134 | url: str |
| 135 | method: str |
| 136 | headers: dict[str, str] |
| 137 | data: bytes | None = None |
| 138 | |
| 139 | # Recursive type alias for msgpack-serializable values. |
| 140 | # Covers every type that msgpack can encode/decode natively — no base64, |
| 141 | # no `object`, no `Any`. Python 3.12+ `type` statement creates a proper |
| 142 | # TypeAlias that mypy treats as a first-class recursive type. |
| 143 | type _MsgVal = ( |
| 144 | str | int | float | bool | bytes | None |
| 145 | | list[_MsgVal] |
| 146 | | dict[str, _MsgVal] |
| 147 | ) |
| 148 | type _MsgDict = dict[str, _MsgVal] # top-level msgpack response dict |
| 149 | type _WireVal = _MsgVal # wire-protocol value (same shape) |
| 150 | type _WireDict = dict[str, _WireVal] # wire-protocol response dict |
| 151 | type _HeadersDict = dict[str, str] # HTTP request/response headers |
| 152 | |
# Upper bound on objects per push batch.
# Has to remain strictly below the server's MAX_OBJECTS_PER_PUSH limit
# (1 000).  Bigger batches are broken into sequential POSTs so every request
# body stays comfortably inside Cloudflare's body size limits.
CHUNK_OBJECTS: int = 500

# Upper bound on commits per POST /push call.
# Long histories are pushed as a sequence of chunks so each request stays well
# under Cloudflare's 100 MB upload limit.  Intermediate chunks are sent with
# force=True; only the last chunk carries the caller's force flag.
#
# At 500 commits/chunk a 1000-commit history needs ~2 round-trips, versus 5 at
# the previous 200/chunk.  The server now does bulk INSERT ON CONFLICT DO
# NOTHING, so per-chunk processing time tracks the commit count rather than
# the number of individual INSERT round-trips.
CHUNK_COMMITS: int = 500
| 170 | |
| 171 | |
| 172 | # --------------------------------------------------------------------------- |
| 173 | # Result TypedDict |
| 174 | # --------------------------------------------------------------------------- |
| 175 | |
class FetchMPackResult(TypedDict):
    """Value returned by ``fetch_mpack()`` (Phase 2 presigned-URL fetch).

    Notable keys:

    ``blobs`` — the raw blob dicts carried by the mpack; handed on to
    ``apply_mpack``.
    ``blobs_received`` — how many blobs the mpack contained.
    """

    repo_id: str
    domain: str
    default_branch: str
    branch_heads: BranchHeads
    commits: list[CommitDict]
    snapshots: list[SnapshotDict]
    blobs: list[dict]
    blobs_received: int
    shallow_commits: list[str]
| 192 | |
| 193 | class _UnpackMPackResult(TypedDict): |
| 194 | """Response from POST /push/unpack-mpack.""" |
| 195 | job_id: str |
| 196 | head: str |
| 197 | branch: str |
| 198 | blobs_in_mpack: int |
| 199 | commits_in_mpack: int |
| 200 | |
| 201 | |
| 202 | class _PresignResult(TypedDict): |
| 203 | """Response from POST /push/mpack-presign.""" |
| 204 | upload_url: str |
| 205 | mpack_key: str |
| 206 | |
| 207 | |
| 208 | @contextlib.contextmanager |
| 209 | def _ignore_sigpipe() -> collections.abc.Generator[None, None, None]: |
| 210 | """Temporarily ignore SIGPIPE during socket I/O (main thread only). |
| 211 | |
| 212 | ``muse/cli/app.py`` sets ``SIGPIPE = SIG_DFL`` at startup so that piping |
| 213 | ``muse`` output to ``head``/``grep``/``jq`` exits cleanly. But ``SIG_DFL`` |
| 214 | also kills the process when a large HTTP request body is still in-flight |
| 215 | and the remote closes the connection early (auth failure, 409 conflict, |
| 216 | server crash, …). Without this guard the push command dies with exit code |
| 217 | 141 instead of raising ``TransportError``. |
| 218 | |
| 219 | Signal handlers may only be changed from the main interpreter thread. |
| 220 | Worker threads launched by ``ThreadPoolExecutor`` (used for parallel object |
| 221 | uploads) are already safe: Python blocks ``SIGPIPE`` in non-main threads, |
| 222 | so broken-socket writes raise ``BrokenPipeError`` rather than killing the |
| 223 | process. This context manager is therefore a no-op in non-main threads. |
| 224 | |
| 225 | In the main thread the context manager saves the current SIGPIPE |
| 226 | disposition, sets it to ``SIG_IGN`` for the duration of the network call |
| 227 | (so that broken-pipe conditions surface as ``BrokenPipeError`` → |
| 228 | ``TransportError``), then restores the original disposition on exit. On |
| 229 | platforms without ``SIGPIPE`` (Windows) this is a no-op. |
| 230 | """ |
| 231 | if not hasattr(signal, "SIGPIPE") or not threading.current_thread() is threading.main_thread(): |
| 232 | yield |
| 233 | return |
| 234 | old_handler: signal.Handlers = signal.signal(signal.SIGPIPE, signal.SIG_IGN) |
| 235 | try: |
| 236 | yield |
| 237 | finally: |
| 238 | signal.signal(signal.SIGPIPE, old_handler) |
| 239 | |
| 240 | # --------------------------------------------------------------------------- |
| 241 | # Exception |
| 242 | # --------------------------------------------------------------------------- |
| 243 | |
class TransportError(Exception):
    """Signals a failed remote call: a non-2xx response or an unreachable host.

    Attributes:
        status_code: The HTTP status (for example ``401``, ``404``, ``409``,
            ``500``), or ``0`` when the failure happened below HTTP
            (DNS, connection refused).
    """

    def __init__(self, message: str, status_code: int) -> None:
        self.status_code = status_code
        super().__init__(message)
| 255 | |
| 256 | # --------------------------------------------------------------------------- |
| 257 | # Protocol — the seam between CLI commands and the transport implementation |
| 258 | # --------------------------------------------------------------------------- |
| 259 | |
class MuseTransport(Protocol):
    """Protocol for Muse remote transport implementations.

    All methods are synchronous — the Muse CLI is synchronous by design.
    """

    def fetch_remote_info(self, url: str, signing: SigningIdentity | None) -> RemoteInfo:
        """Return repository metadata from ``GET {url}/refs``.

        Args:
            url: Remote repository URL.
            signing: Ed25519 signing identity, or ``None`` for public repos.

        Raises:
            :class:`TransportError` on HTTP 4xx/5xx or network failure.
        """
        ...

    def fetch_mpack(
        self,
        url: str,
        signing: "SigningIdentity | None",
        want: "list[str]",
        have: "list[str]",
        *,
        on_object: "collections.abc.Callable[[BlobPayload], None] | None" = None,
        ttl_seconds: int = 3600,
    ) -> "FetchMPackResult":
        """Fetch the delta as a single content-addressed MPack.

        POSTs to ``{url}/fetch/mpack``.  When the server returns
        ``presign=False`` the mpack bytes are inline; when ``presign=True``
        the client GETs the mpack from the presigned URL.

        Either way ``sha256(mpack_bytes) == mpack_id`` is verified before
        the mpack is unpacked — one hash covers all contents.

        NOTE(review): ``HttpTransport.fetch_mpack`` as visible in this module
        does not declare ``on_object``; confirm which signature is intended.

        Args:
            url: Canonical repo URL.
            signing: Ed25519 signing identity; ``None`` for public repos.
            want: Commit IDs the client wants.
            have: Commit IDs already present locally (ancestry cut).
            on_object: Callback invoked for each object in the mpack.
            ttl_seconds: Presigned URL TTL forwarded to the server.

        Raises:
            :class:`TransportError` on non-200 response, integrity failure,
            or network error.
        """
        ...

    def push_tags(
        self,
        url: str,
        signing: SigningIdentity | None,
        tags: list[WireTag],
    ) -> int:
        """Push local tags to the remote via ``POST {url}/tags``.

        Tags are immutable once created on the remote — the server skips any
        it already holds.  Returns the number of tags newly stored.

        Args:
            url: Remote repository URL.
            signing: Ed25519 signing identity, or ``None``.
            tags: Tags to push.

        Raises:
            :class:`TransportError` on HTTP 4xx/5xx or network failure.
        """
        ...

    def create_release(
        self,
        url: str,
        signing: SigningIdentity | None,
        release: ReleaseDict,
    ) -> str:
        """Create a release on the remote via ``POST {url}/releases``.

        Returns the ``release_id`` assigned by the server.

        Args:
            url: Remote repository URL.
            signing: Ed25519 signing identity.
            release: Fully-populated :class:`~muse.core.releases.ReleaseDict`.

        Raises:
            :class:`TransportError` on HTTP 4xx/5xx or network failure.
        """
        ...

    def list_releases_remote(
        self,
        url: str,
        signing: SigningIdentity | None,
        channel: str | None = None,
        include_drafts: bool = False,
    ) -> list[ReleaseDict]:
        """Fetch releases from the remote via ``GET {url}/releases``.

        Args:
            url: Remote repository URL.
            signing: Ed25519 signing identity, or ``None``.
            channel: Filter by release channel; ``None`` returns all.
            include_drafts: Include draft releases when ``True``.

        Raises:
            :class:`TransportError` on HTTP 4xx/5xx or network failure.
        """
        ...

    def delete_release_remote(
        self,
        url: str,
        signing: SigningIdentity | None,
        tag: str,
    ) -> None:
        """Retract a release from the remote via ``DELETE {url}/releases/{tag}``.

        Removes only the named label from the remote registry.  The underlying
        commit and snapshot objects are **not** affected.

        Args:
            url: Remote repository URL.
            signing: Ed25519 signing identity (owner credentials required).
            tag: Semver tag of the release to retract (e.g. ``"v1.2.0"``).

        Raises:
            :class:`TransportError` on HTTP 4xx/5xx, network failure, or if
            the release does not exist on the remote.
        """
        ...

    def delete_branch_remote(
        self,
        url: str,
        signing: SigningIdentity | None,
        branch: str,
    ) -> None:
        """Delete a branch on the remote via ``DELETE {url}/branches/{branch}``.

        Equivalent to ``git push origin --delete <branch>``.  The branch ref is
        removed from the server; commits and objects are unaffected.

        Args:
            url: Remote repository URL.
            signing: Ed25519 signing identity (owner credentials required).
            branch: Branch name to delete (e.g. ``"feat/my-thing"``).

        Raises:
            :class:`TransportError` on HTTP 4xx/5xx, network failure, or if
            the branch does not exist on the remote.
        """
        ...
| 415 | |
| 416 | # --------------------------------------------------------------------------- |
| 417 | # HTTP/1.1 implementation (stdlib, zero extra dependencies) |
| 418 | # --------------------------------------------------------------------------- |
| 419 | |
| 420 | def _http_error_message_from_bytes(status_code: int, body: bytes) -> str: |
| 421 | """Build a safe error message from a status code and raw response bytes.""" |
| 422 | if status_code == 401: |
| 423 | return "Authentication failed (HTTP 401). Run 'muse auth register'." |
| 424 | raw = body[:300].decode("utf-8", errors="replace") |
| 425 | safe = sanitize_display(raw[:200]) |
| 426 | if safe: |
| 427 | return f"HTTP {status_code}: {safe}" |
| 428 | return f"HTTP {status_code}" |
| 429 | |
| 430 | def _http_error_message(err: "urllib.error.HTTPError") -> str: |
| 431 | """Return a safe, credential-free error message for an HTTP error. |
| 432 | |
| 433 | HTTP 401 — NEVER includes the server body (which may echo the signing |
| 434 | credential). Returns a fixed generic string. |
| 435 | |
| 436 | All other codes — include the body, capped at 200 chars, with all C0/C1 |
| 437 | control characters stripped to prevent ANSI/OSC terminal injection. |
| 438 | """ |
| 439 | if err.code == 401: |
| 440 | return "Authentication failed (HTTP 401). Run 'muse auth register'." |
| 441 | try: |
| 442 | raw_body = err.read(300).decode("utf-8", errors="replace") |
| 443 | except Exception: |
| 444 | raw_body = "" |
| 445 | safe_body = sanitize_display(raw_body[:200]) |
| 446 | if safe_body: |
| 447 | return f"HTTP {err.code}: {safe_body}" |
| 448 | return f"HTTP {err.code}" |
| 449 | |
| 450 | class _NoRedirectHandler(urllib.request.HTTPRedirectHandler): |
| 451 | """urllib redirect handler that raises immediately on any 3xx response.""" |
| 452 | def redirect_request( # type: ignore[override] |
| 453 | self, |
| 454 | req: urllib.request.Request, |
| 455 | fp: "Any", |
| 456 | code: int, |
| 457 | msg: str, |
| 458 | headers: "Any", |
| 459 | newurl: str, |
| 460 | ) -> None: |
| 461 | raise urllib.error.HTTPError(newurl, code, f"Redirect to {newurl}", headers, fp) |
| 462 | |
| 463 | |
def _open_url(req: "urllib.request.Request") -> "Any":
    """Open *req* with ``urllib.request.urlopen`` and return the response.

    Exists as a single indirection point so tests can patch the network call.
    """
    response = urllib.request.urlopen(req)  # noqa: S310
    return response
| 467 | |
| 468 | |
def _urllib_do(
    method: str,
    url: str,
    headers: "dict[str, str]",
    data: "bytes | None" = None,
    *,
    timeout: int = _TIMEOUT_SECONDS,
    follow_redirects: bool = False,
    max_bytes: "int | None" = MAX_RESPONSE_BYTES,
) -> bytes:
    """Perform one HTTP request with urllib and return the response body.

    Args:
        method: HTTP verb.
        url: Absolute request URL; also selects the SSL context.
        headers: Request headers.
        data: Request body, or ``None``.
        timeout: Socket timeout in seconds.
        follow_redirects: When ``False`` (default) any 3xx response is turned
            into an error instead of being followed.
        max_bytes: Cap on the response body size.  ``None`` disables the cap
            and reads the body in 1 MiB chunks (e.g. mpack downloads).

    Returns:
        The raw response body.

    Raises:
        TransportError: On a non-2xx response (``status_code`` is the HTTP
            code), on an over-sized body, or on any network/SSL failure
            (``status_code`` is ``0``).
    """
    handlers: list[urllib.request.BaseHandler] = [
        urllib.request.HTTPSHandler(context=_make_ssl_ctx(url))
    ]
    if not follow_redirects:
        handlers.append(_NoRedirectHandler())
    opener = urllib.request.build_opener(*handlers)
    request = urllib.request.Request(url, data=data, headers=headers, method=method)

    try:
        with opener.open(request, timeout=timeout) as resp:
            if max_bytes is None:
                # Uncapped: drain the stream chunk by chunk.
                pieces = []
                while piece := resp.read(1 << 20):  # 1 MiB chunks
                    pieces.append(piece)
                return b"".join(pieces)

            # Read one byte past the cap so an over-sized body is detectable.
            payload = resp.read(max_bytes + 1)
            if len(payload) > max_bytes:
                raise TransportError(
                    f"Response body exceeds the {max_bytes // (1024 * 1024)} MiB cap.", 0
                )
            return payload
    except TransportError:
        # Raised above for the size cap; must not be re-wrapped below.
        raise
    except urllib.error.HTTPError as err:
        raise TransportError(_http_error_message(err), err.code) from err
    except urllib.error.URLError as err:
        raise TransportError(sanitize_display(str(err.reason)), 0) from err
    except Exception as exc:
        raise TransportError(sanitize_display(str(exc)), 0) from exc
| 518 | |
| 519 | class HttpTransport: |
| 520 | """HTTPS transport using urllib. |
| 521 | |
| 522 | One short-lived connection per call. Signing identity values are |
| 523 | **never** written to any log line. |
| 524 | """ |
| 525 | |
def _build_request(
    self,
    method: str,
    url: str,
    signing: SigningIdentity | None,
    body_bytes: bytes | None = None,
    content_type: str = "application/x-msgpack",
    extra_headers: dict[str, str] | None = None,
) -> _Request:
    """Assemble the ``_Request`` for one call, enforcing credential safety.

    Steps, in order:

    1. Refuse to sign a request to a non-HTTPS URL unless the host is one of
       the loopback names listed below.
    2. For signed, non-loopback requests run the hub trust check
       (``check_and_pin``) on ``scheme://netloc``.  A fingerprint mismatch
       aborts the request; any other failure of the check is only logged.
    3. Build headers: default ``Accept``, ``Content-Type`` when there is a
       body, then *extra_headers* (which may override the former), then the
       MSign ``Authorization`` header when *signing* is given.

    Args:
        method: HTTP verb.
        url: Absolute request URL.
        signing: Signing identity, or ``None`` for unsigned requests.
        body_bytes: Request body, or ``None``.
        content_type: ``Content-Type`` to send when a body is present.
        extra_headers: Additional headers merged over the defaults.

    Returns:
        The request value to pass to ``_execute``.

    Raises:
        TransportError: For a signed non-HTTPS, non-loopback URL, or on a hub
            fingerprint mismatch (``status_code`` is ``0`` in both cases).
    """
    parts = urllib.parse.urlparse(url)
    loopback = parts.hostname in {
        "localhost", "127.0.0.1", "::1", "host.docker.internal",
    }
    if signing and parts.scheme != "https" and not loopback:
        raise TransportError(
            f"Refusing to send credentials to a non-HTTPS URL: {url!r}. "
            "Ensure the remote URL uses https://.",
            0,
        )

    # TOFU hub certificate fingerprint pinning — only for signed requests.
    if signing is not None and not loopback:
        try:
            from muse.core.hub_trust import check_and_pin
            check_and_pin(f"{parts.scheme}://{parts.netloc}")
        except Exception as trust_exc:  # noqa: BLE001
            from muse.core.errors import HubFingerprintMismatchError
            if isinstance(trust_exc, HubFingerprintMismatchError):
                raise TransportError(str(trust_exc), 0) from trust_exc
            # Best effort: any other failure of the check does not block the call.
            logger.warning("⚠️ Hub trust check failed: %s", trust_exc)

    request_headers: _HeadersDict = {
        "Accept": "application/x-msgpack, application/json",
    }
    if body_bytes is not None:
        request_headers["Content-Type"] = content_type
    if extra_headers:
        request_headers.update(extra_headers)
    if signing:
        from muse.core.msign import build_msign_header
        request_headers["Authorization"] = build_msign_header(
            signing, method, url, body_bytes
        )
    return _Request(url=url, method=method, headers=request_headers, data=body_bytes)
| 569 | |
@staticmethod
def _decode(raw: bytes) -> _MsgDict:
    """Decode a msgpack response body into a dict.

    An empty body, or a payload whose top-level value is not a dict, yields
    ``{}``.

    Raises:
        TransportError: If ``safe_unpackb`` rejects the payload.
    """
    if not raw:
        return {}
    try:
        # The network read is already capped by MAX_RESPONSE_BYTES;
        # safe_unpackb layers per-field limits on top as a second defence
        # against billion-laughs payloads.  allow_binary=True because
        # pack/mpack responses carry raw blob content.
        decoded: _MsgVal = safe_unpackb(raw, context="server response", allow_binary=True)
    except Exception as exc:
        raise TransportError(f"Server returned invalid msgpack: {exc}", 0) from exc
    return decoded if isinstance(decoded, dict) else {}
| 588 | |
def _execute(self, req: "_Request | urllib.request.Request") -> bytes:
    """Send *req* and return the raw response body.

    An internal ``_Request`` is sent through ``_urllib_do`` with SIGPIPE
    ignored for the duration of the call; a ``urllib.request.Request`` is
    delegated to ``_execute_fetch``.

    Raises:
        :class:`TransportError` on non-2xx HTTP or any network error.
    """
    if not isinstance(req, urllib.request.Request):
        with _ignore_sigpipe():
            return _urllib_do(req.method, req.url, req.headers, req.data)
    return self._execute_fetch(req)
| 602 | |
def _execute_fetch(self, req: "urllib.request.Request") -> bytes:
    """Send a ``urllib.request.Request`` and return the raw response body.

    The request goes through ``_open_url``, the seam tests patch.  The body
    is capped at ``MAX_RESPONSE_BYTES``.

    Raises:
        :class:`TransportError` on non-2xx HTTP, an over-sized body, or any
        network error.
    """
    limit = MAX_RESPONSE_BYTES
    try:
        with _open_url(req) as resp:
            # One byte past the cap distinguishes "exactly at cap" from "over".
            payload = resp.read(limit + 1)
            if len(payload) <= limit:
                return payload
            raise TransportError(
                f"Response body exceeds the {MAX_RESPONSE_BYTES // (1024 * 1024)} MiB cap.",
                0,
            )
    except TransportError:
        raise
    except urllib.error.HTTPError as err:
        raise TransportError(_http_error_message(err), err.code) from err
    except urllib.error.URLError as err:
        raise TransportError(sanitize_display(str(err.reason)), 0) from err
    except Exception as exc:
        raise TransportError(sanitize_display(str(exc)), 0) from exc
| 628 | |
def hub_json(
    self,
    method: str,
    url: str,
    signing: "SigningIdentity | None",
    body: "dict[str, object] | None" = None,
) -> "dict[str, object]":
    """Send a JSON request and return the parsed JSON object.

    The request is built by ``_build_request`` (so it is signed when
    *signing* is given) and sent by ``_execute``, the same path other
    transport calls use.  A response whose top-level JSON value is not an
    object yields ``{}``.

    Args:
        method: HTTP verb (``GET``, ``POST``, ``PATCH``, ``DELETE``).
        url: Full URL including scheme and path.
        signing: MSign identity, or ``None`` for unauthenticated calls.
        body: Optional JSON-serializable dict sent as the request body.

    Raises:
        TransportError: On any HTTP or network error, or when the response
            is not valid JSON.
    """
    payload = None if body is None else json.dumps(body).encode()
    request = self._build_request(
        method,
        url,
        signing,
        body_bytes=payload,
        content_type="application/json",
        extra_headers={"Accept": "application/json"},
    )
    raw = self._execute(request)
    try:
        parsed = json.loads(raw.decode("utf-8"))
    except Exception as exc:
        raise TransportError(f"Server returned invalid JSON: {exc}", 0) from exc
    return parsed if isinstance(parsed, dict) else {}
| 664 | |
def hub_bytes(
    self,
    url: str,
    signing: "SigningIdentity | None",
) -> bytes:
    """GET *url* and return the response body as raw bytes.

    The request advertises ``Accept: */*`` and is signed when *signing* is
    given.  It is sent through ``_execute``.

    Args:
        url: Full URL including scheme and path.
        signing: MSign identity, or ``None`` for unauthenticated calls.

    Raises:
        TransportError: On any HTTP or network error.
    """
    accept_anything = {"Accept": "*/*"}
    request = self._build_request("GET", url, signing, extra_headers=accept_anything)
    return self._execute(request)
| 684 | |
def fetch_remote_info(self, url: str, signing: SigningIdentity | None) -> RemoteInfo:
    """Retrieve repository metadata via ``GET {url}/refs``.

    Args:
        url: Remote repository URL; a trailing slash is tolerated.
        signing: Signing identity, or ``None`` for an unsigned request.

    Returns:
        The response body parsed by ``_parse_remote_info``.

    Raises:
        TransportError: On HTTP or network failure.
    """
    refs_url = f"{url.rstrip('/')}/refs"
    logger.debug("transport: GET %s", refs_url)
    response_bytes = self._execute(self._build_request("GET", refs_url, signing))
    return _parse_remote_info(response_bytes)
| 692 | |
def push_mpack_put(self, upload_url: str, mpack_bytes: bytes, mpack_key: str = "") -> None:
    """PUT mpack_bytes directly to a presigned MinIO URL (Step 2).

    No MuseHub API involvement.  No Authorization header.  The presigned
    URL is the credential.  Redirects are refused.

    Diagnostic lines (length, content hash and — when *mpack_key* is given —
    whether the hash equals the key) are written to stderr.  A mismatch is
    reported only; it does not abort the upload.

    Args:
        upload_url: Presigned PUT URL.
        mpack_bytes: Serialized mpack to upload.
        mpack_key: Optional key to compare against ``blob_id(mpack_bytes)``.

    Raises:
        TransportError: On a non-2xx response or any network error.
    """
    import sys as _sys
    from muse.core.types import blob_id as _blob_id_put

    pre_put_hash = _blob_id_put(mpack_bytes)
    print(f"[PUSH step 6] pre-PUT integrity check:", file=_sys.stderr)
    print(f"[PUSH step 6] len(mpack_bytes) = {len(mpack_bytes)}", file=_sys.stderr)
    print(f"[PUSH step 6] blob_id(mpack_bytes) = {pre_put_hash}", file=_sys.stderr)
    if mpack_key:
        match = pre_put_hash == mpack_key
        print(f"[PUSH step 6] mpack_key = {mpack_key}", file=_sys.stderr)
        print(f"[PUSH step 6] match = {match} {'✓' if match else '✗ MISMATCH'}", file=_sys.stderr)

    # This is the largest request body the transport sends.  Guard it the
    # same way ``_execute`` guards its calls, so a remote that hangs up
    # mid-upload surfaces as TransportError instead of a SIGPIPE kill.
    with _ignore_sigpipe():
        body = _urllib_do(
            "PUT", upload_url, {"Content-Type": "application/x-muse-pack"}, mpack_bytes,
            follow_redirects=False,
        )
    # NOTE: reached for any 2xx status; the literal "200" is kept as-is so
    # existing log consumers see unchanged output.
    print(f"[PUSH step 6] PUT → HTTP 200 response_len={len(body)}", file=_sys.stderr)
| 714 | |
def push_mpack_unpack(
    self,
    url: str,
    signing: "SigningIdentity | None",
    mpack_key: str,
    *,
    branch: str = "main",
    head: str = "",
    commits_count: int = 0,
    blobs_count: int = 0,
    force: bool = False,
) -> _UnpackMPackResult:
    """POST /push/unpack-mpack — notify server to index the uploaded mpack (Step 3).

    Returns ``{"job_id": str, "head": str, "branch": str,
    "blobs_in_mpack": int, "commits_in_mpack": int}``.  Fields missing from
    the server response fall back to defaults: ``job_id`` to ``""``,
    ``head``/``branch`` to the values passed in, and the counters to ``0``.
    A missing ``job_id`` is therefore NOT an error here; callers that need
    one must check for the empty string.

    Args:
        url: Remote repository URL; a trailing slash is tolerated.
        signing: Signing identity, or ``None`` for an unsigned request.
        mpack_key: Key of the previously uploaded mpack.
        branch: Target branch name.
        head: Head commit ID of the push.
        commits_count: Number of commits in the mpack.
        blobs_count: Number of blobs in the mpack.
        force: Whether the push is forced.

    Raises:
        TransportError: On a non-2xx response, invalid msgpack, or any
            network error.
    """
    from muse.core.mpack import build_unpack_payload

    endpoint = f"{url.rstrip('/')}/push/unpack-mpack"
    payload = build_unpack_payload(
        mpack_key, branch=branch, head=head,
        commits_count=commits_count, blobs_count=blobs_count, force=force,
    )
    body_bytes = msgpack.packb(payload, use_bin_type=True)
    req = self._build_request("POST", endpoint, signing, body_bytes)
    # Go through _execute (rather than calling _urllib_do directly) so the
    # request gets the same SIGPIPE guard as other transport calls.
    raw = self._execute(req)
    data = self._decode(raw)
    return {
        "job_id": str(data.get("job_id") or ""),
        "head": str(data.get("head") or head),
        "branch": str(data.get("branch") or branch),
        "blobs_in_mpack": int(data.get("blobs_in_mpack") or 0),
        "commits_in_mpack": int(data.get("commits_in_mpack") or 0),
    }
| 750 | |
def push_mpack_presign(
    self,
    url: str,
    signing: "SigningIdentity | None",
    mpack_bytes: bytes,
    ttl_seconds: int = 3600,
) -> _PresignResult:
    """POST /push/mpack-presign — get a presigned PUT URL for the mpack.

    Returns ``{"upload_url": str, "mpack_key": str}``.  ``mpack_key`` is
    taken from the server response when present, otherwise from the request
    payload built by ``build_presign_payload``.

    Args:
        url: Remote repository URL; a trailing slash is tolerated.
        signing: Signing identity, or ``None`` for an unsigned request.
        mpack_bytes: Serialized mpack the presign request is built from.
        ttl_seconds: Currently unused — see the note in the body.

    Raises:
        TransportError: On a non-2xx response, invalid msgpack, a network
            error, or a response without ``upload_url``.
    """
    from muse.core.mpack import build_presign_payload

    endpoint = f"{url.rstrip('/')}/push/mpack-presign"
    # NOTE(review): ``ttl_seconds`` is accepted but never forwarded to the
    # server.  Whether the presign payload supports a TTL cannot be told
    # from this module — confirm against build_presign_payload before wiring
    # it through.
    payload = build_presign_payload(mpack_bytes)
    body_bytes = msgpack.packb(payload, use_bin_type=True)
    req = self._build_request("POST", endpoint, signing, body_bytes)
    # Go through _execute (rather than calling _urllib_do directly) so the
    # request gets the same SIGPIPE guard as other transport calls.
    raw = self._execute(req)
    data = self._decode(raw)
    upload_url = str(data.get("upload_url") or "")
    if not upload_url:
        raise TransportError("push/mpack-presign: server response missing upload_url", 0)
    return {
        "upload_url": upload_url,
        "mpack_key": str(data.get("mpack_key") or payload["mpack_key"]),
    }
| 774 | |
def fetch_mpack(
    self,
    url: str,
    signing: "SigningIdentity | None",
    want: "list[str]",
    have: "list[str]",
    *,
    ttl_seconds: int = 3600,
) -> "FetchMPackResult":
    """Fetch the commit delta as one content-addressed MPack.

    Flow:
      1. POST ``{url}/fetch/mpack`` with msgpack ``{want, have, ttl_seconds}``.
      2. If the reply carries ``mpack_url``: GET the full mpack bytes from
         it, require ``blob_id(bytes)`` to equal the advertised ``mpack_id``
         (``bundle_id`` is accepted as an alias), then decode the bytes with
         ``parse_wire_mpack`` when they start with ``b"MUSE"`` and with
         msgpack otherwise.
      3. If the reply carries no ``mpack_url``: the reply itself is the
         payload (inline protocol) and its ``commits`` / ``snapshots`` /
         ``blobs`` are used directly.

    Progress and timing lines are printed to stderr.  The download URL is
    logged without its query string.

    Args:
        url: Remote repository URL; a trailing ``/`` is ignored.
        signing: Identity handed to ``_build_request`` (may be ``None``).
        want: Commit ids requested from the remote.
        have: Commit ids already present locally.
        ttl_seconds: Forwarded to the server in the request body.

    Returns:
        A ``FetchMPackResult`` for the caller to pass to ``apply_mpack``.
        Entries that are not dicts (commits, snapshots, blobs) and branch
        heads that are not ``str -> str`` are dropped.

    Raises:
        TransportError: integrity mismatch, a reply that is not a msgpack
            map, or any other exception raised while requesting,
            downloading or decoding (wrapped with a sanitized message and
            code 0).
    """
    import sys as _sys

    def _trace(message: str) -> None:
        # One flushed diagnostic line per step, on stderr.
        print(f"[transport] {message}", file=_sys.stderr, flush=True)

    def _elapsed_ms(start: float, end: float) -> int:
        return int((end - start) * 1000)

    def _as_list(value: object) -> list:
        # Wire data is untrusted: anything that is not a sequence of entries
        # is treated as "no entries" instead of crashing the caller.
        return list(value) if isinstance(value, (list, tuple)) else []

    endpoint = f"{url.rstrip('/')}/fetch/mpack"
    body_bytes = msgpack.packb(
        {"want": want, "have": have, "ttl_seconds": ttl_seconds},
        use_bin_type=True,
    )
    req = self._build_request("POST", endpoint, signing, body_bytes)

    t0 = _time_mod.monotonic()
    _trace(f"POST {endpoint} want={len(want)} have={len(have)}")
    try:
        # NOTE(review): follow_redirects=True on a request that carries the
        # signing headers looks at odds with the module's stated policy of
        # refusing redirects — confirm _urllib_do handles this safely.
        raw_post = _urllib_do("POST", endpoint, dict(req.headers), body_bytes, follow_redirects=True)
        t_post = _time_mod.monotonic()
        _trace(f"POST done status=200 body={len(raw_post)}B t={_elapsed_ms(t0, t_post)}ms")

        data = msgpack.unpackb(raw_post, raw=False)
        if not isinstance(data, dict):
            raise TransportError("fetch/mpack: server response is not a msgpack map", 0)
        bundle_id: str = str(data.get("mpack_id") or data.get("bundle_id") or "")
        mpack_url_raw: str = str(data.get("mpack_url") or "")
        # A presigned URL typically carries its access signature in the query
        # string, so only the part before "?" is ever written to the log.
        mpack_url_log = mpack_url_raw.split("?", 1)[0]
        t_parse = _time_mod.monotonic()
        _trace(
            f"parsed bundle_id={bundle_id[:20] if bundle_id else 'none'}"
            f" mpack_url={mpack_url_log[:100] if mpack_url_log else 'null'}"
            f" t={_elapsed_ms(t0, t_parse)}ms"
        )

        if mpack_url_raw:
            _trace(f"GET mpack {mpack_url_log[:120]}")
            mpack_bytes_dl = _urllib_do("GET", mpack_url_raw, {}, follow_redirects=True, max_bytes=None)
            t_dl = _time_mod.monotonic()
            dl_mb = len(mpack_bytes_dl) / (1024 * 1024)
            _trace(f"GET done status=200 size={dl_mb:.2f}MB t={_elapsed_ms(t_parse, t_dl)}ms")

            # Content addressing: the bytes must hash to the advertised id.
            # An empty advertised id can never match, so this fails closed.
            actual_id = blob_id(mpack_bytes_dl)
            t_verify = _time_mod.monotonic()
            _trace(f"sha256 match={actual_id == bundle_id} t={_elapsed_ms(t_dl, t_verify)}ms")
            if actual_id != bundle_id:
                raise TransportError(
                    f"fetch/mpack: integrity failure mpack_id={bundle_id!r} "
                    f"actual={actual_id!r}",
                    0,
                )

            if mpack_bytes_dl[:4] == b"MUSE":
                from muse.core.mpack import parse_wire_mpack
                mpack = parse_wire_mpack(mpack_bytes_dl)
            else:
                mpack = msgpack.unpackb(mpack_bytes_dl, raw=False)
            t_unpack = _time_mod.monotonic()
            _trace(
                f"unpackb commits={len(_as_list(mpack.get('commits')))}"
                f" snaps={len(_as_list(mpack.get('snapshots')))}"
                f" blobs={len(_as_list(mpack.get('blobs')))}"
                f" t={_elapsed_ms(t_verify, t_unpack)}ms"
            )
        else:
            # mpack_url absent or null → the data is inline in the POST reply,
            # which is already parsed into ``data``; use it as the payload
            # rather than discarding it.
            _trace("no mpack_url — inline protocol (no S3 download)")
            mpack = data

    except TransportError:
        raise
    except Exception as exc:
        raise TransportError(sanitize_display(str(exc)), 0) from exc

    raw_commits = _as_list(mpack.get("commits"))
    raw_snaps = _as_list(mpack.get("snapshots"))
    raw_blobs = _as_list(mpack.get("blobs"))
    # Heads from the POST reply win over heads embedded in the mpack.
    raw_heads: dict = {}
    for candidate in (data.get("branch_heads"), mpack.get("branch_heads")):
        if isinstance(candidate, dict) and candidate:
            raw_heads = candidate
            break

    t_coerce0 = _time_mod.monotonic()
    commits: list[CommitDict] = [
        _coerce_commit_dict(dict(c)) for c in raw_commits if isinstance(c, dict)
    ]
    snapshots: list[SnapshotDict] = [
        _coerce_snapshot_dict(dict(s)) for s in raw_snaps if isinstance(s, dict)
    ]
    branch_heads: BranchHeads = {
        str(k): str(v) for k, v in raw_heads.items()
        if isinstance(k, str) and isinstance(v, str)
    }
    blobs: list[dict] = [obj for obj in raw_blobs if isinstance(obj, dict)]
    t_coerce1 = _time_mod.monotonic()
    _trace(
        f"coerce commits={len(commits)} snaps={len(snapshots)} blobs={len(blobs)} heads={len(branch_heads)}"
        f" t={_elapsed_ms(t_coerce0, t_coerce1)}ms TOTAL={_elapsed_ms(t0, t_coerce1)}ms"
    )

    return FetchMPackResult(
        repo_id=str(data.get("repo_id") or ""),
        domain=str(data.get("domain") or ""),
        default_branch=str(data.get("default_branch") or "main"),
        branch_heads=branch_heads,
        commits=commits,
        snapshots=snapshots,
        blobs=blobs,
        blobs_received=len(blobs),
        shallow_commits=[],
    )
| 900 | |
def push_tags(
    self,
    url: str,
    signing: SigningIdentity | None,
    tags: list[WireTag],
) -> int:
    """Push tags via ``POST {url}/tags``.

    The body is the msgpack map ``{"tags": [...]}``.

    Returns:
        The integer ``stored`` field of the response, or ``0`` when that
        field is missing or not an int.
    """
    target = f"{url.rstrip('/')}/tags"
    logger.debug("transport: POST %s (tags=%d)", target, len(tags))
    encoded: bytes = msgpack.packb({"tags": list(tags)}, use_bin_type=True)
    request = self._build_request("POST", target, signing, encoded)
    reply = self._decode(self._execute(request))
    stored = reply.get("stored")
    if not isinstance(stored, int):
        return 0
    return int(stored)
| 916 | |
def create_release(
    self,
    url: str,
    signing: SigningIdentity | None,
    release: ReleaseDict,
) -> str:
    """Create a release via ``POST {url}/releases``.

    The release is sent as UTF-8 JSON with content type
    ``application/json``.

    Returns:
        The ``release_id`` string from the response, or ``""`` when the
        field is missing or not a string.
    """
    target = f"{url.rstrip('/')}/releases"
    logger.debug("transport: POST %s (tag=%s)", target, release.get("tag", ""))
    # ReleaseDict contains no bytes fields, so it serialises to JSON as-is.
    encoded: bytes = json.dumps(release).encode("utf-8")
    request = self._build_request(
        "POST", target, signing, encoded, content_type="application/json",
    )
    reply = self._decode(self._execute(request))
    release_id = reply.get("release_id")
    if not isinstance(release_id, str):
        return ""
    return str(release_id)
| 933 | |
def list_releases_remote(
    self,
    url: str,
    signing: SigningIdentity | None,
    channel: str | None = None,
    include_drafts: bool = False,
) -> list[ReleaseDict]:
    """List releases via ``GET {url}/releases``.

    Args:
        url: Remote repository URL; a trailing ``/`` is ignored.
        signing: Identity handed to ``_build_request`` (may be ``None``).
        channel: When non-empty, sent percent-encoded as ``channel=<value>``.
        include_drafts: When true, ``include_drafts=1`` is added to the query.

    Returns:
        The releases parsed from the response by ``_parse_releases_list``.
    """
    target = f"{url.rstrip('/')}/releases"
    query: list[str] = []
    if channel:
        query.append("channel=" + urllib.parse.quote(channel))
    if include_drafts:
        query.append("include_drafts=1")
    if query:
        target = target + "?" + "&".join(query)
    logger.debug("transport: GET %s", target)
    request = self._build_request("GET", target, signing)
    return _parse_releases_list(self._decode(self._execute(request)))
| 955 | |
def delete_release_remote(
    self,
    url: str,
    signing: SigningIdentity | None,
    tag: str,
) -> None:
    """Retract a release via ``DELETE {url}/releases/{tag}``.

    *tag* is percent-encoded with no safe characters, so a ``/`` inside it
    stays within a single path segment.  The response body is discarded.
    """
    encoded_tag = urllib.parse.quote(tag, safe="")
    target = url.rstrip("/") + "/releases/" + encoded_tag
    logger.debug("transport: DELETE %s", target)
    self._execute(self._build_request("DELETE", target, signing))
| 967 | |
def delete_branch_remote(
    self,
    url: str,
    signing: SigningIdentity | None,
    branch: str,
) -> None:
    """Delete a branch via ``DELETE {url}/branches/{branch}``.

    *branch* is percent-encoded with no safe characters, so a ``/`` inside
    it stays within a single path segment.  The response body is discarded.
    """
    encoded_branch = urllib.parse.quote(branch, safe="")
    target = url.rstrip("/") + "/branches/" + encoded_branch
    logger.debug("transport: DELETE %s", target)
    self._execute(self._build_request("DELETE", target, signing))
| 979 | |
| 980 | # --------------------------------------------------------------------------- |
| 981 | # Response parsers — JSON bytes → typed TypedDicts |
| 982 | # --------------------------------------------------------------------------- |
| 983 | # json.loads() returns Any (per typeshed), so we use isinstance narrowing |
| 984 | # throughout. No explicit Any annotations appear in this file. |
| 985 | # --------------------------------------------------------------------------- |
| 986 | |
def _parse_remote_info(raw: bytes) -> RemoteInfo:
    """Parse ``GET /refs`` response bytes into a :class:`~muse.core.mpack.RemoteInfo`.

    Non-string scalar fields fall back to defaults (``repo_id`` → ``""``,
    ``domain`` → ``"midi"``, ``default_branch`` → ``"main"``).  Only
    ``str -> str`` pairs of ``branch_heads`` are kept.
    """
    payload = HttpTransport._decode(raw)

    def _text(key: str, fallback: str) -> str:
        value = payload.get(key)
        return str(value) if isinstance(value, str) else fallback

    heads_field = payload.get("branch_heads")
    heads: BranchHeads = {}
    if isinstance(heads_field, dict):
        heads = {
            name: commit
            for name, commit in heads_field.items()
            if isinstance(name, str) and isinstance(commit, str)
        }

    return RemoteInfo(
        repo_id=_text("repo_id", ""),
        domain=_text("domain", "midi"),
        default_branch=_text("default_branch", "main"),
        branch_heads=heads,
    )
| 1008 | |
def _parse_mpack(raw: bytes) -> MPack:
    """Parse ``POST /fetch`` response bytes into a :class:`~muse.core.mpack.MPack`.

    Each of ``commits``, ``snapshots``, ``blobs`` and ``branch_heads`` is
    set on the result only when the response holds a value of the expected
    container type; malformed entries inside a container are skipped.
    """
    payload = HttpTransport._decode(raw)
    result: MPack = {}

    # Commits — CommitRecord.from_dict() validates each item later.
    commit_items = payload.get("commits")
    if isinstance(commit_items, list):
        result["commits"] = [
            _coerce_commit_dict(entry)
            for entry in commit_items
            if isinstance(entry, dict)
        ]

    # Snapshots
    snapshot_items = payload.get("snapshots")
    if isinstance(snapshot_items, list):
        result["snapshots"] = [
            _coerce_snapshot_dict(entry)
            for entry in snapshot_items
            if isinstance(entry, dict)
        ]

    # Blobs — raw bytes in "content" (mpack wire format); a blob needs both
    # a string object_id and bytes-like content to be kept.
    blob_items = payload.get("blobs")
    if isinstance(blob_items, list):
        kept: list[BlobPayload] = []
        for entry in blob_items:
            if not isinstance(entry, dict):
                continue
            object_id = entry.get("object_id")
            content = entry.get("content")
            if isinstance(object_id, str) and isinstance(content, (bytes, bytearray)):
                kept.append(BlobPayload(object_id=object_id, content=bytes(content)))
        result["blobs"] = kept

    # Branch heads
    head_items = payload.get("branch_heads")
    if isinstance(head_items, dict):
        result["branch_heads"] = {
            name: commit
            for name, commit in head_items.items()
            if isinstance(name, str) and isinstance(commit, str)
        }

    return result
| 1057 | |
def _parse_push_result(raw: bytes) -> PushResult:
    """Parse ``POST /push`` response bytes into a :class:`~muse.core.mpack.PushResult`.

    ``ok`` is true only for a literal boolean ``True``; ``message`` is ``""``
    unless it is a string; only ``str -> str`` branch heads are kept.
    """
    payload = HttpTransport._decode(raw)

    heads_field = payload.get("branch_heads")
    heads: BranchHeads = {}
    if isinstance(heads_field, dict):
        heads = {
            name: commit
            for name, commit in heads_field.items()
            if isinstance(name, str) and isinstance(commit, str)
        }

    message_field = payload.get("message")
    return PushResult(
        ok=payload.get("ok") is True,
        message=str(message_field) if isinstance(message_field, str) else "",
        branch_heads=heads,
    )
| 1074 | |
| 1075 | def _coerce_sem_ver_bump(raw: _MsgVal) -> SemVerBump: |
| 1076 | """Safely coerce a raw value to a :class:`~muse.domain.SemVerBump` literal.""" |
| 1077 | if raw == "major": |
| 1078 | return "major" |
| 1079 | if raw == "minor": |
| 1080 | return "minor" |
| 1081 | if raw == "patch": |
| 1082 | return "patch" |
| 1083 | return "none" |
| 1084 | |
def _parse_releases_list(parsed: "_MsgDict") -> "list[ReleaseDict]":
    """Extract a list of :class:`~muse.core.store.ReleaseDict` from a parsed response.

    Args:
        parsed: Decoded response map; releases are read from its
            ``"releases"`` key.

    Returns:
        One ``ReleaseDict`` per dict entry of ``parsed["releases"]``.
        Returns ``[]`` when that value is not a list.  Non-dict entries are
        skipped, as are entries whose conversion raises ``KeyError``,
        ``TypeError`` or ``ValueError``.

    Text fields that are missing *or explicitly null* take their default
    (``""``, or ``"stable"`` for ``channel``); any other value is passed
    through ``str()``.  Semver integer parts default to ``0`` and
    ``pre``/``build`` to ``""`` when the wire value has the wrong type.
    """
    releases_raw = parsed.get("releases")
    if not isinstance(releases_raw, list):
        return []

    def _text(source: dict, key: str, default: str = "") -> str:
        # A null on the wire means "no value"; str(None) would otherwise
        # store the literal text "None" in the record.
        value = source.get(key)
        return default if value is None else str(value)

    def _int_part(source: dict, key: str) -> int:
        value = source.get(key)
        return int(value) if isinstance(value, int) else 0

    def _str_part(source: dict, key: str) -> str:
        value = source.get(key)
        return value if isinstance(value, str) else ""

    results: list[ReleaseDict] = []
    for item in releases_raw:
        if not isinstance(item, dict):
            continue
        try:
            semver_raw = item.get("semver")
            if isinstance(semver_raw, dict):
                semver: SemVerTag = SemVerTag(
                    major=_int_part(semver_raw, "major"),
                    minor=_int_part(semver_raw, "minor"),
                    patch=_int_part(semver_raw, "patch"),
                    pre=_str_part(semver_raw, "pre"),
                    build=_str_part(semver_raw, "build"),
                )
            else:
                semver = SemVerTag(major=0, minor=0, patch=0, pre="", build="")

            changelog_raw = item.get("changelog") or []
            changelog: list[ChangelogEntry] = []
            if isinstance(changelog_raw, list):
                for entry in changelog_raw:
                    if not isinstance(entry, dict):
                        continue
                    bc_raw = entry.get("breaking_changes")
                    bc_list: list[str] = (
                        [str(b) for b in bc_raw if isinstance(b, str)]
                        if isinstance(bc_raw, list)
                        else []
                    )
                    changelog.append(ChangelogEntry(
                        commit_id=_text(entry, "commit_id"),
                        message=_text(entry, "message"),
                        sem_ver_bump=_coerce_sem_ver_bump(entry.get("sem_ver_bump")),
                        breaking_changes=bc_list,
                        author=_text(entry, "author"),
                        committed_at=_text(entry, "committed_at"),
                        agent_id=_text(entry, "agent_id"),
                        model_id=_text(entry, "model_id"),
                    ))

            results.append(ReleaseDict(
                release_id=_text(item, "release_id"),
                repo_id=_text(item, "repo_id"),
                tag=_text(item, "tag"),
                semver=semver,
                channel=_text(item, "channel", "stable"),
                commit_id=_text(item, "commit_id"),
                snapshot_id=_text(item, "snapshot_id"),
                title=_text(item, "title"),
                body=_text(item, "body"),
                changelog=changelog,
                agent_id=_text(item, "agent_id"),
                model_id=_text(item, "model_id"),
                is_draft=bool(item.get("is_draft", False)),
                gpg_signature=_text(item, "gpg_signature"),
                created_at=_text(item, "created_at"),
            ))
        except (KeyError, TypeError, ValueError):
            # Best effort: one malformed release must not hide the others.
            continue
    return results
| 1149 | |
| 1150 | # --------------------------------------------------------------------------- |
| 1151 | # TypedDict coercion helpers — extract known string fields from raw JSON dicts |
| 1152 | # --------------------------------------------------------------------------- |
| 1153 | # CommitDict and SnapshotDict are total=False (all fields optional), so we |
| 1154 | # only extract the string/scalar fields we can safely validate here. |
| 1155 | # CommitRecord.from_dict() and SnapshotRecord.from_dict() re-validate |
| 1156 | # required fields when apply_mpack() calls them. |
| 1157 | # --------------------------------------------------------------------------- |
| 1158 | |
def _str(val: "_WireVal") -> str:
    """Pass a ``str`` through unchanged; map anything else to ``""``."""
    if isinstance(val, str):
        return val
    return ""
| 1162 | |
def _str_or_none(val: "_WireVal") -> "str | None":
    """Pass a ``str`` through unchanged; map anything else to ``None``."""
    if isinstance(val, str):
        return val
    return None
| 1166 | |
def _int_or(val: "_WireVal", default: int) -> int:
    """Return *val* when it is a genuine int, otherwise *default*.

    ``bool`` is a subclass of ``int`` in Python, so ``True``/``False`` would
    pass a plain ``isinstance(val, int)`` check; a boolean on the wire is
    not a count and is therefore replaced by *default* as well.
    """
    if isinstance(val, bool) or not isinstance(val, int):
        return default
    return val
| 1170 | |
| 1171 | def _coerce_commit_dict(raw: _WireDict) -> CommitDict: |
| 1172 | """Extract typed scalar fields from *raw* into a :class:`~muse.core.store.CommitDict`. |
| 1173 | |
| 1174 | Only primitive fields are validated here; ``structured_delta`` is |
| 1175 | preserved as-is because :class:`~muse.core.store.CommitRecord.from_dict` |
| 1176 | already handles it gracefully. |
| 1177 | """ |
| 1178 | metadata_raw = raw.get("metadata") |
| 1179 | metadata: Metadata = {} |
| 1180 | if isinstance(metadata_raw, dict): |
| 1181 | for k, v in metadata_raw.items(): |
| 1182 | if isinstance(k, str) and isinstance(v, str): |
| 1183 | metadata[k] = v |
| 1184 | |
| 1185 | reviewed_by_raw = raw.get("reviewed_by") |
| 1186 | reviewed_by: list[str] = [] |
| 1187 | if isinstance(reviewed_by_raw, list): |
| 1188 | for item in reviewed_by_raw: |
| 1189 | if isinstance(item, str): |
| 1190 | reviewed_by.append(item) |
| 1191 | |
| 1192 | breaking_changes_raw = raw.get("breaking_changes") |
| 1193 | breaking_changes: list[str] = [] |
| 1194 | if isinstance(breaking_changes_raw, list): |
| 1195 | for item in breaking_changes_raw: |
| 1196 | if isinstance(item, str): |
| 1197 | breaking_changes.append(item) |
| 1198 | |
| 1199 | sem_ver_raw = raw.get("sem_ver_bump") |
| 1200 | sem_ver: SemVerBump |
| 1201 | if sem_ver_raw == "major": |
| 1202 | sem_ver = "major" |
| 1203 | elif sem_ver_raw == "minor": |
| 1204 | sem_ver = "minor" |
| 1205 | elif sem_ver_raw == "patch": |
| 1206 | sem_ver = "patch" |
| 1207 | else: |
| 1208 | sem_ver = "none" |
| 1209 | |
| 1210 | return CommitDict( |
| 1211 | commit_id=_str(raw.get("commit_id")), |
| 1212 | repo_id=_str(raw.get("repo_id")), |
| 1213 | branch=_str(raw.get("branch") or raw.get("created_on_branch")), |
| 1214 | snapshot_id=_str(raw.get("snapshot_id")), |
| 1215 | message=_str(raw.get("message")), |
| 1216 | committed_at=_str(raw.get("committed_at")), |
| 1217 | parent_commit_id=_str_or_none(raw.get("parent_commit_id")), |
| 1218 | parent2_commit_id=_str_or_none(raw.get("parent2_commit_id")), |
| 1219 | author=_str(raw.get("author")), |
| 1220 | metadata=metadata, |
| 1221 | structured_delta=None, |
| 1222 | sem_ver_bump=sem_ver, |
| 1223 | breaking_changes=breaking_changes, |
| 1224 | agent_id=_str(raw.get("agent_id")), |
| 1225 | model_id=_str(raw.get("model_id")), |
| 1226 | toolchain_id=_str(raw.get("toolchain_id")), |
| 1227 | prompt_hash=_str(raw.get("prompt_hash")), |
| 1228 | signature=_str(raw.get("signature")), |
| 1229 | signer_public_key=_str(raw.get("signer_public_key")), |
| 1230 | signer_key_id=_str(raw.get("signer_key_id")), |
| 1231 | reviewed_by=reviewed_by, |
| 1232 | test_runs=_int_or(raw.get("test_runs"), 0), |
| 1233 | ) |
| 1234 | |
def _coerce_snapshot_dict(raw: _WireDict) -> SnapshotDict:
    """Extract typed fields from *raw* into a :class:`~muse.core.store.SnapshotDict`.

    Two wire formats are accepted:

    1. Delta format (fetch mpack): ``{snapshot_id, parent_snapshot_id,
       delta_upsert, delta_remove}`` — selected when ``manifest`` is absent
       or null and ``delta_upsert`` is a dict.  The delta fields are passed
       through so ``_apply_snapshot_deltas`` can reconstruct the manifest;
       ``manifest`` is ``None`` and ``note`` is ``""`` in this form.
    2. Full-manifest format: ``{snapshot_id, manifest}`` — coerced into a
       ``SnapshotDict`` directly.

    In both formats only ``str -> str`` map pairs and string list items are
    kept, and a list-valued field that is not actually a list is treated
    as empty.
    """
    def _string_list(value: object) -> list[str]:
        # Guarding on ``list`` matters: iterating a stray str would explode
        # it into single characters, and a stray int would raise TypeError.
        if not isinstance(value, list):
            return []
        return [element for element in value if isinstance(element, str)]

    manifest_raw = raw.get("manifest")
    delta_upsert_raw = raw.get("delta_upsert")
    # ``directories`` is hashed into snapshot_id — dropping it produces a
    # SnapshotRecord whose snapshot_id never matches the recomputed hash,
    # making every snapshot with non-empty directories permanently unreadable.
    directories = _string_list(raw.get("directories"))

    # Delta format: no manifest, but delta_upsert present.
    if manifest_raw is None and isinstance(delta_upsert_raw, dict):
        return {  # type: ignore[return-value]
            "snapshot_id": _str(raw.get("snapshot_id")),
            "parent_snapshot_id": _str_or_none(raw.get("parent_snapshot_id")),
            "delta_upsert": {
                str(k): str(v) for k, v in delta_upsert_raw.items()
                if isinstance(k, str) and isinstance(v, str)
            },
            "delta_remove": _string_list(raw.get("delta_remove")),
            "directories": directories,
            "created_at": _str(raw.get("created_at")),
            "manifest": None,  # type: ignore[typeddict-item]
            "note": "",
        }

    # Full-manifest format.
    manifest: Manifest = {}
    if isinstance(manifest_raw, dict):
        manifest = {
            k: v for k, v in manifest_raw.items()
            if isinstance(k, str) and isinstance(v, str)
        }
    return SnapshotDict(
        snapshot_id=_str(raw.get("snapshot_id")),
        manifest=manifest,
        directories=directories,
        created_at=_str(raw.get("created_at")),
        note=_str(raw.get("note", "")),
    )
| 1293 | |
| 1294 | |
| 1295 | # --------------------------------------------------------------------------- |
| 1296 | # Factory |
| 1297 | # --------------------------------------------------------------------------- |
| 1298 | |
def make_transport(url: str) -> "HttpTransport":
    """Build the transport used to talk to a remote.

    Args:
        url: Remote repository URL.  It does not influence construction:
            a fresh :class:`HttpTransport` is returned for every URL.

    Returns:
        A new ``HttpTransport`` instance implementing :class:`MuseTransport`.
    """
    transport = HttpTransport()
    return transport
File History
8 commits
sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b
fix: try fetch/presign before fetch/mpack to avoid Cloudfla…
Sonnet 4.6
patch
23 hours ago
sha256:6b91dc90e1c59c5209b764a276c1ee824bd369320a454a403d28ce79890103f2
fix: stream mpack downloads without size cap
Sonnet 4.6
minor
⚠
5 days ago
sha256:f6cd81bc71702f5c1c6890bd39aaba994fe58c75f019d7c03934724fa2739bb4
fix: carry dev changes harmony dropped in merge — detached …
Sonnet 4.6
minor
⚠
10 days ago
sha256:b37d33a9dbf176b32955b34e5c9e983c4d8c6e7bfa4e2714edc938f31e721561
update transport logs
Human
minor
⚠
11 days ago
sha256:79ffe87f5fe2ec146e35f05521218bbf54dffdb0440c07f970bad05f16efb89f
chore: merge main — carry all urllib/typing/test fixes from dev
Sonnet 4.6
minor
⚠
13 days ago
sha256:0bea7600d1eee83e87950be49933b1006fa9dc2c71e7c4ee748d324f61138156
chore: bump version to 0.2.0rc11; fix typing audit violatio…
Sonnet 4.6
minor
⚠
13 days ago
sha256:633dfa2940e97bf1a3d04996c772027a57d70d103f1693c96da04969613dba6c
fix: urllib migration regressions — force flag, job_id, Con…
Sonnet 4.6
minor
⚠
13 days ago
sha256:00cec040ce5f70bf8191d2ce6a9f308fbde553911068f0c303217f4eb6d4e775
fix: migrate httpx → urllib in transport.py and push.py; fi…
Sonnet 4.6
minor
⚠
13 days ago