archive.py
python
sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b
fix: try fetch/presign before fetch/mpack to avoid Cloudfla…
Sonnet 4.6
patch
7 days ago
| 1 | """``muse archive`` — export a snapshot as a portable archive. |
| 2 | |
| 3 | Creates a ``tar.gz`` or ``zip`` archive from any historical snapshot — |
| 4 | HEAD by default. The archive contains only the tracked files (the contents |
| 5 | of the snapshot at that point in time), making it the canonical way to |
| 6 | distribute a specific version without exposing ``.muse/`` internals. |
| 7 | |
| 8 | Commit reference |
| 9 | ---------------- |
| 10 | ``--ref`` accepts any reference understood by ``resolve_commit_ref``: |
| 11 | |
| 12 | - Omitted or ``HEAD`` — the most recent commit on the current branch. |
| 13 | - A branch name — the tip commit of that branch. |
| 14 | - ``HEAD~N`` — *N* first-parent steps back from HEAD. |
| 15 | - A full or abbreviated commit SHA. |
| 16 | |
| 17 | Formats |
| 18 | ------- |
| 19 | - ``tar.gz`` (default) — gzip-compressed POSIX tar. |
| 20 | - ``zip`` — Deflate-compressed ZIP. |
| 21 | |
| 22 | Security model |
| 23 | -------------- |
| 24 | - Every archive entry name is validated by ``_safe_arcname`` before being |
| 25 | written. Entries with ``..`` path segments, absolute paths, or null bytes |
| 26 | are skipped and reported with a logged warning — this prevents both zip-slip and |
| 27 | tar-slip path-traversal attacks regardless of what is stored in a snapshot. |
| 28 | - ``--prefix`` is validated up-front for ``..`` segments so users get a clear |
| 29 | error before any I/O begins. |
| 30 | - ``--output`` paths that would write outside the current directory are |
| 31 | permitted (agents often write to ``/tmp/`` or explicit destinations), but |
| 32 | the directory must already exist. |
| 33 | - All user-supplied strings are sanitized via ``sanitize_display()`` before |
| 34 | appearing in human-readable terminal output. |
| 35 | - All error messages go to **stderr**; **stdout** carries only data. |
| 36 | |
| 37 | Agent UX |
| 38 | -------- |
| 39 | Pass ``--json`` for a machine-readable result. Pass ``--list`` to preview |
| 40 | what would be archived without writing anything to disk — useful for agents |
| 41 | that need to reason about snapshot contents before committing to a file. |
| 42 | |
| 43 | Usage:: |
| 44 | |
| 45 | muse archive # HEAD → <sha12>.tar.gz |
| 46 | muse archive --ref feat/audio # branch tip |
| 47 | muse archive --ref a1b2c3d4 # commit SHA prefix |
| 48 | muse archive --format zip # zip instead of tar.gz |
| 49 | muse archive --output release-v1.0.zip # custom output path |
| 50 | muse archive --prefix myproject/ # directory prefix inside archive |
| 51 | muse archive --list # preview without writing |
| 52 | muse archive --list --json # agent-readable manifest |
| 53 | muse archive --json # machine-readable result |
| 54 | |
| 55 | JSON schema — normal output (``--json``):: |
| 56 | |
| 57 | { |
| 58 | "path": "<output file path>", |
| 59 | "format": "tar.gz" | "zip", |
| 60 | "file_count": <int>, |
| 61 | "bytes": <int>, |
| 62 | "commit_id": "<sha256:…>", |
| 63 | "snapshot_id": "<sha256:…>", |
| 64 | "message": "<commit message>", |
| 65 | "branch": "<branch name>", |
| 66 | "author": "<author>", |
| 67 | "agent_id": "<agent id or empty>", |
| 68 | "model_id": "<model id or empty>", |
| 69 | "committed_at": "<ISO-8601>", |
| 70 | "ref": "<ref used or null>", |
| 71 | "prefix": "<directory prefix or empty>" |
| 72 | } |
| 73 | |
| 74 | JSON schema — list mode (``--list --json``):: |
| 75 | |
| 76 | { |
| 77 | "commit_id": "<sha256:…>", |
| 78 | "snapshot_id": "<sha256:…>", |
| 79 | "message": "<commit message>", |
| 80 | "branch": "<branch name>", |
| 81 | "author": "<author>", |
| 82 | "committed_at": "<ISO-8601>", |
| 83 | "ref": "<ref used or null>", |
| 84 | "prefix": "<directory prefix or empty>", |
| 85 | "file_count": <int>, |
| 86 | "entries": [ |
| 87 | {"path": "<archive path>", "object_id": "<sha256:…>"}, |
| 88 | ... |
| 89 | ] |
| 90 | } |
| 91 | |
| 92 | Exit codes |
| 93 | ---------- |
| 94 | - 0 — success |
| 95 | - 1 — bad arguments (bad format, bad prefix, missing commit, output dir missing) |
| 96 | - 2 — not inside a Muse repository |
| 97 | - 3 — internal error (snapshot data missing; missing objects are skipped with a warning) |
| 98 | """ |
| 99 | |
| 100 | import argparse |
| 101 | import json |
| 102 | import logging |
| 103 | import pathlib |
| 104 | import sys |
| 105 | import tarfile |
| 106 | import zipfile |
| 107 | from typing import TypedDict |
| 108 | |
| 109 | from muse.core.types import split_id |
| 110 | from muse.core.envelope import EnvelopeJson, make_envelope |
| 111 | from muse.core.errors import ExitCode |
| 112 | from muse.core.object_store import object_path, read_object |
| 113 | from muse.core.repo import require_repo |
| 114 | from muse.core.timing import start_timer |
| 115 | from muse.core.refs import ( |
| 116 | get_head_commit_id, |
| 117 | read_current_branch, |
| 118 | ) |
| 119 | from muse.core.commits import ( |
| 120 | read_commit, |
| 121 | resolve_commit_ref, |
| 122 | ) |
| 123 | from muse.core.snapshots import read_snapshot |
| 124 | from muse.core.validation import sanitize_display |
| 125 | from muse.core.types import Manifest |
| 126 | |
| 127 | logger = logging.getLogger(__name__) |
| 128 | |
| 129 | _FORMAT_CHOICES = {"tar.gz", "zip"} |
| 130 | |
| 131 | # --------------------------------------------------------------------------- |
| 132 | # Typed JSON schemas |
| 133 | # --------------------------------------------------------------------------- |
| 134 | |
class _ArchiveJson(EnvelopeJson):
    """Typed shape of the ``muse archive --json`` payload in write mode.

    Extends ``EnvelopeJson`` with the archive-specific keys declared below.
    Each key is described by the comment directly above it.
    """

    # Absolute or relative path to the archive file that was written.
    path: str
    # Archive format: ``"tar.gz"`` or ``"zip"``.
    format: str
    # Number of files successfully written into the archive.
    file_count: int
    # Size of the archive file on disk, in bytes.
    bytes: int
    # Full ``sha256:…`` ID of the commit that was archived.
    commit_id: str
    # Full ``sha256:…`` snapshot ID — the content-addressed tree at that commit.
    snapshot_id: str
    # Commit message.
    message: str
    # Branch that was current when the archive was created.
    branch: str
    # Author field from the commit record.
    author: str
    # Agent identity string (empty for human commits).
    agent_id: str
    # Model identifier (empty for human commits).
    model_id: str
    # ISO-8601 commit timestamp.
    committed_at: str
    # The ``--ref`` value passed by the caller, or ``null`` for HEAD.
    ref: str | None
    # The ``--prefix`` value used (empty string if none).
    prefix: str
| 184 | |
class _ListEntryJson(TypedDict):
    """A single file entry as reported by ``muse archive --list --json``."""

    # Entry name inside the archive.
    path: str
    # Object ID of the entry's content.
    object_id: str
| 190 | |
class _ListJson(EnvelopeJson):
    """Typed shape of the ``muse archive --list --json`` payload.

    Extends ``EnvelopeJson`` with the preview-specific keys declared below.
    Each key is described by the comment directly above it.
    """

    # Full ``sha256:…`` commit ID.
    commit_id: str
    # Full ``sha256:…`` snapshot ID.
    snapshot_id: str
    # Commit message.
    message: str
    # Current branch name.
    branch: str
    # Author field from the commit record.
    author: str
    # ISO-8601 commit timestamp.
    committed_at: str
    # ``--ref`` value passed by the caller, or ``null`` for HEAD.
    ref: str | None
    # ``--prefix`` value used (empty string if none).
    prefix: str
    # Total number of entries that would be written.
    file_count: int
    # One ``{"path": <archive path>, "object_id": <sha256:…>}`` dict per file.
    entries: list[_ListEntryJson]
| 229 | |
| 230 | # --------------------------------------------------------------------------- |
| 231 | # Path safety |
| 232 | # --------------------------------------------------------------------------- |
| 233 | |
| 234 | def _safe_arcname(prefix: str, rel_path: str) -> str | None: |
| 235 | """Build a safe archive entry name, guarding against zip-slip and tar-slip. |
| 236 | |
| 237 | Validates both the caller-supplied *prefix* and the per-file *rel_path* |
| 238 | from the snapshot manifest. Returns the combined archive path string on |
| 239 | success, or ``None`` if either component is unsafe — the caller must skip |
| 240 | ``None`` entries and log a warning. |
| 241 | |
| 242 | Safety rules enforced |
| 243 | --------------------- |
| 244 | - *rel_path* must be non-empty and must not normalise to ``"."``. |
| 245 | - *rel_path* must not be an absolute path. |
| 246 | - Neither *prefix* nor *rel_path* may contain ``..`` path components. |
| 247 | - Null bytes in either argument are rejected (they confuse archive readers |
| 248 | and some OS path APIs). |
| 249 | |
| 250 | Args: |
| 251 | prefix: Directory prefix to prepend inside the archive (may be empty). |
| 252 | rel_path: Relative file path from the snapshot manifest. |
| 253 | |
| 254 | Returns: |
| 255 | The safe archive entry name, or ``None`` if the entry should be skipped. |
| 256 | """ |
| 257 | if not rel_path or "\x00" in rel_path or "\x00" in prefix: |
| 258 | return None |
| 259 | |
| 260 | clean_prefix = prefix.rstrip("/").strip() |
| 261 | if clean_prefix and ".." in clean_prefix.split("/"): |
| 262 | return None |
| 263 | |
| 264 | resolved = pathlib.PurePosixPath(rel_path) |
| 265 | if resolved.is_absolute() or ".." in resolved.parts: |
| 266 | return None |
| 267 | |
| 268 | safe_rel = str(resolved) |
| 269 | # PurePosixPath("") normalises to "." — reject it. |
| 270 | if not safe_rel or safe_rel == ".": |
| 271 | return None |
| 272 | |
| 273 | return f"{clean_prefix}/{safe_rel}" if clean_prefix else safe_rel |
| 274 | |
| 275 | # --------------------------------------------------------------------------- |
| 276 | # Manifest helpers |
| 277 | # --------------------------------------------------------------------------- |
| 278 | |
def _build_entries(
    root: pathlib.Path,
    manifest: Manifest,
    prefix: str,
) -> tuple[list[tuple[str, str, pathlib.Path]], list[str]]:
    """Turn a snapshot manifest into archive-ready entries plus a skip report.

    Manifest items are visited in sorted order. Each path is run through
    ``_safe_arcname``; paths it rejects are recorded as skipped. For the
    remaining paths the object file is located with ``object_path`` and must
    exist on disk, otherwise the entry is recorded as skipped too.

    Args:
        root: Repository root.
        manifest: Snapshot manifest mapping relative path → object ID.
        prefix: Directory prefix to prepend inside the archive.

    Returns:
        ``(entries, skipped)``: *entries* is a list of
        ``(arcname, object_id, obj_path)`` triples ready to be written, and
        *skipped* holds display-safe descriptions of the entries left out.
    """
    ready: list[tuple[str, str, pathlib.Path]] = []
    rejected: list[str] = []

    for rel_path, object_id in sorted(manifest.items()):
        arcname = _safe_arcname(prefix, rel_path)
        if arcname is None:
            rejected.append(f"unsafe path: {sanitize_display(rel_path)}")
            continue

        obj_file = object_path(root, object_id)
        if obj_file.exists():
            ready.append((arcname, object_id, obj_file))
        else:
            rejected.append(
                f"missing object {object_id} for {sanitize_display(rel_path)}"
            )

    return ready, rejected
| 316 | |
| 317 | # --------------------------------------------------------------------------- |
| 318 | # Archive builders |
| 319 | # --------------------------------------------------------------------------- |
| 320 | |
| 321 | def _build_tar( |
| 322 | entries: list[tuple[str, str, pathlib.Path]], |
| 323 | output_path: pathlib.Path, |
| 324 | root: pathlib.Path | None = None, |
| 325 | ) -> int: |
| 326 | """Write a ``tar.gz`` archive from pre-validated *entries*. |
| 327 | |
| 328 | Each entry is a ``(arcname, object_id, obj_path)`` triple produced by |
| 329 | ``_build_entries`` — every path has already been validated for safety. |
| 330 | |
| 331 | Args: |
| 332 | entries: Validated ``(arcname, object_id, obj_path)`` triples. |
| 333 | output_path: Destination file path for the archive. |
| 334 | root: Repository root (used to resolve object content). |
| 335 | |
| 336 | Returns: |
| 337 | Number of files written into the archive. |
| 338 | """ |
| 339 | import io |
| 340 | count = 0 |
| 341 | with tarfile.open(output_path, "w:gz") as tar: |
| 342 | for arcname, object_id, obj_path in entries: |
| 343 | if root is not None: |
| 344 | content = read_object(root, object_id) |
| 345 | if content is None: |
| 346 | continue |
| 347 | info = tarfile.TarInfo(name=arcname) |
| 348 | info.size = len(content) |
| 349 | tar.addfile(info, io.BytesIO(content)) |
| 350 | else: |
| 351 | tar.add(str(obj_path), arcname=arcname, recursive=False) |
| 352 | count += 1 |
| 353 | return count |
| 354 | |
| 355 | def _build_zip( |
| 356 | entries: list[tuple[str, str, pathlib.Path]], |
| 357 | output_path: pathlib.Path, |
| 358 | root: pathlib.Path | None = None, |
| 359 | ) -> int: |
| 360 | """Write a ``zip`` archive from pre-validated *entries*. |
| 361 | |
| 362 | Each entry is a ``(arcname, object_id, obj_path)`` triple produced by |
| 363 | ``_build_entries`` — every path has already been validated for safety. |
| 364 | |
| 365 | Args: |
| 366 | entries: Validated ``(arcname, object_id, obj_path)`` triples. |
| 367 | output_path: Destination file path for the archive. |
| 368 | root: Repository root (used to resolve object content). |
| 369 | |
| 370 | Returns: |
| 371 | Number of files written into the archive. |
| 372 | """ |
| 373 | count = 0 |
| 374 | with zipfile.ZipFile(output_path, "w", compression=zipfile.ZIP_DEFLATED) as zf: |
| 375 | for arcname, object_id, obj_path in entries: |
| 376 | if root is not None: |
| 377 | content = read_object(root, object_id) |
| 378 | if content is None: |
| 379 | continue |
| 380 | zf.writestr(arcname, content) |
| 381 | else: |
| 382 | zf.write(str(obj_path), arcname=arcname) |
| 383 | count += 1 |
| 384 | return count |
| 385 | |
| 386 | # --------------------------------------------------------------------------- |
| 387 | # Registration |
| 388 | # --------------------------------------------------------------------------- |
| 389 | |
def register(subparsers: "argparse._SubParsersAction[argparse.ArgumentParser]") -> None:
    """Attach the ``archive`` subcommand to *subparsers*.

    The subparser uses the module docstring as its description and dispatches
    to ``run``. Options are registered in this order:

    ``--ref REF``
        Reference to archive; defaults to ``None`` (HEAD).
    ``--format`` / ``-f``
        Archive format, one of ``_FORMAT_CHOICES``; defaults to ``tar.gz``.
        Stored as ``fmt``.
    ``--output`` / ``-o``
        Output file path; defaults to ``None`` so ``run`` can derive a name.
    ``--prefix``
        Directory prefix inside the archive; defaults to the empty string.
    ``--list``
        Preview flag, stored as ``list_mode``.
    ``--json`` / ``-j``
        Machine-readable output flag, stored as ``json_out``.
    """
    archive_parser = subparsers.add_parser(
        "archive",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__,
        help="Export any historical snapshot as a portable archive.",
    )
    add = archive_parser.add_argument

    add(
        "--ref",
        help="Branch, tag, or commit SHA to archive (default: HEAD).",
        default=None,
    )
    add(
        "--format", "-f",
        dest="fmt",
        choices=sorted(_FORMAT_CHOICES),
        default="tar.gz",
        help="Archive format: tar.gz or zip (default: tar.gz).",
    )

    output_help = (
        "Output file path (default: <sha12>.<format>). "
        "The destination directory must already exist."
    )
    add("--output", "-o", help=output_help, default=None)

    add(
        "--prefix",
        help="Directory prefix inside the archive (e.g. myproject/).",
        default="",
    )

    list_help = (
        "Preview what would be archived without writing a file. "
        "Compatible with --ref, --prefix, and --json."
    )
    add(
        "--list",
        dest="list_mode",
        action="store_true",
        default=False,
        help=list_help,
    )
    add(
        "--json", "-j",
        dest="json_out",
        action="store_true",
        help="Emit machine-readable JSON to stdout instead of human text.",
    )

    archive_parser.set_defaults(func=run)
| 460 | |
| 461 | # --------------------------------------------------------------------------- |
| 462 | # Command implementation |
| 463 | # --------------------------------------------------------------------------- |
| 464 | |
def run(args: argparse.Namespace) -> None:
    """Export any historical snapshot as a portable archive.

    Resolves the commit ref, loads the snapshot manifest, validates every path
    for traversal safety, then writes a ``tar.gz`` or ``zip`` archive containing
    only tracked files (no ``.muse/`` internals). Use ``--list`` to preview
    entries without writing anything to disk.

    Agent quickstart
    ----------------
    ::

        muse archive --json                      # HEAD → tar.gz
        muse archive --ref feat/audio --json     # branch tip
        muse archive --format zip --output out.zip --json
        muse archive --list --json               # preview without writing

    JSON fields
    -----------
    path            Output file path written.
    format          ``"tar.gz"`` or ``"zip"``.
    file_count      Number of files in the archive.
    bytes           Archive size on disk in bytes.
    commit_id       Full ``sha256:…`` commit ID archived.
    snapshot_id     Full ``sha256:…`` snapshot ID.
    message         Commit message.
    branch          Branch name at archive time.
    author          Author field from the commit record.
    agent_id        Agent identity (empty for human commits).
    model_id        Model identifier (empty for human commits).
    committed_at    ISO-8601 commit timestamp.
    ref             ``--ref`` value passed, or ``null`` for HEAD.
    prefix          ``--prefix`` value used (empty string if none).

    With ``--list``, ``path``/``format``/``bytes``/``agent_id``/``model_id``
    are absent and an ``entries`` list is added — each entry: ``path``
    (archive path), ``object_id`` (sha256).

    Exit codes
    ----------
    0   Archive written (or list preview complete).
    1   Bad prefix (``..`` segments or absolute), output directory missing,
        ref not found, or no commits on the branch.
    2   Not inside a Muse repository.
    3   Snapshot missing. Entries with unsafe paths or missing objects are
        skipped with a logged warning and do not change the exit code.

    Args:
        args: Parsed arguments carrying ``ref``, ``fmt``, ``output``,
            ``prefix``, ``list_mode`` and ``json_out``.

    Raises:
        SystemExit: With ``ExitCode.USER_ERROR`` or ``ExitCode.INTERNAL_ERROR``
            for the failure cases listed above.
    """
    elapsed = start_timer()
    ref: str | None = args.ref
    fmt: str = args.fmt
    output: str | None = args.output
    prefix: str = args.prefix
    list_mode: bool = args.list_mode
    json_out: bool = args.json_out

    # Validate prefix up-front so the user gets a clear error before any I/O.
    clean_prefix = prefix.rstrip("/").strip()
    if clean_prefix and ".." in clean_prefix.split("/"):
        print(
            f"❌ --prefix must not contain '..' segments: {sanitize_display(prefix)}",
            file=sys.stderr,
        )
        raise SystemExit(ExitCode.USER_ERROR)
    # An absolute prefix would produce absolute entry names inside the archive,
    # which is a path-traversal hazard for whoever extracts it.
    if clean_prefix.startswith("/"):
        print(
            f"❌ --prefix must be a relative path: {sanitize_display(prefix)}",
            file=sys.stderr,
        )
        raise SystemExit(ExitCode.USER_ERROR)

    root = require_repo()
    branch = read_current_branch(root)

    # Resolve the commit reference.
    if ref is None:
        commit_id = get_head_commit_id(root, branch)
        if not commit_id:
            print("❌ No commits yet on this branch.", file=sys.stderr)
            raise SystemExit(ExitCode.USER_ERROR)
        commit = read_commit(root, commit_id)
    else:
        # Try as a branch name first (e.g. "main", "feat/audio"), then fall
        # through to resolve_commit_ref for SHA prefixes and HEAD~N syntax.
        commit = None
        try:
            branch_tip_id = get_head_commit_id(root, ref)
            if branch_tip_id:
                commit = read_commit(root, branch_tip_id)
        except Exception:
            # Deliberately best-effort: a ref that is not a valid branch name
            # must not abort the command, but leave a trace for debugging.
            logger.debug(
                "Ref %r did not resolve as a branch name; trying commit refs",
                ref,
                exc_info=True,
            )
        if commit is None:
            commit = resolve_commit_ref(root, branch, ref)

    if commit is None:
        print(
            f"❌ Ref {sanitize_display(ref or 'HEAD')!r} not found.",
            file=sys.stderr,
        )
        raise SystemExit(ExitCode.USER_ERROR)

    snapshot = read_snapshot(root, commit.snapshot_id)
    if snapshot is None:
        print(
            f"❌ Snapshot {commit.snapshot_id} not found.",
            file=sys.stderr,
        )
        raise SystemExit(ExitCode.INTERNAL_ERROR)

    # Build and validate the entry list (shared between list and write modes).
    entries, skipped = _build_entries(root, snapshot.manifest, clean_prefix)

    for desc in skipped:
        logger.warning("⚠️ Skipping %s", desc)

    # --- List mode: preview without writing ---
    if list_mode:
        list_entries: list[_ListEntryJson] = [
            _ListEntryJson(path=arcname, object_id=object_id)
            for arcname, object_id, _ in entries
        ]
        if json_out:
            print(json.dumps(_ListJson(
                **make_envelope(elapsed),
                commit_id=commit.commit_id,
                snapshot_id=commit.snapshot_id,
                message=commit.message,
                branch=branch,
                author=commit.author,
                committed_at=commit.committed_at.isoformat(),
                ref=ref,
                prefix=clean_prefix,
                file_count=len(entries),
                entries=list_entries,
            )))
            return

        print(
            f"ℹ️ Snapshot {commit.commit_id} {sanitize_display(commit.message)}\n"
            f" {len(entries)} file(s) would be archived:"
        )
        for entry in list_entries:
            # Entry names come from the snapshot manifest and --prefix, so
            # they are sanitized before reaching the terminal.
            print(f" {sanitize_display(entry['path'])}")
        if skipped:
            print(f"\n ⚠️ {len(skipped)} entry/entries skipped (unsafe or missing).")
        return

    # --- Write mode: build the archive ---
    # Use bare hex for the default filename — colons are invalid on Windows.
    _, _commit_hex = split_id(commit.commit_id)
    out_name = output or f"{_commit_hex}.{fmt}"
    out_path = pathlib.Path(out_name)

    # Validate that the destination directory exists before doing any work.
    # is_dir() also rejects a parent that exists but is a regular file, which
    # would otherwise surface later as a traceback from the archive writer.
    if out_path.parent != pathlib.Path(".") and not out_path.parent.is_dir():
        print(
            f"❌ Output directory does not exist: {sanitize_display(str(out_path.parent))}",
            file=sys.stderr,
        )
        raise SystemExit(ExitCode.USER_ERROR)

    if fmt == "tar.gz":
        count = _build_tar(entries, out_path, root=root)
    else:
        count = _build_zip(entries, out_path, root=root)

    archive_bytes = out_path.stat().st_size if out_path.exists() else 0

    if json_out:
        print(json.dumps(_ArchiveJson(
            **make_envelope(elapsed),
            path=str(out_path),
            format=fmt,
            file_count=count,
            bytes=archive_bytes,
            commit_id=commit.commit_id,
            snapshot_id=commit.snapshot_id,
            message=commit.message,
            branch=branch,
            author=commit.author,
            agent_id=commit.agent_id,
            model_id=commit.model_id,
            committed_at=commit.committed_at.isoformat(),
            ref=ref,
            prefix=clean_prefix,
        )))
        return

    size_kb = archive_bytes / 1024
    # The output path may come from --output, so sanitize it for display.
    print(
        f"✅ Archive: {sanitize_display(str(out_path))} ({count} file(s), {size_kb:.1f} KiB)\n"
        f" Commit: {commit.commit_id} {sanitize_display(commit.message)}"
    )
File History
1 commit
sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b
fix: try fetch/presign before fetch/mpack to avoid Cloudfla…
Sonnet 4.6
patch
7 days ago