ls_tree.py
python
sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b
fix: try fetch/presign before fetch/mpack to avoid Cloudfla…
Sonnet 4.6
patch
7 days ago
| 1 | """``muse ls-tree`` — list the contents of a tree object at a given ref. |
| 2 | |
| 3 | Displays the files and synthetic directory entries recorded in a commit's |
| 4 | snapshot manifest. Because Muse uses a flat manifest (path → object_id) |
| 5 | instead of nested tree objects, directory entries are synthesized on the fly |
| 6 | from shared path prefixes. |
| 7 | |
| 8 | Modes |
| 9 | ----- |
| 10 | ``muse ls-tree HEAD`` |
| 11 | Non-recursive listing of the root. Blobs at the root level appear as |
| 12 | ``blob`` entries; any path that has children in a subdirectory is |
| 13 | collapsed into a synthetic ``tree`` entry (e.g. ``src/``). |
| 14 | |
| 15 | ``muse ls-tree -r HEAD`` |
| 16 | Recursive listing — all blobs, no synthetic tree entries. |
| 17 | |
| 18 | ``muse ls-tree HEAD src/`` |
| 19 | Scope the listing to the ``src/`` prefix. |
| 20 | |
| 21 | ``muse ls-tree -d HEAD`` |
| 22 | Show only synthetic directory (tree) entries, not blobs. |
| 23 | |
| 24 | Output formats |
| 25 | -------------- |
| 26 | Default text:: |
| 27 | |
| 28 | <mode> <type> <object_id>\\t<path> |
| 29 | |
| 30 | ``--name-only`` text:: |
| 31 | |
| 32 | <path> |
| 33 | |
| 34 | ``--long`` (``-l``) text adds the byte size between ``<object_id>`` and the |
| 35 | tab:: |
| 36 | |
| 37 | <mode> <type> <object_id> <size>\\t<path> |
| 38 | |
| 39 | JSON (``--json``):: |
| 40 | |
| 41 | { |
| 42 | "status": "ok", |
| 43 | "error": "", |
| 44 | "treeish": "HEAD", |
| 45 | "commit_id": "sha256:<hex>", |
| 46 | "path_prefix": null, |
| 47 | "recursive": false, |
| 48 | "entry_count": 3, |
| 49 | "entries": [ |
| 50 | {"mode": "100644", "type": "blob", |
| 51 | "object_id": "sha256:<hex>", "size": 12, "path": "file.py"}, |
| 52 | {"mode": "040000", "type": "tree", |
| 53 | "object_id": "sha256:<hex>", "size": null, "path": "src/"} |
| 54 | ], |
| 55 | "duration_ms": 1.2, |
| 56 | "exit_code": 0 |
| 57 | } |
| 58 | |
| 59 | All keys are always present so agents can read them without ``dict.get`` |
| 60 | guards. ``"status"`` is always ``"ok"`` on success. |
| 61 | |
| 62 | ``"path_prefix"`` is ``null`` when no path argument was given; otherwise it |
| 63 | echoes the normalised prefix that was applied. |
| 64 | |
| 65 | ``"recursive"`` reflects whether ``-r`` was passed. |
| 66 | |
| 67 | ``"entry_count"`` equals ``len(entries)`` — a convenient shortcut that avoids |
| 68 | parsing the array just to count it. |
| 69 | |
| 70 | When ``--name-only`` is combined with ``--json`` the entries contain only |
| 71 | ``path`` (no ``object_id``, ``mode``, ``type``, or ``size``). |
| 72 | |
| 73 | JSON error schema (exit non-zero):: |
| 74 | |
| 75 | { |
| 76 | "status": "error", |
| 77 | "error": "<human-readable message>", |
| 78 | "exit_code": <int> |
| 79 | } |
| 80 | |
| 81 | When ``--json`` is active all errors go to stdout as JSON — no prose on |
| 82 | stderr. Agents should parse stdout and check ``status``. |
| 83 | |
| 84 | Exit codes:: |
| 85 | |
| 86 | 0 — success |
| 87 | 1 — user error: bad ref, path traversal, ANSI in ref |
| 88 | 2 — not a Muse repository |
| 89 | 3 — I/O error |
| 90 | """ |
| 91 | |
| 92 | import argparse |
| 93 | import hashlib |
| 94 | import json as _json |
| 95 | import logging |
| 96 | import pathlib |
| 97 | import sys |
| 98 | from typing import TypedDict |
| 99 | |
| 100 | from muse.core.types import long_id |
| 101 | from muse.core.paths import ref_path as _ref_path |
| 102 | from muse.core.envelope import EnvelopeJson, make_envelope |
| 103 | from muse.core.errors import ExitCode |
| 104 | from muse.core.object_store import read_object |
| 105 | from muse.core.repo import require_repo |
| 106 | from muse.core.refs import read_ref |
| 107 | from muse.core.refs import ( |
| 108 | get_head_commit_id, |
| 109 | read_current_branch, |
| 110 | ) |
| 111 | from muse.core.commits import ( |
| 112 | read_commit, |
| 113 | resolve_commit_ref, |
| 114 | ) |
| 115 | from muse.core.snapshots import read_snapshot |
| 116 | from muse.core.validation import sanitize_display |
| 117 | from muse.core.timing import start_timer |
| 118 | |
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Mode strings reported in the ``mode`` field of each listing entry.
_BLOB_MODE = "100644"  # used for every file (blob) entry
_TREE_MODE = "040000"  # used for every synthesized directory (tree) entry
| 123 | |
| 124 | # --------------------------------------------------------------------------- |
| 125 | # Wire-format TypedDicts |
| 126 | # --------------------------------------------------------------------------- |
| 127 | |
| 128 | type _ManifestMap = dict[str, str] |
| 129 | |
| 130 | class _LsTreeEntry(TypedDict, total=False): |
| 131 | mode: str |
| 132 | type: str |
| 133 | object_id: str |
| 134 | size: int | None |
| 135 | path: str |
| 136 | |
class _LsTreeJson(EnvelopeJson):
    """Success payload printed by ``muse ls-tree --json``.

    Extends :class:`~muse.core.envelope.EnvelopeJson` with the fields that
    are specific to a tree listing.  The command always populates every key
    below, so consumers can index the payload directly instead of using
    ``dict.get``.
    """

    status: str                  # "ok" on success
    error: str                   # "" on success
    treeish: str                 # the ref as supplied by the caller
    commit_id: str               # commit the ref resolved to
    path_prefix: str | None      # None when no PATH argument was given
    recursive: bool              # mirrors the -r flag
    entry_count: int             # always len(entries)
    entries: list[_LsTreeEntry]  # the listing rows
| 153 | |
class _LsTreeErrorJson(EnvelopeJson):
    """Failure payload printed by ``muse ls-tree --json``.

    Extends :class:`~muse.core.envelope.EnvelopeJson` with the error status
    and the human-readable message.
    """

    status: str  # "error"
    error: str   # human-readable description of what went wrong
| 158 | |
| 159 | # --------------------------------------------------------------------------- |
| 160 | # Internal helpers |
| 161 | # --------------------------------------------------------------------------- |
| 162 | |
def _emit_error(json_out: bool, msg: str, code: ExitCode, elapsed: float) -> None:
    """Report a failure and terminate via ``SystemExit``. Never returns.

    Args:
        json_out: When True, print a JSON error payload on stdout so machine
                  consumers always receive parseable output; otherwise print
                  a sanitised one-line message on stderr.
        msg:      Human-readable error description.
        code:     Exit code; embedded in the JSON envelope and used as the
                  ``SystemExit`` argument.
        elapsed:  Timer value forwarded to ``make_envelope``.

    Raises:
        SystemExit: always, carrying *code*.
    """
    if not json_out:
        # Text mode: prose goes to stderr, sanitised for terminal display.
        print(f"❌ {sanitize_display(msg)}", file=sys.stderr)
        raise SystemExit(code)

    # JSON mode: the error travels on stdout inside the standard envelope.
    payload = _LsTreeErrorJson(
        **make_envelope(elapsed, exit_code=int(code)),
        status="error",
        error=msg,
    )
    print(_json.dumps(payload))
    raise SystemExit(code)
| 178 | |
def _synthetic_tree_id(manifest: _ManifestMap, prefix: str) -> str:
    """Derive a deterministic object ID for the synthetic directory *prefix*.

    Every manifest path that starts with *prefix* (children at any depth)
    contributes one ``<path>\\x00<object_id>\\n`` record to a SHA-256 digest;
    records are fed in sorted path order so the result does not depend on
    dict ordering.

    Args:
        manifest: Full flat manifest (path → object_id).
        prefix:   Directory prefix ending with ``/`` (e.g. ``"src/"``).

    Returns:
        The hex digest passed through ``long_id``.
    """
    digest = hashlib.sha256()
    members = sorted(p for p in manifest if p.startswith(prefix))
    for member in members:
        record = f"{member}\x00{manifest[member]}\n"
        digest.update(record.encode())
    return long_id(digest.hexdigest())
| 198 | |
def _build_tree_entries(
    manifest: dict[str, str],
    path_prefix: str,
    recursive: bool,
) -> list[dict]:
    """Produce the listing rows for *path_prefix*.

    Args:
        manifest:    Full flat manifest (path → object_id).
        path_prefix: Repo-relative POSIX prefix that scopes the listing;
                     ``""`` for the root, ``"src/"`` for a subdirectory.
        recursive:   True  → one blob row per manifest path under the prefix,
                              no tree rows.
                     False → immediate children only: a blob row for each
                              file directly at this level and one synthetic
                              tree row per distinct subdirectory.

    Returns:
        Rows sorted by ``path``.  Each row has the keys ``mode``, ``type``,
        ``object_id``, ``size`` and ``path``; ``size`` is always ``None``
        here.
    """

    def _blob_row(blob_path: str, object_id: str) -> dict:
        # Key order matches the text/JSON column order.
        return {
            "mode": _BLOB_MODE,
            "type": "blob",
            "object_id": object_id,
            "size": None,
            "path": blob_path,
        }

    # Everything under the prefix, in sorted path order.
    scoped = [
        (blob_path, object_id)
        for blob_path, object_id in sorted(manifest.items())
        if blob_path.startswith(path_prefix)
    ]

    if recursive:
        return [_blob_row(blob_path, object_id) for blob_path, object_id in scoped]

    rows: list[dict] = []
    emitted_dirs: set[str] = set()
    offset = len(path_prefix)

    for blob_path, object_id in scoped:
        # Split the prefix-relative remainder at its first "/".
        head, sep, _rest = blob_path[offset:].partition("/")
        if not sep:
            # No further "/" → the file lives directly at this level.
            rows.append(_blob_row(blob_path, object_id))
            continue

        # Otherwise the path descends into a subdirectory; list that
        # directory once, the first time it is encountered.
        subdir = path_prefix + head + "/"
        if subdir in emitted_dirs:
            continue
        emitted_dirs.add(subdir)
        rows.append({
            "mode": _TREE_MODE,
            "type": "tree",
            "object_id": _synthetic_tree_id(manifest, subdir),
            "size": None,
            "path": subdir,
        })

    rows.sort(key=lambda row: row["path"])
    return rows
| 266 | |
def _resolve_manifest(
    root: pathlib.Path,
    treeish: str,
    json_out: bool,
    elapsed: float,
) -> tuple[str, dict[str, str]]:
    """Turn *treeish* into a ``(commit_id, manifest)`` pair.

    Lookup order:
      1. ``"HEAD"`` (compared case-insensitively) — tip of the current branch.
      2. Branch name — read through ``_ref_path`` / ``read_ref``.
      3. Anything else — delegated to ``resolve_commit_ref``.

    Args:
        root:     Absolute repo root.
        treeish:  Branch name, commit ID, or ``"HEAD"``.
        json_out: When True, errors go to stdout as JSON.
        elapsed:  Timer value forwarded to ``_emit_error``.

    Returns:
        ``(commit_id, manifest)``; the manifest is a fresh ``dict`` and is
        empty when ``read_snapshot`` returns nothing.

    Raises:
        SystemExit(USER_ERROR): HEAD has no commit, the ref is unknown, or
            any other exception was raised while resolving.
    """
    try:
        current_branch = read_current_branch(root)

        if treeish.upper() == "HEAD":
            head_id = get_head_commit_id(root, current_branch)
            if not head_id:
                _emit_error(json_out, "Repository has no commits yet.", ExitCode.USER_ERROR, elapsed)
            commit = read_commit(root, head_id)
        else:
            # A branch ref file wins; otherwise fall back to commit lookup.
            ref_commit_id = read_ref(_ref_path(root, treeish))
            if ref_commit_id is None:
                commit = resolve_commit_ref(root, current_branch, treeish)
            else:
                commit = read_commit(root, ref_commit_id)

        if commit is None:
            _emit_error(
                json_out,
                f"'{sanitize_display(treeish)}' is not a known branch or commit ID.",
                ExitCode.USER_ERROR,
                elapsed,
            )

        resolved_id = commit.commit_id
        snapshot = read_snapshot(root, commit.snapshot_id)
        return resolved_id, (dict(snapshot.manifest) if snapshot else {})
    except Exception as exc:
        # SystemExit derives from BaseException, so exits raised by
        # _emit_error above pass straight through this handler.
        _emit_error(
            json_out,
            f"Failed to resolve '{sanitize_display(treeish)}': {exc}",
            ExitCode.USER_ERROR,
            elapsed,
        )
| 331 | |
def _validate_path_prefix(root: pathlib.Path, raw: str, json_out: bool, elapsed: float) -> str:
    """Validate and normalise a user-supplied path prefix.

    The path is joined onto *root*, resolved, and must still lie inside the
    resolved repo root; ``..`` components or absolute paths that lead
    outside it are rejected.

    Args:
        root:     Absolute repo root.
        raw:      Raw path string as given by the user.
        json_out: When True, errors go to stdout as JSON.
        elapsed:  Timer value forwarded to ``_emit_error``.

    Returns:
        ``""`` when the path resolves to the repo root itself; otherwise the
        normalised repo-relative POSIX path, with a trailing ``/`` if and
        only if *raw* ended with one.

    Raises:
        SystemExit(USER_ERROR): the path resolves outside the repo root.
    """
    try:
        # One resolve of each side is enough: relative_to() raises ValueError
        # when the candidate is not located under the repo root.
        repo_root = root.resolve()
        rel = (root / raw).resolve().relative_to(repo_root).as_posix()
    except ValueError:
        # _emit_error always raises SystemExit, so ``rel`` is bound below.
        _emit_error(
            json_out,
            f"Path '{sanitize_display(raw)}' is outside the repository root.",
            ExitCode.USER_ERROR,
            elapsed,
        )

    if rel == ".":
        return ""

    # Preserve trailing slash for directory-prefix semantics.
    return f"{rel}/" if raw.endswith("/") else rel
| 380 | |
| 381 | # --------------------------------------------------------------------------- |
| 382 | # Registration |
| 383 | # --------------------------------------------------------------------------- |
| 384 | |
def register(
    subparsers: "argparse._SubParsersAction[argparse.ArgumentParser]",
) -> None:
    """Register the ``muse ls-tree`` subcommand.

    Adds an ``ls-tree`` parser to *subparsers*, using the module docstring
    as its description, and binds ``run`` as the handler through
    ``set_defaults(func=run)``.

    Args:
        subparsers: The sub-parser collection of the top-level CLI parser.
    """
    parser = subparsers.add_parser(
        "ls-tree",
        help="List the contents of a snapshot at a given ref.",
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "-r", "--recursive",
        action="store_true",
        dest="recursive",
        help="Recurse into subtrees, listing all blobs.",
    )
    # ``-d`` is the short form advertised in the module docstring
    # (``muse ls-tree -d HEAD``); it must be registered alongside the long
    # option or argparse rejects it as an unknown argument.
    parser.add_argument(
        "-d", "--dirs-only",
        action="store_true",
        dest="dirs_only",
        help="Show only tree (directory) entries, not blobs.",
    )
    parser.add_argument(
        "-l", "--long",
        action="store_true",
        dest="long",
        help="Include object size in the listing.",
    )
    parser.add_argument(
        "--name-only",
        action="store_true",
        dest="name_only",
        help="Show only path names, omitting mode/type/object_id.",
    )
    parser.add_argument(
        "--json", "-j",
        action="store_true",
        dest="json_out",
        help="Emit machine-readable JSON on stdout.",
    )
    parser.add_argument(
        "treeish",
        metavar="TREEISH",
        nargs="?",
        default="HEAD",
        help="Branch name or commit ID to inspect (default: HEAD).",
    )
    parser.add_argument(
        "path",
        metavar="PATH",
        nargs="?",
        default=None,
        help="Optional path prefix to scope the listing.",
    )
    parser.set_defaults(func=run)
| 440 | |
| 441 | # --------------------------------------------------------------------------- |
| 442 | # Run |
| 443 | # --------------------------------------------------------------------------- |
| 444 | |
def run(args: argparse.Namespace) -> None:
    """List snapshot contents for a given ref.

    Resolves *treeish* (branch name, commit ID, or ``HEAD``) to a snapshot
    manifest and emits tree entries. Non-recursive mode synthesizes directory
    entries from shared path prefixes; recursive mode emits raw blobs only.

    Agent quickstart
    ----------------
    ::

        muse ls-tree --json
        muse ls-tree HEAD src/ --json
        muse ls-tree -r HEAD --json
        muse ls-tree feat/billing --json

    JSON fields
    -----------
    status       ``"ok"`` on success.
    treeish      The ref exactly as supplied.
    commit_id    Commit ID of the resolved snapshot.
    path_prefix  Scoping prefix applied (always ``/``-terminated), or
                 ``null`` when no path was given or it named the repo root.
    recursive    ``true`` when ``-r`` was passed.
    entry_count  Number of entries returned.
    entries      List of entry objects: ``mode``, ``type``, ``object_id``,
                 ``size``, ``path``.  ``size`` is ``null`` unless ``--long``
                 was passed; it is always ``null`` for tree entries and for
                 blobs whose object could not be read.  With ``--name-only``
                 each entry carries ``path`` only.

    Errors
    ------
    Bad ref, control characters in the ref, path traversal and an empty
    repository are reported through ``_emit_error`` with
    ``ExitCode.USER_ERROR``.  Failures raised by ``require_repo`` or
    ``read_object`` are not handled here and propagate to the caller.

    Args:
        args: Parsed command-line namespace produced by the parser that
              ``register`` sets up.
    """
    elapsed = start_timer()

    treeish: str = args.treeish or "HEAD"
    raw_path: str | None = args.path
    recursive: bool = args.recursive
    dirs_only: bool = args.dirs_only
    long_fmt: bool = args.long
    name_only: bool = args.name_only
    json_out: bool = args.json_out

    root = require_repo()

    # ── Validate ref — reject ANSI and other control characters ──────────────
    # C0 controls (< 0x20, includes ESC), DEL (0x7F) and the C1 range
    # (0x80–0x9F, includes the single-byte CSI 0x9B) are all refused.
    if any(ord(c) < 0x20 or 0x7F <= ord(c) <= 0x9F for c in treeish):
        _emit_error(
            json_out,
            f"Invalid ref '{sanitize_display(treeish)}': control characters not allowed.",
            ExitCode.USER_ERROR,
            elapsed,
        )

    # ── Resolve the ref to a manifest ────────────────────────────────────────
    commit_id, manifest = _resolve_manifest(root, treeish, json_out, elapsed)

    # ── Validate and normalise path prefix ───────────────────────────────────
    path_prefix = ""
    path_prefix_out: str | None = None  # what we echo in the envelope
    if raw_path is not None:
        path_prefix = _validate_path_prefix(root, raw_path, json_out, elapsed)
        # Every non-empty prefix is treated as a directory: force a trailing
        # "/" so that "src" cannot also match "src_old/...".
        if path_prefix and not path_prefix.endswith("/"):
            path_prefix += "/"
        path_prefix_out = path_prefix or None

    # ── Build entries ────────────────────────────────────────────────────────
    entries = _build_tree_entries(manifest, path_prefix, recursive)

    # Apply --dirs-only filter.
    if dirs_only:
        entries = [e for e in entries if e["type"] == "tree"]

    # ── Populate sizes when --long is requested ──────────────────────────────
    if long_fmt:
        for entry in entries:
            if entry["type"] == "blob":
                data = read_object(root, entry["object_id"])
                entry["size"] = len(data) if data is not None else None

    # ── Output ───────────────────────────────────────────────────────────────
    if json_out:
        if name_only:
            out_entries = [{"path": e["path"]} for e in entries]
        else:
            # Rebuild each entry so the emitted key order is fixed.
            out_entries = [
                {
                    "mode": e["mode"],
                    "type": e["type"],
                    "object_id": e["object_id"],
                    "size": e["size"],
                    "path": e["path"],
                }
                for e in entries
            ]
        print(_json.dumps(_LsTreeJson(
            **make_envelope(elapsed),
            status="ok",
            error="",
            treeish=treeish,
            commit_id=commit_id,
            path_prefix=path_prefix_out,
            recursive=recursive,
            entry_count=len(out_entries),
            entries=out_entries,
        )))
        return

    for e in entries:
        if name_only:
            print(e["path"])
        elif long_fmt:
            # "-" stands in for an unknown size (trees, unreadable blobs).
            size_str = str(e["size"]) if e["size"] is not None else "-"
            print(f"{e['mode']} {e['type']} {e['object_id']} {size_str}\t{e['path']}")
        else:
            print(f"{e['mode']} {e['type']} {e['object_id']}\t{e['path']}")
File History
1 commit
sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b
fix: try fetch/presign before fetch/mpack to avoid Cloudfla…
Sonnet 4.6
patch
7 days ago