gabriel / muse public
ls_tree.py python
561 lines 18.5 KB
Raw
sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b fix: try fetch/presign before fetch/mpack to avoid Cloudfla… Sonnet 4.6 patch 7 days ago
1 """``muse ls-tree`` — list the contents of a tree object at a given ref.
2
3 Displays the files and synthetic directory entries recorded in a commit's
4 snapshot manifest. Because Muse uses a flat manifest (path → object_id)
5 instead of nested tree objects, directory entries are synthesized on the fly
6 from shared path prefixes.
7
8 Modes
9 -----
10 ``muse ls-tree HEAD``
11 Non-recursive listing of the root. Blobs at the root level appear as
12 ``blob`` entries; any path that has children in a subdirectory is
13 collapsed into a synthetic ``tree`` entry (e.g. ``src/``).
14
15 ``muse ls-tree -r HEAD``
16 Recursive listing — all blobs, no synthetic tree entries.
17
18 ``muse ls-tree HEAD src/``
19 Scope the listing to the ``src/`` prefix.
20
21 ``muse ls-tree -d HEAD``
22 Show only synthetic directory (tree) entries, not blobs.
23
24 Output formats
25 --------------
26 Default text::
27
28 <mode> <type> <object_id>\\t<path>
29
30 ``--name-only`` text::
31
32 <path>
33
34 ``--long`` (``-l``) text adds the byte size between ``<object_id>`` and the
35 tab::
36
37 <mode> <type> <object_id> <size>\\t<path>
38
39 JSON (``--json``)::
40
41 {
42 "status": "ok",
43 "error": "",
44 "treeish": "HEAD",
45 "commit_id": "sha256:<hex>",
46 "path_prefix": null,
47 "recursive": false,
48 "entry_count": 3,
49 "entries": [
50 {"mode": "100644", "type": "blob",
51 "object_id": "sha256:<hex>", "size": 12, "path": "file.py"},
52 {"mode": "040000", "type": "tree",
53 "object_id": "sha256:<hex>", "size": null, "path": "src/"}
54 ],
55 "duration_ms": 1.2,
56 "exit_code": 0
57 }
58
59 All keys are always present so agents can read them without ``dict.get``
60 guards. ``"status"`` is always ``"ok"`` on success.
61
62 ``"path_prefix"`` is ``null`` when no path argument was given; otherwise it
63 echoes the normalised prefix that was applied.
64
65 ``"recursive"`` reflects whether ``-r`` was passed.
66
67 ``"entry_count"`` equals ``len(entries)`` — a convenient shortcut that avoids
68 parsing the array just to count it.
69
70 When ``--name-only`` is combined with ``--json`` the entries contain only
71 ``path`` (no ``object_id``, ``mode``, ``type``, or ``size``).
72
73 JSON error schema (exit non-zero)::
74
75 {
76 "status": "error",
77 "error": "<human-readable message>",
78 "exit_code": <int>
79 }
80
81 When ``--json`` is active all errors go to stdout as JSON — no prose on
82 stderr. Agents should parse stdout and check ``status``.
83
84 Exit codes::
85
86 0 — success
87 1 — user error: bad ref, path traversal, ANSI in ref
88 2 — not a Muse repository
89 3 — I/O error
90 """
91
92 import argparse
93 import hashlib
94 import json as _json
95 import logging
96 import pathlib
97 import sys
98 from typing import TypedDict
99
100 from muse.core.types import long_id
101 from muse.core.paths import ref_path as _ref_path
102 from muse.core.envelope import EnvelopeJson, make_envelope
103 from muse.core.errors import ExitCode
104 from muse.core.object_store import read_object
105 from muse.core.repo import require_repo
106 from muse.core.refs import read_ref
107 from muse.core.refs import (
108 get_head_commit_id,
109 read_current_branch,
110 )
111 from muse.core.commits import (
112 read_commit,
113 resolve_commit_ref,
114 )
115 from muse.core.snapshots import read_snapshot
116 from muse.core.validation import sanitize_display
117 from muse.core.timing import start_timer
118
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Values reported in the ``mode`` column / JSON field of each listing entry.
_BLOB_MODE = "100644"  # used for every ``blob`` entry
_TREE_MODE = "040000"  # used for every synthetic ``tree`` (directory) entry
123
124 # ---------------------------------------------------------------------------
125 # Wire-format TypedDicts
126 # ---------------------------------------------------------------------------
127
# Flat snapshot manifest: repo-relative path → object ID.
type _ManifestMap = dict[str, str]
129
class _LsTreeEntry(TypedDict, total=False):
    """One listing entry as it appears in the ``entries`` array of the JSON output.

    ``total=False`` because ``--name-only --json`` emits entries that carry
    only ``path``; otherwise all five keys are present.
    """
    mode: str  # _BLOB_MODE or _TREE_MODE
    type: str  # "blob" or "tree"
    object_id: str  # manifest object ID for blobs, synthetic ID for trees
    size: int | None  # only filled in for blobs when --long is given
    path: str  # repo-relative path; tree paths end with "/"
136
class _LsTreeJson(EnvelopeJson):
    """Stable JSON envelope for ``muse ls-tree --json``.

    Inherits the 6 standard envelope fields from :class:`~muse.core.envelope.EnvelopeJson`.

    All keys are always present so agents can read them without ``dict.get``
    guards. ``status`` is ``"ok"`` on success.

    This is the success payload; failures are reported with
    :class:`_LsTreeErrorJson` instead.
    """
    status: str  # "ok"
    error: str  # always "" on success
    treeish: str  # the ref exactly as requested by the caller
    commit_id: str  # ID of the commit the ref resolved to
    path_prefix: str | None  # null when no path arg given
    recursive: bool  # whether -r / --recursive was passed
    entry_count: int  # len(entries) — convenient shortcut
    entries: list[_LsTreeEntry]
153
class _LsTreeErrorJson(EnvelopeJson):
    """Error payload for ``muse ls-tree --json`` on usage or internal errors.

    Printed to stdout by ``_emit_error`` when ``--json`` is active, layered on
    top of the envelope fields produced by ``make_envelope``.
    """
    status: str  # "error"
    error: str  # human-readable message
158
159 # ---------------------------------------------------------------------------
160 # Internal helpers
161 # ---------------------------------------------------------------------------
162
def _emit_error(json_out: bool, msg: str, code: ExitCode, elapsed: float) -> None:
    """Report *msg* on the appropriate stream, then terminate. Never returns.

    Text mode writes a sanitised one-line message to stderr. ``--json`` mode
    writes an error envelope to stdout instead, so machine consumers always
    receive parseable output.

    Args:
        json_out: When True, emit the JSON error payload on stdout.
        msg:      Human-readable error message.
        code:     Exit code; also recorded in the JSON envelope.
        elapsed:  Timer value forwarded to ``make_envelope``.

    Raises:
        SystemExit: always, carrying *code*.
    """
    if not json_out:
        print(f"❌ {sanitize_display(msg)}", file=sys.stderr)
        raise SystemExit(code)

    envelope = make_envelope(elapsed, exit_code=int(code))
    payload = _LsTreeErrorJson(**envelope, status="error", error=msg)
    print(_json.dumps(payload))
    raise SystemExit(code)
178
def _synthetic_tree_id(manifest: _ManifestMap, prefix: str) -> str:
    """Derive a stable object ID for the synthetic directory at *prefix*.

    Every manifest path that starts with *prefix* (children at any depth)
    contributes one ``<path>\\x00<object_id>\\n`` record, fed to SHA-256 in
    sorted path order, so the ID changes whenever anything beneath the
    directory changes.

    Args:
        manifest: Full flat manifest (path → object_id).
        prefix:   Directory prefix ending with ``/`` (e.g. ``"src/"``).

    Returns:
        The hex digest passed through ``long_id``.
    """
    digest = hashlib.sha256()
    # Filter first, then sort: only the members of this directory are ordered.
    members = sorted(member for member in manifest if member.startswith(prefix))
    for member in members:
        digest.update(f"{member}\x00{manifest[member]}\n".encode())
    return long_id(digest.hexdigest())
198
def _build_tree_entries(
    manifest: dict[str, str],
    path_prefix: str,
    recursive: bool,
) -> list[dict]:
    """Produce the listing entries for *path_prefix*.

    Args:
        manifest:    Full flat manifest (path → object_id).
        path_prefix: Repo-relative POSIX prefix that scopes the listing:
                     ``""`` for the root, ``"src/"`` for a subdirectory.
        recursive:   True lists every blob under the prefix and no tree
                     entries. False lists only the immediate children:
                     blobs directly at this level plus one synthetic tree
                     entry per subdirectory.

    Returns:
        Entry dicts sorted by path, each with the keys ``mode``, ``type``,
        ``object_id``, ``size`` and ``path``. ``size`` is always ``None``
        here.
    """
    def _blob(path: str, oid: str) -> dict:
        return {
            "mode": _BLOB_MODE,
            "type": "blob",
            "object_id": oid,
            "size": None,
            "path": path,
        }

    # Manifest rows under the prefix, in path order.
    scoped = [
        (path, oid)
        for path, oid in sorted(manifest.items())
        if path.startswith(path_prefix)
    ]

    if recursive:
        return [_blob(path, oid) for path, oid in scoped]

    listing: list[dict] = []
    emitted_dirs: set[str] = set()
    skip = len(path_prefix)

    for path, oid in scoped:
        # First component below the prefix; ``sep`` is empty for a direct file.
        child, sep, _ = path[skip:].partition("/")
        if not sep:
            listing.append(_blob(path, oid))
            continue

        subtree = f"{path_prefix}{child}/"  # e.g. "src/" or "pkg/sub/"
        if subtree in emitted_dirs:
            continue  # directory already represented by an earlier path
        emitted_dirs.add(subtree)
        listing.append({
            "mode": _TREE_MODE,
            "type": "tree",
            "object_id": _synthetic_tree_id(manifest, subtree),
            "size": None,
            "path": subtree,
        })

    listing.sort(key=lambda item: item["path"])
    return listing
266
def _resolve_manifest(
    root: pathlib.Path,
    treeish: str,
    json_out: bool,
    elapsed: float,
) -> tuple[str, dict[str, str]]:
    """Turn *treeish* into the commit it names and that commit's manifest.

    Lookup order:
      1. ``"HEAD"`` (compared case-insensitively) — tip of the current branch.
      2. Branch name — the ref file located via ``_ref_path``.
      3. Anything else — handed to ``resolve_commit_ref``.

    Args:
        root:     Absolute repo root.
        treeish:  Branch name, commit ID, or ``"HEAD"``.
        json_out: When True, errors go to stdout as JSON.
        elapsed:  Timer value forwarded to ``_emit_error``.

    Returns:
        ``(commit_id, manifest)``. The manifest is a fresh dict, and is empty
        when ``read_snapshot`` yields nothing for the commit.

    Raises:
        SystemExit(USER_ERROR): the repo has no commits, the ref is unknown,
            or any other exception was raised while resolving it.
    """
    try:
        current_branch = read_current_branch(root)

        if treeish.upper() == "HEAD":
            head_id = get_head_commit_id(root, current_branch)
            if not head_id:
                _emit_error(json_out, "Repository has no commits yet.", ExitCode.USER_ERROR, elapsed)
            resolved = read_commit(root, head_id)
        else:
            # A direct ref-file hit wins; otherwise treat the argument as a commit ID.
            ref_target = read_ref(_ref_path(root, treeish))
            resolved = (
                resolve_commit_ref(root, current_branch, treeish)
                if ref_target is None
                else read_commit(root, ref_target)
            )

        if resolved is None:
            _emit_error(
                json_out,
                f"'{sanitize_display(treeish)}' is not a known branch or commit ID.",
                ExitCode.USER_ERROR,
                elapsed,
            )

        resolved_id = resolved.commit_id
        snapshot = read_snapshot(root, resolved.snapshot_id)
        files = dict(snapshot.manifest) if snapshot else {}
        return resolved_id, files
    except SystemExit:
        # Raised by _emit_error above — let it through untouched.
        raise
    except Exception as exc:
        _emit_error(
            json_out,
            f"Failed to resolve '{sanitize_display(treeish)}': {exc}",
            ExitCode.USER_ERROR,
            elapsed,
        )
331
def _validate_path_prefix(root: pathlib.Path, raw: str, json_out: bool, elapsed: float) -> str:
    """Validate and normalise a user-supplied path prefix.

    The path is joined onto *root* and resolved; anything that ends up
    outside the resolved repo root (``..`` components, absolute paths
    elsewhere) is rejected.

    Args:
        root:     Absolute repo root.
        raw:      Raw path string as given by the user.
        json_out: When True, errors go to stdout as JSON.
        elapsed:  Timer value forwarded to ``_emit_error``.

    Returns:
        Normalised repo-relative POSIX path. ``""`` when the path is the
        repo root itself; otherwise the trailing ``/`` is kept if *raw* had
        one and omitted if it did not.

    Raises:
        SystemExit(USER_ERROR): the path resolves outside the repo root.
    """
    # One containment check doubles as the normalisation step: relative_to()
    # raises ValueError exactly when the candidate escapes the repo.
    try:
        candidate = (root / raw).resolve()
        rel = candidate.relative_to(root.resolve()).as_posix()
    except ValueError:
        # _emit_error raises SystemExit, so ``rel`` is always bound below.
        _emit_error(
            json_out,
            f"Path '{sanitize_display(raw)}' is outside the repository root.",
            ExitCode.USER_ERROR,
            elapsed,
        )

    if rel == ".":
        # The path is the repo root itself: no scoping.
        return ""

    # Preserve trailing slash for directory-prefix semantics.
    return f"{rel}/" if raw.endswith("/") else rel
380
381 # ---------------------------------------------------------------------------
382 # Registration
383 # ---------------------------------------------------------------------------
384
def register(
    subparsers: "argparse._SubParsersAction[argparse.ArgumentParser]",
) -> None:
    """Register the ``muse ls-tree`` subcommand.

    Adds the ``ls-tree`` parser to *subparsers*, declares its flags and the
    two optional positionals (``TREEISH`` then ``PATH``), and binds
    :func:`run` as the handler via ``set_defaults(func=run)``.

    Args:
        subparsers: The sub-parser collection of the top-level CLI parser.
    """
    parser = subparsers.add_parser(
        "ls-tree",
        help="List the contents of a snapshot at a given ref.",
        description=__doc__,
        # Keep the module docstring's layout (code samples, JSON schema) intact.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "-r", "--recursive",
        action="store_true",
        dest="recursive",
        help="Recurse into subtrees, listing all blobs.",
    )
    parser.add_argument(
        # ``-d`` is the short form shown in the module docstring
        # (``muse ls-tree -d HEAD``); ``--dirs-only`` remains supported.
        "-d", "--dirs-only",
        action="store_true",
        dest="dirs_only",
        help="Show only tree (directory) entries, not blobs.",
    )
    parser.add_argument(
        "-l", "--long",
        action="store_true",
        dest="long",
        help="Include object size in the listing.",
    )
    parser.add_argument(
        "--name-only",
        action="store_true",
        dest="name_only",
        help="Show only path names, omitting mode/type/object_id.",
    )
    parser.add_argument(
        "--json", "-j",
        action="store_true",
        dest="json_out",
        help="Emit machine-readable JSON on stdout.",
    )
    parser.add_argument(
        "treeish",
        metavar="TREEISH",
        nargs="?",
        default="HEAD",
        help="Branch name or commit ID to inspect (default: HEAD).",
    )
    parser.add_argument(
        "path",
        metavar="PATH",
        nargs="?",
        default=None,
        help="Optional path prefix to scope the listing.",
    )
    parser.set_defaults(func=run)
440
441 # ---------------------------------------------------------------------------
442 # Run
443 # ---------------------------------------------------------------------------
444
def run(args: argparse.Namespace) -> None:
    """List snapshot contents for a given ref.

    Resolves *treeish* (branch name, commit ID, or ``HEAD``) to a snapshot
    manifest and emits tree entries. Non-recursive mode synthesizes directory
    entries from shared path prefixes; recursive mode emits raw blobs only.

    Agent quickstart
    ----------------
    ::

        muse ls-tree --json
        muse ls-tree HEAD src/ --json
        muse ls-tree -r HEAD --json
        muse ls-tree feat/billing --json

    JSON fields
    -----------
    status        ``"ok"`` on success.
    treeish       The ref that was resolved.
    commit_id     Commit ID of the resolved snapshot.
    path_prefix   Scoping prefix applied, or ``null``.
    recursive     ``true`` when ``-r`` was passed.
    entry_count   Number of entries returned.
    entries       List of entry objects: ``mode``, ``type``, ``object_id``,
                  ``size``, ``path``. ``size`` is always ``null`` for tree
                  entries, and ``null`` for blob entries unless ``--long``
                  was passed. With ``--name-only`` each entry carries only
                  ``path``.

    Exit codes
    ----------
    0   Success.
    1   Bad ref, path traversal, ANSI in ref, or empty repository.
    2   Not inside a Muse repository.
    3   I/O error.

    Args:
        args: Parsed command-line namespace produced by the parser that
            ``register`` sets up.
    """
    elapsed = start_timer()

    treeish: str = args.treeish or "HEAD"
    raw_path: str | None = args.path
    recursive: bool = args.recursive
    dirs_only: bool = args.dirs_only
    long_fmt: bool = args.long
    name_only: bool = args.name_only
    json_out: bool = args.json_out

    root = require_repo()

    # ── Validate ref — reject ANSI and other control characters ──────────────
    # C0 controls (below space, which includes ESC) plus DEL (0x7F).
    if any(ch < " " or ch == "\x7f" for ch in treeish):
        _emit_error(
            json_out,
            f"Invalid ref '{sanitize_display(treeish)}': control characters not allowed.",
            ExitCode.USER_ERROR,
            elapsed,
        )

    # ── Resolve the ref to a manifest ────────────────────────────────────────
    commit_id, manifest = _resolve_manifest(root, treeish, json_out, elapsed)

    # ── Validate and normalise path prefix ───────────────────────────────────
    path_prefix = ""
    path_prefix_out: str | None = None  # what we echo in the envelope
    if raw_path is not None:
        path_prefix = _validate_path_prefix(root, raw_path, json_out, elapsed)
        # The listing treats the argument as a directory: force a trailing "/".
        if path_prefix and not path_prefix.endswith("/"):
            path_prefix += "/"
        path_prefix_out = path_prefix or None

    # ── Build entries ─────────────────────────────────────────────────────────
    entries = _build_tree_entries(manifest, path_prefix, recursive)

    # Apply --dirs-only filter.
    if dirs_only:
        entries = [e for e in entries if e["type"] == "tree"]

    # ── Populate sizes when --long is requested ───────────────────────────────
    # --name-only never prints a size, so skip the object reads in that case.
    if long_fmt and not name_only:
        for entry in entries:
            if entry["type"] == "blob":
                data = read_object(root, entry["object_id"])
                entry["size"] = len(data) if data is not None else None

    # ── Output ───────────────────────────────────────────────────────────────
    if json_out:
        if name_only:
            out_entries = [{"path": e["path"]} for e in entries]
        else:
            # Entries already hold exactly mode/type/object_id/size/path.
            out_entries = entries
        print(_json.dumps(_LsTreeJson(
            **make_envelope(elapsed),
            status="ok",
            error="",
            treeish=treeish,
            commit_id=commit_id,
            path_prefix=path_prefix_out,
            recursive=recursive,
            entry_count=len(out_entries),
            entries=out_entries,
        )))
        return

    for e in entries:
        if name_only:
            print(e["path"])
        elif long_fmt:
            # Trees (and blobs whose object could not be read) show "-".
            size_str = str(e["size"]) if e["size"] is not None else "-"
            print(f"{e['mode']} {e['type']} {e['object_id']} {size_str}\t{e['path']}")
        else:
            print(f"{e['mode']} {e['type']} {e['object_id']}\t{e['path']}")
File History 1 commit
sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b fix: try fetch/presign before fetch/mpack to avoid Cloudfla… Sonnet 4.6 patch 7 days ago