"""``muse cat`` — print the raw content of one or more tracked files. The domain-agnostic, file-level counterpart to ``muse code cat`` (symbol-level) and the future ``muse midi cat`` (pattern-level). Mirrors the relationship between ``muse blame`` (line-level) and ``muse code blame`` (symbol-level). Usage:: muse cat README.md muse cat src/main.py --at v1.2.3 muse cat src/main.py --staged muse cat src/main.py --at abc123 --json muse cat file1.py file2.py --json Symbol addresses (``file.py::Symbol``) are not accepted here — use ``muse code cat`` for symbol-level reads. JSON output schemas:: Single file (--json): { "path": "", "content": "", "size_bytes": , "source_ref": "working tree" | "staged" | "commit on ", "duration_ms": } Multiple files (--json): { "files": [{"path": ..., "content": ..., "size_bytes": ...}, ...], "errors": [{"path": ..., "error": ..., "error_code": ..., "hint": ...}, ...], "source_ref": "", "duration_ms": } Exit codes:: 0 — all files read successfully 1 — file not tracked, symlink rejected, path traversal, bad ref, bad address 2 — not inside a Muse repository 3 — I/O error reading from the object store or disk """ import argparse import json import pathlib import sys from muse.core.errors import ExitCode from muse.core.object_store import read_object from muse.core.repo import require_repo from muse.core.types import Manifest from muse.core.refs import read_current_branch from muse.core.commits import resolve_commit_ref from muse.core.snapshots import ( get_commit_snapshot_manifest, get_head_snapshot_manifest, ) from muse.core.envelope import EnvelopeJson, make_envelope from muse.core.timing import start_timer from muse.core.validation import sanitize_display from muse.plugins.code.stage import read_stage from typing import TypedDict # --------------------------------------------------------------------------- # TypedDicts — JSON output schema # --------------------------------------------------------------------------- class _CatFileEntryJson(TypedDict): """One file entry in the multi-file output.""" path: str content: str size_bytes: int source_ref: str class _CatSingleJson(EnvelopeJson): """JSON output for ``muse cat`` with a single file.""" path: str content: str size_bytes: int source_ref: str class _CatErrorEntryJson(TypedDict): """One error entry in the multi-file output.""" path: str error: str error_code: str hint: str class _CatMultiJson(EnvelopeJson): """JSON output for ``muse cat`` with multiple files or errors.""" files: list[_CatFileEntryJson] errors: list[_CatErrorEntryJson] source_ref: str # --------------------------------------------------------------------------- # Internal helpers # --------------------------------------------------------------------------- class _FileError(Exception): """Raised when a file cannot be read, with a machine-parseable code.""" def __init__(self, message: str, code: str, hint: str = "") -> None: super().__init__(message) self.code = code self.hint = hint def _get_file_bytes( root: pathlib.Path, file_path: str, manifest: Manifest, source_is_workdir: bool, ) -> bytes: """Return raw bytes for *file_path* from disk or the object store. A file is "tracked" if it appears in the HEAD snapshot manifest OR in the stage index (staged-but-not-committed files). Files that exist only on disk without being tracked are rejected with FILE_NOT_TRACKED — this prevents silent reads of arbitrary workspace files that Muse knows nothing about. Security -------- Workdir reads reject symlinks and enforce path containment to prevent directory traversal attacks. """ if source_is_workdir: disk = root / file_path if disk.is_symlink(): raise _FileError( f"refusing to read symlink: {file_path}", code="SYMLINK_REJECTED", hint="dereference the symlink and commit the real file instead", ) try: disk.resolve().relative_to(root.resolve()) except ValueError: raise _FileError( f"path escapes repository root: {file_path}", code="PATH_TRAVERSAL", hint="file paths must be relative to the repository root", ) stage = read_stage(root) in_manifest = file_path in manifest stage_entry = stage.get(file_path) in_stage = stage_entry is not None and stage_entry["mode"] != "D" if not in_manifest and not in_stage: raise _FileError( f"file not tracked: {file_path}", code="FILE_NOT_TRACKED", hint="use 'muse code add ' to track it", ) try: return disk.read_bytes() except (FileNotFoundError, OSError): pass # deleted from disk — fall through to object store # File deleted from disk: try stage blob first, then manifest blob. if in_stage and stage_entry is not None: raw = read_object(root, stage_entry["object_id"]) if raw is not None: return raw if in_manifest: raw = read_object(root, manifest[file_path]) if raw is not None: return raw raise _FileError( f"blob not found in object store for: {file_path}", code="BLOB_NOT_FOUND", hint="the object store may be corrupted; try `muse gc` to diagnose", ) if file_path not in manifest: raise _FileError( f"file not tracked: {file_path}", code="FILE_NOT_TRACKED", hint="use 'muse code add ' to track it", ) raw = read_object(root, manifest[file_path]) if raw is None: raise _FileError( f"blob not found in object store: {manifest[file_path]}", code="BLOB_NOT_FOUND", hint="the object store may be corrupted; try `muse gc` to diagnose", ) return raw # --------------------------------------------------------------------------- # Registration # --------------------------------------------------------------------------- def register( subparsers: "argparse._SubParsersAction[argparse.ArgumentParser]", ) -> None: """Register ``muse cat`` on *subparsers*.""" parser = subparsers.add_parser( "cat", help="Print the raw content of one or more tracked files.", description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter, ) parser.add_argument( "files", nargs="*", metavar="file", help=( "One or more tracked file paths. " "Symbol addresses (file.py::Symbol) are not accepted — " "use 'muse code cat' for symbol-level reads." ), ) parser.add_argument( "--at", default=None, metavar="REF", help=( "Commit ref (SHA prefix, branch, tag) to read from. " "Defaults to the working tree (uncommitted edits visible). " "Mutually exclusive with --staged." ), ) parser.add_argument( "--staged", action="store_true", default=False, help=( "Read the staged version of each file — the content that would be " "committed if you ran 'muse commit' now. Ignores working-tree edits " "made after the last 'muse code add'. Mirrors 'git show :path'." ), ) parser.add_argument( "--json", "-j", action="store_true", dest="json_out", help="Emit machine-readable JSON.", ) parser.set_defaults(func=run) # --------------------------------------------------------------------------- # Handler # --------------------------------------------------------------------------- def run(args: argparse.Namespace) -> None: """Print the raw content of one or more tracked files. Reads file bytes from the committed snapshot or the working tree (with ``--at``). For multiple files emits a ``files`` array; for a single file emits a flat object. Binary and non-UTF-8 files are included as-is when ``--json`` is requested (content may be lossy if not valid UTF-8). Agent quickstart ---------------- :: muse cat src/billing.py --json muse cat src/billing.py --at HEAD~5 --json muse cat src/billing.py src/auth.py --json JSON fields (single file) ------------------------- file_path Workspace-relative file path. content Full file content as a UTF-8 string. size_bytes File size in bytes. source_ref ``"working tree"`` or ``"commit "`` or ``""``. JSON fields (multiple files) ---------------------------- files List of file entry objects (same fields as single-file mode). errors List of error strings for files that could not be read. source_ref Same source description for all files in the request. Exit codes ---------- 0 All files found and printed. 1 Any file not found or not tracked. 2 Not inside a Muse repository. """ elapsed = start_timer() files: list[str] = args.files at: str | None = args.at staged: bool = getattr(args, "staged", False) json_out: bool = args.json_out if staged and at is not None: msg = "--staged and --at are mutually exclusive" if json_out: print(json.dumps({"error": msg, "error_code": "MUTUALLY_EXCLUSIVE"})) else: print(f"❌ {msg}", file=sys.stderr) raise SystemExit(ExitCode.USER_ERROR) if not files: msg = "no file given — usage: muse cat [ ...] [--at ]" if json_out: print(json.dumps({"error": msg})) else: print(f"❌ {msg}", file=sys.stderr) raise SystemExit(ExitCode.USER_ERROR) # Reject symbol addresses up front. for f in files: if "::" in f: msg = ( f"'{sanitize_display(f)}' looks like a symbol address. " f"Use 'muse code cat' for symbol-level reads." ) if json_out: print(json.dumps({"error": msg, "error_code": "SYMBOL_ADDRESS_REJECTED"})) else: print(f"❌ {msg}", file=sys.stderr) raise SystemExit(ExitCode.USER_ERROR) # Reject paths with control characters (newlines, null bytes, ANSI). for f in files: for ch in ("\n", "\r", "\x00"): if ch in f: msg = f"invalid path: control character in '{sanitize_display(f)}'" if json_out: print(json.dumps({"error": msg, "error_code": "INVALID_PATH"})) else: print(f"❌ {msg}", file=sys.stderr) raise SystemExit(ExitCode.USER_ERROR) if "\x1b" in f: msg = f"invalid path: ANSI escape in '{sanitize_display(f)}'" if json_out: print(json.dumps({"error": msg, "error_code": "INVALID_PATH"})) else: print(f"❌ {msg}", file=sys.stderr) raise SystemExit(ExitCode.USER_ERROR) root = require_repo() branch = read_current_branch(root) source_is_workdir = at is None and not staged manifest: Manifest if staged: head_manifest = get_head_snapshot_manifest(root, branch) or {} stage = read_stage(root) staged_manifest: dict[str, str] = dict(head_manifest) for _path, _entry in stage.items(): if _path.startswith(".muse/"): continue if _entry["mode"] == "D": staged_manifest.pop(_path, None) else: staged_manifest[_path] = _entry["object_id"] manifest = staged_manifest source_ref = "staged" elif source_is_workdir: manifest = get_head_snapshot_manifest(root, branch) or {} source_ref = "working tree" else: resolved = resolve_commit_ref(root, branch, at) if resolved is None: msg = f"ref not found: {sanitize_display(at or '')}" if json_out: print(json.dumps({"error": msg, "error_code": "REF_NOT_FOUND"})) else: print(f"❌ {msg}", file=sys.stderr) raise SystemExit(ExitCode.USER_ERROR) manifest = get_commit_snapshot_manifest(root, resolved.commit_id) or {} source_ref = f"commit {resolved.commit_id} on {branch}" results: list[_CatFileEntryJson] = [] errors: list[_CatErrorEntryJson] = [] for file_path in files: try: raw = _get_file_bytes(root, file_path, manifest, source_is_workdir) except _FileError as exc: errors.append(_CatErrorEntryJson( path=file_path, error=sanitize_display(str(exc)), error_code=exc.code, hint=exc.hint, )) if not json_out: print(f"❌ {sanitize_display(str(exc))}", file=sys.stderr) continue content = raw.decode("utf-8", errors="replace") results.append(_CatFileEntryJson( path=file_path, content=content, size_bytes=len(raw), source_ref=source_ref, )) has_errors = bool(errors) if json_out: if len(files) == 1 and not has_errors: print(json.dumps(_CatSingleJson(**make_envelope(elapsed), **results[0]))) else: print(json.dumps(_CatMultiJson( **make_envelope(elapsed), files=results, errors=errors, source_ref=source_ref, ))) else: for entry in results: if len(files) > 1: print(f"# {entry['path']} ({source_ref})") sys.stdout.write(str(entry["content"])) if len(files) > 1: print() raise SystemExit(0 if not has_errors else ExitCode.USER_ERROR)