core_cat.py
python
sha256:d11a87833d5fad6059b7662844bf5448a8911a17cce7a51811f71ad394f248eb
bump to v0.2.0rc13
Human
patch
6 days ago
| 1 | """``muse cat`` — print the raw content of one or more tracked files. |
| 2 | |
| 3 | The domain-agnostic, file-level counterpart to ``muse code cat`` (symbol-level) |
| 4 | and the future ``muse midi cat`` (pattern-level). Mirrors the relationship |
| 5 | between ``muse blame`` (line-level) and ``muse code blame`` (symbol-level). |
| 6 | |
| 7 | Usage:: |
| 8 | |
| 9 | muse cat README.md |
| 10 | muse cat src/main.py --at v1.2.3 |
| 11 | muse cat src/main.py --staged |
| 12 | muse cat src/main.py --at abc123 --json |
| 13 | muse cat file1.py file2.py --json |
| 14 | |
| 15 | Symbol addresses (``file.py::Symbol``) are not accepted here — use |
| 16 | ``muse code cat`` for symbol-level reads. |
| 17 | |
| 18 | JSON output schemas:: |
| 19 | |
| 20 | Single file (--json): |
| 21 | { |
| 22 | "path": "<path>", |
| 23 | "content": "<utf-8 text, errors replaced>", |
| 24 | "size_bytes": <int>, |
| 25 | "source_ref": "working tree" | "staged" | "commit <sha8> on <branch>", |
| 26 | "duration_ms": <float> |
| 27 | } |
| 28 | |
| 29 | Multiple files (--json): |
| 30 | { |
| 31 | "files": [{"path": ..., "content": ..., "size_bytes": ..., "source_ref": ...}, ...], |
| 32 | "errors": [{"path": ..., "error": ..., "error_code": ..., "hint": ...}, ...], |
| 33 | "source_ref": "<str>", |
| 34 | "duration_ms": <float> |
| 35 | } |
| 36 | |
| 37 | Exit codes:: |
| 38 | |
| 39 | 0 — all files read successfully |
| 40 | 1 — file not tracked, symlink rejected, path traversal, bad ref, bad address |
| 41 | 2 — not inside a Muse repository |
| 42 | 3 — I/O error reading from the object store or disk |
| 43 | """ |
| 44 | |
| 45 | import argparse |
| 46 | import json |
| 47 | import pathlib |
| 48 | import sys |
| 49 | |
| 50 | from muse.core.errors import ExitCode |
| 51 | from muse.core.object_store import read_object |
| 52 | from muse.core.repo import require_repo |
| 53 | from muse.core.types import Manifest |
| 54 | from muse.core.refs import read_current_branch |
| 55 | from muse.core.commits import resolve_commit_ref |
| 56 | from muse.core.snapshots import ( |
| 57 | get_commit_snapshot_manifest, |
| 58 | get_head_snapshot_manifest, |
| 59 | ) |
| 60 | from muse.core.envelope import EnvelopeJson, make_envelope |
| 61 | from muse.core.timing import start_timer |
| 62 | from muse.core.validation import sanitize_display |
| 63 | from muse.plugins.code.stage import read_stage |
| 64 | from typing import TypedDict |
| 65 | |
| 66 | # --------------------------------------------------------------------------- |
| 67 | # TypedDicts — JSON output schema |
| 68 | # --------------------------------------------------------------------------- |
| 69 | |
class _CatFileEntryJson(TypedDict):
    """One successfully read file, as it appears in ``muse cat --json`` output.

    Used for each element of the multi-file ``files`` array; the same keys
    are also spread into the flat single-file payload.
    """

    # Path exactly as it was given on the command line.
    path: str
    # File bytes decoded as UTF-8; undecodable bytes are replaced, so the
    # text may be lossy for binary or non-UTF-8 files.
    content: str
    # Length of the raw bytes (before decoding), not of ``content``.
    size_bytes: int
    # Description of where the bytes came from (working tree, stage, commit).
    source_ref: str
| 77 | |
class _CatSingleJson(EnvelopeJson):
    """Flat ``--json`` payload emitted when exactly one file was requested
    and it was read without error.

    Extends ``EnvelopeJson`` (imported from ``muse.core.envelope``) with the
    per-file keys below.
    """

    # Path exactly as it was given on the command line.
    path: str
    # File bytes decoded as UTF-8 with undecodable bytes replaced.
    content: str
    # Length of the raw bytes (before decoding).
    size_bytes: int
    # Description of where the bytes came from (working tree, stage, commit).
    source_ref: str
| 85 | |
class _CatErrorEntryJson(TypedDict):
    """One per-file failure, as it appears in the ``errors`` array of the
    multi-file ``--json`` output.
    """

    # Path exactly as it was given on the command line.
    path: str
    # Human-readable error message.
    error: str
    # Machine-parseable identifier, e.g. ``FILE_NOT_TRACKED``.
    error_code: str
    # Suggested remediation; may be an empty string.
    hint: str
| 93 | |
class _CatMultiJson(EnvelopeJson):
    """``--json`` payload used when several files were requested, or when
    any requested file could not be read.

    Extends ``EnvelopeJson`` (imported from ``muse.core.envelope``).
    """

    # Files that were read successfully, in request order.
    files: list[_CatFileEntryJson]
    # Files that failed, in request order.
    errors: list[_CatErrorEntryJson]
    # Source description shared by every file in the request.
    source_ref: str
| 100 | |
| 101 | # --------------------------------------------------------------------------- |
| 102 | # Internal helpers |
| 103 | # --------------------------------------------------------------------------- |
| 104 | |
class _FileError(Exception):
    """A single requested file could not be read.

    The human-readable message is the exception's ``str()``. Two extra
    attributes make the failure machine-consumable:

    ``code``
        Machine-parseable identifier (for example ``FILE_NOT_TRACKED``).
    ``hint``
        Optional remediation tip; defaults to the empty string.
    """

    def __init__(self, message: str, code: str, hint: str = "") -> None:
        self.code, self.hint = code, hint
        super().__init__(message)
| 112 | |
def _get_file_bytes(
    root: pathlib.Path,
    file_path: str,
    manifest: Manifest,
    source_is_workdir: bool,
) -> bytes:
    """Return the raw bytes of *file_path* from disk or the object store.

    Parameters
    ----------
    root
        Repository root directory.
    file_path
        Path of the requested file, joined onto *root* for disk reads and
        used verbatim as the key into *manifest* and the stage index.
    manifest
        Mapping of tracked path -> object id for the snapshot being read.
    source_is_workdir
        ``True`` to read the working-tree copy; ``False`` to read only from
        the object store via *manifest*.

    Working-tree reads
    ------------------
    A file counts as tracked when it is in *manifest* or has a stage entry
    whose mode is not ``"D"``.  Untracked files are rejected so that
    arbitrary workspace files are never read silently.  When the on-disk
    copy cannot be read (any ``OSError``, not only a missing file) the
    staged blob is tried first, then the manifest blob.

    Security
    --------
    Working-tree reads refuse symlinks and require the resolved path to
    stay inside *root*, which blocks directory traversal.

    Raises
    ------
    _FileError
        With ``code`` set to ``SYMLINK_REJECTED``, ``PATH_TRAVERSAL``,
        ``FILE_NOT_TRACKED`` or ``BLOB_NOT_FOUND``.
    """
    if source_is_workdir:
        disk = root / file_path
        if disk.is_symlink():
            raise _FileError(
                f"refusing to read symlink: {file_path}",
                code="SYMLINK_REJECTED",
                hint="dereference the symlink and commit the real file instead",
            )
        try:
            disk.resolve().relative_to(root.resolve())
        except ValueError:
            # The ValueError from relative_to() carries no extra information
            # for the user, so drop it from the exception chain.
            raise _FileError(
                f"path escapes repository root: {file_path}",
                code="PATH_TRAVERSAL",
                hint="file paths must be relative to the repository root",
            ) from None

        stage = read_stage(root)
        in_manifest = file_path in manifest
        stage_entry = stage.get(file_path)
        # A staged deletion ("D") does not make the file tracked.
        in_stage = stage_entry is not None and stage_entry["mode"] != "D"

        if not in_manifest and not in_stage:
            raise _FileError(
                f"file not tracked: {file_path}",
                code="FILE_NOT_TRACKED",
                hint="use 'muse code add <file>' to track it",
            )

        try:
            return disk.read_bytes()
        except OSError:
            # FileNotFoundError is a subclass of OSError, so one clause covers
            # both.  Best effort: a missing or unreadable working-tree copy
            # falls through to the object store below.
            pass

        # Disk copy unavailable: try the staged blob first, then the manifest blob.
        if in_stage and stage_entry is not None:
            raw = read_object(root, stage_entry["object_id"])
            if raw is not None:
                return raw
        if in_manifest:
            raw = read_object(root, manifest[file_path])
            if raw is not None:
                return raw
        raise _FileError(
            f"blob not found in object store for: {file_path}",
            code="BLOB_NOT_FOUND",
            hint="the object store may be corrupted; try `muse gc` to diagnose",
        )

    # Snapshot / staged read: the manifest is the only source of truth.
    if file_path not in manifest:
        raise _FileError(
            f"file not tracked: {file_path}",
            code="FILE_NOT_TRACKED",
            hint="use 'muse code add <file>' to track it",
        )

    object_id = manifest[file_path]
    raw = read_object(root, object_id)
    if raw is None:
        raise _FileError(
            f"blob not found in object store: {object_id}",
            code="BLOB_NOT_FOUND",
            hint="the object store may be corrupted; try `muse gc` to diagnose",
        )
    return raw
| 196 | |
| 197 | # --------------------------------------------------------------------------- |
| 198 | # Registration |
| 199 | # --------------------------------------------------------------------------- |
| 200 | |
def register(
    subparsers: "argparse._SubParsersAction[argparse.ArgumentParser]",
) -> None:
    """Attach the ``cat`` sub-command, with all of its options, to *subparsers*.

    The module docstring is reused verbatim as the command description, and
    ``run`` is installed as the handler through the ``func`` default.
    """
    cat_parser = subparsers.add_parser(
        "cat",
        help="Print the raw content of one or more tracked files.",
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    files_help = (
        "One or more tracked file paths. "
        "Symbol addresses (file.py::Symbol) are not accepted — "
        "use 'muse code cat' for symbol-level reads."
    )
    at_help = (
        "Commit ref (SHA prefix, branch, tag) to read from. "
        "Defaults to the working tree (uncommitted edits visible). "
        "Mutually exclusive with --staged."
    )
    staged_help = (
        "Read the staged version of each file — the content that would be "
        "committed if you ran 'muse commit' now. Ignores working-tree edits "
        "made after the last 'muse code add'. Mirrors 'git show :path'."
    )

    # Positional file list; zero files is accepted here and rejected by the
    # handler so the error can honour --json.
    cat_parser.add_argument("files", nargs="*", metavar="file", help=files_help)
    cat_parser.add_argument("--at", default=None, metavar="REF", help=at_help)
    cat_parser.add_argument(
        "--staged", action="store_true", default=False, help=staged_help,
    )
    cat_parser.add_argument(
        "--json", "-j",
        action="store_true",
        dest="json_out",
        help="Emit machine-readable JSON.",
    )
    cat_parser.set_defaults(func=run)
| 242 | |
| 243 | # --------------------------------------------------------------------------- |
| 244 | # Handler |
| 245 | # --------------------------------------------------------------------------- |
| 246 | |
def _report_fatal(message: str, error_code: str, json_out: bool) -> None:
    """Report a request-level error: JSON on stdout, or plain text on stderr.

    Only prints; the caller raises ``SystemExit`` afterwards.
    """
    if json_out:
        print(json.dumps({"error": message, "error_code": error_code}))
    else:
        print(f"❌ {message}", file=sys.stderr)


def run(args: argparse.Namespace) -> None:
    """Print the raw content of one or more tracked files.

    By default the working-tree copy of each file is read.  ``--staged``
    reads from the HEAD manifest overlaid with the stage index, and
    ``--at REF`` reads from the snapshot of the resolved commit.  The two
    options are mutually exclusive.

    Agent quickstart
    ----------------
    ::

        muse cat src/billing.py --json
        muse cat src/billing.py --at HEAD~5 --json
        muse cat src/billing.py src/auth.py --json

    JSON output (exactly one file requested, no error)
    --------------------------------------------------
    A flat object: the fields returned by ``make_envelope`` plus

    path        Path as given on the command line.
    content     File bytes decoded as UTF-8; undecodable bytes are replaced,
                so binary / non-UTF-8 content may be lossy.
    size_bytes  Length of the raw bytes.
    source_ref  ``"working tree"``, ``"staged"`` or
                ``"commit <commit_id> on <branch>"``.

    JSON output (several files requested, or any per-file error)
    ------------------------------------------------------------
    The fields returned by ``make_envelope`` plus

    files       List of file entries (same four keys as above).
    errors      List of objects with ``path``, ``error``, ``error_code``
                and ``hint``.
    source_ref  Source description shared by all files.

    Request-level failures (conflicting flags, no file given, symbol
    address, control character in a path, unresolved ref) print a single
    ``{"error": ..., "error_code": ...}`` object instead.

    Text output
    -----------
    File contents are written to stdout.  When more than one file was
    requested, each is preceded by a ``# <path> (<source_ref>)`` header and
    followed by a newline.  Per-file errors go to stderr.

    Exit status
    -----------
    Always terminates by raising ``SystemExit``: ``0`` when every file was
    read, ``ExitCode.USER_ERROR`` for any request-level or per-file failure.
    ``require_repo()`` is called to locate the repository; how it behaves
    outside a repository is not visible here (presumably it exits).
    """
    elapsed = start_timer()
    files: list[str] = args.files
    at: str | None = args.at
    # getattr keeps compatibility with callers that build a Namespace
    # without the --staged attribute.
    staged: bool = getattr(args, "staged", False)
    json_out: bool = args.json_out

    if staged and at is not None:
        _report_fatal(
            "--staged and --at are mutually exclusive",
            "MUTUALLY_EXCLUSIVE",
            json_out,
        )
        raise SystemExit(ExitCode.USER_ERROR)

    if not files:
        _report_fatal(
            "no file given — usage: muse cat <file> [<file> ...] [--at <ref>]",
            "NO_FILE_GIVEN",
            json_out,
        )
        raise SystemExit(ExitCode.USER_ERROR)

    # Reject symbol addresses up front (checked for every path before any
    # other path validation runs).
    for f in files:
        if "::" in f:
            _report_fatal(
                f"'{sanitize_display(f)}' looks like a symbol address. "
                f"Use 'muse code cat' for symbol-level reads.",
                "SYMBOL_ADDRESS_REJECTED",
                json_out,
            )
            raise SystemExit(ExitCode.USER_ERROR)

    # Reject paths with control characters (newlines, null bytes, ANSI).
    # Newline / CR / NUL take precedence over ESC when both are present.
    for f in files:
        if any(ch in f for ch in ("\n", "\r", "\x00")):
            problem = "control character"
        elif "\x1b" in f:
            problem = "ANSI escape"
        else:
            continue
        _report_fatal(
            f"invalid path: {problem} in '{sanitize_display(f)}'",
            "INVALID_PATH",
            json_out,
        )
        raise SystemExit(ExitCode.USER_ERROR)

    root = require_repo()
    branch = read_current_branch(root)

    source_is_workdir = at is None and not staged
    manifest: Manifest

    if staged:
        # Start from HEAD and overlay the stage index: staged deletions drop
        # the path, everything else points the path at the staged blob.
        head_manifest = get_head_snapshot_manifest(root, branch) or {}
        stage = read_stage(root)
        staged_manifest: dict[str, str] = dict(head_manifest)
        for _path, _entry in stage.items():
            if _path.startswith(".muse/"):
                continue
            if _entry["mode"] == "D":
                staged_manifest.pop(_path, None)
            else:
                staged_manifest[_path] = _entry["object_id"]
        manifest = staged_manifest
        source_ref = "staged"
    elif source_is_workdir:
        manifest = get_head_snapshot_manifest(root, branch) or {}
        source_ref = "working tree"
    else:
        resolved = resolve_commit_ref(root, branch, at)
        if resolved is None:
            _report_fatal(
                f"ref not found: {sanitize_display(at or '')}",
                "REF_NOT_FOUND",
                json_out,
            )
            raise SystemExit(ExitCode.USER_ERROR)
        manifest = get_commit_snapshot_manifest(root, resolved.commit_id) or {}
        source_ref = f"commit {resolved.commit_id} on {branch}"

    results: list[_CatFileEntryJson] = []
    errors: list[_CatErrorEntryJson] = []

    for file_path in files:
        try:
            raw = _get_file_bytes(root, file_path, manifest, source_is_workdir)
        except _FileError as exc:
            message = sanitize_display(str(exc))
            errors.append(_CatErrorEntryJson(
                path=file_path,
                error=message,
                error_code=exc.code,
                hint=exc.hint,
            ))
            if not json_out:
                print(f"❌ {message}", file=sys.stderr)
            continue
        results.append(_CatFileEntryJson(
            path=file_path,
            # Lossy for non-UTF-8 bytes; size_bytes still reports the raw length.
            content=raw.decode("utf-8", errors="replace"),
            size_bytes=len(raw),
            source_ref=source_ref,
        ))

    has_errors = bool(errors)
    multiple = len(files) > 1

    if json_out:
        if not multiple and not has_errors:
            # Exactly one file and it succeeded: emit the flat shape.
            print(json.dumps(_CatSingleJson(**make_envelope(elapsed), **results[0])))
        else:
            print(json.dumps(_CatMultiJson(
                **make_envelope(elapsed),
                files=results,
                errors=errors,
                source_ref=source_ref,
            )))
    else:
        for entry in results:
            if multiple:
                print(f"# {entry['path']} ({source_ref})")
            sys.stdout.write(entry["content"])
            if multiple:
                print()

    raise SystemExit(ExitCode.USER_ERROR if has_errors else 0)
File History
1 commit
sha256:d11a87833d5fad6059b7662844bf5448a8911a17cce7a51811f71ad394f248eb
bump to v0.2.0rc13
Human
patch
6 days ago