gabriel / muse public
core_cat.py python
413 lines 14.1 KB
Raw
1 """``muse cat`` — print the raw content of one or more tracked files.
2
3 The domain-agnostic, file-level counterpart to ``muse code cat`` (symbol-level)
4 and the future ``muse midi cat`` (pattern-level). Mirrors the relationship
5 between ``muse blame`` (line-level) and ``muse code blame`` (symbol-level).
6
7 Usage::
8
9 muse cat README.md
10 muse cat src/main.py --at v1.2.3
11 muse cat src/main.py --staged
12 muse cat src/main.py --at abc123 --json
13 muse cat file1.py file2.py --json
14
15 Symbol addresses (``file.py::Symbol``) are not accepted here — use
16 ``muse code cat`` for symbol-level reads.
17
18 JSON output schemas::
19
20 Single file (--json):
21 {
22 "path": "<path>",
23 "content": "<utf-8 text, errors replaced>",
24 "size_bytes": <int>,
25 "source_ref": "working tree" | "staged" | "commit <sha8> on <branch>",
26 "duration_ms": <float>
27 }
28
29 Multiple files (--json):
30 {
31 "files": [{"path": ..., "content": ..., "size_bytes": ...}, ...],
32 "errors": [{"path": ..., "error": ..., "error_code": ..., "hint": ...}, ...],
33 "source_ref": "<str>",
34 "duration_ms": <float>
35 }
36
37 Exit codes::
38
39 0 — all files read successfully
40 1 — file not tracked, symlink rejected, path traversal, bad ref, bad address
41 2 — not inside a Muse repository
42 3 — I/O error reading from the object store or disk
43 """
44
45 import argparse
46 import json
47 import pathlib
48 import sys
49
50 from muse.core.errors import ExitCode
51 from muse.core.object_store import read_object
52 from muse.core.repo import require_repo
53 from muse.core.types import Manifest
54 from muse.core.refs import read_current_branch
55 from muse.core.commits import resolve_commit_ref
56 from muse.core.snapshots import (
57 get_commit_snapshot_manifest,
58 get_head_snapshot_manifest,
59 )
60 from muse.core.envelope import EnvelopeJson, make_envelope
61 from muse.core.timing import start_timer
62 from muse.core.validation import sanitize_display
63 from muse.plugins.code.stage import read_stage
64 from typing import TypedDict
65
66 # ---------------------------------------------------------------------------
67 # TypedDicts — JSON output schema
68 # ---------------------------------------------------------------------------
69
70 class _CatFileEntryJson(TypedDict):
71 """One file entry in the multi-file output."""
72
73 path: str
74 content: str
75 size_bytes: int
76 source_ref: str
77
class _CatSingleJson(EnvelopeJson):
    """Flat JSON object emitted when exactly one file is read without error.

    Extends the shared envelope with the same four keys a file entry carries.
    """

    # Path exactly as it was given on the command line.
    path: str
    # File bytes decoded as UTF-8; undecodable bytes are replaced.
    content: str
    # Length of the raw (undecoded) byte content.
    size_bytes: int
    # Human-readable description of where the bytes came from.
    source_ref: str
85
86 class _CatErrorEntryJson(TypedDict):
87 """One error entry in the multi-file output."""
88
89 path: str
90 error: str
91 error_code: str
92 hint: str
93
class _CatMultiJson(EnvelopeJson):
    """JSON object emitted for several files, or whenever any read failed.

    Extends the shared envelope with the per-file results, the per-file
    failures, and the single source description that applies to the request.
    """

    # One entry per file that was read successfully.
    files: list[_CatFileEntryJson]
    # One entry per file that could not be read.
    errors: list[_CatErrorEntryJson]
    # Human-readable description of where the bytes came from.
    source_ref: str
100
101 # ---------------------------------------------------------------------------
102 # Internal helpers
103 # ---------------------------------------------------------------------------
104
105 class _FileError(Exception):
106 """Raised when a file cannot be read, with a machine-parseable code."""
107
108 def __init__(self, message: str, code: str, hint: str = "") -> None:
109 super().__init__(message)
110 self.code = code
111 self.hint = hint
112
def _get_file_bytes(
    root: pathlib.Path,
    file_path: str,
    manifest: Manifest,
    source_is_workdir: bool,
) -> bytes:
    """Fetch the raw bytes of *file_path* from disk or from the object store.

    Snapshot mode (``source_is_workdir`` false): the path must be a key of
    *manifest*; its blob is loaded from the object store.

    Working-tree mode (``source_is_workdir`` true): the path must be tracked,
    meaning it is a key of *manifest* or has a stage entry whose mode is not
    ``"D"``.  The on-disk file is preferred; if it cannot be read, the staged
    blob and then the manifest blob are tried.

    Security
    --------
    Working-tree reads refuse symlinks and verify that the resolved path stays
    under the resolved repository root, so a crafted path cannot escape it.

    Raises:
        _FileError: with code ``SYMLINK_REJECTED``, ``PATH_TRAVERSAL``,
            ``FILE_NOT_TRACKED`` or ``BLOB_NOT_FOUND``.
    """

    def _untracked() -> _FileError:
        # Single definition of the "not tracked" failure used by both modes.
        return _FileError(
            f"file not tracked: {file_path}",
            code="FILE_NOT_TRACKED",
            hint="use 'muse code add <file>' to track it",
        )

    if not source_is_workdir:
        # Snapshot mode: the manifest is the only authority.
        if file_path not in manifest:
            raise _untracked()
        object_id = manifest[file_path]
        blob = read_object(root, object_id)
        if blob is None:
            raise _FileError(
                f"blob not found in object store: {object_id}",
                code="BLOB_NOT_FOUND",
                hint="the object store may be corrupted; try `muse gc` to diagnose",
            )
        return blob

    # Working-tree mode.  The safety checks run before anything is read.
    on_disk = root / file_path
    if on_disk.is_symlink():
        raise _FileError(
            f"refusing to read symlink: {file_path}",
            code="SYMLINK_REJECTED",
            hint="dereference the symlink and commit the real file instead",
        )
    try:
        on_disk.resolve().relative_to(root.resolve())
    except ValueError:
        raise _FileError(
            f"path escapes repository root: {file_path}",
            code="PATH_TRAVERSAL",
            hint="file paths must be relative to the repository root",
        )

    index = read_stage(root)
    entry = index.get(file_path)
    tracked_in_head = file_path in manifest
    # A stage entry with mode "D" marks a staged deletion, not a tracked file.
    tracked_in_stage = entry is not None and entry["mode"] != "D"

    if not (tracked_in_head or tracked_in_stage):
        raise _untracked()

    try:
        return on_disk.read_bytes()
    except OSError:
        # NOTE: any OSError (not only a missing file) lands here, so an
        # unreadable file is also served from the object store below.
        pass

    # Disk read failed: prefer the staged blob, then the committed one.
    fallback_ids: list[str] = []
    if entry is not None and tracked_in_stage:
        fallback_ids.append(entry["object_id"])
    if tracked_in_head:
        fallback_ids.append(manifest[file_path])

    for object_id in fallback_ids:
        blob = read_object(root, object_id)
        if blob is not None:
            return blob

    raise _FileError(
        f"blob not found in object store for: {file_path}",
        code="BLOB_NOT_FOUND",
        hint="the object store may be corrupted; try `muse gc` to diagnose",
    )
196
197 # ---------------------------------------------------------------------------
198 # Registration
199 # ---------------------------------------------------------------------------
200
def register(
    subparsers: "argparse._SubParsersAction[argparse.ArgumentParser]",
) -> None:
    """Attach the ``cat`` sub-command and its options to *subparsers*.

    The positional ``files`` argument accepts zero or more values so that the
    handler, rather than argparse, reports a missing file.  The handler is
    wired in through ``set_defaults(func=run)``.
    """
    files_help = (
        "One or more tracked file paths. "
        "Symbol addresses (file.py::Symbol) are not accepted — "
        "use 'muse code cat' for symbol-level reads."
    )
    at_help = (
        "Commit ref (SHA prefix, branch, tag) to read from. "
        "Defaults to the working tree (uncommitted edits visible). "
        "Mutually exclusive with --staged."
    )
    staged_help = (
        "Read the staged version of each file — the content that would be "
        "committed if you ran 'muse commit' now. Ignores working-tree edits "
        "made after the last 'muse code add'. Mirrors 'git show :path'."
    )

    cat_parser = subparsers.add_parser(
        "cat",
        help="Print the raw content of one or more tracked files.",
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    cat_parser.add_argument("files", nargs="*", metavar="file", help=files_help)
    cat_parser.add_argument("--at", default=None, metavar="REF", help=at_help)
    cat_parser.add_argument(
        "--staged", action="store_true", default=False, help=staged_help,
    )
    cat_parser.add_argument(
        "--json", "-j", action="store_true", dest="json_out",
        help="Emit machine-readable JSON.",
    )
    cat_parser.set_defaults(func=run)
242
243 # ---------------------------------------------------------------------------
244 # Handler
245 # ---------------------------------------------------------------------------
246
def _usage_error(msg: str, error_code: str, json_out: bool) -> SystemExit:
    """Report a fatal invocation error and build the exit to raise.

    Prints ``{"error": ..., "error_code": ...}`` on stdout when *json_out* is
    set, otherwise a human-readable line on stderr.  The ``SystemExit`` is
    returned rather than raised so call sites read ``raise _usage_error(...)``
    and control-flow analysis still sees the branch terminate.
    """
    if json_out:
        print(json.dumps({"error": msg, "error_code": error_code}))
    else:
        print(f"❌ {msg}", file=sys.stderr)
    return SystemExit(ExitCode.USER_ERROR)


def run(args: argparse.Namespace) -> None:
    """Print the raw content of one or more tracked files.

    Source selection
    ----------------
    * default      — the working tree (uncommitted edits visible)
    * ``--staged`` — the HEAD snapshot with the stage index applied
    * ``--at REF`` — the snapshot of the commit *REF* resolves to

    ``--staged`` and ``--at`` are mutually exclusive.

    Agent quickstart
    ----------------
    ::

        muse cat src/billing.py --json
        muse cat src/billing.py --at HEAD~5 --json
        muse cat src/billing.py src/auth.py --json

    JSON fields (exactly one file requested and it was read successfully)
    ---------------------------------------------------------------------
    path        File path as given on the command line.
    content     File content decoded as UTF-8 (undecodable bytes replaced).
    size_bytes  Size of the raw content in bytes.
    source_ref  ``"working tree"``, ``"staged"`` or
                ``"commit <commit_id> on <current branch>"``.

    The envelope fields produced by ``make_envelope`` are included as well.

    JSON fields (several files requested, or any file failed)
    ---------------------------------------------------------
    files       List of file entry objects (same four fields as above).
    errors      List of objects with ``path``, ``error``, ``error_code``
                and ``hint`` for each file that could not be read.
    source_ref  Same source description for all files in the request.

    Fatal invocation errors (conflicting flags, no file, symbol address,
    control characters in a path, unknown ref) emit
    ``{"error": ..., "error_code": ...}`` instead.

    Exit codes
    ----------
    0                       All files read and printed.
    ``ExitCode.USER_ERROR`` Invalid invocation, or any file could not be read.

    Being outside a repository is handled by ``require_repo`` (the module
    docstring lists exit code 2 for that case).
    """
    elapsed = start_timer()
    files: list[str] = args.files
    at: str | None = args.at
    staged: bool = getattr(args, "staged", False)
    json_out: bool = args.json_out

    # --- Argument validation (runs before the repository is touched) -------

    if staged and at is not None:
        raise _usage_error(
            "--staged and --at are mutually exclusive",
            "MUTUALLY_EXCLUSIVE",
            json_out,
        )

    if not files:
        # Carries an error_code like every other fatal payload so JSON
        # consumers can branch on it uniformly.
        raise _usage_error(
            "no file given — usage: muse cat <file> [<file> ...] [--at <ref>]",
            "NO_FILE_GIVEN",
            json_out,
        )

    # Reject symbol addresses up front (checked for every path before the
    # control-character pass, so this error wins when both apply).
    for f in files:
        if "::" in f:
            raise _usage_error(
                f"'{sanitize_display(f)}' looks like a symbol address. "
                f"Use 'muse code cat' for symbol-level reads.",
                "SYMBOL_ADDRESS_REJECTED",
                json_out,
            )

    # Reject paths containing newlines, carriage returns, NUL bytes or the
    # ESC character that starts ANSI sequences.
    for f in files:
        if any(ch in f for ch in ("\n", "\r", "\x00")):
            raise _usage_error(
                f"invalid path: control character in '{sanitize_display(f)}'",
                "INVALID_PATH",
                json_out,
            )
        if "\x1b" in f:
            raise _usage_error(
                f"invalid path: ANSI escape in '{sanitize_display(f)}'",
                "INVALID_PATH",
                json_out,
            )

    # --- Resolve the manifest to read from ----------------------------------

    root = require_repo()
    branch = read_current_branch(root)

    source_is_workdir = at is None and not staged
    manifest: Manifest

    if staged:
        # Overlay the stage index on the HEAD snapshot: staged deletions
        # remove the path, everything else points at the staged blob.
        head_manifest = get_head_snapshot_manifest(root, branch) or {}
        stage = read_stage(root)
        staged_manifest: dict[str, str] = dict(head_manifest)
        for _path, _entry in stage.items():
            if _path.startswith(".muse/"):
                continue
            if _entry["mode"] == "D":
                staged_manifest.pop(_path, None)
            else:
                staged_manifest[_path] = _entry["object_id"]
        manifest = staged_manifest
        source_ref = "staged"
    elif source_is_workdir:
        manifest = get_head_snapshot_manifest(root, branch) or {}
        source_ref = "working tree"
    else:
        resolved = resolve_commit_ref(root, branch, at)
        if resolved is None:
            raise _usage_error(
                f"ref not found: {sanitize_display(at or '')}",
                "REF_NOT_FOUND",
                json_out,
            )
        manifest = get_commit_snapshot_manifest(root, resolved.commit_id) or {}
        source_ref = f"commit {resolved.commit_id} on {branch}"

    # --- Read every requested file, collecting failures ---------------------

    results: list[_CatFileEntryJson] = []
    errors: list[_CatErrorEntryJson] = []

    for file_path in files:
        try:
            raw = _get_file_bytes(root, file_path, manifest, source_is_workdir)
        except _FileError as exc:
            message = sanitize_display(str(exc))
            errors.append(_CatErrorEntryJson(
                path=file_path,
                error=message,
                error_code=exc.code,
                hint=exc.hint,
            ))
            if not json_out:
                # In text mode failures are reported as they happen.
                print(f"❌ {message}", file=sys.stderr)
            continue
        results.append(_CatFileEntryJson(
            path=file_path,
            content=raw.decode("utf-8", errors="replace"),
            size_bytes=len(raw),
            source_ref=source_ref,
        ))

    # --- Emit ----------------------------------------------------------------

    if json_out:
        if len(files) == 1 and not errors:
            # Lone successful file: flat object rather than a one-item array.
            print(json.dumps(_CatSingleJson(**make_envelope(elapsed), **results[0])))
        else:
            print(json.dumps(_CatMultiJson(
                **make_envelope(elapsed),
                files=results,
                errors=errors,
                source_ref=source_ref,
            )))
    else:
        # NOTE: text mode writes the decoded string, so bytes that are not
        # valid UTF-8 appear as U+FFFD replacement characters here too.
        multiple = len(files) > 1
        for entry in results:
            if multiple:
                print(f"# {entry['path']} ({source_ref})")
            sys.stdout.write(entry["content"])
            if multiple:
                print()

    raise SystemExit(ExitCode.USER_ERROR if errors else 0)
File History 1 commit