gabriel / muse public
cat_object.py python
312 lines 10.0 KB
Raw
sha256:e6465e8a9b7fa8e6223ed4a3576e96c568c913ae2caeb9c31f15e7a81b250b40 docs: add | jq convention to --json section of agent-guide Sonnet 4.6 1 day ago
1 """muse cat-object — read a stored object from the object store.
2
3 Reads the raw bytes of a content-addressed object and writes them to stdout.
4 Useful for inspecting stored blobs, verifying round-trips, or piping raw
5 content to other tools.
6
7 Single-object mode (default)
8 -----------------------------
9
10 Default (no ``--json``): the object's raw bytes are written directly to
11 stdout, with nothing added before or after them.
12
13 With ``--json``: JSON metadata about the object (no content emitted unless
14 ``--inline`` is also given).
15
16 {"object_id": "sha256:<hex>", "present": true, "size_bytes": 1234,
17 "duration_ms": 0.001}
18
19 Add ``--inline`` to embed the full content in the JSON response as
20 base64-encoded ``content_b64`` — saves agents a second round-trip for small
21 objects::
22
23 {"object_id": "sha256:<hex>", "present": true, "size_bytes": 5,
24 "content_b64": "aGVsbG8=", "duration_ms": 0.001}
25
26 Object IDs
27 ----------
28
29 All object IDs use the canonical ``sha256:<64 lowercase hex chars>`` form.
30 This matches the format produced by every other muse command (``muse log``,
31 ``muse read``, ``muse status``, etc.). Bare hex (without the ``sha256:``
32 prefix) is rejected.
33
34 Batch mode
35 ----------
36
37 ``--batch`` reads object IDs from stdin (one per line) and for each emits the
38 batch-protocol header followed by the raw content::
39
40 <oid> blob <size>\\n
41 <raw-content-bytes>\\n
42
43 For missing or invalid OIDs the output is::
44
45 <oid> missing\\n
46
47 ``--batch-check`` is the header-only variant — same protocol but no content
48 bytes are emitted. Useful for bulk presence checks without reading blobs.
49
50 Batch mode is intended for agent pipelines and migration tools that need to
51 stream many objects efficiently from a single long-running process. The OIDs
52 in stdin may be ``sha256:``-prefixed (as emitted by muse commands) or bare hex
53 — invalid forms are reported as ``missing``.
54
55 Output contract
56 ---------------
57
58 - Exit 0: found — bytes written to stdout or metadata printed.
59 - Exit 1: not found in the store, or invalid object-id format.
60 - Exit 3: I/O error reading from the store.
61 - Batch mode always exits 0 (missing objects are reported inline, not as errors).
62
63 Agent use
64 ---------
65
66 Prefer ``--json`` over ``--format raw`` when only metadata is needed::
67
68 muse cat-object --json sha256:<oid>
69
70 For metadata + content in one call (small objects only)::
71
72 muse cat-object --json --inline sha256:<oid>
73
74 For bulk presence checks::
75
76 muse log --json | python3 -c "import sys,json; [print(c['commit_id']) for c in json.load(sys.stdin)['commits']]" \\
77 | muse cat-object --batch-check
78
79 For bulk reads::
80
81 printf '%s\\n' sha256:<oid1> sha256:<oid2> | muse cat-object --batch
82 """
83
84 import argparse
85 import base64
86 import hashlib
87 import json
88 import logging
89 import pathlib
90 import sys
91 import time
92 from typing import TypedDict
93
94 from muse.core.types import long_id
95 from muse.core.errors import ExitCode
96 from muse.core.object_store import has_object, object_path, read_object
97 from muse.core.repo import require_repo
98 from muse.core.validation import sanitize_display, validate_object_id
99 from muse.core.envelope import EnvelopeJson, make_envelope
100 from muse.core.timing import start_timer
101
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Names of the two single-object output styles (raw bytes vs. JSON info).
# NOTE(review): nothing in the visible code reads this tuple, and ``register``
# defines no ``--format`` option — confirm whether it is still needed.
_FORMAT_CHOICES = ("raw", "info")
105
class _CatObjectInfoJson(EnvelopeJson, total=False):
    """JSON output for ``muse cat-object --json`` (info format).

    Extends ``EnvelopeJson`` with the object-specific keys below.  The class
    is declared with ``total=False``, so type checkers treat every key
    declared here as optional; ``run`` always sets ``object_id``, ``present``
    and ``size_bytes`` and adds ``content_b64`` only when ``--inline`` is
    given.
    """

    object_id: str  # the object ID exactly as passed on the command line
    present: bool  # True when the object was found in the store
    size_bytes: int  # length of the object's content in bytes; 0 when not found
    content_b64: str  # base64 of the raw content; only present with --inline
# 64 KiB (65536 bytes).  NOTE(review): not referenced by any code visible in
# this file — presumably a chunk size for streamed output; confirm or remove.
_CHUNK = 65536
114
def register(subparsers: "argparse._SubParsersAction[argparse.ArgumentParser]") -> None:
    """Add the ``cat-object`` subcommand to *subparsers*.

    The subcommand takes an optional positional ``object_id``, the mutually
    exclusive ``--batch`` / ``--batch-check`` switches, ``--json`` (``-j``,
    stored as ``json_out``) and ``--inline``.  The module docstring is used
    verbatim as the ``--help`` description, and ``run`` is installed as the
    parser's ``func`` default.

    Args:
        subparsers: The sub-parser collection of the parent CLI parser.
    """
    # Help texts are spelled out first so the argument wiring below stays compact.
    object_id_help = (
        "Object ID to read in sha256:<hex> form. "
        "Required in single-object mode; omit when using --batch or --batch-check."
    )
    batch_help = (
        "Batch mode: read object IDs from stdin (one per line) and emit "
        "'<oid> blob <size>\\n<content>\\n' for each. "
        "Missing or invalid OIDs emit '<oid> missing\\n'."
    )
    batch_check_help = (
        "Batch-check mode: like --batch but emits only the header line "
        "'<oid> blob <size>\\n' — no content bytes. "
        "Efficient for bulk presence checks."
    )
    inline_help = (
        "When used with --json, embed the full object content as base64 in "
        "the 'content_b64' field. Saves a second round-trip for small objects. "
        "Requires --json."
    )

    cmd = subparsers.add_parser(
        "cat-object",
        help="Emit raw bytes of a stored object to stdout.",
        description=__doc__,
        # Keep the module docstring's layout intact in --help output.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    cmd.add_argument("object_id", nargs="?", default=None, help=object_id_help)

    # --batch and --batch-check are alternatives; argparse rejects passing both.
    stdin_modes = cmd.add_mutually_exclusive_group()
    stdin_modes.add_argument(
        "--batch", action="store_true", dest="batch", help=batch_help
    )
    stdin_modes.add_argument(
        "--batch-check", action="store_true", dest="batch_check", help=batch_check_help
    )

    cmd.add_argument(
        "--json",
        "-j",
        action="store_true",
        dest="json_out",
        help="Emit JSON metadata instead of raw bytes.",
    )
    # The "--inline requires --json" rule is enforced in run(), not by argparse.
    cmd.add_argument("--inline", action="store_true", default=False, help=inline_help)

    cmd.set_defaults(func=run)
172
def _run_batch(root: "pathlib.Path", check_only: bool) -> None:
    """Serve object IDs from stdin in the ``git cat-file --batch`` protocol.

    One object ID is read per stdin line (surrounding whitespace is stripped
    and blank lines are skipped).  For every ID exactly one record is written
    to the binary stdout stream and flushed immediately, so a caller that
    keeps this process open can read each answer before sending the next ID:

    - ``<oid> blob <size>\\n`` followed, unless *check_only* is true, by the
      raw content and a terminating ``\\n``; or
    - ``<oid> missing\\n`` when the ID fails ``validate_object_id``, the
      object is not in the store, or reading it raises ``OSError``.

    A read failure is logged and reported as ``missing`` rather than raised,
    so one unreadable object cannot abort the rest of the batch.

    Args:
        root: Repository root, passed through to ``read_object``.
        check_only: When true, emit header lines only (``--batch-check``).
    """
    out = sys.stdout.buffer

    for raw_line in sys.stdin:
        oid = raw_line.strip()
        if not oid:
            continue

        content = _read_batch_object(root, oid)
        if content is None:
            out.write(f"{oid} missing\n".encode())
        else:
            out.write(f"{oid} blob {len(content)}\n".encode())
            if not check_only:
                out.write(content)
                out.write(b"\n")

        out.flush()


def _read_batch_object(root: "pathlib.Path", oid: str) -> "bytes | None":
    """Return the content stored under *oid*, or ``None`` if it cannot be served.

    ``None`` covers an ID rejected by ``validate_object_id``, an object that
    ``read_object`` reports as absent, and an ``OSError`` raised while reading.
    """
    # Invalid IDs are reported as missing, not as errors.
    try:
        validate_object_id(oid)
    except ValueError:
        return None

    # read_object already signals absence with None, so no separate existence
    # check is made first (which would also leave a check-then-read race).
    try:
        return read_object(root, oid)
    except OSError as exc:
        logger.warning("cat-object --batch: failed to read %s: %s", oid, exc)
        return None
208
def run(args: argparse.Namespace) -> None:
    """Execute ``muse cat-object`` for the parsed command line *args*.

    Batch mode (``--batch`` / ``--batch-check``) hands stdin processing to
    ``_run_batch`` and returns; ``--json``, ``--inline`` and the positional
    object ID are not consulted in that mode.

    Single-object mode validates the object ID, reads the object with
    ``read_object`` and then either

    - writes the raw content to stdout's binary stream (default), or
    - prints one JSON object (``--json``), adding ``content_b64`` when
      ``--inline`` is also given.

    JSON fields (in addition to those supplied by ``make_envelope``)
    ----------------------------------------------------------------
    object_id    The object ID as given on the command line.
    present      ``true`` if the object exists in the store.
    size_bytes   Content length in bytes (``0`` when not present).
    content_b64  Base64-encoded content (only with ``--inline``).

    Raises
    ------
    SystemExit
        With ``ExitCode.USER_ERROR`` when ``--inline`` is used without
        ``--json``, when no object ID is given, when the ID is invalid, or
        when the object is not found (in ``--json`` mode the
        ``present: false`` document is printed first); with
        ``ExitCode.INTERNAL_ERROR`` when reading the object raises
        ``OSError``.
    """
    elapsed = start_timer()

    # Pull every option off the namespace up front, before any branching.
    use_batch: bool = args.batch
    headers_only: bool = args.batch_check
    want_json: bool = args.json_out
    want_inline: bool = getattr(args, "inline", False)
    object_id: str | None = args.object_id

    # Batch mode: everything else on the command line is irrelevant.
    if use_batch or headers_only:
        _run_batch(require_repo(), check_only=headers_only)
        return

    # --inline only makes sense as an addition to the JSON document.
    if want_inline and not want_json:
        print("❌ --inline requires --json.", file=sys.stderr)
        raise SystemExit(ExitCode.USER_ERROR)

    # Single-object mode needs an ID to look up.
    if object_id is None:
        print(
            "❌ object_id is required in single-object mode "
            "(or use --batch / --batch-check for stdin processing).",
            file=sys.stderr,
        )
        raise SystemExit(ExitCode.USER_ERROR)

    try:
        validate_object_id(object_id)
    except ValueError as exc:
        print(f"❌ Invalid object ID: {sanitize_display(str(exc))}", file=sys.stderr)
        raise SystemExit(ExitCode.USER_ERROR)

    repo_root = require_repo()

    try:
        content = read_object(repo_root, object_id)
    except OSError as exc:
        print(
            f"❌ Failed to read object: {sanitize_display(str(exc))}",
            file=sys.stderr,
        )
        raise SystemExit(ExitCode.INTERNAL_ERROR)

    # Not found: report on the channel matching the output mode, then fail.
    if content is None:
        if not want_json:
            print(f"❌ Object not found: {object_id}", file=sys.stderr)
            raise SystemExit(ExitCode.USER_ERROR)
        absent = _CatObjectInfoJson(
            **make_envelope(elapsed, exit_code=ExitCode.USER_ERROR),
            object_id=object_id,
            present=False,
            size_bytes=0,
        )
        print(json.dumps(absent))
        raise SystemExit(ExitCode.USER_ERROR)

    # Found, raw mode: the content goes out untouched on the binary stream.
    if not want_json:
        sys.stdout.buffer.write(content)
        return

    # Found, JSON mode: metadata, plus the content itself when --inline is set.
    info = _CatObjectInfoJson(
        **make_envelope(elapsed),
        object_id=object_id,
        present=True,
        size_bytes=len(content),
    )
    if want_inline:
        info["content_b64"] = base64.b64encode(content).decode()
    print(json.dumps(info))
File History 1 commit
sha256:e6465e8a9b7fa8e6223ed4a3576e96c568c913ae2caeb9c31f15e7a81b250b40 docs: add | jq convention to --json section of agent-guide Sonnet 4.6 1 day ago