cat_object.py
python
sha256:e6465e8a9b7fa8e6223ed4a3576e96c568c913ae2caeb9c31f15e7a81b250b40
docs: add | jq convention to --json section of agent-guide
Sonnet 4.6
1 day ago
| 1 | """muse cat-object — read a stored object from the object store. |
| 2 | |
| 3 | Reads the raw bytes of a content-addressed object and writes them to stdout. |
| 4 | Useful for inspecting stored blobs, verifying round-trips, or piping raw |
| 5 | content to other tools. |
| 6 | |
| 7 | Single-object mode (default) |
| 8 | ----------------------------- |
| 9 | |
| 10 | Without ``--json`` (default): the object's raw bytes are written to stdout |
| 11 | unchanged, ready to be piped into other tools. |
| 12 | |
| 13 | With ``--json``: JSON metadata about the object (no content |
| 14 | emitted). |
| 15 | |
| 16 | {"object_id": "sha256:<hex>", "present": true, "size_bytes": 1234, |
| 17 | "duration_ms": 0.001} |
| 18 | |
| 19 | Add ``--inline`` to embed the full content in the JSON response as |
| 20 | base64-encoded ``content_b64`` — saves agents a second round-trip for small |
| 21 | objects:: |
| 22 | |
| 23 | {"object_id": "sha256:<hex>", "present": true, "size_bytes": 5, |
| 24 | "content_b64": "aGVsbG8=", "duration_ms": 0.001} |
| 25 | |
| 26 | Object IDs |
| 27 | ---------- |
| 28 | |
| 29 | All object IDs use the canonical ``sha256:<64 lowercase hex chars>`` form. |
| 30 | This matches the format produced by every other muse command (``muse log``, |
| 31 | ``muse read``, ``muse status``, etc.). Bare hex (without the ``sha256:`` |
| 32 | prefix) is rejected. |
| 33 | |
| 34 | Batch mode |
| 35 | ---------- |
| 36 | |
| 37 | ``--batch`` reads object IDs from stdin (one per line) and for each emits the |
| 38 | batch-protocol header followed by the raw content:: |
| 39 | |
| 40 | <oid> blob <size>\\n |
| 41 | <raw-content-bytes>\\n |
| 42 | |
| 43 | For missing or invalid OIDs the output is:: |
| 44 | |
| 45 | <oid> missing\\n |
| 46 | |
| 47 | ``--batch-check`` is the header-only variant — same protocol but no content |
| 48 | bytes are emitted. Useful for bulk presence checks without reading blobs. |
| 49 | |
| 50 | Batch mode is intended for agent pipelines and migration tools that need to |
| 51 | stream many objects efficiently from a single long-running process. The OIDs |
| 52 | in stdin may be ``sha256:``-prefixed (as emitted by muse commands) or bare hex |
| 53 | — invalid forms are reported as ``missing``. |
| 54 | |
| 55 | Output contract |
| 56 | --------------- |
| 57 | |
| 58 | - Exit 0: found — bytes written to stdout or metadata printed. |
| 59 | - Exit 1: not found in the store, or invalid object-id format. |
| 60 | - Exit 3: I/O error reading from the store. |
| 61 | - Batch mode always exits 0 (missing objects are reported inline, not as errors). |
| 62 | |
| 63 | Agent use |
| 64 | --------- |
| 65 | |
| 66 | Prefer ``--json`` over ``--format raw`` when only metadata is needed:: |
| 67 | |
| 68 | muse cat-object --json sha256:<oid> |
| 69 | |
| 70 | For metadata + content in one call (small objects only):: |
| 71 | |
| 72 | muse cat-object --json --inline sha256:<oid> |
| 73 | |
| 74 | For bulk presence checks:: |
| 75 | |
| 76 | muse log --json | python3 -c "import sys,json; [print(c['commit_id']) for c in json.load(sys.stdin)['commits']]" \\ |
| 77 | | muse cat-object --batch-check |
| 78 | |
| 79 | For bulk reads:: |
| 80 | |
| 81 | printf '%s\\n' sha256:<oid1> sha256:<oid2> | muse cat-object --batch |
| 82 | """ |
| 83 | |
| 84 | import argparse |
| 85 | import base64 |
| 86 | import hashlib |
| 87 | import json |
| 88 | import logging |
| 89 | import pathlib |
| 90 | import sys |
| 91 | import time |
| 92 | from typing import TypedDict |
| 93 | |
| 94 | from muse.core.types import long_id |
| 95 | from muse.core.errors import ExitCode |
| 96 | from muse.core.object_store import has_object, object_path, read_object |
| 97 | from muse.core.repo import require_repo |
| 98 | from muse.core.validation import sanitize_display, validate_object_id |
| 99 | from muse.core.envelope import EnvelopeJson, make_envelope |
| 100 | from muse.core.timing import start_timer |
| 101 | |
# Module-level logger named after this module's import path.
logger = logging.getLogger(__name__)

# Presumably the names of the two output formats (raw bytes vs. metadata).
# NOTE(review): no code visible in this file references this tuple — the
# parser built in register() exposes --json rather than a --format option;
# confirm whether the constant is still needed.
_FORMAT_CHOICES = ("raw", "info")
| 105 | |
class _CatObjectInfoJson(EnvelopeJson, total=False):
    """JSON output for ``muse cat-object --json`` (info format).

    Extends ``EnvelopeJson`` with the object-specific fields below. The class
    is declared with ``total=False``, so a type checker treats every field
    listed here as optional; ``run`` always supplies the first three and adds
    ``content_b64`` only when ``--inline`` is given.
    """

    # The object ID exactly as it was supplied on the command line.
    object_id: str
    # True when the store returned content for the ID, False otherwise.
    present: bool
    # Length in bytes of the object's content; 0 when the object is absent.
    size_bytes: int
    # Base64 text of the object's raw bytes; only present with --inline.
    content_b64: str
# 65536 bytes (64 KiB).
# NOTE(review): no code visible in this file reads _CHUNK — confirm whether a
# chunked writer is still planned before removing it.
_CHUNK = 65536
| 114 | |
def register(subparsers: "argparse._SubParsersAction[argparse.ArgumentParser]") -> None:
    """Attach the ``cat-object`` subcommand to the CLI's subparser collection.

    Declares an optional positional ``object_id``, the mutually exclusive
    ``--batch`` / ``--batch-check`` switches and the ``--json`` / ``--inline``
    output switches, then binds ``run`` as the handler through the ``func``
    default.

    Args:
        subparsers: The sub-parser collection (as returned by
            ``ArgumentParser.add_subparsers()``) that receives the new
            ``cat-object`` parser.
    """
    # Help texts are collected first so the argument declarations below stay
    # short enough to scan.
    object_id_help = (
        "Object ID to read in sha256:<hex> form. "
        "Required in single-object mode; omit when using --batch or --batch-check."
    )
    batch_help = (
        "Batch mode: read object IDs from stdin (one per line) and emit "
        "'<oid> blob <size>\\n<content>\\n' for each. "
        "Missing or invalid OIDs emit '<oid> missing\\n'."
    )
    batch_check_help = (
        "Batch-check mode: like --batch but emits only the header line "
        "'<oid> blob <size>\\n' — no content bytes. "
        "Efficient for bulk presence checks."
    )
    inline_help = (
        "When used with --json, embed the full object content as base64 in "
        "the 'content_b64' field. Saves a second round-trip for small objects. "
        "Requires --json."
    )

    cmd = subparsers.add_parser(
        "cat-object",
        help="Emit raw bytes of a stored object to stdout.",
        # The module docstring doubles as the long description; the raw
        # formatter keeps its line breaks and example blocks intact.
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    # nargs="?" keeps the positional optional so the batch modes can omit it.
    cmd.add_argument("object_id", nargs="?", default=None, help=object_id_help)

    # --batch and --batch-check cannot be combined with each other.
    modes = cmd.add_mutually_exclusive_group()
    modes.add_argument(
        "--batch", dest="batch", action="store_true", help=batch_help
    )
    modes.add_argument(
        "--batch-check",
        dest="batch_check",
        action="store_true",
        help=batch_check_help,
    )

    cmd.add_argument(
        "--json",
        "-j",
        dest="json_out",
        action="store_true",
        help="Emit JSON metadata instead of raw bytes.",
    )
    cmd.add_argument(
        "--inline", action="store_true", default=False, help=inline_help
    )

    cmd.set_defaults(func=run)
| 172 | |
def _run_batch(root: "pathlib.Path", check_only: bool) -> None:
    """Answer every object ID read from stdin with one batch-protocol record.

    Stdin is consumed line by line; surrounding whitespace is stripped and
    blank lines are skipped. For each remaining ID one record is written to
    the binary stdout stream and flushed immediately:

    * ``<oid> missing\\n`` when the ID fails ``validate_object_id``, when
      ``has_object`` reports it absent, or when ``read_object`` returns
      ``None``;
    * ``<oid> blob <size>\\n`` otherwise, followed by the object's bytes and a
      trailing ``\\n`` unless ``check_only`` is true.

    Args:
        root: Repository root handed to ``has_object`` and ``read_object``.
        check_only: When true, emit only the header line for objects that
            exist (no content bytes).
    """
    sink = sys.stdout.buffer

    def emit(*chunks: bytes) -> None:
        # One record = one or more writes followed by a single flush.
        for chunk in chunks:
            sink.write(chunk)
        sink.flush()

    for line in sys.stdin:
        oid = line.strip()
        if oid == "":
            continue

        blob = None
        try:
            validate_object_id(oid)
        except ValueError:
            # A malformed ID is answered as "missing" instead of aborting.
            pass
        else:
            # Only well-formed IDs reach the store.
            if has_object(root, oid):
                blob = read_object(root, oid)

        if blob is None:
            emit(f"{oid} missing\n".encode())
            continue

        header = f"{oid} blob {len(blob)}\n".encode()
        if check_only:
            emit(header)
        else:
            emit(header, blob, b"\n")
| 208 | |
def run(args: argparse.Namespace) -> None:
    """Read an object from the content-addressed object store by its ID.

    Batch mode (``--batch`` or ``--batch-check``)
        The repository is located with ``require_repo`` and all work is
        delegated to ``_run_batch``, which reads object IDs from stdin one per
        line. The positional ``object_id`` and the ``--json`` / ``--inline``
        switches are not consulted in this mode.

    Single-object mode (default)
        ``--inline`` is rejected unless ``--json`` is also given, and
        ``object_id`` is required and checked with ``validate_object_id``
        before the repository is located. The object is then loaded whole by
        ``read_object``:

        * without ``--json`` its bytes are written unchanged to the binary
          stdout stream in a single call (the object is held fully in memory;
          it is not streamed in chunks);
        * with ``--json`` a one-line JSON document is printed instead of the
          content.

    Agent quickstart
    ----------------
    ::

        muse cat-object sha256:<64hex> --json
        muse cat-object sha256:<64hex> --json --inline
        echo "sha256:<64hex>" | muse cat-object --batch-check

    JSON fields
    -----------
    object_id    The object ID as supplied on the command line.
    present      ``true`` if the object exists in the store.
    size_bytes   Size of the object's content in bytes (0 when absent).
    content_b64  Base64-encoded raw bytes (only with ``--inline``).

    The fields contributed by ``make_envelope`` are included as well.

    Args:
        args: Parsed command-line namespace. Reads ``batch``, ``batch_check``,
            ``json_out`` and ``object_id``; ``inline`` is read with a
            ``False`` fallback when the attribute is absent.

    Raises:
        SystemExit: With ``ExitCode.USER_ERROR`` when ``--inline`` is used
            without ``--json``, when ``object_id`` is missing or invalid, or
            when the object is not in the store; with
            ``ExitCode.INTERNAL_ERROR`` when ``read_object`` raises
            ``OSError``. ``require_repo`` presumably exits on its own when
            run outside a repository — confirm against its implementation.
    """
    elapsed = start_timer()

    batch: bool = args.batch
    batch_check: bool = args.batch_check
    json_out: bool = args.json_out
    inline: bool = getattr(args, "inline", False)
    object_id: str | None = args.object_id

    # ── Batch mode ────────────────────────────────────────────────────────────
    if batch or batch_check:
        root = require_repo()
        _run_batch(root, check_only=batch_check)
        return

    # ── --inline requires --json ───────────────────────────────────────────────
    if inline and not json_out:
        print(
            "❌ --inline requires --json.",
            file=sys.stderr,
        )
        raise SystemExit(ExitCode.USER_ERROR)

    # ── Single-object mode ────────────────────────────────────────────────────
    if object_id is None:
        print(
            "❌ object_id is required in single-object mode "
            "(or use --batch / --batch-check for stdin processing).",
            file=sys.stderr,
        )
        raise SystemExit(ExitCode.USER_ERROR)

    # Validate before touching the repository so a malformed ID is reported
    # as a usage error regardless of where the command is run.
    try:
        validate_object_id(object_id)
    except ValueError as exc:
        print(f"❌ Invalid object ID: {sanitize_display(str(exc))}", file=sys.stderr)
        # Chain the cause so the original error stays attached to the exit.
        raise SystemExit(ExitCode.USER_ERROR) from exc

    root = require_repo()

    try:
        content = read_object(root, object_id)
    except OSError as exc:
        print(
            f"❌ Failed to read object: {sanitize_display(str(exc))}",
            file=sys.stderr,
        )
        raise SystemExit(ExitCode.INTERNAL_ERROR) from exc

    # read_object signals "not in the store" by returning None.
    if content is None:
        if json_out:
            # JSON consumers still get a document on stdout; the failure is
            # carried in the envelope's exit code and ``present: false``.
            print(json.dumps(_CatObjectInfoJson(
                **make_envelope(elapsed, exit_code=ExitCode.USER_ERROR),
                object_id=object_id,
                present=False,
                size_bytes=0,
            )))
        else:
            print(f"❌ Object not found: {object_id}", file=sys.stderr)
        raise SystemExit(ExitCode.USER_ERROR)

    if json_out:
        out = _CatObjectInfoJson(
            **make_envelope(elapsed),
            object_id=object_id,
            present=True,
            size_bytes=len(content),
        )
        if inline:
            # Base64 keeps arbitrary binary content representable in JSON.
            out["content_b64"] = base64.b64encode(content).decode()
        print(json.dumps(out))
        return

    # Raw mode: write through the binary layer so bytes are emitted untouched.
    sys.stdout.buffer.write(content)
File History
1 commit
sha256:e6465e8a9b7fa8e6223ed4a3576e96c568c913ae2caeb9c31f15e7a81b250b40
docs: add | jq convention to --json section of agent-guide
Sonnet 4.6
1 day ago