"""Comprehensive tests for ``muse cat-object``. Coverage tiers -------------- - Unit: _CHUNK constant, _FORMAT_CHOICES - Integration: raw/info formats, --json alias, missing/invalid object_id, duration_ms in JSON output, --inline base64 content embedding - Batch: --batch happy path, missing OIDs, mixed, binary, --batch-check, sha256:-prefixed OIDs from stdin, invalid OIDs handled as missing, empty lines skipped, large objects - Security: ANSI in object_id error, path traversal object_id - Stress: 10 MiB object streaming, 200 sequential reads """ from __future__ import annotations import json import pathlib from muse.core.types import blob_id, fake_id, long_id from muse.core.errors import ExitCode from muse.core.object_store import write_object from muse.core.paths import muse_dir from tests.cli_test_helper import CliRunner, InvokeResult runner = CliRunner() # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- def _make_repo(tmp_path: pathlib.Path) -> pathlib.Path: """Minimal .muse/ structure.""" repo = tmp_path / "repo" dot_muse = muse_dir(repo) for sub in ("objects", "commits", "snapshots", "refs/heads"): (dot_muse / sub).mkdir(parents=True) (dot_muse / "HEAD").write_text("ref: refs/heads/main") (dot_muse / "repo.json").write_text(json.dumps({"repo_id": "test", "domain": "code"})) return repo def _store(repo: pathlib.Path, content: bytes) -> str: """Write content to the object store and return its canonical object_id (sha256: prefix).""" oid = blob_id(content) write_object(repo, oid, content) return oid def _cat(repo: pathlib.Path, *args: str, stdin: str | bytes | None = None) -> InvokeResult: from muse.cli.app import main as cli return runner.invoke( cli, ["cat-object", *args], env={"MUSE_REPO_ROOT": str(repo)}, input=stdin, ) # --------------------------------------------------------------------------- # Unit — module constants # 
# ---------------------------------------------------------------------------


class TestConstants:
    """Pin the module-level constants exposed by the cat_object command."""

    def test_chunk_size_is_64kib(self) -> None:
        """``_CHUNK`` is exactly 64 KiB."""
        from muse.cli.commands import cat_object

        assert cat_object._CHUNK == 65536

    def test_format_choices_correct(self) -> None:
        """``raw`` and ``info`` are valid formats; ``json`` is not one of them."""
        from muse.cli.commands import cat_object

        choices = cat_object._FORMAT_CHOICES
        for expected in ("raw", "info"):
            assert expected in choices
        assert "json" not in choices


# ---------------------------------------------------------------------------
# Integration — raw format (single-object mode)
# ---------------------------------------------------------------------------


class TestRawFormat:
    """Plain ``cat-object <oid>`` must write the stored bytes to stdout unchanged."""

    def _assert_round_trip(self, tmp_path: pathlib.Path, payload: bytes) -> None:
        """Store *payload*, cat it back with no options, and compare byte-for-byte."""
        root = _make_repo(tmp_path)
        object_id = _store(root, payload)
        outcome = _cat(root, object_id)
        assert outcome.exit_code == 0
        assert outcome.stdout_bytes == payload

    def test_raw_bytes_match_stored_content(self, tmp_path: pathlib.Path) -> None:
        """A small text payload comes back verbatim."""
        self._assert_round_trip(tmp_path, b"hello object store")

    def test_raw_is_default_format(self, tmp_path: pathlib.Path) -> None:
        """With no format option the command emits the raw bytes."""
        self._assert_round_trip(tmp_path, b"default format")

    def test_raw_binary_content_preserved(self, tmp_path: pathlib.Path) -> None:
        """Every possible byte value (0-255) survives the round trip."""
        self._assert_round_trip(tmp_path, bytes(range(256)))

    def test_raw_empty_object(self, tmp_path: pathlib.Path) -> None:
        """A zero-length object yields empty stdout and exit code 0."""
        self._assert_round_trip(tmp_path, b"")

    def test_explicit_format_raw(self, tmp_path: pathlib.Path) -> None:
        """Raw output for the ``explicit raw`` payload.

        NOTE(review): despite the name, no format option is passed here; the
        invocation is the same as in ``test_raw_is_default_format``. Confirm
        whether this test is still needed.
        """
        self._assert_round_trip(tmp_path, b"explicit raw")


# ---------------------------------------------------------------------------
# Integration — info / --json format (single-object
# mode)
# ---------------------------------------------------------------------------


class TestInfoFormat:
    """``--json`` in single-object mode: metadata fields only, never the content."""

    def test_info_format_shape(self, tmp_path: pathlib.Path) -> None:
        """The JSON document carries object_id, present and size_bytes."""
        repo = _make_repo(tmp_path)
        content = b"info content"
        oid = _store(repo, content)
        result = _cat(repo, "--json", oid)
        assert result.exit_code == 0
        data = json.loads(result.output)
        assert data["object_id"] == oid
        assert data["present"] is True
        assert data["size_bytes"] == len(content)

    def test_json_flag_is_alias_for_info(self, tmp_path: pathlib.Path) -> None:
        """``--json`` succeeds and reports the same metadata fields."""
        repo = _make_repo(tmp_path)
        content = b"json alias test"
        oid = _store(repo, content)
        result = _cat(repo, "--json", oid)
        assert result.exit_code == 0, f"--json failed: {result.output}"
        data = json.loads(result.output)
        assert data["object_id"] == oid
        assert data["present"] is True
        assert data["size_bytes"] == len(content)

    def test_info_does_not_emit_content(self, tmp_path: pathlib.Path) -> None:
        """The stored bytes must not appear anywhere in the JSON output."""
        repo = _make_repo(tmp_path)
        content = b"secret bytes"
        oid = _store(repo, content)
        result = _cat(repo, "--json", oid)
        assert result.exit_code == 0
        data = json.loads(result.output)
        assert "object_id" in data
        assert content not in result.output.encode()

    def test_info_size_matches_actual_file(self, tmp_path: pathlib.Path) -> None:
        """size_bytes equals the length of the stored payload."""
        repo = _make_repo(tmp_path)
        content = b"size check " * 100
        oid = _store(repo, content)
        result = _cat(repo, "--json", oid)
        # Check the exit code first so a failed run is reported as such
        # instead of surfacing as a JSON decode error.
        assert result.exit_code == 0
        data = json.loads(result.output)
        assert data["size_bytes"] == len(content)

    def test_missing_object_info_has_present_false(self, tmp_path: pathlib.Path) -> None:
        """A missing object exits USER_ERROR but still prints JSON with present=False."""
        repo = _make_repo(tmp_path)
        oid = fake_id("missing-info-a")
        result = _cat(repo, "--json", oid)
        assert result.exit_code == ExitCode.USER_ERROR
        data = json.loads(result.output)
        assert data["present"] is False
        assert data["size_bytes"] == 0

    def test_json_flag_missing_object_has_present_false(self, tmp_path: pathlib.Path) -> None:
        """Same as above for a different missing id; only ``present`` is checked."""
        repo = _make_repo(tmp_path)
        oid = fake_id("missing-json-b")
        result = _cat(repo, "--json", oid)
        assert result.exit_code == ExitCode.USER_ERROR
        data = json.loads(result.output)
        assert data["present"] is False

    def test_json_output_has_duration_ms(self, tmp_path: pathlib.Path) -> None:
        """duration_ms is present, is a float and is non-negative."""
        repo = _make_repo(tmp_path)
        content = b"elapsed timing test"
        oid = _store(repo, content)
        result = _cat(repo, "--json", oid)
        assert result.exit_code == 0
        data = json.loads(result.output)
        assert "duration_ms" in data
        assert isinstance(data["duration_ms"], float)
        assert data["duration_ms"] >= 0.0

    def test_json_duration_ms_present_for_missing_object(self, tmp_path: pathlib.Path) -> None:
        """duration_ms is reported even when the object does not exist."""
        repo = _make_repo(tmp_path)
        oid = fake_id("missing-duration-c")
        result = _cat(repo, "--json", oid)
        assert result.exit_code == ExitCode.USER_ERROR
        data = json.loads(result.output)
        assert "duration_ms" in data


# ---------------------------------------------------------------------------
# Integration — error paths (single-object mode)
# ---------------------------------------------------------------------------


class TestErrorPaths:
    """Single-object invocations that must fail with a non-zero exit code."""

    def test_missing_object_raw_errors(self, tmp_path: pathlib.Path) -> None:
        """Requesting an id that was never stored is a user error."""
        repo = _make_repo(tmp_path)
        result = _cat(repo, fake_id("missing-raw-c"))
        assert result.exit_code == ExitCode.USER_ERROR

    def test_invalid_object_id_bare_hex_rejected(self, tmp_path: pathlib.Path) -> None:
        """Bare hex without sha256: prefix is rejected — use sha256: form."""
        repo = _make_repo(tmp_path)
        result = _cat(repo, "a" * 64)
        assert result.exit_code == ExitCode.USER_ERROR

    def test_invalid_object_id_too_short(self, tmp_path: pathlib.Path) -> None:
        """A six-character id is rejected."""
        repo = _make_repo(tmp_path)
        result = _cat(repo, "abc123")
        assert result.exit_code == ExitCode.USER_ERROR

    def test_invalid_object_id_uppercase_content(self, tmp_path: pathlib.Path) -> None:
        """An id built from 64 upper-case ``A`` characters is rejected."""
        repo = _make_repo(tmp_path)
        result = _cat(repo, long_id("A" * 64))
        assert result.exit_code == ExitCode.USER_ERROR

    def test_invalid_object_id_non_hex_content(self, tmp_path: pathlib.Path) -> None:
        """An id built from 64 non-hex ``z`` characters is rejected."""
        repo = _make_repo(tmp_path)
        result = _cat(repo, long_id("z" * 64))
        assert result.exit_code == ExitCode.USER_ERROR

    def test_unrecognized_flag_errors(self, tmp_path: pathlib.Path) -> None:
        """An unknown option makes the command exit non-zero."""
        repo = _make_repo(tmp_path)
        result = _cat(repo, "--no-such-flag", fake_id("bad-flag-a"))
        assert result.exit_code != 0

    def test_no_object_id_without_batch_flag_errors(self, tmp_path: pathlib.Path) -> None:
        """Omitting the object id outside batch mode is a user error."""
        repo = _make_repo(tmp_path)
        result = _cat(repo)
        assert result.exit_code == ExitCode.USER_ERROR

    def test_no_repo_errors(self, tmp_path: pathlib.Path) -> None:
        """Pointing MUSE_REPO_ROOT at a directory that was never created fails."""
        from muse.cli.app import main as cli

        # Invoked directly (not via _cat) because no repository is built here.
        result = runner.invoke(
            cli,
            ["cat-object", fake_id("no-repo-a")],
            env={"MUSE_REPO_ROOT": str(tmp_path / "no_repo")},
        )
        assert result.exit_code != 0


# ---------------------------------------------------------------------------
# Batch mode — --batch
# ---------------------------------------------------------------------------


class TestBatchMode:
    """``--batch``: ids on stdin; per id a header line, the content, and a newline."""

    def test_batch_single_object_emits_header_and_content(self, tmp_path: pathlib.Path) -> None:
        """One id in, one header + content + trailing newline out."""
        repo = _make_repo(tmp_path)
        content = b"batch content"
        oid = _store(repo, content)
        result = _cat(repo, "--batch", stdin=f"{oid}\n")
        assert result.exit_code == 0
        raw = result.stdout_bytes
        # Header: "{oid} blob {size}\n"
        header_line = f"{oid} blob {len(content)}\n".encode()
        assert raw.startswith(header_line)
        # Content follows header, then a trailing newline
        body = raw[len(header_line):]
        assert body == content + b"\n"

    def test_batch_missing_oid_emits_missing(self, tmp_path: pathlib.Path) -> None:
        """An unknown id produces exactly ``{oid} missing`` and exit code 0."""
        repo = _make_repo(tmp_path)
        oid = fake_id("missing-batch-d")
        result = _cat(repo, "--batch", stdin=f"{oid}\n")
        assert result.exit_code == 0
        assert result.stdout_bytes == f"{oid} missing\n".encode()

    def test_batch_invalid_oid_emits_missing(self, tmp_path: pathlib.Path) -> None:
        """Invalid OIDs should produce a 'missing' line, not an error exit."""
        repo = _make_repo(tmp_path)
        result = _cat(repo, "--batch", stdin="not-a-valid-oid\n")
        assert result.exit_code == 0
        assert b"missing" in result.stdout_bytes

    def test_batch_mixed_present_and_missing(self, tmp_path: pathlib.Path) -> None:
        """Present and missing ids in one run each get their own record."""
        repo = _make_repo(tmp_path)
        c1 = b"first"
        c2 = b"second"
        oid1 = _store(repo, c1)
        oid2 = _store(repo, c2)
        missing = fake_id("missing-mixed-e")
        stdin = f"{oid1}\n{missing}\n{oid2}\n"
        result = _cat(repo, "--batch", stdin=stdin)
        assert result.exit_code == 0
        raw = result.stdout_bytes
        # oid1 present
        assert f"{oid1} blob {len(c1)}\n".encode() in raw
        assert c1 in raw
        # missing
        assert f"{missing} missing\n".encode() in raw
        # oid2 present
        assert f"{oid2} blob {len(c2)}\n".encode() in raw
        assert c2 in raw

    def test_batch_empty_lines_skipped(self, tmp_path: pathlib.Path) -> None:
        """Blank stdin lines around an id do not prevent its record being emitted."""
        repo = _make_repo(tmp_path)
        content = b"hello"
        oid = _store(repo, content)
        # stdin has empty lines before and after
        result = _cat(repo, "--batch", stdin=f"\n\n{oid}\n\n")
        assert result.exit_code == 0
        assert f"{oid} blob {len(content)}\n".encode() in result.stdout_bytes

    def test_batch_binary_content_round_trips(self, tmp_path: pathlib.Path) -> None:
        """All 256 byte values pass through --batch framing intact."""
        repo = _make_repo(tmp_path)
        content = bytes(range(256))
        oid = _store(repo, content)
        result = _cat(repo, "--batch", stdin=f"{oid}\n")
        assert result.exit_code == 0
        header = f"{oid} blob {len(content)}\n".encode()
        # Compare the whole stream: slicing by offset alone would accept a
        # wrong header of the same length or a wrong trailing byte.
        assert result.stdout_bytes == header + content + b"\n"

    def test_batch_empty_stdin_produces_no_output(self, tmp_path: pathlib.Path) -> None:
        """No ids on stdin means no output and a clean exit."""
        repo = _make_repo(tmp_path)
        result = _cat(repo, "--batch", stdin="")
        assert result.exit_code == 0
        assert result.stdout_bytes == b""

    def test_batch_multiple_objects_in_order(self, tmp_path: pathlib.Path) -> None:
        """Records are emitted in stdin order, each as header + content + newline."""
        repo = _make_repo(tmp_path)
        contents = [b"alpha", b"beta", b"gamma"]
        oids = [_store(repo, content) for content in contents]
        stdin = "\n".join(oids) + "\n"
        result = _cat(repo, "--batch", stdin=stdin)
        assert result.exit_code == 0
        # Build the exact expected stream so that record order, the newline
        # after every body and the absence of trailing bytes are all checked.
        expected = b"".join(
            f"{oid} blob {len(content)}\n".encode() + content + b"\n"
            for oid, content in zip(oids, contents)
        )
        assert result.stdout_bytes == expected

    def test_batch_mutually_exclusive_with_batch_check(self, tmp_path: pathlib.Path) -> None:
        """Passing both --batch and --batch-check is rejected."""
        repo = _make_repo(tmp_path)
        result = _cat(repo, "--batch", "--batch-check", stdin="")
        assert result.exit_code != 0


# ---------------------------------------------------------------------------
# Batch-check mode — --batch-check
# ---------------------------------------------------------------------------


class TestBatchCheckMode:
    """``--batch-check``: like --batch but only the header line is emitted."""

    def test_batch_check_emits_header_only_no_content(self, tmp_path: pathlib.Path) -> None:
        """Output is exactly the header line; the payload is not included."""
        repo = _make_repo(tmp_path)
        content = b"check only"
        oid = _store(repo, content)
        result = _cat(repo, "--batch-check", stdin=f"{oid}\n")
        assert result.exit_code == 0
        raw = result.stdout_bytes
        expected = f"{oid} blob {len(content)}\n".encode()
        assert raw == expected
        # Content bytes must NOT appear
        assert content not in raw

    def test_batch_check_missing_emits_missing(self, tmp_path: pathlib.Path) -> None:
        """An unknown id produces exactly ``{oid} missing``."""
        repo = _make_repo(tmp_path)
        oid = fake_id("missing-check-f")
        result = _cat(repo, "--batch-check", stdin=f"{oid}\n")
        assert result.exit_code == 0
        assert result.stdout_bytes == f"{oid} missing\n".encode()

    def test_batch_check_invalid_oid_emits_missing(self, tmp_path: pathlib.Path) -> None:
        """A malformed id is reported as missing rather than failing the run."""
        repo = _make_repo(tmp_path)
        result = _cat(repo, "--batch-check", stdin="bad\n")
        assert result.exit_code == 0
        assert b"missing" in result.stdout_bytes

    def test_batch_check_mixed(self, tmp_path: pathlib.Path) -> None:
        """One present and one missing id: both lines appear, no payload bytes."""
        repo = _make_repo(tmp_path)
        c1 = b"present"
        oid1 = _store(repo, c1)
        missing = fake_id("missing-check-0")
        result = _cat(repo, "--batch-check", stdin=f"{oid1}\n{missing}\n")
        assert result.exit_code == 0
        raw = result.stdout_bytes
        assert f"{oid1} blob {len(c1)}\n".encode() in raw
        assert f"{missing} missing\n".encode() in raw
        # No content bytes
        assert c1 not in raw

    def test_batch_check_size_accurate(self, tmp_path: pathlib.Path) -> None:
        """The third header field parses as the payload length."""
        repo = _make_repo(tmp_path)
        content = b"x" * 1000
        oid = _store(repo, content)
        result = _cat(repo, "--batch-check", stdin=f"{oid}\n")
        assert result.exit_code == 0
        line = result.stdout_bytes.decode()
        parts = line.strip().split()
        assert parts[0] == oid
        assert parts[1] == "blob"
        assert int(parts[2]) == len(content)


# ---------------------------------------------------------------------------
# Security
# ---------------------------------------------------------------------------


class TestSecurity:
    """Hostile object ids must be rejected cleanly and never echoed unsafely."""

    def test_ansi_in_invalid_id_not_in_output(self, tmp_path: pathlib.Path) -> None:
        """An ESC byte supplied in the id must not reach the command output."""
        repo = _make_repo(tmp_path)
        malicious = f"\x1b[31m{'a' * 60}"
        result = _cat(repo, malicious)
        assert result.exit_code == ExitCode.USER_ERROR
        assert "\x1b" not in result.output

    def test_path_traversal_in_object_id_rejected(self, tmp_path: pathlib.Path) -> None:
        """A ``../`` path passed as the id is a user error."""
        repo = _make_repo(tmp_path)
        result = _cat(repo, "../../../etc/passwd")
        assert result.exit_code == ExitCode.USER_ERROR

    def test_null_byte_in_object_id_rejected(self, tmp_path: pathlib.Path) -> None:
        """An id containing a NUL byte is a user error."""
        repo = _make_repo(tmp_path)
        result = _cat(repo, f"{'a' * 32}\x00{'b' * 31}")
        assert result.exit_code == ExitCode.USER_ERROR

    def test_no_traceback_on_invalid_id(self, tmp_path: pathlib.Path) -> None:
        """An invalid id must not leak a Python traceback into the output."""
        repo = _make_repo(tmp_path)
        result = _cat(repo, "not-a-valid-id")
        assert "Traceback" not in result.output

    def test_batch_path_traversal_treated_as_missing(self, tmp_path: pathlib.Path) -> None:
        """In batch mode, bad OIDs are not errors — they are reported as missing."""
        repo = _make_repo(tmp_path)
        result = _cat(repo, "--batch", stdin="../../../etc/passwd\n")
        assert result.exit_code == 0
        assert b"missing" in result.stdout_bytes


# ---------------------------------------------------------------------------
# Stress
# ---------------------------------------------------------------------------


class TestStress:
    """Larger payloads and repeated or bulk invocations."""

    def test_large_object_streams_without_oom(self, tmp_path: pathlib.Path) -> None:
        """A 10 MiB object round-trips byte-for-byte (memory use is not measured)."""
        repo = _make_repo(tmp_path)
        content = b"Z" * (10 * 1024 * 1024)  # 10 MiB
        oid = _store(repo, content)
        result = _cat(repo, oid)
        assert result.exit_code == 0
        # Length first: a mismatch fails with a short message instead of a
        # 10 MiB bytes diff.
        assert len(result.stdout_bytes) == len(content)
        assert result.stdout_bytes == content

    def test_large_object_info_is_fast(self, tmp_path: pathlib.Path) -> None:
        """``--json`` on a 10 MiB object reports the right size (no timing is asserted)."""
        repo = _make_repo(tmp_path)
        content = b"Y" * (10 * 1024 * 1024)
        oid = _store(repo, content)
        result = _cat(repo, "--json", oid)
        assert result.exit_code == 0
        data = json.loads(result.output)
        assert data["size_bytes"] == len(content)

    def test_200_sequential_reads(self, tmp_path: pathlib.Path) -> None:
        """Reading the same object 200 times in a row always succeeds."""
        repo = _make_repo(tmp_path)
        content = b"repeated read"
        oid = _store(repo, content)
        for i in range(200):
            result = _cat(repo, oid)
            assert result.exit_code == 0, f"failed at iteration {i}"
            assert result.stdout_bytes == content

    def test_batch_50_objects(self, tmp_path: pathlib.Path) -> None:
        """50 objects through a single --batch invocation."""
        repo = _make_repo(tmp_path)
        pairs: list[tuple[str, bytes]] = []
        for i in range(50):
            content = f"object-{i:03d}".encode()
            oid = _store(repo, content)
            pairs.append((oid, content))

        stdin = "\n".join(oid for oid, _ in pairs) + "\n"
        result = _cat(repo, "--batch", stdin=stdin)
        assert result.exit_code == 0
        raw = result.stdout_bytes
        for oid, content in pairs:
            assert f"{oid} blob {len(content)}\n".encode() in raw
            assert content in raw

    def test_batch_check_100_objects(self, tmp_path: pathlib.Path) -> None:
        """100 objects through --batch-check — no content read."""
        repo = _make_repo(tmp_path)
        oids: list[str] = []
        sizes: list[int] = []
        for i in range(100):
            # Distinct lengths (1..100) give every object a unique size.
            content = b"x" * (i + 1)
            oid = _store(repo, content)
            oids.append(oid)
            sizes.append(len(content))

        stdin = "\n".join(oids) + "\n"
        result = _cat(repo, "--batch-check", stdin=stdin)
        assert result.exit_code == 0
        lines = result.stdout_bytes.decode().strip().splitlines()
        assert len(lines) == 100
        for line, oid, size in zip(lines, oids, sizes):
            parts = line.split()
            assert parts[0] == oid
            assert parts[1] == "blob"
            assert int(parts[2]) == size


# ---------------------------------------------------------------------------
# --inline — base64 content embedding in JSON
# (agent round-trip saver)
# ---------------------------------------------------------------------------


class TestInline:
    """--inline embeds base64-encoded content in the --json output.

    Agents that need both metadata and content for small objects can get both
    in a single invocation instead of two (--json for metadata, raw for bytes).
    """

    def test_inline_embeds_content_b64(self, tmp_path: pathlib.Path) -> None:
        """``content_b64`` is present and decodes to the stored bytes."""
        import base64

        repo = _make_repo(tmp_path)
        content = b"hello inline"
        oid = _store(repo, content)
        result = _cat(repo, "--json", "--inline", oid)
        assert result.exit_code == 0
        data = json.loads(result.output)
        assert "content_b64" in data
        assert base64.b64decode(data["content_b64"]) == content

    def test_inline_requires_json_flag(self, tmp_path: pathlib.Path) -> None:
        """--inline without --json is a user error."""
        repo = _make_repo(tmp_path)
        content = b"inline needs json"
        oid = _store(repo, content)
        result = _cat(repo, "--inline", oid)
        assert result.exit_code == ExitCode.USER_ERROR

    def test_inline_binary_content_round_trips(self, tmp_path: pathlib.Path) -> None:
        """All 256 byte values survive the base64 round trip."""
        import base64

        repo = _make_repo(tmp_path)
        content = bytes(range(256))
        oid = _store(repo, content)
        result = _cat(repo, "--json", "--inline", oid)
        assert result.exit_code == 0
        data = json.loads(result.output)
        assert base64.b64decode(data["content_b64"]) == content

    def test_inline_missing_object_no_content_b64(self, tmp_path: pathlib.Path) -> None:
        """A missing object reports present=False and omits ``content_b64``."""
        repo = _make_repo(tmp_path)
        oid = fake_id("missing-inline-a")
        result = _cat(repo, "--json", "--inline", oid)
        assert result.exit_code == ExitCode.USER_ERROR
        data = json.loads(result.output)
        assert data["present"] is False
        assert "content_b64" not in data

    def test_inline_json_still_has_standard_fields(self, tmp_path: pathlib.Path) -> None:
        """--inline adds ``content_b64`` without dropping the regular metadata."""
        import base64

        repo = _make_repo(tmp_path)
        content = b"standard fields check"
        oid = _store(repo, content)
        result = _cat(repo, "--json", "--inline", oid)
        assert result.exit_code == 0
        data = json.loads(result.output)
        assert data["object_id"] == oid
        assert data["present"] is True
        assert data["size_bytes"] == len(content)
        assert "duration_ms" in data
        # b64decode always returns bytes, so an isinstance check proves
        # nothing; compare against the stored payload instead.
        assert base64.b64decode(data["content_b64"]) == content

    def test_inline_empty_object(self, tmp_path: pathlib.Path) -> None:
        """A zero-length object is embedded as the base64 encoding of ``b""``."""
        import base64

        repo = _make_repo(tmp_path)
        content = b""
        oid = _store(repo, content)
        result = _cat(repo, "--json", "--inline", oid)
        assert result.exit_code == 0
        data = json.loads(result.output)
        assert data["content_b64"] == base64.b64encode(b"").decode()

    def test_no_inline_flag_has_no_content_b64(self, tmp_path: pathlib.Path) -> None:
        """Without --inline the JSON output must NOT include content_b64."""
        repo = _make_repo(tmp_path)
        content = b"no inline here"
        oid = _store(repo, content)
        result = _cat(repo, "--json", oid)
        assert result.exit_code == 0
        data = json.loads(result.output)
        assert "content_b64" not in data


# ---------------------------------------------------------------------------
# Flag registration tests
# ---------------------------------------------------------------------------

import argparse as _argparse
from muse.cli.commands.cat_object import register as _register_cat_object


def _parse_co(*args: str) -> _argparse.Namespace:
    """Build an argument parser via register() and parse args.

    A fresh root parser with a ``cmd`` sub-command slot is created on every
    call, ``register`` adds the cat-object sub-command to it, and
    ``["cat-object", *args]`` is parsed.  Nothing is executed; only the
    resulting namespace is returned.
    """
    root_p = _argparse.ArgumentParser()
    subs = root_p.add_subparsers(dest="cmd")
    _register_cat_object(subs)
    return root_p.parse_args(["cat-object", *args])


class TestRegisterFlags:
    """Check the options ``register`` declares, by inspecting parsed namespaces."""

    def test_default_json_out_is_false(self) -> None:
        """Without --json the ``json_out`` attribute defaults to False."""
        ns = _parse_co(fake_id("a"))
        assert ns.json_out is False

    def test_json_flag_sets_json_out(self) -> None:
        """``--json`` sets ``json_out`` to True."""
        ns = _parse_co(fake_id("a"), "--json")
        assert ns.json_out is True

    def test_j_shorthand_sets_json_out(self) -> None:
        """``-j`` is accepted as a short form of ``--json``."""
        ns = _parse_co(fake_id("a"), "-j")
        assert ns.json_out is True

    def test_inline_flag(self) -> None:
        """``--inline`` sets the ``inline`` attribute to True."""
        ns = _parse_co(fake_id("a"), "--json", "--inline")
        assert ns.inline is True

    def test_format_flag_no_longer_exists(self) -> None:
        """``--format`` is not a registered option: parsing it raises SystemExit."""
        import pytest

        with pytest.raises(SystemExit):
            _parse_co(fake_id("a"), "--format", "info")