"""Comprehensive tests for ``muse hash-object``. Coverage tiers -------------- - Unit: _hash_bytes correctness, _emit output shape - Integration: all flags, stdin mode, --write lifecycle, idempotency - Security: ANSI injection in path errors, path traversal attempt - Stress: large file (streaming), 500 sequential hashes, binary content """ from __future__ import annotations import json import pathlib import pytest from muse.core.errors import ExitCode from tests.cli_test_helper import CliRunner, InvokeResult from muse.core.types import blob_id, long_id, split_id from muse.core.object_store import object_path from muse.core.paths import muse_dir runner = CliRunner() # --------------------------------------------------------------------------- # Helpers shared across tests # --------------------------------------------------------------------------- def _plumb(tmp_path: pathlib.Path, *args: str, stdin: bytes | None = None) -> InvokeResult: from muse.cli.app import main as cli return runner.invoke(cli, ["hash-object", *args], input=stdin) def _plumb_repo(repo: pathlib.Path, *args: str, stdin: bytes | None = None) -> InvokeResult: from muse.cli.app import main as cli return runner.invoke( cli, ["hash-object", *args], env={"MUSE_REPO_ROOT": str(repo)}, input=stdin, ) def _make_repo(tmp_path: pathlib.Path) -> pathlib.Path: """Minimal .muse/ structure.""" repo = tmp_path / "repo" dot_muse = muse_dir(repo) for sub in ("objects", "commits", "snapshots", "refs/heads"): (dot_muse / sub).mkdir(parents=True) (dot_muse / "HEAD").write_text("ref: refs/heads/main") (dot_muse / "repo.json").write_text(json.dumps({"repo_id": "test", "domain": "code"})) return repo # --------------------------------------------------------------------------- # Unit — _hash_bytes # --------------------------------------------------------------------------- class TestHashBytes: def test_known_sha256_empty(self) -> None: from muse.cli.commands.hash_object import _hash_bytes assert _hash_bytes(b"") == blob_id(b"") def test_known_sha256_hello_world(self) -> None: from muse.cli.commands.hash_object import _hash_bytes expected = blob_id(b"hello world") assert _hash_bytes(b"hello world") == expected def test_deterministic(self) -> None: from muse.cli.commands.hash_object import _hash_bytes data = b"some content " * 100 assert _hash_bytes(data) == _hash_bytes(data) def test_different_content_different_hash(self) -> None: from muse.cli.commands.hash_object import _hash_bytes assert _hash_bytes(b"a") != _hash_bytes(b"b") def test_returns_canonical_prefixed_id(self) -> None: from muse.cli.commands.hash_object import _hash_bytes result = _hash_bytes(b"test") assert result.startswith("sha256:") assert len(result) == 71 # sha256: (7) + 64 hex chars assert all(c in "0123456789abcdef" for c in split_id(result)[1]) class TestEmit: def test_text_format_prints_hash(self, capsys: pytest.CaptureFixture[str]) -> None: from muse.cli.commands.hash_object import _emit from muse.core.timing import start_timer oid = long_id("a" * 64) _emit(False, oid, False, 0, start_timer()) out = capsys.readouterr().out.strip() assert out == oid def test_json_format_has_fields(self, capsys: pytest.CaptureFixture[str]) -> None: from muse.cli.commands.hash_object import _emit from muse.core.timing import start_timer oid = long_id("b" * 64) _emit(True, oid, True, 42, start_timer()) data = json.loads(capsys.readouterr().out) assert data["object_id"] == oid assert data["stored"] is True assert data["size_bytes"] == 42 assert "duration_ms" in data assert "exit_code" in data # --------------------------------------------------------------------------- # Integration — file mode # --------------------------------------------------------------------------- class TestFileMode: def test_json_output_shape(self, tmp_path: pathlib.Path) -> None: f = tmp_path / "data.txt" f.write_bytes(b"hello world") result = _plumb(tmp_path, "--json", str(f)) assert result.exit_code == 0 data = json.loads(result.output) assert "object_id" in data assert "stored" in data assert data["object_id"].startswith("sha256:") assert len(data["object_id"]) == 71 assert data["stored"] is False def test_json_flag_shorthand(self, tmp_path: pathlib.Path) -> None: f = tmp_path / "data.txt" f.write_bytes(b"content") result = _plumb(tmp_path, "--json", str(f)) assert result.exit_code == 0 data = json.loads(result.output) assert "object_id" in data def test_text_format_is_canonical_id(self, tmp_path: pathlib.Path) -> None: f = tmp_path / "data.txt" f.write_bytes(b"test bytes") result = _plumb(tmp_path, str(f)) assert result.exit_code == 0 raw = result.output.strip() assert raw.startswith("sha256:") assert len(raw) == 71 def test_text_and_json_same_hash(self, tmp_path: pathlib.Path) -> None: f = tmp_path / "same.txt" f.write_bytes(b"identical content") json_result = _plumb(tmp_path, "--json", str(f)) text_result = _plumb(tmp_path, str(f)) json_id = json.loads(json_result.output)["object_id"] text_id = text_result.output.strip() assert json_id == text_id def test_determinism_same_content_same_hash(self, tmp_path: pathlib.Path) -> None: f1 = tmp_path / "f1.txt" f2 = tmp_path / "f2.txt" f1.write_bytes(b"same bytes") f2.write_bytes(b"same bytes") r1 = json.loads(_plumb(tmp_path, "--json", str(f1)).output)["object_id"] r2 = json.loads(_plumb(tmp_path, "--json", str(f2)).output)["object_id"] assert r1 == r2 def test_different_content_different_hash(self, tmp_path: pathlib.Path) -> None: f1 = tmp_path / "f1.txt" f2 = tmp_path / "f2.txt" f1.write_bytes(b"alpha") f2.write_bytes(b"beta") r1 = json.loads(_plumb(tmp_path, "--json", str(f1)).output)["object_id"] r2 = json.loads(_plumb(tmp_path, "--json", str(f2)).output)["object_id"] assert r1 != r2 def test_empty_file(self, tmp_path: pathlib.Path) -> None: f = tmp_path / "empty.txt" f.write_bytes(b"") result = _plumb(tmp_path, "--json", str(f)) assert result.exit_code == 0 data = json.loads(result.output) assert data["object_id"] == blob_id(b"") def test_binary_content(self, tmp_path: pathlib.Path) -> None: f = tmp_path / "binary.bin" f.write_bytes(bytes(range(256)) * 10) result = _plumb(tmp_path, "--json", str(f)) assert result.exit_code == 0 data = json.loads(result.output) assert data["object_id"].startswith("sha256:") assert len(data["object_id"]) == 71 def test_missing_file_errors(self, tmp_path: pathlib.Path) -> None: result = _plumb(tmp_path, str(tmp_path / "nonexistent.txt")) assert result.exit_code == ExitCode.USER_ERROR def test_directory_as_path_errors(self, tmp_path: pathlib.Path) -> None: result = _plumb(tmp_path, str(tmp_path)) assert result.exit_code == ExitCode.USER_ERROR def test_no_args_errors(self, tmp_path: pathlib.Path) -> None: result = _plumb(tmp_path) assert result.exit_code != 0 # --------------------------------------------------------------------------- # Integration — --write lifecycle # --------------------------------------------------------------------------- class TestWrite: def test_write_returns_stored_true(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) f = repo / "sample.txt" f.write_bytes(b"store me") result = _plumb_repo(repo, "--json", "--write", str(f)) assert result.exit_code == 0 assert json.loads(result.output)["stored"] is True def test_write_creates_object_file(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) f = repo / "sample.txt" content = b"store me too" f.write_bytes(content) result = _plumb_repo(repo, "--json", "--write", str(f)) data = json.loads(result.output) oid = data["object_id"] obj_file = object_path(repo, oid) assert obj_file.exists() from muse.core.object_store import read_object assert read_object(repo, oid) == content def test_write_idempotent_second_call_stored_false(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) f = repo / "dup.txt" f.write_bytes(b"duplicate content") _plumb_repo(repo, "--write", str(f)) result2 = _plumb_repo(repo, "--json", "--write", str(f)) assert result2.exit_code == 0 assert json.loads(result2.output)["stored"] is False def test_write_without_repo_errors(self, tmp_path: pathlib.Path) -> None: f = tmp_path / "orphan.txt" f.write_bytes(b"no repo") # Point MUSE_REPO_ROOT at a dir with no .muse/ to force find_repo_root → None result = runner.invoke( __import__("muse.cli.app", fromlist=["main"]).main, ["hash-object", "--write", str(f)], env={"MUSE_REPO_ROOT": str(tmp_path / "no_repo_here")}, ) assert result.exit_code == ExitCode.USER_ERROR def test_write_text_format_still_works(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) f = repo / "text.txt" f.write_bytes(b"text mode write") result = _plumb_repo(repo, "--write", str(f)) assert result.exit_code == 0 raw = result.output.strip() assert raw.startswith("sha256:") assert len(raw) == 71 # --------------------------------------------------------------------------- # Integration — --stdin mode # --------------------------------------------------------------------------- class TestStdinMode: def test_stdin_produces_correct_hash(self, tmp_path: pathlib.Path) -> None: content = b"piped content" result = _plumb(tmp_path, "--json", "--stdin", stdin=content) assert result.exit_code == 0 data = json.loads(result.output) assert data["object_id"] == blob_id(content) assert data["stored"] is False def test_stdin_matches_file_hash(self, tmp_path: pathlib.Path) -> None: content = b"same content" f = tmp_path / "f.txt" f.write_bytes(content) file_result = json.loads(_plumb(tmp_path, "--json", str(f)).output)["object_id"] stdin_result = json.loads(_plumb(tmp_path, "--json", "--stdin", stdin=content).output)["object_id"] assert file_result == stdin_result def test_stdin_text_format(self, tmp_path: pathlib.Path) -> None: content = b"text stdin" result = _plumb(tmp_path, "--stdin", stdin=content) assert result.exit_code == 0 assert result.output.strip() == blob_id(content) def test_stdin_empty_input(self, tmp_path: pathlib.Path) -> None: result = _plumb(tmp_path, "--json", "--stdin", stdin=b"") assert result.exit_code == 0 data = json.loads(result.output) assert data["object_id"] == blob_id(b"") def test_stdin_and_path_mutually_exclusive(self, tmp_path: pathlib.Path) -> None: f = tmp_path / "f.txt" f.write_bytes(b"x") result = _plumb(tmp_path, "--stdin", str(f)) assert result.exit_code == ExitCode.USER_ERROR def test_stdin_write_stores_object(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) content = b"stdin stored" result = _plumb_repo(repo, "--json", "--stdin", "--write", stdin=content) assert result.exit_code == 0 data = json.loads(result.output) assert data["stored"] is True oid = data["object_id"] obj_file = object_path(repo, oid) assert obj_file.exists() def test_stdin_write_without_repo_errors(self, tmp_path: pathlib.Path) -> None: from muse.cli.app import main as cli result = runner.invoke( cli, ["hash-object", "--stdin", "--write"], env={"MUSE_REPO_ROOT": str(tmp_path / "no_repo_here")}, input=b"no repo", ) assert result.exit_code == ExitCode.USER_ERROR # --------------------------------------------------------------------------- # Security # --------------------------------------------------------------------------- class TestSecurity: def test_ansi_in_path_not_in_stderr(self, tmp_path: pathlib.Path) -> None: """A path with embedded ANSI escapes must not reach stderr output.""" malicious_name = tmp_path / "\x1b[31mmalicious\x1b[0m.txt" result = _plumb(tmp_path, str(malicious_name)) assert result.exit_code != 0 assert "\x1b" not in result.output def test_path_traversal_attempt_outside_repo(self, tmp_path: pathlib.Path) -> None: """/../ in a path is just a filesystem lookup — it either exists or doesn't.""" traversal = tmp_path / ".." / "etc" / "passwd" result = _plumb(tmp_path, str(traversal)) # If the file doesn't exist, we get USER_ERROR cleanly — not a crash. assert result.exit_code in (0, ExitCode.USER_ERROR) def test_no_path_no_stdin_clean_error(self, tmp_path: pathlib.Path) -> None: result = _plumb(tmp_path) assert result.exit_code != 0 # Must not be a Python traceback assert "Traceback" not in result.output def test_json_output_is_never_a_traceback(self, tmp_path: pathlib.Path) -> None: """Even on error, output must be parseable or stderr-only.""" result = _plumb(tmp_path, str(tmp_path / "missing.txt")) assert result.exit_code != 0 # stdout should be empty (error went to stderr) assert result.output.strip() == "" or "Traceback" not in result.output # --------------------------------------------------------------------------- # Stress # --------------------------------------------------------------------------- class TestStress: def test_large_file_streams_without_oom(self, tmp_path: pathlib.Path) -> None: """A 10 MiB file must hash without loading the full content into memory.""" large = tmp_path / "large.bin" chunk = b"X" * 65536 # 64 KiB chunk with large.open("wb") as fh: for _ in range(160): # 160 × 64 KiB = 10 MiB fh.write(chunk) result = _plumb(tmp_path, "--json", str(large)) assert result.exit_code == 0 data = json.loads(result.output) assert data["object_id"].startswith("sha256:") assert len(data["object_id"]) == 71 def test_large_file_hash_matches_reference(self, tmp_path: pathlib.Path) -> None: """Chunked hash_file must match a one-shot hashlib computation.""" large = tmp_path / "ref.bin" content = bytes(range(256)) * 4096 # 1 MiB, non-repeating byte pattern large.write_bytes(content) result = _plumb(tmp_path, "--json", str(large)) expected = blob_id(content) assert json.loads(result.output)["object_id"] == expected def test_500_sequential_hashes(self, tmp_path: pathlib.Path) -> None: """500 rapid hash calls must all succeed with consistent results.""" f = tmp_path / "stable.txt" f.write_bytes(b"stable content") expected = blob_id(b"stable content") for i in range(500): result = _plumb(tmp_path, "--json", str(f)) assert result.exit_code == 0, f"failed at iteration {i}" assert json.loads(result.output)["object_id"] == expected def test_stdin_large_binary(self, tmp_path: pathlib.Path) -> None: """Stdin mode handles 1 MiB of binary content correctly.""" content = bytes(range(256)) * 4096 result = _plumb(tmp_path, "--json", "--stdin", stdin=content) assert result.exit_code == 0 assert json.loads(result.output)["object_id"] == blob_id(content) # --------------------------------------------------------------------------- # TestRegisterFlags — argparse-level verification # --------------------------------------------------------------------------- class TestRegisterFlags: """Verify that register() wires --json / -j correctly.""" def _make_parser(self) -> "argparse.ArgumentParser": import argparse from muse.cli.commands.hash_object import register ap = argparse.ArgumentParser() subs = ap.add_subparsers() register(subs) return ap def test_json_flag_long(self) -> None: ns = self._make_parser().parse_args(["hash-object", "--stdin", "--json"]) assert ns.json_out is True def test_j_alias(self) -> None: ns = self._make_parser().parse_args(["hash-object", "--stdin", "-j"]) assert ns.json_out is True def test_default_is_text(self) -> None: ns = self._make_parser().parse_args(["hash-object", "--stdin"]) assert ns.json_out is False def test_dest_is_json_out(self) -> None: ns = self._make_parser().parse_args(["hash-object", "--stdin", "-j"]) assert hasattr(ns, "json_out") assert not hasattr(ns, "fmt")