test_cmd_hash_object.py
file-level
1
files
1
commits
0
hotspots
0
🧟 dead
0
💥 blast risk
| 1 | """Comprehensive tests for ``muse hash-object``. |
| 2 | |
| 3 | Coverage tiers |
| 4 | -------------- |
| 5 | - Unit: _hash_bytes correctness, _emit output shape |
| 6 | - Integration: all flags, stdin mode, --write lifecycle, idempotency |
| 7 | - Security: ANSI injection in path errors, path traversal attempt |
| 8 | - Stress: large file (streaming), 500 sequential hashes, binary content |
| 9 | """ |
| 10 | from __future__ import annotations |
| 11 | |
| 12 | import json |
| 13 | import pathlib |
| 14 | |
| 15 | import pytest |
| 16 | |
| 17 | from muse.core.errors import ExitCode |
| 18 | from tests.cli_test_helper import CliRunner, InvokeResult |
| 19 | from muse.core.types import blob_id, long_id, split_id |
| 20 | from muse.core.object_store import object_path |
| 21 | from muse.core.paths import muse_dir |
| 22 | |
# Shared CLI runner: the helpers and tests in this module call runner.invoke() on it.
runner = CliRunner()
| 24 | |
| 25 | # --------------------------------------------------------------------------- |
| 26 | # Helpers shared across tests |
| 27 | # --------------------------------------------------------------------------- |
| 28 | |
def _plumb(tmp_path: pathlib.Path, *args: str, stdin: bytes | None = None) -> InvokeResult:
    """Run ``muse hash-object`` with *args*, passing *stdin* as the input.

    No environment override is supplied.  ``tmp_path`` is part of the
    signature but is not read by this helper.
    """
    from muse.cli.app import main as entry_point

    argv = ["hash-object"]
    argv.extend(args)
    return runner.invoke(entry_point, argv, input=stdin)
| 32 | |
| 33 | |
def _plumb_repo(repo: pathlib.Path, *args: str, stdin: bytes | None = None) -> InvokeResult:
    """Run ``muse hash-object`` with ``MUSE_REPO_ROOT`` set to *repo*.

    *args* are appended after the sub-command name and *stdin* is passed
    through as the invocation input.
    """
    from muse.cli.app import main as entry_point

    argv = ["hash-object"]
    argv.extend(args)
    environment = {"MUSE_REPO_ROOT": str(repo)}
    return runner.invoke(entry_point, argv, env=environment, input=stdin)
| 42 | |
| 43 | |
def _make_repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Build a minimal repository skeleton under ``tmp_path / "repo"``.

    Creates the store sub-directories, a ``HEAD`` file and a ``repo.json``
    inside the directory returned by ``muse_dir``; returns the repo root.
    """
    root = tmp_path / "repo"
    store = muse_dir(root)
    for name in ("objects", "commits", "snapshots", "refs/heads"):
        store.joinpath(name).mkdir(parents=True)
    head_file = store / "HEAD"
    head_file.write_text("ref: refs/heads/main")
    metadata = {"repo_id": "test", "domain": "code"}
    (store / "repo.json").write_text(json.dumps(metadata))
    return root
| 53 | |
| 54 | |
| 55 | # --------------------------------------------------------------------------- |
| 56 | # Unit — _hash_bytes |
| 57 | # --------------------------------------------------------------------------- |
| 58 | |
| 59 | |
class TestHashBytes:
    """Unit checks for ``_hash_bytes``: agreement with ``blob_id`` and id shape."""

    def test_known_sha256_empty(self) -> None:
        from muse.cli.commands.hash_object import _hash_bytes

        payload = b""
        assert _hash_bytes(payload) == blob_id(payload)

    def test_known_sha256_hello_world(self) -> None:
        from muse.cli.commands.hash_object import _hash_bytes

        reference = blob_id(b"hello world")
        actual = _hash_bytes(b"hello world")
        assert actual == reference

    def test_deterministic(self) -> None:
        from muse.cli.commands.hash_object import _hash_bytes

        payload = b"some content " * 100
        first = _hash_bytes(payload)
        second = _hash_bytes(payload)
        assert first == second

    def test_different_content_different_hash(self) -> None:
        from muse.cli.commands.hash_object import _hash_bytes

        hash_a = _hash_bytes(b"a")
        hash_b = _hash_bytes(b"b")
        assert hash_a != hash_b

    def test_returns_canonical_prefixed_id(self) -> None:
        from muse.cli.commands.hash_object import _hash_bytes

        oid = _hash_bytes(b"test")
        assert oid.startswith("sha256:")
        assert len(oid) == 71  # "sha256:" (7 chars) + 64 hex digits
        hex_part = split_id(oid)[1]
        # Every character after the prefix must be a lowercase hex digit.
        assert set(hex_part) <= set("0123456789abcdef")
| 85 | |
| 86 | |
class TestEmit:
    """Unit checks for the text and JSON output produced by ``_emit``."""

    def test_text_format_prints_hash(self, capsys: pytest.CaptureFixture[str]) -> None:
        from muse.cli.commands.hash_object import _emit
        from muse.core.timing import start_timer

        oid = long_id("a" * 64)
        timer = start_timer()
        _emit(False, oid, False, 0, timer)
        captured = capsys.readouterr()
        # Text mode prints the object id and nothing else on stdout.
        assert captured.out.strip() == oid

    def test_json_format_has_fields(self, capsys: pytest.CaptureFixture[str]) -> None:
        from muse.cli.commands.hash_object import _emit
        from muse.core.timing import start_timer

        oid = long_id("b" * 64)
        timer = start_timer()
        _emit(True, oid, True, 42, timer)
        payload = json.loads(capsys.readouterr().out)
        assert payload["object_id"] == oid
        assert payload["stored"] is True
        assert payload["size_bytes"] == 42
        # These keys only need to be present; their values are not pinned.
        for key in ("duration_ms", "exit_code"):
            assert key in payload
| 107 | |
| 108 | |
| 109 | # --------------------------------------------------------------------------- |
| 110 | # Integration — file mode |
| 111 | # --------------------------------------------------------------------------- |
| 112 | |
| 113 | |
class TestFileMode:
    """Integration tests for ``muse hash-object <path>`` (no repository needed)."""

    @staticmethod
    def _json_object_id(tmp_path: pathlib.Path, target: pathlib.Path) -> str:
        """Hash *target* with ``--json`` and return the reported ``object_id``.

        The exit code is asserted first so that a failing command shows up
        as an exit-code failure rather than a JSON decoding error.
        """
        result = _plumb(tmp_path, "--json", str(target))
        assert result.exit_code == 0
        return json.loads(result.output)["object_id"]

    def test_json_output_shape(self, tmp_path: pathlib.Path) -> None:
        f = tmp_path / "data.txt"
        f.write_bytes(b"hello world")
        result = _plumb(tmp_path, "--json", str(f))
        assert result.exit_code == 0
        data = json.loads(result.output)
        assert "object_id" in data
        assert "stored" in data
        assert data["object_id"].startswith("sha256:")
        assert len(data["object_id"]) == 71
        assert data["stored"] is False

    def test_json_flag_shorthand(self, tmp_path: pathlib.Path) -> None:
        f = tmp_path / "data.txt"
        f.write_bytes(b"content")
        # Exercise the short alias; the long flag is covered by
        # test_json_output_shape.
        result = _plumb(tmp_path, "-j", str(f))
        assert result.exit_code == 0
        data = json.loads(result.output)
        assert "object_id" in data

    def test_text_format_is_canonical_id(self, tmp_path: pathlib.Path) -> None:
        f = tmp_path / "data.txt"
        f.write_bytes(b"test bytes")
        result = _plumb(tmp_path, str(f))
        assert result.exit_code == 0
        raw = result.output.strip()
        assert raw.startswith("sha256:")
        assert len(raw) == 71

    def test_text_and_json_same_hash(self, tmp_path: pathlib.Path) -> None:
        f = tmp_path / "same.txt"
        f.write_bytes(b"identical content")
        json_id = self._json_object_id(tmp_path, f)
        text_result = _plumb(tmp_path, str(f))
        assert text_result.exit_code == 0
        assert json_id == text_result.output.strip()

    def test_determinism_same_content_same_hash(self, tmp_path: pathlib.Path) -> None:
        f1 = tmp_path / "f1.txt"
        f2 = tmp_path / "f2.txt"
        f1.write_bytes(b"same bytes")
        f2.write_bytes(b"same bytes")
        assert self._json_object_id(tmp_path, f1) == self._json_object_id(tmp_path, f2)

    def test_different_content_different_hash(self, tmp_path: pathlib.Path) -> None:
        f1 = tmp_path / "f1.txt"
        f2 = tmp_path / "f2.txt"
        f1.write_bytes(b"alpha")
        f2.write_bytes(b"beta")
        assert self._json_object_id(tmp_path, f1) != self._json_object_id(tmp_path, f2)

    def test_empty_file(self, tmp_path: pathlib.Path) -> None:
        f = tmp_path / "empty.txt"
        f.write_bytes(b"")
        assert self._json_object_id(tmp_path, f) == blob_id(b"")

    def test_binary_content(self, tmp_path: pathlib.Path) -> None:
        f = tmp_path / "binary.bin"
        # Every byte value 0..255, repeated, so no value is left untested.
        f.write_bytes(bytes(range(256)) * 10)
        oid = self._json_object_id(tmp_path, f)
        assert oid.startswith("sha256:")
        assert len(oid) == 71

    def test_missing_file_errors(self, tmp_path: pathlib.Path) -> None:
        result = _plumb(tmp_path, str(tmp_path / "nonexistent.txt"))
        assert result.exit_code == ExitCode.USER_ERROR

    def test_directory_as_path_errors(self, tmp_path: pathlib.Path) -> None:
        result = _plumb(tmp_path, str(tmp_path))
        assert result.exit_code == ExitCode.USER_ERROR

    def test_no_args_errors(self, tmp_path: pathlib.Path) -> None:
        result = _plumb(tmp_path)
        assert result.exit_code != 0
| 199 | |
| 200 | |
| 201 | # --------------------------------------------------------------------------- |
| 202 | # Integration — --write lifecycle |
| 203 | # --------------------------------------------------------------------------- |
| 204 | |
| 205 | |
class TestWrite:
    """Integration tests for ``--write``: storing, idempotency, missing repo."""

    def test_write_returns_stored_true(self, tmp_path: pathlib.Path) -> None:
        repo = _make_repo(tmp_path)
        f = repo / "sample.txt"
        f.write_bytes(b"store me")
        result = _plumb_repo(repo, "--json", "--write", str(f))
        assert result.exit_code == 0
        assert json.loads(result.output)["stored"] is True

    def test_write_creates_object_file(self, tmp_path: pathlib.Path) -> None:
        from muse.core.object_store import read_object

        repo = _make_repo(tmp_path)
        f = repo / "sample.txt"
        content = b"store me too"
        f.write_bytes(content)
        result = _plumb_repo(repo, "--json", "--write", str(f))
        # Check the exit code before parsing so a failed command is reported
        # as such instead of as a JSON decoding error.
        assert result.exit_code == 0
        oid = json.loads(result.output)["object_id"]
        assert object_path(repo, oid).exists()
        assert read_object(repo, oid) == content

    def test_write_idempotent_second_call_stored_false(self, tmp_path: pathlib.Path) -> None:
        repo = _make_repo(tmp_path)
        f = repo / "dup.txt"
        f.write_bytes(b"duplicate content")
        first = _plumb_repo(repo, "--write", str(f))
        # The second-call expectation only means something if the first
        # write actually succeeded.
        assert first.exit_code == 0
        second = _plumb_repo(repo, "--json", "--write", str(f))
        assert second.exit_code == 0
        assert json.loads(second.output)["stored"] is False

    def test_write_without_repo_errors(self, tmp_path: pathlib.Path) -> None:
        from muse.cli.app import main as cli

        f = tmp_path / "orphan.txt"
        f.write_bytes(b"no repo")
        # Point MUSE_REPO_ROOT at a dir with no .muse/ to force find_repo_root → None
        result = runner.invoke(
            cli,
            ["hash-object", "--write", str(f)],
            env={"MUSE_REPO_ROOT": str(tmp_path / "no_repo_here")},
        )
        assert result.exit_code == ExitCode.USER_ERROR

    def test_write_text_format_still_works(self, tmp_path: pathlib.Path) -> None:
        repo = _make_repo(tmp_path)
        f = repo / "text.txt"
        f.write_bytes(b"text mode write")
        result = _plumb_repo(repo, "--write", str(f))
        assert result.exit_code == 0
        raw = result.output.strip()
        assert raw.startswith("sha256:")
        assert len(raw) == 71
| 257 | |
| 258 | |
| 259 | # --------------------------------------------------------------------------- |
| 260 | # Integration — --stdin mode |
| 261 | # --------------------------------------------------------------------------- |
| 262 | |
| 263 | |
class TestStdinMode:
    """Integration tests for ``--stdin``: hashing piped bytes, with and without ``--write``."""

    def test_stdin_produces_correct_hash(self, tmp_path: pathlib.Path) -> None:
        payload = b"piped content"
        outcome = _plumb(tmp_path, "--json", "--stdin", stdin=payload)
        assert outcome.exit_code == 0
        report = json.loads(outcome.output)
        assert report["object_id"] == blob_id(payload)
        assert report["stored"] is False

    def test_stdin_matches_file_hash(self, tmp_path: pathlib.Path) -> None:
        payload = b"same content"
        on_disk = tmp_path / "f.txt"
        on_disk.write_bytes(payload)
        from_file = _plumb(tmp_path, "--json", str(on_disk))
        file_id = json.loads(from_file.output)["object_id"]
        from_pipe = _plumb(tmp_path, "--json", "--stdin", stdin=payload)
        pipe_id = json.loads(from_pipe.output)["object_id"]
        assert file_id == pipe_id

    def test_stdin_text_format(self, tmp_path: pathlib.Path) -> None:
        payload = b"text stdin"
        outcome = _plumb(tmp_path, "--stdin", stdin=payload)
        assert outcome.exit_code == 0
        printed = outcome.output.strip()
        assert printed == blob_id(payload)

    def test_stdin_empty_input(self, tmp_path: pathlib.Path) -> None:
        outcome = _plumb(tmp_path, "--json", "--stdin", stdin=b"")
        assert outcome.exit_code == 0
        report = json.loads(outcome.output)
        assert report["object_id"] == blob_id(b"")

    def test_stdin_and_path_mutually_exclusive(self, tmp_path: pathlib.Path) -> None:
        on_disk = tmp_path / "f.txt"
        on_disk.write_bytes(b"x")
        outcome = _plumb(tmp_path, "--stdin", str(on_disk))
        assert outcome.exit_code == ExitCode.USER_ERROR

    def test_stdin_write_stores_object(self, tmp_path: pathlib.Path) -> None:
        repo = _make_repo(tmp_path)
        payload = b"stdin stored"
        outcome = _plumb_repo(repo, "--json", "--stdin", "--write", stdin=payload)
        assert outcome.exit_code == 0
        report = json.loads(outcome.output)
        assert report["stored"] is True
        stored_at = object_path(repo, report["object_id"])
        assert stored_at.exists()

    def test_stdin_write_without_repo_errors(self, tmp_path: pathlib.Path) -> None:
        # MUSE_REPO_ROOT points at a directory that is never created.
        missing_repo = tmp_path / "no_repo_here"
        outcome = _plumb_repo(missing_repo, "--stdin", "--write", stdin=b"no repo")
        assert outcome.exit_code == ExitCode.USER_ERROR
| 319 | |
| 320 | |
| 321 | # --------------------------------------------------------------------------- |
| 322 | # Security |
| 323 | # --------------------------------------------------------------------------- |
| 324 | |
| 325 | |
class TestSecurity:
    """Error-path hygiene: no escape sequences or tracebacks in the output."""

    def test_ansi_in_path_not_in_stderr(self, tmp_path: pathlib.Path) -> None:
        """A path with embedded ANSI escapes must not reach stderr output."""
        # The file is never created, so this exercises the error path.
        malicious_name = tmp_path / "\x1b[31mmalicious\x1b[0m.txt"
        result = _plumb(tmp_path, str(malicious_name))
        assert result.exit_code != 0
        assert "\x1b" not in result.output

    def test_path_traversal_attempt_outside_repo(self, tmp_path: pathlib.Path) -> None:
        """/../ in a path is just a filesystem lookup — it either exists or doesn't."""
        traversal = tmp_path / ".." / "etc" / "passwd"
        result = _plumb(tmp_path, str(traversal))
        # If the file doesn't exist, we get USER_ERROR cleanly, not a crash.
        assert result.exit_code in (0, ExitCode.USER_ERROR)
        assert "Traceback" not in result.output

    def test_no_path_no_stdin_clean_error(self, tmp_path: pathlib.Path) -> None:
        result = _plumb(tmp_path)
        assert result.exit_code != 0
        # Must not be a Python traceback
        assert "Traceback" not in result.output

    def test_json_output_is_never_a_traceback(self, tmp_path: pathlib.Path) -> None:
        """With ``--json``, a failing command must not print a Python traceback."""
        result = _plumb(tmp_path, "--json", str(tmp_path / "missing.txt"))
        assert result.exit_code != 0
        # An empty output trivially satisfies this, so no separate
        # emptiness check is needed.
        assert "Traceback" not in result.output
| 353 | |
| 354 | |
| 355 | # --------------------------------------------------------------------------- |
| 356 | # Stress |
| 357 | # --------------------------------------------------------------------------- |
| 358 | |
| 359 | |
class TestStress:
    """Larger inputs and repeated invocations."""

    def test_large_file_streams_without_oom(self, tmp_path: pathlib.Path) -> None:
        """Hash a 10 MiB file and check the shape of the reported id."""
        big_file = tmp_path / "large.bin"
        block = b"X" * 65536  # 64 KiB
        with big_file.open("wb") as sink:
            # 160 × 64 KiB = 10 MiB, written one block at a time.
            sink.writelines(block for _ in range(160))
        outcome = _plumb(tmp_path, "--json", str(big_file))
        assert outcome.exit_code == 0
        oid = json.loads(outcome.output)["object_id"]
        assert oid.startswith("sha256:")
        assert len(oid) == 71

    def test_large_file_hash_matches_reference(self, tmp_path: pathlib.Path) -> None:
        """The id reported for a 1 MiB file must equal ``blob_id`` of its bytes."""
        ref_file = tmp_path / "ref.bin"
        payload = bytes(range(256)) * 4096  # 1 MiB: the 0..255 sequence repeated
        ref_file.write_bytes(payload)
        outcome = _plumb(tmp_path, "--json", str(ref_file))
        reference = blob_id(payload)
        reported = json.loads(outcome.output)["object_id"]
        assert reported == reference

    def test_500_sequential_hashes(self, tmp_path: pathlib.Path) -> None:
        """500 back-to-back invocations must all succeed with the same id."""
        stable = tmp_path / "stable.txt"
        stable.write_bytes(b"stable content")
        reference = blob_id(b"stable content")
        target = str(stable)
        for i in range(500):
            outcome = _plumb(tmp_path, "--json", target)
            assert outcome.exit_code == 0, f"failed at iteration {i}"
            assert json.loads(outcome.output)["object_id"] == reference

    def test_stdin_large_binary(self, tmp_path: pathlib.Path) -> None:
        """Stdin mode must hash 1 MiB of binary content to the ``blob_id`` value."""
        payload = bytes(range(256)) * 4096
        outcome = _plumb(tmp_path, "--json", "--stdin", stdin=payload)
        assert outcome.exit_code == 0
        reported = json.loads(outcome.output)["object_id"]
        assert reported == blob_id(payload)
| 399 | |
| 400 | |
| 401 | # --------------------------------------------------------------------------- |
| 402 | # TestRegisterFlags β argparse-level verification |
| 403 | # --------------------------------------------------------------------------- |
| 404 | |
| 405 | |
class TestRegisterFlags:
    """Check how ``register()`` exposes the JSON switch (``--json`` / ``-j``)."""

    def _make_parser(self) -> "argparse.ArgumentParser":
        """Return a fresh parser with ``hash-object`` registered as a sub-command."""
        import argparse
        from muse.cli.commands.hash_object import register

        parser = argparse.ArgumentParser()
        register(parser.add_subparsers())
        return parser

    def test_json_flag_long(self) -> None:
        argv = ["hash-object", "--stdin", "--json"]
        parsed = self._make_parser().parse_args(argv)
        assert parsed.json_out is True

    def test_j_alias(self) -> None:
        argv = ["hash-object", "--stdin", "-j"]
        parsed = self._make_parser().parse_args(argv)
        assert parsed.json_out is True

    def test_default_is_text(self) -> None:
        argv = ["hash-object", "--stdin"]
        parsed = self._make_parser().parse_args(argv)
        assert parsed.json_out is False

    def test_dest_is_json_out(self) -> None:
        argv = ["hash-object", "--stdin", "-j"]
        parsed = self._make_parser().parse_args(argv)
        # The flag is stored under ``json_out``; no ``fmt`` attribute exists.
        assert hasattr(parsed, "json_out")
        assert not hasattr(parsed, "fmt")