gabriel / muse public
test_cmd_hash_object.py python
432 lines 17.3 KB
Raw
1 """Comprehensive tests for ``muse hash-object``.
2
3 Coverage tiers
4 --------------
5 - Unit: _hash_bytes correctness, _emit output shape
6 - Integration: all flags, stdin mode, --write lifecycle, idempotency
7 - Security: ANSI injection in path errors, path traversal attempt
8 - Stress: large file (streaming), 500 sequential hashes, binary content
9 """
10 from __future__ import annotations
11
12 import json
13 import pathlib
14
15 import pytest
16
17 from muse.core.errors import ExitCode
18 from tests.cli_test_helper import CliRunner, InvokeResult
19 from muse.core.types import blob_id, long_id, split_id
20 from muse.core.object_store import object_path
21 from muse.core.paths import muse_dir
22
23 runner = CliRunner()
24
25 # ---------------------------------------------------------------------------
26 # Helpers shared across tests
27 # ---------------------------------------------------------------------------
28
def _plumb(tmp_path: pathlib.Path, *args: str, stdin: bytes | None = None) -> InvokeResult:
    """Run ``hash-object`` through the CLI entry point with *args*.

    ``tmp_path`` is accepted but not used by this helper.  ``stdin`` is
    forwarded to the runner as the command's input.
    """
    from muse.cli.app import main as cli

    argv = ["hash-object", *args]
    return runner.invoke(cli, argv, input=stdin)
32
33
def _plumb_repo(repo: pathlib.Path, *args: str, stdin: bytes | None = None) -> InvokeResult:
    """Run ``hash-object`` with ``MUSE_REPO_ROOT`` set to *repo*.

    ``stdin`` is forwarded to the runner as the command's input.
    """
    from muse.cli.app import main as cli

    argv = ["hash-object", *args]
    environment = {"MUSE_REPO_ROOT": str(repo)}
    return runner.invoke(cli, argv, env=environment, input=stdin)
42
43
def _make_repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Build a minimal ``.muse/`` structure under ``tmp_path / "repo"``.

    Creates the sub-directories plus ``HEAD`` and ``repo.json`` inside the
    directory returned by ``muse_dir`` and returns the repo root.
    """
    root = tmp_path / "repo"
    meta = muse_dir(root)

    subdirs = ("objects", "commits", "snapshots", "refs/heads")
    for name in subdirs:
        meta.joinpath(name).mkdir(parents=True)

    head_file = meta / "HEAD"
    head_file.write_text("ref: refs/heads/main")

    config = {"repo_id": "test", "domain": "code"}
    config_file = meta / "repo.json"
    config_file.write_text(json.dumps(config))

    return root
53
54
55 # ---------------------------------------------------------------------------
56 # Unit — _hash_bytes
57 # ---------------------------------------------------------------------------
58
59
class TestHashBytes:
    """Unit tests for ``_hash_bytes`` from the hash-object command module."""

    def test_known_sha256_empty(self) -> None:
        """Empty input yields the same ID as ``blob_id(b"")``."""
        from muse.cli.commands.hash_object import _hash_bytes

        reference = blob_id(b"")
        actual = _hash_bytes(b"")
        assert actual == reference

    def test_known_sha256_hello_world(self) -> None:
        """``b"hello world"`` yields the same ID as ``blob_id``."""
        from muse.cli.commands.hash_object import _hash_bytes

        payload = b"hello world"
        reference = blob_id(payload)
        assert _hash_bytes(payload) == reference

    def test_deterministic(self) -> None:
        """Hashing the same bytes twice gives equal results."""
        from muse.cli.commands.hash_object import _hash_bytes

        payload = b"some content " * 100
        first = _hash_bytes(payload)
        second = _hash_bytes(payload)
        assert first == second

    def test_different_content_different_hash(self) -> None:
        """Distinct inputs produce distinct IDs."""
        from muse.cli.commands.hash_object import _hash_bytes

        id_a = _hash_bytes(b"a")
        id_b = _hash_bytes(b"b")
        assert id_a != id_b

    def test_returns_canonical_prefixed_id(self) -> None:
        """The ID is ``sha256:`` followed by 64 lowercase hex characters."""
        from muse.cli.commands.hash_object import _hash_bytes

        oid = _hash_bytes(b"test")
        assert oid.startswith("sha256:")
        assert len(oid) == 71  # sha256: (7) + 64 hex chars
        hex_part = split_id(oid)[1]
        assert set(hex_part) <= set("0123456789abcdef")
85
86
class TestEmit:
    """Unit tests for the output shape produced by ``_emit``."""

    def test_text_format_prints_hash(self, capsys: pytest.CaptureFixture[str]) -> None:
        """With the first argument False, stdout is exactly the object ID."""
        from muse.cli.commands.hash_object import _emit
        from muse.core.timing import start_timer

        oid = long_id("a" * 64)
        timer = start_timer()
        _emit(False, oid, False, 0, timer)

        captured = capsys.readouterr()
        assert captured.out.strip() == oid

    def test_json_format_has_fields(self, capsys: pytest.CaptureFixture[str]) -> None:
        """With the first argument True, stdout is JSON carrying the expected keys."""
        from muse.cli.commands.hash_object import _emit
        from muse.core.timing import start_timer

        oid = long_id("b" * 64)
        timer = start_timer()
        _emit(True, oid, True, 42, timer)

        payload = json.loads(capsys.readouterr().out)
        assert payload["object_id"] == oid
        assert payload["stored"] is True
        assert payload["size_bytes"] == 42
        for key in ("duration_ms", "exit_code"):
            assert key in payload
107
108
109 # ---------------------------------------------------------------------------
110 # Integration — file mode
111 # ---------------------------------------------------------------------------
112
113
class TestFileMode:
    """Integration tests for ``hash-object <path>`` (no repo, no ``--write``)."""

    def test_json_output_shape(self, tmp_path: pathlib.Path) -> None:
        """``--json`` emits ``object_id`` and ``stored``; nothing is stored."""
        f = tmp_path / "data.txt"
        f.write_bytes(b"hello world")
        result = _plumb(tmp_path, "--json", str(f))
        assert result.exit_code == 0
        data = json.loads(result.output)
        assert "object_id" in data
        assert "stored" in data
        assert data["object_id"].startswith("sha256:")
        assert len(data["object_id"]) == 71
        assert data["stored"] is False

    def test_json_flag_shorthand(self, tmp_path: pathlib.Path) -> None:
        """The ``-j`` short alias produces JSON output end to end."""
        f = tmp_path / "data.txt"
        f.write_bytes(b"content")
        # Use the short alias: the long flag is already covered by
        # test_json_output_shape, so "--json" here would test nothing new.
        result = _plumb(tmp_path, "-j", str(f))
        assert result.exit_code == 0
        data = json.loads(result.output)
        assert "object_id" in data

    def test_text_format_is_canonical_id(self, tmp_path: pathlib.Path) -> None:
        """Without ``--json`` the output is the bare prefixed ID."""
        f = tmp_path / "data.txt"
        f.write_bytes(b"test bytes")
        result = _plumb(tmp_path, str(f))
        assert result.exit_code == 0
        raw = result.output.strip()
        assert raw.startswith("sha256:")
        assert len(raw) == 71

    def test_text_and_json_same_hash(self, tmp_path: pathlib.Path) -> None:
        """Text and JSON modes report the same ID for the same file."""
        f = tmp_path / "same.txt"
        f.write_bytes(b"identical content")
        json_result = _plumb(tmp_path, "--json", str(f))
        text_result = _plumb(tmp_path, str(f))
        # Check both runs succeeded so a failure is reported as such rather
        # than as a JSON decode error or a spurious ID mismatch.
        assert json_result.exit_code == 0
        assert text_result.exit_code == 0
        json_id = json.loads(json_result.output)["object_id"]
        text_id = text_result.output.strip()
        assert json_id == text_id

    def test_determinism_same_content_same_hash(self, tmp_path: pathlib.Path) -> None:
        """Two files with identical bytes hash to the same ID."""
        f1 = tmp_path / "f1.txt"
        f2 = tmp_path / "f2.txt"
        f1.write_bytes(b"same bytes")
        f2.write_bytes(b"same bytes")
        r1 = json.loads(_plumb(tmp_path, "--json", str(f1)).output)["object_id"]
        r2 = json.loads(_plumb(tmp_path, "--json", str(f2)).output)["object_id"]
        assert r1 == r2

    def test_different_content_different_hash(self, tmp_path: pathlib.Path) -> None:
        """Two files with different bytes hash to different IDs."""
        f1 = tmp_path / "f1.txt"
        f2 = tmp_path / "f2.txt"
        f1.write_bytes(b"alpha")
        f2.write_bytes(b"beta")
        r1 = json.loads(_plumb(tmp_path, "--json", str(f1)).output)["object_id"]
        r2 = json.loads(_plumb(tmp_path, "--json", str(f2)).output)["object_id"]
        assert r1 != r2

    def test_empty_file(self, tmp_path: pathlib.Path) -> None:
        """An empty file hashes to ``blob_id(b"")``."""
        f = tmp_path / "empty.txt"
        f.write_bytes(b"")
        result = _plumb(tmp_path, "--json", str(f))
        assert result.exit_code == 0
        data = json.loads(result.output)
        assert data["object_id"] == blob_id(b"")

    def test_binary_content(self, tmp_path: pathlib.Path) -> None:
        """Arbitrary byte values (0-255) hash without error."""
        f = tmp_path / "binary.bin"
        f.write_bytes(bytes(range(256)) * 10)
        result = _plumb(tmp_path, "--json", str(f))
        assert result.exit_code == 0
        data = json.loads(result.output)
        assert data["object_id"].startswith("sha256:")
        assert len(data["object_id"]) == 71

    def test_missing_file_errors(self, tmp_path: pathlib.Path) -> None:
        """A nonexistent path exits with ``USER_ERROR``."""
        result = _plumb(tmp_path, str(tmp_path / "nonexistent.txt"))
        assert result.exit_code == ExitCode.USER_ERROR

    def test_directory_as_path_errors(self, tmp_path: pathlib.Path) -> None:
        """Passing a directory instead of a file exits with ``USER_ERROR``."""
        result = _plumb(tmp_path, str(tmp_path))
        assert result.exit_code == ExitCode.USER_ERROR

    def test_no_args_errors(self, tmp_path: pathlib.Path) -> None:
        """Invoking with neither a path nor ``--stdin`` fails."""
        result = _plumb(tmp_path)
        assert result.exit_code != 0
199
200
201 # ---------------------------------------------------------------------------
202 # Integration — --write lifecycle
203 # ---------------------------------------------------------------------------
204
205
class TestWrite:
    """Integration tests for the ``--write`` flag and the object-store lifecycle."""

    def test_write_returns_stored_true(self, tmp_path: pathlib.Path) -> None:
        """The first ``--write`` of new content reports ``stored: true``."""
        repo = _make_repo(tmp_path)
        f = repo / "sample.txt"
        f.write_bytes(b"store me")
        result = _plumb_repo(repo, "--json", "--write", str(f))
        assert result.exit_code == 0
        assert json.loads(result.output)["stored"] is True

    def test_write_creates_object_file(self, tmp_path: pathlib.Path) -> None:
        """``--write`` creates the object file and it reads back as the input bytes."""
        from muse.core.object_store import read_object

        repo = _make_repo(tmp_path)
        f = repo / "sample.txt"
        content = b"store me too"
        f.write_bytes(content)
        result = _plumb_repo(repo, "--json", "--write", str(f))
        # Fail on the exit code first so a CLI error is not masked by a
        # JSON decode error on the next line.
        assert result.exit_code == 0
        oid = json.loads(result.output)["object_id"]
        obj_file = object_path(repo, oid)
        assert obj_file.exists()
        assert read_object(repo, oid) == content

    def test_write_idempotent_second_call_stored_false(self, tmp_path: pathlib.Path) -> None:
        """Writing the same content twice reports ``stored: false`` the second time."""
        repo = _make_repo(tmp_path)
        f = repo / "dup.txt"
        f.write_bytes(b"duplicate content")
        first = _plumb_repo(repo, "--write", str(f))
        # The second assertion is only meaningful if the first write succeeded.
        assert first.exit_code == 0
        result2 = _plumb_repo(repo, "--json", "--write", str(f))
        assert result2.exit_code == 0
        assert json.loads(result2.output)["stored"] is False

    def test_write_without_repo_errors(self, tmp_path: pathlib.Path) -> None:
        """``--write`` with no repository available exits with ``USER_ERROR``."""
        f = tmp_path / "orphan.txt"
        f.write_bytes(b"no repo")
        # Point MUSE_REPO_ROOT at a dir with no .muse/ to force find_repo_root → None
        result = _plumb_repo(tmp_path / "no_repo_here", "--write", str(f))
        assert result.exit_code == ExitCode.USER_ERROR

    def test_write_text_format_still_works(self, tmp_path: pathlib.Path) -> None:
        """``--write`` without ``--json`` still prints the bare prefixed ID."""
        repo = _make_repo(tmp_path)
        f = repo / "text.txt"
        f.write_bytes(b"text mode write")
        result = _plumb_repo(repo, "--write", str(f))
        assert result.exit_code == 0
        raw = result.output.strip()
        assert raw.startswith("sha256:")
        assert len(raw) == 71
257
258
259 # ---------------------------------------------------------------------------
260 # Integration — --stdin mode
261 # ---------------------------------------------------------------------------
262
263
class TestStdinMode:
    """Integration tests for ``hash-object --stdin``."""

    def test_stdin_produces_correct_hash(self, tmp_path: pathlib.Path) -> None:
        """Piped bytes hash to ``blob_id`` of those bytes and are not stored."""
        payload = b"piped content"
        outcome = _plumb(tmp_path, "--json", "--stdin", stdin=payload)
        assert outcome.exit_code == 0
        parsed = json.loads(outcome.output)
        assert parsed["object_id"] == blob_id(payload)
        assert parsed["stored"] is False

    def test_stdin_matches_file_hash(self, tmp_path: pathlib.Path) -> None:
        """The same bytes give the same ID via a file path or via stdin."""
        payload = b"same content"
        source = tmp_path / "f.txt"
        source.write_bytes(payload)

        from_file = _plumb(tmp_path, "--json", str(source))
        file_id = json.loads(from_file.output)["object_id"]
        from_stdin = _plumb(tmp_path, "--json", "--stdin", stdin=payload)
        stdin_id = json.loads(from_stdin.output)["object_id"]

        assert file_id == stdin_id

    def test_stdin_text_format(self, tmp_path: pathlib.Path) -> None:
        """Without ``--json`` stdin mode prints the bare ID."""
        payload = b"text stdin"
        outcome = _plumb(tmp_path, "--stdin", stdin=payload)
        assert outcome.exit_code == 0
        printed = outcome.output.strip()
        assert printed == blob_id(payload)

    def test_stdin_empty_input(self, tmp_path: pathlib.Path) -> None:
        """Empty stdin hashes to ``blob_id(b"")``."""
        outcome = _plumb(tmp_path, "--json", "--stdin", stdin=b"")
        assert outcome.exit_code == 0
        parsed = json.loads(outcome.output)
        assert parsed["object_id"] == blob_id(b"")

    def test_stdin_and_path_mutually_exclusive(self, tmp_path: pathlib.Path) -> None:
        """Supplying both ``--stdin`` and a path exits with ``USER_ERROR``."""
        source = tmp_path / "f.txt"
        source.write_bytes(b"x")
        outcome = _plumb(tmp_path, "--stdin", str(source))
        assert outcome.exit_code == ExitCode.USER_ERROR

    def test_stdin_write_stores_object(self, tmp_path: pathlib.Path) -> None:
        """``--stdin --write`` stores the object and reports ``stored: true``."""
        repo_root = _make_repo(tmp_path)
        payload = b"stdin stored"
        outcome = _plumb_repo(repo_root, "--json", "--stdin", "--write", stdin=payload)
        assert outcome.exit_code == 0
        parsed = json.loads(outcome.output)
        assert parsed["stored"] is True
        stored_at = object_path(repo_root, parsed["object_id"])
        assert stored_at.exists()

    def test_stdin_write_without_repo_errors(self, tmp_path: pathlib.Path) -> None:
        """``--stdin --write`` with no repository exits with ``USER_ERROR``."""
        # MUSE_REPO_ROOT points at a directory that was never created.
        nowhere = tmp_path / "no_repo_here"
        outcome = _plumb_repo(nowhere, "--stdin", "--write", stdin=b"no repo")
        assert outcome.exit_code == ExitCode.USER_ERROR
319
320
321 # ---------------------------------------------------------------------------
322 # Security
323 # ---------------------------------------------------------------------------
324
325
class TestSecurity:
    """Error-path hygiene: no escape sequences or tracebacks in captured output."""

    def test_ansi_in_path_not_in_stderr(self, tmp_path: pathlib.Path) -> None:
        """A path with embedded ANSI escapes must not reach stderr output."""
        # NOTE(review): the assertion inspects ``result.output``; confirm the
        # test runner folds stderr into that attribute.
        malicious_name = tmp_path / "\x1b[31mmalicious\x1b[0m.txt"
        result = _plumb(tmp_path, str(malicious_name))
        assert result.exit_code != 0
        assert "\x1b" not in result.output

    def test_path_traversal_attempt_outside_repo(self, tmp_path: pathlib.Path) -> None:
        """/../ in a path is just a filesystem lookup — it either exists or doesn't."""
        traversal = tmp_path / ".." / "etc" / "passwd"
        result = _plumb(tmp_path, str(traversal))
        # If the file doesn't exist, we get USER_ERROR cleanly — not a crash.
        assert result.exit_code in (0, ExitCode.USER_ERROR)

    def test_no_path_no_stdin_clean_error(self, tmp_path: pathlib.Path) -> None:
        """Invoking with no input source fails without printing a traceback."""
        result = _plumb(tmp_path)
        assert result.exit_code != 0
        # Must not be a Python traceback
        assert "Traceback" not in result.output

    def test_json_output_is_never_a_traceback(self, tmp_path: pathlib.Path) -> None:
        """A missing file fails without a traceback in both text and JSON modes."""
        missing = str(tmp_path / "missing.txt")
        # Run the text-mode invocation and a ``--json`` one, so the mode named
        # by this test is actually exercised.
        for flags in ((), ("--json",)):
            result = _plumb(tmp_path, *flags, missing)
            assert result.exit_code != 0, f"flags={flags!r}"
            # Empty output trivially contains no traceback, so a single
            # membership check covers the "empty stdout" case as well.
            assert "Traceback" not in result.output, f"flags={flags!r}"
353
354
355 # ---------------------------------------------------------------------------
356 # Stress
357 # ---------------------------------------------------------------------------
358
359
class TestStress:
    """Larger inputs and repeated invocations."""

    def test_large_file_streams_without_oom(self, tmp_path: pathlib.Path) -> None:
        """A 10 MiB file hashes successfully to the expected ID.

        Memory use is not measured here; the test only confirms that a file
        of this size is processed and produces the reference hash.
        """
        large = tmp_path / "large.bin"
        chunk = b"X" * 65536  # 64 KiB chunk
        chunk_count = 160  # 160 × 64 KiB = 10 MiB
        with large.open("wb") as fh:
            for _ in range(chunk_count):
                fh.write(chunk)
        result = _plumb(tmp_path, "--json", str(large))
        assert result.exit_code == 0
        data = json.loads(result.output)
        assert data["object_id"].startswith("sha256:")
        assert len(data["object_id"]) == 71
        # Pin the actual value, not just its shape, so a truncated or
        # mis-chunked read is detected.
        assert data["object_id"] == blob_id(chunk * chunk_count)

    def test_large_file_hash_matches_reference(self, tmp_path: pathlib.Path) -> None:
        """The CLI hash of a 1 MiB file matches ``blob_id`` over the same bytes."""
        large = tmp_path / "ref.bin"
        content = bytes(range(256)) * 4096  # 1 MiB: the 0-255 byte pattern repeated
        large.write_bytes(content)
        result = _plumb(tmp_path, "--json", str(large))
        # Surface a CLI failure as an exit-code mismatch rather than a JSON
        # decode error.
        assert result.exit_code == 0
        expected = blob_id(content)
        assert json.loads(result.output)["object_id"] == expected

    def test_500_sequential_hashes(self, tmp_path: pathlib.Path) -> None:
        """500 rapid hash calls must all succeed with consistent results."""
        f = tmp_path / "stable.txt"
        f.write_bytes(b"stable content")
        expected = blob_id(b"stable content")
        for i in range(500):
            result = _plumb(tmp_path, "--json", str(f))
            assert result.exit_code == 0, f"failed at iteration {i}"
            assert json.loads(result.output)["object_id"] == expected

    def test_stdin_large_binary(self, tmp_path: pathlib.Path) -> None:
        """Stdin mode handles 1 MiB of binary content correctly."""
        content = bytes(range(256)) * 4096
        result = _plumb(tmp_path, "--json", "--stdin", stdin=content)
        assert result.exit_code == 0
        assert json.loads(result.output)["object_id"] == blob_id(content)
399
400
401 # ---------------------------------------------------------------------------
402 # TestRegisterFlags — argparse-level verification
403 # ---------------------------------------------------------------------------
404
405
class TestRegisterFlags:
    """Verify that register() wires --json / -j correctly."""

    def _make_parser(self) -> "argparse.ArgumentParser":
        """Return a fresh parser with the hash-object subcommand registered."""
        import argparse
        from muse.cli.commands.hash_object import register

        parser = argparse.ArgumentParser()
        subparsers = parser.add_subparsers()
        register(subparsers)
        return parser

    def test_json_flag_long(self) -> None:
        """``--json`` sets ``json_out`` to True."""
        argv = ["hash-object", "--stdin", "--json"]
        parsed = self._make_parser().parse_args(argv)
        assert parsed.json_out is True

    def test_j_alias(self) -> None:
        """``-j`` sets ``json_out`` to True."""
        argv = ["hash-object", "--stdin", "-j"]
        parsed = self._make_parser().parse_args(argv)
        assert parsed.json_out is True

    def test_default_is_text(self) -> None:
        """Without a JSON flag ``json_out`` is False."""
        argv = ["hash-object", "--stdin"]
        parsed = self._make_parser().parse_args(argv)
        assert parsed.json_out is False

    def test_dest_is_json_out(self) -> None:
        """The namespace has a ``json_out`` attribute and no ``fmt`` attribute."""
        argv = ["hash-object", "--stdin", "-j"]
        parsed = self._make_parser().parse_args(argv)
        assert hasattr(parsed, "json_out")
        assert not hasattr(parsed, "fmt")
File History 1 commit