"""Hardening tests for ``muse content-grep``. Covers: Unit — _is_binary, _path_matches_globs, _search_object (context, binary skip, utf-8 replace), pattern validation order Security — ANSI injection in file paths and match text, pattern length cap, invalid regex, ReDoS pattern rejected before I/O Perf — parallel reads complete correctly, --max-matches cap JSON — _ContentGrepJson schema (commit_id, snapshot_id, totals), GrepMatch context_before/context_after fields Flags — --include, --exclude, --max-matches, --context/-C, --json, rejection of old --format flag Integration — multi-file with mixed hits, --include narrows search, --exclude skips files, --context shows surrounding lines, --ref searches historical commit E2E — --help output mentions all new flags Stress — 500-file snapshot, concurrent parallel reads """ from __future__ import annotations from collections.abc import Mapping import datetime import json import pathlib import threading from typing import TypedDict import pytest from tests.cli_test_helper import CliRunner, InvokeResult from muse.core.object_store import write_object from muse.core.ids import hash_commit, hash_snapshot from muse.core.store import CommitRecord, SnapshotRecord, write_commit, write_snapshot from muse.core.types import Manifest, blob_id cli = None runner = CliRunner() _invoke_lock = threading.Lock() type _FilesMap = dict[str, bytes] _REPO_ID = "cgrep-hardening" # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- class _GrepMatchOut(TypedDict): line_number: int line: str context_before: list[str] context_after: list[str] class _GrepResultOut(TypedDict): file: str object_id: str match_count: int matches: list[_GrepMatchOut] class _GrepOut(TypedDict): source: str commit_id: str snapshot_id: str pattern: str total_files_matched: int total_matches: int results: list[_GrepResultOut] duration_ms: float exit_code: int def 
_init_repo(path: pathlib.Path, repo_id: str = _REPO_ID) -> pathlib.Path: dot_muse = muse_dir(path) for d in ("commits", "snapshots", "objects", "refs/heads"): (dot_muse / d).mkdir(parents=True, exist_ok=True) (dot_muse / "HEAD").write_text("ref: refs/heads/main", encoding="utf-8") (dot_muse / "repo.json").write_text( json.dumps({"repo_id": repo_id, "domain": "midi"}), encoding="utf-8" ) return path def _env(repo: pathlib.Path) -> Manifest: return {"MUSE_REPO_ROOT": str(repo)} _counter = 0 def _commit_files( root: pathlib.Path, files: _FilesMap, branch: str = "main", parent_id: str | None = None, ) -> str: global _counter _counter += 1 manifest: Manifest = {} for rel_path, content in files.items(): obj_id = blob_id(content) write_object(root, obj_id, content) manifest[rel_path] = obj_id snap_id = hash_snapshot(manifest) write_snapshot(root, SnapshotRecord(snapshot_id=snap_id, manifest=manifest)) committed_at = datetime.datetime.now(datetime.timezone.utc) parent_ids = [parent_id] if parent_id else [] commit_id = hash_commit( parent_ids, snap_id, f"commit {_counter}", committed_at.isoformat(), ) write_commit( root, CommitRecord( commit_id=commit_id, branch=branch, snapshot_id=snap_id, message=f"commit {_counter}", committed_at=committed_at, parent_commit_id=parent_id, ), ) branch_ref = ref_path(root, branch) branch_ref.parent.mkdir(parents=True, exist_ok=True) branch_ref.write_text(commit_id, encoding="utf-8") return commit_id def _invoke(args: list[str], env: Manifest | None = None) -> InvokeResult: with _invoke_lock: return runner.invoke(cli, args, env=env) def _parse(result: InvokeResult) -> _GrepOut: raw: _GrepOut = json.loads(result.output) return raw # --------------------------------------------------------------------------- # Unit: _is_binary # --------------------------------------------------------------------------- def test_is_binary_null_byte() -> None: from muse.cli.commands.content_grep import _is_binary assert _is_binary(b"\x00hello") is True def 
test_is_binary_clean_text() -> None: from muse.cli.commands.content_grep import _is_binary assert _is_binary(b"hello world\n") is False def test_is_binary_empty() -> None: from muse.cli.commands.content_grep import _is_binary assert _is_binary(b"") is False # --------------------------------------------------------------------------- # Unit: _path_matches_globs # --------------------------------------------------------------------------- def test_path_matches_no_filter() -> None: from muse.cli.commands.content_grep import _path_matches_globs assert _path_matches_globs("src/main.py", None, None) is True def test_path_matches_include_basename() -> None: from muse.cli.commands.content_grep import _path_matches_globs assert _path_matches_globs("src/main.py", "*.py", None) is True assert _path_matches_globs("src/main.js", "*.py", None) is False def test_path_matches_include_full_path() -> None: from muse.cli.commands.content_grep import _path_matches_globs assert _path_matches_globs("src/main.py", "src/*.py", None) is True assert _path_matches_globs("tests/main.py", "src/*.py", None) is False def test_path_matches_exclude_basename() -> None: from muse.cli.commands.content_grep import _path_matches_globs assert _path_matches_globs("app.min.js", None, "*.min.js") is False assert _path_matches_globs("app.js", None, "*.min.js") is True def test_path_matches_include_and_exclude() -> None: from muse.cli.commands.content_grep import _path_matches_globs assert _path_matches_globs("src/main.py", "*.py", "test_*.py") is True assert _path_matches_globs("test_foo.py", "*.py", "test_*.py") is False # --------------------------------------------------------------------------- # Unit: _search_object — context lines # --------------------------------------------------------------------------- def test_search_object_context(tmp_path: pathlib.Path) -> None: import re from muse.cli.commands.content_grep import _search_object _init_repo(tmp_path) content = b"line one\nTARGET line\nline 
three\n" obj_id = blob_id(content) write_object(tmp_path, obj_id, content) pat = re.compile("TARGET") count, matches = _search_object(tmp_path, obj_id, pat, False, False, context_lines=1) assert count == 1 assert len(matches) == 1 assert matches[0]["context_before"] == ["line one"] assert matches[0]["context_after"] == ["line three"] def test_search_object_context_at_boundary(tmp_path: pathlib.Path) -> None: import re from muse.cli.commands.content_grep import _search_object _init_repo(tmp_path) content = b"TARGET\nonly\n" obj_id = blob_id(content) write_object(tmp_path, obj_id, content) pat = re.compile("TARGET") count, matches = _search_object(tmp_path, obj_id, pat, False, False, context_lines=3) assert matches[0]["context_before"] == [] assert matches[0]["context_after"] == ["only"] def test_search_object_no_context(tmp_path: pathlib.Path) -> None: import re from muse.cli.commands.content_grep import _search_object _init_repo(tmp_path) content = b"line\nTARGET\nend\n" obj_id = blob_id(content) write_object(tmp_path, obj_id, content) pat = re.compile("TARGET") _, matches = _search_object(tmp_path, obj_id, pat, False, False, context_lines=0) assert matches[0]["context_before"] == [] assert matches[0]["context_after"] == [] def test_search_object_binary_skipped(tmp_path: pathlib.Path) -> None: import re from muse.cli.commands.content_grep import _search_object _init_repo(tmp_path) content = b"\x00\x01\x02TARGET\x03" obj_id = blob_id(content) write_object(tmp_path, obj_id, content) pat = re.compile("TARGET") count, matches = _search_object(tmp_path, obj_id, pat, False, False, 0) assert count == 0 assert matches == [] # --------------------------------------------------------------------------- # Security: pattern validation happens BEFORE I/O # --------------------------------------------------------------------------- def test_long_pattern_rejected_before_io(tmp_path: pathlib.Path) -> None: """A too-long pattern must be rejected without touching the object 
store.""" _init_repo(tmp_path) # Do NOT commit any files — if I/O happened, we'd get a 'no commits' error, # not the 'pattern too long' error. bad_pattern = "a" * 501 result = _invoke( ["content-grep", bad_pattern], env=_env(tmp_path) ) assert result.exit_code != 0 # The error must be about pattern length, not about missing commits. assert "too long" in result.output.lower() or "too long" in (result.stderr or "").lower() def test_invalid_regex_rejected_before_io(tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) result = _invoke( ["content-grep", "[unclosed"], env=_env(tmp_path) ) assert result.exit_code != 0 assert "regex" in result.output.lower() or "regex" in (result.stderr or "").lower() # --------------------------------------------------------------------------- # Security: ANSI injection # --------------------------------------------------------------------------- def test_ansi_injection_in_path(tmp_path: pathlib.Path) -> None: """File paths with ANSI escapes must be stripped in text output.""" _init_repo(tmp_path) ansi_path = "\x1b[31mmalicious\x1b[0m.txt" _commit_files(tmp_path, {ansi_path: b"TARGET content\n"}) result = _invoke( ["content-grep", "TARGET"], env=_env(tmp_path) ) assert result.exit_code == 0 assert "\x1b" not in result.output def test_ansi_injection_in_match_text(tmp_path: pathlib.Path) -> None: """Match text with ANSI escapes must be stripped in text output.""" _init_repo(tmp_path) _commit_files(tmp_path, {"safe.txt": b"TARGET \x1b[31mred\x1b[0m content\n"}) result = _invoke( ["content-grep", "TARGET"], env=_env(tmp_path) ) assert result.exit_code == 0 assert "\x1b" not in result.output # --------------------------------------------------------------------------- # JSON schema: _ContentGrepJson # --------------------------------------------------------------------------- def test_json_schema_all_fields(tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) _commit_files(tmp_path, {"a.txt": b"hello world\nhello again\n"}) result = _invoke( 
["content-grep", "hello", "--json"], env=_env(tmp_path) ) assert result.exit_code == 0 data = _parse(result) assert data["commit_id"].startswith("sha256:") assert len(data["commit_id"]) == 71 assert data["snapshot_id"].startswith("sha256:") assert len(data["snapshot_id"]) == 71 assert data["pattern"] == "hello" assert data["total_files_matched"] == 1 assert data["total_matches"] == 2 assert len(data["results"]) == 1 r = data["results"][0] assert r["path"] == "a.txt" assert r["match_count"] == 2 assert isinstance(r["matches"], list) def test_json_schema_context_fields(tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) _commit_files(tmp_path, {"c.txt": b"before\nTARGET\nafter\n"}) result = _invoke( ["content-grep", "TARGET", "--context", "1", "--json"], env=_env(tmp_path), ) assert result.exit_code == 0 data = _parse(result) match = data["results"][0]["matches"][0] assert isinstance(match, dict) assert "context_before" in match assert "context_after" in match assert match["context_before"] == ["before"] assert match["context_after"] == ["after"] def test_json_schema_no_match_exit1(tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) _commit_files(tmp_path, {"a.txt": b"hello\n"}) result = _invoke( ["content-grep", "ZZZNOMATCH", "--json"], env=_env(tmp_path) ) assert result.exit_code != 0 def test_json_total_matches_multiple_files(tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) _commit_files(tmp_path, { "a.txt": b"hit\nhit\n", "b.txt": b"hit\n", "c.txt": b"miss\n", }) result = _invoke( ["content-grep", "hit", "--json"], env=_env(tmp_path) ) assert result.exit_code == 0 data = _parse(result) assert data["total_files_matched"] == 2 assert data["total_matches"] == 3 # --------------------------------------------------------------------------- # Flags: --include # --------------------------------------------------------------------------- def test_include_filters_to_py_only(tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) _commit_files(tmp_path, { 
"module.py": b"TARGET in python\n", "module.js": b"TARGET in js\n", "readme.md": b"TARGET in md\n", }) result = _invoke( ["content-grep", "TARGET", "--include", "*.py", "--json"], env=_env(tmp_path), ) assert result.exit_code == 0 data = _parse(result) assert data["total_files_matched"] == 1 assert data["results"][0]["path"] == "module.py" def test_include_no_matches_after_filter(tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) _commit_files(tmp_path, {"module.js": b"TARGET here\n"}) result = _invoke( ["content-grep", "TARGET", "--include", "*.py"], env=_env(tmp_path), ) assert result.exit_code != 0 # no files pass include filter # --------------------------------------------------------------------------- # Flags: --exclude # --------------------------------------------------------------------------- def test_exclude_skips_minified(tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) _commit_files(tmp_path, { "app.js": b"TARGET here\n", "app.min.js": b"TARGET minified\n", }) result = _invoke( ["content-grep", "TARGET", "--exclude", "*.min.js", "--json"], env=_env(tmp_path), ) assert result.exit_code == 0 data = _parse(result) assert data["total_files_matched"] == 1 assert data["results"][0]["path"] == "app.js" def test_exclude_all_results_in_no_match(tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) _commit_files(tmp_path, {"test.py": b"TARGET\n"}) result = _invoke( ["content-grep", "TARGET", "--exclude", "test_*.py"], env=_env(tmp_path), ) # test.py doesn't match test_*.py exclude pattern, so it should match. # Verify this works (target file isn't excluded). 
assert result.exit_code == 0 # --------------------------------------------------------------------------- # Flags: --max-matches # --------------------------------------------------------------------------- def test_max_matches_caps_output(tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) _commit_files(tmp_path, {"many.txt": b"hit\n" * 100}) result = _invoke( ["content-grep", "hit", "--max-matches", "10", "--json"], env=_env(tmp_path), ) assert result.exit_code == 0 data = _parse(result) assert data["total_matches"] <= 10 def test_max_matches_zero_still_exits_nonzero_on_cap(tmp_path: pathlib.Path) -> None: """When max_matches=0, no results are kept — exit 1.""" _init_repo(tmp_path) _commit_files(tmp_path, {"a.txt": b"hit\n"}) result = _invoke( ["content-grep", "hit", "--max-matches", "0", "--json"], env=_env(tmp_path), ) assert result.exit_code != 0 # no results after cap → exit 1 # --------------------------------------------------------------------------- # Flags: --context / -C # --------------------------------------------------------------------------- def test_context_text_output(tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) _commit_files(tmp_path, {"ctx.txt": b"alpha\nbeta\ngamma\n"}) result = _invoke( ["content-grep", "beta", "--context", "1"], env=_env(tmp_path), ) assert result.exit_code == 0 # Context before and after should appear in output. 
assert "alpha" in result.output assert "gamma" in result.output def test_context_short_flag(tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) _commit_files(tmp_path, {"ctx2.txt": b"first\nTARGET\nlast\n"}) result = _invoke( ["content-grep", "TARGET", "-C", "1"], env=_env(tmp_path), ) assert result.exit_code == 0 assert "first" in result.output assert "last" in result.output # --------------------------------------------------------------------------- # Flags: --json boolean (rejects old --format) # --------------------------------------------------------------------------- def test_format_flag_rejected(tmp_path: pathlib.Path) -> None: """Old ``--format json`` must be rejected by argparse (exit 2).""" _init_repo(tmp_path) _commit_files(tmp_path, {"a.txt": b"hello\n"}) result = _invoke( ["content-grep", "hello", "--format", "json"], env=_env(tmp_path), ) assert result.exit_code == 2 # --------------------------------------------------------------------------- # Integration: --ref searches a different commit # --------------------------------------------------------------------------- def test_ref_searches_branch(tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) c1 = _commit_files(tmp_path, {"v1.txt": b"OLD content\n"}) _commit_files(tmp_path, {"v2.txt": b"NEW content\n"}, parent_id=c1) # Search HEAD — should find NEW in v2.txt. result_head = _invoke( ["content-grep", "NEW", "--json"], env=_env(tmp_path) ) assert result_head.exit_code == 0 data = _parse(result_head) paths = [r["path"] for r in data["results"]] assert "v2.txt" in paths # Search the first commit by ID — should find OLD in v1.txt, not NEW. 
result_ref = _invoke( ["content-grep", "OLD", "--ref", c1, "--json"], env=_env(tmp_path), ) assert result_ref.exit_code == 0 data_ref = _parse(result_ref) paths_ref = [r["path"] for r in data_ref["results"]] assert "v1.txt" in paths_ref assert data_ref["commit_id"] == c1 # --------------------------------------------------------------------------- # E2E: --help mentions all new flags # --------------------------------------------------------------------------- def test_help_mentions_include() -> None: result = _invoke(["content-grep", "--help"]) assert result.exit_code == 0 assert "--include" in result.output def test_help_mentions_exclude() -> None: result = _invoke(["content-grep", "--help"]) assert "--exclude" in result.output def test_help_mentions_max_matches() -> None: result = _invoke(["content-grep", "--help"]) assert "--max-matches" in result.output def test_help_mentions_context() -> None: result = _invoke(["content-grep", "--help"]) assert "--context" in result.output or "-C" in result.output def test_help_mentions_json_not_format() -> None: result = _invoke(["content-grep", "--help"]) assert "--json" in result.output assert "--format" not in result.output # --------------------------------------------------------------------------- # Stress: 500-file snapshot, pattern matches 250 # --------------------------------------------------------------------------- def test_stress_500_files(tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) files: _FilesMap = {} for i in range(500): content = b"TARGET_STRESS\n" if i % 2 == 0 else b"other\n" files[f"f_{i:04d}.txt"] = content _commit_files(tmp_path, files) result = _invoke( ["content-grep", "TARGET_STRESS", "--json"], env=_env(tmp_path), ) assert result.exit_code == 0 data = _parse(result) assert data["total_files_matched"] == 250 assert data["total_matches"] == 250 # --------------------------------------------------------------------------- # Stress: concurrent reads # 
# ---------------------------------------------------------------------------


def test_stress_concurrent_reads(tmp_path: pathlib.Path) -> None:
    """Eight threads each run the same JSON search and must all see one match."""
    _init_repo(tmp_path)
    _commit_files(tmp_path, {"concurrent.txt": b"CONCURRENT TARGET\n"})
    errors: list[str] = []

    def _read() -> None:
        # An exception escaping a worker thread never reaches the main thread,
        # so record it explicitly; otherwise a crash would leave ``errors``
        # empty and the test would pass.
        try:
            r = _invoke(
                ["content-grep", "CONCURRENT", "--json"],
                env=_env(tmp_path),
            )
        except Exception as exc:  # noqa: BLE001 - thread boundary, reported below
            errors.append(f"invoke raised: {exc!r}")
            return

        if r.exit_code != 0:
            errors.append(f"exit {r.exit_code}")
            return

        try:
            d = json.loads(r.output)
        except json.JSONDecodeError as exc:
            errors.append(str(exc))
            return

        if d.get("total_matches", 0) != 1:
            errors.append(f"unexpected total_matches: {d.get('total_matches')}")

    threads = [threading.Thread(target=_read) for _ in range(8)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    assert not errors, f"Concurrent read failures: {errors}"


# ---------------------------------------------------------------------------
# JSON schema: complete key set (TestJsonSchemaComplete)
# ---------------------------------------------------------------------------

_REQUIRED_KEYS = frozenset({
    "source",
    "commit_id",
    "snapshot_id",
    "pattern",
    "total_files_matched",
    "total_matches",
    "results",
    "duration_ms",
    "exit_code",
})


class TestJsonSchemaComplete:
    """Verify that every required key is present in JSON output."""

    def test_all_required_keys_present_commit_mode(self, tmp_path: pathlib.Path) -> None:
        """Commit-mode JSON contains every key in ``_REQUIRED_KEYS``."""
        _init_repo(tmp_path)
        _commit_files(tmp_path, {"a.txt": b"hello\n"})
        result = _invoke(["content-grep", "hello", "--json"], env=_env(tmp_path))
        assert result.exit_code == 0
        data = json.loads(result.output)
        missing = _REQUIRED_KEYS - data.keys()
        assert not missing, f"Missing keys: {missing}"

    def test_all_required_keys_present_working_tree_mode(self, tmp_path: pathlib.Path) -> None:
        """Working-tree JSON contains every key in ``_REQUIRED_KEYS``."""
        _init_repo(tmp_path)
        _commit_files(tmp_path, {"a.txt": b"hello\n"})
        # Also write a matching file to disk so working-tree search finds it.
        (tmp_path / "a.txt").write_bytes(b"hello\n")
        result = _invoke(
            ["content-grep", "hello", "--working-tree", "--json"],
            env=_env(tmp_path),
        )
        assert result.exit_code == 0
        data = json.loads(result.output)
        missing = _REQUIRED_KEYS - data.keys()
        assert not missing, f"Missing keys: {missing}"

    def test_source_field_is_commit(self, tmp_path: pathlib.Path) -> None:
        """``source`` is ``"commit"`` for a default (committed snapshot) search."""
        _init_repo(tmp_path)
        _commit_files(tmp_path, {"a.txt": b"hello\n"})
        result = _invoke(["content-grep", "hello", "--json"], env=_env(tmp_path))
        data = json.loads(result.output)
        assert data["source"] == "commit"

    def test_source_field_is_working_tree(self, tmp_path: pathlib.Path) -> None:
        """``source`` is ``"working-tree"`` when ``--working-tree`` is given."""
        _init_repo(tmp_path)
        _commit_files(tmp_path, {"a.txt": b"hello\n"})
        (tmp_path / "a.txt").write_bytes(b"hello\n")
        result = _invoke(
            ["content-grep", "hello", "--working-tree", "--json"],
            env=_env(tmp_path),
        )
        data = json.loads(result.output)
        assert data["source"] == "working-tree"

    def test_commit_id_null_in_working_tree_mode(self, tmp_path: pathlib.Path) -> None:
        """``commit_id`` is JSON ``null`` in working-tree mode."""
        _init_repo(tmp_path)
        _commit_files(tmp_path, {"a.txt": b"hello\n"})
        (tmp_path / "a.txt").write_bytes(b"hello\n")
        result = _invoke(
            ["content-grep", "hello", "--working-tree", "--json"],
            env=_env(tmp_path),
        )
        data = json.loads(result.output)
        assert data["commit_id"] is None

    def test_snapshot_id_null_in_working_tree_mode(self, tmp_path: pathlib.Path) -> None:
        """``snapshot_id`` is JSON ``null`` in working-tree mode."""
        _init_repo(tmp_path)
        _commit_files(tmp_path, {"a.txt": b"hello\n"})
        (tmp_path / "a.txt").write_bytes(b"hello\n")
        result = _invoke(
            ["content-grep", "hello", "--working-tree", "--json"],
            env=_env(tmp_path),
        )
        data = json.loads(result.output)
        assert data["snapshot_id"] is None

    def test_exit_code_field_zero_on_match(self, tmp_path: pathlib.Path) -> None:
        """The JSON ``exit_code`` field is 0 when the pattern matches."""
        _init_repo(tmp_path)
        _commit_files(tmp_path, {"a.txt": b"hello\n"})
        result = _invoke(["content-grep", "hello", "--json"], env=_env(tmp_path))
        data = json.loads(result.output)
        assert data["exit_code"] == 0

    def test_json_is_compact(self, tmp_path: pathlib.Path) -> None:
        """JSON output must be a single line — no pretty-printing."""
        _init_repo(tmp_path)
        _commit_files(tmp_path, {"a.txt": b"hello\n"})
        result = _invoke(["content-grep", "hello", "--json"], env=_env(tmp_path))
        lines = [ln for ln in result.output.splitlines() if ln.strip()]
        assert len(lines) == 1, "JSON must be compact (one line)"


# ---------------------------------------------------------------------------
# duration_ms (TestElapsedSeconds)
# ---------------------------------------------------------------------------


class TestElapsedSeconds:
    """``duration_ms`` must be a non-negative float in all JSON paths."""

    def _assert_elapsed(self, data: Mapping[str, object]) -> None:  # type: ignore[type-arg]
        """Assert *data* carries a non-negative float ``duration_ms``."""
        assert "duration_ms" in data
        # Bind to a local so the isinstance check narrows the type used in the
        # comparison below.
        duration = data["duration_ms"]
        assert isinstance(duration, float)
        assert duration >= 0.0

    def test_elapsed_present_commit_mode(self, tmp_path: pathlib.Path) -> None:
        """``duration_ms`` is present for a committed-snapshot search."""
        _init_repo(tmp_path)
        _commit_files(tmp_path, {"a.txt": b"target\n"})
        result = _invoke(["content-grep", "target", "--json"], env=_env(tmp_path))
        self._assert_elapsed(json.loads(result.output))

    def test_elapsed_present_working_tree_mode(self, tmp_path: pathlib.Path) -> None:
        """``duration_ms`` is present for a working-tree search."""
        _init_repo(tmp_path)
        _commit_files(tmp_path, {"a.txt": b"target\n"})
        (tmp_path / "a.txt").write_bytes(b"target\n")
        result = _invoke(
            ["content-grep", "target", "--working-tree", "--json"],
            env=_env(tmp_path),
        )
        self._assert_elapsed(json.loads(result.output))

    def test_elapsed_is_float_not_int(self, tmp_path: pathlib.Path) -> None:
        """``duration_ms`` decodes to a ``float``, never an ``int``."""
        _init_repo(tmp_path)
        _commit_files(tmp_path, {"a.txt": b"target\n"})
        result = _invoke(["content-grep", "target", "--json"], env=_env(tmp_path))
        data = json.loads(result.output)
        assert isinstance(data["duration_ms"], float)

    def test_elapsed_reasonable_upper_bound(self, tmp_path: pathlib.Path) -> None:
        """Single-file search in a temp repo should be well under 5 seconds."""
        _init_repo(tmp_path)
        _commit_files(tmp_path, {"a.txt": b"target\n"})
        result = _invoke(["content-grep", "target", "--json"], env=_env(tmp_path))
        data = json.loads(result.output)
        # The field is in milliseconds, so the 5-second budget is 5000 ms.
        # Comparing against 5.0 would demand a sub-5 ms search.
        assert data["duration_ms"] < 5_000.0

    def test_elapsed_present_stress_mode(self, tmp_path: pathlib.Path) -> None:
        """duration_ms must appear even for a 50-file search."""
        _init_repo(tmp_path)
        files: _FilesMap = {f"f{i}.txt": b"needle\n" for i in range(50)}
        _commit_files(tmp_path, files)
        result = _invoke(["content-grep", "needle", "--json"], env=_env(tmp_path))
        assert result.exit_code == 0
        self._assert_elapsed(json.loads(result.output))

    def test_elapsed_six_decimal_places(self, tmp_path: pathlib.Path) -> None:
        """duration_ms should be rounded to at most 6 decimal places."""
        _init_repo(tmp_path)
        _commit_files(tmp_path, {"a.txt": b"target\n"})
        result = _invoke(["content-grep", "target", "--json"], env=_env(tmp_path))
        data = json.loads(result.output)
        elapsed = data["duration_ms"]
        # round-trip through 6-decimal representation must be exact
        assert round(elapsed, 6) == elapsed


# ---------------------------------------------------------------------------
# exit_code field (TestExitCode)
# ---------------------------------------------------------------------------


class TestExitCode:
    """``exit_code`` in JSON must mirror the process exit code."""

    def test_exit_code_zero_on_match(self, tmp_path: pathlib.Path) -> None:
        """Both the process and the JSON field report 0 on a match."""
        _init_repo(tmp_path)
        _commit_files(tmp_path, {"a.txt": b"hit\n"})
        result = _invoke(["content-grep", "hit", "--json"], env=_env(tmp_path))
        assert result.exit_code == 0
        assert json.loads(result.output)["exit_code"] == 0

    def test_exit_code_zero_working_tree_match(self, tmp_path: pathlib.Path) -> None:
        """Both report 0 on a working-tree match."""
        _init_repo(tmp_path)
        _commit_files(tmp_path, {"a.txt": b"hit\n"})
        (tmp_path / "a.txt").write_bytes(b"hit\n")
        result = _invoke(
            ["content-grep", "hit", "--working-tree", "--json"],
            env=_env(tmp_path),
        )
        assert result.exit_code == 0
        assert json.loads(result.output)["exit_code"] == 0

    def test_exit_code_is_integer(self, tmp_path: pathlib.Path) -> None:
        """The JSON ``exit_code`` field decodes to an ``int``."""
        _init_repo(tmp_path)
        _commit_files(tmp_path, {"a.txt": b"hit\n"})
        result = _invoke(["content-grep", "hit", "--json"], env=_env(tmp_path))
        data = json.loads(result.output)
        assert isinstance(data["exit_code"], int)

    def test_exit_code_in_json_matches_process_exit(self, tmp_path: pathlib.Path) -> None:
        """JSON exit_code must equal the actual process exit code."""
        _init_repo(tmp_path)
        _commit_files(tmp_path, {"a.txt": b"hit\n"})
        result = _invoke(["content-grep", "hit", "--json"], env=_env(tmp_path))
        data = json.loads(result.output)
        assert data["exit_code"] == result.exit_code

    def test_exit_code_multiple_files(self, tmp_path: pathlib.Path) -> None:
        """Matches spread over several files still report exit code 0."""
        _init_repo(tmp_path)
        _commit_files(tmp_path, {"a.txt": b"hit\n", "b.txt": b"hit\n"})
        result = _invoke(["content-grep", "hit", "--json"], env=_env(tmp_path))
        assert result.exit_code == 0
        assert json.loads(result.output)["exit_code"] == 0


# ---------------------------------------------------------------------------
# Flag registration tests
# ---------------------------------------------------------------------------

import argparse as _argparse
from muse.cli.commands.content_grep import register as _register_content_grep
# ``muse_dir`` and ``ref_path`` are used by the repo helpers defined earlier in
# this module; those helpers only run after the module has finished importing.
from muse.core.paths import muse_dir, ref_path


def _parse_cgrep(*args: str) -> _argparse.Namespace:
    """Parse ``content-grep *args`` with a parser holding only that sub-command."""
    root_p = _argparse.ArgumentParser()
    subs = root_p.add_subparsers(dest="cmd")
    _register_content_grep(subs)
    return root_p.parse_args(["content-grep", *args])


class TestRegisterFlags:
    """Argument registration: ``--json`` / ``-j`` and the positional pattern."""

    def test_default_json_out_is_false(self) -> None:
        """Without ``--json`` the ``json_out`` attribute is ``False``."""
        ns = _parse_cgrep("TODO")
        assert ns.json_out is False

    def test_json_flag_sets_json_out(self) -> None:
        """``--json`` sets ``json_out`` to ``True``."""
        ns = _parse_cgrep("TODO", "--json")
        assert ns.json_out is True

    def test_j_shorthand_sets_json_out(self) -> None:
        """``-j`` is a shorthand for ``--json``."""
        ns = _parse_cgrep("TODO", "-j")
        assert ns.json_out is True

    def test_pattern_positional(self) -> None:
        """The first positional argument is stored as ``pattern``."""
        ns = _parse_cgrep("FIXME")
        assert ns.pattern == "FIXME"


# ---------------------------------------------------------------------------
# JSON key ergonomics: results[].path and matches[].line
# ---------------------------------------------------------------------------


class TestJsonKeyErgonomics:
    """Key naming contract of ``content-grep --json``.

    Each per-file result exposes its location under ``path`` (not ``file``),
    and each match exposes the matched text under ``line`` (not ``text``).
    """

    def test_result_key_is_path(self, tmp_path: pathlib.Path) -> None:
        """The first result reports its location under ``path``."""
        _init_repo(tmp_path)
        _commit_files(tmp_path, {"src/main.py": b"hello world\n"})
        outcome = _invoke(["content-grep", "hello", "--json"], env=_env(tmp_path))
        payload = json.loads(outcome.output)
        first_result = payload["results"][0]
        assert first_result["path"] == "src/main.py"

    def test_result_has_no_file_key(self, tmp_path: pathlib.Path) -> None:
        """The legacy ``file`` key is absent from a result entry."""
        _init_repo(tmp_path)
        _commit_files(tmp_path, {"src/main.py": b"hello world\n"})
        outcome = _invoke(["content-grep", "hello", "--json"], env=_env(tmp_path))
        payload = json.loads(outcome.output)
        first_result = payload["results"][0]
        assert "file" not in first_result

    def test_match_key_is_line_not_text(self, tmp_path: pathlib.Path) -> None:
        """The matched text is stored under ``line`` without its newline."""
        _init_repo(tmp_path)
        _commit_files(tmp_path, {"a.py": b"hello world\n"})
        outcome = _invoke(["content-grep", "hello", "--json"], env=_env(tmp_path))
        payload = json.loads(outcome.output)
        first_match = payload["results"][0]["matches"][0]
        assert first_match["line"] == "hello world"

    def test_match_has_no_text_key(self, tmp_path: pathlib.Path) -> None:
        """The legacy ``text`` key is absent from a match entry."""
        _init_repo(tmp_path)
        _commit_files(tmp_path, {"a.py": b"hello world\n"})
        outcome = _invoke(["content-grep", "hello", "--json"], env=_env(tmp_path))
        payload = json.loads(outcome.output)
        first_match = payload["results"][0]["matches"][0]
        assert "text" not in first_match

    def test_working_tree_result_key_is_path(self, tmp_path: pathlib.Path) -> None:
        """Working-tree results also report their location under ``path``."""
        _init_repo(tmp_path)
        _commit_files(tmp_path, {"a.py": b"placeholder\n"})
        on_disk = tmp_path / "a.py"
        on_disk.write_text("needle here\n", encoding="utf-8")
        outcome = _invoke(
            ["content-grep", "needle", "--working-tree", "--json"],
            env=_env(tmp_path),
        )
        payload = json.loads(outcome.output)
        first_result = payload["results"][0]
        assert first_result["path"] == "a.py"

    def test_working_tree_match_key_is_line(self, tmp_path: pathlib.Path) -> None:
        """Working-tree matches also store the matched text under ``line``."""
        _init_repo(tmp_path)
        _commit_files(tmp_path, {"a.py": b"placeholder\n"})
        on_disk = tmp_path / "a.py"
        on_disk.write_text("needle here\n", encoding="utf-8")
        outcome = _invoke(
            ["content-grep", "needle", "--working-tree", "--json"],
            env=_env(tmp_path),
        )
        payload = json.loads(outcome.output)
        first_match = payload["results"][0]["matches"][0]
        assert first_match["line"] == "needle here"