"""Hardening tests for ``muse content-grep``.

Covers:
    Unit      — _is_binary, _path_matches_globs, _search_object (context,
                binary skip, utf-8 replace), pattern validation order
    Security  — ANSI injection in file paths and match text, pattern length
                cap, invalid regex, ReDoS pattern rejected before I/O
    Perf      — parallel reads complete correctly, --max-matches cap
    JSON      — _ContentGrepJson schema (commit_id, snapshot_id, totals),
                GrepMatch context_before/context_after fields
    Flags     — --include, --exclude, --max-matches, --context/-C, --json,
                rejection of old --format flag
    Integration — multi-file with mixed hits, --include narrows search,
                  --exclude skips files, --context shows surrounding lines,
                  --ref searches historical commit
    E2E       — --help output mentions all new flags
    Stress    — 500-file snapshot, concurrent parallel reads
"""

from __future__ import annotations
from collections.abc import Mapping

import datetime
import json
import pathlib
import threading
from typing import TypedDict

import pytest
from tests.cli_test_helper import CliRunner, InvokeResult

from muse.core.object_store import write_object
from muse.core.ids import hash_commit, hash_snapshot
from muse.core.store import CommitRecord, SnapshotRecord, write_commit, write_snapshot
from muse.core.types import Manifest, blob_id

# CLI object passed as the first argument to ``runner.invoke``; this suite
# always passes ``None``.
cli = None
# Shared runner used by ``_invoke``.
runner = CliRunner()
# Held by ``_invoke`` around each ``runner.invoke`` call so invocations made
# from several threads run one at a time.
_invoke_lock = threading.Lock()

# Files to commit: path key -> raw bytes, as consumed by ``_commit_files``.
type _FilesMap = dict[str, bytes]

# Default ``repo_id`` written to ``repo.json`` by ``_init_repo``.
_REPO_ID = "cgrep-hardening"


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


class _GrepMatchOut(TypedDict):
    """Shape of one entry of ``results[].matches`` in the ``--json`` output."""

    # Position of the matching line (presumably 1-based — TODO confirm).
    line_number: int
    # Text of the matching line (the tests compare it without a trailing newline).
    line: str
    # Lines surrounding the match; empty lists when no context is requested.
    context_before: list[str]
    context_after: list[str]


class _GrepResultOut(TypedDict):
    """Shape of one entry of ``results`` in the ``--json`` output.

    The per-file key is ``path`` (not ``file``): every test in this module
    reads ``result["path"]`` and one explicitly asserts ``"file"`` is absent.
    """

    # Path of the file that contained at least one match.
    path: str
    # Identifier of the searched object (not asserted on by this suite).
    object_id: str
    # Number of matches reported for this file.
    match_count: int
    # Individual matches, each with optional surrounding context.
    matches: list[_GrepMatchOut]


class _GrepOut(TypedDict):
    """Top-level shape of the ``content-grep --json`` payload.

    ``commit_id`` and ``snapshot_id`` are nullable: the working-tree tests in
    this module assert both are ``None`` when ``--working-tree`` is used.
    """

    # Where the searched content came from: "commit" or "working-tree".
    source: str
    # Commit that was searched, or ``None`` in working-tree mode.
    commit_id: str | None
    # Snapshot that was searched, or ``None`` in working-tree mode.
    snapshot_id: str | None
    # The pattern exactly as supplied on the command line.
    pattern: str
    # Number of files with at least one match.
    total_files_matched: int
    # Number of matches summed over all files.
    total_matches: int
    # Per-file results.
    results: list[_GrepResultOut]
    # Time taken by the search, as a float.
    duration_ms: float
    # Mirrors the process exit code.
    exit_code: int


def _init_repo(path: pathlib.Path, repo_id: str = _REPO_ID) -> pathlib.Path:
    """Create a minimal repository skeleton under *path* and return *path*.

    Creates the ``commits``, ``snapshots``, ``objects`` and ``refs/heads``
    directories inside the muse directory, points ``HEAD`` at
    ``refs/heads/main`` and writes a ``repo.json`` carrying *repo_id*.
    """
    store = muse_dir(path)
    for subdir in ("commits", "snapshots", "objects", "refs/heads"):
        store.joinpath(subdir).mkdir(parents=True, exist_ok=True)

    store.joinpath("HEAD").write_text("ref: refs/heads/main", encoding="utf-8")

    repo_meta = {"repo_id": repo_id, "domain": "midi"}
    store.joinpath("repo.json").write_text(json.dumps(repo_meta), encoding="utf-8")
    return path


def _env(repo: pathlib.Path) -> Manifest:
    """Build the environment mapping that points the CLI at *repo*."""
    environment = {}
    environment["MUSE_REPO_ROOT"] = str(repo)
    return environment


# Incremented by ``_commit_files`` on every call and embedded in the commit message.
_counter = 0


def _commit_files(
    root: pathlib.Path,
    files: _FilesMap,
    branch: str = "main",
    parent_id: str | None = None,
) -> str:
    """Write *files* as a new commit on *branch* and return the commit id.

    Each file's bytes are stored as an object, a snapshot is written for the
    resulting manifest, a commit record referencing that snapshot (and
    *parent_id*, when given) is written, and the branch ref file is updated
    to contain the new commit id.
    """
    global _counter
    _counter += 1
    message = f"commit {_counter}"

    # Store every blob and remember which object id each path maps to.
    manifest: Manifest = {}
    for rel_path, payload in files.items():
        object_id = blob_id(payload)
        write_object(root, object_id, payload)
        manifest[rel_path] = object_id

    snapshot_id = hash_snapshot(manifest)
    write_snapshot(root, SnapshotRecord(snapshot_id=snapshot_id, manifest=manifest))

    timestamp = datetime.datetime.now(datetime.timezone.utc)
    parents = [] if not parent_id else [parent_id]
    new_commit_id = hash_commit(parents, snapshot_id, message, timestamp.isoformat())

    record = CommitRecord(
        commit_id=new_commit_id,
        branch=branch,
        snapshot_id=snapshot_id,
        message=message,
        committed_at=timestamp,
        parent_commit_id=parent_id,
    )
    write_commit(root, record)

    # Move the branch tip to the commit just written.
    tip_file = ref_path(root, branch)
    tip_file.parent.mkdir(parents=True, exist_ok=True)
    tip_file.write_text(new_commit_id, encoding="utf-8")
    return new_commit_id


def _invoke(args: list[str], env: Manifest | None = None) -> InvokeResult:
    """Run the CLI with *args* through the shared runner, one call at a time."""
    _invoke_lock.acquire()
    try:
        outcome = runner.invoke(cli, args, env=env)
    finally:
        _invoke_lock.release()
    return outcome


def _parse(result: InvokeResult) -> _GrepOut:
    """Decode the JSON document captured in ``result.output``."""
    payload: _GrepOut = json.loads(result.output)
    return payload


# ---------------------------------------------------------------------------
# Unit: _is_binary
# ---------------------------------------------------------------------------


def test_is_binary_null_byte() -> None:
    """Bytes containing a NUL are reported as binary."""
    from muse.cli.commands.content_grep import _is_binary

    verdict = _is_binary(b"\x00hello")
    assert verdict is True


def test_is_binary_clean_text() -> None:
    """Ordinary ASCII text is not reported as binary."""
    from muse.cli.commands.content_grep import _is_binary

    verdict = _is_binary(b"hello world\n")
    assert verdict is False


def test_is_binary_empty() -> None:
    """An empty payload is not reported as binary."""
    from muse.cli.commands.content_grep import _is_binary

    verdict = _is_binary(b"")
    assert verdict is False


# ---------------------------------------------------------------------------
# Unit: _path_matches_globs
# ---------------------------------------------------------------------------


def test_path_matches_no_filter() -> None:
    """With neither include nor exclude glob, every path is accepted."""
    from muse.cli.commands.content_grep import _path_matches_globs

    accepted = _path_matches_globs("src/main.py", None, None)
    assert accepted is True


def test_path_matches_include_basename() -> None:
    """An include glob without a directory part is matched against the basename."""
    from muse.cli.commands.content_grep import _path_matches_globs

    python_file = _path_matches_globs("src/main.py", "*.py", None)
    assert python_file is True
    javascript_file = _path_matches_globs("src/main.js", "*.py", None)
    assert javascript_file is False


def test_path_matches_include_full_path() -> None:
    """An include glob with a directory part is matched against the full path."""
    from muse.cli.commands.content_grep import _path_matches_globs

    inside_src = _path_matches_globs("src/main.py", "src/*.py", None)
    assert inside_src is True
    outside_src = _path_matches_globs("tests/main.py", "src/*.py", None)
    assert outside_src is False


def test_path_matches_exclude_basename() -> None:
    """An exclude glob rejects matching basenames and leaves others alone."""
    from muse.cli.commands.content_grep import _path_matches_globs

    minified = _path_matches_globs("app.min.js", None, "*.min.js")
    assert minified is False
    plain = _path_matches_globs("app.js", None, "*.min.js")
    assert plain is True


def test_path_matches_include_and_exclude() -> None:
    """A path must pass the include glob and not hit the exclude glob."""
    from muse.cli.commands.content_grep import _path_matches_globs

    included_only = _path_matches_globs("src/main.py", "*.py", "test_*.py")
    assert included_only is True
    included_but_excluded = _path_matches_globs("test_foo.py", "*.py", "test_*.py")
    assert included_but_excluded is False


# ---------------------------------------------------------------------------
# Unit: _search_object — context lines
# ---------------------------------------------------------------------------


def test_search_object_context(tmp_path: pathlib.Path) -> None:
    """One line of context yields the neighbouring line on each side."""
    import re
    from muse.cli.commands.content_grep import _search_object

    _init_repo(tmp_path)
    payload = b"line one\nTARGET line\nline three\n"
    oid = blob_id(payload)
    write_object(tmp_path, oid, payload)

    regex = re.compile("TARGET")
    total, hits = _search_object(tmp_path, oid, regex, False, False, context_lines=1)
    assert total == 1
    assert len(hits) == 1
    first = hits[0]
    assert first["context_before"] == ["line one"]
    assert hits[0]["context_after"] == ["line three"]


def test_search_object_context_at_boundary(tmp_path: pathlib.Path) -> None:
    """Context is clipped to the lines that exist around a first-line match."""
    import re
    from muse.cli.commands.content_grep import _search_object

    _init_repo(tmp_path)
    payload = b"TARGET\nonly\n"
    oid = blob_id(payload)
    write_object(tmp_path, oid, payload)

    regex = re.compile("TARGET")
    _total, hits = _search_object(tmp_path, oid, regex, False, False, context_lines=3)
    # Nothing precedes the match, and only one line follows it.
    assert hits[0]["context_before"] == []
    assert hits[0]["context_after"] == ["only"]


def test_search_object_no_context(tmp_path: pathlib.Path) -> None:
    """With ``context_lines=0`` both context lists are empty."""
    import re
    from muse.cli.commands.content_grep import _search_object

    _init_repo(tmp_path)
    payload = b"line\nTARGET\nend\n"
    oid = blob_id(payload)
    write_object(tmp_path, oid, payload)

    regex = re.compile("TARGET")
    _total, hits = _search_object(tmp_path, oid, regex, False, False, context_lines=0)
    assert hits[0]["context_before"] == []
    assert hits[0]["context_after"] == []


def test_search_object_binary_skipped(tmp_path: pathlib.Path) -> None:
    """A binary object yields no matches even when it contains the pattern."""
    import re
    from muse.cli.commands.content_grep import _search_object

    _init_repo(tmp_path)
    payload = b"\x00\x01\x02TARGET\x03"
    oid = blob_id(payload)
    write_object(tmp_path, oid, payload)

    regex = re.compile("TARGET")
    total, hits = _search_object(tmp_path, oid, regex, False, False, 0)
    assert total == 0
    assert hits == []


# ---------------------------------------------------------------------------
# Security: pattern validation happens BEFORE I/O
# ---------------------------------------------------------------------------


def test_long_pattern_rejected_before_io(tmp_path: pathlib.Path) -> None:
    """A too-long pattern must be rejected without touching the object store."""
    _init_repo(tmp_path)
    # Deliberately no commit: had the command reached the store it would have
    # failed with a 'no commits' error rather than a 'pattern too long' one.
    oversized = "a" * 501
    result = _invoke(["content-grep", oversized], env=_env(tmp_path))
    assert result.exit_code != 0

    # The complaint must be about pattern length; stderr is only consulted
    # when stdout does not already carry the message.
    needle = "too long"
    mentioned = needle in result.output.lower()
    if not mentioned:
        mentioned = needle in (result.stderr or "").lower()
    assert mentioned


def test_invalid_regex_rejected_before_io(tmp_path: pathlib.Path) -> None:
    """A malformed regex fails with a message mentioning 'regex'."""
    _init_repo(tmp_path)
    result = _invoke(["content-grep", "[unclosed"], env=_env(tmp_path))
    assert result.exit_code != 0

    # stderr is only consulted when stdout does not already carry the message.
    needle = "regex"
    mentioned = needle in result.output.lower()
    if not mentioned:
        mentioned = needle in (result.stderr or "").lower()
    assert mentioned


# ---------------------------------------------------------------------------
# Security: ANSI injection
# ---------------------------------------------------------------------------


def test_ansi_injection_in_path(tmp_path: pathlib.Path) -> None:
    """File paths with ANSI escapes must be stripped in text output."""
    _init_repo(tmp_path)
    hostile_name = "\x1b[31mmalicious\x1b[0m.txt"
    _commit_files(tmp_path, {hostile_name: b"TARGET content\n"})

    result = _invoke(["content-grep", "TARGET"], env=_env(tmp_path))

    assert result.exit_code == 0
    # No raw ESC byte may survive into the rendered output.
    assert "\x1b" not in result.output


def test_ansi_injection_in_match_text(tmp_path: pathlib.Path) -> None:
    """Match text with ANSI escapes must be stripped in text output."""
    _init_repo(tmp_path)
    hostile_line = b"TARGET \x1b[31mred\x1b[0m content\n"
    _commit_files(tmp_path, {"safe.txt": hostile_line})

    result = _invoke(["content-grep", "TARGET"], env=_env(tmp_path))

    assert result.exit_code == 0
    # No raw ESC byte may survive into the rendered output.
    assert "\x1b" not in result.output


# ---------------------------------------------------------------------------
# JSON schema: _ContentGrepJson
# ---------------------------------------------------------------------------


def test_json_schema_all_fields(tmp_path: pathlib.Path) -> None:
    """The JSON payload carries ids, the pattern, totals and per-file results."""
    _init_repo(tmp_path)
    _commit_files(tmp_path, {"a.txt": b"hello world\nhello again\n"})

    result = _invoke(["content-grep", "hello", "--json"], env=_env(tmp_path))
    assert result.exit_code == 0
    payload = _parse(result)

    # Both ids are "sha256:"-prefixed and 71 characters long.
    assert payload["commit_id"].startswith("sha256:")
    assert len(payload["commit_id"]) == 71
    assert payload["snapshot_id"].startswith("sha256:")
    assert len(payload["snapshot_id"]) == 71

    assert payload["pattern"] == "hello"
    assert payload["total_files_matched"] == 1
    assert payload["total_matches"] == 2

    assert len(payload["results"]) == 1
    entry = payload["results"][0]
    assert entry["path"] == "a.txt"
    assert entry["match_count"] == 2
    assert isinstance(entry["matches"], list)


def test_json_schema_context_fields(tmp_path: pathlib.Path) -> None:
    """``--context 1 --json`` exposes the surrounding lines on each match."""
    _init_repo(tmp_path)
    _commit_files(tmp_path, {"c.txt": b"before\nTARGET\nafter\n"})

    argv = ["content-grep", "TARGET", "--context", "1", "--json"]
    result = _invoke(argv, env=_env(tmp_path))
    assert result.exit_code == 0

    payload = _parse(result)
    hit = payload["results"][0]["matches"][0]
    assert isinstance(hit, dict)
    assert "context_before" in hit
    assert "context_after" in hit
    assert hit["context_before"] == ["before"]
    assert hit["context_after"] == ["after"]


def test_json_schema_no_match_exit1(tmp_path: pathlib.Path) -> None:
    """A pattern that matches nothing makes the command exit non-zero."""
    _init_repo(tmp_path)
    _commit_files(tmp_path, {"a.txt": b"hello\n"})

    argv = ["content-grep", "ZZZNOMATCH", "--json"]
    result = _invoke(argv, env=_env(tmp_path))

    assert result.exit_code != 0


def test_json_total_matches_multiple_files(tmp_path: pathlib.Path) -> None:
    """Totals count matching files and matches across the whole snapshot."""
    _init_repo(tmp_path)
    snapshot = {
        "a.txt": b"hit\nhit\n",
        "b.txt": b"hit\n",
        "c.txt": b"miss\n",
    }
    _commit_files(tmp_path, snapshot)

    result = _invoke(["content-grep", "hit", "--json"], env=_env(tmp_path))
    assert result.exit_code == 0

    payload = _parse(result)
    # Two files contain "hit": two matches in a.txt plus one in b.txt.
    assert payload["total_files_matched"] == 2
    assert payload["total_matches"] == 3


# ---------------------------------------------------------------------------
# Flags: --include
# ---------------------------------------------------------------------------


def test_include_filters_to_py_only(tmp_path: pathlib.Path) -> None:
    """``--include '*.py'`` restricts results to the Python file."""
    _init_repo(tmp_path)
    snapshot = {
        "module.py": b"TARGET in python\n",
        "module.js": b"TARGET in js\n",
        "readme.md": b"TARGET in md\n",
    }
    _commit_files(tmp_path, snapshot)

    argv = ["content-grep", "TARGET", "--include", "*.py", "--json"]
    result = _invoke(argv, env=_env(tmp_path))
    assert result.exit_code == 0

    payload = _parse(result)
    assert payload["total_files_matched"] == 1
    assert payload["results"][0]["path"] == "module.py"


def test_include_no_matches_after_filter(tmp_path: pathlib.Path) -> None:
    """When no file passes the include glob the command exits non-zero."""
    _init_repo(tmp_path)
    _commit_files(tmp_path, {"module.js": b"TARGET here\n"})

    argv = ["content-grep", "TARGET", "--include", "*.py"]
    result = _invoke(argv, env=_env(tmp_path))

    # The only committed file is a .js file, so nothing is left to search.
    assert result.exit_code != 0


# ---------------------------------------------------------------------------
# Flags: --exclude
# ---------------------------------------------------------------------------


def test_exclude_skips_minified(tmp_path: pathlib.Path) -> None:
    """``--exclude '*.min.js'`` drops the minified file from the results."""
    _init_repo(tmp_path)
    snapshot = {
        "app.js": b"TARGET here\n",
        "app.min.js": b"TARGET minified\n",
    }
    _commit_files(tmp_path, snapshot)

    argv = ["content-grep", "TARGET", "--exclude", "*.min.js", "--json"]
    result = _invoke(argv, env=_env(tmp_path))
    assert result.exit_code == 0

    payload = _parse(result)
    assert payload["total_files_matched"] == 1
    assert payload["results"][0]["path"] == "app.js"


def test_exclude_all_results_in_no_match(tmp_path: pathlib.Path) -> None:
    """An exclude glob that does not cover the file leaves it searchable.

    ``test.py`` has no underscore after ``test``, so it is not matched by the
    ``test_*.py`` exclude glob and the search still succeeds.
    """
    _init_repo(tmp_path)
    _commit_files(tmp_path, {"test.py": b"TARGET\n"})

    argv = ["content-grep", "TARGET", "--exclude", "test_*.py"]
    result = _invoke(argv, env=_env(tmp_path))

    # The target file was not excluded, so a match is reported.
    assert result.exit_code == 0


# ---------------------------------------------------------------------------
# Flags: --max-matches
# ---------------------------------------------------------------------------


def test_max_matches_caps_output(tmp_path: pathlib.Path) -> None:
    """``--max-matches 10`` keeps the reported total at or below ten."""
    _init_repo(tmp_path)
    hundred_hits = b"hit\n" * 100
    _commit_files(tmp_path, {"many.txt": hundred_hits})

    argv = ["content-grep", "hit", "--max-matches", "10", "--json"]
    result = _invoke(argv, env=_env(tmp_path))
    assert result.exit_code == 0

    payload = _parse(result)
    assert payload["total_matches"] <= 10


def test_max_matches_zero_still_exits_nonzero_on_cap(tmp_path: pathlib.Path) -> None:
    """When max_matches=0, no results are kept — exit 1."""
    _init_repo(tmp_path)
    _commit_files(tmp_path, {"a.txt": b"hit\n"})

    argv = ["content-grep", "hit", "--max-matches", "0", "--json"]
    result = _invoke(argv, env=_env(tmp_path))

    # A cap of zero leaves nothing to report, which counts as "no match".
    assert result.exit_code != 0


# ---------------------------------------------------------------------------
# Flags: --context / -C
# ---------------------------------------------------------------------------


def test_context_text_output(tmp_path: pathlib.Path) -> None:
    """``--context 1`` prints the lines before and after the match."""
    _init_repo(tmp_path)
    _commit_files(tmp_path, {"ctx.txt": b"alpha\nbeta\ngamma\n"})

    argv = ["content-grep", "beta", "--context", "1"]
    result = _invoke(argv, env=_env(tmp_path))

    assert result.exit_code == 0
    rendered = result.output
    # Both neighbours of the matching line must be shown.
    assert "alpha" in rendered
    assert "gamma" in rendered


def test_context_short_flag(tmp_path: pathlib.Path) -> None:
    """``-C 1`` prints the lines before and after the match."""
    _init_repo(tmp_path)
    _commit_files(tmp_path, {"ctx2.txt": b"first\nTARGET\nlast\n"})

    argv = ["content-grep", "TARGET", "-C", "1"]
    result = _invoke(argv, env=_env(tmp_path))

    assert result.exit_code == 0
    rendered = result.output
    assert "first" in rendered
    assert "last" in rendered


# ---------------------------------------------------------------------------
# Flags: --json boolean (rejects old --format)
# ---------------------------------------------------------------------------


def test_format_flag_rejected(tmp_path: pathlib.Path) -> None:
    """Old ``--format json`` must be rejected by argparse (exit 2)."""
    _init_repo(tmp_path)
    _commit_files(tmp_path, {"a.txt": b"hello\n"})

    legacy_argv = ["content-grep", "hello", "--format", "json"]
    result = _invoke(legacy_argv, env=_env(tmp_path))

    assert result.exit_code == 2


# ---------------------------------------------------------------------------
# Integration: --ref searches a different commit
# ---------------------------------------------------------------------------


def test_ref_searches_branch(tmp_path: pathlib.Path) -> None:
    """``--ref <commit id>`` searches that commit instead of HEAD."""
    _init_repo(tmp_path)
    first_commit = _commit_files(tmp_path, {"v1.txt": b"OLD content\n"})
    _commit_files(tmp_path, {"v2.txt": b"NEW content\n"}, parent_id=first_commit)
    env = _env(tmp_path)

    # Without --ref the search runs against HEAD, where v2.txt holds NEW.
    head_result = _invoke(["content-grep", "NEW", "--json"], env=env)
    assert head_result.exit_code == 0
    head_paths = [entry["path"] for entry in _parse(head_result)["results"]]
    assert "v2.txt" in head_paths

    # Pinned to the first commit, the search finds OLD in v1.txt.
    pinned_result = _invoke(
        ["content-grep", "OLD", "--ref", first_commit, "--json"], env=env
    )
    assert pinned_result.exit_code == 0
    pinned = _parse(pinned_result)
    pinned_paths = [entry["path"] for entry in pinned["results"]]
    assert "v1.txt" in pinned_paths
    assert pinned["commit_id"] == first_commit


# ---------------------------------------------------------------------------
# E2E: --help mentions all new flags
# ---------------------------------------------------------------------------


def test_help_mentions_include() -> None:
    """``--help`` exits 0 and documents ``--include``."""
    help_result = _invoke(["content-grep", "--help"])
    assert help_result.exit_code == 0
    help_text = help_result.output
    assert "--include" in help_text


def test_help_mentions_exclude() -> None:
    """``--help`` documents ``--exclude``."""
    help_text = _invoke(["content-grep", "--help"]).output
    assert "--exclude" in help_text


def test_help_mentions_max_matches() -> None:
    """``--help`` documents ``--max-matches``."""
    help_text = _invoke(["content-grep", "--help"]).output
    assert "--max-matches" in help_text


def test_help_mentions_context() -> None:
    """``--help`` documents the context flag in its long or short form."""
    help_text = _invoke(["content-grep", "--help"]).output
    assert any(flag in help_text for flag in ("--context", "-C"))


def test_help_mentions_json_not_format() -> None:
    """``--help`` documents ``--json`` and no longer mentions ``--format``."""
    help_text = _invoke(["content-grep", "--help"]).output
    assert "--json" in help_text
    assert "--format" not in help_text


# ---------------------------------------------------------------------------
# Stress: 500-file snapshot, pattern matches 250
# ---------------------------------------------------------------------------


def test_stress_500_files(tmp_path: pathlib.Path) -> None:
    """A 500-file snapshot where every even-numbered file matches once."""
    _init_repo(tmp_path)
    # Even indices carry the needle, odd indices carry filler: 250 hits total.
    files: _FilesMap = {
        f"f_{i:04d}.txt": (b"TARGET_STRESS\n" if i % 2 == 0 else b"other\n")
        for i in range(500)
    }
    _commit_files(tmp_path, files)

    argv = ["content-grep", "TARGET_STRESS", "--json"]
    result = _invoke(argv, env=_env(tmp_path))
    assert result.exit_code == 0

    payload = _parse(result)
    assert payload["total_files_matched"] == 250
    assert payload["total_matches"] == 250


# ---------------------------------------------------------------------------
# Stress: concurrent reads
# ---------------------------------------------------------------------------


def test_stress_concurrent_reads(tmp_path: pathlib.Path) -> None:
    """Eight threads grep the same repository and must all see one match.

    Failures are collected in ``errors`` rather than raised, because an
    exception that escapes a worker thread does not propagate to the thread
    that joins it and would otherwise let the test pass silently.
    """
    _init_repo(tmp_path)
    _commit_files(tmp_path, {"concurrent.txt": b"CONCURRENT TARGET\n"})

    errors: list[str] = []

    def _read() -> None:
        try:
            r = _invoke(
                ["content-grep", "CONCURRENT", "--json"],
                env=_env(tmp_path),
            )
            if r.exit_code != 0:
                errors.append(f"exit {r.exit_code}")
                return
            d = json.loads(r.output)
            if d.get("total_matches", 0) != 1:
                errors.append(f"unexpected total_matches: {d.get('total_matches')}")
        except Exception as exc:  # noqa: BLE001 - thread boundary, recorded below
            # Covers invalid JSON as well as any crash inside the invocation.
            errors.append(f"{type(exc).__name__}: {exc}")

    threads = [threading.Thread(target=_read) for _ in range(8)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()

    assert not errors, f"Concurrent read failures: {errors}"


# ---------------------------------------------------------------------------
# JSON schema: complete key set (TestJsonSchemaComplete)
# ---------------------------------------------------------------------------


# Top-level keys that every ``--json`` payload is expected to contain.
_REQUIRED_KEYS = frozenset(
    (
        "source",
        "commit_id",
        "snapshot_id",
        "pattern",
        "total_files_matched",
        "total_matches",
        "results",
        "duration_ms",
        "exit_code",
    )
)


class TestJsonSchemaComplete:
    """Verify that every required key is present in JSON output."""

    @staticmethod
    def _grep_hello(repo: pathlib.Path, *, working_tree: bool = False) -> InvokeResult:
        """Commit ``a.txt`` into a fresh repo and grep it for ``hello`` as JSON.

        With *working_tree* set, the same content is also written to disk and
        ``--working-tree`` is passed so the on-disk file is what gets searched.
        """
        _init_repo(repo)
        _commit_files(repo, {"a.txt": b"hello\n"})
        argv = ["content-grep", "hello"]
        if working_tree:
            # Put a matching file on disk so the working-tree search finds it.
            (repo / "a.txt").write_bytes(b"hello\n")
            argv.append("--working-tree")
        argv.append("--json")
        return _invoke(argv, env=_env(repo))

    def test_all_required_keys_present_commit_mode(self, tmp_path: pathlib.Path) -> None:
        result = self._grep_hello(tmp_path)
        assert result.exit_code == 0
        payload = json.loads(result.output)
        missing = _REQUIRED_KEYS - payload.keys()
        assert not missing, f"Missing keys: {missing}"

    def test_all_required_keys_present_working_tree_mode(self, tmp_path: pathlib.Path) -> None:
        result = self._grep_hello(tmp_path, working_tree=True)
        assert result.exit_code == 0
        payload = json.loads(result.output)
        missing = _REQUIRED_KEYS - payload.keys()
        assert not missing, f"Missing keys: {missing}"

    def test_source_field_is_commit(self, tmp_path: pathlib.Path) -> None:
        payload = json.loads(self._grep_hello(tmp_path).output)
        assert payload["source"] == "commit"

    def test_source_field_is_working_tree(self, tmp_path: pathlib.Path) -> None:
        payload = json.loads(self._grep_hello(tmp_path, working_tree=True).output)
        assert payload["source"] == "working-tree"

    def test_commit_id_null_in_working_tree_mode(self, tmp_path: pathlib.Path) -> None:
        payload = json.loads(self._grep_hello(tmp_path, working_tree=True).output)
        assert payload["commit_id"] is None

    def test_snapshot_id_null_in_working_tree_mode(self, tmp_path: pathlib.Path) -> None:
        payload = json.loads(self._grep_hello(tmp_path, working_tree=True).output)
        assert payload["snapshot_id"] is None

    def test_exit_code_field_zero_on_match(self, tmp_path: pathlib.Path) -> None:
        payload = json.loads(self._grep_hello(tmp_path).output)
        assert payload["exit_code"] == 0

    def test_json_is_compact(self, tmp_path: pathlib.Path) -> None:
        """JSON output must be a single line — no pretty-printing."""
        result = self._grep_hello(tmp_path)
        non_blank = [text for text in result.output.splitlines() if text.strip()]
        assert len(non_blank) == 1, "JSON must be compact (one line)"


# ---------------------------------------------------------------------------
# duration_ms (TestElapsedSeconds)
# ---------------------------------------------------------------------------


class TestElapsedSeconds:
    """``duration_ms`` must be a non-negative float in all JSON paths.

    Going by the key name the value is in milliseconds, so the bounds used in
    this class are expressed in milliseconds too.
    """

    def _assert_elapsed(self, data: Mapping[str, object]) -> None:  # type: ignore[type-arg]
        """Assert *data* carries a non-negative float under ``duration_ms``."""
        assert "duration_ms" in data
        assert isinstance(data["duration_ms"], float)
        assert data["duration_ms"] >= 0.0

    def test_elapsed_present_commit_mode(self, tmp_path: pathlib.Path) -> None:
        _init_repo(tmp_path)
        _commit_files(tmp_path, {"a.txt": b"target\n"})
        result = _invoke(["content-grep", "target", "--json"], env=_env(tmp_path))
        self._assert_elapsed(json.loads(result.output))

    def test_elapsed_present_working_tree_mode(self, tmp_path: pathlib.Path) -> None:
        _init_repo(tmp_path)
        _commit_files(tmp_path, {"a.txt": b"target\n"})
        # Write the file to disk so the working-tree search has content to scan.
        (tmp_path / "a.txt").write_bytes(b"target\n")
        result = _invoke(
            ["content-grep", "target", "--working-tree", "--json"],
            env=_env(tmp_path),
        )
        self._assert_elapsed(json.loads(result.output))

    def test_elapsed_is_float_not_int(self, tmp_path: pathlib.Path) -> None:
        _init_repo(tmp_path)
        _commit_files(tmp_path, {"a.txt": b"target\n"})
        result = _invoke(["content-grep", "target", "--json"], env=_env(tmp_path))
        data = json.loads(result.output)
        assert isinstance(data["duration_ms"], float)

    def test_elapsed_reasonable_upper_bound(self, tmp_path: pathlib.Path) -> None:
        """Single-file search in a temp repo should be well under 5 seconds."""
        _init_repo(tmp_path)
        _commit_files(tmp_path, {"a.txt": b"target\n"})
        result = _invoke(["content-grep", "target", "--json"], env=_env(tmp_path))
        data = json.loads(result.output)
        # 5 seconds expressed in milliseconds; a bound of 5.0 would demand the
        # search finish in 5 ms and make the test timing-sensitive.
        assert data["duration_ms"] < 5_000.0

    def test_elapsed_present_stress_mode(self, tmp_path: pathlib.Path) -> None:
        """duration_ms must appear even for a 50-file search."""
        _init_repo(tmp_path)
        files: _FilesMap = {f"f{i}.txt": b"needle\n" for i in range(50)}
        _commit_files(tmp_path, files)
        result = _invoke(["content-grep", "needle", "--json"], env=_env(tmp_path))
        assert result.exit_code == 0
        self._assert_elapsed(json.loads(result.output))

    def test_elapsed_six_decimal_places(self, tmp_path: pathlib.Path) -> None:
        """duration_ms should be rounded to at most 6 decimal places."""
        _init_repo(tmp_path)
        _commit_files(tmp_path, {"a.txt": b"target\n"})
        result = _invoke(["content-grep", "target", "--json"], env=_env(tmp_path))
        data = json.loads(result.output)
        elapsed = data["duration_ms"]
        # round-trip through 6-decimal representation must be exact
        assert round(elapsed, 6) == elapsed


# ---------------------------------------------------------------------------
# exit_code field (TestExitCode)
# ---------------------------------------------------------------------------


class TestExitCode:
    """``exit_code`` in JSON must mirror the process exit code."""

    def test_exit_code_zero_on_match(self, tmp_path: pathlib.Path) -> None:
        _init_repo(tmp_path)
        _commit_files(tmp_path, {"a.txt": b"hit\n"})
        outcome = _invoke(["content-grep", "hit", "--json"], env=_env(tmp_path))
        assert outcome.exit_code == 0
        payload = json.loads(outcome.output)
        assert payload["exit_code"] == 0

    def test_exit_code_zero_working_tree_match(self, tmp_path: pathlib.Path) -> None:
        _init_repo(tmp_path)
        _commit_files(tmp_path, {"a.txt": b"hit\n"})
        # Mirror the committed file on disk for the working-tree search.
        (tmp_path / "a.txt").write_bytes(b"hit\n")
        argv = ["content-grep", "hit", "--working-tree", "--json"]
        outcome = _invoke(argv, env=_env(tmp_path))
        assert outcome.exit_code == 0
        payload = json.loads(outcome.output)
        assert payload["exit_code"] == 0

    def test_exit_code_is_integer(self, tmp_path: pathlib.Path) -> None:
        _init_repo(tmp_path)
        _commit_files(tmp_path, {"a.txt": b"hit\n"})
        outcome = _invoke(["content-grep", "hit", "--json"], env=_env(tmp_path))
        reported = json.loads(outcome.output)["exit_code"]
        assert isinstance(reported, int)

    def test_exit_code_in_json_matches_process_exit(self, tmp_path: pathlib.Path) -> None:
        """JSON exit_code must equal the actual process exit code."""
        _init_repo(tmp_path)
        _commit_files(tmp_path, {"a.txt": b"hit\n"})
        outcome = _invoke(["content-grep", "hit", "--json"], env=_env(tmp_path))
        payload = json.loads(outcome.output)
        assert payload["exit_code"] == outcome.exit_code

    def test_exit_code_multiple_files(self, tmp_path: pathlib.Path) -> None:
        _init_repo(tmp_path)
        _commit_files(tmp_path, {"a.txt": b"hit\n", "b.txt": b"hit\n"})
        outcome = _invoke(["content-grep", "hit", "--json"], env=_env(tmp_path))
        assert outcome.exit_code == 0
        payload = json.loads(outcome.output)
        assert payload["exit_code"] == 0


# ---------------------------------------------------------------------------
# Flag registration tests
# ---------------------------------------------------------------------------

import argparse as _argparse
from muse.cli.commands.content_grep import register as _register_content_grep
from muse.core.paths import muse_dir, ref_path


def _parse_cgrep(*args: str) -> _argparse.Namespace:
    """Parse ``content-grep`` plus *args* with a parser built from ``register``.

    A throwaway root parser is created, the command registers itself on its
    sub-parsers, and the resulting namespace is returned.
    """
    parser = _argparse.ArgumentParser()
    _register_content_grep(parser.add_subparsers(dest="cmd"))
    argv = ["content-grep"]
    argv.extend(args)
    return parser.parse_args(argv)


class TestRegisterFlags:
    """Argument registration: ``--json``/``-j`` and the positional pattern."""

    def test_default_json_out_is_false(self) -> None:
        parsed = _parse_cgrep("TODO")
        assert parsed.json_out is False

    def test_json_flag_sets_json_out(self) -> None:
        parsed = _parse_cgrep("TODO", "--json")
        assert parsed.json_out is True

    def test_j_shorthand_sets_json_out(self) -> None:
        parsed = _parse_cgrep("TODO", "-j")
        assert parsed.json_out is True

    def test_pattern_positional(self) -> None:
        parsed = _parse_cgrep("FIXME")
        assert parsed.pattern == "FIXME"


# ---------------------------------------------------------------------------
# JSON key ergonomics: results[].path and matches[].line
# ---------------------------------------------------------------------------


class TestJsonKeyErgonomics:
    """content-grep --json must use 'path' (matching all other muse commands) and
    'line' (not 'text') for match content."""

    def test_result_key_is_path(self, tmp_path: pathlib.Path) -> None:
        _init_repo(tmp_path)
        _commit_files(tmp_path, {"src/main.py": b"hello world\n"})
        outcome = _invoke(["content-grep", "hello", "--json"], env=_env(tmp_path))
        first_result = json.loads(outcome.output)["results"][0]
        assert first_result["path"] == "src/main.py"

    def test_result_has_no_file_key(self, tmp_path: pathlib.Path) -> None:
        _init_repo(tmp_path)
        _commit_files(tmp_path, {"src/main.py": b"hello world\n"})
        outcome = _invoke(["content-grep", "hello", "--json"], env=_env(tmp_path))
        first_result = json.loads(outcome.output)["results"][0]
        assert "file" not in first_result

    def test_match_key_is_line_not_text(self, tmp_path: pathlib.Path) -> None:
        _init_repo(tmp_path)
        _commit_files(tmp_path, {"a.py": b"hello world\n"})
        outcome = _invoke(["content-grep", "hello", "--json"], env=_env(tmp_path))
        payload = json.loads(outcome.output)
        first_match = payload["results"][0]["matches"][0]
        assert first_match["line"] == "hello world"

    def test_match_has_no_text_key(self, tmp_path: pathlib.Path) -> None:
        _init_repo(tmp_path)
        _commit_files(tmp_path, {"a.py": b"hello world\n"})
        outcome = _invoke(["content-grep", "hello", "--json"], env=_env(tmp_path))
        payload = json.loads(outcome.output)
        first_match = payload["results"][0]["matches"][0]
        assert "text" not in first_match

    def test_working_tree_result_key_is_path(self, tmp_path: pathlib.Path) -> None:
        _init_repo(tmp_path)
        _commit_files(tmp_path, {"a.py": b"placeholder\n"})
        # The on-disk content differs from the commit; only it holds the needle.
        (tmp_path / "a.py").write_text("needle here\n", encoding="utf-8")
        argv = ["content-grep", "needle", "--working-tree", "--json"]
        outcome = _invoke(argv, env=_env(tmp_path))
        first_result = json.loads(outcome.output)["results"][0]
        assert first_result["path"] == "a.py"

    def test_working_tree_match_key_is_line(self, tmp_path: pathlib.Path) -> None:
        _init_repo(tmp_path)
        _commit_files(tmp_path, {"a.py": b"placeholder\n"})
        # The on-disk content differs from the commit; only it holds the needle.
        (tmp_path / "a.py").write_text("needle here\n", encoding="utf-8")
        argv = ["content-grep", "needle", "--working-tree", "--json"]
        outcome = _invoke(argv, env=_env(tmp_path))
        payload = json.loads(outcome.output)
        first_match = payload["results"][0]["matches"][0]
        assert first_match["line"] == "needle here"