"""Comprehensive hardening tests for ``muse bisect``.

Covers:
- Unit: _toml_escape, _load_state symlink guard, size cap, _save_state injection
- Security: branch TOML injection, symlink state file, oversized state, ANSI
  sanitization, error routing to stderr, null bytes in refs
- JSON schema: all subcommands (start, bad, good, skip, log, reset, run)
- Integration: --json round-trips, get_bisect_next public API, session lifecycle
- E2E: symbol-scoped bisect, run subcommand NDJSON, reset --json, log --json
- Stress: 200-commit chain, concurrent read-only queries
"""
from __future__ import annotations

import datetime
import json
import pathlib
import re
import threading
from typing import TypedDict

import pytest

from muse.core.ids import hash_commit, hash_snapshot
from muse.core.commits import (
    CommitRecord,
    write_commit,
)
from muse.core.snapshots import (
    SnapshotRecord,
    write_snapshot,
)
from muse.core.types import Manifest, fake_id, short_id
from tests.cli_test_helper import CliRunner, InvokeResult

# Field names expected on the snapshot and commit store records.
# NOTE(review): nothing in the visible part of this file reads these two sets,
# and a plain ``set[str]`` literal is not cross-checked against the record
# classes at import time or by mypy — confirm they are still needed.
_SNAP_FIELDS: set[str] = {"snapshot_id", "manifest", "created_at"}
_COMMIT_FIELDS: set[str] = {"commit_id", "repo_id", "branch", "snapshot_id", "message", "committed_at"}

# Shared CLI runner; ``_invoke`` routes every command in this module through it.
runner = CliRunner()

# Matches ANSI SGR (colour/style) escape sequences: ESC [ <digits and ';'> m.
_ANSI_RE = re.compile(r"\x1b\[[0-9;]*m")


# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------


def _make_repo(tmp_path: pathlib.Path) -> tuple[pathlib.Path, str]:
    """Lay out a bare-bones Muse repository by hand (no ``muse init``).

    Writes ``repo.json`` and ``HEAD`` and creates the ``refs/heads``,
    ``snapshots``, ``commits`` and ``objects`` directories under the metadata
    directory returned by ``muse_dir(tmp_path)``.

    Returns:
        ``(repo_root, repo_id)`` where ``repo_root`` is *tmp_path* itself.
    """
    # NOTE(review): ``muse_dir`` is not imported in the visible part of this
    # file — confirm it is in scope before relying on this helper.
    repo_id = fake_id("repo")
    meta_dir = muse_dir(tmp_path)
    meta_dir.mkdir()

    repo_meta = {
        "repo_id": repo_id,
        "domain": "code",
        "default_branch": "main",
        "created_at": "2026-01-01T00:00:00+00:00",
    }
    (meta_dir / "repo.json").write_text(json.dumps(repo_meta))
    (meta_dir / "HEAD").write_text("ref: refs/heads/main")

    (meta_dir / "refs" / "heads").mkdir(parents=True)
    for store_name in ("snapshots", "commits", "objects"):
        (meta_dir / store_name).mkdir()

    return tmp_path, repo_id


def _make_commit(
    root: pathlib.Path,
    repo_id: str,
    *,
    branch: str = "main",
    message: str = "commit",
    parent_id: str | None = None,
) -> str:
    """Persist one synthetic commit with an empty manifest and return its id.

    The snapshot and commit records are written to the store, the branch ref
    is overwritten with the new commit id, and HEAD is pointed at *branch*.

    Args:
        root: Repository root to write into.
        repo_id: Accepted for call-site symmetry; not used by this helper.
        branch: Branch whose ref is moved to the new commit.
        message: Commit message (also feeds the commit hash).
        parent_id: Parent commit id, or ``None`` for a root commit.

    Returns:
        The id of the commit that was written.
    """
    # NOTE(review): ``ref_path`` and ``head_path`` are not imported in the
    # visible part of this file — confirm they are in scope.
    empty_manifest: Manifest = {}
    snapshot_id = hash_snapshot(empty_manifest)
    stamp = datetime.datetime.now(datetime.timezone.utc)
    parents = [parent_id] if parent_id else []
    commit_id = hash_commit(
        parent_ids=parents,
        snapshot_id=snapshot_id,
        message=message,
        committed_at_iso=stamp.isoformat(),
    )

    snapshot_record = SnapshotRecord(
        snapshot_id=snapshot_id,
        manifest={},
        created_at=stamp,
    )
    write_snapshot(root, snapshot_record)

    commit_record = CommitRecord(
        commit_id=commit_id,
        parent_commit_id=parent_id,
        parent2_commit_id=None,
        snapshot_id=snapshot_id,
        branch=branch,
        message=message,
        committed_at=stamp,
    )
    write_commit(root, commit_record)

    # Advance the branch tip, then make HEAD a symbolic ref to that branch.
    ref_path(root, branch).write_text(commit_id)
    head_path(root).write_text(f"ref: refs/heads/{branch}")
    return commit_id


def _build_chain(root: pathlib.Path, repo_id: str, n: int) -> list[str]:
    """Write *n* commits, each parented on the previous, and return ids oldest-first."""
    chain: list[str] = []
    for index in range(n):
        # The first commit has no parent; every later one hangs off the tip.
        previous = chain[-1] if chain else None
        chain.append(
            _make_commit(root, repo_id, message=f"commit {index}", parent_id=previous)
        )
    return chain


def _invoke(root: pathlib.Path, args: list[str]) -> InvokeResult:
    """Run the CLI with *args* through the shared ``runner`` and return its result.

    ``MUSE_REPO_ROOT`` is set to ``str(root)`` in the environment handed to the
    runner so the command targets the repository under test.
    """
    # The first positional argument is ``None``; presumably the helper runner
    # resolves the CLI entry point itself — TODO confirm against cli_test_helper.
    return runner.invoke(None, args, env={"MUSE_REPO_ROOT": str(root)})


def _json_blob(output: str) -> str:
    """Extract the first complete JSON object/array from mixed output.

    Resolution order:

    1. If the whole (stripped) output is valid JSON, return it unchanged.
    2. Otherwise, for every line whose first non-blank character is ``{`` or
       ``[``, try to decode one JSON value starting there. The value may span
       several lines (pretty-printed) or be followed by more text (NDJSON,
       trailing log lines). The text of the first value that decodes is
       returned.
    3. If nothing decodes, return the stripped output so the caller's
       ``json.loads`` reports the failure.

    Args:
        output: Raw text captured from a command, possibly mixing JSON with
            human-readable lines.

    Returns:
        The text of the first JSON value found, or the stripped input when
        none is found.
    """
    stripped = output.strip()
    # Fast path: the whole output is a single JSON document.
    try:
        json.loads(stripped)
        return stripped
    except json.JSONDecodeError:
        pass

    decoder = json.JSONDecoder()
    offset = 0  # index in ``output`` of the start of the current line
    for line in output.splitlines(keepends=True):
        indent = len(line) - len(line.lstrip())
        start = offset + indent
        offset += len(line)
        if line[indent:indent + 1] not in ("{", "["):
            continue
        # Decode from the candidate to wherever the value ends; bracketed
        # non-JSON lines such as "[warn] ..." fail here and are skipped.
        try:
            _, length = decoder.raw_decode(output[start:])
        except json.JSONDecodeError:
            continue
        return output[start:start + length]
    return stripped


# ---------------------------------------------------------------------------
# Typed schema helpers
# ---------------------------------------------------------------------------


class _StepJson(TypedDict):
    """Shape of the JSON object that ``_parse_step`` validates and returns.

    Used in this file for the ``--json`` output of ``bisect start``, ``bad``,
    ``good`` and ``skip``.
    """

    done: bool
    first_bad: str | None
    next_to_test: str | None
    remaining_count: int
    steps_remaining: int
    # Tests in this file expect "started", "bad", "good" or "skip".
    verdict: str
    symbol_changes: list[str]


class _LogEntryJson(TypedDict):
    """One element of the ``entries`` list in ``bisect log --json`` output.

    The log tests assert that each entry has exactly these three keys.
    """

    commit_id: str
    verdict: str
    timestamp: str


class _LogJson(TypedDict):
    """Shape of the JSON object returned by ``_parse_log``."""

    # The tests expect False when no bisect session exists, True after ``start``.
    active: bool
    # ``_parse_log`` only checks that this is a list; elements are not validated.
    entries: list[_LogEntryJson]


class _ResetJson(TypedDict):
    """Shape of the JSON object returned by ``_parse_reset``."""

    reset: bool


class _RunStepJson(TypedDict):
    """Schema for a per-step NDJSON line of ``bisect run --json``.

    NOTE(review): not referenced in the visible part of this file; the run
    tests check ``step``, ``verdict``, ``testing``, ``remaining_count`` and
    ``done`` on raw dicts and do not check ``symbol_changes``.
    """

    step: int
    testing: str
    verdict: str
    remaining_count: int
    done: bool
    symbol_changes: list[str]


class _RunDoneJson(TypedDict):
    """Schema for the final NDJSON line of ``bisect run --json``.

    NOTE(review): not referenced in the visible part of this file; the run
    tests read these same keys from the last output line as a raw dict.
    """

    done: bool
    first_bad: str | None
    steps_taken: int


def _repo(tmp_path: pathlib.Path) -> tuple[pathlib.Path, str]:
    """Alias for _make_repo for readability inside test methods.

    Returns the same ``(repo_root, repo_id)`` tuple as ``_make_repo``.
    """
    # NOTE(review): the tests visible in this file call ``_make_repo`` directly.
    return _make_repo(tmp_path)


def _parse_step(output: str) -> _StepJson:
    """Decode a bisect step payload from *output* and type-check every field.

    A missing key raises ``KeyError``; a field of the wrong type fails an
    assertion. All keys are read before any type check runs.
    """
    payload = json.loads(_json_blob(output))
    assert isinstance(payload, dict)

    # Pull every field first so a missing key surfaces before type checks.
    (
        done,
        first_bad,
        next_to_test,
        remaining_count,
        steps_remaining,
        verdict,
        symbol_changes,
    ) = (
        payload[key]
        for key in (
            "done",
            "first_bad",
            "next_to_test",
            "remaining_count",
            "steps_remaining",
            "verdict",
            "symbol_changes",
        )
    )

    assert isinstance(done, bool)
    assert first_bad is None or isinstance(first_bad, str)
    assert next_to_test is None or isinstance(next_to_test, str)
    assert isinstance(remaining_count, int)
    assert isinstance(steps_remaining, int)
    assert isinstance(verdict, str)
    assert isinstance(symbol_changes, list)

    step: _StepJson = {
        "done": done,
        "first_bad": first_bad,
        "next_to_test": next_to_test,
        "remaining_count": remaining_count,
        "steps_remaining": steps_remaining,
        "verdict": verdict,
        "symbol_changes": symbol_changes,
    }
    return step


def _parse_log(output: str) -> _LogJson:
    """Decode ``bisect log --json`` output and check its top-level field types.

    Only the container types are verified; individual entries are passed
    through untouched.
    """
    payload = json.loads(_json_blob(output))
    assert isinstance(payload, dict)
    active, entries = payload["active"], payload["entries"]
    assert isinstance(active, bool)
    assert isinstance(entries, list)
    log: _LogJson = {"active": active, "entries": entries}
    return log


def _parse_reset(output: str) -> _ResetJson:
    """Decode ``bisect reset --json`` output and check that ``reset`` is a bool."""
    payload = json.loads(_json_blob(output))
    assert isinstance(payload, dict)
    flag = payload["reset"]
    assert isinstance(flag, bool)
    parsed: _ResetJson = {"reset": flag}
    return parsed


# ---------------------------------------------------------------------------
# Unit — _toml_escape
# ---------------------------------------------------------------------------


class TestTomlEscape:
    """Unit tests for ``muse.core.bisect._toml_escape``.

    The helper is imported inside each test so an import failure is reported
    per test rather than breaking collection of the whole module.
    """

    def test_plain_string_unchanged(self) -> None:
        """Text without quotes or backslashes passes through untouched."""
        from muse.core.bisect import _toml_escape

        assert _toml_escape("feat/my-thing") == "feat/my-thing"

    def test_double_quote_escaped(self) -> None:
        """Every double-quote must come out as ``\\"``, not just the first."""
        from muse.core.bisect import _toml_escape

        result = _toml_escape('branch"with"quotes')
        assert '\\"' in result
        # After escaping, no bare double-quotes remain (only \"): removing the
        # escaped pairs must leave no quote character behind.
        assert '"' not in result.replace('\\"', "")

    def test_backslash_escaped(self) -> None:
        """Each backslash is doubled."""
        from muse.core.bisect import _toml_escape

        result = _toml_escape("branch\\with\\backslash")
        assert result == "branch\\\\with\\\\backslash"

    def test_both_escaped(self) -> None:
        """An injection-style payload keeps its text but loses every bare quote."""
        from muse.core.bisect import _toml_escape

        result = _toml_escape('malicious"; bad_id = "hacked')
        assert '\\"' in result
        # A surviving bare quote is exactly what would let the payload close
        # the TOML string early.
        assert '"' not in result.replace('\\"', "")
        assert "bad_id" in result  # literal text preserved, just escaped


# ---------------------------------------------------------------------------
# Unit — _load_state security
# ---------------------------------------------------------------------------


class TestLoadStateSecurity:
    """``_load_state`` must yield ``None`` for any state file it should not trust."""

    def test_symlink_state_file_rejected(self, tmp_path: pathlib.Path) -> None:
        """A symlink at the bisect state path must be silently ignored."""
        from muse.core.bisect import _load_state, _state_path

        root, _ = _make_repo(tmp_path)
        # Well-formed state that lives outside the repo's own state path.
        decoy = tmp_path / "real_state.toml"
        decoy.write_text('bad_id = "abc"\ngood_ids = []\nskipped_ids = []\nremaining = []\nlog = []\n')
        _state_path(root).symlink_to(decoy)

        assert _load_state(root) is None

    def test_oversized_state_file_rejected(self, tmp_path: pathlib.Path) -> None:
        """State files exceeding _MAX_STATE_BYTES must be rejected."""
        from muse.core.bisect import _MAX_STATE_BYTES, _load_state, _state_path

        root, _ = _make_repo(tmp_path)
        # One character over the cap.
        _state_path(root).write_text("x" * (_MAX_STATE_BYTES + 1))

        assert _load_state(root) is None

    def test_corrupt_state_returns_none(self, tmp_path: pathlib.Path) -> None:
        """Unparseable TOML is treated the same as having no session."""
        from muse.core.bisect import _load_state, _state_path

        root, _ = _make_repo(tmp_path)
        _state_path(root).write_text("not valid toml ]] [[[ !!!")

        assert _load_state(root) is None

    def test_missing_state_returns_none(self, tmp_path: pathlib.Path) -> None:
        """A fresh repo with no state file has no session."""
        from muse.core.bisect import _load_state

        root, _ = _make_repo(tmp_path)

        assert _load_state(root) is None


# ---------------------------------------------------------------------------
# Unit — _save_state TOML injection
# ---------------------------------------------------------------------------


class TestSaveStateTomlInjection:
    """Hostile free-text fields must round-trip through save/load unchanged."""

    def test_branch_with_quote_survives_roundtrip(self, tmp_path: pathlib.Path) -> None:
        """A branch name containing a double-quote must not corrupt the state file."""
        from muse.core.bisect import BisectStateDict, _load_state, _save_state

        root, _ = _make_repo(tmp_path)
        hostile_branch = 'malicious"; bad_id = "injected'
        original: BisectStateDict = {
            "bad_id": "a" * 64,
            "good_ids": ["b" * 64],
            "skipped_ids": [],
            "remaining": [],
            "log": [],
            "branch": hostile_branch,
        }
        _save_state(root, original)

        restored = _load_state(root)
        assert restored is not None
        # The injected ``bad_id`` assignment must not override the real one.
        assert restored.get("bad_id") == "a" * 64
        assert restored.get("branch") == hostile_branch

    def test_branch_with_backslash_survives_roundtrip(self, tmp_path: pathlib.Path) -> None:
        """Literal backslashes in a branch name are preserved exactly."""
        from muse.core.bisect import BisectStateDict, _load_state, _save_state

        root, _ = _make_repo(tmp_path)
        backslash_branch = "feat\\\\weird"
        original: BisectStateDict = {
            "bad_id": "c" * 64,
            "good_ids": ["d" * 64],
            "skipped_ids": [],
            "remaining": [],
            "log": [],
            "branch": backslash_branch,
        }
        _save_state(root, original)

        restored = _load_state(root)
        assert restored is not None
        assert restored.get("branch") == backslash_branch

    def test_symbol_filter_injection_survives_roundtrip(self, tmp_path: pathlib.Path) -> None:
        """The symbol filter is escaped just like the branch name."""
        from muse.core.bisect import BisectStateDict, _load_state, _save_state

        root, _ = _make_repo(tmp_path)
        hostile_symbol = 'billing.py::Invoice"; bad_id = "EVIL'
        original: BisectStateDict = {
            "bad_id": "e" * 64,
            "good_ids": ["f" * 64],
            "skipped_ids": [],
            "remaining": [],
            "log": [],
            "symbol_filter": hostile_symbol,
        }
        _save_state(root, original)

        restored = _load_state(root)
        assert restored is not None
        assert restored.get("bad_id") == "e" * 64
        assert restored.get("symbol_filter") == hostile_symbol


# ---------------------------------------------------------------------------
# Unit — get_bisect_next public API
# ---------------------------------------------------------------------------


class TestGetBisectNext:
    """Tests for the public ``get_bisect_next`` query, which returns ``(next_id, symbol_filter)``."""

    def test_no_session_returns_none(self, tmp_path: pathlib.Path) -> None:
        """Without a session the result is ``(None, "")``."""
        from muse.core.bisect import get_bisect_next

        root, _ = _make_repo(tmp_path)
        candidate, symbol_filter = get_bisect_next(root)
        assert candidate is None
        assert symbol_filter == ""

    def test_returns_next_after_start(self, tmp_path: pathlib.Path) -> None:
        """After ``start_bisect`` the next candidate is one of the chain's commits."""
        from muse.core.bisect import get_bisect_next, start_bisect

        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 5)
        start_bisect(root, chain[-1], [chain[0]])

        candidate, symbol_filter = get_bisect_next(root)
        assert candidate is not None
        assert candidate in chain
        assert symbol_filter == ""

    def test_returns_symbol_filter(self, tmp_path: pathlib.Path) -> None:
        """The stored symbol filter is reported back verbatim."""
        from muse.core.bisect import get_bisect_next, start_bisect

        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 4)
        # No commits touch this symbol, so remaining will be empty.
        start_bisect(root, chain[-1], [chain[0]], symbol_filter="no_file.py::NoSymbol")

        _candidate, symbol_filter = get_bisect_next(root)
        # Symbol filter is preserved regardless of whether next exists.
        assert symbol_filter == "no_file.py::NoSymbol"


# ---------------------------------------------------------------------------
# Security — CLI error routing
# ---------------------------------------------------------------------------


class TestErrorRouting:
    """Failure paths exit non-zero and surface a recognisable message.

    NOTE(review): the message is looked up in ``stderr`` when that is
    non-empty and in ``output`` otherwise, so these tests do not strictly prove
    that the text was written to stderr.
    """

    @staticmethod
    def _diagnostics(result: InvokeResult) -> str:
        """Return stderr when it has content, else fall back to the main output."""
        return result.stderr or result.output

    def test_bad_without_session_goes_to_stderr(self, tmp_path: pathlib.Path) -> None:
        root, _ = _make_repo(tmp_path)
        outcome = _invoke(root, ["bisect", "bad"])
        assert outcome.exit_code != 0
        assert "No bisect session" in self._diagnostics(outcome)

    def test_good_without_session_goes_to_stderr(self, tmp_path: pathlib.Path) -> None:
        root, _ = _make_repo(tmp_path)
        outcome = _invoke(root, ["bisect", "good"])
        assert outcome.exit_code != 0
        assert "No bisect session" in self._diagnostics(outcome)

    def test_skip_without_session_goes_to_stderr(self, tmp_path: pathlib.Path) -> None:
        root, _ = _make_repo(tmp_path)
        outcome = _invoke(root, ["bisect", "skip"])
        assert outcome.exit_code != 0
        assert "No bisect session" in self._diagnostics(outcome)

    def test_run_without_session_goes_to_stderr(self, tmp_path: pathlib.Path) -> None:
        root, _ = _make_repo(tmp_path)
        outcome = _invoke(root, ["bisect", "run", "true"])
        assert outcome.exit_code != 0
        assert "No bisect session" in self._diagnostics(outcome)

    def test_symbol_without_double_colon_goes_to_stderr(self, tmp_path: pathlib.Path) -> None:
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 2)
        command = [
            "bisect", "start",
            "--bad", chain[-1],
            "--good", chain[0],
            "--symbol", "no_colon_here",
        ]
        outcome = _invoke(root, command)
        assert outcome.exit_code != 0
        assert "❌" in self._diagnostics(outcome)

    def test_symbol_too_long_goes_to_stderr(self, tmp_path: pathlib.Path) -> None:
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 2)
        oversized_symbol = f"f.py::{'x' * 600}"
        command = [
            "bisect", "start",
            "--bad", chain[-1],
            "--good", chain[0],
            "--symbol", oversized_symbol,
        ]
        outcome = _invoke(root, command)
        assert outcome.exit_code != 0
        assert "too long" in self._diagnostics(outcome)

    def test_double_start_goes_to_stderr(self, tmp_path: pathlib.Path) -> None:
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 3)
        first = _invoke(root, ["bisect", "start", "--bad", chain[-1], "--good", chain[0]])
        assert first.exit_code == 0
        # Starting again while the first session is live must be refused.
        second = _invoke(root, ["bisect", "start", "--bad", chain[-1], "--good", chain[0]])
        assert second.exit_code != 0
        assert "already active" in self._diagnostics(second)


# ---------------------------------------------------------------------------
# Security — ANSI sanitization in outputs
# ---------------------------------------------------------------------------


class TestAnsiSanitization:
    """ANSI escape sequences in user-supplied arguments must never be echoed back.

    Both output streams are inspected: a rejected ref or symbol is most likely
    to be quoted in an error message, so checking stdout alone could pass
    without exercising the sanitisation at all.
    """

    @staticmethod
    def _all_output(result: InvokeResult) -> str:
        """Concatenate stdout with stderr (treating a missing stderr as empty)."""
        return result.output + (result.stderr or "")

    def test_ansi_in_ref_does_not_leak(self, tmp_path: pathlib.Path) -> None:
        """A ``--bad`` ref wrapped in colour codes is not reflected raw."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 2)
        ansi_ref = "\x1b[31mHEAD\x1b[0m"
        result = _invoke(root, ["bisect", "start", "--bad", ansi_ref, "--good", ids[0]])
        assert _ANSI_RE.search(self._all_output(result)) is None

    def test_ansi_in_symbol_does_not_leak(self, tmp_path: pathlib.Path) -> None:
        """A ``--symbol`` value wrapped in colour codes is not reflected raw."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 2)
        sym = "\x1b[31mfoo.py::Bar\x1b[0m"
        result = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0], "--symbol", sym])
        assert _ANSI_RE.search(self._all_output(result)) is None


# ---------------------------------------------------------------------------
# JSON schema — start
# ---------------------------------------------------------------------------


class TestJsonSchemaStart:
    """Schema checks for ``bisect start --json``."""

    def test_start_json_schema(self, tmp_path: pathlib.Path) -> None:
        """A normal start reports verdict ``started`` with well-typed counters."""
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 5)
        outcome = _invoke(
            root, ["bisect", "start", "--bad", chain[-1], "--good", chain[0], "--json"]
        )
        assert outcome.exit_code == 0

        step = _parse_step(outcome.output)
        assert step["verdict"] == "started"
        assert isinstance(step["done"], bool)
        assert isinstance(step["remaining_count"], int)
        assert step["remaining_count"] >= 0

    def test_start_json_done_when_no_remaining(self, tmp_path: pathlib.Path) -> None:
        """When bad and good are adjacent, start should report done=True immediately."""
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 2)
        outcome = _invoke(
            root, ["bisect", "start", "--bad", chain[-1], "--good", chain[0], "--json"]
        )
        assert outcome.exit_code == 0

        step = _parse_step(outcome.output)
        assert step["done"] is True
        assert step["first_bad"] == chain[-1]

    def test_start_json_symbol_changes_list(self, tmp_path: pathlib.Path) -> None:
        """``symbol_changes`` is always present as a list."""
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 4)
        outcome = _invoke(
            root, ["bisect", "start", "--bad", chain[-1], "--good", chain[0], "--json"]
        )
        step = _parse_step(outcome.output)
        assert isinstance(step["symbol_changes"], list)


# ---------------------------------------------------------------------------
# JSON schema — bad / good / skip
# ---------------------------------------------------------------------------


class TestJsonSchemaBadGoodSkip:
    """Schema checks for the three verdict subcommands under ``--json``."""

    def _start(self, root: pathlib.Path, ids: list[str]) -> None:
        """Open a session spanning the whole chain (oldest good, newest bad)."""
        started = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]])
        assert started.exit_code == 0

    def _mark_midpoint(self, tmp_path: pathlib.Path, subcommand: str) -> _StepJson:
        """Build a 5-commit repo, start a session, and apply *subcommand* to the middle commit."""
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 5)
        self._start(root, chain)
        middle = chain[len(chain) // 2]
        outcome = _invoke(root, ["bisect", subcommand, middle, "--json"])
        assert outcome.exit_code == 0
        return _parse_step(outcome.output)

    def test_bad_json_schema(self, tmp_path: pathlib.Path) -> None:
        assert self._mark_midpoint(tmp_path, "bad")["verdict"] == "bad"

    def test_good_json_schema(self, tmp_path: pathlib.Path) -> None:
        assert self._mark_midpoint(tmp_path, "good")["verdict"] == "good"

    def test_skip_json_schema(self, tmp_path: pathlib.Path) -> None:
        assert self._mark_midpoint(tmp_path, "skip")["verdict"] == "skip"


# ---------------------------------------------------------------------------
# JSON schema — log
# ---------------------------------------------------------------------------


class TestJsonSchemaLog:
    """Schema checks for ``bisect log --json``."""

    def test_log_json_no_session(self, tmp_path: pathlib.Path) -> None:
        """With no session the log is inactive and empty."""
        root, _ = _make_repo(tmp_path)
        outcome = _invoke(root, ["bisect", "log", "--json"])
        assert outcome.exit_code == 0

        log = _parse_log(outcome.output)
        assert log["active"] is False
        assert log["entries"] == []

    def test_log_json_after_start(self, tmp_path: pathlib.Path) -> None:
        """Starting a session records at least two entries."""
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 4)
        _invoke(root, ["bisect", "start", "--bad", chain[-1], "--good", chain[0]])

        outcome = _invoke(root, ["bisect", "log", "--json"])
        assert outcome.exit_code == 0

        log = _parse_log(outcome.output)
        assert log["active"] is True
        assert len(log["entries"]) >= 2

    def test_log_json_entries_are_dicts(self, tmp_path: pathlib.Path) -> None:
        """Every entry is an object with exactly the three documented keys."""
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 3)
        _invoke(root, ["bisect", "start", "--bad", chain[-1], "--good", chain[0]])

        outcome = _invoke(root, ["bisect", "log", "--json"])
        expected_keys = {"commit_id", "verdict", "timestamp"}
        for item in _parse_log(outcome.output)["entries"]:
            assert isinstance(item, dict)
            assert set(item) == expected_keys


# ---------------------------------------------------------------------------
# JSON schema — reset
# ---------------------------------------------------------------------------


class TestJsonSchemaReset:
    """Schema checks for ``bisect reset --json``."""

    def test_reset_json_no_session(self, tmp_path: pathlib.Path) -> None:
        """Reset succeeds and reports ``reset: true`` even with nothing to clear."""
        root, _ = _make_repo(tmp_path)
        outcome = _invoke(root, ["bisect", "reset", "--json"])
        assert outcome.exit_code == 0
        assert _parse_reset(outcome.output)["reset"] is True

    def test_reset_json_with_session(self, tmp_path: pathlib.Path) -> None:
        """Reset of a live session reports ``reset: true``."""
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 3)
        _invoke(root, ["bisect", "start", "--bad", chain[-1], "--good", chain[0]])

        outcome = _invoke(root, ["bisect", "reset", "--json"])
        assert outcome.exit_code == 0
        assert _parse_reset(outcome.output)["reset"] is True

    def test_reset_clears_active_flag(self, tmp_path: pathlib.Path) -> None:
        """After a reset, ``log --json`` no longer reports an active session."""
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 3)
        _invoke(root, ["bisect", "start", "--bad", chain[-1], "--good", chain[0]])
        _invoke(root, ["bisect", "reset", "--json"])

        after_reset = _invoke(root, ["bisect", "log", "--json"])
        assert _parse_log(after_reset.output)["active"] is False


# ---------------------------------------------------------------------------
# JSON schema — run (NDJSON)
# ---------------------------------------------------------------------------


class TestJsonSchemaRun:
    """Schema checks for the NDJSON stream of ``bisect run --json``."""

    @staticmethod
    def _run_with_true(tmp_path: pathlib.Path, chain_length: int) -> InvokeResult:
        """Start a session over a fresh chain and drive it with the ``true`` command."""
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, chain_length)
        _invoke(root, ["bisect", "start", "--bad", chain[-1], "--good", chain[0]])
        return _invoke(root, ["bisect", "run", "true", "--json"])

    @staticmethod
    def _records(output: str) -> list[str]:
        """Split *output* into stripped, non-blank lines."""
        trimmed = (raw.strip() for raw in output.strip().splitlines())
        return [text for text in trimmed if text]

    def test_run_json_ndjson_format(self, tmp_path: pathlib.Path) -> None:
        """``bisect run --json`` should emit valid NDJSON."""
        outcome = self._run_with_true(tmp_path, 6)
        assert outcome.exit_code == 0

        records = self._records(outcome.output)
        assert len(records) >= 1

        # Every line but the last is a per-step record.
        for step_line in records[:-1]:
            step_obj = json.loads(step_line)
            for key in ("step", "verdict", "testing", "remaining_count", "done"):
                assert key in step_obj

        # The final line is the summary record.
        summary = json.loads(records[-1])
        assert isinstance(summary["done"], bool)
        assert isinstance(summary["steps_taken"], int)

    def test_run_json_done_has_first_bad(self, tmp_path: pathlib.Path) -> None:
        """With always-good command, first_bad on the done line should be set."""
        outcome = self._run_with_true(tmp_path, 4)
        assert outcome.exit_code == 0

        summary = json.loads(self._records(outcome.output)[-1])
        finished = summary["done"]
        culprit = summary["first_bad"]
        # Only meaningful when the run actually reported completion.
        if finished:
            assert culprit is not None

    def test_run_json_steps_taken_increments(self, tmp_path: pathlib.Path) -> None:
        """An 8-commit chain needs at least one automated step."""
        outcome = self._run_with_true(tmp_path, 8)
        summary = json.loads(self._records(outcome.output)[-1])
        assert summary["steps_taken"] >= 1


# ---------------------------------------------------------------------------
# Integration — session lifecycle with --json
# ---------------------------------------------------------------------------


class TestIntegrationJson:
    """Multi-command session flows driven entirely through ``--json`` output."""

    def test_start_bad_good_converge(self, tmp_path: pathlib.Path) -> None:
        """A manual bisect session with --json converges to a first_bad."""
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 7)
        started = _invoke(root, ["bisect", "start", "--bad", chain[-1], "--good", chain[0], "--json"])
        assert started.exit_code == 0

        step = _parse_step(started.output)
        verdicts_given = 0
        # Keep answering "bad" for whatever is proposed, at most 20 times.
        while not step["done"]:
            if verdicts_given == 20:
                pytest.fail("Bisect did not converge within 20 steps")
            candidate = step["next_to_test"]
            assert candidate is not None
            reply = _invoke(root, ["bisect", "bad", candidate, "--json"])
            assert reply.exit_code == 0
            step = _parse_step(reply.output)
            verdicts_given += 1

        assert step["first_bad"] is not None

    def test_good_narrows_range(self, tmp_path: pathlib.Path) -> None:
        """Marking the midpoint good leaves fewer candidates than the interior of the chain."""
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 8)
        _invoke(root, ["bisect", "start", "--bad", chain[-1], "--good", chain[0], "--json"])

        middle = chain[len(chain) // 2]
        reply = _invoke(root, ["bisect", "good", middle, "--json"])
        step = _parse_step(reply.output)
        if not step["done"]:
            assert step["remaining_count"] < len(chain) - 2

    def test_log_grows_with_verdicts(self, tmp_path: pathlib.Path) -> None:
        """Each verdict appends to the session log."""
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 5)
        _invoke(root, ["bisect", "start", "--bad", chain[-1], "--good", chain[0]])
        _invoke(root, ["bisect", "bad", chain[len(chain) // 2]])

        log = _parse_log(_invoke(root, ["bisect", "log", "--json"]).output)
        # start logs 2 entries (bad+good); bad adds 1 more → at least 3.
        assert len(log["entries"]) >= 3

    def test_skip_excluded_from_remaining(self, tmp_path: pathlib.Path) -> None:
        """A skipped commit is not proposed again as the next candidate."""
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 6)
        started = _invoke(root, ["bisect", "start", "--bad", chain[-1], "--good", chain[0], "--json"])
        first_step = _parse_step(started.output)

        if not first_step["done"]:
            skipped = first_step["next_to_test"]
            assert skipped is not None
            reply = _invoke(root, ["bisect", "skip", skipped, "--json"])
            after_skip = _parse_step(reply.output)
            if not after_skip["done"]:
                assert after_skip["next_to_test"] != skipped


# ---------------------------------------------------------------------------
# E2E — text (non-JSON) output still works
# ---------------------------------------------------------------------------


class TestE2EText:
    """Human-readable (non ``--json``) output of each subcommand."""

    @staticmethod
    def _session(tmp_path: pathlib.Path, chain_length: int) -> tuple[pathlib.Path, list[str], InvokeResult]:
        """Create a repo with a chain and start a session; return root, ids and the start result."""
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, chain_length)
        started = _invoke(root, ["bisect", "start", "--bad", chain[-1], "--good", chain[0]])
        return root, chain, started

    def test_start_text_output_no_json(self, tmp_path: pathlib.Path) -> None:
        _, _, started = self._session(tmp_path, 4)
        assert started.exit_code == 0
        text = started.output
        assert "Bisect session started" in text or "First bad commit" in text

    def test_bad_text_output(self, tmp_path: pathlib.Path) -> None:
        root, chain, _ = self._session(tmp_path, 4)
        outcome = _invoke(root, ["bisect", "bad", chain[len(chain) // 2]])
        assert outcome.exit_code == 0
        assert "bad" in outcome.output.lower()

    def test_log_text_shows_entries(self, tmp_path: pathlib.Path) -> None:
        root, _, _ = self._session(tmp_path, 3)
        outcome = _invoke(root, ["bisect", "log"])
        assert outcome.exit_code == 0
        assert "Bisect log" in outcome.output

    def test_reset_text_output(self, tmp_path: pathlib.Path) -> None:
        root, _, _ = self._session(tmp_path, 2)
        outcome = _invoke(root, ["bisect", "reset"])
        assert outcome.exit_code == 0
        assert "reset" in outcome.output.lower()

    def test_run_text_output_converges(self, tmp_path: pathlib.Path) -> None:
        root, _, _ = self._session(tmp_path, 5)
        outcome = _invoke(root, ["bisect", "run", "true"])
        assert outcome.exit_code == 0
        text = outcome.output
        assert "First bad commit" in text or "Bisect complete" in text

    def test_no_good_flag_fails_clearly(self, tmp_path: pathlib.Path) -> None:
        """``start`` without ``--good`` must exit non-zero."""
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 2)
        outcome = _invoke(root, ["bisect", "start", "--bad", chain[-1]])
        assert outcome.exit_code != 0

    def test_log_empty_when_no_session(self, tmp_path: pathlib.Path) -> None:
        root, _ = _make_repo(tmp_path)
        outcome = _invoke(root, ["bisect", "log"])
        assert outcome.exit_code == 0
        assert "No bisect log" in outcome.output


# ---------------------------------------------------------------------------
# E2E — symbol-scoped bisect
# ---------------------------------------------------------------------------


class TestSymbolScopedBisect:
    """End-to-end checks for ``bisect start`` invoked with ``--symbol``."""

    def test_symbol_filter_no_matching_commits_warns(self, tmp_path: pathlib.Path) -> None:
        """Starting with the ``ghost.py::GhostFunc`` filter exits 0 and reports an outcome."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 4)
        argv = ["bisect", "start", "--bad", chain[-1], "--good", chain[0]]
        argv += ["--symbol", "ghost.py::GhostFunc"]

        outcome = _invoke(repo_root, argv)

        assert outcome.exit_code == 0
        # The message is accepted on either stream, so search both.
        both_streams = outcome.output + (outcome.stderr or "")
        assert any(marker in both_streams for marker in ("No commits", "First bad"))

    def test_symbol_filter_json_schema_preserved(self, tmp_path: pathlib.Path) -> None:
        """With ``--symbol`` and ``--json`` the step still carries a ``symbol_changes`` list."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 5)
        argv = ["bisect", "start", "--bad", chain[-1], "--good", chain[0]]
        argv += ["--symbol", "ghost.py::GhostFunc", "--json"]

        outcome = _invoke(repo_root, argv)

        assert outcome.exit_code == 0
        step = _parse_step(outcome.output)
        assert isinstance(step["symbol_changes"], list)

    def test_symbol_filter_state_persisted(self, tmp_path: pathlib.Path) -> None:
        """The ``--symbol`` value given to start is readable from the reloaded state."""
        from muse.core.bisect import _load_state

        symbol = "billing.py::Invoice"
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 4)
        argv = ["bisect", "start", "--bad", chain[-1], "--good", chain[0]]
        argv += ["--symbol", symbol]
        _invoke(repo_root, argv)

        reloaded = _load_state(repo_root)

        assert reloaded is not None
        assert reloaded.get("symbol_filter") == symbol


# ---------------------------------------------------------------------------
# Stress — large commit chains
# ---------------------------------------------------------------------------


class TestStress:
    """Scale and concurrency checks for the bisect commands."""

    def test_200_commit_chain_converges(self, tmp_path: pathlib.Path) -> None:
        """Bisect over 200 commits must converge in ≤9 steps (log₂(200) ≈ 7.6)."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 200)
        _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]])

        steps = 0
        for _ in range(10):
            r = _invoke(root, ["bisect", "run", "true", "--json"])
            assert r.exit_code == 0
            # Only the last non-blank output line is inspected for the "done" flag.
            lines = [ln.strip() for ln in r.output.strip().splitlines() if ln.strip()]
            if lines:
                done_raw = json.loads(lines[-1])
                if done_raw.get("done"):
                    steps = done_raw.get("steps_taken", 0)
                    break
        else:
            # Reached only when no invocation reported done (the loop never broke).
            pytest.fail("Bisect did not terminate within 10 run invocations")
        assert steps <= 9, f"Expected ≤9 steps for 200 commits, got {steps}"

    def test_concurrent_log_reads_are_safe(self, tmp_path: pathlib.Path) -> None:
        """Concurrent reads of bisect log must not crash."""
        # Import up front: inside the worker (and outside its try block) an import
        # failure would only kill the thread and leave ``errors`` empty, so the
        # test would pass without ever reading the log.
        from muse.core.bisect import get_bisect_log

        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 10)
        _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]])

        errors: list[str] = []

        def _read_log() -> None:
            # Broad catch is deliberate: any failure inside a worker thread must be
            # carried back to the main thread to fail the test.
            try:
                entries = get_bisect_log(root)
                assert isinstance(entries, list)
            except Exception as exc:
                # repr() keeps the exception type; str() of a bare AssertionError is "".
                errors.append(repr(exc))

        threads = [threading.Thread(target=_read_log) for _ in range(20)]
        for t in threads:
            t.start()
        for t in threads:
            t.join()

        assert not errors, f"Concurrent read failures: {errors}"

    def test_50_step_manual_bisect_json(self, tmp_path: pathlib.Path) -> None:
        """Up to 50 ``bisect bad --json`` calls on a 100-commit chain all parse and finish."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 100)
        r_start = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0], "--json"])
        assert r_start.exit_code == 0
        step = _parse_step(r_start.output)

        for _ in range(50):
            if step["done"]:
                # Converged: a finished session must name the first bad commit.
                assert step["first_bad"] is not None
                return
            nxt = step["next_to_test"]
            assert nxt is not None
            r = _invoke(root, ["bisect", "bad", nxt, "--json"])
            assert r.exit_code == 0
            step = _parse_step(r.output)

        # All 50 iterations were used; the step parsed last must report done.
        assert step["done"] is True


# ---------------------------------------------------------------------------
# bisect start — Extended, Security, Stress
# ---------------------------------------------------------------------------


class TestBisectStartExtended:
    """Extended unit / integration / e2e tests for muse bisect start."""

    @staticmethod
    def _repo_with_chain(tmp_path: pathlib.Path, length: int) -> tuple[pathlib.Path, list[str]]:
        """Create a repo under *tmp_path* and return it with the ids from ``_build_chain``."""
        root, repo_id = _make_repo(tmp_path)
        return root, _build_chain(root, repo_id, length)

    @staticmethod
    def _start_argv(ids: list[str], *extra: str) -> list[str]:
        """Build ``bisect start`` argv with bad=ids[-1], good=ids[0], followed by *extra*."""
        return ["bisect", "start", "--bad", ids[-1], "--good", ids[0], *extra]

    def test_start_exits_0(self, tmp_path: pathlib.Path) -> None:
        """A plain start with both bounds exits 0."""
        root, ids = self._repo_with_chain(tmp_path, 5)
        outcome = _invoke(root, self._start_argv(ids))
        assert outcome.exit_code == 0

    def test_start_j_alias_works(self, tmp_path: pathlib.Path) -> None:
        """-j is an accepted alias for --json."""
        root, ids = self._repo_with_chain(tmp_path, 5)
        outcome = _invoke(root, self._start_argv(ids, "-j"))
        assert outcome.exit_code == 0
        step = _parse_step(outcome.output)
        assert step["verdict"] == "started"

    def test_start_json_verdict_is_started(self, tmp_path: pathlib.Path) -> None:
        """The JSON step emitted by start has verdict "started"."""
        root, ids = self._repo_with_chain(tmp_path, 5)
        outcome = _invoke(root, self._start_argv(ids, "--json"))
        assert outcome.exit_code == 0
        step = _parse_step(outcome.output)
        assert step["verdict"] == "started"

    def test_start_json_done_false_with_remaining(self, tmp_path: pathlib.Path) -> None:
        """On a 5-commit chain start is not done and names a commit to test."""
        root, ids = self._repo_with_chain(tmp_path, 5)
        outcome = _invoke(root, self._start_argv(ids, "--json"))
        assert outcome.exit_code == 0
        step = _parse_step(outcome.output)
        assert step["done"] is False
        assert step["next_to_test"] is not None

    def test_start_json_done_true_when_adjacent(self, tmp_path: pathlib.Path) -> None:
        """On a 2-commit chain start is immediately done with first_bad == ids[-1]."""
        root, ids = self._repo_with_chain(tmp_path, 2)
        outcome = _invoke(root, self._start_argv(ids, "--json"))
        assert outcome.exit_code == 0
        step = _parse_step(outcome.output)
        assert step["done"] is True
        assert step["first_bad"] == ids[-1]

    def test_start_json_remaining_count_positive(self, tmp_path: pathlib.Path) -> None:
        """On an 8-commit chain remaining_count is greater than zero."""
        root, ids = self._repo_with_chain(tmp_path, 8)
        outcome = _invoke(root, self._start_argv(ids, "--json"))
        assert outcome.exit_code == 0
        step = _parse_step(outcome.output)
        assert step["remaining_count"] > 0

    def test_start_json_steps_remaining_positive(self, tmp_path: pathlib.Path) -> None:
        """On an 8-commit chain steps_remaining is greater than zero."""
        root, ids = self._repo_with_chain(tmp_path, 8)
        outcome = _invoke(root, self._start_argv(ids, "--json"))
        assert outcome.exit_code == 0
        step = _parse_step(outcome.output)
        assert step["steps_remaining"] > 0

    def test_start_json_all_seven_keys(self, tmp_path: pathlib.Path) -> None:
        """The raw JSON object carries all seven step keys."""
        root, ids = self._repo_with_chain(tmp_path, 5)
        outcome = _invoke(root, self._start_argv(ids, "--json"))
        assert outcome.exit_code == 0
        payload = json.loads(_json_blob(outcome.output))
        required_keys = {
            "done",
            "first_bad",
            "next_to_test",
            "remaining_count",
            "steps_remaining",
            "verdict",
            "symbol_changes",
        }
        assert required_keys.issubset(payload.keys())

    def test_start_multiple_good_refs(self, tmp_path: pathlib.Path) -> None:
        """Passing ``--good`` twice is accepted and still yields verdict "started"."""
        root, ids = self._repo_with_chain(tmp_path, 6)
        argv = self._start_argv(ids, "--good", ids[1], "--json")
        outcome = _invoke(root, argv)
        assert outcome.exit_code == 0
        step = _parse_step(outcome.output)
        assert step["verdict"] == "started"

    def test_start_no_good_exits_1(self, tmp_path: pathlib.Path) -> None:
        """Omitting ``--good`` exits with code 1."""
        root, ids = self._repo_with_chain(tmp_path, 3)
        outcome = _invoke(root, ["bisect", "start", "--bad", ids[-1]])
        assert outcome.exit_code == 1

    def test_start_no_good_error_to_stderr(self, tmp_path: pathlib.Path) -> None:
        """Omitting ``--good`` fails and the combined streams mention "good"."""
        root, ids = self._repo_with_chain(tmp_path, 3)
        outcome = _invoke(root, ["bisect", "start", "--bad", ids[-1]])
        assert outcome.exit_code != 0
        both_streams = outcome.output + (outcome.stderr or "")
        assert "good" in both_streams.lower()

    def test_start_double_start_exits_1(self, tmp_path: pathlib.Path) -> None:
        """A second start while a session is open exits with code 1."""
        root, ids = self._repo_with_chain(tmp_path, 5)
        argv = self._start_argv(ids)
        _invoke(root, argv)
        second = _invoke(root, argv)
        assert second.exit_code == 1

    def test_start_outside_repo_exits_2(self, tmp_path: pathlib.Path) -> None:
        """Running start in a directory that is not a repo exits with code 2."""
        not_a_repo = tmp_path / "not_a_repo"
        not_a_repo.mkdir()
        argv = ["bisect", "start", "--bad", "abc", "--good", "def"]
        outcome = _invoke(not_a_repo, argv)
        assert outcome.exit_code == 2

    def test_start_bad_defaults_to_head(self, tmp_path: pathlib.Path) -> None:
        """Start without ``--bad`` exits 0."""
        root, ids = self._repo_with_chain(tmp_path, 4)
        # HEAD points to ids[-1]; omit --bad
        argv = ["bisect", "start", "--good", ids[0], "--json"]
        outcome = _invoke(root, argv)
        assert outcome.exit_code == 0

    def test_start_text_mentions_session_started(self, tmp_path: pathlib.Path) -> None:
        """Text output announces the session start or the first bad commit."""
        root, ids = self._repo_with_chain(tmp_path, 5)
        outcome = _invoke(root, self._start_argv(ids))
        assert outcome.exit_code == 0
        accepted = ("Bisect session started", "First bad commit")
        assert any(phrase in outcome.output for phrase in accepted)

    def test_start_text_no_json_object(self, tmp_path: pathlib.Path) -> None:
        """Without ``--json`` the output does not begin with a JSON object."""
        root, ids = self._repo_with_chain(tmp_path, 5)
        outcome = _invoke(root, self._start_argv(ids))
        assert outcome.exit_code == 0
        leading_text = outcome.output.strip()
        assert not leading_text.startswith("{")

    def test_start_help_description_present(self, tmp_path: pathlib.Path) -> None:
        """``--help`` output contains "Agent quickstart" or the word "binary"."""
        root = _make_repo(tmp_path)[0]
        outcome = _invoke(root, ["bisect", "start", "--help"])
        assert "Agent quickstart" in outcome.output or "binary" in outcome.output.lower()

    def test_start_invalid_ref_exits_1(self, tmp_path: pathlib.Path) -> None:
        """An unresolvable ``--bad`` ref exits with code 1."""
        root, ids = self._repo_with_chain(tmp_path, 3)
        argv = ["bisect", "start", "--bad", "nonexistent_ref_abc123", "--good", ids[0]]
        outcome = _invoke(root, argv)
        assert outcome.exit_code == 1


class TestBisectStartSecurity:
    """Security hardening tests for muse bisect start."""

    def test_start_symbol_changes_no_ansi_in_json(self, tmp_path: pathlib.Path) -> None:
        """symbol_changes entries are sanitized in JSON output."""
        from unittest.mock import patch
        from muse.core.bisect import BisectResult
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 5)
        # Result carrying an ANSI colour sequence, returned in place of the real start.
        injected = BisectResult(
            done=False,
            first_bad=None,
            next_to_test=ids[2],
            remaining_count=3,
            steps_remaining=2,
            verdict="started",
            symbol_changes=["add Invoice.compute\x1b[31mred\x1b[0m"],
        )
        with patch("muse.cli.commands.bisect.start_bisect", return_value=injected):
            result = _invoke(
                root,
                ["bisect", "start", "--bad", ids[-1], "--good", ids[0],
                 "--symbol", "billing.py::Invoice", "--json"],
            )
        assert result.exit_code == 0
        assert "\x1b" not in result.output
        # JSON encoders must escape control characters (ESC becomes \u001b), so the
        # raw-text check above passes even for an unsanitized entry. Decode the
        # payload and inspect the actual string values.
        decoded = json.loads(_json_blob(result.output))
        assert all("\x1b" not in str(entry) for entry in decoded["symbol_changes"])

    def test_start_symbol_changes_no_ansi_in_text(self, tmp_path: pathlib.Path) -> None:
        """symbol_changes entries are sanitized in text output."""
        from unittest.mock import patch
        from muse.core.bisect import BisectResult
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 5)
        # Result carrying an ANSI colour sequence, returned in place of the real start.
        injected = BisectResult(
            done=False,
            first_bad=None,
            next_to_test=ids[2],
            remaining_count=3,
            steps_remaining=2,
            verdict="started",
            symbol_changes=["add Invoice.compute\x1b[31mred\x1b[0m"],
        )
        with patch("muse.cli.commands.bisect.start_bisect", return_value=injected):
            result = _invoke(
                root,
                ["bisect", "start", "--bad", ids[-1], "--good", ids[0],
                 "--symbol", "billing.py::Invoice"],
            )
        assert result.exit_code == 0
        assert "\x1b" not in result.output

    def test_start_symbol_missing_separator_exits_1(self, tmp_path: pathlib.Path) -> None:
        """--symbol without '::' separator is rejected."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 3)
        result = _invoke(
            root,
            ["bisect", "start", "--bad", ids[-1], "--good", ids[0], "--symbol", "NoSeparator"],
        )
        assert result.exit_code == 1

    def test_start_symbol_too_long_exits_1(self, tmp_path: pathlib.Path) -> None:
        """--symbol exceeding max length is rejected."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 3)
        # 513 characters in total, with a valid '::' separator.
        long_sym = "a" * 510 + "::b"
        result = _invoke(
            root,
            ["bisect", "start", "--bad", ids[-1], "--good", ids[0], "--symbol", long_sym],
        )
        assert result.exit_code == 1

    def test_start_json_is_valid_json(self, tmp_path: pathlib.Path) -> None:
        """JSON output is well-formed."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 5)
        result = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0], "--json"])
        assert result.exit_code == 0
        d = json.loads(_json_blob(result.output))
        assert isinstance(d, dict)

    def test_start_json_bool_fields_are_bool(self, tmp_path: pathlib.Path) -> None:
        """done field is always a bool, never int or string."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 5)
        result = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0], "--json"])
        assert result.exit_code == 0
        d = json.loads(_json_blob(result.output))
        assert isinstance(d["done"], bool)


class TestBisectStartStress:
    """Performance and scale tests for muse bisect start."""

    def test_start_100_commit_chain(self, tmp_path: pathlib.Path) -> None:
        """Start over a 100-commit chain exits 0 and returns a midpoint."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 100)
        argv = ["bisect", "start", "--bad", chain[-1], "--good", chain[0], "--json"]

        outcome = _invoke(repo_root, argv)

        assert outcome.exit_code == 0
        step = _parse_step(outcome.output)
        assert step["done"] is False
        assert step["remaining_count"] > 0
        assert step["next_to_test"] is not None

    def test_start_performance_100_commits(self, tmp_path: pathlib.Path) -> None:
        """Start over 100 commits completes within 5 seconds."""
        import time

        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 100)
        argv = ["bisect", "start", "--bad", chain[-1], "--good", chain[0]]

        # Only the CLI invocation is timed; repo and chain setup are excluded.
        began = time.monotonic()
        outcome = _invoke(repo_root, argv)
        elapsed = time.monotonic() - began

        assert outcome.exit_code == 0
        assert elapsed < 5.0, f"start over 100 commits took {elapsed:.2f}s"

    def test_start_midpoint_is_within_range(self, tmp_path: pathlib.Path) -> None:
        """The suggested commit is neither the good nor the bad endpoint."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 20)
        argv = ["bisect", "start", "--bad", chain[-1], "--good", chain[0], "--json"]

        outcome = _invoke(repo_root, argv)

        assert outcome.exit_code == 0
        step = _parse_step(outcome.output)
        endpoints = (chain[0], chain[-1])
        assert step["next_to_test"] not in endpoints


# ---------------------------------------------------------------------------
# bisect bad — Extended, Security, Stress
# ---------------------------------------------------------------------------


class TestBisectBadExtended:
    """Extended unit / integration / e2e tests for muse bisect bad."""

    def _start(self, root: pathlib.Path, ids: list[str]) -> None:
        """Open a bisect session (bad=ids[-1], good=ids[0]) and require exit code 0."""
        start_argv = ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]
        started = _invoke(root, start_argv)
        assert started.exit_code == 0

    def _session(self, tmp_path: pathlib.Path, length: int) -> tuple[pathlib.Path, list[str]]:
        """Create a repo, build a chain via ``_build_chain`` and open a session on it."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, length)
        self._start(root, ids)
        return root, ids

    def test_bad_exits_0(self, tmp_path: pathlib.Path) -> None:
        """Marking the middle commit bad exits 0."""
        root, ids = self._session(tmp_path, 6)
        middle = ids[len(ids) // 2]
        outcome = _invoke(root, ["bisect", "bad", middle])
        assert outcome.exit_code == 0

    def test_bad_j_alias_works(self, tmp_path: pathlib.Path) -> None:
        """``-j`` yields a parseable step with verdict "bad"."""
        root, ids = self._session(tmp_path, 6)
        middle = ids[len(ids) // 2]
        outcome = _invoke(root, ["bisect", "bad", middle, "-j"])
        assert outcome.exit_code == 0
        step = _parse_step(outcome.output)
        assert step["verdict"] == "bad"

    def test_bad_json_verdict_is_bad(self, tmp_path: pathlib.Path) -> None:
        """``--json`` yields a step with verdict "bad"."""
        root, ids = self._session(tmp_path, 6)
        middle = ids[len(ids) // 2]
        outcome = _invoke(root, ["bisect", "bad", middle, "--json"])
        assert outcome.exit_code == 0
        step = _parse_step(outcome.output)
        assert step["verdict"] == "bad"

    def test_bad_json_all_seven_keys(self, tmp_path: pathlib.Path) -> None:
        """The raw JSON object carries all seven step keys."""
        root, ids = self._session(tmp_path, 6)
        middle = ids[len(ids) // 2]
        outcome = _invoke(root, ["bisect", "bad", middle, "--json"])
        assert outcome.exit_code == 0
        payload = json.loads(_json_blob(outcome.output))
        required_keys = {
            "done",
            "first_bad",
            "next_to_test",
            "remaining_count",
            "steps_remaining",
            "verdict",
            "symbol_changes",
        }
        assert required_keys.issubset(payload.keys())

    def test_bad_reduces_remaining(self, tmp_path: pathlib.Path) -> None:
        """Marking the suggested commit bad lowers remaining_count."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 10)
        start_argv = ["bisect", "start", "--bad", ids[-1], "--good", ids[0], "--json"]
        first_step = _parse_step(_invoke(root, start_argv).output)
        remaining_before = first_step["remaining_count"]
        suggested = first_step["next_to_test"]

        outcome = _invoke(root, ["bisect", "bad", suggested, "--json"])

        assert outcome.exit_code == 0
        remaining_after = _parse_step(outcome.output)["remaining_count"]
        assert remaining_after < remaining_before

    def test_bad_done_true_when_isolated(self, tmp_path: pathlib.Path) -> None:
        """Marking ids[1] bad on a 3-commit chain finishes with a first_bad set."""
        # With 3 commits: good=ids[0], bad=ids[2] → ids[1] is the only remaining
        root, ids = self._session(tmp_path, 3)
        outcome = _invoke(root, ["bisect", "bad", ids[1], "--json"])
        assert outcome.exit_code == 0
        step = _parse_step(outcome.output)
        assert step["done"] is True
        assert step["first_bad"] is not None

    def test_bad_first_bad_set_when_done(self, tmp_path: pathlib.Path) -> None:
        """When done, first_bad is a string."""
        root, ids = self._session(tmp_path, 3)
        outcome = _invoke(root, ["bisect", "bad", ids[1], "--json"])
        assert outcome.exit_code == 0
        step = _parse_step(outcome.output)
        assert step["done"] is True
        assert isinstance(step["first_bad"], str)

    def test_bad_defaults_to_head(self, tmp_path: pathlib.Path) -> None:
        """``bisect bad`` without a ref exits 0."""
        root, _ids = self._session(tmp_path, 5)
        # HEAD points to ids[-1] (the known-bad); marking it bad again is valid
        outcome = _invoke(root, ["bisect", "bad", "--json"])
        assert outcome.exit_code == 0

    def test_bad_no_session_exits_1(self, tmp_path: pathlib.Path) -> None:
        """Without an open session ``bisect bad`` exits with code 1."""
        root = _make_repo(tmp_path)[0]
        outcome = _invoke(root, ["bisect", "bad"])
        assert outcome.exit_code == 1

    def test_bad_no_session_error_to_stderr(self, tmp_path: pathlib.Path) -> None:
        """Without a session the combined streams contain "No bisect session"."""
        root = _make_repo(tmp_path)[0]
        outcome = _invoke(root, ["bisect", "bad"])
        assert outcome.exit_code != 0
        both_streams = outcome.output + (outcome.stderr or "")
        assert "No bisect session" in both_streams

    def test_bad_outside_repo_exits_2(self, tmp_path: pathlib.Path) -> None:
        """Running ``bisect bad`` in a directory that is not a repo exits with code 2."""
        not_a_repo = tmp_path / "not_a_repo"
        not_a_repo.mkdir()
        outcome = _invoke(not_a_repo, ["bisect", "bad"])
        assert outcome.exit_code == 2

    def test_bad_invalid_ref_exits_1(self, tmp_path: pathlib.Path) -> None:
        """An unresolvable ref exits with code 1."""
        root, _ids = self._session(tmp_path, 4)
        outcome = _invoke(root, ["bisect", "bad", "deadbeef_nonexistent"])
        assert outcome.exit_code == 1

    def test_bad_text_mentions_commit(self, tmp_path: pathlib.Path) -> None:
        """Text output includes the short id of the commit that was marked."""
        root, ids = self._session(tmp_path, 5)
        middle = ids[len(ids) // 2]
        outcome = _invoke(root, ["bisect", "bad", middle])
        assert outcome.exit_code == 0
        assert short_id(middle) in outcome.output

    def test_bad_text_no_json_object(self, tmp_path: pathlib.Path) -> None:
        """Without ``--json`` the output does not begin with a JSON object."""
        root, ids = self._session(tmp_path, 5)
        middle = ids[len(ids) // 2]
        outcome = _invoke(root, ["bisect", "bad", middle])
        assert outcome.exit_code == 0
        leading_text = outcome.output.strip()
        assert not leading_text.startswith("{")

    def test_bad_help_description_present(self, tmp_path: pathlib.Path) -> None:
        """``--help`` output contains "Agent quickstart" or the word "regression"."""
        root = _make_repo(tmp_path)[0]
        outcome = _invoke(root, ["bisect", "bad", "--help"])
        assert "Agent quickstart" in outcome.output or "regression" in outcome.output.lower()

    def test_bad_advances_bisect_log(self, tmp_path: pathlib.Path) -> None:
        """After marking bad, the bisect log records the verdict."""
        from muse.core.bisect import _load_state

        root, ids = self._session(tmp_path, 6)
        middle = ids[len(ids) // 2]
        _invoke(root, ["bisect", "bad", middle])

        state = _load_state(root)

        assert state is not None
        log_entries = state.get("log", [])
        assert any("bad" in entry for entry in log_entries)

    def test_bad_remaining_count_not_negative(self, tmp_path: pathlib.Path) -> None:
        """remaining_count never drops below zero."""
        root, ids = self._session(tmp_path, 5)
        middle = ids[len(ids) // 2]
        outcome = _invoke(root, ["bisect", "bad", middle, "--json"])
        assert outcome.exit_code == 0
        step = _parse_step(outcome.output)
        assert step["remaining_count"] >= 0

    def test_bad_symbol_changes_is_list(self, tmp_path: pathlib.Path) -> None:
        """symbol_changes in the JSON step is a list."""
        root, ids = self._session(tmp_path, 5)
        middle = ids[len(ids) // 2]
        outcome = _invoke(root, ["bisect", "bad", middle, "--json"])
        assert outcome.exit_code == 0
        step = _parse_step(outcome.output)
        assert isinstance(step["symbol_changes"], list)


class TestBisectBadSecurity:
    """Security hardening tests for muse bisect bad."""

    def _start(self, root: pathlib.Path, ids: list[str]) -> None:
        """Open a bisect session (bad=ids[-1], good=ids[0]) and require exit code 0."""
        r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]])
        assert r.exit_code == 0

    def test_bad_json_is_valid_json(self, tmp_path: pathlib.Path) -> None:
        """JSON output decodes to a dict."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 5)
        self._start(root, ids)
        result = _invoke(root, ["bisect", "bad", ids[len(ids) // 2], "--json"])
        assert result.exit_code == 0
        d = json.loads(_json_blob(result.output))
        assert isinstance(d, dict)

    def test_bad_json_done_is_bool(self, tmp_path: pathlib.Path) -> None:
        """The done field decodes to a bool."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 5)
        self._start(root, ids)
        result = _invoke(root, ["bisect", "bad", ids[len(ids) // 2], "--json"])
        assert result.exit_code == 0
        assert isinstance(json.loads(_json_blob(result.output))["done"], bool)

    def test_bad_symbol_changes_sanitized_in_json(self, tmp_path: pathlib.Path) -> None:
        """ANSI in symbol_changes entries stripped from JSON output."""
        from unittest.mock import patch
        from muse.core.bisect import BisectResult
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 5)
        self._start(root, ids)
        # Result carrying an ANSI colour sequence, returned in place of the real mark_bad.
        injected = BisectResult(
            done=False,
            first_bad=None,
            next_to_test=ids[2],
            remaining_count=2,
            steps_remaining=1,
            verdict="bad",
            symbol_changes=["modify func\x1b[31mred\x1b[0m"],
        )
        with patch("muse.cli.commands.bisect.mark_bad", return_value=injected):
            result = _invoke(root, ["bisect", "bad", ids[2], "--json"])
        # Require success first: a failed command with empty output would
        # otherwise satisfy the "no ESC" checks without testing anything.
        assert result.exit_code == 0
        assert "\x1b" not in result.output
        # JSON encoders must escape control characters (ESC becomes \u001b), so the
        # raw-text check above passes even for an unsanitized entry. Decode the
        # payload and inspect the actual string values.
        decoded = json.loads(_json_blob(result.output))
        assert all("\x1b" not in str(entry) for entry in decoded["symbol_changes"])

    def test_bad_symbol_changes_sanitized_in_text(self, tmp_path: pathlib.Path) -> None:
        """ANSI in symbol_changes entries stripped from text output."""
        from unittest.mock import patch
        from muse.core.bisect import BisectResult
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 5)
        self._start(root, ids)
        # Result carrying an ANSI colour sequence, returned in place of the real mark_bad.
        injected = BisectResult(
            done=False,
            first_bad=None,
            next_to_test=ids[2],
            remaining_count=2,
            steps_remaining=1,
            verdict="bad",
            symbol_changes=["modify func\x1b[31mred\x1b[0m"],
        )
        with patch("muse.cli.commands.bisect.mark_bad", return_value=injected):
            result = _invoke(root, ["bisect", "bad", ids[2]])
        # Require success first: a failed command with empty output would
        # otherwise satisfy the "no ESC" check without testing anything.
        assert result.exit_code == 0
        assert "\x1b" not in result.output

    def test_bad_error_output_to_stderr_not_stdout(self, tmp_path: pathlib.Path) -> None:
        """On failure (no session) no JSON object is emitted in the captured output."""
        root, _ = _make_repo(tmp_path)
        result = _invoke(root, ["bisect", "bad"])
        assert result.exit_code != 0
        # CliRunner mixes stderr into output; verify no JSON object was emitted
        assert not result.output.strip().startswith("{")

    def test_bad_ansi_in_ref_does_not_leak_to_output(self, tmp_path: pathlib.Path) -> None:
        """Passing an ANSI-injected ref does not leak escape codes to stdout."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 4)
        self._start(root, ids)
        result = _invoke(root, ["bisect", "bad", "\x1b[31mHEAD\x1b[0m"])
        # Will fail (ref not found) but must not echo ANSI to stdout
        assert "\x1b" not in result.output


class TestBisectBadStress:
    """Performance and scale tests for muse bisect bad."""

    def _start(self, root: pathlib.Path, ids: list[str]) -> None:
        """Open a bisect session (bad=ids[-1], good=ids[0]) and require exit code 0."""
        start_argv = ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]
        started = _invoke(root, start_argv)
        assert started.exit_code == 0

    def test_bad_on_100_commit_chain(self, tmp_path: pathlib.Path) -> None:
        """Marking ids[50] bad on a 100-commit session exits 0 with remaining_count >= 0."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 100)
        self._start(repo_root, chain)

        outcome = _invoke(repo_root, ["bisect", "bad", chain[50], "--json"])

        assert outcome.exit_code == 0
        step = _parse_step(outcome.output)
        assert step["remaining_count"] >= 0

    def test_bad_performance_100_commits(self, tmp_path: pathlib.Path) -> None:
        """Marking bad on a 100-commit session completes within 5 seconds."""
        import time

        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 100)
        self._start(repo_root, chain)
        argv = ["bisect", "bad", chain[50], "--json"]

        # Only the ``bisect bad`` invocation is timed; setup is excluded.
        began = time.monotonic()
        outcome = _invoke(repo_root, argv)
        elapsed = time.monotonic() - began

        assert outcome.exit_code == 0
        assert elapsed < 5.0, f"bisect bad on 100 commits took {elapsed:.2f}s"

    def test_bad_converges_full_session(self, tmp_path: pathlib.Path) -> None:
        """Marking next_to_test bad each step finishes a 20-commit chain in at most 10 steps."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 20)
        start_argv = ["bisect", "start", "--bad", chain[-1], "--good", chain[0], "--json"]
        started = _invoke(repo_root, start_argv)
        assert started.exit_code == 0
        step = _parse_step(started.output)

        marks_made = 0
        while not step["done"] and marks_made < 10:
            candidate = step["next_to_test"]
            assert candidate is not None
            marked = _invoke(repo_root, ["bisect", "bad", candidate, "--json"])
            assert marked.exit_code == 0
            step = _parse_step(marked.output)
            marks_made += 1

        assert step["done"], "bisect did not converge within 10 bad steps on 20-commit chain"


# ---------------------------------------------------------------------------
# bisect good — Extended, Security, Stress
# ---------------------------------------------------------------------------


class TestBisectGoodExtended:
    """Extended unit / integration / e2e tests for muse bisect good.

    Unless a test says otherwise, the session under test is started with the
    last commit of the chain as bad and the first commit as good.
    """

    def _start(self, root: pathlib.Path, ids: list[str]) -> None:
        """Start a bisect session (bad=``ids[-1]``, good=``ids[0]``) and require exit 0."""
        r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]])
        assert r.exit_code == 0

    def _session(self, tmp_path: pathlib.Path, count: int) -> tuple[pathlib.Path, list[str]]:
        """Create a repo, build a *count*-commit chain and start a session on it.

        Returns ``(repo_root, commit_ids)`` so tests can pick refs by index.
        """
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, count)
        self._start(root, ids)
        return root, ids

    def test_good_exits_0(self, tmp_path: pathlib.Path) -> None:
        """Marking the middle commit good succeeds in text mode."""
        root, ids = self._session(tmp_path, 6)
        result = _invoke(root, ["bisect", "good", ids[len(ids) // 2]])
        assert result.exit_code == 0

    def test_good_j_alias_works(self, tmp_path: pathlib.Path) -> None:
        """``-j`` yields a parseable step whose verdict is ``good``."""
        root, ids = self._session(tmp_path, 6)
        result = _invoke(root, ["bisect", "good", ids[len(ids) // 2], "-j"])
        assert result.exit_code == 0
        assert _parse_step(result.output)["verdict"] == "good"

    def test_good_json_verdict_is_good(self, tmp_path: pathlib.Path) -> None:
        """``--json`` reports ``verdict == "good"``."""
        root, ids = self._session(tmp_path, 6)
        result = _invoke(root, ["bisect", "good", ids[len(ids) // 2], "--json"])
        assert result.exit_code == 0
        assert _parse_step(result.output)["verdict"] == "good"

    def test_good_json_all_seven_keys(self, tmp_path: pathlib.Path) -> None:
        """The JSON payload carries at least the seven step keys."""
        root, ids = self._session(tmp_path, 6)
        result = _invoke(root, ["bisect", "good", ids[len(ids) // 2], "--json"])
        assert result.exit_code == 0
        payload = json.loads(_json_blob(result.output))
        required = {
            "done",
            "first_bad",
            "next_to_test",
            "remaining_count",
            "steps_remaining",
            "verdict",
            "symbol_changes",
        }
        assert required <= set(payload.keys())

    def test_good_reduces_remaining(self, tmp_path: pathlib.Path) -> None:
        """Marking the suggested commit good shrinks ``remaining_count``."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 10)
        r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0], "--json"])
        # Fail here, not inside _parse_step, if the session could not be started.
        assert r.exit_code == 0
        started = _parse_step(r.output)
        before = started["remaining_count"]
        mid = started["next_to_test"]
        # next_to_test may be None once a session is done; never pass None as a CLI arg.
        assert mid is not None
        result = _invoke(root, ["bisect", "good", mid, "--json"])
        assert result.exit_code == 0
        assert _parse_step(result.output)["remaining_count"] < before

    def test_good_done_true_when_isolated(self, tmp_path: pathlib.Path) -> None:
        """Marking the only remaining commit good isolates first bad immediately."""
        # good=ids[0], bad=ids[2]: ids[1] is the midpoint; marking it good resolves
        root, ids = self._session(tmp_path, 3)
        result = _invoke(root, ["bisect", "good", ids[1], "--json"])
        assert result.exit_code == 0
        parsed = _parse_step(result.output)
        assert parsed["done"] is True
        assert parsed["first_bad"] == ids[2]

    def test_good_first_bad_set_when_done(self, tmp_path: pathlib.Path) -> None:
        """When the session is done, ``first_bad`` is a string."""
        root, ids = self._session(tmp_path, 3)
        result = _invoke(root, ["bisect", "good", ids[1], "--json"])
        assert result.exit_code == 0
        parsed = _parse_step(result.output)
        assert parsed["done"] is True
        assert isinstance(parsed["first_bad"], str)

    def test_good_defaults_to_head(self, tmp_path: pathlib.Path) -> None:
        """Omitting the ref is accepted and exits 0."""
        root, _ids = self._session(tmp_path, 5)
        # HEAD is ids[-1] (known bad); marking it good is legal but pushes bad boundary
        result = _invoke(root, ["bisect", "good", "--json"])
        assert result.exit_code == 0

    def test_good_no_session_exits_1(self, tmp_path: pathlib.Path) -> None:
        """Without an active session the command exits 1."""
        root, _ = _make_repo(tmp_path)
        result = _invoke(root, ["bisect", "good"])
        assert result.exit_code == 1

    def test_good_no_session_error_message(self, tmp_path: pathlib.Path) -> None:
        """The no-session error text appears on stdout or stderr."""
        root, _ = _make_repo(tmp_path)
        result = _invoke(root, ["bisect", "good"])
        combined = result.output + (result.stderr or "")
        assert "No bisect session" in combined

    def test_good_outside_repo_exits_2(self, tmp_path: pathlib.Path) -> None:
        """Running in a directory that is not a repo exits 2."""
        empty = tmp_path / "not_a_repo"
        empty.mkdir()
        result = _invoke(empty, ["bisect", "good"])
        assert result.exit_code == 2

    def test_good_invalid_ref_exits_1(self, tmp_path: pathlib.Path) -> None:
        """An unresolvable ref exits 1."""
        root, _ids = self._session(tmp_path, 4)
        result = _invoke(root, ["bisect", "good", "deadbeef_nonexistent"])
        assert result.exit_code == 1

    def test_good_text_mentions_commit(self, tmp_path: pathlib.Path) -> None:
        """Text output contains the short id of the marked commit."""
        root, ids = self._session(tmp_path, 5)
        mid = ids[len(ids) // 2]
        result = _invoke(root, ["bisect", "good", mid])
        assert result.exit_code == 0
        assert short_id(mid) in result.output

    def test_good_text_no_json_object(self, tmp_path: pathlib.Path) -> None:
        """Text output does not start with a JSON object."""
        root, ids = self._session(tmp_path, 5)
        result = _invoke(root, ["bisect", "good", ids[len(ids) // 2]])
        assert result.exit_code == 0
        assert not result.output.strip().startswith("{")

    def test_good_help_description_present(self, tmp_path: pathlib.Path) -> None:
        """``--help`` shows the quickstart section or mentions regressions."""
        root, _ = _make_repo(tmp_path)
        result = _invoke(root, ["bisect", "good", "--help"])
        assert "Agent quickstart" in result.output or "regression" in result.output.lower()

    def test_good_advances_bisect_log(self, tmp_path: pathlib.Path) -> None:
        """A successful ``good`` leaves a log entry containing "good" in the state."""
        from muse.core.bisect import _load_state

        root, ids = self._session(tmp_path, 6)
        marked = _invoke(root, ["bisect", "good", ids[len(ids) // 2]])
        # The state is only meaningful if the command under test actually succeeded.
        assert marked.exit_code == 0
        state = _load_state(root)
        assert state is not None
        assert any("good" in entry for entry in state.get("log", []))

    def test_good_remaining_count_not_negative(self, tmp_path: pathlib.Path) -> None:
        """``remaining_count`` is never negative."""
        root, ids = self._session(tmp_path, 5)
        result = _invoke(root, ["bisect", "good", ids[len(ids) // 2], "--json"])
        assert result.exit_code == 0
        assert _parse_step(result.output)["remaining_count"] >= 0

    def test_good_symbol_changes_is_list(self, tmp_path: pathlib.Path) -> None:
        """``symbol_changes`` is a list."""
        root, ids = self._session(tmp_path, 5)
        result = _invoke(root, ["bisect", "good", ids[len(ids) // 2], "--json"])
        assert result.exit_code == 0
        assert isinstance(_parse_step(result.output)["symbol_changes"], list)


class TestBisectGoodSecurity:
    """Security hardening tests for muse bisect good."""

    def _start(self, root: pathlib.Path, ids: list[str]) -> None:
        """Start a session with ``ids[-1]`` bad and ``ids[0]`` good; require exit 0."""
        started = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]])
        assert started.exit_code == 0

    def _session(self, tmp_path: pathlib.Path, count: int) -> tuple[pathlib.Path, list[str]]:
        """Build a repo with *count* chained commits and start a session on it."""
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, count)
        self._start(root, chain)
        return root, chain

    def test_good_json_is_valid_json(self, tmp_path: pathlib.Path) -> None:
        """``--json`` output decodes to a JSON object."""
        root, chain = self._session(tmp_path, 5)
        outcome = _invoke(root, ["bisect", "good", chain[len(chain) // 2], "--json"])
        assert outcome.exit_code == 0
        decoded = json.loads(_json_blob(outcome.output))
        assert isinstance(decoded, dict)

    def test_good_json_done_is_bool(self, tmp_path: pathlib.Path) -> None:
        """The ``done`` field is a real boolean."""
        root, chain = self._session(tmp_path, 5)
        outcome = _invoke(root, ["bisect", "good", chain[len(chain) // 2], "--json"])
        assert outcome.exit_code == 0
        decoded = json.loads(_json_blob(outcome.output))
        assert isinstance(decoded["done"], bool)

    def test_good_symbol_changes_sanitized_in_json(self, tmp_path: pathlib.Path) -> None:
        """An escape byte in ``symbol_changes`` never reaches JSON stdout."""
        from unittest.mock import patch
        from muse.core.bisect import BisectResult

        root, chain = self._session(tmp_path, 5)
        # Stand-in result whose symbol change embeds an ANSI colour sequence.
        tainted = BisectResult(
            done=False,
            first_bad=None,
            next_to_test=chain[2],
            remaining_count=2,
            steps_remaining=1,
            verdict="good",
            symbol_changes=["add func\x1b[32mgreen\x1b[0m"],
        )
        with patch("muse.cli.commands.bisect.mark_good", return_value=tainted):
            outcome = _invoke(root, ["bisect", "good", chain[2], "--json"])
        assert "\x1b" not in outcome.output

    def test_good_symbol_changes_sanitized_in_text(self, tmp_path: pathlib.Path) -> None:
        """An escape byte in ``symbol_changes`` never reaches text stdout."""
        from unittest.mock import patch
        from muse.core.bisect import BisectResult

        root, chain = self._session(tmp_path, 5)
        # Stand-in result whose symbol change embeds an ANSI colour sequence.
        tainted = BisectResult(
            done=False,
            first_bad=None,
            next_to_test=chain[2],
            remaining_count=2,
            steps_remaining=1,
            verdict="good",
            symbol_changes=["add func\x1b[32mgreen\x1b[0m"],
        )
        with patch("muse.cli.commands.bisect.mark_good", return_value=tainted):
            outcome = _invoke(root, ["bisect", "good", chain[2]])
        assert "\x1b" not in outcome.output

    def test_good_error_no_json_on_failure(self, tmp_path: pathlib.Path) -> None:
        """A failing call (no session) does not print a JSON object on stdout."""
        root, _ = _make_repo(tmp_path)
        outcome = _invoke(root, ["bisect", "good"])
        assert outcome.exit_code != 0
        stdout_text = outcome.output.strip()
        assert not stdout_text.startswith("{")

    def test_good_ansi_in_ref_does_not_leak(self, tmp_path: pathlib.Path) -> None:
        """A ref containing ANSI escapes is not echoed raw to stdout."""
        root, _chain = self._session(tmp_path, 4)
        hostile_ref = "\x1b[32mHEAD\x1b[0m"
        outcome = _invoke(root, ["bisect", "good", hostile_ref])
        assert "\x1b" not in outcome.output


class TestBisectGoodStress:
    """Performance and scale tests for muse bisect good."""

    def _start(self, root: pathlib.Path, ids: list[str]) -> None:
        """Start a session with ``ids[-1]`` bad and ``ids[0]`` good; require exit 0."""
        started = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]])
        assert started.exit_code == 0

    def test_good_on_100_commit_chain(self, tmp_path: pathlib.Path) -> None:
        """``bisect good`` works on a 100-commit session and reports a sane count."""
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 100)
        self._start(root, chain)
        outcome = _invoke(root, ["bisect", "good", chain[10], "--json"])
        assert outcome.exit_code == 0
        step = _parse_step(outcome.output)
        assert step["remaining_count"] >= 0

    def test_good_performance_100_commits(self, tmp_path: pathlib.Path) -> None:
        """A single ``bisect good`` call on 100 commits finishes in under 5 s."""
        import time

        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 100)
        self._start(root, chain)

        # Time only the command itself, not the fixture setup above.
        clock_start = time.monotonic()
        outcome = _invoke(root, ["bisect", "good", chain[10], "--json"])
        elapsed = time.monotonic() - clock_start

        assert outcome.exit_code == 0
        assert elapsed < 5.0, f"bisect good on 100 commits took {elapsed:.2f}s"

    def test_good_converges_full_session(self, tmp_path: pathlib.Path) -> None:
        """Always answering "good" for ``next_to_test`` finishes a 20-commit session.

        The intent is convergence in roughly log2(20) steps; the loop grants a
        budget of 10 ``bisect good`` calls before declaring failure.
        """
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 20)
        started = _invoke(
            root, ["bisect", "start", "--bad", chain[-1], "--good", chain[0], "--json"]
        )
        assert started.exit_code == 0

        step = _parse_step(started.output)
        budget = 10
        while budget > 0 and not step["done"]:
            candidate = step["next_to_test"]
            assert candidate is not None
            outcome = _invoke(root, ["bisect", "good", candidate, "--json"])
            assert outcome.exit_code == 0
            step = _parse_step(outcome.output)
            budget -= 1

        assert step["done"], "bisect did not converge within 10 good steps on 20-commit chain"


# ---------------------------------------------------------------------------
# bisect skip — Extended, Security, Stress
# ---------------------------------------------------------------------------


class TestBisectSkipExtended:
    """Extended unit / integration / e2e tests for muse bisect skip.

    Unless a test says otherwise, the session under test is started with the
    last commit of the chain as bad and the first commit as good.
    """

    def _start(self, root: pathlib.Path, ids: list[str]) -> None:
        """Start a bisect session (bad=``ids[-1]``, good=``ids[0]``) and require exit 0."""
        r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]])
        assert r.exit_code == 0

    def _session(self, tmp_path: pathlib.Path, count: int) -> tuple[pathlib.Path, list[str]]:
        """Create a repo, build a *count*-commit chain and start a session on it.

        Returns ``(repo_root, commit_ids)`` so tests can pick refs by index.
        """
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, count)
        self._start(root, ids)
        return root, ids

    def test_skip_exits_0(self, tmp_path: pathlib.Path) -> None:
        """Skipping the middle commit succeeds in text mode."""
        root, ids = self._session(tmp_path, 6)
        result = _invoke(root, ["bisect", "skip", ids[len(ids) // 2]])
        assert result.exit_code == 0

    def test_skip_j_alias_works(self, tmp_path: pathlib.Path) -> None:
        """``-j`` yields a parseable step whose verdict is ``skip``."""
        root, ids = self._session(tmp_path, 6)
        result = _invoke(root, ["bisect", "skip", ids[len(ids) // 2], "-j"])
        assert result.exit_code == 0
        assert _parse_step(result.output)["verdict"] == "skip"

    def test_skip_json_verdict_is_skip(self, tmp_path: pathlib.Path) -> None:
        """``--json`` reports ``verdict == "skip"``."""
        root, ids = self._session(tmp_path, 6)
        result = _invoke(root, ["bisect", "skip", ids[len(ids) // 2], "--json"])
        assert result.exit_code == 0
        assert _parse_step(result.output)["verdict"] == "skip"

    def test_skip_json_all_seven_keys(self, tmp_path: pathlib.Path) -> None:
        """The JSON payload carries at least the seven step keys."""
        root, ids = self._session(tmp_path, 6)
        result = _invoke(root, ["bisect", "skip", ids[len(ids) // 2], "--json"])
        assert result.exit_code == 0
        payload = json.loads(_json_blob(result.output))
        required = {
            "done",
            "first_bad",
            "next_to_test",
            "remaining_count",
            "steps_remaining",
            "verdict",
            "symbol_changes",
        }
        assert required <= set(payload.keys())

    def test_skip_removes_commit_from_remaining(self, tmp_path: pathlib.Path) -> None:
        """Skipping the suggested commit shrinks ``remaining_count``."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 10)
        r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0], "--json"])
        # Fail here, not inside _parse_step, if the session could not be started.
        assert r.exit_code == 0
        started = _parse_step(r.output)
        before = started["remaining_count"]
        mid = started["next_to_test"]
        # next_to_test may be None once a session is done; never pass None as a CLI arg.
        assert mid is not None
        result = _invoke(root, ["bisect", "skip", mid, "--json"])
        assert result.exit_code == 0
        assert _parse_step(result.output)["remaining_count"] < before

    def test_skip_persisted_in_state(self, tmp_path: pathlib.Path) -> None:
        """A skipped commit id is recorded under ``skipped_ids`` in the state."""
        from muse.core.bisect import _load_state

        root, ids = self._session(tmp_path, 6)
        mid = ids[len(ids) // 2]
        skipped = _invoke(root, ["bisect", "skip", mid])
        # The state is only meaningful if the command under test actually succeeded.
        assert skipped.exit_code == 0
        state = _load_state(root)
        assert state is not None
        assert mid in state.get("skipped_ids", [])

    def test_skip_defaults_to_head(self, tmp_path: pathlib.Path) -> None:
        """Omitting the ref is accepted and exits 0."""
        root, _ids = self._session(tmp_path, 5)
        result = _invoke(root, ["bisect", "skip", "--json"])
        assert result.exit_code == 0

    def test_skip_no_session_exits_1(self, tmp_path: pathlib.Path) -> None:
        """Without an active session the command exits 1."""
        root, _ = _make_repo(tmp_path)
        result = _invoke(root, ["bisect", "skip"])
        assert result.exit_code == 1

    def test_skip_no_session_error_message(self, tmp_path: pathlib.Path) -> None:
        """The no-session error text appears on stdout or stderr."""
        root, _ = _make_repo(tmp_path)
        result = _invoke(root, ["bisect", "skip"])
        combined = result.output + (result.stderr or "")
        assert "No bisect session" in combined

    def test_skip_outside_repo_exits_2(self, tmp_path: pathlib.Path) -> None:
        """Running in a directory that is not a repo exits 2."""
        empty = tmp_path / "not_a_repo"
        empty.mkdir()
        result = _invoke(empty, ["bisect", "skip"])
        assert result.exit_code == 2

    def test_skip_invalid_ref_exits_1(self, tmp_path: pathlib.Path) -> None:
        """An unresolvable ref exits 1."""
        root, _ids = self._session(tmp_path, 4)
        result = _invoke(root, ["bisect", "skip", "deadbeef_nonexistent"])
        assert result.exit_code == 1

    def test_skip_text_mentions_commit(self, tmp_path: pathlib.Path) -> None:
        """Text output contains the short id of the skipped commit."""
        root, ids = self._session(tmp_path, 5)
        mid = ids[len(ids) // 2]
        result = _invoke(root, ["bisect", "skip", mid])
        assert result.exit_code == 0
        assert short_id(mid) in result.output

    def test_skip_text_no_json_object(self, tmp_path: pathlib.Path) -> None:
        """Text output does not start with a JSON object."""
        root, ids = self._session(tmp_path, 5)
        result = _invoke(root, ["bisect", "skip", ids[len(ids) // 2]])
        assert result.exit_code == 0
        assert not result.output.strip().startswith("{")

    def test_skip_help_description_present(self, tmp_path: pathlib.Path) -> None:
        """``--help`` shows the quickstart section or mentions ``125``."""
        root, _ = _make_repo(tmp_path)
        result = _invoke(root, ["bisect", "skip", "--help"])
        assert "Agent quickstart" in result.output or "125" in result.output

    def test_skip_advances_log(self, tmp_path: pathlib.Path) -> None:
        """A successful ``skip`` leaves a log entry containing "skip" in the state."""
        from muse.core.bisect import _load_state

        root, ids = self._session(tmp_path, 6)
        skipped = _invoke(root, ["bisect", "skip", ids[len(ids) // 2]])
        # The state is only meaningful if the command under test actually succeeded.
        assert skipped.exit_code == 0
        state = _load_state(root)
        assert state is not None
        assert any("skip" in entry for entry in state.get("log", []))

    def test_skip_remaining_count_not_negative(self, tmp_path: pathlib.Path) -> None:
        """``remaining_count`` is never negative."""
        root, ids = self._session(tmp_path, 5)
        result = _invoke(root, ["bisect", "skip", ids[len(ids) // 2], "--json"])
        assert result.exit_code == 0
        assert _parse_step(result.output)["remaining_count"] >= 0

    def test_skip_symbol_changes_is_list(self, tmp_path: pathlib.Path) -> None:
        """``symbol_changes`` is a list."""
        root, ids = self._session(tmp_path, 5)
        result = _invoke(root, ["bisect", "skip", ids[len(ids) // 2], "--json"])
        assert result.exit_code == 0
        assert isinstance(_parse_step(result.output)["symbol_changes"], list)

    def test_skip_multiple_commits(self, tmp_path: pathlib.Path) -> None:
        """Skipping several commits all land in skipped_ids."""
        from muse.core.bisect import _load_state

        root, ids = self._session(tmp_path, 8)
        targets = (2, 3, 4)
        for idx in targets:
            r = _invoke(root, ["bisect", "skip", ids[idx]])
            assert r.exit_code == 0
        state = _load_state(root)
        assert state is not None
        skipped = state.get("skipped_ids", [])
        assert all(ids[i] in skipped for i in targets)


class TestBisectSkipSecurity:
    """Security hardening tests for muse bisect skip."""

    def _start(self, root: pathlib.Path, ids: list[str]) -> None:
        """Start a session with ``ids[-1]`` bad and ``ids[0]`` good; require exit 0."""
        started = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]])
        assert started.exit_code == 0

    def _session(self, tmp_path: pathlib.Path, count: int) -> tuple[pathlib.Path, list[str]]:
        """Build a repo with *count* chained commits and start a session on it."""
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, count)
        self._start(root, chain)
        return root, chain

    def test_skip_json_is_valid_json(self, tmp_path: pathlib.Path) -> None:
        """``--json`` output decodes to a JSON object."""
        root, chain = self._session(tmp_path, 5)
        outcome = _invoke(root, ["bisect", "skip", chain[len(chain) // 2], "--json"])
        assert outcome.exit_code == 0
        decoded = json.loads(_json_blob(outcome.output))
        assert isinstance(decoded, dict)

    def test_skip_json_done_is_bool(self, tmp_path: pathlib.Path) -> None:
        """The ``done`` field is a real boolean."""
        root, chain = self._session(tmp_path, 5)
        outcome = _invoke(root, ["bisect", "skip", chain[len(chain) // 2], "--json"])
        assert outcome.exit_code == 0
        decoded = json.loads(_json_blob(outcome.output))
        assert isinstance(decoded["done"], bool)

    def test_skip_symbol_changes_sanitized_in_json(self, tmp_path: pathlib.Path) -> None:
        """An escape byte in ``symbol_changes`` never reaches JSON stdout."""
        from unittest.mock import patch
        from muse.core.bisect import BisectResult

        root, chain = self._session(tmp_path, 5)
        # Stand-in result whose symbol change embeds an ANSI colour sequence.
        tainted = BisectResult(
            done=False,
            first_bad=None,
            next_to_test=chain[2],
            remaining_count=2,
            steps_remaining=1,
            verdict="skip",
            symbol_changes=["modify func\x1b[33myellow\x1b[0m"],
        )
        with patch("muse.cli.commands.bisect.skip_commit", return_value=tainted):
            outcome = _invoke(root, ["bisect", "skip", chain[2], "--json"])
        assert "\x1b" not in outcome.output

    def test_skip_symbol_changes_sanitized_in_text(self, tmp_path: pathlib.Path) -> None:
        """An escape byte in ``symbol_changes`` never reaches text stdout."""
        from unittest.mock import patch
        from muse.core.bisect import BisectResult

        root, chain = self._session(tmp_path, 5)
        # Stand-in result whose symbol change embeds an ANSI colour sequence.
        tainted = BisectResult(
            done=False,
            first_bad=None,
            next_to_test=chain[2],
            remaining_count=2,
            steps_remaining=1,
            verdict="skip",
            symbol_changes=["modify func\x1b[33myellow\x1b[0m"],
        )
        with patch("muse.cli.commands.bisect.skip_commit", return_value=tainted):
            outcome = _invoke(root, ["bisect", "skip", chain[2]])
        assert "\x1b" not in outcome.output

    def test_skip_error_no_json_on_failure(self, tmp_path: pathlib.Path) -> None:
        """A failing call (no session) does not print a JSON object on stdout."""
        root, _ = _make_repo(tmp_path)
        outcome = _invoke(root, ["bisect", "skip"])
        assert outcome.exit_code != 0
        stdout_text = outcome.output.strip()
        assert not stdout_text.startswith("{")

    def test_skip_ansi_in_ref_does_not_leak(self, tmp_path: pathlib.Path) -> None:
        """A ref containing ANSI escapes is not echoed raw to stdout."""
        root, _chain = self._session(tmp_path, 4)
        hostile_ref = "\x1b[33mHEAD\x1b[0m"
        outcome = _invoke(root, ["bisect", "skip", hostile_ref])
        assert "\x1b" not in outcome.output


class TestBisectSkipStress:
    """Performance and scale tests for muse bisect skip."""

    def _start(self, root: pathlib.Path, ids: list[str]) -> None:
        """Start a session with ``ids[-1]`` bad and ``ids[0]`` good; require exit 0."""
        started = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]])
        assert started.exit_code == 0

    def test_skip_on_100_commit_chain(self, tmp_path: pathlib.Path) -> None:
        """``bisect skip`` works on a 100-commit session and reports a sane count."""
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 100)
        self._start(root, chain)
        outcome = _invoke(root, ["bisect", "skip", chain[50], "--json"])
        assert outcome.exit_code == 0
        step = _parse_step(outcome.output)
        assert step["remaining_count"] >= 0

    def test_skip_performance_100_commits(self, tmp_path: pathlib.Path) -> None:
        """A single ``bisect skip`` call on 100 commits finishes in under 5 s."""
        import time

        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 100)
        self._start(root, chain)

        # Time only the command itself, not the fixture setup above.
        clock_start = time.monotonic()
        outcome = _invoke(root, ["bisect", "skip", chain[50], "--json"])
        elapsed = time.monotonic() - clock_start

        assert outcome.exit_code == 0
        assert elapsed < 5.0, f"bisect skip on 100 commits took {elapsed:.2f}s"

    def test_skip_reduces_remaining_monotonically(self, tmp_path: pathlib.Path) -> None:
        """``remaining_count`` never grows across up to five consecutive skips.

        Each round skips the commit the previous step suggested; the loop
        stops early once the session is done or nothing is left to test.
        """
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 20)
        started = _invoke(
            root, ["bisect", "start", "--bad", chain[-1], "--good", chain[0], "--json"]
        )
        assert started.exit_code == 0

        step = _parse_step(started.output)
        counts = [step["remaining_count"]]
        for _ in range(5):
            if step["done"] or step["next_to_test"] is None:
                break
            candidate = step["next_to_test"]
            outcome = _invoke(root, ["bisect", "skip", candidate, "--json"])
            assert outcome.exit_code == 0
            step = _parse_step(outcome.output)
            counts.append(step["remaining_count"])

        # Compare each count with its successor: the sequence must be non-increasing.
        assert all(earlier >= later for earlier, later in zip(counts, counts[1:]))


# ---------------------------------------------------------------------------
# bisect run — Extended, Security, Stress
# ---------------------------------------------------------------------------


class TestBisectRunExtended:
    """Extended unit / integration / e2e tests for muse bisect run.

    In ``--json`` mode these tests treat stdout as NDJSON: every non-blank
    line except the last is read as a per-step record, and the last line as
    the final summary record.
    """

    def _start(self, root: pathlib.Path, ids: list[str]) -> None:
        """Start a bisect session (bad=``ids[-1]``, good=``ids[0]``) and require exit 0."""
        r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]])
        assert r.exit_code == 0

    def _session(self, tmp_path: pathlib.Path, count: int) -> tuple[pathlib.Path, list[str]]:
        """Create a repo, build a *count*-commit chain and start a session on it.

        Returns ``(repo_root, commit_ids)``.
        """
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, count)
        self._start(root, ids)
        return root, ids

    @staticmethod
    def _ndjson_lines(output: str) -> list[str]:
        """Return the stripped, non-blank lines of *output* in their original order."""
        return [line.strip() for line in output.strip().splitlines() if line.strip()]

    def test_run_exits_0_with_true(self, tmp_path: pathlib.Path) -> None:
        """Running with the ``true`` command exits 0."""
        root, _ids = self._session(tmp_path, 5)
        result = _invoke(root, ["bisect", "run", "true"])
        assert result.exit_code == 0

    def test_run_j_alias_works(self, tmp_path: pathlib.Path) -> None:
        """``-j`` emits NDJSON whose last record has ``done`` set to True."""
        root, _ids = self._session(tmp_path, 5)
        result = _invoke(root, ["bisect", "run", "true", "-j"])
        assert result.exit_code == 0
        lines = self._ndjson_lines(result.output)
        assert len(lines) >= 1
        done_raw = json.loads(lines[-1])
        assert done_raw["done"] is True

    def test_run_json_ndjson_step_keys(self, tmp_path: pathlib.Path) -> None:
        """The first step record carries the six per-step keys."""
        root, _ids = self._session(tmp_path, 6)
        result = _invoke(root, ["bisect", "run", "true", "--json"])
        assert result.exit_code == 0
        lines = self._ndjson_lines(result.output)
        # Require a step record instead of silently passing when none was emitted;
        # test_run_json_steps_taken_positive expects >= 1 step for this same setup.
        assert len(lines) > 1, "expected at least one step record before the summary line"
        step = json.loads(lines[0])
        required = {"step", "testing", "verdict", "remaining_count", "done", "symbol_changes"}
        assert required <= set(step.keys())

    def test_run_json_done_line_keys(self, tmp_path: pathlib.Path) -> None:
        """The summary record has exactly ``done``, ``first_bad`` and ``steps_taken``."""
        root, _ids = self._session(tmp_path, 5)
        result = _invoke(root, ["bisect", "run", "true", "--json"])
        assert result.exit_code == 0
        done = json.loads(self._ndjson_lines(result.output)[-1])
        assert set(done.keys()) == {"done", "first_bad", "steps_taken"}

    def test_run_json_done_true_on_last_line(self, tmp_path: pathlib.Path) -> None:
        """The last NDJSON line reports ``done`` as True."""
        root, _ids = self._session(tmp_path, 5)
        result = _invoke(root, ["bisect", "run", "true", "--json"])
        assert result.exit_code == 0
        lines = self._ndjson_lines(result.output)
        assert json.loads(lines[-1])["done"] is True

    def test_run_json_steps_taken_positive(self, tmp_path: pathlib.Path) -> None:
        """``steps_taken`` in the summary is at least 1."""
        root, _ids = self._session(tmp_path, 6)
        result = _invoke(root, ["bisect", "run", "true", "--json"])
        assert result.exit_code == 0
        lines = self._ndjson_lines(result.output)
        assert json.loads(lines[-1])["steps_taken"] >= 1

    def test_run_json_verdict_good_with_true(self, tmp_path: pathlib.Path) -> None:
        """With ``true`` as the command every step record has verdict ``good``."""
        root, _ids = self._session(tmp_path, 5)
        result = _invoke(root, ["bisect", "run", "true", "--json"])
        assert result.exit_code == 0
        step_lines = self._ndjson_lines(result.output)[:-1]
        assert all(json.loads(line)["verdict"] == "good" for line in step_lines)

    def test_run_json_verdict_bad_with_false(self, tmp_path: pathlib.Path) -> None:
        """With ``false`` as the command every step record has verdict ``bad``."""
        root, _ids = self._session(tmp_path, 5)
        result = _invoke(root, ["bisect", "run", "false", "--json"])
        assert result.exit_code == 0
        step_lines = self._ndjson_lines(result.output)[:-1]
        assert all(json.loads(line)["verdict"] == "bad" for line in step_lines)

    def test_run_no_session_exits_1(self, tmp_path: pathlib.Path) -> None:
        """Without an active session the command exits 1."""
        root, _ = _make_repo(tmp_path)
        result = _invoke(root, ["bisect", "run", "true"])
        assert result.exit_code == 1

    def test_run_no_session_error_message(self, tmp_path: pathlib.Path) -> None:
        """The no-session error text appears on stdout or stderr."""
        root, _ = _make_repo(tmp_path)
        result = _invoke(root, ["bisect", "run", "true"])
        combined = result.output + (result.stderr or "")
        assert "No bisect session" in combined

    def test_run_outside_repo_exits_2(self, tmp_path: pathlib.Path) -> None:
        """Running in a directory that is not a repo exits 2."""
        empty = tmp_path / "not_a_repo"
        empty.mkdir()
        result = _invoke(empty, ["bisect", "run", "true"])
        assert result.exit_code == 2

    def test_run_text_mentions_testing(self, tmp_path: pathlib.Path) -> None:
        """Text output mentions "Testing" or contains an arrow marker."""
        root, _ids = self._session(tmp_path, 5)
        result = _invoke(root, ["bisect", "run", "true"])
        assert result.exit_code == 0
        assert "Testing" in result.output or "→" in result.output

    def test_run_text_mentions_first_bad(self, tmp_path: pathlib.Path) -> None:
        """Text output announces the first bad commit or bisect completion."""
        root, _ids = self._session(tmp_path, 5)
        result = _invoke(root, ["bisect", "run", "true"])
        assert result.exit_code == 0
        assert "First bad commit" in result.output or "Bisect complete" in result.output

    def test_run_help_description_present(self, tmp_path: pathlib.Path) -> None:
        """``--help`` shows the quickstart section or mentions ``125``."""
        root, _ = _make_repo(tmp_path)
        result = _invoke(root, ["bisect", "run", "--help"])
        assert "Agent quickstart" in result.output or "125" in result.output

    def test_run_json_step_numbers_increment(self, tmp_path: pathlib.Path) -> None:
        """Step records are numbered 1, 2, 3, ... without gaps."""
        root, _ids = self._session(tmp_path, 8)
        result = _invoke(root, ["bisect", "run", "true", "--json"])
        assert result.exit_code == 0
        step_lines = self._ndjson_lines(result.output)[:-1]
        step_nums = [json.loads(line)["step"] for line in step_lines]
        assert step_nums == list(range(1, len(step_nums) + 1))

    def test_run_json_remaining_nonincreasing(self, tmp_path: pathlib.Path) -> None:
        """``remaining_count`` never grows from one step record to the next."""
        root, _ids = self._session(tmp_path, 8)
        result = _invoke(root, ["bisect", "run", "true", "--json"])
        assert result.exit_code == 0
        step_lines = self._ndjson_lines(result.output)[:-1]
        counts = [json.loads(line)["remaining_count"] for line in step_lines]
        assert all(counts[i] >= counts[i + 1] for i in range(len(counts) - 1))

    def test_run_text_no_json_by_default(self, tmp_path: pathlib.Path) -> None:
        """Without ``--json`` no output line starts with ``{``."""
        root, _ids = self._session(tmp_path, 4)
        result = _invoke(root, ["bisect", "run", "true"])
        assert result.exit_code == 0
        # Text mode should not have a JSON object on a line by itself
        json_lines = [line for line in result.output.splitlines() if line.strip().startswith("{")]
        assert json_lines == []

    def test_run_json_first_bad_set_on_done(self, tmp_path: pathlib.Path) -> None:
        """The summary record is done and names a first bad commit."""
        root, _ids = self._session(tmp_path, 5)
        result = _invoke(root, ["bisect", "run", "true", "--json"])
        assert result.exit_code == 0
        done = json.loads(self._ndjson_lines(result.output)[-1])
        # Assert completion outright so the first_bad check can never be skipped;
        # test_run_json_done_true_on_last_line expects done=True for this same setup.
        assert done["done"] is True
        assert done["first_bad"] is not None


class TestBisectRunSecurity:
    """Security hardening tests for muse bisect run."""

    def _start(self, root: pathlib.Path, ids: list[str]) -> None:
        """Start a session with the last id as bad and the first id as good."""
        started = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]])
        assert started.exit_code == 0

    def test_run_json_lines_are_valid_json(self, tmp_path: pathlib.Path) -> None:
        """Each non-blank ``--json`` output line decodes to a JSON object."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 5)
        self._start(repo_root, chain)
        outcome = _invoke(repo_root, ["bisect", "run", "true", "--json"])
        assert outcome.exit_code == 0
        for raw in outcome.output.strip().splitlines():
            text = raw.strip()
            if not text:
                continue
            assert isinstance(json.loads(text), dict)

    def test_run_json_done_field_is_bool(self, tmp_path: pathlib.Path) -> None:
        """The ``done`` field of every ``--json`` line is a real boolean."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 5)
        self._start(repo_root, chain)
        outcome = _invoke(repo_root, ["bisect", "run", "true", "--json"])
        assert outcome.exit_code == 0
        records = [
            raw.strip()
            for raw in outcome.output.strip().splitlines()
            if raw.strip()
        ]
        assert all(isinstance(json.loads(record)["done"], bool) for record in records)

    def test_run_text_symbol_changes_sanitized(self, tmp_path: pathlib.Path) -> None:
        """ANSI codes in symbol_changes are stripped from text output during run."""
        from unittest.mock import patch
        from muse.core.bisect import BisectResult
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 5)
        self._start(repo_root, chain)
        canned = BisectResult(
            done=True,
            first_bad=chain[2],
            next_to_test=None,
            remaining_count=0,
            steps_remaining=0,
            verdict="bad",
            symbol_changes=[],
        )
        # The symbol-ops helper is patched to return a string carrying escape
        # sequences; the other two patches supply canned values.
        with patch(
            "muse.cli.commands.bisect._symbol_ops_in_commit",
            return_value=["add func\x1b[31mred\x1b[0m"],
        ):
            with patch(
                "muse.cli.commands.bisect.get_bisect_next",
                side_effect=[(chain[2], "billing.py::Invoice"), (None, "")],
            ):
                with patch(
                    "muse.cli.commands.bisect.run_bisect_command",
                    return_value=canned,
                ):
                    outcome = _invoke(repo_root, ["bisect", "run", "true"])
        assert "\x1b" not in outcome.output

    def test_run_error_no_json_on_failure(self, tmp_path: pathlib.Path) -> None:
        """Running without a started session fails and does not open with ``{``."""
        repo_root, _ = _make_repo(tmp_path)
        outcome = _invoke(repo_root, ["bisect", "run", "true"])
        assert outcome.exit_code != 0
        assert not outcome.output.strip().startswith("{")

    def test_run_json_no_ansi_in_output(self, tmp_path: pathlib.Path) -> None:
        """``run --json`` output contains no ESC character."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 5)
        self._start(repo_root, chain)
        outcome = _invoke(repo_root, ["bisect", "run", "true", "--json"])
        assert outcome.exit_code == 0
        assert "\x1b" not in outcome.output

    def test_run_text_no_ansi_in_output(self, tmp_path: pathlib.Path) -> None:
        """Plain-text ``run`` output contains no ESC character."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 5)
        self._start(repo_root, chain)
        outcome = _invoke(repo_root, ["bisect", "run", "true"])
        assert outcome.exit_code == 0
        assert "\x1b" not in outcome.output


class TestBisectRunStress:
    """Performance and scale tests for muse bisect run."""

    def _start(self, root: pathlib.Path, ids: list[str]) -> None:
        """Start a session with the last id as bad and the first id as good."""
        started = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]])
        assert started.exit_code == 0

    def test_run_50_commit_chain(self, tmp_path: pathlib.Path) -> None:
        """run converges on a 50-commit chain with always-good command."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 50)
        self._start(repo_root, chain)
        outcome = _invoke(repo_root, ["bisect", "run", "true", "--json"])
        assert outcome.exit_code == 0
        records = [
            raw.strip()
            for raw in outcome.output.strip().splitlines()
            if raw.strip()
        ]
        final = json.loads(records[-1])
        assert final["done"] is True

    def test_run_performance_20_commits(self, tmp_path: pathlib.Path) -> None:
        """run over 20 commits completes within 10 seconds."""
        import time
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 20)
        self._start(repo_root, chain)
        # Only the ``run`` invocation is timed; repo setup is excluded.
        began = time.monotonic()
        outcome = _invoke(repo_root, ["bisect", "run", "true", "--json"])
        elapsed = time.monotonic() - began
        assert outcome.exit_code == 0
        assert elapsed < 10.0, f"bisect run 20 commits took {elapsed:.2f}s"

    def test_run_steps_taken_within_log2(self, tmp_path: pathlib.Path) -> None:
        """``steps_taken`` stays within ``int(log2(n)) + 2`` for an always-good command."""
        import math
        repo_root, repo_id = _make_repo(tmp_path)
        n = 32
        chain = _build_chain(repo_root, repo_id, n)
        self._start(repo_root, chain)
        outcome = _invoke(repo_root, ["bisect", "run", "true", "--json"])
        assert outcome.exit_code == 0
        records = [
            raw.strip()
            for raw in outcome.output.strip().splitlines()
            if raw.strip()
        ]
        ceiling = int(math.log2(n)) + 2
        assert json.loads(records[-1])["steps_taken"] <= ceiling


# ---------------------------------------------------------------------------
# bisect log — Extended, Security, Stress
# ---------------------------------------------------------------------------


class TestBisectLogExtended:
    """Extended unit / integration / e2e tests for muse bisect log."""

    def _start(self, root: pathlib.Path, ids: list[str]) -> None:
        """Start a session with the last id as bad and the first id as good."""
        started = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]])
        assert started.exit_code == 0

    def test_log_exits_0_no_session(self, tmp_path: pathlib.Path) -> None:
        """``log`` exits 0 in a repo where no session was started."""
        repo_root, _ = _make_repo(tmp_path)
        assert _invoke(repo_root, ["bisect", "log"]).exit_code == 0

    def test_log_exits_0_with_session(self, tmp_path: pathlib.Path) -> None:
        """``log`` exits 0 after a session has been started."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 4)
        self._start(repo_root, chain)
        assert _invoke(repo_root, ["bisect", "log"]).exit_code == 0

    def test_log_j_alias_works(self, tmp_path: pathlib.Path) -> None:
        """``-j`` yields parseable log output whose ``active`` is a boolean."""
        repo_root, _ = _make_repo(tmp_path)
        outcome = _invoke(repo_root, ["bisect", "log", "-j"])
        assert outcome.exit_code == 0
        payload = _parse_log(outcome.output)
        assert isinstance(payload["active"], bool)

    def test_log_json_active_false_no_session(self, tmp_path: pathlib.Path) -> None:
        """``active`` is ``False`` when no session was started."""
        repo_root, _ = _make_repo(tmp_path)
        outcome = _invoke(repo_root, ["bisect", "log", "--json"])
        assert outcome.exit_code == 0
        payload = _parse_log(outcome.output)
        assert payload["active"] is False

    def test_log_json_active_true_with_session(self, tmp_path: pathlib.Path) -> None:
        """``active`` is ``True`` once a session has been started."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 4)
        self._start(repo_root, chain)
        outcome = _invoke(repo_root, ["bisect", "log", "--json"])
        assert outcome.exit_code == 0
        payload = _parse_log(outcome.output)
        assert payload["active"] is True

    def test_log_json_entries_empty_no_session(self, tmp_path: pathlib.Path) -> None:
        """``entries`` is an empty list when no session was started."""
        repo_root, _ = _make_repo(tmp_path)
        outcome = _invoke(repo_root, ["bisect", "log", "--json"])
        assert outcome.exit_code == 0
        payload = _parse_log(outcome.output)
        assert payload["entries"] == []

    def test_log_json_entries_grow_with_verdicts(self, tmp_path: pathlib.Path) -> None:
        """Recording a ``bad`` verdict increases the number of log entries."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 6)
        self._start(repo_root, chain)
        log_before = _invoke(repo_root, ["bisect", "log", "--json"])
        count_before = len(_parse_log(log_before.output)["entries"])
        _invoke(repo_root, ["bisect", "bad", chain[3]])
        log_after = _invoke(repo_root, ["bisect", "log", "--json"])
        count_after = len(_parse_log(log_after.output)["entries"])
        assert count_after > count_before

    def test_log_json_two_keys(self, tmp_path: pathlib.Path) -> None:
        """The JSON document contains at least ``active`` and ``entries``."""
        repo_root, _ = _make_repo(tmp_path)
        outcome = _invoke(repo_root, ["bisect", "log", "--json"])
        assert outcome.exit_code == 0
        document = json.loads(_json_blob(outcome.output))
        assert {"active", "entries"} <= set(document.keys())

    def test_log_json_start_records_bad_and_good(self, tmp_path: pathlib.Path) -> None:
        """After ``start`` the log holds both a ``bad`` and a ``good`` verdict."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 4)
        self._start(repo_root, chain)
        logged = _invoke(repo_root, ["bisect", "log", "--json"])
        seen = [entry["verdict"] for entry in _parse_log(logged.output)["entries"]]
        assert "bad" in seen
        assert "good" in seen

    def test_log_json_entries_contain_commit_ids(self, tmp_path: pathlib.Path) -> None:
        """Every entry's ``commit_id`` starts with the ``sha256:`` prefix."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 4)
        self._start(repo_root, chain)
        logged = _invoke(repo_root, ["bisect", "log", "--json"])
        # commit_id is stored with the sha256: prefix (71 chars total)
        assert all(
            entry["commit_id"].startswith("sha256:")
            for entry in _parse_log(logged.output)["entries"]
        )

    def test_log_json_entries_are_dicts(self, tmp_path: pathlib.Path) -> None:
        """Each entry is a dict with ``commit_id``, ``verdict`` and ``timestamp``."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 3)
        self._start(repo_root, chain)
        logged = _invoke(repo_root, ["bisect", "log", "--json"])
        for entry in _parse_log(logged.output)["entries"]:
            assert isinstance(entry, dict)
            assert "commit_id" in entry
            assert "verdict" in entry
            assert "timestamp" in entry

    def test_log_active_false_after_reset(self, tmp_path: pathlib.Path) -> None:
        """``active`` is ``False`` again once the session is reset."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 3)
        self._start(repo_root, chain)
        _invoke(repo_root, ["bisect", "reset"])
        outcome = _invoke(repo_root, ["bisect", "log", "--json"])
        assert outcome.exit_code == 0
        payload = _parse_log(outcome.output)
        assert payload["active"] is False

    def test_log_text_shows_bisect_log_header(self, tmp_path: pathlib.Path) -> None:
        """Text output for a started session contains ``Bisect log``."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 4)
        self._start(repo_root, chain)
        outcome = _invoke(repo_root, ["bisect", "log"])
        assert outcome.exit_code == 0
        assert "Bisect log" in outcome.output

    def test_log_text_no_session_message(self, tmp_path: pathlib.Path) -> None:
        """Text output with no session contains ``No bisect log``."""
        repo_root, _ = _make_repo(tmp_path)
        outcome = _invoke(repo_root, ["bisect", "log"])
        assert outcome.exit_code == 0
        assert "No bisect log" in outcome.output

    def test_log_outside_repo_exits_2(self, tmp_path: pathlib.Path) -> None:
        """``log`` in a directory that was never set up as a repo exits 2."""
        plain_dir = tmp_path / "not_a_repo"
        plain_dir.mkdir()
        assert _invoke(plain_dir, ["bisect", "log"]).exit_code == 2

    def test_log_help_description_present(self, tmp_path: pathlib.Path) -> None:
        """``--help`` mentions ``Agent quickstart`` or the word ``verdict``."""
        repo_root, _ = _make_repo(tmp_path)
        help_text = _invoke(repo_root, ["bisect", "log", "--help"]).output
        assert "Agent quickstart" in help_text or "verdict" in help_text.lower()

    def test_log_text_no_json_object(self, tmp_path: pathlib.Path) -> None:
        """No line of the text output starts with an opening brace."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 4)
        self._start(repo_root, chain)
        outcome = _invoke(repo_root, ["bisect", "log"])
        assert outcome.exit_code == 0
        brace_lines = [
            text for text in outcome.output.splitlines() if text.strip().startswith("{")
        ]
        assert not brace_lines


class TestBisectLogSecurity:
    """Security hardening tests for muse bisect log."""

    def _start(self, root: pathlib.Path, ids: list[str]) -> None:
        """Start a session with the last id as bad and the first id as good."""
        started = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]])
        assert started.exit_code == 0

    def test_log_json_is_valid_json(self, tmp_path: pathlib.Path) -> None:
        """The JSON blob in ``log --json`` output decodes to a dict."""
        repo_root, _ = _make_repo(tmp_path)
        outcome = _invoke(repo_root, ["bisect", "log", "--json"])
        assert outcome.exit_code == 0
        document = json.loads(_json_blob(outcome.output))
        assert isinstance(document, dict)

    def test_log_json_active_is_bool(self, tmp_path: pathlib.Path) -> None:
        """``active`` in the decoded JSON is a real boolean."""
        repo_root, _ = _make_repo(tmp_path)
        outcome = _invoke(repo_root, ["bisect", "log", "--json"])
        assert outcome.exit_code == 0
        document = json.loads(_json_blob(outcome.output))
        assert isinstance(document["active"], bool)

    def test_log_json_entries_sanitized(self, tmp_path: pathlib.Path) -> None:
        """ANSI codes injected into the log state are stripped from JSON output."""
        from muse.core.bisect import _load_state, _save_state
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 3)
        self._start(root, ids)
        # Tamper: inject ANSI into a log entry
        tampered = _load_state(root)
        assert tampered is not None
        tampered["log"].append(f"{ids[1]} bad\x1b[31m 2026-01-01T00:00:00\x1b[0m")
        _save_state(root, tampered)
        outcome = _invoke(root, ["bisect", "log", "--json"])
        assert outcome.exit_code == 0
        assert "\x1b" not in outcome.output

    def test_log_text_entries_sanitized(self, tmp_path: pathlib.Path) -> None:
        """ANSI codes in log entries are stripped from text output."""
        from muse.core.bisect import _load_state, _save_state
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 3)
        self._start(root, ids)
        # Same tampering as the JSON variant, read back in text mode.
        tampered = _load_state(root)
        assert tampered is not None
        tampered["log"].append(f"{ids[1]} bad\x1b[31m 2026-01-01T00:00:00\x1b[0m")
        _save_state(root, tampered)
        outcome = _invoke(root, ["bisect", "log"])
        assert outcome.exit_code == 0
        assert "\x1b" not in outcome.output

    def test_log_json_no_ansi_in_output(self, tmp_path: pathlib.Path) -> None:
        """``log --json`` output for an untampered session has no ESC character."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 4)
        self._start(repo_root, chain)
        outcome = _invoke(repo_root, ["bisect", "log", "--json"])
        assert outcome.exit_code == 0
        assert "\x1b" not in outcome.output

    def test_log_text_no_ansi_in_output(self, tmp_path: pathlib.Path) -> None:
        """Text ``log`` output for an untampered session has no ESC character."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 4)
        self._start(repo_root, chain)
        outcome = _invoke(repo_root, ["bisect", "log"])
        assert outcome.exit_code == 0
        assert "\x1b" not in outcome.output


class TestBisectLogStress:
    """Performance and scale tests for muse bisect log."""

    def _start(self, root: pathlib.Path, ids: list[str]) -> None:
        """Start a session with the last id as bad and the first id as good."""
        r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]])
        assert r.exit_code == 0

    def test_log_100_commit_session(self, tmp_path: pathlib.Path) -> None:
        """Log on a 100-commit chain returns every entry recorded so far.

        Ten ``good`` verdicts are applied after ``start``, so at least twelve
        entries are expected.
        """
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 100)
        self._start(root, ids)
        # Apply 10 good verdicts to build up a log
        for commit_id in ids[1:11]:
            _invoke(root, ["bisect", "good", commit_id])
        result = _invoke(root, ["bisect", "log", "--json"])
        assert result.exit_code == 0
        entries = _parse_log(result.output)["entries"]
        # start adds 2 entries; 10 good verdicts add 10 more
        assert len(entries) >= 12

    def test_log_performance_large_session(self, tmp_path: pathlib.Path) -> None:
        """Log on a large session completes within 5 seconds."""
        import time
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 50)
        self._start(root, ids)
        for commit_id in ids[1:8]:
            _invoke(root, ["bisect", "bad", commit_id])
        # Only the ``log`` invocation is timed; session setup is excluded.
        t0 = time.monotonic()
        result = _invoke(root, ["bisect", "log", "--json"])
        elapsed = time.monotonic() - t0
        assert result.exit_code == 0
        assert elapsed < 5.0, f"bisect log took {elapsed:.2f}s"

    def test_log_concurrent_reads_consistent(self, tmp_path: pathlib.Path) -> None:
        """Concurrent log reads all return the same entry count."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 20)
        self._start(root, ids)
        _invoke(root, ["bisect", "bad", ids[10]])
        counts: list[int] = []
        errors: list[str] = []
        lock = threading.Lock()

        def _run() -> None:
            r = _invoke(root, ["bisect", "log", "--json"])
            with lock:
                if r.exit_code != 0:
                    errors.append(r.output)
                    return
                try:
                    counts.append(len(_parse_log(r.output)["entries"]))
                except (json.JSONDecodeError, KeyError, ValueError) as exc:
                    errors.append(f"parse error: {exc!r}  output={r.output!r}")

        threads = [threading.Thread(target=_run) for _ in range(8)]
        for t in threads:
            t.start()
        for t in threads:
            t.join()
        assert not errors, errors
        # An exception raised inside a worker thread does not propagate to the
        # joining thread; such a worker records neither a count nor an error.
        # Require one count per worker so the comparison below cannot pass
        # vacuously on a short or empty list.
        assert len(counts) == len(threads)
        assert all(c == counts[0] for c in counts)


# ---------------------------------------------------------------------------
# bisect reset — Extended, Security, Stress
# ---------------------------------------------------------------------------


class TestBisectResetExtended:
    """Extended unit / integration / e2e tests for muse bisect reset."""

    def _start(self, root: pathlib.Path, ids: list[str]) -> None:
        """Start a session with the last id as bad and the first id as good."""
        started = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]])
        assert started.exit_code == 0

    def test_reset_exits_0_with_session(self, tmp_path: pathlib.Path) -> None:
        """``reset`` exits 0 after a session has been started."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 4)
        self._start(repo_root, chain)
        outcome = _invoke(repo_root, ["bisect", "reset"])
        assert outcome.exit_code == 0

    def test_reset_exits_0_no_session(self, tmp_path: pathlib.Path) -> None:
        """``reset`` exits 0 when no session was started."""
        repo_root, _ = _make_repo(tmp_path)
        outcome = _invoke(repo_root, ["bisect", "reset"])
        assert outcome.exit_code == 0

    def test_reset_j_alias_works(self, tmp_path: pathlib.Path) -> None:
        """``-j`` yields parseable output whose ``reset`` field is ``True``."""
        repo_root, _ = _make_repo(tmp_path)
        outcome = _invoke(repo_root, ["bisect", "reset", "-j"])
        assert outcome.exit_code == 0
        payload = _parse_reset(outcome.output)
        assert payload["reset"] is True

    def test_reset_json_reset_true(self, tmp_path: pathlib.Path) -> None:
        """``--json`` output reports ``reset`` as ``True``."""
        repo_root, _ = _make_repo(tmp_path)
        outcome = _invoke(repo_root, ["bisect", "reset", "--json"])
        assert outcome.exit_code == 0
        payload = _parse_reset(outcome.output)
        assert payload["reset"] is True

    def test_reset_json_single_key(self, tmp_path: pathlib.Path) -> None:
        """The decoded JSON document contains at least the ``reset`` key."""
        repo_root, _ = _make_repo(tmp_path)
        outcome = _invoke(repo_root, ["bisect", "reset", "--json"])
        assert outcome.exit_code == 0
        document = json.loads(_json_blob(outcome.output))
        assert {"reset"} <= set(document.keys())

    def test_reset_clears_active_session(self, tmp_path: pathlib.Path) -> None:
        """After ``reset`` the log reports ``active`` as ``False``."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 4)
        self._start(repo_root, chain)
        _invoke(repo_root, ["bisect", "reset"])
        logged = _invoke(repo_root, ["bisect", "log", "--json"])
        payload = _parse_log(logged.output)
        assert payload["active"] is False

    def test_reset_prevents_bad_after_reset(self, tmp_path: pathlib.Path) -> None:
        """``bad`` issued after ``reset`` exits 1."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 4)
        self._start(repo_root, chain)
        _invoke(repo_root, ["bisect", "reset"])
        assert _invoke(repo_root, ["bisect", "bad", chain[2]]).exit_code == 1

    def test_reset_prevents_good_after_reset(self, tmp_path: pathlib.Path) -> None:
        """``good`` issued after ``reset`` exits 1."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 4)
        self._start(repo_root, chain)
        _invoke(repo_root, ["bisect", "reset"])
        outcome = _invoke(repo_root, ["bisect", "good", chain[1]])
        assert outcome.exit_code == 1

    def test_reset_prevents_skip_after_reset(self, tmp_path: pathlib.Path) -> None:
        """``skip`` issued after ``reset`` exits 1."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 4)
        self._start(repo_root, chain)
        _invoke(repo_root, ["bisect", "reset"])
        outcome = _invoke(repo_root, ["bisect", "skip", chain[2]])
        assert outcome.exit_code == 1

    def test_reset_idempotent_double_reset(self, tmp_path: pathlib.Path) -> None:
        """Two consecutive resets both exit 0."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 3)
        self._start(repo_root, chain)
        first = _invoke(repo_root, ["bisect", "reset"])
        assert first.exit_code == 0
        second = _invoke(repo_root, ["bisect", "reset"])
        assert second.exit_code == 0

    def test_reset_allows_new_session_after(self, tmp_path: pathlib.Path) -> None:
        """``start`` succeeds again once the previous session is reset."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 5)
        self._start(repo_root, chain)
        _invoke(repo_root, ["bisect", "reset"])
        restarted = _invoke(
            repo_root, ["bisect", "start", "--bad", chain[-1], "--good", chain[0]]
        )
        assert restarted.exit_code == 0

    def test_reset_clears_log_entries(self, tmp_path: pathlib.Path) -> None:
        """After ``reset`` the log's ``entries`` list is empty."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 4)
        self._start(repo_root, chain)
        _invoke(repo_root, ["bisect", "bad", chain[2]])
        _invoke(repo_root, ["bisect", "reset"])
        logged = _invoke(repo_root, ["bisect", "log", "--json"])
        assert _parse_log(logged.output)["entries"] == []

    def test_reset_text_output_mentions_reset(self, tmp_path: pathlib.Path) -> None:
        """Text output contains the word ``reset`` (case-insensitive)."""
        repo_root, _ = _make_repo(tmp_path)
        outcome = _invoke(repo_root, ["bisect", "reset"])
        assert outcome.exit_code == 0
        assert "reset" in outcome.output.lower()

    def test_reset_text_no_json_object(self, tmp_path: pathlib.Path) -> None:
        """Text output does not open with a brace."""
        repo_root, _ = _make_repo(tmp_path)
        text = _invoke(repo_root, ["bisect", "reset"]).output
        assert not text.strip().startswith("{")

    def test_reset_outside_repo_exits_2(self, tmp_path: pathlib.Path) -> None:
        """``reset`` in a directory that was never set up as a repo exits 2."""
        plain_dir = tmp_path / "not_a_repo"
        plain_dir.mkdir()
        outcome = _invoke(plain_dir, ["bisect", "reset"])
        assert outcome.exit_code == 2

    def test_reset_help_description_present(self, tmp_path: pathlib.Path) -> None:
        """``--help`` mentions ``Agent quickstart`` or ``Idempotent``."""
        repo_root, _ = _make_repo(tmp_path)
        help_text = _invoke(repo_root, ["bisect", "reset", "--help"]).output
        assert "Agent quickstart" in help_text or "Idempotent" in help_text

    def test_reset_json_reset_is_bool(self, tmp_path: pathlib.Path) -> None:
        """``reset`` in the decoded JSON is a real boolean."""
        repo_root, _ = _make_repo(tmp_path)
        outcome = _invoke(repo_root, ["bisect", "reset", "--json"])
        assert outcome.exit_code == 0
        document = json.loads(_json_blob(outcome.output))
        assert isinstance(document["reset"], bool)

    def test_reset_mid_session_with_verdicts(self, tmp_path: pathlib.Path) -> None:
        """Reset works correctly after several verdicts have been applied."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 10)
        self._start(repo_root, chain)
        _invoke(repo_root, ["bisect", "bad", chain[7]])
        _invoke(repo_root, ["bisect", "good", chain[3]])
        outcome = _invoke(repo_root, ["bisect", "reset", "--json"])
        assert outcome.exit_code == 0
        assert _parse_reset(outcome.output)["reset"] is True
        logged = _invoke(repo_root, ["bisect", "log", "--json"])
        assert _parse_log(logged.output)["active"] is False


class TestBisectResetSecurity:
    """Security hardening tests for muse bisect reset."""

    def test_reset_json_is_valid_json(self, tmp_path: pathlib.Path) -> None:
        """The JSON blob in ``reset --json`` output decodes to a dict."""
        repo_root, _ = _make_repo(tmp_path)
        outcome = _invoke(repo_root, ["bisect", "reset", "--json"])
        assert outcome.exit_code == 0
        document = json.loads(_json_blob(outcome.output))
        assert isinstance(document, dict)

    def test_reset_json_no_ansi(self, tmp_path: pathlib.Path) -> None:
        """``reset --json`` output contains no ESC character."""
        repo_root, _ = _make_repo(tmp_path)
        outcome = _invoke(repo_root, ["bisect", "reset", "--json"])
        assert outcome.exit_code == 0
        assert "\x1b" not in outcome.output

    def test_reset_text_no_ansi(self, tmp_path: pathlib.Path) -> None:
        """Text ``reset`` output contains no ESC character."""
        repo_root, _ = _make_repo(tmp_path)
        outcome = _invoke(repo_root, ["bisect", "reset"])
        assert outcome.exit_code == 0
        assert "\x1b" not in outcome.output

    def test_reset_state_file_removed(self, tmp_path: pathlib.Path) -> None:
        """After reset the state file no longer exists on disk."""
        from muse.core.bisect import _state_path
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 3)
        _invoke(repo_root, ["bisect", "start", "--bad", chain[-1], "--good", chain[0]])
        # The file must exist first so that its later absence is meaningful.
        assert _state_path(repo_root).exists()
        _invoke(repo_root, ["bisect", "reset"])
        assert not _state_path(repo_root).exists()

    def test_reset_no_session_state_file_absent(self, tmp_path: pathlib.Path) -> None:
        """Reset with no state file is a safe no-op."""
        from muse.core.bisect import _state_path
        repo_root, _ = _make_repo(tmp_path)
        assert not _state_path(repo_root).exists()
        outcome = _invoke(repo_root, ["bisect", "reset"])
        assert outcome.exit_code == 0

    def test_reset_json_reset_value_true(self, tmp_path: pathlib.Path) -> None:
        """reset field is always true, never false or a truthy int."""
        repo_root, _ = _make_repo(tmp_path)
        outcome = _invoke(repo_root, ["bisect", "reset", "--json"])
        assert outcome.exit_code == 0
        document = json.loads(_json_blob(outcome.output))
        # Identity check: rejects 1 and other truthy non-bool values.
        assert document["reset"] is True


class TestBisectResetStress:
    """Performance and scale tests for muse bisect reset."""

    def test_reset_after_100_commit_session(self, tmp_path: pathlib.Path) -> None:
        """Reset clears state from a session started on a 100-commit chain."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 100)
        _invoke(repo_root, ["bisect", "start", "--bad", chain[-1], "--good", chain[0]])
        outcome = _invoke(repo_root, ["bisect", "reset", "--json"])
        assert outcome.exit_code == 0
        assert _parse_reset(outcome.output)["reset"] is True
        logged = _invoke(repo_root, ["bisect", "log", "--json"])
        assert _parse_log(logged.output)["active"] is False

    def test_reset_performance(self, tmp_path: pathlib.Path) -> None:
        """Reset completes within 2 seconds even after a large session."""
        import time
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 100)
        _invoke(repo_root, ["bisect", "start", "--bad", chain[-1], "--good", chain[0]])
        for commit_id in chain[1:8]:
            _invoke(repo_root, ["bisect", "bad", commit_id])
        # Only the ``reset`` invocation is timed; session setup is excluded.
        began = time.monotonic()
        outcome = _invoke(repo_root, ["bisect", "reset"])
        elapsed = time.monotonic() - began
        assert outcome.exit_code == 0
        assert elapsed < 2.0, f"bisect reset took {elapsed:.2f}s"

    def test_reset_cycle_10_times(self, tmp_path: pathlib.Path) -> None:
        """Start → reset × 10 all succeed with no state leakage."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 6)
        cycles_left = 10
        while cycles_left:
            started = _invoke(
                repo_root, ["bisect", "start", "--bad", chain[-1], "--good", chain[0]]
            )
            assert started.exit_code == 0
            cleared = _invoke(repo_root, ["bisect", "reset", "--json"])
            assert cleared.exit_code == 0
            assert _parse_reset(cleared.output)["reset"] is True
            cycles_left -= 1


# ===========================================================================
# New feature tests — status, structured log, timeout, symbol_changes in run
# ===========================================================================


class TestBisectStatus:
    """Tests for the new ``muse bisect status`` subcommand."""

    def _start(self, root: pathlib.Path, ids: list[str]) -> None:
        """Start a session with the last id as bad and the first id as good."""
        r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]])
        assert r.exit_code == 0

    # ── Unit: no session ────────────────────────────────────────────────────

    def test_status_no_session_exits_0(self, tmp_path: pathlib.Path) -> None:
        """``status`` exits 0 when no session was started."""
        root, _ = _make_repo(tmp_path)
        result = _invoke(root, ["bisect", "status"])
        assert result.exit_code == 0

    def test_status_no_session_json_active_false(self, tmp_path: pathlib.Path) -> None:
        """``status --json`` reports ``active`` as ``False`` with no session."""
        root, _ = _make_repo(tmp_path)
        result = _invoke(root, ["bisect", "status", "--json"])
        assert result.exit_code == 0
        d = json.loads(result.output.strip())
        assert d["active"] is False

    def test_status_no_session_json_only_active_key(self, tmp_path: pathlib.Path) -> None:
        """With no session the JSON document contains at least ``active``."""
        root, _ = _make_repo(tmp_path)
        result = _invoke(root, ["bisect", "status", "--json"])
        assert result.exit_code == 0
        d = json.loads(result.output.strip())
        assert {"active"} <= set(d.keys())

    # ── Integration: active session ─────────────────────────────────────────

    def test_status_active_session_exits_0(self, tmp_path: pathlib.Path) -> None:
        """``status`` exits 0 after a session has been started."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 6)
        self._start(root, ids)
        result = _invoke(root, ["bisect", "status"])
        assert result.exit_code == 0

    def test_status_active_json_schema(self, tmp_path: pathlib.Path) -> None:
        """An active session's JSON carries the expected status keys."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 6)
        self._start(root, ids)
        result = _invoke(root, ["bisect", "status", "--json"])
        assert result.exit_code == 0
        d = json.loads(result.output.strip())
        assert d["active"] is True
        for key in (
            "bad_id",
            "good_ids",
            "remaining_count",
            "steps_remaining",
            "skipped_count",
            "symbol_filter",
        ):
            assert key in d

    def test_status_active_remaining_count_positive(self, tmp_path: pathlib.Path) -> None:
        """``remaining_count`` is positive right after ``start`` on 8 commits."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 8)
        self._start(root, ids)
        result = _invoke(root, ["bisect", "status", "--json"])
        d = json.loads(result.output.strip())
        assert d["remaining_count"] > 0

    def test_status_bad_id_matches_session(self, tmp_path: pathlib.Path) -> None:
        """``bad_id`` equals the commit id passed to ``start --bad``."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 5)
        self._start(root, ids)
        result = _invoke(root, ["bisect", "status", "--json"])
        d = json.loads(result.output.strip())
        assert d["bad_id"] == ids[-1]

    def test_status_skipped_count_increments(self, tmp_path: pathlib.Path) -> None:
        """Skipping one commit raises ``skipped_count`` by exactly one."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 8)
        self._start(root, ids)
        before = json.loads(
            _invoke(root, ["bisect", "status", "--json"]).output.strip()
        )["skipped_count"]
        # Skip the commit at the middle index of the chain.
        _invoke(root, ["bisect", "skip", ids[len(ids) // 2]])
        after = json.loads(
            _invoke(root, ["bisect", "status", "--json"]).output.strip()
        )["skipped_count"]
        assert after == before + 1

    def test_status_active_false_after_reset(self, tmp_path: pathlib.Path) -> None:
        """``active`` is ``False`` again once the session is reset."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 5)
        self._start(root, ids)
        _invoke(root, ["bisect", "reset"])
        result = _invoke(root, ["bisect", "status", "--json"])
        d = json.loads(result.output.strip())
        assert d["active"] is False

    # ── Security ────────────────────────────────────────────────────────────

    def test_status_outside_repo_exits_2(self, tmp_path: pathlib.Path) -> None:
        """``status`` in a directory that was never set up as a repo exits 2."""
        empty = tmp_path / "not_a_repo"
        empty.mkdir()
        result = _invoke(empty, ["bisect", "status"])
        assert result.exit_code == 2

    def test_status_json_is_compact(self, tmp_path: pathlib.Path) -> None:
        """The whole ``--json`` output parses as one JSON document.

        ``json.loads`` rejects trailing non-whitespace data, so this fails if
        anything other than a single document is printed.
        """
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 5)
        self._start(root, ids)
        result = _invoke(root, ["bisect", "status", "--json"])
        assert result.exit_code == 0
        json.loads(result.output)

    def test_status_json_no_ansi(self, tmp_path: pathlib.Path) -> None:
        """``status --json`` output contains no ESC character."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 5)
        self._start(root, ids)
        result = _invoke(root, ["bisect", "status", "--json"])
        assert "\x1b" not in result.output

    def test_status_text_no_session_message(self, tmp_path: pathlib.Path) -> None:
        """Text output with no session mentions "no bisect session" in any casing."""
        root, _ = _make_repo(tmp_path)
        result = _invoke(root, ["bisect", "status"])
        # The case-insensitive check also covers the exact "No bisect session".
        assert "no bisect session" in result.output.lower()

    def test_status_text_active_shows_remaining(self, tmp_path: pathlib.Path) -> None:
        """Text output for an active session mentions "remaining"."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 6)
        self._start(root, ids)
        result = _invoke(root, ["bisect", "status"])
        assert "remaining" in result.output.lower()


class TestBisectLogStructured:
    """Schema checks for the entries returned by ``bisect log --json``."""

    def _start(self, root: pathlib.Path, ids: list[str]) -> None:
        """Start a session with ``ids[-1]`` as bad and ``ids[0]`` as good; require exit 0."""
        started = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]])
        assert started.exit_code == 0

    def _log_entries(self, root: pathlib.Path):
        """Run ``bisect log --json`` and return the ``entries`` value of the parsed output."""
        log_output = _invoke(root, ["bisect", "log", "--json"]).output
        return _parse_log(log_output)["entries"]

    def test_log_entry_has_three_keys(self, tmp_path: pathlib.Path) -> None:
        """Every entry carries exactly ``commit_id``, ``verdict`` and ``timestamp``."""
        repo_root, repo_id = _make_repo(tmp_path)
        self._start(repo_root, _build_chain(repo_root, repo_id, 4))
        entries = self._log_entries(repo_root)
        assert len(entries) >= 2
        expected_keys = {"commit_id", "verdict", "timestamp"}
        for entry in entries:
            assert set(entry.keys()) == expected_keys

    def test_log_entry_verdict_values(self, tmp_path: pathlib.Path) -> None:
        """Verdicts are drawn only from ``bad``, ``good`` and ``skip``."""
        repo_root, repo_id = _make_repo(tmp_path)
        self._start(repo_root, _build_chain(repo_root, repo_id, 4))
        seen = {entry["verdict"] for entry in self._log_entries(repo_root)}
        assert seen.issubset({"bad", "good", "skip"})

    def test_log_entry_timestamp_is_iso8601(self, tmp_path: pathlib.Path) -> None:
        """Each timestamp is either empty or contains the ``T`` date/time separator."""
        repo_root, repo_id = _make_repo(tmp_path)
        self._start(repo_root, _build_chain(repo_root, repo_id, 4))
        for entry in self._log_entries(repo_root):
            # ISO8601 timestamps contain 'T' separating date from time
            assert "T" in entry["timestamp"] or entry["timestamp"] == ""

    def test_log_skip_entry_appears_after_skip(self, tmp_path: pathlib.Path) -> None:
        """After ``bisect skip <id>`` the log holds at least one ``skip`` verdict."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 6)
        self._start(repo_root, chain)
        _invoke(repo_root, ["bisect", "skip", chain[2]])
        recorded = [entry["verdict"] for entry in self._log_entries(repo_root)]
        assert "skip" in recorded

    def test_log_entry_commit_ids_in_session_ids(self, tmp_path: pathlib.Path) -> None:
        """The bad and good commit ids given to ``start`` both appear in the log."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 4)
        self._start(repo_root, chain)
        logged_ids = {entry["commit_id"] for entry in self._log_entries(repo_root)}
        bad_id, good_id = chain[-1], chain[0]
        assert bad_id in logged_ids
        assert good_id in logged_ids


class TestBisectRunTimeout:
    """Tests for ``--timeout`` on ``muse bisect run``."""

    def _start(self, root: pathlib.Path, ids: list[str]) -> None:
        """Start a session with ``ids[-1]`` as bad and ``ids[0]`` as good; require exit 0."""
        r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]])
        assert r.exit_code == 0

    def test_timeout_flag_accepted(self, tmp_path: pathlib.Path) -> None:
        """--timeout is a valid flag that doesn't crash the parser."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 4)
        self._start(root, ids)
        result = _invoke(root, ["bisect", "run", "true", "--timeout", "30"])
        assert result.exit_code == 0

    def test_timeout_fast_command_succeeds(self, tmp_path: pathlib.Path) -> None:
        """A command that finishes well within the timeout is treated normally."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 5)
        self._start(root, ids)
        result = _invoke(root, ["bisect", "run", "true", "--timeout", "10"])
        assert result.exit_code == 0

    def test_timeout_triggers_skip(self, tmp_path: pathlib.Path) -> None:
        """A command that exceeds --timeout is treated as skip (exit 125).

        The core ``run_bisect_command`` is called directly with
        ``subprocess.run`` patched to raise ``TimeoutExpired``, so the test
        never actually sleeps.
        """
        import subprocess
        import unittest.mock as mock

        from muse.core.bisect import run_bisect_command, start_bisect

        # Hand-build a minimal repo layout under the pytest-provided tmp_path
        # (no separate TemporaryDirectory is needed; tmp_path is already unique).
        repo_id = fake_id("repo")
        dot_muse = muse_dir(tmp_path)
        dot_muse.mkdir()
        (dot_muse / "repo.json").write_text(json.dumps({
            "repo_id": repo_id, "domain": "code",
            "default_branch": "main", "created_at": "2026-01-01T00:00:00+00:00",
        }))
        (dot_muse / "HEAD").write_text("ref: refs/heads/main")
        (dot_muse / "refs" / "heads").mkdir(parents=True)
        (dot_muse / "snapshots").mkdir()
        (dot_muse / "commits").mkdir()
        (dot_muse / "objects").mkdir()

        # Four-commit linear chain on "main"; each commit uses an empty manifest
        # and the branch ref is advanced to the newest commit after every write.
        main_ref = dot_muse / "refs" / "heads" / "main"
        ids: list[str] = []
        parent: str | None = None
        for i in range(4):
            manifest = {}
            snap_id = hash_snapshot(manifest)
            committed_at = datetime.datetime.now(datetime.timezone.utc)
            commit_id = hash_commit(
                parent_ids=[parent] if parent else [],
                snapshot_id=snap_id,
                message=f"c{i}",
                committed_at_iso=committed_at.isoformat(),
            )
            write_snapshot(
                tmp_path,
                SnapshotRecord(snapshot_id=snap_id, manifest=manifest, created_at=committed_at),
            )
            write_commit(tmp_path, CommitRecord(
                commit_id=commit_id,
                parent_commit_id=parent, parent2_commit_id=None,
                snapshot_id=snap_id, branch="main", message=f"c{i}",
                committed_at=committed_at,
            ))
            main_ref.write_text(commit_id)
            ids.append(commit_id)
            parent = commit_id

        start_bisect(tmp_path, ids[-1], [ids[0]])

        # Every subprocess.run call raises, simulating a command that timed out.
        with mock.patch("subprocess.run", side_effect=subprocess.TimeoutExpired("cmd", 1)):
            result = run_bisect_command(tmp_path, "sleep 99", ids[2], timeout=1)
        assert result.verdict == "skip"

    def test_timeout_short_alias(self, tmp_path: pathlib.Path) -> None:
        """-t is the short alias for --timeout."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 4)
        self._start(root, ids)
        result = _invoke(root, ["bisect", "run", "true", "-t", "10"])
        assert result.exit_code == 0


class TestBisectRunStepSymbolChanges:
    """Checks on the ``symbol_changes`` field of ``bisect run --json`` NDJSON lines."""

    def _start(self, root: pathlib.Path, ids: list[str]) -> None:
        """Start a session with ``ids[-1]`` as bad and ``ids[0]`` as good; require exit 0."""
        started = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]])
        assert started.exit_code == 0

    def _run_json_lines(self, tmp_path: pathlib.Path) -> list[str]:
        """Run ``bisect run true --json`` on a 6-commit chain.

        Asserts exit code 0 and returns the stripped, non-empty output lines.
        """
        repo_root, repo_id = _make_repo(tmp_path)
        self._start(repo_root, _build_chain(repo_root, repo_id, 6))
        outcome = _invoke(repo_root, ["bisect", "run", "true", "--json"])
        assert outcome.exit_code == 0
        stripped = (raw.strip() for raw in outcome.output.strip().splitlines())
        return [text for text in stripped if text]

    def test_step_json_has_symbol_changes_key(self, tmp_path: pathlib.Path) -> None:
        """The first line mentioning ``"step"``, if any, has a ``symbol_changes`` key."""
        ndjson = self._run_json_lines(tmp_path)
        first_step = next((text for text in ndjson if '"step"' in text), None)
        if first_step is not None:
            assert "symbol_changes" in json.loads(first_step)

    def test_step_symbol_changes_is_list(self, tmp_path: pathlib.Path) -> None:
        """Wherever ``symbol_changes`` is present its value is a list."""
        for text in self._run_json_lines(tmp_path):
            record = json.loads(text)
            if "symbol_changes" in record:
                assert isinstance(record["symbol_changes"], list)

    def test_step_ndjson_stays_compact(self, tmp_path: pathlib.Path) -> None:
        """NDJSON step lines must be single-line (not pretty-printed)."""
        for text in self._run_json_lines(tmp_path):
            # Every non-empty line must be valid JSON on its own
            record = json.loads(text)
            assert isinstance(record, dict)


class TestBisectJsonCompact:
    """Tests verifying compact single-line JSON on single-object subcommands."""

    def _start(self, root: pathlib.Path, ids: list[str]) -> None:
        """Start a session with ``ids[-1]`` as bad and ``ids[0]`` as good; require exit 0."""
        r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]])
        assert r.exit_code == 0

    @staticmethod
    def _assert_single_line_json(output: str) -> None:
        """Assert *output* is one line of text (ignoring outer whitespace) that parses as JSON.

        ``json.loads`` on its own also accepts pretty-printed documents, so the
        single-line property has to be checked separately.
        """
        body = output.strip()
        assert "\n" not in body, "expected single-line JSON, got multi-line output"
        json.loads(body)

    def test_start_json_is_compact(self, tmp_path: pathlib.Path) -> None:
        """``start --json`` emits one single-line JSON document."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 4)
        result = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0], "--json"])
        assert result.exit_code == 0
        self._assert_single_line_json(result.output)

    def test_bad_json_is_compact(self, tmp_path: pathlib.Path) -> None:
        """``bad <id> --json`` emits one single-line JSON document."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 5)
        self._start(root, ids)
        result = _invoke(root, ["bisect", "bad", ids[-1], "--json"])
        assert result.exit_code == 0
        self._assert_single_line_json(result.output)

    def test_log_json_is_compact(self, tmp_path: pathlib.Path) -> None:
        """``log --json`` emits one single-line JSON document."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 4)
        self._start(root, ids)
        result = _invoke(root, ["bisect", "log", "--json"])
        assert result.exit_code == 0
        self._assert_single_line_json(result.output)

    def test_reset_json_is_compact(self, tmp_path: pathlib.Path) -> None:
        """``reset --json`` (no session started) emits one single-line JSON document."""
        root, repo_id = _make_repo(tmp_path)
        result = _invoke(root, ["bisect", "reset", "--json"])
        assert result.exit_code == 0
        self._assert_single_line_json(result.output)

    def test_run_json_ndjson_lines_are_compact(self, tmp_path: pathlib.Path) -> None:
        """run --json emits NDJSON: each line is a compact single-line JSON object."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 4)
        self._start(root, ids)
        result = _invoke(root, ["bisect", "run", "true", "--json"])
        assert result.exit_code == 0
        for line in result.output.strip().splitlines():
            line = line.strip()
            if not line:
                continue
            # Parsing each line in isolation fails on pretty-printed output,
            # so this already enforces one JSON object per line.
            obj = json.loads(line)
            assert isinstance(obj, dict)


# ---------------------------------------------------------------------------
# Flag registration tests
# ---------------------------------------------------------------------------

import argparse as _argparse
from muse.cli.commands.bisect import register as _register_bisect
from muse.core.paths import head_path, muse_dir, ref_path


def _parse_bisect(*args: str) -> _argparse.Namespace:
    """Parse ``bisect <args...>`` with a throwaway parser populated by ``register()``."""
    parser = _argparse.ArgumentParser()
    _register_bisect(parser.add_subparsers(dest="cmd"))
    argv = ["bisect"]
    argv.extend(args)
    return parser.parse_args(argv)


class TestRegisterFlags:
    """Parser-level checks on the namespace produced for each ``bisect`` subcommand."""

    # Argument vector shared by every ``start`` test below.
    _START_ARGS = ("start", "--bad", "HEAD", "--good", "v1.0.0")

    # ── bad ─────────────────────────────────────────────────────────────────
    def test_bad_default_json_out_is_false(self) -> None:
        assert _parse_bisect("bad").json_out is False

    def test_bad_json_flag_sets_json_out(self) -> None:
        assert _parse_bisect("bad", "--json").json_out is True

    def test_bad_j_shorthand_sets_json_out(self) -> None:
        assert _parse_bisect("bad", "-j").json_out is True

    # ── good ────────────────────────────────────────────────────────────────
    def test_good_default_json_out_is_false(self) -> None:
        assert _parse_bisect("good").json_out is False

    def test_good_json_flag_sets_json_out(self) -> None:
        assert _parse_bisect("good", "--json").json_out is True

    def test_good_j_shorthand_sets_json_out(self) -> None:
        assert _parse_bisect("good", "-j").json_out is True

    # ── log ─────────────────────────────────────────────────────────────────
    def test_log_default_json_out_is_false(self) -> None:
        assert _parse_bisect("log").json_out is False

    def test_log_j_shorthand_sets_json_out(self) -> None:
        assert _parse_bisect("log", "-j").json_out is True

    # ── reset ────────────────────────────────────────────────────────────────
    def test_reset_default_json_out_is_false(self) -> None:
        assert _parse_bisect("reset").json_out is False

    def test_reset_j_shorthand_sets_json_out(self) -> None:
        assert _parse_bisect("reset", "-j").json_out is True

    # ── run ─────────────────────────────────────────────────────────────────
    def test_run_default_json_out_is_false(self) -> None:
        assert _parse_bisect("run", "pytest -x").json_out is False

    def test_run_j_shorthand_sets_json_out(self) -> None:
        assert _parse_bisect("run", "pytest -x", "-j").json_out is True

    # ── skip ─────────────────────────────────────────────────────────────────
    def test_skip_default_json_out_is_false(self) -> None:
        assert _parse_bisect("skip").json_out is False

    def test_skip_j_shorthand_sets_json_out(self) -> None:
        assert _parse_bisect("skip", "-j").json_out is True

    # ── start ────────────────────────────────────────────────────────────────
    def test_start_default_json_out_is_false(self) -> None:
        parsed = _parse_bisect(*self._START_ARGS)
        assert parsed.json_out is False

    def test_start_j_shorthand_sets_json_out(self) -> None:
        parsed = _parse_bisect(*self._START_ARGS, "-j")
        assert parsed.json_out is True

    def test_start_bad_flag(self) -> None:
        parsed = _parse_bisect(*self._START_ARGS)
        assert parsed.bad == "HEAD"

    def test_start_good_flag(self) -> None:
        # ``good`` is compared against a one-element list, not a bare string.
        parsed = _parse_bisect(*self._START_ARGS)
        assert parsed.good == ["v1.0.0"]

    # ── status ───────────────────────────────────────────────────────────────
    def test_status_default_json_out_is_false(self) -> None:
        assert _parse_bisect("status").json_out is False

    def test_status_j_shorthand_sets_json_out(self) -> None:
        assert _parse_bisect("status", "-j").json_out is True