"""Comprehensive hardening tests for ``muse bisect``. Covers: - Unit: _toml_escape, _load_state symlink guard, size cap, _save_state injection - Security: branch TOML injection, symlink state file, oversized state, ANSI sanitization, error routing to stderr, null bytes in refs - JSON schema: all subcommands (start, bad, good, skip, log, reset, run) - Integration: --json round-trips, get_bisect_next public API, session lifecycle - E2E: symbol-scoped bisect, run subcommand NDJSON, reset --json, log --json - Stress: 200-commit chain, concurrent read-only queries """ from __future__ import annotations import datetime import json import pathlib import re import threading from typing import TypedDict import pytest from muse.core.ids import hash_commit, hash_snapshot from muse.core.commits import ( CommitRecord, write_commit, ) from muse.core.snapshots import ( SnapshotRecord, write_snapshot, ) from muse.core.types import Manifest, fake_id, short_id from tests.cli_test_helper import CliRunner, InvokeResult # Helpers to check store field names at import time; mypy will catch mismatches. _SNAP_FIELDS: set[str] = {"snapshot_id", "manifest", "created_at"} _COMMIT_FIELDS: set[str] = {"commit_id", "repo_id", "branch", "snapshot_id", "message", "committed_at"} runner = CliRunner() _ANSI_RE = re.compile(r"\x1b\[[0-9;]*m") # --------------------------------------------------------------------------- # Fixtures # --------------------------------------------------------------------------- def _make_repo(tmp_path: pathlib.Path) -> tuple[pathlib.Path, str]: """Create a minimal Muse repo layout without calling muse init. Returns (repo_root, repo_id). """ repo_id = fake_id("repo") muse = muse_dir(tmp_path) muse.mkdir() (muse / "repo.json").write_text( json.dumps({ "repo_id": repo_id, "domain": "code", "default_branch": "main", "created_at": "2026-01-01T00:00:00+00:00", }) ) (muse / "HEAD").write_text("ref: refs/heads/main") (muse / "refs" / "heads").mkdir(parents=True) (muse / "snapshots").mkdir() (muse / "commits").mkdir() (muse / "objects").mkdir() return tmp_path, repo_id def _make_commit( root: pathlib.Path, repo_id: str, *, branch: str = "main", message: str = "commit", parent_id: str | None = None, ) -> str: """Write a synthetic commit and return its commit_id.""" manifest: Manifest = {} snap_id = hash_snapshot(manifest) committed_at = datetime.datetime.now(datetime.timezone.utc) commit_id = hash_commit( parent_ids=[parent_id] if parent_id else [], snapshot_id=snap_id, message=message, committed_at_iso=committed_at.isoformat(), ) snap = SnapshotRecord( snapshot_id=snap_id, manifest={}, created_at=committed_at, ) write_snapshot(root, snap) commit = CommitRecord( commit_id=commit_id, parent_commit_id=parent_id, parent2_commit_id=None, snapshot_id=snap_id, branch=branch, message=message, committed_at=committed_at, ) write_commit(root, commit) branch_ref = ref_path(root, branch) branch_ref.write_text(commit_id) (head_path(root)).write_text(f"ref: refs/heads/{branch}") return commit_id def _build_chain(root: pathlib.Path, repo_id: str, n: int) -> list[str]: """Create n commits (linear chain) and return their IDs oldest-first.""" ids: list[str] = [] parent: str | None = None for i in range(n): cid = _make_commit(root, repo_id, message=f"commit {i}", parent_id=parent) ids.append(cid) parent = cid return ids def _invoke(root: pathlib.Path, args: list[str]) -> InvokeResult: return runner.invoke(None, args, env={"MUSE_REPO_ROOT": str(root)}) def _json_blob(output: str) -> str: """Extract the first complete JSON object/array from mixed output. Handles both compact (single-line) and pretty-printed (multi-line) JSON. Falls back to line-by-line extraction for NDJSON streams. """ stripped = output.strip() # Fast path: try the whole output (works for pretty-printed single objects) try: json.loads(stripped) return stripped except json.JSONDecodeError: pass # Fallback: find the first JSON line (NDJSON or compact output mixed with text) for line in output.splitlines(): s = line.strip() if s.startswith("{") or s.startswith("["): return s return stripped # --------------------------------------------------------------------------- # Typed schema helpers # --------------------------------------------------------------------------- class _StepJson(TypedDict): done: bool first_bad: str | None next_to_test: str | None remaining_count: int steps_remaining: int verdict: str symbol_changes: list[str] class _LogEntryJson(TypedDict): commit_id: str verdict: str timestamp: str class _LogJson(TypedDict): active: bool entries: list[_LogEntryJson] class _ResetJson(TypedDict): reset: bool class _RunStepJson(TypedDict): step: int testing: str verdict: str remaining_count: int done: bool symbol_changes: list[str] class _RunDoneJson(TypedDict): done: bool first_bad: str | None steps_taken: int def _repo(tmp_path: pathlib.Path) -> tuple[pathlib.Path, str]: """Alias for _make_repo for readability inside test methods.""" return _make_repo(tmp_path) def _parse_step(output: str) -> _StepJson: raw = json.loads(_json_blob(output)) assert isinstance(raw, dict) done_val = raw["done"] first_bad_val = raw["first_bad"] next_to_test_val = raw["next_to_test"] remaining_count_val = raw["remaining_count"] steps_remaining_val = raw["steps_remaining"] verdict_val = raw["verdict"] symbol_changes_val = raw["symbol_changes"] assert isinstance(done_val, bool) assert first_bad_val is None or isinstance(first_bad_val, str) assert next_to_test_val is None or isinstance(next_to_test_val, str) assert isinstance(remaining_count_val, int) assert isinstance(steps_remaining_val, int) assert isinstance(verdict_val, str) assert isinstance(symbol_changes_val, list) return _StepJson( done=done_val, first_bad=first_bad_val, next_to_test=next_to_test_val, remaining_count=remaining_count_val, steps_remaining=steps_remaining_val, verdict=verdict_val, symbol_changes=symbol_changes_val, ) def _parse_log(output: str) -> _LogJson: raw = json.loads(_json_blob(output)) assert isinstance(raw, dict) active_val = raw["active"] entries_val = raw["entries"] assert isinstance(active_val, bool) assert isinstance(entries_val, list) return _LogJson(active=active_val, entries=entries_val) def _parse_reset(output: str) -> _ResetJson: raw = json.loads(_json_blob(output)) assert isinstance(raw, dict) reset_val = raw["reset"] assert isinstance(reset_val, bool) return _ResetJson(reset=reset_val) # --------------------------------------------------------------------------- # Unit — _toml_escape # --------------------------------------------------------------------------- class TestTomlEscape: def test_plain_string_unchanged(self) -> None: from muse.core.bisect import _toml_escape assert _toml_escape("feat/my-thing") == "feat/my-thing" def test_double_quote_escaped(self) -> None: from muse.core.bisect import _toml_escape result = _toml_escape('branch"with"quotes') # After escaping, no bare double-quotes remain (only \"). assert '\\"' in result def test_backslash_escaped(self) -> None: from muse.core.bisect import _toml_escape result = _toml_escape("branch\\with\\backslash") assert result == "branch\\\\with\\\\backslash" def test_both_escaped(self) -> None: from muse.core.bisect import _toml_escape result = _toml_escape('malicious"; bad_id = "hacked') assert '\\"' in result assert "bad_id" in result # literal text preserved, just escaped # --------------------------------------------------------------------------- # Unit — _load_state security # --------------------------------------------------------------------------- class TestLoadStateSecurity: def test_symlink_state_file_rejected(self, tmp_path: pathlib.Path) -> None: """A symlink at the bisect state path must be silently ignored.""" from muse.core.bisect import _load_state, _state_path root, _ = _make_repo(tmp_path) target = tmp_path / "real_state.toml" target.write_text('bad_id = "abc"\ngood_ids = []\nskipped_ids = []\nremaining = []\nlog = []\n') state_path = _state_path(root) state_path.symlink_to(target) result = _load_state(root) assert result is None def test_oversized_state_file_rejected(self, tmp_path: pathlib.Path) -> None: """State files exceeding _MAX_STATE_BYTES must be rejected.""" from muse.core.bisect import _MAX_STATE_BYTES, _load_state, _state_path root, _ = _make_repo(tmp_path) state_path = _state_path(root) huge = "x" * (_MAX_STATE_BYTES + 1) state_path.write_text(huge) result = _load_state(root) assert result is None def test_corrupt_state_returns_none(self, tmp_path: pathlib.Path) -> None: from muse.core.bisect import _load_state, _state_path root, _ = _make_repo(tmp_path) state_path = _state_path(root) state_path.write_text("not valid toml ]] [[[ !!!") result = _load_state(root) assert result is None def test_missing_state_returns_none(self, tmp_path: pathlib.Path) -> None: from muse.core.bisect import _load_state root, _ = _make_repo(tmp_path) result = _load_state(root) assert result is None # --------------------------------------------------------------------------- # Unit — _save_state TOML injection # --------------------------------------------------------------------------- class TestSaveStateTomlInjection: def test_branch_with_quote_survives_roundtrip(self, tmp_path: pathlib.Path) -> None: """A branch name containing a double-quote must not corrupt the state file.""" from muse.core.bisect import BisectStateDict, _load_state, _save_state root, _ = _make_repo(tmp_path) state: BisectStateDict = { "bad_id": "a" * 64, "good_ids": ["b" * 64], "skipped_ids": [], "remaining": [], "log": [], "branch": 'malicious"; bad_id = "injected', } _save_state(root, state) loaded = _load_state(root) assert loaded is not None assert loaded.get("bad_id") == "a" * 64 assert loaded.get("branch") == 'malicious"; bad_id = "injected' def test_branch_with_backslash_survives_roundtrip(self, tmp_path: pathlib.Path) -> None: from muse.core.bisect import BisectStateDict, _load_state, _save_state root, _ = _make_repo(tmp_path) state: BisectStateDict = { "bad_id": "c" * 64, "good_ids": ["d" * 64], "skipped_ids": [], "remaining": [], "log": [], "branch": "feat\\\\weird", } _save_state(root, state) loaded = _load_state(root) assert loaded is not None assert loaded.get("branch") == "feat\\\\weird" def test_symbol_filter_injection_survives_roundtrip(self, tmp_path: pathlib.Path) -> None: from muse.core.bisect import BisectStateDict, _load_state, _save_state root, _ = _make_repo(tmp_path) state: BisectStateDict = { "bad_id": "e" * 64, "good_ids": ["f" * 64], "skipped_ids": [], "remaining": [], "log": [], "symbol_filter": 'billing.py::Invoice"; bad_id = "EVIL', } _save_state(root, state) loaded = _load_state(root) assert loaded is not None assert loaded.get("bad_id") == "e" * 64 assert loaded.get("symbol_filter") == 'billing.py::Invoice"; bad_id = "EVIL' # --------------------------------------------------------------------------- # Unit — get_bisect_next public API # --------------------------------------------------------------------------- class TestGetBisectNext: def test_no_session_returns_none(self, tmp_path: pathlib.Path) -> None: from muse.core.bisect import get_bisect_next root, _ = _make_repo(tmp_path) nxt, sf = get_bisect_next(root) assert nxt is None assert sf == "" def test_returns_next_after_start(self, tmp_path: pathlib.Path) -> None: from muse.core.bisect import get_bisect_next, start_bisect root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) start_bisect(root, ids[-1], [ids[0]]) nxt, sf = get_bisect_next(root) assert nxt is not None assert nxt in ids assert sf == "" def test_returns_symbol_filter(self, tmp_path: pathlib.Path) -> None: from muse.core.bisect import get_bisect_next, start_bisect root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 4) # No commits touch this symbol, so remaining will be empty. start_bisect(root, ids[-1], [ids[0]], symbol_filter="no_file.py::NoSymbol") nxt, sf = get_bisect_next(root) # Symbol filter is preserved regardless of whether next exists. assert sf == "no_file.py::NoSymbol" # --------------------------------------------------------------------------- # Security — CLI error routing # --------------------------------------------------------------------------- class TestErrorRouting: def test_bad_without_session_goes_to_stderr(self, tmp_path: pathlib.Path) -> None: root, _ = _make_repo(tmp_path) result = _invoke(root, ["bisect", "bad"]) assert result.exit_code != 0 assert "No bisect session" in (result.stderr or result.output) def test_good_without_session_goes_to_stderr(self, tmp_path: pathlib.Path) -> None: root, _ = _make_repo(tmp_path) result = _invoke(root, ["bisect", "good"]) assert result.exit_code != 0 assert "No bisect session" in (result.stderr or result.output) def test_skip_without_session_goes_to_stderr(self, tmp_path: pathlib.Path) -> None: root, _ = _make_repo(tmp_path) result = _invoke(root, ["bisect", "skip"]) assert result.exit_code != 0 assert "No bisect session" in (result.stderr or result.output) def test_run_without_session_goes_to_stderr(self, tmp_path: pathlib.Path) -> None: root, _ = _make_repo(tmp_path) result = _invoke(root, ["bisect", "run", "true"]) assert result.exit_code != 0 assert "No bisect session" in (result.stderr or result.output) def test_symbol_without_double_colon_goes_to_stderr(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 2) result = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0], "--symbol", "no_colon_here"]) assert result.exit_code != 0 assert "❌" in (result.stderr or result.output) def test_symbol_too_long_goes_to_stderr(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 2) long_sym = f"f.py::{'x' * 600}" result = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0], "--symbol", long_sym]) assert result.exit_code != 0 assert "too long" in (result.stderr or result.output) def test_double_start_goes_to_stderr(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 3) r1 = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) assert r1.exit_code == 0 r2 = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) assert r2.exit_code != 0 assert "already active" in (r2.stderr or r2.output) # --------------------------------------------------------------------------- # Security — ANSI sanitization in outputs # --------------------------------------------------------------------------- class TestAnsiSanitization: def test_ansi_in_ref_does_not_leak(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 2) ansi_ref = "\x1b[31mHEAD\x1b[0m" result = _invoke(root, ["bisect", "start", "--bad", ansi_ref, "--good", ids[0]]) assert _ANSI_RE.search(result.output) is None def test_ansi_in_symbol_does_not_leak(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 2) sym = "\x1b[31mfoo.py::Bar\x1b[0m" result = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0], "--symbol", sym]) assert _ANSI_RE.search(result.output) is None # --------------------------------------------------------------------------- # JSON schema — start # --------------------------------------------------------------------------- class TestJsonSchemaStart: def test_start_json_schema(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) result = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0], "--json"]) assert result.exit_code == 0 parsed = _parse_step(result.output) assert parsed["verdict"] == "started" assert isinstance(parsed["done"], bool) assert isinstance(parsed["remaining_count"], int) assert parsed["remaining_count"] >= 0 def test_start_json_done_when_no_remaining(self, tmp_path: pathlib.Path) -> None: """When bad and good are adjacent, start should report done=True immediately.""" root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 2) result = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0], "--json"]) assert result.exit_code == 0 parsed = _parse_step(result.output) assert parsed["done"] is True assert parsed["first_bad"] == ids[-1] def test_start_json_symbol_changes_list(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 4) result = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0], "--json"]) parsed = _parse_step(result.output) assert isinstance(parsed["symbol_changes"], list) # --------------------------------------------------------------------------- # JSON schema — bad / good / skip # --------------------------------------------------------------------------- class TestJsonSchemaBadGoodSkip: def _start(self, root: pathlib.Path, ids: list[str]) -> None: r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) assert r.exit_code == 0 def test_bad_json_schema(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) midpoint = ids[len(ids) // 2] result = _invoke(root, ["bisect", "bad", midpoint, "--json"]) assert result.exit_code == 0 parsed = _parse_step(result.output) assert parsed["verdict"] == "bad" def test_good_json_schema(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) midpoint = ids[len(ids) // 2] result = _invoke(root, ["bisect", "good", midpoint, "--json"]) assert result.exit_code == 0 parsed = _parse_step(result.output) assert parsed["verdict"] == "good" def test_skip_json_schema(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) midpoint = ids[len(ids) // 2] result = _invoke(root, ["bisect", "skip", midpoint, "--json"]) assert result.exit_code == 0 parsed = _parse_step(result.output) assert parsed["verdict"] == "skip" # --------------------------------------------------------------------------- # JSON schema — log # --------------------------------------------------------------------------- class TestJsonSchemaLog: def test_log_json_no_session(self, tmp_path: pathlib.Path) -> None: root, _ = _make_repo(tmp_path) result = _invoke(root, ["bisect", "log", "--json"]) assert result.exit_code == 0 parsed = _parse_log(result.output) assert parsed["active"] is False assert parsed["entries"] == [] def test_log_json_after_start(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 4) _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) result = _invoke(root, ["bisect", "log", "--json"]) assert result.exit_code == 0 parsed = _parse_log(result.output) assert parsed["active"] is True assert len(parsed["entries"]) >= 2 def test_log_json_entries_are_dicts(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 3) _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) result = _invoke(root, ["bisect", "log", "--json"]) parsed = _parse_log(result.output) for entry in parsed["entries"]: assert isinstance(entry, dict) assert set(entry.keys()) == {"commit_id", "verdict", "timestamp"} # --------------------------------------------------------------------------- # JSON schema — reset # --------------------------------------------------------------------------- class TestJsonSchemaReset: def test_reset_json_no_session(self, tmp_path: pathlib.Path) -> None: root, _ = _make_repo(tmp_path) result = _invoke(root, ["bisect", "reset", "--json"]) assert result.exit_code == 0 parsed = _parse_reset(result.output) assert parsed["reset"] is True def test_reset_json_with_session(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 3) _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) result = _invoke(root, ["bisect", "reset", "--json"]) assert result.exit_code == 0 parsed = _parse_reset(result.output) assert parsed["reset"] is True def test_reset_clears_active_flag(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 3) _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) _invoke(root, ["bisect", "reset", "--json"]) log_result = _invoke(root, ["bisect", "log", "--json"]) parsed = _parse_log(log_result.output) assert parsed["active"] is False # --------------------------------------------------------------------------- # JSON schema — run (NDJSON) # --------------------------------------------------------------------------- class TestJsonSchemaRun: def test_run_json_ndjson_format(self, tmp_path: pathlib.Path) -> None: """``bisect run --json`` should emit valid NDJSON.""" root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 6) _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) result = _invoke(root, ["bisect", "run", "true", "--json"]) assert result.exit_code == 0 lines = [ln.strip() for ln in result.output.strip().splitlines() if ln.strip()] assert len(lines) >= 1 for raw_line in lines[:-1]: step_raw = json.loads(raw_line) assert "step" in step_raw assert "verdict" in step_raw assert "testing" in step_raw assert "remaining_count" in step_raw assert "done" in step_raw done_raw = json.loads(lines[-1]) done_val = done_raw["done"] assert isinstance(done_val, bool) steps_taken_val = done_raw["steps_taken"] assert isinstance(steps_taken_val, int) def test_run_json_done_has_first_bad(self, tmp_path: pathlib.Path) -> None: """With always-good command, first_bad on the done line should be set.""" root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 4) _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) result = _invoke(root, ["bisect", "run", "true", "--json"]) assert result.exit_code == 0 lines = [ln.strip() for ln in result.output.strip().splitlines() if ln.strip()] done_raw = json.loads(lines[-1]) done_val = done_raw["done"] first_bad_val = done_raw["first_bad"] if done_val: assert first_bad_val is not None def test_run_json_steps_taken_increments(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 8) _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) result = _invoke(root, ["bisect", "run", "true", "--json"]) lines = [ln.strip() for ln in result.output.strip().splitlines() if ln.strip()] done_raw = json.loads(lines[-1]) steps_taken = done_raw["steps_taken"] assert steps_taken >= 1 # --------------------------------------------------------------------------- # Integration — session lifecycle with --json # --------------------------------------------------------------------------- class TestIntegrationJson: def test_start_bad_good_converge(self, tmp_path: pathlib.Path) -> None: """A manual bisect session with --json converges to a first_bad.""" root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 7) r_start = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0], "--json"]) assert r_start.exit_code == 0 step = _parse_step(r_start.output) if step["done"]: assert step["first_bad"] is not None return for _ in range(20): nxt = step["next_to_test"] assert nxt is not None r = _invoke(root, ["bisect", "bad", nxt, "--json"]) assert r.exit_code == 0 step = _parse_step(r.output) if step["done"]: assert step["first_bad"] is not None return pytest.fail("Bisect did not converge within 20 steps") def test_good_narrows_range(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 8) _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0], "--json"]) midpoint = ids[len(ids) // 2] r_good = _invoke(root, ["bisect", "good", midpoint, "--json"]) step = _parse_step(r_good.output) if not step["done"]: assert step["remaining_count"] < len(ids) - 2 def test_log_grows_with_verdicts(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) midpoint = ids[len(ids) // 2] _invoke(root, ["bisect", "bad", midpoint]) r_log = _invoke(root, ["bisect", "log", "--json"]) parsed = _parse_log(r_log.output) # start logs 2 entries (bad+good); bad adds 1 more → at least 3. assert len(parsed["entries"]) >= 3 def test_skip_excluded_from_remaining(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 6) r_start = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0], "--json"]) step_start = _parse_step(r_start.output) if step_start["done"]: return nxt = step_start["next_to_test"] assert nxt is not None r_skip = _invoke(root, ["bisect", "skip", nxt, "--json"]) step_skip = _parse_step(r_skip.output) if not step_skip["done"]: assert step_skip["next_to_test"] != nxt # --------------------------------------------------------------------------- # E2E — text (non-JSON) output still works # --------------------------------------------------------------------------- class TestE2EText: def test_start_text_output_no_json(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 4) result = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) assert result.exit_code == 0 assert "Bisect session started" in result.output or "First bad commit" in result.output def test_bad_text_output(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 4) _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) midpoint = ids[len(ids) // 2] result = _invoke(root, ["bisect", "bad", midpoint]) assert result.exit_code == 0 assert "bad" in result.output.lower() def test_log_text_shows_entries(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 3) _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) result = _invoke(root, ["bisect", "log"]) assert result.exit_code == 0 assert "Bisect log" in result.output def test_reset_text_output(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 2) _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) result = _invoke(root, ["bisect", "reset"]) assert result.exit_code == 0 assert "reset" in result.output.lower() def test_run_text_output_converges(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) result = _invoke(root, ["bisect", "run", "true"]) assert result.exit_code == 0 assert "First bad commit" in result.output or "Bisect complete" in result.output def test_no_good_flag_fails_clearly(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 2) result = _invoke(root, ["bisect", "start", "--bad", ids[-1]]) assert result.exit_code != 0 def test_log_empty_when_no_session(self, tmp_path: pathlib.Path) -> None: root, _ = _make_repo(tmp_path) result = _invoke(root, ["bisect", "log"]) assert result.exit_code == 0 assert "No bisect log" in result.output # --------------------------------------------------------------------------- # E2E — symbol-scoped bisect # --------------------------------------------------------------------------- class TestSymbolScopedBisect: def test_symbol_filter_no_matching_commits_warns(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 4) result = _invoke( root, [ "bisect", "start", "--bad", ids[-1], "--good", ids[0], "--symbol", "ghost.py::GhostFunc", ], ) assert result.exit_code == 0 combined = result.output + (result.stderr or "") assert "No commits" in combined or "First bad" in combined def test_symbol_filter_json_schema_preserved(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) result = _invoke( root, [ "bisect", "start", "--bad", ids[-1], "--good", ids[0], "--symbol", "ghost.py::GhostFunc", "--json", ], ) assert result.exit_code == 0 parsed = _parse_step(result.output) assert isinstance(parsed["symbol_changes"], list) def test_symbol_filter_state_persisted(self, tmp_path: pathlib.Path) -> None: """After start with --symbol, the symbol_filter must survive state reload.""" from muse.core.bisect import _load_state root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 4) _invoke( root, [ "bisect", "start", "--bad", ids[-1], "--good", ids[0], "--symbol", "billing.py::Invoice", ], ) state = _load_state(root) assert state is not None assert state.get("symbol_filter") == "billing.py::Invoice" # --------------------------------------------------------------------------- # Stress — large commit chains # --------------------------------------------------------------------------- class TestStress: def test_200_commit_chain_converges(self, tmp_path: pathlib.Path) -> None: """Bisect over 200 commits must converge in ≤9 steps (log₂(200) ≈ 7.6).""" root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 200) _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) steps = 0 for _ in range(10): r = _invoke(root, ["bisect", "run", "true", "--json"]) assert r.exit_code == 0 lines = [ln.strip() for ln in r.output.strip().splitlines() if ln.strip()] if lines: done_raw = json.loads(lines[-1]) if done_raw.get("done"): steps = done_raw.get("steps_taken", 0) break else: pytest.fail("Bisect did not terminate within 10 run invocations") assert steps <= 9, f"Expected ≤9 steps for 200 commits, got {steps}" def test_concurrent_log_reads_are_safe(self, tmp_path: pathlib.Path) -> None: """Concurrent reads of bisect log must not crash.""" root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 10) _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) errors: list[str] = [] def _read_log() -> None: from muse.core.bisect import get_bisect_log try: entries = get_bisect_log(root) assert isinstance(entries, list) except Exception as exc: errors.append(str(exc)) threads = [threading.Thread(target=_read_log) for _ in range(20)] for t in threads: t.start() for t in threads: t.join() assert not errors, f"Concurrent read failures: {errors}" def test_50_step_manual_bisect_json(self, tmp_path: pathlib.Path) -> None: """50 mark_bad calls on a 100-commit chain must all emit valid JSON.""" root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 100) r_start = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0], "--json"]) assert r_start.exit_code == 0 step = _parse_step(r_start.output) for _ in range(50): if step["done"]: assert step["first_bad"] is not None return nxt = step["next_to_test"] assert nxt is not None r = _invoke(root, ["bisect", "bad", nxt, "--json"]) assert r.exit_code == 0 step = _parse_step(r.output) assert step["done"] is True # --------------------------------------------------------------------------- # bisect start — Extended, Security, Stress # --------------------------------------------------------------------------- class TestBisectStartExtended: """Extended unit / integration / e2e tests for muse bisect start.""" def test_start_exits_0(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) result = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) assert result.exit_code == 0 def test_start_j_alias_works(self, tmp_path: pathlib.Path) -> None: """-j is an accepted alias for --json.""" root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) result = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0], "-j"]) assert result.exit_code == 0 parsed = _parse_step(result.output) assert parsed["verdict"] == "started" def test_start_json_verdict_is_started(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) result = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0], "--json"]) assert result.exit_code == 0 assert _parse_step(result.output)["verdict"] == "started" def test_start_json_done_false_with_remaining(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) result = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0], "--json"]) assert result.exit_code == 0 parsed = _parse_step(result.output) assert parsed["done"] is False assert parsed["next_to_test"] is not None def test_start_json_done_true_when_adjacent(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 2) result = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0], "--json"]) assert result.exit_code == 0 parsed = _parse_step(result.output) assert parsed["done"] is True assert parsed["first_bad"] == ids[-1] def test_start_json_remaining_count_positive(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 8) result = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0], "--json"]) assert result.exit_code == 0 assert _parse_step(result.output)["remaining_count"] > 0 def test_start_json_steps_remaining_positive(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 8) result = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0], "--json"]) assert result.exit_code == 0 assert _parse_step(result.output)["steps_remaining"] > 0 def test_start_json_all_seven_keys(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) result = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0], "--json"]) assert result.exit_code == 0 d = json.loads(_json_blob(result.output)) assert {"done", "first_bad", "next_to_test", "remaining_count", "steps_remaining", "verdict", "symbol_changes"} <= set(d.keys()) def test_start_multiple_good_refs(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 6) result = _invoke( root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0], "--good", ids[1], "--json"], ) assert result.exit_code == 0 assert _parse_step(result.output)["verdict"] == "started" def test_start_no_good_exits_1(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 3) result = _invoke(root, ["bisect", "start", "--bad", ids[-1]]) assert result.exit_code == 1 def test_start_no_good_error_to_stderr(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 3) result = _invoke(root, ["bisect", "start", "--bad", ids[-1]]) assert result.exit_code != 0 combined = result.output + (result.stderr or "") assert "good" in combined.lower() def test_start_double_start_exits_1(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) result = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) assert result.exit_code == 1 def test_start_outside_repo_exits_2(self, tmp_path: pathlib.Path) -> None: empty = tmp_path / "not_a_repo" empty.mkdir() result = _invoke(empty, ["bisect", "start", "--bad", "abc", "--good", "def"]) assert result.exit_code == 2 def test_start_bad_defaults_to_head(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 4) # HEAD points to ids[-1]; omit --bad result = _invoke(root, ["bisect", "start", "--good", ids[0], "--json"]) assert result.exit_code == 0 def test_start_text_mentions_session_started(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) result = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) assert result.exit_code == 0 assert "Bisect session started" in result.output or "First bad commit" in result.output def test_start_text_no_json_object(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) result = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) assert result.exit_code == 0 assert not result.output.strip().startswith("{") def test_start_help_description_present(self, tmp_path: pathlib.Path) -> None: root, _ = _make_repo(tmp_path) result = _invoke(root, ["bisect", "start", "--help"]) assert "Agent quickstart" in result.output or "binary" in result.output.lower() def test_start_invalid_ref_exits_1(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 3) result = _invoke(root, ["bisect", "start", "--bad", "nonexistent_ref_abc123", "--good", ids[0]]) assert result.exit_code == 1 class TestBisectStartSecurity: """Security hardening tests for muse bisect start.""" def test_start_symbol_changes_no_ansi_in_json(self, tmp_path: pathlib.Path) -> None: """symbol_changes entries are sanitized in JSON output.""" from unittest.mock import patch from muse.core.bisect import BisectResult root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) injected = BisectResult( done=False, first_bad=None, next_to_test=ids[2], remaining_count=3, steps_remaining=2, verdict="started", symbol_changes=["add Invoice.compute\x1b[31mred\x1b[0m"], ) with patch("muse.cli.commands.bisect.start_bisect", return_value=injected): result = _invoke( root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0], "--symbol", "billing.py::Invoice", "--json"], ) assert result.exit_code == 0 assert "\x1b" not in result.output def test_start_symbol_changes_no_ansi_in_text(self, tmp_path: pathlib.Path) -> None: """symbol_changes entries are sanitized in text output.""" from unittest.mock import patch from muse.core.bisect import BisectResult root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) injected = BisectResult( done=False, first_bad=None, next_to_test=ids[2], remaining_count=3, steps_remaining=2, verdict="started", symbol_changes=["add Invoice.compute\x1b[31mred\x1b[0m"], ) with patch("muse.cli.commands.bisect.start_bisect", return_value=injected): result = _invoke( root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0], "--symbol", "billing.py::Invoice"], ) assert result.exit_code == 0 assert "\x1b" not in result.output def test_start_symbol_missing_separator_exits_1(self, tmp_path: pathlib.Path) -> None: """--symbol without '::' separator is rejected.""" root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 3) result = _invoke( root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0], "--symbol", "NoSeparator"], ) assert result.exit_code == 1 def test_start_symbol_too_long_exits_1(self, tmp_path: pathlib.Path) -> None: """--symbol exceeding max length is rejected.""" root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 3) long_sym = "a" * 510 + "::b" result = _invoke( root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0], "--symbol", long_sym], ) assert result.exit_code == 1 def test_start_json_is_valid_json(self, tmp_path: pathlib.Path) -> None: """JSON output is well-formed.""" root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) result = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0], "--json"]) assert result.exit_code == 0 d = json.loads(_json_blob(result.output)) assert isinstance(d, dict) def test_start_json_bool_fields_are_bool(self, tmp_path: pathlib.Path) -> None: """done field is always a bool, never int or string.""" root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) result = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0], "--json"]) assert result.exit_code == 0 d = json.loads(_json_blob(result.output)) assert isinstance(d["done"], bool) class TestBisectStartStress: """Performance and scale tests for muse bisect start.""" def test_start_100_commit_chain(self, tmp_path: pathlib.Path) -> None: """Start over a 100-commit chain exits 0 and returns a midpoint.""" root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 100) result = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0], "--json"]) assert result.exit_code == 0 parsed = _parse_step(result.output) assert parsed["done"] is False assert parsed["remaining_count"] > 0 assert parsed["next_to_test"] is not None def test_start_performance_100_commits(self, tmp_path: pathlib.Path) -> None: """Start over 100 commits completes within 5 seconds.""" import time root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 100) t0 = time.monotonic() result = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) elapsed = time.monotonic() - t0 assert result.exit_code == 0 assert elapsed < 5.0, f"start over 100 commits took {elapsed:.2f}s" def test_start_midpoint_is_within_range(self, tmp_path: pathlib.Path) -> None: """The suggested midpoint falls strictly between good and bad.""" root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 20) result = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0], "--json"]) assert result.exit_code == 0 parsed = _parse_step(result.output) assert parsed["next_to_test"] not in (ids[0], ids[-1]) # --------------------------------------------------------------------------- # bisect bad — Extended, Security, Stress # --------------------------------------------------------------------------- class TestBisectBadExtended: """Extended unit / integration / e2e tests for muse bisect bad.""" def _start(self, root: pathlib.Path, ids: list[str]) -> None: r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) assert r.exit_code == 0 def test_bad_exits_0(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 6) self._start(root, ids) result = _invoke(root, ["bisect", "bad", ids[len(ids) // 2]]) assert result.exit_code == 0 def test_bad_j_alias_works(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 6) self._start(root, ids) result = _invoke(root, ["bisect", "bad", ids[len(ids) // 2], "-j"]) assert result.exit_code == 0 parsed = _parse_step(result.output) assert parsed["verdict"] == "bad" def test_bad_json_verdict_is_bad(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 6) self._start(root, ids) result = _invoke(root, ["bisect", "bad", ids[len(ids) // 2], "--json"]) assert result.exit_code == 0 assert _parse_step(result.output)["verdict"] == "bad" def test_bad_json_all_seven_keys(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 6) self._start(root, ids) result = _invoke(root, ["bisect", "bad", ids[len(ids) // 2], "--json"]) assert result.exit_code == 0 d = json.loads(_json_blob(result.output)) assert {"done", "first_bad", "next_to_test", "remaining_count", "steps_remaining", "verdict", "symbol_changes"} <= set(d.keys()) def test_bad_reduces_remaining(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 10) r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0], "--json"]) before = _parse_step(r.output)["remaining_count"] mid = _parse_step(r.output)["next_to_test"] result = _invoke(root, ["bisect", "bad", mid, "--json"]) assert result.exit_code == 0 after = _parse_step(result.output)["remaining_count"] assert after < before def test_bad_done_true_when_isolated(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 3) # With 3 commits: good=ids[0], bad=ids[2] → ids[1] is the only remaining self._start(root, ids) result = _invoke(root, ["bisect", "bad", ids[1], "--json"]) assert result.exit_code == 0 parsed = _parse_step(result.output) assert parsed["done"] is True assert parsed["first_bad"] is not None def test_bad_first_bad_set_when_done(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 3) self._start(root, ids) result = _invoke(root, ["bisect", "bad", ids[1], "--json"]) assert result.exit_code == 0 parsed = _parse_step(result.output) assert parsed["done"] is True assert isinstance(parsed["first_bad"], str) def test_bad_defaults_to_head(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) # HEAD points to ids[-1] (the known-bad); marking it bad again is valid result = _invoke(root, ["bisect", "bad", "--json"]) assert result.exit_code == 0 def test_bad_no_session_exits_1(self, tmp_path: pathlib.Path) -> None: root, _ = _make_repo(tmp_path) result = _invoke(root, ["bisect", "bad"]) assert result.exit_code == 1 def test_bad_no_session_error_to_stderr(self, tmp_path: pathlib.Path) -> None: root, _ = _make_repo(tmp_path) result = _invoke(root, ["bisect", "bad"]) assert result.exit_code != 0 combined = result.output + (result.stderr or "") assert "No bisect session" in combined def test_bad_outside_repo_exits_2(self, tmp_path: pathlib.Path) -> None: empty = tmp_path / "not_a_repo" empty.mkdir() result = _invoke(empty, ["bisect", "bad"]) assert result.exit_code == 2 def test_bad_invalid_ref_exits_1(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 4) self._start(root, ids) result = _invoke(root, ["bisect", "bad", "deadbeef_nonexistent"]) assert result.exit_code == 1 def test_bad_text_mentions_commit(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) mid = ids[len(ids) // 2] result = _invoke(root, ["bisect", "bad", mid]) assert result.exit_code == 0 assert short_id(mid) in result.output def test_bad_text_no_json_object(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) result = _invoke(root, ["bisect", "bad", ids[len(ids) // 2]]) assert result.exit_code == 0 assert not result.output.strip().startswith("{") def test_bad_help_description_present(self, tmp_path: pathlib.Path) -> None: root, _ = _make_repo(tmp_path) result = _invoke(root, ["bisect", "bad", "--help"]) assert "Agent quickstart" in result.output or "regression" in result.output.lower() def test_bad_advances_bisect_log(self, tmp_path: pathlib.Path) -> None: """After marking bad, the bisect log records the verdict.""" from muse.core.bisect import _load_state root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 6) self._start(root, ids) mid = ids[len(ids) // 2] _invoke(root, ["bisect", "bad", mid]) state = _load_state(root) assert state is not None assert any("bad" in entry for entry in state.get("log", [])) def test_bad_remaining_count_not_negative(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) result = _invoke(root, ["bisect", "bad", ids[len(ids) // 2], "--json"]) assert result.exit_code == 0 assert _parse_step(result.output)["remaining_count"] >= 0 def test_bad_symbol_changes_is_list(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) result = _invoke(root, ["bisect", "bad", ids[len(ids) // 2], "--json"]) assert result.exit_code == 0 assert isinstance(_parse_step(result.output)["symbol_changes"], list) class TestBisectBadSecurity: """Security hardening tests for muse bisect bad.""" def _start(self, root: pathlib.Path, ids: list[str]) -> None: r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) assert r.exit_code == 0 def test_bad_json_is_valid_json(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) result = _invoke(root, ["bisect", "bad", ids[len(ids) // 2], "--json"]) assert result.exit_code == 0 d = json.loads(_json_blob(result.output)) assert isinstance(d, dict) def test_bad_json_done_is_bool(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) result = _invoke(root, ["bisect", "bad", ids[len(ids) // 2], "--json"]) assert result.exit_code == 0 assert isinstance(json.loads(_json_blob(result.output))["done"], bool) def test_bad_symbol_changes_sanitized_in_json(self, tmp_path: pathlib.Path) -> None: """ANSI in symbol_changes entries stripped from JSON output.""" from unittest.mock import patch from muse.core.bisect import BisectResult root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) injected = BisectResult( done=False, first_bad=None, next_to_test=ids[2], remaining_count=2, steps_remaining=1, verdict="bad", symbol_changes=["modify func\x1b[31mred\x1b[0m"], ) with patch("muse.cli.commands.bisect.mark_bad", return_value=injected): result = _invoke(root, ["bisect", "bad", ids[2], "--json"]) assert "\x1b" not in result.output def test_bad_symbol_changes_sanitized_in_text(self, tmp_path: pathlib.Path) -> None: """ANSI in symbol_changes entries stripped from text output.""" from unittest.mock import patch from muse.core.bisect import BisectResult root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) injected = BisectResult( done=False, first_bad=None, next_to_test=ids[2], remaining_count=2, steps_remaining=1, verdict="bad", symbol_changes=["modify func\x1b[31mred\x1b[0m"], ) with patch("muse.cli.commands.bisect.mark_bad", return_value=injected): result = _invoke(root, ["bisect", "bad", ids[2]]) assert "\x1b" not in result.output def test_bad_error_output_to_stderr_not_stdout(self, tmp_path: pathlib.Path) -> None: """Error messages go to stderr; stdout is clean on failure.""" root, _ = _make_repo(tmp_path) result = _invoke(root, ["bisect", "bad"]) assert result.exit_code != 0 # CliRunner mixes stderr into output; verify no JSON object was emitted assert not result.output.strip().startswith("{") def test_bad_ansi_in_ref_does_not_leak_to_output(self, tmp_path: pathlib.Path) -> None: """Passing an ANSI-injected ref does not leak escape codes to stdout.""" root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 4) self._start(root, ids) result = _invoke(root, ["bisect", "bad", "\x1b[31mHEAD\x1b[0m"]) # Will fail (ref not found) but must not echo ANSI to stdout assert "\x1b" not in result.output class TestBisectBadStress: """Performance and scale tests for muse bisect bad.""" def _start(self, root: pathlib.Path, ids: list[str]) -> None: r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) assert r.exit_code == 0 def test_bad_on_100_commit_chain(self, tmp_path: pathlib.Path) -> None: """Marking bad on a 100-commit session exits 0 and advances the search.""" root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 100) self._start(root, ids) result = _invoke(root, ["bisect", "bad", ids[50], "--json"]) assert result.exit_code == 0 assert _parse_step(result.output)["remaining_count"] >= 0 def test_bad_performance_100_commits(self, tmp_path: pathlib.Path) -> None: """Marking bad on a 100-commit session completes within 5 seconds.""" import time root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 100) self._start(root, ids) t0 = time.monotonic() result = _invoke(root, ["bisect", "bad", ids[50], "--json"]) elapsed = time.monotonic() - t0 assert result.exit_code == 0 assert elapsed < 5.0, f"bisect bad on 100 commits took {elapsed:.2f}s" def test_bad_converges_full_session(self, tmp_path: pathlib.Path) -> None: """Marking next_to_test as bad on every step converges within log2(20) steps.""" root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 20) r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0], "--json"]) assert r.exit_code == 0 parsed = _parse_step(r.output) done = parsed["done"] for _ in range(10): if done: break nxt = parsed["next_to_test"] assert nxt is not None next_r = _invoke(root, ["bisect", "bad", nxt, "--json"]) assert next_r.exit_code == 0 parsed = _parse_step(next_r.output) done = parsed["done"] assert done, "bisect did not converge within 10 bad steps on 20-commit chain" # --------------------------------------------------------------------------- # bisect good — Extended, Security, Stress # --------------------------------------------------------------------------- class TestBisectGoodExtended: """Extended unit / integration / e2e tests for muse bisect good.""" def _start(self, root: pathlib.Path, ids: list[str]) -> None: r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) assert r.exit_code == 0 def test_good_exits_0(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 6) self._start(root, ids) result = _invoke(root, ["bisect", "good", ids[len(ids) // 2]]) assert result.exit_code == 0 def test_good_j_alias_works(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 6) self._start(root, ids) result = _invoke(root, ["bisect", "good", ids[len(ids) // 2], "-j"]) assert result.exit_code == 0 assert _parse_step(result.output)["verdict"] == "good" def test_good_json_verdict_is_good(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 6) self._start(root, ids) result = _invoke(root, ["bisect", "good", ids[len(ids) // 2], "--json"]) assert result.exit_code == 0 assert _parse_step(result.output)["verdict"] == "good" def test_good_json_all_seven_keys(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 6) self._start(root, ids) result = _invoke(root, ["bisect", "good", ids[len(ids) // 2], "--json"]) assert result.exit_code == 0 d = json.loads(_json_blob(result.output)) assert {"done", "first_bad", "next_to_test", "remaining_count", "steps_remaining", "verdict", "symbol_changes"} <= set(d.keys()) def test_good_reduces_remaining(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 10) r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0], "--json"]) before = _parse_step(r.output)["remaining_count"] mid = _parse_step(r.output)["next_to_test"] result = _invoke(root, ["bisect", "good", mid, "--json"]) assert result.exit_code == 0 assert _parse_step(result.output)["remaining_count"] < before def test_good_done_true_when_isolated(self, tmp_path: pathlib.Path) -> None: """Marking the only remaining commit good isolates first bad immediately.""" root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 3) # good=ids[0], bad=ids[2]: ids[1] is the midpoint; marking it good resolves self._start(root, ids) result = _invoke(root, ["bisect", "good", ids[1], "--json"]) assert result.exit_code == 0 parsed = _parse_step(result.output) assert parsed["done"] is True assert parsed["first_bad"] == ids[2] def test_good_first_bad_set_when_done(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 3) self._start(root, ids) result = _invoke(root, ["bisect", "good", ids[1], "--json"]) assert result.exit_code == 0 parsed = _parse_step(result.output) assert parsed["done"] is True assert isinstance(parsed["first_bad"], str) def test_good_defaults_to_head(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) # HEAD is ids[-1] (known bad); marking it good is legal but pushes bad boundary result = _invoke(root, ["bisect", "good", "--json"]) assert result.exit_code == 0 def test_good_no_session_exits_1(self, tmp_path: pathlib.Path) -> None: root, _ = _make_repo(tmp_path) result = _invoke(root, ["bisect", "good"]) assert result.exit_code == 1 def test_good_no_session_error_message(self, tmp_path: pathlib.Path) -> None: root, _ = _make_repo(tmp_path) result = _invoke(root, ["bisect", "good"]) combined = result.output + (result.stderr or "") assert "No bisect session" in combined def test_good_outside_repo_exits_2(self, tmp_path: pathlib.Path) -> None: empty = tmp_path / "not_a_repo" empty.mkdir() result = _invoke(empty, ["bisect", "good"]) assert result.exit_code == 2 def test_good_invalid_ref_exits_1(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 4) self._start(root, ids) result = _invoke(root, ["bisect", "good", "deadbeef_nonexistent"]) assert result.exit_code == 1 def test_good_text_mentions_commit(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) mid = ids[len(ids) // 2] result = _invoke(root, ["bisect", "good", mid]) assert result.exit_code == 0 assert short_id(mid) in result.output def test_good_text_no_json_object(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) result = _invoke(root, ["bisect", "good", ids[len(ids) // 2]]) assert result.exit_code == 0 assert not result.output.strip().startswith("{") def test_good_help_description_present(self, tmp_path: pathlib.Path) -> None: root, _ = _make_repo(tmp_path) result = _invoke(root, ["bisect", "good", "--help"]) assert "Agent quickstart" in result.output or "regression" in result.output.lower() def test_good_advances_bisect_log(self, tmp_path: pathlib.Path) -> None: from muse.core.bisect import _load_state root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 6) self._start(root, ids) _invoke(root, ["bisect", "good", ids[len(ids) // 2]]) state = _load_state(root) assert state is not None assert any("good" in entry for entry in state.get("log", [])) def test_good_remaining_count_not_negative(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) result = _invoke(root, ["bisect", "good", ids[len(ids) // 2], "--json"]) assert result.exit_code == 0 assert _parse_step(result.output)["remaining_count"] >= 0 def test_good_symbol_changes_is_list(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) result = _invoke(root, ["bisect", "good", ids[len(ids) // 2], "--json"]) assert result.exit_code == 0 assert isinstance(_parse_step(result.output)["symbol_changes"], list) class TestBisectGoodSecurity: """Security hardening tests for muse bisect good.""" def _start(self, root: pathlib.Path, ids: list[str]) -> None: r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) assert r.exit_code == 0 def test_good_json_is_valid_json(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) result = _invoke(root, ["bisect", "good", ids[len(ids) // 2], "--json"]) assert result.exit_code == 0 assert isinstance(json.loads(_json_blob(result.output)), dict) def test_good_json_done_is_bool(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) result = _invoke(root, ["bisect", "good", ids[len(ids) // 2], "--json"]) assert result.exit_code == 0 assert isinstance(json.loads(_json_blob(result.output))["done"], bool) def test_good_symbol_changes_sanitized_in_json(self, tmp_path: pathlib.Path) -> None: from unittest.mock import patch from muse.core.bisect import BisectResult root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) injected = BisectResult( done=False, first_bad=None, next_to_test=ids[2], remaining_count=2, steps_remaining=1, verdict="good", symbol_changes=["add func\x1b[32mgreen\x1b[0m"], ) with patch("muse.cli.commands.bisect.mark_good", return_value=injected): result = _invoke(root, ["bisect", "good", ids[2], "--json"]) assert "\x1b" not in result.output def test_good_symbol_changes_sanitized_in_text(self, tmp_path: pathlib.Path) -> None: from unittest.mock import patch from muse.core.bisect import BisectResult root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) injected = BisectResult( done=False, first_bad=None, next_to_test=ids[2], remaining_count=2, steps_remaining=1, verdict="good", symbol_changes=["add func\x1b[32mgreen\x1b[0m"], ) with patch("muse.cli.commands.bisect.mark_good", return_value=injected): result = _invoke(root, ["bisect", "good", ids[2]]) assert "\x1b" not in result.output def test_good_error_no_json_on_failure(self, tmp_path: pathlib.Path) -> None: root, _ = _make_repo(tmp_path) result = _invoke(root, ["bisect", "good"]) assert result.exit_code != 0 assert not result.output.strip().startswith("{") def test_good_ansi_in_ref_does_not_leak(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 4) self._start(root, ids) result = _invoke(root, ["bisect", "good", "\x1b[32mHEAD\x1b[0m"]) assert "\x1b" not in result.output class TestBisectGoodStress: """Performance and scale tests for muse bisect good.""" def _start(self, root: pathlib.Path, ids: list[str]) -> None: r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) assert r.exit_code == 0 def test_good_on_100_commit_chain(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 100) self._start(root, ids) result = _invoke(root, ["bisect", "good", ids[10], "--json"]) assert result.exit_code == 0 assert _parse_step(result.output)["remaining_count"] >= 0 def test_good_performance_100_commits(self, tmp_path: pathlib.Path) -> None: import time root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 100) self._start(root, ids) t0 = time.monotonic() result = _invoke(root, ["bisect", "good", ids[10], "--json"]) elapsed = time.monotonic() - t0 assert result.exit_code == 0 assert elapsed < 5.0, f"bisect good on 100 commits took {elapsed:.2f}s" def test_good_converges_full_session(self, tmp_path: pathlib.Path) -> None: """Marking next_to_test as good on each step converges within log2(20) steps.""" root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 20) r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0], "--json"]) assert r.exit_code == 0 parsed = _parse_step(r.output) done = parsed["done"] for _ in range(10): if done: break nxt = parsed["next_to_test"] assert nxt is not None next_r = _invoke(root, ["bisect", "good", nxt, "--json"]) assert next_r.exit_code == 0 parsed = _parse_step(next_r.output) done = parsed["done"] assert done, "bisect did not converge within 10 good steps on 20-commit chain" # --------------------------------------------------------------------------- # bisect skip — Extended, Security, Stress # --------------------------------------------------------------------------- class TestBisectSkipExtended: """Extended unit / integration / e2e tests for muse bisect skip.""" def _start(self, root: pathlib.Path, ids: list[str]) -> None: r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) assert r.exit_code == 0 def test_skip_exits_0(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 6) self._start(root, ids) result = _invoke(root, ["bisect", "skip", ids[len(ids) // 2]]) assert result.exit_code == 0 def test_skip_j_alias_works(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 6) self._start(root, ids) result = _invoke(root, ["bisect", "skip", ids[len(ids) // 2], "-j"]) assert result.exit_code == 0 assert _parse_step(result.output)["verdict"] == "skip" def test_skip_json_verdict_is_skip(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 6) self._start(root, ids) result = _invoke(root, ["bisect", "skip", ids[len(ids) // 2], "--json"]) assert result.exit_code == 0 assert _parse_step(result.output)["verdict"] == "skip" def test_skip_json_all_seven_keys(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 6) self._start(root, ids) result = _invoke(root, ["bisect", "skip", ids[len(ids) // 2], "--json"]) assert result.exit_code == 0 d = json.loads(_json_blob(result.output)) assert {"done", "first_bad", "next_to_test", "remaining_count", "steps_remaining", "verdict", "symbol_changes"} <= set(d.keys()) def test_skip_removes_commit_from_remaining(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 10) r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0], "--json"]) before = _parse_step(r.output)["remaining_count"] mid = _parse_step(r.output)["next_to_test"] result = _invoke(root, ["bisect", "skip", mid, "--json"]) assert result.exit_code == 0 assert _parse_step(result.output)["remaining_count"] < before def test_skip_persisted_in_state(self, tmp_path: pathlib.Path) -> None: from muse.core.bisect import _load_state root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 6) self._start(root, ids) mid = ids[len(ids) // 2] _invoke(root, ["bisect", "skip", mid]) state = _load_state(root) assert state is not None assert mid in state.get("skipped_ids", []) def test_skip_defaults_to_head(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) result = _invoke(root, ["bisect", "skip", "--json"]) assert result.exit_code == 0 def test_skip_no_session_exits_1(self, tmp_path: pathlib.Path) -> None: root, _ = _make_repo(tmp_path) result = _invoke(root, ["bisect", "skip"]) assert result.exit_code == 1 def test_skip_no_session_error_message(self, tmp_path: pathlib.Path) -> None: root, _ = _make_repo(tmp_path) result = _invoke(root, ["bisect", "skip"]) combined = result.output + (result.stderr or "") assert "No bisect session" in combined def test_skip_outside_repo_exits_2(self, tmp_path: pathlib.Path) -> None: empty = tmp_path / "not_a_repo" empty.mkdir() result = _invoke(empty, ["bisect", "skip"]) assert result.exit_code == 2 def test_skip_invalid_ref_exits_1(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 4) self._start(root, ids) result = _invoke(root, ["bisect", "skip", "deadbeef_nonexistent"]) assert result.exit_code == 1 def test_skip_text_mentions_commit(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) mid = ids[len(ids) // 2] result = _invoke(root, ["bisect", "skip", mid]) assert result.exit_code == 0 assert short_id(mid) in result.output def test_skip_text_no_json_object(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) result = _invoke(root, ["bisect", "skip", ids[len(ids) // 2]]) assert result.exit_code == 0 assert not result.output.strip().startswith("{") def test_skip_help_description_present(self, tmp_path: pathlib.Path) -> None: root, _ = _make_repo(tmp_path) result = _invoke(root, ["bisect", "skip", "--help"]) assert "Agent quickstart" in result.output or "125" in result.output def test_skip_advances_log(self, tmp_path: pathlib.Path) -> None: from muse.core.bisect import _load_state root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 6) self._start(root, ids) _invoke(root, ["bisect", "skip", ids[len(ids) // 2]]) state = _load_state(root) assert state is not None assert any("skip" in entry for entry in state.get("log", [])) def test_skip_remaining_count_not_negative(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) result = _invoke(root, ["bisect", "skip", ids[len(ids) // 2], "--json"]) assert result.exit_code == 0 assert _parse_step(result.output)["remaining_count"] >= 0 def test_skip_symbol_changes_is_list(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) result = _invoke(root, ["bisect", "skip", ids[len(ids) // 2], "--json"]) assert result.exit_code == 0 assert isinstance(_parse_step(result.output)["symbol_changes"], list) def test_skip_multiple_commits(self, tmp_path: pathlib.Path) -> None: """Skipping several commits all land in skipped_ids.""" from muse.core.bisect import _load_state root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 8) self._start(root, ids) for idx in (2, 3, 4): r = _invoke(root, ["bisect", "skip", ids[idx]]) assert r.exit_code == 0 state = _load_state(root) assert state is not None skipped = state.get("skipped_ids", []) assert all(ids[i] in skipped for i in (2, 3, 4)) class TestBisectSkipSecurity: """Security hardening tests for muse bisect skip.""" def _start(self, root: pathlib.Path, ids: list[str]) -> None: r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) assert r.exit_code == 0 def test_skip_json_is_valid_json(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) result = _invoke(root, ["bisect", "skip", ids[len(ids) // 2], "--json"]) assert result.exit_code == 0 assert isinstance(json.loads(_json_blob(result.output)), dict) def test_skip_json_done_is_bool(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) result = _invoke(root, ["bisect", "skip", ids[len(ids) // 2], "--json"]) assert result.exit_code == 0 assert isinstance(json.loads(_json_blob(result.output))["done"], bool) def test_skip_symbol_changes_sanitized_in_json(self, tmp_path: pathlib.Path) -> None: from unittest.mock import patch from muse.core.bisect import BisectResult root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) injected = BisectResult( done=False, first_bad=None, next_to_test=ids[2], remaining_count=2, steps_remaining=1, verdict="skip", symbol_changes=["modify func\x1b[33myellow\x1b[0m"], ) with patch("muse.cli.commands.bisect.skip_commit", return_value=injected): result = _invoke(root, ["bisect", "skip", ids[2], "--json"]) assert "\x1b" not in result.output def test_skip_symbol_changes_sanitized_in_text(self, tmp_path: pathlib.Path) -> None: from unittest.mock import patch from muse.core.bisect import BisectResult root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) injected = BisectResult( done=False, first_bad=None, next_to_test=ids[2], remaining_count=2, steps_remaining=1, verdict="skip", symbol_changes=["modify func\x1b[33myellow\x1b[0m"], ) with patch("muse.cli.commands.bisect.skip_commit", return_value=injected): result = _invoke(root, ["bisect", "skip", ids[2]]) assert "\x1b" not in result.output def test_skip_error_no_json_on_failure(self, tmp_path: pathlib.Path) -> None: root, _ = _make_repo(tmp_path) result = _invoke(root, ["bisect", "skip"]) assert result.exit_code != 0 assert not result.output.strip().startswith("{") def test_skip_ansi_in_ref_does_not_leak(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 4) self._start(root, ids) result = _invoke(root, ["bisect", "skip", "\x1b[33mHEAD\x1b[0m"]) assert "\x1b" not in result.output class TestBisectSkipStress: """Performance and scale tests for muse bisect skip.""" def _start(self, root: pathlib.Path, ids: list[str]) -> None: r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) assert r.exit_code == 0 def test_skip_on_100_commit_chain(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 100) self._start(root, ids) result = _invoke(root, ["bisect", "skip", ids[50], "--json"]) assert result.exit_code == 0 assert _parse_step(result.output)["remaining_count"] >= 0 def test_skip_performance_100_commits(self, tmp_path: pathlib.Path) -> None: import time root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 100) self._start(root, ids) t0 = time.monotonic() result = _invoke(root, ["bisect", "skip", ids[50], "--json"]) elapsed = time.monotonic() - t0 assert result.exit_code == 0 assert elapsed < 5.0, f"bisect skip on 100 commits took {elapsed:.2f}s" def test_skip_reduces_remaining_monotonically(self, tmp_path: pathlib.Path) -> None: """Each consecutive skip reduces remaining_count (non-increasing sequence).""" root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 20) r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0], "--json"]) assert r.exit_code == 0 counts = [_parse_step(r.output)["remaining_count"]] cur = r for _ in range(5): parsed = _parse_step(cur.output) if parsed["done"] or parsed["next_to_test"] is None: break nxt = parsed["next_to_test"] cur = _invoke(root, ["bisect", "skip", nxt, "--json"]) assert cur.exit_code == 0 counts.append(_parse_step(cur.output)["remaining_count"]) assert all(counts[i] >= counts[i + 1] for i in range(len(counts) - 1)) # --------------------------------------------------------------------------- # bisect run — Extended, Security, Stress # --------------------------------------------------------------------------- class TestBisectRunExtended: """Extended unit / integration / e2e tests for muse bisect run.""" def _start(self, root: pathlib.Path, ids: list[str]) -> None: r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) assert r.exit_code == 0 def test_run_exits_0_with_true(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) result = _invoke(root, ["bisect", "run", "true"]) assert result.exit_code == 0 def test_run_j_alias_works(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) result = _invoke(root, ["bisect", "run", "true", "-j"]) assert result.exit_code == 0 lines = [l.strip() for l in result.output.strip().splitlines() if l.strip()] assert len(lines) >= 1 done_raw = json.loads(lines[-1]) assert done_raw["done"] is True def test_run_json_ndjson_step_keys(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 6) self._start(root, ids) result = _invoke(root, ["bisect", "run", "true", "--json"]) assert result.exit_code == 0 lines = [l.strip() for l in result.output.strip().splitlines() if l.strip()] if len(lines) > 1: step = json.loads(lines[0]) assert {"step", "testing", "verdict", "remaining_count", "done", "symbol_changes"} <= set(step.keys()) def test_run_json_done_line_keys(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) result = _invoke(root, ["bisect", "run", "true", "--json"]) assert result.exit_code == 0 lines = [l.strip() for l in result.output.strip().splitlines() if l.strip()] done = json.loads(lines[-1]) assert set(done.keys()) == {"done", "first_bad", "steps_taken"} def test_run_json_done_true_on_last_line(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) result = _invoke(root, ["bisect", "run", "true", "--json"]) assert result.exit_code == 0 lines = [l.strip() for l in result.output.strip().splitlines() if l.strip()] assert json.loads(lines[-1])["done"] is True def test_run_json_steps_taken_positive(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 6) self._start(root, ids) result = _invoke(root, ["bisect", "run", "true", "--json"]) assert result.exit_code == 0 lines = [l.strip() for l in result.output.strip().splitlines() if l.strip()] assert json.loads(lines[-1])["steps_taken"] >= 1 def test_run_json_verdict_good_with_true(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) result = _invoke(root, ["bisect", "run", "true", "--json"]) assert result.exit_code == 0 lines = [l.strip() for l in result.output.strip().splitlines() if l.strip()] step_lines = lines[:-1] assert all(json.loads(l)["verdict"] == "good" for l in step_lines) def test_run_json_verdict_bad_with_false(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) result = _invoke(root, ["bisect", "run", "false", "--json"]) assert result.exit_code == 0 lines = [l.strip() for l in result.output.strip().splitlines() if l.strip()] step_lines = lines[:-1] assert all(json.loads(l)["verdict"] == "bad" for l in step_lines) def test_run_no_session_exits_1(self, tmp_path: pathlib.Path) -> None: root, _ = _make_repo(tmp_path) result = _invoke(root, ["bisect", "run", "true"]) assert result.exit_code == 1 def test_run_no_session_error_message(self, tmp_path: pathlib.Path) -> None: root, _ = _make_repo(tmp_path) result = _invoke(root, ["bisect", "run", "true"]) combined = result.output + (result.stderr or "") assert "No bisect session" in combined def test_run_outside_repo_exits_2(self, tmp_path: pathlib.Path) -> None: empty = tmp_path / "not_a_repo" empty.mkdir() result = _invoke(empty, ["bisect", "run", "true"]) assert result.exit_code == 2 def test_run_text_mentions_testing(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) result = _invoke(root, ["bisect", "run", "true"]) assert result.exit_code == 0 assert "Testing" in result.output or "→" in result.output def test_run_text_mentions_first_bad(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) result = _invoke(root, ["bisect", "run", "true"]) assert result.exit_code == 0 assert "First bad commit" in result.output or "Bisect complete" in result.output def test_run_help_description_present(self, tmp_path: pathlib.Path) -> None: root, _ = _make_repo(tmp_path) result = _invoke(root, ["bisect", "run", "--help"]) assert "Agent quickstart" in result.output or "125" in result.output def test_run_json_step_numbers_increment(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 8) self._start(root, ids) result = _invoke(root, ["bisect", "run", "true", "--json"]) assert result.exit_code == 0 lines = [l.strip() for l in result.output.strip().splitlines() if l.strip()] step_nums = [json.loads(l)["step"] for l in lines[:-1]] assert step_nums == list(range(1, len(step_nums) + 1)) def test_run_json_remaining_nonincreasing(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 8) self._start(root, ids) result = _invoke(root, ["bisect", "run", "true", "--json"]) assert result.exit_code == 0 lines = [l.strip() for l in result.output.strip().splitlines() if l.strip()] counts = [json.loads(l)["remaining_count"] for l in lines[:-1]] assert all(counts[i] >= counts[i + 1] for i in range(len(counts) - 1)) def test_run_text_no_json_by_default(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 4) self._start(root, ids) result = _invoke(root, ["bisect", "run", "true"]) assert result.exit_code == 0 # Text mode should not have a JSON object on a line by itself json_lines = [l for l in result.output.splitlines() if l.strip().startswith("{")] assert json_lines == [] def test_run_json_first_bad_set_on_done(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) result = _invoke(root, ["bisect", "run", "true", "--json"]) assert result.exit_code == 0 lines = [l.strip() for l in result.output.strip().splitlines() if l.strip()] done = json.loads(lines[-1]) if done["done"]: assert done["first_bad"] is not None class TestBisectRunSecurity: """Security hardening tests for muse bisect run.""" def _start(self, root: pathlib.Path, ids: list[str]) -> None: r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) assert r.exit_code == 0 def test_run_json_lines_are_valid_json(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) result = _invoke(root, ["bisect", "run", "true", "--json"]) assert result.exit_code == 0 for line in result.output.strip().splitlines(): if line.strip(): assert isinstance(json.loads(line.strip()), dict) def test_run_json_done_field_is_bool(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) result = _invoke(root, ["bisect", "run", "true", "--json"]) assert result.exit_code == 0 lines = [l.strip() for l in result.output.strip().splitlines() if l.strip()] for line in lines: assert isinstance(json.loads(line)["done"], bool) def test_run_text_symbol_changes_sanitized(self, tmp_path: pathlib.Path) -> None: """ANSI codes in symbol_changes are stripped from text output during run.""" from unittest.mock import patch from muse.core.bisect import BisectResult root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) injected_result = BisectResult( done=True, first_bad=ids[2], next_to_test=None, remaining_count=0, steps_remaining=0, verdict="bad", symbol_changes=[], ) with patch("muse.cli.commands.bisect._symbol_ops_in_commit", return_value=["add func\x1b[31mred\x1b[0m"]), \ patch("muse.cli.commands.bisect.get_bisect_next", side_effect=[(ids[2], "billing.py::Invoice"), (None, "")]), \ patch("muse.cli.commands.bisect.run_bisect_command", return_value=injected_result): result = _invoke(root, ["bisect", "run", "true"]) assert "\x1b" not in result.output def test_run_error_no_json_on_failure(self, tmp_path: pathlib.Path) -> None: root, _ = _make_repo(tmp_path) result = _invoke(root, ["bisect", "run", "true"]) assert result.exit_code != 0 assert not result.output.strip().startswith("{") def test_run_json_no_ansi_in_output(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) result = _invoke(root, ["bisect", "run", "true", "--json"]) assert result.exit_code == 0 assert "\x1b" not in result.output def test_run_text_no_ansi_in_output(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) result = _invoke(root, ["bisect", "run", "true"]) assert result.exit_code == 0 assert "\x1b" not in result.output class TestBisectRunStress: """Performance and scale tests for muse bisect run.""" def _start(self, root: pathlib.Path, ids: list[str]) -> None: r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) assert r.exit_code == 0 def test_run_50_commit_chain(self, tmp_path: pathlib.Path) -> None: """run converges on a 50-commit chain with always-good command.""" root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 50) self._start(root, ids) result = _invoke(root, ["bisect", "run", "true", "--json"]) assert result.exit_code == 0 lines = [l.strip() for l in result.output.strip().splitlines() if l.strip()] assert json.loads(lines[-1])["done"] is True def test_run_performance_20_commits(self, tmp_path: pathlib.Path) -> None: """run over 20 commits completes within 10 seconds.""" import time root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 20) self._start(root, ids) t0 = time.monotonic() result = _invoke(root, ["bisect", "run", "true", "--json"]) elapsed = time.monotonic() - t0 assert result.exit_code == 0 assert elapsed < 10.0, f"bisect run 20 commits took {elapsed:.2f}s" def test_run_steps_taken_within_log2(self, tmp_path: pathlib.Path) -> None: """Steps taken should be at most log2(n)+1 for an always-good command.""" import math root, repo_id = _make_repo(tmp_path) n = 32 ids = _build_chain(root, repo_id, n) self._start(root, ids) result = _invoke(root, ["bisect", "run", "true", "--json"]) assert result.exit_code == 0 lines = [l.strip() for l in result.output.strip().splitlines() if l.strip()] steps_taken = json.loads(lines[-1])["steps_taken"] assert steps_taken <= int(math.log2(n)) + 2 # --------------------------------------------------------------------------- # bisect log — Extended, Security, Stress # --------------------------------------------------------------------------- class TestBisectLogExtended: """Extended unit / integration / e2e tests for muse bisect log.""" def _start(self, root: pathlib.Path, ids: list[str]) -> None: r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) assert r.exit_code == 0 def test_log_exits_0_no_session(self, tmp_path: pathlib.Path) -> None: root, _ = _make_repo(tmp_path) result = _invoke(root, ["bisect", "log"]) assert result.exit_code == 0 def test_log_exits_0_with_session(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 4) self._start(root, ids) result = _invoke(root, ["bisect", "log"]) assert result.exit_code == 0 def test_log_j_alias_works(self, tmp_path: pathlib.Path) -> None: root, _ = _make_repo(tmp_path) result = _invoke(root, ["bisect", "log", "-j"]) assert result.exit_code == 0 parsed = _parse_log(result.output) assert isinstance(parsed["active"], bool) def test_log_json_active_false_no_session(self, tmp_path: pathlib.Path) -> None: root, _ = _make_repo(tmp_path) result = _invoke(root, ["bisect", "log", "--json"]) assert result.exit_code == 0 assert _parse_log(result.output)["active"] is False def test_log_json_active_true_with_session(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 4) self._start(root, ids) result = _invoke(root, ["bisect", "log", "--json"]) assert result.exit_code == 0 assert _parse_log(result.output)["active"] is True def test_log_json_entries_empty_no_session(self, tmp_path: pathlib.Path) -> None: root, _ = _make_repo(tmp_path) result = _invoke(root, ["bisect", "log", "--json"]) assert result.exit_code == 0 assert _parse_log(result.output)["entries"] == [] def test_log_json_entries_grow_with_verdicts(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 6) self._start(root, ids) after_start = len(_parse_log(_invoke(root, ["bisect", "log", "--json"]).output)["entries"]) _invoke(root, ["bisect", "bad", ids[3]]) after_bad = len(_parse_log(_invoke(root, ["bisect", "log", "--json"]).output)["entries"]) assert after_bad > after_start def test_log_json_two_keys(self, tmp_path: pathlib.Path) -> None: root, _ = _make_repo(tmp_path) result = _invoke(root, ["bisect", "log", "--json"]) assert result.exit_code == 0 d = json.loads(_json_blob(result.output)) assert {"active", "entries"} <= set(d.keys()) def test_log_json_start_records_bad_and_good(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 4) self._start(root, ids) entries = _parse_log(_invoke(root, ["bisect", "log", "--json"]).output)["entries"] verdicts = [e["verdict"] for e in entries] assert "bad" in verdicts assert "good" in verdicts def test_log_json_entries_contain_commit_ids(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 4) self._start(root, ids) entries = _parse_log(_invoke(root, ["bisect", "log", "--json"]).output)["entries"] for entry in entries: # commit_id is stored with the sha256: prefix (71 chars total) assert entry["commit_id"].startswith("sha256:") def test_log_json_entries_are_dicts(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 3) self._start(root, ids) entries = _parse_log(_invoke(root, ["bisect", "log", "--json"]).output)["entries"] for e in entries: assert isinstance(e, dict) assert "commit_id" in e assert "verdict" in e assert "timestamp" in e def test_log_active_false_after_reset(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 3) self._start(root, ids) _invoke(root, ["bisect", "reset"]) result = _invoke(root, ["bisect", "log", "--json"]) assert result.exit_code == 0 assert _parse_log(result.output)["active"] is False def test_log_text_shows_bisect_log_header(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 4) self._start(root, ids) result = _invoke(root, ["bisect", "log"]) assert result.exit_code == 0 assert "Bisect log" in result.output def test_log_text_no_session_message(self, tmp_path: pathlib.Path) -> None: root, _ = _make_repo(tmp_path) result = _invoke(root, ["bisect", "log"]) assert result.exit_code == 0 assert "No bisect log" in result.output def test_log_outside_repo_exits_2(self, tmp_path: pathlib.Path) -> None: empty = tmp_path / "not_a_repo" empty.mkdir() result = _invoke(empty, ["bisect", "log"]) assert result.exit_code == 2 def test_log_help_description_present(self, tmp_path: pathlib.Path) -> None: root, _ = _make_repo(tmp_path) result = _invoke(root, ["bisect", "log", "--help"]) assert "Agent quickstart" in result.output or "verdict" in result.output.lower() def test_log_text_no_json_object(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 4) self._start(root, ids) result = _invoke(root, ["bisect", "log"]) assert result.exit_code == 0 assert not any(l.strip().startswith("{") for l in result.output.splitlines()) class TestBisectLogSecurity: """Security hardening tests for muse bisect log.""" def _start(self, root: pathlib.Path, ids: list[str]) -> None: r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) assert r.exit_code == 0 def test_log_json_is_valid_json(self, tmp_path: pathlib.Path) -> None: root, _ = _make_repo(tmp_path) result = _invoke(root, ["bisect", "log", "--json"]) assert result.exit_code == 0 d = json.loads(_json_blob(result.output)) assert isinstance(d, dict) def test_log_json_active_is_bool(self, tmp_path: pathlib.Path) -> None: root, _ = _make_repo(tmp_path) result = _invoke(root, ["bisect", "log", "--json"]) assert result.exit_code == 0 assert isinstance(json.loads(_json_blob(result.output))["active"], bool) def test_log_json_entries_sanitized(self, tmp_path: pathlib.Path) -> None: """ANSI codes injected into the log state are stripped from JSON output.""" from muse.core.bisect import _load_state, _save_state root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 3) self._start(root, ids) # Tamper: inject ANSI into a log entry state = _load_state(root) assert state is not None state["log"].append(f"{ids[1]} bad\x1b[31m 2026-01-01T00:00:00\x1b[0m") _save_state(root, state) result = _invoke(root, ["bisect", "log", "--json"]) assert result.exit_code == 0 assert "\x1b" not in result.output def test_log_text_entries_sanitized(self, tmp_path: pathlib.Path) -> None: """ANSI codes in log entries are stripped from text output.""" from muse.core.bisect import _load_state, _save_state root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 3) self._start(root, ids) state = _load_state(root) assert state is not None state["log"].append(f"{ids[1]} bad\x1b[31m 2026-01-01T00:00:00\x1b[0m") _save_state(root, state) result = _invoke(root, ["bisect", "log"]) assert result.exit_code == 0 assert "\x1b" not in result.output def test_log_json_no_ansi_in_output(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 4) self._start(root, ids) result = _invoke(root, ["bisect", "log", "--json"]) assert result.exit_code == 0 assert "\x1b" not in result.output def test_log_text_no_ansi_in_output(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 4) self._start(root, ids) result = _invoke(root, ["bisect", "log"]) assert result.exit_code == 0 assert "\x1b" not in result.output class TestBisectLogStress: """Performance and scale tests for muse bisect log.""" def _start(self, root: pathlib.Path, ids: list[str]) -> None: r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) assert r.exit_code == 0 def test_log_100_commit_session(self, tmp_path: pathlib.Path) -> None: """Log on a 100-step session returns all entries.""" root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 100) self._start(root, ids) # Apply 10 good verdicts to build up a log for i in range(1, 11): _invoke(root, ["bisect", "good", ids[i]]) result = _invoke(root, ["bisect", "log", "--json"]) assert result.exit_code == 0 entries = _parse_log(result.output)["entries"] # start adds 2 entries; 10 good verdicts add 10 more assert len(entries) >= 12 def test_log_performance_large_session(self, tmp_path: pathlib.Path) -> None: """Log on a large session completes within 5 seconds.""" import time root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 50) self._start(root, ids) for i in range(1, 8): _invoke(root, ["bisect", "bad", ids[i]]) t0 = time.monotonic() result = _invoke(root, ["bisect", "log", "--json"]) elapsed = time.monotonic() - t0 assert result.exit_code == 0 assert elapsed < 5.0, f"bisect log took {elapsed:.2f}s" def test_log_concurrent_reads_consistent(self, tmp_path: pathlib.Path) -> None: """Concurrent log reads all return the same entry count.""" root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 20) self._start(root, ids) _invoke(root, ["bisect", "bad", ids[10]]) counts: list[int] = [] errors: list[str] = [] lock = threading.Lock() def _run() -> None: r = _invoke(root, ["bisect", "log", "--json"]) with lock: if r.exit_code != 0: errors.append(r.output) return try: counts.append(len(_parse_log(r.output)["entries"])) except (json.JSONDecodeError, KeyError, ValueError) as exc: errors.append(f"parse error: {exc!r} output={r.output!r}") threads = [threading.Thread(target=_run) for _ in range(8)] for t in threads: t.start() for t in threads: t.join() assert not errors assert all(c == counts[0] for c in counts) # --------------------------------------------------------------------------- # bisect reset — Extended, Security, Stress # --------------------------------------------------------------------------- class TestBisectResetExtended: """Extended unit / integration / e2e tests for muse bisect reset.""" def _start(self, root: pathlib.Path, ids: list[str]) -> None: r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) assert r.exit_code == 0 def test_reset_exits_0_with_session(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 4) self._start(root, ids) assert _invoke(root, ["bisect", "reset"]).exit_code == 0 def test_reset_exits_0_no_session(self, tmp_path: pathlib.Path) -> None: root, _ = _make_repo(tmp_path) assert _invoke(root, ["bisect", "reset"]).exit_code == 0 def test_reset_j_alias_works(self, tmp_path: pathlib.Path) -> None: root, _ = _make_repo(tmp_path) result = _invoke(root, ["bisect", "reset", "-j"]) assert result.exit_code == 0 assert _parse_reset(result.output)["reset"] is True def test_reset_json_reset_true(self, tmp_path: pathlib.Path) -> None: root, _ = _make_repo(tmp_path) result = _invoke(root, ["bisect", "reset", "--json"]) assert result.exit_code == 0 assert _parse_reset(result.output)["reset"] is True def test_reset_json_single_key(self, tmp_path: pathlib.Path) -> None: root, _ = _make_repo(tmp_path) result = _invoke(root, ["bisect", "reset", "--json"]) assert result.exit_code == 0 d = json.loads(_json_blob(result.output)) assert {"reset"} <= set(d.keys()) def test_reset_clears_active_session(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 4) self._start(root, ids) _invoke(root, ["bisect", "reset"]) log_r = _invoke(root, ["bisect", "log", "--json"]) assert _parse_log(log_r.output)["active"] is False def test_reset_prevents_bad_after_reset(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 4) self._start(root, ids) _invoke(root, ["bisect", "reset"]) result = _invoke(root, ["bisect", "bad", ids[2]]) assert result.exit_code == 1 def test_reset_prevents_good_after_reset(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 4) self._start(root, ids) _invoke(root, ["bisect", "reset"]) assert _invoke(root, ["bisect", "good", ids[1]]).exit_code == 1 def test_reset_prevents_skip_after_reset(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 4) self._start(root, ids) _invoke(root, ["bisect", "reset"]) assert _invoke(root, ["bisect", "skip", ids[2]]).exit_code == 1 def test_reset_idempotent_double_reset(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 3) self._start(root, ids) assert _invoke(root, ["bisect", "reset"]).exit_code == 0 assert _invoke(root, ["bisect", "reset"]).exit_code == 0 def test_reset_allows_new_session_after(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) _invoke(root, ["bisect", "reset"]) result = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) assert result.exit_code == 0 def test_reset_clears_log_entries(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 4) self._start(root, ids) _invoke(root, ["bisect", "bad", ids[2]]) _invoke(root, ["bisect", "reset"]) entries = _parse_log(_invoke(root, ["bisect", "log", "--json"]).output)["entries"] assert entries == [] def test_reset_text_output_mentions_reset(self, tmp_path: pathlib.Path) -> None: root, _ = _make_repo(tmp_path) result = _invoke(root, ["bisect", "reset"]) assert result.exit_code == 0 assert "reset" in result.output.lower() def test_reset_text_no_json_object(self, tmp_path: pathlib.Path) -> None: root, _ = _make_repo(tmp_path) result = _invoke(root, ["bisect", "reset"]) assert not result.output.strip().startswith("{") def test_reset_outside_repo_exits_2(self, tmp_path: pathlib.Path) -> None: empty = tmp_path / "not_a_repo" empty.mkdir() assert _invoke(empty, ["bisect", "reset"]).exit_code == 2 def test_reset_help_description_present(self, tmp_path: pathlib.Path) -> None: root, _ = _make_repo(tmp_path) result = _invoke(root, ["bisect", "reset", "--help"]) assert "Agent quickstart" in result.output or "Idempotent" in result.output def test_reset_json_reset_is_bool(self, tmp_path: pathlib.Path) -> None: root, _ = _make_repo(tmp_path) result = _invoke(root, ["bisect", "reset", "--json"]) assert result.exit_code == 0 assert isinstance(json.loads(_json_blob(result.output))["reset"], bool) def test_reset_mid_session_with_verdicts(self, tmp_path: pathlib.Path) -> None: """Reset works correctly after several verdicts have been applied.""" root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 10) self._start(root, ids) _invoke(root, ["bisect", "bad", ids[7]]) _invoke(root, ["bisect", "good", ids[3]]) result = _invoke(root, ["bisect", "reset", "--json"]) assert result.exit_code == 0 assert _parse_reset(result.output)["reset"] is True assert _parse_log(_invoke(root, ["bisect", "log", "--json"]).output)["active"] is False class TestBisectResetSecurity: """Security hardening tests for muse bisect reset.""" def test_reset_json_is_valid_json(self, tmp_path: pathlib.Path) -> None: root, _ = _make_repo(tmp_path) result = _invoke(root, ["bisect", "reset", "--json"]) assert result.exit_code == 0 assert isinstance(json.loads(_json_blob(result.output)), dict) def test_reset_json_no_ansi(self, tmp_path: pathlib.Path) -> None: root, _ = _make_repo(tmp_path) result = _invoke(root, ["bisect", "reset", "--json"]) assert result.exit_code == 0 assert "\x1b" not in result.output def test_reset_text_no_ansi(self, tmp_path: pathlib.Path) -> None: root, _ = _make_repo(tmp_path) result = _invoke(root, ["bisect", "reset"]) assert result.exit_code == 0 assert "\x1b" not in result.output def test_reset_state_file_removed(self, tmp_path: pathlib.Path) -> None: """After reset the state file no longer exists on disk.""" from muse.core.bisect import _state_path root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 3) _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) assert _state_path(root).exists() _invoke(root, ["bisect", "reset"]) assert not _state_path(root).exists() def test_reset_no_session_state_file_absent(self, tmp_path: pathlib.Path) -> None: """Reset with no state file is a safe no-op.""" from muse.core.bisect import _state_path root, _ = _make_repo(tmp_path) assert not _state_path(root).exists() result = _invoke(root, ["bisect", "reset"]) assert result.exit_code == 0 def test_reset_json_reset_value_true(self, tmp_path: pathlib.Path) -> None: """reset field is always true, never false or a truthy int.""" root, _ = _make_repo(tmp_path) result = _invoke(root, ["bisect", "reset", "--json"]) assert result.exit_code == 0 assert json.loads(_json_blob(result.output))["reset"] is True class TestBisectResetStress: """Performance and scale tests for muse bisect reset.""" def test_reset_after_100_commit_session(self, tmp_path: pathlib.Path) -> None: """Reset clears state from a 100-commit session instantly.""" root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 100) _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) result = _invoke(root, ["bisect", "reset", "--json"]) assert result.exit_code == 0 assert _parse_reset(result.output)["reset"] is True assert _parse_log(_invoke(root, ["bisect", "log", "--json"]).output)["active"] is False def test_reset_performance(self, tmp_path: pathlib.Path) -> None: """Reset completes within 2 seconds even after a large session.""" import time root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 100) _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) for i in range(1, 8): _invoke(root, ["bisect", "bad", ids[i]]) t0 = time.monotonic() result = _invoke(root, ["bisect", "reset"]) elapsed = time.monotonic() - t0 assert result.exit_code == 0 assert elapsed < 2.0, f"bisect reset took {elapsed:.2f}s" def test_reset_cycle_10_times(self, tmp_path: pathlib.Path) -> None: """Start → reset × 10 all succeed with no state leakage.""" root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 6) for _ in range(10): r_start = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) assert r_start.exit_code == 0 r_reset = _invoke(root, ["bisect", "reset", "--json"]) assert r_reset.exit_code == 0 assert _parse_reset(r_reset.output)["reset"] is True # =========================================================================== # New feature tests — status, structured log, timeout, symbol_changes in run # =========================================================================== class TestBisectStatus: """Tests for the new ``muse bisect status`` subcommand.""" def _start(self, root: pathlib.Path, ids: list[str]) -> None: r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) assert r.exit_code == 0 # ── Unit: no session ──────────────────────────────────────────────────── def test_status_no_session_exits_0(self, tmp_path: pathlib.Path) -> None: root, _ = _make_repo(tmp_path) result = _invoke(root, ["bisect", "status"]) assert result.exit_code == 0 def test_status_no_session_json_active_false(self, tmp_path: pathlib.Path) -> None: root, _ = _make_repo(tmp_path) result = _invoke(root, ["bisect", "status", "--json"]) assert result.exit_code == 0 d = json.loads(result.output.strip()) assert d["active"] is False def test_status_no_session_json_only_active_key(self, tmp_path: pathlib.Path) -> None: root, _ = _make_repo(tmp_path) result = _invoke(root, ["bisect", "status", "--json"]) assert result.exit_code == 0 d = json.loads(result.output.strip()) assert {"active"} <= set(d.keys()) # ── Integration: active session ───────────────────────────────────────── def test_status_active_session_exits_0(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 6) self._start(root, ids) result = _invoke(root, ["bisect", "status"]) assert result.exit_code == 0 def test_status_active_json_schema(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 6) self._start(root, ids) result = _invoke(root, ["bisect", "status", "--json"]) assert result.exit_code == 0 d = json.loads(result.output.strip()) assert d["active"] is True assert "bad_id" in d assert "good_ids" in d assert "remaining_count" in d assert "steps_remaining" in d assert "skipped_count" in d assert "symbol_filter" in d def test_status_active_remaining_count_positive(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 8) self._start(root, ids) result = _invoke(root, ["bisect", "status", "--json"]) d = json.loads(result.output.strip()) assert d["remaining_count"] > 0 def test_status_bad_id_matches_session(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) result = _invoke(root, ["bisect", "status", "--json"]) d = json.loads(result.output.strip()) assert d["bad_id"] == ids[-1] def test_status_skipped_count_increments(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 8) self._start(root, ids) before = json.loads( _invoke(root, ["bisect", "status", "--json"]).output.strip() )["skipped_count"] # Skip the midpoint next_id = json.loads( _invoke(root, ["bisect", "status", "--json"]).output.strip() ) _invoke(root, ["bisect", "skip", ids[len(ids) // 2]]) after = json.loads( _invoke(root, ["bisect", "status", "--json"]).output.strip() )["skipped_count"] assert after == before + 1 def test_status_active_false_after_reset(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) _invoke(root, ["bisect", "reset"]) result = _invoke(root, ["bisect", "status", "--json"]) d = json.loads(result.output.strip()) assert d["active"] is False # ── Security ──────────────────────────────────────────────────────────── def test_status_outside_repo_exits_2(self, tmp_path: pathlib.Path) -> None: empty = tmp_path / "not_a_repo" empty.mkdir() result = _invoke(empty, ["bisect", "status"]) assert result.exit_code == 2 def test_status_json_is_compact(self, tmp_path: pathlib.Path) -> None: """JSON output is compact single-line.""" root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) result = _invoke(root, ["bisect", "status", "--json"]) assert result.exit_code == 0 json.loads(result.output) def test_status_json_no_ansi(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) result = _invoke(root, ["bisect", "status", "--json"]) assert "\x1b" not in result.output def test_status_text_no_session_message(self, tmp_path: pathlib.Path) -> None: root, _ = _make_repo(tmp_path) result = _invoke(root, ["bisect", "status"]) assert "No bisect session" in result.output or "no bisect session" in result.output.lower() def test_status_text_active_shows_remaining(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 6) self._start(root, ids) result = _invoke(root, ["bisect", "status"]) assert "remaining" in result.output.lower() class TestBisectLogStructured: """Tests verifying the new structured log entry schema.""" def _start(self, root: pathlib.Path, ids: list[str]) -> None: r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) assert r.exit_code == 0 def test_log_entry_has_three_keys(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 4) self._start(root, ids) entries = _parse_log(_invoke(root, ["bisect", "log", "--json"]).output)["entries"] assert len(entries) >= 2 for e in entries: assert set(e.keys()) == {"commit_id", "verdict", "timestamp"} def test_log_entry_verdict_values(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 4) self._start(root, ids) entries = _parse_log(_invoke(root, ["bisect", "log", "--json"]).output)["entries"] verdicts = {e["verdict"] for e in entries} assert verdicts <= {"bad", "good", "skip"} def test_log_entry_timestamp_is_iso8601(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 4) self._start(root, ids) entries = _parse_log(_invoke(root, ["bisect", "log", "--json"]).output)["entries"] for e in entries: # ISO8601 timestamps contain 'T' separating date from time assert "T" in e["timestamp"] or e["timestamp"] == "" def test_log_skip_entry_appears_after_skip(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 6) self._start(root, ids) _invoke(root, ["bisect", "skip", ids[2]]) entries = _parse_log(_invoke(root, ["bisect", "log", "--json"]).output)["entries"] verdicts = [e["verdict"] for e in entries] assert "skip" in verdicts def test_log_entry_commit_ids_in_session_ids(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 4) self._start(root, ids) entries = _parse_log(_invoke(root, ["bisect", "log", "--json"]).output)["entries"] entry_ids = {e["commit_id"] for e in entries} # bad and good commit IDs from start should appear in log assert ids[-1] in entry_ids # bad assert ids[0] in entry_ids # good class TestBisectRunTimeout: """Tests for ``--timeout`` on ``muse bisect run``.""" def _start(self, root: pathlib.Path, ids: list[str]) -> None: r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) assert r.exit_code == 0 def test_timeout_flag_accepted(self, tmp_path: pathlib.Path) -> None: """--timeout is a valid flag that doesn't crash the parser.""" root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 4) self._start(root, ids) result = _invoke(root, ["bisect", "run", "true", "--timeout", "30"]) assert result.exit_code == 0 def test_timeout_fast_command_succeeds(self, tmp_path: pathlib.Path) -> None: """A command that finishes well within the timeout is treated normally.""" root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) result = _invoke(root, ["bisect", "run", "true", "--timeout", "10"]) assert result.exit_code == 0 def test_timeout_triggers_skip(self, tmp_path: pathlib.Path) -> None: """A command that exceeds --timeout is treated as skip (exit 125).""" from muse.core.bisect import run_bisect_command import tempfile with tempfile.TemporaryDirectory() as td: root_path = pathlib.Path(td) # We test the core directly to avoid actually sleeping in a test. # Patch subprocess.run to raise TimeoutExpired. import unittest.mock as mock from muse.core.bisect import _SKIP_EXIT_CODE # Build a minimal state so _apply_verdict can run. import datetime from muse.core.ids import hash_commit, hash_snapshot from muse.core.commits import ( CommitRecord, write_commit, ) from muse.core.snapshots import ( SnapshotRecord, write_snapshot, ) from muse.core.bisect import start_bisect repo_id = fake_id("repo") dot_muse = muse_dir(root_path) dot_muse.mkdir() (dot_muse / "repo.json").write_text(json.dumps({ "repo_id": repo_id, "domain": "code", "default_branch": "main", "created_at": "2026-01-01T00:00:00+00:00", })) (dot_muse / "HEAD").write_text("ref: refs/heads/main") (dot_muse / "refs" / "heads").mkdir(parents=True) (dot_muse / "snapshots").mkdir() (dot_muse / "commits").mkdir() (dot_muse / "objects").mkdir() ids: list[str] = [] parent = None for i in range(4): manifest = {} snap_id = hash_snapshot(manifest) committed_at = datetime.datetime.now(datetime.timezone.utc) commit_id = hash_commit( parent_ids=[parent] if parent else [], snapshot_id=snap_id, message=f"c{i}", committed_at_iso=committed_at.isoformat(), ) write_snapshot(root_path, SnapshotRecord(snapshot_id=snap_id, manifest={}, created_at=committed_at)) write_commit(root_path, CommitRecord( commit_id=commit_id, parent_commit_id=parent, parent2_commit_id=None, snapshot_id=snap_id, branch="main", message=f"c{i}", committed_at=committed_at, )) (dot_muse / "refs" / "heads" / "main").write_text(commit_id) ids.append(commit_id) parent = commit_id start_bisect(root_path, ids[-1], [ids[0]]) import subprocess with mock.patch("subprocess.run", side_effect=subprocess.TimeoutExpired("cmd", 1)): result = run_bisect_command(root_path, "sleep 99", ids[2], timeout=1) assert result.verdict == "skip" def test_timeout_short_alias(self, tmp_path: pathlib.Path) -> None: """-t is the short alias for --timeout.""" root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 4) self._start(root, ids) result = _invoke(root, ["bisect", "run", "true", "-t", "10"]) assert result.exit_code == 0 class TestBisectRunStepSymbolChanges: """Tests verifying symbol_changes is present in NDJSON step lines.""" def _start(self, root: pathlib.Path, ids: list[str]) -> None: r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) assert r.exit_code == 0 def test_step_json_has_symbol_changes_key(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 6) self._start(root, ids) result = _invoke(root, ["bisect", "run", "true", "--json"]) assert result.exit_code == 0 lines = [l.strip() for l in result.output.strip().splitlines() if l.strip()] step_lines = [l for l in lines if '"step"' in l] if step_lines: step = json.loads(step_lines[0]) assert "symbol_changes" in step def test_step_symbol_changes_is_list(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 6) self._start(root, ids) result = _invoke(root, ["bisect", "run", "true", "--json"]) assert result.exit_code == 0 lines = [l.strip() for l in result.output.strip().splitlines() if l.strip()] for line in lines: obj = json.loads(line) if "symbol_changes" in obj: assert isinstance(obj["symbol_changes"], list) def test_step_ndjson_stays_compact(self, tmp_path: pathlib.Path) -> None: """NDJSON step lines must be single-line (not pretty-printed).""" root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 6) self._start(root, ids) result = _invoke(root, ["bisect", "run", "true", "--json"]) assert result.exit_code == 0 for line in result.output.strip().splitlines(): line = line.strip() if not line: continue # Every non-empty line must be valid JSON on its own obj = json.loads(line) assert isinstance(obj, dict) class TestBisectJsonCompact: """Tests verifying compact single-line JSON on single-object subcommands.""" def _start(self, root: pathlib.Path, ids: list[str]) -> None: r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]) assert r.exit_code == 0 def test_start_json_is_compact(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 4) result = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0], "--json"]) assert result.exit_code == 0 json.loads(result.output) def test_bad_json_is_compact(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 5) self._start(root, ids) result = _invoke(root, ["bisect", "bad", ids[-1], "--json"]) assert result.exit_code == 0 json.loads(result.output) def test_log_json_is_compact(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 4) self._start(root, ids) result = _invoke(root, ["bisect", "log", "--json"]) assert result.exit_code == 0 json.loads(result.output) def test_reset_json_is_compact(self, tmp_path: pathlib.Path) -> None: root, repo_id = _make_repo(tmp_path) result = _invoke(root, ["bisect", "reset", "--json"]) assert result.exit_code == 0 json.loads(result.output) def test_run_json_ndjson_lines_are_compact(self, tmp_path: pathlib.Path) -> None: """run --json emits NDJSON: each line is a compact single-line JSON object.""" root, repo_id = _make_repo(tmp_path) ids = _build_chain(root, repo_id, 4) self._start(root, ids) result = _invoke(root, ["bisect", "run", "true", "--json"]) assert result.exit_code == 0 for line in result.output.strip().splitlines(): line = line.strip() if not line: continue # Single-line JSON: no embedded newlines, parseable as-is obj = json.loads(line) assert isinstance(obj, dict) # --------------------------------------------------------------------------- # Flag registration tests # --------------------------------------------------------------------------- import argparse as _argparse from muse.cli.commands.bisect import register as _register_bisect from muse.core.paths import head_path, muse_dir, ref_path def _parse_bisect(*args: str) -> _argparse.Namespace: """Build an argument parser via register() and parse args.""" root_p = _argparse.ArgumentParser() subs = root_p.add_subparsers(dest="cmd") _register_bisect(subs) return root_p.parse_args(["bisect", *args]) class TestRegisterFlags: # ── bad ───────────────────────────────────────────────────────────────── def test_bad_default_json_out_is_false(self) -> None: ns = _parse_bisect("bad") assert ns.json_out is False def test_bad_json_flag_sets_json_out(self) -> None: ns = _parse_bisect("bad", "--json") assert ns.json_out is True def test_bad_j_shorthand_sets_json_out(self) -> None: ns = _parse_bisect("bad", "-j") assert ns.json_out is True # ── good ──────────────────────────────────────────────────────────────── def test_good_default_json_out_is_false(self) -> None: ns = _parse_bisect("good") assert ns.json_out is False def test_good_json_flag_sets_json_out(self) -> None: ns = _parse_bisect("good", "--json") assert ns.json_out is True def test_good_j_shorthand_sets_json_out(self) -> None: ns = _parse_bisect("good", "-j") assert ns.json_out is True # ── log ───────────────────────────────────────────────────────────────── def test_log_default_json_out_is_false(self) -> None: ns = _parse_bisect("log") assert ns.json_out is False def test_log_j_shorthand_sets_json_out(self) -> None: ns = _parse_bisect("log", "-j") assert ns.json_out is True # ── reset ──────────────────────────────────────────────────────────────── def test_reset_default_json_out_is_false(self) -> None: ns = _parse_bisect("reset") assert ns.json_out is False def test_reset_j_shorthand_sets_json_out(self) -> None: ns = _parse_bisect("reset", "-j") assert ns.json_out is True # ── run ───────────────────────────────────────────────────────────────── def test_run_default_json_out_is_false(self) -> None: ns = _parse_bisect("run", "pytest -x") assert ns.json_out is False def test_run_j_shorthand_sets_json_out(self) -> None: ns = _parse_bisect("run", "pytest -x", "-j") assert ns.json_out is True # ── skip ───────────────────────────────────────────────────────────────── def test_skip_default_json_out_is_false(self) -> None: ns = _parse_bisect("skip") assert ns.json_out is False def test_skip_j_shorthand_sets_json_out(self) -> None: ns = _parse_bisect("skip", "-j") assert ns.json_out is True # ── start ──────────────────────────────────────────────────────────────── def test_start_default_json_out_is_false(self) -> None: ns = _parse_bisect("start", "--bad", "HEAD", "--good", "v1.0.0") assert ns.json_out is False def test_start_j_shorthand_sets_json_out(self) -> None: ns = _parse_bisect("start", "--bad", "HEAD", "--good", "v1.0.0", "-j") assert ns.json_out is True def test_start_bad_flag(self) -> None: ns = _parse_bisect("start", "--bad", "HEAD", "--good", "v1.0.0") assert ns.bad == "HEAD" def test_start_good_flag(self) -> None: ns = _parse_bisect("start", "--bad", "HEAD", "--good", "v1.0.0") assert ns.good == ["v1.0.0"] # ── status ─────────────────────────────────────────────────────────────── def test_status_default_json_out_is_false(self) -> None: ns = _parse_bisect("status") assert ns.json_out is False def test_status_j_shorthand_sets_json_out(self) -> None: ns = _parse_bisect("status", "-j") assert ns.json_out is True