tests/test_cmd_bisect_hardening.py · gabriel/muse

test_cmd_bisect_hardening.py python

3,330 lines 141.5 KB

sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 20 days ago

1	"""Comprehensive hardening tests for ``muse bisect``.
2
3	Covers:
4	- Unit: _toml_escape, _load_state symlink guard, size cap, _save_state injection
5	- Security: branch TOML injection, symlink state file, oversized state, ANSI
6	sanitization, error routing to stderr, null bytes in refs
7	- JSON schema: all subcommands (start, bad, good, skip, log, reset, run)
8	- Integration: --json round-trips, get_bisect_next public API, session lifecycle
9	- E2E: symbol-scoped bisect, run subcommand NDJSON, reset --json, log --json
10	- Stress: 200-commit chain, concurrent read-only queries
11	"""
12	from __future__ import annotations
13
14	import datetime
15	import json
16	import pathlib
17	import re
18	import threading
19	from typing import TypedDict
20
21	import pytest
22
23	from muse.core.ids import hash_commit, hash_snapshot
24	from muse.core.commits import (
25	CommitRecord,
26	write_commit,
27	)
28	from muse.core.snapshots import (
29	SnapshotRecord,
30	write_snapshot,
31	)
32	from muse.core.types import Manifest, fake_id, short_id
33	from tests.cli_test_helper import CliRunner, InvokeResult
34
# Field-name sets for the snapshot and commit store records.
# NOTE(review): neither set is referenced in the visible part of this file, and
# a plain ``set[str]`` of names is not something mypy can check against the
# record classes — confirm these are still needed.
_SNAP_FIELDS: set[str] = {"snapshot_id", "manifest", "created_at"}
_COMMIT_FIELDS: set[str] = {"commit_id", "repo_id", "branch", "snapshot_id", "message", "committed_at"}

# Shared CLI runner; ``_invoke`` routes every command through it.
runner = CliRunner()

# Matches ANSI SGR escape sequences of the form ``ESC [ <digits/semicolons> m``.
_ANSI_RE = re.compile(r"\x1b\[[0-9;]*m")
42
43
44	# ---------------------------------------------------------------------------
45	# Fixtures
46	# ---------------------------------------------------------------------------
47
48
def _make_repo(tmp_path: pathlib.Path) -> tuple[pathlib.Path, str]:
    """Lay out a bare-bones Muse repository on disk, bypassing ``muse init``.

    Writes ``repo.json`` and ``HEAD`` and creates the ``refs/heads``,
    ``snapshots``, ``commits`` and ``objects`` directories inside the
    directory returned by ``muse_dir(tmp_path)``.

    Returns:
        ``(repo_root, repo_id)`` where ``repo_root`` is *tmp_path* itself.
    """
    # NOTE(review): ``muse_dir`` is not among the imports visible at the top of
    # this file — confirm it is in scope.
    repo_id = fake_id("repo")
    meta_dir = muse_dir(tmp_path)
    meta_dir.mkdir()

    repo_config = {
        "repo_id": repo_id,
        "domain": "code",
        "default_branch": "main",
        "created_at": "2026-01-01T00:00:00+00:00",
    }
    (meta_dir / "repo.json").write_text(json.dumps(repo_config))
    (meta_dir / "HEAD").write_text("ref: refs/heads/main")

    (meta_dir / "refs" / "heads").mkdir(parents=True)
    for store_name in ("snapshots", "commits", "objects"):
        (meta_dir / store_name).mkdir()

    return tmp_path, repo_id
71
72
def _make_commit(
    root: pathlib.Path,
    repo_id: str,
    *,
    branch: str = "main",
    message: str = "commit",
    parent_id: str | None = None,
) -> str:
    """Persist one synthetic commit with an empty manifest and return its ID.

    The snapshot and commit records are written to the store under *root*,
    then the ref for *branch* is set to the new commit and HEAD is pointed at
    *branch*.

    Args:
        root: Repository root.
        repo_id: Accepted for call-site symmetry; not used by the body.
        branch: Branch whose ref is moved to the new commit.
        message: Commit message (also part of the hashed identity).
        parent_id: Parent commit ID, or ``None`` for a root commit.

    Returns:
        The ID of the commit that was written.
    """
    # NOTE(review): ``ref_path`` and ``head_path`` are not among the imports
    # visible at the top of this file — confirm they are in scope.
    empty_manifest: Manifest = {}
    snapshot_id = hash_snapshot(empty_manifest)
    now = datetime.datetime.now(datetime.timezone.utc)
    parents = [parent_id] if parent_id else []
    commit_id = hash_commit(
        parent_ids=parents,
        snapshot_id=snapshot_id,
        message=message,
        committed_at_iso=now.isoformat(),
    )

    write_snapshot(
        root,
        SnapshotRecord(snapshot_id=snapshot_id, manifest={}, created_at=now),
    )
    write_commit(
        root,
        CommitRecord(
            commit_id=commit_id,
            parent_commit_id=parent_id,
            parent2_commit_id=None,
            snapshot_id=snapshot_id,
            branch=branch,
            message=message,
            committed_at=now,
        ),
    )

    # Advance the branch tip, then make HEAD a symbolic ref to that branch.
    ref_path(root, branch).write_text(commit_id)
    head_path(root).write_text(f"ref: refs/heads/{branch}")
    return commit_id
110
111
def _build_chain(root: pathlib.Path, repo_id: str, n: int) -> list[str]:
    """Write *n* commits, each the child of the previous one.

    Returns:
        The commit IDs ordered from oldest to newest.
    """
    chain: list[str] = []
    for index in range(n):
        # The most recently written commit (if any) becomes the parent.
        previous = chain[-1] if chain else None
        chain.append(
            _make_commit(root, repo_id, message=f"commit {index}", parent_id=previous)
        )
    return chain
121
122
def _invoke(root: pathlib.Path, args: list[str]) -> InvokeResult:
    """Run the CLI with *args*, pointing ``MUSE_REPO_ROOT`` at *root*."""
    environment = {"MUSE_REPO_ROOT": str(root)}
    return runner.invoke(None, args, env=environment)
125
126
def _json_blob(output: str) -> str:
    """Extract the first complete JSON object/array from mixed output.

    Resolution order:

    1. If the whole (stripped) output is valid JSON, return it unchanged.
    2. Otherwise scan line by line; at the first line that begins with ``{``
       or ``[`` and from which a complete JSON value can be decoded, return
       exactly the text of that value.  The value may span several lines, so
       pretty-printed JSON preceded or followed by plain text is handled, as
       are NDJSON streams (the first record is returned).
    3. If no candidate decodes, return the first line that merely *looks*
       like JSON, or the stripped output when there is none, so the caller's
       ``json.loads`` reports the real parse error.

    Args:
        output: Raw text captured from a CLI invocation.

    Returns:
        The text of the first JSON value found, or a best-effort fallback.
    """
    stripped = output.strip()
    try:
        json.loads(stripped)
    except json.JSONDecodeError:
        pass
    else:
        return stripped

    decoder = json.JSONDecoder()
    first_candidate: str | None = None
    offset = 0  # index in ``output`` where the current line starts
    for line in output.splitlines(keepends=True):
        candidate = line.strip()
        if candidate.startswith(("{", "[")):
            if first_candidate is None:
                first_candidate = candidate
            # ``raw_decode`` does not skip whitespace, so start on the bracket.
            start = offset + (len(line) - len(line.lstrip()))
            try:
                _, end = decoder.raw_decode(output, start)
            except json.JSONDecodeError:
                pass
            else:
                return output[start:end]
        offset += len(line)

    return first_candidate if first_candidate is not None else stripped
146
147
148	# ---------------------------------------------------------------------------
149	# Typed schema helpers
150	# ---------------------------------------------------------------------------
151
152
class _StepJson(TypedDict):
    """Shape of the ``--json`` payload validated by ``_parse_step``.

    The tests in this module parse it from the output of ``bisect start``,
    ``bisect bad``, ``bisect good`` and ``bisect skip``.
    """

    done: bool  # when true, the tests expect ``first_bad`` to be set
    first_bad: str | None
    next_to_test: str | None  # commit the tests pass to the next verdict call
    remaining_count: int
    steps_remaining: int
    verdict: str  # values asserted in this file: "started", "bad", "good", "skip"
    symbol_changes: list[str]
161
162
class _LogEntryJson(TypedDict):
    """One element of the ``entries`` list in ``bisect log --json`` output.

    The log-schema test asserts each entry has exactly these three keys.
    """

    commit_id: str
    verdict: str
    timestamp: str
167
168
class _LogJson(TypedDict):
    """Shape of the ``bisect log --json`` payload validated by ``_parse_log``."""

    active: bool  # tests expect False with no session and True after ``start``
    entries: list[_LogEntryJson]
172
173
class _ResetJson(TypedDict):
    """Shape of the ``bisect reset --json`` payload validated by ``_parse_reset``."""

    reset: bool
176
177
class _RunStepJson(TypedDict):
    """Per-step record of ``bisect run --json`` output.

    NOTE(review): not referenced in the visible part of this file; the keys
    mirror those the NDJSON test asserts on every line except the last.
    """

    step: int
    testing: str
    verdict: str
    remaining_count: int
    done: bool
    symbol_changes: list[str]
185
186
class _RunDoneJson(TypedDict):
    """Final record of ``bisect run --json`` output.

    NOTE(review): not referenced in the visible part of this file; the keys
    mirror those the run tests read from the last NDJSON line.
    """

    done: bool
    first_bad: str | None
    steps_taken: int
191
192
def _repo(tmp_path: pathlib.Path) -> tuple[pathlib.Path, str]:
    """Shorter spelling of ``_make_repo`` for use inside test methods.

    Returns:
        The same ``(repo_root, repo_id)`` pair ``_make_repo`` produces.
    """
    root_and_id = _make_repo(tmp_path)
    return root_and_id
196
197
def _parse_step(output: str) -> _StepJson:
    """Decode a step payload from CLI *output* and type-check every field.

    All seven keys are read before any type assertion runs, so a missing key
    surfaces as ``KeyError`` and a wrongly-typed value as ``AssertionError``.

    Returns:
        The validated payload as a ``_StepJson``.
    """
    payload = json.loads(_json_blob(output))
    assert isinstance(payload, dict)

    field_names = (
        "done",
        "first_bad",
        "next_to_test",
        "remaining_count",
        "steps_remaining",
        "verdict",
        "symbol_changes",
    )
    (
        done,
        first_bad,
        next_to_test,
        remaining_count,
        steps_remaining,
        verdict,
        symbol_changes,
    ) = (payload[name] for name in field_names)

    assert isinstance(done, bool)
    assert first_bad is None or isinstance(first_bad, str)
    assert next_to_test is None or isinstance(next_to_test, str)
    assert isinstance(remaining_count, int)
    assert isinstance(steps_remaining, int)
    assert isinstance(verdict, str)
    assert isinstance(symbol_changes, list)

    return _StepJson(
        done=done,
        first_bad=first_bad,
        next_to_test=next_to_test,
        remaining_count=remaining_count,
        steps_remaining=steps_remaining,
        verdict=verdict,
        symbol_changes=symbol_changes,
    )
224
225
def _parse_log(output: str) -> _LogJson:
    """Decode a ``bisect log --json`` payload and check its top-level types.

    Only ``active`` (bool) and ``entries`` (list) are checked; the individual
    entries are passed through as-is.
    """
    payload = json.loads(_json_blob(output))
    assert isinstance(payload, dict)
    active, entries = payload["active"], payload["entries"]
    assert isinstance(active, bool)
    assert isinstance(entries, list)
    return _LogJson(active=active, entries=entries)
234
235
def _parse_reset(output: str) -> _ResetJson:
    """Decode a ``bisect reset --json`` payload and check ``reset`` is a bool."""
    payload = json.loads(_json_blob(output))
    assert isinstance(payload, dict)
    was_reset = payload["reset"]
    assert isinstance(was_reset, bool)
    return _ResetJson(reset=was_reset)
242
243
244	# ---------------------------------------------------------------------------
245	# Unit — _toml_escape
246	# ---------------------------------------------------------------------------
247
248
class TestTomlEscape:
    """Unit tests for ``muse.core.bisect._toml_escape``."""

    def test_plain_string_unchanged(self) -> None:
        """A string with nothing to escape is returned as-is."""
        from muse.core.bisect import _toml_escape

        assert _toml_escape("feat/my-thing") == "feat/my-thing"

    def test_double_quote_escaped(self) -> None:
        """Every double-quote in the input comes back backslash-escaped."""
        from muse.core.bisect import _toml_escape

        result = _toml_escape('branch"with"quotes')
        assert '\\"' in result
        # No bare double-quote may remain: each ``"`` must be part of a ``\"``
        # pair.  (The input has no backslashes, so counting is unambiguous.)
        assert result.count('"') == result.count('\\"')

    def test_backslash_escaped(self) -> None:
        """Each backslash is doubled."""
        from muse.core.bisect import _toml_escape

        result = _toml_escape("branch\\with\\backslash")
        assert result == "branch\\\\with\\\\backslash"

    def test_both_escaped(self) -> None:
        """An injection-style payload keeps its text but loses its bare quotes."""
        from muse.core.bisect import _toml_escape

        result = _toml_escape('malicious"; bad_id = "hacked')
        assert '\\"' in result
        assert "bad_id" in result  # literal text preserved, just escaped
        # A surviving bare quote is what would let the payload close the string.
        assert result.count('"') == result.count('\\"')
274
275
276	# ---------------------------------------------------------------------------
277	# Unit — _load_state security
278	# ---------------------------------------------------------------------------
279
280
class TestLoadStateSecurity:
    """``_load_state`` must return ``None`` for unsafe, invalid or absent state files."""

    def test_symlink_state_file_rejected(self, tmp_path: pathlib.Path) -> None:
        """A symlink at the bisect state path must be silently ignored."""
        from muse.core.bisect import _load_state, _state_path

        root, _ = _make_repo(tmp_path)
        # The link target holds well-formed content, so only the symlink
        # itself can be the reason for rejection.
        real_file = tmp_path / "real_state.toml"
        real_file.write_text('bad_id = "abc"\ngood_ids = []\nskipped_ids = []\nremaining = []\nlog = []\n')
        _state_path(root).symlink_to(real_file)

        assert _load_state(root) is None

    def test_oversized_state_file_rejected(self, tmp_path: pathlib.Path) -> None:
        """State files exceeding _MAX_STATE_BYTES must be rejected."""
        from muse.core.bisect import _MAX_STATE_BYTES, _load_state, _state_path

        root, _ = _make_repo(tmp_path)
        # One byte over the cap.
        _state_path(root).write_text("x" * (_MAX_STATE_BYTES + 1))

        assert _load_state(root) is None

    def test_corrupt_state_returns_none(self, tmp_path: pathlib.Path) -> None:
        """Unparseable content yields ``None``."""
        from muse.core.bisect import _load_state, _state_path

        root, _ = _make_repo(tmp_path)
        _state_path(root).write_text("not valid toml ]] [[[ !!!")

        assert _load_state(root) is None

    def test_missing_state_returns_none(self, tmp_path: pathlib.Path) -> None:
        """A repo with no state file at all yields ``None``."""
        from muse.core.bisect import _load_state

        root, _ = _make_repo(tmp_path)

        assert _load_state(root) is None
320
321
322	# ---------------------------------------------------------------------------
323	# Unit — _save_state TOML injection
324	# ---------------------------------------------------------------------------
325
326
class TestSaveStateTomlInjection:
    """Hostile strings saved via ``_save_state`` must reload unchanged via ``_load_state``."""

    def test_branch_with_quote_survives_roundtrip(self, tmp_path: pathlib.Path) -> None:
        """A branch name containing a double-quote must not corrupt the state file."""
        from muse.core.bisect import BisectStateDict, _load_state, _save_state

        root, _ = _make_repo(tmp_path)
        bad_id = "a" * 64
        hostile_branch = 'malicious"; bad_id = "injected'
        state: BisectStateDict = {
            "bad_id": bad_id,
            "good_ids": ["b" * 64],
            "skipped_ids": [],
            "remaining": [],
            "log": [],
            "branch": hostile_branch,
        }

        _save_state(root, state)
        reloaded = _load_state(root)

        assert reloaded is not None
        # The payload must not have overwritten the real ``bad_id``.
        assert reloaded.get("bad_id") == bad_id
        assert reloaded.get("branch") == hostile_branch

    def test_branch_with_backslash_survives_roundtrip(self, tmp_path: pathlib.Path) -> None:
        """A branch name containing backslashes reloads unchanged."""
        from muse.core.bisect import BisectStateDict, _load_state, _save_state

        root, _ = _make_repo(tmp_path)
        backslash_branch = "feat\\\\weird"
        state: BisectStateDict = {
            "bad_id": "c" * 64,
            "good_ids": ["d" * 64],
            "skipped_ids": [],
            "remaining": [],
            "log": [],
            "branch": backslash_branch,
        }

        _save_state(root, state)
        reloaded = _load_state(root)

        assert reloaded is not None
        assert reloaded.get("branch") == backslash_branch

    def test_symbol_filter_injection_survives_roundtrip(self, tmp_path: pathlib.Path) -> None:
        """A symbol filter containing a double-quote must not corrupt the state file."""
        from muse.core.bisect import BisectStateDict, _load_state, _save_state

        root, _ = _make_repo(tmp_path)
        bad_id = "e" * 64
        hostile_filter = 'billing.py::Invoice"; bad_id = "EVIL'
        state: BisectStateDict = {
            "bad_id": bad_id,
            "good_ids": ["f" * 64],
            "skipped_ids": [],
            "remaining": [],
            "log": [],
            "symbol_filter": hostile_filter,
        }

        _save_state(root, state)
        reloaded = _load_state(root)

        assert reloaded is not None
        assert reloaded.get("bad_id") == bad_id
        assert reloaded.get("symbol_filter") == hostile_filter
381
382
383	# ---------------------------------------------------------------------------
384	# Unit — get_bisect_next public API
385	# ---------------------------------------------------------------------------
386
387
class TestGetBisectNext:
    """Tests for the public ``get_bisect_next`` helper, which returns ``(next_commit, symbol_filter)``."""

    def test_no_session_returns_none(self, tmp_path: pathlib.Path) -> None:
        """Without a session the result is ``(None, "")``."""
        from muse.core.bisect import get_bisect_next

        root, _ = _make_repo(tmp_path)

        next_commit, symbol_filter = get_bisect_next(root)

        assert next_commit is None
        assert symbol_filter == ""

    def test_returns_next_after_start(self, tmp_path: pathlib.Path) -> None:
        """After ``start_bisect`` the next commit is one of the chain's commits."""
        from muse.core.bisect import get_bisect_next, start_bisect

        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 5)
        start_bisect(root, chain[-1], [chain[0]])

        next_commit, symbol_filter = get_bisect_next(root)

        assert next_commit is not None
        assert next_commit in chain
        assert symbol_filter == ""

    def test_returns_symbol_filter(self, tmp_path: pathlib.Path) -> None:
        """The symbol filter given to ``start_bisect`` is reported back."""
        from muse.core.bisect import get_bisect_next, start_bisect

        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 4)
        # No commits touch this symbol, so remaining will be empty.
        start_bisect(root, chain[-1], [chain[0]], symbol_filter="no_file.py::NoSymbol")

        _next_commit, symbol_filter = get_bisect_next(root)

        # Symbol filter is preserved regardless of whether next exists.
        assert symbol_filter == "no_file.py::NoSymbol"
418
419
420	# ---------------------------------------------------------------------------
421	# Security — CLI error routing
422	# ---------------------------------------------------------------------------
423
424
class TestErrorRouting:
    """Failing ``bisect`` invocations must exit non-zero with a recognisable message.

    NOTE(review): each check reads ``result.stderr or result.output``, so a
    message printed to stdout also satisfies it whenever stderr is empty —
    these tests do not by themselves prove the text went to stderr.
    """

    def test_bad_without_session_goes_to_stderr(self, tmp_path: pathlib.Path) -> None:
        """``bisect bad`` with no session fails with "No bisect session"."""
        root, _ = _make_repo(tmp_path)
        result = _invoke(root, ["bisect", "bad"])
        assert result.exit_code != 0
        message = result.stderr or result.output
        assert "No bisect session" in message

    def test_good_without_session_goes_to_stderr(self, tmp_path: pathlib.Path) -> None:
        """``bisect good`` with no session fails with "No bisect session"."""
        root, _ = _make_repo(tmp_path)
        result = _invoke(root, ["bisect", "good"])
        assert result.exit_code != 0
        message = result.stderr or result.output
        assert "No bisect session" in message

    def test_skip_without_session_goes_to_stderr(self, tmp_path: pathlib.Path) -> None:
        """``bisect skip`` with no session fails with "No bisect session"."""
        root, _ = _make_repo(tmp_path)
        result = _invoke(root, ["bisect", "skip"])
        assert result.exit_code != 0
        message = result.stderr or result.output
        assert "No bisect session" in message

    def test_run_without_session_goes_to_stderr(self, tmp_path: pathlib.Path) -> None:
        """``bisect run`` with no session fails with "No bisect session"."""
        root, _ = _make_repo(tmp_path)
        result = _invoke(root, ["bisect", "run", "true"])
        assert result.exit_code != 0
        message = result.stderr or result.output
        assert "No bisect session" in message

    def test_symbol_without_double_colon_goes_to_stderr(self, tmp_path: pathlib.Path) -> None:
        """A ``--symbol`` value lacking ``::`` is rejected with an error marker."""
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 2)
        argv = ["bisect", "start", "--bad", chain[-1], "--good", chain[0], "--symbol", "no_colon_here"]
        result = _invoke(root, argv)
        assert result.exit_code != 0
        message = result.stderr or result.output
        assert "❌" in message

    def test_symbol_too_long_goes_to_stderr(self, tmp_path: pathlib.Path) -> None:
        """A ``--symbol`` value with a 600-character name is rejected as too long."""
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 2)
        oversized_symbol = f"f.py::{'x' * 600}"
        argv = ["bisect", "start", "--bad", chain[-1], "--good", chain[0], "--symbol", oversized_symbol]
        result = _invoke(root, argv)
        assert result.exit_code != 0
        message = result.stderr or result.output
        assert "too long" in message

    def test_double_start_goes_to_stderr(self, tmp_path: pathlib.Path) -> None:
        """Starting a second session while one is active fails."""
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 3)
        argv = ["bisect", "start", "--bad", chain[-1], "--good", chain[0]]

        first = _invoke(root, argv)
        assert first.exit_code == 0

        second = _invoke(root, list(argv))
        assert second.exit_code != 0
        message = second.stderr or second.output
        assert "already active" in message
473
474
475	# ---------------------------------------------------------------------------
476	# Security — ANSI sanitization in outputs
477	# ---------------------------------------------------------------------------
478
479
class TestAnsiSanitization:
    """ANSI escape sequences passed on the command line must not be echoed back.

    Both stdout and stderr are inspected: an invalid ref or symbol is expected
    to produce an error message, and checking stdout alone would let an escape
    sequence echoed on stderr go unnoticed.
    """

    def test_ansi_in_ref_does_not_leak(self, tmp_path: pathlib.Path) -> None:
        """An ANSI-wrapped ``--bad`` ref never appears raw in any output stream."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 2)
        ansi_ref = "\x1b[31mHEAD\x1b[0m"
        result = _invoke(root, ["bisect", "start", "--bad", ansi_ref, "--good", ids[0]])
        combined = result.output + (result.stderr or "")
        assert _ANSI_RE.search(combined) is None

    def test_ansi_in_symbol_does_not_leak(self, tmp_path: pathlib.Path) -> None:
        """An ANSI-wrapped ``--symbol`` never appears raw in any output stream."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 2)
        sym = "\x1b[31mfoo.py::Bar\x1b[0m"
        result = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0], "--symbol", sym])
        combined = result.output + (result.stderr or "")
        assert _ANSI_RE.search(combined) is None
494
495
496	# ---------------------------------------------------------------------------
497	# JSON schema — start
498	# ---------------------------------------------------------------------------
499
500
class TestJsonSchemaStart:
    """Schema checks for ``bisect start --json``."""

    def test_start_json_schema(self, tmp_path: pathlib.Path) -> None:
        """The start payload parses as a step with verdict ``"started"``."""
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 5)
        argv = ["bisect", "start", "--bad", chain[-1], "--good", chain[0], "--json"]

        result = _invoke(root, argv)

        assert result.exit_code == 0
        step = _parse_step(result.output)
        assert step["verdict"] == "started"
        assert isinstance(step["done"], bool)
        assert isinstance(step["remaining_count"], int)
        assert step["remaining_count"] >= 0

    def test_start_json_done_when_no_remaining(self, tmp_path: pathlib.Path) -> None:
        """When bad and good are adjacent, start should report done=True immediately."""
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 2)
        newest = chain[-1]
        argv = ["bisect", "start", "--bad", newest, "--good", chain[0], "--json"]

        result = _invoke(root, argv)

        assert result.exit_code == 0
        step = _parse_step(result.output)
        assert step["done"] is True
        assert step["first_bad"] == newest

    def test_start_json_symbol_changes_list(self, tmp_path: pathlib.Path) -> None:
        """``symbol_changes`` is a list even when no symbol filter is given."""
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 4)
        argv = ["bisect", "start", "--bad", chain[-1], "--good", chain[0], "--json"]

        step = _parse_step(_invoke(root, argv).output)

        assert isinstance(step["symbol_changes"], list)
529
530
531	# ---------------------------------------------------------------------------
532	# JSON schema — bad / good / skip
533	# ---------------------------------------------------------------------------
534
535
class TestJsonSchemaBadGoodSkip:
    """Schema checks for the ``--json`` output of ``bisect bad``, ``good`` and ``skip``."""

    def _start(self, root: pathlib.Path, ids: list[str]) -> None:
        """Open a session with the newest commit bad and the oldest good."""
        started = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]])
        assert started.exit_code == 0

    def test_bad_json_schema(self, tmp_path: pathlib.Path) -> None:
        """Marking the middle commit bad echoes verdict ``"bad"``."""
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 5)
        self._start(root, chain)
        middle = chain[len(chain) // 2]

        result = _invoke(root, ["bisect", "bad", middle, "--json"])

        assert result.exit_code == 0
        assert _parse_step(result.output)["verdict"] == "bad"

    def test_good_json_schema(self, tmp_path: pathlib.Path) -> None:
        """Marking the middle commit good echoes verdict ``"good"``."""
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 5)
        self._start(root, chain)
        middle = chain[len(chain) // 2]

        result = _invoke(root, ["bisect", "good", middle, "--json"])

        assert result.exit_code == 0
        assert _parse_step(result.output)["verdict"] == "good"

    def test_skip_json_schema(self, tmp_path: pathlib.Path) -> None:
        """Skipping the middle commit echoes verdict ``"skip"``."""
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 5)
        self._start(root, chain)
        middle = chain[len(chain) // 2]

        result = _invoke(root, ["bisect", "skip", middle, "--json"])

        assert result.exit_code == 0
        assert _parse_step(result.output)["verdict"] == "skip"
570
571
572	# ---------------------------------------------------------------------------
573	# JSON schema — log
574	# ---------------------------------------------------------------------------
575
576
class TestJsonSchemaLog:
    """Schema checks for ``bisect log --json``."""

    def test_log_json_no_session(self, tmp_path: pathlib.Path) -> None:
        """With no session the log is inactive and has no entries."""
        root, _ = _make_repo(tmp_path)

        result = _invoke(root, ["bisect", "log", "--json"])

        assert result.exit_code == 0
        log = _parse_log(result.output)
        assert log["active"] is False
        assert log["entries"] == []

    def test_log_json_after_start(self, tmp_path: pathlib.Path) -> None:
        """After ``start`` the log is active and holds at least two entries."""
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 4)
        _invoke(root, ["bisect", "start", "--bad", chain[-1], "--good", chain[0]])

        result = _invoke(root, ["bisect", "log", "--json"])

        assert result.exit_code == 0
        log = _parse_log(result.output)
        assert log["active"] is True
        assert len(log["entries"]) >= 2

    def test_log_json_entries_are_dicts(self, tmp_path: pathlib.Path) -> None:
        """Every entry is a dict with exactly the three expected keys."""
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 3)
        _invoke(root, ["bisect", "start", "--bad", chain[-1], "--good", chain[0]])

        log = _parse_log(_invoke(root, ["bisect", "log", "--json"]).output)

        expected_keys = {"commit_id", "verdict", "timestamp"}
        for record in log["entries"]:
            assert isinstance(record, dict)
            assert set(record.keys()) == expected_keys
605
606
607	# ---------------------------------------------------------------------------
608	# JSON schema — reset
609	# ---------------------------------------------------------------------------
610
611
class TestJsonSchemaReset:
    """Schema checks for ``bisect reset --json``."""

    def test_reset_json_no_session(self, tmp_path: pathlib.Path) -> None:
        """Reset succeeds and reports ``reset: true`` even with no session."""
        root, _ = _make_repo(tmp_path)

        result = _invoke(root, ["bisect", "reset", "--json"])

        assert result.exit_code == 0
        assert _parse_reset(result.output)["reset"] is True

    def test_reset_json_with_session(self, tmp_path: pathlib.Path) -> None:
        """Reset succeeds and reports ``reset: true`` after a session was started."""
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 3)
        _invoke(root, ["bisect", "start", "--bad", chain[-1], "--good", chain[0]])

        result = _invoke(root, ["bisect", "reset", "--json"])

        assert result.exit_code == 0
        assert _parse_reset(result.output)["reset"] is True

    def test_reset_clears_active_flag(self, tmp_path: pathlib.Path) -> None:
        """After a reset, ``bisect log --json`` reports the session as inactive."""
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 3)
        _invoke(root, ["bisect", "start", "--bad", chain[-1], "--good", chain[0]])
        _invoke(root, ["bisect", "reset", "--json"])

        log = _parse_log(_invoke(root, ["bisect", "log", "--json"]).output)

        assert log["active"] is False
637
638
639	# ---------------------------------------------------------------------------
640	# JSON schema — run (NDJSON)
641	# ---------------------------------------------------------------------------
642
643
class TestJsonSchemaRun:
    """Schema checks for the NDJSON stream of ``bisect run --json``.

    Each test drives the session with the always-succeeding command ``true``.
    """

    def test_run_json_ndjson_format(self, tmp_path: pathlib.Path) -> None:
        """``bisect run --json`` should emit valid NDJSON."""
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 6)
        _invoke(root, ["bisect", "start", "--bad", chain[-1], "--good", chain[0]])

        result = _invoke(root, ["bisect", "run", "true", "--json"])

        assert result.exit_code == 0
        records = [text for text in map(str.strip, result.output.strip().splitlines()) if text]
        assert len(records) >= 1

        # Every line but the last is a per-step record.
        *step_lines, summary_line = records
        for step_line in step_lines:
            step_record = json.loads(step_line)
            for key in ("step", "verdict", "testing", "remaining_count", "done"):
                assert key in step_record

        # The last line is the summary record.
        summary = json.loads(summary_line)
        assert isinstance(summary["done"], bool)
        assert isinstance(summary["steps_taken"], int)

    def test_run_json_done_has_first_bad(self, tmp_path: pathlib.Path) -> None:
        """With always-good command, first_bad on the done line should be set."""
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 4)
        _invoke(root, ["bisect", "start", "--bad", chain[-1], "--good", chain[0]])

        result = _invoke(root, ["bisect", "run", "true", "--json"])

        assert result.exit_code == 0
        records = [text for text in map(str.strip, result.output.strip().splitlines()) if text]
        summary = json.loads(records[-1])
        finished = summary["done"]
        culprit = summary["first_bad"]
        # Only checked when the run reports completion.
        if finished:
            assert culprit is not None

    def test_run_json_steps_taken_increments(self, tmp_path: pathlib.Path) -> None:
        """The summary line reports at least one step for an eight-commit chain."""
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 8)
        _invoke(root, ["bisect", "start", "--bad", chain[-1], "--good", chain[0]])

        result = _invoke(root, ["bisect", "run", "true", "--json"])

        records = [text for text in map(str.strip, result.output.strip().splitlines()) if text]
        summary = json.loads(records[-1])
        assert summary["steps_taken"] >= 1
690
691
692	# ---------------------------------------------------------------------------
693	# Integration — session lifecycle with --json
694	# ---------------------------------------------------------------------------
695
696
class TestIntegrationJson:
    """Multi-command bisect sessions driven through the ``--json`` interface."""

    def test_start_bad_good_converge(self, tmp_path: pathlib.Path) -> None:
        """A manual bisect session with --json converges to a first_bad."""
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 7)
        started = _invoke(root, ["bisect", "start", "--bad", chain[-1], "--good", chain[0], "--json"])
        assert started.exit_code == 0
        step = _parse_step(started.output)

        # Keep marking the suggested commit bad; give up after 20 verdicts.
        verdicts_given = 0
        while not step["done"]:
            if verdicts_given == 20:
                pytest.fail("Bisect did not converge within 20 steps")
            candidate = step["next_to_test"]
            assert candidate is not None
            reply = _invoke(root, ["bisect", "bad", candidate, "--json"])
            assert reply.exit_code == 0
            step = _parse_step(reply.output)
            verdicts_given += 1

        assert step["first_bad"] is not None

    def test_good_narrows_range(self, tmp_path: pathlib.Path) -> None:
        """Marking the middle commit good leaves fewer candidates than the full interior."""
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 8)
        _invoke(root, ["bisect", "start", "--bad", chain[-1], "--good", chain[0], "--json"])
        middle = chain[len(chain) // 2]

        step = _parse_step(_invoke(root, ["bisect", "good", middle, "--json"]).output)

        if not step["done"]:
            # ``len(chain) - 2`` is the chain minus its two endpoints.
            assert step["remaining_count"] < len(chain) - 2

    def test_log_grows_with_verdicts(self, tmp_path: pathlib.Path) -> None:
        """Each verdict appends to the session log."""
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 5)
        _invoke(root, ["bisect", "start", "--bad", chain[-1], "--good", chain[0]])
        middle = chain[len(chain) // 2]
        _invoke(root, ["bisect", "bad", middle])

        log = _parse_log(_invoke(root, ["bisect", "log", "--json"]).output)

        # start logs 2 entries (bad+good); bad adds 1 more → at least 3.
        assert len(log["entries"]) >= 3

    def test_skip_excluded_from_remaining(self, tmp_path: pathlib.Path) -> None:
        """A skipped commit is not offered again as the next commit to test."""
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 6)
        started = _invoke(root, ["bisect", "start", "--bad", chain[-1], "--good", chain[0], "--json"])
        first_step = _parse_step(started.output)
        if first_step["done"]:
            return

        skipped = first_step["next_to_test"]
        assert skipped is not None
        after_skip = _parse_step(_invoke(root, ["bisect", "skip", skipped, "--json"]).output)

        if not after_skip["done"]:
            assert after_skip["next_to_test"] != skipped
753
754
755	# ---------------------------------------------------------------------------
756	# E2E — text (non-JSON) output still works
757	# ---------------------------------------------------------------------------
758
759
class TestE2EText:
    """End-to-end checks of the human-readable (non ``--json``) output."""

    def test_start_text_output_no_json(self, tmp_path: pathlib.Path) -> None:
        """``start`` prints either the session banner or an immediate result."""
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 4)

        result = _invoke(root, ["bisect", "start", "--bad", chain[-1], "--good", chain[0]])

        assert result.exit_code == 0
        text = result.output
        assert "Bisect session started" in text or "First bad commit" in text

    def test_bad_text_output(self, tmp_path: pathlib.Path) -> None:
        """``bad`` succeeds and mentions "bad" in its output."""
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 4)
        _invoke(root, ["bisect", "start", "--bad", chain[-1], "--good", chain[0]])
        middle = chain[len(chain) // 2]

        result = _invoke(root, ["bisect", "bad", middle])

        assert result.exit_code == 0
        assert "bad" in result.output.lower()

    def test_log_text_shows_entries(self, tmp_path: pathlib.Path) -> None:
        """``log`` prints a "Bisect log" heading once a session exists."""
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 3)
        _invoke(root, ["bisect", "start", "--bad", chain[-1], "--good", chain[0]])

        result = _invoke(root, ["bisect", "log"])

        assert result.exit_code == 0
        assert "Bisect log" in result.output

    def test_reset_text_output(self, tmp_path: pathlib.Path) -> None:
        """``reset`` succeeds and mentions "reset" in its output."""
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 2)
        _invoke(root, ["bisect", "start", "--bad", chain[-1], "--good", chain[0]])

        result = _invoke(root, ["bisect", "reset"])

        assert result.exit_code == 0
        assert "reset" in result.output.lower()

    def test_run_text_output_converges(self, tmp_path: pathlib.Path) -> None:
        """``run true`` finishes and prints a completion message."""
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 5)
        _invoke(root, ["bisect", "start", "--bad", chain[-1], "--good", chain[0]])

        result = _invoke(root, ["bisect", "run", "true"])

        assert result.exit_code == 0
        text = result.output
        assert "First bad commit" in text or "Bisect complete" in text

    def test_no_good_flag_fails_clearly(self, tmp_path: pathlib.Path) -> None:
        """``start`` without ``--good`` exits non-zero."""
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 2)

        result = _invoke(root, ["bisect", "start", "--bad", chain[-1]])

        assert result.exit_code != 0

    def test_log_empty_when_no_session(self, tmp_path: pathlib.Path) -> None:
        """``log`` with no session succeeds and says there is no log."""
        root, _ = _make_repo(tmp_path)

        result = _invoke(root, ["bisect", "log"])

        assert result.exit_code == 0
        assert "No bisect log" in result.output
812
813
814	# ---------------------------------------------------------------------------
815	# E2E — symbol-scoped bisect
816	# ---------------------------------------------------------------------------
817
818
class TestSymbolScopedBisect:
    """End-to-end checks of ``bisect start --symbol``."""

    def test_symbol_filter_no_matching_commits_warns(self, tmp_path: pathlib.Path) -> None:
        """A symbol no commit touches still exits 0 and prints a notice or a result."""
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 4)
        argv = [
            "bisect", "start",
            "--bad", chain[-1],
            "--good", chain[0],
            "--symbol", "ghost.py::GhostFunc",
        ]

        result = _invoke(root, argv)

        assert result.exit_code == 0
        # The message may be on either stream, so search both.
        everything = result.output + (result.stderr or "")
        assert "No commits" in everything or "First bad" in everything

    def test_symbol_filter_json_schema_preserved(self, tmp_path: pathlib.Path) -> None:
        """With ``--symbol`` the JSON payload still parses as a normal step."""
        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 5)
        argv = [
            "bisect", "start",
            "--bad", chain[-1],
            "--good", chain[0],
            "--symbol", "ghost.py::GhostFunc",
            "--json",
        ]

        result = _invoke(root, argv)

        assert result.exit_code == 0
        step = _parse_step(result.output)
        assert isinstance(step["symbol_changes"], list)

    def test_symbol_filter_state_persisted(self, tmp_path: pathlib.Path) -> None:
        """After start with --symbol, the symbol_filter must survive state reload."""
        from muse.core.bisect import _load_state

        root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(root, repo_id, 4)
        argv = [
            "bisect", "start",
            "--bad", chain[-1],
            "--good", chain[0],
            "--symbol", "billing.py::Invoice",
        ]
        _invoke(root, argv)

        persisted = _load_state(root)

        assert persisted is not None
        assert persisted.get("symbol_filter") == "billing.py::Invoice"
871
872
873	# ---------------------------------------------------------------------------
874	# Stress — large commit chains
875	# ---------------------------------------------------------------------------
876
877
class TestStress:
    """Scale tests: long commit chains and concurrent log readers."""

    def test_200_commit_chain_converges(self, tmp_path: pathlib.Path) -> None:
        """A 200-commit bisect driven by ``bisect run true`` finishes in at most 9 steps.

        log₂(200) ≈ 7.6, so 9 leaves a small margin.
        """
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 200)
        _invoke(repo_root, ["bisect", "start", "--bad", chain[-1], "--good", chain[0]])

        steps_taken = 0
        converged = False
        for _attempt in range(10):
            run = _invoke(repo_root, ["bisect", "run", "true", "--json"])
            assert run.exit_code == 0
            stripped = (raw.strip() for raw in run.output.strip().splitlines())
            nonblank = [text for text in stripped if text]
            if not nonblank:
                continue
            # Only the final NDJSON record is inspected for the done marker.
            final_record = json.loads(nonblank[-1])
            if final_record.get("done"):
                steps_taken = final_record.get("steps_taken", 0)
                converged = True
                break

        if not converged:
            pytest.fail("Bisect did not terminate within 10 run invocations")
        assert steps_taken <= 9, f"Expected ≤9 steps for 200 commits, got {steps_taken}"

    def test_concurrent_log_reads_are_safe(self, tmp_path: pathlib.Path) -> None:
        """Twenty threads calling ``get_bisect_log`` at once must all get a list back."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 10)
        _invoke(repo_root, ["bisect", "start", "--bad", chain[-1], "--good", chain[0]])

        failures: list[str] = []

        def _worker() -> None:
            from muse.core.bisect import get_bisect_log
            try:
                assert isinstance(get_bisect_log(repo_root), list)
            except Exception as exc:
                # Collected here because an exception in a thread would not fail the test.
                failures.append(str(exc))

        pool = []
        for _ in range(20):
            pool.append(threading.Thread(target=_worker))
        for worker in pool:
            worker.start()
        for worker in pool:
            worker.join()

        assert not failures, f"Concurrent read failures: {failures}"

    def test_50_step_manual_bisect_json(self, tmp_path: pathlib.Path) -> None:
        """Marking every suggestion bad on a 100-commit chain finishes within 50 steps."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 100)
        started = _invoke(
            repo_root,
            ["bisect", "start", "--bad", chain[-1], "--good", chain[0], "--json"],
        )
        assert started.exit_code == 0
        step = _parse_step(started.output)

        budget = 50
        while budget > 0:
            if step["done"]:
                assert step["first_bad"] is not None
                return
            candidate = step["next_to_test"]
            assert candidate is not None
            marked = _invoke(repo_root, ["bisect", "bad", candidate, "--json"])
            assert marked.exit_code == 0
            step = _parse_step(marked.output)
            budget -= 1

        assert step["done"] is True
942
943
944	# ---------------------------------------------------------------------------
945	# bisect start — Extended, Security, Stress
946	# ---------------------------------------------------------------------------
947
948
class TestBisectStartExtended:
    """Unit, integration and end-to-end coverage for ``muse bisect start``."""

    @staticmethod
    def _argv(ids: list[str], *extra: str) -> list[str]:
        """Build ``bisect start`` arguments: last id as bad, first id as good, then *extra*."""
        return ["bisect", "start", "--bad", ids[-1], "--good", ids[0], *extra]

    def test_start_exits_0(self, tmp_path: pathlib.Path) -> None:
        """A plain start over five commits exits 0."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 5)
        outcome = _invoke(repo_root, self._argv(chain))
        assert outcome.exit_code == 0

    def test_start_j_alias_works(self, tmp_path: pathlib.Path) -> None:
        """``-j`` produces the same JSON step output as ``--json``."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 5)
        outcome = _invoke(repo_root, self._argv(chain, "-j"))
        assert outcome.exit_code == 0
        step = _parse_step(outcome.output)
        assert step["verdict"] == "started"

    def test_start_json_verdict_is_started(self, tmp_path: pathlib.Path) -> None:
        """The JSON ``verdict`` field is ``started``."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 5)
        outcome = _invoke(repo_root, self._argv(chain, "--json"))
        assert outcome.exit_code == 0
        step = _parse_step(outcome.output)
        assert step["verdict"] == "started"

    def test_start_json_done_false_with_remaining(self, tmp_path: pathlib.Path) -> None:
        """Over five commits the session is not done and proposes a commit to test."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 5)
        outcome = _invoke(repo_root, self._argv(chain, "--json"))
        assert outcome.exit_code == 0
        step = _parse_step(outcome.output)
        assert step["done"] is False
        assert step["next_to_test"] is not None

    def test_start_json_done_true_when_adjacent(self, tmp_path: pathlib.Path) -> None:
        """With only two commits, start is immediately done and blames the last one."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 2)
        outcome = _invoke(repo_root, self._argv(chain, "--json"))
        assert outcome.exit_code == 0
        step = _parse_step(outcome.output)
        assert step["done"] is True
        assert step["first_bad"] == chain[-1]

    def test_start_json_remaining_count_positive(self, tmp_path: pathlib.Path) -> None:
        """``remaining_count`` is positive over an eight-commit chain."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 8)
        outcome = _invoke(repo_root, self._argv(chain, "--json"))
        assert outcome.exit_code == 0
        step = _parse_step(outcome.output)
        assert step["remaining_count"] > 0

    def test_start_json_steps_remaining_positive(self, tmp_path: pathlib.Path) -> None:
        """``steps_remaining`` is positive over an eight-commit chain."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 8)
        outcome = _invoke(repo_root, self._argv(chain, "--json"))
        assert outcome.exit_code == 0
        step = _parse_step(outcome.output)
        assert step["steps_remaining"] > 0

    def test_start_json_all_seven_keys(self, tmp_path: pathlib.Path) -> None:
        """The raw JSON object contains all seven step keys."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 5)
        outcome = _invoke(repo_root, self._argv(chain, "--json"))
        assert outcome.exit_code == 0
        payload = json.loads(_json_blob(outcome.output))
        required = {
            "done",
            "first_bad",
            "next_to_test",
            "remaining_count",
            "steps_remaining",
            "verdict",
            "symbol_changes",
        }
        assert required.issubset(set(payload.keys()))

    def test_start_multiple_good_refs(self, tmp_path: pathlib.Path) -> None:
        """``--good`` may be given more than once."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 6)
        argv = self._argv(chain, "--good", chain[1], "--json")
        outcome = _invoke(repo_root, argv)
        assert outcome.exit_code == 0
        step = _parse_step(outcome.output)
        assert step["verdict"] == "started"

    def test_start_no_good_exits_1(self, tmp_path: pathlib.Path) -> None:
        """Omitting ``--good`` exits with code 1."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 3)
        outcome = _invoke(repo_root, ["bisect", "start", "--bad", chain[-1]])
        assert outcome.exit_code == 1

    def test_start_no_good_error_to_stderr(self, tmp_path: pathlib.Path) -> None:
        """Omitting ``--good`` fails with a message that mentions "good"."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 3)
        outcome = _invoke(repo_root, ["bisect", "start", "--bad", chain[-1]])
        assert outcome.exit_code != 0
        # Both streams are searched; the check does not pin which one carries the error.
        streams = outcome.output + (outcome.stderr or "")
        assert "good" in streams.lower()

    def test_start_double_start_exits_1(self, tmp_path: pathlib.Path) -> None:
        """A second start while a session exists exits with code 1."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 5)
        _invoke(repo_root, self._argv(chain))
        second = _invoke(repo_root, self._argv(chain))
        assert second.exit_code == 1

    def test_start_outside_repo_exits_2(self, tmp_path: pathlib.Path) -> None:
        """Running start in a directory that is not a repo exits with code 2."""
        not_a_repo = tmp_path / "not_a_repo"
        not_a_repo.mkdir()
        argv = ["bisect", "start", "--bad", "abc", "--good", "def"]
        outcome = _invoke(not_a_repo, argv)
        assert outcome.exit_code == 2

    def test_start_bad_defaults_to_head(self, tmp_path: pathlib.Path) -> None:
        """Start succeeds when ``--bad`` is omitted."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 4)
        # HEAD points to chain[-1]; --bad is deliberately left out.
        argv = ["bisect", "start", "--good", chain[0], "--json"]
        outcome = _invoke(repo_root, argv)
        assert outcome.exit_code == 0

    def test_start_text_mentions_session_started(self, tmp_path: pathlib.Path) -> None:
        """Text output announces the session or the first bad commit."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 5)
        outcome = _invoke(repo_root, self._argv(chain))
        assert outcome.exit_code == 0
        assert any(
            phrase in outcome.output
            for phrase in ("Bisect session started", "First bad commit")
        )

    def test_start_text_no_json_object(self, tmp_path: pathlib.Path) -> None:
        """Text output does not begin with a JSON object."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 5)
        outcome = _invoke(repo_root, self._argv(chain))
        assert outcome.exit_code == 0
        leading = outcome.output.strip()
        assert not leading.startswith("{")

    def test_start_help_description_present(self, tmp_path: pathlib.Path) -> None:
        """``--help`` output contains the quickstart header or the word "binary"."""
        repo_root, _repo_id = _make_repo(tmp_path)
        outcome = _invoke(repo_root, ["bisect", "start", "--help"])
        help_text = outcome.output
        assert "Agent quickstart" in help_text or "binary" in help_text.lower()

    def test_start_invalid_ref_exits_1(self, tmp_path: pathlib.Path) -> None:
        """An unresolvable ``--bad`` ref exits with code 1."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 3)
        argv = ["bisect", "start", "--bad", "nonexistent_ref_abc123", "--good", chain[0]]
        outcome = _invoke(repo_root, argv)
        assert outcome.exit_code == 1
1083
1084
class TestBisectStartSecurity:
    """Security hardening tests for ``muse bisect start``."""

    @staticmethod
    def _tainted_result(next_ref: str):
        """Return a ``BisectResult`` whose single symbol change embeds ANSI escapes."""
        from muse.core.bisect import BisectResult

        return BisectResult(
            done=False,
            first_bad=None,
            next_to_test=next_ref,
            remaining_count=3,
            steps_remaining=2,
            verdict="started",
            symbol_changes=["add Invoice.compute\x1b[31mred\x1b[0m"],
        )

    def test_start_symbol_changes_no_ansi_in_json(self, tmp_path: pathlib.Path) -> None:
        """JSON output contains no escape byte even when ``symbol_changes`` does."""
        from unittest.mock import patch

        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 5)
        tainted = self._tainted_result(chain[2])
        argv = [
            "bisect", "start",
            "--bad", chain[-1],
            "--good", chain[0],
            "--symbol", "billing.py::Invoice",
            "--json",
        ]
        # Replace the core call so the CLI has to render the tainted result.
        with patch("muse.cli.commands.bisect.start_bisect", return_value=tainted):
            outcome = _invoke(repo_root, argv)
        assert outcome.exit_code == 0
        assert "\x1b" not in outcome.output

    def test_start_symbol_changes_no_ansi_in_text(self, tmp_path: pathlib.Path) -> None:
        """Text output contains no escape byte even when ``symbol_changes`` does."""
        from unittest.mock import patch

        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 5)
        tainted = self._tainted_result(chain[2])
        argv = [
            "bisect", "start",
            "--bad", chain[-1],
            "--good", chain[0],
            "--symbol", "billing.py::Invoice",
        ]
        with patch("muse.cli.commands.bisect.start_bisect", return_value=tainted):
            outcome = _invoke(repo_root, argv)
        assert outcome.exit_code == 0
        assert "\x1b" not in outcome.output

    def test_start_symbol_missing_separator_exits_1(self, tmp_path: pathlib.Path) -> None:
        """A ``--symbol`` value lacking ``::`` exits with code 1."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 3)
        argv = ["bisect", "start", "--bad", chain[-1], "--good", chain[0]]
        argv += ["--symbol", "NoSeparator"]
        outcome = _invoke(repo_root, argv)
        assert outcome.exit_code == 1

    def test_start_symbol_too_long_exits_1(self, tmp_path: pathlib.Path) -> None:
        """A 513-character ``--symbol`` value exits with code 1."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 3)
        oversized = "a" * 510 + "::b"
        argv = ["bisect", "start", "--bad", chain[-1], "--good", chain[0]]
        argv += ["--symbol", oversized]
        outcome = _invoke(repo_root, argv)
        assert outcome.exit_code == 1

    def test_start_json_is_valid_json(self, tmp_path: pathlib.Path) -> None:
        """The JSON blob in the output decodes to a dict."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 5)
        argv = ["bisect", "start", "--bad", chain[-1], "--good", chain[0], "--json"]
        outcome = _invoke(repo_root, argv)
        assert outcome.exit_code == 0
        payload = json.loads(_json_blob(outcome.output))
        assert isinstance(payload, dict)

    def test_start_json_bool_fields_are_bool(self, tmp_path: pathlib.Path) -> None:
        """``done`` decodes as a real bool, not an int or a string."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 5)
        argv = ["bisect", "start", "--bad", chain[-1], "--good", chain[0], "--json"]
        outcome = _invoke(repo_root, argv)
        assert outcome.exit_code == 0
        payload = json.loads(_json_blob(outcome.output))
        assert isinstance(payload["done"], bool)
1174
1175
class TestBisectStartStress:
    """Scale and timing tests for ``muse bisect start``."""

    def test_start_100_commit_chain(self, tmp_path: pathlib.Path) -> None:
        """Over 100 commits, start exits 0, is not done, and proposes a commit."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 100)
        argv = ["bisect", "start", "--bad", chain[-1], "--good", chain[0], "--json"]
        outcome = _invoke(repo_root, argv)
        assert outcome.exit_code == 0
        step = _parse_step(outcome.output)
        assert step["done"] is False
        assert step["remaining_count"] > 0
        assert step["next_to_test"] is not None

    def test_start_performance_100_commits(self, tmp_path: pathlib.Path) -> None:
        """Start over 100 commits takes less than five seconds of wall time."""
        import time

        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 100)
        argv = ["bisect", "start", "--bad", chain[-1], "--good", chain[0]]
        # Only the CLI invocation is timed; repo and chain setup are excluded.
        began = time.monotonic()
        outcome = _invoke(repo_root, argv)
        elapsed = time.monotonic() - began
        assert outcome.exit_code == 0
        assert elapsed < 5.0, f"start over 100 commits took {elapsed:.2f}s"

    def test_start_midpoint_is_within_range(self, tmp_path: pathlib.Path) -> None:
        """The proposed commit is neither the good nor the bad endpoint."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 20)
        argv = ["bisect", "start", "--bad", chain[-1], "--good", chain[0], "--json"]
        outcome = _invoke(repo_root, argv)
        assert outcome.exit_code == 0
        step = _parse_step(outcome.output)
        endpoints = (chain[0], chain[-1])
        assert step["next_to_test"] not in endpoints
1209
1210
1211	# ---------------------------------------------------------------------------
1212	# bisect bad — Extended, Security, Stress
1213	# ---------------------------------------------------------------------------
1214
1215
class TestBisectBadExtended:
    """Extended unit / integration / e2e tests for ``muse bisect bad``."""

    def _start(self, root: pathlib.Path, ids: list[str]) -> None:
        """Open a session with the last id bad and the first id good; must exit 0."""
        r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]])
        assert r.exit_code == 0

    def test_bad_exits_0(self, tmp_path: pathlib.Path) -> None:
        """Marking the middle commit bad exits 0."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 6)
        self._start(root, ids)
        result = _invoke(root, ["bisect", "bad", ids[len(ids) // 2]])
        assert result.exit_code == 0

    def test_bad_j_alias_works(self, tmp_path: pathlib.Path) -> None:
        """``-j`` yields JSON step output with verdict ``bad``."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 6)
        self._start(root, ids)
        result = _invoke(root, ["bisect", "bad", ids[len(ids) // 2], "-j"])
        assert result.exit_code == 0
        parsed = _parse_step(result.output)
        assert parsed["verdict"] == "bad"

    def test_bad_json_verdict_is_bad(self, tmp_path: pathlib.Path) -> None:
        """The JSON ``verdict`` field is ``bad``."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 6)
        self._start(root, ids)
        result = _invoke(root, ["bisect", "bad", ids[len(ids) // 2], "--json"])
        assert result.exit_code == 0
        assert _parse_step(result.output)["verdict"] == "bad"

    def test_bad_json_all_seven_keys(self, tmp_path: pathlib.Path) -> None:
        """The raw JSON object contains all seven step keys."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 6)
        self._start(root, ids)
        result = _invoke(root, ["bisect", "bad", ids[len(ids) // 2], "--json"])
        assert result.exit_code == 0
        d = json.loads(_json_blob(result.output))
        assert {"done", "first_bad", "next_to_test", "remaining_count",
                "steps_remaining", "verdict", "symbol_changes"} <= set(d.keys())

    def test_bad_reduces_remaining(self, tmp_path: pathlib.Path) -> None:
        """Marking the suggested commit bad strictly lowers ``remaining_count``."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 10)
        r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0], "--json"])
        # Fail here, with a clear cause, if start itself broke, instead of
        # surfacing it later as a JSON parse error.
        assert r.exit_code == 0
        started = _parse_step(r.output)
        before = started["remaining_count"]
        mid = started["next_to_test"]
        result = _invoke(root, ["bisect", "bad", mid, "--json"])
        assert result.exit_code == 0
        after = _parse_step(result.output)["remaining_count"]
        assert after < before

    def test_bad_done_true_when_isolated(self, tmp_path: pathlib.Path) -> None:
        """On a three-commit chain, marking the middle commit bad finishes the search."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 3)
        # With 3 commits: good=ids[0], bad=ids[2] → ids[1] is the only remaining
        self._start(root, ids)
        result = _invoke(root, ["bisect", "bad", ids[1], "--json"])
        assert result.exit_code == 0
        parsed = _parse_step(result.output)
        assert parsed["done"] is True
        assert parsed["first_bad"] is not None

    def test_bad_first_bad_set_when_done(self, tmp_path: pathlib.Path) -> None:
        """When the search is done, ``first_bad`` is a string."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 3)
        self._start(root, ids)
        result = _invoke(root, ["bisect", "bad", ids[1], "--json"])
        assert result.exit_code == 0
        parsed = _parse_step(result.output)
        assert parsed["done"] is True
        assert isinstance(parsed["first_bad"], str)

    def test_bad_defaults_to_head(self, tmp_path: pathlib.Path) -> None:
        """``bisect bad`` with no ref argument exits 0."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 5)
        self._start(root, ids)
        # HEAD points to ids[-1] (the known-bad); marking it bad again is valid
        result = _invoke(root, ["bisect", "bad", "--json"])
        assert result.exit_code == 0

    def test_bad_no_session_exits_1(self, tmp_path: pathlib.Path) -> None:
        """Without an active session the command exits with code 1."""
        root, _ = _make_repo(tmp_path)
        result = _invoke(root, ["bisect", "bad"])
        assert result.exit_code == 1

    def test_bad_no_session_error_to_stderr(self, tmp_path: pathlib.Path) -> None:
        """Without a session the failure message says "No bisect session"."""
        root, _ = _make_repo(tmp_path)
        result = _invoke(root, ["bisect", "bad"])
        assert result.exit_code != 0
        # Both streams are searched; the check does not pin which one carries the error.
        combined = result.output + (result.stderr or "")
        assert "No bisect session" in combined

    def test_bad_outside_repo_exits_2(self, tmp_path: pathlib.Path) -> None:
        """Running in a directory that is not a repo exits with code 2."""
        empty = tmp_path / "not_a_repo"
        empty.mkdir()
        result = _invoke(empty, ["bisect", "bad"])
        assert result.exit_code == 2

    def test_bad_invalid_ref_exits_1(self, tmp_path: pathlib.Path) -> None:
        """An unresolvable ref exits with code 1."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 4)
        self._start(root, ids)
        result = _invoke(root, ["bisect", "bad", "deadbeef_nonexistent"])
        assert result.exit_code == 1

    def test_bad_text_mentions_commit(self, tmp_path: pathlib.Path) -> None:
        """Text output includes the short id of the marked commit."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 5)
        self._start(root, ids)
        mid = ids[len(ids) // 2]
        result = _invoke(root, ["bisect", "bad", mid])
        assert result.exit_code == 0
        assert short_id(mid) in result.output

    def test_bad_text_no_json_object(self, tmp_path: pathlib.Path) -> None:
        """Text output does not begin with a JSON object."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 5)
        self._start(root, ids)
        result = _invoke(root, ["bisect", "bad", ids[len(ids) // 2]])
        assert result.exit_code == 0
        assert not result.output.strip().startswith("{")

    def test_bad_help_description_present(self, tmp_path: pathlib.Path) -> None:
        """``--help`` output contains the quickstart header or the word "regression"."""
        root, _ = _make_repo(tmp_path)
        result = _invoke(root, ["bisect", "bad", "--help"])
        assert "Agent quickstart" in result.output or "regression" in result.output.lower()

    def test_bad_advances_bisect_log(self, tmp_path: pathlib.Path) -> None:
        """After marking bad, the persisted log has an entry containing "bad"."""
        from muse.core.bisect import _load_state
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 6)
        self._start(root, ids)
        mid = ids[len(ids) // 2]
        marked = _invoke(root, ["bisect", "bad", mid])
        # Confirm the mark command itself succeeded before inspecting state.
        assert marked.exit_code == 0
        state = _load_state(root)
        assert state is not None
        assert any("bad" in entry for entry in state.get("log", []))

    def test_bad_remaining_count_not_negative(self, tmp_path: pathlib.Path) -> None:
        """``remaining_count`` is never negative."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 5)
        self._start(root, ids)
        result = _invoke(root, ["bisect", "bad", ids[len(ids) // 2], "--json"])
        assert result.exit_code == 0
        assert _parse_step(result.output)["remaining_count"] >= 0

    def test_bad_symbol_changes_is_list(self, tmp_path: pathlib.Path) -> None:
        """``symbol_changes`` is a list."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 5)
        self._start(root, ids)
        result = _invoke(root, ["bisect", "bad", ids[len(ids) // 2], "--json"])
        assert result.exit_code == 0
        assert isinstance(_parse_step(result.output)["symbol_changes"], list)
1371
1372
class TestBisectBadSecurity:
    """Security hardening tests for ``muse bisect bad``."""

    def _start(self, root: pathlib.Path, ids: list[str]) -> None:
        """Open a session with the last id bad and the first id good; must exit 0."""
        r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]])
        assert r.exit_code == 0

    @staticmethod
    def _tainted_result(next_ref: str):
        """Return a ``BisectResult`` whose single symbol change embeds ANSI escapes."""
        from muse.core.bisect import BisectResult

        return BisectResult(
            done=False,
            first_bad=None,
            next_to_test=next_ref,
            remaining_count=2,
            steps_remaining=1,
            verdict="bad",
            symbol_changes=["modify func\x1b[31mred\x1b[0m"],
        )

    def test_bad_json_is_valid_json(self, tmp_path: pathlib.Path) -> None:
        """The JSON blob in the output decodes to a dict."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 5)
        self._start(root, ids)
        result = _invoke(root, ["bisect", "bad", ids[len(ids) // 2], "--json"])
        assert result.exit_code == 0
        d = json.loads(_json_blob(result.output))
        assert isinstance(d, dict)

    def test_bad_json_done_is_bool(self, tmp_path: pathlib.Path) -> None:
        """``done`` decodes as a real bool."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 5)
        self._start(root, ids)
        result = _invoke(root, ["bisect", "bad", ids[len(ids) // 2], "--json"])
        assert result.exit_code == 0
        assert isinstance(json.loads(_json_blob(result.output))["done"], bool)

    def test_bad_symbol_changes_sanitized_in_json(self, tmp_path: pathlib.Path) -> None:
        """JSON output contains no escape byte even when ``symbol_changes`` does."""
        from unittest.mock import patch
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 5)
        self._start(root, ids)
        injected = self._tainted_result(ids[2])
        with patch("muse.cli.commands.bisect.mark_bad", return_value=injected):
            result = _invoke(root, ["bisect", "bad", ids[2], "--json"])
        # Without this check a crash that prints nothing would satisfy the
        # escape-byte assertion below; mirrors the ``bisect start`` tests.
        assert result.exit_code == 0
        assert "\x1b" not in result.output

    def test_bad_symbol_changes_sanitized_in_text(self, tmp_path: pathlib.Path) -> None:
        """Text output contains no escape byte even when ``symbol_changes`` does."""
        from unittest.mock import patch
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 5)
        self._start(root, ids)
        injected = self._tainted_result(ids[2])
        with patch("muse.cli.commands.bisect.mark_bad", return_value=injected):
            result = _invoke(root, ["bisect", "bad", ids[2]])
        # Same guard as the JSON variant: require a successful render first.
        assert result.exit_code == 0
        assert "\x1b" not in result.output

    def test_bad_error_output_to_stderr_not_stdout(self, tmp_path: pathlib.Path) -> None:
        """On failure, the captured output must not start with a JSON object."""
        root, _ = _make_repo(tmp_path)
        result = _invoke(root, ["bisect", "bad"])
        assert result.exit_code != 0
        # CliRunner mixes stderr into output; verify no JSON object was emitted
        assert not result.output.strip().startswith("{")

    def test_bad_ansi_in_ref_does_not_leak_to_output(self, tmp_path: pathlib.Path) -> None:
        """A ref argument containing ANSI escapes is not echoed back verbatim."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 4)
        self._start(root, ids)
        result = _invoke(root, ["bisect", "bad", "\x1b[31mHEAD\x1b[0m"])
        # Will fail (ref not found) but must not echo ANSI to stdout
        assert "\x1b" not in result.output
1453
1454
class TestBisectBadStress:
    """Scale and timing tests for ``muse bisect bad``."""

    def _start(self, root: pathlib.Path, ids: list[str]) -> None:
        """Open a session with the last id bad and the first id good; must exit 0."""
        argv = ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]
        started = _invoke(root, argv)
        assert started.exit_code == 0

    def test_bad_on_100_commit_chain(self, tmp_path: pathlib.Path) -> None:
        """Marking index 50 bad in a 100-commit session exits 0 with a sane count."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 100)
        self._start(repo_root, chain)
        outcome = _invoke(repo_root, ["bisect", "bad", chain[50], "--json"])
        assert outcome.exit_code == 0
        step = _parse_step(outcome.output)
        assert step["remaining_count"] >= 0

    def test_bad_performance_100_commits(self, tmp_path: pathlib.Path) -> None:
        """Marking bad in a 100-commit session takes less than five seconds."""
        import time

        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 100)
        self._start(repo_root, chain)
        argv = ["bisect", "bad", chain[50], "--json"]
        # Only the mark command is timed; setup and session start are excluded.
        began = time.monotonic()
        outcome = _invoke(repo_root, argv)
        elapsed = time.monotonic() - began
        assert outcome.exit_code == 0
        assert elapsed < 5.0, f"bisect bad on 100 commits took {elapsed:.2f}s"

    def test_bad_converges_full_session(self, tmp_path: pathlib.Path) -> None:
        """Always marking the suggestion bad finishes a 20-commit bisect within 10 steps."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 20)
        started = _invoke(
            repo_root,
            ["bisect", "start", "--bad", chain[-1], "--good", chain[0], "--json"],
        )
        assert started.exit_code == 0
        step = _parse_step(started.output)
        finished = step["done"]

        budget = 10
        while budget > 0 and not finished:
            candidate = step["next_to_test"]
            assert candidate is not None
            marked = _invoke(repo_root, ["bisect", "bad", candidate, "--json"])
            assert marked.exit_code == 0
            step = _parse_step(marked.output)
            finished = step["done"]
            budget -= 1

        assert finished, "bisect did not converge within 10 bad steps on 20-commit chain"
1501
1502
1503	# ---------------------------------------------------------------------------
1504	# bisect good — Extended, Security, Stress
1505	# ---------------------------------------------------------------------------
1506
1507
1508	class TestBisectGoodExtended:
1509	"""Extended unit / integration / e2e tests for muse bisect good."""
1510
def _start(self, root: pathlib.Path, ids: list[str]) -> None:
    """Open a session with the last id bad and the first id good; must exit 0."""
    argv = ["bisect", "start", "--bad", ids[-1], "--good", ids[0]]
    started = _invoke(root, argv)
    assert started.exit_code == 0
1514
def test_good_exits_0(self, tmp_path: pathlib.Path) -> None:
    """Marking the middle commit good exits 0."""
    repo_root, repo_id = _make_repo(tmp_path)
    chain = _build_chain(repo_root, repo_id, 6)
    self._start(repo_root, chain)
    middle = chain[len(chain) // 2]
    outcome = _invoke(repo_root, ["bisect", "good", middle])
    assert outcome.exit_code == 0
1521
def test_good_j_alias_works(self, tmp_path: pathlib.Path) -> None:
    """``-j`` yields JSON step output with verdict ``good``."""
    repo_root, repo_id = _make_repo(tmp_path)
    chain = _build_chain(repo_root, repo_id, 6)
    self._start(repo_root, chain)
    middle = chain[len(chain) // 2]
    outcome = _invoke(repo_root, ["bisect", "good", middle, "-j"])
    assert outcome.exit_code == 0
    step = _parse_step(outcome.output)
    assert step["verdict"] == "good"
1529
def test_good_json_verdict_is_good(self, tmp_path: pathlib.Path) -> None:
    """The JSON ``verdict`` field is ``good``."""
    repo_root, repo_id = _make_repo(tmp_path)
    chain = _build_chain(repo_root, repo_id, 6)
    self._start(repo_root, chain)
    middle = chain[len(chain) // 2]
    outcome = _invoke(repo_root, ["bisect", "good", middle, "--json"])
    assert outcome.exit_code == 0
    step = _parse_step(outcome.output)
    assert step["verdict"] == "good"
1537
def test_good_json_all_seven_keys(self, tmp_path: pathlib.Path) -> None:
    """The raw JSON object contains all seven step keys."""
    repo_root, repo_id = _make_repo(tmp_path)
    chain = _build_chain(repo_root, repo_id, 6)
    self._start(repo_root, chain)
    middle = chain[len(chain) // 2]
    outcome = _invoke(repo_root, ["bisect", "good", middle, "--json"])
    assert outcome.exit_code == 0
    payload = json.loads(_json_blob(outcome.output))
    required = {
        "done",
        "first_bad",
        "next_to_test",
        "remaining_count",
        "steps_remaining",
        "verdict",
        "symbol_changes",
    }
    assert required.issubset(set(payload.keys()))
1547
def test_good_reduces_remaining(self, tmp_path: pathlib.Path) -> None:
    """Marking the suggested commit good strictly lowers ``remaining_count``."""
    root, repo_id = _make_repo(tmp_path)
    ids = _build_chain(root, repo_id, 10)
    r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0], "--json"])
    # Fail here, with a clear cause, if start itself broke, instead of
    # surfacing it later as a JSON parse error.
    assert r.exit_code == 0
    started = _parse_step(r.output)
    before = started["remaining_count"]
    mid = started["next_to_test"]
    result = _invoke(root, ["bisect", "good", mid, "--json"])
    assert result.exit_code == 0
    assert _parse_step(result.output)["remaining_count"] < before
1557
def test_good_done_true_when_isolated(self, tmp_path: pathlib.Path) -> None:
    """On a three-commit chain, marking the middle commit good blames the last one."""
    repo_root, repo_id = _make_repo(tmp_path)
    chain = _build_chain(repo_root, repo_id, 3)
    # good=chain[0], bad=chain[2]: chain[1] is the midpoint; marking it good resolves
    self._start(repo_root, chain)
    outcome = _invoke(repo_root, ["bisect", "good", chain[1], "--json"])
    assert outcome.exit_code == 0
    step = _parse_step(outcome.output)
    assert step["done"] is True
    assert step["first_bad"] == chain[2]
1569
def test_good_first_bad_set_when_done(self, tmp_path: pathlib.Path) -> None:
    """When the search is done, ``first_bad`` is a string."""
    repo_root, repo_id = _make_repo(tmp_path)
    chain = _build_chain(repo_root, repo_id, 3)
    self._start(repo_root, chain)
    outcome = _invoke(repo_root, ["bisect", "good", chain[1], "--json"])
    assert outcome.exit_code == 0
    step = _parse_step(outcome.output)
    assert step["done"] is True
    assert isinstance(step["first_bad"], str)
1579
def test_good_defaults_to_head(self, tmp_path: pathlib.Path) -> None:
    """``bisect good`` with no ref argument exits 0."""
    repo_root, repo_id = _make_repo(tmp_path)
    chain = _build_chain(repo_root, repo_id, 5)
    self._start(repo_root, chain)
    # HEAD is chain[-1] (known bad); marking it good is legal but pushes bad boundary
    outcome = _invoke(repo_root, ["bisect", "good", "--json"])
    assert outcome.exit_code == 0
1587
def test_good_no_session_exits_1(self, tmp_path: pathlib.Path) -> None:
    """Without an active session the command exits with code 1."""
    repo_root, _repo_id = _make_repo(tmp_path)
    outcome = _invoke(repo_root, ["bisect", "good"])
    assert outcome.exit_code == 1
1592
def test_good_no_session_error_message(self, tmp_path: pathlib.Path) -> None:
    """Without a session the output says "No bisect session"."""
    repo_root, _repo_id = _make_repo(tmp_path)
    outcome = _invoke(repo_root, ["bisect", "good"])
    # The message may appear on either stream, so look at both.
    streams = outcome.output + (outcome.stderr or "")
    assert "No bisect session" in streams
1598
def test_good_outside_repo_exits_2(self, tmp_path: pathlib.Path) -> None:
    """``bisect good`` run from a plain (non-repo) directory exits with status 2."""
    plain_dir = tmp_path / "not_a_repo"
    plain_dir.mkdir()
    outcome = _invoke(plain_dir, ["bisect", "good"])
    assert outcome.exit_code == 2
1604
def test_good_invalid_ref_exits_1(self, tmp_path: pathlib.Path) -> None:
    """A ref that matches no commit makes ``bisect good`` exit with status 1."""
    repo_root, repo_id = _make_repo(tmp_path)
    chain = _build_chain(repo_root, repo_id, 4)
    self._start(repo_root, chain)
    bogus_ref = "deadbeef_nonexistent"
    outcome = _invoke(repo_root, ["bisect", "good", bogus_ref])
    assert outcome.exit_code == 1
1611
def test_good_text_mentions_commit(self, tmp_path: pathlib.Path) -> None:
    """Text-mode output contains the short id of the commit that was marked."""
    repo_root, repo_id = _make_repo(tmp_path)
    chain = _build_chain(repo_root, repo_id, 5)
    self._start(repo_root, chain)

    middle = chain[len(chain) // 2]
    outcome = _invoke(repo_root, ["bisect", "good", middle])
    assert outcome.exit_code == 0
    assert short_id(middle) in outcome.output
1620
def test_good_text_no_json_object(self, tmp_path: pathlib.Path) -> None:
    """Without ``--json`` the output does not begin with a JSON object brace."""
    repo_root, repo_id = _make_repo(tmp_path)
    chain = _build_chain(repo_root, repo_id, 5)
    self._start(repo_root, chain)

    middle = chain[len(chain) // 2]
    outcome = _invoke(repo_root, ["bisect", "good", middle])
    assert outcome.exit_code == 0
    leading_text = outcome.output.strip()
    assert not leading_text.startswith("{")
1628
def test_good_help_description_present(self, tmp_path: pathlib.Path) -> None:
    """``bisect good --help`` shows "Agent quickstart" or mentions regression."""
    repo_root, _repo_id = _make_repo(tmp_path)
    help_text = _invoke(repo_root, ["bisect", "good", "--help"]).output
    assert "Agent quickstart" in help_text or "regression" in help_text.lower()
1633
def test_good_advances_bisect_log(self, tmp_path: pathlib.Path) -> None:
    """A successful ``bisect good`` leaves a "good" entry in the saved log.

    The command's exit status is asserted before the state file is read, so
    a failed invocation is reported directly instead of surfacing later as a
    misleading log assertion.
    """
    from muse.core.bisect import _load_state

    root, repo_id = _make_repo(tmp_path)
    ids = _build_chain(root, repo_id, 6)
    self._start(root, ids)

    result = _invoke(root, ["bisect", "good", ids[len(ids) // 2]])
    assert result.exit_code == 0

    state = _load_state(root)
    assert state is not None
    # The "log" key may be absent, hence the empty-list default.
    assert any("good" in entry for entry in state.get("log", []))
1643
def test_good_remaining_count_not_negative(self, tmp_path: pathlib.Path) -> None:
    """``remaining_count`` in the JSON step is zero or greater."""
    repo_root, repo_id = _make_repo(tmp_path)
    chain = _build_chain(repo_root, repo_id, 5)
    self._start(repo_root, chain)

    middle = chain[len(chain) // 2]
    outcome = _invoke(repo_root, ["bisect", "good", middle, "--json"])
    assert outcome.exit_code == 0
    remaining = _parse_step(outcome.output)["remaining_count"]
    assert remaining >= 0
1651
def test_good_symbol_changes_is_list(self, tmp_path: pathlib.Path) -> None:
    """``symbol_changes`` in the JSON step is a list."""
    repo_root, repo_id = _make_repo(tmp_path)
    chain = _build_chain(repo_root, repo_id, 5)
    self._start(repo_root, chain)

    middle = chain[len(chain) // 2]
    outcome = _invoke(repo_root, ["bisect", "good", middle, "--json"])
    assert outcome.exit_code == 0
    changes = _parse_step(outcome.output)["symbol_changes"]
    assert isinstance(changes, list)
1659
1660
class TestBisectGoodSecurity:
    """Security hardening tests for muse bisect good."""

    def _start(self, root: pathlib.Path, ids: list[str]) -> None:
        """Start a session with ``ids[-1]`` bad and ``ids[0]`` good; require exit 0."""
        r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]])
        assert r.exit_code == 0

    def test_good_json_is_valid_json(self, tmp_path: pathlib.Path) -> None:
        """The ``--json`` payload decodes to a JSON object."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 5)
        self._start(root, ids)
        result = _invoke(root, ["bisect", "good", ids[len(ids) // 2], "--json"])
        assert result.exit_code == 0
        assert isinstance(json.loads(_json_blob(result.output)), dict)

    def test_good_json_done_is_bool(self, tmp_path: pathlib.Path) -> None:
        """The ``done`` field of the JSON payload is a real boolean."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 5)
        self._start(root, ids)
        result = _invoke(root, ["bisect", "good", ids[len(ids) // 2], "--json"])
        assert result.exit_code == 0
        assert isinstance(json.loads(_json_blob(result.output))["done"], bool)

    def test_good_symbol_changes_sanitized_in_json(self, tmp_path: pathlib.Path) -> None:
        """Escape bytes injected via ``symbol_changes`` never reach JSON output."""
        from unittest.mock import patch
        from muse.core.bisect import BisectResult

        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 5)
        self._start(root, ids)
        # mark_good is replaced so the CLI receives a result whose
        # symbol_changes entry carries raw ANSI colour sequences.
        injected = BisectResult(
            done=False,
            first_bad=None,
            next_to_test=ids[2],
            remaining_count=2,
            steps_remaining=1,
            verdict="good",
            symbol_changes=["add func\x1b[32mgreen\x1b[0m"],
        )
        with patch("muse.cli.commands.bisect.mark_good", return_value=injected):
            result = _invoke(root, ["bisect", "good", ids[2], "--json"])
        assert "\x1b" not in result.output

    def test_good_symbol_changes_sanitized_in_text(self, tmp_path: pathlib.Path) -> None:
        """Escape bytes injected via ``symbol_changes`` never reach text output."""
        from unittest.mock import patch
        from muse.core.bisect import BisectResult

        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 5)
        self._start(root, ids)
        injected = BisectResult(
            done=False,
            first_bad=None,
            next_to_test=ids[2],
            remaining_count=2,
            steps_remaining=1,
            verdict="good",
            symbol_changes=["add func\x1b[32mgreen\x1b[0m"],
        )
        with patch("muse.cli.commands.bisect.mark_good", return_value=injected):
            result = _invoke(root, ["bisect", "good", ids[2]])
        assert "\x1b" not in result.output

    def test_good_error_no_json_on_failure(self, tmp_path: pathlib.Path) -> None:
        """A failing invocation does not start its stdout with a JSON object."""
        root, _ = _make_repo(tmp_path)
        result = _invoke(root, ["bisect", "good"])
        assert result.exit_code != 0
        assert not result.output.strip().startswith("{")

    def test_good_ansi_in_ref_does_not_leak(self, tmp_path: pathlib.Path) -> None:
        """An escape sequence passed as the ref is not echoed on either stream."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 4)
        self._start(root, ids)
        result = _invoke(root, ["bisect", "good", "\x1b[32mHEAD\x1b[0m"])
        # The ref is unlikely to resolve, and error text may be written to
        # stderr; checking stdout alone could pass without inspecting the
        # message that actually echoes the ref.
        combined = result.output + (result.stderr or "")
        assert "\x1b" not in combined
1734
1735
class TestBisectGoodStress:
    """Scale and timing checks for ``muse bisect good``."""

    def _start(self, root: pathlib.Path, ids: list[str]) -> None:
        """Start a session (``ids[-1]`` bad, ``ids[0]`` good) and require exit 0."""
        bad_ref, good_ref = ids[-1], ids[0]
        started = _invoke(root, ["bisect", "start", "--bad", bad_ref, "--good", good_ref])
        assert started.exit_code == 0

    def test_good_on_100_commit_chain(self, tmp_path: pathlib.Path) -> None:
        """Marking commit index 10 of a 100-commit chain good succeeds."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 100)
        self._start(repo_root, chain)

        outcome = _invoke(repo_root, ["bisect", "good", chain[10], "--json"])
        assert outcome.exit_code == 0
        remaining = _parse_step(outcome.output)["remaining_count"]
        assert remaining >= 0

    def test_good_performance_100_commits(self, tmp_path: pathlib.Path) -> None:
        """One ``bisect good`` call on a 100-commit chain takes under 5 seconds."""
        import time

        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 100)
        self._start(repo_root, chain)

        # Only the verdict command itself is timed, not the fixture setup.
        clock_start = time.monotonic()
        outcome = _invoke(repo_root, ["bisect", "good", chain[10], "--json"])
        elapsed = time.monotonic() - clock_start

        assert outcome.exit_code == 0
        assert elapsed < 5.0, f"bisect good on 100 commits took {elapsed:.2f}s"

    def test_good_converges_full_session(self, tmp_path: pathlib.Path) -> None:
        """Marking each ``next_to_test`` good finishes a 20-commit bisect in <= 10 steps."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 20)
        opened = _invoke(
            repo_root,
            ["bisect", "start", "--bad", chain[-1], "--good", chain[0], "--json"],
        )
        assert opened.exit_code == 0

        step = _parse_step(opened.output)
        finished = step["done"]
        attempts = 0
        while not finished and attempts < 10:
            attempts += 1
            candidate = step["next_to_test"]
            assert candidate is not None
            marked = _invoke(repo_root, ["bisect", "good", candidate, "--json"])
            assert marked.exit_code == 0
            step = _parse_step(marked.output)
            finished = step["done"]
        assert finished, "bisect did not converge within 10 good steps on 20-commit chain"
1780
1781
1782	# ---------------------------------------------------------------------------
1783	# bisect skip — Extended, Security, Stress
1784	# ---------------------------------------------------------------------------
1785
1786
class TestBisectSkipExtended:
    """Extended unit / integration / e2e tests for muse bisect skip."""

    def _start(self, root: pathlib.Path, ids: list[str]) -> None:
        """Start a session with ``ids[-1]`` bad and ``ids[0]`` good; require exit 0."""
        r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]])
        assert r.exit_code == 0

    def _session(self, tmp_path: pathlib.Path, length: int) -> tuple[pathlib.Path, list[str]]:
        """Build a repo with *length* chained commits and start a bisect on it."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, length)
        self._start(root, ids)
        return root, ids

    def test_skip_exits_0(self, tmp_path: pathlib.Path) -> None:
        """Skipping the middle commit of an active session exits 0."""
        root, ids = self._session(tmp_path, 6)
        result = _invoke(root, ["bisect", "skip", ids[len(ids) // 2]])
        assert result.exit_code == 0

    def test_skip_j_alias_works(self, tmp_path: pathlib.Path) -> None:
        """``-j`` yields a parseable step whose verdict is "skip"."""
        root, ids = self._session(tmp_path, 6)
        result = _invoke(root, ["bisect", "skip", ids[len(ids) // 2], "-j"])
        assert result.exit_code == 0
        assert _parse_step(result.output)["verdict"] == "skip"

    def test_skip_json_verdict_is_skip(self, tmp_path: pathlib.Path) -> None:
        """``--json`` reports the verdict "skip"."""
        root, ids = self._session(tmp_path, 6)
        result = _invoke(root, ["bisect", "skip", ids[len(ids) // 2], "--json"])
        assert result.exit_code == 0
        assert _parse_step(result.output)["verdict"] == "skip"

    def test_skip_json_all_seven_keys(self, tmp_path: pathlib.Path) -> None:
        """The JSON payload carries all seven step keys."""
        root, ids = self._session(tmp_path, 6)
        result = _invoke(root, ["bisect", "skip", ids[len(ids) // 2], "--json"])
        assert result.exit_code == 0
        d = json.loads(_json_blob(result.output))
        assert {"done", "first_bad", "next_to_test", "remaining_count",
                "steps_remaining", "verdict", "symbol_changes"} <= set(d.keys())

    def test_skip_removes_commit_from_remaining(self, tmp_path: pathlib.Path) -> None:
        """Skipping the suggested commit lowers ``remaining_count``."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 10)
        r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0], "--json"])
        # Fail here if the session could not be started, rather than on a
        # parse error further down.
        assert r.exit_code == 0
        started = _parse_step(r.output)
        before = started["remaining_count"]
        mid = started["next_to_test"]
        result = _invoke(root, ["bisect", "skip", mid, "--json"])
        assert result.exit_code == 0
        assert _parse_step(result.output)["remaining_count"] < before

    def test_skip_persisted_in_state(self, tmp_path: pathlib.Path) -> None:
        """A skipped commit id is written to ``skipped_ids`` in the saved state."""
        from muse.core.bisect import _load_state

        root, ids = self._session(tmp_path, 6)
        mid = ids[len(ids) // 2]
        result = _invoke(root, ["bisect", "skip", mid])
        # The state is only meaningful if the skip command itself succeeded.
        assert result.exit_code == 0
        state = _load_state(root)
        assert state is not None
        assert mid in state.get("skipped_ids", [])

    def test_skip_defaults_to_head(self, tmp_path: pathlib.Path) -> None:
        """``bisect skip`` with no ref argument exits 0."""
        root, _ids = self._session(tmp_path, 5)
        result = _invoke(root, ["bisect", "skip", "--json"])
        assert result.exit_code == 0

    def test_skip_no_session_exits_1(self, tmp_path: pathlib.Path) -> None:
        """Without an active session ``bisect skip`` exits 1."""
        root, _ = _make_repo(tmp_path)
        result = _invoke(root, ["bisect", "skip"])
        assert result.exit_code == 1

    def test_skip_no_session_error_message(self, tmp_path: pathlib.Path) -> None:
        """The "No bisect session" text appears on stdout or stderr."""
        root, _ = _make_repo(tmp_path)
        result = _invoke(root, ["bisect", "skip"])
        combined = result.output + (result.stderr or "")
        assert "No bisect session" in combined

    def test_skip_outside_repo_exits_2(self, tmp_path: pathlib.Path) -> None:
        """Running outside a repo exits 2."""
        empty = tmp_path / "not_a_repo"
        empty.mkdir()
        result = _invoke(empty, ["bisect", "skip"])
        assert result.exit_code == 2

    def test_skip_invalid_ref_exits_1(self, tmp_path: pathlib.Path) -> None:
        """A ref that matches no commit exits 1."""
        root, _ids = self._session(tmp_path, 4)
        result = _invoke(root, ["bisect", "skip", "deadbeef_nonexistent"])
        assert result.exit_code == 1

    def test_skip_text_mentions_commit(self, tmp_path: pathlib.Path) -> None:
        """Text output contains the short id of the skipped commit."""
        root, ids = self._session(tmp_path, 5)
        mid = ids[len(ids) // 2]
        result = _invoke(root, ["bisect", "skip", mid])
        assert result.exit_code == 0
        assert short_id(mid) in result.output

    def test_skip_text_no_json_object(self, tmp_path: pathlib.Path) -> None:
        """Without ``--json`` the output does not begin with ``{``."""
        root, ids = self._session(tmp_path, 5)
        result = _invoke(root, ["bisect", "skip", ids[len(ids) // 2]])
        assert result.exit_code == 0
        assert not result.output.strip().startswith("{")

    def test_skip_help_description_present(self, tmp_path: pathlib.Path) -> None:
        """``--help`` shows "Agent quickstart" or mentions "125"."""
        root, _ = _make_repo(tmp_path)
        result = _invoke(root, ["bisect", "skip", "--help"])
        assert "Agent quickstart" in result.output or "125" in result.output

    def test_skip_advances_log(self, tmp_path: pathlib.Path) -> None:
        """A successful skip leaves a "skip" entry in the saved log."""
        from muse.core.bisect import _load_state

        root, ids = self._session(tmp_path, 6)
        result = _invoke(root, ["bisect", "skip", ids[len(ids) // 2]])
        # Assert success first so a failed command is reported as such.
        assert result.exit_code == 0
        state = _load_state(root)
        assert state is not None
        assert any("skip" in entry for entry in state.get("log", []))

    def test_skip_remaining_count_not_negative(self, tmp_path: pathlib.Path) -> None:
        """``remaining_count`` is zero or greater."""
        root, ids = self._session(tmp_path, 5)
        result = _invoke(root, ["bisect", "skip", ids[len(ids) // 2], "--json"])
        assert result.exit_code == 0
        assert _parse_step(result.output)["remaining_count"] >= 0

    def test_skip_symbol_changes_is_list(self, tmp_path: pathlib.Path) -> None:
        """``symbol_changes`` is a list."""
        root, ids = self._session(tmp_path, 5)
        result = _invoke(root, ["bisect", "skip", ids[len(ids) // 2], "--json"])
        assert result.exit_code == 0
        assert isinstance(_parse_step(result.output)["symbol_changes"], list)

    def test_skip_multiple_commits(self, tmp_path: pathlib.Path) -> None:
        """Skipping several commits all land in skipped_ids."""
        from muse.core.bisect import _load_state

        root, ids = self._session(tmp_path, 8)
        for idx in (2, 3, 4):
            r = _invoke(root, ["bisect", "skip", ids[idx]])
            assert r.exit_code == 0
        state = _load_state(root)
        assert state is not None
        skipped = state.get("skipped_ids", [])
        assert all(ids[i] in skipped for i in (2, 3, 4))
1940
1941
class TestBisectSkipSecurity:
    """Security hardening tests for muse bisect skip."""

    def _start(self, root: pathlib.Path, ids: list[str]) -> None:
        """Start a session with ``ids[-1]`` bad and ``ids[0]`` good; require exit 0."""
        r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]])
        assert r.exit_code == 0

    def test_skip_json_is_valid_json(self, tmp_path: pathlib.Path) -> None:
        """The ``--json`` payload decodes to a JSON object."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 5)
        self._start(root, ids)
        result = _invoke(root, ["bisect", "skip", ids[len(ids) // 2], "--json"])
        assert result.exit_code == 0
        assert isinstance(json.loads(_json_blob(result.output)), dict)

    def test_skip_json_done_is_bool(self, tmp_path: pathlib.Path) -> None:
        """The ``done`` field of the JSON payload is a real boolean."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 5)
        self._start(root, ids)
        result = _invoke(root, ["bisect", "skip", ids[len(ids) // 2], "--json"])
        assert result.exit_code == 0
        assert isinstance(json.loads(_json_blob(result.output))["done"], bool)

    def test_skip_symbol_changes_sanitized_in_json(self, tmp_path: pathlib.Path) -> None:
        """Escape bytes injected via ``symbol_changes`` never reach JSON output."""
        from unittest.mock import patch
        from muse.core.bisect import BisectResult

        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 5)
        self._start(root, ids)
        # skip_commit is replaced so the CLI receives a result whose
        # symbol_changes entry carries raw ANSI colour sequences.
        injected = BisectResult(
            done=False,
            first_bad=None,
            next_to_test=ids[2],
            remaining_count=2,
            steps_remaining=1,
            verdict="skip",
            symbol_changes=["modify func\x1b[33myellow\x1b[0m"],
        )
        with patch("muse.cli.commands.bisect.skip_commit", return_value=injected):
            result = _invoke(root, ["bisect", "skip", ids[2], "--json"])
        assert "\x1b" not in result.output

    def test_skip_symbol_changes_sanitized_in_text(self, tmp_path: pathlib.Path) -> None:
        """Escape bytes injected via ``symbol_changes`` never reach text output."""
        from unittest.mock import patch
        from muse.core.bisect import BisectResult

        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 5)
        self._start(root, ids)
        injected = BisectResult(
            done=False,
            first_bad=None,
            next_to_test=ids[2],
            remaining_count=2,
            steps_remaining=1,
            verdict="skip",
            symbol_changes=["modify func\x1b[33myellow\x1b[0m"],
        )
        with patch("muse.cli.commands.bisect.skip_commit", return_value=injected):
            result = _invoke(root, ["bisect", "skip", ids[2]])
        assert "\x1b" not in result.output

    def test_skip_error_no_json_on_failure(self, tmp_path: pathlib.Path) -> None:
        """A failing invocation does not start its stdout with a JSON object."""
        root, _ = _make_repo(tmp_path)
        result = _invoke(root, ["bisect", "skip"])
        assert result.exit_code != 0
        assert not result.output.strip().startswith("{")

    def test_skip_ansi_in_ref_does_not_leak(self, tmp_path: pathlib.Path) -> None:
        """An escape sequence passed as the ref is not echoed on either stream."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 4)
        self._start(root, ids)
        result = _invoke(root, ["bisect", "skip", "\x1b[33mHEAD\x1b[0m"])
        # The ref is unlikely to resolve, and error text may be written to
        # stderr; checking stdout alone could pass without inspecting the
        # message that actually echoes the ref.
        combined = result.output + (result.stderr or "")
        assert "\x1b" not in combined
2015
2016
class TestBisectSkipStress:
    """Scale and timing checks for ``muse bisect skip``."""

    def _start(self, root: pathlib.Path, ids: list[str]) -> None:
        """Start a session (``ids[-1]`` bad, ``ids[0]`` good) and require exit 0."""
        bad_ref, good_ref = ids[-1], ids[0]
        started = _invoke(root, ["bisect", "start", "--bad", bad_ref, "--good", good_ref])
        assert started.exit_code == 0

    def test_skip_on_100_commit_chain(self, tmp_path: pathlib.Path) -> None:
        """Skipping commit index 50 of a 100-commit chain succeeds."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 100)
        self._start(repo_root, chain)

        outcome = _invoke(repo_root, ["bisect", "skip", chain[50], "--json"])
        assert outcome.exit_code == 0
        remaining = _parse_step(outcome.output)["remaining_count"]
        assert remaining >= 0

    def test_skip_performance_100_commits(self, tmp_path: pathlib.Path) -> None:
        """One ``bisect skip`` call on a 100-commit chain takes under 5 seconds."""
        import time

        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 100)
        self._start(repo_root, chain)

        # Only the skip command itself is timed, not the fixture setup.
        clock_start = time.monotonic()
        outcome = _invoke(repo_root, ["bisect", "skip", chain[50], "--json"])
        elapsed = time.monotonic() - clock_start

        assert outcome.exit_code == 0
        assert elapsed < 5.0, f"bisect skip on 100 commits took {elapsed:.2f}s"

    def test_skip_reduces_remaining_monotonically(self, tmp_path: pathlib.Path) -> None:
        """``remaining_count`` never grows across up to five consecutive skips."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 20)
        opened = _invoke(
            repo_root,
            ["bisect", "start", "--bad", chain[-1], "--good", chain[0], "--json"],
        )
        assert opened.exit_code == 0

        history = [_parse_step(opened.output)["remaining_count"]]
        latest = opened
        for _round in range(5):
            step = _parse_step(latest.output)
            # Stop once the session is finished or offers nothing to test.
            if step["done"] or step["next_to_test"] is None:
                break
            candidate = step["next_to_test"]
            latest = _invoke(repo_root, ["bisect", "skip", candidate, "--json"])
            assert latest.exit_code == 0
            history.append(_parse_step(latest.output)["remaining_count"])

        assert all(earlier >= later for earlier, later in zip(history, history[1:]))
2060
2061
2062	# ---------------------------------------------------------------------------
2063	# bisect run — Extended, Security, Stress
2064	# ---------------------------------------------------------------------------
2065
2066
class TestBisectRunExtended:
    """Unit, integration and end-to-end checks for ``muse bisect run``."""

    def _start(self, root: pathlib.Path, ids: list[str]) -> None:
        """Start a session (``ids[-1]`` bad, ``ids[0]`` good) and require exit 0."""
        bad_ref, good_ref = ids[-1], ids[0]
        started = _invoke(root, ["bisect", "start", "--bad", bad_ref, "--good", good_ref])
        assert started.exit_code == 0

    def _session(self, tmp_path: pathlib.Path, length: int) -> pathlib.Path:
        """Build a repo with *length* chained commits, start a bisect, return its root."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, length)
        self._start(repo_root, chain)
        return repo_root

    @staticmethod
    def _ndjson_lines(output: str) -> list[str]:
        """Return the stripped, non-blank lines of *output* in their original order."""
        trimmed = (raw.strip() for raw in output.strip().splitlines())
        return [text for text in trimmed if text]

    def test_run_exits_0_with_true(self, tmp_path: pathlib.Path) -> None:
        """``bisect run true`` exits 0."""
        repo_root = self._session(tmp_path, 5)
        outcome = _invoke(repo_root, ["bisect", "run", "true"])
        assert outcome.exit_code == 0

    def test_run_j_alias_works(self, tmp_path: pathlib.Path) -> None:
        """With ``-j`` the last output line is JSON with ``done`` true."""
        repo_root = self._session(tmp_path, 5)
        outcome = _invoke(repo_root, ["bisect", "run", "true", "-j"])
        assert outcome.exit_code == 0
        records = self._ndjson_lines(outcome.output)
        assert len(records) >= 1
        final = json.loads(records[-1])
        assert final["done"] is True

    def test_run_json_ndjson_step_keys(self, tmp_path: pathlib.Path) -> None:
        """When more than one line is emitted, the first carries the step keys."""
        repo_root = self._session(tmp_path, 6)
        outcome = _invoke(repo_root, ["bisect", "run", "true", "--json"])
        assert outcome.exit_code == 0
        records = self._ndjson_lines(outcome.output)
        if len(records) > 1:
            first_step = json.loads(records[0])
            required = {"step", "testing", "verdict", "remaining_count", "done", "symbol_changes"}
            assert required <= set(first_step.keys())

    def test_run_json_done_line_keys(self, tmp_path: pathlib.Path) -> None:
        """The final line has exactly ``done``, ``first_bad`` and ``steps_taken``."""
        repo_root = self._session(tmp_path, 5)
        outcome = _invoke(repo_root, ["bisect", "run", "true", "--json"])
        assert outcome.exit_code == 0
        final = json.loads(self._ndjson_lines(outcome.output)[-1])
        assert set(final.keys()) == {"done", "first_bad", "steps_taken"}

    def test_run_json_done_true_on_last_line(self, tmp_path: pathlib.Path) -> None:
        """The final line reports ``done`` as true."""
        repo_root = self._session(tmp_path, 5)
        outcome = _invoke(repo_root, ["bisect", "run", "true", "--json"])
        assert outcome.exit_code == 0
        final = json.loads(self._ndjson_lines(outcome.output)[-1])
        assert final["done"] is True

    def test_run_json_steps_taken_positive(self, tmp_path: pathlib.Path) -> None:
        """``steps_taken`` on the final line is at least 1."""
        repo_root = self._session(tmp_path, 6)
        outcome = _invoke(repo_root, ["bisect", "run", "true", "--json"])
        assert outcome.exit_code == 0
        final = json.loads(self._ndjson_lines(outcome.output)[-1])
        assert final["steps_taken"] >= 1

    def test_run_json_verdict_good_with_true(self, tmp_path: pathlib.Path) -> None:
        """Every step line (all but the last) has verdict "good" when running ``true``."""
        repo_root = self._session(tmp_path, 5)
        outcome = _invoke(repo_root, ["bisect", "run", "true", "--json"])
        assert outcome.exit_code == 0
        step_records = self._ndjson_lines(outcome.output)[:-1]
        assert all(json.loads(record)["verdict"] == "good" for record in step_records)

    def test_run_json_verdict_bad_with_false(self, tmp_path: pathlib.Path) -> None:
        """Every step line (all but the last) has verdict "bad" when running ``false``."""
        repo_root = self._session(tmp_path, 5)
        outcome = _invoke(repo_root, ["bisect", "run", "false", "--json"])
        assert outcome.exit_code == 0
        step_records = self._ndjson_lines(outcome.output)[:-1]
        assert all(json.loads(record)["verdict"] == "bad" for record in step_records)

    def test_run_no_session_exits_1(self, tmp_path: pathlib.Path) -> None:
        """Without an active session ``bisect run`` exits 1."""
        repo_root, _repo_id = _make_repo(tmp_path)
        outcome = _invoke(repo_root, ["bisect", "run", "true"])
        assert outcome.exit_code == 1

    def test_run_no_session_error_message(self, tmp_path: pathlib.Path) -> None:
        """The "No bisect session" text appears on stdout or stderr."""
        repo_root, _repo_id = _make_repo(tmp_path)
        outcome = _invoke(repo_root, ["bisect", "run", "true"])
        stdout_text = outcome.output
        stderr_text = outcome.stderr or ""
        assert "No bisect session" in stdout_text + stderr_text

    def test_run_outside_repo_exits_2(self, tmp_path: pathlib.Path) -> None:
        """Running from a plain (non-repo) directory exits 2."""
        plain_dir = tmp_path / "not_a_repo"
        plain_dir.mkdir()
        outcome = _invoke(plain_dir, ["bisect", "run", "true"])
        assert outcome.exit_code == 2

    def test_run_text_mentions_testing(self, tmp_path: pathlib.Path) -> None:
        """Text output contains "Testing" or an arrow marker."""
        repo_root = self._session(tmp_path, 5)
        outcome = _invoke(repo_root, ["bisect", "run", "true"])
        assert outcome.exit_code == 0
        text = outcome.output
        assert "Testing" in text or "→" in text

    def test_run_text_mentions_first_bad(self, tmp_path: pathlib.Path) -> None:
        """Text output announces the first bad commit or bisect completion."""
        repo_root = self._session(tmp_path, 5)
        outcome = _invoke(repo_root, ["bisect", "run", "true"])
        assert outcome.exit_code == 0
        text = outcome.output
        assert "First bad commit" in text or "Bisect complete" in text

    def test_run_help_description_present(self, tmp_path: pathlib.Path) -> None:
        """``--help`` shows "Agent quickstart" or mentions "125"."""
        repo_root, _repo_id = _make_repo(tmp_path)
        help_text = _invoke(repo_root, ["bisect", "run", "--help"]).output
        assert "Agent quickstart" in help_text or "125" in help_text

    def test_run_json_step_numbers_increment(self, tmp_path: pathlib.Path) -> None:
        """Step lines are numbered 1, 2, 3, ... with no gaps."""
        repo_root = self._session(tmp_path, 8)
        outcome = _invoke(repo_root, ["bisect", "run", "true", "--json"])
        assert outcome.exit_code == 0
        step_records = self._ndjson_lines(outcome.output)[:-1]
        numbers = [json.loads(record)["step"] for record in step_records]
        expected = list(range(1, len(numbers) + 1))
        assert numbers == expected

    def test_run_json_remaining_nonincreasing(self, tmp_path: pathlib.Path) -> None:
        """``remaining_count`` never grows from one step line to the next."""
        repo_root = self._session(tmp_path, 8)
        outcome = _invoke(repo_root, ["bisect", "run", "true", "--json"])
        assert outcome.exit_code == 0
        step_records = self._ndjson_lines(outcome.output)[:-1]
        history = [json.loads(record)["remaining_count"] for record in step_records]
        assert all(earlier >= later for earlier, later in zip(history, history[1:]))

    def test_run_text_no_json_by_default(self, tmp_path: pathlib.Path) -> None:
        """Without ``--json`` no output line begins with ``{``."""
        repo_root = self._session(tmp_path, 4)
        outcome = _invoke(repo_root, ["bisect", "run", "true"])
        assert outcome.exit_code == 0
        # Text mode should not have a JSON object on a line by itself.
        brace_lines = [
            raw for raw in outcome.output.splitlines() if raw.strip().startswith("{")
        ]
        assert brace_lines == []

    def test_run_json_first_bad_set_on_done(self, tmp_path: pathlib.Path) -> None:
        """If the final line reports ``done``, its ``first_bad`` is not None."""
        repo_root = self._session(tmp_path, 5)
        outcome = _invoke(repo_root, ["bisect", "run", "true", "--json"])
        assert outcome.exit_code == 0
        final = json.loads(self._ndjson_lines(outcome.output)[-1])
        if final["done"]:
            assert final["first_bad"] is not None
2229
2230
class TestBisectRunSecurity:
    """Output-safety checks for ``muse bisect run``."""

    def _start(self, root: pathlib.Path, ids: list[str]) -> None:
        """Start a session (``ids[-1]`` bad, ``ids[0]`` good) and require exit 0."""
        bad_ref, good_ref = ids[-1], ids[0]
        started = _invoke(root, ["bisect", "start", "--bad", bad_ref, "--good", good_ref])
        assert started.exit_code == 0

    def test_run_json_lines_are_valid_json(self, tmp_path: pathlib.Path) -> None:
        """Every non-blank ``--json`` output line decodes to a JSON object."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 5)
        self._start(repo_root, chain)
        outcome = _invoke(repo_root, ["bisect", "run", "true", "--json"])
        assert outcome.exit_code == 0
        for raw in outcome.output.strip().splitlines():
            text = raw.strip()
            if not text:
                continue
            assert isinstance(json.loads(text), dict)

    def test_run_json_done_field_is_bool(self, tmp_path: pathlib.Path) -> None:
        """``done`` is a real boolean on every emitted JSON line."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 5)
        self._start(repo_root, chain)
        outcome = _invoke(repo_root, ["bisect", "run", "true", "--json"])
        assert outcome.exit_code == 0
        trimmed = (raw.strip() for raw in outcome.output.strip().splitlines())
        records = [text for text in trimmed if text]
        for record in records:
            assert isinstance(json.loads(record)["done"], bool)

    def test_run_text_symbol_changes_sanitized(self, tmp_path: pathlib.Path) -> None:
        """ANSI codes in symbol_changes are stripped from text output during run."""
        from unittest.mock import patch
        from muse.core.bisect import BisectResult

        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 5)
        self._start(repo_root, chain)
        canned_result = BisectResult(
            done=True,
            first_bad=chain[2],
            next_to_test=None,
            remaining_count=0,
            steps_remaining=0,
            verdict="bad",
            symbol_changes=[],
        )
        # Three collaborators of the CLI module are replaced: the symbol-op
        # lookup returns a string carrying raw escape bytes, the next-commit
        # query yields one candidate then nothing, and the command runner
        # returns the canned finished result.
        ops_patch = patch(
            "muse.cli.commands.bisect._symbol_ops_in_commit",
            return_value=["add func\x1b[31mred\x1b[0m"],
        )
        next_patch = patch(
            "muse.cli.commands.bisect.get_bisect_next",
            side_effect=[(chain[2], "billing.py::Invoice"), (None, "")],
        )
        runner_patch = patch(
            "muse.cli.commands.bisect.run_bisect_command",
            return_value=canned_result,
        )
        with ops_patch:
            with next_patch:
                with runner_patch:
                    outcome = _invoke(repo_root, ["bisect", "run", "true"])
        assert "\x1b" not in outcome.output

    def test_run_error_no_json_on_failure(self, tmp_path: pathlib.Path) -> None:
        """A failing invocation does not start its stdout with a JSON object."""
        repo_root, _repo_id = _make_repo(tmp_path)
        outcome = _invoke(repo_root, ["bisect", "run", "true"])
        assert outcome.exit_code != 0
        leading_text = outcome.output.strip()
        assert not leading_text.startswith("{")

    def test_run_json_no_ansi_in_output(self, tmp_path: pathlib.Path) -> None:
        """``--json`` output contains no escape byte."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 5)
        self._start(repo_root, chain)
        outcome = _invoke(repo_root, ["bisect", "run", "true", "--json"])
        assert outcome.exit_code == 0
        assert "\x1b" not in outcome.output

    def test_run_text_no_ansi_in_output(self, tmp_path: pathlib.Path) -> None:
        """Text output contains no escape byte."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 5)
        self._start(repo_root, chain)
        outcome = _invoke(repo_root, ["bisect", "run", "true"])
        assert outcome.exit_code == 0
        assert "\x1b" not in outcome.output
2304
2305
class TestBisectRunStress:
    """Scale and timing checks for ``muse bisect run``."""

    def _start(self, root: pathlib.Path, ids: list[str]) -> None:
        """Start a session (``ids[-1]`` bad, ``ids[0]`` good) and require exit 0."""
        bad_ref, good_ref = ids[-1], ids[0]
        started = _invoke(root, ["bisect", "start", "--bad", bad_ref, "--good", good_ref])
        assert started.exit_code == 0

    @staticmethod
    def _final_record(output: str) -> dict:
        """Decode the last non-blank line of *output* as JSON."""
        trimmed = (raw.strip() for raw in output.strip().splitlines())
        records = [text for text in trimmed if text]
        return json.loads(records[-1])

    def test_run_50_commit_chain(self, tmp_path: pathlib.Path) -> None:
        """``run true`` on a 50-commit chain ends with ``done`` true."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 50)
        self._start(repo_root, chain)
        outcome = _invoke(repo_root, ["bisect", "run", "true", "--json"])
        assert outcome.exit_code == 0
        assert self._final_record(outcome.output)["done"] is True

    def test_run_performance_20_commits(self, tmp_path: pathlib.Path) -> None:
        """``run true`` over 20 commits completes in under 10 seconds."""
        import time

        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 20)
        self._start(repo_root, chain)

        # Only the run command itself is timed, not the fixture setup.
        clock_start = time.monotonic()
        outcome = _invoke(repo_root, ["bisect", "run", "true", "--json"])
        elapsed = time.monotonic() - clock_start

        assert outcome.exit_code == 0
        assert elapsed < 10.0, f"bisect run 20 commits took {elapsed:.2f}s"

    def test_run_steps_taken_within_log2(self, tmp_path: pathlib.Path) -> None:
        """``steps_taken`` is at most ``int(log2(n)) + 2`` for a 32-commit chain."""
        import math

        n = 32
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, n)
        self._start(repo_root, chain)
        outcome = _invoke(repo_root, ["bisect", "run", "true", "--json"])
        assert outcome.exit_code == 0
        steps_taken = self._final_record(outcome.output)["steps_taken"]
        step_budget = int(math.log2(n)) + 2
        assert steps_taken <= step_budget
2347
2348
2349	# ---------------------------------------------------------------------------
2350	# bisect log — Extended, Security, Stress
2351	# ---------------------------------------------------------------------------
2352
2353
class TestBisectLogExtended:
    """Unit, integration and end-to-end coverage for ``muse bisect log``."""

    def _start(self, root: pathlib.Path, ids: list[str]) -> None:
        """Start a session with the last id as bad and the first id as good."""
        started = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]])
        assert started.exit_code == 0

    def test_log_exits_0_no_session(self, tmp_path: pathlib.Path) -> None:
        """``log`` exits 0 in a repo where no session was started."""
        repo_root, _ = _make_repo(tmp_path)
        outcome = _invoke(repo_root, ["bisect", "log"])
        assert outcome.exit_code == 0

    def test_log_exits_0_with_session(self, tmp_path: pathlib.Path) -> None:
        """``log`` exits 0 once a session has been started."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 4)
        self._start(repo_root, chain)
        outcome = _invoke(repo_root, ["bisect", "log"])
        assert outcome.exit_code == 0

    def test_log_j_alias_works(self, tmp_path: pathlib.Path) -> None:
        """``-j`` yields a parseable payload whose ``active`` field is a bool."""
        repo_root, _ = _make_repo(tmp_path)
        outcome = _invoke(repo_root, ["bisect", "log", "-j"])
        assert outcome.exit_code == 0
        payload = _parse_log(outcome.output)
        assert isinstance(payload["active"], bool)

    def test_log_json_active_false_no_session(self, tmp_path: pathlib.Path) -> None:
        """Without a session the JSON payload reports ``active`` as False."""
        repo_root, _ = _make_repo(tmp_path)
        outcome = _invoke(repo_root, ["bisect", "log", "--json"])
        assert outcome.exit_code == 0
        payload = _parse_log(outcome.output)
        assert payload["active"] is False

    def test_log_json_active_true_with_session(self, tmp_path: pathlib.Path) -> None:
        """After ``start`` the JSON payload reports ``active`` as True."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 4)
        self._start(repo_root, chain)
        outcome = _invoke(repo_root, ["bisect", "log", "--json"])
        assert outcome.exit_code == 0
        payload = _parse_log(outcome.output)
        assert payload["active"] is True

    def test_log_json_entries_empty_no_session(self, tmp_path: pathlib.Path) -> None:
        """Without a session the ``entries`` list is empty."""
        repo_root, _ = _make_repo(tmp_path)
        outcome = _invoke(repo_root, ["bisect", "log", "--json"])
        assert outcome.exit_code == 0
        payload = _parse_log(outcome.output)
        assert payload["entries"] == []

    def test_log_json_entries_grow_with_verdicts(self, tmp_path: pathlib.Path) -> None:
        """Recording a ``bad`` verdict increases the number of log entries."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 6)
        self._start(repo_root, chain)
        log_cmd = ["bisect", "log", "--json"]
        entries_before = _parse_log(_invoke(repo_root, log_cmd).output)["entries"]
        after_start = len(entries_before)
        _invoke(repo_root, ["bisect", "bad", chain[3]])
        entries_after = _parse_log(_invoke(repo_root, log_cmd).output)["entries"]
        after_bad = len(entries_after)
        assert after_bad > after_start

    def test_log_json_two_keys(self, tmp_path: pathlib.Path) -> None:
        """The raw JSON object contains at least ``active`` and ``entries``."""
        repo_root, _ = _make_repo(tmp_path)
        outcome = _invoke(repo_root, ["bisect", "log", "--json"])
        assert outcome.exit_code == 0
        document = json.loads(_json_blob(outcome.output))
        assert {"active", "entries"}.issubset(set(document.keys()))

    def test_log_json_start_records_bad_and_good(self, tmp_path: pathlib.Path) -> None:
        """``start`` leaves both a ``bad`` and a ``good`` verdict in the log."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 4)
        self._start(repo_root, chain)
        logged = _parse_log(_invoke(repo_root, ["bisect", "log", "--json"]).output)["entries"]
        seen_verdicts = [item["verdict"] for item in logged]
        assert "bad" in seen_verdicts
        assert "good" in seen_verdicts

    def test_log_json_entries_contain_commit_ids(self, tmp_path: pathlib.Path) -> None:
        """Every entry's ``commit_id`` begins with the ``sha256:`` prefix."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 4)
        self._start(repo_root, chain)
        logged = _parse_log(_invoke(repo_root, ["bisect", "log", "--json"]).output)["entries"]
        for item in logged:
            # Only the prefix is checked here, not the overall length.
            assert item["commit_id"].startswith("sha256:")

    def test_log_json_entries_are_dicts(self, tmp_path: pathlib.Path) -> None:
        """Each entry is a dict with ``commit_id``, ``verdict`` and ``timestamp``."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 3)
        self._start(repo_root, chain)
        logged = _parse_log(_invoke(repo_root, ["bisect", "log", "--json"]).output)["entries"]
        for item in logged:
            assert isinstance(item, dict)
            assert "commit_id" in item
            assert "verdict" in item
            assert "timestamp" in item

    def test_log_active_false_after_reset(self, tmp_path: pathlib.Path) -> None:
        """``reset`` flips the log's ``active`` flag back to False."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 3)
        self._start(repo_root, chain)
        _invoke(repo_root, ["bisect", "reset"])
        outcome = _invoke(repo_root, ["bisect", "log", "--json"])
        assert outcome.exit_code == 0
        payload = _parse_log(outcome.output)
        assert payload["active"] is False

    def test_log_text_shows_bisect_log_header(self, tmp_path: pathlib.Path) -> None:
        """Text output for an active session contains ``Bisect log``."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 4)
        self._start(repo_root, chain)
        outcome = _invoke(repo_root, ["bisect", "log"])
        assert outcome.exit_code == 0
        assert "Bisect log" in outcome.output

    def test_log_text_no_session_message(self, tmp_path: pathlib.Path) -> None:
        """Text output without a session contains ``No bisect log``."""
        repo_root, _ = _make_repo(tmp_path)
        outcome = _invoke(repo_root, ["bisect", "log"])
        assert outcome.exit_code == 0
        assert "No bisect log" in outcome.output

    def test_log_outside_repo_exits_2(self, tmp_path: pathlib.Path) -> None:
        """Running ``log`` in a directory that is not a repo exits with code 2."""
        not_a_repo = tmp_path / "not_a_repo"
        not_a_repo.mkdir()
        outcome = _invoke(not_a_repo, ["bisect", "log"])
        assert outcome.exit_code == 2

    def test_log_help_description_present(self, tmp_path: pathlib.Path) -> None:
        """``--help`` mentions ``Agent quickstart`` or the word ``verdict``."""
        repo_root, _ = _make_repo(tmp_path)
        help_text = _invoke(repo_root, ["bisect", "log", "--help"]).output
        assert "Agent quickstart" in help_text or "verdict" in help_text.lower()

    def test_log_text_no_json_object(self, tmp_path: pathlib.Path) -> None:
        """No line of the text-mode output starts with ``{``."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 4)
        self._start(repo_root, chain)
        outcome = _invoke(repo_root, ["bisect", "log"])
        assert outcome.exit_code == 0
        assert all(
            not line.strip().startswith("{") for line in outcome.output.splitlines()
        )
2486
2487
class TestBisectLogSecurity:
    """Hardening checks for the output of ``muse bisect log``."""

    def _start(self, root: pathlib.Path, ids: list[str]) -> None:
        """Start a session with the last id as bad and the first id as good."""
        started = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]])
        assert started.exit_code == 0

    def test_log_json_is_valid_json(self, tmp_path: pathlib.Path) -> None:
        """The ``--json`` blob decodes to a dict."""
        repo_root, _ = _make_repo(tmp_path)
        outcome = _invoke(repo_root, ["bisect", "log", "--json"])
        assert outcome.exit_code == 0
        document = json.loads(_json_blob(outcome.output))
        assert isinstance(document, dict)

    def test_log_json_active_is_bool(self, tmp_path: pathlib.Path) -> None:
        """The ``active`` field of the decoded blob is a real bool."""
        repo_root, _ = _make_repo(tmp_path)
        outcome = _invoke(repo_root, ["bisect", "log", "--json"])
        assert outcome.exit_code == 0
        document = json.loads(_json_blob(outcome.output))
        assert isinstance(document["active"], bool)

    def test_log_json_entries_sanitized(self, tmp_path: pathlib.Path) -> None:
        """An ANSI-laden entry written into the state never reaches JSON output."""
        from muse.core.bisect import _load_state, _save_state

        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 3)
        self._start(repo_root, chain)
        # Tamper with the persisted state: append a log line carrying escapes.
        tampered = _load_state(repo_root)
        assert tampered is not None
        tampered["log"].append(f"{chain[1]} bad\x1b[31m 2026-01-01T00:00:00\x1b[0m")
        _save_state(repo_root, tampered)
        outcome = _invoke(repo_root, ["bisect", "log", "--json"])
        assert outcome.exit_code == 0
        assert "\x1b" not in outcome.output

    def test_log_text_entries_sanitized(self, tmp_path: pathlib.Path) -> None:
        """An ANSI-laden entry written into the state never reaches text output."""
        from muse.core.bisect import _load_state, _save_state

        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 3)
        self._start(repo_root, chain)
        # Same tampering as the JSON variant, checked against text mode.
        tampered = _load_state(repo_root)
        assert tampered is not None
        tampered["log"].append(f"{chain[1]} bad\x1b[31m 2026-01-01T00:00:00\x1b[0m")
        _save_state(repo_root, tampered)
        outcome = _invoke(repo_root, ["bisect", "log"])
        assert outcome.exit_code == 0
        assert "\x1b" not in outcome.output

    def test_log_json_no_ansi_in_output(self, tmp_path: pathlib.Path) -> None:
        """JSON output for an untampered session has no ESC byte."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 4)
        self._start(repo_root, chain)
        outcome = _invoke(repo_root, ["bisect", "log", "--json"])
        assert outcome.exit_code == 0
        assert "\x1b" not in outcome.output

    def test_log_text_no_ansi_in_output(self, tmp_path: pathlib.Path) -> None:
        """Text output for an untampered session has no ESC byte."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 4)
        self._start(repo_root, chain)
        outcome = _invoke(repo_root, ["bisect", "log"])
        assert outcome.exit_code == 0
        assert "\x1b" not in outcome.output
2552
2553
class TestBisectLogStress:
    """Performance and scale tests for muse bisect log."""

    def _start(self, root: pathlib.Path, ids: list[str]) -> None:
        """Start a session with the last id as bad and the first id as good."""
        r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]])
        assert r.exit_code == 0

    def test_log_100_commit_session(self, tmp_path: pathlib.Path) -> None:
        """Log on a 100-commit chain returns the start entries plus ten verdicts."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 100)
        self._start(root, ids)
        # Apply 10 good verdicts to build up a log
        for i in range(1, 11):
            _invoke(root, ["bisect", "good", ids[i]])
        result = _invoke(root, ["bisect", "log", "--json"])
        assert result.exit_code == 0
        entries = _parse_log(result.output)["entries"]
        # start adds 2 entries; 10 good verdicts add 10 more
        assert len(entries) >= 12

    def test_log_performance_large_session(self, tmp_path: pathlib.Path) -> None:
        """Log on a large session completes within 5 seconds."""
        import time

        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 50)
        self._start(root, ids)
        for i in range(1, 8):
            _invoke(root, ["bisect", "bad", ids[i]])
        t0 = time.monotonic()
        result = _invoke(root, ["bisect", "log", "--json"])
        elapsed = time.monotonic() - t0
        assert result.exit_code == 0
        assert elapsed < 5.0, f"bisect log took {elapsed:.2f}s"

    def test_log_concurrent_reads_consistent(self, tmp_path: pathlib.Path) -> None:
        """Concurrent log reads all return the same entry count."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 20)
        self._start(root, ids)
        _invoke(root, ["bisect", "bad", ids[10]])
        counts: list[int] = []
        errors: list[str] = []
        lock = threading.Lock()

        def _run() -> None:
            r = _invoke(root, ["bisect", "log", "--json"])
            if r.exit_code != 0:
                with lock:
                    errors.append(r.output)
                return
            try:
                entry_count = len(_parse_log(r.output)["entries"])
            except (json.JSONDecodeError, KeyError, ValueError) as exc:
                with lock:
                    errors.append(f"parse error: {exc!r} output={r.output!r}")
                return
            with lock:
                counts.append(entry_count)

        threads = [threading.Thread(target=_run) for _ in range(8)]
        for t in threads:
            t.start()
        for t in threads:
            t.join()
        assert not errors, errors
        # An exception escaping a worker is not re-raised by join(); without
        # this check the consistency assertion below could pass vacuously
        # on a short or empty ``counts`` list.
        assert len(counts) == len(threads), (
            f"only {len(counts)} of {len(threads)} readers reported a count"
        )
        assert len(set(counts)) == 1, counts
2617
2618
2619	# ---------------------------------------------------------------------------
2620	# bisect reset — Extended, Security, Stress
2621	# ---------------------------------------------------------------------------
2622
2623
class TestBisectResetExtended:
    """Unit, integration and end-to-end coverage for ``muse bisect reset``."""

    def _start(self, root: pathlib.Path, ids: list[str]) -> None:
        """Start a session with the last id as bad and the first id as good."""
        started = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]])
        assert started.exit_code == 0

    def test_reset_exits_0_with_session(self, tmp_path: pathlib.Path) -> None:
        """``reset`` exits 0 when a session is active."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 4)
        self._start(repo_root, chain)
        outcome = _invoke(repo_root, ["bisect", "reset"])
        assert outcome.exit_code == 0

    def test_reset_exits_0_no_session(self, tmp_path: pathlib.Path) -> None:
        """``reset`` exits 0 when there is nothing to reset."""
        repo_root, _ = _make_repo(tmp_path)
        outcome = _invoke(repo_root, ["bisect", "reset"])
        assert outcome.exit_code == 0

    def test_reset_j_alias_works(self, tmp_path: pathlib.Path) -> None:
        """``-j`` produces a payload whose ``reset`` field is True."""
        repo_root, _ = _make_repo(tmp_path)
        outcome = _invoke(repo_root, ["bisect", "reset", "-j"])
        assert outcome.exit_code == 0
        payload = _parse_reset(outcome.output)
        assert payload["reset"] is True

    def test_reset_json_reset_true(self, tmp_path: pathlib.Path) -> None:
        """``--json`` produces a payload whose ``reset`` field is True."""
        repo_root, _ = _make_repo(tmp_path)
        outcome = _invoke(repo_root, ["bisect", "reset", "--json"])
        assert outcome.exit_code == 0
        payload = _parse_reset(outcome.output)
        assert payload["reset"] is True

    def test_reset_json_single_key(self, tmp_path: pathlib.Path) -> None:
        """The raw JSON object contains at least the ``reset`` key."""
        repo_root, _ = _make_repo(tmp_path)
        outcome = _invoke(repo_root, ["bisect", "reset", "--json"])
        assert outcome.exit_code == 0
        document = json.loads(_json_blob(outcome.output))
        assert {"reset"}.issubset(set(document.keys()))

    def test_reset_clears_active_session(self, tmp_path: pathlib.Path) -> None:
        """After ``reset`` the log reports ``active`` as False."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 4)
        self._start(repo_root, chain)
        _invoke(repo_root, ["bisect", "reset"])
        log_outcome = _invoke(repo_root, ["bisect", "log", "--json"])
        payload = _parse_log(log_outcome.output)
        assert payload["active"] is False

    def test_reset_prevents_bad_after_reset(self, tmp_path: pathlib.Path) -> None:
        """``bad`` after ``reset`` exits with code 1."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 4)
        self._start(repo_root, chain)
        _invoke(repo_root, ["bisect", "reset"])
        outcome = _invoke(repo_root, ["bisect", "bad", chain[2]])
        assert outcome.exit_code == 1

    def test_reset_prevents_good_after_reset(self, tmp_path: pathlib.Path) -> None:
        """``good`` after ``reset`` exits with code 1."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 4)
        self._start(repo_root, chain)
        _invoke(repo_root, ["bisect", "reset"])
        outcome = _invoke(repo_root, ["bisect", "good", chain[1]])
        assert outcome.exit_code == 1

    def test_reset_prevents_skip_after_reset(self, tmp_path: pathlib.Path) -> None:
        """``skip`` after ``reset`` exits with code 1."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 4)
        self._start(repo_root, chain)
        _invoke(repo_root, ["bisect", "reset"])
        outcome = _invoke(repo_root, ["bisect", "skip", chain[2]])
        assert outcome.exit_code == 1

    def test_reset_idempotent_double_reset(self, tmp_path: pathlib.Path) -> None:
        """Two consecutive resets both exit 0."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 3)
        self._start(repo_root, chain)
        first = _invoke(repo_root, ["bisect", "reset"])
        assert first.exit_code == 0
        second = _invoke(repo_root, ["bisect", "reset"])
        assert second.exit_code == 0

    def test_reset_allows_new_session_after(self, tmp_path: pathlib.Path) -> None:
        """A fresh ``start`` succeeds after the previous session was reset."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 5)
        self._start(repo_root, chain)
        _invoke(repo_root, ["bisect", "reset"])
        restarted = _invoke(
            repo_root, ["bisect", "start", "--bad", chain[-1], "--good", chain[0]]
        )
        assert restarted.exit_code == 0

    def test_reset_clears_log_entries(self, tmp_path: pathlib.Path) -> None:
        """After ``reset`` the log's ``entries`` list is empty."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 4)
        self._start(repo_root, chain)
        _invoke(repo_root, ["bisect", "bad", chain[2]])
        _invoke(repo_root, ["bisect", "reset"])
        log_outcome = _invoke(repo_root, ["bisect", "log", "--json"])
        remaining = _parse_log(log_outcome.output)["entries"]
        assert remaining == []

    def test_reset_text_output_mentions_reset(self, tmp_path: pathlib.Path) -> None:
        """Text output contains the word ``reset`` (case-insensitive)."""
        repo_root, _ = _make_repo(tmp_path)
        outcome = _invoke(repo_root, ["bisect", "reset"])
        assert outcome.exit_code == 0
        assert "reset" in outcome.output.lower()

    def test_reset_text_no_json_object(self, tmp_path: pathlib.Path) -> None:
        """Text output does not begin with ``{``."""
        repo_root, _ = _make_repo(tmp_path)
        text = _invoke(repo_root, ["bisect", "reset"]).output
        assert not text.strip().startswith("{")

    def test_reset_outside_repo_exits_2(self, tmp_path: pathlib.Path) -> None:
        """Running ``reset`` in a directory that is not a repo exits with code 2."""
        not_a_repo = tmp_path / "not_a_repo"
        not_a_repo.mkdir()
        outcome = _invoke(not_a_repo, ["bisect", "reset"])
        assert outcome.exit_code == 2

    def test_reset_help_description_present(self, tmp_path: pathlib.Path) -> None:
        """``--help`` mentions ``Agent quickstart`` or ``Idempotent``."""
        repo_root, _ = _make_repo(tmp_path)
        help_text = _invoke(repo_root, ["bisect", "reset", "--help"]).output
        assert "Agent quickstart" in help_text or "Idempotent" in help_text

    def test_reset_json_reset_is_bool(self, tmp_path: pathlib.Path) -> None:
        """The ``reset`` field of the decoded blob is a real bool."""
        repo_root, _ = _make_repo(tmp_path)
        outcome = _invoke(repo_root, ["bisect", "reset", "--json"])
        assert outcome.exit_code == 0
        document = json.loads(_json_blob(outcome.output))
        assert isinstance(document["reset"], bool)

    def test_reset_mid_session_with_verdicts(self, tmp_path: pathlib.Path) -> None:
        """Reset succeeds and deactivates the session after bad and good verdicts."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 10)
        self._start(repo_root, chain)
        _invoke(repo_root, ["bisect", "bad", chain[7]])
        _invoke(repo_root, ["bisect", "good", chain[3]])
        outcome = _invoke(repo_root, ["bisect", "reset", "--json"])
        assert outcome.exit_code == 0
        assert _parse_reset(outcome.output)["reset"] is True
        log_outcome = _invoke(repo_root, ["bisect", "log", "--json"])
        assert _parse_log(log_outcome.output)["active"] is False
2752
2753
class TestBisectResetSecurity:
    """Hardening checks for ``muse bisect reset``."""

    def test_reset_json_is_valid_json(self, tmp_path: pathlib.Path) -> None:
        """The ``--json`` blob decodes to a dict."""
        repo_root, _ = _make_repo(tmp_path)
        outcome = _invoke(repo_root, ["bisect", "reset", "--json"])
        assert outcome.exit_code == 0
        document = json.loads(_json_blob(outcome.output))
        assert isinstance(document, dict)

    def test_reset_json_no_ansi(self, tmp_path: pathlib.Path) -> None:
        """JSON output has no ESC byte."""
        repo_root, _ = _make_repo(tmp_path)
        outcome = _invoke(repo_root, ["bisect", "reset", "--json"])
        assert outcome.exit_code == 0
        assert "\x1b" not in outcome.output

    def test_reset_text_no_ansi(self, tmp_path: pathlib.Path) -> None:
        """Text output has no ESC byte."""
        repo_root, _ = _make_repo(tmp_path)
        outcome = _invoke(repo_root, ["bisect", "reset"])
        assert outcome.exit_code == 0
        assert "\x1b" not in outcome.output

    def test_reset_state_file_removed(self, tmp_path: pathlib.Path) -> None:
        """The state file exists after ``start`` and is gone after ``reset``."""
        from muse.core.bisect import _state_path

        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 3)
        _invoke(repo_root, ["bisect", "start", "--bad", chain[-1], "--good", chain[0]])
        assert _state_path(repo_root).exists()
        _invoke(repo_root, ["bisect", "reset"])
        assert not _state_path(repo_root).exists()

    def test_reset_no_session_state_file_absent(self, tmp_path: pathlib.Path) -> None:
        """With no state file on disk, ``reset`` still exits 0."""
        from muse.core.bisect import _state_path

        repo_root, _ = _make_repo(tmp_path)
        assert not _state_path(repo_root).exists()
        outcome = _invoke(repo_root, ["bisect", "reset"])
        assert outcome.exit_code == 0

    def test_reset_json_reset_value_true(self, tmp_path: pathlib.Path) -> None:
        """``reset`` is the literal ``True``, not merely a truthy value."""
        repo_root, _ = _make_repo(tmp_path)
        outcome = _invoke(repo_root, ["bisect", "reset", "--json"])
        assert outcome.exit_code == 0
        document = json.loads(_json_blob(outcome.output))
        assert document["reset"] is True
2799
2800
class TestBisectResetStress:
    """Scale and timing checks for ``muse bisect reset``."""

    def test_reset_after_100_commit_session(self, tmp_path: pathlib.Path) -> None:
        """Reset on a 100-commit session reports success and deactivates it."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 100)
        _invoke(repo_root, ["bisect", "start", "--bad", chain[-1], "--good", chain[0]])
        outcome = _invoke(repo_root, ["bisect", "reset", "--json"])
        assert outcome.exit_code == 0
        assert _parse_reset(outcome.output)["reset"] is True
        log_outcome = _invoke(repo_root, ["bisect", "log", "--json"])
        assert _parse_log(log_outcome.output)["active"] is False

    def test_reset_performance(self, tmp_path: pathlib.Path) -> None:
        """Reset finishes in under 2 seconds after several verdicts on 100 commits."""
        import time

        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 100)
        _invoke(repo_root, ["bisect", "start", "--bad", chain[-1], "--good", chain[0]])
        for position in range(1, 8):
            _invoke(repo_root, ["bisect", "bad", chain[position]])
        began = time.monotonic()
        outcome = _invoke(repo_root, ["bisect", "reset"])
        elapsed = time.monotonic() - began
        assert outcome.exit_code == 0
        assert elapsed < 2.0, f"bisect reset took {elapsed:.2f}s"

    def test_reset_cycle_10_times(self, tmp_path: pathlib.Path) -> None:
        """Ten start/reset rounds in a row all succeed."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 6)
        start_cmd = ["bisect", "start", "--bad", chain[-1], "--good", chain[0]]
        for _round in range(10):
            started = _invoke(repo_root, start_cmd)
            assert started.exit_code == 0
            cleared = _invoke(repo_root, ["bisect", "reset", "--json"])
            assert cleared.exit_code == 0
            assert _parse_reset(cleared.output)["reset"] is True
2838
2839
2840	# ===========================================================================
2841	# New feature tests — status, structured log, timeout, symbol_changes in run
2842	# ===========================================================================
2843
2844
class TestBisectStatus:
    """Tests for the new ``muse bisect status`` subcommand."""

    def _start(self, root: pathlib.Path, ids: list[str]) -> None:
        """Start a session with the last id as bad and the first id as good."""
        r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]])
        assert r.exit_code == 0

    # ── Unit: no session ────────────────────────────────────────────────────

    def test_status_no_session_exits_0(self, tmp_path: pathlib.Path) -> None:
        """``status`` exits 0 when no session exists."""
        root, _ = _make_repo(tmp_path)
        result = _invoke(root, ["bisect", "status"])
        assert result.exit_code == 0

    def test_status_no_session_json_active_false(self, tmp_path: pathlib.Path) -> None:
        """Without a session the JSON payload reports ``active`` as False."""
        root, _ = _make_repo(tmp_path)
        result = _invoke(root, ["bisect", "status", "--json"])
        assert result.exit_code == 0
        d = json.loads(result.output.strip())
        assert d["active"] is False

    def test_status_no_session_json_only_active_key(self, tmp_path: pathlib.Path) -> None:
        """Without a session the JSON payload contains at least the ``active`` key."""
        root, _ = _make_repo(tmp_path)
        result = _invoke(root, ["bisect", "status", "--json"])
        assert result.exit_code == 0
        d = json.loads(result.output.strip())
        assert {"active"} <= set(d.keys())

    # ── Integration: active session ─────────────────────────────────────────

    def test_status_active_session_exits_0(self, tmp_path: pathlib.Path) -> None:
        """``status`` exits 0 while a session is active."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 6)
        self._start(root, ids)
        result = _invoke(root, ["bisect", "status"])
        assert result.exit_code == 0

    def test_status_active_json_schema(self, tmp_path: pathlib.Path) -> None:
        """An active session's payload carries every expected status field."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 6)
        self._start(root, ids)
        result = _invoke(root, ["bisect", "status", "--json"])
        assert result.exit_code == 0
        d = json.loads(result.output.strip())
        assert d["active"] is True
        assert "bad_id" in d
        assert "good_ids" in d
        assert "remaining_count" in d
        assert "steps_remaining" in d
        assert "skipped_count" in d
        assert "symbol_filter" in d

    def test_status_active_remaining_count_positive(self, tmp_path: pathlib.Path) -> None:
        """``remaining_count`` is greater than zero right after ``start``."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 8)
        self._start(root, ids)
        result = _invoke(root, ["bisect", "status", "--json"])
        d = json.loads(result.output.strip())
        assert d["remaining_count"] > 0

    def test_status_bad_id_matches_session(self, tmp_path: pathlib.Path) -> None:
        """``bad_id`` equals the id passed to ``start --bad``."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 5)
        self._start(root, ids)
        result = _invoke(root, ["bisect", "status", "--json"])
        d = json.loads(result.output.strip())
        assert d["bad_id"] == ids[-1]

    def test_status_skipped_count_increments(self, tmp_path: pathlib.Path) -> None:
        """Skipping one commit raises ``skipped_count`` by exactly one."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 8)
        self._start(root, ids)
        status_cmd = ["bisect", "status", "--json"]
        before = json.loads(_invoke(root, status_cmd).output.strip())["skipped_count"]
        # Skip the commit in the middle of the chain.
        _invoke(root, ["bisect", "skip", ids[len(ids) // 2]])
        after = json.loads(_invoke(root, status_cmd).output.strip())["skipped_count"]
        assert after == before + 1

    def test_status_active_false_after_reset(self, tmp_path: pathlib.Path) -> None:
        """After ``reset`` the status payload reports ``active`` as False."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 5)
        self._start(root, ids)
        _invoke(root, ["bisect", "reset"])
        result = _invoke(root, ["bisect", "status", "--json"])
        d = json.loads(result.output.strip())
        assert d["active"] is False

    # ── Security ────────────────────────────────────────────────────────────

    def test_status_outside_repo_exits_2(self, tmp_path: pathlib.Path) -> None:
        """Running ``status`` in a directory that is not a repo exits with code 2."""
        empty = tmp_path / "not_a_repo"
        empty.mkdir()
        result = _invoke(empty, ["bisect", "status"])
        assert result.exit_code == 2

    def test_status_json_is_compact(self, tmp_path: pathlib.Path) -> None:
        """The entire ``status --json`` output parses as one JSON document.

        Only parseability of the unstripped output is checked; this test does
        not assert that the document is printed on a single line.
        """
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 5)
        self._start(root, ids)
        result = _invoke(root, ["bisect", "status", "--json"])
        assert result.exit_code == 0
        json.loads(result.output)

    def test_status_json_no_ansi(self, tmp_path: pathlib.Path) -> None:
        """JSON status output has no ESC byte."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 5)
        self._start(root, ids)
        result = _invoke(root, ["bisect", "status", "--json"])
        assert "\x1b" not in result.output

    def test_status_text_no_session_message(self, tmp_path: pathlib.Path) -> None:
        """Text output without a session says there is no bisect session."""
        root, _ = _make_repo(tmp_path)
        result = _invoke(root, ["bisect", "status"])
        assert "No bisect session" in result.output or "no bisect session" in result.output.lower()

    def test_status_text_active_shows_remaining(self, tmp_path: pathlib.Path) -> None:
        """Text output for an active session mentions ``remaining``."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 6)
        self._start(root, ids)
        result = _invoke(root, ["bisect", "status"])
        assert "remaining" in result.output.lower()
2974
2975
class TestBisectLogStructured:
    """Checks on the shape of individual structured log entries."""

    def _start(self, root: pathlib.Path, ids: list[str]) -> None:
        """Start a session with the last id as bad and the first id as good."""
        started = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]])
        assert started.exit_code == 0

    def test_log_entry_has_three_keys(self, tmp_path: pathlib.Path) -> None:
        """Each entry has exactly ``commit_id``, ``verdict`` and ``timestamp``."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 4)
        self._start(repo_root, chain)
        logged = _parse_log(_invoke(repo_root, ["bisect", "log", "--json"]).output)["entries"]
        assert len(logged) >= 2
        expected_keys = {"commit_id", "verdict", "timestamp"}
        for item in logged:
            assert set(item.keys()) == expected_keys

    def test_log_entry_verdict_values(self, tmp_path: pathlib.Path) -> None:
        """Every recorded verdict is one of ``bad``, ``good`` or ``skip``."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 4)
        self._start(repo_root, chain)
        logged = _parse_log(_invoke(repo_root, ["bisect", "log", "--json"]).output)["entries"]
        seen_verdicts = {item["verdict"] for item in logged}
        assert seen_verdicts.issubset({"bad", "good", "skip"})

    def test_log_entry_timestamp_is_iso8601(self, tmp_path: pathlib.Path) -> None:
        """Each timestamp is either empty or contains the ``T`` separator."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 4)
        self._start(repo_root, chain)
        logged = _parse_log(_invoke(repo_root, ["bisect", "log", "--json"]).output)["entries"]
        for item in logged:
            # Heuristic only: a 'T' between date and time, or an empty string.
            stamp = item["timestamp"]
            assert "T" in stamp or stamp == ""

    def test_log_skip_entry_appears_after_skip(self, tmp_path: pathlib.Path) -> None:
        """A ``skip`` command leaves a ``skip`` verdict in the log."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 6)
        self._start(repo_root, chain)
        _invoke(repo_root, ["bisect", "skip", chain[2]])
        logged = _parse_log(_invoke(repo_root, ["bisect", "log", "--json"]).output)["entries"]
        seen_verdicts = [item["verdict"] for item in logged]
        assert "skip" in seen_verdicts

    def test_log_entry_commit_ids_in_session_ids(self, tmp_path: pathlib.Path) -> None:
        """The ids given to ``start`` both show up among the logged commit ids."""
        repo_root, repo_id = _make_repo(tmp_path)
        chain = _build_chain(repo_root, repo_id, 4)
        self._start(repo_root, chain)
        logged = _parse_log(_invoke(repo_root, ["bisect", "log", "--json"]).output)["entries"]
        logged_ids = {item["commit_id"] for item in logged}
        bad_id, good_id = chain[-1], chain[0]
        assert bad_id in logged_ids
        assert good_id in logged_ids
3027
3028
3029	class TestBisectRunTimeout:
3030	"""Tests for ``--timeout`` on ``muse bisect run``."""
3031
3032	def _start(self, root: pathlib.Path, ids: list[str]) -> None:
3033	r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]])
3034	assert r.exit_code == 0
3035
3036	def test_timeout_flag_accepted(self, tmp_path: pathlib.Path) -> None:
3037	"""--timeout is a valid flag that doesn't crash the parser."""
3038	root, repo_id = _make_repo(tmp_path)
3039	ids = _build_chain(root, repo_id, 4)
3040	self._start(root, ids)
3041	result = _invoke(root, ["bisect", "run", "true", "--timeout", "30"])
3042	assert result.exit_code == 0
3043
3044	def test_timeout_fast_command_succeeds(self, tmp_path: pathlib.Path) -> None:
3045	"""A command that finishes well within the timeout is treated normally."""
3046	root, repo_id = _make_repo(tmp_path)
3047	ids = _build_chain(root, repo_id, 5)
3048	self._start(root, ids)
3049	result = _invoke(root, ["bisect", "run", "true", "--timeout", "10"])
3050	assert result.exit_code == 0
3051
def test_timeout_triggers_skip(self, tmp_path: pathlib.Path) -> None:
    """A command that exceeds --timeout yields a ``skip`` verdict.

    ``run_bisect_command`` is called directly with ``subprocess.run`` patched
    to raise ``TimeoutExpired``, so the test never actually sleeps.  The
    four-commit repository is assembled by hand under pytest's ``tmp_path``
    fixture, which pytest cleans up on its own.
    """
    import subprocess
    import unittest.mock as mock

    from muse.core.bisect import run_bisect_command, start_bisect
    from muse.core.paths import muse_dir

    root_path = tmp_path

    # Minimal on-disk repo layout so a bisect session can be started.
    repo_id = fake_id("repo")
    dot_muse = muse_dir(root_path)
    dot_muse.mkdir()
    (dot_muse / "repo.json").write_text(json.dumps({
        "repo_id": repo_id, "domain": "code",
        "default_branch": "main", "created_at": "2026-01-01T00:00:00+00:00",
    }))
    (dot_muse / "HEAD").write_text("ref: refs/heads/main")
    (dot_muse / "refs" / "heads").mkdir(parents=True)
    (dot_muse / "snapshots").mkdir()
    (dot_muse / "commits").mkdir()
    (dot_muse / "objects").mkdir()

    # Linear chain of four commits on "main", each with an empty manifest.
    ids: list[str] = []
    parent: str | None = None
    for i in range(4):
        manifest = {}
        snap_id = hash_snapshot(manifest)
        committed_at = datetime.datetime.now(datetime.timezone.utc)
        commit_id = hash_commit(
            parent_ids=[parent] if parent else [],
            snapshot_id=snap_id,
            message=f"c{i}",
            committed_at_iso=committed_at.isoformat(),
        )
        write_snapshot(
            root_path,
            SnapshotRecord(snapshot_id=snap_id, manifest=manifest, created_at=committed_at),
        )
        write_commit(root_path, CommitRecord(
            commit_id=commit_id,
            parent_commit_id=parent, parent2_commit_id=None,
            snapshot_id=snap_id, branch="main", message=f"c{i}",
            committed_at=committed_at,
        ))
        # Advance the branch ref so it always points at the newest commit.
        (dot_muse / "refs" / "heads" / "main").write_text(commit_id)
        ids.append(commit_id)
        parent = commit_id

    # Newest commit is bad, oldest is good.
    start_bisect(root_path, ids[-1], [ids[0]])

    # Simulate the timeout instead of really waiting for "sleep 99".
    with mock.patch("subprocess.run", side_effect=subprocess.TimeoutExpired("cmd", 1)):
        result = run_bisect_command(root_path, "sleep 99", ids[2], timeout=1)
    assert result.verdict == "skip"
3117
def test_timeout_short_alias(self, tmp_path: pathlib.Path) -> None:
    """``bisect run`` accepts ``-t`` in place of ``--timeout``."""
    repo_root, repo_id = _make_repo(tmp_path)
    chain = _build_chain(repo_root, repo_id, 4)
    self._start(repo_root, chain)
    argv = ["bisect", "run", "true", "-t", "10"]
    outcome = _invoke(repo_root, argv)
    assert outcome.exit_code == 0
3125
3126
class TestBisectRunStepSymbolChanges:
    """Tests verifying symbol_changes is present in NDJSON step lines."""

    def _start(self, root: pathlib.Path, ids: list[str]) -> None:
        """Start a bisect session with the last id as bad and the first as good."""
        r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]])
        assert r.exit_code == 0

    def _run_json_lines(self, tmp_path: pathlib.Path) -> list[str]:
        """Run ``bisect run true --json`` on a 6-commit chain.

        Returns the stripped, non-empty lines of the command's output after
        asserting that the command exited with code 0.
        """
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 6)
        self._start(root, ids)
        result = _invoke(root, ["bisect", "run", "true", "--json"])
        assert result.exit_code == 0
        stripped = (line.strip() for line in result.output.strip().splitlines())
        return [line for line in stripped if line]

    def test_step_json_has_symbol_changes_key(self, tmp_path: pathlib.Path) -> None:
        """The first step line carries a ``symbol_changes`` key."""
        step_lines = [line for line in self._run_json_lines(tmp_path) if '"step"' in line]
        # Fail loudly rather than pass vacuously when no step line was emitted.
        assert step_lines, "bisect run --json emitted no step lines"
        step = json.loads(step_lines[0])
        assert "symbol_changes" in step

    def test_step_symbol_changes_is_list(self, tmp_path: pathlib.Path) -> None:
        """Every ``symbol_changes`` value in the output is a list."""
        objects = [json.loads(line) for line in self._run_json_lines(tmp_path)]
        with_changes = [obj for obj in objects if "symbol_changes" in obj]
        # Without this guard the loop below could check nothing at all.
        assert with_changes, "no NDJSON line carried a symbol_changes key"
        for obj in with_changes:
            assert isinstance(obj["symbol_changes"], list)

    def test_step_ndjson_stays_compact(self, tmp_path: pathlib.Path) -> None:
        """NDJSON step lines must be single-line (not pretty-printed)."""
        for line in self._run_json_lines(tmp_path):
            # Every non-empty line must be valid JSON on its own
            obj = json.loads(line)
            assert isinstance(obj, dict)
3164	assert result.exit_code == 0
3165	for line in result.output.strip().splitlines():
3166	line = line.strip()
3167	if not line:
3168	continue
3169	# Every non-empty line must be valid JSON on its own
3170	obj = json.loads(line)
3171	assert isinstance(obj, dict)
3172
3173
class TestBisectJsonCompact:
    """Tests verifying compact single-line JSON on single-object subcommands."""

    @staticmethod
    def _assert_single_line_json(output: str) -> None:
        """Assert *output* is exactly one JSON document printed on one line.

        ``json.loads`` alone also accepts pretty-printed multi-line JSON, so
        the single-line property is checked explicitly before parsing.
        """
        payload = output.strip()
        assert payload, "expected JSON output, got nothing"
        assert "\n" not in payload, "JSON output spans multiple lines"
        json.loads(payload)

    def _start(self, root: pathlib.Path, ids: list[str]) -> None:
        """Start a bisect session with the last id as bad and the first as good."""
        r = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]])
        assert r.exit_code == 0

    def test_start_json_is_compact(self, tmp_path: pathlib.Path) -> None:
        """``bisect start --json`` prints one single-line JSON document."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 4)
        result = _invoke(root, ["bisect", "start", "--bad", ids[-1], "--good", ids[0], "--json"])
        assert result.exit_code == 0
        self._assert_single_line_json(result.output)

    def test_bad_json_is_compact(self, tmp_path: pathlib.Path) -> None:
        """``bisect bad --json`` prints one single-line JSON document."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 5)
        self._start(root, ids)
        result = _invoke(root, ["bisect", "bad", ids[-1], "--json"])
        assert result.exit_code == 0
        self._assert_single_line_json(result.output)

    def test_log_json_is_compact(self, tmp_path: pathlib.Path) -> None:
        """``bisect log --json`` prints one single-line JSON document."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 4)
        self._start(root, ids)
        result = _invoke(root, ["bisect", "log", "--json"])
        assert result.exit_code == 0
        self._assert_single_line_json(result.output)

    def test_reset_json_is_compact(self, tmp_path: pathlib.Path) -> None:
        """``bisect reset --json`` prints one single-line JSON document."""
        root, repo_id = _make_repo(tmp_path)
        result = _invoke(root, ["bisect", "reset", "--json"])
        assert result.exit_code == 0
        self._assert_single_line_json(result.output)

    def test_run_json_ndjson_lines_are_compact(self, tmp_path: pathlib.Path) -> None:
        """run --json emits NDJSON: each line is a compact single-line JSON object."""
        root, repo_id = _make_repo(tmp_path)
        ids = _build_chain(root, repo_id, 4)
        self._start(root, ids)
        result = _invoke(root, ["bisect", "run", "true", "--json"])
        assert result.exit_code == 0
        for line in result.output.strip().splitlines():
            line = line.strip()
            if not line:
                continue
            # Single-line JSON: no embedded newlines, parseable as-is
            obj = json.loads(line)
            assert isinstance(obj, dict)
3224
3225
3226	# ---------------------------------------------------------------------------
3227	# Flag registration tests
3228	# ---------------------------------------------------------------------------
3229
3230	import argparse as _argparse
3231	from muse.cli.commands.bisect import register as _register_bisect
3232	from muse.core.paths import head_path, muse_dir, ref_path
3233
3234
def _parse_bisect(*args: str) -> _argparse.Namespace:
    """Parse ``bisect <args...>`` with a fresh parser populated by register()."""
    parser = _argparse.ArgumentParser()
    subparsers = parser.add_subparsers(dest="cmd")
    _register_bisect(subparsers)
    argv = ["bisect"]
    argv.extend(args)
    return parser.parse_args(argv)
3241
3242
class TestRegisterFlags:
    """Argument-parser checks for each ``bisect`` subcommand's flags."""

    # -- bad ---------------------------------------------------------------
    def test_bad_default_json_out_is_false(self) -> None:
        assert _parse_bisect("bad").json_out is False

    def test_bad_json_flag_sets_json_out(self) -> None:
        assert _parse_bisect("bad", "--json").json_out is True

    def test_bad_j_shorthand_sets_json_out(self) -> None:
        assert _parse_bisect("bad", "-j").json_out is True

    # -- good --------------------------------------------------------------
    def test_good_default_json_out_is_false(self) -> None:
        assert _parse_bisect("good").json_out is False

    def test_good_json_flag_sets_json_out(self) -> None:
        assert _parse_bisect("good", "--json").json_out is True

    def test_good_j_shorthand_sets_json_out(self) -> None:
        assert _parse_bisect("good", "-j").json_out is True

    # -- log ---------------------------------------------------------------
    def test_log_default_json_out_is_false(self) -> None:
        assert _parse_bisect("log").json_out is False

    def test_log_j_shorthand_sets_json_out(self) -> None:
        assert _parse_bisect("log", "-j").json_out is True

    # -- reset -------------------------------------------------------------
    def test_reset_default_json_out_is_false(self) -> None:
        assert _parse_bisect("reset").json_out is False

    def test_reset_j_shorthand_sets_json_out(self) -> None:
        assert _parse_bisect("reset", "-j").json_out is True

    # -- run ---------------------------------------------------------------
    def test_run_default_json_out_is_false(self) -> None:
        assert _parse_bisect("run", "pytest -x").json_out is False

    def test_run_j_shorthand_sets_json_out(self) -> None:
        assert _parse_bisect("run", "pytest -x", "-j").json_out is True

    # -- skip --------------------------------------------------------------
    def test_skip_default_json_out_is_false(self) -> None:
        assert _parse_bisect("skip").json_out is False

    def test_skip_j_shorthand_sets_json_out(self) -> None:
        assert _parse_bisect("skip", "-j").json_out is True

    # -- start -------------------------------------------------------------
    def test_start_default_json_out_is_false(self) -> None:
        parsed = _parse_bisect("start", "--bad", "HEAD", "--good", "v1.0.0")
        assert parsed.json_out is False

    def test_start_j_shorthand_sets_json_out(self) -> None:
        parsed = _parse_bisect("start", "--bad", "HEAD", "--good", "v1.0.0", "-j")
        assert parsed.json_out is True

    def test_start_bad_flag(self) -> None:
        parsed = _parse_bisect("start", "--bad", "HEAD", "--good", "v1.0.0")
        assert parsed.bad == "HEAD"

    def test_start_good_flag(self) -> None:
        # --good is collected into a list, even for a single value.
        parsed = _parse_bisect("start", "--bad", "HEAD", "--good", "v1.0.0")
        assert parsed.good == ["v1.0.0"]

    # -- status ------------------------------------------------------------
    def test_status_default_json_out_is_false(self) -> None:
        assert _parse_bisect("status").json_out is False

    def test_status_j_shorthand_sets_json_out(self) -> None:
        assert _parse_bisect("status", "-j").json_out is True

File History 4 commits

sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 20 days ago

sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 22 days ago

sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 28 days ago

sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor ⚠ 28 days ago

function _make_repo

function _make_commit

function _build_chain

function _invoke

function _json_blob

class _StepJson

class _LogEntryJson

class _LogJson

class _ResetJson

class _RunStepJson

class _RunDoneJson

function _repo

function _parse_step

function _parse_log

function _parse_reset

class TestTomlEscape

function test_plain_string_unchanged

function test_double_quote_escaped

function test_backslash_escaped

function test_both_escaped

class TestLoadStateSecurity

function test_symlink_state_file_rejected

function test_oversized_state_file_rejected

function test_corrupt_state_returns_none

function test_missing_state_returns_none

class TestSaveStateTomlInjection

function test_branch_with_quote_survives_roundtrip

function test_branch_with_backslash_survives_roundtrip

function test_symbol_filter_injection_survives_roundtrip

class TestGetBisectNext

function test_no_session_returns_none

function test_returns_next_after_start

function test_returns_symbol_filter

class TestErrorRouting

function test_bad_without_session_goes_to_stderr

function test_good_without_session_goes_to_stderr

function test_skip_without_session_goes_to_stderr

function test_run_without_session_goes_to_stderr

function test_symbol_without_double_colon_goes_to_stderr

function test_symbol_too_long_goes_to_stderr

function test_double_start_goes_to_stderr

class TestAnsiSanitization

function test_ansi_in_ref_does_not_leak

function test_ansi_in_symbol_does_not_leak

class TestJsonSchemaStart

function test_start_json_schema

function test_start_json_done_when_no_remaining

function test_start_json_symbol_changes_list

class TestJsonSchemaBadGoodSkip

function _start

function test_bad_json_schema

function test_good_json_schema

function test_skip_json_schema

class TestJsonSchemaLog

function test_log_json_no_session

function test_log_json_after_start

function test_log_json_entries_are_dicts

class TestJsonSchemaReset

function test_reset_json_no_session

function test_reset_json_with_session

function test_reset_clears_active_flag

class TestJsonSchemaRun

function test_run_json_ndjson_format

function test_run_json_done_has_first_bad

function test_run_json_steps_taken_increments

class TestIntegrationJson

function test_start_bad_good_converge

function test_good_narrows_range

function test_log_grows_with_verdicts

function test_skip_excluded_from_remaining

class TestE2EText

function test_start_text_output_no_json

function test_bad_text_output

function test_log_text_shows_entries

function test_reset_text_output

function test_run_text_output_converges

function test_no_good_flag_fails_clearly

function test_log_empty_when_no_session

class TestSymbolScopedBisect

function test_symbol_filter_no_matching_commits_warns

function test_symbol_filter_json_schema_preserved

function test_symbol_filter_state_persisted

class TestStress

function test_200_commit_chain_converges

function test_concurrent_log_reads_are_safe

function _read_log

function test_50_step_manual_bisect_json

class TestBisectStartExtended

function test_start_exits_0

function test_start_j_alias_works

function test_start_json_verdict_is_started

function test_start_json_done_false_with_remaining

function test_start_json_done_true_when_adjacent

function test_start_json_remaining_count_positive

function test_start_json_steps_remaining_positive

function test_start_json_all_seven_keys

function test_start_multiple_good_refs

function test_start_no_good_exits_1

function test_start_no_good_error_to_stderr

function test_start_double_start_exits_1

function test_start_outside_repo_exits_2

function test_start_bad_defaults_to_head

function test_start_text_mentions_session_started

function test_start_text_no_json_object

function test_start_help_description_present

function test_start_invalid_ref_exits_1

class TestBisectStartSecurity

function test_start_symbol_changes_no_ansi_in_json

function test_start_symbol_changes_no_ansi_in_text

function test_start_symbol_missing_separator_exits_1

function test_start_symbol_too_long_exits_1

function test_start_json_is_valid_json

function test_start_json_bool_fields_are_bool

class TestBisectStartStress

function test_start_100_commit_chain

function test_start_performance_100_commits

function test_start_midpoint_is_within_range

class TestBisectBadExtended

function _start

function test_bad_exits_0

function test_bad_j_alias_works

function test_bad_json_verdict_is_bad

function test_bad_json_all_seven_keys

function test_bad_reduces_remaining

function test_bad_done_true_when_isolated

function test_bad_first_bad_set_when_done

function test_bad_defaults_to_head

function test_bad_no_session_exits_1

function test_bad_no_session_error_to_stderr

function test_bad_outside_repo_exits_2

function test_bad_invalid_ref_exits_1

function test_bad_text_mentions_commit

function test_bad_text_no_json_object

function test_bad_help_description_present

function test_bad_advances_bisect_log

function test_bad_remaining_count_not_negative

function test_bad_symbol_changes_is_list

class TestBisectBadSecurity

function _start

function test_bad_json_is_valid_json

function test_bad_json_done_is_bool

function test_bad_symbol_changes_sanitized_in_json

function test_bad_symbol_changes_sanitized_in_text

function test_bad_error_output_to_stderr_not_stdout

function test_bad_ansi_in_ref_does_not_leak_to_output

class TestBisectBadStress

function _start

function test_bad_on_100_commit_chain

function test_bad_performance_100_commits

function test_bad_converges_full_session

class TestBisectGoodExtended

function _start

function test_good_exits_0

function test_good_j_alias_works

function test_good_json_verdict_is_good

function test_good_json_all_seven_keys

function test_good_reduces_remaining

function test_good_done_true_when_isolated

function test_good_first_bad_set_when_done

function test_good_defaults_to_head

function test_good_no_session_exits_1

function test_good_no_session_error_message

function test_good_outside_repo_exits_2

function test_good_invalid_ref_exits_1

function test_good_text_mentions_commit

function test_good_text_no_json_object

function test_good_help_description_present

function test_good_advances_bisect_log

function test_good_remaining_count_not_negative

function test_good_symbol_changes_is_list

class TestBisectGoodSecurity

function _start

function test_good_json_is_valid_json

function test_good_json_done_is_bool

function test_good_symbol_changes_sanitized_in_json

function test_good_symbol_changes_sanitized_in_text

function test_good_error_no_json_on_failure

function test_good_ansi_in_ref_does_not_leak

class TestBisectGoodStress

function _start

function test_good_on_100_commit_chain

function test_good_performance_100_commits

function test_good_converges_full_session

class TestBisectSkipExtended

function _start

function test_skip_exits_0

function test_skip_j_alias_works

function test_skip_json_verdict_is_skip

function test_skip_json_all_seven_keys

function test_skip_removes_commit_from_remaining

function test_skip_persisted_in_state

function test_skip_defaults_to_head

function test_skip_no_session_exits_1

function test_skip_no_session_error_message

function test_skip_outside_repo_exits_2

function test_skip_invalid_ref_exits_1

function test_skip_text_mentions_commit

function test_skip_text_no_json_object

function test_skip_help_description_present

function test_skip_advances_log

function test_skip_remaining_count_not_negative

function test_skip_symbol_changes_is_list

function test_skip_multiple_commits

class TestBisectSkipSecurity

function _start

function test_skip_json_is_valid_json

function test_skip_json_done_is_bool

function test_skip_symbol_changes_sanitized_in_json

function test_skip_symbol_changes_sanitized_in_text

function test_skip_error_no_json_on_failure

function test_skip_ansi_in_ref_does_not_leak

class TestBisectSkipStress

function _start

function test_skip_on_100_commit_chain

function test_skip_performance_100_commits

function test_skip_reduces_remaining_monotonically

class TestBisectRunExtended

function _start

function test_run_exits_0_with_true

function test_run_j_alias_works

function test_run_json_ndjson_step_keys

function test_run_json_done_line_keys

function test_run_json_done_true_on_last_line

function test_run_json_steps_taken_positive

function test_run_json_verdict_good_with_true

function test_run_json_verdict_bad_with_false

function test_run_no_session_exits_1

function test_run_no_session_error_message

function test_run_outside_repo_exits_2

function test_run_text_mentions_testing

function test_run_text_mentions_first_bad

function test_run_help_description_present

function test_run_json_step_numbers_increment

function test_run_json_remaining_nonincreasing

function test_run_text_no_json_by_default

function test_run_json_first_bad_set_on_done

class TestBisectRunSecurity

function _start

function test_run_json_lines_are_valid_json

function test_run_json_done_field_is_bool

function test_run_text_symbol_changes_sanitized

function test_run_error_no_json_on_failure

function test_run_json_no_ansi_in_output

function test_run_text_no_ansi_in_output

class TestBisectRunStress

function _start

function test_run_50_commit_chain

function test_run_performance_20_commits

function test_run_steps_taken_within_log2

class TestBisectLogExtended

function _start

function test_log_exits_0_no_session

function test_log_exits_0_with_session

function test_log_j_alias_works

function test_log_json_active_false_no_session

function test_log_json_active_true_with_session

function test_log_json_entries_empty_no_session

function test_log_json_entries_grow_with_verdicts

function test_log_json_two_keys

function test_log_json_start_records_bad_and_good

function test_log_json_entries_contain_commit_ids

function test_log_json_entries_are_dicts

function test_log_active_false_after_reset

function test_log_text_shows_bisect_log_header

function test_log_text_no_session_message

function test_log_outside_repo_exits_2

function test_log_help_description_present

function test_log_text_no_json_object

class TestBisectLogSecurity

function _start

function test_log_json_is_valid_json

function test_log_json_active_is_bool

function test_log_json_entries_sanitized

function test_log_text_entries_sanitized

function test_log_json_no_ansi_in_output

function test_log_text_no_ansi_in_output

class TestBisectLogStress

function _start

function test_log_100_commit_session

function test_log_performance_large_session

function test_log_concurrent_reads_consistent

function _run

class TestBisectResetExtended

function _start

function test_reset_exits_0_with_session

function test_reset_exits_0_no_session

function test_reset_j_alias_works

function test_reset_json_reset_true

function test_reset_json_single_key

function test_reset_clears_active_session

function test_reset_prevents_bad_after_reset

function test_reset_prevents_good_after_reset

function test_reset_prevents_skip_after_reset

function test_reset_idempotent_double_reset

function test_reset_allows_new_session_after

function test_reset_clears_log_entries

function test_reset_text_output_mentions_reset

function test_reset_text_no_json_object

function test_reset_outside_repo_exits_2

function test_reset_help_description_present

function test_reset_json_reset_is_bool

function test_reset_mid_session_with_verdicts

class TestBisectResetSecurity

function test_reset_json_is_valid_json

function test_reset_json_no_ansi

function test_reset_text_no_ansi

function test_reset_state_file_removed

function test_reset_no_session_state_file_absent

function test_reset_json_reset_value_true

class TestBisectResetStress

function test_reset_after_100_commit_session

function test_reset_performance

function test_reset_cycle_10_times

class TestBisectStatus

function _start

function test_status_no_session_exits_0

function test_status_no_session_json_active_false

function test_status_no_session_json_only_active_key

function test_status_active_session_exits_0

function test_status_active_json_schema

function test_status_active_remaining_count_positive

function test_status_bad_id_matches_session

function test_status_skipped_count_increments

function test_status_active_false_after_reset

function test_status_outside_repo_exits_2

function test_status_json_is_compact

function test_status_json_no_ansi

function test_status_text_no_session_message

function test_status_text_active_shows_remaining

class TestBisectLogStructured

function _start

function test_log_entry_has_three_keys

function test_log_entry_verdict_values

function test_log_entry_timestamp_is_iso8601

function test_log_skip_entry_appears_after_skip

function test_log_entry_commit_ids_in_session_ids

class TestBisectRunTimeout

function _start

function test_timeout_flag_accepted

function test_timeout_fast_command_succeeds

function test_timeout_triggers_skip

function test_timeout_short_alias

class TestBisectRunStepSymbolChanges

function _start

function test_step_json_has_symbol_changes_key

function test_step_symbol_changes_is_list

function test_step_ndjson_stays_compact

class TestBisectJsonCompact

function _start

function test_start_json_is_compact

function test_bad_json_is_compact

function test_log_json_is_compact

function test_reset_json_is_compact

function test_run_json_ndjson_lines_are_compact

function _parse_bisect

class TestRegisterFlags

function test_bad_default_json_out_is_false

function test_bad_json_flag_sets_json_out

function test_bad_j_shorthand_sets_json_out

function test_good_default_json_out_is_false

function test_good_json_flag_sets_json_out

function test_good_j_shorthand_sets_json_out

function test_log_default_json_out_is_false

function test_log_j_shorthand_sets_json_out

function test_reset_default_json_out_is_false

function test_reset_j_shorthand_sets_json_out

function test_run_default_json_out_is_false

function test_run_j_shorthand_sets_json_out

function test_skip_default_json_out_is_false

function test_skip_j_shorthand_sets_json_out

function test_start_default_json_out_is_false

function test_start_j_shorthand_sets_json_out

function test_start_bad_flag

function test_start_good_flag

function test_status_default_json_out_is_false

function test_status_j_shorthand_sets_json_out

Pathtests/test_cmd_bisect_hardening.py

Lines3,330

Size141.5 KB

LangPython

Refsha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2

Object ID

sha256:4e55b02c1f61a31e2553267b05120ea63b12ce750c7087fe61bd011774df95d9…

Last commit

sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2

fix: remove commit_exists filter from have anchor…

20 days ago

Quick links

Blame History