"""Comprehensive tests for ``muse code code-query``. Review findings addressed -------------------------- Bug fixes * ``walk_history`` used ``ref_file.read_text()`` directly instead of ``get_head_commit_id`` — now correctly delegates to the store. * The redundant double-check ``op_rec.get("op") == "patch" and op_rec["op"] == "patch"`` removed; replaced with ``_is_patch_op`` TypeGuard. * Dead ``if field_val is not None`` check (``field_val`` is always a ``str``) removed. * Dead ``_current_branch`` wrapper removed from CLI; uses ``read_current_branch`` directly. * Double-pass evaluator fallback for commit-level fields replaced with a single clean pass using an ``or_matched`` flag. * Redundant ``list(matches)`` call in JSON output removed. New capabilities * ``endswith`` operator added to DSL and evaluator. * ``--since DATE`` / ``--until DATE`` time-range filters. * ``--limit N`` result cap (independent of ``--max`` walk depth). * ``--count`` flag: prints only the match count. * ``load_manifest=False`` in ``walk_history``: skips snapshot I/O for code queries. * ``walk_history`` now uses ``get_head_commit_id`` instead of reading ref file directly. Test categories --------------- P Parser — all operators, fields, quoted/unquoted, error paths. E Evaluator (unit) — match/no-match for all operators and field types. W walk_history integration — load_manifest optimisation, since/until, max_commits, empty branch, multi-commit ordering. C CLI E2E — --count, --limit, --since, --until, --json, bad input. S Stress — 300-commit walk, large OR expression, no-manifest I/O path. """ from __future__ import annotations import argparse import datetime import json import pathlib from collections.abc import Generator from unittest.mock import MagicMock, patch import pytest from muse.core.query_engine import QueryMatch, format_matches, walk_history from muse.core.ids import hash_commit, hash_snapshot from muse.core.commits import ( CommitRecord, write_commit, ) from muse.domain import DeleteOp, DomainOp, InsertOp, PatchOp, ReplaceOp, SemVerBump, StructuredDelta from muse.core.types import Manifest, NULL_COMMIT_ID, NULL_LONG_ID, fake_id from muse.plugins.code._code_query import ( AndExpr, Comparison, OrExpr, _match_op, _parse_query, build_evaluator, ) from muse.core.paths import commits_dir, head_path, heads_dir, muse_dir from tests.cli_test_helper import CliRunner runner = CliRunner() cli = None # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- def _env(root: pathlib.Path) -> Manifest: return {"MUSE_REPO_ROOT": str(root)} def _run(root: pathlib.Path, *args: str) -> tuple[int, str]: result = runner.invoke(cli, list(args), env=_env(root), catch_exceptions=False) return result.exit_code, result.output def _run_unchecked(root: pathlib.Path, *args: str) -> tuple[int, str]: result = runner.invoke(cli, list(args), env=_env(root)) return result.exit_code, result.output def _now() -> datetime.datetime: return datetime.datetime.now(datetime.timezone.utc) def _dt(year: int = 2026, month: int = 3, day: int = 1) -> datetime.datetime: return datetime.datetime(year, month, day, tzinfo=datetime.timezone.utc) def _insert_delta(*symbols: str, file: str = "src/foo.py") -> StructuredDelta: ops: list[DomainOp] = [ InsertOp( op="insert", address=f"{file}::{sym}", position=None, content_id=fake_id(sym), content_summary=f"added {sym}", ) for sym in symbols ] return StructuredDelta(domain="code", ops=ops, summary=f"{len(ops)} symbol(s) added") def _delete_delta(symbol: str, file: str = "src/foo.py") -> StructuredDelta: op = DeleteOp( op="delete", address=f"{file}::{symbol}", content_id=fake_id(symbol), position=None, content_summary=f"deleted {symbol}", ) return StructuredDelta(domain="code", ops=[op], summary="1 symbol deleted") def _make_commit( root: pathlib.Path, branch: str = "main", parent: str | None = None, delta: StructuredDelta | None = None, author: str = "alice", agent_id: str = "", model_id: str = "", sem_ver_bump: SemVerBump = "none", committed_at: datetime.datetime | None = None, message: str = "test commit", ) -> CommitRecord: """Write a CommitRecord with a content-addressed ID to *root* and return it.""" snap_id = hash_snapshot({}) committed_at_val = committed_at or _now() parent_ids = [parent] if parent else [] commit_id = hash_commit( parent_ids=parent_ids, snapshot_id=snap_id, message=message, committed_at_iso=committed_at_val.isoformat(), author=author, ) rec = CommitRecord( commit_id=commit_id, branch=branch, snapshot_id=snap_id, message=message, committed_at=committed_at_val, parent_commit_id=parent, author=author, agent_id=agent_id, model_id=model_id, sem_ver_bump=sem_ver_bump, structured_delta=delta, ) write_commit(root, rec) return rec def _setup_branch( root: pathlib.Path, branch: str = "main", commits: list[CommitRecord] | None = None, ) -> None: """Wire up HEAD and branch ref so walk_history can find the commits.""" muse_dir(root).mkdir(exist_ok=True) (head_path(root)).write_text(branch) refs_dir = heads_dir(root) refs_dir.mkdir(parents=True, exist_ok=True) if commits: (refs_dir / branch).write_text(commits[-1].commit_id) @pytest.fixture() def store_root(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> pathlib.Path: """Minimal repo layout: .muse/ directories, no branch yet.""" (commits_dir(tmp_path)).mkdir(parents=True) (heads_dir(tmp_path)).mkdir(parents=True) monkeypatch.setenv("MUSE_REPO_ROOT", str(tmp_path)) return tmp_path @pytest.fixture() def repo(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> pathlib.Path: """Full muse-init repo for E2E CLI tests.""" monkeypatch.chdir(tmp_path) r = runner.invoke(cli, ["init", "--domain", "code"], env=_env(tmp_path)) assert r.exit_code == 0, r.output return tmp_path # --------------------------------------------------------------------------- # P — Parser tests # --------------------------------------------------------------------------- class TestParser: """Tokenizer and parser unit tests.""" def test_endswith_operator_parsed(self) -> None: q = _parse_query("symbol endswith _handler") cmp = q.clauses[0].clauses[0] assert cmp.op == "endswith" assert cmp.value == "_handler" def test_all_operators_accepted(self) -> None: for op in ("==", "!=", "contains", "startswith", "endswith"): q = _parse_query(f"author {op} alice") assert q.clauses[0].clauses[0].op == op def test_all_valid_fields_accepted(self) -> None: fields = [ "symbol", "file", "language", "kind", "change", "author", "agent_id", "model_id", "toolchain_id", "sem_ver_bump", "branch", ] for f in fields: q = _parse_query(f"{f} == test") assert q.clauses[0].clauses[0].field == f def test_complex_and_or_query(self) -> None: q = _parse_query("author == 'alice' and change == 'added' or author == 'bob'") assert isinstance(q, OrExpr) assert len(q.clauses) == 2 assert len(q.clauses[0].clauses) == 2 assert len(q.clauses[1].clauses) == 1 def test_single_quoted_value(self) -> None: q = _parse_query("agent_id == 'claude-4'") assert q.clauses[0].clauses[0].value == "claude-4" def test_double_quoted_value(self) -> None: q = _parse_query('model_id == "claude-opus-4"') assert q.clauses[0].clauses[0].value == "claude-opus-4" def test_unquoted_word_value(self) -> None: q = _parse_query("branch == dev") assert q.clauses[0].clauses[0].value == "dev" def test_unknown_field_raises(self) -> None: with pytest.raises(ValueError, match="Unknown field"): _parse_query("nonexistent == 'x'") def test_unknown_operator_raises(self) -> None: with pytest.raises(ValueError, match="Unknown operator"): _parse_query("author like alice") def test_multiple_and_clauses(self) -> None: q = _parse_query("author == 'alice' and change == 'added' and kind == 'function'") assert len(q.clauses[0].clauses) == 3 def test_multiple_or_clauses(self) -> None: q = _parse_query("author == 'a' or author == 'b' or author == 'c'") assert len(q.clauses) == 3 def test_endswith_in_and_chain(self) -> None: q = _parse_query("file endswith .py and symbol endswith _test") clauses = q.clauses[0].clauses assert clauses[0].op == "endswith" assert clauses[1].op == "endswith" def test_sem_ver_bump_values_accepted(self) -> None: for val in ("none", "patch", "minor", "major"): q = _parse_query(f"sem_ver_bump == {val}") assert q.clauses[0].clauses[0].value == val # --------------------------------------------------------------------------- # E — Evaluator unit tests # --------------------------------------------------------------------------- def _bare_commit( author: str = "alice", agent_id: str = "", model_id: str = "", branch: str = "main", sem_ver_bump: SemVerBump = "none", delta: StructuredDelta | None = None, message: str = "test", ) -> CommitRecord: return CommitRecord( commit_id=fake_id(f"{author}|{agent_id}|{branch}"), branch=branch, snapshot_id="s" * 64, message=message, committed_at=_now(), author=author, agent_id=agent_id, model_id=model_id, sem_ver_bump=sem_ver_bump, structured_delta=delta, ) class TestMatchOp: """Unit tests for the _match_op primitive.""" def test_eq_match(self) -> None: assert _match_op("alice", "==", "alice") is True def test_eq_no_match(self) -> None: assert _match_op("alice", "==", "bob") is False def test_neq_match(self) -> None: assert _match_op("alice", "!=", "bob") is True def test_neq_no_match(self) -> None: assert _match_op("alice", "!=", "alice") is False def test_contains_case_insensitive(self) -> None: assert _match_op("ClaudeBot", "contains", "claude") is True def test_startswith_case_insensitive(self) -> None: assert _match_op("Claude-opus", "startswith", "claude") is True def test_endswith_match(self) -> None: assert _match_op("my_handler", "endswith", "_handler") is True def test_endswith_no_match(self) -> None: assert _match_op("my_handler", "endswith", "_service") is False def test_endswith_case_insensitive(self) -> None: assert _match_op("MyHandler", "endswith", "handler") is True def test_endswith_empty_suffix(self) -> None: assert _match_op("anything", "endswith", "") is True class TestBuildEvaluator: """Evaluator closure tests.""" def test_author_eq_match(self) -> None: ev = build_evaluator("author == 'alice'") results = ev(_bare_commit(author="alice"), {}, pathlib.Path(".")) assert len(results) == 1 def test_author_eq_no_match(self) -> None: ev = build_evaluator("author == 'bob'") results = ev(_bare_commit(author="alice"), {}, pathlib.Path(".")) assert results == [] def test_author_contains(self) -> None: ev = build_evaluator("author contains li") results = ev(_bare_commit(author="alice"), {}, pathlib.Path(".")) assert len(results) == 1 def test_agent_id_contains(self) -> None: ev = build_evaluator("agent_id contains claude") results = ev(_bare_commit(agent_id="claude-4.6"), {}, pathlib.Path(".")) assert len(results) == 1 def test_model_id_startswith(self) -> None: ev = build_evaluator("model_id startswith claude") results = ev(_bare_commit(model_id="claude-opus-4"), {}, pathlib.Path(".")) assert len(results) == 1 def test_branch_match(self) -> None: ev = build_evaluator("branch == dev") results = ev(_bare_commit(branch="dev"), {}, pathlib.Path(".")) assert len(results) == 1 def test_sem_ver_bump_major(self) -> None: ev = build_evaluator("sem_ver_bump == major") results = ev(_bare_commit(sem_ver_bump="major"), {}, pathlib.Path(".")) assert len(results) == 1 def test_and_both_must_match(self) -> None: ev = build_evaluator("author == 'alice' and agent_id == 'bot'") commit = _bare_commit(author="alice", agent_id="human") assert ev(commit, {}, pathlib.Path(".")) == [] def test_and_all_match(self) -> None: ev = build_evaluator("author == 'alice' and agent_id == 'bot'") commit = _bare_commit(author="alice", agent_id="bot") assert len(ev(commit, {}, pathlib.Path("."))) == 1 def test_or_first_clause_matches(self) -> None: ev = build_evaluator("author == 'alice' or author == 'bob'") assert len(ev(_bare_commit(author="alice"), {}, pathlib.Path("."))) >= 1 def test_or_second_clause_matches(self) -> None: ev = build_evaluator("author == 'alice' or author == 'bob'") assert len(ev(_bare_commit(author="bob"), {}, pathlib.Path("."))) >= 1 def test_or_neither_clause_matches(self) -> None: ev = build_evaluator("author == 'alice' or author == 'bob'") assert ev(_bare_commit(author="carol"), {}, pathlib.Path(".")) == [] def test_symbol_eq_from_delta(self) -> None: delta = _insert_delta("my_func") ev = build_evaluator("symbol == 'my_func'") results = ev(_bare_commit(delta=delta), {}, pathlib.Path(".")) assert len(results) >= 1 assert any("my_func" in r.get("detail", "") for r in results) def test_symbol_endswith(self) -> None: delta = _insert_delta("my_handler", "other_service") ev = build_evaluator("symbol endswith _handler") results = ev(_bare_commit(delta=delta), {}, pathlib.Path(".")) assert len(results) >= 1 assert all("_handler" in r.get("detail", "").lower() for r in results) def test_symbol_endswith_no_match(self) -> None: delta = _insert_delta("my_service") ev = build_evaluator("symbol endswith _handler") assert ev(_bare_commit(delta=delta), {}, pathlib.Path(".")) == [] def test_change_added(self) -> None: delta = _insert_delta("func_a") ev = build_evaluator("change == added") results = ev(_bare_commit(delta=delta), {}, pathlib.Path(".")) assert len(results) >= 1 def test_change_removed(self) -> None: delta = _delete_delta("old_func") ev = build_evaluator("change == removed") results = ev(_bare_commit(delta=delta), {}, pathlib.Path(".")) assert len(results) >= 1 def test_change_no_delta(self) -> None: ev = build_evaluator("change == added") assert ev(_bare_commit(delta=None), {}, pathlib.Path(".")) == [] def test_file_eq_match(self) -> None: delta = _insert_delta("func", file="src/core.py") ev = build_evaluator("file == 'src/core.py'") results = ev(_bare_commit(delta=delta), {}, pathlib.Path(".")) assert len(results) >= 1 def test_file_contains(self) -> None: delta = _insert_delta("func", file="muse/core/store.py") ev = build_evaluator("file contains core") results = ev(_bare_commit(delta=delta), {}, pathlib.Path(".")) assert len(results) >= 1 def test_file_endswith_extension(self) -> None: delta = _insert_delta("func", file="muse/core/store.py") ev = build_evaluator("file endswith .py") results = ev(_bare_commit(delta=delta), {}, pathlib.Path(".")) assert len(results) >= 1 def test_language_python(self) -> None: delta = _insert_delta("func", file="muse/core/store.py") ev = build_evaluator("language == Python") results = ev(_bare_commit(delta=delta), {}, pathlib.Path(".")) assert len(results) >= 1 def test_symbol_cap_at_20(self) -> None: """Per-commit symbol match cap is 20.""" symbols = [f"func_{i}" for i in range(30)] delta = _insert_delta(*symbols) ev = build_evaluator("change == added") results = ev(_bare_commit(delta=delta), {}, pathlib.Path(".")) assert len(results) == 20 def test_commit_level_match_detail_is_message(self) -> None: """Commit-level match uses the commit message as detail.""" ev = build_evaluator("author == 'alice'") commit = _bare_commit(author="alice", message="Fix the auth bug") results = ev(commit, {}, pathlib.Path(".")) assert len(results) == 1 assert "Fix the auth bug" in results[0]["detail"] def test_mixed_or_commit_level_clause_first_matches(self) -> None: """OR with commit-level first clause: matching commit gets a result even without delta.""" ev = build_evaluator("author == 'alice' or change == 'added'") commit = _bare_commit(author="alice", delta=None) results = ev(commit, {}, pathlib.Path(".")) # alice's author clause matched; no delta → commit-level QueryMatch assert len(results) == 1 def test_mixed_or_symbol_clause_second_matches(self) -> None: """OR with symbol-level second clause: delta provides symbol details.""" delta = _insert_delta("my_func") ev = build_evaluator("author == 'nobody' or change == 'added'") commit = _bare_commit(author="alice", delta=delta) results = ev(commit, {}, pathlib.Path(".")) assert len(results) >= 1 assert any("added" in r.get("detail", "") for r in results) def test_patch_op_child_ops_traversed(self) -> None: """PatchOp.child_ops should be evaluated for symbol matches.""" child: InsertOp = InsertOp( op="insert", address="src/module.py::child_func", position=None, content_id="c" * 64, content_summary="child added", ) patch_op: PatchOp = PatchOp( op="patch", address="src/module.py", child_ops=[child], child_domain="code", child_summary="", ) delta: StructuredDelta = StructuredDelta( domain="code", ops=[patch_op], summary="patched module" ) ev = build_evaluator("symbol == 'child_func'") results = ev(_bare_commit(delta=delta), {}, pathlib.Path(".")) assert len(results) >= 1 def test_agent_id_in_result(self) -> None: """agent_id appears in the QueryMatch when set.""" ev = build_evaluator("author == 'alice'") commit = _bare_commit(author="alice", agent_id="claude-4.6") results = ev(commit, {}, pathlib.Path(".")) assert results[0].get("agent_id") == "claude-4.6" def test_agent_id_absent_when_empty(self) -> None: """agent_id key is absent from QueryMatch when commit has no agent.""" ev = build_evaluator("author == 'alice'") commit = _bare_commit(author="alice", agent_id="") results = ev(commit, {}, pathlib.Path(".")) assert "agent_id" not in results[0] def test_extra_dict_in_symbol_match(self) -> None: """Symbol-level matches carry an 'extra' dict with file/symbol/change.""" delta = _insert_delta("my_func", file="src/core.py") ev = build_evaluator("change == added") results = ev(_bare_commit(delta=delta), {}, pathlib.Path(".")) extra = results[0].get("extra", {}) assert extra.get("file") == "src/core.py" assert extra.get("symbol") == "my_func" assert extra.get("change") == "added" # --------------------------------------------------------------------------- # W — walk_history integration tests # --------------------------------------------------------------------------- class TestWalkHistory: """Integration tests that write real commit records and call walk_history.""" def test_single_commit_match(self, store_root: pathlib.Path) -> None: c = _make_commit(store_root, author="alice") _setup_branch(store_root, commits=[c]) ev = build_evaluator("author == alice") results = walk_history(store_root, "main", ev, load_manifest=False) assert len(results) == 1 def test_single_commit_no_match(self, store_root: pathlib.Path) -> None: c = _make_commit(store_root, author="alice", message="no match commit") _setup_branch(store_root, commits=[c]) ev = build_evaluator("author == bob") results = walk_history(store_root, "main", ev, load_manifest=False) assert results == [] def test_multi_commit_chained(self, store_root: pathlib.Path) -> None: """Three-commit chain: all should be walked.""" c1 = _make_commit(store_root, author="alice", message="first") c2 = _make_commit(store_root, author="alice", message="second", parent=c1.commit_id) c3 = _make_commit(store_root, author="alice", message="third", parent=c2.commit_id) _setup_branch(store_root, commits=[c1, c2, c3]) ev = build_evaluator("author == alice") results = walk_history(store_root, "main", ev, load_manifest=False) assert len(results) == 3 def test_max_commits_respected(self, store_root: pathlib.Path) -> None: prev: str | None = None commits: list[CommitRecord] = [] for i in range(10): c = _make_commit(store_root, author="alice", parent=prev, message=f"commit {i}") commits.append(c) prev = c.commit_id _setup_branch(store_root, commits=commits) ev = build_evaluator("author == alice") results = walk_history(store_root, "main", ev, max_commits=5, load_manifest=False) assert len(results) == 5 def test_empty_branch_returns_empty(self, store_root: pathlib.Path) -> None: # Branch ref file does not exist. ev = build_evaluator("author == alice") results = walk_history(store_root, "ghost", ev, load_manifest=False) assert results == [] def test_load_manifest_false_skips_manifest_io( self, store_root: pathlib.Path ) -> None: """load_manifest=False must not call get_commit_snapshot_manifest.""" c = _make_commit(store_root, author="alice", message="manifest skip") _setup_branch(store_root, commits=[c]) ev = build_evaluator("author == alice") with patch( "muse.core.query_engine.get_commit_snapshot_manifest" ) as mock_manifest: walk_history(store_root, "main", ev, load_manifest=False) mock_manifest.assert_not_called() def test_load_manifest_true_calls_manifest_io( self, store_root: pathlib.Path ) -> None: """load_manifest=True (the default) should attempt manifest loading.""" c = _make_commit(store_root, author="alice", message="manifest load") _setup_branch(store_root, commits=[c]) ev = build_evaluator("author == alice") with patch( "muse.core.query_engine.get_commit_snapshot_manifest", return_value={}, ) as mock_manifest: walk_history(store_root, "main", ev, load_manifest=True) mock_manifest.assert_called_once() def test_since_filters_old_commits(self, store_root: pathlib.Path) -> None: old = _make_commit( store_root, author="alice", committed_at=_dt(2025, 1, 1), message="old commit", ) new = _make_commit( store_root, author="alice", committed_at=_dt(2026, 3, 1), parent=old.commit_id, message="new commit", ) _setup_branch(store_root, commits=[old, new]) ev = build_evaluator("author == alice") results = walk_history( store_root, "main", ev, load_manifest=False, since=_dt(2026, 1, 1), ) # Only the 2026 commit passes the filter. assert len(results) == 1 def test_until_filters_new_commits(self, store_root: pathlib.Path) -> None: old = _make_commit( store_root, author="alice", committed_at=_dt(2025, 6, 1), message="old until commit", ) new = _make_commit( store_root, author="alice", committed_at=_dt(2026, 3, 26), parent=old.commit_id, message="new until commit", ) _setup_branch(store_root, commits=[old, new]) ev = build_evaluator("author == alice") results = walk_history( store_root, "main", ev, load_manifest=False, until=_dt(2025, 12, 31), ) assert len(results) == 1 assert results[0]["committed_at"].startswith("2025") def test_since_and_until_window(self, store_root: pathlib.Path) -> None: dates = [_dt(2025, m, 1) for m in range(1, 13)] prev: str | None = None commits: list[CommitRecord] = [] for i, d in enumerate(dates): c = _make_commit(store_root, author="alice", committed_at=d, parent=prev, message=f"month {i}") commits.append(c) prev = c.commit_id _setup_branch(store_root, commits=commits) ev = build_evaluator("author == alice") results = walk_history( store_root, "main", ev, load_manifest=False, since=_dt(2025, 4, 1), until=_dt(2025, 9, 1), ) # April (4), May (5), Jun (6), Jul (7), Aug (8), Sep (9) = 6 assert len(results) == 6 def test_results_ordered_newest_first(self, store_root: pathlib.Path) -> None: """walk_history traverses parent chain newest-first.""" prev: str | None = None commits: list[CommitRecord] = [] for i in range(5): c = _make_commit( store_root, author="alice", committed_at=_dt(2026, 1, i + 1), parent=prev, message=f"order {i}", ) commits.append(c) prev = c.commit_id _setup_branch(store_root, commits=commits) ev = build_evaluator("author == alice") results = walk_history(store_root, "main", ev, load_manifest=False) timestamps = [r["committed_at"] for r in results] assert timestamps == sorted(timestamps, reverse=True) def test_head_commit_id_override(self, store_root: pathlib.Path) -> None: c1 = _make_commit(store_root, author="alice", message="override alice") c2 = _make_commit(store_root, author="bob", parent=c1.commit_id, message="override bob") _setup_branch(store_root, commits=[c1, c2]) ev = build_evaluator("author == alice") # Start from c1 directly, skipping c2. results = walk_history( store_root, "main", ev, head_commit_id=c1.commit_id, load_manifest=False, ) assert len(results) == 1 def test_broken_parent_chain_stops_gracefully( self, store_root: pathlib.Path ) -> None: from muse.core.object_store import object_path as _obj_path # Satisfy the parent-existence guard by writing a stub file at the # object store path. walk_history will find it, fail to parse the # payload as a CommitRecord (returns None), and stop. stub = _obj_path(store_root, NULL_LONG_ID) stub.parent.mkdir(parents=True, exist_ok=True) stub.write_bytes(b"commit 0\0") # valid header, empty payload — CommitRecord fails → None c = _make_commit( store_root, author="alice", parent=NULL_LONG_ID, # points to the stub above message="orphan commit", ) _setup_branch(store_root, commits=[c]) ev = build_evaluator("author == alice") results = walk_history(store_root, "main", ev, load_manifest=False) # Reads c, then tries parent "0"*64 which is unreadable → stops. assert len(results) == 1 def test_evaluator_exception_is_swallowed( self, store_root: pathlib.Path ) -> None: """An evaluator that raises should not abort the walk — just skip that commit.""" c1 = _make_commit(store_root, author="alice", message="exception c1") c2 = _make_commit(store_root, author="alice", parent=c1.commit_id, message="exception c2") _setup_branch(store_root, commits=[c1, c2]) call_count = [0] def flaky_ev( commit: CommitRecord, manifest: Manifest, root: pathlib.Path ) -> list[QueryMatch]: call_count[0] += 1 if call_count[0] == 1: raise RuntimeError("simulated evaluator failure") return [ QueryMatch( commit_id=commit.commit_id, author=commit.author, committed_at=commit.committed_at.isoformat(), branch=commit.branch, detail="ok", extra={}, ) ] results = walk_history(store_root, "main", flaky_ev, load_manifest=False) assert len(results) == 1 # --------------------------------------------------------------------------- # C — CLI E2E tests # --------------------------------------------------------------------------- def _seed_commit( root: pathlib.Path, branch: str = "main", parent: str | None = None, delta: StructuredDelta | None = None, author: str = "alice", agent_id: str = "", sem_ver_bump: SemVerBump = "none", committed_at: datetime.datetime | None = None, message: str = "test commit", ) -> CommitRecord: """Write a commit with a content-addressed ID and advance the branch HEAD.""" c = _make_commit( root, branch=branch, parent=parent, delta=delta, author=author, agent_id=agent_id, sem_ver_bump=sem_ver_bump, committed_at=committed_at, message=message, ) refs_dir = heads_dir(root) refs_dir.mkdir(parents=True, exist_ok=True) (refs_dir / branch).write_text(c.commit_id) return c class TestCLI: """E2E CLI tests using a real-init repo with crafted commit records.""" def test_count_flag(self, repo: pathlib.Path) -> None: delta = _insert_delta("func_a", "func_b") _seed_commit(repo, delta=delta, message="cli count") code, out = _run(repo, "code", "code-query", "change == added", "--count") assert code == 0 assert out.strip().isdigit() assert int(out.strip()) >= 1 def test_count_no_matches(self, repo: pathlib.Path) -> None: _seed_commit(repo, delta=None, message="cli count zero") code, out = _run(repo, "code", "code-query", "author == nobody", "--count") assert code == 0 assert out.strip() == "0" def test_json_flag_returns_list(self, repo: pathlib.Path) -> None: _seed_commit(repo, author="alice", message="cli json") code, out = _run(repo, "code", "code-query", "author == alice", "--json") assert code == 0 parsed = json.loads(out) assert isinstance(parsed, dict) assert "total" in parsed assert isinstance(parsed["results"], list) def test_json_match_has_required_keys(self, repo: pathlib.Path) -> None: _seed_commit(repo, author="alice", message="cli-json-keys") _, out = _run(repo, "code", "code-query", "author == alice", "--json") parsed = json.loads(out) matches = parsed["results"] assert len(matches) >= 1 m = matches[0] for key in ("commit_id", "author", "committed_at", "branch", "detail"): assert key in m, f"missing key: {key}" def test_json_no_matches_returns_empty_list(self, repo: pathlib.Path) -> None: _seed_commit(repo, author="alice", message="cli-json-empty") _, out = _run(repo, "code", "code-query", "author == nobody", "--json") parsed = json.loads(out) assert parsed["total"] == 0 assert parsed["results"] == [] def test_limit_caps_display(self, repo: pathlib.Path) -> None: delta = _insert_delta(*[f"func_{i}" for i in range(30)]) _seed_commit(repo, delta=delta, message="cli-limit") code, out = _run( repo, "code", "code-query", "change == added", "--limit", "3" ) assert code == 0 # "Found N match(es):" line + 3 detail lines + maybe truncation line result_lines = [l for l in out.splitlines() if l.strip().startswith("src/")] assert len(result_lines) <= 3 def test_endswith_operator_in_query(self, repo: pathlib.Path) -> None: delta = _insert_delta("auth_handler", "data_service", file="src/routes.py") _seed_commit(repo, delta=delta, message="cli-endswith") _, out = _run( repo, "code", "code-query", "symbol endswith _handler" ) assert "handler" in out.lower() or "match" in out.lower() def test_invalid_query_exits_1(self, repo: pathlib.Path) -> None: code, _ = _run_unchecked( repo, "code", "code-query", "nonexistent == value" ) assert code == 1 def test_since_filters_correctly(self, repo: pathlib.Path) -> None: old = _seed_commit(repo, author="alice", committed_at=_dt(2025, 1, 1), message="cli-since-old") _seed_commit( repo, author="alice", committed_at=_dt(2026, 3, 1), parent=old.commit_id, message="cli-since-new", ) _, out = _run( repo, "code", "code-query", "author == alice", "--since", "2026-01-01", ) # Output should mention exactly 1 match (the 2026 commit). assert "1 match" in out def test_until_filters_correctly(self, repo: pathlib.Path) -> None: old = _seed_commit(repo, author="alice", committed_at=_dt(2025, 1, 1), message="cli-until-old") _seed_commit( repo, author="alice", committed_at=_dt(2026, 3, 26), parent=old.commit_id, message="cli-until-new", ) _, out = _run( repo, "code", "code-query", "author == alice", "--until", "2025-12-31", ) assert "1 match" in out def test_invalid_since_date_exits_1(self, repo: pathlib.Path) -> None: code, _ = _run_unchecked( repo, "code", "code-query", "author == alice", "--since", "not-a-date", ) assert code == 1 def test_invalid_until_date_exits_1(self, repo: pathlib.Path) -> None: code, _ = _run_unchecked( repo, "code", "code-query", "author == alice", "--until", "2026/01/01", ) assert code == 1 def test_no_commits_on_branch_shows_no_matches( self, repo: pathlib.Path ) -> None: code, out = _run( repo, "code", "code-query", "author == alice", "--branch", "nonexistent-branch", ) assert code == 0 assert "No matches found" in out def test_sem_ver_bump_query(self, repo: pathlib.Path) -> None: _seed_commit(repo, author="alice", sem_ver_bump="major", message="cli-semver") _, out = _run(repo, "code", "code-query", "sem_ver_bump == major") assert "match" in out def test_text_output_format_header(self, repo: pathlib.Path) -> None: _seed_commit(repo, author="alice", message="cli-fmt") _, out = _run(repo, "code", "code-query", "author == alice") assert "Found" in out and "match" in out def test_since_datetime_format_accepted(self, repo: pathlib.Path) -> None: _seed_commit(repo, author="alice", committed_at=_dt(2026, 3, 26), message="cli-dt-fmt") code, _ = _run( repo, "code", "code-query", "author == alice", "--since", "2026-03-01T00:00:00", ) assert code == 0 def test_count_and_json_both_respected(self, repo: pathlib.Path) -> None: """--count takes precedence over --json (count is printed as a number).""" _seed_commit(repo, author="alice", message="cli-count-json") code, out = _run( repo, "code", "code-query", "author == alice", "--count", "--json" ) assert code == 0 # --count wins; output should be a plain integer assert out.strip().isdigit() # --------------------------------------------------------------------------- # S — Stress tests # --------------------------------------------------------------------------- class TestStress: """High-volume and performance stress tests.""" def test_300_commits_all_match(self, store_root: pathlib.Path) -> None: prev: str | None = None commits: list[CommitRecord] = [] for i in range(300): c = _make_commit(store_root, author="alice", parent=prev, message=f"stress {i}") commits.append(c) prev = c.commit_id _setup_branch(store_root, commits=commits) ev = build_evaluator("author == alice") results = walk_history( store_root, "main", ev, max_commits=300, load_manifest=False ) assert len(results) == 300 def test_300_commits_none_match(self, store_root: pathlib.Path) -> None: prev: str | None = None commits: list[CommitRecord] = [] for i in range(300): c = _make_commit(store_root, author="alice", parent=prev, message=f"miss {i}") commits.append(c) prev = c.commit_id _setup_branch(store_root, commits=commits) ev = build_evaluator("author == bob") results = walk_history( store_root, "main", ev, max_commits=300, load_manifest=False ) assert results == [] def test_large_or_expression_evaluator(self) -> None: """50-clause OR expression; evaluator must not degrade.""" clauses = " or ".join(f"author == 'agent_{i}'" for i in range(50)) ev = build_evaluator(clauses) commit = _bare_commit(author="agent_49") results = ev(commit, {}, pathlib.Path(".")) assert len(results) >= 1 def test_50_symbols_per_commit_cap_is_enforced( self, store_root: pathlib.Path ) -> None: """200 matching symbols in one commit must produce exactly 20 results (cap).""" symbols = [f"func_{i}" for i in range(200)] delta = _insert_delta(*symbols) c = _make_commit(store_root, delta=delta, message="stress cap commit") _setup_branch(store_root, commits=[c]) ev = build_evaluator("change == added") results = walk_history( store_root, "main", ev, load_manifest=False ) assert len(results) == 20 def test_load_manifest_false_never_reads_manifest_in_300_commit_walk( self, store_root: pathlib.Path ) -> None: """Critical: manifest I/O must be zero when load_manifest=False.""" prev: str | None = None commits: list[CommitRecord] = [] for i in range(300): c = _make_commit(store_root, author="alice", parent=prev, message=f"nomani {i}") commits.append(c) prev = c.commit_id _setup_branch(store_root, commits=commits) ev = build_evaluator("author == alice") with patch( "muse.core.query_engine.get_commit_snapshot_manifest" ) as mock_m: walk_history( store_root, "main", ev, max_commits=300, load_manifest=False ) mock_m.assert_not_called() def test_mixed_delta_and_no_delta_commits( self, store_root: pathlib.Path ) -> None: """Commits with and without deltas co-exist; walk must not crash.""" prev: str | None = None commits: list[CommitRecord] = [] for i in range(50): delta = _insert_delta("func") if i % 2 == 0 else None c = _make_commit(store_root, author="alice", delta=delta, parent=prev, message=f"mixed {i}") commits.append(c) prev = c.commit_id _setup_branch(store_root, commits=commits) ev = build_evaluator("author == alice") results = walk_history(store_root, "main", ev, max_commits=50, load_manifest=False) assert len(results) == 50 # --------------------------------------------------------------------------- # R — Regression tests (named for specific bugs fixed) # --------------------------------------------------------------------------- class TestRegressions: """One test per bug fixed — guaranteed not to regress.""" def test_walk_history_uses_store_not_direct_ref_read( self, store_root: pathlib.Path ) -> None: """walk_history must call get_head_commit_id, not read the ref file directly.""" c = _make_commit(store_root, author="alice", message="reg store commit") _setup_branch(store_root, commits=[c]) ev = build_evaluator("author == alice") with patch( "muse.core.query_engine.get_head_commit_id", wraps=__import__( "muse.core.refs", fromlist=["get_head_commit_id"] ).get_head_commit_id, ) as mock_fn: walk_history(store_root, "main", ev, load_manifest=False) mock_fn.assert_called_once_with(store_root, "main") def test_endswith_operator_not_silently_ignored(self) -> None: """Regression: endswith was missing from CodeOp, causing ValueError.""" # This would have raised ValueError: "Unknown operator: 'endswith'" before the fix. ev = build_evaluator("symbol endswith _service") delta = _insert_delta("auth_service") commit = _bare_commit(delta=delta) results = ev(commit, {}, pathlib.Path(".")) assert len(results) >= 1 def test_dead_field_val_none_check_removed(self) -> None: """field_val from .get(f, '') is always str — 'is not None' was dead code. Verify field matching still works correctly after the dead-check removal. """ delta = _insert_delta("my_func", file="src/core.py") ev = build_evaluator("file == 'src/core.py'") results = ev(_bare_commit(delta=delta), {}, pathlib.Path(".")) assert len(results) >= 1 def test_patch_op_redundant_condition_fixed(self) -> None: """Regression: 'op_rec.get("op") == "patch" and op_rec["op"] == "patch"' was redundant and now replaced by _is_patch_op TypeGuard. PatchOp child_ops must still be traversed correctly. """ child: InsertOp = InsertOp( op="insert", address="lib/utils.py::parse", position=None, content_id="a" * 64, content_summary="parse added", ) patch_op: PatchOp = PatchOp(op="patch", address="lib/utils.py", child_ops=[child], child_domain="code", child_summary="") delta: StructuredDelta = StructuredDelta( domain="code", ops=[patch_op], summary="patched utils" ) ev = build_evaluator("symbol == parse") results = ev(_bare_commit(delta=delta), {}, pathlib.Path(".")) assert len(results) >= 1 def test_json_output_is_list_not_wrapped_list(self, repo: pathlib.Path) -> None: """JSON output is {total, results} — results is a flat list of match dicts.""" _seed_commit(repo, author="alice", message="reg-json-list") _, out = _run(repo, "code", "code-query", "author == alice", "--json") parsed = json.loads(out) assert isinstance(parsed, dict) assert "total" in parsed assert isinstance(parsed["results"], list) assert parsed["total"] == len(parsed["results"]) if parsed["results"]: assert isinstance(parsed["results"][0], dict) def test_mixed_or_commit_level_clause_was_silently_dropped( self, store_root: pathlib.Path ) -> None: """Regression: with the old double-pass, a commit matching the FIRST OR clause (commit-level) would produce symbol_matches=[] and then fail the 'only_commit_fields' check if the SECOND clause used a symbol field — resulting in a silent drop. The new or_matched flag fixes this. """ c = _make_commit( store_root, author="alice", delta=None, message="reg or drop" # no delta at all ) _setup_branch(store_root, commits=[c]) # Mixed OR: first clause is commit-level (matches), second is symbol-level. ev = build_evaluator("author == 'alice' or change == 'added'") results = walk_history(store_root, "main", ev, load_manifest=False) # alice's commit must appear even though change=='added' can't match (no delta). assert len(results) == 1 # --------------------------------------------------------------------------- # TestRegisterFlags # --------------------------------------------------------------------------- class TestRegisterFlags: """register() wires --json / -j correctly.""" def _parse(self, *args: str) -> argparse.Namespace: from muse.cli.commands.code_query import register p = argparse.ArgumentParser() sub = p.add_subparsers() register(sub) return p.parse_args(["code-query", *args]) def test_default_json_out_is_false(self) -> None: ns = self._parse("author == 'x'") assert ns.json_out is False def test_json_flag_sets_json_out(self) -> None: ns = self._parse("--json", "author == 'x'") assert ns.json_out is True def test_j_shorthand_sets_json_out(self) -> None: ns = self._parse("-j", "author == 'x'") assert ns.json_out is True