"""Tests for ``muse content-grep``. Covers: no match exit-1, pattern found, --files-only, --count, --ignore-case, --format json, binary skip, multi-file, stress: 100 files. Working-tree mode: --working-tree searches disk, not the committed snapshot. """ from __future__ import annotations type _FileStore = dict[str, bytes] import datetime import json import pathlib import pytest from tests.cli_test_helper import CliRunner cli = None # argparse migration — CliRunner ignores this arg from muse.core.object_store import write_object from muse.core.ids import hash_commit, hash_snapshot from muse.core.commits import ( CommitRecord, write_commit, ) from muse.core.snapshots import ( SnapshotRecord, write_snapshot, ) from muse.core.types import Manifest, blob_id from muse.core.paths import heads_dir, muse_dir runner = CliRunner() _REPO_ID = "cgrep-test" # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- def _init_repo(path: pathlib.Path) -> pathlib.Path: dot_muse = muse_dir(path) for d in ("commits", "snapshots", "objects", "refs/heads"): (dot_muse / d).mkdir(parents=True, exist_ok=True) (dot_muse / "HEAD").write_text("ref: refs/heads/main", encoding="utf-8") (dot_muse / "repo.json").write_text( json.dumps({"repo_id": _REPO_ID, "domain": "midi"}), encoding="utf-8" ) return path def _env(repo: pathlib.Path) -> Manifest: return {"MUSE_REPO_ROOT": str(repo)} _counter = 0 def _commit_files(root: pathlib.Path, files: _FileStore) -> str: global _counter _counter += 1 manifest: Manifest = {} for rel_path, content in files.items(): obj_id = blob_id(content) write_object(root, obj_id, content) manifest[rel_path] = obj_id snap_id = hash_snapshot(manifest) write_snapshot(root, SnapshotRecord(snapshot_id=snap_id, manifest=manifest)) committed_at = datetime.datetime.now(datetime.timezone.utc) commit_id = hash_commit( parent_ids=[], snapshot_id=snap_id, message=f"commit {_counter}", committed_at_iso=committed_at.isoformat(), ) write_commit(root, CommitRecord( commit_id=commit_id, branch="main", snapshot_id=snap_id, message=f"commit {_counter}", committed_at=committed_at, )) (heads_dir(root) / "main").write_text(commit_id, encoding="utf-8") return commit_id # --------------------------------------------------------------------------- # Unit: help # --------------------------------------------------------------------------- def test_content_grep_help() -> None: result = runner.invoke(cli, ["content-grep", "--help"]) assert result.exit_code == 0 assert "pattern" in result.output # --------------------------------------------------------------------------- # Unit: no match → exit 1 # --------------------------------------------------------------------------- def test_content_grep_no_match(tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) _commit_files(tmp_path, {"song.txt": b"chord: Am\ntempo: 120\n"}) result = runner.invoke(cli, ["content-grep", "ZZZNOMATCH", "--json"], env=_env(tmp_path)) assert result.exit_code != 0 # --json must always emit valid JSON even on no-match so agents can parse safely. data = json.loads(result.output) assert data["total_matches"] == 0 assert data["results"] == [] # --------------------------------------------------------------------------- # Unit: match found → exit 0 # --------------------------------------------------------------------------- def test_content_grep_match_found(tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) _commit_files(tmp_path, {"song.txt": b"chord: Cm7\ntempo: 120\n"}) result = runner.invoke(cli, ["content-grep", "Cm7"], env=_env(tmp_path)) assert result.exit_code == 0 assert "song.txt" in result.output # --------------------------------------------------------------------------- # Unit: --ignore-case # --------------------------------------------------------------------------- def test_content_grep_ignore_case(tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) _commit_files(tmp_path, {"notes.txt": b"VERSE: intro melody\n"}) result = runner.invoke( cli, ["content-grep", "verse", "--ignore-case"], env=_env(tmp_path) ) assert result.exit_code == 0 assert "notes.txt" in result.output def test_content_grep_case_sensitive_no_match(tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) _commit_files(tmp_path, {"notes.txt": b"VERSE: intro melody\n"}) result = runner.invoke( cli, ["content-grep", "verse"], env=_env(tmp_path) ) # Case-sensitive: "verse" ≠ "VERSE" → no match. assert result.exit_code != 0 # --------------------------------------------------------------------------- # Unit: --files-only # --------------------------------------------------------------------------- def test_content_grep_files_only(tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) _commit_files(tmp_path, { "a.txt": b"match here\n", "b.txt": b"match here too\n", }) result = runner.invoke( cli, ["content-grep", "match", "--files-only"], env=_env(tmp_path) ) assert result.exit_code == 0 lines = [l.strip() for l in result.output.strip().split("\n") if l.strip()] for line in lines: assert ":" not in line or line.startswith("a.txt") or line.startswith("b.txt") # --------------------------------------------------------------------------- # Unit: --count # --------------------------------------------------------------------------- def test_content_grep_count(tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) _commit_files(tmp_path, {"multi.txt": b"hit\nhit\nhit\nmiss\n"}) result = runner.invoke( cli, ["content-grep", "hit", "--count"], env=_env(tmp_path) ) assert result.exit_code == 0 assert "3" in result.output # --------------------------------------------------------------------------- # Unit: --format json # --------------------------------------------------------------------------- def test_content_grep_json_output(tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) _commit_files(tmp_path, {"song.midi.txt": b"note: C4\nnote: D4\n"}) result = runner.invoke( cli, ["content-grep", "note", "--json"], env=_env(tmp_path) ) assert result.exit_code == 0 data = json.loads(result.output) assert isinstance(data, dict) assert len(data["results"]) >= 1 assert data["results"][0]["match_count"] >= 2 # --------------------------------------------------------------------------- # Unit: binary file skipped silently # --------------------------------------------------------------------------- def test_content_grep_binary_skipped(tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) binary_content = b"\x00\x01\x02\x03" * 100 text_content = b"searchable text here\n" _commit_files(tmp_path, { "binary.bin": binary_content, "text.txt": text_content, }) result = runner.invoke( cli, ["content-grep", "searchable"], env=_env(tmp_path) ) assert result.exit_code == 0 assert "text.txt" in result.output # --------------------------------------------------------------------------- # Unit: short flags work # --------------------------------------------------------------------------- def test_content_grep_short_flags(tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) _commit_files(tmp_path, {"f.txt": b"hello world\n"}) result = runner.invoke( cli, ["content-grep", "hello", "-i", "--json"], env=_env(tmp_path) ) assert result.exit_code == 0 data = json.loads(result.output) assert len(data["results"]) >= 1 # --------------------------------------------------------------------------- # Stress: 100 files, pattern matches 50 # --------------------------------------------------------------------------- def test_content_grep_stress_100_files(tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) files: _FileStore = {} for i in range(100): content = b"TARGET_LINE\n" if i % 2 == 0 else b"other content\n" files[f"file_{i:04d}.txt"] = content _commit_files(tmp_path, files) result = runner.invoke( cli, ["content-grep", "TARGET_LINE", "--json"], env=_env(tmp_path) ) assert result.exit_code == 0 data = json.loads(result.output) assert len(data["results"]) == 50 # --------------------------------------------------------------------------- # Working-tree mode: --working-tree searches disk, not the committed snapshot # --------------------------------------------------------------------------- def test_content_grep_working_tree_finds_uncommitted_edit(tmp_path: pathlib.Path) -> None: """--working-tree finds content written to disk that is not yet committed.""" _init_repo(tmp_path) # Commit a file with one pattern. _commit_files(tmp_path, {"song.txt": b"chord: Am\n"}) # Write an uncommitted edit with a different pattern. (tmp_path / "song.txt").write_bytes(b"chord: WORKING_TREE_ONLY\n") # Without --working-tree, finds the committed content. result_committed = runner.invoke( cli, ["content-grep", "Am"], env=_env(tmp_path) ) assert result_committed.exit_code == 0 # With --working-tree, finds the disk content. result_wt = runner.invoke( cli, ["content-grep", "WORKING_TREE_ONLY", "--working-tree"], env=_env(tmp_path), ) assert result_wt.exit_code == 0 assert "song.txt" in result_wt.output def test_content_grep_working_tree_no_match(tmp_path: pathlib.Path) -> None: """--working-tree returns exit 1 when pattern absent; --json still emits valid JSON.""" _init_repo(tmp_path) (tmp_path / "notes.txt").write_bytes(b"hello world\n") result = runner.invoke( cli, ["content-grep", "ZZZNOMATCH", "--working-tree", "--json"], env=_env(tmp_path), ) assert result.exit_code != 0 data = json.loads(result.output) assert data["total_matches"] == 0 assert data["results"] == [] def test_content_grep_working_tree_skips_muse_dir(tmp_path: pathlib.Path) -> None: """--working-tree never searches inside the .muse object store.""" _init_repo(tmp_path) # Write a matching string inside .muse/ — must NOT be found. (muse_dir(tmp_path) / "stray.txt").write_bytes(b"SECRET_IN_MUSE\n") # Write the same string outside .muse/ — must be found. (tmp_path / "real.txt").write_bytes(b"SECRET_IN_MUSE\n") result = runner.invoke( cli, ["content-grep", "SECRET_IN_MUSE", "--working-tree", "--json"], env=_env(tmp_path), ) assert result.exit_code == 0 data = json.loads(result.output) paths = [r["path"] for r in data["results"]] assert "real.txt" in paths assert not any(".muse" in p for p in paths) def test_content_grep_working_tree_json_schema(tmp_path: pathlib.Path) -> None: """--working-tree JSON output has source=working-tree and null commit_id/snapshot_id.""" _init_repo(tmp_path) (tmp_path / "f.txt").write_bytes(b"TARGET\n") result = runner.invoke( cli, ["content-grep", "TARGET", "--working-tree", "--json"], env=_env(tmp_path), ) assert result.exit_code == 0 data = json.loads(result.output) assert data["source"] == "working-tree" assert data["commit_id"] is None assert data["snapshot_id"] is None assert data["results"][0]["object_id"] is None def test_content_grep_working_tree_files_only(tmp_path: pathlib.Path) -> None: """--working-tree --files-only prints only file paths, no line numbers.""" _init_repo(tmp_path) (tmp_path / "a.txt").write_bytes(b"match\n") (tmp_path / "b.txt").write_bytes(b"match\n") result = runner.invoke( cli, ["content-grep", "match", "--working-tree", "--files-only"], env=_env(tmp_path), ) assert result.exit_code == 0 lines = [l.strip() for l in result.output.strip().splitlines() if l.strip()] assert all(":" not in l for l in lines) assert {"a.txt", "b.txt"}.issubset(set(lines)) def test_content_grep_working_tree_and_ref_mutually_exclusive(tmp_path: pathlib.Path) -> None: """Passing both --working-tree and --ref is a user error (exit non-zero).""" _init_repo(tmp_path) _commit_files(tmp_path, {"f.txt": b"content\n"}) result = runner.invoke( cli, ["content-grep", "content", "--working-tree", "--ref", "main"], env=_env(tmp_path), ) assert result.exit_code != 0 def test_content_grep_snapshot_json_has_source_commit(tmp_path: pathlib.Path) -> None: """Snapshot mode JSON output has source=commit and non-null commit_id/snapshot_id.""" _init_repo(tmp_path) _commit_files(tmp_path, {"f.txt": b"TARGET\n"}) result = runner.invoke( cli, ["content-grep", "TARGET", "--json"], env=_env(tmp_path) ) assert result.exit_code == 0 data = json.loads(result.output) assert data["source"] == "commit" assert data["commit_id"] is not None assert data["snapshot_id"] is not None