gabriel / muse public

test_cmd_content_grep.py file-level

at main · View file ↗ · Intel ↗

History
1 files
1 commits
0 hotspots
0 🧊 dead
0 💥 blast risk
sha256:4 Merge branch 'dev' into main · gabriel · Jun 17, 2026
1 """Tests for ``muse content-grep``.
2
3 Covers: no match exit-1, pattern found, --files-only, --count, --ignore-case,
4 --json, binary skip, multi-file, stress: 100 files.
5 Working-tree mode: --working-tree searches disk, not the committed snapshot.
6 """
7
8 from __future__ import annotations
9
10 type _FileStore = dict[str, bytes]
11
12 import datetime
13 import json
14 import pathlib
15
16 import pytest
17 from tests.cli_test_helper import CliRunner
18
19 cli = None # argparse migration — CliRunner ignores this arg
20 from muse.core.object_store import write_object
21 from muse.core.ids import hash_commit, hash_snapshot
22 from muse.core.commits import (
23 CommitRecord,
24 write_commit,
25 )
26 from muse.core.snapshots import (
27 SnapshotRecord,
28 write_snapshot,
29 )
30 from muse.core.types import Manifest, blob_id
31 from muse.core.paths import heads_dir, muse_dir
32
33 runner = CliRunner()
34
35 _REPO_ID = "cgrep-test"
36
37
38 # ---------------------------------------------------------------------------
39 # Helpers
40 # ---------------------------------------------------------------------------
41
42
43
44
def _init_repo(path: pathlib.Path) -> pathlib.Path:
    """Lay out a minimal repository skeleton under *path* and return *path*.

    Creates the ``commits``, ``snapshots``, ``objects`` and ``refs/heads``
    directories inside ``muse_dir(path)``, writes a ``HEAD`` file that points
    at ``refs/heads/main``, and writes a ``repo.json`` carrying the test
    repo id and the ``midi`` domain.
    """
    store = muse_dir(path)
    for sub in ("commits", "snapshots", "objects", "refs/heads"):
        store.joinpath(sub).mkdir(parents=True, exist_ok=True)

    head_file = store / "HEAD"
    head_file.write_text("ref: refs/heads/main", encoding="utf-8")

    repo_meta = {"repo_id": _REPO_ID, "domain": "midi"}
    (store / "repo.json").write_text(json.dumps(repo_meta), encoding="utf-8")
    return path
54
55
56 def _env(repo: pathlib.Path) -> Manifest:
57 return {"MUSE_REPO_ROOT": str(repo)}
58
59
_counter = 0


def _commit_files(root: pathlib.Path, files: _FileStore) -> str:
    """Store *files* as a new parentless commit on ``main`` and return its id.

    Each file's bytes are written to the object store, a snapshot is written
    for the resulting manifest, a commit record is written for that snapshot,
    and the ``main`` head file is overwritten with the new commit id.
    """
    global _counter
    _counter += 1
    # The counter only serves to give every commit a distinct message.
    message = f"commit {_counter}"

    manifest: Manifest = {}
    for rel_path, payload in files.items():
        object_id = blob_id(payload)
        write_object(root, object_id, payload)
        manifest[rel_path] = object_id

    snapshot_id = hash_snapshot(manifest)
    write_snapshot(root, SnapshotRecord(snapshot_id=snapshot_id, manifest=manifest))

    now_utc = datetime.datetime.now(datetime.timezone.utc)
    commit_id = hash_commit(
        parent_ids=[],
        snapshot_id=snapshot_id,
        message=message,
        committed_at_iso=now_utc.isoformat(),
    )
    record = CommitRecord(
        commit_id=commit_id,
        branch="main",
        snapshot_id=snapshot_id,
        message=message,
        committed_at=now_utc,
    )
    write_commit(root, record)

    main_head = heads_dir(root) / "main"
    main_head.write_text(commit_id, encoding="utf-8")
    return commit_id
88
89
90 # ---------------------------------------------------------------------------
91 # Unit: help
92 # ---------------------------------------------------------------------------
93
94
def test_content_grep_help() -> None:
    """``content-grep --help`` exits 0 and its output contains "pattern"."""
    help_result = runner.invoke(cli, ["content-grep", "--help"])
    assert help_result.exit_code == 0
    assert "pattern" in help_result.output
99
100
101 # ---------------------------------------------------------------------------
102 # Unit: no match → exit 1
103 # ---------------------------------------------------------------------------
104
105
def test_content_grep_no_match(tmp_path: pathlib.Path) -> None:
    """A pattern with no hits exits non-zero yet still prints parseable JSON."""
    _init_repo(tmp_path)
    _commit_files(tmp_path, {"song.txt": b"chord: Am\ntempo: 120\n"})

    argv = ["content-grep", "ZZZNOMATCH", "--json"]
    outcome = runner.invoke(cli, argv, env=_env(tmp_path))

    assert outcome.exit_code != 0
    # --json has to stay machine-readable on no-match so agents can parse it.
    payload = json.loads(outcome.output)
    assert payload["total_matches"] == 0
    assert payload["results"] == []
115
116
117 # ---------------------------------------------------------------------------
118 # Unit: match found → exit 0
119 # ---------------------------------------------------------------------------
120
121
def test_content_grep_match_found(tmp_path: pathlib.Path) -> None:
    """A pattern present in a committed file exits 0 and names that file."""
    _init_repo(tmp_path)
    _commit_files(tmp_path, {"song.txt": b"chord: Cm7\ntempo: 120\n"})

    argv = ["content-grep", "Cm7"]
    outcome = runner.invoke(cli, argv, env=_env(tmp_path))

    assert outcome.exit_code == 0
    assert "song.txt" in outcome.output
128
129
130 # ---------------------------------------------------------------------------
131 # Unit: --ignore-case
132 # ---------------------------------------------------------------------------
133
134
def test_content_grep_ignore_case(tmp_path: pathlib.Path) -> None:
    """With --ignore-case a lowercase pattern matches uppercase content."""
    _init_repo(tmp_path)
    _commit_files(tmp_path, {"notes.txt": b"VERSE: intro melody\n"})

    argv = ["content-grep", "verse", "--ignore-case"]
    outcome = runner.invoke(cli, argv, env=_env(tmp_path))

    assert outcome.exit_code == 0
    assert "notes.txt" in outcome.output
143
144
def test_content_grep_case_sensitive_no_match(tmp_path: pathlib.Path) -> None:
    """Without --ignore-case a lowercase pattern misses uppercase content."""
    _init_repo(tmp_path)
    _commit_files(tmp_path, {"notes.txt": b"VERSE: intro melody\n"})

    argv = ["content-grep", "verse"]
    outcome = runner.invoke(cli, argv, env=_env(tmp_path))

    # Default matching is case-sensitive: "verse" != "VERSE", so nothing hits.
    assert outcome.exit_code != 0
153
154
155 # ---------------------------------------------------------------------------
156 # Unit: --files-only
157 # ---------------------------------------------------------------------------
158
159
def test_content_grep_files_only(tmp_path: pathlib.Path) -> None:
    """--files-only prints only the matching file paths, no line detail.

    Two committed files both contain the pattern. Every non-blank output line
    must be free of ``:`` separators, and both paths must appear as whole
    lines. This mirrors the checks in the working-tree --files-only test.
    """
    _init_repo(tmp_path)
    _commit_files(tmp_path, {
        "a.txt": b"match here\n",
        "b.txt": b"match here too\n",
    })

    argv = ["content-grep", "match", "--files-only"]
    result = runner.invoke(cli, argv, env=_env(tmp_path))

    assert result.exit_code == 0
    lines = [ln.strip() for ln in result.output.splitlines() if ln.strip()]
    # A grep-style "a.txt:1:match here" line must fail here; the earlier
    # check let any line through as long as it started with a known path.
    assert all(":" not in ln for ln in lines)
    assert {"a.txt", "b.txt"}.issubset(lines)
173
174
175 # ---------------------------------------------------------------------------
176 # Unit: --count
177 # ---------------------------------------------------------------------------
178
179
def test_content_grep_count(tmp_path: pathlib.Path) -> None:
    """--count exits 0 and the output contains the number of matching lines."""
    _init_repo(tmp_path)
    # Three "hit" lines and one "miss" line.
    _commit_files(tmp_path, {"multi.txt": b"hit\nhit\nhit\nmiss\n"})

    argv = ["content-grep", "hit", "--count"]
    outcome = runner.invoke(cli, argv, env=_env(tmp_path))

    assert outcome.exit_code == 0
    assert "3" in outcome.output
188
189
190 # ---------------------------------------------------------------------------
191 # Unit: --json
192 # ---------------------------------------------------------------------------
193
194
def test_content_grep_json_output(tmp_path: pathlib.Path) -> None:
    """--json emits an object whose first result reports at least two matches."""
    _init_repo(tmp_path)
    _commit_files(tmp_path, {"song.midi.txt": b"note: C4\nnote: D4\n"})

    argv = ["content-grep", "note", "--json"]
    outcome = runner.invoke(cli, argv, env=_env(tmp_path))

    assert outcome.exit_code == 0
    payload = json.loads(outcome.output)
    assert isinstance(payload, dict)
    hits = payload["results"]
    assert len(hits) >= 1
    assert hits[0]["match_count"] >= 2
206
207
208 # ---------------------------------------------------------------------------
209 # Unit: binary file skipped silently
210 # ---------------------------------------------------------------------------
211
212
def test_content_grep_binary_skipped(tmp_path: pathlib.Path) -> None:
    """A binary blob next to a text file does not stop the text match."""
    _init_repo(tmp_path)
    blob = b"\x00\x01\x02\x03" * 100
    text = b"searchable text here\n"
    _commit_files(tmp_path, {"binary.bin": blob, "text.txt": text})

    argv = ["content-grep", "searchable"]
    outcome = runner.invoke(cli, argv, env=_env(tmp_path))

    assert outcome.exit_code == 0
    assert "text.txt" in outcome.output
226
227
228 # ---------------------------------------------------------------------------
229 # Unit: short flags work
230 # ---------------------------------------------------------------------------
231
232
def test_content_grep_short_flags(tmp_path: pathlib.Path) -> None:
    """The short ``-i`` flag is accepted alongside --json and yields results."""
    _init_repo(tmp_path)
    _commit_files(tmp_path, {"f.txt": b"hello world\n"})

    argv = ["content-grep", "hello", "-i", "--json"]
    outcome = runner.invoke(cli, argv, env=_env(tmp_path))

    assert outcome.exit_code == 0
    payload = json.loads(outcome.output)
    assert len(payload["results"]) >= 1
242
243
244 # ---------------------------------------------------------------------------
245 # Stress: 100 files, pattern matches 50
246 # ---------------------------------------------------------------------------
247
248
def test_content_grep_stress_100_files(tmp_path: pathlib.Path) -> None:
    """Of 100 committed files, exactly the 50 holding the target are reported."""
    _init_repo(tmp_path)
    # Even-numbered files carry the target line; odd-numbered ones do not.
    files: _FileStore = {
        f"file_{idx:04d}.txt": (
            b"TARGET_LINE\n" if idx % 2 == 0 else b"other content\n"
        )
        for idx in range(100)
    }
    _commit_files(tmp_path, files)

    argv = ["content-grep", "TARGET_LINE", "--json"]
    outcome = runner.invoke(cli, argv, env=_env(tmp_path))

    assert outcome.exit_code == 0
    payload = json.loads(outcome.output)
    assert len(payload["results"]) == 50
262
263
264 # ---------------------------------------------------------------------------
265 # Working-tree mode: --working-tree searches disk, not the committed snapshot
266 # ---------------------------------------------------------------------------
267
268
def test_content_grep_working_tree_finds_uncommitted_edit(tmp_path: pathlib.Path) -> None:
    """--working-tree finds content written to disk that is not yet committed."""
    _init_repo(tmp_path)
    env = _env(tmp_path)

    # The committed version holds one pattern ...
    _commit_files(tmp_path, {"song.txt": b"chord: Am\n"})
    # ... and the on-disk file is then overwritten with a different one.
    on_disk = tmp_path / "song.txt"
    on_disk.write_bytes(b"chord: WORKING_TREE_ONLY\n")

    # Default mode still sees the committed content.
    committed_outcome = runner.invoke(cli, ["content-grep", "Am"], env=env)
    assert committed_outcome.exit_code == 0

    # --working-tree sees what is on disk.
    wt_argv = ["content-grep", "WORKING_TREE_ONLY", "--working-tree"]
    wt_outcome = runner.invoke(cli, wt_argv, env=env)
    assert wt_outcome.exit_code == 0
    assert "song.txt" in wt_outcome.output
290
291
def test_content_grep_working_tree_no_match(tmp_path: pathlib.Path) -> None:
    """--working-tree exits non-zero on no match; --json output stays valid."""
    _init_repo(tmp_path)
    (tmp_path / "notes.txt").write_bytes(b"hello world\n")

    argv = ["content-grep", "ZZZNOMATCH", "--working-tree", "--json"]
    outcome = runner.invoke(cli, argv, env=_env(tmp_path))

    assert outcome.exit_code != 0
    payload = json.loads(outcome.output)
    assert payload["total_matches"] == 0
    assert payload["results"] == []
304
305
def test_content_grep_working_tree_skips_muse_dir(tmp_path: pathlib.Path) -> None:
    """--working-tree reports no path containing ".muse" in its results."""
    _init_repo(tmp_path)
    needle = b"SECRET_IN_MUSE\n"
    # Same content inside the muse directory (must be ignored) ...
    (muse_dir(tmp_path) / "stray.txt").write_bytes(needle)
    # ... and in the regular tree (must be reported).
    (tmp_path / "real.txt").write_bytes(needle)

    argv = ["content-grep", "SECRET_IN_MUSE", "--working-tree", "--json"]
    outcome = runner.invoke(cli, argv, env=_env(tmp_path))

    assert outcome.exit_code == 0
    payload = json.loads(outcome.output)
    reported = [entry["path"] for entry in payload["results"]]
    assert "real.txt" in reported
    assert all(".muse" not in p for p in reported)
323
324
def test_content_grep_working_tree_json_schema(tmp_path: pathlib.Path) -> None:
    """--working-tree JSON has source=working-tree and null commit/snapshot/object ids."""
    _init_repo(tmp_path)
    (tmp_path / "f.txt").write_bytes(b"TARGET\n")

    argv = ["content-grep", "TARGET", "--working-tree", "--json"]
    outcome = runner.invoke(cli, argv, env=_env(tmp_path))

    assert outcome.exit_code == 0
    payload = json.loads(outcome.output)
    assert payload["source"] == "working-tree"
    # Nothing is read from the commit store, so all ids are expected to be null.
    assert payload["commit_id"] is None
    assert payload["snapshot_id"] is None
    first_hit = payload["results"][0]
    assert first_hit["object_id"] is None
339
340
def test_content_grep_working_tree_files_only(tmp_path: pathlib.Path) -> None:
    """--working-tree --files-only prints only file paths, no line numbers."""
    _init_repo(tmp_path)
    for name in ("a.txt", "b.txt"):
        (tmp_path / name).write_bytes(b"match\n")

    argv = ["content-grep", "match", "--working-tree", "--files-only"]
    outcome = runner.invoke(cli, argv, env=_env(tmp_path))

    assert outcome.exit_code == 0
    stripped = (raw.strip() for raw in outcome.output.strip().splitlines())
    lines = [ln for ln in stripped if ln]
    # No "path:line:text" style separators, and both files listed verbatim.
    assert not any(":" in ln for ln in lines)
    assert {"a.txt", "b.txt"} <= set(lines)
354
355
def test_content_grep_working_tree_and_ref_mutually_exclusive(tmp_path: pathlib.Path) -> None:
    """Passing both --working-tree and --ref is a user error (exit non-zero)."""
    _init_repo(tmp_path)
    _commit_files(tmp_path, {"f.txt": b"content\n"})

    argv = ["content-grep", "content", "--working-tree", "--ref", "main"]
    outcome = runner.invoke(cli, argv, env=_env(tmp_path))

    assert outcome.exit_code != 0
366
367
def test_content_grep_snapshot_json_has_source_commit(tmp_path: pathlib.Path) -> None:
    """Snapshot-mode JSON has source=commit and non-null commit_id/snapshot_id."""
    _init_repo(tmp_path)
    _commit_files(tmp_path, {"f.txt": b"TARGET\n"})

    argv = ["content-grep", "TARGET", "--json"]
    outcome = runner.invoke(cli, argv, env=_env(tmp_path))

    assert outcome.exit_code == 0
    payload = json.loads(outcome.output)
    assert payload["source"] == "commit"
    assert payload["commit_id"] is not None
    assert payload["snapshot_id"] is not None