gabriel / muse public
test_cmd_grep.py python
596 lines 24.5 KB
Raw
1 """Comprehensive tests for ``muse code grep``.
2
3 Coverage
4 --------
5 Unit
6 _normalise_language — case folding, unknown passthrough
7 _file_matches — exact, suffix, separator normalisation
8 _resolve_file_filter — single match, ambiguous, no match
9 _MAX_PATTERN_LEN — boundary value
10 _KIND_ICON — all documented kinds present
11
12 Integration
13 grep basic substring — match, no-match, kind filter, language filter
14 grep --regex — valid, invalid (→ exit 1), boundary
15 grep --file — scoped to one file (faster path)
16 grep --count — prints integer, no extra lines
17 grep --json — schema correctness, unicode, qualified-name search
18 grep --hashes — content-id appears in output
19 grep --commit — historical snapshot
20 qualified-name search — "Invoice.compute_total" hits only that method
21
22 Security / ReDoS
23 Pattern length cap — 512 accepted, 513 rejected
24 Catastrophic regex — (a+)+ type does not hang (timeout guard)
25 NUL bytes in pattern — handled without crash
26 Control chars — handled without crash
27
28 Stress
29 1 000 symbols — search completes in < 5 s
30 512-char regex — compiles and runs without hang
31 """
32
33 from __future__ import annotations
34
35 import json
36 import pathlib
37 import textwrap
38 import time
39
40 import pytest
41
42 from tests.cli_test_helper import CliRunner
43 from muse.cli.commands.grep import (
44 _KIND_ICON,
45 _MAX_PATTERN_LEN,
46 _file_matches,
47 normalise_language as _normalise_language,
48 _resolve_file_filter,
49 )
50
# NOTE(review): ``cli`` is None and is passed as the first argument of every
# ``runner.invoke`` call in this module — presumably the project's CliRunner
# resolves the application itself and ignores this argument; confirm.
cli = None
# Single runner instance shared by every test and fixture in this module.
runner = CliRunner()
53
54
55 # ---------------------------------------------------------------------------
56 # Shared fixture
57 # ---------------------------------------------------------------------------
58
59
@pytest.fixture
def repo(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> pathlib.Path:
    """Build a committed code-domain Muse repo holding ``billing.py`` and ``auth.py``.

    The working directory and ``MUSE_REPO_ROOT`` are both pointed at
    *tmp_path* before ``init`` runs.  Returns *tmp_path* once the initial
    commit has succeeded.
    """
    monkeypatch.chdir(tmp_path)
    monkeypatch.setenv("MUSE_REPO_ROOT", str(tmp_path))
    init_outcome = runner.invoke(cli, ["init", "--domain", "code"])
    assert init_outcome.exit_code == 0, init_outcome.output

    # One class with two methods plus one module-level function.
    billing_source = textwrap.dedent("""\
        class Invoice:
            def compute_total(self, items: list[int]) -> int:
                return sum(items)

            def apply_discount(self, total: float, pct: float) -> float:
                return total * (1 - pct)

        def validate_amount(amount: float) -> bool:
            return amount > 0
    """)
    billing_path = tmp_path / "billing.py"
    billing_path.write_text(billing_source)

    # One module-level function plus one class with a single method.
    auth_source = textwrap.dedent("""\
        def validate_token(token: str) -> bool:
            return len(token) > 0

        class Validator:
            def validate(self, value: object) -> bool:  # type: ignore[override]
                return bool(value)
    """)
    auth_path = tmp_path / "auth.py"
    auth_path.write_text(auth_source)

    commit_outcome = runner.invoke(cli, ["commit", "-m", "initial"])
    assert commit_outcome.exit_code == 0, commit_outcome.output
    return tmp_path
92
93
94 # ---------------------------------------------------------------------------
95 # Unit — _normalise_language
96 # ---------------------------------------------------------------------------
97
98
class TestNormaliseLanguage:
    """Unit tests for language-name normalisation."""

    def test_python_lowercase(self) -> None:
        normalised = _normalise_language("python")
        assert normalised == "Python"

    def test_python_uppercase(self) -> None:
        normalised = _normalise_language("PYTHON")
        assert normalised == "Python"

    def test_python_mixed(self) -> None:
        normalised = _normalise_language("PyThOn")
        assert normalised == "Python"

    def test_unknown_passthrough(self) -> None:
        # A language the function does not know comes back unchanged
        # (after stripping).
        assert _normalise_language("Cobol") == "Cobol"

    def test_strips_whitespace(self) -> None:
        assert _normalise_language(" python ") == "Python"

    def test_empty_string(self) -> None:
        # The empty string is not a known language, so it passes through.
        assert _normalise_language("") == ""
122
123
124 # ---------------------------------------------------------------------------
125 # Unit — _file_matches
126 # ---------------------------------------------------------------------------
127
128
class TestFileMatches:
    """Unit tests for matching a manifest path against a ``--file`` filter."""

    def test_exact_match(self) -> None:
        path = "src/billing.py"
        assert _file_matches(path, "src/billing.py")

    def test_suffix_match(self) -> None:
        path = "src/billing.py"
        assert _file_matches(path, "billing.py")

    def test_no_match(self) -> None:
        path = "src/billing.py"
        assert not _file_matches(path, "other.py")

    def test_partial_name_no_match(self) -> None:
        # "illing.py" is a textual suffix of "billing.py", but a suffix only
        # counts when it starts at a slash boundary.
        path = "src/billing.py"
        assert not _file_matches(path, "illing.py")

    def test_backslash_normalised(self) -> None:
        # A Windows-style separator in the filter is normalised before the
        # suffix comparison.
        windows_filter = "src\\billing.py"
        assert _file_matches("a/src/billing.py", windows_filter)

    def test_empty_filter(self) -> None:
        # An empty filter must not crash and must not match:
        #   "src/billing.py" == ""            -> False
        #   "src/billing.py".endswith("/")    -> False  ("/" + "" is "/")
        path = "src/billing.py"
        assert not _file_matches(path, "")

    def test_deep_path_suffix(self) -> None:
        path = "a/b/c/d.py"
        assert _file_matches(path, "c/d.py")

    def test_same_filename_different_dir_no_match(self) -> None:
        path = "src/billing.py"
        assert not _file_matches(path, "tests/billing.py")
160
161
162 # ---------------------------------------------------------------------------
163 # Unit — _resolve_file_filter
164 # ---------------------------------------------------------------------------
165
166
class TestResolveFileFilter:
    """Unit tests for resolving a ``--file`` filter against a manifest mapping."""

    def test_single_match_returns_full_path(self) -> None:
        entries = {"src/billing.py": "abc", "src/auth.py": "def"}
        assert _resolve_file_filter("billing.py", entries) == "src/billing.py"

    def test_no_match_returns_none(self) -> None:
        entries = {"src/billing.py": "abc"}
        assert _resolve_file_filter("nonexistent.py", entries) is None

    def test_ambiguous_raises_system_exit(self) -> None:
        # Two manifest paths share the basename, so the filter is ambiguous.
        entries = {"a/billing.py": "hash1", "b/billing.py": "hash2"}
        with pytest.raises(SystemExit):
            _resolve_file_filter("billing.py", entries)

    def test_exact_path_returns_itself(self) -> None:
        entries = {"src/billing.py": "abc"}
        assert _resolve_file_filter("src/billing.py", entries) == "src/billing.py"

    def test_empty_manifest_returns_none(self) -> None:
        assert _resolve_file_filter("billing.py", {}) is None
194
195
196 # ---------------------------------------------------------------------------
197 # Unit — _MAX_PATTERN_LEN and _KIND_ICON constants
198 # ---------------------------------------------------------------------------
199
200
class TestConstants:
    """Pin the module-level constants the grep command exposes."""

    def test_max_pattern_len_is_512(self) -> None:
        expected_cap = 512
        assert _MAX_PATTERN_LEN == expected_cap

    def test_kind_icon_has_function(self) -> None:
        kind = "function"
        assert kind in _KIND_ICON

    def test_kind_icon_has_class(self) -> None:
        kind = "class"
        assert kind in _KIND_ICON

    def test_kind_icon_has_method(self) -> None:
        kind = "method"
        assert kind in _KIND_ICON
213
214
215 # ---------------------------------------------------------------------------
216 # Integration — basic substring search
217 # ---------------------------------------------------------------------------
218
219
class TestGrepBasic:
    """Plain substring searches against the two-file fixture repo."""

    def test_finds_function_by_name(self, repo: pathlib.Path) -> None:
        outcome = runner.invoke(cli, ["code", "grep", "validate"])
        assert outcome.exit_code == 0, outcome.output
        assert "validate" in outcome.output.lower()

    def test_no_match_exits_zero(self, repo: pathlib.Path) -> None:
        outcome = runner.invoke(cli, ["code", "grep", "zzznomatch999"])
        assert outcome.exit_code == 0
        assert "no symbols" in outcome.output.lower()

    def test_kind_filter_function(self, repo: pathlib.Path) -> None:
        argv = ["code", "grep", "validate", "--kind", "function"]
        outcome = runner.invoke(cli, argv)
        assert outcome.exit_code == 0
        # Restricted to functions (methods are meant to be excluded).
        assert "fn" in outcome.output

    def test_kind_filter_class(self, repo: pathlib.Path) -> None:
        argv = ["code", "grep", "Invoice", "--kind", "class"]
        outcome = runner.invoke(cli, argv)
        assert outcome.exit_code == 0
        assert "Invoice" in outcome.output
        # Invoice's own methods must not be listed.
        assert "compute_total" not in outcome.output

    def test_language_filter(self, repo: pathlib.Path) -> None:
        argv = ["code", "grep", "validate", "--language", "python"]
        outcome = runner.invoke(cli, argv)
        assert outcome.exit_code == 0
        assert "validate" in outcome.output.lower()

    def test_language_filter_unknown_exits_zero(self, repo: pathlib.Path) -> None:
        argv = ["code", "grep", "validate", "--language", "COBOL"]
        outcome = runner.invoke(cli, argv)
        # The repo has no COBOL files: zero matches, but not an error.
        assert outcome.exit_code == 0

    def test_match_count_suffix(self, repo: pathlib.Path) -> None:
        outcome = runner.invoke(cli, ["code", "grep", "validate"])
        assert "match" in outcome.output.lower()
257
258
259 # ---------------------------------------------------------------------------
260 # Integration — --count flag
261 # ---------------------------------------------------------------------------
262
263
class TestGrepCount:
    """``--count`` prints a leading integer followed by ``match(es)``."""

    def test_count_only_prints_integer(self, repo: pathlib.Path) -> None:
        outcome = runner.invoke(cli, ["code", "grep", "validate", "--count"])
        assert outcome.exit_code == 0
        first_line = outcome.output.strip().splitlines()[0]
        assert first_line.endswith("match(es)")
        # The first whitespace-separated token has to be the integer count.
        leading_token = first_line.split()[0]
        assert leading_token.isdigit()

    def test_count_zero_for_no_match(self, repo: pathlib.Path) -> None:
        outcome = runner.invoke(cli, ["code", "grep", "zzz_nothing", "--count"])
        assert outcome.exit_code == 0
        trimmed = outcome.output.strip()
        assert trimmed.startswith("0")
278
279
280 # ---------------------------------------------------------------------------
281 # Integration — --json output
282 # ---------------------------------------------------------------------------
283
284
class TestGrepJson:
    """``--json`` output: top-level schema, per-result fields, pattern echo.

    Every test checks the exit code before parsing, so a CLI failure is
    reported with the command's own output instead of a ``JSONDecodeError``.
    """

    def test_json_schema(self, repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "validate", "--json"])
        assert result.exit_code == 0, result.output
        data = json.loads(result.output)
        assert "total_matches" in data
        assert "results" in data
        assert isinstance(data["results"], list)

    def test_json_result_fields(self, repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "validate", "--json"])
        assert result.exit_code == 0, result.output
        data = json.loads(result.output)
        # The fixture repo defines several ``validate*`` symbols, so an empty
        # result list is a failure — not a reason to skip the field checks.
        assert data["results"], "expected at least one match for 'validate'"
        for entry in data["results"]:
            for field in ("address", "kind", "name", "lineno"):
                assert field in entry, f"missing field {field!r}"

    def test_json_total_matches_consistent(self, repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "validate", "--json"])
        assert result.exit_code == 0, result.output
        data = json.loads(result.output)
        assert data["total_matches"] == len(data["results"])

    def test_json_no_match_empty_results(self, repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "zzz_nope_ever", "--json"])
        assert result.exit_code == 0, result.output
        data = json.loads(result.output)
        assert data["total_matches"] == 0
        assert data["results"] == []

    def test_json_pattern_echoed(self, repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "validate", "--json"])
        assert result.exit_code == 0, result.output
        data = json.loads(result.output)
        assert data["pattern"] == "validate"

    def test_json_unicode_pattern(self, repo: pathlib.Path) -> None:
        # A non-ASCII pattern must round-trip through the JSON output intact.
        result = runner.invoke(cli, ["code", "grep", "café", "--json"])
        assert result.exit_code == 0, result.output
        data = json.loads(result.output)
        assert data["pattern"] == "café"
323
324
325 # ---------------------------------------------------------------------------
326 # Integration — --file scoped search
327 # ---------------------------------------------------------------------------
328
329
class TestGrepFile:
    """``--file`` restricts the search to a single manifest path."""

    def test_file_scoped_results_only_from_that_file(self, repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "validate", "--file", "billing.py"])
        assert result.exit_code == 0, result.output
        # Unconditional: whatever the scoped search prints, nothing from
        # auth.py may appear.  (Guarding this behind an ``if`` would let the
        # test pass without checking anything.)
        assert "auth.py" not in result.output

    def test_file_ambiguous_exits_nonzero(self, repo: pathlib.Path) -> None:
        # Add a second billing.py in a subdirectory so the bare basename no
        # longer identifies a single file.
        (repo / "sub").mkdir()
        (repo / "sub" / "billing.py").write_text("def helper(): pass\n")
        runner.invoke(cli, ["code", "add", "."])
        runner.invoke(cli, ["commit", "-m", "add sub billing"])
        result = runner.invoke(cli, ["code", "grep", "helper", "--file", "billing.py"])
        # With two billing.py files the filter should be reported as ambiguous.
        assert result.exit_code == 1 or "ambiguous" in result.output.lower()
346
347
348 # ---------------------------------------------------------------------------
349 # Integration — --hashes flag
350 # ---------------------------------------------------------------------------
351
352
class TestGrepHashes:
    """``--hashes`` adds a content-hash prefix to the output."""

    def test_hashes_appear_in_output(self, repo: pathlib.Path) -> None:
        argv = ["code", "grep", "validate", "--hashes"]
        outcome = runner.invoke(cli, argv)
        assert outcome.exit_code == 0
        # The hash prefix is expected as 8 hex characters followed by "..".
        assert ".." in outcome.output
359
360
361 # ---------------------------------------------------------------------------
362 # Integration — qualified name search
363 # ---------------------------------------------------------------------------
364
365
class TestGrepQualifiedName:
    """Searches using ``Class.method`` / ``Class::method`` style patterns."""

    def test_dot_separator_hits_qualified_name(self, repo: pathlib.Path) -> None:
        outcome = runner.invoke(cli, ["code", "grep", "Invoice.compute_total"])
        assert outcome.exit_code == 0
        assert "compute_total" in outcome.output

    def test_double_colon_separator_hits_qualified_name(self, repo: pathlib.Path) -> None:
        outcome = runner.invoke(cli, ["code", "grep", "Invoice::compute_total"])
        assert outcome.exit_code == 0

    def test_qualified_name_does_not_match_unrelated(self, repo: pathlib.Path) -> None:
        outcome = runner.invoke(cli, ["code", "grep", "Invoice.zzz_missing"])
        assert outcome.exit_code == 0
        lowered = outcome.output.lower()
        assert "no symbols" in lowered or "0 match" in lowered
380
381
382 # ---------------------------------------------------------------------------
383 # Integration — --regex flag
384 # ---------------------------------------------------------------------------
385
386
class TestGrepRegex:
    """``--regex`` mode: valid patterns, invalid patterns, anchored misses."""

    def test_valid_regex_matches(self, repo: pathlib.Path) -> None:
        outcome = runner.invoke(cli, ["code", "grep", "--regex", "^validate"])
        assert outcome.exit_code == 0

    def test_invalid_regex_exits_one(self, repo: pathlib.Path) -> None:
        outcome = runner.invoke(cli, ["code", "grep", "--regex", "[unclosed"])
        assert outcome.exit_code == 1
        err_text = outcome.stderr.lower()
        assert "regex" in err_text or "invalid" in err_text

    def test_regex_anchored_no_match(self, repo: pathlib.Path) -> None:
        outcome = runner.invoke(cli, ["code", "grep", "--regex", "^zzz_nothing$"])
        assert outcome.exit_code == 0
        lowered = outcome.output.lower()
        assert "0 match" in lowered or "no symbols" in lowered
401
402
403 # ---------------------------------------------------------------------------
404 # Security — ReDoS guards
405 # ---------------------------------------------------------------------------
406
407
class TestGrepSecurity:
    """Pattern-length cap, pathological patterns, and odd bytes in the pattern."""

    def test_pattern_at_512_accepted(self, repo: pathlib.Path) -> None:
        pattern = "a" * 512
        result = runner.invoke(cli, ["code", "grep", pattern])
        # An accepted pattern with no hits is an ordinary no-match run, which
        # exits 0; a rejection exits 1 (see the 513 test).
        assert result.exit_code == 0, result.output
        # The rejection message is reported on stderr in the 513 case, so
        # check both streams rather than only stdout.
        assert "too long" not in result.output.lower()
        assert "too long" not in result.stderr.lower()

    def test_pattern_at_513_rejected(self, repo: pathlib.Path) -> None:
        pattern = "a" * 513
        result = runner.invoke(cli, ["code", "grep", pattern])
        assert result.exit_code == 1
        assert "512" in result.stderr or "too long" in result.stderr.lower()

    def test_catastrophic_regex_does_not_hang(self, repo: pathlib.Path) -> None:
        # (a+)+ is exponential on backtracking engines.  Without --regex the
        # pattern is expected to be escaped, i.e. searched for literally as
        # the substring "(a+)+", which must return quickly.
        start = time.monotonic()
        result = runner.invoke(cli, ["code", "grep", "(a+)+"])
        elapsed = time.monotonic() - start
        assert result.exit_code == 0
        assert elapsed < 5.0, f"grep took {elapsed:.1f}s — possible hang"

    def test_null_bytes_in_pattern_handled(self, repo: pathlib.Path) -> None:
        # A NUL byte in the pattern must not crash the process; either a
        # clean success or a clean rejection is acceptable.
        result = runner.invoke(cli, ["code", "grep", "val\x00idate"])
        assert result.exit_code in (0, 1)

    def test_control_chars_in_pattern_handled(self, repo: pathlib.Path) -> None:
        # Same contract as the NUL-byte case, for other control characters.
        result = runner.invoke(cli, ["code", "grep", "val\x01\x02idate"])
        assert result.exit_code in (0, 1)

    def test_requires_repo(self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> None:
        # No ``init`` has been run here and MUSE_REPO_ROOT is cleared, so the
        # command has no repository to operate on.
        monkeypatch.chdir(tmp_path)
        monkeypatch.delenv("MUSE_REPO_ROOT", raising=False)
        result = runner.invoke(cli, ["code", "grep", "validate"])
        assert result.exit_code != 0
445
446
447 # ---------------------------------------------------------------------------
448 # Stress — 1 000 symbols, search must complete in < 5 s
449 # ---------------------------------------------------------------------------
450
451
class TestGrepStress:
    """Searches over a generated repo of 1 000 functions must stay under 5 s."""

    @pytest.fixture
    def large_repo(
        self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
    ) -> pathlib.Path:
        """Repo with 1 000 Python functions: 10 files of 100 functions each."""
        monkeypatch.chdir(tmp_path)
        monkeypatch.setenv("MUSE_REPO_ROOT", str(tmp_path))
        # Check init like the ``repo`` fixture does, so a failed init is
        # reported here rather than as a confusing commit failure below.
        init_result = runner.invoke(cli, ["init", "--domain", "code"])
        assert init_result.exit_code == 0, init_result.output

        for file_idx in range(10):
            lines: list[str] = []
            for sym_idx in range(100):
                lines.append(f"def compute_{file_idx}_{sym_idx}(x: int) -> int:")
                lines.append(f"    return x + {sym_idx}")
                lines.append("")
            (tmp_path / f"module_{file_idx}.py").write_text("\n".join(lines))

        r = runner.invoke(cli, ["commit", "-m", "large module"])
        assert r.exit_code == 0, r.output
        return tmp_path

    def test_search_1000_symbols_under_5s(self, large_repo: pathlib.Path) -> None:
        start = time.monotonic()
        result = runner.invoke(cli, ["code", "grep", "compute"])
        elapsed = time.monotonic() - start
        assert result.exit_code == 0, result.output
        assert "1000" in result.output or "match" in result.output.lower()
        assert elapsed < 5.0, f"grep took {elapsed:.1f}s on 1 000 symbols"

    def test_count_mode_1000_symbols_under_5s(self, large_repo: pathlib.Path) -> None:
        start = time.monotonic()
        result = runner.invoke(cli, ["code", "grep", "compute", "--count"])
        elapsed = time.monotonic() - start
        assert result.exit_code == 0
        assert elapsed < 5.0

    def test_json_mode_1000_symbols_schema_valid(self, large_repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "compute", "--json"])
        assert result.exit_code == 0
        data = json.loads(result.output)
        assert data["total_matches"] == len(data["results"])
        assert data["total_matches"] >= 1000

    def test_regex_search_1000_symbols_under_5s(self, large_repo: pathlib.Path) -> None:
        start = time.monotonic()
        result = runner.invoke(cli, ["code", "grep", "--regex", r"^compute_\d"])
        elapsed = time.monotonic() - start
        assert result.exit_code == 0
        assert elapsed < 5.0

    def test_kind_filter_1000_symbols_correct_count(self, large_repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "compute", "--kind", "function", "--count"])
        assert result.exit_code == 0
        # First line is "<count> ..." — take the leading integer.
        line = result.output.strip().splitlines()[0]
        count = int(line.split()[0])
        assert count >= 1000

    def test_512_char_regex_does_not_hang_on_large_corpus(
        self, large_repo: pathlib.Path
    ) -> None:
        pattern = f"compute_{'0' * 504}"  # exactly 512 chars
        # Guard the arithmetic: the test is only meaningful at the cap.
        assert len(pattern) == 512
        start = time.monotonic()
        result = runner.invoke(cli, ["code", "grep", pattern])
        elapsed = time.monotonic() - start
        assert elapsed < 5.0, f"512-char pattern took {elapsed:.1f}s — possible hang"
        assert result.exit_code in (0, 1)  # no match is fine
519
520
521 # ---------------------------------------------------------------------------
522 # --files flag (-l) — one file path per line, unique, sorted
523 # ---------------------------------------------------------------------------
524
525
class TestGrepFiles:
    """``muse code grep --files`` emits each matching file path once, one per line.

    The point is easy piping without JSON parsing, in the manner of
    ``grep -l`` / ``rg -l``.
    """

    @staticmethod
    def _paths(output: str) -> list[str]:
        """Return the non-blank lines of *output*, stripped of surrounding whitespace."""
        return [line.strip() for line in output.splitlines() if line.strip()]

    def test_files_lists_matching_file(self, repo: pathlib.Path) -> None:
        outcome = runner.invoke(cli, ["code", "grep", "validate", "--files"])
        assert outcome.exit_code == 0, outcome.output
        paths = self._paths(outcome.output)
        assert any("billing.py" in path or "auth.py" in path for path in paths)

    def test_files_output_is_unique_paths(self, repo: pathlib.Path) -> None:
        """A file with several matches is still listed only once."""
        outcome = runner.invoke(cli, ["code", "grep", "validate", "--files"])
        assert outcome.exit_code == 0, outcome.output
        paths = self._paths(outcome.output)
        assert len(paths) == len(set(paths)), "duplicate file paths in --files output"

    def test_files_output_is_sorted(self, repo: pathlib.Path) -> None:
        outcome = runner.invoke(cli, ["code", "grep", "validate", "--files"])
        assert outcome.exit_code == 0, outcome.output
        paths = self._paths(outcome.output)
        assert paths == sorted(paths), "--files output must be sorted"

    def test_files_no_match_empty_output(self, repo: pathlib.Path) -> None:
        outcome = runner.invoke(cli, ["code", "grep", "zzznomatch", "--files"])
        assert outcome.exit_code == 0
        assert outcome.output.strip() == ""

    def test_files_excludes_non_matching_files(self, repo: pathlib.Path) -> None:
        """Files without any match are left out of the listing."""
        outcome = runner.invoke(cli, ["code", "grep", "Invoice", "--files"])
        assert outcome.exit_code == 0, outcome.output
        paths = self._paths(outcome.output)
        # Only billing.py defines Invoice.
        assert all("billing.py" in path for path in paths)
        assert not any("auth.py" in path for path in paths)

    def test_files_mutually_exclusive_with_json(self, repo: pathlib.Path) -> None:
        argv = ["code", "grep", "validate", "--files", "--json"]
        outcome = runner.invoke(cli, argv)
        assert outcome.exit_code != 0

    def test_files_mutually_exclusive_with_count(self, repo: pathlib.Path) -> None:
        argv = ["code", "grep", "validate", "--files", "--count"]
        outcome = runner.invoke(cli, argv)
        assert outcome.exit_code != 0

    def test_files_long_flag_only(self, repo: pathlib.Path) -> None:
        """Only the long ``--files`` spelling exists (``-l`` belongs to ``--language``)."""
        outcome = runner.invoke(cli, ["code", "grep", "validate", "--files"])
        assert outcome.exit_code == 0, outcome.output
        paths = self._paths(outcome.output)
        assert len(paths) > 0

    def test_files_compatible_with_kind_filter(self, repo: pathlib.Path) -> None:
        argv = ["code", "grep", "validate", "--files", "--kind", "function"]
        outcome = runner.invoke(cli, argv)
        assert outcome.exit_code == 0, outcome.output
        paths = self._paths(outcome.output)
        # validate_amount and validate_token are both plain functions.
        assert len(paths) > 0

    def test_files_compatible_with_file_filter(self, repo: pathlib.Path) -> None:
        argv = ["code", "grep", "validate", "--files", "--file", "billing.py"]
        outcome = runner.invoke(cli, argv)
        assert outcome.exit_code == 0, outcome.output
        paths = self._paths(outcome.output)
        assert all("billing.py" in path for path in paths)
File History 1 commit