gabriel / muse public
test_cmd_grep.py python
595 lines 24.5 KB
Raw
1 """Comprehensive tests for ``muse code grep``.
2
3 Coverage
4 --------
5 Unit
6 _normalise_language — case folding, unknown passthrough
7 _file_matches — exact, suffix, separator normalisation
8 _resolve_file_filter — single match, ambiguous, no match
9 _MAX_PATTERN_LEN — boundary value
10 _KIND_ICON — all documented kinds present
11
Integration
    grep basic substring — match, no-match, kind filter, language filter
    grep --regex — valid, invalid (→ exit 1), boundary
    grep --file — scoped to one file (faster path)
    grep --count — prints integer, no extra lines
    grep --json — schema correctness, unicode, qualified-name search
    grep --hashes — content-id appears in output
    grep --commit — historical snapshot (TODO: not yet exercised in this module)
    qualified-name search — "Invoice.compute_total" hits only that method
    grep --files — unique, sorted file paths; mutually exclusive with --json / --count
21
Security / ReDoS
    Pattern length cap — 512 accepted, 513 rejected
    Catastrophic regex — "(a+)+" passed as a literal (non ``--regex``) pattern returns in < 5 s
    NUL bytes in pattern — handled without crash
    Control chars — handled without crash
27
Stress
    1 000 symbols — search completes in < 5 s
    512-char pattern — literal search over the large corpus runs without hang
31 """
32
33 from __future__ import annotations
34
35 import json
36 import pathlib
37 import textwrap
38 import time
39
40 import pytest
41
42 from tests.cli_test_helper import CliRunner
43 from muse.cli.commands.grep import (
44 _KIND_ICON,
45 _MAX_PATTERN_LEN,
46 _file_matches,
47 normalise_language as _normalise_language,
48 _resolve_file_filter,
49 )
50
51 cli = None
52 runner = CliRunner()
53
54
55 # ---------------------------------------------------------------------------
56 # Shared fixture
57 # ---------------------------------------------------------------------------
58
59
@pytest.fixture
def repo(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> pathlib.Path:
    """Initialise a fresh code-domain Muse repo with two committed Python files.

    The working directory and ``MUSE_REPO_ROOT`` are both pointed at
    *tmp_path* before ``init`` runs.  Two modules are then written and
    committed in a single commit:

    ``billing.py``
        class ``Invoice`` (methods ``compute_total`` and ``apply_discount``)
        plus the top-level function ``validate_amount``.
    ``auth.py``
        the top-level function ``validate_token`` plus class ``Validator``
        (method ``validate``).

    Returns:
        The repository root, i.e. *tmp_path*.
    """
    monkeypatch.chdir(tmp_path)
    monkeypatch.setenv("MUSE_REPO_ROOT", str(tmp_path))
    init_result = runner.invoke(cli, ["init", "--domain", "code"])
    assert init_result.exit_code == 0, init_result.output

    # Write with an explicit encoding so the fixture does not depend on the
    # platform's locale default.
    billing_source = textwrap.dedent("""\
        class Invoice:
            def compute_total(self, items: list[int]) -> int:
                return sum(items)

            def apply_discount(self, total: float, pct: float) -> float:
                return total * (1 - pct)

        def validate_amount(amount: float) -> bool:
            return amount > 0
    """)
    (tmp_path / "billing.py").write_text(billing_source, encoding="utf-8")

    auth_source = textwrap.dedent("""\
        def validate_token(token: str) -> bool:
            return len(token) > 0

        class Validator:
            def validate(self, value: object) -> bool:  # type: ignore[override]
                return bool(value)
    """)
    (tmp_path / "auth.py").write_text(auth_source, encoding="utf-8")

    commit_result = runner.invoke(cli, ["commit", "-m", "initial"])
    assert commit_result.exit_code == 0, commit_result.output
    return tmp_path
92
93
94 # ---------------------------------------------------------------------------
95 # Unit — _normalise_language
96 # ---------------------------------------------------------------------------
97
98
class TestNormaliseLanguage:
    """Unit tests for ``normalise_language`` (imported as ``_normalise_language``)."""

    def test_python_lowercase(self) -> None:
        canonical = _normalise_language("python")
        assert canonical == "Python"

    def test_python_uppercase(self) -> None:
        canonical = _normalise_language("PYTHON")
        assert canonical == "Python"

    def test_python_mixed(self) -> None:
        canonical = _normalise_language("PyThOn")
        assert canonical == "Python"

    def test_unknown_passthrough(self) -> None:
        # A language name that is not recognised comes back as given.
        assert _normalise_language("Cobol") == "Cobol"

    def test_strips_whitespace(self) -> None:
        # Surrounding whitespace must not prevent recognition.
        assert _normalise_language(" python ") == "Python"

    def test_empty_string(self) -> None:
        # The empty string is not a known language, so it is returned as-is.
        assert _normalise_language("") == ""
122
123
124 # ---------------------------------------------------------------------------
125 # Unit — _file_matches
126 # ---------------------------------------------------------------------------
127
128
class TestFileMatches:
    """Unit tests for ``_file_matches(path, file_filter)``."""

    def test_exact_match(self) -> None:
        matched = _file_matches("src/billing.py", "src/billing.py")
        assert matched

    def test_suffix_match(self) -> None:
        matched = _file_matches("src/billing.py", "billing.py")
        assert matched

    def test_no_match(self) -> None:
        matched = _file_matches("src/billing.py", "other.py")
        assert not matched

    def test_partial_name_no_match(self) -> None:
        # "illing.py" is a textual suffix of "billing.py", but a suffix only
        # counts when it starts at a "/" boundary.
        matched = _file_matches("src/billing.py", "illing.py")
        assert not matched

    def test_backslash_normalised(self) -> None:
        # A Windows-style separator in the filter is expected to be treated
        # like "/" before the suffix comparison.
        matched = _file_matches("a/src/billing.py", "src\\billing.py")
        assert matched

    def test_empty_filter(self) -> None:
        # An empty filter must neither crash nor match:
        #   "src/billing.py" == ""                 -> False
        #   "src/billing.py".endswith("/" + "")    -> False
        matched = _file_matches("src/billing.py", "")
        assert not matched

    def test_deep_path_suffix(self) -> None:
        matched = _file_matches("a/b/c/d.py", "c/d.py")
        assert matched

    def test_same_filename_different_dir_no_match(self) -> None:
        matched = _file_matches("src/billing.py", "tests/billing.py")
        assert not matched
160
161
162 # ---------------------------------------------------------------------------
163 # Unit — _resolve_file_filter
164 # ---------------------------------------------------------------------------
165
166
class TestResolveFileFilter:
    """Unit tests for ``_resolve_file_filter(file_filter, manifest)``."""

    def test_single_match_returns_full_path(self) -> None:
        known_files = {
            "src/billing.py": "abc",
            "src/auth.py": "def",
        }
        assert _resolve_file_filter("billing.py", known_files) == "src/billing.py"

    def test_no_match_returns_none(self) -> None:
        known_files = {"src/billing.py": "abc"}
        assert _resolve_file_filter("nonexistent.py", known_files) is None

    def test_ambiguous_raises_system_exit(self) -> None:
        # Two manifest entries share the basename, so the filter is ambiguous.
        known_files = {"a/billing.py": "hash1", "b/billing.py": "hash2"}
        with pytest.raises(SystemExit):
            _resolve_file_filter("billing.py", known_files)

    def test_exact_path_returns_itself(self) -> None:
        known_files = {"src/billing.py": "abc"}
        assert _resolve_file_filter("src/billing.py", known_files) == "src/billing.py"

    def test_empty_manifest_returns_none(self) -> None:
        assert _resolve_file_filter("billing.py", {}) is None
194
195
196 # ---------------------------------------------------------------------------
197 # Unit — _MAX_PATTERN_LEN and _KIND_ICON constants
198 # ---------------------------------------------------------------------------
199
200
class TestConstants:
    """Pin the module constants ``_MAX_PATTERN_LEN`` and ``_KIND_ICON``."""

    def test_max_pattern_len_is_512(self) -> None:
        expected_cap = 512
        assert _MAX_PATTERN_LEN == expected_cap

    def test_kind_icon_has_function(self) -> None:
        kind = "function"
        assert kind in _KIND_ICON

    def test_kind_icon_has_class(self) -> None:
        kind = "class"
        assert kind in _KIND_ICON

    def test_kind_icon_has_method(self) -> None:
        kind = "method"
        assert kind in _KIND_ICON
213
214
215 # ---------------------------------------------------------------------------
216 # Integration — basic substring search
217 # ---------------------------------------------------------------------------
218
219
class TestGrepBasic:
    """Integration tests for plain substring search with ``muse code grep``.

    Every test runs against the two-file repository built by the ``repo``
    fixture.  Exit-code assertions carry the command output so a failure
    shows what the CLI actually printed.
    """

    def test_finds_function_by_name(self, repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "validate"])
        assert result.exit_code == 0, result.output
        assert "validate" in result.output.lower()

    def test_no_match_exits_zero(self, repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "zzznomatch999"])
        assert result.exit_code == 0, result.output
        assert "no symbols" in result.output.lower()

    def test_kind_filter_function(self, repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "validate", "--kind", "function"])
        assert result.exit_code == 0, result.output
        # Only functions should appear (methods excluded).
        # NOTE(review): this only checks that the "fn" marker is printed; it
        # does not prove that methods are absent from the output.
        assert "fn" in result.output

    def test_kind_filter_class(self, repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "Invoice", "--kind", "class"])
        assert result.exit_code == 0, result.output
        assert "Invoice" in result.output
        # Methods of Invoice should NOT appear.
        assert "compute_total" not in result.output

    def test_language_filter(self, repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "validate", "--language", "python"])
        assert result.exit_code == 0, result.output
        assert "validate" in result.output.lower()

    def test_language_filter_unknown_exits_zero(self, repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "validate", "--language", "COBOL"])
        # No COBOL files — 0 matches, but not an error.
        assert result.exit_code == 0, result.output

    def test_match_count_suffix(self, repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "validate"])
        # Check the exit code first so an error message that happens to
        # contain "match" cannot satisfy the assertion below.
        assert result.exit_code == 0, result.output
        assert "match" in result.output.lower()
257
258
259 # ---------------------------------------------------------------------------
260 # Integration — --count flag
261 # ---------------------------------------------------------------------------
262
263
class TestGrepCount:
    """Integration tests for the ``--count`` flag."""

    def test_count_only_prints_integer(self, repo: pathlib.Path) -> None:
        argv = ["code", "grep", "validate", "--count"]
        result = runner.invoke(cli, argv)
        assert result.exit_code == 0

        first_line = result.output.strip().splitlines()[0]
        assert first_line.endswith("match(es)")

        # The first whitespace-separated token is the count itself.
        leading_token = first_line.split()[0]
        assert leading_token.isdigit()

    def test_count_zero_for_no_match(self, repo: pathlib.Path) -> None:
        argv = ["code", "grep", "zzz_nothing", "--count"]
        result = runner.invoke(cli, argv)
        assert result.exit_code == 0

        printed = result.output.strip()
        assert printed.startswith("0")
278
279
280 # ---------------------------------------------------------------------------
281 # Integration — --json output
282 # ---------------------------------------------------------------------------
283
284
class TestGrepJson:
    """Integration tests for ``--json`` output.

    Each test asserts a zero exit code before parsing, so a CLI failure is
    reported with the command output instead of an opaque ``JSONDecodeError``.
    """

    def test_json_schema(self, repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "validate", "--json"])
        assert result.exit_code == 0, result.output
        data = json.loads(result.output)
        assert "total_matches" in data
        assert "results" in data
        assert isinstance(data["results"], list)

    def test_json_result_fields(self, repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "validate", "--json"])
        assert result.exit_code == 0, result.output
        data = json.loads(result.output)
        # The fixture repo defines several "validate*" symbols; an empty list
        # here would make the field checks below pass vacuously.
        assert data["results"], "expected at least one match for 'validate'"
        first = data["results"][0]
        for field in ("address", "kind", "name", "lineno"):
            assert field in first, f"missing field {field!r}"

    def test_json_total_matches_consistent(self, repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "validate", "--json"])
        assert result.exit_code == 0, result.output
        data = json.loads(result.output)
        assert data["total_matches"] == len(data["results"])

    def test_json_no_match_empty_results(self, repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "zzz_nope_ever", "--json"])
        assert result.exit_code == 0, result.output
        data = json.loads(result.output)
        assert data["total_matches"] == 0
        assert data["results"] == []

    def test_json_pattern_echoed(self, repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "validate", "--json"])
        assert result.exit_code == 0, result.output
        data = json.loads(result.output)
        assert data["pattern"] == "validate"

    def test_json_unicode_pattern(self, repo: pathlib.Path) -> None:
        # A non-ASCII pattern must be echoed back unchanged in the payload.
        result = runner.invoke(cli, ["code", "grep", "café", "--json"])
        assert result.exit_code == 0, result.output
        data = json.loads(result.output)
        assert data["pattern"] == "café"
323
324
325 # ---------------------------------------------------------------------------
326 # Integration — --file scoped search
327 # ---------------------------------------------------------------------------
328
329
class TestGrepFile:
    """Integration tests for the ``--file`` scope option."""

    def test_file_scoped_results_only_from_that_file(self, repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "validate", "--file", "billing.py"])
        assert result.exit_code == 0, result.output
        # Checked unconditionally: whatever is printed for a search scoped to
        # billing.py, nothing from auth.py may leak into it.
        assert "auth.py" not in result.output

    def test_file_ambiguous_exits_nonzero(self, repo: pathlib.Path) -> None:
        # Create a second file with the same basename in a different directory.
        (repo / "sub").mkdir()
        (repo / "sub" / "billing.py").write_text(
            "def helper(): pass\n", encoding="utf-8"
        )
        commit_result = runner.invoke(cli, ["commit", "-m", "add sub billing"])
        # If the commit failed, the second billing.py is not tracked and the
        # ambiguity below would never be exercised.
        assert commit_result.exit_code == 0, commit_result.output

        result = runner.invoke(cli, ["code", "grep", "helper", "--file", "billing.py"])
        # With two billing.py files the filter should be ambiguous.
        assert result.exit_code == 1 or "ambiguous" in result.output.lower()
345
346
347 # ---------------------------------------------------------------------------
348 # Integration — --hashes flag
349 # ---------------------------------------------------------------------------
350
351
class TestGrepHashes:
    """Integration test for the ``--hashes`` flag."""

    def test_hashes_appear_in_output(self, repo: pathlib.Path) -> None:
        argv = ["code", "grep", "validate", "--hashes"]
        result = runner.invoke(cli, argv)
        assert result.exit_code == 0
        # An abbreviated content hash is expected to be rendered with a
        # trailing ".." marker (8 hex chars + "..").
        truncation_marker = ".."
        assert truncation_marker in result.output
358
359
360 # ---------------------------------------------------------------------------
361 # Integration — qualified name search
362 # ---------------------------------------------------------------------------
363
364
class TestGrepQualifiedName:
    """Integration tests for searching by qualified symbol name."""

    def test_dot_separator_hits_qualified_name(self, repo: pathlib.Path) -> None:
        argv = ["code", "grep", "Invoice.compute_total"]
        result = runner.invoke(cli, argv)
        assert result.exit_code == 0
        assert "compute_total" in result.output

    def test_double_colon_separator_hits_qualified_name(self, repo: pathlib.Path) -> None:
        # Only the exit status is checked for the "::" spelling.
        argv = ["code", "grep", "Invoice::compute_total"]
        result = runner.invoke(cli, argv)
        assert result.exit_code == 0

    def test_qualified_name_does_not_match_unrelated(self, repo: pathlib.Path) -> None:
        argv = ["code", "grep", "Invoice.zzz_missing"]
        result = runner.invoke(cli, argv)
        assert result.exit_code == 0
        printed = result.output.lower()
        # Either wording of the "nothing found" message is accepted.
        assert any(phrase in printed for phrase in ("no symbols", "0 match"))
379
380
381 # ---------------------------------------------------------------------------
382 # Integration — --regex flag
383 # ---------------------------------------------------------------------------
384
385
class TestGrepRegex:
    """Integration tests for the ``--regex`` flag."""

    def test_valid_regex_matches(self, repo: pathlib.Path) -> None:
        argv = ["code", "grep", "--regex", "^validate"]
        result = runner.invoke(cli, argv)
        assert result.exit_code == 0

    def test_invalid_regex_exits_one(self, repo: pathlib.Path) -> None:
        # An unterminated character class cannot be compiled.
        argv = ["code", "grep", "--regex", "[unclosed"]
        result = runner.invoke(cli, argv)
        assert result.exit_code == 1
        error_text = result.stderr.lower()
        assert any(word in error_text for word in ("regex", "invalid"))

    def test_regex_anchored_no_match(self, repo: pathlib.Path) -> None:
        argv = ["code", "grep", "--regex", "^zzz_nothing$"]
        result = runner.invoke(cli, argv)
        assert result.exit_code == 0
        printed = result.output.lower()
        # Either wording of the "nothing found" message is accepted.
        assert any(phrase in printed for phrase in ("0 match", "no symbols"))
400
401
402 # ---------------------------------------------------------------------------
403 # Security — ReDoS guards
404 # ---------------------------------------------------------------------------
405
406
class TestGrepSecurity:
    """Pattern-length cap, hostile pattern content, and the repo requirement."""

    def test_pattern_at_512_accepted(self, repo: pathlib.Path) -> None:
        pattern = "a" * 512
        result = runner.invoke(cli, ["code", "grep", pattern])
        # The 513-char test below expects the rejection message on stderr, so
        # checking stdout alone could never detect a wrongly rejected pattern.
        assert "too long" not in result.output.lower()
        assert "too long" not in result.stderr.lower()

    def test_pattern_at_513_rejected(self, repo: pathlib.Path) -> None:
        pattern = "a" * 513
        result = runner.invoke(cli, ["code", "grep", pattern])
        assert result.exit_code == 1
        assert "512" in result.stderr or "too long" in result.stderr.lower()

    def test_catastrophic_regex_does_not_hang(self, repo: pathlib.Path) -> None:
        # "(a+)+" is exponential on backtracking regex engines.  This test
        # passes it WITHOUT --regex, where it is expected to be searched for
        # as the literal substring "(a+)+" — which must return quickly.
        start = time.monotonic()
        result = runner.invoke(cli, ["code", "grep", "(a+)+"])
        elapsed = time.monotonic() - start
        assert result.exit_code == 0, result.output
        assert elapsed < 5.0, f"grep took {elapsed:.1f}s — possible hang"

    def test_null_bytes_in_pattern_handled(self, repo: pathlib.Path) -> None:
        # A NUL byte in the pattern must not crash the process; either a
        # clean "no match" (0) or a clean rejection (1) is acceptable.
        result = runner.invoke(cli, ["code", "grep", "val\x00idate"])
        assert result.exit_code in (0, 1)

    def test_control_chars_in_pattern_handled(self, repo: pathlib.Path) -> None:
        # Same contract as the NUL-byte case, for other control characters.
        result = runner.invoke(cli, ["code", "grep", "val\x01\x02idate"])
        assert result.exit_code in (0, 1)

    def test_requires_repo(self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> None:
        # No ``init`` has been run and MUSE_REPO_ROOT is unset, so the command
        # has no repository to search.
        monkeypatch.chdir(tmp_path)
        monkeypatch.delenv("MUSE_REPO_ROOT", raising=False)
        result = runner.invoke(cli, ["code", "grep", "validate"])
        assert result.exit_code != 0
444
445
446 # ---------------------------------------------------------------------------
447 # Stress — 1 000 symbols, search must complete in < 5 s
448 # ---------------------------------------------------------------------------
449
450
class TestGrepStress:
    """Stress tests: grep over ~1 000 generated symbols must stay fast and correct."""

    @pytest.fixture
    def large_repo(
        self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
    ) -> pathlib.Path:
        """Repo with ~1 000 Python symbols across 10 files.

        Writes ``module_0.py`` … ``module_9.py``, each defining 100 functions
        named ``compute_<file>_<n>``, and commits them in one commit.

        Returns:
            The repository root, i.e. *tmp_path*.
        """
        monkeypatch.chdir(tmp_path)
        monkeypatch.setenv("MUSE_REPO_ROOT", str(tmp_path))
        init_result = runner.invoke(cli, ["init", "--domain", "code"])
        # Fail here, with the init output, rather than with a confusing
        # commit or grep error further down.
        assert init_result.exit_code == 0, init_result.output

        for file_idx in range(10):
            lines: list[str] = []
            for sym_idx in range(100):
                lines.extend(
                    (
                        f"def compute_{file_idx}_{sym_idx}(x: int) -> int:",
                        f"    return x + {sym_idx}",
                        "",
                    )
                )
            (tmp_path / f"module_{file_idx}.py").write_text(
                "\n".join(lines), encoding="utf-8"
            )

        r = runner.invoke(cli, ["commit", "-m", "large module"])
        assert r.exit_code == 0, r.output
        return tmp_path

    def test_search_1000_symbols_under_5s(self, large_repo: pathlib.Path) -> None:
        start = time.monotonic()
        result = runner.invoke(cli, ["code", "grep", "compute"])
        elapsed = time.monotonic() - start
        assert result.exit_code == 0, result.output
        assert "1000" in result.output or "match" in result.output.lower()
        assert elapsed < 5.0, f"grep took {elapsed:.1f}s on 1 000 symbols"

    def test_count_mode_1000_symbols_under_5s(self, large_repo: pathlib.Path) -> None:
        start = time.monotonic()
        result = runner.invoke(cli, ["code", "grep", "compute", "--count"])
        elapsed = time.monotonic() - start
        assert result.exit_code == 0, result.output
        assert elapsed < 5.0, f"--count took {elapsed:.1f}s on 1 000 symbols"

    def test_json_mode_1000_symbols_schema_valid(self, large_repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "compute", "--json"])
        assert result.exit_code == 0, result.output
        data = json.loads(result.output)
        assert data["total_matches"] == len(data["results"])
        assert data["total_matches"] >= 1000

    def test_regex_search_1000_symbols_under_5s(self, large_repo: pathlib.Path) -> None:
        start = time.monotonic()
        result = runner.invoke(cli, ["code", "grep", "--regex", r"^compute_\d"])
        elapsed = time.monotonic() - start
        assert result.exit_code == 0, result.output
        assert elapsed < 5.0, f"--regex took {elapsed:.1f}s on 1 000 symbols"

    def test_kind_filter_1000_symbols_correct_count(self, large_repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "compute", "--kind", "function", "--count"])
        assert result.exit_code == 0, result.output
        # The first token of the first output line is the match count.
        line = result.output.strip().splitlines()[0]
        count = int(line.split()[0])
        assert count >= 1000

    def test_512_char_regex_does_not_hang_on_large_corpus(
        self, large_repo: pathlib.Path
    ) -> None:
        # Despite the test name, the pattern is passed WITHOUT --regex: this
        # measures a maximum-length literal search over the large corpus.
        pattern = f"compute_{'0' * 504}"  # exactly 512 chars
        assert len(pattern) == 512
        start = time.monotonic()
        result = runner.invoke(cli, ["code", "grep", pattern])
        elapsed = time.monotonic() - start
        assert elapsed < 5.0, f"512-char pattern took {elapsed:.1f}s — possible hang"
        assert result.exit_code in (0, 1)  # no match is fine
518
519
# ---------------------------------------------------------------------------
# --files flag (long form only; behaves like ``grep -l``) — one file path per
# line, unique, sorted
# ---------------------------------------------------------------------------
523
524
class TestGrepFiles:
    """``muse code grep --files`` prints one unique file path per line.

    Ergonomics goal: trivially pipeable without JSON parsing.
    Mirrors ``grep -l`` / ``rg -l`` behaviour.
    """

    @staticmethod
    def _path_lines(output: str) -> list[str]:
        """Return the non-blank lines of *output* with surrounding whitespace removed."""
        return [line.strip() for line in output.splitlines() if line.strip()]

    def test_files_lists_matching_file(self, repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "validate", "--files"])
        assert result.exit_code == 0, result.output
        paths = self._path_lines(result.output)
        assert any("billing.py" in path or "auth.py" in path for path in paths)

    def test_files_output_is_unique_paths(self, repo: pathlib.Path) -> None:
        """Each file path appears at most once, even if it has multiple matches."""
        result = runner.invoke(cli, ["code", "grep", "validate", "--files"])
        assert result.exit_code == 0, result.output
        paths = self._path_lines(result.output)
        assert len(paths) == len(set(paths)), "duplicate file paths in --files output"

    def test_files_output_is_sorted(self, repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "validate", "--files"])
        assert result.exit_code == 0, result.output
        paths = self._path_lines(result.output)
        assert paths == sorted(paths), "--files output must be sorted"

    def test_files_no_match_empty_output(self, repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "zzznomatch", "--files"])
        assert result.exit_code == 0, result.output
        assert result.output.strip() == ""

    def test_files_excludes_non_matching_files(self, repo: pathlib.Path) -> None:
        """Only files that contain at least one match appear."""
        result = runner.invoke(cli, ["code", "grep", "Invoice", "--files"])
        assert result.exit_code == 0, result.output
        paths = self._path_lines(result.output)
        # Invoice is only in billing.py
        assert all("billing.py" in path for path in paths)
        assert not any("auth.py" in path for path in paths)

    def test_files_mutually_exclusive_with_json(self, repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "validate", "--files", "--json"])
        assert result.exit_code != 0

    def test_files_mutually_exclusive_with_count(self, repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "validate", "--files", "--count"])
        assert result.exit_code != 0

    def test_files_long_flag_only(self, repo: pathlib.Path) -> None:
        """``--files`` is the only form (``-l`` is taken by ``--language``)."""
        result = runner.invoke(cli, ["code", "grep", "validate", "--files"])
        assert result.exit_code == 0, result.output
        paths = self._path_lines(result.output)
        assert len(paths) > 0

    def test_files_compatible_with_kind_filter(self, repo: pathlib.Path) -> None:
        result = runner.invoke(
            cli, ["code", "grep", "validate", "--files", "--kind", "function"]
        )
        assert result.exit_code == 0, result.output
        paths = self._path_lines(result.output)
        # validate_amount and validate_token are functions
        assert len(paths) > 0

    def test_files_compatible_with_file_filter(self, repo: pathlib.Path) -> None:
        result = runner.invoke(
            cli, ["code", "grep", "validate", "--files", "--file", "billing.py"]
        )
        assert result.exit_code == 0, result.output
        paths = self._path_lines(result.output)
        assert all("billing.py" in path for path in paths)
File History 1 commit