gabriel / muse public
test_cmd_content_grep_hardening.py python
969 lines 33.8 KB
Raw
sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b fix: try fetch/presign before fetch/mpack to avoid Cloudfla… Sonnet 4.6 patch 7 days ago
1 """Hardening tests for ``muse content-grep``.
2
3 Covers:
4 Unit — _is_binary, _path_matches_globs, _search_object (context,
5 binary skip, utf-8 replace), pattern validation order
6 Security — ANSI injection in file paths and match text, pattern length
7 cap, invalid regex, ReDoS pattern rejected before I/O
8 Perf — parallel reads complete correctly, --max-matches cap
9 JSON — _ContentGrepJson schema (commit_id, snapshot_id, totals),
10 GrepMatch context_before/context_after fields
11 Flags — --include, --exclude, --max-matches, --context/-C, --json,
12 rejection of old --format flag
13 Integration — multi-file with mixed hits, --include narrows search,
14 --exclude skips files, --context shows surrounding lines,
15 --ref searches historical commit
16 E2E — --help output mentions all new flags
17 Stress — 500-file snapshot, concurrent parallel reads
18 """
19
20 from __future__ import annotations
21 from collections.abc import Mapping
22
23 import datetime
24 import json
25 import pathlib
26 import threading
27 from typing import TypedDict
28
29 import pytest
30 from tests.cli_test_helper import CliRunner, InvokeResult
31
32 from muse.core.object_store import write_object
33 from muse.core.ids import hash_commit, hash_snapshot
34 from muse.core.commits import (
35 CommitRecord,
36 write_commit,
37 )
38 from muse.core.snapshots import (
39 SnapshotRecord,
40 write_snapshot,
41 )
42 from muse.core.types import Manifest, blob_id
43
44 cli = None
45 runner = CliRunner()
46 _invoke_lock = threading.Lock()
47
48 type _FilesMap = dict[str, bytes]
49
50 _REPO_ID = "cgrep-hardening"
51
52
53 # ---------------------------------------------------------------------------
54 # Helpers
55 # ---------------------------------------------------------------------------
56
57
class _GrepMatchOut(TypedDict):
    """One matching line inside a file, as read by the tests in this module."""

    line_number: int
    line: str
    # Lines surrounding the match; the tests expect [] when no context is requested.
    context_before: list[str]
    context_after: list[str]


class _GrepResultOut(TypedDict):
    """Per-file entry of ``results``.

    The key is ``path`` (not ``file``): every test in this module reads
    ``results[i]["path"]`` and ``TestJsonKeyErgonomics`` asserts that no
    ``file`` key is present.
    """

    path: str
    object_id: str
    match_count: int
    matches: list[_GrepMatchOut]


class _GrepOut(TypedDict):
    """Top-level ``content-grep --json`` document as returned by ``_parse``.

    NOTE(review): ``commit_id`` and ``snapshot_id`` are asserted to be ``None``
    in ``--working-tree`` mode; those tests read the raw ``json.loads`` result
    rather than this type, which describes the commit-mode shape.
    """

    source: str
    commit_id: str
    snapshot_id: str
    pattern: str
    total_files_matched: int
    total_matches: int
    results: list[_GrepResultOut]
    duration_ms: float
    exit_code: int
82
83
84
85
def _init_repo(path: pathlib.Path, repo_id: str = _REPO_ID) -> pathlib.Path:
    """Lay out an empty repository skeleton for *path* and return *path*.

    Inside the directory returned by ``muse_dir(path)`` this creates the
    ``commits``, ``snapshots``, ``objects`` and ``refs/heads`` directories,
    points ``HEAD`` at ``refs/heads/main`` and writes a ``repo.json`` holding
    *repo_id* with domain ``"midi"``.
    """
    store = muse_dir(path)
    for sub in ("commits", "snapshots", "objects", "refs/heads"):
        store.joinpath(sub).mkdir(parents=True, exist_ok=True)
    store.joinpath("HEAD").write_text("ref: refs/heads/main", encoding="utf-8")
    repo_meta = json.dumps({"repo_id": repo_id, "domain": "midi"})
    store.joinpath("repo.json").write_text(repo_meta, encoding="utf-8")
    return path
95
96
def _env(repo: pathlib.Path) -> Manifest:
    """Build the environment mapping that sets ``MUSE_REPO_ROOT`` to *repo*."""
    repo_root = str(repo)
    return {"MUSE_REPO_ROOT": repo_root}
99
100
# Incremented once per _commit_files call; used only in the commit message.
_counter = 0


def _commit_files(
    root: pathlib.Path,
    files: _FilesMap,
    branch: str = "main",
    parent_id: str | None = None,
) -> str:
    """Store *files* as objects, snapshot them, commit, and move *branch*.

    Every entry of *files* is written to the object store under its
    ``blob_id``; the resulting manifest becomes a snapshot, a commit record is
    written on top of it (with *parent_id* as parent when given) and the
    branch ref file is overwritten with the new commit id.

    Returns:
        The id of the commit that was written.
    """
    global _counter
    _counter += 1
    message = f"commit {_counter}"

    manifest: Manifest = {}
    for rel_path, payload in files.items():
        object_id = blob_id(payload)
        write_object(root, object_id, payload)
        manifest[rel_path] = object_id

    snapshot_id = hash_snapshot(manifest)
    write_snapshot(root, SnapshotRecord(snapshot_id=snapshot_id, manifest=manifest))

    committed_at = datetime.datetime.now(datetime.timezone.utc)
    if parent_id:
        parent_ids = [parent_id]
    else:
        parent_ids = []
    commit_id = hash_commit(
        parent_ids, snapshot_id, message, committed_at.isoformat(),
    )
    record = CommitRecord(
        commit_id=commit_id,
        branch=branch,
        snapshot_id=snapshot_id,
        message=message,
        committed_at=committed_at,
        parent_commit_id=parent_id,
    )
    write_commit(root, record)

    head_file = ref_path(root, branch)
    head_file.parent.mkdir(parents=True, exist_ok=True)
    head_file.write_text(commit_id, encoding="utf-8")
    return commit_id
139
140
def _invoke(args: list[str], env: Manifest | None = None) -> InvokeResult:
    """Run the CLI with *args* while holding the module-wide invoke lock."""
    _invoke_lock.acquire()
    try:
        return runner.invoke(cli, args, env=env)
    finally:
        _invoke_lock.release()
144
145
def _parse(result: InvokeResult) -> _GrepOut:
    """Decode the JSON document held in ``result.output``."""
    payload: _GrepOut = json.loads(result.output)
    return payload
149
150
151 # ---------------------------------------------------------------------------
152 # Unit: _is_binary
153 # ---------------------------------------------------------------------------
154
155
def test_is_binary_null_byte() -> None:
    """Bytes starting with a NUL are reported as binary."""
    from muse.cli.commands.content_grep import _is_binary

    verdict = _is_binary(b"\x00hello")
    assert verdict is True
160
161
def test_is_binary_clean_text() -> None:
    """Plain ASCII text is not reported as binary."""
    from muse.cli.commands.content_grep import _is_binary

    verdict = _is_binary(b"hello world\n")
    assert verdict is False
166
167
def test_is_binary_empty() -> None:
    """Empty content is not reported as binary."""
    from muse.cli.commands.content_grep import _is_binary

    verdict = _is_binary(b"")
    assert verdict is False
172
173
174 # ---------------------------------------------------------------------------
175 # Unit: _path_matches_globs
176 # ---------------------------------------------------------------------------
177
178
def test_path_matches_no_filter() -> None:
    """With neither include nor exclude glob, a path is accepted."""
    from muse.cli.commands.content_grep import _path_matches_globs

    accepted = _path_matches_globs("src/main.py", None, None)
    assert accepted is True
183
184
def test_path_matches_include_basename() -> None:
    """Include glob ``*.py`` accepts ``src/main.py`` and rejects ``src/main.js``."""
    from muse.cli.commands.content_grep import _path_matches_globs

    cases = (("src/main.py", True), ("src/main.js", False))
    for candidate, expected in cases:
        assert _path_matches_globs(candidate, "*.py", None) is expected
190
191
def test_path_matches_include_full_path() -> None:
    """Include glob ``src/*.py`` accepts ``src/main.py`` and rejects ``tests/main.py``."""
    from muse.cli.commands.content_grep import _path_matches_globs

    cases = (("src/main.py", True), ("tests/main.py", False))
    for candidate, expected in cases:
        assert _path_matches_globs(candidate, "src/*.py", None) is expected
197
198
def test_path_matches_exclude_basename() -> None:
    """Exclude glob ``*.min.js`` rejects ``app.min.js`` and keeps ``app.js``."""
    from muse.cli.commands.content_grep import _path_matches_globs

    cases = (("app.min.js", False), ("app.js", True))
    for candidate, expected in cases:
        assert _path_matches_globs(candidate, None, "*.min.js") is expected
204
205
def test_path_matches_include_and_exclude() -> None:
    """With include ``*.py`` and exclude ``test_*.py``, only the non-test file passes."""
    from muse.cli.commands.content_grep import _path_matches_globs

    cases = (("src/main.py", True), ("test_foo.py", False))
    for candidate, expected in cases:
        assert _path_matches_globs(candidate, "*.py", "test_*.py") is expected
211
212
213 # ---------------------------------------------------------------------------
214 # Unit: _search_object — context lines
215 # ---------------------------------------------------------------------------
216
217
def test_search_object_context(tmp_path: pathlib.Path) -> None:
    """One context line on each side is captured around a mid-file match."""
    import re
    from muse.cli.commands.content_grep import _search_object

    _init_repo(tmp_path)
    blob = b"line one\nTARGET line\nline three\n"
    blob_key = blob_id(blob)
    write_object(tmp_path, blob_key, blob)

    regex = re.compile("TARGET")
    hit_count, hits = _search_object(
        tmp_path, blob_key, regex, False, False, context_lines=1
    )
    assert hit_count == 1
    assert len(hits) == 1
    first_hit = hits[0]
    assert first_hit["context_before"] == ["line one"]
    assert first_hit["context_after"] == ["line three"]
233
234
def test_search_object_context_at_boundary(tmp_path: pathlib.Path) -> None:
    """Context is clipped at the file edges instead of padded.

    The match is on the first line and three context lines are requested, so
    there is nothing before it and only the single remaining line after it.
    """
    import re
    from muse.cli.commands.content_grep import _search_object

    _init_repo(tmp_path)
    content = b"TARGET\nonly\n"
    obj_id = blob_id(content)
    write_object(tmp_path, obj_id, content)

    pat = re.compile("TARGET")
    count, matches = _search_object(tmp_path, obj_id, pat, False, False, context_lines=3)
    # Check the count before indexing so a missed match fails with a clear
    # assertion rather than an IndexError (mirrors test_search_object_context).
    assert count == 1
    assert len(matches) == 1
    assert matches[0]["context_before"] == []
    assert matches[0]["context_after"] == ["only"]
248
249
def test_search_object_no_context(tmp_path: pathlib.Path) -> None:
    """With ``context_lines=0`` both context lists are empty."""
    import re
    from muse.cli.commands.content_grep import _search_object

    _init_repo(tmp_path)
    blob = b"line\nTARGET\nend\n"
    blob_key = blob_id(blob)
    write_object(tmp_path, blob_key, blob)

    regex = re.compile("TARGET")
    _, hits = _search_object(tmp_path, blob_key, regex, False, False, context_lines=0)
    first_hit = hits[0]
    assert first_hit["context_before"] == []
    assert first_hit["context_after"] == []
263
264
def test_search_object_binary_skipped(tmp_path: pathlib.Path) -> None:
    """An object containing NUL bytes yields no matches even if the text occurs."""
    import re
    from muse.cli.commands.content_grep import _search_object

    _init_repo(tmp_path)
    blob = b"\x00\x01\x02TARGET\x03"
    blob_key = blob_id(blob)
    write_object(tmp_path, blob_key, blob)

    regex = re.compile("TARGET")
    hit_count, hits = _search_object(tmp_path, blob_key, regex, False, False, 0)
    assert hit_count == 0
    assert hits == []
278
279
280 # ---------------------------------------------------------------------------
281 # Security: pattern validation happens BEFORE I/O
282 # ---------------------------------------------------------------------------
283
284
def test_long_pattern_rejected_before_io(tmp_path: pathlib.Path) -> None:
    """A too-long pattern must be rejected without touching the object store."""
    _init_repo(tmp_path)
    # Nothing is committed on purpose: had the command reached I/O it would
    # fail with a 'no commits' error rather than the 'pattern too long' one.
    oversized = "a" * 501
    outcome = _invoke(["content-grep", oversized], env=_env(tmp_path))
    assert outcome.exit_code != 0

    # The complaint must be about pattern length; stderr is only consulted
    # when stdout does not already contain it.
    needle = "too long"
    reported = needle in outcome.output.lower()
    if not reported:
        reported = needle in (outcome.stderr or "").lower()
    assert reported
297
298
def test_invalid_regex_rejected_before_io(tmp_path: pathlib.Path) -> None:
    """An unparsable regex fails with a message mentioning ``regex``."""
    _init_repo(tmp_path)
    outcome = _invoke(["content-grep", "[unclosed"], env=_env(tmp_path))
    assert outcome.exit_code != 0

    # stderr is only consulted when stdout does not already mention it.
    needle = "regex"
    reported = needle in outcome.output.lower()
    if not reported:
        reported = needle in (outcome.stderr or "").lower()
    assert reported
306
307
308 # ---------------------------------------------------------------------------
309 # Security: ANSI injection
310 # ---------------------------------------------------------------------------
311
312
def test_ansi_injection_in_path(tmp_path: pathlib.Path) -> None:
    """File paths with ANSI escapes must be stripped in text output."""
    _init_repo(tmp_path)
    hostile_name = "\x1b[31mmalicious\x1b[0m.txt"
    _commit_files(tmp_path, {hostile_name: b"TARGET content\n"})

    outcome = _invoke(["content-grep", "TARGET"], env=_env(tmp_path))

    assert outcome.exit_code == 0
    assert "\x1b" not in outcome.output
323
324
def test_ansi_injection_in_match_text(tmp_path: pathlib.Path) -> None:
    """Match text with ANSI escapes must be stripped in text output."""
    _init_repo(tmp_path)
    hostile_line = b"TARGET \x1b[31mred\x1b[0m content\n"
    _commit_files(tmp_path, {"safe.txt": hostile_line})

    outcome = _invoke(["content-grep", "TARGET"], env=_env(tmp_path))

    assert outcome.exit_code == 0
    assert "\x1b" not in outcome.output
334
335
336 # ---------------------------------------------------------------------------
337 # JSON schema: _ContentGrepJson
338 # ---------------------------------------------------------------------------
339
340
def test_json_schema_all_fields(tmp_path: pathlib.Path) -> None:
    """``--json`` output carries ids, the pattern, totals and per-file results."""
    _init_repo(tmp_path)
    _commit_files(tmp_path, {"a.txt": b"hello world\nhello again\n"})
    outcome = _invoke(["content-grep", "hello", "--json"], env=_env(tmp_path))
    assert outcome.exit_code == 0

    doc = _parse(outcome)
    commit_id = doc["commit_id"]
    assert commit_id.startswith("sha256:")
    assert len(commit_id) == 71
    snapshot_id = doc["snapshot_id"]
    assert snapshot_id.startswith("sha256:")
    assert len(snapshot_id) == 71
    assert doc["pattern"] == "hello"
    assert doc["total_files_matched"] == 1
    assert doc["total_matches"] == 2

    entries = doc["results"]
    assert len(entries) == 1
    entry = entries[0]
    assert entry["path"] == "a.txt"
    assert entry["match_count"] == 2
    assert isinstance(entry["matches"], list)
361
362
def test_json_schema_context_fields(tmp_path: pathlib.Path) -> None:
    """Each JSON match exposes ``context_before`` and ``context_after`` lists."""
    _init_repo(tmp_path)
    _commit_files(tmp_path, {"c.txt": b"before\nTARGET\nafter\n"})
    argv = ["content-grep", "TARGET", "--context", "1", "--json"]
    outcome = _invoke(argv, env=_env(tmp_path))
    assert outcome.exit_code == 0

    first_hit = _parse(outcome)["results"][0]["matches"][0]
    assert isinstance(first_hit, dict)
    assert "context_before" in first_hit
    assert "context_after" in first_hit
    assert first_hit["context_before"] == ["before"]
    assert first_hit["context_after"] == ["after"]
378
379
def test_json_schema_no_match_exit1(tmp_path: pathlib.Path) -> None:
    """A pattern that matches nothing gives a non-zero exit, also with ``--json``."""
    _init_repo(tmp_path)
    _commit_files(tmp_path, {"a.txt": b"hello\n"})
    argv = ["content-grep", "ZZZNOMATCH", "--json"]
    outcome = _invoke(argv, env=_env(tmp_path))
    assert outcome.exit_code != 0
387
388
def test_json_total_matches_multiple_files(tmp_path: pathlib.Path) -> None:
    """Totals count matching files and matching lines across the snapshot."""
    _init_repo(tmp_path)
    snapshot: _FilesMap = {
        "a.txt": b"hit\nhit\n",
        "b.txt": b"hit\n",
        "c.txt": b"miss\n",
    }
    _commit_files(tmp_path, snapshot)

    outcome = _invoke(["content-grep", "hit", "--json"], env=_env(tmp_path))
    assert outcome.exit_code == 0

    doc = _parse(outcome)
    assert doc["total_files_matched"] == 2
    assert doc["total_matches"] == 3
403
404
405 # ---------------------------------------------------------------------------
406 # Flags: --include
407 # ---------------------------------------------------------------------------
408
409
def test_include_filters_to_py_only(tmp_path: pathlib.Path) -> None:
    """``--include *.py`` limits results to the Python file."""
    _init_repo(tmp_path)
    snapshot: _FilesMap = {
        "module.py": b"TARGET in python\n",
        "module.js": b"TARGET in js\n",
        "readme.md": b"TARGET in md\n",
    }
    _commit_files(tmp_path, snapshot)

    argv = ["content-grep", "TARGET", "--include", "*.py", "--json"]
    outcome = _invoke(argv, env=_env(tmp_path))
    assert outcome.exit_code == 0

    doc = _parse(outcome)
    assert doc["total_files_matched"] == 1
    assert doc["results"][0]["path"] == "module.py"
425
426
def test_include_no_matches_after_filter(tmp_path: pathlib.Path) -> None:
    """If no file passes ``--include``, the command exits non-zero."""
    _init_repo(tmp_path)
    _commit_files(tmp_path, {"module.js": b"TARGET here\n"})
    argv = ["content-grep", "TARGET", "--include", "*.py"]
    outcome = _invoke(argv, env=_env(tmp_path))
    # module.js is the only file and it does not pass the include filter.
    assert outcome.exit_code != 0
435
436
437 # ---------------------------------------------------------------------------
438 # Flags: --exclude
439 # ---------------------------------------------------------------------------
440
441
def test_exclude_skips_minified(tmp_path: pathlib.Path) -> None:
    """``--exclude *.min.js`` leaves only ``app.js`` in the results."""
    _init_repo(tmp_path)
    snapshot: _FilesMap = {
        "app.js": b"TARGET here\n",
        "app.min.js": b"TARGET minified\n",
    }
    _commit_files(tmp_path, snapshot)

    argv = ["content-grep", "TARGET", "--exclude", "*.min.js", "--json"]
    outcome = _invoke(argv, env=_env(tmp_path))
    assert outcome.exit_code == 0

    doc = _parse(outcome)
    assert doc["total_files_matched"] == 1
    assert doc["results"][0]["path"] == "app.js"
456
457
def test_exclude_all_results_in_no_match(tmp_path: pathlib.Path) -> None:
    """``--exclude`` drops only files that match the glob.

    A glob that misses the file leaves it searchable (exit 0). A glob that
    covers every file in the snapshot leaves nothing to search, which is
    reported like any other "no match" run (non-zero exit).
    """
    _init_repo(tmp_path)
    _commit_files(tmp_path, {"test.py": b"TARGET\n"})

    # test.py doesn't match the test_*.py exclude pattern, so it is still
    # searched and the pattern is found.
    kept = _invoke(
        ["content-grep", "TARGET", "--exclude", "test_*.py"],
        env=_env(tmp_path),
    )
    assert kept.exit_code == 0

    # *.py covers the only file in the snapshot, so no match can be reported.
    dropped = _invoke(
        ["content-grep", "TARGET", "--exclude", "*.py"],
        env=_env(tmp_path),
    )
    assert dropped.exit_code != 0
468
469
470 # ---------------------------------------------------------------------------
471 # Flags: --max-matches
472 # ---------------------------------------------------------------------------
473
474
def test_max_matches_caps_output(tmp_path: pathlib.Path) -> None:
    """``--max-matches 10`` keeps ``total_matches`` at or below 10."""
    _init_repo(tmp_path)
    hundred_hits = b"hit\n" * 100
    _commit_files(tmp_path, {"many.txt": hundred_hits})

    argv = ["content-grep", "hit", "--max-matches", "10", "--json"]
    outcome = _invoke(argv, env=_env(tmp_path))
    assert outcome.exit_code == 0

    reported = _parse(outcome)["total_matches"]
    assert reported <= 10
485
486
def test_max_matches_zero_still_exits_nonzero_on_cap(tmp_path: pathlib.Path) -> None:
    """When max_matches=0, no results are kept — exit 1."""
    _init_repo(tmp_path)
    _commit_files(tmp_path, {"a.txt": b"hit\n"})
    argv = ["content-grep", "hit", "--max-matches", "0", "--json"]
    outcome = _invoke(argv, env=_env(tmp_path))
    # Nothing survives a cap of zero, so the run counts as "no match".
    assert outcome.exit_code != 0
496
497
498 # ---------------------------------------------------------------------------
499 # Flags: --context / -C
500 # ---------------------------------------------------------------------------
501
502
def test_context_text_output(tmp_path: pathlib.Path) -> None:
    """Text output with ``--context 1`` shows the lines around the match."""
    _init_repo(tmp_path)
    _commit_files(tmp_path, {"ctx.txt": b"alpha\nbeta\ngamma\n"})
    argv = ["content-grep", "beta", "--context", "1"]
    outcome = _invoke(argv, env=_env(tmp_path))
    assert outcome.exit_code == 0

    printed = outcome.output
    # The line before and the line after the match must both be printed.
    assert "alpha" in printed
    assert "gamma" in printed
514
515
def test_context_short_flag(tmp_path: pathlib.Path) -> None:
    """``-C 1`` prints the lines around the match in text output."""
    _init_repo(tmp_path)
    _commit_files(tmp_path, {"ctx2.txt": b"first\nTARGET\nlast\n"})
    argv = ["content-grep", "TARGET", "-C", "1"]
    outcome = _invoke(argv, env=_env(tmp_path))
    assert outcome.exit_code == 0

    printed = outcome.output
    assert "first" in printed
    assert "last" in printed
526
527
528 # ---------------------------------------------------------------------------
529 # Flags: --json boolean (rejects old --format)
530 # ---------------------------------------------------------------------------
531
532
def test_format_flag_rejected(tmp_path: pathlib.Path) -> None:
    """Old ``--format json`` must be rejected by argparse (exit 2)."""
    _init_repo(tmp_path)
    _commit_files(tmp_path, {"a.txt": b"hello\n"})
    legacy_argv = ["content-grep", "hello", "--format", "json"]
    outcome = _invoke(legacy_argv, env=_env(tmp_path))
    assert outcome.exit_code == 2
542
543
544 # ---------------------------------------------------------------------------
545 # Integration: --ref searches a different commit
546 # ---------------------------------------------------------------------------
547
548
def test_ref_searches_branch(tmp_path: pathlib.Path) -> None:
    """HEAD is searched by default; ``--ref <commit id>`` searches that commit."""
    _init_repo(tmp_path)
    first_commit = _commit_files(tmp_path, {"v1.txt": b"OLD content\n"})
    _commit_files(tmp_path, {"v2.txt": b"NEW content\n"}, parent_id=first_commit)

    # Without --ref the newest commit is searched, so NEW is found in v2.txt.
    at_head = _invoke(["content-grep", "NEW", "--json"], env=_env(tmp_path))
    assert at_head.exit_code == 0
    head_paths = [entry["path"] for entry in _parse(at_head)["results"]]
    assert "v2.txt" in head_paths

    # Addressing the first commit by id finds OLD in v1.txt.
    at_first = _invoke(
        ["content-grep", "OLD", "--ref", first_commit, "--json"],
        env=_env(tmp_path),
    )
    assert at_first.exit_code == 0
    first_doc = _parse(at_first)
    first_paths = [entry["path"] for entry in first_doc["results"]]
    assert "v1.txt" in first_paths
    assert first_doc["commit_id"] == first_commit
573
574
575 # ---------------------------------------------------------------------------
576 # E2E: --help mentions all new flags
577 # ---------------------------------------------------------------------------
578
579
def test_help_mentions_include() -> None:
    """``content-grep --help`` exits 0 and lists ``--include``."""
    help_run = _invoke(["content-grep", "--help"])
    assert help_run.exit_code == 0
    help_text = help_run.output
    assert "--include" in help_text
584
585
def test_help_mentions_exclude() -> None:
    """``content-grep --help`` lists ``--exclude``."""
    help_text = _invoke(["content-grep", "--help"]).output
    assert "--exclude" in help_text
589
590
def test_help_mentions_max_matches() -> None:
    """``content-grep --help`` lists ``--max-matches``."""
    help_text = _invoke(["content-grep", "--help"]).output
    assert "--max-matches" in help_text
594
595
def test_help_mentions_context() -> None:
    """``content-grep --help`` exits 0 and lists both ``--context`` and ``-C``.

    Both spellings are exercised by other tests in this module, so the help
    text is required to show each of them rather than either one.
    """
    result = _invoke(["content-grep", "--help"])
    assert result.exit_code == 0
    assert "--context" in result.output
    assert "-C" in result.output
599
600
def test_help_mentions_json_not_format() -> None:
    """Help advertises ``--json`` and no longer mentions ``--format``."""
    help_text = _invoke(["content-grep", "--help"]).output
    assert "--json" in help_text
    assert "--format" not in help_text
605
606
607 # ---------------------------------------------------------------------------
608 # Stress: 500-file snapshot, pattern matches 250
609 # ---------------------------------------------------------------------------
610
611
def test_stress_500_files(tmp_path: pathlib.Path) -> None:
    """In a 500-file snapshot, the 250 even-numbered files each match once."""
    _init_repo(tmp_path)
    # Even indices carry the needle, odd indices do not.
    snapshot: _FilesMap = {
        f"f_{index:04d}.txt": (b"TARGET_STRESS\n" if index % 2 == 0 else b"other\n")
        for index in range(500)
    }
    _commit_files(tmp_path, snapshot)

    argv = ["content-grep", "TARGET_STRESS", "--json"]
    outcome = _invoke(argv, env=_env(tmp_path))
    assert outcome.exit_code == 0

    doc = _parse(outcome)
    assert doc["total_files_matched"] == 250
    assert doc["total_matches"] == 250
627
628
629 # ---------------------------------------------------------------------------
630 # Stress: concurrent reads
631 # ---------------------------------------------------------------------------
632
633
def test_stress_concurrent_reads(tmp_path: pathlib.Path) -> None:
    """Eight threads run the same JSON search; each must report one match.

    Workers report problems by appending to a shared list that is asserted
    empty after all threads have been joined.
    """
    _init_repo(tmp_path)
    _commit_files(tmp_path, {"concurrent.txt": b"CONCURRENT TARGET\n"})

    errors: list[str] = []

    def _read() -> None:
        # An exception escaping a thread target ends only that thread, so the
        # test would still pass. Record every failure instead.
        try:
            r = _invoke(
                ["content-grep", "CONCURRENT", "--json"],
                env=_env(tmp_path),
            )
            if r.exit_code != 0:
                errors.append(f"exit {r.exit_code}")
                return
            d = json.loads(r.output)
            if d.get("total_matches", 0) != 1:
                errors.append(f"unexpected total_matches: {d.get('total_matches')}")
        except Exception as exc:  # thread boundary: report, never swallow
            errors.append(f"{type(exc).__name__}: {exc}")

    threads = [threading.Thread(target=_read) for _ in range(8)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()

    assert not errors, f"Concurrent read failures: {errors}"
662
663
664 # ---------------------------------------------------------------------------
665 # JSON schema: complete key set (TestJsonSchemaComplete)
666 # ---------------------------------------------------------------------------
667
668
# Top-level keys that TestJsonSchemaComplete expects in every --json document.
_REQUIRED_KEYS = frozenset(
    (
        "source",
        "commit_id",
        "snapshot_id",
        "pattern",
        "total_files_matched",
        "total_matches",
        "results",
        "duration_ms",
        "exit_code",
    )
)
680
681
class TestJsonSchemaComplete:
    """Verify that every required key is present in JSON output."""

    @staticmethod
    def _seed(repo: pathlib.Path, *, on_disk: bool = False) -> None:
        """Commit ``a.txt``; with *on_disk* also write it into the working tree."""
        _init_repo(repo)
        _commit_files(repo, {"a.txt": b"hello\n"})
        if on_disk:
            # Working-tree searches read from disk, so the file must exist there.
            (repo / "a.txt").write_bytes(b"hello\n")

    @staticmethod
    def _grep(repo: pathlib.Path, *, working_tree: bool = False) -> "InvokeResult":
        """Search for ``hello`` with ``--json``, optionally with ``--working-tree``."""
        argv = ["content-grep", "hello"]
        if working_tree:
            argv.append("--working-tree")
        argv.append("--json")
        return _invoke(argv, env=_env(repo))

    def test_all_required_keys_present_commit_mode(self, tmp_path: pathlib.Path) -> None:
        self._seed(tmp_path)
        outcome = self._grep(tmp_path)
        assert outcome.exit_code == 0
        doc = json.loads(outcome.output)
        absent = _REQUIRED_KEYS - doc.keys()
        assert not absent, f"Missing keys: {absent}"

    def test_all_required_keys_present_working_tree_mode(self, tmp_path: pathlib.Path) -> None:
        self._seed(tmp_path, on_disk=True)
        outcome = self._grep(tmp_path, working_tree=True)
        assert outcome.exit_code == 0
        doc = json.loads(outcome.output)
        absent = _REQUIRED_KEYS - doc.keys()
        assert not absent, f"Missing keys: {absent}"

    def test_source_field_is_commit(self, tmp_path: pathlib.Path) -> None:
        self._seed(tmp_path)
        doc = json.loads(self._grep(tmp_path).output)
        assert doc["source"] == "commit"

    def test_source_field_is_working_tree(self, tmp_path: pathlib.Path) -> None:
        self._seed(tmp_path, on_disk=True)
        doc = json.loads(self._grep(tmp_path, working_tree=True).output)
        assert doc["source"] == "working-tree"

    def test_commit_id_null_in_working_tree_mode(self, tmp_path: pathlib.Path) -> None:
        self._seed(tmp_path, on_disk=True)
        doc = json.loads(self._grep(tmp_path, working_tree=True).output)
        assert doc["commit_id"] is None

    def test_snapshot_id_null_in_working_tree_mode(self, tmp_path: pathlib.Path) -> None:
        self._seed(tmp_path, on_disk=True)
        doc = json.loads(self._grep(tmp_path, working_tree=True).output)
        assert doc["snapshot_id"] is None

    def test_exit_code_field_zero_on_match(self, tmp_path: pathlib.Path) -> None:
        self._seed(tmp_path)
        doc = json.loads(self._grep(tmp_path).output)
        assert doc["exit_code"] == 0

    def test_json_is_compact(self, tmp_path: pathlib.Path) -> None:
        """JSON output must be a single line — no pretty-printing."""
        self._seed(tmp_path)
        printed = self._grep(tmp_path).output
        non_blank = list(filter(str.strip, printed.splitlines()))
        assert len(non_blank) == 1, "JSON must be compact (one line)"
762
763
764 # ---------------------------------------------------------------------------
765 # duration_ms (TestElapsedSeconds)
766 # ---------------------------------------------------------------------------
767
768
class TestElapsedSeconds:
    """``duration_ms`` must be a non-negative float in all JSON paths."""

    def _assert_elapsed(self, data: Mapping[str, object]) -> None:
        """Assert that *data* carries a non-negative float ``duration_ms``."""
        assert "duration_ms" in data
        # Bind the value so the isinstance check narrows its type before the
        # numeric comparison.
        elapsed = data["duration_ms"]
        assert isinstance(elapsed, float)
        assert elapsed >= 0.0

    def test_elapsed_present_commit_mode(self, tmp_path: pathlib.Path) -> None:
        _init_repo(tmp_path)
        _commit_files(tmp_path, {"a.txt": b"target\n"})
        result = _invoke(["content-grep", "target", "--json"], env=_env(tmp_path))
        self._assert_elapsed(json.loads(result.output))

    def test_elapsed_present_working_tree_mode(self, tmp_path: pathlib.Path) -> None:
        _init_repo(tmp_path)
        _commit_files(tmp_path, {"a.txt": b"target\n"})
        (tmp_path / "a.txt").write_bytes(b"target\n")
        result = _invoke(
            ["content-grep", "target", "--working-tree", "--json"],
            env=_env(tmp_path),
        )
        self._assert_elapsed(json.loads(result.output))

    def test_elapsed_is_float_not_int(self, tmp_path: pathlib.Path) -> None:
        _init_repo(tmp_path)
        _commit_files(tmp_path, {"a.txt": b"target\n"})
        result = _invoke(["content-grep", "target", "--json"], env=_env(tmp_path))
        data = json.loads(result.output)
        assert isinstance(data["duration_ms"], float)

    def test_elapsed_reasonable_upper_bound(self, tmp_path: pathlib.Path) -> None:
        """Single-file search in a temp repo should be well under 5 seconds."""
        _init_repo(tmp_path)
        _commit_files(tmp_path, {"a.txt": b"target\n"})
        result = _invoke(["content-grep", "target", "--json"], env=_env(tmp_path))
        data = json.loads(result.output)
        # The field is named duration_ms, so the 5-second budget is 5000.0;
        # comparing against 5.0 would demand a 5-millisecond run.
        assert data["duration_ms"] < 5_000.0

    def test_elapsed_present_stress_mode(self, tmp_path: pathlib.Path) -> None:
        """duration_ms must appear even for a 50-file search."""
        _init_repo(tmp_path)
        # A plain dict, matching the _FilesMap parameter of _commit_files.
        files: dict[str, bytes] = {f"f{i}.txt": b"needle\n" for i in range(50)}
        _commit_files(tmp_path, files)
        result = _invoke(["content-grep", "needle", "--json"], env=_env(tmp_path))
        assert result.exit_code == 0
        self._assert_elapsed(json.loads(result.output))

    def test_elapsed_six_decimal_places(self, tmp_path: pathlib.Path) -> None:
        """duration_ms should be rounded to at most 6 decimal places."""
        _init_repo(tmp_path)
        _commit_files(tmp_path, {"a.txt": b"target\n"})
        result = _invoke(["content-grep", "target", "--json"], env=_env(tmp_path))
        data = json.loads(result.output)
        elapsed = data["duration_ms"]
        # round-trip through 6-decimal representation must be exact
        assert round(elapsed, 6) == elapsed
826
827
828 # ---------------------------------------------------------------------------
829 # exit_code field (TestExitCode)
830 # ---------------------------------------------------------------------------
831
832
class TestExitCode:
    """``exit_code`` in JSON must mirror the process exit code."""

    @staticmethod
    def _grep_hit(
        repo: pathlib.Path,
        files: dict[str, bytes],
        *,
        working_tree: bool = False,
    ) -> "InvokeResult":
        """Commit *files*, then search for ``hit`` with ``--json``.

        With *working_tree* the file ``a.txt`` is also written to disk and the
        search runs with ``--working-tree``.
        """
        _init_repo(repo)
        _commit_files(repo, files)
        argv = ["content-grep", "hit"]
        if working_tree:
            (repo / "a.txt").write_bytes(b"hit\n")
            argv.append("--working-tree")
        argv.append("--json")
        return _invoke(argv, env=_env(repo))

    def test_exit_code_zero_on_match(self, tmp_path: pathlib.Path) -> None:
        outcome = self._grep_hit(tmp_path, {"a.txt": b"hit\n"})
        assert outcome.exit_code == 0
        assert json.loads(outcome.output)["exit_code"] == 0

    def test_exit_code_zero_working_tree_match(self, tmp_path: pathlib.Path) -> None:
        outcome = self._grep_hit(tmp_path, {"a.txt": b"hit\n"}, working_tree=True)
        assert outcome.exit_code == 0
        assert json.loads(outcome.output)["exit_code"] == 0

    def test_exit_code_is_integer(self, tmp_path: pathlib.Path) -> None:
        outcome = self._grep_hit(tmp_path, {"a.txt": b"hit\n"})
        reported = json.loads(outcome.output)["exit_code"]
        assert isinstance(reported, int)

    def test_exit_code_in_json_matches_process_exit(self, tmp_path: pathlib.Path) -> None:
        """JSON exit_code must equal the actual process exit code."""
        outcome = self._grep_hit(tmp_path, {"a.txt": b"hit\n"})
        reported = json.loads(outcome.output)["exit_code"]
        assert reported == outcome.exit_code

    def test_exit_code_multiple_files(self, tmp_path: pathlib.Path) -> None:
        outcome = self._grep_hit(tmp_path, {"a.txt": b"hit\n", "b.txt": b"hit\n"})
        assert outcome.exit_code == 0
        assert json.loads(outcome.output)["exit_code"] == 0
875
876
877 # ---------------------------------------------------------------------------
878 # Flag registration tests
879 # ---------------------------------------------------------------------------
880
881 import argparse as _argparse
882 from muse.cli.commands.content_grep import register as _register_content_grep
883 from muse.core.paths import muse_dir, ref_path
884
885
def _parse_cgrep(*args: str) -> _argparse.Namespace:
    """Parse ``content-grep`` *args* with a fresh parser holding only that subcommand."""
    parser = _argparse.ArgumentParser()
    _register_content_grep(parser.add_subparsers(dest="cmd"))
    argv = ["content-grep", *args]
    return parser.parse_args(argv)
891
892
class TestRegisterFlags:
    """Namespace values produced by the ``content-grep`` argument registration."""

    def test_default_json_out_is_false(self) -> None:
        """Without a JSON flag, ``json_out`` is ``False``."""
        parsed = _parse_cgrep("TODO")
        assert parsed.json_out is False

    def test_json_flag_sets_json_out(self) -> None:
        """``--json`` sets ``json_out``."""
        parsed = _parse_cgrep("TODO", "--json")
        assert parsed.json_out is True

    def test_j_shorthand_sets_json_out(self) -> None:
        """``-j`` sets ``json_out``."""
        parsed = _parse_cgrep("TODO", "-j")
        assert parsed.json_out is True

    def test_pattern_positional(self) -> None:
        """The first positional argument is stored as ``pattern``."""
        parsed = _parse_cgrep("FIXME")
        assert parsed.pattern == "FIXME"
909
910
# ---------------------------------------------------------------------------
# JSON key ergonomics: results[].path and matches[].line
# ---------------------------------------------------------------------------
914
915
class TestJsonKeyErgonomics:
    """content-grep --json must use 'path' (matching all other muse commands) and
    'line' (not 'text') for match content."""

    @staticmethod
    def _first_committed_result(repo: pathlib.Path, rel_path: str) -> dict:
        """Commit ``hello world`` at *rel_path*, search ``hello``, return ``results[0]``."""
        _init_repo(repo)
        _commit_files(repo, {rel_path: b"hello world\n"})
        outcome = _invoke(["content-grep", "hello", "--json"], env=_env(repo))
        return json.loads(outcome.output)["results"][0]

    @staticmethod
    def _first_worktree_result(repo: pathlib.Path) -> dict:
        """Search the working tree for ``needle`` and return ``results[0]``.

        ``a.py`` is committed with placeholder content and then written to
        disk with the line that contains the needle.
        """
        _init_repo(repo)
        _commit_files(repo, {"a.py": b"placeholder\n"})
        (repo / "a.py").write_text("needle here\n", encoding="utf-8")
        outcome = _invoke(
            ["content-grep", "needle", "--working-tree", "--json"], env=_env(repo)
        )
        return json.loads(outcome.output)["results"][0]

    def test_result_key_is_path(self, tmp_path: pathlib.Path) -> None:
        entry = self._first_committed_result(tmp_path, "src/main.py")
        assert entry["path"] == "src/main.py"

    def test_result_has_no_file_key(self, tmp_path: pathlib.Path) -> None:
        entry = self._first_committed_result(tmp_path, "src/main.py")
        assert "file" not in entry

    def test_match_key_is_line_not_text(self, tmp_path: pathlib.Path) -> None:
        first_hit = self._first_committed_result(tmp_path, "a.py")["matches"][0]
        assert first_hit["line"] == "hello world"

    def test_match_has_no_text_key(self, tmp_path: pathlib.Path) -> None:
        first_hit = self._first_committed_result(tmp_path, "a.py")["matches"][0]
        assert "text" not in first_hit

    def test_working_tree_result_key_is_path(self, tmp_path: pathlib.Path) -> None:
        entry = self._first_worktree_result(tmp_path)
        assert entry["path"] == "a.py"

    def test_working_tree_match_key_is_line(self, tmp_path: pathlib.Path) -> None:
        first_hit = self._first_worktree_result(tmp_path)["matches"][0]
        assert first_hit["line"] == "needle here"
File History 1 commit
sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b fix: try fetch/presign before fetch/mpack to avoid Cloudfla… Sonnet 4.6 patch 7 days ago