tests/test_cmd_grep.py · gabriel/muse

1

"""Comprehensive tests for ``muse code grep``.

Coverage
--------
Unit
    _normalise_language   — case folding, unknown passthrough
    _file_matches         — exact, suffix, separator normalisation
    _resolve_file_filter  — single match, ambiguous, no match
    _MAX_PATTERN_LEN      — boundary value
    _KIND_ICON            — all documented kinds present

Integration
    grep basic substring  — match, no-match, kind filter, language filter
    grep --regex          — valid, invalid (→ exit 1), boundary
    grep --file           — scoped to one file (faster path)
    grep --files          — unique, sorted file paths, one per line
    grep --count          — prints integer, no extra lines
    grep --json           — schema correctness, unicode, qualified-name search
    grep --hashes         — content-id appears in output
    grep --commit         — historical snapshot
    qualified-name search — "Invoice.compute_total" hits only that method

Security / ReDoS
    Pattern length cap    — 512 accepted, 513 rejected
    Catastrophic regex    — (a+)+ type does not hang (timeout guard)
    NUL bytes in pattern  — handled without crash
    Control chars         — handled without crash

Stress
    1 000 symbols         — search completes in < 5 s
    512-char regex        — compiles and runs without hang
"""

32

33

from __future__ import annotations

import json

import pathlib

import textwrap

import time

import pytest

from tests.cli_test_helper import CliRunner

43

from muse.cli.commands.grep import (

_KIND_ICON,

_MAX_PATTERN_LEN,

_file_matches,

normalise_language as _normalise_language,

48

_resolve_file_filter,

)

# NOTE(review): ``cli`` is None yet is passed as the first argument of every
# ``runner.invoke`` call in this module — presumably the project's CliRunner
# helper locates the application itself; confirm in tests/cli_test_helper.
cli = None

# Single runner instance shared by every test in this module.
runner = CliRunner()

# ---------------------------------------------------------------------------

56

# Shared fixture

57

# ---------------------------------------------------------------------------

@pytest.fixture
def repo(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> pathlib.Path:
    """Create a code-domain Muse repo in ``tmp_path`` with two committed files.

    The working directory and ``MUSE_REPO_ROOT`` are pointed at ``tmp_path``
    before ``init`` runs.  ``billing.py`` and ``auth.py`` are then written and
    committed with the message ``initial``.

    Returns:
        The repository root (``tmp_path``).
    """
    monkeypatch.chdir(tmp_path)
    monkeypatch.setenv("MUSE_REPO_ROOT", str(tmp_path))

    init_result = runner.invoke(cli, ["init", "--domain", "code"])
    assert init_result.exit_code == 0, init_result.output

    # File name -> indented source; dedented just before being written.
    sources = {
        "billing.py": """\
            class Invoice:
                def compute_total(self, items: list[int]) -> int:
                    return sum(items)

                def apply_discount(self, total: float, pct: float) -> float:
                    return total * (1 - pct)

            def validate_amount(amount: float) -> bool:
                return amount > 0
        """,
        "auth.py": """\
            def validate_token(token: str) -> bool:
                return len(token) > 0

            class Validator:
                def validate(self, value: object) -> bool: # type: ignore[override]
                    return bool(value)
        """,
    }
    for filename, indented_source in sources.items():
        (tmp_path / filename).write_text(textwrap.dedent(indented_source))

    commit_result = runner.invoke(cli, ["commit", "-m", "initial"])
    assert commit_result.exit_code == 0, commit_result.output
    return tmp_path

# ---------------------------------------------------------------------------

95

# Unit — _normalise_language

96

# ---------------------------------------------------------------------------

97

98

99

class TestNormaliseLanguage:
    """Unit tests for the language-name normaliser."""

    def test_python_lowercase(self) -> None:
        normalised = _normalise_language("python")
        assert normalised == "Python"

    def test_python_uppercase(self) -> None:
        normalised = _normalise_language("PYTHON")
        assert normalised == "Python"

    def test_python_mixed(self) -> None:
        normalised = _normalise_language("PyThOn")
        assert normalised == "Python"

    def test_unknown_passthrough(self) -> None:
        # A name the normaliser does not recognise is expected back unchanged.
        assert _normalise_language("Cobol") == "Cobol"

    def test_strips_whitespace(self) -> None:
        # Surrounding whitespace must not prevent recognition.
        assert _normalise_language(" python ") == "Python"

    def test_empty_string(self) -> None:
        # The empty string is not a known language, so it is passed through.
        assert _normalise_language("") == ""

# ---------------------------------------------------------------------------

125

# Unit — _file_matches

126

# ---------------------------------------------------------------------------

127

128

129

class TestFileMatches:
    """Unit tests for ``_file_matches(path, file_filter)``."""

    def test_exact_match(self) -> None:
        matched = _file_matches("src/billing.py", "src/billing.py")
        assert matched

    def test_suffix_match(self) -> None:
        matched = _file_matches("src/billing.py", "billing.py")
        assert matched

    def test_no_match(self) -> None:
        matched = _file_matches("src/billing.py", "other.py")
        assert not matched

    def test_partial_name_no_match(self) -> None:
        # "illing.py" is a textual suffix of "billing.py", but a suffix only
        # counts when it begins at a "/" boundary.
        matched = _file_matches("src/billing.py", "illing.py")
        assert not matched

    def test_backslash_normalised(self) -> None:
        # A Windows-style separator in the filter is normalised before the
        # suffix comparison.
        matched = _file_matches("a/src/billing.py", "src\\billing.py")
        assert matched

    def test_empty_filter(self) -> None:
        # An empty filter must not crash and must not match:
        #   "src/billing.py".endswith("/" + "")  -> endswith("/") -> False
        #   "src/billing.py" == ""               -> False
        matched = _file_matches("src/billing.py", "")
        assert not matched

    def test_deep_path_suffix(self) -> None:
        matched = _file_matches("a/b/c/d.py", "c/d.py")
        assert matched

    def test_same_filename_different_dir_no_match(self) -> None:
        matched = _file_matches("src/billing.py", "tests/billing.py")
        assert not matched

160

161

162

# ---------------------------------------------------------------------------

163

# Unit — _resolve_file_filter

164

# ---------------------------------------------------------------------------

165

166

167

class TestResolveFileFilter:
    """Unit tests for ``_resolve_file_filter(file_filter, manifest)``."""

    def test_single_match_returns_full_path(self) -> None:
        files = {"src/billing.py": "abc", "src/auth.py": "def"}
        assert _resolve_file_filter("billing.py", files) == "src/billing.py"

    def test_no_match_returns_none(self) -> None:
        files = {"src/billing.py": "abc"}
        assert _resolve_file_filter("nonexistent.py", files) is None

    def test_ambiguous_raises_system_exit(self) -> None:
        # Two manifest entries share the basename, so the filter is ambiguous.
        files = {"a/billing.py": "hash1", "b/billing.py": "hash2"}
        with pytest.raises(SystemExit):
            _resolve_file_filter("billing.py", files)

    def test_exact_path_returns_itself(self) -> None:
        files = {"src/billing.py": "abc"}
        assert _resolve_file_filter("src/billing.py", files) == "src/billing.py"

    def test_empty_manifest_returns_none(self) -> None:
        assert _resolve_file_filter("billing.py", {}) is None

194

195

196

# ---------------------------------------------------------------------------

197

# Unit — _MAX_PATTERN_LEN and _KIND_ICON constants

198

# ---------------------------------------------------------------------------

class TestConstants:
    """Pin the module constants the CLI behaviour depends on."""

    def test_max_pattern_len_is_512(self) -> None:
        expected_cap = 512
        assert _MAX_PATTERN_LEN == expected_cap

    def test_kind_icon_has_function(self) -> None:
        kind = "function"
        assert kind in _KIND_ICON

    def test_kind_icon_has_class(self) -> None:
        kind = "class"
        assert kind in _KIND_ICON

    def test_kind_icon_has_method(self) -> None:
        kind = "method"
        assert kind in _KIND_ICON

213

214

215

# ---------------------------------------------------------------------------

216

# Integration — basic substring search

217

# ---------------------------------------------------------------------------

class TestGrepBasic:
    """Integration tests for plain substring search against the ``repo`` fixture."""

    def test_finds_function_by_name(self, repo: pathlib.Path) -> None:
        outcome = runner.invoke(cli, ["code", "grep", "validate"])
        assert outcome.exit_code == 0, outcome.output
        lowered = outcome.output.lower()
        assert "validate" in lowered

    def test_no_match_exits_zero(self, repo: pathlib.Path) -> None:
        outcome = runner.invoke(cli, ["code", "grep", "zzznomatch999"])
        assert outcome.exit_code == 0
        lowered = outcome.output.lower()
        assert "no symbols" in lowered

    def test_kind_filter_function(self, repo: pathlib.Path) -> None:
        argv = ["code", "grep", "validate", "--kind", "function"]
        outcome = runner.invoke(cli, argv)
        assert outcome.exit_code == 0
        # Only functions should appear (methods excluded).
        assert "fn" in outcome.output

    def test_kind_filter_class(self, repo: pathlib.Path) -> None:
        argv = ["code", "grep", "Invoice", "--kind", "class"]
        outcome = runner.invoke(cli, argv)
        assert outcome.exit_code == 0
        assert "Invoice" in outcome.output
        # Methods defined on Invoice must not be listed.
        assert "compute_total" not in outcome.output

    def test_language_filter(self, repo: pathlib.Path) -> None:
        argv = ["code", "grep", "validate", "--language", "python"]
        outcome = runner.invoke(cli, argv)
        assert outcome.exit_code == 0
        lowered = outcome.output.lower()
        assert "validate" in lowered

    def test_language_filter_unknown_exits_zero(self, repo: pathlib.Path) -> None:
        argv = ["code", "grep", "validate", "--language", "COBOL"]
        outcome = runner.invoke(cli, argv)
        # The repo has no COBOL files: zero matches, but not an error.
        assert outcome.exit_code == 0

    def test_match_count_suffix(self, repo: pathlib.Path) -> None:
        outcome = runner.invoke(cli, ["code", "grep", "validate"])
        lowered = outcome.output.lower()
        assert "match" in lowered

257

258

259

# ---------------------------------------------------------------------------

260

# Integration — --count flag

261

# ---------------------------------------------------------------------------

class TestGrepCount:
    """Integration tests for ``muse code grep --count``."""

    def test_count_only_prints_integer(self, repo: pathlib.Path) -> None:
        outcome = runner.invoke(cli, ["code", "grep", "validate", "--count"])
        assert outcome.exit_code == 0
        first_line = outcome.output.strip().splitlines()[0]
        assert first_line.endswith("match(es)")
        # The first whitespace-separated token must be the integer count.
        leading_token = first_line.split()[0]
        assert leading_token.isdigit()

    def test_count_zero_for_no_match(self, repo: pathlib.Path) -> None:
        outcome = runner.invoke(cli, ["code", "grep", "zzz_nothing", "--count"])
        assert outcome.exit_code == 0
        trimmed = outcome.output.strip()
        assert trimmed.startswith("0")

278

279

280

# ---------------------------------------------------------------------------

281

# Integration — --json output

282

# ---------------------------------------------------------------------------

class TestGrepJson:
    """Integration tests for ``muse code grep --json``.

    Each test parses the command's output as JSON and checks the payload's
    ``pattern``, ``total_matches`` and ``results`` keys.
    """

    def test_json_schema(self, repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "validate", "--json"])
        assert result.exit_code == 0
        data = json.loads(result.output)
        assert "total_matches" in data
        assert "results" in data
        assert isinstance(data["results"], list)

    def test_json_result_fields(self, repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "validate", "--json"])
        data = json.loads(result.output)
        # The fixture defines several "validate*" symbols.  An empty result
        # list must fail here rather than let the field check be skipped.
        assert data["results"], "expected at least one match for 'validate'"
        first = data["results"][0]
        for field in ("address", "kind", "name", "lineno"):
            assert field in first, f"missing field {field!r}"

    def test_json_total_matches_consistent(self, repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "validate", "--json"])
        data = json.loads(result.output)
        assert data["total_matches"] == len(data["results"])

    def test_json_no_match_empty_results(self, repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "zzz_nope_ever", "--json"])
        data = json.loads(result.output)
        assert data["total_matches"] == 0
        assert data["results"] == []

    def test_json_pattern_echoed(self, repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "validate", "--json"])
        data = json.loads(result.output)
        assert data["pattern"] == "validate"

    def test_json_unicode_pattern(self, repo: pathlib.Path) -> None:
        # A non-ASCII pattern must round-trip through the JSON payload intact.
        result = runner.invoke(cli, ["code", "grep", "café", "--json"])
        assert result.exit_code == 0
        data = json.loads(result.output)
        assert data["pattern"] == "café"

323

324

325

# ---------------------------------------------------------------------------

326

# Integration — --file scoped search

327

# ---------------------------------------------------------------------------

class TestGrepFile:
    """Integration tests for ``muse code grep --file`` (single-file scope)."""

    def test_file_scoped_results_only_from_that_file(self, repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "validate", "--file", "billing.py"])
        assert result.exit_code == 0
        # The fixture's billing.py defines validate_amount, so the scoped search
        # must report it; checking unconditionally stops the test from passing
        # vacuously when nothing is printed.
        assert "validate_amount" in result.output
        assert "auth.py" not in result.output

    def test_file_ambiguous_exits_nonzero(self, repo: pathlib.Path) -> None:
        # Add a second billing.py in a sub-directory so the bare basename
        # filter can refer to two committed files.
        (repo / "sub").mkdir()
        (repo / "sub" / "billing.py").write_text("def helper(): pass\n")
        commit = runner.invoke(cli, ["commit", "-m", "add sub billing"])
        # If the commit failed there would be only one billing.py and the
        # ambiguity check below would be testing the wrong scenario.
        assert commit.exit_code == 0, commit.output

        result = runner.invoke(cli, ["code", "grep", "helper", "--file", "billing.py"])
        # With two billing.py files the filter should be reported as ambiguous.
        assert result.exit_code == 1 or "ambiguous" in result.output.lower()

345

346

347

# ---------------------------------------------------------------------------

348

# Integration — --hashes flag

349

# ---------------------------------------------------------------------------

350

351

352

class TestGrepHashes:
    """Integration test for ``muse code grep --hashes``."""

    def test_hashes_appear_in_output(self, repo: pathlib.Path) -> None:
        argv = ["code", "grep", "validate", "--hashes"]
        outcome = runner.invoke(cli, argv)
        assert outcome.exit_code == 0
        # A content-hash prefix is rendered as 8 hex chars followed by "..".
        assert ".." in outcome.output

358

359

360

# ---------------------------------------------------------------------------

361

# Integration — qualified name search

362

# ---------------------------------------------------------------------------

363

364

365

class TestGrepQualifiedName:
    """Integration tests for searching by qualified (``Class.member``) name."""

    def test_dot_separator_hits_qualified_name(self, repo: pathlib.Path) -> None:
        outcome = runner.invoke(cli, ["code", "grep", "Invoice.compute_total"])
        assert outcome.exit_code == 0
        assert "compute_total" in outcome.output

    def test_double_colon_separator_hits_qualified_name(self, repo: pathlib.Path) -> None:
        outcome = runner.invoke(cli, ["code", "grep", "Invoice::compute_total"])
        assert outcome.exit_code == 0

    def test_qualified_name_does_not_match_unrelated(self, repo: pathlib.Path) -> None:
        outcome = runner.invoke(cli, ["code", "grep", "Invoice.zzz_missing"])
        assert outcome.exit_code == 0
        lowered = outcome.output.lower()
        # Either phrasing of "nothing found" is accepted.
        assert "no symbols" in lowered or "0 match" in lowered

379

380

381

# ---------------------------------------------------------------------------

382

# Integration — --regex flag

383

# ---------------------------------------------------------------------------

class TestGrepRegex:
    """Integration tests for ``muse code grep --regex``."""

    def test_valid_regex_matches(self, repo: pathlib.Path) -> None:
        outcome = runner.invoke(cli, ["code", "grep", "--regex", "^validate"])
        assert outcome.exit_code == 0

    def test_invalid_regex_exits_one(self, repo: pathlib.Path) -> None:
        outcome = runner.invoke(cli, ["code", "grep", "--regex", "[unclosed"])
        assert outcome.exit_code == 1
        # The diagnostic is expected on stderr, mentioning the regex problem.
        complaint = outcome.stderr.lower()
        assert "regex" in complaint or "invalid" in complaint

    def test_regex_anchored_no_match(self, repo: pathlib.Path) -> None:
        outcome = runner.invoke(cli, ["code", "grep", "--regex", "^zzz_nothing$"])
        assert outcome.exit_code == 0
        lowered = outcome.output.lower()
        assert "0 match" in lowered or "no symbols" in lowered

400

401

402

# ---------------------------------------------------------------------------

403

# Security — ReDoS guards

404

# ---------------------------------------------------------------------------

405

406

407

class TestGrepSecurity:
    """Pattern-length cap, hostile pattern characters and missing-repo handling."""

    def test_pattern_at_512_accepted(self, repo: pathlib.Path) -> None:
        at_limit = "a" * 512
        outcome = runner.invoke(cli, ["code", "grep", at_limit])
        assert "too long" not in outcome.output.lower()

    def test_pattern_at_513_rejected(self, repo: pathlib.Path) -> None:
        over_limit = "a" * 513
        outcome = runner.invoke(cli, ["code", "grep", over_limit])
        assert outcome.exit_code == 1
        assert "512" in outcome.stderr or "too long" in outcome.stderr.lower()

    def test_catastrophic_regex_does_not_hang(self, repo: pathlib.Path) -> None:
        # (a+)+ is exponential on backtracking engines.  Without --regex the
        # pattern is escaped, so the command searches for the literal text
        # "(a+)+" as a substring, which must return quickly.
        began = time.monotonic()
        outcome = runner.invoke(cli, ["code", "grep", "(a+)+"])
        took = time.monotonic() - began
        assert outcome.exit_code == 0
        assert took < 5.0, f"grep took {took:.1f}s — possible hang"

    def test_null_bytes_in_pattern_handled(self, repo: pathlib.Path) -> None:
        # A NUL byte in the pattern must not crash the process.
        outcome = runner.invoke(cli, ["code", "grep", "val\x00idate"])
        assert outcome.exit_code in (0, 1)

    def test_control_chars_in_pattern_handled(self, repo: pathlib.Path) -> None:
        outcome = runner.invoke(cli, ["code", "grep", "val\x01\x02idate"])
        assert outcome.exit_code in (0, 1)

    def test_requires_repo(self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> None:
        # Run from an empty directory with no MUSE_REPO_ROOT override.
        monkeypatch.chdir(tmp_path)
        monkeypatch.delenv("MUSE_REPO_ROOT", raising=False)
        outcome = runner.invoke(cli, ["code", "grep", "validate"])
        assert outcome.exit_code != 0

444

445

446

# ---------------------------------------------------------------------------

447

# Stress — 1 000 symbols, search must complete in < 5 s

448

# ---------------------------------------------------------------------------

449

450

451

class TestGrepStress:
    """Timing and correctness checks against a repo holding 1 000 functions."""

    @pytest.fixture
    def large_repo(
        self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
    ) -> pathlib.Path:
        """Repo with ~1 000 Python symbols across 10 files.

        Writes ``module_0.py`` … ``module_9.py``, each defining 100 functions
        named ``compute_<file>_<n>``, and commits them.  Returns the repo root.
        """
        monkeypatch.chdir(tmp_path)
        monkeypatch.setenv("MUSE_REPO_ROOT", str(tmp_path))
        init = runner.invoke(cli, ["init", "--domain", "code"])
        # Fail at the real cause instead of at a later commit or search.
        assert init.exit_code == 0, init.output

        for file_idx in range(10):
            lines: list[str] = []
            for sym_idx in range(100):
                lines.extend(
                    (
                        f"def compute_{file_idx}_{sym_idx}(x: int) -> int:",
                        f" return x + {sym_idx}",
                        "",
                    )
                )
            (tmp_path / f"module_{file_idx}.py").write_text("\n".join(lines))

        r = runner.invoke(cli, ["commit", "-m", "large module"])
        assert r.exit_code == 0, r.output
        return tmp_path

    def test_search_1000_symbols_under_5s(self, large_repo: pathlib.Path) -> None:
        start = time.monotonic()
        result = runner.invoke(cli, ["code", "grep", "compute"])
        elapsed = time.monotonic() - start
        assert result.exit_code == 0, result.output
        assert "1000" in result.output or "match" in result.output.lower()
        assert elapsed < 5.0, f"grep took {elapsed:.1f}s on 1 000 symbols"

    def test_count_mode_1000_symbols_under_5s(self, large_repo: pathlib.Path) -> None:
        start = time.monotonic()
        result = runner.invoke(cli, ["code", "grep", "compute", "--count"])
        elapsed = time.monotonic() - start
        assert result.exit_code == 0
        assert elapsed < 5.0

    def test_json_mode_1000_symbols_schema_valid(self, large_repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "compute", "--json"])
        assert result.exit_code == 0
        data = json.loads(result.output)
        assert data["total_matches"] == len(data["results"])
        assert data["total_matches"] >= 1000

    def test_regex_search_1000_symbols_under_5s(self, large_repo: pathlib.Path) -> None:
        start = time.monotonic()
        result = runner.invoke(cli, ["code", "grep", "--regex", r"^compute_\d"])
        elapsed = time.monotonic() - start
        assert result.exit_code == 0
        assert elapsed < 5.0

    def test_kind_filter_1000_symbols_correct_count(self, large_repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "compute", "--kind", "function", "--count"])
        assert result.exit_code == 0
        # --count prints the integer count as the first token of the first line.
        line = result.output.strip().splitlines()[0]
        count = int(line.split()[0])
        assert count >= 1000

    def test_512_char_regex_does_not_hang_on_large_corpus(
        self, large_repo: pathlib.Path
    ) -> None:
        # "compute_" is 8 chars; 8 + 504 zeros = exactly 512, the accepted cap.
        # NOTE(review): --regex is not passed, so despite the test name this
        # exercises the plain-substring path — confirm that is intended.
        pattern = f"compute_{'0' * 504}"
        start = time.monotonic()
        result = runner.invoke(cli, ["code", "grep", pattern])
        elapsed = time.monotonic() - start
        assert elapsed < 5.0, f"512-char pattern took {elapsed:.1f}s — possible hang"
        assert result.exit_code in (0, 1)  # no match is fine

518

519

520

# ---------------------------------------------------------------------------

521

# --files flag (-l) — one file path per line, unique, sorted

522

# ---------------------------------------------------------------------------

class TestGrepFiles:
    """``muse code grep --files`` prints one unique file path per line.

    Ergonomics goal: trivially pipeable without JSON parsing.
    Mirrors ``grep -l`` / ``rg -l`` behaviour.
    """

    @staticmethod
    def _nonblank_lines(output: str) -> list[str]:
        """Return the stripped, non-blank lines of ``output`` in order."""
        return [row.strip() for row in output.splitlines() if row.strip()]

    def test_files_lists_matching_file(self, repo: pathlib.Path) -> None:
        outcome = runner.invoke(cli, ["code", "grep", "validate", "--files"])
        assert outcome.exit_code == 0, outcome.output
        paths = self._nonblank_lines(outcome.output)
        assert any("billing.py" in path or "auth.py" in path for path in paths)

    def test_files_output_is_unique_paths(self, repo: pathlib.Path) -> None:
        """Each file path appears at most once, even if it has multiple matches."""
        outcome = runner.invoke(cli, ["code", "grep", "validate", "--files"])
        assert outcome.exit_code == 0, outcome.output
        paths = self._nonblank_lines(outcome.output)
        assert len(paths) == len(set(paths)), "duplicate file paths in --files output"

    def test_files_output_is_sorted(self, repo: pathlib.Path) -> None:
        outcome = runner.invoke(cli, ["code", "grep", "validate", "--files"])
        assert outcome.exit_code == 0, outcome.output
        paths = self._nonblank_lines(outcome.output)
        assert paths == sorted(paths), "--files output must be sorted"

    def test_files_no_match_empty_output(self, repo: pathlib.Path) -> None:
        outcome = runner.invoke(cli, ["code", "grep", "zzznomatch", "--files"])
        assert outcome.exit_code == 0
        assert outcome.output.strip() == ""

    def test_files_excludes_non_matching_files(self, repo: pathlib.Path) -> None:
        """Only files that contain at least one match appear."""
        outcome = runner.invoke(cli, ["code", "grep", "Invoice", "--files"])
        assert outcome.exit_code == 0, outcome.output
        paths = self._nonblank_lines(outcome.output)
        # Invoice is defined only in billing.py.
        assert all("billing.py" in path for path in paths)
        assert not any("auth.py" in path for path in paths)

    def test_files_mutually_exclusive_with_json(self, repo: pathlib.Path) -> None:
        outcome = runner.invoke(cli, ["code", "grep", "validate", "--files", "--json"])
        assert outcome.exit_code != 0

    def test_files_mutually_exclusive_with_count(self, repo: pathlib.Path) -> None:
        outcome = runner.invoke(cli, ["code", "grep", "validate", "--files", "--count"])
        assert outcome.exit_code != 0

    def test_files_long_flag_only(self, repo: pathlib.Path) -> None:
        """``--files`` is the only form (``-l`` is taken by ``--language``)."""
        outcome = runner.invoke(cli, ["code", "grep", "validate", "--files"])
        assert outcome.exit_code == 0, outcome.output
        paths = self._nonblank_lines(outcome.output)
        assert len(paths) > 0

    def test_files_compatible_with_kind_filter(self, repo: pathlib.Path) -> None:
        argv = ["code", "grep", "validate", "--files", "--kind", "function"]
        outcome = runner.invoke(cli, argv)
        assert outcome.exit_code == 0, outcome.output
        paths = self._nonblank_lines(outcome.output)
        # validate_amount and validate_token are both plain functions.
        assert len(paths) > 0

    def test_files_compatible_with_file_filter(self, repo: pathlib.Path) -> None:
        argv = ["code", "grep", "validate", "--files", "--file", "billing.py"]
        outcome = runner.invoke(cli, argv)
        assert outcome.exit_code == 0, outcome.output
        paths = self._nonblank_lines(outcome.output)
        assert all("billing.py" in path for path in paths)