tests/test_cmd_grep.py · gabriel/muse

1

"""Comprehensive tests for ``muse code grep``.

2

3

Coverage

4

--------

5

Unit

6

_normalise_language — case folding, unknown passthrough

7

_file_matches — exact, suffix, separator normalisation

8

_resolve_file_filter — single match, ambiguous, no match

9

_MAX_PATTERN_LEN — boundary value

10

_KIND_ICON — all documented kinds present

11

12

Integration

13

grep basic substring — match, no-match, kind filter, language filter

14

grep --regex — valid, invalid (→ exit 1), boundary

15

grep --file — scoped to one file (faster path)

16

grep --count — prints integer, no extra lines

17

grep --json — schema correctness, unicode, qualified-name search

18

grep --hashes — content-id appears in output

19

grep --commit — historical snapshot

20

qualified-name search — "Invoice.compute_total" hits only that method

21

22

Security / ReDoS

23

Pattern length cap — 512 accepted, 513 rejected

24

Catastrophic regex — (a+)+ type does not hang (timeout guard)

25

NUL bytes in pattern — handled without crash

26

Control chars — handled without crash

27

28

Stress

29

1 000 symbols — search completes in < 5 s

30

512-char regex — compiles and runs without hang

31

"""

32

33

from __future__ import annotations

import json

import pathlib

import textwrap

import time

import pytest

from tests.cli_test_helper import CliRunner

43

from muse.cli.commands.grep import (

_KIND_ICON,

_MAX_PATTERN_LEN,

_file_matches,

normalise_language as _normalise_language,

48

_resolve_file_filter,

)

# Placeholder handed to ``runner.invoke`` as its first argument by every test
# and fixture in this module.
# NOTE(review): presumably tests.cli_test_helper.CliRunner ignores this value — confirm.
cli = None

# Single runner instance shared by all tests and fixtures in this module.
runner = CliRunner()

# ---------------------------------------------------------------------------

56

# Shared fixture

57

# ---------------------------------------------------------------------------

@pytest.fixture
def repo(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> pathlib.Path:
    """Build a committed code-domain Muse repo containing two Python files.

    The process working directory and ``MUSE_REPO_ROOT`` are pointed at
    ``tmp_path``, ``init --domain code`` is run, ``billing.py`` and
    ``auth.py`` are written, and everything is committed.  Returns
    ``tmp_path`` so tests can add further files.
    """
    billing_source = textwrap.dedent("""\
        class Invoice:
            def compute_total(self, items: list[int]) -> int:
                return sum(items)

            def apply_discount(self, total: float, pct: float) -> float:
                return total * (1 - pct)

        def validate_amount(amount: float) -> bool:
            return amount > 0
    """)
    auth_source = textwrap.dedent("""\
        def validate_token(token: str) -> bool:
            return len(token) > 0

        class Validator:
            def validate(self, value: object) -> bool: # type: ignore[override]
                return bool(value)
    """)

    monkeypatch.chdir(tmp_path)
    monkeypatch.setenv("MUSE_REPO_ROOT", str(tmp_path))

    init_outcome = runner.invoke(cli, ["init", "--domain", "code"])
    assert init_outcome.exit_code == 0, init_outcome.output

    billing_path = tmp_path / "billing.py"
    billing_path.write_text(billing_source)
    auth_path = tmp_path / "auth.py"
    auth_path.write_text(auth_source)

    commit_outcome = runner.invoke(cli, ["commit", "-m", "initial"])
    assert commit_outcome.exit_code == 0, commit_outcome.output
    return tmp_path

# ---------------------------------------------------------------------------

95

# Unit — _normalise_language

96

# ---------------------------------------------------------------------------

97

98

99

class TestNormaliseLanguage:
    """Unit checks for the language normaliser imported as ``_normalise_language``."""

    def test_python_lowercase(self) -> None:
        normalised = _normalise_language("python")
        assert normalised == "Python"

    def test_python_uppercase(self) -> None:
        normalised = _normalise_language("PYTHON")
        assert normalised == "Python"

    def test_python_mixed(self) -> None:
        normalised = _normalise_language("PyThOn")
        assert normalised == "Python"

    def test_unknown_passthrough(self) -> None:
        # A name the normaliser does not recognise comes back unchanged
        # (after strip).
        assert _normalise_language("Cobol") == "Cobol"

    def test_strips_whitespace(self) -> None:
        assert _normalise_language(" python ") == "Python"

    def test_empty_string(self) -> None:
        # The empty string is not a known language, so it is passed through.
        assert _normalise_language("") == ""

# ---------------------------------------------------------------------------

125

# Unit — _file_matches

126

# ---------------------------------------------------------------------------

127

128

129

class TestFileMatches:
    """Unit checks for ``_file_matches(path, file_filter)``."""

    def test_exact_match(self) -> None:
        matched = _file_matches("src/billing.py", "src/billing.py")
        assert matched

    def test_suffix_match(self) -> None:
        matched = _file_matches("src/billing.py", "billing.py")
        assert matched

    def test_no_match(self) -> None:
        matched = _file_matches("src/billing.py", "other.py")
        assert not matched

    def test_partial_name_no_match(self) -> None:
        # "illing.py" is a textual suffix of "billing.py", but a suffix only
        # counts when it begins at a "/" boundary.
        matched = _file_matches("src/billing.py", "illing.py")
        assert not matched

    def test_backslash_normalised(self) -> None:
        # Windows-style separators in the filter are normalised before the
        # suffix comparison.
        matched = _file_matches("a/src/billing.py", "src\\billing.py")
        assert matched

    def test_empty_filter(self) -> None:
        # An empty filter must not crash and must not match:
        #   "src/billing.py".endswith("/" + "") == endswith("/") → False
        #   "src/billing.py" == ""                               → False
        matched = _file_matches("src/billing.py", "")
        assert not matched

    def test_deep_path_suffix(self) -> None:
        matched = _file_matches("a/b/c/d.py", "c/d.py")
        assert matched

    def test_same_filename_different_dir_no_match(self) -> None:
        matched = _file_matches("src/billing.py", "tests/billing.py")
        assert not matched

160

161

162

# ---------------------------------------------------------------------------

163

# Unit — _resolve_file_filter

164

# ---------------------------------------------------------------------------

165

166

167

class TestResolveFileFilter:
    """Unit checks for ``_resolve_file_filter(file_filter, manifest)``."""

    def test_single_match_returns_full_path(self) -> None:
        tracked = {"src/billing.py": "abc", "src/auth.py": "def"}
        assert _resolve_file_filter("billing.py", tracked) == "src/billing.py"

    def test_no_match_returns_none(self) -> None:
        tracked = {"src/billing.py": "abc"}
        assert _resolve_file_filter("nonexistent.py", tracked) is None

    def test_ambiguous_raises_system_exit(self) -> None:
        # Two manifest entries share the basename, so the filter is ambiguous.
        tracked = {"a/billing.py": "hash1", "b/billing.py": "hash2"}
        with pytest.raises(SystemExit):
            _resolve_file_filter("billing.py", tracked)

    def test_exact_path_returns_itself(self) -> None:
        tracked = {"src/billing.py": "abc"}
        assert _resolve_file_filter("src/billing.py", tracked) == "src/billing.py"

    def test_empty_manifest_returns_none(self) -> None:
        assert _resolve_file_filter("billing.py", {}) is None

194

195

196

# ---------------------------------------------------------------------------

197

# Unit — _MAX_PATTERN_LEN and _KIND_ICON constants

198

# ---------------------------------------------------------------------------

class TestConstants:
    """Pin the module constants ``_MAX_PATTERN_LEN`` and ``_KIND_ICON``."""

    def test_max_pattern_len_is_512(self) -> None:
        expected_cap = 512
        assert _MAX_PATTERN_LEN == expected_cap

    def test_kind_icon_has_function(self) -> None:
        kind = "function"
        assert kind in _KIND_ICON

    def test_kind_icon_has_class(self) -> None:
        kind = "class"
        assert kind in _KIND_ICON

    def test_kind_icon_has_method(self) -> None:
        kind = "method"
        assert kind in _KIND_ICON

213

214

215

# ---------------------------------------------------------------------------

216

# Integration — basic substring search

217

# ---------------------------------------------------------------------------

class TestGrepBasic:
    """Plain substring search through ``muse code grep``."""

    def test_finds_function_by_name(self, repo: pathlib.Path) -> None:
        outcome = runner.invoke(cli, ["code", "grep", "validate"])
        assert outcome.exit_code == 0, outcome.output
        lowered = outcome.output.lower()
        assert "validate" in lowered

    def test_no_match_exits_zero(self, repo: pathlib.Path) -> None:
        outcome = runner.invoke(cli, ["code", "grep", "zzznomatch999"])
        assert outcome.exit_code == 0
        lowered = outcome.output.lower()
        assert "no symbols" in lowered

    def test_kind_filter_function(self, repo: pathlib.Path) -> None:
        argv = ["code", "grep", "validate", "--kind", "function"]
        outcome = runner.invoke(cli, argv)
        assert outcome.exit_code == 0
        # Only functions should appear (methods excluded).
        assert "fn" in outcome.output

    def test_kind_filter_class(self, repo: pathlib.Path) -> None:
        argv = ["code", "grep", "Invoice", "--kind", "class"]
        outcome = runner.invoke(cli, argv)
        assert outcome.exit_code == 0
        printed = outcome.output
        assert "Invoice" in printed
        # Methods of Invoice should NOT appear.
        assert "compute_total" not in printed

    def test_language_filter(self, repo: pathlib.Path) -> None:
        argv = ["code", "grep", "validate", "--language", "python"]
        outcome = runner.invoke(cli, argv)
        assert outcome.exit_code == 0
        lowered = outcome.output.lower()
        assert "validate" in lowered

    def test_language_filter_unknown_exits_zero(self, repo: pathlib.Path) -> None:
        argv = ["code", "grep", "validate", "--language", "COBOL"]
        outcome = runner.invoke(cli, argv)
        # No COBOL files — 0 matches, but not an error.
        assert outcome.exit_code == 0

    def test_match_count_suffix(self, repo: pathlib.Path) -> None:
        outcome = runner.invoke(cli, ["code", "grep", "validate"])
        lowered = outcome.output.lower()
        assert "match" in lowered

257

258

259

# ---------------------------------------------------------------------------

260

# Integration — --count flag

261

# ---------------------------------------------------------------------------

class TestGrepCount:
    """``muse code grep --count`` reports a match total."""

    def test_count_only_prints_integer(self, repo: pathlib.Path) -> None:
        outcome = runner.invoke(cli, ["code", "grep", "validate", "--count"])
        assert outcome.exit_code == 0
        first_line = outcome.output.strip().splitlines()[0]
        assert first_line.endswith("match(es)")
        # The leading token must be an integer.
        leading_token = first_line.split()[0]
        assert leading_token.isdigit()

    def test_count_zero_for_no_match(self, repo: pathlib.Path) -> None:
        outcome = runner.invoke(cli, ["code", "grep", "zzz_nothing", "--count"])
        assert outcome.exit_code == 0
        trimmed = outcome.output.strip()
        assert trimmed.startswith("0")

278

279

280

# ---------------------------------------------------------------------------

281

# Integration — --json output

282

# ---------------------------------------------------------------------------

class TestGrepJson:
    """``muse code grep --json`` emits a machine-readable result document.

    Every test checks the exit code before decoding so that a failing
    command reports the CLI output instead of a ``JSONDecodeError``.
    """

    def test_json_schema(self, repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "validate", "--json"])
        assert result.exit_code == 0, result.output
        data = json.loads(result.output)
        assert "total_matches" in data
        assert "results" in data
        assert isinstance(data["results"], list)

    def test_json_result_fields(self, repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "validate", "--json"])
        assert result.exit_code == 0, result.output
        data = json.loads(result.output)
        # The ``repo`` fixture defines validate_amount / validate_token /
        # Validator.validate, so an empty result list is a failure here
        # rather than a reason to skip the field checks.
        assert data["results"], "expected at least one match for 'validate'"
        for entry in data["results"]:
            for field in ("address", "kind", "name", "lineno"):
                assert field in entry, f"missing field {field!r}"

    def test_json_total_matches_consistent(self, repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "validate", "--json"])
        assert result.exit_code == 0, result.output
        data = json.loads(result.output)
        assert data["total_matches"] == len(data["results"])

    def test_json_no_match_empty_results(self, repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "zzz_nope_ever", "--json"])
        assert result.exit_code == 0, result.output
        data = json.loads(result.output)
        assert data["total_matches"] == 0
        assert data["results"] == []

    def test_json_pattern_echoed(self, repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "validate", "--json"])
        assert result.exit_code == 0, result.output
        data = json.loads(result.output)
        assert data["pattern"] == "validate"

    def test_json_unicode_pattern(self, repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "café", "--json"])
        assert result.exit_code == 0
        data = json.loads(result.output)
        # Non-ASCII patterns must round-trip through the JSON payload intact.
        assert data["pattern"] == "café"

323

324

325

# ---------------------------------------------------------------------------

326

# Integration — --file scoped search

327

# ---------------------------------------------------------------------------

class TestGrepFile:
    """``muse code grep --file`` restricts the search to one tracked file."""

    def test_file_scoped_results_only_from_that_file(self, repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "validate", "--file", "billing.py"])
        assert result.exit_code == 0, result.output
        # billing.py defines validate_amount, so the scoped search must produce
        # a hit; asserting it keeps the exclusion check below from passing
        # vacuously on empty output.
        assert "validate" in result.output
        assert "auth.py" not in result.output

    def test_file_ambiguous_exits_nonzero(self, repo: pathlib.Path) -> None:
        # Create two files with the same basename in different dirs.
        (repo / "sub").mkdir()
        (repo / "sub" / "billing.py").write_text("def helper(): pass\n")
        runner.invoke(cli, ["code", "add", "."])
        runner.invoke(cli, ["commit", "-m", "add sub billing"])
        result = runner.invoke(cli, ["code", "grep", "helper", "--file", "billing.py"])
        # With two billing.py files the filter should be ambiguous; either the
        # exit code or the message is accepted as the signal.
        assert result.exit_code == 1 or "ambiguous" in result.output.lower()

346

347

348

# ---------------------------------------------------------------------------

349

# Integration — --hashes flag

350

# ---------------------------------------------------------------------------

351

352

353

class TestGrepHashes:
    """``muse code grep --hashes`` adds content-hash prefixes to the listing."""

    def test_hashes_appear_in_output(self, repo: pathlib.Path) -> None:
        argv = ["code", "grep", "validate", "--hashes"]
        outcome = runner.invoke(cli, argv)
        assert outcome.exit_code == 0
        # A content hash prefix is rendered as 8 hex chars followed by "..".
        printed = outcome.output
        assert ".." in printed

359

360

361

# ---------------------------------------------------------------------------

362

# Integration — qualified name search

363

# ---------------------------------------------------------------------------

364

365

366

class TestGrepQualifiedName:
    """Searching with a ``Class.method`` / ``Class::method`` style pattern."""

    def test_dot_separator_hits_qualified_name(self, repo: pathlib.Path) -> None:
        outcome = runner.invoke(cli, ["code", "grep", "Invoice.compute_total"])
        assert outcome.exit_code == 0
        assert "compute_total" in outcome.output

    def test_double_colon_separator_hits_qualified_name(self, repo: pathlib.Path) -> None:
        outcome = runner.invoke(cli, ["code", "grep", "Invoice::compute_total"])
        assert outcome.exit_code == 0

    def test_qualified_name_does_not_match_unrelated(self, repo: pathlib.Path) -> None:
        outcome = runner.invoke(cli, ["code", "grep", "Invoice.zzz_missing"])
        assert outcome.exit_code == 0
        lowered = outcome.output.lower()
        # Either "nothing found" wording is accepted.
        assert "no symbols" in lowered or "0 match" in lowered

380

381

382

# ---------------------------------------------------------------------------

383

# Integration — --regex flag

384

# ---------------------------------------------------------------------------

class TestGrepRegex:
    """``muse code grep --regex`` treats the pattern as a regular expression."""

    def test_valid_regex_matches(self, repo: pathlib.Path) -> None:
        outcome = runner.invoke(cli, ["code", "grep", "--regex", "^validate"])
        assert outcome.exit_code == 0

    def test_invalid_regex_exits_one(self, repo: pathlib.Path) -> None:
        outcome = runner.invoke(cli, ["code", "grep", "--regex", "[unclosed"])
        assert outcome.exit_code == 1
        # The diagnostic is read from stderr.
        complaint = outcome.stderr.lower()
        assert "regex" in complaint or "invalid" in complaint

    def test_regex_anchored_no_match(self, repo: pathlib.Path) -> None:
        outcome = runner.invoke(cli, ["code", "grep", "--regex", "^zzz_nothing$"])
        assert outcome.exit_code == 0
        lowered = outcome.output.lower()
        assert "0 match" in lowered or "no symbols" in lowered

401

402

403

# ---------------------------------------------------------------------------

404

# Security — ReDoS guards

405

# ---------------------------------------------------------------------------

406

407

408

class TestGrepSecurity:
    """Hostile or malformed patterns must be rejected cleanly or handled fast."""

    def test_pattern_at_512_accepted(self, repo: pathlib.Path) -> None:
        pattern = "a" * 512
        result = runner.invoke(cli, ["code", "grep", pattern])
        # A pattern at the cap is an ordinary no-match search, which exits 0.
        assert result.exit_code == 0, result.output
        # The sibling 513-char test reads the rejection text from stderr, so
        # both streams are checked here; looking at stdout alone could not
        # detect a wrongly rejected pattern.
        assert "too long" not in result.output.lower()
        assert "too long" not in result.stderr.lower()

    def test_pattern_at_513_rejected(self, repo: pathlib.Path) -> None:
        pattern = "a" * 513
        result = runner.invoke(cli, ["code", "grep", pattern])
        assert result.exit_code == 1
        assert "512" in result.stderr or "too long" in result.stderr.lower()

    def test_catastrophic_regex_does_not_hang(self, repo: pathlib.Path) -> None:
        # (a+)+ is exponential on backtracking engines; Python's re module
        # with IGNORECASE+escape still builds a safe compiled pattern.
        # Without --regex the pattern is escaped so it literally searches for
        # "(a+)+" as a substring — which must return quickly.
        start = time.monotonic()
        result = runner.invoke(cli, ["code", "grep", "(a+)+"])
        elapsed = time.monotonic() - start
        assert result.exit_code == 0
        assert elapsed < 5.0, f"grep took {elapsed:.1f}s — possible hang"

    def test_null_bytes_in_pattern_handled(self, repo: pathlib.Path) -> None:
        # NUL byte in pattern must not crash the process.
        result = runner.invoke(cli, ["code", "grep", "val\x00idate"])
        assert result.exit_code in (0, 1)

    def test_control_chars_in_pattern_handled(self, repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "val\x01\x02idate"])
        assert result.exit_code in (0, 1)

    def test_requires_repo(self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> None:
        # Run from an empty directory with no MUSE_REPO_ROOT override, so
        # there is no repository for the command to find.
        monkeypatch.chdir(tmp_path)
        monkeypatch.delenv("MUSE_REPO_ROOT", raising=False)
        result = runner.invoke(cli, ["code", "grep", "validate"])
        assert result.exit_code != 0

445

446

447

# ---------------------------------------------------------------------------

448

# Stress — 1 000 symbols, search must complete in < 5 s

449

# ---------------------------------------------------------------------------

450

451

452

class TestGrepStress:
    """Searches over a ~1 000-symbol corpus must stay correct and fast."""

    @pytest.fixture
    def large_repo(
        self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
    ) -> pathlib.Path:
        """Repo with ~1 000 Python symbols across 10 files."""
        monkeypatch.chdir(tmp_path)
        monkeypatch.setenv("MUSE_REPO_ROOT", str(tmp_path))
        # Fail fast on a broken init, as the module-level ``repo`` fixture
        # does, instead of surfacing it later as a confusing commit failure.
        init_result = runner.invoke(cli, ["init", "--domain", "code"])
        assert init_result.exit_code == 0, init_result.output

        for file_idx in range(10):
            # 100 one-line functions per file, each followed by a blank line.
            lines: list[str] = []
            for sym_idx in range(100):
                lines.append(f"def compute_{file_idx}_{sym_idx}(x: int) -> int:")
                lines.append(f" return x + {sym_idx}")
                lines.append("")
            (tmp_path / f"module_{file_idx}.py").write_text("\n".join(lines))

        r = runner.invoke(cli, ["commit", "-m", "large module"])
        assert r.exit_code == 0, r.output
        return tmp_path

    def test_search_1000_symbols_under_5s(self, large_repo: pathlib.Path) -> None:
        start = time.monotonic()
        result = runner.invoke(cli, ["code", "grep", "compute"])
        elapsed = time.monotonic() - start
        assert result.exit_code == 0, result.output
        assert "1000" in result.output or "match" in result.output.lower()
        assert elapsed < 5.0, f"grep took {elapsed:.1f}s on 1 000 symbols"

    def test_count_mode_1000_symbols_under_5s(self, large_repo: pathlib.Path) -> None:
        start = time.monotonic()
        result = runner.invoke(cli, ["code", "grep", "compute", "--count"])
        elapsed = time.monotonic() - start
        assert result.exit_code == 0
        assert elapsed < 5.0

    def test_json_mode_1000_symbols_schema_valid(self, large_repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "compute", "--json"])
        assert result.exit_code == 0
        data = json.loads(result.output)
        assert data["total_matches"] == len(data["results"])
        assert data["total_matches"] >= 1000

    def test_regex_search_1000_symbols_under_5s(self, large_repo: pathlib.Path) -> None:
        start = time.monotonic()
        result = runner.invoke(cli, ["code", "grep", "--regex", r"^compute_\d"])
        elapsed = time.monotonic() - start
        assert result.exit_code == 0
        assert elapsed < 5.0

    def test_kind_filter_1000_symbols_correct_count(self, large_repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "compute", "--kind", "function", "--count"])
        assert result.exit_code == 0
        # The first output line starts with the integer match count.
        line = result.output.strip().splitlines()[0]
        count = int(line.split()[0])
        assert count >= 1000

    def test_512_char_regex_does_not_hang_on_large_corpus(
        self, large_repo: pathlib.Path
    ) -> None:
        pattern = f"compute_{'0' * 504}"  # exactly 512 chars
        start = time.monotonic()
        result = runner.invoke(cli, ["code", "grep", pattern])
        elapsed = time.monotonic() - start
        assert elapsed < 5.0, f"512-char pattern took {elapsed:.1f}s — possible hang"
        assert result.exit_code in (0, 1)  # no match is fine

519

520

521

# ---------------------------------------------------------------------------

522

# --files flag (-l) — one file path per line, unique, sorted

523

# ---------------------------------------------------------------------------

class TestGrepFiles:
    """``muse code grep --files`` prints one unique file path per line.

    Ergonomics goal: trivially pipeable without JSON parsing.
    Mirrors ``grep -l`` / ``rg -l`` behaviour.
    """

    @staticmethod
    def _paths(output: str) -> list[str]:
        """Return the non-blank lines of *output*, stripped of surrounding whitespace."""
        return [line.strip() for line in output.splitlines() if line.strip()]

    def test_files_lists_matching_file(self, repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "validate", "--files"])
        assert result.exit_code == 0, result.output
        paths = self._paths(result.output)
        assert any("billing.py" in path or "auth.py" in path for path in paths)

    def test_files_output_is_unique_paths(self, repo: pathlib.Path) -> None:
        """Each file path appears at most once, even if it has multiple matches."""
        result = runner.invoke(cli, ["code", "grep", "validate", "--files"])
        assert result.exit_code == 0, result.output
        paths = self._paths(result.output)
        assert len(paths) == len(set(paths)), "duplicate file paths in --files output"

    def test_files_output_is_sorted(self, repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "validate", "--files"])
        assert result.exit_code == 0, result.output
        paths = self._paths(result.output)
        assert paths == sorted(paths), "--files output must be sorted"

    def test_files_no_match_empty_output(self, repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "zzznomatch", "--files"])
        assert result.exit_code == 0
        assert result.output.strip() == ""

    def test_files_excludes_non_matching_files(self, repo: pathlib.Path) -> None:
        """Only files that contain at least one match appear."""
        result = runner.invoke(cli, ["code", "grep", "Invoice", "--files"])
        assert result.exit_code == 0, result.output
        paths = self._paths(result.output)
        # Invoice is only in billing.py.  Require a hit first: ``all()`` over
        # an empty list is True and would let a broken search pass.
        assert paths, "expected billing.py to be listed for 'Invoice'"
        assert all("billing.py" in path for path in paths)
        assert not any("auth.py" in path for path in paths)

    def test_files_mutually_exclusive_with_json(self, repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "validate", "--files", "--json"])
        assert result.exit_code != 0

    def test_files_mutually_exclusive_with_count(self, repo: pathlib.Path) -> None:
        result = runner.invoke(cli, ["code", "grep", "validate", "--files", "--count"])
        assert result.exit_code != 0

    def test_files_long_flag_only(self, repo: pathlib.Path) -> None:
        """``--files`` is the only form (``-l`` is taken by ``--language``)."""
        result = runner.invoke(cli, ["code", "grep", "validate", "--files"])
        assert result.exit_code == 0, result.output
        paths = self._paths(result.output)
        assert len(paths) > 0

    def test_files_compatible_with_kind_filter(self, repo: pathlib.Path) -> None:
        result = runner.invoke(
            cli, ["code", "grep", "validate", "--files", "--kind", "function"]
        )
        assert result.exit_code == 0, result.output
        paths = self._paths(result.output)
        # validate_amount and validate_token are functions
        assert len(paths) > 0

    def test_files_compatible_with_file_filter(self, repo: pathlib.Path) -> None:
        result = runner.invoke(
            cli, ["code", "grep", "validate", "--files", "--file", "billing.py"]
        )
        assert result.exit_code == 0, result.output
        paths = self._paths(result.output)
        # billing.py defines validate_amount, so the scoped listing must not be
        # empty; without this the ``all()`` below passes vacuously.
        assert paths, "expected billing.py to be listed for 'validate'"
        assert all("billing.py" in path for path in paths)