gabriel / muse public
grep.py python
387 lines 14.9 KB
Raw
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 20 days ago
1 """muse code grep -- semantic symbol search across the symbol graph.
2
3 Unlike ``git grep`` which searches raw text lines, ``muse code grep`` searches
4 the *typed symbol graph* -- only returning actual symbol declarations with
5 their kind, file, line number, and stable content hash.
6
7 No false positives from comments, string literals, or call sites. Every
8 result is a real symbol that exists in the repository.
9
10 By default, PATTERN is matched case-insensitively against the bare symbol
11 name. When PATTERN contains a ``.`` or ``::`` it is also matched against the
12 fully-qualified name, so ``Invoice.validate`` finds only that specific method
13 rather than every symbol named ``validate``.
14
15 Usage::
16
17 muse code grep "validate" # symbols whose name contains "validate"
    muse code grep "Invoice.validate"       # match against the qualified name (substring, case-insensitive)
19 muse code grep "^handle" --regex # names matching regex "^handle"
20 muse code grep "Invoice" --kind class # only class symbols
21 muse code grep "compute" --language go # only Go symbols (case-insensitive)
    muse code grep "total" --file billing.py  # scope to one file by exact path or unique path suffix (fast)
23 muse code grep "total" --commit HEAD~5 # search a historical snapshot
24 muse code grep "validate" --count # just the total count
25 muse code grep "validate" --json # machine-readable output for agents
26
27 Output::
28
29 muse/billing.py::validate_amount fn line 8
30 muse/auth.py::validate_token fn line 14
31 muse/auth.py::Validator class line 22
32 muse/auth.py::Validator.validate method line 28
33
34 4 match(es) across 2 file(s)
35
36 Security note: patterns are capped at 512 characters to prevent ReDoS.
37 Invalid regex syntax is caught and reported as exit 1 rather than crashing.
38 """
39
40 import argparse
41 import json
42 import logging
43 import pathlib
44 import re
45 import sys
46 from typing import TypedDict
47
48 from muse.core.types import short_id
49 from muse.core.envelope import EnvelopeJson, make_envelope
50 from muse.core.errors import ExitCode
51 from muse.core.repo import require_repo
52 from muse.core.timing import start_timer
53 from muse.core.refs import read_current_branch
54 from muse.core.commits import resolve_commit_ref
55 from muse.core.snapshots import get_commit_snapshot_manifest
56 from muse.plugins.code._query import language_of, normalise_language, symbols_for_snapshot
57 from muse.plugins.code.ast_parser import SymbolRecord
58 from muse.core.validation import sanitize_display
59 from muse.core.types import Manifest
60
# Alias for the kind -> short display label table (see ``_KIND_ICON``).
type _IconMap = dict[str, str]
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
63
class _GrepResultEntry(TypedDict):
    """One matched symbol in the ``results`` list of ``--json`` output.

    Entries are assembled in ``run`` from each match's symbol address and
    its symbol record.
    """

    address: str  # symbol address; the text before the first "::" is the path
    kind: str  # ``kind`` field of the symbol record
    name: str  # ``name`` field of the symbol record (bare name)
    qualified_name: str  # ``qualified_name`` field of the symbol record
    path: str  # ``address.split("::")[0]``
    lineno: int  # ``lineno`` field of the symbol record
    language: str  # ``language_of(path)``
    content_id: str  # ``content_id`` field of the symbol record
73
# Guard against ReDoS: reject patterns longer than this before compiling.
# NOTE(review): a length cap only bounds the pattern size; a short pattern can
# still backtrack badly, so treat this as a mitigation rather than a guarantee.
_MAX_PATTERN_LEN: int = 512

# Short labels printed in the kind column of the human-readable output.
# Kinds that are not listed here are printed verbatim (``run`` looks them up
# with ``_KIND_ICON.get(kind, kind)``).
_KIND_ICON: _IconMap = {
    "function": "fn",
    "async_function": "fn~",
    "class": "class",
    "method": "method",
    "async_method": "method~",
    "variable": "var",
    "import": "import",
}
86
87 # ---------------------------------------------------------------------------
88 # Typed output shape
89 # ---------------------------------------------------------------------------
90
class _GrepOutputJson(EnvelopeJson):
    """JSON output for ``muse code grep --json``.

    Inherits the 6 standard envelope fields from :class:`~muse.core.envelope.EnvelopeJson`.

    Fields
    ------
    source_ref      ``"working-tree"`` when no ``--commit`` ref was supplied,
                    otherwise the ``commit_id`` of the resolved commit.
                    NOTE(review): ``run`` passes ``commit.commit_id`` through
                    unmodified — confirm whether consumers expect an
                    abbreviated ID here.
    working_tree    True when no ``--commit`` ref was supplied, i.e. the
                    search reflects the current working tree rather than a
                    committed snapshot.
    pattern         The pattern string exactly as supplied by the caller.
    total_matches   Total number of symbol declarations matched.
    results         List of match dicts — each has address, kind, name,
                    qualified_name, path, lineno, language, and content_id.
    """

    source_ref: str
    working_tree: bool
    pattern: str
    total_matches: int
    results: list[_GrepResultEntry]
113
114 # ---------------------------------------------------------------------------
115 # Repository helpers
116 # ---------------------------------------------------------------------------
117
118 # ---------------------------------------------------------------------------
119 # File-filter helpers (same as symbols.py)
120 # ---------------------------------------------------------------------------
121
122 def _file_matches(file_path: str, file_filter: str) -> bool:
123 """True if *file_path* equals or ends with ``/<file_filter>``."""
124 if file_path == file_filter:
125 return True
126 normalized = file_filter.replace("\\", "/")
127 return file_path.endswith(f"/{normalized}")
128
def _resolve_file_filter(
    file_filter: str,
    manifest: Manifest,
) -> str | None:
    """Map a user-supplied path or path suffix onto a single manifest key.

    Returns the one manifest path accepted by ``_file_matches``, or ``None``
    when nothing matches (the caller deals with the empty result).  When more
    than one path matches, the first ten candidates are listed on stderr and
    the process exits with ``ExitCode.USER_ERROR``.
    """
    candidates = sorted(path for path in manifest if _file_matches(path, file_filter))
    total = len(candidates)

    if total == 0:
        return None  # no match — caller handles empty result
    if total == 1:
        return candidates[0]

    # Ambiguous: show a bounded preview of the candidates, then bail out.
    header = (
        f"❌ '{file_filter}' is ambiguous — matches {total} files. "
        "Use a more specific path:"
    )
    print(header, file=sys.stderr)
    for candidate in candidates[:10]:
        print(f" {candidate}", file=sys.stderr)
    hidden = total - 10
    if hidden > 0:
        print(f" … and {hidden} more", file=sys.stderr)
    raise SystemExit(ExitCode.USER_ERROR)
153
154 # ---------------------------------------------------------------------------
155 # Argument parser registration
156 # ---------------------------------------------------------------------------
157
def register(subparsers: "argparse._SubParsersAction[argparse.ArgumentParser]") -> None:
    """Wire the ``grep`` subcommand and its options into *subparsers*.

    The three output modes (``--count``, ``--json``, ``--files``) are placed
    in a mutually exclusive group, and the parsed namespace is dispatched to
    ``run`` through the ``func`` default.
    """
    grep_parser = subparsers.add_parser(
        "grep",
        help="Search the symbol graph by name — not file text.",
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    grep_parser.add_argument("pattern", metavar="PATTERN", help="Name pattern to search for.")
    grep_parser.add_argument(
        "--regex", "-e",
        dest="use_regex",
        action="store_true",
        help="Treat PATTERN as a regular expression (default: substring match).",
    )

    # Value-taking filters all share one shape:
    # (long flag, short flag, metavar, dest, help text).
    value_options = (
        (
            "--kind", "-k", "KIND", "kind_filter",
            "Restrict to symbols of this kind (function, class, method, …).",
        ),
        (
            "--language", "-l", "LANG", "language_filter",
            "Restrict to symbols from files of this language (case-insensitive).",
        ),
        (
            "--file", "-f", "PATH", "file_filter",
            "Scope to a single file. Accepts an exact path or a unique suffix "
            "(e.g. 'billing.py' matches 'src/billing.py'). Up to 24x faster.",
        ),
        (
            "--commit", "-c", "REF", "ref",
            "Search a historical commit instead of the working tree.",
        ),
    )
    for long_flag, short_flag, metavar, dest, help_text in value_options:
        grep_parser.add_argument(
            long_flag, short_flag,
            default=None,
            metavar=metavar,
            dest=dest,
            help=help_text,
        )

    grep_parser.add_argument(
        "--hashes",
        dest="show_hashes",
        action="store_true",
        help="Include content hashes in output.",
    )

    # Only one output mode may be selected per invocation.
    output_modes = grep_parser.add_mutually_exclusive_group()
    output_modes.add_argument(
        "--count",
        dest="count_only",
        action="store_true",
        help="Print only the total match count.",
    )
    output_modes.add_argument(
        "--json", "-j",
        dest="json_out",
        action="store_true",
        help="Emit results as structured JSON.",
    )
    output_modes.add_argument(
        "--files",
        dest="files_only",
        action="store_true",
        help=(
            "Print only the unique file paths that contain at least one match, "
            "one per line, sorted. Mirrors ``grep -l`` / ``rg -l``. "
            "Trivially pipeable without JSON parsing."
        ),
    )

    grep_parser.set_defaults(func=run, files_only=False)
217
218 # ---------------------------------------------------------------------------
219 # Command entry point
220 # ---------------------------------------------------------------------------
221
def run(args: argparse.Namespace) -> None:
    """Search the symbol graph by name — not file text.

    Searches the typed, content-addressed symbol graph.  Every result is a
    real symbol declaration — no false positives from comments, string
    literals, or call sites.  Use ``--file`` to scope to one file (much
    faster); ``--regex`` for full Python regex syntax.

    Matching is case-insensitive and uses ``re.search`` (substring semantics
    unless the regex is anchored).  The bare symbol name is always searched;
    the qualified name is searched as well when the pattern contains ``.``
    or ``::``.

    Agent quickstart
    ----------------
    ::

        muse code grep "validate" --json
        muse code grep "Invoice.validate" --json
        muse code grep "compute.*total" --regex --json
        muse code grep "validate" --file src/billing.py --json

    JSON fields
    -----------
    source_ref      ``"working-tree"`` when no ``--commit`` ref was given,
                    otherwise the ``commit_id`` of the resolved commit.
    working_tree    ``true`` when no ``--commit`` ref was given.
    pattern         Pattern exactly as supplied.
    total_matches   Number of matching symbol declarations.
    results         List of match objects with keys ``address``, ``kind``,
                    ``name``, ``qualified_name``, ``path``, ``lineno``,
                    ``language`` and ``content_id``.

    Exit codes
    ----------
    0                       Search complete (zero matches is still success).
    ``ExitCode.USER_ERROR`` Pattern too long, invalid regex, ambiguous
                            ``--file`` filter, or commit not found.

    NOTE(review): the exit status when not inside a Muse repository is
    decided by ``require_repo()``, which is not visible here.
    """
    elapsed = start_timer()
    pattern: str = args.pattern
    use_regex: bool = args.use_regex
    kind_filter: str | None = args.kind_filter
    language_filter: str | None = args.language_filter
    file_filter: str | None = args.file_filter
    ref: str | None = args.ref
    show_hashes: bool = args.show_hashes
    count_only: bool = args.count_only
    json_out: bool = args.json_out
    files_only: bool = getattr(args, "files_only", False)

    # ── Input validation ──────────────────────────────────────────────────────

    # Reject oversized patterns before they ever reach the regex compiler.
    if len(pattern) > _MAX_PATTERN_LEN:
        print(
            f"❌ Pattern too long ({len(pattern)} chars) — maximum is {_MAX_PATTERN_LEN}.",
            file=sys.stderr,
        )
        raise SystemExit(ExitCode.USER_ERROR)

    if language_filter is not None:
        language_filter = normalise_language(language_filter)

    # When pattern contains a separator, also search qualified names.
    search_qualified = "." in pattern or "::" in pattern

    # Plain patterns are escaped so they behave as literal substrings.
    try:
        regex = re.compile(pattern if use_regex else re.escape(pattern), re.IGNORECASE)
    except re.error as exc:
        print(f"❌ Invalid regex pattern: {exc}", file=sys.stderr)
        raise SystemExit(ExitCode.USER_ERROR)

    # ── Repo / commit resolution ──────────────────────────────────────────────

    root = require_repo()
    branch = read_current_branch(root)

    commit = resolve_commit_ref(root, branch, ref)
    if commit is None:
        print(f"❌ Commit '{ref or 'HEAD'}' not found.", file=sys.stderr)
        raise SystemExit(ExitCode.USER_ERROR)

    manifest = get_commit_snapshot_manifest(root, commit.commit_id) or {}

    # ── File-filter resolution ────────────────────────────────────────────────

    resolved_file_filter = file_filter
    if file_filter is not None:
        found = _resolve_file_filter(file_filter, manifest)
        if found is not None:
            resolved_file_filter = found
        # None → no match; pass original so symbols_for_snapshot returns {}

    # ── Working-tree vs object-store mode ────────────────────────────────────

    # Without an explicit --commit the search runs against the working tree.
    working_tree = ref is None
    workdir = root if working_tree else None
    source_ref = "working-tree" if working_tree else commit.commit_id

    # ── Symbol extraction ─────────────────────────────────────────────────────

    symbol_map = symbols_for_snapshot(
        root, manifest,
        kind_filter=kind_filter,
        file_filter=resolved_file_filter,
        language_filter=language_filter,
        workdir=workdir,
    )

    # ── Pattern matching ──────────────────────────────────────────────────────

    # Files in path order; symbols within a file in line order.
    matches: list[tuple[str, str, SymbolRecord]] = []
    for file_path, tree in sorted(symbol_map.items()):
        for addr, rec in sorted(tree.items(), key=lambda kv: kv[1]["lineno"]):
            if regex.search(rec["name"]) or (
                search_qualified and regex.search(rec["qualified_name"])
            ):
                matches.append((file_path, addr, rec))

    # ── Output ────────────────────────────────────────────────────────────────

    if count_only:
        print(f"{len(matches)} match(es)")
        return

    if files_only:
        for matched_path in sorted({fp for fp, _addr, _rec in matches}):
            print(matched_path)
        return

    if json_out:
        results: list[_GrepResultEntry] = []
        for _fp, addr, rec in matches:
            # The path is the part of the address before the first "::".
            symbol_path = addr.split("::")[0]
            results.append({
                "address": addr,
                "kind": rec["kind"],
                "name": rec["name"],
                "qualified_name": rec["qualified_name"],
                "path": symbol_path,
                "lineno": rec["lineno"],
                "language": language_of(symbol_path),
                "content_id": rec["content_id"],
            })
        print(json.dumps(_GrepOutputJson(
            **make_envelope(elapsed),
            source_ref=source_ref,
            working_tree=working_tree,
            pattern=pattern,
            total_matches=len(matches),
            results=results,
        )))
        return

    if not matches:
        print(f" (no symbols matching '{sanitize_display(pattern)}')")
        return

    files_seen: set[str] = set()
    for file_path, addr, rec in matches:
        files_seen.add(file_path)
        # Unknown kinds fall back to the raw kind string.
        icon = _KIND_ICON.get(rec["kind"], rec["kind"])
        line = rec["lineno"]
        hash_part = f" {short_id(rec['content_id'])}.." if show_hashes else ""
        print(f" {sanitize_display(addr):<60} {icon:<10} line {line:>4}{hash_part}")

    print(f"\n{len(matches)} match(es) across {len(files_seen)} file(s)")
File History 4 commits
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 20 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 22 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 28 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor 28 days ago