grep.py
python
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2
fix: remove commit_exists filter from have anchors — server…
Sonnet 4.6
patch
20 days ago
| 1 | """muse code grep -- semantic symbol search across the symbol graph. |
| 2 | |
| 3 | Unlike ``git grep`` which searches raw text lines, ``muse code grep`` searches |
| 4 | the *typed symbol graph* -- only returning actual symbol declarations with |
| 5 | their kind, file, line number, and stable content hash. |
| 6 | |
| 7 | No false positives from comments, string literals, or call sites. Every |
| 8 | result is a real symbol that exists in the repository. |
| 9 | |
| 10 | By default, PATTERN is matched case-insensitively against the bare symbol |
| 11 | name. When PATTERN contains a ``.`` or ``::`` it is also matched against the |
| 12 | fully-qualified name, so ``Invoice.validate`` finds only that specific method |
| 13 | rather than every symbol named ``validate``. |
| 14 | |
| 15 | Usage:: |
| 16 | |
| 17 | muse code grep "validate" # symbols whose name contains "validate" |
| 18 | muse code grep "Invoice.validate" # substring match against qualified names too |
| 19 | muse code grep "^handle" --regex # names matching regex "^handle" |
| 20 | muse code grep "Invoice" --kind class # only class symbols |
| 21 | muse code grep "compute" --language go # only Go symbols (case-insensitive) |
| 22 | muse code grep "total" --file billing.py # scope to one file (fast) |
| 23 | muse code grep "total" --commit HEAD~5 # search a historical snapshot |
| 24 | muse code grep "validate" --count # just the total count |
| 25 | muse code grep "validate" --json # machine-readable output for agents |
| 26 | |
| 27 | Output:: |
| 28 | |
| 29 | muse/billing.py::validate_amount fn line 8 |
| 30 | muse/auth.py::validate_token fn line 14 |
| 31 | muse/auth.py::Validator class line 22 |
| 32 | muse/auth.py::Validator.validate method line 28 |
| 33 | |
| 34 | 4 match(es) across 2 file(s) |
| 35 | |
| 36 | Security note: patterns are capped at 512 characters to prevent ReDoS. |
| 37 | Invalid regex syntax is caught and reported as exit 1 rather than crashing. |
| 38 | """ |
| 39 | |
| 40 | import argparse |
| 41 | import json |
| 42 | import logging |
| 43 | import pathlib |
| 44 | import re |
| 45 | import sys |
| 46 | from typing import TypedDict |
| 47 | |
| 48 | from muse.core.types import short_id |
| 49 | from muse.core.envelope import EnvelopeJson, make_envelope |
| 50 | from muse.core.errors import ExitCode |
| 51 | from muse.core.repo import require_repo |
| 52 | from muse.core.timing import start_timer |
| 53 | from muse.core.refs import read_current_branch |
| 54 | from muse.core.commits import resolve_commit_ref |
| 55 | from muse.core.snapshots import get_commit_snapshot_manifest |
| 56 | from muse.plugins.code._query import language_of, normalise_language, symbols_for_snapshot |
| 57 | from muse.plugins.code.ast_parser import SymbolRecord |
| 58 | from muse.core.validation import sanitize_display |
| 59 | from muse.core.types import Manifest |
| 60 | |
# Alias for the symbol-kind -> display-label table (see ``_KIND_ICON``).
type _IconMap = dict[str, str]
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
| 63 | |
class _GrepResultEntry(TypedDict):
    """One matched symbol, as emitted in the ``results`` list of ``--json`` output.

    ``run`` builds one entry per match from the symbol's address and its
    symbol record.
    """

    address: str  # symbol address (the key of the per-file symbol tree)
    kind: str  # copied from the record's "kind"
    name: str  # copied from the record's "name"
    qualified_name: str  # copied from the record's "qualified_name"
    path: str  # the part of ``address`` before the first "::"
    lineno: int  # copied from the record's "lineno"
    language: str  # ``language_of(path)``
    content_id: str  # copied from the record's "content_id"
| 73 | |
# Guard against ReDoS: reject patterns longer than this before compiling.
_MAX_PATTERN_LEN: int = 512

# Short labels shown in the kind column of the human-readable output.
# ``run`` looks kinds up with ``.get(kind, kind)``, so a kind missing from
# this table is printed verbatim.
_KIND_ICON: _IconMap = {
    "function": "fn",
    "async_function": "fn~",
    "class": "class",
    "method": "method",
    "async_method": "method~",
    "variable": "var",
    "import": "import",
}
| 86 | |
| 87 | # --------------------------------------------------------------------------- |
| 88 | # Typed output shape |
| 89 | # --------------------------------------------------------------------------- |
| 90 | |
class _GrepOutputJson(EnvelopeJson):
    """JSON output for ``muse code grep --json``.

    Inherits the 6 standard envelope fields from :class:`~muse.core.envelope.EnvelopeJson`.

    Fields
    ------
    source_ref      ``"working-tree"`` when the search reflects uncommitted edits,
                    otherwise the ID of the commit that was searched.
                    NOTE(review): this field was previously documented as an
                    abbreviated ID (12 hex chars), but ``run`` passes
                    ``commit.commit_id`` through unmodified — confirm which
                    form is intended.
    working_tree    True when the search reflects the current working tree rather
                    than a committed snapshot (i.e. no ``--commit`` was given).
    pattern         The pattern string exactly as supplied by the caller.
    total_matches   Total number of symbol declarations matched.
    results         List of match dicts — each has address, kind, name,
                    qualified_name, path, lineno, language, and content_id.
    """

    source_ref: str
    working_tree: bool
    pattern: str
    total_matches: int
    results: list[_GrepResultEntry]
| 113 | |
| 114 | # --------------------------------------------------------------------------- |
| 115 | # Repository helpers |
| 116 | # --------------------------------------------------------------------------- |
| 117 | |
| 118 | # --------------------------------------------------------------------------- |
| 119 | # File-filter helpers (same as symbols.py) |
| 120 | # --------------------------------------------------------------------------- |
| 121 | |
| 122 | def _file_matches(file_path: str, file_filter: str) -> bool: |
| 123 | """True if *file_path* equals or ends with ``/<file_filter>``.""" |
| 124 | if file_path == file_filter: |
| 125 | return True |
| 126 | normalized = file_filter.replace("\\", "/") |
| 127 | return file_path.endswith(f"/{normalized}") |
| 128 | |
def _resolve_file_filter(
    file_filter: str,
    manifest: Manifest,
) -> str | None:
    """Resolve an exact path or partial path suffix to the exact manifest key.

    An exact path always wins: if *file_filter* (as given, or with
    backslashes converted to ``/``) is itself a manifest key it is returned
    immediately, even when other files share it as a suffix.  Otherwise the
    filter is treated as a suffix and must identify exactly one file.

    Args:
        file_filter: The user-supplied ``--file`` value.
        manifest: The snapshot manifest whose keys are candidate file paths.

    Returns:
        The matching manifest key, or ``None`` when nothing matches (the
        caller handles the empty result).

    Raises:
        SystemExit: With ``ExitCode.USER_ERROR`` when the suffix matches more
            than one file; the candidates are listed on stderr first.
    """
    candidates = sorted(manifest)

    # Without this short-circuit, "setup.py" would be reported as ambiguous
    # whenever "tools/setup.py" also exists, leaving the root-level file
    # impossible to select.
    normalized = file_filter.replace("\\", "/")
    for exact in (file_filter, normalized):
        if exact in candidates:
            return exact

    matching = [p for p in candidates if _file_matches(p, file_filter)]
    if not matching:
        return None  # no match — caller handles empty result
    if len(matching) == 1:
        return matching[0]

    print(
        f"❌ '{file_filter}' is ambiguous — matches {len(matching)} files. "
        "Use a more specific path:",
        file=sys.stderr,
    )
    # Cap the listing so a very broad suffix does not flood the terminal.
    for m in matching[:10]:
        print(f" {m}", file=sys.stderr)
    if len(matching) > 10:
        print(f" … and {len(matching) - 10} more", file=sys.stderr)
    raise SystemExit(ExitCode.USER_ERROR)
| 153 | |
| 154 | # --------------------------------------------------------------------------- |
| 155 | # Argument parser registration |
| 156 | # --------------------------------------------------------------------------- |
| 157 | |
def register(subparsers: "argparse._SubParsersAction[argparse.ArgumentParser]") -> None:
    """Wire the ``grep`` subcommand into the parent CLI's *subparsers*.

    Declares the positional ``PATTERN``, the search/filter flags
    (``--regex``, ``--kind``, ``--language``, ``--file``, ``--commit``,
    ``--hashes``) and a mutually exclusive output-mode group
    (``--count`` / ``--json`` / ``--files``).  The parsed namespace is routed
    to :func:`run` through the ``func`` default.
    """
    grep_cmd = subparsers.add_parser(
        "grep",
        help="Search the symbol graph by name — not file text.",
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    # -- what to search for -------------------------------------------------
    grep_cmd.add_argument(
        "pattern",
        metavar="PATTERN",
        help="Name pattern to search for.",
    )
    grep_cmd.add_argument(
        "--regex",
        "-e",
        action="store_true",
        dest="use_regex",
        help="Treat PATTERN as a regular expression (default: substring match).",
    )

    # -- where to search ----------------------------------------------------
    grep_cmd.add_argument(
        "--kind",
        "-k",
        default=None,
        metavar="KIND",
        dest="kind_filter",
        help="Restrict to symbols of this kind (function, class, method, …).",
    )
    grep_cmd.add_argument(
        "--language",
        "-l",
        default=None,
        metavar="LANG",
        dest="language_filter",
        help="Restrict to symbols from files of this language (case-insensitive).",
    )
    file_help = (
        "Scope to a single file. Accepts an exact path or a unique suffix "
        "(e.g. 'billing.py' matches 'src/billing.py'). Up to 24x faster."
    )
    grep_cmd.add_argument(
        "--file",
        "-f",
        default=None,
        metavar="PATH",
        dest="file_filter",
        help=file_help,
    )
    grep_cmd.add_argument(
        "--commit",
        "-c",
        default=None,
        metavar="REF",
        dest="ref",
        help="Search a historical commit instead of the working tree.",
    )

    # -- how to display -----------------------------------------------------
    grep_cmd.add_argument(
        "--hashes",
        action="store_true",
        dest="show_hashes",
        help="Include content hashes in output.",
    )

    # At most one alternate output mode may be requested per invocation.
    output_modes = grep_cmd.add_mutually_exclusive_group()
    output_modes.add_argument(
        "--count",
        action="store_true",
        dest="count_only",
        help="Print only the total match count.",
    )
    output_modes.add_argument(
        "--json",
        "-j",
        action="store_true",
        dest="json_out",
        help="Emit results as structured JSON.",
    )
    files_help = (
        "Print only the unique file paths that contain at least one match, "
        "one per line, sorted. Mirrors ``grep -l`` / ``rg -l``. "
        "Trivially pipeable without JSON parsing."
    )
    output_modes.add_argument(
        "--files",
        action="store_true",
        dest="files_only",
        help=files_help,
    )

    # ``func`` is the dispatch hook stored on the parsed namespace.
    grep_cmd.set_defaults(func=run, files_only=False)
| 217 | |
| 218 | # --------------------------------------------------------------------------- |
| 219 | # Command entry point |
| 220 | # --------------------------------------------------------------------------- |
| 221 | |
def run(args: argparse.Namespace) -> None:
    """Search the symbol graph by name — not file text.

    Searches the typed, content-addressed symbol graph. Every result is a
    real symbol declaration — no false positives from comments, string
    literals, or call sites. Use ``--file`` to scope to one file (much
    faster); ``--regex`` for full Python regex syntax.

    Matching is case-insensitive and uses ``re.search``, so a plain pattern
    is a substring match.  The bare symbol name is always tested; the
    qualified name is tested as well when the pattern contains ``.`` or
    ``::``.

    Agent quickstart
    ----------------
    ::

        muse code grep "validate" --json
        muse code grep "Invoice.validate" --json
        muse code grep "compute.*total" --regex --json
        muse code grep "validate" --file src/billing.py --json

    JSON fields
    -----------
    source_ref      ``"working-tree"`` when no ``--commit`` is given,
                    otherwise the ID of the commit that was searched.
    working_tree    ``true`` if searching uncommitted state.
    pattern         Pattern used.
    total_matches   Number of matching symbol declarations.
    results         List of match objects: ``address``, ``kind``, ``name``,
                    ``qualified_name``, ``path``, ``lineno``, ``language``,
                    ``content_id``.

    Exit codes
    ----------
    0   Search complete (zero matches is still success).
    1   Invalid regex or invalid arguments.
    2   Not inside a Muse repository.

    Raises:
        SystemExit: With ``ExitCode.USER_ERROR`` when the pattern is too
            long, the regex does not compile, or the commit cannot be
            resolved.
    """
    elapsed = start_timer()
    pattern: str = args.pattern
    use_regex: bool = args.use_regex
    kind_filter: str | None = args.kind_filter
    language_filter: str | None = args.language_filter
    file_filter: str | None = args.file_filter
    ref: str | None = args.ref
    show_hashes: bool = args.show_hashes
    count_only: bool = args.count_only
    json_out: bool = args.json_out
    # Read defensively so a namespace built without ``--files`` still works.
    files_only: bool = getattr(args, "files_only", False)

    # ── Input validation ──────────────────────────────────────────────────────

    if len(pattern) > _MAX_PATTERN_LEN:
        print(
            f"❌ Pattern too long ({len(pattern)} chars) — maximum is {_MAX_PATTERN_LEN}.",
            file=sys.stderr,
        )
        raise SystemExit(ExitCode.USER_ERROR)

    if language_filter is not None:
        language_filter = normalise_language(language_filter)

    # When pattern contains a separator, also search qualified names.
    search_qualified = "." in pattern or "::" in pattern

    try:
        # Plain patterns are escaped so they behave as literal substrings.
        regex = (
            re.compile(pattern, re.IGNORECASE)
            if use_regex
            else re.compile(re.escape(pattern), re.IGNORECASE)
        )
    except re.error as exc:
        print(f"❌ Invalid regex pattern: {exc}", file=sys.stderr)
        raise SystemExit(ExitCode.USER_ERROR) from exc

    # ── Repo / commit resolution ──────────────────────────────────────────────

    root = require_repo()
    branch = read_current_branch(root)

    # A commit is resolved even for working-tree searches: its manifest is
    # what the ``--file`` filter is resolved against below.
    commit = resolve_commit_ref(root, branch, ref)
    if commit is None:
        print(f"❌ Commit '{ref or 'HEAD'}' not found.", file=sys.stderr)
        raise SystemExit(ExitCode.USER_ERROR)

    manifest = get_commit_snapshot_manifest(root, commit.commit_id) or {}

    # ── File-filter resolution ────────────────────────────────────────────────

    resolved_file_filter = file_filter
    if file_filter is not None:
        found = _resolve_file_filter(file_filter, manifest)
        if found is not None:
            resolved_file_filter = found
        # None → no match; pass original so symbols_for_snapshot returns {}

    # ── Working-tree vs object-store mode ────────────────────────────────────

    working_tree = ref is None
    workdir = root if working_tree else None
    # NOTE(review): _GrepOutputJson documents source_ref as an abbreviated
    # commit ID, but the full commit_id is emitted here — confirm which is
    # intended before changing either side.
    source_ref = "working-tree" if working_tree else commit.commit_id

    # ── Symbol extraction ─────────────────────────────────────────────────────

    symbol_map = symbols_for_snapshot(
        root, manifest,
        kind_filter=kind_filter,
        file_filter=resolved_file_filter,
        language_filter=language_filter,
        workdir=workdir,
    )

    # ── Pattern matching ──────────────────────────────────────────────────────

    # Files in path order, symbols within a file in line order, so output is
    # deterministic.
    matches: list[tuple[str, str, SymbolRecord]] = []
    for file_path, tree in sorted(symbol_map.items()):
        for addr, rec in sorted(tree.items(), key=lambda kv: kv[1]["lineno"]):
            name_hit = regex.search(rec["name"])
            qual_hit = search_qualified and regex.search(rec["qualified_name"])
            if name_hit or qual_hit:
                matches.append((file_path, addr, rec))

    # ── Output ────────────────────────────────────────────────────────────────

    if count_only:
        print(f"{len(matches)} match(es)")
        return

    if files_only:
        for matched_file in sorted({fp for fp, _addr, _rec in matches}):
            print(matched_file)
        return

    if json_out:
        results: list[_GrepResultEntry] = []
        for _fp, addr, rec in matches:
            # The file part of the address feeds both ``path`` and ``language``.
            symbol_path = addr.split("::")[0]
            results.append({
                "address": addr,
                "kind": rec["kind"],
                "name": rec["name"],
                "qualified_name": rec["qualified_name"],
                "path": symbol_path,
                "lineno": rec["lineno"],
                "language": language_of(symbol_path),
                "content_id": rec["content_id"],
            })
        print(json.dumps(_GrepOutputJson(
            **make_envelope(elapsed),
            source_ref=source_ref,
            working_tree=working_tree,
            pattern=pattern,
            total_matches=len(matches),
            results=results,
        )))
        return

    if not matches:
        print(f" (no symbols matching '{sanitize_display(pattern)}')")
        return

    files_seen: set[str] = set()
    for file_path, addr, rec in matches:
        files_seen.add(file_path)
        # Unknown kinds fall back to the raw kind string.
        icon = _KIND_ICON.get(rec["kind"], rec["kind"])
        line = rec["lineno"]
        hash_part = f" {short_id(rec['content_id'])}.." if show_hashes else ""
        print(f" {sanitize_display(addr):<60} {icon:<10} line {line:>4}{hash_part}")

    print(f"\n{len(matches)} match(es) across {len(files_seen)} file(s)")
File History
4 commits
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2
fix: remove commit_exists filter from have anchors — server…
Sonnet 4.6
patch
20 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e
fix: rename objects→blobs in push client and all stale test…
Sonnet 4.6
patch
22 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a
fix: repair four test failures from post-migration audit
Sonnet 4.6
patch
28 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf
fix: unified object store migration — idempotent writes, JS…
Sonnet 4.6
minor
⚠
28 days ago