gabriel / muse public
symbols.py python
517 lines 19.1 KB
Raw
sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b fix: try fetch/presign before fetch/mpack to avoid Cloudfla… Sonnet 4.6 patch 8 days ago
1 """muse code symbols -- list every semantic symbol in a snapshot.
2
3 Muse tracks the semantic interior of every source file -- the full symbol graph
4 the code plugin builds at commit time -- giving each function, class, method,
5 and variable a stable, content-addressed identity independent of line numbers
6 or formatting.
7
8 Output (default -- human-readable table)::
9
10 src/utils.py
11 fn calculate_total line 12
12 fn _validate_amount line 28
13 class Invoice line 45
14 method Invoice.to_dict line 52
15 method Invoice.from_dict line 61
16
17 src/models.py
18 class User line 8
19 method User.__init__ line 10
20 method User.save line 19
21
22 12 symbols across 2 files (Python: 12)
23
24 Flags::
25
26 --commit <ref>
27 Inspect a specific commit instead of the working tree.
28 Accepts a full or abbreviated commit SHA, a branch name, or HEAD~N.
29
30 --kind <kind>
31 Filter to a specific symbol kind:
32 function, async_function, class, method, async_method,
33 variable, import, section, rule.
34
35 --file <path>
36 Show symbols from a single file. Accepts an exact path or a
37 unique suffix (e.g. "billing.py" matches "src/billing.py").
38
39 --language <lang>
40 Show symbols from files of this language only (case-insensitive,
41 e.g. "python", "TypeScript", "go").
42
43 --count
44 Print only the total symbol count and per-language breakdown.
45
46 --hashes
47 Include content hashes alongside each symbol.
48
49 --json
50 Emit a structured JSON object for tooling integration::
51
52 {
53 "source_ref": "a1b2c3d4",
54 "working_tree": true,
55 "total_symbols": 12,
56 "results": [
57 {"address": "src/utils.py::calculate_total",
58 "path": "src/utils.py", "kind": "function", ...}
59 ]
60 }
61 """
62
63 import argparse
64 import json
65 import logging
66 import pathlib
67 import sys
68 from collections.abc import Callable
69 from typing import TypedDict
70
71 from muse.core.types import short_id
72 from muse.core.envelope import EnvelopeJson, make_envelope
73 from muse.core.errors import ExitCode
74 from muse.core.repo import require_repo
75 from muse.core.stat_cache import load_cache as load_stat_cache
76 from muse.core.types import Manifest
77 from muse.core.refs import read_current_branch
78 from muse.core.commits import resolve_commit_ref
79 from muse.core.snapshots import get_commit_snapshot_manifest
80 from muse.core.timing import start_timer
81 from muse.plugins.code._query import language_of, symbols_for_snapshot
82 from muse.plugins.code.ast_parser import SymbolTree
83
84 class _SymbolEntry(TypedDict):
85 address: str
86 kind: str
87 name: str
88 qualified_name: str
89 path: str
90 lineno: int
91 language: str
92 content_id: str
93
class _SymbolsJson(EnvelopeJson):
    """Top-level ``--json`` payload: ``EnvelopeJson`` fields plus symbol data."""

    # Label of the snapshot that was inspected.
    source_ref: str
    # True when no --commit was given and the working tree was read.
    working_tree: bool
    # Number of entries in ``results``.
    total_symbols: int
    # Flat list of per-symbol records.
    results: list[_SymbolEntry]
99
100 type _SymbolTreeMap = dict[str, SymbolTree]
101 type _CounterMap = dict[str, int]
102 type _KindDisplay = dict[str, tuple[str, list[str]]]
103
104 logger = logging.getLogger(__name__)
105
106 # ---------------------------------------------------------------------------
107 # ANSI colour helpers — only emitted when stdout is a TTY.
108 # ---------------------------------------------------------------------------
109
110 _RESET = "\033[0m"
111 _BOLD = "\033[1m"
112 _DIM = "\033[2m"
113 _CYAN = "\033[36m"
114 _YELLOW = "\033[33m"
115 _BLUE = "\033[34m"
116 _GREEN = "\033[32m"
117 _MAGENTA = "\033[35m"
118
119 def _c(text: str, *codes: str, tty: bool) -> str:
120 """Wrap *text* in ANSI *codes* when *tty* is True."""
121 if not tty:
122 return text
123 return "".join(codes) + text + _RESET
124
# Per-kind rendering table: symbol kind -> (short label, ANSI codes to apply).
_KIND_DISPLAY: _KindDisplay = {
    "function": ("fn", [_BLUE]),
    "async_function": ("fn~", [_BLUE, _DIM]),
    "class": ("class", [_YELLOW, _BOLD]),
    "method": ("method", [_CYAN]),
    "async_method": ("method~", [_CYAN, _DIM]),
    "variable": ("var", [_DIM]),
    "import": ("import", [_DIM]),
    "section": ("section", [_GREEN]),
    "rule": ("rule", [_MAGENTA]),
}

# Kinds accepted by --kind; derived from the table so the two cannot drift.
_VALID_KINDS: frozenset[str] = frozenset(_KIND_DISPLAY.keys())
139
140 # ---------------------------------------------------------------------------
141 # Language helpers
142 # ---------------------------------------------------------------------------
143
144 # Canonical map: lowercase language name → display name.
145 # Built from _SUFFIX_LANG in _query.py to stay in sync.
146 from muse.plugins.code._query import _SUFFIX_LANG # noqa: E402 (module-level import)
147 from muse.core.validation import sanitize_display
148
# Lowercased language name -> display spelling used in _SUFFIX_LANG.
# Annotated as a plain str -> str mapping: this is a language lookup table,
# not a snapshot manifest.  Iterating the values directly (rather than through
# a set) keeps the result deterministic if two spellings differ only by case.
_LANG_CANONICAL: dict[str, str] = {
    name.lower(): name for name in _SUFFIX_LANG.values()
}
150
def _normalise_language(lang: str) -> str:
    """Map *lang* onto its canonical spelling, ignoring case and outer whitespace.

    A language missing from ``_LANG_CANONICAL`` is returned stripped of
    surrounding whitespace but otherwise unchanged.
    """
    trimmed = lang.strip()
    return _LANG_CANONICAL.get(trimmed.lower(), trimmed)
154
155 # ---------------------------------------------------------------------------
156 # File-filter helpers
157 # ---------------------------------------------------------------------------
158
159 def _file_matches(file_path: str, file_filter: str) -> bool:
160 """Return True if *file_path* equals or uniquely ends with *file_filter*.
161
162 Allows passing ``"billing.py"`` to match ``"src/billing.py"`` without
163 requiring callers to know the full directory prefix. Uses a separator
164 anchor (``/``) to prevent ``y.py`` matching ``billy.py``.
165 """
166 if file_path == file_filter:
167 return True
168 normalized = file_filter.replace("\\", "/")
169 return file_path.endswith(f"/{normalized}")
170
171 def _resolve_file_filter(
172 file_filter: str,
173 manifest: Manifest,
174 ) -> str | None:
175 """Resolve *file_filter* to the exact manifest path, or ``None`` on ambiguity/miss.
176
177 Prints a helpful message to stderr and raises ``SystemExit`` on ambiguity.
178 Returns ``None`` when there is no match (caller emits "no symbols found").
179 """
180 matching = [p for p in sorted(manifest) if _file_matches(p, file_filter)]
181 if len(matching) == 1:
182 return matching[0]
183 if len(matching) > 1:
184 print(
185 f"❌ '{file_filter}' is ambiguous — matches {len(matching)} files. "
186 "Use a more specific path:",
187 file=sys.stderr,
188 )
189 for m in matching[:10]:
190 print(f" {m}", file=sys.stderr)
191 if len(matching) > 10:
192 print(f" … and {len(matching) - 10} more", file=sys.stderr)
193 raise SystemExit(ExitCode.USER_ERROR)
194 return None # no match — caller handles the empty result
195
196 # ---------------------------------------------------------------------------
197 # Repository helpers
198 # ---------------------------------------------------------------------------
199
200 # ---------------------------------------------------------------------------
201 # Output helpers
202 # ---------------------------------------------------------------------------
203
204 def _lang_counts(symbol_map: _SymbolTreeMap) -> _CounterMap:
205 """Return a language-name → symbol-count mapping for *symbol_map*."""
206 counts: _CounterMap = {}
207 for file_path, tree in symbol_map.items():
208 lang = language_of(file_path)
209 counts[lang] = counts.get(lang, 0) + len(tree)
210 return counts
211
212 def _print_human(
213 symbol_map: _SymbolTreeMap,
214 show_hashes: bool,
215 tty: bool,
216 ) -> None:
217 """Render symbol_map as a human-readable, optionally coloured table."""
218 if not symbol_map:
219 print(" (no semantic symbols found)")
220 return
221
222 total = 0
223 for file_path, tree in symbol_map.items():
224 total += len(tree)
225 print(f"\n{_c(sanitize_display(file_path), _BOLD, tty=tty)}")
226 for _addr, rec in sorted(tree.items(), key=lambda kv: kv[1]["lineno"]):
227 kind = rec["kind"]
228 icon, colour_codes = _KIND_DISPLAY.get(kind, (kind, []))
229 name = rec["qualified_name"]
230 lineno = rec["lineno"]
231 icon_str = _c(f"{icon:<10}", *colour_codes, tty=tty)
232 name_str = f"{name:<40}"
233 line_str = _c(f"line {lineno:>4}", _DIM, tty=tty)
234 hash_suffix = (
235 _c(f" {short_id(rec['content_id'])}..", _DIM, tty=tty)
236 if show_hashes
237 else ""
238 )
239 print(f" {icon_str} {name_str} {line_str}{hash_suffix}")
240
241 counts = _lang_counts(symbol_map)
242 lang_str = ", ".join(f"{lang}: {count:,}" for lang, count in sorted(counts.items()))
243 sym_word = "symbol" if total == 1 else "symbols"
244 file_word = "file" if len(symbol_map) == 1 else "files"
245 print(
246 f"\n{_c(f'{total:,}', _BOLD, tty=tty)} {sym_word} across "
247 f"{len(symbol_map):,} {file_word} ({lang_str})"
248 )
249
def _emit_json(
    symbol_map: _SymbolTreeMap,
    source_ref: str,
    working_tree: bool,
    elapsed: Callable[[], float],
) -> None:
    """Print the symbol map to stdout as a single JSON object.

    The payload is the envelope produced by ``make_envelope(elapsed)`` merged
    with ``source_ref``, ``working_tree``, ``total_symbols`` and ``results``.
    ``results`` is a flat list ordered by file path, then by line number
    within each file.

    Args:
        symbol_map: Mapping of file path to that file's symbol tree.
        source_ref: Label identifying the inspected snapshot.
        working_tree: Whether the symbols were read from the working tree.
        elapsed: Timer callable handed to ``make_envelope``.
    """
    results = []
    # Sort files explicitly so the documented "by file then line" order does
    # not depend on the iteration order of the incoming mapping.
    for file_path in sorted(symbol_map):
        tree = symbol_map[file_path]
        for addr, rec in sorted(tree.items(), key=lambda kv: kv[1]["lineno"]):
            results.append({
                "address": addr,
                "kind": rec["kind"],
                "name": rec["name"],
                "qualified_name": rec["qualified_name"],
                "path": file_path,
                "lineno": rec["lineno"],
                "end_lineno": rec["end_lineno"],
                "content_id": rec["content_id"],
                "body_hash": rec["body_hash"],
                "signature_id": rec["signature_id"],
            })
    print(json.dumps(_SymbolsJson(
        **make_envelope(elapsed),
        source_ref=source_ref,
        working_tree=working_tree,
        total_symbols=len(results),
        results=results,
    )))
279
280 # ---------------------------------------------------------------------------
281 # Argument parser registration
282 # ---------------------------------------------------------------------------
283
def register(subparsers: "argparse._SubParsersAction[argparse.ArgumentParser]") -> None:
    """Register the ``symbols`` subcommand on *subparsers*.

    The module docstring is reused verbatim as the long ``--help``
    description, and ``run`` is installed as the handler through
    ``set_defaults(func=run)``.

    Arguments
    ---------
    --commit / -c REF
        Commit ID or branch to inspect (default: working tree).
    --kind / -k KIND
        Filter to symbols of a specific kind (function, async_function,
        class, method, async_method, variable, import, section, rule).
    --file / -F PATH
        Show symbols from a single file. Accepts an exact path or a
        unique path suffix (e.g. ``billing.py`` matches ``src/billing.py``).
        Note the short flag is an upper-case ``-F``.
    --language / -l LANG
        Show symbols from files of this language only (case-insensitive).
    --hashes
        Include content hashes alongside each symbol.
    --count
        Print only the total symbol count and per-language breakdown.
        Mutually exclusive with ``--json`` / ``-j``.
    --json / -j
        Emit the full symbol table as JSON with schema_version, exit_code,
        and duration_ms in the envelope. Mutually exclusive with ``--count``.
    """
    parser = subparsers.add_parser(
        "symbols",
        help="List every semantic symbol (function, class, method…) in a snapshot.",
        description=__doc__,
        # Raw formatter keeps the docstring's hand-laid-out examples intact.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--commit", "-c",
        dest="ref",
        default=None,
        metavar="REF",
        help="Commit ID or branch to inspect (default: working tree).",
    )
    parser.add_argument(
        "--kind", "-k",
        dest="kind_filter",
        default=None,
        metavar="KIND",
        help=(
            "Filter to symbols of a specific kind "
            "(function, async_function, class, method, async_method, "
            "variable, import, section, rule)."
        ),
    )
    parser.add_argument(
        "--file", "-F",
        dest="file_filter",
        default=None,
        metavar="PATH",
        help=(
            "Show symbols from a single file. Accepts an exact path or a "
            "unique path suffix (e.g. 'billing.py' matches 'src/billing.py')."
        ),
    )
    parser.add_argument(
        "--language", "-l",
        dest="language_filter",
        default=None,
        metavar="LANG",
        help="Show symbols from files of this language only (case-insensitive).",
    )
    parser.add_argument(
        "--hashes",
        dest="show_hashes",
        action="store_true",
        help="Include content hashes in the output.",
    )

    # --count and --json select different output modes; argparse rejects
    # any invocation that passes both.
    output_group = parser.add_mutually_exclusive_group()
    output_group.add_argument(
        "--count",
        dest="count_only",
        action="store_true",
        help="Print only the total symbol count and language breakdown.",
    )
    output_group.add_argument(
        "--json", "-j",
        dest="json_out",
        action="store_true",
        help="Emit the full symbol table as JSON.",
    )

    parser.set_defaults(func=run, json_out=False)
371
372 # ---------------------------------------------------------------------------
373 # Command entry point
374 # ---------------------------------------------------------------------------
375
def run(args: argparse.Namespace) -> None:
    """List every semantic symbol (function, class, method…) in a snapshot.

    Reads the stable, content-addressed symbol graph produced by the domain
    plugin's AST analysis — independent of line numbers or formatting. When
    ``--commit`` is omitted, reads the working tree (uncommitted edits
    included). Use ``--kind``, ``--file``, and ``--language`` to narrow.

    Agent quickstart::

        muse code symbols --file src/utils.py --json
        muse code symbols --kind function --json
        muse code symbols --commit HEAD~5 --json
        muse code symbols --language Python --count

    ``--count`` and ``--json`` are mutually exclusive; ``--count`` always
    prints a plain-text summary line.

    JSON fields::

        source_ref     ``"working-tree"`` or the resolved commit ID.
        working_tree   ``true`` when reading from disk (no ``--commit``).
        total_symbols  Total symbols in the filtered result set.
        results        Flat symbol list sorted by file then line. Each entry:
                       ``address``, ``kind``, ``name``, ``qualified_name``,
                       ``path``, ``lineno``, ``end_lineno``, ``content_id``,
                       ``body_hash``, ``signature_id``.
        muse_version   Muse release that produced this output.
        schema         Envelope schema version (int).
        exit_code      ``0`` on success.
        duration_ms    Wall-clock milliseconds for the command.
        timestamp      ISO-8601 UTC timestamp of command completion.
        warnings       List of non-fatal advisory messages.

    Exit codes::

        0  Success.
        1  User error (unknown kind, bad ref, ambiguous file path).
    """
    elapsed = start_timer()
    ref: str | None = args.ref
    kind_filter: str | None = args.kind_filter
    file_filter: str | None = args.file_filter
    language_filter: str | None = args.language_filter
    count_only: bool = args.count_only
    show_hashes: bool = args.show_hashes
    json_out: bool = args.json_out
    tty: bool = sys.stdout.isatty()

    # ── Input validation ──────────────────────────────────────────────────────

    if kind_filter is not None and kind_filter not in _VALID_KINDS:
        valid = ", ".join(sorted(_VALID_KINDS))
        print(f"❌ Unknown kind '{kind_filter}'. Valid kinds: {valid}", file=sys.stderr)
        raise SystemExit(ExitCode.USER_ERROR)

    if language_filter is not None:
        language_filter = _normalise_language(language_filter)

    # ── Repo / commit resolution ──────────────────────────────────────────────

    root = require_repo()
    branch = read_current_branch(root)

    commit = resolve_commit_ref(root, branch, ref)
    if commit is None:
        label = ref or "HEAD"
        print(f"❌ Commit '{label}' not found.", file=sys.stderr)
        raise SystemExit(ExitCode.USER_ERROR)

    manifest = get_commit_snapshot_manifest(root, commit.commit_id) or {}
    if not manifest:
        print(
            f"❌ Snapshot for commit {commit.commit_id} has no files.",
            file=sys.stderr,
        )
        raise SystemExit(ExitCode.USER_ERROR)

    # ── Working-tree vs object-store mode / file-filter resolution ───────────

    working_tree = ref is None  # True when no --commit was given
    workdir = root if working_tree else None

    resolved_file_filter = file_filter
    if file_filter is not None:
        found = _resolve_file_filter(file_filter, manifest)
        if found is not None:
            resolved_file_filter = found
        elif working_tree:
            # File not in HEAD manifest — may be a new uncommitted file.
            # Inject a synthetic manifest entry so symbols_for_snapshot can
            # parse it directly from the working directory.
            candidate = root / file_filter
            if candidate.is_file():
                manifest = {file_filter: ""}
                resolved_file_filter = file_filter

    # ── Symbol extraction ─────────────────────────────────────────────────────

    # Load the stat cache when reading from the working tree so that
    # symbols_for_snapshot can derive SHA-256 keys from (ino, mtime, size)
    # without reading file bytes on warm SymbolCache hits.
    stat_cache = load_stat_cache(root) if working_tree else None

    symbol_map = symbols_for_snapshot(
        root,
        manifest,
        kind_filter=kind_filter,
        file_filter=resolved_file_filter,
        language_filter=language_filter,
        workdir=workdir,
        stat_cache=stat_cache,
    )

    # ── Source reference label ────────────────────────────────────────────────

    if working_tree:
        source_ref = "working-tree"
    else:
        source_ref = commit.commit_id

    # ── Output ────────────────────────────────────────────────────────────────

    if count_only:
        total = sum(len(t) for t in symbol_map.values())
        counts = _lang_counts(symbol_map)
        sym_word = "symbol" if total == 1 else "symbols"
        summary = f"{total:,} {sym_word}"
        # Only append the breakdown when there is one; an empty result used
        # to print a dangling "()".
        if counts:
            lang_str = ", ".join(
                f"{lang}: {count:,}" for lang, count in sorted(counts.items())
            )
            summary = f"{summary} ({lang_str})"
        print(summary)
        return

    if json_out:
        _emit_json(symbol_map, source_ref=source_ref, working_tree=working_tree, elapsed=elapsed)
        return

    if working_tree:
        header = (
            f'working tree '
            f'(HEAD {commit.commit_id} "{sanitize_display(commit.message)}")'
        )
    else:
        header = f'commit {commit.commit_id} "{sanitize_display(commit.message)}"'

    print(_c(header, _DIM, tty=tty))
    _print_human(symbol_map, show_hashes, tty)
File History 1 commit
sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b fix: try fetch/presign before fetch/mpack to avoid Cloudfla… Sonnet 4.6 patch 8 days ago