symbols.py
python
sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b
fix: try fetch/presign before fetch/mpack to avoid Cloudfla…
Sonnet 4.6
patch
8 days ago
| 1 | """muse code symbols -- list every semantic symbol in a snapshot. |
| 2 | |
| 3 | Muse tracks the semantic interior of every source file -- the full symbol graph |
| 4 | the code plugin builds at commit time -- giving each function, class, method, |
| 5 | and variable a stable, content-addressed identity independent of line numbers |
| 6 | or formatting. |
| 7 | |
| 8 | Output (default -- human-readable table):: |
| 9 | |
| 10 | src/utils.py |
| 11 | fn calculate_total line 12 |
| 12 | fn _validate_amount line 28 |
| 13 | class Invoice line 45 |
| 14 | method Invoice.to_dict line 52 |
| 15 | method Invoice.from_dict line 61 |
| 16 | |
| 17 | src/models.py |
| 18 | class User line 8 |
| 19 | method User.__init__ line 10 |
| 20 | method User.save line 19 |
| 21 | |
| 22 | 12 symbols across 2 files (Python: 12) |
| 23 | |
| 24 | Flags:: |
| 25 | |
| 26 | --commit <ref> |
| 27 | Inspect a specific commit instead of the working tree. |
| 28 | Accepts a full or abbreviated commit SHA, a branch name, or HEAD~N. |
| 29 | |
| 30 | --kind <kind> |
| 31 | Filter to a specific symbol kind: |
| 32 | function, async_function, class, method, async_method, |
| 33 | variable, import, section, rule. |
| 34 | |
| 35 | --file <path> |
| 36 | Show symbols from a single file. Accepts an exact path or a |
| 37 | unique suffix (e.g. "billing.py" matches "src/billing.py"). |
| 38 | |
| 39 | --language <lang> |
| 40 | Show symbols from files of this language only (case-insensitive, |
| 41 | e.g. "python", "TypeScript", "go"). |
| 42 | |
| 43 | --count |
| 44 | Print only the total symbol count and per-language breakdown. |
| 45 | |
| 46 | --hashes |
| 47 | Include content hashes alongside each symbol. |
| 48 | |
| 49 | --json |
| 50 | Emit a structured JSON object for tooling integration:: |
| 51 | |
| 52 | { |
| 53 | "source_ref": "working-tree", |
| 54 | "working_tree": true, |
| 55 | "total_symbols": 12, |
| 56 | "results": [ |
| 57 | {"address": "src/utils.py::calculate_total", |
| 58 | "path": "src/utils.py", "kind": "function", ...} |
| 59 | ] |
| 60 | } |
| 61 | """ |
| 62 | |
| 63 | import argparse |
| 64 | import json |
| 65 | import logging |
| 66 | import pathlib |
| 67 | import sys |
| 68 | from collections.abc import Callable |
| 69 | from typing import TypedDict |
| 70 | |
| 71 | from muse.core.types import short_id |
| 72 | from muse.core.envelope import EnvelopeJson, make_envelope |
| 73 | from muse.core.errors import ExitCode |
| 74 | from muse.core.repo import require_repo |
| 75 | from muse.core.stat_cache import load_cache as load_stat_cache |
| 76 | from muse.core.types import Manifest |
| 77 | from muse.core.refs import read_current_branch |
| 78 | from muse.core.commits import resolve_commit_ref |
| 79 | from muse.core.snapshots import get_commit_snapshot_manifest |
| 80 | from muse.core.timing import start_timer |
| 81 | from muse.plugins.code._query import language_of, symbols_for_snapshot |
| 82 | from muse.plugins.code.ast_parser import SymbolTree |
| 83 | |
| 84 | class _SymbolEntry(TypedDict): |
| 85 | address: str |
| 86 | kind: str |
| 87 | name: str |
| 88 | qualified_name: str |
| 89 | path: str |
| 90 | lineno: int |
| 91 | language: str |
| 92 | content_id: str |
| 93 | |
class _SymbolsJson(EnvelopeJson):
    """Top-level ``--json`` payload: the ``EnvelopeJson`` fields plus symbol data."""

    # Label of the inspected snapshot; ``run`` sets it to "working-tree" or
    # to the resolved commit's ``commit_id``.
    source_ref: str
    # True when no ``--commit`` was given.
    working_tree: bool
    # Number of entries in ``results``.
    total_symbols: int
    # Flat list of symbol records.
    results: list[_SymbolEntry]
| 99 | |
# File path -> symbol tree for that file.
type _SymbolTreeMap = dict[str, SymbolTree]
# Language name -> number of symbols counted for it.
type _CounterMap = dict[str, int]
# Symbol kind -> (short label shown in the table, ANSI colour codes).
type _KindDisplay = dict[str, tuple[str, list[str]]]

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
| 105 | |
| 106 | # --------------------------------------------------------------------------- |
| 107 | # ANSI colour helpers — only emitted when stdout is a TTY. |
| 108 | # --------------------------------------------------------------------------- |
| 109 | |
# ANSI SGR escape sequences used by ``_c``.
_RESET = "\033[0m"  # clears all attributes; appended after coloured text
_BOLD = "\033[1m"
_DIM = "\033[2m"
_CYAN = "\033[36m"
_YELLOW = "\033[33m"
_BLUE = "\033[34m"
_GREEN = "\033[32m"
_MAGENTA = "\033[35m"
| 118 | |
| 119 | def _c(text: str, *codes: str, tty: bool) -> str: |
| 120 | """Wrap *text* in ANSI *codes* when *tty* is True.""" |
| 121 | if not tty: |
| 122 | return text |
| 123 | return "".join(codes) + text + _RESET |
| 124 | |
# Maps symbol kind → (short icon, ANSI colour codes).
# The icon is the label printed in the first column of the human-readable
# table; the colour codes are applied to it only when stdout is a TTY.
_KIND_DISPLAY: _KindDisplay = {
    "function": ("fn", [_BLUE]),
    "async_function": ("fn~", [_BLUE, _DIM]),
    "class": ("class", [_YELLOW, _BOLD]),
    "method": ("method", [_CYAN]),
    "async_method": ("method~", [_CYAN, _DIM]),
    "variable": ("var", [_DIM]),
    "import": ("import", [_DIM]),
    "section": ("section", [_GREEN]),
    "rule": ("rule", [_MAGENTA]),
}

# Kinds accepted by ``--kind``: exactly the keys of ``_KIND_DISPLAY``.
_VALID_KINDS: frozenset[str] = frozenset(_KIND_DISPLAY)
| 139 | |
| 140 | # --------------------------------------------------------------------------- |
| 141 | # Language helpers |
| 142 | # --------------------------------------------------------------------------- |
| 143 | |
| 144 | # Canonical map: lowercase language name → display name. |
| 145 | # Built from _SUFFIX_LANG in _query.py to stay in sync. |
| 146 | from muse.plugins.code._query import _SUFFIX_LANG # noqa: E402 (module-level import) |
| 147 | from muse.core.validation import sanitize_display |
| 148 | |
# Keys are lower-cased language names, values the display spelling taken from
# ``_SUFFIX_LANG``.  This is a plain str -> str map, not a snapshot manifest.
_LANG_CANONICAL: dict[str, str] = {lang.lower(): lang for lang in set(_SUFFIX_LANG.values())}
| 150 | |
def _normalise_language(lang: str) -> str:
    """Map *lang* to its canonical display spelling.

    Surrounding whitespace is removed and the lookup is case-insensitive.
    A name that is not in ``_LANG_CANONICAL`` is returned stripped but
    otherwise unchanged.
    """
    stripped = lang.strip()
    return _LANG_CANONICAL.get(stripped.lower(), stripped)
| 154 | |
| 155 | # --------------------------------------------------------------------------- |
| 156 | # File-filter helpers |
| 157 | # --------------------------------------------------------------------------- |
| 158 | |
| 159 | def _file_matches(file_path: str, file_filter: str) -> bool: |
| 160 | """Return True if *file_path* equals or uniquely ends with *file_filter*. |
| 161 | |
| 162 | Allows passing ``"billing.py"`` to match ``"src/billing.py"`` without |
| 163 | requiring callers to know the full directory prefix. Uses a separator |
| 164 | anchor (``/``) to prevent ``y.py`` matching ``billy.py``. |
| 165 | """ |
| 166 | if file_path == file_filter: |
| 167 | return True |
| 168 | normalized = file_filter.replace("\\", "/") |
| 169 | return file_path.endswith(f"/{normalized}") |
| 170 | |
def _resolve_file_filter(
    file_filter: str,
    manifest: Manifest,
) -> str | None:
    """Resolve *file_filter* to the single manifest path it identifies.

    Returns the matching path when exactly one manifest entry matches, and
    ``None`` when nothing matches (the caller decides how to report an empty
    result).

    Raises ``SystemExit`` with ``ExitCode.USER_ERROR`` when more than one
    entry matches, after listing up to ten candidates on stderr.
    """
    matching = sorted(p for p in manifest if _file_matches(p, file_filter))
    if not matching:
        return None  # no match — caller handles the empty result
    if len(matching) == 1:
        return matching[0]

    print(
        f"❌ '{file_filter}' is ambiguous — matches {len(matching)} files. "
        "Use a more specific path:",
        file=sys.stderr,
    )
    for m in matching[:10]:
        # Manifest paths go through sanitize_display before reaching the
        # terminal, as the symbol table output already does for file paths.
        print(f" {sanitize_display(m)}", file=sys.stderr)
    if len(matching) > 10:
        print(f" … and {len(matching) - 10} more", file=sys.stderr)
    raise SystemExit(ExitCode.USER_ERROR)
| 195 | |
| 196 | # --------------------------------------------------------------------------- |
| 197 | # Repository helpers |
| 198 | # --------------------------------------------------------------------------- |
| 199 | |
| 200 | # --------------------------------------------------------------------------- |
| 201 | # Output helpers |
| 202 | # --------------------------------------------------------------------------- |
| 203 | |
def _lang_counts(symbol_map: _SymbolTreeMap) -> _CounterMap:
    """Tally the symbols in *symbol_map* per language.

    Each file's language comes from ``language_of``; the result maps that
    language name to the summed size of the symbol trees of its files.
    """
    tally: _CounterMap = {}
    for path in symbol_map:
        language = language_of(path)
        tally[language] = tally.get(language, 0) + len(symbol_map[path])
    return tally
| 211 | |
def _print_human(
    symbol_map: _SymbolTreeMap,
    show_hashes: bool,
    tty: bool,
) -> None:
    """Print *symbol_map* as a per-file table followed by a summary line.

    Symbols are listed under their file, ordered by line number.  With
    *show_hashes* each row also carries the shortened content id.  ANSI
    colours are applied only when *tty* is true.  An empty map prints a
    single placeholder line.
    """
    if not symbol_map:
        print(" (no semantic symbols found)")
        return

    def _line_of(item):
        # item is an (address, record) pair; order rows by source line.
        return item[1]["lineno"]

    for file_path, tree in symbol_map.items():
        heading = _c(sanitize_display(file_path), _BOLD, tty=tty)
        print(f"\n{heading}")
        for _addr, rec in sorted(tree.items(), key=_line_of):
            kind = rec["kind"]
            # Unknown kinds fall back to their own name with no colour.
            icon, colour_codes = _KIND_DISPLAY.get(kind, (kind, []))
            icon_str = _c(f"{icon:<10}", *colour_codes, tty=tty)
            name_str = f"{rec['qualified_name']:<40}"
            line_str = _c(f"line {rec['lineno']:>4}", _DIM, tty=tty)
            if show_hashes:
                hash_suffix = _c(f" {short_id(rec['content_id'])}..", _DIM, tty=tty)
            else:
                hash_suffix = ""
            print(f" {icon_str} {name_str} {line_str}{hash_suffix}")

    total = sum(len(tree) for tree in symbol_map.values())
    file_total = len(symbol_map)
    per_language = sorted(_lang_counts(symbol_map).items())
    lang_str = ", ".join(f"{lang}: {count:,}" for lang, count in per_language)
    sym_word = "symbol" if total == 1 else "symbols"
    file_word = "file" if file_total == 1 else "files"
    total_str = _c(f"{total:,}", _BOLD, tty=tty)
    print(f"\n{total_str} {sym_word} across {file_total:,} {file_word} ({lang_str})")
| 249 | |
def _emit_json(
    symbol_map: _SymbolTreeMap,
    source_ref: str,
    working_tree: bool,
    elapsed: Callable[[], float],
) -> None:
    """Print the symbol map to stdout as one JSON document.

    Files are visited in *symbol_map* order and, within a file, symbols are
    ordered by line number.  The envelope fields come from
    ``make_envelope(elapsed)``; ``total_symbols`` is the length of the
    flattened result list.
    """
    results = [
        {
            "address": addr,
            "kind": rec["kind"],
            "name": rec["name"],
            "qualified_name": rec["qualified_name"],
            "path": file_path,
            "lineno": rec["lineno"],
            "end_lineno": rec["end_lineno"],
            "content_id": rec["content_id"],
            "body_hash": rec["body_hash"],
            "signature_id": rec["signature_id"],
        }
        for file_path, tree in symbol_map.items()
        for addr, rec in sorted(tree.items(), key=lambda kv: kv[1]["lineno"])
    ]
    payload = _SymbolsJson(
        **make_envelope(elapsed),
        source_ref=source_ref,
        working_tree=working_tree,
        total_symbols=len(results),
        results=results,
    )
    print(json.dumps(payload))
| 279 | |
| 280 | # --------------------------------------------------------------------------- |
| 281 | # Argument parser registration |
| 282 | # --------------------------------------------------------------------------- |
| 283 | |
def register(subparsers: "argparse._SubParsersAction[argparse.ArgumentParser]") -> None:
    """Attach the ``symbols`` subcommand to *subparsers*.

    Options
    -------
    --commit / -c REF
        Commit ID or branch to inspect (default: working tree).
    --kind / -k KIND
        Restrict output to one symbol kind (function, async_function,
        class, method, async_method, variable, import, section, rule).
    --file / -F PATH
        Restrict output to a single file, given as an exact path or a
        unique path suffix (``billing.py`` matches ``src/billing.py``).
    --language / -l LANG
        Restrict output to files of one language (case-insensitive).
    --hashes
        Include content hashes alongside each symbol.
    --count
        Print only the total and the per-language breakdown.  Cannot be
        combined with ``--json`` / ``-j``.
    --json / -j
        Emit the full symbol table as JSON.  Cannot be combined with
        ``--count``.

    The parsed namespace is dispatched to ``run`` through the ``func``
    default.
    """
    kind_help = (
        "Filter to symbols of a specific kind "
        "(function, async_function, class, method, async_method, "
        "variable, import, section, rule)."
    )
    file_help = (
        "Show symbols from a single file. Accepts an exact path or a "
        "unique path suffix (e.g. 'billing.py' matches 'src/billing.py')."
    )

    parser = subparsers.add_parser(
        "symbols",
        help="List every semantic symbol (function, class, method…) in a snapshot.",
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    # Filters and display options, in the order they appear in --help.
    add = parser.add_argument
    add(
        "--commit", "-c",
        dest="ref",
        metavar="REF",
        default=None,
        help="Commit ID or branch to inspect (default: working tree).",
    )
    add("--kind", "-k", dest="kind_filter", metavar="KIND", default=None, help=kind_help)
    add("--file", "-F", dest="file_filter", metavar="PATH", default=None, help=file_help)
    add(
        "--language", "-l",
        dest="language_filter",
        metavar="LANG",
        default=None,
        help="Show symbols from files of this language only (case-insensitive).",
    )
    add(
        "--hashes",
        dest="show_hashes",
        action="store_true",
        help="Include content hashes in the output.",
    )

    # The two alternative output modes may not be combined.
    output_group = parser.add_mutually_exclusive_group()
    output_group.add_argument(
        "--count",
        dest="count_only",
        action="store_true",
        help="Print only the total symbol count and language breakdown.",
    )
    output_group.add_argument(
        "--json", "-j",
        dest="json_out",
        action="store_true",
        help="Emit the full symbol table as JSON.",
    )

    parser.set_defaults(func=run, json_out=False)
| 371 | |
| 372 | # --------------------------------------------------------------------------- |
| 373 | # Command entry point |
| 374 | # --------------------------------------------------------------------------- |
| 375 | |
def run(args: argparse.Namespace) -> None:
    """List every semantic symbol (function, class, method…) in a snapshot.

    Reads the stable, content-addressed symbol graph produced by the domain
    plugin's AST analysis — independent of line numbers or formatting.  When
    ``--commit`` is omitted, reads the working tree (uncommitted edits
    included).  Use ``--kind``, ``--file``, and ``--language`` to narrow.

    Agent quickstart::

        muse code symbols --file src/utils.py --json
        muse code symbols --kind function --json
        muse code symbols --commit HEAD~5 --json
        muse code symbols --language Python --count

    ``--count`` and ``--json`` are mutually exclusive; the parser rejects a
    command line that passes both.

    JSON fields::

        source_ref      ``"working-tree"`` or the resolved commit's ID.
        working_tree    ``true`` when reading from disk (no ``--commit``).
        total_symbols   Total symbols in the filtered result set.
        results         Flat symbol list sorted by file then line.  Each entry:
                        ``address``, ``kind``, ``name``, ``qualified_name``,
                        ``path``, ``lineno``, ``end_lineno``, ``content_id``,
                        ``body_hash``, ``signature_id``.
        muse_version    Muse release that produced this output.
        schema          Envelope schema version (int).
        exit_code       ``0`` on success.
        duration_ms     Wall-clock milliseconds for the command.
        timestamp       ISO-8601 UTC timestamp of command completion.
        warnings        List of non-fatal advisory messages.

    Exit codes::

        0   Success.
        1   User error (unknown kind, bad ref, empty snapshot, ambiguous
            file path).
    """
    elapsed = start_timer()
    ref: str | None = args.ref
    kind_filter: str | None = args.kind_filter
    file_filter: str | None = args.file_filter
    language_filter: str | None = args.language_filter
    count_only: bool = args.count_only
    show_hashes: bool = args.show_hashes
    json_out: bool = args.json_out
    tty: bool = sys.stdout.isatty()

    # ── Input validation ──────────────────────────────────────────────────────

    # Validated before touching the repository so a typo fails fast.
    if kind_filter is not None and kind_filter not in _VALID_KINDS:
        valid = ", ".join(sorted(_VALID_KINDS))
        print(f"❌ Unknown kind '{kind_filter}'. Valid kinds: {valid}", file=sys.stderr)
        raise SystemExit(ExitCode.USER_ERROR)

    if language_filter is not None:
        language_filter = _normalise_language(language_filter)

    # ── Repo / commit resolution ──────────────────────────────────────────────

    root = require_repo()
    branch = read_current_branch(root)

    # A commit is resolved even in working-tree mode: its manifest supplies
    # the file list and its ID/message appear in the human-readable header.
    commit = resolve_commit_ref(root, branch, ref)
    if commit is None:
        label = ref or "HEAD"
        print(f"❌ Commit '{label}' not found.", file=sys.stderr)
        raise SystemExit(ExitCode.USER_ERROR)

    manifest = get_commit_snapshot_manifest(root, commit.commit_id) or {}
    if not manifest:
        print(
            f"❌ Snapshot for commit {commit.commit_id} has no files.",
            file=sys.stderr,
        )
        raise SystemExit(ExitCode.USER_ERROR)

    # ── Working-tree vs object-store mode / file-filter resolution ───────────

    working_tree = ref is None  # True when no --commit was given
    workdir = root if working_tree else None

    # Falls back to the raw filter when nothing in the manifest matches.
    resolved_file_filter = file_filter
    if file_filter is not None:
        found = _resolve_file_filter(file_filter, manifest)
        if found is not None:
            resolved_file_filter = found
        elif working_tree:
            # File not in HEAD manifest — may be a new uncommitted file.
            # Inject a synthetic manifest entry so symbols_for_snapshot can
            # parse it directly from the working directory.
            candidate = root / file_filter
            if candidate.is_file():
                # Replaces the whole manifest with this single entry.
                manifest = {file_filter: ""}
                resolved_file_filter = file_filter

    # ── Symbol extraction ─────────────────────────────────────────────────────

    # Load the stat cache when reading from the working tree so that
    # symbols_for_snapshot can derive SHA-256 keys from (ino, mtime, size)
    # without reading file bytes on warm SymbolCache hits.
    stat_cache = load_stat_cache(root) if working_tree else None

    symbol_map = symbols_for_snapshot(
        root,
        manifest,
        kind_filter=kind_filter,
        file_filter=resolved_file_filter,
        language_filter=language_filter,
        workdir=workdir,
        stat_cache=stat_cache,
    )

    # ── Source reference label ────────────────────────────────────────────────

    if working_tree:
        source_ref = "working-tree"
    else:
        source_ref = commit.commit_id

    # ── Output ────────────────────────────────────────────────────────────────

    # --count: one summary line, no per-symbol rows.
    if count_only:
        total = sum(len(t) for t in symbol_map.values())
        counts = _lang_counts(symbol_map)
        lang_str = ", ".join(f"{lang}: {count:,}" for lang, count in sorted(counts.items()))
        sym_word = "symbol" if total == 1 else "symbols"
        print(f"{total:,} {sym_word} ({lang_str})")
        return

    if json_out:
        _emit_json(symbol_map, source_ref=source_ref, working_tree=working_tree, elapsed=elapsed)
        return

    # Human-readable mode: a dim header naming the snapshot, then the table.
    if working_tree:
        header = (
            f'working tree '
            f'(HEAD {commit.commit_id} "{sanitize_display(commit.message)}")'
        )
    else:
        header = f'commit {commit.commit_id} "{sanitize_display(commit.message)}"'

    print(_c(header, _DIM, tty=tty))
    _print_human(symbol_map, show_hashes, tty)
File History
1 commit
sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b
fix: try fetch/presign before fetch/mpack to avoid Cloudfla…
Sonnet 4.6
patch
8 days ago