query.py
python
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf
fix: unified object store migration — idempotent writes, JS…
Sonnet 4.6
minor
⚠ breaking
28 days ago
| 1 | """muse code query — symbol graph predicate query (v2). |
| 2 | |
| 3 | SQL for your codebase. A full predicate DSL over the typed, content-addressed |
| 4 | symbol graph — with OR, NOT, grouping, and an expanded field set. |
| 5 | |
| 6 | v2 grammar:: |
| 7 | |
| 8 | expr = or_expr |
| 9 | or_expr = and_expr ( OR and_expr )* |
| 10 | and_expr = not_expr ( [AND] not_expr )* # implicit AND |
| 11 | not_expr = NOT primary | primary |
| 12 | primary = "(" expr ")" | atom |
| 13 | atom = KEY OP VALUE |
| 14 | |
| 15 | Supported operators:: |
| 16 | |
| 17 | = exact match |
| 18 | ~= contains (case-insensitive) |
| 19 | ^= starts with (case-insensitive) |
| 20 | $= ends with (case-insensitive) |
| 21 | != not equal |
| 22 | |
| 23 | Supported keys:: |
| 24 | |
| 25 | kind function | class | method | variable | import | … |
| 26 | language Python | Go | Rust | TypeScript | … |
| 27 | name bare symbol name |
| 28 | qualified_name dotted name (User.save) |
| 29 | file file path |
| 30 | hash content_id prefix (exact-body match) |
| 31 | body_hash body_hash prefix |
| 32 | signature_id signature_id prefix |
| 33 | lineno_gt symbol starts after line N |
| 34 | lineno_lt symbol starts before line N |
| 35 | size_gt symbol body exceeds N lines (end_lineno − lineno > N) |
| 36 | size_lt symbol body shorter than N lines |
| 37 | |
| 38 | Usage:: |
| 39 | |
| 40 | muse code query "kind=function" "language=Python" "name~=validate" |
| 41 | muse code query "(kind=function OR kind=method) name^=_" |
| 42 | muse code query "NOT kind=import" "file~=billing" |
| 43 | muse code query "hash=a3f2c9" |
| 44 | muse code query "kind=function" "name$=_test" --commit HEAD~10 |
| 45 | muse code query "kind=function" "name~=validate" --all-commits |
| 46 | muse code query "kind=function" "size_gt=50" --sort size # biggest fns |
| 47 | muse code query "kind=function" "name~=compute" --count # just the count |
| 48 | muse code query "kind=function" --unique-bodies # find clones |
| 49 | muse code query "kind=function" --all-commits --since 2026-01-01 # added this year |
| 50 | """ |
| 51 | |
| 52 | import argparse |
| 53 | import collections.abc |
| 54 | import datetime |
| 55 | import json |
| 56 | import logging |
| 57 | import pathlib |
| 58 | import sys |
| 59 | from typing import TypedDict |
| 60 | |
| 61 | from muse.core.types import short_id |
| 62 | from muse.core.envelope import EnvelopeJson, make_envelope |
| 63 | from muse.core.errors import ExitCode |
| 64 | from muse.core.repo import parse_date_arg, require_repo |
| 65 | from muse.core.store import ( |
| 66 | CommitRecord, |
| 67 | get_all_commits, |
| 68 | get_commit_snapshot_manifest, |
| 69 | read_current_branch, |
| 70 | resolve_commit_ref, |
| 71 | ) |
| 72 | from muse.core.symbol_cache import SymbolCache, load_symbol_cache |
| 73 | from muse.core.timing import start_timer |
| 74 | from muse.plugins.code._predicate import Predicate, PredicateError, parse_query |
| 75 | from muse.plugins.code._query import language_of, symbols_for_snapshot |
| 76 | from muse.plugins.code.ast_parser import SymbolRecord |
| 77 | from muse.core.validation import clamp_int, sanitize_display |
| 78 | |
| 79 | type _QueryResult = dict[str, str | int | bool] |
| 80 | |
| 81 | class _QueryJson(EnvelopeJson): |
| 82 | """Formal schema for the ``muse code query --json`` output envelope (single-snapshot mode). |
| 83 | |
| 84 | All fields are always present. |
| 85 | |
| 86 | Fields |
| 87 | ------ |
| 88 | commit: Short commit ID of the snapshot that was queried. |
| 89 | sort: Sort field used (``file``, ``name``, ``kind``, ``lineno``, ``size``). |
| 90 | unique_bodies: Whether ``--unique-bodies`` was active. |
| 91 | truncated: ``true`` when ``--limit`` was applied and results were cut. |
| 92 | results: List of matching symbol records. |
| 93 | """ |
| 94 | |
| 95 | commit: str |
| 96 | sort: str |
| 97 | unique_bodies: bool |
| 98 | truncated: bool |
| 99 | results: list[_QueryResult] |
| 100 | |
class _AllCommitsJson(EnvelopeJson):
    """Envelope printed by ``muse code query --all-commits --json``.

    Extends ``EnvelopeJson`` with the keys below.  ``run`` supplies every one
    of them whenever it builds this envelope.

    Fields
    ------
    mode:      The literal string ``"all-commits"``.
    truncated: ``true`` when more commits qualified than ``--max-commits``
               allowed to be walked.
    results:   Historical match records, one mapping per match.
    """

    mode: str
    truncated: bool
    results: list[_QueryResult]
| 116 | |
| 117 | type _StrMap = dict[str, str] |
| 118 | type _IconMap = dict[str, str] |
| 119 | logger = logging.getLogger(__name__) |
| 120 | |
| 121 | _KIND_ICON: _IconMap = { |
| 122 | "function": "fn", |
| 123 | "async_function": "fn~", |
| 124 | "class": "class", |
| 125 | "method": "method", |
| 126 | "async_method": "method~", |
| 127 | "variable": "var", |
| 128 | "import": "import", |
| 129 | } |
| 130 | |
| 131 | _VALID_SORT_FIELDS = frozenset({"file", "name", "kind", "lineno", "size"}) |
| 132 | |
class _HistoricalMatch:
    """One symbol hit recorded while scanning history (``--all-commits`` mode).

    Instances are simple value holders with four public attributes:

    - ``address``    — symbol address the hit was found under
    - ``rec``        — the matching ``SymbolRecord``
    - ``commit``     — the ``CommitRecord`` whose snapshot held the symbol
    - ``first_seen`` — flag supplied by the creator; ``_query_all_commits``
      sets it for the first hit carrying a given ``content_id``
    """

    def __init__(
        self,
        address: str,
        rec: SymbolRecord,
        commit: CommitRecord,
        first_seen: bool,
    ) -> None:
        self.address, self.rec = address, rec
        self.commit, self.first_seen = commit, first_seen

    def to_dict(self) -> _QueryResult:
        """Flatten the hit into a JSON-serialisable mapping.

        The commit timestamp is rendered with ``isoformat()``; every other
        value is copied through unchanged.
        """
        symbol = self.rec
        origin = self.commit
        payload: _QueryResult = {
            "address": self.address,
            "kind": symbol["kind"],
            "name": symbol["name"],
            "content_id": symbol["content_id"],
            "first_seen": self.first_seen,
            "commit_id": origin.commit_id,
            "commit_message": origin.message,
            "committed_at": origin.committed_at.isoformat(),
            "branch": origin.branch,
        }
        return payload
| 160 | |
def _query_all_commits(
    root: pathlib.Path,
    filters: list[Predicate],
    max_commits: int,
    since: datetime.date | None,
    until: datetime.date | None,
) -> tuple[list[_HistoricalMatch], bool]:
    """Scan history oldest-first and collect every symbol passing ``filters``.

    Commits come from ``get_all_commits`` and are ordered by ``committed_at``.
    The optional ``since`` / ``until`` bounds (both inclusive, compared on the
    commit's calendar date) are applied before the ``max_commits`` cap.

    A single ``SymbolCache`` is loaded up front, handed to every
    ``symbols_for_snapshot`` call, and saved once in a ``finally`` clause, so
    it is saved even when the scan raises part-way through.

    Commits whose ``snapshot_id`` was already visited are skipped, so a
    snapshot shared by several commits is scanned only for the oldest of them.

    Args:
        root: Repository root.
        filters: Predicates called as ``predicate(file_path, record)``; a
            symbol is kept only when all of them return a truthy value.
        max_commits: Maximum number of (date-filtered) commits to walk.
        since: Earliest commit date to include, or ``None`` for no bound.
        until: Latest commit date to include, or ``None`` for no bound.

    Returns:
        ``(matches, truncated)`` — ``truncated`` is True when more commits
        qualified than ``max_commits`` allowed.
    """
    history = get_all_commits(root)
    if not history:
        return [], False

    timeline = sorted(history, key=lambda entry: entry.committed_at)

    # Narrow by date before touching any snapshot: loading symbols is the
    # costly step, comparing dates is not.
    if since is not None:
        timeline = [
            entry for entry in timeline if entry.committed_at.date() >= since
        ]
    if until is not None:
        timeline = [
            entry for entry in timeline if entry.committed_at.date() <= until
        ]

    capped = len(timeline) > max_commits
    timeline = timeline[:max_commits]

    hits: list[_HistoricalMatch] = []
    known_bodies: set[str] = set()
    visited_snapshots: set[str] = set()

    # One cache object serves the whole walk.
    cache: SymbolCache = load_symbol_cache(root)

    try:
        for commit in timeline:
            snapshot = commit.snapshot_id
            if snapshot in visited_snapshots:
                continue
            visited_snapshots.add(snapshot)

            manifest = get_commit_snapshot_manifest(root, commit.commit_id) or {}
            if not manifest:
                continue

            per_file = symbols_for_snapshot(root, manifest, cache=cache)
            for file_path in sorted(per_file):
                in_line_order = sorted(
                    per_file[file_path].items(),
                    key=lambda item: item[1]["lineno"],
                )
                for addr, rec in in_line_order:
                    if any(not check(file_path, rec) for check in filters):
                        continue
                    body_id = rec["content_id"]
                    novel = body_id not in known_bodies
                    if novel:
                        known_bodies.add(body_id)
                    hits.append(_HistoricalMatch(addr, rec, commit, novel))
    finally:
        # Save the cache regardless of how the loop ended.
        cache.save()

    return hits, capped
| 234 | |
# One single-snapshot match as collected by ``run``: (file_path, address, record).
_SortTuple = tuple[str, str, SymbolRecord]
| 236 | |
| 237 | def _sort_key(sort_by: str) -> collections.abc.Callable[[_SortTuple], tuple[str | int, ...]]: |
| 238 | """Return a sort key function for a list of ``(file_path, addr, rec)`` tuples.""" |
| 239 | if sort_by == "name": |
| 240 | return lambda t: (t[2]["name"].lower(), t[0], t[2]["lineno"]) |
| 241 | if sort_by == "kind": |
| 242 | return lambda t: (t[2]["kind"], t[0], t[2]["lineno"]) |
| 243 | if sort_by == "lineno": |
| 244 | return lambda t: (t[2]["lineno"], t[0]) |
| 245 | if sort_by == "size": |
| 246 | # Negate size so largest comes first. |
| 247 | return lambda t: (-(t[2]["end_lineno"] - t[2]["lineno"]), t[0]) |
| 248 | # Default: file then lineno. |
| 249 | return lambda t: (t[0], t[2]["lineno"]) |
| 250 | |
def register(
    subparsers: "argparse._SubParsersAction[argparse.ArgumentParser]",
) -> None:
    """Attach the ``query`` subcommand to the ``muse code`` parser group.

    The subparser uses this module's docstring as its description and sets
    ``func=run`` as a parser default.  Arguments, in the order they are added:

    - ``PREDICATE`` (positional, ``nargs="*"``) — v2 DSL predicate expressions;
      ``run`` rejects an empty list.
    - ``--commit`` / ``-c`` REF — query a historical snapshot instead of HEAD.
    - ``--all-commits`` — search across every commit on every branch.
    - ``--since`` / ``--until`` YYYY-MM-DD — date range for ``--all-commits``.
    - ``--max-commits`` N — cap for the ``--all-commits`` walk (default 10 000).
    - ``--limit`` N — cap the result count (0 = unlimited).
    - ``--sort`` FIELD — one of ``_VALID_SORT_FIELDS`` (default ``file``).
    - ``--count`` — print only the result count.
    - ``--unique-bodies`` — deduplicate by content_id (clone-detector mode).
    - ``--hashes`` — include content hashes in human-readable output.
    - ``--committed`` — query the committed snapshot only; no working-tree overlay.
    - ``--json`` / ``-j`` — emit structured JSON (``_QueryJson`` or ``_AllCommitsJson``).
    """
    query_parser = subparsers.add_parser(
        "query",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__,
        help="Query the symbol graph with a predicate DSL.",
    )
    option = query_parser.add_argument

    def switch(*names: str, **extra: str) -> None:
        """Add a boolean ``store_true`` flag."""
        option(*names, action="store_true", **extra)

    sort_fields = sorted(_VALID_SORT_FIELDS)

    option(
        "predicates",
        metavar="PREDICATE",
        nargs="*",
        help='One or more predicates, e.g. "kind=function" "name~=validate".',
    )
    option(
        "--commit", "-c",
        metavar="REF",
        dest="ref",
        default=None,
        help="Query a historical snapshot instead of HEAD.",
    )
    switch(
        "--all-commits",
        help=(
            "Search across ALL commits (every branch). Enables temporal"
            " hash= queries: find when a function body first appeared."
            " Mutually exclusive with --commit."
        ),
    )
    option(
        "--since",
        default=None,
        metavar="YYYY-MM-DD",
        help="With --all-commits: only consider commits on or after this date.",
    )
    option(
        "--until",
        default=None,
        metavar="YYYY-MM-DD",
        help="With --all-commits: only consider commits on or before this date.",
    )
    option(
        "--max-commits",
        metavar="N",
        type=int,
        default=10_000,
        help=(
            "With --all-commits: cap the number of commits walked"
            " (default: 10000)."
        ),
    )
    option(
        "--limit",
        metavar="N",
        type=int,
        default=0,
        help="Cap the number of results returned (0 = unlimited).",
    )
    option(
        "--sort",
        metavar="FIELD",
        choices=sort_fields,
        default="file",
        help=(
            f"Sort results by field: {', '.join(sort_fields)}"
            " (default: file)."
        ),
    )
    switch(
        "--count",
        help="Print only the count of matching symbols — no symbol list.",
    )
    switch(
        "--unique-bodies",
        help=(
            "Deduplicate by content_id — show only unique implementations."
            " Turns muse query into a clone detector."
        ),
    )
    switch(
        "--hashes",
        dest="show_hashes",
        help="Include content hashes in output.",
    )
    switch(
        "--committed",
        help=(
            "Query the last committed snapshot only — do not overlay working-tree changes."
            " By default, files on disk take precedence over the committed snapshot."
        ),
    )
    switch(
        "--json", "-j",
        dest="json_out",
        help="Emit results as JSON for agent consumption (see _QueryJson / _AllCommitsJson schema).",
    )
    query_parser.set_defaults(func=run)
| 376 | |
def run(args: argparse.Namespace) -> None:
    """Query the symbol graph with a predicate DSL.

    ``muse query`` is SQL for your codebase.  Every predicate is evaluated
    against the typed, content-addressed symbol graph — not raw text.

    By default, working-tree files on disk take precedence over the committed
    snapshot — so uncommitted edits are immediately visible without staging or
    committing.  Pass ``--committed`` to query the pure committed snapshot, or
    ``--commit REF`` to query a specific historical snapshot (always committed).

    New in v2.1:
        ``size_gt=N`` / ``size_lt=N`` — filter by symbol body line count.
        ``--count`` — emit only the result count.
        ``--limit N`` — cap results (like SQL LIMIT).
        ``--sort FIELD`` — sort by file, name, kind, lineno, or size.
        ``--unique-bodies`` — deduplicate by content_id (clone detector mode).
        ``--since / --until YYYY-MM-DD`` — temporal range for --all-commits.
        ``--committed`` — skip working-tree overlay; query committed snapshot only.

    JSON envelope (``--json`` / ``-j``)
    ------------------------------------
    Single-snapshot mode emits ``_QueryJson``:

    - ``schema_version`` — Muse version string
    - ``commit``         — ID of the commit queried (``commit.commit_id``,
      not shortened)
    - ``sort``           — sort field in effect
    - ``unique_bodies``  — whether ``--unique-bodies`` was active
    - ``truncated``      — ``true`` when ``--limit`` cut the result set
    - ``results``        — list of matching symbol records
    - ``exit_code``      — always ``0`` on this path
    - ``duration_ms``    — wall-clock time for the command

    ``--all-commits`` mode emits ``_AllCommitsJson``:

    - ``schema_version`` — Muse version string
    - ``mode``           — always ``"all-commits"``
    - ``truncated``      — ``true`` when ``--max-commits`` was hit
    - ``results``        — list of historical match records
    - ``exit_code``      — always ``0`` on this path
    - ``duration_ms``    — wall-clock time for the command

    Args:
        args: Namespace produced by the parser configured in ``register``.

    Raises:
        SystemExit: With ``ExitCode.USER_ERROR`` when no predicate is given,
            when ``--all-commits`` is combined with ``--commit``, when
            ``--since`` / ``--until`` are used without ``--all-commits``, when
            a predicate fails to parse, or when the commit cannot be resolved.
    """
    elapsed = start_timer()

    predicates: list[str] = args.predicates
    ref: str | None = args.ref
    all_commits: bool = args.all_commits
    committed_only: bool = args.committed
    show_hashes: bool = args.show_hashes
    json_out: bool = args.json_out
    count_only: bool = args.count
    limit: int = clamp_int(args.limit, 0, 10000, 'limit')
    sort_by: str = args.sort
    unique_bodies: bool = args.unique_bodies
    max_commits: int = clamp_int(args.max_commits, 1, 100000, 'max_commits')

    root = require_repo()
    branch = read_current_branch(root)

    if not predicates:
        print("❌ At least one predicate is required.", file=sys.stderr)
        raise SystemExit(ExitCode.USER_ERROR)

    if all_commits and ref is not None:
        print(
            "❌ --all-commits and --commit are mutually exclusive.",
            file=sys.stderr,
        )
        raise SystemExit(ExitCode.USER_ERROR)

    # NOTE(review): both values already went through clamp_int above, so these
    # two guards are presumably unreachable; kept as a safety net until
    # clamp_int's out-of-range behaviour is confirmed.
    if limit < 0:
        print("❌ --limit must be >= 0.", file=sys.stderr)
        raise SystemExit(ExitCode.USER_ERROR)

    if max_commits < 1:
        print("❌ --max-commits must be >= 1.", file=sys.stderr)
        raise SystemExit(ExitCode.USER_ERROR)

    # Parse --since / --until date filters.
    since_date: datetime.date | None = (
        parse_date_arg(args.since, "--since").date() if args.since else None
    )
    until_date: datetime.date | None = (
        parse_date_arg(args.until, "--until").date() if args.until else None
    )

    if (since_date or until_date) and not all_commits:
        print(
            "❌ --since / --until require --all-commits.", file=sys.stderr
        )
        raise SystemExit(ExitCode.USER_ERROR)

    # Parse predicates via the v2 grammar.  All positional predicates are
    # folded into one combined predicate by parse_query.
    try:
        combined_predicate: Predicate = parse_query(predicates)
    except PredicateError as exc:
        print(f"❌ {exc}", file=sys.stderr)
        # Chain explicitly so the parse failure stays attached as the cause.
        raise SystemExit(ExitCode.USER_ERROR) from exc
    filters: list[Predicate] = [combined_predicate]

    # ── --all-commits mode ────────────────────────────────────────────────────
    if all_commits:
        historical, truncated = _query_all_commits(
            root, filters, max_commits, since_date, until_date
        )

        if json_out:
            # NOTE(review): on this path --json takes precedence over --count
            # and --limit (every historical match is emitted), whereas the
            # single-snapshot path honours --count first.  Left unchanged.
            print(json.dumps(_AllCommitsJson(
                **make_envelope(elapsed),
                mode="all-commits",
                truncated=truncated,
                results=[h.to_dict() for h in historical],
            )))
            return

        if not historical:
            pred_display = " AND ".join(sanitize_display(p) for p in predicates)
            print(
                f" (no symbols matching: {pred_display}"
                f" [searched all commits])"
            )
            return

        # Deduplicate for display: show unique addresses with first-seen commit.
        seen_addrs: set[str] = set()
        unique: list[_HistoricalMatch] = []
        for h in historical:
            if h.first_seen and h.address not in seen_addrs:
                seen_addrs.add(h.address)
                unique.append(h)

        if limit > 0:
            unique = unique[:limit]

        if count_only:
            print(len(unique))
            return

        pred_display = " AND ".join(sanitize_display(p) for p in predicates)
        trunc_note = " ⚠️ truncated" if truncated else ""
        print(
            f"\n{len(unique)} unique symbol(s) matching"
            f" [{pred_display}] across all commits{trunc_note}\n"
        )
        for h in unique:
            date_str = h.commit.committed_at.strftime("%Y-%m-%d")
            cid = short_id(h.commit.commit_id)
            icon = _KIND_ICON.get(h.rec["kind"], h.rec["kind"])
            hash_part = f" {short_id(h.rec['content_id'])}.." if show_hashes else ""
            branch_label = (
                f" [{h.commit.branch}]" if h.commit.branch else ""
            )
            # The address comes from repository content, so it is passed
            # through sanitize_display exactly as the single-snapshot listing
            # below does before it reaches the terminal.
            print(
                f" {sanitize_display(h.address):<60} {icon:<8}"
                f" first seen {cid} {date_str}"
                f"{branch_label}{hash_part}"
            )
        return

    # ── Single-snapshot mode (default) ────────────────────────────────────────
    commit = resolve_commit_ref(root, branch, ref)
    if commit is None:
        print(
            f"❌ Commit '{ref or 'HEAD'}' not found.", file=sys.stderr
        )
        raise SystemExit(ExitCode.USER_ERROR)

    manifest = get_commit_snapshot_manifest(root, commit.commit_id) or {}
    # Default: overlay working-tree files so uncommitted edits are visible.
    # --committed (or an explicit --commit REF) pins to the snapshot instead.
    workdir = None if committed_only or ref is not None else root
    symbol_map = symbols_for_snapshot(root, manifest, workdir=workdir)

    # Collect matches.
    matches: list[tuple[str, str, SymbolRecord]] = []
    for file_path, tree in symbol_map.items():
        for addr, rec in tree.items():
            if all(f(file_path, rec) for f in filters):
                matches.append((file_path, addr, rec))

    # Sort.
    matches.sort(key=_sort_key(sort_by))

    # Unique-bodies: deduplicate by content_id, keeping the first occurrence
    # in the sorted order.
    if unique_bodies:
        seen_cids: set[str] = set()
        deduped: list[tuple[str, str, SymbolRecord]] = []
        for fp, addr, rec in matches:
            cid = rec["content_id"]
            if cid not in seen_cids:
                seen_cids.add(cid)
                deduped.append((fp, addr, rec))
        matches = deduped

    # Apply limit (after sorting and deduplication).
    limited = limit > 0 and len(matches) > limit
    if limited:
        matches = matches[:limit]

    # Count-only output.
    if count_only:
        print(len(matches))
        return

    # JSON output.
    if json_out:
        result_records = []
        for fp, addr, rec in matches:
            result_records.append(
                {
                    "address": addr,
                    "kind": rec["kind"],
                    "name": rec["name"],
                    "qualified_name": rec["qualified_name"],
                    "file": fp,
                    "lineno": rec["lineno"],
                    "end_lineno": rec["end_lineno"],
                    "size": rec["end_lineno"] - rec["lineno"],
                    "language": language_of(fp),
                    "content_id": rec["content_id"],
                    "body_hash": rec["body_hash"],
                    "signature_id": rec["signature_id"],
                }
            )
        print(json.dumps(_QueryJson(
            **make_envelope(elapsed),
            commit=commit.commit_id,
            sort=sort_by,
            unique_bodies=unique_bodies,
            truncated=limited,
            results=result_records,
        )))
        return

    # Human-readable output.
    if not matches:
        pred_str = " AND ".join(sanitize_display(p) for p in predicates)
        print(f" (no symbols matching: {pred_str})")
        return

    files_seen: set[str] = set()
    for fp, addr, rec in matches:
        files_seen.add(fp)
        icon = _KIND_ICON.get(rec["kind"], rec["kind"])
        line = rec["lineno"]
        size = rec["end_lineno"] - rec["lineno"]
        hash_part = f" {short_id(rec['content_id'])}.." if show_hashes else ""
        # The size column is only shown when the listing is ordered by size.
        size_part = f" {size:>3}L" if sort_by == "size" else ""
        print(f" {sanitize_display(addr):<60} {icon:<10} line {line:>4}{size_part}{hash_part}")

    pred_display = " AND ".join(sanitize_display(p) for p in predicates)
    trunc_note = f" (limited to {limit})" if limited else ""
    print(
        f"\n{len(matches)} match(es) across {len(files_seen)} file(s)"
        f" [{pred_display}]{trunc_note}"
    )
File History
1 commit
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf
fix: unified object store migration — idempotent writes, JS…
Sonnet 4.6
minor
⚠
28 days ago