query_engine.py
python
sha256:e6465e8a9b7fa8e6223ed4a3576e96c568c913ae2caeb9c31f15e7a81b250b40
docs: add | jq convention to --json section of agent-guide
Sonnet 4.6
1 day ago
| 1 | """Domain-agnostic commit-history query engine for Muse. |
| 2 | |
| 3 | Any domain can walk the commit graph, evaluate a predicate per commit, and |
| 4 | collect structured matches — without reimplementing the graph-traversal loop. |
| 5 | |
| 6 | Three-tier walker contract |
| 7 | -------------------------- |
| 8 | Muse has three distinct commit-graph walkers, each serving a different purpose: |
| 9 | |
| 10 | 1. **``walk_history``** (this module) — high-level walker. |
| 11 | Use for any query that produces :class:`QueryMatch` results consumed by |
| 12 | users or agents. Supports ``follow_merges=True`` (BFS over the full DAG), |
| 13 | ``since``/``until`` time filters, ``load_manifest=False`` optimisation, |
| 14 | and the standard evaluator protocol. This is the right choice for all |
| 15 | ``muse code`` query commands. |
| 16 | |
| 17 | 2. **``store.walk_commits_between``** — range-bounded walker. |
| 18 | Use when you need a raw ``list[CommitRecord]`` for a specific linear range |
| 19 | (``from_commit_id`` exclusive → ``to_commit_id`` inclusive, first-parent |
| 20 | only). Used by ``find_symbol``, ``lineage``, ``query_history``, and |
| 21 | ``status`` where the full match protocol is not needed. |
| 22 | |
| 23 | 3. **``_query.walk_commits_bfs``** — low-level DAG walker. |
| 24 | Available as a low-level escape hatch during the migration period. |
| 25 | Prefer ``walk_history(follow_merges=True)`` for new code. |
| 26 | |
| 27 | Architecture |
| 28 | ------------ |
| 29 | :: |
| 30 | |
| 31 | muse/core/query_engine.py ← this file: generic history walker |
| 32 | muse/plugins/midi/_midi_query.py ← MIDI predicate evaluator |
| 33 | muse/plugins/code/_code_query.py ← code predicate evaluator |
| 34 | muse/cli/commands/midi_query.py ← CLI for MIDI query |
| 35 | muse/cli/commands/code_query.py ← CLI for code query |
| 36 | |
| 37 | Usage pattern:: |
| 38 | |
| 39 | from muse.core.query_engine import walk_history, QueryMatch |
| 40 | |
| 41 | def my_evaluator( |
| 42 | commit: CommitRecord, |
| 43 | manifest: Manifest, |
| 44 | repo_root: pathlib.Path, |
| 45 | ) -> list[QueryMatch]: |
| 46 | matches = [] |
| 47 | if "interesting-file.py" in manifest: |
| 48 | matches.append(QueryMatch( |
| 49 | commit_id=commit.commit_id, |
| 50 | author=commit.author, |
| 51 | committed_at=commit.committed_at.isoformat(), |
| 52 | branch=commit.branch, |
| 53 | detail="found interesting-file.py", |
| 54 | extra={}, |
| 55 | )) |
| 56 | return matches |
| 57 | |
| 58 | results = walk_history(repo_root, branch="main", evaluator=my_evaluator) |
| 59 | # DAG walk with follow_merges: |
| 60 | results = walk_history( |
| 61 | repo_root, branch="main", evaluator=my_evaluator, follow_merges=True |
| 62 | ) |
| 63 | |
| 64 | Public API |
| 65 | ---------- |
| 66 | - :class:`QueryMatch` — one result row from the evaluator. |
| 67 | - :class:`CommitEvaluator` — type alias for the evaluator callable. |
| 68 | - :func:`walk_history` — traverse commits and collect matches. |
| 69 | """ |
| 70 | |
| 71 | import datetime |
| 72 | import logging |
| 73 | import pathlib |
| 74 | from collections.abc import Callable |
| 75 | from typing import TypedDict |
| 76 | |
| 77 | from muse.core.types import Manifest |
| 78 | from muse.core.graph import iter_ancestors |
| 79 | from muse.core.refs import get_head_commit_id |
| 80 | from muse.core.commits import ( |
| 81 | CommitRecord, |
| 82 | read_commit, |
| 83 | ) |
| 84 | from muse.core.snapshots import get_commit_snapshot_manifest |
| 85 | |
| 86 | logger = logging.getLogger(__name__) |
| 87 | |
| 88 | _DEFAULT_MAX_COMMITS = 500 |
| 89 | |
| 90 | # --------------------------------------------------------------------------- |
| 91 | # Result type |
| 92 | # --------------------------------------------------------------------------- |
| 93 | |
| 94 | class QueryMatch(TypedDict, total=False): |
| 95 | """One match returned by a predicate evaluator. |
| 96 | |
| 97 | Required fields: |
| 98 | ``commit_id`` The commit that produced this match. |
| 99 | ``author`` Commit author string. |
| 100 | ``committed_at`` ISO-8601 timestamp string. |
| 101 | ``branch`` Branch name. |
| 102 | ``detail`` Short human-readable description of what matched. |
| 103 | |
| 104 | Optional: |
| 105 | ``extra`` Domain-specific data (e.g. ``{"symbol": "my_fn"}``). |
| 106 | ``agent_id`` Agent identity from commit provenance (if present). |
| 107 | ``model_id`` Model ID from commit provenance (if present). |
| 108 | """ |
| 109 | |
| 110 | commit_id: str |
| 111 | author: str |
| 112 | committed_at: str |
| 113 | branch: str |
| 114 | detail: str |
| 115 | extra: Manifest |
| 116 | agent_id: str |
| 117 | model_id: str |
| 118 | |
| 119 | # --------------------------------------------------------------------------- |
| 120 | # Evaluator type alias |
| 121 | # --------------------------------------------------------------------------- |
| 122 | |
| 123 | #: Signature every domain evaluator must satisfy. |
| 124 | #: Returns a (possibly empty) list of :class:`QueryMatch` for the commit. |
| 125 | CommitEvaluator = Callable[ |
| 126 | [CommitRecord, dict[str, str], pathlib.Path], |
| 127 | list[QueryMatch], |
| 128 | ] |
| 129 | |
| 130 | # --------------------------------------------------------------------------- |
| 131 | # Core history walker |
| 132 | # --------------------------------------------------------------------------- |
| 133 | |
| 134 | def walk_history( |
| 135 | repo_root: pathlib.Path, |
| 136 | branch: str, |
| 137 | evaluator: CommitEvaluator, |
| 138 | *, |
| 139 | max_commits: int = _DEFAULT_MAX_COMMITS, |
| 140 | head_commit_id: str | None = None, |
| 141 | load_manifest: bool = True, |
| 142 | since: datetime.datetime | None = None, |
| 143 | until: datetime.datetime | None = None, |
| 144 | follow_merges: bool = False, |
| 145 | ) -> list[QueryMatch]: |
| 146 | """Walk the commit graph from HEAD and collect matches from *evaluator*. |
| 147 | |
| 148 | For each commit the evaluator receives the :class:`~muse.core.store.CommitRecord`, |
| 149 | the raw file manifest (path → SHA-256 hash), and the repository root. |
| 150 | It returns a list of :class:`QueryMatch` dicts (empty list if the commit |
| 151 | has no matches). |
| 152 | |
| 153 | When *follow_merges* is ``False`` (default), only the main parent chain |
| 154 | (``parent_commit_id``) is followed — sufficient for single-branch queries |
| 155 | and avoids loading the full DAG. |
| 156 | |
| 157 | When *follow_merges* is ``True``, BFS is used so that merge commits' |
| 158 | second parents (``parent2_commit_id``) are also visited. This is |
| 159 | necessary for commands like ``hotspots`` and ``blame`` where missing |
| 160 | feature-branch commits would give incorrect results. |
| 161 | |
| 162 | Args: |
| 163 | repo_root: Repository root containing ``.muse/``. |
| 164 | branch: Branch to start from (used to resolve HEAD when |
| 165 | *head_commit_id* is ``None``). |
| 166 | evaluator: Domain-specific callable — see :data:`CommitEvaluator`. |
| 167 | max_commits: Maximum commits to inspect. Default 500. |
| 168 | head_commit_id: Override the starting commit. ``None`` → resolve HEAD |
| 169 | via the store (same logic as all other commands). |
| 170 | load_manifest: When ``False``, passes an empty manifest ``{}`` to the |
| 171 | evaluator and skips the snapshot-manifest I/O entirely. |
| 172 | Safe for evaluators that only inspect commit-level fields |
| 173 | (``author``, ``agent_id``, ``sem_ver_bump``, etc.). |
| 174 | Provides a significant speed-up on large repos. |
| 175 | since: Skip commits older than this timestamp (inclusive). |
| 176 | until: Skip commits newer than this timestamp (inclusive). |
| 177 | follow_merges: When ``True``, follow ``parent2_commit_id`` at merge |
| 178 | commits (BFS). When ``False`` (default), follow only |
| 179 | the linear ``parent_commit_id`` chain. |
| 180 | |
| 181 | Returns: |
| 182 | All :class:`QueryMatch` records collected, ordered by walk order |
| 183 | (newest-first for the main parent chain). |
| 184 | """ |
| 185 | if head_commit_id is None: |
| 186 | resolved = get_head_commit_id(repo_root, branch) |
| 187 | if not resolved: |
| 188 | logger.warning("Branch '%s' has no commits.", branch) |
| 189 | return [] |
| 190 | head_commit_id = resolved |
| 191 | |
| 192 | results: list[QueryMatch] = [] |
| 193 | |
| 194 | if follow_merges: |
| 195 | results = _walk_history_bfs( |
| 196 | repo_root, head_commit_id, evaluator, |
| 197 | max_commits=max_commits, |
| 198 | load_manifest=load_manifest, |
| 199 | since=since, |
| 200 | until=until, |
| 201 | ) |
| 202 | else: |
| 203 | results = _walk_history_linear( |
| 204 | repo_root, head_commit_id, evaluator, |
| 205 | max_commits=max_commits, |
| 206 | load_manifest=load_manifest, |
| 207 | since=since, |
| 208 | until=until, |
| 209 | ) |
| 210 | |
| 211 | return results |
| 212 | |
| 213 | def _make_tz_aware(dt: datetime.datetime) -> datetime.datetime: |
| 214 | """Return *dt* with UTC timezone attached if it is naive.""" |
| 215 | return dt if dt.tzinfo else dt.replace(tzinfo=datetime.timezone.utc) |
| 216 | |
| 217 | def _passes_time_filter( |
| 218 | commit: CommitRecord, |
| 219 | since: datetime.datetime | None, |
| 220 | until: datetime.datetime | None, |
| 221 | ) -> bool: |
| 222 | """Return True if *commit* falls within the [since, until] window.""" |
| 223 | ts = _make_tz_aware(commit.committed_at) |
| 224 | if since is not None and ts < _make_tz_aware(since): |
| 225 | return False |
| 226 | if until is not None and ts > _make_tz_aware(until): |
| 227 | return False |
| 228 | return True |
| 229 | |
| 230 | def _evaluate_commit( |
| 231 | repo_root: pathlib.Path, |
| 232 | commit: CommitRecord, |
| 233 | evaluator: CommitEvaluator, |
| 234 | load_manifest: bool, |
| 235 | ) -> list[QueryMatch]: |
| 236 | """Load manifest if needed and run *evaluator* on *commit*.""" |
| 237 | if load_manifest: |
| 238 | manifest_rec = get_commit_snapshot_manifest(repo_root, commit.commit_id) |
| 239 | manifest: Manifest = dict(manifest_rec) if manifest_rec else {} |
| 240 | else: |
| 241 | manifest = {} |
| 242 | try: |
| 243 | return evaluator(commit, manifest, repo_root) |
| 244 | except Exception: |
| 245 | logger.exception("Evaluator error on commit %s", commit.commit_id) |
| 246 | return [] |
| 247 | |
| 248 | def _walk_history_linear( |
| 249 | repo_root: pathlib.Path, |
| 250 | start_commit_id: str, |
| 251 | evaluator: CommitEvaluator, |
| 252 | *, |
| 253 | max_commits: int, |
| 254 | load_manifest: bool, |
| 255 | since: datetime.datetime | None, |
| 256 | until: datetime.datetime | None, |
| 257 | ) -> list[QueryMatch]: |
| 258 | """Linear first-parent walk — internal implementation.""" |
| 259 | results: list[QueryMatch] = [] |
| 260 | current_id: str | None = start_commit_id |
| 261 | seen = 0 |
| 262 | |
| 263 | while current_id and seen < max_commits: |
| 264 | commit = read_commit(repo_root, current_id) |
| 265 | if commit is None: |
| 266 | break |
| 267 | seen += 1 |
| 268 | if _passes_time_filter(commit, since, until): |
| 269 | results.extend(_evaluate_commit(repo_root, commit, evaluator, load_manifest)) |
| 270 | current_id = commit.parent_commit_id |
| 271 | |
| 272 | return results |
| 273 | |
| 274 | def _walk_history_bfs( |
| 275 | repo_root: pathlib.Path, |
| 276 | start_commit_id: str, |
| 277 | evaluator: CommitEvaluator, |
| 278 | *, |
| 279 | max_commits: int, |
| 280 | load_manifest: bool, |
| 281 | since: datetime.datetime | None, |
| 282 | until: datetime.datetime | None, |
| 283 | ) -> list[QueryMatch]: |
| 284 | """BFS DAG walk following both parents — internal implementation.""" |
| 285 | results: list[QueryMatch] = [] |
| 286 | for commit in iter_ancestors(repo_root, start_commit_id, max_commits=max_commits): |
| 287 | if _passes_time_filter(commit, since, until): |
| 288 | results.extend(_evaluate_commit(repo_root, commit, evaluator, load_manifest)) |
| 289 | return results |
| 290 | |
| 291 | def format_matches(matches: list[QueryMatch], *, max_results: int = 50) -> str: |
| 292 | """Format a list of matches as a human-readable table. |
| 293 | |
| 294 | Args: |
| 295 | matches: The results from :func:`walk_history`. |
| 296 | max_results: Maximum rows to show before the "… N more" truncation line. |
| 297 | |
| 298 | Returns: |
| 299 | Multi-line string ready for printing to stdout. |
| 300 | """ |
| 301 | if not matches: |
| 302 | return "No matches found." |
| 303 | |
| 304 | lines: list[str] = [f"Found {len(matches)} match(es):\n"] |
| 305 | for m in matches[:max_results]: |
| 306 | cid = m.get("commit_id", "?") |
| 307 | author = m.get("author", "unknown") |
| 308 | ts = m.get("committed_at", "")[:10] |
| 309 | detail = m.get("detail", "") |
| 310 | agent = m.get("agent_id", "") |
| 311 | agent_str = f" [{agent}]" if agent else "" |
| 312 | lines.append(f" {cid} {ts} {author}{agent_str} — {detail}") |
| 313 | |
| 314 | if len(matches) > max_results: |
| 315 | lines.append(f"\n … {len(matches) - max_results} more (use --limit to raise the cap)") |
| 316 | |
| 317 | return "\n".join(lines) |
File History
1 commit
sha256:e6465e8a9b7fa8e6223ed4a3576e96c568c913ae2caeb9c31f15e7a81b250b40
docs: add | jq convention to --json section of agent-guide
Sonnet 4.6
1 day ago