gabriel / muse public
query_engine.py python
317 lines 11.4 KB
Raw
sha256:e6465e8a9b7fa8e6223ed4a3576e96c568c913ae2caeb9c31f15e7a81b250b40 docs: add | jq convention to --json section of agent-guide Sonnet 4.6 1 day ago
1 """Domain-agnostic commit-history query engine for Muse.
2
3 Any domain can walk the commit graph, evaluate a predicate per commit, and
4 collect structured matches — without reimplementing the graph-traversal loop.
5
6 Three-tier walker contract
7 --------------------------
8 Muse has three distinct commit-graph walkers, each serving a different purpose:
9
10 1. **``walk_history``** (this module) — high-level walker.
11 Use for any query that produces :class:`QueryMatch` results consumed by
12 users or agents. Supports ``follow_merges=True`` (BFS over the full DAG),
13 ``since``/``until`` time filters, ``load_manifest=False`` optimisation,
14 and the standard evaluator protocol. This is the right choice for all
15 ``muse code`` query commands.
16
17 2. **``store.walk_commits_between``** — range-bounded walker.
18 Use when you need a raw ``list[CommitRecord]`` for a specific linear range
19 (``from_commit_id`` exclusive → ``to_commit_id`` inclusive, first-parent
20 only). Used by ``find_symbol``, ``lineage``, ``query_history``, and
21 ``status`` where the full match protocol is not needed.
22
23 3. **``_query.walk_commits_bfs``** — low-level DAG walker.
24 Available as a low-level escape hatch during the migration period.
25 Prefer ``walk_history(follow_merges=True)`` for new code.
26
27 Architecture
28 ------------
29 ::
30
31 muse/core/query_engine.py ← this file: generic history walker
32 muse/plugins/midi/_midi_query.py ← MIDI predicate evaluator
33 muse/plugins/code/_code_query.py ← code predicate evaluator
34 muse/cli/commands/midi_query.py ← CLI for MIDI query
35 muse/cli/commands/code_query.py ← CLI for code query
36
37 Usage pattern::
38
39 from muse.core.query_engine import walk_history, QueryMatch
40
41 def my_evaluator(
42 commit: CommitRecord,
43 manifest: Manifest,
44 repo_root: pathlib.Path,
45 ) -> list[QueryMatch]:
46 matches = []
47 if "interesting-file.py" in manifest:
48 matches.append(QueryMatch(
49 commit_id=commit.commit_id,
50 author=commit.author,
51 committed_at=commit.committed_at.isoformat(),
52 branch=commit.branch,
53 detail="found interesting-file.py",
54 extra={},
55 ))
56 return matches
57
58 results = walk_history(repo_root, branch="main", evaluator=my_evaluator)
59 # DAG walk with follow_merges:
60 results = walk_history(
61 repo_root, branch="main", evaluator=my_evaluator, follow_merges=True
62 )
63
64 Public API
65 ----------
66 - :class:`QueryMatch` — one result row from the evaluator.
67 - :class:`CommitEvaluator` — type alias for the evaluator callable.
68 - :func:`walk_history` — traverse commits and collect matches.
69 """
70
71 import datetime
72 import logging
73 import pathlib
74 from collections.abc import Callable
75 from typing import TypedDict
76
77 from muse.core.types import Manifest
78 from muse.core.graph import iter_ancestors
79 from muse.core.refs import get_head_commit_id
80 from muse.core.commits import (
81 CommitRecord,
82 read_commit,
83 )
84 from muse.core.snapshots import get_commit_snapshot_manifest
85
86 logger = logging.getLogger(__name__)
87
88 _DEFAULT_MAX_COMMITS = 500
89
90 # ---------------------------------------------------------------------------
91 # Result type
92 # ---------------------------------------------------------------------------
93
94 class QueryMatch(TypedDict, total=False):
95 """One match returned by a predicate evaluator.
96
97 Required fields:
98 ``commit_id`` The commit that produced this match.
99 ``author`` Commit author string.
100 ``committed_at`` ISO-8601 timestamp string.
101 ``branch`` Branch name.
102 ``detail`` Short human-readable description of what matched.
103
104 Optional:
105 ``extra`` Domain-specific data (e.g. ``{"symbol": "my_fn"}``).
106 ``agent_id`` Agent identity from commit provenance (if present).
107 ``model_id`` Model ID from commit provenance (if present).
108 """
109
110 commit_id: str
111 author: str
112 committed_at: str
113 branch: str
114 detail: str
115 extra: Manifest
116 agent_id: str
117 model_id: str
118
119 # ---------------------------------------------------------------------------
120 # Evaluator type alias
121 # ---------------------------------------------------------------------------
122
123 #: Signature every domain evaluator must satisfy.
124 #: Returns a (possibly empty) list of :class:`QueryMatch` for the commit.
125 CommitEvaluator = Callable[
126 [CommitRecord, dict[str, str], pathlib.Path],
127 list[QueryMatch],
128 ]
129
130 # ---------------------------------------------------------------------------
131 # Core history walker
132 # ---------------------------------------------------------------------------
133
134 def walk_history(
135 repo_root: pathlib.Path,
136 branch: str,
137 evaluator: CommitEvaluator,
138 *,
139 max_commits: int = _DEFAULT_MAX_COMMITS,
140 head_commit_id: str | None = None,
141 load_manifest: bool = True,
142 since: datetime.datetime | None = None,
143 until: datetime.datetime | None = None,
144 follow_merges: bool = False,
145 ) -> list[QueryMatch]:
146 """Walk the commit graph from HEAD and collect matches from *evaluator*.
147
148 For each commit the evaluator receives the :class:`~muse.core.store.CommitRecord`,
149 the raw file manifest (path → SHA-256 hash), and the repository root.
150 It returns a list of :class:`QueryMatch` dicts (empty list if the commit
151 has no matches).
152
153 When *follow_merges* is ``False`` (default), only the main parent chain
154 (``parent_commit_id``) is followed — sufficient for single-branch queries
155 and avoids loading the full DAG.
156
157 When *follow_merges* is ``True``, BFS is used so that merge commits'
158 second parents (``parent2_commit_id``) are also visited. This is
159 necessary for commands like ``hotspots`` and ``blame`` where missing
160 feature-branch commits would give incorrect results.
161
162 Args:
163 repo_root: Repository root containing ``.muse/``.
164 branch: Branch to start from (used to resolve HEAD when
165 *head_commit_id* is ``None``).
166 evaluator: Domain-specific callable — see :data:`CommitEvaluator`.
167 max_commits: Maximum commits to inspect. Default 500.
168 head_commit_id: Override the starting commit. ``None`` → resolve HEAD
169 via the store (same logic as all other commands).
170 load_manifest: When ``False``, passes an empty manifest ``{}`` to the
171 evaluator and skips the snapshot-manifest I/O entirely.
172 Safe for evaluators that only inspect commit-level fields
173 (``author``, ``agent_id``, ``sem_ver_bump``, etc.).
174 Provides a significant speed-up on large repos.
175 since: Skip commits older than this timestamp (inclusive).
176 until: Skip commits newer than this timestamp (inclusive).
177 follow_merges: When ``True``, follow ``parent2_commit_id`` at merge
178 commits (BFS). When ``False`` (default), follow only
179 the linear ``parent_commit_id`` chain.
180
181 Returns:
182 All :class:`QueryMatch` records collected, ordered by walk order
183 (newest-first for the main parent chain).
184 """
185 if head_commit_id is None:
186 resolved = get_head_commit_id(repo_root, branch)
187 if not resolved:
188 logger.warning("Branch '%s' has no commits.", branch)
189 return []
190 head_commit_id = resolved
191
192 results: list[QueryMatch] = []
193
194 if follow_merges:
195 results = _walk_history_bfs(
196 repo_root, head_commit_id, evaluator,
197 max_commits=max_commits,
198 load_manifest=load_manifest,
199 since=since,
200 until=until,
201 )
202 else:
203 results = _walk_history_linear(
204 repo_root, head_commit_id, evaluator,
205 max_commits=max_commits,
206 load_manifest=load_manifest,
207 since=since,
208 until=until,
209 )
210
211 return results
212
213 def _make_tz_aware(dt: datetime.datetime) -> datetime.datetime:
214 """Return *dt* with UTC timezone attached if it is naive."""
215 return dt if dt.tzinfo else dt.replace(tzinfo=datetime.timezone.utc)
216
217 def _passes_time_filter(
218 commit: CommitRecord,
219 since: datetime.datetime | None,
220 until: datetime.datetime | None,
221 ) -> bool:
222 """Return True if *commit* falls within the [since, until] window."""
223 ts = _make_tz_aware(commit.committed_at)
224 if since is not None and ts < _make_tz_aware(since):
225 return False
226 if until is not None and ts > _make_tz_aware(until):
227 return False
228 return True
229
230 def _evaluate_commit(
231 repo_root: pathlib.Path,
232 commit: CommitRecord,
233 evaluator: CommitEvaluator,
234 load_manifest: bool,
235 ) -> list[QueryMatch]:
236 """Load manifest if needed and run *evaluator* on *commit*."""
237 if load_manifest:
238 manifest_rec = get_commit_snapshot_manifest(repo_root, commit.commit_id)
239 manifest: Manifest = dict(manifest_rec) if manifest_rec else {}
240 else:
241 manifest = {}
242 try:
243 return evaluator(commit, manifest, repo_root)
244 except Exception:
245 logger.exception("Evaluator error on commit %s", commit.commit_id)
246 return []
247
248 def _walk_history_linear(
249 repo_root: pathlib.Path,
250 start_commit_id: str,
251 evaluator: CommitEvaluator,
252 *,
253 max_commits: int,
254 load_manifest: bool,
255 since: datetime.datetime | None,
256 until: datetime.datetime | None,
257 ) -> list[QueryMatch]:
258 """Linear first-parent walk — internal implementation."""
259 results: list[QueryMatch] = []
260 current_id: str | None = start_commit_id
261 seen = 0
262
263 while current_id and seen < max_commits:
264 commit = read_commit(repo_root, current_id)
265 if commit is None:
266 break
267 seen += 1
268 if _passes_time_filter(commit, since, until):
269 results.extend(_evaluate_commit(repo_root, commit, evaluator, load_manifest))
270 current_id = commit.parent_commit_id
271
272 return results
273
274 def _walk_history_bfs(
275 repo_root: pathlib.Path,
276 start_commit_id: str,
277 evaluator: CommitEvaluator,
278 *,
279 max_commits: int,
280 load_manifest: bool,
281 since: datetime.datetime | None,
282 until: datetime.datetime | None,
283 ) -> list[QueryMatch]:
284 """BFS DAG walk following both parents — internal implementation."""
285 results: list[QueryMatch] = []
286 for commit in iter_ancestors(repo_root, start_commit_id, max_commits=max_commits):
287 if _passes_time_filter(commit, since, until):
288 results.extend(_evaluate_commit(repo_root, commit, evaluator, load_manifest))
289 return results
290
291 def format_matches(matches: list[QueryMatch], *, max_results: int = 50) -> str:
292 """Format a list of matches as a human-readable table.
293
294 Args:
295 matches: The results from :func:`walk_history`.
296 max_results: Maximum rows to show before the "… N more" truncation line.
297
298 Returns:
299 Multi-line string ready for printing to stdout.
300 """
301 if not matches:
302 return "No matches found."
303
304 lines: list[str] = [f"Found {len(matches)} match(es):\n"]
305 for m in matches[:max_results]:
306 cid = m.get("commit_id", "?")
307 author = m.get("author", "unknown")
308 ts = m.get("committed_at", "")[:10]
309 detail = m.get("detail", "")
310 agent = m.get("agent_id", "")
311 agent_str = f" [{agent}]" if agent else ""
312 lines.append(f" {cid} {ts} {author}{agent_str} — {detail}")
313
314 if len(matches) > max_results:
315 lines.append(f"\n … {len(matches) - max_results} more (use --limit to raise the cap)")
316
317 return "\n".join(lines)
File History 1 commit
sha256:e6465e8a9b7fa8e6223ed4a3576e96c568c913ae2caeb9c31f15e7a81b250b40 docs: add | jq convention to --json section of agent-guide Sonnet 4.6 1 day ago