muse/core/query_engine.py · gabriel/muse

query_engine.py python

317 lines 11.4 KB

sha256:e6465e8a9b7fa8e6223ed4a3576e96c568c913ae2caeb9c31f15e7a81b250b40 docs: add | jq convention to --json section of agent-guide Sonnet 4.6 1 day ago

1	"""Domain-agnostic commit-history query engine for Muse.
2
3	Any domain can walk the commit graph, evaluate a predicate per commit, and
4	collect structured matches — without reimplementing the graph-traversal loop.
5
6	Three-tier walker contract
7	--------------------------
8	Muse has three distinct commit-graph walkers, each serving a different purpose:
9
10	1. ``walk_history`` (this module) — high-level walker.
11	Use for any query that produces :class:`QueryMatch` results consumed by
12	users or agents. Supports ``follow_merges=True`` (BFS over the full DAG),
13	``since``/``until`` time filters, ``load_manifest=False`` optimisation,
14	and the standard evaluator protocol. This is the right choice for all
15	``muse code`` query commands.
16
17	2. ``store.walk_commits_between`` — range-bounded walker.
18	Use when you need a raw ``list[CommitRecord]`` for a specific linear range
19	(``from_commit_id`` exclusive → ``to_commit_id`` inclusive, first-parent
20	only). Used by ``find_symbol``, ``lineage``, ``query_history``, and
21	``status`` where the full match protocol is not needed.
22
23	3. ``_query.walk_commits_bfs`` — low-level DAG walker.
24	Available as a low-level escape hatch during the migration period.
25	Prefer ``walk_history(follow_merges=True)`` for new code.
26
27	Architecture
28	------------
29	::
30
31	muse/core/query_engine.py ← this file: generic history walker
32	muse/plugins/midi/_midi_query.py ← MIDI predicate evaluator
33	muse/plugins/code/_code_query.py ← code predicate evaluator
34	muse/cli/commands/midi_query.py ← CLI for MIDI query
35	muse/cli/commands/code_query.py ← CLI for code query
36
37	Usage pattern::
38
39	from muse.core.query_engine import walk_history, QueryMatch
40
41	def my_evaluator(
42	commit: CommitRecord,
43	manifest: Manifest,
44	repo_root: pathlib.Path,
45	) -> list[QueryMatch]:
46	matches = []
47	if "interesting-file.py" in manifest:
48	matches.append(QueryMatch(
49	commit_id=commit.commit_id,
50	author=commit.author,
51	committed_at=commit.committed_at.isoformat(),
52	branch=commit.branch,
53	detail="found interesting-file.py",
54	extra={},
55	))
56	return matches
57
58	results = walk_history(repo_root, branch="main", evaluator=my_evaluator)
59	# DAG walk with follow_merges:
60	results = walk_history(
61	repo_root, branch="main", evaluator=my_evaluator, follow_merges=True
62	)
63
64	Public API
65	----------
66	- :class:`QueryMatch` — one result row from the evaluator.
67	- :class:`CommitEvaluator` — type alias for the evaluator callable.
68	- :func:`walk_history` — traverse commits and collect matches.
69	"""
70
71	import datetime
72	import logging
73	import pathlib
74	from collections.abc import Callable
75	from typing import TypedDict
76
77	from muse.core.types import Manifest
78	from muse.core.graph import iter_ancestors
79	from muse.core.refs import get_head_commit_id
80	from muse.core.commits import (
81	CommitRecord,
82	read_commit,
83	)
84	from muse.core.snapshots import get_commit_snapshot_manifest
85
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)

# Default cap on the number of commits a single walk inspects; used as the
# default value of ``walk_history``'s ``max_commits`` parameter.
_DEFAULT_MAX_COMMITS = 500
89
90	# ---------------------------------------------------------------------------
91	# Result type
92	# ---------------------------------------------------------------------------
93
class _QueryMatchRequired(TypedDict):
    """Keys that every :class:`QueryMatch` must carry."""

    # The commit that produced this match.
    commit_id: str
    # Commit author string.
    author: str
    # ISO-8601 timestamp string.
    committed_at: str
    # Branch name.
    branch: str
    # Short human-readable description of what matched.
    detail: str


class QueryMatch(_QueryMatchRequired, total=False):
    """One match returned by a predicate evaluator.

    Required fields (inherited from the total base TypedDict, so type
    checkers reject a match that omits any of them):
        ``commit_id``     The commit that produced this match.
        ``author``        Commit author string.
        ``committed_at``  ISO-8601 timestamp string.
        ``branch``        Branch name.
        ``detail``        Short human-readable description of what matched.

    Optional fields (declared here under ``total=False``):
        ``extra``     Domain-specific data (e.g. ``{"symbol": "my_fn"}``).
        ``agent_id``  Agent identity from commit provenance (if present).
        ``model_id``  Model ID from commit provenance (if present).

    At runtime a ``QueryMatch`` is a plain ``dict``; the required/optional
    split only affects static type checking and the ``__required_keys__`` /
    ``__optional_keys__`` introspection attributes.
    """

    # Domain-specific data attached by the evaluator.
    extra: Manifest
    # Agent identity from commit provenance, when available.
    agent_id: str
    # Model ID from commit provenance, when available.
    model_id: str
118
119	# ---------------------------------------------------------------------------
120	# Evaluator type alias
121	# ---------------------------------------------------------------------------
122
#: Callable contract for domain evaluators: given a commit record, that
#: commit's file manifest and the repository root, return the
#: :class:`QueryMatch` rows found in the commit (an empty list when none).
CommitEvaluator = Callable[[CommitRecord, dict[str, str], pathlib.Path], list[QueryMatch]]
129
130	# ---------------------------------------------------------------------------
131	# Core history walker
132	# ---------------------------------------------------------------------------
133
def walk_history(
    repo_root: pathlib.Path,
    branch: str,
    evaluator: CommitEvaluator,
    *,
    max_commits: int = _DEFAULT_MAX_COMMITS,
    head_commit_id: str | None = None,
    load_manifest: bool = True,
    since: datetime.datetime | None = None,
    until: datetime.datetime | None = None,
    follow_merges: bool = False,
) -> list[QueryMatch]:
    """Traverse commit history from a starting commit and gather evaluator matches.

    Every visited commit that falls inside the ``[since, until]`` window is
    handed to *evaluator* together with its file manifest and *repo_root*;
    whatever :class:`QueryMatch` rows the evaluator returns are appended to
    the result in visit order.

    Two traversal strategies are available:

    * ``follow_merges=False`` (default) — only ``parent_commit_id`` links are
      followed, i.e. a linear first-parent walk.
    * ``follow_merges=True`` — the walk is delegated to the ancestor iterator
      so that merge commits' second parents are visited as well.

    Args:
        repo_root: Repository root containing ``.muse/``.
        branch: Branch whose HEAD is the starting point when
            *head_commit_id* is ``None``.
        evaluator: Domain-specific callable — see :data:`CommitEvaluator`.
        max_commits: Upper bound on the number of commits inspected.
            Defaults to 500.
        head_commit_id: Explicit starting commit. When ``None`` the branch
            HEAD is resolved instead.
        load_manifest: When ``False`` the evaluator receives an empty
            manifest ``{}`` and no snapshot manifest is loaded. Suitable for
            evaluators that only look at commit-level fields.
        since: Lower time bound; commits strictly older are skipped, a
            commit exactly at *since* is kept.
        until: Upper time bound; commits strictly newer are skipped, a
            commit exactly at *until* is kept.
        follow_merges: Selects the traversal strategy described above.

    Returns:
        All collected :class:`QueryMatch` records in walk order. An empty
        list is returned (and a warning logged) when the branch HEAD cannot
        be resolved.
    """
    start_id = head_commit_id
    if start_id is None:
        start_id = get_head_commit_id(repo_root, branch)
        if not start_id:
            logger.warning("Branch '%s' has no commits.", branch)
            return []

    # Both walkers share one signature, so choose once and call once.
    walker = _walk_history_bfs if follow_merges else _walk_history_linear
    return walker(
        repo_root,
        start_id,
        evaluator,
        max_commits=max_commits,
        load_manifest=load_manifest,
        since=since,
        until=until,
    )
212
def _make_tz_aware(dt: datetime.datetime) -> datetime.datetime:
    """Return *dt* as a timezone-aware datetime, assuming UTC when it is naive.

    A datetime counts as aware only when it has a ``tzinfo`` *and* that
    tzinfo yields a non-``None`` UTC offset. A datetime carrying a tzinfo
    whose ``utcoffset()`` returns ``None`` is still naive, and comparing it
    with an aware datetime raises ``TypeError`` — so it is normalised to UTC
    here as well.

    Args:
        dt: The datetime to normalise.

    Returns:
        *dt* itself when it is already aware, otherwise a copy with
        ``tzinfo`` set to UTC (the wall-clock fields are left unchanged).
    """
    if dt.tzinfo is not None and dt.utcoffset() is not None:
        return dt
    return dt.replace(tzinfo=datetime.timezone.utc)
216
def _passes_time_filter(
    commit: CommitRecord,
    since: datetime.datetime | None,
    until: datetime.datetime | None,
) -> bool:
    """Tell whether *commit* was made inside the closed ``[since, until]`` window.

    Each bound is optional; ``None`` leaves that side of the window open.
    The commit timestamp and both bounds are normalised with
    :func:`_make_tz_aware` before they are compared.

    Args:
        commit: Commit whose ``committed_at`` timestamp is checked.
        since: Earliest accepted timestamp, or ``None`` for no lower bound.
        until: Latest accepted timestamp, or ``None`` for no upper bound.

    Returns:
        ``True`` when the commit is not older than *since* and not newer
        than *until*.
    """
    stamp = _make_tz_aware(commit.committed_at)
    if since is not None and stamp < _make_tz_aware(since):
        return False
    return until is None or stamp <= _make_tz_aware(until)
229
def _evaluate_commit(
    repo_root: pathlib.Path,
    commit: CommitRecord,
    evaluator: CommitEvaluator,
    load_manifest: bool,
) -> list[QueryMatch]:
    """Run *evaluator* against one commit, fetching its manifest on request.

    Args:
        repo_root: Repository root, forwarded to the evaluator.
        commit: The commit to evaluate.
        evaluator: Domain evaluator to call.
        load_manifest: When true, the commit's snapshot manifest is loaded
            and passed to the evaluator; otherwise the evaluator receives an
            empty dict and nothing is loaded.

    Returns:
        The evaluator's matches. If the evaluator raises, the error is
        logged with its traceback and an empty list is returned, so a single
        failing commit yields no matches instead of propagating. Errors
        raised while loading the manifest are not caught here.
    """
    manifest: Manifest = {}
    if load_manifest:
        snapshot = get_commit_snapshot_manifest(repo_root, commit.commit_id)
        if snapshot:
            # Hand the evaluator its own dict copy of the stored manifest.
            manifest = dict(snapshot)

    try:
        found = evaluator(commit, manifest, repo_root)
    except Exception:
        logger.exception("Evaluator error on commit %s", commit.commit_id)
        return []
    return found
247
def _walk_history_linear(
    repo_root: pathlib.Path,
    start_commit_id: str,
    evaluator: CommitEvaluator,
    *,
    max_commits: int,
    load_manifest: bool,
    since: datetime.datetime | None,
    until: datetime.datetime | None,
) -> list[QueryMatch]:
    """Follow ``parent_commit_id`` links from *start_commit_id* and collect matches.

    The walk ends when the chain runs out, when a commit cannot be read, or
    once *max_commits* commits have been read. Commits outside the
    ``[since, until]`` window still count towards *max_commits*; they are
    simply not passed to the evaluator.

    Returns:
        Matches in the order the commits were visited.
    """
    collected: list[QueryMatch] = []
    cursor: str | None = start_commit_id
    budget = max_commits

    while cursor and budget > 0:
        record = read_commit(repo_root, cursor)
        if record is None:
            # Unreadable commit: stop here rather than guess at a parent.
            break
        budget -= 1
        if _passes_time_filter(record, since, until):
            collected.extend(
                _evaluate_commit(repo_root, record, evaluator, load_manifest)
            )
        cursor = record.parent_commit_id

    return collected
273
def _walk_history_bfs(
    repo_root: pathlib.Path,
    start_commit_id: str,
    evaluator: CommitEvaluator,
    *,
    max_commits: int,
    load_manifest: bool,
    since: datetime.datetime | None,
    until: datetime.datetime | None,
) -> list[QueryMatch]:
    """Collect matches over the ancestors yielded by ``iter_ancestors``.

    Traversal order and the *max_commits* cap are handled by
    ``iter_ancestors``; this function applies the ``[since, until]`` filter
    to each yielded commit and evaluates the ones that pass.

    Returns:
        Matches in the order the commits were yielded.
    """
    return [
        match
        for ancestor in iter_ancestors(
            repo_root, start_commit_id, max_commits=max_commits
        )
        if _passes_time_filter(ancestor, since, until)
        for match in _evaluate_commit(repo_root, ancestor, evaluator, load_manifest)
    ]
290
def format_matches(matches: list[QueryMatch], *, max_results: int = 50) -> str:
    """Format a list of matches as a human-readable table.

    Each shown row contains the commit ID, the first ten characters of the
    ``committed_at`` string (the date part of an ISO-8601 timestamp), the
    author, the agent ID in brackets when present, and the match detail.
    Missing keys fall back to placeholders rather than raising.

    Args:
        matches: The results from :func:`walk_history`.
        max_results: Maximum rows to show before the "… N more" truncation
            line. Negative values are treated as ``0``.

    Returns:
        Multi-line string ready for printing to stdout, or
        ``"No matches found."`` when *matches* is empty.
    """
    if not matches:
        return "No matches found."

    # A negative cap would slice from the end of the list and make the
    # "… N more" count disagree with the rows actually shown; clamp it.
    shown = max(max_results, 0)

    lines: list[str] = [f"Found {len(matches)} match(es):\n"]
    for m in matches[:shown]:
        cid = m.get("commit_id", "?")
        author = m.get("author", "unknown")
        ts = m.get("committed_at", "")[:10]
        detail = m.get("detail", "")
        agent = m.get("agent_id", "")
        agent_str = f" [{agent}]" if agent else ""
        lines.append(f" {cid} {ts} {author}{agent_str} — {detail}")

    hidden = len(matches) - shown
    if hidden > 0:
        lines.append(f"\n … {hidden} more (use --limit to raise the cap)")

    return "\n".join(lines)

File History 1 commit

sha256:e6465e8a9b7fa8e6223ed4a3576e96c568c913ae2caeb9c31f15e7a81b250b40 docs: add | jq convention to --json section of agent-guide Sonnet 4.6 1 day ago

class QueryMatch

function walk_history

function _make_tz_aware

function _passes_time_filter

function _evaluate_commit

function _walk_history_linear

function _walk_history_bfs

function format_matches

Pathmuse/core/query_engine.py

Lines317

Size11.4 KB

LangPython

Refsha256:e6465e8a9b7fa8e6223ed4a3576e96c568c913ae2caeb9c31f15e7a81b250b40

Object ID

sha256:ad59ed5836b7758c806bdd834db3c9e93e03bc1b11ac34a2127165ba06bc3b89…

Last commit

sha256:e6465e8a9b7fa8e6223ed4a3576e96c568c913ae2caeb9c31f15e7a81b250b40

docs: add | jq convention to --json section of ag…

1 day ago

Quick links

Blame History