muse/plugins/code/_code_query.py · gabriel/muse

_code_query.py python

383 lines 12.8 KB

sha256:06dba78c2a78e251b580422dd1fd547f3c8357ff18f7709a860873b2d24dbbbf chore: bump version to 0.2.0rc14 Sonnet 4.6 patch 1 day ago

1	"""Code-domain query evaluator for the Muse generic query engine.
2
3	Implements :data:`~muse.core.query_engine.CommitEvaluator` for the code domain.
4	Allows agents and humans to search the commit history for code changes::
5
6	muse code-query "symbol == 'my_function' and change == 'added'"
7	muse code-query "language == 'Python' and author == 'agent-x'"
8	muse code-query "agent_id == 'claude' and sem_ver_bump == 'major'"
9	muse code-query "file == 'src/core.py'"
10	muse code-query "change == 'added' and kind == 'class'"
11	muse code-query "symbol endswith _handler"
12
13	Query language
14	--------------
15
    query    = and_expr ( 'or' and_expr )*
    and_expr = atom ( 'and' atom )*
    atom     = FIELD OP VALUE
    FIELD    = 'symbol' | 'file' | 'language' | 'kind' | 'change'
             | 'author' | 'agent_id' | 'model_id' | 'toolchain_id'
             | 'sem_ver_bump' | 'branch'
    OP       = '==' | '!=' | 'contains' | 'startswith' | 'endswith'
    VALUE    = QUOTED_STRING | UNQUOTED_WORD
24
25	Supported fields
26	----------------
27
28	``symbol`` Qualified symbol name (e.g. ``"MyClass.method"``).
29	``file`` Workspace-relative file path.
30	``language`` Language name (``"Python"``, ``"TypeScript"``…).
31	``kind`` Symbol kind (``"function"``, ``"class"``, ``"method"``…).
32	``change`` ``"added"``, ``"removed"``, or ``"modified"``.
33	``author`` Commit author string.
34	``agent_id`` Agent identity from commit provenance.
35	``model_id`` Model ID from commit provenance.
36	``toolchain_id`` Toolchain string from commit provenance.
37	``sem_ver_bump`` Semantic version bump: ``"none"``, ``"patch"``,
38	``"minor"``, ``"major"``.
39	``branch`` Branch name.
40
41	Performance note
42	----------------
43	The code evaluator reads ``commit.structured_delta`` — it never needs the
44	snapshot manifest. Callers should pass ``load_manifest=False`` to
45	:func:`~muse.core.query_engine.walk_history` to skip that I/O entirely.
46	"""
47
48	import logging
49	import pathlib
50	import re
51	from dataclasses import dataclass
52	from typing import Literal, TypedDict, TypeIs, get_args
53
54	from muse.core.query_engine import CommitEvaluator, QueryMatch
55	from muse.core.commits import CommitRecord
56	from muse.domain import DomainOp, PatchOp
57	from muse.plugins.code._query import language_of
58	from muse.core.types import Manifest
59
class _SymbolMatch(TypedDict):
    """Detail record for one delta operation that satisfied a comparison.

    Records of this shape are collected while a commit is evaluated and are
    later copied into the ``extra`` payload of each symbol-level result.
    """

    # Path part of the operation address (the text before ``::``).
    file: str
    # Symbol part of the operation address (the text after ``::``); may be "".
    symbol: str
    # The operation's ``kind`` entry, or "" when the operation has none.
    kind: str
    # One of "added", "removed" or "modified".
    change: str
    # Language name reported by ``language_of`` for the operation's file.
    language: str
66
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
68
def _is_patch_op(op: DomainOp) -> TypeIs[PatchOp]:
    """Tell whether *op* is a patch operation.

    Acts as a type guard: when it returns ``True`` a type checker treats
    *op* as a :class:`~muse.domain.PatchOp`, which makes ``child_ops``
    visible to it.
    """
    op_tag = op["op"]
    return op_tag == "patch"
72
73	# ---------------------------------------------------------------------------
74	# Query AST types
75	# ---------------------------------------------------------------------------
76
# Every field name a query may reference.  The first five are resolved per
# delta operation; the remaining six are read straight off the commit.
CodeField = Literal[
    "symbol",
    "file",
    "language",
    "kind",
    "change",
    "author",
    "agent_id",
    "model_id",
    "toolchain_id",
    "sem_ver_bump",
    "branch",
]

# Every comparison operator a query may use.
CodeOp = Literal[
    "==",
    "!=",
    "contains",
    "startswith",
    "endswith",
]
84
@dataclass(frozen=True)
class Comparison:
    """One ``FIELD OP VALUE`` predicate — the leaf node of the query AST."""

    # Which field the predicate inspects.
    field: CodeField
    # How the field's value is compared against ``value``.
    op: CodeOp
    # Right-hand side of the comparison, with any surrounding quotes removed.
    value: str
92
@dataclass(frozen=True)
class AndExpr:
    """A conjunction: the expression holds only when every clause holds."""

    # The predicates joined by ``and``, in the order they were written.
    clauses: list[Comparison]
98
@dataclass(frozen=True)
class OrExpr:
    """A disjunction: the expression holds when at least one clause holds."""

    # The AND-expressions joined by ``or``, in the order they were written.
    clauses: list[AndExpr]
104
105	# ---------------------------------------------------------------------------
106	# Tokeniser & parser
107	# ---------------------------------------------------------------------------
108
# Token grammar for the query DSL.  Alternatives are tried in order, so the
# keyword branch must come before the generic word branch.
_TOKEN_RE = re.compile(
    r"""
    # Keywords; the look-ahead stops "and" from matching inside "android".
    (?P<keyword>(?:or|and|contains|startswith|endswith)(?![A-Za-z0-9_.]))
    # Symbolic comparison operators.
    |(?P<op>==|!=)
    # Quoted value: any run of characters (possibly empty) between matching
    # double or single quotes.  The quotes are part of the token.
    |(?P<quoted>"[^"]*"|'[^']*')
    # Bare word: identifier-like, dots allowed after the first character.
    |(?P<word>[A-Za-z_][A-Za-z0-9_.]*)
    """,
    re.VERBOSE,
)
118
# Run-time sets of the legal field and operator spellings, derived from the
# Literal aliases so the type-level and run-time definitions share one source.
_VALID_FIELDS: frozenset[str] = frozenset(get_args(CodeField))
_VALID_OPS: frozenset[str] = frozenset(get_args(CodeOp))
121
def _is_code_field(tok: str) -> TypeIs[CodeField]:
    """Tell whether *tok* is one of the field names in :data:`CodeField`."""
    recognised = tok in _VALID_FIELDS
    return recognised
124
def _is_code_op(tok: str) -> TypeIs[CodeOp]:
    """Tell whether *tok* is one of the operators in :data:`CodeOp`."""
    recognised = tok in _VALID_OPS
    return recognised
127
def _as_code_field(tok: str) -> CodeField:
    """Return *tok* typed as a :data:`CodeField`.

    Raises:
        ValueError: If *tok* is not a known field name; the message lists
            the valid names in sorted order.
    """
    if _is_code_field(tok):
        return tok
    raise ValueError(f"Unknown field: {tok!r}. Valid: {sorted(_VALID_FIELDS)}")
133
def _as_code_op(tok: str) -> CodeOp:
    """Return *tok* typed as a :data:`CodeOp`.

    Raises:
        ValueError: If *tok* is not a known operator; the message lists the
            valid operators in sorted order.
    """
    if _is_code_op(tok):
        return tok
    raise ValueError(f"Unknown operator: {tok!r}. Valid: {sorted(_VALID_OPS)}")
139
def _tokenize(query: str) -> list[str]:
    """Split *query* into the raw tokens recognised by ``_TOKEN_RE``.

    Quoted values keep their surrounding quotes; whitespace between tokens
    is discarded.

    Args:
        query: The query text.

    Returns:
        The tokens in the order they appear in *query*.

    Raises:
        ValueError: If *query* contains non-whitespace text that is not part
            of any token (for example an unterminated quote, a lone ``=``,
            or an unquoted ``/``).  Skipping such text silently would make
            the parser evaluate a different query from the one written.
    """

    def reject_junk(start: int, end: int) -> None:
        # Text between two tokens may only be whitespace.
        gap = query[start:end]
        if gap.strip():
            offset = start + (len(gap) - len(gap.lstrip()))
            raise ValueError(
                f"Unexpected text {gap.strip()!r} at offset {offset} in query"
            )

    tokens: list[str] = []
    cursor = 0
    for match in _TOKEN_RE.finditer(query):
        reject_junk(cursor, match.start())
        tokens.append(match.group())
        cursor = match.end()
    reject_junk(cursor, len(query))
    return tokens
142
def _parse_query(query: str) -> OrExpr:
    """Parse a query string into an :class:`OrExpr` AST.

    The parser is a small recursive descent over this grammar (there is no
    parenthesised grouping; ``and`` binds tighter than ``or``)::

        query    = and_expr ( 'or' and_expr )*
        and_expr = atom ( 'and' atom )*
        atom     = FIELD OP VALUE

    Args:
        query: The query text; leading and trailing whitespace is ignored.

    Returns:
        The parsed disjunction of conjunctions.

    Raises:
        ValueError: If the query is empty, stops in the middle of a
            comparison, names an unknown field or operator, or has tokens
            left over after the last complete comparison.
    """
    tokens = _tokenize(query.strip())
    pos = 0

    def peek() -> str | None:
        return tokens[pos] if pos < len(tokens) else None

    def consume(expected: str) -> str:
        nonlocal pos
        # Running out of tokens is a malformed query, not an internal error,
        # so report it with the same exception type as other parse failures.
        if pos >= len(tokens):
            raise ValueError(f"Unexpected end of query: expected {expected}")
        tok = tokens[pos]
        pos += 1
        return tok

    def parse_atom() -> Comparison:
        validated_field = _as_code_field(consume("a field name"))
        validated_op = _as_code_op(consume("an operator"))
        val_tok = consume("a value")
        # Quoted values arrive with their quotes; drop them.
        if val_tok.startswith(("'", '"')):
            val_tok = val_tok[1:-1]
        return Comparison(
            field=validated_field,
            op=validated_op,
            value=val_tok,
        )

    def parse_and() -> AndExpr:
        clauses: list[Comparison] = [parse_atom()]
        while peek() == "and":
            consume("'and'")
            clauses.append(parse_atom())
        return AndExpr(clauses=clauses)

    def parse_or() -> OrExpr:
        clauses: list[AndExpr] = [parse_and()]
        while peek() == "or":
            consume("'or'")
            clauses.append(parse_and())
        return OrExpr(clauses=clauses)

    ast = parse_or()

    # Anything still unread means the query was not fully understood;
    # ignoring it would silently evaluate a different query.
    leftover = peek()
    if leftover is not None:
        raise ValueError(
            f"Unexpected token {leftover!r} after comparison: expected 'and' or 'or'"
        )
    return ast
186
187	# ---------------------------------------------------------------------------
188	# Evaluator
189	# ---------------------------------------------------------------------------
190
191	def _match_op(actual: str, op: CodeOp, expected: str) -> bool:
192	"""Apply op to actual and expected strings (case-insensitive where sensible)."""
193	if op == "==":
194	return actual == expected
195	if op == "!=":
196	return actual != expected
197	if op == "contains":
198	return expected.lower() in actual.lower()
199	if op == "startswith":
200	return actual.lower().startswith(expected.lower())
201	# op == "endswith"
202	return actual.lower().endswith(expected.lower())
203
204	def _commit_matches_comparison(
205	comparison: Comparison,
206	commit: CommitRecord,
207	manifest: Manifest,
208	root: pathlib.Path,
209	symbol_matches: list[_SymbolMatch],
210	) -> bool:
211	"""Return True if commit + its symbols satisfy comparison.
212
213	For symbol/file/language/kind/change fields, each (symbol, file) pair
214	that matches is appended to symbol_matches for result detail.
215	The ``manifest`` argument is accepted to satisfy the
216	:data:`~muse.core.query_engine.CommitEvaluator` protocol but is unused —
217	all relevant data lives in ``commit.structured_delta``.
218	"""
219	f = comparison.field
220	op = comparison.op
221	v = comparison.value
222
223	# Commit-level fields — no delta traversal needed.
224	if f == "author":
225	return _match_op(commit.author, op, v)
226	if f == "agent_id":
227	return _match_op(commit.agent_id, op, v)
228	if f == "model_id":
229	return _match_op(commit.model_id, op, v)
230	if f == "toolchain_id":
231	return _match_op(commit.toolchain_id, op, v)
232	if f == "sem_ver_bump":
233	return _match_op(commit.sem_ver_bump, op, v)
234	if f == "branch":
235	return _match_op(commit.branch, op, v)
236
237	# Symbol/file-level fields — iterate the structured delta.
238	delta = commit.structured_delta
239	if delta is None:
240	return False
241
242	hit = False
243	for op_rec in delta.get("ops", []):
244	op_type: str = op_rec.get("op", "")
245	address: str = op_rec.get("address", "")
246
247	if "::" in address:
248	file_path, symbol_name = address.split("::", 1)
249	else:
250	file_path = address
251	symbol_name = ""
252
253	lang = language_of(file_path)
254	change_type = (
255	"added" if op_type == "insert"
256	else "removed" if op_type == "delete"
257	else "modified"
258	)
259
260	# PatchOps may carry child_ops for the symbols they modify.
261	child_ops: list[DomainOp] = op_rec["child_ops"] if _is_patch_op(op_rec) else []
262	all_ops: list[DomainOp] = [op_rec] + child_ops
263
264	for rec in all_ops:
265	rec_address: str = str(rec.get("address", address))
266	if "::" in rec_address:
267	rec_file, rec_symbol = rec_address.split("::", 1)
268	else:
269	rec_file = rec_address
270	rec_symbol = ""
271
272	rec_kind: str = str(rec.get("kind", ""))
273	rec_op_type: str = str(rec.get("op", ""))
274	rec_change = (
275	"added" if rec_op_type == "insert"
276	else "removed" if rec_op_type == "delete"
277	else "modified"
278	)
279
280	field_val = {
281	"symbol": rec_symbol or symbol_name,
282	"file": rec_file or file_path,
283	"language": lang,
284	"kind": rec_kind,
285	"change": rec_change or change_type,
286	}.get(f, "")
287
288	if _match_op(field_val, op, v):
289	hit = True
290	symbol_matches.append({
291	"file": rec_file or file_path,
292	"symbol": rec_symbol or symbol_name,
293	"kind": rec_kind,
294	"change": rec_change or change_type,
295	"language": lang,
296	})
297
298	return hit
299
# Fields that are resolved per delta operation rather than per commit.
_SYMBOL_LEVEL_FIELDS: frozenset[str] = frozenset(
    {"symbol", "file", "language", "kind", "change"}
)

# Upper bound on symbol-level results reported for a single commit.
_MAX_SYMBOL_MATCHES = 20


def _symbols_matching_clause(
    and_expr: AndExpr,
    commit: CommitRecord,
    manifest: Manifest,
    root: pathlib.Path,
) -> list[_SymbolMatch] | None:
    """Evaluate one AND clause against *commit*.

    Returns ``None`` when the clause does not match.  Otherwise returns the
    symbol records that satisfy *every* symbol-level comparison of the
    clause; the list is empty when the clause has only commit-level
    comparisons.
    """
    candidates: list[_SymbolMatch] | None = None
    for comparison in and_expr.clauses:
        if candidates is not None and comparison.field in _SYMBOL_LEVEL_FIELDS:
            # A later symbol-level predicate narrows the records found so
            # far, so "change == 'added' and kind == 'class'" reports only
            # records that are both added and classes.
            candidates = [
                sym
                for sym in candidates
                if _match_op(
                    str(sym.get(comparison.field, "")),
                    comparison.op,
                    comparison.value,
                )
            ]
            if not candidates:
                return None
            continue

        found: list[_SymbolMatch] = []
        if not _commit_matches_comparison(comparison, commit, manifest, root, found):
            return None
        if comparison.field in _SYMBOL_LEVEL_FIELDS:
            candidates = found

    return candidates if candidates is not None else []


def build_evaluator(query: str) -> CommitEvaluator:
    """Parse query and return a :data:`CommitEvaluator` for :func:`~muse.core.query_engine.walk_history`.

    The returned closure evaluates one commit per call.  OR clauses are
    tried in order and the first one that matches decides the result.
    Within an AND clause, commit-level comparisons must all hold for the
    commit, and symbol-level comparisons must all hold for the *same* delta
    operation.

    A clause with symbol-level comparisons yields one
    :class:`~muse.core.query_engine.QueryMatch` per matching symbol, capped
    at 20 per commit to prevent runaway output on large deltas.  A clause
    with only commit-level comparisons yields a single match whose detail
    is the first 80 characters of the commit message.

    Args:
        query: A query string in the code query DSL.

    Returns:
        A callable that can be passed to :func:`~muse.core.query_engine.walk_history`.

    Raises:
        ValueError: If the query cannot be parsed.
    """
    ast = _parse_query(query)

    def evaluator(
        commit: CommitRecord,
        manifest: Manifest,
        root: pathlib.Path,
    ) -> list[QueryMatch]:
        # First matching OR clause wins; we stop early.
        symbol_matches: list[_SymbolMatch] | None = None
        for and_expr in ast.clauses:
            symbol_matches = _symbols_matching_clause(and_expr, commit, manifest, root)
            if symbol_matches is not None:
                break

        if symbol_matches is None:
            return []

        committed_at = commit.committed_at.isoformat()
        matches: list[QueryMatch] = []

        if symbol_matches:
            # Symbol-level matches — one QueryMatch per symbol (capped).
            for sym in symbol_matches[:_MAX_SYMBOL_MATCHES]:
                # File-level operations have no symbol; show the path instead.
                detail = sym.get("symbol") or sym.get("file", "?")
                change = sym.get("change", "")
                if change:
                    detail = f"{detail} ({change})"
                m = QueryMatch(
                    commit_id=commit.commit_id,
                    author=commit.author,
                    committed_at=committed_at,
                    branch=commit.branch,
                    detail=detail,
                    extra=dict(sym),
                )
                if commit.agent_id:
                    m["agent_id"] = commit.agent_id
                # NOTE(review): model_id is attached only to commit-level
                # matches below — confirm whether that asymmetry is intended.
                matches.append(m)
        else:
            # Commit-level match: the winning clause had no symbol-level
            # comparisons.
            m = QueryMatch(
                commit_id=commit.commit_id,
                author=commit.author,
                committed_at=committed_at,
                branch=commit.branch,
                detail=commit.message[:80],
                extra={},
            )
            if commit.agent_id:
                m["agent_id"] = commit.agent_id
            if commit.model_id:
                m["model_id"] = commit.model_id
            matches.append(m)

        return matches

    return evaluator

File History 1 commit

sha256:06dba78c2a78e251b580422dd1fd547f3c8357ff18f7709a860873b2d24dbbbf chore: bump version to 0.2.0rc14 Sonnet 4.6 patch 1 day ago

class _SymbolMatch

function _is_patch_op

class Comparison

class AndExpr

class OrExpr

function _is_code_field

function _is_code_op

function _as_code_field

function _as_code_op

function _tokenize

function _parse_query

function peek

function consume

function parse_atom

function parse_and

function parse_or

function _match_op

function _commit_matches_comparison

function build_evaluator

function evaluator

Pathmuse/plugins/code/_code_query.py

Lines383

Size12.8 KB

LangPython

Refsha256:06dba78c2a78e251b580422dd1fd547f3c8357ff18f7709a860873b2d24dbbbf

Object ID

sha256:d0166221864f4ace96db2dbe9f691d70b616378f51d6904491b2b0a4a7945b2f…

Last commit

sha256:06dba78c2a78e251b580422dd1fd547f3c8357ff18f7709a860873b2d24dbbbf

chore: bump version to 0.2.0rc14

1 day ago

Quick links

Blame History