gabriel / muse public
detect_refactor.py python
452 lines 16.4 KB
Raw
sha256:06dba78c2a78e251b580422dd1fd547f3c8357ff18f7709a860873b2d24dbbbf chore: bump version to 0.2.0rc14 Sonnet 4.6 patch 2 days ago
1 """muse code detect-refactor -- semantic refactoring detection across commits.
2
3 This command is impossible in Git. Git sees every refactoring operation as
4 a diff of text lines. A function extracted into a helper module? Delete lines
5 here, add lines there -- no semantic connection. A class renamed? Every file
6 that imports it becomes a "modification". Muse understands *what actually
7 happened* at the symbol level.
8
9 ``muse code detect-refactor`` scans the commit range and classifies every
10 semantic operation into one of four refactoring categories:
11
12 ``rename``
13 A symbol kept its body but changed its name. Detected via a
14 ``renamed to <new_name>`` marker in the structured delta.
15
16 ``move``
17 A symbol moved to a different file without changing its content.
18 Detected via a ``moved to <file>`` marker in the structured delta.
19
20 ``signature``
21 A symbol's name and body are unchanged; only its parameter list or
22 return type changed.
23
24 ``implementation``
25 A symbol's signature is stable; its internal logic changed.
26
27 Output::
28
29 Semantic refactoring report
30 From: cb4afaed "Layer 2: add harmonic dimension"
31 To: a3f2c9e1 "Refactor: rename and move helpers"
32 ----------------------------------------------------------------------
33
34 RENAME src/utils.py::calculate_total
35 -> compute_total
36 commit a3f2c9e1 "Rename: improve naming clarity"
37
38 MOVE src/utils.py::compute_total
39 -> src/helpers.py::compute_total
40 commit 1d2e3faa "Move: extract helpers module"
41
42 SIGNATURE src/api.py::handle_request
43 parameters changed: (req, ctx) -> (request, context, timeout)
44 commit 4b5c6d7e "API: add timeout parameter"
45
46 IMPLEMENTATION src/core.py::process_batch
47 implementation changed (signature stable)
48 commit 8f9a0b1c "Perf: vectorise batch processing"
49
50 ----------------------------------------------------------------------
51 4 refactoring operation(s) detected
52 (1 implementation · 1 move · 1 rename · 1 signature)
53
54 Flags::
55
56 --from <ref>
57 Start of the commit range (exclusive). Default: initial commit.
58 Accepts a full or abbreviated commit SHA or a branch name.
59
60 --to <ref>
61 End of the commit range (inclusive). Default: HEAD.
62
63 --max <n>
64 Cap the number of commits inspected (default: 500). When hit,
65 a warning is shown; increase with --max to see the full range.
66
67 --kind <kind>
68 Filter to one category: implementation, move, rename, signature.
69
70 --json
71 Emit the full refactoring report as JSON::
72
73 {
74 "schema_version": "<version>",
75 "from_ref": "<commit_id> \\"message\\"",
76 "to_ref": "<commit_id> \\"message\\"",
77 "commits_scanned": 42,
78 "truncated": false,
79 "total": 4,
80 "events": [
81 {
82 "kind": "implementation",
83 "address": "src/core.py::process_batch",
84 "detail": "implementation changed ...",
85 "commit_id": "<sha256>",
86 "commit_message": "...",
87 "committed_at": "2026-03-14T..."
88 }
89 ]
90 }
91 """
92
93 import argparse
94 import json
95 import logging
96 import pathlib
97 import sys
98 from typing import TypedDict
99
100 from muse.core.envelope import EnvelopeJson, make_envelope
101 from muse.core.errors import ExitCode
102 from muse.core.repo import require_repo
103 from muse.core.refs import read_current_branch
104 from muse.core.commits import (
105 CommitRecord,
106 read_commit,
107 resolve_commit_ref,
108 )
109 from muse.core.timing import start_timer
110 from muse.domain import DomainOp
111 from muse.plugins.code._query import walk_commits_bfs
112 from muse.core.validation import clamp_int, sanitize_display
113
# Per-kind event tally used for the human summary line, e.g. {"rename": 2}.
type _KindCounts = dict[str, int]
# Plain string-to-string mapping; used both for the kind -> display-label
# table and for the flat dictionary form of a single event.
type _LabelMap = dict[str, str]
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
117
118 # ---------------------------------------------------------------------------
119 # Typed output shape
120 # ---------------------------------------------------------------------------
121
class _RefactorPayload(TypedDict):
    """Command-specific fields of the ``--json`` report assembled by ``run``."""

    # Label for the exclusive start of the range: ``<commit_id> "<message>"``,
    # or the literal "initial commit" when no start commit applies.
    from_ref: str
    # Label for the inclusive end of the range: ``<commit_id> "<message>"``.
    to_ref: str
    # Number of commits returned by the DAG walk.
    commits_scanned: int
    # Truncation flag reported by the DAG walk (related to ``--max``).
    truncated: bool
    # Number of entries in ``events``.
    total: int
    # One flat string mapping per event, as produced by RefactorEvent.to_dict.
    events: list[_LabelMap]
129
class _RefactorOutputJson(_RefactorPayload, EnvelopeJson):
    """Full wire shape for ``muse code detect-refactor --json``.

    Combines the command-specific ``_RefactorPayload`` fields with the shared
    ``EnvelopeJson`` fields; ``run`` fills the latter from ``make_envelope``.
    """
132
133 _VALID_KINDS: frozenset[str] = frozenset({"rename", "move", "signature", "implementation"})
134
135 # ---------------------------------------------------------------------------
136 # Repository helpers
137 # ---------------------------------------------------------------------------
138
139 # ---------------------------------------------------------------------------
140 # Event classification
141 # ---------------------------------------------------------------------------
142
def _flat_child_ops(ops: list[DomainOp]) -> list[DomainOp]:
    """Return every leaf op in *ops*, expanding ``patch`` ops at any depth.

    A ``patch`` op is a container: it is replaced by the entries of its
    ``child_ops`` list.  Expansion is repeated for patches found inside other
    patches, so a nested patch never reaches the classifier as an opaque
    entry.  The original depth-first, left-to-right order is preserved.

    Args:
        ops: Ops taken from a commit's structured delta.

    Returns:
        A new list containing only non-``patch`` ops; *ops* is not modified.
    """
    leaves: list[DomainOp] = []
    # An explicit stack (instead of recursion) keeps deeply nested input from
    # hitting the interpreter's recursion limit.  Items are pushed in reverse
    # so that pop() hands them back in their original order.
    pending: list[DomainOp] = list(reversed(ops))
    while pending:
        op = pending.pop()
        if op["op"] == "patch":
            pending.extend(reversed(op["child_ops"]))
        else:
            leaves.append(op)
    return leaves
152
class RefactorEvent:
    """One refactoring operation found in a single commit.

    Holds four plain attributes: ``kind`` (category name), ``address`` (the
    address of the op the event was derived from), ``detail`` (a
    human-readable description) and ``commit`` (the commit record the op
    belongs to).
    """

    __slots__ = ("kind", "address", "detail", "commit")

    def __init__(self, kind: str, address: str, detail: str, commit: CommitRecord) -> None:
        """Store the four fields unchanged."""
        self.kind, self.address = kind, address
        self.detail, self.commit = detail, commit

    def to_dict(self) -> _LabelMap:
        """Return the event as a flat string mapping for JSON output."""
        source = self.commit
        payload: _LabelMap = {
            "kind": self.kind,
            "address": self.address,
            "detail": self.detail,
        }
        # Commit-derived fields follow the event fields, in this key order.
        payload["commit_id"] = source.commit_id
        payload["commit_message"] = source.message
        payload["committed_at"] = source.committed_at.isoformat()
        return payload
179
def _classify_replace(
    address: str, new_summary: str, old_summary: str
) -> tuple[str, str] | None:
    """Classify one ``replace`` op; return ``(kind, detail)`` or ``None``.

    Rules are checked in priority order: rename, move, signature,
    implementation.  ``None`` means the op is not reported (for example a
    ``reformatted`` summary, or one that matches no rule).
    """
    if new_summary.startswith("renamed to "):
        new_name = new_summary.removeprefix("renamed to ").strip()
        return "rename", f"→ {new_name}"

    if new_summary.startswith("moved to "):
        target = new_summary.removeprefix("moved to ").strip()
        return "move", f"→ {target}"

    # A summary such as "implementation changed (signature stable)" contains
    # the word "signature" but explicitly says the signature did NOT change,
    # so the bare keyword test must not claim it for the signature category.
    signature_stable = "signature stable" in new_summary
    if not signature_stable and (
        "signature" in new_summary or "signature" in old_summary
    ):
        # new_summary may be empty when only old_summary matched.
        return "signature", new_summary or f"{address} signature changed"

    # Both "implementation changed" and "(modified)" map to this category.
    if "implementation" in new_summary or "modified" in new_summary:
        return "implementation", new_summary

    # "reformatted" (no semantic change) and unrecognised summaries are
    # deliberately not reported.
    return None


def _classify_ops(commit: CommitRecord) -> list[RefactorEvent]:
    """Extract refactoring events from *commit*'s structured delta.

    Classification rules (checked in priority order):

    1. ``renamed to <name>``   → rename
    2. ``moved to <path>``     → move (on both replace and delete ops)
    3. ``signature`` keyword   → signature, unless the new summary states
       ``signature stable``
    4. ``implementation`` or ``modified`` keyword → implementation
    5. ``reformatted``         → skipped (explicitly "no semantic change")
    6. everything else         → skipped (non-semantic or unrecognised)

    Args:
        commit: Commit whose ``structured_delta`` is inspected.

    Returns:
        Events in the order their ops appear in the flattened delta; an
        empty list when the commit has no structured delta.
    """
    events: list[RefactorEvent] = []
    if commit.structured_delta is None:
        return events

    for op in _flat_child_ops(commit.structured_delta["ops"]):
        address = op["address"]
        classified: tuple[str, str] | None = None

        if op["op"] == "delete":
            # ``or ""`` also covers a summary that is present but None.
            content_summary = op.get("content_summary") or ""
            if "moved to" in content_summary:
                # Text after the last "moved to" marker is the destination.
                target = content_summary.split("moved to")[-1].strip()
                classified = ("move", f"→ {target}")
        elif op["op"] == "replace":
            classified = _classify_replace(
                address,
                op.get("new_summary") or "",
                op.get("old_summary") or "",
            )

        if classified is not None:
            kind, detail = classified
            events.append(RefactorEvent(
                kind=kind,
                address=address,
                detail=detail,
                commit=commit,
            ))

    return events
253
254 # ---------------------------------------------------------------------------
255 # Output
256 # ---------------------------------------------------------------------------
257
# Display label for each event kind in the human-readable report.  Every label
# is padded to 14 columns -- the length of "IMPLEMENTATION" and the same width
# the report applies to kinds missing from this table -- so the address column
# lines up across all rows.
_LABEL: _LabelMap = {
    kind: kind.upper().ljust(14)
    for kind in ("rename", "move", "signature", "implementation")
}
264
def _print_human(
    events: list[RefactorEvent],
    from_label: str,
    to_label: str,
    commits_scanned: int,
    truncated: bool,
) -> None:
    """Print the human-readable refactoring report to stdout.

    Args:
        events: Events to list, in display order.
        from_label: Text shown on the ``From:`` header line.
        to_label: Text shown on the ``To:`` header line.
        commits_scanned: Commit count quoted in the truncation warning.
        truncated: When true, warn that the result may be incomplete.

    All commit-derived text (header labels, addresses, details and commit
    messages) is passed through ``sanitize_display`` before printing, so the
    same treatment applies to every field rather than only to some of them.
    """
    rule = "─" * 62

    print("\nSemantic refactoring report")
    print(f"From: {sanitize_display(from_label)}")
    print(f"To: {sanitize_display(to_label)}")
    print(rule)

    if truncated:
        print(
            f"\n⚠️ Results may be incomplete — scanned {commits_scanned:,} commits "
            "(use --max to increase the limit).",
        )

    if not events:
        print("\n (no semantic refactoring detected in this range)")
        return

    kind_counts: _KindCounts = {}
    for ev in events:
        # Kinds without a table entry fall back to an upper-cased,
        # 14-column label.
        label = _LABEL.get(ev.kind, ev.kind.upper().ljust(14))
        print(f"\n{label} {sanitize_display(ev.address)}")
        # The detail text comes from the commit's delta summaries, i.e. the
        # same source as the address, so it is sanitised as well.
        print(f" {sanitize_display(ev.detail)}")
        print(f' commit {ev.commit.commit_id} "{sanitize_display(ev.commit.message)}"')
        kind_counts[ev.kind] = kind_counts.get(ev.kind, 0) + 1

    print(f"\n{rule}")
    # Kinds are listed alphabetically so the summary line is deterministic.
    summary_parts = [f"{count} {kind}" for kind, count in sorted(kind_counts.items())]
    print(f"{len(events)} refactoring operation(s) detected")
    print(f"({' · '.join(summary_parts)})")
300
301 # ---------------------------------------------------------------------------
302 # Argument parser registration
303 # ---------------------------------------------------------------------------
304
def register(subparsers: "argparse._SubParsersAction[argparse.ArgumentParser]") -> None:
    """Attach the ``detect-refactor`` subcommand to *subparsers*.

    The module docstring is used verbatim as the long description, and the
    parsed namespace is routed to ``run`` through the ``func`` default.
    """
    cmd = subparsers.add_parser(
        "detect-refactor",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__,
        help="Detect semantic refactoring operations across a commit range.",
    )

    # The two range endpoints share everything except flag, dest and help.
    ref_options = (
        ("--from", "from_ref", "Start of range (exclusive). Default: initial commit."),
        ("--to", "to_ref", "End of range (inclusive). Default: HEAD."),
    )
    for flag, dest, help_text in ref_options:
        cmd.add_argument(flag, dest=dest, metavar="REF", default=None, help=help_text)

    cmd.add_argument(
        "--max",
        dest="max_commits",
        metavar="N",
        type=int,
        default=500,
        help="Maximum number of commits to inspect (default: 500).",
    )
    cmd.add_argument(
        "--kind", "-k",
        dest="kind_filter",
        metavar="KIND",
        default=None,
        help="Filter to one category: implementation, move, rename, signature.",
    )
    cmd.add_argument(
        "--json", "-j",
        dest="json_out",
        action="store_true",
        help="Emit the full refactoring report as JSON.",
    )

    cmd.set_defaults(func=run)
334
335 # ---------------------------------------------------------------------------
336 # Command entry point
337 # ---------------------------------------------------------------------------
338
def run(args: argparse.Namespace) -> None:
    """Detect refactoring operations across a commit range and print a report.

    Resolves the ``--to`` and (optional) ``--from`` refs against the current
    branch, walks commits starting at the ``--to`` commit with
    ``walk_commits_bfs`` (stopping at the ``--from`` commit when given and
    after at most ``--max`` commits), classifies each commit's structured
    delta with ``_classify_ops``, applies the optional ``--kind`` filter and
    prints either a JSON document or the human-readable report.

    Agent quickstart
    ----------------
    ::

        muse code detect-refactor --json
        muse code detect-refactor --from HEAD~20 --json
        muse code detect-refactor --kind rename --json
        muse code detect-refactor --from v1.0.0 --to v2.0.0 --json

    JSON fields
    -----------
    In addition to the envelope fields supplied by ``make_envelope``:

    from_ref         ``<commit_id> "<message>"`` of the start commit
                     (exclusive), or ``initial commit`` without ``--from``.
    to_ref           ``<commit_id> "<message>"`` of the end commit (inclusive).
    commits_scanned  Number of commits returned by the walk.
    truncated        Truncation flag reported by the walk.
    total            Number of events after filtering.
    events           List of event objects: ``kind``, ``address``,
                     ``detail``, ``commit_id``, ``commit_message``,
                     ``committed_at``.

    Args:
        args: Parsed namespace with ``from_ref``, ``to_ref``,
            ``max_commits``, ``kind_filter`` and ``json_out``.

    Raises:
        SystemExit: With ``ExitCode.USER_ERROR`` when ``--kind`` is unknown,
            ``--max`` is below 1, or a ref cannot be resolved.
    """
    elapsed = start_timer()
    from_ref: str | None = args.from_ref
    to_ref: str | None = args.to_ref
    kind_filter: str | None = args.kind_filter
    json_out: bool = args.json_out

    # ── Input validation ──────────────────────────────────────────────────────

    if kind_filter and kind_filter not in _VALID_KINDS:
        print(
            f"❌ Unknown kind '{kind_filter}'. "
            f"Valid: {', '.join(sorted(_VALID_KINDS))}",
            file=sys.stderr,
        )
        raise SystemExit(ExitCode.USER_ERROR)

    # Check the raw argument, not the clamped one: clamp_int is called with a
    # lower bound of 1, so its result is presumably never below 1 and a check
    # placed after it could not reject ``--max 0``.
    if args.max_commits < 1:
        print("❌ --max must be at least 1.", file=sys.stderr)
        raise SystemExit(ExitCode.USER_ERROR)
    max_commits: int = clamp_int(args.max_commits, 1, 100_000, 'max_commits')

    # ── Repo / commit resolution ──────────────────────────────────────────────

    root = require_repo()
    branch = read_current_branch(root)

    to_commit = resolve_commit_ref(root, branch, to_ref)
    if to_commit is None:
        label = to_ref or "HEAD"
        print(f"❌ Commit '{label}' not found.", file=sys.stderr)
        raise SystemExit(ExitCode.USER_ERROR)

    from_commit: CommitRecord | None = None
    if from_ref is not None:
        from_commit = resolve_commit_ref(root, branch, from_ref)
        if from_commit is None:
            print(f"❌ Commit '{from_ref}' not found.", file=sys.stderr)
            raise SystemExit(ExitCode.USER_ERROR)
    from_commit_id: str | None = (
        from_commit.commit_id if from_commit is not None else None
    )

    # ── DAG walk + classification ─────────────────────────────────────────────

    commits, truncated = walk_commits_bfs(
        root, to_commit.commit_id, max_commits=max_commits, stop_at_commit_id=from_commit_id
    )

    all_events: list[RefactorEvent] = []
    for commit in commits:
        evs = _classify_ops(commit)
        if kind_filter:
            evs = [e for e in evs if e.kind == kind_filter]
        all_events.extend(evs)

    # ── Labels ────────────────────────────────────────────────────────────────

    # The start commit was already resolved above, so its record is reused
    # here instead of being read from the store a second time.
    if from_commit is not None:
        from_label = f'{from_commit.commit_id} "{from_commit.message}"'
    else:
        from_label = "initial commit"
    to_label = f'{to_commit.commit_id} "{to_commit.message}"'

    # ── Output ────────────────────────────────────────────────────────────────

    if json_out:
        print(json.dumps(_RefactorOutputJson(
            **make_envelope(elapsed),
            from_ref=from_label,
            to_ref=to_label,
            commits_scanned=len(commits),
            truncated=truncated,
            total=len(all_events),
            events=[e.to_dict() for e in all_events],
        )))
        return

    _print_human(all_events, from_label, to_label, len(commits), truncated)
File History 1 commit
sha256:06dba78c2a78e251b580422dd1fd547f3c8357ff18f7709a860873b2d24dbbbf chore: bump version to 0.2.0rc14 Sonnet 4.6 patch 2 days ago