detect_refactor.py
python
sha256:06dba78c2a78e251b580422dd1fd547f3c8357ff18f7709a860873b2d24dbbbf
chore: bump version to 0.2.0rc14
Sonnet 4.6
patch
2 days ago
| 1 | """muse code detect-refactor -- semantic refactoring detection across commits. |
| 2 | |
| 3 | This command is impossible in Git. Git sees every refactoring operation as |
| 4 | a diff of text lines. A function extracted into a helper module? Delete lines |
| 5 | here, add lines there -- no semantic connection. A class renamed? Every file |
| 6 | that imports it becomes a "modification". Muse understands *what actually |
| 7 | happened* at the symbol level. |
| 8 | |
| 9 | ``muse code detect-refactor`` scans the commit range and classifies every |
| 10 | semantic operation into one of four refactoring categories: |
| 11 | |
| 12 | ``rename`` |
| 13 | A symbol kept its body but changed its name. Detected via a |
| 14 | ``renamed to <new_name>`` marker in the structured delta. |
| 15 | |
| 16 | ``move`` |
| 17 | A symbol moved to a different file without changing its content. |
| 18 | Detected via a ``moved to <file>`` marker in the structured delta. |
| 19 | |
| 20 | ``signature`` |
| 21 | A symbol's name and body are unchanged; only its parameter list or |
| 22 | return type changed. |
| 23 | |
| 24 | ``implementation`` |
| 25 | A symbol's signature is stable; its internal logic changed. |
| 26 | |
| 27 | Output:: |
| 28 | |
| 29 | Semantic refactoring report |
| 30 | From: cb4afaed "Layer 2: add harmonic dimension" |
| 31 | To: a3f2c9e1 "Refactor: rename and move helpers" |
| 32 | ---------------------------------------------------------------------- |
| 33 | |
| 34 | RENAME src/utils.py::calculate_total |
| 35 | -> compute_total |
| 36 | commit a3f2c9e1 "Rename: improve naming clarity" |
| 37 | |
| 38 | MOVE src/utils.py::compute_total |
| 39 | -> src/helpers.py::compute_total |
| 40 | commit 1d2e3faa "Move: extract helpers module" |
| 41 | |
| 42 | SIGNATURE src/api.py::handle_request |
| 43 | parameters changed: (req, ctx) -> (request, context, timeout) |
| 44 | commit 4b5c6d7e "API: add timeout parameter" |
| 45 | |
| 46 | IMPLEMENTATION src/core.py::process_batch |
| 47 | implementation changed (signature stable) |
| 48 | commit 8f9a0b1c "Perf: vectorise batch processing" |
| 49 | |
| 50 | ---------------------------------------------------------------------- |
| 51 | 4 refactoring operation(s) detected |
| 52 | (1 implementation · 1 move · 1 rename · 1 signature) |
| 53 | |
| 54 | Flags:: |
| 55 | |
| 56 | --from <ref> |
| 57 | Start of the commit range (exclusive). Default: initial commit. |
| 58 | Accepts a full or abbreviated commit SHA or a branch name. |
| 59 | |
| 60 | --to <ref> |
| 61 | End of the commit range (inclusive). Default: HEAD. |
| 62 | |
| 63 | --max <n> |
| 64 | Cap the number of commits inspected (default: 500). When hit, |
| 65 | a warning is shown; increase with --max to see the full range. |
| 66 | |
| 67 | --kind <kind> |
| 68 | Filter to one category: implementation, move, rename, signature. |
| 69 | |
| 70 | --json |
| 71 | Emit the full refactoring report as JSON:: |
| 72 | |
| 73 | { |
| 74 | "schema_version": "<version>", |
| 75 | "from_ref": "<commit_id> \\"message\\"", |
| 76 | "to_ref": "<commit_id> \\"message\\"", |
| 77 | "commits_scanned": 42, |
| 78 | "truncated": false, |
| 79 | "total": 4, |
| 80 | "events": [ |
| 81 | { |
| 82 | "kind": "implementation", |
| 83 | "address": "src/core.py::process_batch", |
| 84 | "detail": "implementation changed ...", |
| 85 | "commit_id": "<sha256>", |
| 86 | "commit_message": "...", |
| 87 | "committed_at": "2026-03-14T..." |
| 88 | } |
| 89 | ] |
| 90 | } |
| 91 | """ |
| 92 | |
| 93 | import argparse |
| 94 | import json |
| 95 | import logging |
| 96 | import pathlib |
| 97 | import sys |
| 98 | from typing import TypedDict |
| 99 | |
| 100 | from muse.core.envelope import EnvelopeJson, make_envelope |
| 101 | from muse.core.errors import ExitCode |
| 102 | from muse.core.repo import require_repo |
| 103 | from muse.core.refs import read_current_branch |
| 104 | from muse.core.commits import ( |
| 105 | CommitRecord, |
| 106 | read_commit, |
| 107 | resolve_commit_ref, |
| 108 | ) |
| 109 | from muse.core.timing import start_timer |
| 110 | from muse.domain import DomainOp |
| 111 | from muse.plugins.code._query import walk_commits_bfs |
| 112 | from muse.core.validation import clamp_int, sanitize_display |
| 113 | |
# Tally of detected events per refactoring kind (kind name -> count).
type _KindCounts = dict[str, int]
# Plain string-to-string mapping; used for display labels and serialised events.
type _LabelMap = dict[str, str]
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
| 117 | |
| 118 | # --------------------------------------------------------------------------- |
| 119 | # Typed output shape |
| 120 | # --------------------------------------------------------------------------- |
| 121 | |
class _RefactorPayload(TypedDict):
    """Command-specific fields of the ``--json`` refactoring report."""

    # Label for the range start: '<commit_id> "<message>"', or "initial commit"
    # when no --from ref was given.
    from_ref: str
    # Label for the range end: '<commit_id> "<message>"'.
    to_ref: str
    # Number of commits returned by the DAG walk.
    commits_scanned: int
    # Truncation flag as reported by the DAG walk.
    truncated: bool
    # Number of events in ``events`` (after any --kind filtering).
    total: int
    # One serialised ``RefactorEvent`` (see ``RefactorEvent.to_dict``) per entry.
    events: list[_LabelMap]
| 129 | |
class _RefactorOutputJson(_RefactorPayload, EnvelopeJson):
    """Full wire shape for ``muse code detect-refactor --json``.

    Combines the report fields declared on ``_RefactorPayload`` with whatever
    fields ``EnvelopeJson`` (from ``muse.core.envelope``) declares.
    """
| 132 | |
# Values accepted by ``--kind``; ``run`` rejects anything outside this set.
_VALID_KINDS: frozenset[str] = frozenset({"rename", "move", "signature", "implementation"})
| 134 | |
| 135 | # --------------------------------------------------------------------------- |
| 136 | # Repository helpers |
| 137 | # --------------------------------------------------------------------------- |
| 138 | |
| 139 | # --------------------------------------------------------------------------- |
| 140 | # Event classification |
| 141 | # --------------------------------------------------------------------------- |
| 142 | |
def _flat_child_ops(ops: list[DomainOp]) -> list[DomainOp]:
    """Expand ``patch`` ops into their direct children; keep other ops as-is.

    Only one level is expanded: a ``patch`` op found *inside* another patch's
    ``child_ops`` is returned unexpanded. Input order is preserved.
    """
    flattened: list[DomainOp] = []
    for entry in ops:
        # A patch op contributes its children; any other op contributes itself.
        contribution = entry["child_ops"] if entry["op"] == "patch" else [entry]
        flattened.extend(contribution)
    return flattened
| 152 | |
class RefactorEvent:
    """One refactoring operation attributed to a specific commit.

    Holds the event ``kind``, the symbol ``address`` it applies to, a short
    human-readable ``detail`` string, and the ``commit`` record it came from.
    """

    __slots__ = ("kind", "address", "detail", "commit")

    def __init__(self, kind: str, address: str, detail: str, commit: CommitRecord) -> None:
        self.kind, self.address = kind, address
        self.detail, self.commit = detail, commit

    def to_dict(self) -> _LabelMap:
        """Return the event as a flat string dict for JSON output.

        The commit is represented by its id, message and ISO-8601 timestamp.
        """
        origin = self.commit
        payload = {
            "kind": self.kind,
            "address": self.address,
            "detail": self.detail,
        }
        payload["commit_id"] = origin.commit_id
        payload["commit_message"] = origin.message
        payload["committed_at"] = origin.committed_at.isoformat()
        return payload
| 179 | |
def _classify_ops(commit: CommitRecord) -> list[RefactorEvent]:
    """Turn the ops in *commit*'s structured delta into refactoring events.

    Patch ops are first expanded one level via ``_flat_child_ops``. Then:

    * ``delete`` ops whose ``content_summary`` contains ``moved to`` → move.
    * ``replace`` ops, first matching rule wins:

      1. ``new_summary`` starts with ``renamed to `` → rename
      2. ``new_summary`` starts with ``moved to `` → move
      3. ``signature`` appears in the new or old summary → signature
      4. ``implementation`` or ``modified`` appears in ``new_summary``
         → implementation
      5. anything else (including ``reformatted``) → no event

    Ops of any other type produce no event. A commit without a structured
    delta yields an empty list.
    """
    delta = commit.structured_delta
    if delta is None:
        return []

    found: list[RefactorEvent] = []

    def record(kind: str, address: str, detail: str) -> None:
        found.append(RefactorEvent(kind=kind, address=address, detail=detail, commit=commit))

    for op in _flat_child_ops(delta["ops"]):
        address = op["address"]
        op_type = op["op"]

        if op_type == "delete":
            summary = op.get("content_summary", "")
            if "moved to" in summary:
                # Destination is whatever follows the last "moved to" marker.
                destination = summary.split("moved to")[-1].strip()
                record("move", address, f"→ {destination}")
            continue

        if op_type != "replace":
            continue

        new_summary: str = op.get("new_summary", "")
        old_summary: str = op.get("old_summary", "")

        if new_summary.startswith("renamed to "):
            renamed = new_summary.removeprefix("renamed to ").strip()
            record("rename", address, f"→ {renamed}")
        elif new_summary.startswith("moved to "):
            destination = new_summary.removeprefix("moved to ").strip()
            record("move", address, f"→ {destination}")
        elif "signature" in new_summary or "signature" in old_summary:
            # new_summary can be empty when only the old summary mentions it.
            record("signature", address, new_summary or f"{address} signature changed")
        elif "implementation" in new_summary or "modified" in new_summary:
            # Both "implementation changed" and "(modified)" land here.
            record("implementation", address, new_summary or "implementation changed")
        # "reformatted" (explicitly no semantic change) and unrecognised
        # summaries intentionally produce nothing.

    return found
| 253 | |
| 254 | # --------------------------------------------------------------------------- |
| 255 | # Output |
| 256 | # --------------------------------------------------------------------------- |
| 257 | |
# Row prefix printed by ``_print_human`` for each refactoring kind.
_LABEL: _LabelMap = {
    "rename": "RENAME ",
    "move": "MOVE ",
    "signature": "SIGNATURE ",
    "implementation": "IMPLEMENTATION",
}
| 264 | |
def _print_human(
    events: list[RefactorEvent],
    from_label: str,
    to_label: str,
    commits_scanned: int,
    truncated: bool,
) -> None:
    """Print the human-readable refactoring report to stdout.

    Args:
        events: Detected events, printed in the order given.
        from_label: Display label for the start of the range.
        to_label: Display label for the end of the range.
        commits_scanned: Number of commits walked; shown in the truncation
            warning.
        truncated: When true, a warning that results may be incomplete is
            printed before the events.

    Every piece of commit-derived text (labels, addresses, details, commit
    messages) is passed through ``sanitize_display`` before printing, and kind
    labels are padded to a common 14-column width so rows line up.
    """
    rule = "─" * 62

    print("\nSemantic refactoring report")
    # Labels embed raw commit messages, so sanitise them like every other
    # commit-derived field printed below.
    print(f"From: {sanitize_display(from_label)}")
    print(f"To: {sanitize_display(to_label)}")
    print(rule)

    if truncated:
        print(
            f"\n⚠️ Results may be incomplete — scanned {commits_scanned:,} commits "
            "(use --max to increase the limit).",
        )

    if not events:
        print("\n (no semantic refactoring detected in this range)")
        return

    for ev in events:
        # Pad mapped and fallback labels alike; 14 is the width of the
        # longest label ("IMPLEMENTATION").
        label = _LABEL.get(ev.kind, ev.kind.upper()).ljust(14)
        print(f"\n{label} {sanitize_display(ev.address)}")
        # detail is derived from op summaries stored in the commit, so it is
        # sanitised too.
        print(f" {sanitize_display(ev.detail)}")
        print(f' commit {ev.commit.commit_id} "{sanitize_display(ev.commit.message)}"')

    print(f"\n{rule}")
    kind_counts: _KindCounts = {}
    for ev in events:
        kind_counts[ev.kind] = kind_counts.get(ev.kind, 0) + 1
    # Alphabetical by kind so the summary line is stable across runs.
    summary_parts = [f"{count} {kind}" for kind, count in sorted(kind_counts.items())]
    print(f"{len(events)} refactoring operation(s) detected")
    print(f"({' · '.join(summary_parts)})")
| 300 | |
| 301 | # --------------------------------------------------------------------------- |
| 302 | # Argument parser registration |
| 303 | # --------------------------------------------------------------------------- |
| 304 | |
def register(subparsers: "argparse._SubParsersAction[argparse.ArgumentParser]") -> None:
    """Attach the ``detect-refactor`` subcommand and its flags to *subparsers*.

    The module docstring is used verbatim as the long description, and
    ``run`` is installed as the handler through the ``func`` default.
    """
    cmd = subparsers.add_parser(
        "detect-refactor",
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        help="Detect semantic refactoring operations across a commit range.",
    )

    # Commit range bounds.
    cmd.add_argument(
        "--from",
        dest="from_ref",
        metavar="REF",
        default=None,
        help="Start of range (exclusive). Default: initial commit.",
    )
    cmd.add_argument(
        "--to",
        dest="to_ref",
        metavar="REF",
        default=None,
        help="End of range (inclusive). Default: HEAD.",
    )

    # Scan limit and filtering.
    cmd.add_argument(
        "--max",
        dest="max_commits",
        metavar="N",
        type=int,
        default=500,
        help="Maximum number of commits to inspect (default: 500).",
    )
    cmd.add_argument(
        "--kind",
        "-k",
        dest="kind_filter",
        metavar="KIND",
        default=None,
        help="Filter to one category: implementation, move, rename, signature.",
    )

    # Output format.
    cmd.add_argument(
        "--json",
        "-j",
        dest="json_out",
        action="store_true",
        help="Emit the full refactoring report as JSON.",
    )

    cmd.set_defaults(func=run)
| 334 | |
| 335 | # --------------------------------------------------------------------------- |
| 336 | # Command entry point |
| 337 | # --------------------------------------------------------------------------- |
| 338 | |
def run(args: argparse.Namespace) -> None:
    """Detect semantic refactoring operations across a commit range.

    Resolves the ``--to`` (default HEAD) and optional ``--from`` refs, walks
    the commits between them, classifies each commit's structured-delta ops
    into ``rename``, ``move``, ``signature`` or ``implementation`` events, and
    prints the result either as JSON or as a human-readable report.

    Agent quickstart
    ----------------
    ::

        muse code detect-refactor --json
        muse code detect-refactor --from HEAD~20 --json
        muse code detect-refactor --kind rename --json
        muse code detect-refactor --from v1.0.0 --to v2.0.0 --json

    JSON fields
    -----------
    from_ref         Start label: ``<commit_id> "<message>"`` or
                     ``initial commit`` (range start is exclusive).
    to_ref           End label: ``<commit_id> "<message>"`` (inclusive).
    commits_scanned  Number of commits walked.
    truncated        Truncation flag reported by the commit walk.
    total            Total refactoring events detected (after ``--kind``).
    events           List of event objects: ``kind``, ``address``,
                     ``detail``, ``commit_id``, ``commit_message``,
                     ``committed_at``.

    The fields produced by ``make_envelope`` are merged into the same object.

    Exit behaviour
    --------------
    Returns normally when the analysis completes. Raises
    ``SystemExit(ExitCode.USER_ERROR)`` for an unknown ``--kind`` or a ref
    that cannot be resolved. ``require_repo`` presumably exits when run
    outside a Muse repository — confirm against its implementation.
    """
    elapsed = start_timer()
    from_ref: str | None = args.from_ref
    to_ref: str | None = args.to_ref
    max_commits: int = clamp_int(args.max_commits, 1, 100_000, 'max_commits')
    kind_filter: str | None = args.kind_filter
    json_out: bool = args.json_out

    # ── Input validation ──────────────────────────────────────────────────────

    # An empty/None kind means "no filter"; only a non-empty unknown value is
    # rejected.
    if kind_filter and kind_filter not in _VALID_KINDS:
        print(
            f"❌ Unknown kind '{kind_filter}'. "
            f"Valid: {', '.join(sorted(_VALID_KINDS))}",
            file=sys.stderr,
        )
        raise SystemExit(ExitCode.USER_ERROR)

    # NOTE(review): max_commits already went through clamp_int with a lower
    # bound of 1, so this check may be unreachable — depends on whether
    # clamp_int clamps or raises; confirm.
    if max_commits < 1:
        print("❌ --max must be at least 1.", file=sys.stderr)
        raise SystemExit(ExitCode.USER_ERROR)

    # ── Repo / commit resolution ──────────────────────────────────────────────

    root = require_repo()
    branch = read_current_branch(root)

    # to_ref may be None here; the error label below treats that as "HEAD".
    to_commit = resolve_commit_ref(root, branch, to_ref)
    if to_commit is None:
        label = to_ref or "HEAD"
        print(f"❌ Commit '{label}' not found.", file=sys.stderr)
        raise SystemExit(ExitCode.USER_ERROR)

    # Stays None when --from is omitted, i.e. the walk has no stop commit.
    from_commit_id: str | None = None
    if from_ref is not None:
        from_commit = resolve_commit_ref(root, branch, from_ref)
        if from_commit is None:
            print(f"❌ Commit '{from_ref}' not found.", file=sys.stderr)
            raise SystemExit(ExitCode.USER_ERROR)
        from_commit_id = from_commit.commit_id

    # ── DAG walk + classification ─────────────────────────────────────────────

    commits, truncated = walk_commits_bfs(
        root, to_commit.commit_id, max_commits=max_commits, stop_at_commit_id=from_commit_id
    )

    # Events are collected in walk order; --kind filtering happens per commit.
    all_events: list[RefactorEvent] = []
    for commit in commits:
        evs = _classify_ops(commit)
        if kind_filter:
            evs = [e for e in evs if e.kind == kind_filter]
        all_events.extend(evs)

    # ── Labels ────────────────────────────────────────────────────────────────

    if from_commit_id is not None:
        # Re-read the start commit for its message; if it cannot be read the
        # label falls back to "initial commit".
        _fc = read_commit(root, from_commit_id)
        from_label = (
            f'{from_commit_id} "{_fc.message}"'
            if _fc is not None
            else "initial commit"
        )
    else:
        from_label = "initial commit"
    to_label = f'{to_commit.commit_id} "{to_commit.message}"'

    # ── Output ────────────────────────────────────────────────────────────────

    if json_out:
        # Envelope fields and report fields are merged into one JSON object.
        print(json.dumps(_RefactorOutputJson(
            **make_envelope(elapsed),
            from_ref=from_label,
            to_ref=to_label,
            commits_scanned=len(commits),
            truncated=truncated,
            total=len(all_events),
            events=[e.to_dict() for e in all_events],
        )))
        return

    _print_human(all_events, from_label, to_label, len(commits), truncated)
File History
1 commit
sha256:06dba78c2a78e251b580422dd1fd547f3c8357ff18f7709a860873b2d24dbbbf
chore: bump version to 0.2.0rc14
Sonnet 4.6
patch
2 days ago