gabriel / muse public
hotspots.py python
445 lines 16.5 KB
Raw
sha256:06dba78c2a78e251b580422dd1fd547f3c8357ff18f7709a860873b2d24dbbbf chore: bump version to 0.2.0rc14 Sonnet 4.6 patch 13 hours ago
1 """muse code hotspots -- symbol churn leaderboard.
2
3 Walks the commit history and counts how many commits touched each symbol.
4 High churn = instability signal. The functions that change most are the
5 ones that need the most attention -- refactoring targets, test coverage gaps,
6 or domain logic under active evolution.
7
8 Unlike file-level churn metrics, ``muse code hotspots`` operates at the
9 *symbol* level: a 5,000-line module with one unstable function shows that
10 function at the top, not the whole file.
11
12 Import pseudo-symbols (``::import::*``) are excluded by default because they
13 almost always reflect dependency management rather than logic churn. Pass
14 ``--include-imports`` to include them.
15
16 Usage::
17
18 muse code hotspots
19 muse code hotspots --top 20
20 muse code hotspots --kind function --language Python
21 muse code hotspots --from HEAD~30 --to HEAD
22 muse code hotspots --min 3 # only symbols that changed >= 3 times
23 muse code hotspots --json # machine-readable for agents
24
25 Output::
26
27 Symbol churn -- top 10 most-changed symbols
28 Commits analysed: 47
29
30 1 src/billing.py::compute_invoice_total 12 changes
31 2 src/api.py::handle_request 9 changes
32 3 src/auth.py::validate_token 7 changes
33 4 src/models.py::User.save 5 changes
34
35 High churn = instability signal.
36 """
37
38 import argparse
39 import json
40 import logging
41 import pathlib
42 import sys
43 from typing import TypedDict
44
45 from muse.core.envelope import EnvelopeJson, make_envelope
46 from muse.core.errors import ExitCode
47 from muse.core.repo import require_repo
48 from muse.core.timing import start_timer
49 from muse.core.refs import read_current_branch
50 from muse.core.commits import resolve_commit_ref
51 from muse.domain import DomainOp
52 from muse.plugins.code._query import (
53 dir_of,
54 flat_symbol_ops,
55 language_of,
56 normalise_language,
57 touched_directories,
58 walk_commits_bfs,
59 )
60
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Default value of ``--top`` (number of ranked entries shown).
_DEFAULT_TOP = 20
# Default value of ``--max-commits`` (upper bound on commits scanned).
_DEFAULT_MAX_COMMITS = 10_000

# Canonical kind names produced by Muse's AST parser summaries.
# ``_kind_from_op`` only returns a word from a summary if it is in this set.
_KNOWN_KINDS: frozenset[str] = frozenset({
    "function", "async_function", "class", "method", "async_method",
    "variable", "import", "section", "rule",
})

# NOTE(review): this import sits below the constants instead of in the
# top-of-file import group; consider moving it up with the other imports.
from muse.core.validation import clamp_int, sanitize_display

# Address (symbol or directory) -> churn count; returned by the collectors.
type _IntMap = dict[str, int]
# String-to-string mapping alias (no use is visible in this module).
type _StrMap = dict[str, str]
76
class _HotspotsFilters(TypedDict, total=False):
    """Echo of the active filter arguments, emitted under ``filters`` in JSON.

    Declared with ``total=False``, so every key is optional to a type checker.
    """

    # Value of ``--kind``, or ``None`` when the option was not given.
    kind: str | None
    # Value of ``--language`` after ``normalise_language``, or ``None``.
    language: str | None
    # ``True`` when ``--include-imports`` was passed.
    include_imports: bool
    # Value of ``--min``: the minimum churn count an entry must reach.
    min_changes: int
82
class _HotspotEntry(TypedDict):
    """One row of the ranked ``hotspots`` list in the JSON output."""

    # Symbol address, or a directory when ``--granularity directory`` is used.
    address: str
    # Churn count recorded for that address.
    changes: int
86
87 # ---------------------------------------------------------------------------
88 # Typed output shape
89 # ---------------------------------------------------------------------------
90
class _HotspotsOutputJson(EnvelopeJson):
    """JSON output for ``muse code hotspots --json``.

    Extends :class:`~muse.core.envelope.EnvelopeJson`, so the standard envelope
    fields declared there are part of this shape as well.

    Fields
    ------
    from_ref            Exclusive start of the scanned commit range, or ``None``
                        for the initial commit (all history up to to_ref).
    to_ref              Inclusive end of the scanned commit range (branch name
                        when ``--to`` is omitted, i.e. the current branch tip).
    commits_analysed    Total number of commits walked during the BFS pass.
    truncated           True when the scan hit ``--max-commits`` before exhausting
                        history — results cover the most recent N commits only.
    filters             Active filter values dict: ``kind``, ``language``,
                        ``include_imports``, ``min_changes``.
    hotspots            Ranked list of ``{address, changes}`` dicts, highest churn
                        first (symbol that changed most times is at index 0).

    Note: ``run`` additionally writes a ``granularity`` key into the emitted
    dict after converting this TypedDict to a plain ``dict``; that key is not
    declared here.
    """

    from_ref: str | None
    to_ref: str
    commits_analysed: int
    truncated: bool
    filters: _HotspotsFilters
    hotspots: list[_HotspotEntry]
117
def _kind_from_op(op: DomainOp) -> str:
    """Return the symbol kind named in *op*'s summary text, or ``""``.

    Where the kind word lives depends on the op type:

    * ``replace`` ops: first word of ``old_summary``, e.g.
      ``"function _collect_paths (implementation)"`` gives ``"function"``.
    * every other op (``insert`` / ``delete``): second word of
      ``content_summary``, after the leading "added" / "removed", e.g.
      ``"removed import json L5–5"`` gives ``"import"``.

    The empty string is returned when the summary is missing, is not a
    string, is too short, or the candidate word is not in ``_KNOWN_KINDS``.
    """
    # Select the summary field and the word position that holds the kind.
    if op["op"] == "replace":
        text, position = op.get("old_summary"), 0
    else:
        text, position = op.get("content_summary"), 1

    if not isinstance(text, str):
        return ""

    words = text.split()
    if len(words) <= position:
        return ""

    candidate = words[position]
    return candidate if candidate in _KNOWN_KINDS else ""
144
145 # ---------------------------------------------------------------------------
146 # Repository helpers
147 # ---------------------------------------------------------------------------
148
149 # ---------------------------------------------------------------------------
150 # Churn collection
151 # ---------------------------------------------------------------------------
152
def _collect_churn(
    root: pathlib.Path,
    to_commit_id: str,
    from_commit_id: str | None,
    kind_filter: str | None,
    language_filter: str | None,
    include_imports: bool,
    max_commits: int,
) -> tuple[_IntMap, int, bool]:
    """Return ``(churn_counts, commits_analysed, truncated)``.

    Commits come from ``walk_commits_bfs``, starting at *to_commit_id*,
    stopping at *from_commit_id* and capped at *max_commits*. For every commit
    that carries a ``structured_delta``, each symbol address that passes the
    filters is credited with exactly one change, so the count is "number of
    commits that touched the symbol" even when a single commit holds several
    ops for the same address.

    Parameters
    ----------
    root              Repository root passed through to the commit walker.
    to_commit_id      Commit the walk starts from (inclusive end of the range).
    from_commit_id    Commit the walk stops at, or ``None`` for no lower bound.
    kind_filter       When set, only ops whose ``_kind_from_op`` equals it count.
    language_filter   When set, only addresses whose file's ``language_of``
                      equals it count (``run`` normalises the value first).
    include_imports   When false, addresses containing ``::import::`` are skipped.
    max_commits       Upper bound handed to the commit walker.

    Returns
    -------
    ``churn_counts`` maps symbol address to commit count, ``commits_analysed``
    is the number of commits returned by the walker (including those without a
    structured delta), and ``truncated`` is the walker's truncation flag.
    """
    commits, truncated = walk_commits_bfs(
        root, to_commit_id, max_commits, stop_at_commit_id=from_commit_id
    )
    counts: _IntMap = {}
    for commit in commits:
        if commit.structured_delta is None:
            continue

        # Addresses already credited for this commit. Without this, a commit
        # containing several ops on one symbol would inflate that symbol's
        # churn beyond the documented per-commit count.
        credited: set[str] = set()
        for op in flat_symbol_ops(commit.structured_delta["ops"]):
            addr: str = op["address"]
            if addr in credited:
                continue

            # Exclude import pseudo-symbols unless requested.
            if not include_imports and "::import::" in addr:
                continue

            # The file path is the part of the address before the first "::".
            file_path = addr.split("::")[0]
            if language_filter and language_of(file_path) != language_filter:
                continue

            if kind_filter and _kind_from_op(op) != kind_filter:
                continue

            credited.add(addr)
            counts[addr] = counts.get(addr, 0) + 1

    return counts, len(commits), truncated
192
193
def _collect_directory_churn(
    root: pathlib.Path,
    to_commit_id: str,
    from_commit_id: str | None,
    max_commits: int,
) -> tuple[_IntMap, int, bool]:
    """Return ``(dir_churn_counts, commits_analysed, truncated)``.

    Directory-level counterpart of the symbol churn pass: for every walked
    commit that has a ``structured_delta``, each directory reported by
    ``touched_directories`` gets one more change. The intended contract is
    that a commit counts once per directory however many files in it changed;
    that relies on ``touched_directories`` reporting each directory once per
    commit.

    ``commits_analysed`` is the number of commits returned by
    ``walk_commits_bfs`` and ``truncated`` is the flag it reports.
    """
    walked, hit_cap = walk_commits_bfs(
        root, to_commit_id, max_commits, stop_at_commit_id=from_commit_id
    )

    tally: _IntMap = {}
    for entry in walked:
        delta = entry.structured_delta
        if delta is not None:
            for directory in touched_directories(delta["ops"]):
                tally[directory] = tally.get(directory, 0) + 1

    return tally, len(walked), hit_cap
217
218
219 # ---------------------------------------------------------------------------
220 # Argument parser registration
221 # ---------------------------------------------------------------------------
222
def register(subparsers: "argparse._SubParsersAction[argparse.ArgumentParser]") -> None:
    """Attach the ``hotspots`` subcommand and all of its options to *subparsers*.

    The module docstring is used verbatim as the command description, and
    ``run`` is installed as the handler through the parser's ``func`` default.
    """
    hotspots = subparsers.add_parser(
        "hotspots",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__,
        help="Show the symbols that change most often — the churn leaderboard.",
    )
    add = hotspots.add_argument

    # Result sizing.
    add(
        "--top", "-n",
        dest="top", metavar="N", type=int, default=_DEFAULT_TOP,
        help=f"Number of symbols to show (default: {_DEFAULT_TOP}).",
    )
    add(
        "--min",
        dest="min_changes", metavar="N", type=int, default=1,
        help="Only show symbols that changed at least N times (default: 1).",
    )

    # Symbol filters.
    add(
        "--kind", "-k",
        dest="kind_filter", metavar="KIND", default=None,
        help="Restrict to symbols of this kind (function, class, method, …).",
    )
    add(
        "--language", "-l",
        dest="language_filter", metavar="LANG", default=None,
        help="Restrict to symbols from files of this language (case-insensitive).",
    )
    add(
        "--include-imports",
        dest="include_imports", action="store_true",
        help="Include import pseudo-symbols (excluded by default).",
    )

    # Commit range.
    add(
        "--from",
        dest="from_ref", metavar="REF", default=None,
        help="Exclusive start of the commit range (default: initial commit).",
    )
    add(
        "--to",
        dest="to_ref", metavar="REF", default=None,
        help="Inclusive end of the commit range (default: HEAD).",
    )
    add(
        "--max-commits",
        dest="max_commits", metavar="N", type=int, default=_DEFAULT_MAX_COMMITS,
        help=f"Maximum commits to scan (default: {_DEFAULT_MAX_COMMITS}).",
    )

    # Aggregation and output format.
    add(
        "--granularity",
        dest="granularity", metavar="LEVEL",
        choices=("symbol", "directory"), default="symbol",
        help="Aggregation level: 'symbol' (default) or 'directory'.",
    )
    add(
        "--json", "-j",
        dest="json_out", action="store_true",
        help="Emit results as structured JSON.",
    )

    hotspots.set_defaults(func=run)
274
275 # ---------------------------------------------------------------------------
276 # Command entry point
277 # ---------------------------------------------------------------------------
278
def run(args: argparse.Namespace) -> None:
    """Show the symbols that change most often — the churn leaderboard.

    Walks the commit history (BFS, both merge parents) and counts how many
    commits touched each symbol. High-churn symbols reveal instability that
    file-level metrics miss: a stable file can contain a single burning
    function. Use ``--from`` / ``--to`` to scope to a sprint or release.

    With ``--granularity directory`` the counts are rolled up per directory
    instead. ``--kind``, ``--language`` and ``--include-imports`` are not
    applied in that mode, so the text header labels the listing as directory
    churn and omits those filters; ``--min`` and ``--top`` still apply.

    Agent quickstart
    ----------------
    ::

        muse code hotspots --json
        muse code hotspots --top 20 --json
        muse code hotspots --kind function --min 3 --json
        muse code hotspots --from HEAD~50 --json

    JSON fields
    -----------
    from_ref            Start ref used (exclusive).
    to_ref              End ref used (inclusive).
    commits_analysed    Number of commits walked.
    truncated           ``true`` if ``--max-commits`` was reached.
    filters             Echo of filter arguments used.
    hotspots            Ranked list: ``address``, ``changes`` (commit count).
    granularity         Aggregation level used (``symbol`` or ``directory``).

    Exit codes
    ----------
    0   Analysis complete.
    1   Invalid arguments or ref not found (raised as ``ExitCode.USER_ERROR``).
    2   Not inside a Muse repository.
    """
    elapsed = start_timer()
    top: int = clamp_int(args.top, 1, 10_000, 'top')
    min_changes: int = clamp_int(args.min_changes, 0, 100000, 'min_changes')
    kind_filter: str | None = args.kind_filter
    language_filter: str | None = args.language_filter
    include_imports: bool = args.include_imports
    from_ref: str | None = args.from_ref
    to_ref: str | None = args.to_ref
    max_commits: int = clamp_int(args.max_commits, 1, 100_000, 'max_commits')
    json_out: bool = args.json_out
    # ``getattr`` keeps callers working when they build a Namespace by hand
    # without the ``granularity`` attribute.
    granularity: str = getattr(args, "granularity", "symbol")
    by_directory = granularity == "directory"

    # ── Validation ────────────────────────────────────────────────────────────
    # NOTE(review): if ``clamp_int`` clamps rather than rejects, the ``top`` and
    # ``max_commits`` checks cannot fire; they are kept as a safety net.

    if top < 1:
        print("❌ --top must be at least 1.", file=sys.stderr)
        raise SystemExit(ExitCode.USER_ERROR)

    if min_changes < 1:
        print("❌ --min must be at least 1.", file=sys.stderr)
        raise SystemExit(ExitCode.USER_ERROR)

    if max_commits < 1:
        print("❌ --max-commits must be at least 1.", file=sys.stderr)
        raise SystemExit(ExitCode.USER_ERROR)

    if language_filter is not None:
        language_filter = normalise_language(language_filter)

    def _emit_json(
        to_label: str,
        analysed: int,
        capped: bool,
        entries: list[_HotspotEntry],
    ) -> None:
        """Print the JSON document for this invocation on stdout."""
        out = dict(_HotspotsOutputJson(
            # The envelope is built at emit time so it reflects the full run.
            **make_envelope(elapsed),
            from_ref=from_ref,
            to_ref=to_label,
            commits_analysed=analysed,
            truncated=capped,
            filters={
                "kind": kind_filter,
                "language": language_filter,
                "include_imports": include_imports,
                "min_changes": min_changes,
            },
            hotspots=entries,
        ))
        # ``granularity`` is not part of the typed shape; it is added afterwards.
        out["granularity"] = granularity
        print(json.dumps(out))

    # ── Repo / commit resolution ──────────────────────────────────────────────

    root = require_repo()
    branch = read_current_branch(root)

    to_commit = resolve_commit_ref(root, branch, to_ref)
    if to_commit is None:
        if to_ref is not None:
            print(f"❌ Commit '{to_ref}' not found.", file=sys.stderr)
            raise SystemExit(ExitCode.USER_ERROR)
        # Empty repo — no commits yet; return empty result.
        if json_out:
            _emit_json(branch, 0, False, [])
        else:
            noun = "directory" if by_directory else "symbol"
            print(f" (no {noun}-level changes found — repository has no commits)")
        return

    from_commit_id: str | None = None
    if from_ref is not None:
        from_commit = resolve_commit_ref(root, branch, from_ref)
        if from_commit is None:
            print(f"❌ Commit '{from_ref}' not found.", file=sys.stderr)
            raise SystemExit(ExitCode.USER_ERROR)
        from_commit_id = from_commit.commit_id

    # ── Churn analysis ────────────────────────────────────────────────────────

    if by_directory:
        counts, total_commits, truncated = _collect_directory_churn(
            root, to_commit.commit_id, from_commit_id, max_commits,
        )
    else:
        counts, total_commits, truncated = _collect_churn(
            root, to_commit.commit_id, from_commit_id,
            kind_filter, language_filter, include_imports, max_commits,
        )

    # Apply --min filter before ranking.
    if min_changes > 1:
        counts = {addr: n for addr, n in counts.items() if n >= min_changes}

    # ``sorted`` is stable, so entries with equal counts keep collection order.
    ranked = sorted(counts.items(), key=lambda kv: kv[1], reverse=True)[:top]

    # ── Output ────────────────────────────────────────────────────────────────

    if json_out:
        entries: list[_HotspotEntry] = [
            {"address": a, "changes": c} for a, c in ranked
        ]
        _emit_json(to_ref or branch, total_commits, truncated, entries)
        return

    if not ranked:
        noun = "directory" if by_directory else "symbol"
        print(f" (no {noun}-level changes found in this range)")
        return

    # Only describe filters that were actually applied: the directory pass
    # takes no kind/language filter, so listing them would be misleading.
    filters_desc = ""
    if kind_filter and not by_directory:
        filters_desc += f" kind={kind_filter}"
    if language_filter and not by_directory:
        filters_desc += f" language={language_filter}"
    if min_changes > 1:
        filters_desc += f" min={min_changes}"

    # Label the leaderboard after what is being ranked.
    if by_directory:
        title, plural = "Directory", "directories"
    else:
        title, plural = "Symbol", "symbols"

    print(f"\n{title} churn — top {len(ranked)} most-changed {plural}{filters_desc}")
    print(f"Commits analysed: {total_commits}", end="")
    if truncated:
        print(f" ⚠️ (capped at --max-commits {max_commits})", end="")
    print("\n")

    # Right-align the rank column to the width of the largest rank number.
    width = len(str(len(ranked)))
    for rank, (addr, count) in enumerate(ranked, 1):
        label = "change" if count == 1 else "changes"
        print(f" {rank:>{width}} {sanitize_display(addr):<60} {count:>4} {label}")

    print("")
    print("High churn = instability signal. Consider refactoring or adding tests.")
File History 1 commit
sha256:06dba78c2a78e251b580422dd1fd547f3c8357ff18f7709a860873b2d24dbbbf chore: bump version to 0.2.0rc14 Sonnet 4.6 patch 13 hours ago