hotspots.py
python
sha256:06dba78c2a78e251b580422dd1fd547f3c8357ff18f7709a860873b2d24dbbbf
chore: bump version to 0.2.0rc14
Sonnet 4.6
patch
13 hours ago
| 1 | """muse code hotspots -- symbol churn leaderboard. |
| 2 | |
| 3 | Walks the commit history and counts how many commits touched each symbol. |
| 4 | High churn = instability signal. The functions that change most are the |
| 5 | ones that need the most attention -- refactoring targets, test coverage gaps, |
| 6 | or domain logic under active evolution. |
| 7 | |
| 8 | Unlike file-level churn metrics, ``muse code hotspots`` operates at the |
| 9 | *symbol* level: a 5,000-line module with one unstable function shows that |
| 10 | function at the top, not the whole file. |
| 11 | |
| 12 | Import pseudo-symbols (``::import::*``) are excluded by default because they |
| 13 | almost always reflect dependency management rather than logic churn. Pass |
| 14 | ``--include-imports`` to include them. |
| 15 | |
| 16 | Usage:: |
| 17 | |
| 18 | muse code hotspots |
| 19 | muse code hotspots --top 20 |
| 20 | muse code hotspots --kind function --language Python |
| 21 | muse code hotspots --from HEAD~30 --to HEAD |
| 22 | muse code hotspots --min 3 # only symbols that changed >= 3 times |
| 23 | muse code hotspots --json # machine-readable for agents |
| 24 | |
| 25 | Output:: |
| 26 | |
| 27 | Symbol churn -- top 10 most-changed symbols |
| 28 | Commits analysed: 47 |
| 29 | |
| 30 | 1 src/billing.py::compute_invoice_total 12 changes |
| 31 | 2 src/api.py::handle_request 9 changes |
| 32 | 3 src/auth.py::validate_token 7 changes |
| 33 | 4 src/models.py::User.save 5 changes |
| 34 | |
| 35 | High churn = instability signal. |
| 36 | """ |
| 37 | |
| 38 | import argparse |
| 39 | import json |
| 40 | import logging |
| 41 | import pathlib |
| 42 | import sys |
| 43 | from typing import TypedDict |
| 44 | |
| 45 | from muse.core.envelope import EnvelopeJson, make_envelope |
| 46 | from muse.core.errors import ExitCode |
| 47 | from muse.core.repo import require_repo |
| 48 | from muse.core.timing import start_timer |
| 49 | from muse.core.refs import read_current_branch |
| 50 | from muse.core.commits import resolve_commit_ref |
| 51 | from muse.domain import DomainOp |
| 52 | from muse.plugins.code._query import ( |
| 53 | dir_of, |
| 54 | flat_symbol_ops, |
| 55 | language_of, |
| 56 | normalise_language, |
| 57 | touched_directories, |
| 58 | walk_commits_bfs, |
| 59 | ) |
| 60 | |
| 61 | logger = logging.getLogger(__name__) |
| 62 | |
| 63 | _DEFAULT_TOP = 20 |
| 64 | _DEFAULT_MAX_COMMITS = 10_000 |
| 65 | |
| 66 | # Canonical kind names produced by Muse's AST parser summaries. |
| 67 | _KNOWN_KINDS: frozenset[str] = frozenset({ |
| 68 | "function", "async_function", "class", "method", "async_method", |
| 69 | "variable", "import", "section", "rule", |
| 70 | }) |
| 71 | |
| 72 | from muse.core.validation import clamp_int, sanitize_display |
| 73 | |
# Churn tally: symbol address (or directory key) -> number of changes counted.
type _IntMap = dict[str, int]
# String-to-string mapping; not referenced by the code visible in this module.
type _StrMap = dict[str, str]
| 76 | |
class _HotspotsFilters(TypedDict, total=False):
    """Echo of the active filter arguments, emitted under ``filters`` in JSON output."""

    kind: str | None  # value of --kind, or None when the flag was not given
    language: str | None  # value of --language after normalise_language(), or None
    include_imports: bool  # True when --include-imports was passed
    min_changes: int  # threshold supplied via --min
| 82 | |
class _HotspotEntry(TypedDict):
    """One ranked row of the ``hotspots`` list in JSON output."""

    address: str  # symbol address, or the directory key with --granularity directory
    changes: int  # churn count recorded for that address
| 86 | |
| 87 | # --------------------------------------------------------------------------- |
| 88 | # Typed output shape |
| 89 | # --------------------------------------------------------------------------- |
| 90 | |
class _HotspotsOutputJson(EnvelopeJson):
    """JSON output for ``muse code hotspots --json``.

    Extends :class:`~muse.core.envelope.EnvelopeJson`, so the standard
    envelope fields are part of every payload alongside the fields below.

    Note: ``run`` appends a ``granularity`` key to the emitted dict after this
    TypedDict has been constructed; that key is not declared here.

    Fields
    ------
    from_ref            Exclusive start of the scanned commit range, or ``None``
                        when ``--from`` was not given (all history up to to_ref).
    to_ref              Inclusive end of the scanned commit range (the current
                        branch name when ``--to`` is omitted).
    commits_analysed    Total number of commits walked during the BFS pass.
    truncated           True when the scan hit ``--max-commits`` before exhausting
                        history — results cover the most recent N commits only.
    filters             Active filter values dict: ``kind``, ``language``,
                        ``include_imports``, ``min_changes``.
    hotspots            Ranked list of ``{address, changes}`` dicts, highest churn
                        first (index 0 changed the most), at most ``--top`` long.
    """

    from_ref: str | None
    to_ref: str
    commits_analysed: int
    truncated: bool
    filters: _HotspotsFilters
    hotspots: list[_HotspotEntry]
| 117 | |
def _kind_from_op(op: DomainOp) -> str:
    """Return the symbol kind named in *op*'s summary, or ``""`` if unknown.

    Where the kind word sits depends on the op type:

    * ``replace`` ops: first word of ``old_summary``::

          "function _collect_paths (implementation)"  →  "function"

    * every other op (``insert`` / ``delete``): second word of
      ``content_summary``, right after the "added" / "removed" verb::

          "added function test_fn L10–20"  →  "function"
          "removed import json L5–5"       →  "import"

    The candidate word is accepted only if it is listed in ``_KNOWN_KINDS``.
    A missing or non-string summary, or one with too few words, gives ``""``.
    """
    if op["op"] == "replace":
        text, position = op.get("old_summary"), 0
    else:
        text, position = op.get("content_summary"), 1

    if not isinstance(text, str):
        return ""

    words = text.split()
    if len(words) > position and words[position] in _KNOWN_KINDS:
        return words[position]
    return ""
| 144 | |
| 145 | # --------------------------------------------------------------------------- |
| 146 | # Repository helpers |
| 147 | # --------------------------------------------------------------------------- |
| 148 | |
| 149 | # --------------------------------------------------------------------------- |
| 150 | # Churn collection |
| 151 | # --------------------------------------------------------------------------- |
| 152 | |
def _collect_churn(
    root: pathlib.Path,
    to_commit_id: str,
    from_commit_id: str | None,
    kind_filter: str | None,
    language_filter: str | None,
    include_imports: bool,
    max_commits: int,
) -> tuple[_IntMap, int, bool]:
    """Return ``(churn_counts, commits_analysed, truncated)``.

    History is obtained from ``walk_commits_bfs`` starting at *to_commit_id*,
    stopping at *from_commit_id* (when given) and scanning at most
    *max_commits* commits.  Per this module's design notes the walk follows
    both ``parent_commit_id`` and ``parent2_commit_id``, so events on merged
    feature branches are included.

    ``churn_counts`` maps a symbol address to the number of *commits* that
    touched it: a commit contributes at most 1 per address, even when its
    delta yields several ops for that address.  This keeps the figure in line
    with the documented "commit count" and with directory-level churn.

    Filters
    -------
    include_imports   When false, addresses containing ``::import::`` are skipped.
    language_filter   When set, only files whose ``language_of`` equals it count.
    kind_filter       When set, only ops whose parsed kind equals it count.

    ``commits_analysed`` is the number of commits walked, including those
    without a structured delta.
    """
    commits, truncated = walk_commits_bfs(
        root, to_commit_id, max_commits, stop_at_commit_id=from_commit_id
    )
    counts: _IntMap = {}
    for commit in commits:
        if commit.structured_delta is None:
            continue

        # Addresses already credited for this commit; prevents double counting
        # when one commit carries more than one op for the same symbol.
        counted_here: set[str] = set()
        for op in flat_symbol_ops(commit.structured_delta["ops"]):
            addr: str = op["address"]
            if addr in counted_here:
                continue

            # Exclude import pseudo-symbols unless requested.
            if not include_imports and "::import::" in addr:
                continue

            # The file path is everything before the first "::" separator.
            file_path = addr.split("::")[0]
            if language_filter and language_of(file_path) != language_filter:
                continue

            if kind_filter and _kind_from_op(op) != kind_filter:
                continue

            # Mark only after every filter passed, so a later op for the same
            # address that does match the filters can still be counted.
            counted_here.add(addr)
            counts[addr] = counts.get(addr, 0) + 1

    return counts, len(commits), truncated
| 192 | |
| 193 | |
def _collect_directory_churn(
    root: pathlib.Path,
    to_commit_id: str,
    from_commit_id: str | None,
    max_commits: int,
) -> tuple[_IntMap, int, bool]:
    """Return ``(dir_churn_counts, commits_analysed, truncated)``.

    Walks the same commit range as the symbol-level collector and, for every
    commit that has a structured delta, adds 1 to each directory reported by
    ``touched_directories`` for that commit's ops.  The intent is that a
    commit counts once per directory no matter how many files inside it
    changed (this relies on ``touched_directories`` reporting each directory
    at most once per call).

    No kind / language / import filtering is applied at this granularity.
    ``commits_analysed`` is the number of commits walked, including those
    without a structured delta.
    """
    history, hit_cap = walk_commits_bfs(
        root, to_commit_id, max_commits, stop_at_commit_id=from_commit_id
    )

    tally: _IntMap = {}
    for entry in history:
        if entry.structured_delta is not None:
            for directory in touched_directories(entry.structured_delta["ops"]):
                tally[directory] = tally.get(directory, 0) + 1

    return tally, len(history), hit_cap
| 217 | |
| 218 | |
| 219 | # --------------------------------------------------------------------------- |
| 220 | # Argument parser registration |
| 221 | # --------------------------------------------------------------------------- |
| 222 | |
def register(subparsers: "argparse._SubParsersAction[argparse.ArgumentParser]") -> None:
    """Attach the ``hotspots`` subcommand and its options to *subparsers*.

    The module docstring doubles as the long ``--help`` description and is
    shown verbatim (raw formatter).  ``run`` is installed as the handler via
    the parser's ``func`` default.
    """
    hotspots_parser = subparsers.add_parser(
        "hotspots",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__,
        help="Show the symbols that change most often — the churn leaderboard.",
    )
    add = hotspots_parser.add_argument

    # Result sizing.
    add(
        "--top", "-n",
        dest="top", metavar="N", type=int, default=_DEFAULT_TOP,
        help=f"Number of symbols to show (default: {_DEFAULT_TOP}).",
    )
    add(
        "--min",
        dest="min_changes", metavar="N", type=int, default=1,
        help="Only show symbols that changed at least N times (default: 1).",
    )

    # Symbol filters.
    add(
        "--kind", "-k",
        dest="kind_filter", metavar="KIND", default=None,
        help="Restrict to symbols of this kind (function, class, method, …).",
    )
    add(
        "--language", "-l",
        dest="language_filter", metavar="LANG", default=None,
        help="Restrict to symbols from files of this language (case-insensitive).",
    )
    add(
        "--include-imports",
        dest="include_imports", action="store_true",
        help="Include import pseudo-symbols (excluded by default).",
    )

    # Commit range.
    add(
        "--from",
        dest="from_ref", metavar="REF", default=None,
        help="Exclusive start of the commit range (default: initial commit).",
    )
    add(
        "--to",
        dest="to_ref", metavar="REF", default=None,
        help="Inclusive end of the commit range (default: HEAD).",
    )
    add(
        "--max-commits",
        dest="max_commits", metavar="N", type=int, default=_DEFAULT_MAX_COMMITS,
        help=f"Maximum commits to scan (default: {_DEFAULT_MAX_COMMITS}).",
    )

    # Aggregation and output format.
    add(
        "--granularity",
        dest="granularity", metavar="LEVEL",
        choices=("symbol", "directory"), default="symbol",
        help="Aggregation level: 'symbol' (default) or 'directory'.",
    )
    add(
        "--json", "-j",
        dest="json_out", action="store_true",
        help="Emit results as structured JSON.",
    )

    hotspots_parser.set_defaults(func=run)
| 274 | |
| 275 | # --------------------------------------------------------------------------- |
| 276 | # Command entry point |
| 277 | # --------------------------------------------------------------------------- |
| 278 | |
def run(args: argparse.Namespace) -> None:
    """Show the symbols that change most often — the churn leaderboard.

    Walks the commit history (BFS, both merge parents) and counts how many
    commits touched each symbol.  High-churn symbols reveal instability that
    file-level metrics miss: a stable file can contain a single burning
    function.  Use ``--from`` / ``--to`` to scope to a sprint or release.

    With ``--granularity directory`` the same walk is rolled up per directory;
    ``--kind``, ``--language`` and ``--include-imports`` do not apply there
    (a warning is logged if they are supplied).

    Agent quickstart
    ----------------
    ::

        muse code hotspots --json
        muse code hotspots --top 20 --json
        muse code hotspots --kind function --min 3 --json
        muse code hotspots --from HEAD~50 --json

    JSON fields
    -----------
    from_ref            Start ref used (exclusive).
    to_ref              End ref used (inclusive).
    commits_analysed    Number of commits walked.
    truncated           ``true`` if ``--max-commits`` was reached.
    filters             Echo of filter arguments used.
    hotspots            Ranked list: ``address``, ``changes`` (commit count).
    granularity         Aggregation level used: ``symbol`` or ``directory``.

    Exit codes
    ----------
    0   Analysis complete.
    1   Invalid arguments or ref not found.
    2   Not inside a Muse repository.
    """
    elapsed = start_timer()
    top: int = clamp_int(args.top, 1, 10_000, 'top')
    min_changes: int = clamp_int(args.min_changes, 0, 100000, 'min_changes')
    kind_filter: str | None = args.kind_filter
    language_filter: str | None = args.language_filter
    include_imports: bool = args.include_imports
    from_ref: str | None = args.from_ref
    to_ref: str | None = args.to_ref
    max_commits: int = clamp_int(args.max_commits, 1, 100_000, 'max_commits')
    json_out: bool = args.json_out
    # Fall back to symbol mode for callers that build a Namespace by hand.
    granularity: str = getattr(args, "granularity", "symbol")
    by_directory = granularity == "directory"
    noun, noun_plural = (
        ("directory", "directories") if by_directory else ("symbol", "symbols")
    )

    # ── Validation ────────────────────────────────────────────────────────────
    # NOTE(review): depending on how clamp_int treats out-of-range values some
    # of these checks may be unreachable — confirm against muse.core.validation.

    if top < 1:
        print("❌ --top must be at least 1.", file=sys.stderr)
        raise SystemExit(ExitCode.USER_ERROR)

    if min_changes < 1:
        print("❌ --min must be at least 1.", file=sys.stderr)
        raise SystemExit(ExitCode.USER_ERROR)

    if max_commits < 1:
        print("❌ --max-commits must be at least 1.", file=sys.stderr)
        raise SystemExit(ExitCode.USER_ERROR)

    if language_filter is not None:
        language_filter = normalise_language(language_filter)

    # The directory collector takes no symbol filters; say so instead of
    # silently dropping them.
    if by_directory and (kind_filter or language_filter or include_imports):
        logger.warning(
            "--kind, --language and --include-imports have no effect "
            "with --granularity directory"
        )

    def emit_json(
        to_label: str,
        commits_analysed: int,
        was_truncated: bool,
        hotspots: list[_HotspotEntry],
    ) -> None:
        """Print the JSON payload; shared by the empty-repo and normal paths."""
        payload = dict(_HotspotsOutputJson(
            **make_envelope(elapsed),
            from_ref=from_ref,
            to_ref=to_label,
            commits_analysed=commits_analysed,
            truncated=was_truncated,
            filters={
                "kind": kind_filter,
                "language": language_filter,
                "include_imports": include_imports,
                "min_changes": min_changes,
            },
            hotspots=hotspots,
        ))
        # Extra key appended after the typed fields.
        payload["granularity"] = granularity
        print(json.dumps(payload))

    # ── Repo / commit resolution ──────────────────────────────────────────────

    root = require_repo()
    branch = read_current_branch(root)

    to_commit = resolve_commit_ref(root, branch, to_ref)
    if to_commit is None:
        if to_ref is not None:
            print(f"❌ Commit '{to_ref}' not found.", file=sys.stderr)
            raise SystemExit(ExitCode.USER_ERROR)
        # Empty repo — no commits yet; return empty result.
        if json_out:
            emit_json(branch, 0, False, [])
        else:
            print(f" (no {noun}-level changes found — repository has no commits)")
        return

    from_commit_id: str | None = None
    if from_ref is not None:
        from_commit = resolve_commit_ref(root, branch, from_ref)
        if from_commit is None:
            print(f"❌ Commit '{from_ref}' not found.", file=sys.stderr)
            raise SystemExit(ExitCode.USER_ERROR)
        from_commit_id = from_commit.commit_id

    # ── Churn analysis ────────────────────────────────────────────────────────

    if by_directory:
        counts, total_commits, truncated = _collect_directory_churn(
            root, to_commit.commit_id, from_commit_id, max_commits,
        )
    else:
        counts, total_commits, truncated = _collect_churn(
            root, to_commit.commit_id, from_commit_id,
            kind_filter, language_filter, include_imports, max_commits,
        )

    # Apply --min filter before ranking.
    if min_changes > 1:
        counts = {addr: n for addr, n in counts.items() if n >= min_changes}

    # Stable sort: entries with equal counts keep the order in which the walk
    # first encountered them.
    ranked = sorted(counts.items(), key=lambda kv: kv[1], reverse=True)[:top]

    # ── Output ────────────────────────────────────────────────────────────────

    if json_out:
        emit_json(
            to_ref or branch,
            total_commits,
            truncated,
            [{"address": a, "changes": c} for a, c in ranked],
        )
        return

    if not ranked:
        print(f" (no {noun}-level changes found in this range)")
        return

    # Only advertise filters that were actually applied at this granularity.
    filters_desc = ""
    if kind_filter and not by_directory:
        filters_desc += f" kind={kind_filter}"
    if language_filter and not by_directory:
        filters_desc += f" language={language_filter}"
    if min_changes > 1:
        filters_desc += f" min={min_changes}"

    print(
        f"\n{noun.capitalize()} churn — top {len(ranked)} "
        f"most-changed {noun_plural}{filters_desc}"
    )
    print(f"Commits analysed: {total_commits}", end="")
    if truncated:
        print(f" ⚠️ (capped at --max-commits {max_commits})", end="")
    print("\n")

    # Right-align rank numbers to the width of the largest rank shown.
    width = len(str(len(ranked)))
    for rank, (addr, count) in enumerate(ranked, 1):
        label = "change" if count == 1 else "changes"
        print(f" {rank:>{width}} {sanitize_display(addr):<60} {count:>4} {label}")

    print("")
    print("High churn = instability signal. Consider refactoring or adding tests.")
File History
1 commit
sha256:06dba78c2a78e251b580422dd1fd547f3c8357ff18f7709a860873b2d24dbbbf
chore: bump version to 0.2.0rc14
Sonnet 4.6
patch
13 hours ago