gabriel / muse public
stable.py python
367 lines 12.8 KB
Raw
sha256:ff478cfdcdd4b7fd6de89cb68896601a981f945634463275ec333bd20ca36402 Merge branch 'dev' into main Human 20 days ago
1 """muse code stable — symbol stability leaderboard.
2
3 The inverse of ``muse code hotspots``. Finds the symbols that have been
4 unchanged the longest — your bedrock, the code you can safely build on.
5
6 A function that hasn't needed modification across 50 commits is either
7 perfectly written or perfectly scoped. Either way, it's stable. Build
8 your architecture around stable symbols.
9
10 Documentation file symbols (Markdown, TOML, YAML, JSON, plain text) are
11 excluded by default because they almost never appear in structured-delta ops
12 and would otherwise crowd out all code results. Pass ``--include-docs`` to
13 include them.
14
15 Import pseudo-symbols (``::import::*``) are excluded by default. Pass
16 ``--include-imports`` to include them.
17
18 Usage::
19
20 muse code stable
21 muse code stable --top 20
22 muse code stable --kind function --language Python
23 muse code stable --since v2.0.0 # stability window since a tag
24 muse code stable --json # machine-readable for agents
25
26 Output::
27
28 Symbol stability — top 10 most stable symbols
29 Commits analysed: 302
30
31 1 muse/core/store.py::content_hash unchanged for 302 commits (since first commit)
32 2 muse/core/store.py::sha256_bytes unchanged for 287 commits
33 3 muse/core/repo.py::require_repo unchanged for 241 commits
34
35 These are your bedrock. High stability = safe to build on.
36 """
37
38 import argparse
39 import json
40 import logging
41 import pathlib
42 import sys
43 from typing import TypedDict
44
45 from muse.core.envelope import EnvelopeJson, make_envelope
46 from muse.core.errors import ExitCode
47 from muse.core.repo import require_repo
48 from muse.core.refs import read_current_branch
49 from muse.core.commits import resolve_commit_ref
50 from muse.core.snapshots import get_commit_snapshot_manifest
51 from muse.core.timing import start_timer
52 from muse.plugins.code._query import (
53 flat_symbol_ops,
54 language_of,
55 normalise_language,
56 symbols_for_snapshot,
57 walk_commits_bfs,
58 )
59 from muse.core.validation import clamp_int, sanitize_display
60
class _StableEntry(TypedDict):
    """One ranked row of the stability leaderboard, as emitted under ``stable`` in JSON."""

    # Symbol address of the ranked symbol (taken from the HEAD snapshot).
    address: str
    # Number of commits the symbol has gone without being touched.
    unchanged_for: int
    # True when no commit in the analysed window touched the symbol at all.
    since_start_of_range: bool
65
66 class _StableFilters(TypedDict):
67 top: int
68 kind: str | None
69 language: str | None
70 since: str | None
71 include_imports: bool
72 include_docs: bool
73 max_commits: int
74
class _StableJson(EnvelopeJson):
    """Full ``--json`` payload: the shared envelope fields plus the stability report."""

    # Start of the commit window: the ``--since`` ref, or "(beginning)".
    from_ref: str
    # Ref at the head of the walk (the current branch).
    to_ref: str
    # Number of commits actually walked.
    commits_analysed: int
    # True when the walk was cut short by ``--max-commits``.
    truncated: bool
    # Effective values of every filter argument.
    filters: _StableFilters
    # Ranked leaderboard rows, most stable first.
    stable: list[_StableEntry]
82
83 type _IntMap = dict[str, int]
84 type _Filters = dict[str, str | int | bool | None]
85 type _StrMap = dict[str, str]
86 logger = logging.getLogger(__name__)
87
88 _DEFAULT_TOP = 20
89 _DEFAULT_MAX_COMMITS = 10_000
90
91 # Languages to exclude by default: documentation formats whose symbols are
92 # almost never touched by structured-delta ops and crowd out code results.
93 _DOC_LANGUAGES: frozenset[str] = frozenset({
94 "Markdown", "Text", "TOML", "YAML", "JSON", "reStructuredText",
95 })
96
def register(subparsers: "argparse._SubParsersAction[argparse.ArgumentParser]") -> None:
    """Register the ``stable`` subcommand on *subparsers*.

    The module docstring is used as the subcommand description, and ``run``
    is installed as the handler through ``set_defaults(func=run)``.

    Arguments
    ---------
    --top / -n N
        Number of symbols to show (default 20).
    --kind / -k KIND
        Restrict to this symbol kind (function, class, method, …).
    --language / -l LANG
        Restrict to files of this language (case-insensitive).
    --since REF
        Only count commits from HEAD back to this ref.
    --max-commits N
        Maximum commits to scan (default 10 000).
    --include-imports
        Include import pseudo-symbols (excluded by default).
    --include-docs
        Include symbols from documentation files (excluded by default).
    --json / -j
        Emit machine-readable JSON with schema_version, exit_code,
        duration_ms, and the full stability list.
    """
    parser = subparsers.add_parser(
        "stable",
        help="Show the symbols that have been unchanged the longest.",
        description=__doc__,
        # Keep the usage/output examples in the module docstring verbatim.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        # ``-n`` is the documented short form; ``dest`` is still derived from
        # the long option, so ``args.top`` is unchanged for callers.
        "--top", "-n",
        type=int,
        default=_DEFAULT_TOP,
        metavar="N",
        help=f"Number of symbols to show (default: {_DEFAULT_TOP}).",
    )
    parser.add_argument(
        "--kind", "-k",
        dest="kind_filter",
        default=None,
        metavar="KIND",
        help="Restrict to symbols of this kind (function, class, method, …).",
    )
    parser.add_argument(
        "--language", "-l",
        dest="language_filter",
        default=None,
        metavar="LANG",
        help="Restrict to symbols from files of this language (case-insensitive).",
    )
    parser.add_argument(
        "--since",
        dest="since_ref",
        default=None,
        metavar="REF",
        help=(
            "Only count commits reachable from HEAD back to this ref "
            "(tag, commit SHA, or branch). Useful for 'stable since v2.0'."
        ),
    )
    parser.add_argument(
        "--max-commits",
        type=int,
        default=_DEFAULT_MAX_COMMITS,
        metavar="N",
        help=f"Maximum commits to scan (default: {_DEFAULT_MAX_COMMITS}).",
    )
    parser.add_argument(
        "--include-imports",
        dest="include_imports",
        action="store_true",
        help="Include import pseudo-symbols (excluded by default).",
    )
    parser.add_argument(
        "--include-docs",
        dest="include_docs",
        action="store_true",
        help=(
            "Include symbols from documentation files — Markdown, TOML, "
            "YAML, JSON, plain text (excluded by default)."
        ),
    )
    parser.add_argument(
        "--json", "-j",
        dest="json_out",
        action="store_true",
        help="Emit results as JSON.",
    )
    parser.set_defaults(func=run)
186
def _rank_stability(
    tracked: set[str],
    last_touched: dict[str, int],
    total_commits: int,
    top: int,
) -> list[tuple[str, int, bool]]:
    """Return the *top* most stable ``(address, unchanged_for, since_start)`` rows.

    A symbol with no entry in *last_touched* was never touched in the window
    and is credited with *total_commits*; otherwise its score is the index of
    the newest commit that touched it (0 = HEAD). Ties are broken by address
    in ascending order.
    """
    rows: list[tuple[str, int, bool]] = []
    for addr in tracked:
        touch_idx = last_touched.get(addr)
        if touch_idx is None:
            rows.append((addr, total_commits, True))
        else:
            rows.append((addr, touch_idx, False))
    # One sort: highest count first, then address ascending for equal counts.
    rows.sort(key=lambda row: (-row[1], row[0]))
    return rows[:top]


def _print_human_report(
    ranked: list[tuple[str, int, bool]],
    *,
    total_commits: int,
    truncated: bool,
    max_commits: int,
    kind_filter: str | None,
    language_filter: str | None,
    since_ref: str | None,
) -> None:
    """Print the human-readable leaderboard for *ranked* to stdout."""
    filter_parts: list[str] = []
    if kind_filter:
        filter_parts.append(f"kind={kind_filter}")
    if language_filter:
        filter_parts.append(f"language={language_filter}")
    if since_ref:
        filter_parts.append(f"since={since_ref}")
    filters_str = f" {' '.join(filter_parts)}" if filter_parts else ""
    range_label = f"since {since_ref}" if since_ref else "across all history"

    print(f"\nSymbol stability — top {len(ranked)} most stable symbols{filters_str}")
    print(f"Commits analysed: {total_commits} ({range_label})")
    if truncated:
        print(f"⚠️ Scan capped at {max_commits} commits — pass --max-commits to extend.")
    print("")

    # Right-align the rank column to the width of the largest rank shown.
    width = len(str(len(ranked)))
    for rank, (addr, count, since_start) in enumerate(ranked, 1):
        suffix = " (since start of range)" if since_start else ""
        label = "commit" if count == 1 else "commits"
        print(f" {rank:>{width}} {sanitize_display(addr):<60} unchanged for {count:>4} {label}{suffix}")

    print("")
    print("These are your bedrock. High stability = safe to build on.")


def run(args: argparse.Namespace) -> None:
    """Show the symbols that have been unchanged the longest.

    ``muse code stable`` is the complement of ``muse code hotspots``. It
    identifies the bedrock of your codebase — the functions, classes, and
    methods that have remained stable across the most commits. Symbols that
    haven't needed modification reveal your stable API surface.

    Reads ``top``, ``kind_filter``, ``language_filter``, ``since_ref``,
    ``max_commits``, ``include_imports``, ``include_docs`` and ``json_out``
    from *args*, and prints either a JSON document or a human-readable
    table to stdout. Raises ``SystemExit(ExitCode.USER_ERROR)`` when there
    are no commits or the ``--since`` ref cannot be resolved.

    Agent quickstart::

        muse code stable --json
        muse code stable --top 20 --json
        muse code stable --kind function --language Python --json
        muse code stable --since v2.0.0 --json

    JSON fields::

        from_ref          Starting boundary of the commit window (``--since`` ref or ``"(beginning)"``).
        to_ref            Branch or ref at HEAD.
        commits_analysed  Number of commits actually walked.
        truncated         ``true`` when ``--max-commits`` capped the walk.
        filters           Effective values of all filter arguments.
        stable            Ranked entries: ``address``, ``unchanged_for`` (int), ``since_start_of_range`` (bool).
        muse_version      Muse release that produced this output.
        schema            Envelope schema version (int).
        exit_code         ``0`` on success.
        duration_ms       Wall-clock milliseconds for the command.
        timestamp         ISO-8601 UTC timestamp of command completion.
        warnings          List of non-fatal advisory messages.

    Exit codes::

        0  Success.
        1  User error (bad ref, bad filter).
    """
    elapsed = start_timer()
    top: int = clamp_int(args.top, 1, 10000, 'top')
    kind_filter: str | None = args.kind_filter
    language_filter: str | None = args.language_filter
    since_ref: str | None = args.since_ref
    max_commits: int = clamp_int(args.max_commits, 1, 100000, 'max_commits')
    include_imports: bool = args.include_imports
    include_docs: bool = args.include_docs
    json_out: bool = args.json_out

    # NOTE(review): clamp_int presumably already enforces the lower bound of 1,
    # which would make these guards unreachable — confirm before removing them.
    if top < 1:
        print("❌ --top must be >= 1.", file=sys.stderr)
        raise SystemExit(ExitCode.USER_ERROR)
    if max_commits < 1:
        print("❌ --max-commits must be >= 1.", file=sys.stderr)
        raise SystemExit(ExitCode.USER_ERROR)

    if language_filter is not None:
        language_filter = normalise_language(language_filter)

    if kind_filter is not None:
        kind_filter = kind_filter.strip().lower()

    root = require_repo()
    branch = read_current_branch(root)

    head_commit = resolve_commit_ref(root, branch, None)
    if head_commit is None:
        print("❌ No commits found.", file=sys.stderr)
        raise SystemExit(ExitCode.USER_ERROR)

    # Resolve optional --since boundary.
    stop_at: str | None = None
    if since_ref is not None:
        since_commit = resolve_commit_ref(root, branch, since_ref)
        if since_commit is None:
            print(f"❌ Could not resolve --since ref: {since_ref!r}", file=sys.stderr)
            raise SystemExit(ExitCode.USER_ERROR)
        stop_at = since_commit.commit_id

    # 1. Collect all symbols that exist in the HEAD snapshot.
    manifest = get_commit_snapshot_manifest(root, head_commit.commit_id) or {}
    symbol_map = symbols_for_snapshot(
        root, manifest, kind_filter=kind_filter, language_filter=language_filter
    )

    # Build the universe of symbol addresses to track, applying doc/import filters.
    all_current_addrs: set[str] = set()
    for file_path_str, tree in symbol_map.items():
        file_lang = language_of(file_path_str)
        if not include_docs and file_lang in _DOC_LANGUAGES:
            continue
        for addr in tree:
            if not include_imports and "::import::" in addr:
                continue
            all_current_addrs.add(addr)

    # 2. Walk commits newest-first via BFS (follows both parent_commit_id and
    #    parent2_commit_id so merged feature-branch commits are not missed).
    commits, truncated = walk_commits_bfs(
        root,
        head_commit.commit_id,
        max_commits=max_commits,
        stop_at_commit_id=stop_at,
    )
    total_commits = len(commits)

    # Record the newest commit index at which each tracked symbol was touched.
    # Index 0 = most recent commit; a symbol recorded at index N has been
    # unchanged for N commits since that touch.
    last_touched: _IntMap = {}
    for idx, commit in enumerate(commits):
        # Only the first (newest) touch is recorded, so once every tracked
        # symbol has one, older commits cannot change the result.
        if len(last_touched) == len(all_current_addrs):
            break
        if commit.structured_delta is None:
            continue
        for op in flat_symbol_ops(commit.structured_delta["ops"]):
            addr = op["address"]
            if addr in all_current_addrs and addr not in last_touched:
                last_touched[addr] = idx

    # 3. Compute stability for every tracked symbol and keep the top N.
    ranked = _rank_stability(all_current_addrs, last_touched, total_commits, top)

    if json_out:
        # Built as _StableFilters so it matches the declared type of
        # _StableJson.filters; keys and their order are unchanged.
        filters = _StableFilters(
            top=top,
            kind=kind_filter,
            language=language_filter,
            since=since_ref,
            include_imports=include_imports,
            include_docs=include_docs,
            max_commits=max_commits,
        )
        print(json.dumps(_StableJson(
            **make_envelope(elapsed),
            from_ref=since_ref or "(beginning)",
            to_ref=branch,
            commits_analysed=total_commits,
            truncated=truncated,
            filters=filters,
            stable=[
                _StableEntry(
                    address=a,
                    unchanged_for=s,
                    since_start_of_range=sf,
                )
                for a, s, sf in ranked
            ],
        )))
        return

    # Human-readable output.
    _print_human_report(
        ranked,
        total_commits=total_commits,
        truncated=truncated,
        max_commits=max_commits,
        kind_filter=kind_filter,
        language_filter=language_filter,
        since_ref=since_ref,
    )
File History 2 commits
sha256:ff478cfdcdd4b7fd6de89cb68896601a981f945634463275ec333bd20ca36402 Merge branch 'dev' into main Human 20 days ago
sha256:1c4b3e3a9a1f300774c3ee662b572a698d5fd405bf765a71e6011a2e9c3eaaaa feat: Muse — version control for the agent era Human 73 days ago