gabriel / muse public
languages.py python
475 lines 16.0 KB
Raw
sha256:18b983389ee1b55900fcd799bfbb496552d2e3ecded9d18cefbfef188947a12e chore: remove blob-debug test marker file Sonnet 4.6 19 hours ago
1 """muse code languages — language breakdown of the current snapshot.
2
3 Shows the composition of the repository by programming language —
4 how many files, symbols, and which symbol kinds are present for
5 each language.
6
7 By default import pseudo-symbols are excluded from counts so the
8 numbers reflect semantic code density (functions, classes, methods,
9 sections) rather than dependency volume. Pass ``--include-imports``
10 to add them back.
11
12 Pass ``--diff REF`` to see how language composition *changed* between
13 an earlier commit and the current snapshot (or ``--commit`` target).
14
15 Usage::
16
17 muse code languages
18 muse code languages --commit a3f2c9
19 muse code languages --diff a3f2c9
20 muse code languages --diff a3f2c9 --commit 97fe523d
21 muse code languages --sort symbols
22 muse code languages --json
23
24 Output::
25
26 Language breakdown — commit 97fe523d
27
28 Python 378 files 12 347 symbols (fn: 1974 class: 969 method: 3908 var: 797)
29 Markdown 45 files 2 239 symbols (section: 1502 var: 737)
30 TOML 1 file 56 symbols (var: 56)
31 Shell 2 files 0 symbols
32 ────────────────────────────────────────────────────────────────────────────
33 Total 437 files 14 642 symbols (6 languages)
34
35 Diff output (--diff a3f2c9)::
36
37 Language change — a3f2c9..97fe523d
38
39 Python +12 files +382 symbols (+3.2%)
40 Markdown +1 file +14 symbols (+0.6%)
41 TOML (unchanged)
42 ────────────────────────────────────────────────────────────────────────────
43 Net +13 files +396 symbols
44 """
45
46 import argparse
47 import json
48 import logging
49 import pathlib
50 import sys
51 from typing import TypedDict
52
53 from muse.core.envelope import EnvelopeJson, make_envelope
54 from muse.core.errors import ExitCode
55 from muse.core.repo import require_repo
56 from muse.core.timing import start_timer
57 from muse.core.types import Manifest
58 from muse.core.refs import read_current_branch
59 from muse.core.commits import resolve_commit_ref
60 from muse.core.snapshots import get_commit_snapshot_manifest
61 from muse.core.symbol_cache import SymbolCache, load_symbol_cache
62 from muse.plugins.code._query import language_of, symbols_for_snapshot
63
64 logger = logging.getLogger(__name__)
65
# Type aliases shared by the collectors and printers in this module.
type LangCount = dict[str, int]  # language → count (files or symbols)
type KindCounts = dict[str, int]  # kind → count
type LangKinds = dict[str, KindCounts]  # language → kind counts
# Symbol kind → short display label. Declared as a plain assignment rather
# than with the ``type`` statement used for the aliases above.
_KindLabelMap = dict[str, str]

# Kinds treated as import pseudo-symbols — excluded from default counts.
_IMPORT_KINDS: frozenset[str] = frozenset({"import"})

# Display order and labels for known kinds.
# The insertion order of this dict is the order in which _kind_str emits the
# known kinds; kinds missing from it are appended afterwards, alphabetically,
# under their raw kind name.
_KIND_LABEL: _KindLabelMap = {
    "function": "fn",
    "async_function": "fn~",
    "class": "class",
    "method": "method",
    "async_method": "method~",
    "section": "section",
    "variable": "var",
    "import": "import",
}

# Accepted values for ``--sort`` (passed to argparse as ``choices``).
_SORT_CHOICES = ("name", "files", "symbols")
87
class _CommitSummaryDict(TypedDict):
    """Commit metadata embedded in the JSON output.

    Used for the ``commit`` field in snapshot mode and for ``from_commit`` /
    ``to_commit`` in ``--diff`` mode.
    """

    commit_id: str  # the resolved commit's ``commit_id``
    message: str  # first non-blank line of the commit message
91
class _LangEntry(TypedDict):
    """One per-language row of the snapshot-mode ``languages`` JSON list."""

    language: str  # language name as returned by ``language_of``
    files: int  # number of manifest files attributed to the language
    symbols: int  # counted symbols; 0 when none were counted for the language
    # Keyed by symbol kind, not by language, so the alias is KindCounts.
    kinds: KindCounts  # symbol kind → count for this language
97
class _DiffEntry(TypedDict):
    """One per-language row of the ``--diff`` JSON ``diff`` list.

    "Before" values come from the ``--diff`` commit and "after" values from
    the ``--commit`` target (or HEAD). Each ``delta_*`` field is
    ``after - before``.
    """

    language: str
    delta_files: int  # files_after - files_before
    delta_symbols: int  # symbols_after - symbols_before
    files_before: int
    files_after: int
    symbols_before: int
    symbols_after: int
    # "added": no files before, some after. "removed": some before, none after.
    # "unchanged": file and symbol counts both equal. Otherwise "changed".
    status: str  # "added" | "removed" | "changed" | "unchanged"
107
class _SnapshotOutputJson(EnvelopeJson):
    """JSON envelope emitted by ``muse code languages --json`` (snapshot mode).

    Inherits the 6 standard envelope fields from :class:`~muse.core.envelope.EnvelopeJson`.
    Built by ``run`` when ``--json`` is passed without ``--diff``.

    Fields
    ------
    commit           Commit metadata dict (commit_id, message).
    include_imports  True when --include-imports was passed.
    languages        List of per-language entries (language, files, symbols, kinds),
                     ordered according to ``--sort``.
    """

    commit: _CommitSummaryDict
    include_imports: bool
    languages: list[_LangEntry]
123
class _DiffOutputJson(EnvelopeJson):
    """JSON envelope emitted by ``muse code languages --diff REF --json``.

    Inherits the 6 standard envelope fields from :class:`~muse.core.envelope.EnvelopeJson`.
    Built by ``run`` when both ``--diff`` and ``--json`` are passed.

    Fields
    ------
    from_commit      Source commit metadata dict (commit_id, message).
    to_commit        Target commit metadata dict (commit_id, message).
    include_imports  True when --include-imports was passed.
    diff             List of per-language diff entries, ordered by language name.
    """

    from_commit: _CommitSummaryDict
    to_commit: _CommitSummaryDict
    include_imports: bool
    diff: list[_DiffEntry]
141
def _first_line(message: str) -> str:
    """Return the first non-blank line of *message*, stripped of whitespace.

    Falls back to the whole message stripped when no line has visible
    content (for example an empty or whitespace-only message).
    """
    stripped_lines = (raw.strip() for raw in message.splitlines())
    return next((text for text in stripped_lines if text), message.strip())
148
def _collect_stats(
    root: pathlib.Path,
    manifest: Manifest,
    include_imports: bool,
    cache: SymbolCache | None,
) -> tuple[LangCount, LangCount, LangKinds]:
    """Tally files, symbols and symbol kinds per language for one manifest.

    Every path in *manifest* counts as one file for its language. Symbols
    come from ``symbols_for_snapshot``; import pseudo-symbols are skipped in
    both the symbol totals and the kind breakdown unless *include_imports*
    is true.

    Returns:
        ``(lang_files, lang_symbols, lang_kinds)``. A language with files
        but no counted symbols has no key in ``lang_symbols``.
    """
    symbol_map = symbols_for_snapshot(root, manifest, cache=cache)

    # File counts come from the manifest itself, not from the symbol map.
    lang_files: LangCount = {}
    for path in manifest:
        language = language_of(path)
        lang_files[language] = lang_files.get(language, 0) + 1

    lang_symbols: LangCount = {}
    lang_kinds: LangKinds = {}
    for path, tree in symbol_map.items():
        language = language_of(path)
        # The kind bucket is created even when every symbol ends up skipped.
        per_kind = lang_kinds.setdefault(language, {})
        counted = [
            record["kind"]
            for record in tree.values()
            if include_imports or record["kind"] not in _IMPORT_KINDS
        ]
        if not counted:
            continue
        lang_symbols[language] = lang_symbols.get(language, 0) + len(counted)
        for kind in counted:
            per_kind[kind] = per_kind.get(kind, 0) + 1

    return lang_files, lang_symbols, lang_kinds
182
def _kind_str(kinds: KindCounts) -> str:
    """Format a kind → count mapping as a parenthesised suffix.

    Kinds listed in ``_KIND_LABEL`` come first, in that table's order and
    under their short label. Any other kinds follow alphabetically under
    their raw name.

    Args:
        kinds: Symbol kind → count for a single language.

    Returns:
        A string such as ``" (fn: 3, class: 1)"`` with a leading space, or
        ``""`` when *kinds* is empty.
    """
    known = [
        f"{label}: {kinds[kind]}"
        for kind, label in _KIND_LABEL.items()
        if kind in kinds
    ]
    unknown = [
        f"{kind}: {kinds[kind]}"
        for kind in sorted(kinds)
        if kind not in _KIND_LABEL
    ]
    parts = known + unknown
    return f" ({', '.join(parts)})" if parts else ""
196
197 def _sorted_langs(
198 lang_files: LangCount,
199 lang_symbols: LangCount,
200 sort_by: str,
201 ) -> list[str]:
202 all_langs = list(lang_files)
203 if sort_by == "files":
204 all_langs.sort(key=lambda l: (-lang_files[l], l))
205 elif sort_by == "symbols":
206 all_langs.sort(key=lambda l: (-lang_symbols.get(l, 0), l))
207 else:
208 all_langs.sort()
209 return all_langs
210
def register(subparsers: "argparse._SubParsersAction[argparse.ArgumentParser]") -> None:
    """Attach the ``languages`` subcommand and its options to *subparsers*.

    The module docstring is used verbatim as the long description, and
    ``run`` is installed as the handler through the ``func`` default.
    """
    languages_parser = subparsers.add_parser(
        "languages",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__,
        help="Show the language composition of the repository.",
    )
    add_option = languages_parser.add_argument

    # Options that take a value.
    add_option(
        "--commit", "-c",
        metavar="REF",
        dest="ref",
        default=None,
        help="Commit to inspect (default: HEAD).",
    )
    add_option(
        "--diff", "-d",
        metavar="REF",
        dest="diff_ref",
        default=None,
        help="Show the language composition *change* from REF to --commit (or HEAD).",
    )
    add_option(
        "--sort", "-s",
        metavar="KEY",
        dest="sort_by",
        choices=_SORT_CHOICES,
        default="name",
        help=f"Sort output by: {', '.join(_SORT_CHOICES)} (default: name).",
    )

    # Boolean switches.
    add_option(
        "--include-imports",
        action="store_true",
        dest="include_imports",
        help="Include import pseudo-symbols in counts (excluded by default).",
    )
    add_option(
        "--json", "-j",
        action="store_true",
        dest="json_out",
        help="Emit results as JSON.",
    )

    languages_parser.set_defaults(func=run)
254
def _diff_status(
    files_before: int,
    files_after: int,
    symbols_before: int,
    symbols_after: int,
) -> str:
    """Classify one language's change as added, removed, unchanged or changed."""
    if files_before == 0 and files_after > 0:
        return "added"
    if files_before > 0 and files_after == 0:
        return "removed"
    if files_before == files_after and symbols_before == symbols_after:
        return "unchanged"
    return "changed"


def run(args: argparse.Namespace) -> None:
    """Show the language composition of the repository.

    Counts files and semantic symbols by programming language. Import
    pseudo-symbols are excluded by default. Use ``--diff REF`` to see how the
    breakdown changed between two commits — useful for sprint or release drift
    analysis.

    Agent quickstart
    ----------------
    ::

        muse code languages --json
        muse code languages --commit HEAD~10 --json
        muse code languages --diff HEAD~20 --json
        muse code languages --sort symbols --json

    JSON fields (snapshot mode)
    ----------------------------
    commit           Commit metadata: ``commit_id``, ``message``.
    include_imports  True when ``--include-imports`` was passed.
    languages        List of language entries: ``language``, ``files``,
                     ``symbols``, ``kinds`` (by symbol kind), ordered
                     according to ``--sort``.

    JSON fields (``--diff`` mode)
    ------------------------------
    from_commit      Start commit metadata.
    to_commit        End commit metadata.
    include_imports  True when ``--include-imports`` was passed.
    diff             List of language diff entries: ``language``,
                     ``delta_files``, ``delta_symbols``, ``files_before``,
                     ``files_after``, ``symbols_before``, ``symbols_after``,
                     ``status``. Always ordered by language name; ``--sort``
                     only affects the text output in this mode.

    JSON envelope fields
    --------------------
    exit_code    0 on success; non-zero on failure.
    duration_ms  Wall-clock time in milliseconds for the analysis.

    Exit codes
    ----------
    0  Analysis complete.
    1  Ref not found or invalid arguments.
    2  Not inside a Muse repository.
    """
    elapsed = start_timer()
    ref: str | None = args.ref
    diff_ref: str | None = args.diff_ref
    sort_by: str = args.sort_by
    include_imports: bool = args.include_imports
    json_out: bool = args.json_out

    root = require_repo()
    branch = read_current_branch(root)

    commit_b = resolve_commit_ref(root, branch, ref)
    if commit_b is None:
        print(f"❌ Commit '{ref or 'HEAD'}' not found.", file=sys.stderr)
        raise SystemExit(ExitCode.USER_ERROR)

    # Resolve the --diff base before any symbol extraction, so that a bad
    # ref fails immediately instead of after analysing the target commit.
    commit_a = None
    if diff_ref is not None:
        commit_a = resolve_commit_ref(root, branch, diff_ref)
        if commit_a is None:
            print(f"❌ Commit '{diff_ref}' not found.", file=sys.stderr)
            raise SystemExit(ExitCode.USER_ERROR)

    # A commit without a stored manifest is treated as an empty snapshot.
    manifest_b: Manifest = get_commit_snapshot_manifest(root, commit_b.commit_id) or {}

    # Shared cache across all snapshot loads.
    shared_cache = load_symbol_cache(root)

    files_b, syms_b, kinds_b = _collect_stats(root, manifest_b, include_imports, shared_cache)

    # ── diff mode ────────────────────────────────────────────────────────────
    if commit_a is not None:
        manifest_a: Manifest = get_commit_snapshot_manifest(root, commit_a.commit_id) or {}
        files_a, syms_a, _ = _collect_stats(root, manifest_a, include_imports, shared_cache)

        # Every language present on either side, in name order.
        all_langs = sorted(set(files_a) | set(files_b))

        if json_out:
            entries: list[_DiffEntry] = []
            for lang in all_langs:
                fa = files_a.get(lang, 0)
                fb = files_b.get(lang, 0)
                sa = syms_a.get(lang, 0)
                sb = syms_b.get(lang, 0)
                entries.append(_DiffEntry(
                    language=lang,
                    delta_files=fb - fa,
                    delta_symbols=sb - sa,
                    files_before=fa,
                    files_after=fb,
                    symbols_before=sa,
                    symbols_after=sb,
                    status=_diff_status(fa, fb, sa, sb),
                ))
            print(json.dumps(_DiffOutputJson(
                **make_envelope(elapsed),
                from_commit={"commit_id": commit_a.commit_id, "message": _first_line(commit_a.message)},
                to_commit={"commit_id": commit_b.commit_id, "message": _first_line(commit_b.message)},
                include_imports=include_imports,
                diff=entries,
            )))
            return

        _print_diff(
            commit_a.commit_id, commit_b.commit_id,
            files_a, syms_a, files_b, syms_b,
            all_langs, sort_by,
        )
        return

    # ── snapshot mode ────────────────────────────────────────────────────────
    all_langs_snap = _sorted_langs(files_b, syms_b, sort_by)

    if json_out:
        out: list[_LangEntry] = [
            _LangEntry(
                language=lang,
                files=files_b[lang],
                symbols=syms_b.get(lang, 0),
                kinds=kinds_b.get(lang, {}),
            )
            for lang in all_langs_snap
        ]
        print(json.dumps(_SnapshotOutputJson(
            **make_envelope(elapsed),
            commit={"commit_id": commit_b.commit_id, "message": _first_line(commit_b.message)},
            include_imports=include_imports,
            languages=out,
        )))
        return

    _print_snapshot(commit_b.commit_id, files_b, syms_b, kinds_b, all_langs_snap)
393
def _print_snapshot(
    commit_id: str,
    lang_files: LangCount,
    lang_symbols: LangCount,
    lang_kinds: LangKinds,
    langs: list[str],
) -> None:
    """Print the human-readable language breakdown for one commit.

    Emits a header, one row per language in the order given by *langs*
    (file count, symbol count and kind breakdown), a separator, and a
    ``Total`` footer row.

    Args:
        commit_id: Identifier shown in the header line.
        lang_files: Language → file count; must contain every entry of *langs*.
        lang_symbols: Language → symbol count; missing languages count as 0.
        lang_kinds: Language → kind counts used for the parenthesised suffix.
        langs: Languages to print, already in display order.
    """
    print(f"\nLanguage breakdown — commit {commit_id}\n")

    footer_label = "Total"
    # ``default`` only applies to an empty list, so the footer label is
    # included explicitly; otherwise short language names (e.g. "C", "Go")
    # leave the Total row shifted out of column.
    label_width = max(
        max((len(name) for name in langs), default=8),
        len(footer_label),
    )

    total_files = 0
    total_syms = 0
    for lang in langs:
        files = lang_files[lang]
        syms = lang_symbols.get(lang, 0)
        total_files += files
        total_syms += syms
        ks = _kind_str(lang_kinds.get(lang, {}))
        # The singular label is padded to the plural's width to keep columns aligned.
        file_label = "file " if files == 1 else "files"
        print(f" {lang:<{label_width}} {files:>4} {file_label} {syms:>6} symbols{ks}")

    print(f" {'─' * 66}")
    total_file_label = "file " if total_files == 1 else "files"
    language_label = "language" if len(langs) == 1 else "languages"
    print(
        f" {footer_label:<{label_width}} {total_files:>4} {total_file_label} {total_syms:>6} symbols"
        f" ({len(langs)} {language_label})"
    )
418
def _print_diff(
    commit_id_a: str,
    commit_id_b: str,
    files_a: LangCount,
    syms_a: LangCount,
    files_b: LangCount,
    syms_b: LangCount,
    all_langs: list[str],
    sort_by: str,
) -> None:
    """Print the human-readable language change between two commits.

    Emits a header, one row per language with signed file and symbol deltas
    (``b`` minus ``a``), a separator, and a ``Net`` footer row. Languages
    whose file and symbol counts are both unchanged print ``(unchanged)``.

    Args:
        commit_id_a: Identifier of the base ("before") commit.
        commit_id_b: Identifier of the target ("after") commit.
        files_a: Language → file count at the base commit.
        syms_a: Language → symbol count at the base commit.
        files_b: Language → file count at the target commit.
        syms_b: Language → symbol count at the target commit.
        all_langs: Every language to report; missing counts are treated as 0.
        sort_by: ``"symbols"`` or ``"files"`` orders rows by the absolute
            size of that delta, largest first, with name as tie-breaker.
            Any other value orders rows by name.
    """
    print(f"\nLanguage change — {commit_id_a}..{commit_id_b}\n")

    footer_label = "Net"
    # ``default`` only applies to an empty list, so the footer label is
    # included explicitly; otherwise one- or two-letter language names
    # leave the Net row shifted out of column.
    label_width = max(
        max((len(name) for name in all_langs), default=8),
        len(footer_label),
    )

    def _sort_key(lang: str) -> tuple[int, str]:
        if sort_by == "symbols":
            return (-abs(syms_b.get(lang, 0) - syms_a.get(lang, 0)), lang)
        if sort_by == "files":
            return (-abs(files_b.get(lang, 0) - files_a.get(lang, 0)), lang)
        # Default: plain name order.
        return (0, lang)

    net_files = 0
    net_syms = 0
    for lang in sorted(all_langs, key=_sort_key):
        fa = files_a.get(lang, 0)
        fb = files_b.get(lang, 0)
        sa = syms_a.get(lang, 0)
        sb = syms_b.get(lang, 0)
        df = fb - fa
        ds = sb - sa
        net_files += df
        net_syms += ds

        if df == 0 and ds == 0:
            print(f" {lang:<{label_width}} (unchanged)")
            continue

        if fa == 0:
            status = " (new)"
        elif fb == 0:
            status = " (removed)"
        else:
            status = ""

        # A relative change is undefined when there were no symbols before;
        # growth from zero is shown as +∞ and anything else gets no suffix.
        if sa > 0:
            pct = f" ({ds / sa * 100:+.1f}%)"
        elif ds > 0:
            pct = " (+∞)"
        else:
            pct = ""

        df_str = f"{df:+d} {'file' if abs(df) == 1 else 'files'}"
        ds_str = f"{ds:+d} symbols{pct}"
        print(f" {lang:<{label_width}} {df_str:<14} {ds_str}{status}")

    print(f" {'─' * 66}")
    ndf_str = f"{net_files:+d} {'file' if abs(net_files) == 1 else 'files'}"
    nds_str = f"{net_syms:+d} symbols"
    print(f" {footer_label:<{label_width}} {ndf_str:<14} {nds_str}")
File History 7 commits
sha256:18b983389ee1b55900fcd799bfbb496552d2e3ecded9d18cefbfef188947a12e chore: remove blob-debug test marker file Sonnet 4.6 19 hours ago
sha256:e452ad9a6ace6ccc6d875a35e06caf9da5576a970c1c36133b69a891ce5fefa8 chore: prebuild timing test Sonnet 4.6 8 days ago
sha256:0008ab6695e3e064b3e236b24fd19e538fef6a588eb0d211622f4466d919c0b1 merge: pull staging/dev — advance to 0.2.0rc12 Sonnet 4.6 patch 9 days ago
sha256:9c33d61749fff814c5226d5386aa2af7064c2c02788594a25fdd709358132eea fix: _PROPOSAL_PREFIX_RESOLVE_LIMIT 200 → 100 to match hub … Sonnet 4.6 21 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 24 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 30 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor 30 days ago