languages.py
python
sha256:18b983389ee1b55900fcd799bfbb496552d2e3ecded9d18cefbfef188947a12e
chore: remove blob-debug test marker file
Sonnet 4.6
19 hours ago
| 1 | """muse code languages — language breakdown of the current snapshot. |
| 2 | |
| 3 | Shows the composition of the repository by programming language — |
| 4 | how many files, symbols, and which symbol kinds are present for |
| 5 | each language. |
| 6 | |
| 7 | By default import pseudo-symbols are excluded from counts so the |
| 8 | numbers reflect semantic code density (functions, classes, methods, |
| 9 | sections) rather than dependency volume. Pass ``--include-imports`` |
| 10 | to add them back. |
| 11 | |
| 12 | Pass ``--diff REF`` to see how language composition *changed* between |
| 13 | an earlier commit and the current snapshot (or ``--commit`` target). |
| 14 | |
| 15 | Usage:: |
| 16 | |
| 17 | muse code languages |
| 18 | muse code languages --commit a3f2c9 |
| 19 | muse code languages --diff a3f2c9 |
| 20 | muse code languages --diff a3f2c9 --commit 97fe523d |
| 21 | muse code languages --sort symbols |
| 22 | muse code languages --json |
| 23 | |
| 24 | Output:: |
| 25 | |
| 26 | Language breakdown — commit 97fe523d |
| 27 | |
| 28 | Python 378 files 12 347 symbols (fn: 1974 class: 969 method: 3908 var: 797) |
| 29 | Markdown 45 files 2 239 symbols (section: 1502 var: 737) |
| 30 | TOML 1 file 56 symbols (var: 56) |
| 31 | Shell 2 files 0 symbols |
| 32 | ──────────────────────────────────────────────────────────────────────────── |
| 33 | Total 437 files 14 642 symbols (6 languages) |
| 34 | |
| 35 | Diff output (--diff a3f2c9):: |
| 36 | |
| 37 | Language change — a3f2c9..97fe523d |
| 38 | |
| 39 | Python +12 files +382 symbols (+3.2%) |
| 40 | Markdown +1 file +14 symbols (+0.6%) |
| 41 | TOML (unchanged) |
| 42 | ──────────────────────────────────────────────────────────────────────────── |
| 43 | Net +13 files +396 symbols |
| 44 | """ |
| 45 | |
| 46 | import argparse |
| 47 | import json |
| 48 | import logging |
| 49 | import pathlib |
| 50 | import sys |
| 51 | from typing import TypedDict |
| 52 | |
| 53 | from muse.core.envelope import EnvelopeJson, make_envelope |
| 54 | from muse.core.errors import ExitCode |
| 55 | from muse.core.repo import require_repo |
| 56 | from muse.core.timing import start_timer |
| 57 | from muse.core.types import Manifest |
| 58 | from muse.core.refs import read_current_branch |
| 59 | from muse.core.commits import resolve_commit_ref |
| 60 | from muse.core.snapshots import get_commit_snapshot_manifest |
| 61 | from muse.core.symbol_cache import SymbolCache, load_symbol_cache |
| 62 | from muse.plugins.code._query import language_of, symbols_for_snapshot |
| 63 | |
| 64 | logger = logging.getLogger(__name__) |
| 65 | |
| 66 | type LangCount = dict[str, int] # language → count (files or symbols) |
| 67 | type KindCounts = dict[str, int] # kind → count |
| 68 | type LangKinds = dict[str, KindCounts] # language → kind counts |
| 69 | _KindLabelMap = dict[str, str] |
| 70 | |
| 71 | # Kinds treated as import pseudo-symbols — excluded from default counts. |
| 72 | _IMPORT_KINDS: frozenset[str] = frozenset({"import"}) |
| 73 | |
| 74 | # Display order and labels for known kinds. |
| 75 | _KIND_LABEL: _KindLabelMap = { |
| 76 | "function": "fn", |
| 77 | "async_function": "fn~", |
| 78 | "class": "class", |
| 79 | "method": "method", |
| 80 | "async_method": "method~", |
| 81 | "section": "section", |
| 82 | "variable": "var", |
| 83 | "import": "import", |
| 84 | } |
| 85 | |
| 86 | _SORT_CHOICES = ("name", "files", "symbols") |
| 87 | |
| 88 | class _CommitSummaryDict(TypedDict): |
| 89 | commit_id: str |
| 90 | message: str |
| 91 | |
class _LangEntry(TypedDict):
    """One language row of the snapshot-mode JSON output."""

    # Language name as reported by ``language_of``.
    language: str
    # Number of files of this language in the snapshot.
    files: int
    # Number of counted symbols of this language.
    symbols: int
    # Per-kind symbol counts (kind → count), hence ``KindCounts`` rather than
    # the language-keyed ``LangCount`` alias.
    kinds: KindCounts
| 97 | |
| 98 | class _DiffEntry(TypedDict): |
| 99 | language: str |
| 100 | delta_files: int |
| 101 | delta_symbols: int |
| 102 | files_before: int |
| 103 | files_after: int |
| 104 | symbols_before: int |
| 105 | symbols_after: int |
| 106 | status: str # "added" | "removed" | "changed" | "unchanged" |
| 107 | |
class _SnapshotOutputJson(EnvelopeJson):
    """Payload printed by ``muse code languages --json`` in snapshot mode.

    Extends :class:`~muse.core.envelope.EnvelopeJson`, so the envelope fields
    declared there are present alongside the ones below.

    Fields
    ------
    commit           Metadata of the inspected commit (``commit_id``, ``message``).
    include_imports  Whether ``--include-imports`` was passed.
    languages        One entry per language (``language``, ``files``,
                     ``symbols``, ``kinds``).
    """

    commit: _CommitSummaryDict
    include_imports: bool
    languages: list[_LangEntry]
| 123 | |
class _DiffOutputJson(EnvelopeJson):
    """Payload printed by ``muse code languages --diff REF --json``.

    Extends :class:`~muse.core.envelope.EnvelopeJson`, so the envelope fields
    declared there are present alongside the ones below.

    Fields
    ------
    from_commit      Metadata of the earlier commit (``commit_id``, ``message``).
    to_commit        Metadata of the later commit (``commit_id``, ``message``).
    include_imports  Whether ``--include-imports`` was passed.
    diff             One diff entry per language.
    """

    from_commit: _CommitSummaryDict
    to_commit: _CommitSummaryDict
    include_imports: bool
    diff: list[_DiffEntry]
| 141 | |
| 142 | def _first_line(message: str) -> str: |
| 143 | for line in message.splitlines(): |
| 144 | s = line.strip() |
| 145 | if s: |
| 146 | return s |
| 147 | return message.strip() |
| 148 | |
def _collect_stats(
    root: pathlib.Path,
    manifest: Manifest,
    include_imports: bool,
    cache: SymbolCache | None,
) -> tuple[LangCount, LangCount, LangKinds]:
    """Tally files, symbols and symbol kinds per language for one manifest.

    Args:
        root: Repository root handed to ``symbols_for_snapshot``.
        manifest: Snapshot manifest; every path in it counts as one file of
            its language.
        include_imports: When False, symbols whose kind is in
            ``_IMPORT_KINDS`` are left out of both the symbol totals and the
            per-kind breakdown.
        cache: Symbol cache forwarded to ``symbols_for_snapshot`` (may be None).

    Returns:
        ``(lang_files, lang_symbols, lang_kinds)``.  A language only appears
        in ``lang_symbols`` once at least one of its symbols was counted.
    """
    per_file_symbols = symbols_for_snapshot(root, manifest, cache=cache)

    # File counts come straight from the manifest paths.
    lang_files: LangCount = {}
    for path in manifest:
        language = language_of(path)
        lang_files[language] = lang_files.get(language, 0) + 1

    # Symbol and kind counts come from the parsed symbol trees.
    lang_symbols: LangCount = {}
    lang_kinds: LangKinds = {}
    for path, tree in per_file_symbols.items():
        language = language_of(path)
        kind_counts = lang_kinds.setdefault(language, {})
        for record in tree.values():
            kind: str = record["kind"]
            if kind in _IMPORT_KINDS and not include_imports:
                continue
            lang_symbols[language] = lang_symbols.get(language, 0) + 1
            kind_counts[kind] = kind_counts.get(kind, 0) + 1

    return lang_files, lang_symbols, lang_kinds
| 182 | |
def _kind_str(kinds: KindCounts) -> str:
    """Format a kind → count mapping as a parenthesised suffix.

    Kinds listed in ``_KIND_LABEL`` come first, in that table's order and
    under their short labels; any other kinds follow alphabetically under
    their raw names.

    Args:
        kinds: Symbol counts keyed by kind for a single language.

    Returns:
        A string such as ``" (fn: 3, class: 1)"``, or ``""`` when *kinds*
        is empty.
    """
    known = [
        f"{label}: {kinds[kind]}"
        for kind, label in _KIND_LABEL.items()
        if kind in kinds
    ]
    # Kinds without a label entry are appended in alphabetical order.
    extra = [
        f"{kind}: {kinds[kind]}"
        for kind in sorted(kinds)
        if kind not in _KIND_LABEL
    ]
    pieces = known + extra
    if not pieces:
        return ""
    return f" ({', '.join(pieces)})"
| 196 | |
def _sorted_langs(
    lang_files: LangCount,
    lang_symbols: LangCount,
    sort_by: str,
) -> list[str]:
    """Return the languages of *lang_files* in display order.

    ``"files"`` and ``"symbols"`` order by the respective count, largest
    first, breaking ties by name; any other *sort_by* value orders by name.
    Languages missing from *lang_symbols* count as zero symbols.
    """
    if sort_by == "files":
        return sorted(lang_files, key=lambda name: (-lang_files[name], name))
    if sort_by == "symbols":
        return sorted(
            lang_files,
            key=lambda name: (-lang_symbols.get(name, 0), name),
        )
    return sorted(lang_files)
| 210 | |
def register(subparsers: "argparse._SubParsersAction[argparse.ArgumentParser]") -> None:
    """Attach the ``languages`` subcommand and its options to *subparsers*.

    The module docstring is used verbatim as the long description, and
    :func:`run` is installed as the handler through the ``func`` default.
    """
    sort_help = f"Sort output by: {', '.join(_SORT_CHOICES)} (default: name)."

    cmd = subparsers.add_parser(
        "languages",
        help="Show the language composition of the repository.",
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    add_option = cmd.add_argument

    # Which commit(s) to look at.
    add_option(
        "--commit", "-c",
        dest="ref",
        metavar="REF",
        default=None,
        help="Commit to inspect (default: HEAD).",
    )
    add_option(
        "--diff", "-d",
        dest="diff_ref",
        metavar="REF",
        default=None,
        help="Show the language composition *change* from REF to --commit (or HEAD).",
    )

    # How to count and present the results.
    add_option(
        "--sort", "-s",
        dest="sort_by",
        metavar="KEY",
        choices=_SORT_CHOICES,
        default="name",
        help=sort_help,
    )
    add_option(
        "--include-imports",
        dest="include_imports",
        action="store_true",
        help="Include import pseudo-symbols in counts (excluded by default).",
    )
    add_option(
        "--json", "-j",
        dest="json_out",
        action="store_true",
        help="Emit results as JSON.",
    )

    cmd.set_defaults(func=run)
| 254 | |
def _diff_status(
    files_before: int,
    files_after: int,
    symbols_before: int,
    symbols_after: int,
) -> str:
    """Classify one language's change as added, removed, unchanged or changed."""
    if files_before == 0 and files_after > 0:
        return "added"
    if files_before > 0 and files_after == 0:
        return "removed"
    if files_before == files_after and symbols_before == symbols_after:
        return "unchanged"
    return "changed"


def run(args: argparse.Namespace) -> None:
    """Show the language composition of the repository.

    Counts files and semantic symbols by programming language. Import
    pseudo-symbols are excluded by default. Use ``--diff REF`` to see how the
    breakdown changed between two commits — useful for sprint or release drift
    analysis.

    Agent quickstart
    ----------------
    ::

        muse code languages --json
        muse code languages --commit HEAD~10 --json
        muse code languages --diff HEAD~20 --json
        muse code languages --sort symbols --json

    JSON fields (snapshot mode)
    ----------------------------
    commit           Commit metadata: ``commit_id``, ``message``.
    include_imports  True when ``--include-imports`` was passed.
    languages        List of language entries: ``language``, ``files``,
                     ``symbols``, ``kinds`` (by symbol kind), ordered
                     according to ``--sort``.

    JSON fields (``--diff`` mode)
    ------------------------------
    from_commit      Start commit metadata.
    to_commit        End commit metadata.
    include_imports  True when ``--include-imports`` was passed.
    diff             List of language diff entries: ``language``,
                     ``delta_files``, ``delta_symbols``, ``files_before``,
                     ``files_after``, ``symbols_before``, ``symbols_after``,
                     ``status``.  Always ordered by language name; ``--sort``
                     only affects the text output in this mode.

    JSON envelope fields
    --------------------
    exit_code    0 on success; non-zero on failure.
    duration_ms  Wall-clock time in milliseconds for the analysis.

    Exit codes
    ----------
    0  Analysis complete.
    1  Ref not found or invalid arguments.
    2  Not inside a Muse repository.
    """
    elapsed = start_timer()
    ref: str | None = args.ref
    diff_ref: str | None = args.diff_ref
    sort_by: str = args.sort_by
    include_imports: bool = args.include_imports
    json_out: bool = args.json_out

    root = require_repo()
    branch = read_current_branch(root)

    commit_b = resolve_commit_ref(root, branch, ref)
    if commit_b is None:
        print(f"❌ Commit '{ref or 'HEAD'}' not found.", file=sys.stderr)
        raise SystemExit(ExitCode.USER_ERROR)

    # Resolve the --diff ref before any symbol scanning so that an unknown
    # ref is reported immediately instead of after the expensive analysis.
    commit_a = resolve_commit_ref(root, branch, diff_ref) if diff_ref is not None else None
    if diff_ref is not None and commit_a is None:
        print(f"❌ Commit '{diff_ref}' not found.", file=sys.stderr)
        raise SystemExit(ExitCode.USER_ERROR)

    manifest_b: Manifest = get_commit_snapshot_manifest(root, commit_b.commit_id) or {}

    # Shared cache across all snapshot loads.
    shared_cache = load_symbol_cache(root)

    files_b, syms_b, kinds_b = _collect_stats(root, manifest_b, include_imports, shared_cache)

    # ── diff mode ────────────────────────────────────────────────────────────
    if commit_a is not None:
        manifest_a: Manifest = get_commit_snapshot_manifest(root, commit_a.commit_id) or {}
        files_a, syms_a, _ = _collect_stats(root, manifest_a, include_imports, shared_cache)

        # A language is reported if it has files on either side.
        all_langs = sorted(set(files_a) | set(files_b))

        if json_out:
            entries: list[_DiffEntry] = []
            for lang in all_langs:
                fa = files_a.get(lang, 0)
                fb = files_b.get(lang, 0)
                sa = syms_a.get(lang, 0)
                sb = syms_b.get(lang, 0)
                entries.append(_DiffEntry(
                    language=lang,
                    delta_files=fb - fa,
                    delta_symbols=sb - sa,
                    files_before=fa,
                    files_after=fb,
                    symbols_before=sa,
                    symbols_after=sb,
                    status=_diff_status(fa, fb, sa, sb),
                ))
            print(json.dumps(_DiffOutputJson(
                **make_envelope(elapsed),
                from_commit={"commit_id": commit_a.commit_id, "message": _first_line(commit_a.message)},
                to_commit={"commit_id": commit_b.commit_id, "message": _first_line(commit_b.message)},
                include_imports=include_imports,
                diff=entries,
            )))
            return

        _print_diff(
            commit_a.commit_id, commit_b.commit_id,
            files_a, syms_a, files_b, syms_b,
            all_langs, sort_by,
        )
        return

    # ── snapshot mode ────────────────────────────────────────────────────────
    all_langs_snap = _sorted_langs(files_b, syms_b, sort_by)

    if json_out:
        out: list[_LangEntry] = [
            _LangEntry(
                language=lang,
                files=files_b[lang],
                symbols=syms_b.get(lang, 0),
                kinds=kinds_b.get(lang, {}),
            )
            for lang in all_langs_snap
        ]
        print(json.dumps(_SnapshotOutputJson(
            **make_envelope(elapsed),
            commit={"commit_id": commit_b.commit_id, "message": _first_line(commit_b.message)},
            include_imports=include_imports,
            languages=out,
        )))
        return

    _print_snapshot(commit_b.commit_id, files_b, syms_b, kinds_b, all_langs_snap)
| 393 | |
def _print_snapshot(
    commit_id: str,
    lang_files: LangCount,
    lang_symbols: LangCount,
    lang_kinds: LangKinds,
    langs: list[str],
) -> None:
    """Print the human-readable language breakdown for one snapshot.

    Args:
        commit_id: Commit identifier shown in the heading.
        lang_files: File count per language; must contain every entry of *langs*.
        lang_symbols: Symbol count per language; missing languages count as 0.
        lang_kinds: Per-language kind breakdown appended to each row.
        langs: Languages to print, already in display order.
    """
    total_label = "Total"
    print(f"\nLanguage breakdown — commit {commit_id}\n")

    # The name column must fit the summary label too, otherwise the "Total"
    # row drifts out of alignment when every language name is shorter than it.
    max_lang = max(max((len(l) for l in langs), default=8), len(total_label))

    total_files = total_syms = 0
    for lang in langs:
        files = lang_files[lang]
        syms = lang_symbols.get(lang, 0)
        total_files += files
        total_syms += syms
        ks = _kind_str(lang_kinds.get(lang, {}))
        # Singular label is padded to the width of the plural one.
        file_label = "file " if files == 1 else "files"
        print(f" {lang:<{max_lang}} {files:>4} {file_label} {syms:>6} symbols{ks}")

    print(f" {'─' * 66}")
    # Pluralise the summary the same way as the per-language rows.
    total_file_label = "file " if total_files == 1 else "files"
    lang_label = "language" if len(langs) == 1 else "languages"
    print(
        f" {total_label:<{max_lang}} {total_files:>4} {total_file_label} {total_syms:>6} symbols"
        f" ({len(langs)} {lang_label})"
    )
| 418 | |
def _print_diff(
    commit_id_a: str,
    commit_id_b: str,
    files_a: LangCount,
    syms_a: LangCount,
    files_b: LangCount,
    syms_b: LangCount,
    all_langs: list[str],
    sort_by: str,
) -> None:
    """Print the human-readable language change between two snapshots.

    Args:
        commit_id_a: Identifier of the earlier ("before") commit.
        commit_id_b: Identifier of the later ("after") commit.
        files_a: File count per language before.
        syms_a: Symbol count per language before.
        files_b: File count per language after.
        syms_b: Symbol count per language after.
        all_langs: Every language to report; languages without any change
            are still listed, marked ``(unchanged)``.
        sort_by: ``"symbols"`` or ``"files"`` orders rows by the magnitude of
            the respective delta (largest first, ties by name); any other
            value orders rows by name.
    """
    net_label = "Net"
    print(f"\nLanguage change — {commit_id_a}..{commit_id_b}\n")

    # The name column must fit the summary label too, otherwise the "Net" row
    # drifts out of alignment when every language name is shorter than it.
    max_lang = max(max((len(l) for l in all_langs), default=8), len(net_label))
    net_files = net_syms = 0

    def _sort_key(lang: str) -> tuple[int, str]:
        # Negated magnitude gives "largest change first"; name breaks ties.
        if sort_by == "symbols":
            magnitude = abs(syms_b.get(lang, 0) - syms_a.get(lang, 0))
        elif sort_by == "files":
            magnitude = abs(files_b.get(lang, 0) - files_a.get(lang, 0))
        else:
            magnitude = 0
        return (-magnitude, lang)

    for lang in sorted(all_langs, key=_sort_key):
        fa = files_a.get(lang, 0)
        fb = files_b.get(lang, 0)
        sa = syms_a.get(lang, 0)
        sb = syms_b.get(lang, 0)
        df = fb - fa
        ds = sb - sa
        net_files += df
        net_syms += ds

        if df == 0 and ds == 0:
            print(f" {lang:<{max_lang}} (unchanged)")
            continue

        status = ""
        if fa == 0:
            status = " (new)"
        elif fb == 0:
            status = " (removed)"

        # Relative symbol change; undefined when there were no symbols
        # before, in which case a gain is shown as "+∞".
        if sa > 0:
            pct = f" ({ds / sa * 100:+.1f}%)"
        elif ds > 0:
            pct = " (+∞)"
        else:
            pct = ""

        df_str = f"{df:+d} {'file' if abs(df) == 1 else 'files'}"
        ds_str = f"{ds:+d} symbols{pct}"
        print(f" {lang:<{max_lang}} {df_str:<14} {ds_str}{status}")

    print(f" {'─' * 66}")
    ndf_str = f"{net_files:+d} {'file' if abs(net_files) == 1 else 'files'}"
    nds_str = f"{net_syms:+d} symbols"
    print(f" {net_label:<{max_lang}} {ndf_str:<14} {nds_str}")
File History
7 commits
sha256:18b983389ee1b55900fcd799bfbb496552d2e3ecded9d18cefbfef188947a12e
chore: remove blob-debug test marker file
Sonnet 4.6
19 hours ago
sha256:e452ad9a6ace6ccc6d875a35e06caf9da5576a970c1c36133b69a891ce5fefa8
chore: prebuild timing test
Sonnet 4.6
8 days ago
sha256:0008ab6695e3e064b3e236b24fd19e538fef6a588eb0d211622f4466d919c0b1
merge: pull staging/dev — advance to 0.2.0rc12
Sonnet 4.6
patch
9 days ago
sha256:9c33d61749fff814c5226d5386aa2af7064c2c02788594a25fdd709358132eea
fix: _PROPOSAL_PREFIX_RESOLVE_LIMIT 200 → 100 to match hub …
Sonnet 4.6
21 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e
fix: rename objects→blobs in push client and all stale test…
Sonnet 4.6
patch
24 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a
fix: repair four test failures from post-migration audit
Sonnet 4.6
patch
30 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf
fix: unified object store migration — idempotent writes, JS…
Sonnet 4.6
minor
⚠
30 days ago