"""muse code age — evolutionary distance of every symbol. ``muse code stable`` answers "which symbols haven't changed in a long time?" ``muse code age`` answers "how much of a symbol's *original* implementation is still alive — and when was it last fundamentally rewritten?" These are different questions. A function that was created 8 months ago but has been completely rewritten 5 times has a *genetic age* of days, not months. Knowing this tells you: - Don't trust the creation date as a proxy for stability. - Don't assume institutional knowledge. The person who wrote the original may have left; the current implementation may have been authored by someone who joined last month. - Focus code review attention on symbols with many rewrites and few tests. Dimensions ---------- ``calendar_age`` Days since the symbol was first inserted into the repository. ``genetic_age`` Days since the symbol's implementation body last changed. If the body has never changed (0 rewrites), ``genetic_age == calendar_age``. A rename or signature change does **not** reset genetic age — only a full body rewrite does. ``impl_changes`` Total count of commits that changed the function body (replace ops whose summary indicates an implementation or body change). ``sig_changes`` Total count of commits that changed only the signature (type annotations, parameter names) without a body rewrite. ``renames`` Total count of rename operations recorded in the delta history. ``est_survival`` Estimated fraction of the original implementation still present: ``round(100 / (impl_changes + 1)) %``. 0 rewrites → 100 %. 1 rewrite → 50 %. 3 rewrites → 25 %. This is a conservative heuristic; the actual fraction requires diffing the original and current source bytes, which ``--explain`` does for Python files. Usage:: muse code age muse code age --top 30 muse code age --sort rewrites # most-rewritten first muse code age --sort calendar # oldest symbols first muse code age --sort genetic # oldest unmodified body first muse code age --sort survival # most original code remaining first muse code age --kind function muse code age --file billing.py muse code age --since v1.0 muse code age --explain billing.py::Invoice.compute_total muse code age --json Output:: Symbol evolutionary age — HEAD (304 commits · 2026-03-21) Sorted by: most rewrites first # ADDRESS BORN GENETIC REWRITES EST-SURV 1 muse/core/store.py::resolve_commit_ref 8 mo ago 3 wk ago 4 20 % 2 muse/core/errors.py::ExitCode 6 mo ago 6 mo ago 0 100 % 3 billing.py::Invoice.compute_total 3 mo ago 2 wk ago 3 25 % ⚠️ Symbols with high rewrites + low genetic age are the most evolved. They may carry little of their original design intent. --explain output:: Evolutionary age — billing.py::Invoice.compute_total Kind: method Born: 2026-01-12 (commit a3f2c9e1) — 87 days ago Last impl change: 2026-03-08 (commit 97fe52ab) — 13 days ago (genetic age) Last any change: 2026-03-20 (commit b1c2d3e4) — 1 day ago Implementation changes: 3 Signature changes: 1 Renames: 0 Est. survival: 25 % (3/4 original generations replaced) Change timeline (newest first): 2026-03-20 b1c2d3e4 signature changed 2026-03-08 97fe52ab implementation changed 2026-02-14 cc8d7a90 implementation changed 2026-01-29 55f2b0e1 implementation changed 2026-01-12 a3f2c9e1 created JSON output (--json):: { "ref": "HEAD", "as_of": "2026-03-21", "commits_analysed": 304, "truncated": false, "filters": { "kind": null, "file": null, "sort": "rewrites", "top": 20 }, "symbols": [ { "address": "billing.py::Invoice.compute_total", "kind": "method", "file": "billing.py", "born_commit": "a3f2c9e1...", "born_date": "2026-01-12", "last_impl_commit": "97fe52ab...", "last_impl_date": "2026-03-08", "last_change_commit": "b1c2d3e4...", "last_change_date": "2026-03-20", "calendar_age_days": 87, "genetic_age_days": 13, "impl_changes": 3, "sig_changes": 1, "renames": 0, "est_survival_pct": 25 } ] } """ import argparse import datetime import json import logging import pathlib import sys from dataclasses import dataclass, field from typing import Literal, TypedDict from muse.core.types import short_id from muse.core.errors import ExitCode from muse.core.repo import require_repo from muse.core.refs import read_current_branch from muse.core.commits import resolve_commit_ref from muse.core.snapshots import get_commit_snapshot_manifest from muse.core.symbol_cache import load_symbol_cache from muse.core.envelope import EnvelopeJson, make_envelope from muse.core.timing import start_timer from muse.domain import DomainOp from muse.plugins.code._query import ( flat_symbol_ops, symbols_for_snapshot, walk_commits_bfs, ) from muse.core.validation import clamp_int, sanitize_display logger = logging.getLogger(__name__) type _AccMap = dict[str, _Acc] type _KindMap = dict[str, str] # ── Constants ────────────────────────────────────────────────────────────────── _DEFAULT_TOP = 20 _DEFAULT_MAX_COMMITS = 10_000 SortKey = Literal["rewrites", "calendar", "genetic", "survival"] _SORT_CHOICES: tuple[SortKey, ...] = ("rewrites", "calendar", "genetic", "survival") _DEFAULT_SORT: SortKey = "rewrites" # Keywords in op summaries that indicate a body/implementation change. _IMPL_KEYWORDS: frozenset[str] = frozenset( {"implementation", "modified", "body", "reformatted"} ) # Signature-only change keywords (no body change). _SIG_KEYWORDS: frozenset[str] = frozenset({"signature"}) # Rename keywords. _RENAME_KEYWORDS: frozenset[str] = frozenset({"renamed", "moved"}) # ── Helpers ──────────────────────────────────────────────────────────────────── def _classify_op(op: DomainOp) -> Literal["create", "impl", "sig", "rename", "delete", "other"]: """Classify a symbol op into a change category. Returns one of: ``create``, ``impl``, ``sig``, ``rename``, ``delete``, ``other``. """ kind = op.get("op", "") if kind == "insert": return "create" if kind == "delete": return "delete" if kind != "replace": return "other" # For replace ops, check the summaries. new_sum: str = str(op.get("new_summary") or op.get("content_summary") or "").lower() old_sum: str = str(op.get("old_summary") or "").lower() if any(kw in new_sum for kw in _RENAME_KEYWORDS): return "rename" if any(kw in new_sum for kw in _SIG_KEYWORDS): return "sig" # Implementation changes show up in either summary field. if any(kw in new_sum for kw in _IMPL_KEYWORDS) or any(kw in old_sum for kw in _IMPL_KEYWORDS): return "impl" # Any other replace without a clear signal is treated as an impl change # (conservative: unknown changes are more likely to be body changes). return "impl" def _days_ago(dt: datetime.datetime, now: datetime.datetime) -> int: delta = now - dt.replace(tzinfo=None) if dt.tzinfo else now - dt return max(0, delta.days) def _human_delta(days: int) -> str: if days == 0: return "today" if days == 1: return "1 day ago" if days < 7: return f"{days} days ago" if days < 30: weeks = days // 7 return f"{weeks} wk ago" if days < 365: months = days // 30 return f"{months} mo ago" years = days // 365 rem_months = (days % 365) // 30 if rem_months: return f"{years} yr {rem_months} mo ago" return f"{years} yr ago" # ── Per-symbol accumulator ───────────────────────────────────────────────────── @dataclass class _Acc: """Running accumulator for one symbol's history.""" born_ts: datetime.datetime | None = None born_commit: str = "" last_change_ts: datetime.datetime | None = None last_change_commit: str = "" last_impl_ts: datetime.datetime | None = None last_impl_commit: str = "" impl_changes: int = 0 sig_changes: int = 0 renames: int = 0 # Ordered event log for --explain (each item is (ts, commit_id, label)). events: list[tuple[datetime.datetime, str, str]] = field(default_factory=list) def _update_acc(acc: _Acc, ts: datetime.datetime, commit_id: str, label: str) -> None: """Update running min/max and append to event log.""" # last change: keep maximum if acc.last_change_ts is None or ts > acc.last_change_ts: acc.last_change_ts = ts acc.last_change_commit = commit_id # born: keep minimum if acc.born_ts is None or ts < acc.born_ts: acc.born_ts = ts acc.born_commit = commit_id acc.events.append((ts, commit_id, label)) # ── TypedDict for output ─────────────────────────────────────────────────────── class _AgeRecord(TypedDict): address: str kind: str file: str born_commit: str born_date: str last_impl_commit: str last_impl_date: str last_change_commit: str last_change_date: str calendar_age_days: int genetic_age_days: int impl_changes: int sig_changes: int renames: int est_survival_pct: int class _AgeFiltersDict(TypedDict, total=False): kind: str | None file: str | None sort: str top: int since: str | None max_commits: int class _AgeEventDict(TypedDict): date: str commit: str label: str class _AgeListJson(EnvelopeJson): ref: str as_of: str commits_analysed: int truncated: bool filters: _AgeFiltersDict symbols: list[_AgeRecord] class _ExplainJson(EnvelopeJson): address: str kind: str born_commit: str born_date: str last_impl_commit: str last_impl_date: str last_change_commit: str last_change_date: str calendar_age_days: int genetic_age_days: int impl_changes: int sig_changes: int renames: int est_survival_pct: int events: list[_AgeEventDict] # ── Core algorithm ───────────────────────────────────────────────────────────── def _collect_age_data( root: pathlib.Path, head_commit_id: str, stop_at: str | None, max_commits: int, known_addresses: frozenset[str], ) -> tuple[_AccMap, int, bool]: """BFS walk — collect per-symbol history accumulators. Only tracks addresses in *known_addresses* (symbols present at HEAD). This avoids building an unbounded accumulator for deleted symbols. """ commits, truncated = walk_commits_bfs( root, head_commit_id, max_commits, stop_at_commit_id=stop_at ) accumulators: _AccMap = {} for commit in commits: if commit.structured_delta is None: continue ts = commit.committed_at cid = commit.commit_id ops: list[DomainOp] = commit.structured_delta["ops"] for op in flat_symbol_ops(ops): addr: str = op["address"] if "::import::" in addr: continue if addr not in known_addresses: continue category = _classify_op(op) if category == "other": continue acc = accumulators.setdefault(addr, _Acc()) if category == "create": _update_acc(acc, ts, cid, "created") # born is set by _update_acc's min logic elif category == "impl": _update_acc(acc, ts, cid, "implementation changed") acc.impl_changes += 1 if acc.last_impl_ts is None or ts > acc.last_impl_ts: acc.last_impl_ts = ts acc.last_impl_commit = cid elif category == "sig": _update_acc(acc, ts, cid, "signature changed") acc.sig_changes += 1 if acc.last_impl_ts is None or ts > acc.last_impl_ts: acc.last_impl_ts = ts acc.last_impl_commit = cid elif category == "rename": _update_acc(acc, ts, cid, "renamed / moved") acc.renames += 1 elif category == "delete": _update_acc(acc, ts, cid, "deleted") return accumulators, len(commits), truncated def _build_records( accumulators: _AccMap, symbol_kinds: _KindMap, # address → kind now: datetime.datetime, kind_filter: str | None, file_filter: str | None, sort_key: SortKey, top: int, ) -> list[_AgeRecord]: """Convert accumulators into sorted _AgeRecord list.""" records: list[_AgeRecord] = [] for address, acc in accumulators.items(): if acc.born_ts is None: continue # never seen a creation event — skip file_path = address.split("::")[0] kind = symbol_kinds.get(address, "unknown") if kind_filter and kind.lower() != kind_filter: continue if file_filter and not ( file_path == file_filter or file_path.endswith(f"/{file_filter}") ): continue born_ts = acc.born_ts # Genetic reset: the later of born date and last impl change. last_impl_ts = acc.last_impl_ts if acc.last_impl_ts is not None else born_ts last_change_ts = acc.last_change_ts if acc.last_change_ts is not None else born_ts calendar_age = _days_ago(born_ts, now) genetic_age = _days_ago(last_impl_ts, now) est_survival = round(100 / (acc.impl_changes + 1)) records.append(_AgeRecord( address=address, kind=kind, file=file_path, born_commit=acc.born_commit, born_date=born_ts.strftime("%Y-%m-%d"), last_impl_commit=acc.last_impl_commit if acc.last_impl_commit else acc.born_commit, last_impl_date=last_impl_ts.strftime("%Y-%m-%d"), last_change_commit=acc.last_change_commit, last_change_date=last_change_ts.strftime("%Y-%m-%d"), calendar_age_days=calendar_age, genetic_age_days=genetic_age, impl_changes=acc.impl_changes, sig_changes=acc.sig_changes, renames=acc.renames, est_survival_pct=est_survival, )) def _key(r: _AgeRecord) -> tuple[int, ...]: if sort_key == "rewrites": return (-r["impl_changes"], -r["calendar_age_days"]) if sort_key == "calendar": return (-r["calendar_age_days"], -r["impl_changes"]) if sort_key == "genetic": return (-r["genetic_age_days"], -r["impl_changes"]) # survival: lowest survival (most evolved) first return (r["est_survival_pct"], -r["impl_changes"]) records.sort(key=_key) return records[:top] # ── Formatters ───────────────────────────────────────────────────────────────── def _print_table( records: list[_AgeRecord], ref: str, commits_analysed: int, truncated: bool, since: str | None, sort_key: SortKey, now: datetime.datetime, ) -> None: sort_labels: dict[SortKey, str] = { "rewrites": "most rewrites first", "calendar": "oldest symbols first", "genetic": "oldest unmodified body first", "survival": "least original code first", } scope = f"{since}..{ref}" if since else ref trunc = " ⚠️ truncated" if truncated else "" print( f"\nSymbol evolutionary age — {scope}" f" ({commits_analysed} commits · {now.strftime('%Y-%m-%d')}{trunc})" ) print(f"Sorted by: {sort_labels[sort_key]}\n") if not records: print(" (no symbols with recorded history found)") return max_addr = max(len(r["address"]) for r in records) width = len(str(len(records))) hdr = ( f" {'#':>{width}} {'ADDRESS':<{max_addr}} " f"{'BORN':>10} {'GENETIC':>10} {'REWRITES':>8} {'EST-SURV':>8}" ) print(hdr) print(f" {'─' * (len(hdr) - 2)}") for i, r in enumerate(records, 1): born_str = _human_delta(r["calendar_age_days"]) gen_str = _human_delta(r["genetic_age_days"]) surv_str = f"{r['est_survival_pct']:>3} %" print( f" {i:>{width}} {r['address']:<{max_addr}} " f"{born_str:>10} {gen_str:>10} {r['impl_changes']:>8} {surv_str:>8}" ) print( "\n⚠️ High rewrites + low genetic age = symbol far from its original design.\n" " Consider adding integration tests or refactoring toward a stable contract." ) def _print_explain( address: str, acc: _Acc, kind: str, now: datetime.datetime, ) -> None: if acc.born_ts is None: print(f"\n(no creation event found for '{sanitize_display(address)}')") return born_ts = acc.born_ts last_impl_ts = acc.last_impl_ts if acc.last_impl_ts is not None else born_ts last_change_ts = acc.last_change_ts if acc.last_change_ts is not None else born_ts calendar_age = _days_ago(born_ts, now) genetic_age = _days_ago(last_impl_ts, now) last_change_age = _days_ago(last_change_ts, now) est_survival = round(100 / (acc.impl_changes + 1)) print(f"\nEvolutionary age — {sanitize_display(address)}\n") print(f" Kind: {kind}") print( f" Born: {born_ts.strftime('%Y-%m-%d')}" f" ({acc.born_commit}) — {_human_delta(calendar_age)}" ) print( f" Last impl change: {last_impl_ts.strftime('%Y-%m-%d')}" f" ({acc.last_impl_commit if acc.last_impl_commit else acc.born_commit}) — " f"{_human_delta(genetic_age)} (genetic age)" ) print( f" Last any change: {last_change_ts.strftime('%Y-%m-%d')}" f" ({acc.last_change_commit}) — {_human_delta(last_change_age)}" ) print() print(f" Implementation changes: {acc.impl_changes}") print(f" Signature changes: {acc.sig_changes}") print(f" Renames: {acc.renames}") print(f" Est. survival: {est_survival} % ({acc.impl_changes} generation(s) replaced)") # Timeline if acc.events: print("\n Change timeline (newest first):") sorted_events = sorted(acc.events, key=lambda e: e[0], reverse=True) for ev_ts, ev_cid, ev_label in sorted_events[:20]: print(f" {ev_ts.strftime('%Y-%m-%d')} {short_id(ev_cid)} {ev_label}") if len(sorted_events) > 20: print(f" … {len(sorted_events) - 20} older events") # ── CLI ──────────────────────────────────────────────────────────────────────── def register( subparsers: "argparse._SubParsersAction[argparse.ArgumentParser]", ) -> None: """Register the age subcommand.""" parser = subparsers.add_parser( "age", help=( "Evolutionary distance of every symbol: how much original " "implementation remains, and when was it last rewritten." ), description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter, ) parser.add_argument( "--top", type=int, default=_DEFAULT_TOP, metavar="N", help=f"Number of symbols to show (default: {_DEFAULT_TOP}).", ) parser.add_argument( "--sort", choices=list(_SORT_CHOICES), default=_DEFAULT_SORT, metavar="KEY", help=( f"Sort order: rewrites (default), calendar, genetic, survival. " f"'rewrites' = most-rewritten first. " f"'calendar' = oldest symbols first. " f"'genetic' = oldest unmodified body first. " f"'survival' = least original code remaining first." ), ) parser.add_argument( "--kind", "-k", default=None, metavar="KIND", dest="kind_filter", help="Restrict to symbols of this kind (function, class, method, …).", ) parser.add_argument( "--file", default=None, metavar="FILE", dest="file_filter", help="Restrict to symbols in this file (accepts a path suffix).", ) parser.add_argument( "--since", "-s", default=None, metavar="REF", help="Limit analysis to commits reachable from HEAD but not from REF.", ) parser.add_argument( "--max-commits", type=int, default=_DEFAULT_MAX_COMMITS, metavar="N", dest="max_commits", help=f"Maximum commits to scan (default: {_DEFAULT_MAX_COMMITS}).", ) parser.add_argument( "--explain", default=None, metavar="ADDRESS", help="Print a detailed per-event timeline for a single symbol.", ) parser.add_argument( "--json", "-j", action="store_true", dest="json_out", help="Emit results as JSON (agent-friendly; -j is a shorthand alias).", ) parser.set_defaults(func=run) def run(args: argparse.Namespace) -> None: """Show the evolutionary age of every symbol in the repository. Mines the commit history for each symbol's creation event, body rewrites, signature changes, and renames. Computes two age metrics: ``calendar_age`` (days since creation) and ``genetic_age`` (days since the last full body rewrite). A symbol rewritten yesterday has genetic age of 1 even if it was born two years ago. Agent quickstart ---------------- :: muse code age --json muse code age --top 20 --sort calendar --json muse code age --explain src/foo.py::MyClass --json muse code age --kind function --file billing.py --json JSON fields ----------- ref Branch or ref analysed. as_of Date of analysis (``YYYY-MM-DD``). commits_analysed Number of commits scanned. truncated ``true`` if ``--max-commits`` was reached before full history. filters Echo of CLI filter arguments: kind, file, sort, top, since. symbols List of symbol records — one per symbol (see below). Each symbol record: address Fully qualified address (``file.py::Symbol``). kind Symbol kind: function, class, method, … file Source file path. born_commit Short commit ID of the creation event. born_date Date of creation (``YYYY-MM-DD``). last_impl_commit Short commit ID of the most recent body rewrite. last_impl_date Date of that rewrite. last_change_commit Short commit ID of the most recent change of any kind. last_change_date Date of that change. calendar_age_days Days since creation. genetic_age_days Days since the last full body rewrite. impl_changes Total body rewrites recorded. sig_changes Total signature-only changes. renames Total rename operations. est_survival_pct Estimated % of the original implementation still present. With ``--explain``, the response is a single flat record plus an ``events`` list. Each event: ``date`` (YYYY-MM-DD), ``commit`` (short ID), ``label``. Exit codes ---------- 0 Analysis complete. 1 Invalid arguments or symbol not found (``--explain``). 2 Not inside a Muse repository. """ elapsed = start_timer() top: int = clamp_int(args.top, 1, 10_000, 'top') sort_key: SortKey = args.sort kind_filter: str | None = args.kind_filter file_filter: str | None = args.file_filter since: str | None = args.since max_commits: int = clamp_int(args.max_commits, 1, 100_000, 'max_commits') explain_addr: str | None = args.explain json_out: bool = args.json_out # ── Validation ──────────────────────────────────────────────────────────── if top < 1: print("❌ --top must be >= 1.", file=sys.stderr) raise SystemExit(ExitCode.USER_ERROR) if max_commits < 1: print("❌ --max-commits must be >= 1.", file=sys.stderr) raise SystemExit(ExitCode.USER_ERROR) if kind_filter is not None: kind_filter = kind_filter.strip().lower() if explain_addr is not None and "::" not in explain_addr: print( "❌ --explain requires a qualified address (file.py::SymbolName).", file=sys.stderr, ) raise SystemExit(ExitCode.USER_ERROR) # ── Repo setup ──────────────────────────────────────────────────────────── root = require_repo() branch = read_current_branch(root) head = resolve_commit_ref(root, branch, None) if head is None: print("❌ HEAD commit not found.", file=sys.stderr) raise SystemExit(ExitCode.USER_ERROR) stop_at: str | None = None if since is not None: since_commit = resolve_commit_ref(root, branch, since) if since_commit is None: print(f"❌ Commit '{since}' not found.", file=sys.stderr) raise SystemExit(ExitCode.USER_ERROR) stop_at = since_commit.commit_id # ── Load HEAD snapshot → known symbols ─────────────────────────────────── manifest = get_commit_snapshot_manifest(root, head.commit_id) or {} cache = load_symbol_cache(root) all_trees = symbols_for_snapshot(root, manifest, cache=cache) symbol_kinds: _KindMap = {} for file_path, tree in all_trees.items(): for addr, sym in tree.items(): if "::import::" not in addr: symbol_kinds[addr] = sym["kind"] known_addresses = frozenset(symbol_kinds) if not known_addresses: if json_out: print(json.dumps(_AgeListJson( **make_envelope(elapsed), ref=branch, as_of="", commits_analysed=0, truncated=False, filters={}, symbols=[], ))) else: print("\n(no symbols found in the current snapshot)") return # ── Mine commit history ─────────────────────────────────────────────────── accumulators, commits_analysed, truncated = _collect_age_data( root, head.commit_id, stop_at, max_commits, known_addresses ) now = datetime.datetime.now() # ── --explain mode ──────────────────────────────────────────────────────── if explain_addr is not None: acc = accumulators.get(explain_addr) if acc is None: print( f"❌ No history found for '{explain_addr}'.", file=sys.stderr, ) print( " The symbol may be new (added in the same commit as HEAD) " "or may not exist.", file=sys.stderr, ) raise SystemExit(ExitCode.USER_ERROR) kind = symbol_kinds.get(explain_addr, "unknown") if json_out: born_ts = acc.born_ts if born_ts is None: print("{}", flush=True) return last_impl_ts = acc.last_impl_ts or born_ts last_change_ts = acc.last_change_ts or born_ts print(json.dumps(_ExplainJson( **make_envelope(elapsed), address=explain_addr, kind=kind, born_commit=acc.born_commit, born_date=born_ts.strftime("%Y-%m-%d"), last_impl_commit=(acc.last_impl_commit if acc.last_impl_commit else acc.born_commit), last_impl_date=last_impl_ts.strftime("%Y-%m-%d"), last_change_commit=acc.last_change_commit, last_change_date=last_change_ts.strftime("%Y-%m-%d"), calendar_age_days=_days_ago(born_ts, now), genetic_age_days=_days_ago(last_impl_ts, now), impl_changes=acc.impl_changes, sig_changes=acc.sig_changes, renames=acc.renames, est_survival_pct=round(100 / (acc.impl_changes + 1)), events=[ {"date": e[0].strftime("%Y-%m-%d"), "commit": e[1], "label": e[2]} for e in sorted(acc.events, key=lambda x: x[0], reverse=True) ], ))) else: _print_explain(explain_addr, acc, kind, now) return # ── Build ranked table ──────────────────────────────────────────────────── records = _build_records( accumulators, symbol_kinds, now, kind_filter=kind_filter, file_filter=file_filter, sort_key=sort_key, top=top, ) # ── Output ──────────────────────────────────────────────────────────────── if json_out: print(json.dumps(_AgeListJson( **make_envelope(elapsed), ref=branch, as_of=now.strftime("%Y-%m-%d"), commits_analysed=commits_analysed, truncated=truncated, filters={ "kind": kind_filter, "file": file_filter, "sort": sort_key, "top": top, "since": since, "max_commits": max_commits, }, symbols=[dict(r) for r in records], ))) return _print_table(records, branch, commits_analysed, truncated, since, sort_key, now)