gabriel / muse public
velocity.py python
645 lines 24.1 KB
Raw
sha256:e6465e8a9b7fa8e6223ed4a3576e96c568c913ae2caeb9c31f15e7a81b250b40 docs: add | jq convention to --json section of agent-guide Sonnet 4.6 2 days ago
1 """muse code velocity — symbol-growth rate by module, with acceleration and next-change prediction.
2
3 This is the *energy map* of a codebase.
4
5 ``muse code hotspots`` tells you which symbols changed most.
6 ``muse code velocity`` tells you where the codebase is growing, where it is
7 shrinking, where it has stalled — and how fast each trend is accelerating.
8
9 It also answers a forward-looking question: **which symbols are most likely to
10 change in the next few commits?** This is computed statistically from
11 recency, frequency, and module acceleration.
12
13 Why this matters
14 ----------------
15 File commit counts or line-change counts obscure the signal. A file with 500
16 line changes might just be a re-format. A module that gained 12 new symbols
17 across 8 commits is actively expanding its API surface — that is genuine
18 architectural investment.
19
Two windows
-----------
22 Velocity is computed over two consecutive commit windows of equal size
23 (``--window N``, default 20):
24
25 * **Current window** — the most recent N commits.
26 * **Prior window** — the N commits before that.
27
28 The difference between the two gives **acceleration**:
29
30 * Positive acceleration = module growing faster than before.
31 * Negative acceleration = module is decelerating / winding down.
32 * Zero acceleration from zero velocity = stagnation.
33
34 Prediction
35 ----------
36 ``--predict K`` outputs the top K symbols most likely to change in the next
37 commit, ranked by a composite score:
38
39 ``score = frequency × recency_weight × module_velocity_weight``
40
41 where ``recency_weight = 1 / (1 + rank)`` (rank 0 = most recent commit).
42 This is a statistical signal, not a guarantee.
43
44 Usage::
45
46 muse code velocity
47 muse code velocity --window 10 --top 10
48 muse code velocity --predict 5
49 muse code velocity --since v1.0
50 muse code velocity --json
51
52 Output::
53
54 Symbol velocity — HEAD (40 commits · window: 20)
55 Sorted by: net growth, current window
56
57 MODULE ADD DEL NET MOD ACCEL BAR
58 muse/core/ +14 -2 +12 47 ▲ +4 ████████████
59 muse/cli/commands/ +11 -1 +10 31 ▲ +2 ██████████
60 tests/ +8 -0 +8 12 ▲ +3 ████████
61 muse/plugins/ +3 -5 -2 8 ▼ -1 ▏ (net negative)
62 docs/ 0 -0 0 0 ─ (stagnant 15 commits)
63
64 Acceleration leaders: muse/core/ (+4 net vs prior window)
    Stagnant modules: docs/ (15 commits)
66
67 --predict output::
68
69 Next-change predictions (top 5 by statistical likelihood):
70 1 muse/core/store.py::resolve_commit_ref score: 0.91
71 2 muse/cli/commands/dead.py::run score: 0.84
72 3 tests/test_code_commands.py::TestHotspots score: 0.71
73
74 JSON output (--json)::
75
76 {
77 "ref": "HEAD",
78 "window_size": 20,
79 "commits_analysed": 40,
80 "truncated": false,
81 "filters": { "top": 20, "since": null },
82 "modules": [
83 {
84 "module": "muse/core/",
85 "current": { "added": 14, "removed": 2, "net": 12, "modified": 47, "active_commits": 18 },
86 "prior": { "added": 10, "removed": 2, "net": 8, "modified": 33, "active_commits": 14 },
87 "acceleration": 4,
88 "stagnant_commits": 0
89 }
90 ],
91 "predictions": [
92 { "address": "muse/core/store.py::resolve_commit_ref", "score": 0.91 }
93 ]
94 }
95 """
96
97 import argparse
98 import json
99 import logging
100 import pathlib
101 import sys
102 from dataclasses import dataclass, field
103 from typing import TypedDict
104
105 from muse.core.envelope import EnvelopeJson, make_envelope
106 from muse.core.errors import ExitCode
107 from muse.core.repo import require_repo
108 from muse.core.refs import read_current_branch
109 from muse.core.commits import resolve_commit_ref
110 from muse.core.timing import start_timer
111 from muse.domain import DomainOp
112 from muse.plugins.code._query import flat_symbol_ops, walk_commits_bfs
113 from muse.core.validation import clamp_int, sanitize_display
114
# Aliases for the mapping shapes shared by the helpers in this module.
type _CounterMap = dict[str, int]  # stat name -> count (one window's JSON stats)
type _ModAccumMap = dict[str, "_ModuleAccumulator"]  # module directory -> accumulator
type _SymFreqMap = dict[str, "_SymbolFreq"]  # symbol address -> frequency record
type _FiltersDict = dict[str, str | int | bool | None]  # echo of the CLI filter inputs

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
121
122 # ── Constants ──────────────────────────────────────────────────────────────────
123
_DEFAULT_WINDOW = 20  # default for --window (commits per analysis window)
_DEFAULT_TOP = 20  # default for --top (modules shown)
_DEFAULT_MAX_COMMITS = 10_000  # default for --max-commits (walk cap)
_DEFAULT_PREDICT = 0  # 0 = disabled
_BAR_WIDTH = 20  # max bar width in characters

# Commits with more than this many symbol ops are mass-refactors — skip for
# module velocity (they would unfairly spike a module's counts).
_MAX_OPS_PER_COMMIT = 500
133
134 # ── Helpers ────────────────────────────────────────────────────────────────────
135
def _module_of(file_path: str) -> str:
    """Map a file path to the directory ("module") that contains it.

    Backslashes are treated as path separators and the result always ends
    with a forward slash.  A path without any directory component maps to
    the placeholder ``(root)``.

    ``muse/core/store.py``     → ``muse/core/``
    ``tests/test_billing.py``  → ``tests/``
    ``billing.py``             → ``(root)``
    """
    directory, separator, _basename = file_path.replace("\\", "/").rpartition("/")
    if not separator:
        # No slash anywhere: the file lives at the repository root.
        return "(root)"
    return f"{directory}/"
147
def _bar(net: int, max_abs: int) -> str:
    """Render *net* as a block bar scaled against *max_abs*.

    The bar is ``_BAR_WIDTH`` blocks wide when ``abs(net) == max_abs`` and
    never empty: a thin ``▏`` stands in when the scaled width rounds to
    zero.  Negative values are tagged with a ``(net negative)`` suffix.
    An empty string is returned when *max_abs* is zero.
    """
    if max_abs == 0:
        return ""
    cells = round(abs(net) / max_abs * _BAR_WIDTH)
    glyphs = ("█" * cells) or "▏"
    suffix = " (net negative)" if net < 0 else ""
    return glyphs + suffix
159
160 # ── Data types ─────────────────────────────────────────────────────────────────
161
@dataclass
class _WindowStats:
    """Symbol-op tallies for one module inside a single commit window."""

    added: int = 0  # count of "insert" ops
    removed: int = 0  # count of "delete" ops
    modified: int = 0  # count of "replace" ops
    active_commits: int = 0  # commits in the window that touched the module

    @property
    def net(self) -> int:
        """Net symbol growth for the window: additions minus removals."""
        growth = self.added - self.removed
        return growth
172
@dataclass
class _ModuleAccumulator:
    """Per-module velocity state gathered during the commit walk."""

    # Stats for commits ranked [0, window) — the most recent window.
    current: _WindowStats = field(default_factory=_WindowStats)
    # Stats for commits ranked [window, 2 * window) — the window before that.
    prior: _WindowStats = field(default_factory=_WindowStats)
    last_active_rank: int = -1  # commit rank (0 = HEAD) of last activity; -1 = none seen
    stagnant_commits: int = 0  # consecutive commits with no activity
179
class _ModuleOut(TypedDict):
    """JSON shape of one entry in the ``modules`` list."""

    module: str  # module directory, e.g. "muse/core/" or "(root)"
    current: _CounterMap  # added / removed / net / modified / active_commits
    prior: _CounterMap  # same keys as ``current``, for the prior window
    acceleration: int  # current-window net minus prior-window net
    stagnant_commits: int  # rank of the module's most recent active commit
186
class _PredictionOut(TypedDict):
    """JSON shape of one entry in the ``predictions`` list."""

    address: str  # full symbol address (file path, then "::"-separated parts)
    module: str  # containing module directory of the symbol's file
    score: float  # composite likelihood score, rounded to 4 decimals
    frequency: int  # number of ops on the symbol within the current window
    last_commit_rank: int  # rank (0 = most recent commit) of the latest change
193
class _VelocityJson(EnvelopeJson):
    """Top-level JSON envelope for ``muse code velocity``."""

    mode: str  # always "velocity"
    ref: str  # branch used as the head of the walk
    window_size: int  # commits per analysis window
    commits_analysed: int  # number of commits returned by the walk
    truncated: bool  # truncation flag reported by the commit walk
    filters: _FiltersDict  # echo of top / since / predict / max_commits
    modules: list[_ModuleOut]  # ranked per-module velocity entries
    predictions: list[_PredictionOut]  # top-K next-change predictions (may be empty)
205
206 # ── Core algorithm ─────────────────────────────────────────────────────────────
207
@dataclass
class _SymbolFreq:
    """Change-frequency record for one symbol within the current window."""

    frequency: int = 0  # number of ops seen for the symbol
    last_rank: int = 0  # 0 = most recent commit
    module: str = ""  # containing module directory of the symbol's file
213
def _walk_and_collect(
    root: pathlib.Path,
    head_commit_id: str,
    stop_at: str | None,
    window_size: int,
    max_commits: int,
) -> tuple[
    dict[str, _ModuleAccumulator],  # per-module stats
    dict[str, _SymbolFreq],  # per-symbol frequency (current window only)
    int,  # total commits analysed
    bool,  # truncated
]:
    """Walk the history once and collect module velocity and symbol frequency.

    Commits come from ``walk_commits_bfs`` and are ranked by their position
    in the returned list (rank 0 is the first commit, expected to be the
    newest).  Ranks ``[0, window_size)`` form the current window and ranks
    ``[window_size, 2 * window_size)`` the prior window; older commits only
    register the module and its last-active rank.

    Commits without a structured delta, and commits carrying more than
    ``_MAX_OPS_PER_COMMIT`` symbol ops (mass refactors), are skipped
    entirely.  Ops whose address contains ``::import::`` are ignored.

    Args:
        root: Repository root passed through to the commit walk.
        head_commit_id: Commit the walk starts from.
        stop_at: Optional commit id at which the walk stops.
        window_size: Number of commits per analysis window.
        max_commits: Upper bound on commits to walk.

    Returns:
        ``(modules, symbol_freq, commits_analysed, truncated)`` where
        *modules* maps module directory to its accumulator, *symbol_freq*
        maps symbol address to its current-window frequency record,
        *commits_analysed* is the number of commits returned by the walk and
        *truncated* is the walk's truncation flag.
    """
    commits, truncated = walk_commits_bfs(
        root, head_commit_id, max_commits, stop_at_commit_id=stop_at
    )

    modules: _ModAccumMap = {}
    symbol_freq: _SymFreqMap = {}

    for rank, commit in enumerate(commits):
        if commit.structured_delta is None:
            continue
        ops: list[DomainOp] = commit.structured_delta["ops"]

        # Gather all leaf symbol ops for this commit.
        all_ops = list(flat_symbol_ops(ops))

        # Skip mass-refactor commits.
        if len(all_ops) > _MAX_OPS_PER_COMMIT:
            continue

        # Window membership depends only on the commit rank, so decide it
        # once per commit rather than once per op.
        in_current = rank < window_size
        in_prior = not in_current and rank < 2 * window_size

        # Modules that had activity in this commit (for stagnation tracking).
        active_modules: set[str] = set()

        for op in all_ops:
            addr: str = op["address"]
            if "::import::" in addr:
                continue

            mod = _module_of(addr.split("::")[0])
            active_modules.add(mod)
            acc = modules.setdefault(mod, _ModuleAccumulator())

            if in_current:
                stats = acc.current
            elif in_prior:
                stats = acc.prior
            else:
                stats = None

            if stats is not None:
                op_kind = op.get("op", "")
                if op_kind == "insert":
                    stats.added += 1
                elif op_kind == "delete":
                    stats.removed += 1
                elif op_kind == "replace":
                    stats.modified += 1

            # Symbol frequency (current window only) for prediction.
            if in_current:
                sf = symbol_freq.get(addr)
                if sf is None:
                    # Seed last_rank with the real rank.  Rank 0 is a valid
                    # value (the newest commit), so it must not double as an
                    # "unset" marker or a later sighting would overwrite it.
                    symbol_freq[addr] = _SymbolFreq(
                        frequency=1, last_rank=rank, module=mod
                    )
                else:
                    sf.frequency += 1
                    sf.last_rank = min(sf.last_rank, rank)

        # Track active_commits per window and the most recent active rank.
        for mod in active_modules:
            acc = modules[mod]
            if in_current:
                acc.current.active_commits += 1
            elif in_prior:
                acc.prior.active_commits += 1
            if acc.last_active_rank < 0 or rank < acc.last_active_rank:
                acc.last_active_rank = rank

    # stagnant_commits: how many leading commits (from HEAD) had zero
    # activity for the module, i.e. the rank of its most recent activity.
    for acc in modules.values():
        if acc.last_active_rank < 0:
            acc.stagnant_commits = len(commits)
        else:
            acc.stagnant_commits = acc.last_active_rank

    return modules, symbol_freq, len(commits), truncated
309
310 # ── Prediction ─────────────────────────────────────────────────────────────────
311
def _compute_predictions(
    symbol_freq: _SymFreqMap,
    modules: _ModAccumMap,
    window_size: int,
    top_k: int,
) -> list[_PredictionOut]:
    """Rank current-window symbols by likelihood of changing next.

    Each symbol's score is ``frequency × recency_weight × module_weight``:

    * ``recency_weight = 1 / (1 + last_rank)`` — 1.0 for the most recent
      commit, decaying with age.
    * ``module_weight = 1 + growth / max_growth`` — between 1.0 and 2.0,
      where *growth* is the module's current-window net clamped at zero and
      *max_growth* is the largest such value over all scored symbols (or 1
      when no module is growing).

    Scores are rounded to four decimals.  At most *top_k* entries are
    returned, highest score first; an empty list is returned when there is
    nothing to score or *top_k* is not positive.  *window_size* is accepted
    but not used by the computation.
    """
    if top_k <= 0 or not symbol_freq:
        return []

    def growth(module_name: str) -> int:
        # Shrinking or unknown modules contribute no boost.
        acc = modules.get(module_name)
        return 0 if acc is None else max(0, acc.current.net)

    # Normaliser for module growth; falls back to 1 to avoid dividing by zero.
    ceiling = max((growth(sf.module) for sf in symbol_freq.values()), default=0) or 1

    candidates: list[_PredictionOut] = []
    for address, sf in symbol_freq.items():
        if not sf.frequency:
            continue
        recency = 1.0 / (1.0 + sf.last_rank)
        boost = 1.0 + (growth(sf.module) / ceiling)  # 1.0 .. 2.0
        score = round(sf.frequency * recency * boost, 4)
        candidates.append(
            _PredictionOut(
                address=address,
                module=sf.module,
                score=score,
                frequency=sf.frequency,
                last_commit_rank=sf.last_rank,
            )
        )

    # Stable sort: ties keep their insertion order.
    candidates.sort(key=lambda entry: -entry["score"])
    return candidates[:top_k]
355
356 # ── Formatters ─────────────────────────────────────────────────────────────────
357
def _print_table(
    ranked: list[tuple[str, _ModuleAccumulator]],
    predictions: list[_PredictionOut],
    ref: str,
    commits_analysed: int,
    window_size: int,
    truncated: bool,
    since: str | None,
) -> None:
    """Print the human-readable velocity report to stdout.

    The report has a title line, one table row per module in *ranked*, a
    footer naming up to three acceleration leaders (acceleration >= 2) and
    up to three stagnant modules, and finally the prediction list when
    *predictions* is non-empty.  A module with a non-zero stagnation count
    shows a stagnation note in place of its bar.  When *ranked* is empty
    only the title and a placeholder line are printed.
    """
    scope = ref if not since else f"{since}..{ref}"
    trunc = " ⚠️ truncated" if truncated else ""
    print(
        f"\nSymbol velocity — {scope}"
        f" ({commits_analysed} commits · window: {window_size}{trunc})"
    )
    print("Sorted by: net growth, current window\n")

    if not ranked:
        print(" (no modules with symbol-level changes found)")
        return

    # Scale for the bars and width of the module column.
    max_abs = max(abs(acc.current.net) for _, acc in ranked) or 1
    max_mod = max(len(name) for name, _ in ranked)

    hdr = (
        f" {'MODULE':<{max_mod}} {'ADD':>5} {'DEL':>5} {'NET':>5} "
        f"{'MOD':>5} {'ACCEL':>7} BAR"
    )
    print(hdr)
    print(f" {'─' * (len(hdr) - 2)}")

    accel_leaders: list[str] = []
    stagnant: list[tuple[str, int]] = []

    for mod, acc in ranked:
        window = acc.current
        accel = window.net - acc.prior.net

        if accel == 0:
            accel_str = "─"
        elif accel > 0:
            accel_str = f"▲ +{accel}"
        else:
            accel_str = f"▼ {accel}"

        add_str = f"+{window.added}" if window.added else "0"
        del_str = f"-{window.removed}" if window.removed else "0"
        net_str = str(window.net) if window.net <= 0 else f"+{window.net}"

        # The last column is either a stagnation note or the growth bar.
        stag = acc.stagnant_commits
        if stag > 0:
            stagnant.append((mod, stag))
            tail = f" (stagnant {stag} commit{'s' if stag != 1 else ''})"
        else:
            tail = _bar(window.net, max_abs)

        print(
            f" {mod:<{max_mod}} {add_str:>5} {del_str:>5} "
            f"{net_str:>5} {window.modified:>5} {accel_str:>7} {tail}"
        )

        if accel >= 2:
            accel_leaders.append(f"{mod} (+{accel} net vs prior window)")

    print("")
    if accel_leaders:
        print(f"Acceleration leaders: {', '.join(accel_leaders[:3])}")
    if stagnant:
        stag_str = ", ".join(f"{m} ({n} commits)" for m, n in stagnant[:3])
        print(f"Stagnant modules: {stag_str}")

    if not predictions:
        return

    print(f"\nNext-change predictions (top {len(predictions)}):")
    for position, pred in enumerate(predictions, 1):
        shown = sanitize_display(pred['address'])
        print(f" {position:>2} {shown:<60} score: {pred['score']:.2f}")
435
436 # ── CLI ────────────────────────────────────────────────────────────────────────
437
def register(
    subparsers: "argparse._SubParsersAction[argparse.ArgumentParser]",
) -> None:
    """Register the velocity subcommand.

    Adds a ``velocity`` sub-parser to *subparsers*, using the module
    docstring as its long description, and binds :func:`run` as its handler.

    Arguments
    ---------
    --window N, -w N
        Commits per analysis window (default 20).  Two consecutive windows
        are compared to compute acceleration.
    --top N, -n N
        Number of modules to display (default 20).
    --predict K, -p K
        Show the top K symbols most likely to change in the next commit
        (default 0 = disabled).  Ranked by recency × frequency × module-velocity.
    --since REF, -s REF
        Limit analysis to commits reachable from HEAD but not from REF.
    --max-commits N
        Maximum commits to scan (default 10 000).
    --json, -j
        Emit results as a JSON object with schema_version, mode, exit_code,
        and duration_ms in the envelope (agent-safe).
    """
    parser = subparsers.add_parser(
        "velocity",
        help=(
            "Symbol-growth rate by module — where the codebase is growing, "
            "shrinking, accelerating, or stagnating."
        ),
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--window", "-w",
        type=int, default=_DEFAULT_WINDOW, metavar="N",
        help=(
            f"Commits per analysis window (default: {_DEFAULT_WINDOW}). "
            f"Two consecutive windows are compared to compute acceleration."
        ),
    )
    # "-n" is the short form documented above; the destination stays "top"
    # because argparse derives it from the first long option string.
    parser.add_argument(
        "--top", "-n",
        type=int, default=_DEFAULT_TOP, metavar="N",
        help=f"Number of modules to show (default: {_DEFAULT_TOP}).",
    )
    parser.add_argument(
        "--predict", "-p",
        type=int, default=_DEFAULT_PREDICT, metavar="K",
        dest="predict",
        help=(
            "Show the top K symbols most likely to change in the next commit "
            "(default: 0 = disabled). "
            "Ranked by: recency × frequency × module-velocity."
        ),
    )
    parser.add_argument(
        "--since", "-s",
        default=None, metavar="REF",
        help="Limit analysis to commits reachable from HEAD but not from REF.",
    )
    parser.add_argument(
        "--max-commits",
        type=int, default=_DEFAULT_MAX_COMMITS, metavar="N",
        dest="max_commits",
        help=f"Maximum commits to scan (default: {_DEFAULT_MAX_COMMITS}).",
    )
    parser.add_argument(
        "--json", "-j",
        action="store_true", dest="json_out",
        help="Emit results as JSON.",
    )
    parser.set_defaults(func=run, json_out=False)
510
def run(args: argparse.Namespace) -> None:
    """Entry point for ``muse code velocity``.

    Walks the commit history from the current branch head, aggregates symbol
    insert/delete/replace ops per module (directory), and compares the
    current window with the prior one to expose acceleration and stagnation.
    With ``--predict K`` it also ranks symbols by
    recency × frequency × module-velocity as next-change candidates.  Output
    is a text table, or a JSON envelope when ``--json`` is given.

    Agent quickstart::

        muse code velocity --json
        muse code velocity --window 10 --top 10 --json
        muse code velocity --predict 5 --json
        muse code velocity --since v1.0 --json

    JSON fields::

        mode              Always "velocity".
        ref               Branch or ref used as HEAD of the walk.
        window_size       Commits per analysis window (from --window).
        commits_analysed  Total commits actually walked.
        truncated         true when --max-commits capped the walk.
        filters           Echo of top, since, predict, max_commits inputs.
        modules           Per-module velocity entries (module, current, prior, acceleration, stagnant_commits).
        predictions       Top-K next-change predictions (address, module, score, frequency, last_commit_rank).
        muse_version      Muse release that produced this output.
        schema            Envelope schema version (int).
        exit_code         Always 0.
        duration_ms       Wall-clock milliseconds for the command.
        timestamp         ISO-8601 UTC timestamp of command completion.
        warnings          List of non-fatal advisory messages.

    Exit codes::

        0  Success.
        1  Bad arguments or HEAD commit not found.
    """
    elapsed = start_timer()

    # ── Argument intake ───────────────────────────────────────────────────────
    window: int = clamp_int(args.window, 1, 1000, "window")
    top: int = clamp_int(args.top, 1, 10_000, "top")
    predict_k: int = clamp_int(args.predict, 0, 1000, "predict_k")
    max_commits: int = clamp_int(args.max_commits, 1, 100_000, "max_commits")
    since: str | None = args.since
    json_out: bool = args.json_out

    # ── Validation ────────────────────────────────────────────────────────────
    # Checked in order; the first failing rule aborts with a user error.
    rules: tuple[tuple[bool, str], ...] = (
        (window >= 1, "❌ --window must be >= 1."),
        (top >= 1, "❌ --top must be >= 1."),
        (predict_k >= 0, "❌ --predict must be >= 0."),
        (max_commits >= 1, "❌ --max-commits must be >= 1."),
    )
    for satisfied, complaint in rules:
        if not satisfied:
            print(complaint, file=sys.stderr)
            raise SystemExit(ExitCode.USER_ERROR)

    # Need at least 2 windows to compute acceleration.
    effective_max = max(max_commits, 2 * window)

    # ── Repo setup ────────────────────────────────────────────────────────────
    root = require_repo()
    branch = read_current_branch(root)

    head = resolve_commit_ref(root, branch, None)
    if head is None:
        print("❌ HEAD commit not found.", file=sys.stderr)
        raise SystemExit(ExitCode.USER_ERROR)

    stop_at: str | None = None
    if since is not None:
        boundary = resolve_commit_ref(root, branch, since)
        if boundary is None:
            print(f"❌ Commit '{since}' not found.", file=sys.stderr)
            raise SystemExit(ExitCode.USER_ERROR)
        stop_at = boundary.commit_id

    # ── Main pass ─────────────────────────────────────────────────────────────
    modules, symbol_freq, commits_analysed, truncated = _walk_and_collect(
        root, head.commit_id, stop_at, window, effective_max
    )

    # ── Rank modules by current-window net growth ─────────────────────────────
    def rank_key(item: tuple[str, _ModuleAccumulator]) -> tuple[int, int, str]:
        # Highest net first, then most modified, then module name ascending.
        name, acc = item
        return (-acc.current.net, -acc.current.modified, name)

    ranked = sorted(modules.items(), key=rank_key)[:top]

    # ── Predictions ───────────────────────────────────────────────────────────
    predictions = _compute_predictions(symbol_freq, modules, window, predict_k)

    # ── Output ────────────────────────────────────────────────────────────────
    if not json_out:
        _print_table(ranked, predictions, branch, commits_analysed, window, truncated, since)
        return

    def window_counts(stats: _WindowStats) -> _CounterMap:
        # Key order is part of the emitted JSON; keep it stable.
        return {
            "added": stats.added,
            "removed": stats.removed,
            "net": stats.net,
            "modified": stats.modified,
            "active_commits": stats.active_commits,
        }

    modules_out: list[_ModuleOut] = []
    for mod, acc in ranked:
        modules_out.append(
            _ModuleOut(
                module=mod,
                current=window_counts(acc.current),
                prior=window_counts(acc.prior),
                acceleration=acc.current.net - acc.prior.net,
                stagnant_commits=acc.stagnant_commits,
            )
        )

    payload = _VelocityJson(
        **make_envelope(elapsed),
        mode="velocity",
        ref=branch,
        window_size=window,
        commits_analysed=commits_analysed,
        truncated=truncated,
        filters={
            "top": top,
            "since": since,
            "predict": predict_k,
            "max_commits": max_commits,
        },
        modules=[dict(m) for m in modules_out],
        predictions=[dict(p) for p in predictions],
    )
    print(json.dumps(payload))
File History 1 commit
sha256:e6465e8a9b7fa8e6223ed4a3576e96c568c913ae2caeb9c31f15e7a81b250b40 docs: add | jq convention to --json section of agent-guide Sonnet 4.6 2 days ago