muse/cli/commands/velocity.py · gabriel/muse

1

"""muse code velocity — symbol-growth rate by module, with acceleration and next-change prediction.

2

3

This is the *energy map* of a codebase.

4

5

``muse code hotspots`` tells you which symbols changed most.

6

``muse code velocity`` tells you where the codebase is growing, where it is

7

shrinking, where it has stalled — and how fast each trend is accelerating.

8

9

It also answers a forward-looking question: **which symbols are most likely to

10

change in the next few commits?** This is computed statistically from

11

recency, frequency, and module acceleration.

Why this matters

----------------

File commit counts or line-change counts obscure the signal. A file with 500

16

line changes might just be a re-format. A module that gained 12 new symbols

17

across 8 commits is actively expanding its API surface — that is genuine

18

architectural investment.

Two windows
-----------

Velocity is computed over two consecutive commit windows of equal size
(``--window N``, default 20):

* **Current window** — the most recent N commits.
* **Prior window** — the N commits before that.

The difference between the two gives **acceleration**:

* Positive acceleration = module growing faster than before.
* Negative acceleration = module is decelerating / winding down.
* Zero acceleration from zero velocity = stagnation.

Prediction

----------

``--predict K`` outputs the top K symbols most likely to change in the next

37

commit, ranked by a composite score:

38

39

``score = frequency × recency_weight × module_velocity_weight``

40

41

where ``recency_weight = 1 / (1 + rank)`` (rank 0 = most recent commit).

42

This is a statistical signal, not a guarantee.

Usage::

muse code velocity

muse code velocity --window 10 --top 10

48

muse code velocity --predict 5

49

muse code velocity --since v1.0

50

muse code velocity --json

Output::

Symbol velocity — HEAD (40 commits · window: 20)

55

Sorted by: net growth, current window

56

57

MODULE ADD DEL NET MOD ACCEL BAR

58

muse/core/ +14 -2 +12 47 ▲ +4 ████████████

59

muse/cli/commands/ +11 -1 +10 31 ▲ +2 ██████████

60

tests/ +8 -0 +8 12 ▲ +3 ████████

61

muse/plugins/ +3 -5 -2 8 ▼ -1 ▏ (net negative)

62

docs/ 0 -0 0 0 ─ (stagnant 15 commits)

63

64

Acceleration leaders: muse/core/ (+4 net vs prior window)

65

Stagnant modules: docs/ (0 changes in 18 commits)

--predict output::

Next-change predictions (top 5 by statistical likelihood):

70

1 muse/core/store.py::resolve_commit_ref score: 0.91

71

2 muse/cli/commands/dead.py::run score: 0.84

72

3 tests/test_code_commands.py::TestHotspots score: 0.71

73

74

JSON output (--json)::

    {
      "ref": "HEAD",
      "window_size": 20,
      "commits_analysed": 40,
      "truncated": false,
      "filters": { "top": 20, "since": null, "predict": 0, "max_commits": 10000 },
      "modules": [
        {
          "module": "muse/core/",
          "current": { "added": 14, "removed": 2, "net": 12, "modified": 47, "active_commits": 18 },
          "prior": { "added": 10, "removed": 2, "net": 8, "modified": 33, "active_commits": 14 },
          "acceleration": 4,
          "stagnant_commits": 0
        }
      ],
      "predictions": [
        { "address": "muse/core/store.py::resolve_commit_ref", "score": 0.91 }
      ]
    }

"""

import argparse

import json

import logging

import pathlib

import sys

from dataclasses import dataclass, field

103

from typing import TypedDict

104

105

from muse.core.envelope import EnvelopeJson, make_envelope

106

from muse.core.errors import ExitCode

107

from muse.core.repo import require_repo

108

from muse.core.refs import read_current_branch

109

from muse.core.commits import resolve_commit_ref

110

from muse.core.timing import start_timer

111

from muse.domain import DomainOp

112

from muse.plugins.code._query import flat_symbol_ops, walk_commits_bfs

113

from muse.core.validation import clamp_int, sanitize_display

114

115

type _CounterMap = dict[str, int]

116

type _ModAccumMap = dict[str, "_ModuleAccumulator"]

117

type _SymFreqMap = dict[str, "_SymbolFreq"]

118

type _FiltersDict = dict[str, str | int | bool | None]

119

120

logger = logging.getLogger(__name__)

121

122

# ── Constants ──────────────────────────────────────────────────────────────────

_DEFAULT_WINDOW = 20

_DEFAULT_TOP = 20

_DEFAULT_MAX_COMMITS = 10_000

127

_DEFAULT_PREDICT = 0 # 0 = disabled

128

_BAR_WIDTH = 20 # max bar width in characters

129

130

# Commits with more than this many symbol ops are mass-refactors — skip for

131

# module velocity (they would unfairly spike a module's counts).

132

_MAX_OPS_PER_COMMIT = 500

133

134

# ── Helpers ────────────────────────────────────────────────────────────────────

135

136

def _module_of(file_path: str) -> str:
    """Map a file path to the directory ("module") that contains it.

    Backslashes are treated as path separators.  The result always ends
    with ``/``; a path with no directory component maps to ``(root)``.

    ``muse/core/store.py``    → ``muse/core/``
    ``tests/test_billing.py`` → ``tests/``
    ``billing.py``            → ``(root)``
    """
    normalised = file_path.replace("\\", "/")
    directory, separator, _basename = normalised.rpartition("/")
    if not separator:
        # No slash anywhere: the file sits at the top level.
        return "(root)"
    return directory + "/"

147

148

def _bar(net: int, max_abs: int) -> str:
    """Render *net* as a run of block characters scaled against *max_abs*.

    The bar length is ``round(|net| / max_abs * _BAR_WIDTH)``.  A bar that
    rounds to zero cells is shown as a thin sliver (``▏``) so the row is
    never visually empty, and negative values carry a ``(net negative)``
    suffix.  When *max_abs* is 0 there is no scale, so an empty string is
    returned.
    """
    if not max_abs:
        return ""

    cells = round(abs(net) / max_abs * _BAR_WIDTH)
    blocks = "█" * cells or "▏"
    return f"{blocks} (net negative)" if net < 0 else blocks

159

160

# ── Data types ─────────────────────────────────────────────────────────────────

@dataclass
class _WindowStats:
    """Symbol-op counters for one module over one commit window."""

    added: int = 0  # "insert" ops counted in the window
    removed: int = 0  # "delete" ops counted in the window
    modified: int = 0  # "replace" ops counted in the window
    active_commits: int = 0  # commits in the window that touched the module

    @property
    def net(self) -> int:
        """Net symbol growth for the window: ``added - removed``."""
        return self.added - self.removed

172

173

@dataclass
class _ModuleAccumulator:
    """Everything collected for one module: both windows plus recency."""

    # Counters for the most recent window.
    current: _WindowStats = field(default_factory=_WindowStats)
    # Counters for the window immediately before the current one.
    prior: _WindowStats = field(default_factory=_WindowStats)
    last_active_rank: int = -1  # commit rank (0 = HEAD) of last activity
    stagnant_commits: int = 0  # consecutive commits with no activity

179

180

class _ModuleOut(TypedDict):
    """JSON shape of one module entry in the velocity output."""

    module: str  # module (directory) name
    current: _CounterMap  # current-window counters
    prior: _CounterMap  # prior-window counters
    acceleration: int  # current-window net minus prior-window net
    stagnant_commits: int  # copied from the accumulator's stagnant_commits

186

187

class _PredictionOut(TypedDict):
    """JSON shape of one next-change prediction."""

    address: str  # symbol address
    module: str  # module recorded for the symbol
    score: float  # composite score, rounded to 4 decimal places
    frequency: int  # ops recorded for the symbol in the current window
    last_commit_rank: int  # rank stored for the symbol (0 = most recent commit)

193

194

class _VelocityJson(EnvelopeJson):
    """Top-level JSON envelope for ``muse code velocity``."""

    mode: str  # set to "velocity" by run()
    ref: str  # branch the walk started from
    window_size: int  # commits per analysis window
    commits_analysed: int  # number of commits returned by the history walk
    truncated: bool  # truncation flag reported by the history walk
    filters: _FiltersDict  # echo of top / since / predict / max_commits
    modules: list[_ModuleOut]  # ranked per-module entries
    predictions: list[_PredictionOut]  # next-change predictions (may be empty)

205

206

# ── Core algorithm ─────────────────────────────────────────────────────────────

@dataclass
class _SymbolFreq:
    """How often, and how recently, one symbol changed in the current window."""

    frequency: int = 0  # number of ops recorded for the symbol
    last_rank: int = 0  # 0 = most recent commit
    module: str = ""  # module the symbol's file belongs to

213

214

def _walk_and_collect(
    root: pathlib.Path,
    head_commit_id: str,
    stop_at: str | None,
    window_size: int,
    max_commits: int,
) -> tuple[
    dict[str, _ModuleAccumulator],  # per-module stats
    dict[str, _SymbolFreq],  # per-symbol frequency (current window only)
    int,  # total commits analysed
    bool,  # truncated
]:
    """Single BFS pass building module velocity and symbol frequency data.

    Args:
        root: Repository root handed to ``walk_commits_bfs``.
        head_commit_id: Commit the walk starts from.
        stop_at: Optional commit id forwarded as ``stop_at_commit_id``.
        window_size: Commits per window.  Ranks ``[0, window_size)`` form
            the current window and ``[window_size, 2 * window_size)`` the
            prior window; older commits only feed stagnation tracking.
        max_commits: Commit cap forwarded to ``walk_commits_bfs``.

    Returns:
        ``(modules, symbol_freq, commit_count, truncated)`` where
        ``commit_count`` is the number of commits returned by the walk
        (including ones skipped below) and ``truncated`` is the flag
        reported by the walk.
    """
    commits, truncated = walk_commits_bfs(
        root, head_commit_id, max_commits, stop_at_commit_id=stop_at
    )

    modules: _ModAccumMap = {}
    symbol_freq: _SymFreqMap = {}

    # commits are sorted newest-first after BFS, so rank 0 is the head.
    for rank, commit in enumerate(commits):
        if commit.structured_delta is None:
            continue
        ops: list[DomainOp] = commit.structured_delta["ops"]

        # Gather all leaf symbol ops for this commit.
        all_ops = list(flat_symbol_ops(ops))

        # Skip mass-refactor commits.
        if len(all_ops) > _MAX_OPS_PER_COMMIT:
            continue

        # Window membership depends only on the commit's rank, so decide it
        # once per commit rather than once per op.
        in_current = rank < window_size
        in_prior = not in_current and rank < 2 * window_size

        # Modules that had activity in this commit (for stagnation tracking).
        active_modules: set[str] = set()

        for op in all_ops:
            addr: str = op["address"]
            if "::import::" in addr:
                continue

            mod = _module_of(addr.split("::")[0])
            active_modules.add(mod)
            acc = modules.setdefault(mod, _ModuleAccumulator())

            if in_current:
                stats = acc.current
            elif in_prior:
                stats = acc.prior
            else:
                stats = None  # older than both windows: nothing to count

            if stats is not None:
                op_kind = op.get("op", "")
                if op_kind == "insert":
                    stats.added += 1
                elif op_kind == "delete":
                    stats.removed += 1
                elif op_kind == "replace":
                    stats.modified += 1

            # Symbol frequency (current window only) for prediction.
            if in_current:
                sf = symbol_freq.get(addr)
                if sf is None:
                    # Seed last_rank with the rank of the first sighting.
                    # Using 0 as an "unset" marker would be ambiguous: 0 is
                    # also the rank of the head commit, and a symbol touched
                    # there would have its rank overwritten by an older one.
                    sf = symbol_freq[addr] = _SymbolFreq(module=mod, last_rank=rank)
                sf.frequency += 1
                sf.last_rank = min(sf.last_rank, rank)

        # Track active_commits per window and the most recent activity.
        for mod in active_modules:
            acc = modules[mod]
            if in_current:
                acc.current.active_commits += 1
            elif in_prior:
                acc.prior.active_commits += 1
            if acc.last_active_rank < 0 or rank < acc.last_active_rank:
                acc.last_active_rank = rank

    # stagnant_commits: how many leading commits (from HEAD) had zero
    # activity for this module — i.e. the rank of its most recent activity.
    for acc in modules.values():
        if acc.last_active_rank < 0:
            acc.stagnant_commits = len(commits)
        else:
            acc.stagnant_commits = acc.last_active_rank

    return modules, symbol_freq, len(commits), truncated

309

310

# ── Prediction ─────────────────────────────────────────────────────────────────

311

312

def _compute_predictions(
    symbol_freq: _SymFreqMap,
    modules: _ModAccumMap,
    window_size: int,
    top_k: int,
) -> list[_PredictionOut]:
    """Rank current-window symbols by next-change likelihood; keep the top K.

    score = frequency × recency_weight × module_velocity_weight

    recency_weight          1 / (1 + last_commit_rank); rank 0 → 1.0
    module_velocity_weight  1.0 + growth / peak_growth, in 1.0 .. 2.0, where
                            growth is the module's current-window net
                            clamped at 0 and peak_growth is the largest such
                            value among the scored symbols' modules.

    Returns an empty list when there is nothing to score or *top_k* is not
    positive.  Symbols with equal scores keep their ``symbol_freq`` order.
    """
    if top_k <= 0 or not symbol_freq:
        return []

    def growth_of(module_name: str) -> int:
        """Current-window net growth of a module, floored at 0 (0 if unknown)."""
        acc = modules.get(module_name)
        return max(0, acc.current.net) if acc is not None else 0

    # Normaliser for module growth; fall back to 1 so the division is safe.
    peak_growth = max(growth_of(sf.module) for sf in symbol_freq.values()) or 1

    candidates: list[_PredictionOut] = []
    for addr, sf in symbol_freq.items():
        if sf.frequency == 0:
            continue
        recency_w = 1.0 / (1.0 + sf.last_rank)
        mod_w = 1.0 + growth_of(sf.module) / peak_growth
        score = round(sf.frequency * recency_w * mod_w, 4)
        candidates.append(
            _PredictionOut(
                address=addr,
                module=sf.module,
                score=score,
                frequency=sf.frequency,
                last_commit_rank=sf.last_rank,
            )
        )

    # Stable sort, highest score first.
    candidates.sort(key=lambda pred: pred["score"], reverse=True)
    return candidates[:top_k]

355

356

# ── Formatters ─────────────────────────────────────────────────────────────────

357

358

def _print_table(
    ranked: list[tuple[str, _ModuleAccumulator]],
    predictions: list[_PredictionOut],
    ref: str,
    commits_analysed: int,
    window_size: int,
    truncated: bool,
    since: str | None,
) -> None:
    """Print the human-readable velocity report to stdout.

    Args:
        ranked: ``(module, accumulator)`` pairs in display order.
        predictions: Next-change predictions; the section is omitted when
            the list is empty.
        ref: Ref shown in the title line.
        commits_analysed: Commit count shown in the title line.
        window_size: Window size shown in the title line.
        truncated: Adds a truncation marker to the title line when true.
        since: When set, the title shows ``since..ref`` instead of ``ref``.

    Each row shows the current-window counters and the acceleration
    (current net minus prior net).  Rows whose module has
    ``stagnant_commits > 0`` show a stagnation note in place of the bar.
    """
    scope = f"{since}..{ref}" if since else ref
    trunc = " ⚠️ truncated" if truncated else ""
    print(
        f"\nSymbol velocity — {scope}"
        f" ({commits_analysed} commits · window: {window_size}{trunc})"
    )
    print("Sorted by: net growth, current window\n")

    if not ranked:
        print(" (no modules with symbol-level changes found)")
        return

    # Module names come from file paths recorded in commits, the same source
    # as the prediction addresses, so they get the same display sanitising.
    rows = [(sanitize_display(mod), acc) for mod, acc in ranked]

    max_abs = max(abs(acc.current.net) for _, acc in rows) or 1
    max_mod = max(len(name) for name, _ in rows)

    hdr = (
        f" {'MODULE':<{max_mod}} {'ADD':>5} {'DEL':>5} {'NET':>5} "
        f"{'MOD':>5} {'ACCEL':>7} BAR"
    )
    print(hdr)
    print(f" {'─' * (len(hdr) - 2)}")

    accel_leaders: list[str] = []
    stagnant: list[tuple[str, int]] = []

    for name, acc in rows:
        accel = acc.current.net - acc.prior.net
        if accel > 0:
            accel_str = f"▲ +{accel}"
        elif accel < 0:
            accel_str = f"▼ {accel}"
        else:
            accel_str = "─"

        add_str = f"+{acc.current.added}" if acc.current.added else "0"
        del_str = f"-{acc.current.removed}" if acc.current.removed else "0"
        net_str = f"+{acc.current.net}" if acc.current.net > 0 else str(acc.current.net)

        stag = acc.stagnant_commits
        if stag > 0:
            stagnant.append((name, stag))
            # The note replaces the bar for modules idle at the head.
            tail = f" (stagnant {stag} commit{'s' if stag != 1 else ''})"
        else:
            tail = _bar(acc.current.net, max_abs)

        print(
            f" {name:<{max_mod}} {add_str:>5} {del_str:>5} "
            f"{net_str:>5} {acc.current.modified:>5} {accel_str:>7} {tail}"
        )

        if accel >= 2:
            accel_leaders.append(f"{name} (+{accel} net vs prior window)")

    print("")
    if accel_leaders:
        print(f"Acceleration leaders: {', '.join(accel_leaders[:3])}")

    # Pluralise per entry so a single idle commit reads "1 commit".
    stag_str = ", ".join(
        f"{m} ({n} commit{'s' if n != 1 else ''})" for m, n in stagnant[:3]
    )
    if stag_str:
        print(f"Stagnant modules: {stag_str}")

    if predictions:
        print(f"\nNext-change predictions (top {len(predictions)}):")
        for i, pred in enumerate(predictions, 1):
            print(f" {i:>2} {sanitize_display(pred['address']):<60} score: {pred['score']:.2f}")

435

436

# ── CLI ────────────────────────────────────────────────────────────────────────

437

438

def register(
    subparsers: "argparse._SubParsersAction[argparse.ArgumentParser]",
) -> None:
    """Register the velocity subcommand.

    Arguments
    ---------
    --window N, -w N
        Commits per analysis window (default 20). Two consecutive windows
        are compared to compute acceleration.
    --top N, -n N
        Number of modules to display (default 20).
    --predict K, -p K
        Show the top K symbols most likely to change in the next commit
        (default 0 = disabled). Ranked by recency × frequency × module-velocity.
    --since REF, -s REF
        Limit analysis to commits reachable from HEAD but not from REF.
    --max-commits N
        Maximum commits to scan (default 10 000).
    --json, -j
        Emit results as a JSON object with schema_version, mode, exit_code,
        and duration_ms in the envelope (agent-safe).
    """
    parser = subparsers.add_parser(
        "velocity",
        help=(
            "Symbol-growth rate by module — where the codebase is growing, "
            "shrinking, accelerating, or stagnating."
        ),
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--window", "-w",
        type=int, default=_DEFAULT_WINDOW, metavar="N",
        help=(
            f"Commits per analysis window (default: {_DEFAULT_WINDOW}). "
            "Two consecutive windows are compared to compute acceleration."
        ),
    )
    # "-n" is the short alias documented above; register it alongside --top.
    parser.add_argument(
        "--top", "-n",
        type=int, default=_DEFAULT_TOP, metavar="N",
        help=f"Number of modules to show (default: {_DEFAULT_TOP}).",
    )
    parser.add_argument(
        "--predict", "-p",
        type=int, default=_DEFAULT_PREDICT, metavar="K",
        dest="predict",
        help=(
            "Show the top K symbols most likely to change in the next commit "
            "(default: 0 = disabled). "
            "Ranked by: recency × frequency × module-velocity."
        ),
    )
    parser.add_argument(
        "--since", "-s",
        default=None, metavar="REF",
        help="Limit analysis to commits reachable from HEAD but not from REF.",
    )
    parser.add_argument(
        "--max-commits",
        type=int, default=_DEFAULT_MAX_COMMITS, metavar="N",
        dest="max_commits",
        help=f"Maximum commits to scan (default: {_DEFAULT_MAX_COMMITS}).",
    )
    parser.add_argument(
        "--json", "-j",
        action="store_true", dest="json_out",
        help="Emit results as JSON.",
    )
    parser.set_defaults(func=run, json_out=False)

510

511

def run(args: argparse.Namespace) -> None:

512

"""Compute symbol-growth velocity by module.

513

514

Mines the commit history for symbol add/remove/modify ops and aggregates

515

by module (directory). Compares the current window to the prior window to

516

detect acceleration and stagnation. With ``--predict K``, ranks symbols

517

by recency × frequency × module-velocity for next-change likelihood.

Agent quickstart::

muse code velocity --json

522

muse code velocity --window 10 --top 10 --json

523

muse code velocity --predict 5 --json

524

muse code velocity --since v1.0 --json

JSON fields::

mode Always "velocity".

529

ref Branch or ref used as HEAD of the walk.

530

window_size Commits per analysis window (from --window).

531

commits_analysed Total commits actually walked.

532

truncated true when --max-commits capped the walk.

533

filters Echo of top, since, predict, max_commits inputs.

534

modules Per-module velocity entries (module, current, prior, acceleration, stagnant_commits).

535

predictions Top-K next-change predictions (address, module, score, frequency, last_commit_rank).

536

muse_version Muse release that produced this output.

537

schema Envelope schema version (int).

538

exit_code Always 0.

539

duration_ms Wall-clock milliseconds for the command.

540

timestamp ISO-8601 UTC timestamp of command completion.

541

warnings List of non-fatal advisory messages.

Exit codes::

0 Success.

1 Bad arguments or HEAD commit not found.

547

"""

548

elapsed = start_timer()

549

window: int = clamp_int(args.window, 1, 1000, 'window')

550

top: int = clamp_int(args.top, 1, 10_000, 'top')

551

predict_k: int = clamp_int(args.predict, 0, 1000, 'predict_k')

552

since: str | None = args.since

553

max_commits: int = clamp_int(args.max_commits, 1, 100_000, 'max_commits')

554

json_out: bool = args.json_out

555

556

# ── Validation ────────────────────────────────────────────────────────────

557

if window < 1:

558

print("❌ --window must be >= 1.", file=sys.stderr)

559

raise SystemExit(ExitCode.USER_ERROR)

560

if top < 1:

561

print("❌ --top must be >= 1.", file=sys.stderr)

562

raise SystemExit(ExitCode.USER_ERROR)

563

if predict_k < 0:

564

print("❌ --predict must be >= 0.", file=sys.stderr)

565

raise SystemExit(ExitCode.USER_ERROR)

566

if max_commits < 1:

567

print("❌ --max-commits must be >= 1.", file=sys.stderr)

568

raise SystemExit(ExitCode.USER_ERROR)

569

# Need at least 2 windows to compute acceleration.

570

effective_max = max(max_commits, window * 2)

571

572

# ── Repo setup ────────────────────────────────────────────────────────────

573

root = require_repo()

574

branch = read_current_branch(root)

575

576

head = resolve_commit_ref(root, branch, None)

577

if head is None:

578

print("❌ HEAD commit not found.", file=sys.stderr)

579

raise SystemExit(ExitCode.USER_ERROR)

580

581

stop_at: str | None = None

582

if since is not None:

583

since_commit = resolve_commit_ref(root, branch, since)

584

if since_commit is None:

585

print(f"❌ Commit '{since}' not found.", file=sys.stderr)

586

raise SystemExit(ExitCode.USER_ERROR)

587

stop_at = since_commit.commit_id

588

589

# ── Main pass ─────────────────────────────────────────────────────────────

590

modules, symbol_freq, commits_analysed, truncated = _walk_and_collect(

591

root, head.commit_id, stop_at, window, effective_max

592

)

593

594

# ── Rank modules by current-window net growth ──────────────────────────────

595

ranked = sorted(

596

modules.items(),

597

key=lambda kv: (-kv[1].current.net, -kv[1].current.modified, kv[0]),

598

)[:top]

599

600

# ── Predictions ───────────────────────────────────────────────────────────

601

predictions = _compute_predictions(symbol_freq, modules, window, predict_k)

602

603

# ── Output ────────────────────────────────────────────────────────────────

604

if json_out:

605

modules_out: list[_ModuleOut] = [

_ModuleOut(

module=mod,

current={

"added": acc.current.added,

610

"removed": acc.current.removed,

611

"net": acc.current.net,

612

"modified": acc.current.modified,

613

"active_commits": acc.current.active_commits,

614

},

615

prior={

616

"added": acc.prior.added,

617

"removed": acc.prior.removed,

618

"net": acc.prior.net,

619

"modified": acc.prior.modified,

620

"active_commits": acc.prior.active_commits,

621

},

622

acceleration=acc.current.net - acc.prior.net,

623

stagnant_commits=acc.stagnant_commits,

624

)

625

for mod, acc in ranked

626

]

627

print(json.dumps(_VelocityJson(

628

**make_envelope(elapsed),

mode="velocity",

ref=branch,

window_size=window,

commits_analysed=commits_analysed,

truncated=truncated,

filters={

"top": top,

"since": since,

"predict": predict_k,

638

"max_commits": max_commits,

639

},

640

modules=[dict(m) for m in modules_out],

641

predictions=[dict(p) for p in predictions],

)))

return

_print_table(ranked, predictions, branch, commits_analysed, window, truncated, since)