gabriel / muse public
blame.py python
530 lines 20.4 KB
Raw
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 22 days ago
1 """muse code blame -- symbol-level attribution.
2
3 ``git blame`` attributes every *line* to a commit -- a 300-line class gives
4 you 300 attribution entries. ``muse code blame`` attributes the *symbol* as a
5 semantic unit: one answer per function, class, or method, regardless of how
6 many lines it occupies.
7
8 Rename tracking
9 ---------------
10 ``muse code blame`` follows renames automatically, in both directions:
11
12 * Blaming the **current** name (post-rename) walks backward through the
13 rename event, then continues tracking the symbol's earlier history under
14 its old name.
15 * Blaming the **original** name (pre-rename) finds both the creation event
16 and the rename event, then follows the symbol forward under its new name.
17
18 Early-exit optimisation
19 -----------------------
20 The scan stops as soon as a ``"created"`` event is found for the symbol.
21 At that point the full lineage is known; continuing would yield no new
22 events. For long-lived symbols this can reduce scan work dramatically.
23
24 Security model
25 --------------
26 - The ``ADDRESS`` argument is validated to reject null bytes and ANSI/control
27 characters before any processing occurs.
28 - All user-controlled values (address, commit references, author strings,
29 commit messages, change details) are sanitized via ``sanitize_display()``
30 before appearing in human-readable output.
31 - The ``from_ref`` argument is sanitized in error messages.
32 - JSON output carries raw stored values (no terminal sanitization applied) —
33 agents receive the exact data as committed.
34 - All error messages go to **stderr**; **stdout** carries only data.
35
36 Agent UX
37 --------
38 Pass ``--json`` for a stable machine-readable object. All fields are always
39 present. Filter by ``--kind`` and/or ``--author`` to narrow output further.
40
41 Usage::
42
43 muse code blame "src/billing.py::compute_invoice_total"
44 muse code blame "api/server.go::Server.HandleRequest"
45 muse code blame "src/models.py::User.save" --all
46 muse code blame "src/billing.py::run" --from feat/my-branch
47 muse code blame "src/billing.py::run" --json
48 muse code blame "src/billing.py::run" --kind created --kind renamed
49 muse code blame "src/billing.py::run" --author alice
50
51 JSON schema (``--json``)::
52
53 {
54 "address": "src/billing.py::compute_invoice_total",
55 "start_ref": "main",
56 "total_commits_scanned": 42,
57 "truncated": false,
58 "events": [
59 {
60 "event": "created",
61 "commit_id": "<64-char hex>",
62 "author": "alice",
63 "message": "initial implementation",
64 "committed_at": "2026-01-15T12:00:00+00:00",
65 "address": "src/billing.py::compute_invoice_total",
66 "detail": "created",
67 "new_address": null
68 }
69 ]
70 }
71
72 Exit codes
73 ----------
74 - 0 — success
75 - 1 — invalid arguments (bad address, bad --kind, bad ref)
76 - 2 — not inside a Muse repository
77 - 4 — commit reference not found
78 """
79
80 import argparse
81 import json
82 import logging
83 import sys
84 from dataclasses import dataclass
85 from typing import Literal, TypedDict
86 import pathlib
87
88 from muse.core.envelope import EnvelopeJson, make_envelope
89 from muse.core.errors import ExitCode
90 from muse.core.repo import require_repo
91 from muse.core.refs import read_current_branch
92 from muse.core.commits import (
93 CommitRecord,
94 resolve_commit_ref,
95 )
96 from muse.core.timing import start_timer
97 from muse.domain import DomainOp
98 from muse.plugins.code._query import walk_commits_bfs
99 from muse.core.validation import clamp_int, sanitize_display, sanitize_provenance
100
101
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Default ceiling on the number of commits scanned when ``--max`` is omitted.
_DEFAULT_MAX = 500

# Closed set of event kinds a blame entry can carry (static typing view).
SymbolEventKind = Literal[
    "created",
    "modified",
    "renamed",
    "moved",
    "deleted",
    "signature",
]

# Runtime mirror of ``SymbolEventKind``; ``--kind`` values are checked
# against this set. Keep the two in sync when adding a kind.
_ALL_KINDS: frozenset[str] = frozenset(
    {"created", "modified", "renamed", "moved", "deleted", "signature"}
)
113
114 # ---------------------------------------------------------------------------
115 # JSON TypedDicts — stable machine-readable output schemas
116 # ---------------------------------------------------------------------------
117
class _BlameEventJson(TypedDict):
    """One event in the blame history of a symbol.

    This is the shape of each entry in the ``events`` list of the ``--json``
    output; ``_BlameEvent.to_dict`` builds it from an event and its commit.
    """

    event: SymbolEventKind  # event kind, e.g. "created" or "renamed"
    commit_id: str  # ID of the commit that carries the event
    author: str  # author string from the commit record
    message: str  # commit message
    committed_at: str  # commit timestamp rendered with ``isoformat()``
    address: str  # symbol address at the time of this event
    detail: str  # human-readable description of what changed
    new_address: str | None  # post-rename address; None for non-rename events
129
class _BlameResultJson(EnvelopeJson):
    """Top-level JSON output of ``muse code blame --json``.

    Extends ``EnvelopeJson``; ``run`` fills the inherited envelope fields
    from ``make_envelope()`` and the fields below from the blame walk.
    """

    address: str  # symbol address exactly as given on the command line
    start_ref: str  # the ``--from`` value, or the current branch when omitted
    total_commits_scanned: int  # number of commits returned by the walk
    truncated: bool  # truncation flag reported by ``walk_commits_bfs``
    events: list[_BlameEventJson]  # events left after --kind / --author filters
138
139 # ---------------------------------------------------------------------------
140 # Internal dataclass
141 # ---------------------------------------------------------------------------
142
@dataclass
class _BlameEvent:
    """A single change to a symbol, tied to the commit that made it.

    ``new_address`` is only populated for rename events; every other event
    leaves it as ``None``.
    """

    kind: SymbolEventKind
    commit: CommitRecord
    address: str
    detail: str
    new_address: str | None = None

    def to_dict(self) -> _BlameEventJson:
        """Return this event as a ``_BlameEventJson`` mapping for ``--json``."""
        record = self.commit
        payload = _BlameEventJson(
            event=self.kind,
            commit_id=record.commit_id,
            author=record.author,
            message=record.message,
            committed_at=record.committed_at.isoformat(),
            address=self.address,
            detail=self.detail,
            new_address=self.new_address,
        )
        return payload
165
166 # ---------------------------------------------------------------------------
167 # Core event-extraction logic
168 # ---------------------------------------------------------------------------
169
def _flat_ops(ops: list[DomainOp]) -> list[DomainOp]:
    """Flatten patch ops so that every leaf ``DomainOp`` is at the top level.

    A ``"patch"`` op is a container: it contributes nothing itself and is
    replaced by its ``child_ops``.  Children that are themselves patches are
    unwrapped too, at any depth, so the result never contains a ``"patch"``
    op.  The relative (document) order of the leaf ops is preserved.

    Args:
        ops: Ops as stored in a commit's structured delta.

    Returns:
        A new list holding only non-patch ops; *ops* is not modified.
    """
    result: list[DomainOp] = []
    for op in ops:
        if op["op"] == "patch":
            # Recurse rather than extend directly: a patch nested inside
            # another patch would otherwise survive as a non-leaf entry and
            # its children would never be scanned.
            result.extend(_flat_ops(op["child_ops"]))
        else:
            result.append(op)
    return result
179
def _events_in_commit(
    commit: CommitRecord,
    address: str,
    file_prefix: str,
    bare_name: str,
) -> tuple[list[_BlameEvent], str]:
    """Collect the events in *commit* that concern the tracked symbol.

    Args:
        commit: Commit record whose ``structured_delta`` is inspected.
        address: Full address being tracked, e.g.
            ``"src/billing.py::compute_invoice_total"``.
        file_prefix: File half of *address* (``"src/billing.py"``), already
            split off by the caller.
        bare_name: Symbol half of *address* (``"compute_invoice_total"``),
            already split off by the caller.

    Returns:
        ``(events, next_address)``.  ``next_address`` is the address to look
        for in the commits visited after this one; it differs from *address*
        only when this commit renamed some other symbol *to* the tracked one.

    How renames are read
    --------------------
    A rename is stored as a ``"replace"`` op on the OLD address whose
    ``new_summary`` is ``"renamed to NEW_NAME"``.

    * Op address equals *address*: the tracked symbol is the old name.  A
      ``"renamed"`` event is recorded and tracking stays on *address*.
    * Op address differs, same file, and ``NEW_NAME == bare_name``: the
      tracked symbol is the new name.  A ``"renamed"`` event is recorded
      against the old address and tracking switches to it.
    """
    found: list[_BlameEvent] = []
    older_address = address

    delta = commit.structured_delta
    if delta is None:
        # Nothing structured recorded for this commit.
        return found, older_address

    rename_marker = "renamed to "

    for op in _flat_ops(delta["ops"]):
        target: str = op["address"]
        op_kind = op["op"]

        if target != address:
            # ── Reverse rename: did another symbol become the tracked one? ──
            if op_kind != "replace":
                continue
            other_summary: str = op.get("new_summary", "")
            if not other_summary.startswith(rename_marker) or "::" not in target:
                continue
            successor = other_summary.removeprefix(rename_marker).strip()
            source_file, previous_name = target.rsplit("::", 1)
            if source_file != file_prefix or successor != bare_name:
                continue
            found.append(_BlameEvent(
                "renamed",
                commit,
                target,
                f"renamed from {previous_name} to {bare_name}",
                address,
            ))
            # From here on, history lives under the previous name.
            older_address = target
            continue

        # ── Direct match: the op addresses the tracked symbol itself ──
        if op_kind == "insert":
            found.append(_BlameEvent(
                "created", commit, address,
                op.get("content_summary", "created"),
            ))
        elif op_kind == "delete":
            removal: str = op.get("content_summary", "deleted")
            # A delete whose summary mentions a destination is a move.
            removal_kind: SymbolEventKind = (
                "moved" if "moved to" in removal else "deleted"
            )
            found.append(_BlameEvent(removal_kind, commit, address, removal))
        elif op_kind == "replace":
            summary: str = op.get("new_summary", "")
            if summary.startswith(rename_marker):
                successor_name = summary.removeprefix(rename_marker).strip()
                # The tracked address is the OLD name here, so the address
                # to keep following is unchanged.
                found.append(_BlameEvent(
                    "renamed",
                    commit,
                    address,
                    f"renamed to {successor_name}",
                    f"{file_prefix}::{successor_name}",
                ))
            elif summary.startswith("moved to "):
                found.append(_BlameEvent("moved", commit, address, summary))
            elif "signature" in summary:
                found.append(_BlameEvent(
                    "signature", commit, address,
                    summary or "signature changed",
                ))
            else:
                found.append(_BlameEvent(
                    "modified", commit, address, summary or "modified",
                ))

    return found, older_address
281
282 # ---------------------------------------------------------------------------
283 # Argument parser registration
284 # ---------------------------------------------------------------------------
285
def register(
    subparsers: "argparse._SubParsersAction[argparse.ArgumentParser]",
) -> None:
    """Attach the ``blame`` subcommand to *subparsers*.

    The module docstring is used as the subcommand description and shown
    verbatim (``RawDescriptionHelpFormatter``); ``run`` is installed as the
    handler via ``set_defaults(func=...)``.

    Args:
        subparsers: The sub-parser collection that receives ``blame``.
    """
    blame = subparsers.add_parser(
        "blame",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__,
        help="Show which commit last touched a specific symbol.",
    )
    add = blame.add_argument

    # Positional: the symbol to attribute.
    add(
        "address",
        metavar="ADDRESS",
        help='Symbol address, e.g. "src/billing.py::compute_invoice_total".',
    )

    # Where to start and how far to walk.
    add(
        "--from",
        dest="from_ref",
        metavar="REF",
        default=None,
        help="Start walking from this commit / branch (default: HEAD).",
    )
    add(
        "--all", "-a",
        dest="show_all",
        action="store_true",
        help="Show the full change history, not just the three most recent events.",
    )
    add(
        "--max",
        dest="max_commits",
        metavar="N",
        type=int,
        default=_DEFAULT_MAX,
        help=f"Maximum commits to scan (default: {_DEFAULT_MAX}).",
    )

    # Output filters.
    kind_help = (
        "Filter output to events of this kind. Accepted values: "
        "created, modified, renamed, moved, deleted, signature. "
        "Repeat to include multiple kinds."
    )
    add(
        "--kind",
        dest="kinds",
        metavar="KIND",
        action="append",
        default=None,
        help=kind_help,
    )
    author_help = (
        "Filter output to events from commits whose author contains "
        "PATTERN (case-insensitive substring match)."
    )
    add(
        "--author",
        dest="author_filter",
        metavar="PATTERN",
        default=None,
        help=author_help,
    )

    # Output format.
    add(
        "--json", "-j",
        dest="json_out",
        action="store_true",
        help="Emit attribution as structured JSON (agent-friendly; -j is a shorthand alias).",
    )

    blame.set_defaults(func=run)
338
339 # ---------------------------------------------------------------------------
340 # Command entry point
341 # ---------------------------------------------------------------------------
342
def run(args: argparse.Namespace) -> None:
    """Show which commit last touched a specific symbol.

    Attributes the symbol as a semantic unit — one answer per function, class,
    or method regardless of line count.  Renames are tracked automatically.
    BFS walk follows both parents of merge commits; stops as soon as a
    ``"created"`` event is found.

    Args:
        args: Parsed command line.  Reads ``address``, ``from_ref``,
            ``show_all``, ``max_commits``, ``json_out``, ``kinds`` and
            ``author_filter`` (the destinations set up by ``register``).

    Raises:
        SystemExit: With ``ExitCode.USER_ERROR`` for a malformed address or an
            unknown ``--kind``; with ``ExitCode.NOT_FOUND`` when the start
            commit cannot be resolved.

    Agent quickstart
    ----------------
    ::

        muse code blame "src/billing.py::compute_total" --json
        muse code blame "src/billing.py::run" --kind created --json
        muse code blame "src/models.py::User.save" --all --json

    JSON fields
    -----------
    address                 Qualified symbol address blamed.
    start_ref               Branch or ref the walk started from.
    total_commits_scanned   Number of commits walked.
    truncated               ``true`` if ``--max`` was reached before full history.
    events                  List of event objects, emitted in the reverse of
                            the order the walk discovered them.

    Each event:

    event          Event kind: ``"created"``, ``"modified"``, ``"renamed"``, etc.
    commit_id      Full 64-char hex commit ID.
    author         Author string from the commit record.
    message        Commit message.
    committed_at   ISO-8601 commit timestamp.
    address        Symbol address at the time of this event.
    detail         Human-readable description of what changed.
    new_address    Post-rename address; ``null`` for non-rename events.

    Exit codes
    ----------
    0   Success.
    1   Invalid arguments (bad address, bad ``--kind``, bad ref).
    2   Not inside a Muse repository.
    4   Commit reference not found.
    """
    elapsed = start_timer()
    address: str = args.address
    from_ref: str | None = args.from_ref
    show_all: bool = args.show_all
    max_commits: int = clamp_int(args.max_commits, 1, 100_000, "max_commits")
    json_out: bool = args.json_out
    kinds_raw: list[str] | None = args.kinds
    author_filter: str | None = args.author_filter

    # ── Validation ────────────────────────────────────────────────────────────

    # Reject control characters and null bytes in the address before any use.
    sanitised_addr = sanitize_provenance(address)
    if sanitised_addr != address or "\x00" in address:
        print(
            "❌ ADDRESS contains control characters or null bytes.",
            file=sys.stderr,
        )
        raise SystemExit(ExitCode.USER_ERROR)

    if "::" not in address:
        print(
            f"❌ Invalid address {address!r} — expected 'file.py::SymbolName'.",
            file=sys.stderr,
        )
        raise SystemExit(ExitCode.USER_ERROR)

    # Validate --kind values.
    kind_filter: frozenset[str] | None = None
    if kinds_raw:
        invalid = [k for k in kinds_raw if k not in _ALL_KINDS]
        if invalid:
            # The rejected values are raw user input; sanitize them before
            # they reach the terminal, like every other user-controlled value.
            shown = ", ".join(sanitize_display(k) for k in invalid)
            print(
                f"❌ Unknown --kind value(s): {shown}. "
                f"Accepted: {', '.join(sorted(_ALL_KINDS))}.",
                file=sys.stderr,
            )
            raise SystemExit(ExitCode.USER_ERROR)
        kind_filter = frozenset(kinds_raw)

    # ── Repo / commit resolution ──────────────────────────────────────────────

    root = require_repo()
    branch = read_current_branch(root)

    start_commit = resolve_commit_ref(root, branch, from_ref)
    if start_commit is None:
        ref_display = sanitize_display(from_ref or "HEAD")
        print(f"❌ Commit {ref_display!r} not found.", file=sys.stderr)
        raise SystemExit(ExitCode.NOT_FOUND)

    # ── BFS walk ──────────────────────────────────────────────────────────────

    commits, truncated = walk_commits_bfs(root, start_commit.commit_id, max_commits)

    # ── Event collection (rename-aware, early-exit on "created") ─────────────

    current_address = address
    all_events: list[_BlameEvent] = []
    for commit in commits:
        # Split once per iteration: the tracked address can change when a
        # rename is detected, and the scanner needs both halves.
        if "::" not in current_address:
            break
        cur_file, cur_bare = current_address.rsplit("::", 1)
        evs, current_address = _events_in_commit(
            commit, current_address, cur_file, cur_bare
        )
        all_events.extend(evs)
        # Early exit: once the symbol's creation is found, full lineage is
        # established — no older commit can contribute new events.
        if any(ev.kind == "created" for ev in evs):
            break

    # ── Apply filters ─────────────────────────────────────────────────────────

    filtered: list[_BlameEvent] = all_events
    if kind_filter is not None:
        filtered = [ev for ev in filtered if ev.kind in kind_filter]
    if author_filter is not None:
        needle = author_filter.lower()
        filtered = [
            ev for ev in filtered
            if needle in (ev.commit.author or "").lower()
        ]

    # ── Output ────────────────────────────────────────────────────────────────

    if json_out:
        # JSON carries raw stored values (no terminal sanitization).
        # NOTE(review): events are emitted in the REVERSE of walk order,
        # while the human-readable output below lists them in walk order
        # ("last touched" first).  Confirm which order JSON consumers expect
        # before changing either side.
        print(json.dumps(_BlameResultJson(
            **make_envelope(elapsed),
            address=address,
            start_ref=from_ref or branch,
            total_commits_scanned=len(commits),
            truncated=truncated,
            events=[e.to_dict() for e in reversed(filtered)],
        )))
        return

    print(f"\n{sanitize_display(address)}")
    print("─" * 62)

    if truncated:
        print(
            f" ⚠️ History may be incomplete — scanned {len(commits)} commits "
            f"(--max {max_commits}).",
        )

    if not filtered:
        if all_events:
            print(" (no events match the active filters)")
        else:
            print(
                " (no events found — symbol may not exist or have no recorded history)"
            )
        return

    # Without --all only the first few events are printed.
    preview_count = 3
    events_to_show = filtered if show_all else filtered[:preview_count]

    # All labels share one width so the commit IDs line up, whether the label
    # is one of the fixed phrases or a "#N:" counter.
    label_width = 13
    labels = tuple(
        text.ljust(label_width)
        for text in ("last touched:", "previous:", "before that:")
    )

    for idx, ev in enumerate(events_to_show):
        if idx < len(labels):
            label = labels[idx]
        else:
            label = f"#{idx + 1}:".ljust(label_width)
        date_str = ev.commit.committed_at.strftime("%Y-%m-%d")
        cid = sanitize_display(ev.commit.commit_id)
        print(f"{label} {cid} {date_str}")
        print(f" author: {sanitize_display(ev.commit.author or 'unknown')}")
        print(f' message: "{sanitize_display(ev.commit.message)}"')
        print(f" change: {sanitize_display(ev.detail)}")
        if ev.new_address:
            print(f" → tracking continues as {sanitize_display(ev.new_address)}")
        print("")

    if not show_all and len(filtered) > preview_count:
        remaining = len(filtered) - preview_count
        print(
            f" … {remaining} older event(s) — pass --all to see the full history."
        )
File History 4 commits
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 22 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 24 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 30 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor 30 days ago