gabriel / muse public
name_rev.py python
473 lines 15.3 KB
Raw
sha256:e6465e8a9b7fa8e6223ed4a3576e96c568c913ae2caeb9c31f15e7a81b250b40 docs: add | jq convention to --json section of agent-guide Sonnet 4.6 1 day ago
1 """muse name-rev — map commit IDs to branch-relative names.
2
3 For each supplied commit ID, walks the commit DAG from all branch tips
4 simultaneously and finds the branch + distance that best describes it.
5 The result is expressed as ``<branch>~N`` where N is the number of parent
6 hops from that branch tip to the commit (0 means the commit IS the tip).
7
8 The multi-source BFS ensures O(total-commits) time regardless of the number
9 of branches or input commit IDs — every commit is visited at most once.
10 Hierarchical branch names (``feat/my-thing``, ``bugfix/PROJ-42``) are
11 fully supported.
12
13 Short SHA prefixes (≥ 4 hex characters) are resolved automatically. Bare
14 hex prefixes (e.g. ``9f7c``) and ``sha256:``-prefixed short IDs
15 (e.g. ``sha256:9f7c``) are both accepted. If a prefix matches more than
16 one commit the result is marked ``ambiguous``.
17
Output (JSON, with ``--json``)::
19
20 {
21 "results": [
22 {
23 "commit_id": "sha256:<hex>",
24 "input": "<as-supplied-by-caller>",
25 "name": "main~3",
26 "branch": "main",
27 "distance": 3,
28 "undefined": false,
29 "ambiguous": false
30 },
31 {
32 "commit_id": null,
33 "input": "deadbeef",
34 "name": null,
35 "branch": null,
36 "distance": null,
37 "undefined": true,
38 "ambiguous": false
39 }
40 ],
41 "duration_ms": 1.2,
42 "exit_code": 0
43 }
44
45 JSON error schema (``--json`` mode)::
46
47 {
48 "status": "error",
49 "error": "<human-readable message>",
50 "exit_code": <int>
51 }
52
53 In ``--json`` mode all errors go to stdout as JSON — stderr will be empty.
54 Agents should parse stdout and check ``exit_code``.
55
Text output (default, when ``--json`` is not given)::
57
58 <sha256> main~3
59 <sha256> undefined
60
61 With ``--name-only``::
62
63 main~3
64 undefined
65
66 Output contract
67 ---------------
68
69 - ``exit_code`` 0: all names resolved (some may be ``undefined`` or ``ambiguous``).
- ``exit_code`` 1: ``--max-walk`` below 1; no commit IDs provided; non-hex input rejected.
71 - ``exit_code`` 3: I/O error reading commit records.
72 - ``elapsed()``: wall-clock milliseconds for the BFS walk and resolution.
73
74 Agent use
75 ---------
76
77 Resolve HEAD commit to a name::
78
79 muse symbolic-ref HEAD --json \\
80 | python3 -c "import sys,json; print(json.load(sys.stdin)['commit_id'])" \\
        | xargs muse name-rev --name-only
82
83 Resolve only relative to main (ignore other branches)::
84
85 muse name-rev <sha> --branches main --json
86
87 Resolve a batch of short SHAs piped from another command::
88
89 muse log --max 10 --json \\
90 | python3 -c "import sys,json; [print(c['commit_id'][:12]) for c in json.load(sys.stdin)]" \\
91 | muse name-rev --stdin --json
92
93 Check if any of the last 5 commits are unreachable from any branch::
94
95 muse name-rev --stdin --json \\
96 | python3 -c "import sys,json; r=json.load(sys.stdin)['results']; print(any(x['undefined'] for x in r))"
97 """
98
99 import argparse
100 import fnmatch
101 import json
102 import logging
103 import pathlib
104 import sys
105 from collections import deque
106 from typing import TypedDict
107
108 from muse.core.types import long_id, short_id
109 from muse.core.errors import ExitCode
110 from muse.core.refs import iter_branch_refs
111 from muse.core.envelope import EnvelopeJson, make_envelope
112 from muse.core.repo import require_repo
113 from muse.core.commits import read_commit
114 from muse.core.validation import sanitize_display, validate_object_id
115 from muse.core.timing import start_timer
116
# Maps a commit ID to the ``(branch name, parent-hop distance)`` pair that
# names it; distance 0 means the commit is that branch's tip.
type _NameMap = dict[str, tuple[str, int]]
logger = logging.getLogger(__name__)

# Default for ``--max-walk`` and for ``_build_name_map(max_walk=...)``.
_MAX_WALK = 50_000  # Safety ceiling — prevents runaway on pathological graphs
121
class _NameRevEntry(TypedDict):
    """One result row per supplied commit ID, emitted in the ``results`` list."""

    commit_id: str | None  # resolved full commit ID; None when undefined or ambiguous
    input: str  # the ID exactly as the caller supplied it
    name: str | None  # "<branch>" at distance 0, otherwise "<branch>~N"; None when unresolved
    branch: str | None  # branch whose tip names the commit; None when unresolved
    distance: int | None  # parent hops from that branch tip; None when unresolved
    undefined: bool  # True when nothing in the name map matched the input
    ambiguous: bool  # True when a prefix matched more than one commit
130
class _NameRevJson(EnvelopeJson):
    """Stable JSON envelope for name-rev results.

    Extends ``EnvelopeJson`` (the fields produced by ``make_envelope``) with
    the per-input result rows.
    """
    results: list[_NameRevEntry]  # one entry per input, in input order
134
class _NameRevErrorJson(EnvelopeJson):
    """Error payload for usage/internal errors in --json mode.

    Built by ``_emit_error`` on top of the ``make_envelope`` fields.
    """
    status: str  # "error"
    error: str  # human-readable message describing what went wrong
139
def _emit_error(json_out: bool, msg: str, code: "ExitCode", elapsed: float) -> None:
    """Report *msg* to the user and terminate by raising ``SystemExit(code)``.

    This function never returns normally.

    Args:
        json_out: When true, the error is written to stdout as a JSON
                  payload so that agents always receive parseable output.
                  Otherwise a human-readable line is written to stderr.
        msg:      Human-readable description of the failure.
        code:     Exit code; used both in the JSON envelope (as an int) and
                  as the ``SystemExit`` argument.
        elapsed:  Timing value forwarded to ``make_envelope``.

    Raises:
        SystemExit: Always, carrying *code*.
    """
    if not json_out:
        # Text mode: diagnostics belong on stderr, stdout stays clean.
        print(f"❌ {msg}", file=sys.stderr)
        raise SystemExit(code)

    # JSON mode: envelope fields first, then the error-specific fields.
    envelope = make_envelope(elapsed, exit_code=int(code))
    payload = _NameRevErrorJson(**envelope, status="error", error=msg)
    print(json.dumps(payload))
    raise SystemExit(code)
155
def _build_name_map(
    root: pathlib.Path,
    targets: set[str],
    branch_pattern: str | None = None,
    max_walk: int = _MAX_WALK,
) -> _NameMap:
    """Return a map of commit_id → (branch, distance) for reachable commits.

    Runs a multi-source breadth-first walk seeded with every branch tip
    yielded by ``iter_branch_refs(root)`` (in sorted order, so ties between
    branches are broken deterministically). A commit is recorded the first
    time it is discovered and never overwritten, so each commit keeps the
    smallest hop count found for it.

    The walk stops as soon as every entry of *targets* has been discovered,
    or after *max_walk* commits have been dequeued, whichever comes first.
    When *targets* is empty, or contains an ID that is never discovered
    (for example a short prefix), the walk continues until the queue is
    exhausted or *max_walk* is hit. Hitting *max_walk* with commits still
    queued is logged at debug level, because commits beyond the cut-off
    will be missing from the returned map.

    Refs whose tip fails ``validate_object_id`` are skipped. Commits that
    cannot be read (``OSError``, ``ValueError``, ``KeyError``) or for which
    ``read_commit`` returns ``None`` are kept in the map but not expanded.

    Args:
        root:           Repository root path.
        targets:        Commit IDs to find; used only for the early exit.
                        The set is not modified.
        branch_pattern: Optional fnmatch glob applied to branch names to
                        restrict the BFS seeds. ``None`` seeds from all
                        branches.
        max_walk:       Maximum number of commits to dequeue. Defaults to
                        :data:`_MAX_WALK`.

    Returns:
        The map of every commit discovered before the walk stopped.
    """
    # Queue items are (commit_id, branch_name, distance).
    queue: deque[tuple[str, str, int]] = deque()
    visited: _NameMap = {}

    for branch, tip_id in sorted(iter_branch_refs(root)):
        if branch_pattern is not None and not fnmatch.fnmatch(branch, branch_pattern):
            continue
        try:
            validate_object_id(tip_id)
        except ValueError:
            logger.debug(
                "name-rev: skipping ref %s — invalid commit ID %r",
                branch,
                short_id(tip_id),
            )
            continue
        if tip_id not in visited:
            visited[tip_id] = (branch, 0)
            queue.append((tip_id, branch, 0))

    # A commit's (branch, distance) is final the moment it is discovered, so
    # targets are ticked off at discovery time instead of when dequeued. This
    # avoids reading a further BFS level of commits.
    pending = set(targets).difference(visited)
    stop_when_found = bool(targets)  # an empty target set means "walk everything"
    if stop_when_found and not pending:
        return visited

    steps = 0
    while queue and steps < max_walk:
        cid, branch, dist = queue.popleft()
        steps += 1

        try:
            record = read_commit(root, cid)
        except (OSError, ValueError, KeyError) as exc:
            logger.debug("name-rev: cannot read commit %s: %s", short_id(cid), exc)
            continue

        if record is None:
            continue

        for parent_id in (record.parent_commit_id, record.parent2_commit_id):
            if parent_id and parent_id not in visited:
                visited[parent_id] = (branch, dist + 1)
                queue.append((parent_id, branch, dist + 1))
                pending.discard(parent_id)

        if stop_when_found and not pending:
            return visited

    if queue:
        # The loop ended on the step ceiling, not on exhaustion: the map is
        # incomplete and unresolved inputs may simply be beyond the cut-off.
        logger.debug(
            "name-rev: walk stopped after %d steps (max_walk=%d); "
            "%d queued commit(s) were not expanded",
            steps,
            max_walk,
            len(queue),
        )

    return visited
228
def _resolve_prefix(
    cid_input: str,
    name_map: _NameMap,
) -> tuple[str | None, bool]:
    """Resolve a (possibly short) commit ID against the BFS name map.

    Resolution order:

    1. *cid_input* is itself a key of *name_map*.
    2. ``long_id(cid_input)`` (the ``sha256:``-prefixed form) is a key.
    3. The lower-cased form is a key.
    4. Case-insensitive prefix match over all keys.

    Matching is case-insensitive because callers accept hex digits in either
    case; without it an upper-case ID could never match a lower-case key.
    An input with no hex digits at all (empty, or a bare ``sha256:``) is
    treated as "no match" rather than as a prefix of every commit.

    Args:
        cid_input: Commit ID as supplied by the caller: bare hex
                   (e.g. ``9f7c``) or ``sha256:``-prefixed, full or short.
        name_map:  Map produced by the BFS walk, keyed by commit ID.

    Returns:
        ``(full_commit_id, ambiguous)`` where ``full_commit_id`` is the
        matching key of *name_map* (``None`` if there is no unique match)
        and ``ambiguous`` is ``True`` when the prefix matched more than one
        key.
    """
    # Exact match first (covers full, canonically written IDs).
    if cid_input in name_map:
        return cid_input, False

    # Nothing to match on. ``long_id(..., strip=True)`` is relied on to yield
    # only the hex portion, the same way the input validation uses it.
    if not cid_input or not long_id(cid_input, strip=True):
        return None, False

    # Normalise bare hex inputs to the prefixed form used by the map keys.
    lookup = long_id(cid_input)
    if lookup in name_map:
        return lookup, False

    needle = lookup.lower()
    if needle in name_map:
        return needle, False

    # Prefix scan; stop at the second hit since that already proves ambiguity.
    match: str | None = None
    for key in name_map:
        if not key.lower().startswith(needle):
            continue
        if match is not None:
            return None, True
        match = key
    return match, False
260
def register(subparsers: "argparse._SubParsersAction[argparse.ArgumentParser]") -> None:
    """Attach the ``name-rev`` subcommand, its options and its handler.

    The module docstring is used verbatim as the command description, and
    :func:`run` is installed as the ``func`` default so the dispatcher can
    invoke it with the parsed namespace.
    """
    cmd = subparsers.add_parser(
        "name-rev",
        help="Map commit IDs to descriptive branch-relative names.",
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    add = cmd.add_argument

    # --- inputs -----------------------------------------------------------
    commit_ids_help = (
        "One or more commit IDs (full or short prefix ≥ 4 chars) to map "
        "to branch-relative names. Combine with ``--stdin`` to read "
        "additional IDs from standard input."
    )
    add("commit_ids", nargs="*", help=commit_ids_help)

    stdin_help = (
        "Read additional commit IDs from standard input (one per line). "
        "Blank lines and lines starting with '#' are ignored."
    )
    add("--stdin", dest="from_stdin", action="store_true", help=stdin_help)

    # --- walk control -----------------------------------------------------
    branches_help = (
        "Restrict BFS seeds to branch names matching this fnmatch glob "
        "(e.g. 'main', 'feat/*'). Commits unreachable from matching "
        "branches will appear as ``undefined``."
    )
    add(
        "--branches",
        dest="branch_pattern",
        metavar="GLOB",
        default=None,
        help=branches_help,
    )

    max_walk_help = (
        f"Maximum BFS steps before stopping (default: {_MAX_WALK:,}). "
        "Reduce for large repos where a rough answer is acceptable."
    )
    add(
        "--max-walk",
        dest="max_walk",
        metavar="N",
        type=int,
        default=_MAX_WALK,
        help=max_walk_help,
    )

    # --- output shaping ---------------------------------------------------
    add(
        "--name-only",
        dest="name_only",
        action="store_true",
        help="Emit only the name (or the undefined placeholder), not the commit ID.",
    )
    add(
        "--undefined", "-u",
        dest="undefined_name",
        metavar="STRING",
        default="undefined",
        help="String to emit when a commit cannot be named. (default: 'undefined')",
    )
    add(
        "--json", "-j",
        dest="json_out",
        action="store_true",
        help="Emit machine-readable JSON instead of human text.",
    )

    cmd.set_defaults(func=run)
327
# Characters accepted in the hex portion of a commit ID (either case).
_HEX_DIGITS = frozenset("0123456789abcdefABCDEF")


def _is_commit_id_like(raw: str) -> bool:
    """Return True when *raw* is non-empty hex, optionally ``sha256:``-prefixed."""
    if not raw:
        return False
    hex_part = long_id(raw, strip=True)
    # ``all()`` is vacuously true for an empty string, so an input whose hex
    # portion is empty (e.g. a bare "sha256:") has to be rejected explicitly.
    # Otherwise it would later act as a prefix of every commit.
    return bool(hex_part) and all(ch in _HEX_DIGITS for ch in hex_part)


def _entry_for(cid_input: str, name_map: _NameMap) -> _NameRevEntry:
    """Build the result row for one input by resolving it against *name_map*."""
    full_id, ambiguous = _resolve_prefix(cid_input, name_map)
    if ambiguous or full_id is None:
        # Unresolved: either several commits matched (ambiguous) or none did
        # (undefined). The two flags are mutually exclusive.
        return _NameRevEntry(
            commit_id=None,
            input=cid_input,
            name=None,
            branch=None,
            distance=None,
            undefined=not ambiguous,
            ambiguous=ambiguous,
        )
    branch, dist = name_map[full_id]
    return _NameRevEntry(
        commit_id=full_id,
        input=cid_input,
        # A branch tip is named by the bare branch, without a "~0" suffix.
        name=branch if dist == 0 else f"{branch}~{dist}",
        branch=branch,
        distance=dist,
        undefined=False,
        ambiguous=False,
    )


def run(args: argparse.Namespace) -> None:
    """Map commit IDs to descriptive branch-relative names.

    For each commit ID, finds the branch tip that is closest (fewest parent
    hops) and returns a name of the form ``<branch>~N``. When N is 0 the
    commit is the branch tip itself and the name is just ``<branch>``.
    Short SHA prefixes are resolved automatically.

    Inputs come from the positional arguments and, with ``--stdin``, from
    standard input (one ID per line; blank lines and ``#`` lines skipped).
    Text is printed by default; ``--json`` switches to the JSON envelope.

    Agent quickstart
    ----------------
    ::

        muse name-rev sha256:<id> --json
        muse name-rev sha256:<a> sha256:<b> --json
        muse name-rev sha256:<id> --branches "feat/*" --json

    JSON fields
    -----------
    results     List of result objects per input: ``commit_id``, ``input``,
                ``name`` (``<branch>``, ``<branch>~N`` or ``null``),
                ``branch``, ``distance``, ``undefined`` (``true`` when no
                branch found), ``ambiguous``.

    Exit codes
    ----------
    0   Success (individual entries may have ``undefined: true`` or
        ``ambiguous: true``).
    1   ``--max-walk`` < 1, no commit IDs supplied, or an invalid commit ID
        (``ExitCode.USER_ERROR``).
    2   Not inside a Muse repository.
    3   I/O error while building the name map
        (``ExitCode.INTERNAL_ERROR``).

    Args:
        args: Parsed namespace carrying the attributes set up by
              :func:`register`.

    Raises:
        SystemExit: Via ``_emit_error`` for every error case above.
    """
    elapsed = start_timer()

    json_out: bool = args.json_out
    max_walk: int = args.max_walk
    name_only: bool = args.name_only
    undefined_name: str = args.undefined_name

    # NOTE: ``_emit_error`` always raises SystemExit, so each call below ends
    # the function.
    if max_walk < 1:
        _emit_error(json_out, f"--max-walk must be >= 1, got {max_walk}", ExitCode.USER_ERROR, elapsed)

    # Collect all commit ID inputs: positional first, then stdin.
    all_inputs: list[str] = list(args.commit_ids)
    if args.from_stdin:
        for raw in sys.stdin:
            line = raw.strip()
            if line and not line.startswith("#"):
                all_inputs.append(line)

    if not all_inputs:
        _emit_error(json_out, "At least one commit ID is required.", ExitCode.USER_ERROR, elapsed)

    # Validate inputs: hex characters only (short or full), optionally
    # prefixed with "sha256:", and with at least one hex digit.
    invalid = [s for s in all_inputs if not _is_commit_id_like(s)]
    if invalid:
        _emit_error(
            json_out,
            (
                f"Invalid commit ID(s): {invalid[:3]!r}. "
                "Commit IDs must be hex characters only, optionally prefixed with 'sha256:'."
            ),
            ExitCode.USER_ERROR,
            elapsed,
        )

    root = require_repo()

    # Build name map — BFS from all (filtered) branch tips. Targets are
    # normalised with ``long_id`` so a full ID given as bare hex can trigger
    # the early exit. Short prefixes never equal a key, so they still force
    # the complete walk that ambiguity detection needs.
    try:
        name_map = _build_name_map(
            root,
            {long_id(s) for s in all_inputs},
            branch_pattern=args.branch_pattern,
            max_walk=max_walk,
        )
    except OSError as exc:
        logger.debug("name-rev I/O error: %s", exc)
        _emit_error(json_out, str(exc), ExitCode.INTERNAL_ERROR, elapsed)

    results: list[_NameRevEntry] = [_entry_for(s, name_map) for s in all_inputs]

    if json_out:
        print(json.dumps(_NameRevJson(
            **make_envelope(elapsed),
            results=[dict(r) for r in results],
        )))
        return

    # Text mode: one line per input, sanitised before printing because both
    # the input and branch names are externally controlled strings.
    for r in results:
        if r["ambiguous"]:
            display_name = "(ambiguous)"
        elif r["name"] is not None:
            display_name = r["name"]
        else:
            display_name = undefined_name
        if name_only:
            print(sanitize_display(display_name))
        else:
            # Fall back to the raw input when no commit ID was resolved.
            display_cid = r["commit_id"] or r["input"]
            print(f"{sanitize_display(display_cid)} {sanitize_display(display_name)}")
File History 1 commit
sha256:e6465e8a9b7fa8e6223ed4a3576e96c568c913ae2caeb9c31f15e7a81b250b40 docs: add | jq convention to --json section of agent-guide Sonnet 4.6 1 day ago