gabriel / muse public
rev_list.py python
583 lines 19.7 KB
Raw
sha256:18b983389ee1b55900fcd799bfbb496552d2e3ecded9d18cefbfef188947a12e chore: remove blob-debug test marker file Sonnet 4.6 1 day ago
1 """muse rev-list — emit a filtered stream of commit IDs.
2
3 ``rev-list`` is the raw commit-ID primitive that backs agent scripting,
4 counting, ancestry queries, and pipeline composition. Where ``muse log``
5 is the human-facing history viewer, ``rev-list`` is the machine-facing
6 stream — one commit ID per line (or a single integer for ``--count``).
7
8 Usage examples::
9
10 muse rev-list HEAD # all commits from HEAD to root
11 muse rev-list dev..feat/new # commits on feat/new not on dev
12 muse rev-list -n 10 HEAD # last 10 commit IDs
13 muse rev-list --count HEAD # single integer count
14 muse rev-list --first-parent HEAD # linear chain only (skip merge parents)
15 muse rev-list --no-merges HEAD # skip merge commits
16 muse rev-list --merges HEAD # only merge commits
17 muse rev-list --author alice HEAD # commits by alice (substring match)
18 muse rev-list --after 2026-01-01 HEAD # commits after a date (UTC)
19 muse rev-list --before 2026-12-31 HEAD # commits before a date (UTC)
20 muse rev-list --touches src/ HEAD # commits touching anything under src/
21 muse rev-list --reverse HEAD # oldest-first
22 muse rev-list --json HEAD # JSON array: {"commit_ids": [...]}
23
24 Range syntax (``A..B``)
25 Emits commits reachable from *B* but not reachable from *A*. Equivalent
26 to ``git rev-list A..B``. Useful for counting divergence between branches::
27
28 muse rev-list --count main..feat # how many commits ahead of main
29
30 ``--count`` mode
31 Increments a counter rather than accumulating a list — memory is O(1)
32 with respect to history depth.
33
34 ``--touches <path>``
35 Path is matched as a prefix: ``src/`` matches any file whose POSIX path
36 starts with ``src/``, while ``src/foo.py`` matches only that exact file.
37 Uses a per-invocation manifest cache so each snapshot is read at most once.
38
39 Exit codes:
40 0 — success (including empty result for ``--count 0``).
41 1 — no commits matched (non-empty history but all filtered out).
42 2 — usage error (bad date, invalid ref, traversal attempt in ``--touches``).
43 3 — internal error (repository not found).
44 """
45
46 import argparse
47 import datetime
48 import json
49 import logging
50 import collections
51 import pathlib
52 import re
53 import sys
54 from typing import Callable, TypedDict
55
56 from muse.cli.config import get_limit
57 from muse.core.envelope import EnvelopeJson, make_envelope
58 from muse.core.errors import ExitCode
59 from muse.core.graph import ancestor_ids
60 from muse.core.repo import require_repo
61 from muse.core.refs import (
62 get_head_commit_id,
63 read_current_branch,
64 )
65 from muse.core.commits import (
66 CommitRecord,
67 read_commit,
68 resolve_commit_ref,
69 )
70 from muse.core.snapshots import get_commit_snapshot_manifest
71 from muse.core.validation import sanitize_display
72 from muse.core.timing import start_timer
73
74 logger = logging.getLogger(__name__)
75
76 # ---------------------------------------------------------------------------
77 # Types
78 # ---------------------------------------------------------------------------
79
80 type _FileManifest = dict[str, str]
81 type _ManifestCache = dict[str, _FileManifest]
82 type _Predicate = Callable[[CommitRecord], bool]
83
84 class _RevListJson(EnvelopeJson):
85 """JSON output for ID-list mode."""
86
87 commit_ids: list[str]
88
89 class _RevListCountJson(EnvelopeJson):
90 """JSON output for --count mode."""
91
92 count: int
93
94 # ---------------------------------------------------------------------------
95 # Internal helpers
96 # ---------------------------------------------------------------------------
97
98 def _parse_range(ref: str) -> tuple[str | None, str]:
99 """Parse a ref string into ``(exclude_ref, include_ref)``.
100
101 ``"A..B"`` → ``("A", "B")``. A plain ref with no ``..`` → ``(None, ref)``.
102
103 Args:
104 ref: Raw ref string from the CLI argument.
105
106 Returns:
107 A two-tuple ``(exclude, include)`` where ``exclude`` is ``None`` for
108 single-ref inputs.
109 """
110 if ".." in ref:
111 parts = ref.split("..", 1)
112 return parts[0].strip(), parts[1].strip()
113 return None, ref
114
115 def _parse_date(value: str) -> datetime.datetime:
116 """Parse *value* as a UTC date (``YYYY-MM-DD``) or ISO datetime.
117
118 Args:
119 value: Date string supplied via ``--after`` or ``--before``.
120
121 Returns:
122 A timezone-aware :class:`datetime.datetime` in UTC.
123
124 Raises:
125 ValueError: If *value* cannot be parsed as a recognised date format.
126 """
127 for fmt in ("%Y-%m-%d", "%Y-%m-%dT%H:%M:%S", "%Y-%m-%dT%H:%M:%SZ"):
128 try:
129 dt = datetime.datetime.strptime(value, fmt)
130 return dt.replace(tzinfo=datetime.timezone.utc)
131 except ValueError:
132 continue
133 raise ValueError(
134 f"Unrecognised date format: {value!r}. "
135 "Use YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS."
136 )
137
138 def _get_manifest(
139 root: pathlib.Path,
140 commit_id: str | None,
141 cache: _ManifestCache,
142 ) -> _FileManifest:
143 """Return the snapshot manifest for *commit_id*, using *cache* to avoid re-reads.
144
145 Each commit_id is fetched from disk at most once per *cache* lifetime.
146 An empty dict is returned (and cached) for ``None`` commit_ids so callers
147 can treat initial commits uniformly.
148
149 Args:
150 root: Repository root.
151 commit_id: Commit whose snapshot manifest to fetch, or ``None``.
152 cache: Shared manifest cache dict; mutated in place.
153
154 Returns:
155 The snapshot manifest mapping POSIX path → object_id.
156 """
157 if commit_id is None:
158 return {}
159 if commit_id not in cache:
160 cache[commit_id] = get_commit_snapshot_manifest(root, commit_id) or {}
161 return cache[commit_id]
162
163 def _commit_touches_path(
164 root: pathlib.Path,
165 commit: CommitRecord,
166 path: str,
167 cache: _ManifestCache,
168 ) -> bool:
169 """Return ``True`` if *commit* added, modified, or removed *path*.
170
171 *path* is matched as a prefix: ``"src/"`` matches any file under ``src/``,
172 while ``"src/foo.py"`` matches only that exact file. Trailing slashes are
173 normalised away before comparison.
174
175 Args:
176 root: Repository root.
177 commit: The commit to inspect.
178 path: POSIX path or prefix to match.
179 cache: Shared manifest cache; passed through to :func:`_get_manifest`.
180
181 Returns:
182 ``True`` when at least one file matching *path* changed between this
183 commit and its first parent.
184 """
185 norm = path.rstrip("/")
186 current = _get_manifest(root, commit.commit_id, cache)
187 parent = _get_manifest(root, commit.parent_commit_id, cache)
188 for p in set(current) | set(parent):
189 if p == norm or p.startswith(f"{norm}/"):
190 if current.get(p) != parent.get(p):
191 return True
192 return False
193
194 def _exclude_set(root: pathlib.Path, start_id: str | None) -> set[str]:
195 """Return the set of all commit IDs reachable from *start_id*.
196
197 Used to implement ``A..B`` range exclusion: compute ancestors of *A*,
198 then walk from *B* stopping at any commit in this set.
199 """
200 if start_id is None:
201 return set()
202 return ancestor_ids(root, start_id)
203
204 def _walk_from(
205 root: pathlib.Path,
206 start_id: str,
207 *,
208 exclude: set[str],
209 first_parent: bool,
210 max_count: int,
211 predicates: list[_Predicate],
212 count_mode: bool,
213 ) -> tuple[list[str], int]:
214 """BFS walk from *start_id*, applying filters, returning matching commit IDs.
215
216 In ``count_mode`` the function increments a counter rather than
217 accumulating IDs, keeping memory O(1) with respect to history depth.
218
219 Args:
220 root: Repository root.
221 start_id: Commit ID to start walking from.
222 exclude: Set of commit IDs to treat as a boundary — commits in
223 this set and their ancestors are not emitted.
224 first_parent: When ``True`` only follow ``parent_commit_id`` (the
225 first/linear parent), skipping ``parent2_commit_id``.
226 max_count: Stop after emitting this many matching commits. ``0``
227 means no limit.
228 predicates: List of filter callables; a commit is emitted only when
229 all predicates return ``True``.
230 count_mode: When ``True`` accumulate a counter instead of a list.
231
232 Returns:
233 A two-tuple ``(commit_ids, count)`` where ``commit_ids`` is the list
234 of matching commit IDs (empty in count_mode) and ``count`` is the
235 number of matching commits.
236 """
237 ids: list[str] = []
238 count = 0
239 visited: set[str] = set(exclude)
240 queue: collections.deque[str] = collections.deque([start_id])
241
242 while queue:
243 cid = queue.popleft()
244 if cid in visited:
245 continue
246 visited.add(cid)
247 try:
248 c = read_commit(root, cid)
249 except Exception:
250 continue
251 if all(pred(c) for pred in predicates):
252 count += 1
253 if not count_mode:
254 ids.append(c.commit_id)
255 if max_count > 0 and count >= max_count:
256 break
257 if c.parent_commit_id:
258 queue.append(c.parent_commit_id)
259 if not first_parent and c.parent2_commit_id:
260 queue.append(c.parent2_commit_id)
261
262 return ids, count
263
264 # ---------------------------------------------------------------------------
265 # CLI registration
266 # ---------------------------------------------------------------------------
267
268 def register(
269 subparsers: "argparse._SubParsersAction[argparse.ArgumentParser]",
270 ) -> None:
271 """Register the ``muse rev-list`` subcommand and its flags."""
272 parser = subparsers.add_parser(
273 "rev-list",
274 help="Emit a filtered stream of commit IDs.",
275 description=__doc__,
276 formatter_class=argparse.RawDescriptionHelpFormatter,
277 )
278 parser.add_argument(
279 "ref",
280 nargs="?",
281 default="HEAD",
282 help=(
283 "Ref to walk from, or 'A..B' range (commits on B not on A). "
284 "Defaults to HEAD."
285 ),
286 )
287 parser.add_argument(
288 "--max-count",
289 type=int,
290 default=0,
291 dest="max_count",
292 metavar="N",
293 help="Stop after emitting N commits. 0 = no limit (default).",
294 )
295 parser.add_argument(
296 "--count",
297 action="store_true",
298 help="Print a single integer — number of matching commits — instead of IDs.",
299 )
300 parser.add_argument(
301 "--first-parent",
302 action="store_true",
303 dest="first_parent",
304 help="Follow only the first-parent chain (skip merge parents).",
305 )
306 parser.add_argument(
307 "--no-merges",
308 action="store_true",
309 dest="no_merges",
310 help="Exclude merge commits (commits with two parents).",
311 )
312 parser.add_argument(
313 "--merges",
314 action="store_true",
315 help="Include only merge commits.",
316 )
317 parser.add_argument(
318 "--author",
319 default=None,
320 metavar="PATTERN",
321 help="Only commits whose author matches PATTERN (substring or regex).",
322 )
323 parser.add_argument(
324 "--after",
325 default=None,
326 metavar="DATE",
327 help="Only commits after DATE (YYYY-MM-DD, UTC).",
328 )
329 parser.add_argument(
330 "--before",
331 default=None,
332 metavar="DATE",
333 help="Only commits before DATE (YYYY-MM-DD, UTC).",
334 )
335 parser.add_argument(
336 "--touches",
337 default=None,
338 metavar="PATH",
339 help=(
340 "Only commits that changed PATH. Matched as a prefix: 'src/' "
341 "matches any file under src/."
342 ),
343 )
344 parser.add_argument(
345 "--reverse",
346 action="store_true",
347 help="Output commits oldest-first instead of newest-first.",
348 )
349 parser.add_argument(
350 "--json", "-j",
351 action="store_true",
352 dest="json_out",
353 help='Emit {"commit_ids": [...]} instead of newline-delimited IDs.',
354 )
355 parser.set_defaults(func=run)
356
357 # ---------------------------------------------------------------------------
358 # Command implementation
359 # ---------------------------------------------------------------------------
360
361 def run(args: argparse.Namespace) -> None:
362 """Emit a filtered stream of commit IDs.
363
364 Default output is one commit ID per line, newest-first. Pass ``--count``
365 to receive a single integer. Pass ``--json`` for a stable JSON envelope
366 suitable for agent pipeline consumption. Range syntax (``A..B``) emits
367 commits reachable from *B* but not from *A*.
368
369 Agent quickstart::
370
371 muse rev-list HEAD --json
372 muse rev-list -n 10 HEAD --json
373 muse rev-list --count main..feat --json
374 muse rev-list --author alice --after 2026-01-01 HEAD --json
375
376 JSON fields (ID-list mode)::
377
378 commit_ids list[str] Commit IDs matching the filters, newest-first
379
380 JSON fields (--count mode)::
381
382 count int Number of matching commits
383
384 Exit codes::
385
386 0 Success (including empty result in JSON mode).
387 1 No commits matched (non-empty history, all filtered; plain-text only).
388 2 Usage error (bad date, invalid ref, traversal in --touches).
389 3 Repository not found.
390 """
391 ref_arg: str = args.ref or "HEAD"
392 max_count: int = args.max_count
393 count_mode: bool = args.count
394 first_parent: bool = args.first_parent
395 no_merges: bool = args.no_merges
396 merges_only: bool = args.merges
397 author_pat: str | None = args.author
398 after_str: str | None = args.after
399 before_str: str | None = args.before
400 touches_path: str | None = args.touches
401 reverse: bool = args.reverse
402 json_out: bool = args.json_out
403
404 elapsed = start_timer()
405
406 def _emit_error(msg: str, code: int, error_key: str = "error") -> None:
407 """Emit a structured error (JSON when --json, stderr otherwise) and exit."""
408 if json_out:
409 print(json.dumps({
410 "error": error_key,
411 "message": msg,
412 "commit_ids": [],
413 "count": 0,
414 "duration_ms": elapsed(),
415 "exit_code": code,
416 }))
417 else:
418 print(f"❌ {msg}", file=sys.stderr)
419 raise SystemExit(code)
420
421 # --merges and --no-merges are mutually exclusive.
422 if no_merges and merges_only:
423 _emit_error(
424 "--no-merges and --merges are mutually exclusive.",
425 ExitCode.USER_ERROR,
426 "mutually_exclusive_flags",
427 )
428
429 # Validate --touches path: reject traversal sequences.
430 if touches_path is not None:
431 norm = touches_path.replace("\\", "/")
432 if norm.startswith("/") or ".." in norm.split("/"):
433 _emit_error(
434 f"--touches path {sanitize_display(touches_path)!r} must be a "
435 "relative path with no traversal sequences.",
436 ExitCode.USER_ERROR,
437 "path_traversal",
438 )
439
440 # Parse date filters up-front so errors are reported before any I/O.
441 after_dt: datetime.datetime | None = None
442 before_dt: datetime.datetime | None = None
443 if after_str is not None:
444 try:
445 after_dt = _parse_date(after_str)
446 except ValueError as exc:
447 _emit_error(f"--after: {exc}", ExitCode.USER_ERROR, "bad_date")
448 if before_str is not None:
449 try:
450 before_dt = _parse_date(before_str)
451 except ValueError as exc:
452 _emit_error(f"--before: {exc}", ExitCode.USER_ERROR, "bad_date")
453
454 # Compile --author pattern. Treat compilation errors as literal substring
455 # fallback rather than crashing — this matches user expectation when they
456 # pass a name like "O'Brien" which contains no real regex metacharacters
457 # but might be malformed in edge cases.
458 author_re: re.Pattern[str] | None = None
459 if author_pat is not None:
460 try:
461 author_re = re.compile(author_pat, re.IGNORECASE)
462 except re.error:
463 # Literal substring fallback.
464 escaped = re.escape(author_pat)
465 author_re = re.compile(escaped, re.IGNORECASE)
466
467 root = require_repo()
468
469 # Resolve start and optional exclude ref.
470 exclude_ref_str, include_ref_str = _parse_range(ref_arg)
471
472 # Resolve include ref → commit ID.
473 try:
474 branch = read_current_branch(root)
475 except ValueError as exc:
476 _emit_error(str(exc), ExitCode.INTERNAL_ERROR, "internal_error")
477
478 include_commit = resolve_commit_ref(root, branch, include_ref_str)
479 if include_commit is None:
480 # Try treating the ref as a branch name directly.
481 tip = get_head_commit_id(root, include_ref_str)
482 if tip is not None:
483 include_commit = read_commit(root, tip)
484 if include_commit is None:
485 _emit_error(
486 f"Cannot resolve ref {sanitize_display(include_ref_str)!r}.",
487 ExitCode.USER_ERROR,
488 "bad_ref",
489 )
490
491 # Compute the exclusion set for A..B ranges.
492 excl: set[str] = set()
493 if exclude_ref_str:
494 exclude_commit = resolve_commit_ref(root, branch, exclude_ref_str)
495 if exclude_commit is None:
496 tip = get_head_commit_id(root, exclude_ref_str)
497 if tip is not None:
498 exclude_commit = read_commit(root, tip)
499 if exclude_commit is None:
500 _emit_error(
501 f"Cannot resolve exclude ref {sanitize_display(exclude_ref_str)!r}.",
502 ExitCode.USER_ERROR,
503 "bad_ref",
504 )
505 excl = _exclude_set(root, exclude_commit.commit_id)
506
507 # Build filter predicates.
508 manifest_cache: _ManifestCache = {}
509 predicates: list[_Predicate] = []
510
511 if no_merges:
512 predicates.append(lambda c: c.parent2_commit_id is None)
513 if merges_only:
514 predicates.append(lambda c: c.parent2_commit_id is not None)
515 if author_re is not None:
516 _pat = author_re # capture for closure
517 predicates.append(lambda c: bool(_pat.search(c.author or "")))
518 if after_dt is not None:
519 _after = after_dt # capture for closure
520 # Parens required: `A if B else C > D` parses as `A if B else (C > D)`,
521 # not `(A if B else C) > D`. --before already uses parens; match it here.
522 predicates.append(
523 lambda c: (
524 c.committed_at.replace(tzinfo=datetime.timezone.utc)
525 if c.committed_at.tzinfo is None
526 else c.committed_at
527 ) > _after
528 )
529 if before_dt is not None:
530 _before = before_dt # capture for closure
531 predicates.append(
532 lambda c: (
533 c.committed_at.replace(tzinfo=datetime.timezone.utc)
534 if c.committed_at.tzinfo is None
535 else c.committed_at
536 ) < _before
537 )
538 if touches_path is not None:
539 _path = touches_path.rstrip("/")
540 _cache = manifest_cache
541 _root = root
542 predicates.append(
543 lambda c: _commit_touches_path(_root, c, _path, _cache)
544 )
545
546 # Walk the commit graph.
547 walk_cap = get_limit("max_walk_commits", root)
548 effective_max = max_count if max_count > 0 else walk_cap
549
550 ids, count = _walk_from(
551 root,
552 include_commit.commit_id,
553 exclude=excl,
554 first_parent=first_parent,
555 max_count=effective_max,
556 predicates=predicates,
557 count_mode=count_mode,
558 )
559
560 # Emit output.
561 if count_mode:
562 if json_out:
563 print(json.dumps(_RevListCountJson(**make_envelope(elapsed), count=count)))
564 else:
565 print(count)
566 return
567
568 if reverse:
569 ids = list(reversed(ids))
570
571 exit_code_val = 0 if ids else 1
572 if json_out:
573 print(json.dumps(_RevListJson(
574 **make_envelope(elapsed, exit_code=exit_code_val),
575 commit_ids=ids,
576 )))
577 return
578
579 if ids:
580 print("\n".join(ids))
581 else:
582 # Empty result — exit 1 so scripts can detect "no matches" vs "no repo".
583 raise SystemExit(1)
File History 7 commits
sha256:18b983389ee1b55900fcd799bfbb496552d2e3ecded9d18cefbfef188947a12e chore: remove blob-debug test marker file Sonnet 4.6 1 day ago
sha256:e452ad9a6ace6ccc6d875a35e06caf9da5576a970c1c36133b69a891ce5fefa8 chore: prebuild timing test Sonnet 4.6 8 days ago
sha256:0008ab6695e3e064b3e236b24fd19e538fef6a588eb0d211622f4466d919c0b1 merge: pull staging/dev — advance to 0.2.0rc12 Sonnet 4.6 patch 10 days ago
sha256:9c33d61749fff814c5226d5386aa2af7064c2c02788594a25fdd709358132eea fix: _PROPOSAL_PREFIX_RESOLVE_LIMIT 200 → 100 to match hub … Sonnet 4.6 21 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 24 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 30 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor 31 days ago