gabriel / muse public
snapshot_diff.py python
654 lines 21.7 KB
Raw
sha256:18b983389ee1b55900fcd799bfbb496552d2e3ecded9d18cefbfef188947a12e chore: remove blob-debug test marker file Sonnet 4.6 22 hours ago
1 """muse snapshot-diff — diff two snapshot manifests.
2
3 Compares two Muse snapshots and categorises every path change into one of three
4 buckets: added, modified, or deleted. Accepts snapshot IDs directly or commit
5 IDs / branch names (in which case the commit's snapshot is resolved first).
6
7 Single-pair mode (default)
8 --------------------------
9
10 Output (JSON, ``--json``)::
11
12 {
13 "snapshot_a": "<sha256>",
14 "snapshot_b": "<sha256>",
15 "added": [{"path": "src/foo.py", "object_id": "<sha256>"}],
16 "modified": [{"path": "src/bar.py",
17 "object_id_a": "<sha256>", "object_id_b": "<sha256>"}],
18 "deleted": [{"path": "src/old.py", "object_id": "<sha256>"}],
19 "added_count": 1,
20 "modified_count": 1,
21 "deleted_count": 1,
22 "total_changes": 3,
23 "duration_ms": 4.2,
24 "exit_code": 0
25 }
26
27 Text output (default)::
28
29 A src/foo.py
30 M src/bar.py
31 D src/old.py
32
33 Text output with ``--raw`` (includes object IDs)::
34
35 A <oid_b> src/foo.py
36 M <oid_a> <oid_b> src/bar.py
37 D <oid_a> src/old.py
38
39 Filtering
40 ---------
41
42 ``--only added|modified|deleted``
43 Restrict output to one change category. In JSON mode the suppressed
44 lists are emitted as empty arrays; only the matching list is populated.
45 In text mode only the matching status letters are printed.
46
47 ``--path-prefix PREFIX``
48 Only include paths whose workspace-relative path starts with *PREFIX*.
49 Applied to both manifests before they are compared — counts and
50 ``total_changes`` reflect the filtered view.
51
52 Batch mode (``--stdin``)
53 ------------------------
54
55 Reads ref pairs from stdin (one ``<ref_a> <ref_b>`` per line) and processes
56 each pair.
57
58 JSON mode emits one JSON object per pair (newline-delimited)::
59
60 {"snapshot_a": "...", "snapshot_b": "...", "added": [...], ...}
61 {"snapshot_a": "...", "snapshot_b": "...", "added": [...], ...}
62
63 Text mode emits the text diff for each pair separated by a blank line.
64
65 Invalid or unresolvable pairs emit an error object/line and continue.
66
67 Output contract
68 ---------------
69
70 - Exit 0: diff computed (even when zero changes).
71 - Exit 1: snapshot or commit ID cannot be resolved; a ref is missing in single-pair
72 mode; a bare-hex ref lacks the ``sha256:`` prefix. (A bad ``--only`` value is rejected by argparse.)
73 - Exit 3: I/O error reading snapshot records.
74 - Batch mode (``--stdin``) always exits 0; individual errors are reported inline.
75
76 Agent quickstart
77 ----------------
78
79 ::
80
81 # Diff two snapshots — full ID
82 muse snapshot-diff <snap_a> <snap_b>
83
84 # Diff using short hex prefixes (the ``sha256:`` tag is required; bare hex is rejected)
85 muse snapshot-diff sha256:0d972e55324c sha256:b01270f28f05
86
87 # Diff two branches
88 muse snapshot-diff main dev
89
90 # Only show added files
91 muse snapshot-diff main dev --only added
92
93 # Scope to a subdirectory
94 muse snapshot-diff <snap_a> <snap_b> --path-prefix src/
95
96 # Batch — one pair per line from stdin
97 echo "<id_a> <id_b>" | muse snapshot-diff --stdin
98
99 # Quick summary counts only
100 muse snapshot-diff main dev --json | jq '{added_count, modified_count, deleted_count}'
101 """
102
103 import argparse
104 import json
105 import logging
106 import pathlib
107 import sys
108 from typing import TypedDict
109
110 from muse.core.types import long_id
111 from muse.core.envelope import EnvelopeJson, make_envelope
112 from muse.core.errors import ExitCode
113 from muse.core.repo import require_repo
114 from muse.core.refs import (
115 get_head_commit_id,
116 read_current_branch,
117 )
118 from muse.core.commits import read_commit
119 from muse.core.snapshots import read_snapshot
120 from muse.core.validation import sanitize_display, validate_object_id
121 from muse.core.timing import start_timer
122
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Accepted values for ``--only``; handed to argparse as ``choices``.
_ONLY_CHOICES = ("added", "modified", "deleted")

# Column widths for --raw text output alignment.
# NOTE(review): nothing visible in this module reads _OID_WIDTH — confirm
# whether it is still needed before relying on it.
_OID_WIDTH = 64
129
class _AddedEntry(TypedDict):
    """One added-file entry in a ``snapshot-diff`` JSON result.

    Built for every path that is a key of snapshot B's manifest but not of
    snapshot A's.

    Fields
    ------
    path
        Workspace-relative path of the file that was added in snapshot B.
    object_id
        SHA-256 object ID of the file's content in snapshot B (the value
        stored under ``path`` in B's manifest).
    """

    path: str
    object_id: str
143
class _ModifiedEntry(TypedDict):
    """One modified-file entry in a ``snapshot-diff`` JSON result.

    Built for every path present in both manifests whose recorded object IDs
    differ.

    Fields
    ------
    path
        Workspace-relative path of the file whose content changed.
    object_id_a
        SHA-256 object ID of the file's content in snapshot A (before).
    object_id_b
        SHA-256 object ID of the file's content in snapshot B (after).
    """

    path: str
    object_id_a: str
    object_id_b: str
160
class _DeletedEntry(TypedDict):
    """One deleted-file entry in a ``snapshot-diff`` JSON result.

    Built for every path that is a key of snapshot A's manifest but not of
    snapshot B's.

    Fields
    ------
    path
        Workspace-relative path of the file that was present in snapshot A
        but absent in snapshot B.
    object_id
        SHA-256 object ID of the file's content in snapshot A (the value
        stored under ``path`` in A's manifest).
    """

    path: str
    object_id: str
175
class _DiffResult(TypedDict):
    """Internal diff result (no envelope fields) for one ``snapshot-diff`` pair.

    Fields
    ------
    snapshot_a, snapshot_b
        Resolved snapshot IDs of the "before" and "after" refs.
    added, modified, deleted
        Per-category entry lists, each sorted by path. Lists suppressed by
        ``--only`` are empty.
    added_count, modified_count, deleted_count
        Lengths of the three lists above, after filtering.
    total_changes
        Sum of the three counts.
    """

    snapshot_a: str
    snapshot_b: str
    added: list[_AddedEntry]
    modified: list[_ModifiedEntry]
    deleted: list[_DeletedEntry]
    added_count: int
    modified_count: int
    deleted_count: int
    total_changes: int
188
class _DiffResultJson(EnvelopeJson):
    """JSON wire format for ``muse snapshot-diff --json``.

    Declares the same diff fields as :class:`_DiffResult` on top of whatever
    ``EnvelopeJson`` contributes. In single-pair mode the object is built by
    merging ``make_envelope(...)`` with a computed :class:`_DiffResult`.
    """

    # Resolved snapshot IDs of the "before" and "after" refs.
    snapshot_a: str
    snapshot_b: str
    # Per-category entry lists.
    added: list[_AddedEntry]
    modified: list[_ModifiedEntry]
    deleted: list[_DeletedEntry]
    # Lengths of the lists above and their sum.
    added_count: int
    modified_count: int
    deleted_count: int
    total_changes: int
201
# Lowercase hexadecimal digits, used when checking ref strings for hex
# characters. Uppercase ``A``-``F`` are not members.
_HEX_CHARS = frozenset("0123456789abcdef")
203
204 def _resolve_snapshot_prefix(root: pathlib.Path, prefix: str) -> str | None:
205 """Resolve a short ``sha256:``-prefixed hex string to a full snapshot ID.
206
207 Requires the ``sha256:`` prefix — bare hex is never accepted. This
208 mirrors the CLI boundary rule: the algorithm prefix is a type tag, not
209 decoration.
210
211 Returns the full ``sha256:<64hex>`` snapshot ID on an unambiguous match,
212 or ``None`` when no match is found or the prefix is bare hex.
213 """
214 if not prefix.startswith("sha256:"):
215 return None # bare hex — rejected; caller must supply sha256: prefix
216 bare = long_id(prefix, strip=True)
217 safe_prefix = "".join(c for c in bare[:64] if c in _HEX_CHARS)
218 if not safe_prefix:
219 return None
220 from muse.core.object_store import iter_stored_objects, read_muse_object
221 from muse.core.types import split_id
222 import json as _json
223 for object_id, _ in iter_stored_objects(root):
224 _, hex_id = split_id(object_id)
225 if not hex_id.startswith(safe_prefix):
226 continue
227 result = read_muse_object(root, object_id)
228 if result is None or result[0] != "snapshot":
229 continue
230 try:
231 snap = read_snapshot(root, object_id)
232 if snap is not None:
233 return snap.snapshot_id
234 except Exception:
235 continue
236 return None
237
def _resolve_to_snapshot_id(root: pathlib.Path, ref: str) -> str | None:
    """Map *ref* onto a snapshot ID, or return ``None`` when nothing matches.

    Candidates are tried in this order; the first one that applies decides
    the outcome:

    1. ``HEAD`` (compared case-insensitively) — the current branch is read
       and the snapshot of its tip commit is returned.
    2. Branch name — looked up with ``get_head_commit_id``.  A ``ValueError``
       from that lookup is treated as "not a branch".
    3. Full object ID — anything ``validate_object_id`` accepts.  It is
       tried as a snapshot first and as a commit second.
    4. Anything else is handed to :func:`_resolve_snapshot_prefix`.

    For steps 1 and 2, when a tip commit ID is found but the commit record
    cannot be loaded, the result is ``None`` (later steps are not tried).

    Parameters
    ----------
    root:
        Repository root (the directory containing ``.muse/``).
    ref:
        ``HEAD``, a branch name, a full snapshot or commit ID, or a short
        ``sha256:<hex>`` prefix.

    Returns
    -------
    str | None
        The resolved snapshot ID, or ``None`` when *ref* does not resolve.
    """
    # 1. Symbolic HEAD.
    if ref.upper() == "HEAD":
        current_branch = read_current_branch(root)
        tip_id = get_head_commit_id(root, current_branch)
        if tip_id is None:
            return None
        tip = read_commit(root, tip_id)
        if tip:
            return tip.snapshot_id
        return None

    # 2. Branch name.
    try:
        branch_tip_id = get_head_commit_id(root, ref)
    except ValueError:
        branch_tip_id = None
    if branch_tip_id is not None:
        branch_tip = read_commit(root, branch_tip_id)
        if not branch_tip:
            return None
        return branch_tip.snapshot_id

    # 3 / 4. Full object ID versus short prefix.
    try:
        validate_object_id(ref)
    except ValueError:
        looks_like_full_id = False
    else:
        looks_like_full_id = True

    if not looks_like_full_id:
        return _resolve_snapshot_prefix(root, ref)

    snapshot = read_snapshot(root, ref)
    if snapshot is not None:
        return snapshot.snapshot_id
    commit = read_commit(root, ref)
    return None if commit is None else commit.snapshot_id
301
def _compute_diff(
    root: pathlib.Path,
    ref_a: str,
    ref_b: str,
    only: str | None = None,
    path_prefix: str | None = None,
) -> "_DiffResult | dict[str, str]":
    """Diff the manifests behind *ref_a* (before) and *ref_b* (after).

    Both refs go through :func:`_resolve_to_snapshot_id`, the two snapshots
    are loaded, and every path is classified as added, modified, or deleted.
    Each category is ordered lexicographically by path.

    Parameters
    ----------
    root:
        Repository root.
    ref_a:
        The "before" ref.
    ref_b:
        The "after" ref.
    only:
        ``"added"``, ``"modified"``, or ``"deleted"`` keeps that category and
        empties the other two; any other value keeps all three.  Counts and
        ``total_changes`` describe what is left.
    path_prefix:
        When non-empty, both manifests are narrowed to paths starting with
        this prefix before they are compared (and therefore before ``only``
        is applied).

    Returns
    -------
    _DiffResult | dict[str, str]
        A :class:`_DiffResult` on success.  Otherwise a one-key
        ``{"error": "..."}`` dict: a ref did not resolve, loading a snapshot
        raised, or a snapshot was not found.
    """
    # Resolve A then B, stopping at the first ref that does not resolve.
    resolved: list[str] = []
    for ref in (ref_a, ref_b):
        resolved_id = _resolve_to_snapshot_id(root, ref)
        if resolved_id is None:
            return {"error": f"Cannot resolve ref: {ref!r}"}
        resolved.append(resolved_id)
    id_a, id_b = resolved

    try:
        before = read_snapshot(root, id_a)
        after = read_snapshot(root, id_b)
    except Exception as exc:
        return {"error": str(exc)}

    for loaded, loaded_id in ((before, id_a), (after, id_b)):
        if loaded is None:
            return {"error": f"Snapshot not found: {loaded_id}"}

    old_files = before.manifest
    new_files = after.manifest
    if path_prefix:
        old_files = {
            path: oid for path, oid in old_files.items() if path.startswith(path_prefix)
        }
        new_files = {
            path: oid for path, oid in new_files.items() if path.startswith(path_prefix)
        }

    # One pass over the sorted union of paths keeps every bucket path-ordered.
    added: list[_AddedEntry] = []
    modified: list[_ModifiedEntry] = []
    deleted: list[_DeletedEntry] = []
    for path in sorted(set(old_files) | set(new_files)):
        if path not in old_files:
            added.append({"path": path, "object_id": new_files[path]})
        elif path not in new_files:
            deleted.append({"path": path, "object_id": old_files[path]})
        elif old_files[path] != new_files[path]:
            modified.append(
                {
                    "path": path,
                    "object_id_a": old_files[path],
                    "object_id_b": new_files[path],
                }
            )

    # --only: blank out every category except the requested one.
    if only in ("added", "modified", "deleted"):
        if only != "added":
            added = []
        if only != "modified":
            modified = []
        if only != "deleted":
            deleted = []

    n_added, n_modified, n_deleted = len(added), len(modified), len(deleted)
    return _DiffResult(
        snapshot_a=id_a,
        snapshot_b=id_b,
        added=added,
        modified=modified,
        deleted=deleted,
        added_count=n_added,
        modified_count=n_modified,
        deleted_count=n_deleted,
        total_changes=n_added + n_modified + n_deleted,
    )
408
409 def _emit_text(result: _DiffResult, raw: bool, stat: bool, only: str | None) -> None:
410 """Print text-format diff output for *result* to stdout.
411
412 Parameters
413 ----------
414 result:
415 A successfully computed :class:`_DiffResult`.
416 raw:
417 When ``True``, prefix each line with the object ID(s) of the entry.
418 When ``False``, only the status letter and path are printed.
419 stat:
420 When ``True``, append a summary line after the diff lines:
421 ``N added, M modified, D deleted``.
422 only:
423 When set, only lines for that category are printed (the ``only``
424 filter has already been applied to *result*'s lists, so this
425 parameter is unused here — it is accepted for symmetry with
426 :func:`_compute_diff`).
427 """
428 if raw:
429 for entry in result["added"]:
430 oid = entry["object_id"]
431 print(f"A {oid} {sanitize_display(entry['path'])}")
432 for entry in result["modified"]:
433 oid_a = entry["object_id_a"]
434 oid_b = entry["object_id_b"]
435 print(f"M {oid_a} {oid_b} {sanitize_display(entry['path'])}")
436 for entry in result["deleted"]:
437 oid = entry["object_id"]
438 print(f"D {oid} {sanitize_display(entry['path'])}")
439 else:
440 for entry in result["added"]:
441 print(f"A {sanitize_display(entry['path'])}")
442 for entry in result["modified"]:
443 print(f"M {sanitize_display(entry['path'])}")
444 for entry in result["deleted"]:
445 print(f"D {sanitize_display(entry['path'])}")
446
447 if stat:
448 na = len(result["added"])
449 nm = len(result["modified"])
450 nd = len(result["deleted"])
451 print(f"\n{na} added, {nm} modified, {nd} deleted")
452
def register(subparsers: "argparse._SubParsersAction[argparse.ArgumentParser]") -> None:
    """Attach the ``snapshot-diff`` subcommand to *subparsers*.

    The module docstring is used verbatim as the command description, and
    :func:`run` is installed as the ``func`` default so the dispatcher can
    invoke it with the parsed namespace.
    """
    parser = subparsers.add_parser(
        "snapshot-diff",
        help="Diff two snapshot manifests: added, modified, deleted paths.",
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    # Both positionals are optional at parse time; run() enforces them in
    # single-pair mode.
    for dest, ordinal in (("ref_a", "First"), ("ref_b", "Second")):
        parser.add_argument(
            dest,
            nargs="?",
            default=None,
            help=(
                f"{ordinal} snapshot ID, commit ID, branch name, or HEAD. "
                "Required in single-pair mode; omit when using --stdin."
            ),
        )

    stdin_help = (
        "Batch mode: read '<ref_a> <ref_b>' pairs from stdin (one per line) "
        "and process each. JSON mode emits one object per line; "
        "text mode separates pairs with a blank line."
    )
    parser.add_argument("--stdin", dest="from_stdin", action="store_true", help=stdin_help)

    raw_help = (
        "Include object IDs in text-format output. "
        "Has no effect on JSON output (which always includes OIDs). "
        "Format: 'A <oid> <path>', 'M <oid_a> <oid_b> <path>', 'D <oid> <path>'."
    )
    parser.add_argument("--raw", action="store_true", help=raw_help)

    parser.add_argument(
        "--json",
        "-j",
        dest="json_out",
        action="store_true",
        help="Emit JSON output (default: human-readable text).",
    )
    parser.add_argument(
        "--stat",
        "-s",
        action="store_true",
        help="Append a summary line: N added, M modified, D deleted (text mode only).",
    )

    only_help = (
        "Restrict output to one change category: added, modified, or deleted. "
        "In JSON mode the other two lists are emitted as empty arrays. "
        "In text mode only the matching status letters are printed."
    )
    parser.add_argument(
        "--only",
        choices=_ONLY_CHOICES,
        metavar="CATEGORY",
        default=None,
        help=only_help,
    )

    prefix_help = (
        "Filter diff to paths starting with PREFIX. "
        "Applied before --only. Counts and total_changes reflect the filtered view."
    )
    parser.add_argument(
        "--path-prefix",
        metavar="PREFIX",
        default=None,
        help=prefix_help,
    )

    parser.set_defaults(func=run)
533
def run(args: argparse.Namespace) -> None:
    """Diff two snapshots and report added, modified, and deleted paths.

    Resolves both refs to snapshot IDs (accepting snapshot IDs, commit IDs,
    branch names, or ``HEAD``), computes the manifest diff, and emits the
    result.  Output is human-readable text unless ``--json`` is given.

    Batch mode (``--stdin``) reads one ``<ref_a> <ref_b>`` pair per line,
    skipping blank lines and lines starting with ``#``.  JSON mode prints one
    object per pair; text mode prints one block per pair, with a blank line
    *before* every block except the first.  Malformed lines and unresolvable
    pairs are reported inline and processing continues.

    Agent quickstart::

        muse snapshot-diff main dev --json
        muse snapshot-diff sha256:abc sha256:def --json
        muse snapshot-diff main dev --only added --json
        muse snapshot-diff main dev --path-prefix src/ --json

    JSON fields::

        snapshot_a      str   Resolved snapshot ID for the first (before) ref
        snapshot_b      str   Resolved snapshot ID for the second (after) ref
        added           list  Files added:    [{path, object_id}]
        modified        list  Files modified: [{path, object_id_a, object_id_b}]
        deleted         list  Files deleted:  [{path, object_id}]
        added_count     int   len(added)
        modified_count  int   len(modified)
        deleted_count   int   len(deleted)
        total_changes   int   Sum of all three counts

    Raises
    ------
    SystemExit
        With ``ExitCode.USER_ERROR`` in single-pair mode when a ref is
        missing, a ref is bare hex (no ``sha256:`` tag), or the diff could
        not be computed.  Batch mode returns normally; its errors are inline.
    """
    json_out: bool = args.json_out
    ref_a: str | None = args.ref_a
    ref_b: str | None = args.ref_b
    from_stdin: bool = args.from_stdin
    raw: bool = args.raw
    stat: bool = args.stat
    only: str | None = args.only
    path_prefix: str | None = args.path_prefix

    elapsed = start_timer()

    root = require_repo()

    # ── Batch (--stdin) mode ──────────────────────────────────────────────────
    if from_stdin:
        # ``first`` drives the text-mode separator: every block after the
        # first is preceded by exactly one blank line.  JSON mode never
        # prints separators.
        first = True
        for raw_line in sys.stdin:
            line = raw_line.strip()
            if not line or line.startswith("#"):
                continue
            parts = line.split()
            if len(parts) < 2:
                if json_out:
                    print(json.dumps({"error": f"Expected '<ref_a> <ref_b>', got: {line!r}"}))
                else:
                    # Separator goes before the error, matching the other
                    # text-mode branches below.
                    if not first:
                        print()
                    print(f"error: expected '<ref_a> <ref_b>', got: {sanitize_display(line)}")
                first = False
                continue

            pair_a, pair_b = parts[0], parts[1]
            result = _compute_diff(root, pair_a, pair_b, only=only, path_prefix=path_prefix)

            if "error" in result:
                if json_out:
                    print(json.dumps(result))
                else:
                    if not first:
                        print()
                    print(f"error: {sanitize_display(result['error'])}")  # type: ignore[index]
            elif json_out:
                print(json.dumps({**result, **make_envelope(elapsed)}))
            else:
                if not first:
                    print()
                _emit_text(result, raw=raw, stat=stat, only=only)  # type: ignore[arg-type]

            first = False
        return

    # ── Single-pair mode ──────────────────────────────────────────────────────
    if ref_a is None or ref_b is None:
        print(
            json.dumps({"error": "ref_a and ref_b are required in single-pair mode "
                        "(or use --stdin for batch processing)"}),
            file=sys.stderr,
        )
        raise SystemExit(ExitCode.USER_ERROR)

    # Bare hex is rejected at the CLI boundary: the sha256: prefix is a type
    # tag (algorithm identifier), not decoration.  Any ref made up solely of
    # lowercase hex digits is caught by this check.
    for _label, _ref in (("ref_a", ref_a), ("ref_b", ref_b)):
        if all(c in _HEX_CHARS for c in _ref):
            _safe = sanitize_display(_ref)
            print(
                json.dumps({
                    "error": (
                        f"Bare hex IDs are not accepted ({_label}={_ref!r}) — "
                        f"use 'sha256:{_safe}' instead."
                    )
                }),
                file=sys.stderr,
            )
            raise SystemExit(ExitCode.USER_ERROR)

    result = _compute_diff(root, ref_a, ref_b, only=only, path_prefix=path_prefix)

    if "error" in result:
        print(json.dumps(result), file=sys.stderr)
        raise SystemExit(ExitCode.USER_ERROR)

    if not json_out:
        _emit_text(result, raw=raw, stat=stat, only=only)  # type: ignore[arg-type]
        return

    print(json.dumps(_DiffResultJson(**make_envelope(elapsed), **result)))
File History 7 commits
sha256:18b983389ee1b55900fcd799bfbb496552d2e3ecded9d18cefbfef188947a12e chore: remove blob-debug test marker file Sonnet 4.6 22 hours ago
sha256:e452ad9a6ace6ccc6d875a35e06caf9da5576a970c1c36133b69a891ce5fefa8 chore: prebuild timing test Sonnet 4.6 8 days ago
sha256:0008ab6695e3e064b3e236b24fd19e538fef6a588eb0d211622f4466d919c0b1 merge: pull staging/dev — advance to 0.2.0rc12 Sonnet 4.6 patch 10 days ago
sha256:9c33d61749fff814c5226d5386aa2af7064c2c02788594a25fdd709358132eea fix: _PROPOSAL_PREFIX_RESOLVE_LIMIT 200 → 100 to match hub … Sonnet 4.6 21 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 24 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 30 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor 30 days ago