gabriel / muse public
patch_id.py python
336 lines 11.5 KB
Raw
sha256:be3641f35bdbcc094677776a77b9aa6a5dab891f8fab201dc162d03c2bab5aea fix(read): strip position:null from structured_delta ops in… Sonnet 4.6 patch 24 days ago
1 """``muse patch-id [<ref>]`` — content-based commit identity.
2
3 Computes a stable SHA-256 hash of a commit's diff content — independent of
4 commit ID, author, timestamp, branch, or merge history. Two commits that
5 make the same logical change produce the same patch-id, enabling reliable
6 cherry-pick detection and duplicate patch identification.
7
8 Algorithm
9 ---------
10 1. Resolve the commit and its parent's snapshot manifests.
11 2. For each file changed between parent and commit (sorted alphabetically),
12 compute the unified diff.
13 3. Keep every diff line that starts with ``+`` or ``-``: the added/removed
14 content lines and the ``---``/``+++`` file headers (``@@`` hunk headers and context lines are skipped).
15 4. Feed those lines — in sorted-file order — into a SHA-256 digest.
16 5. The 64-char hex digest is the patch-id.
17
18 With ``--stable``, each content line is stripped of trailing whitespace before
19 hashing so that cosmetic whitespace changes don't produce a new patch-id.
20
21 Output formats
22 --------------
23 Default text::
24
25 <patch_id> <commit_id>
26
27 JSON (``--json``)::
28
29 {
30 "commit_id": "<sha256>",
31 "patch_id": "<64-char hex>",
32 "subject": "feat: add something",
33 "files_changed": 3,
34 "stable": false,
35 "duration_ms": 2.1,
36 "exit_code": 0
37 }
38
39 Exit codes::
40
41 0 — success
42 1 — user error: bad ref, ANSI in ref, empty repo
43 2 — not a Muse repository
44
45 Examples::
46
47 muse patch-id HEAD
48 muse patch-id HEAD --json
49 muse patch-id HEAD --stable
50 muse patch-id <commit-id> --json
51 muse patch-id main --json
52 """
53
54 import argparse
55 import difflib
56 import hashlib
57 import json as _json
58 import logging
59 import pathlib
60 import sys
61 from typing import TypedDict
62
63 from muse.core.errors import ExitCode
64 from muse.core.object_store import read_object
65 from muse.core.repo import require_repo
66 from muse.core.refs import read_ref
67 from muse.core.refs import (
68 get_head_commit_id,
69 read_current_branch,
70 )
71 from muse.core.commits import (
72 CommitRecord,
73 read_commit,
74 resolve_commit_ref,
75 )
76 from muse.core.snapshots import read_snapshot
77 from muse.core.envelope import EnvelopeJson, make_envelope
78 from muse.core.validation import sanitize_display
79 from muse.core.types import Manifest, long_id
80 from muse.core.paths import ref_path as _ref_path
81 from muse.core.timing import start_timer
82
83 logger = logging.getLogger(__name__)
84
85 # ---------------------------------------------------------------------------
86 # Wire-format TypedDicts
87 # ---------------------------------------------------------------------------
88
class _PatchIdJson(EnvelopeJson):
    """Stable JSON envelope for ``muse patch-id --json`` output.

    Extends ``EnvelopeJson`` with the command-specific fields below; ``run``
    fills them in when ``--json`` is passed.
    """
    # ID of the commit that was fingerprinted (``commit.commit_id``).
    commit_id: str
    # Fingerprint returned by ``_compute_patch_id``.
    patch_id: str
    # First line of the commit message, or "" when the message is empty.
    subject: str
    # Number of paths added, removed, or modified relative to the parent.
    files_changed: int
    # True when ``--stable`` was passed on the command line.
    stable: bool
96
97 # ---------------------------------------------------------------------------
98 # Internal helpers
99 # ---------------------------------------------------------------------------
100
def _object_lines(root: pathlib.Path, manifest: Manifest, path: str) -> list[str]:
    """Return the text lines stored for *path* in *manifest*.

    A path that is absent from the manifest, or whose object payload is
    falsy, yields an empty list. Bytes are decoded as UTF-8 with
    ``errors="replace"`` so undecodable content never raises.
    """
    if path not in manifest:
        return []
    raw = read_object(root, manifest[path])
    if not raw:
        return []
    return raw.decode("utf-8", errors="replace").splitlines()


def _compute_patch_id(
    root: pathlib.Path,
    base_manifest: Manifest,
    target_manifest: Manifest,
    *,
    stable: bool,
) -> str:
    """Compute a patch-id from the diff between two manifests.

    For every path that was added, removed, or whose object ID differs
    between the two manifests (visited in sorted order), a unified diff is
    generated and each diff line that starts with ``+`` or ``-`` is fed into
    a SHA-256 digest, followed by a newline byte.

    NOTE: the ``--- a/<path>`` / ``+++ b/<path>`` file headers emitted by
    ``difflib.unified_diff`` also start with ``-`` / ``+``, so they are part
    of the hash. Only ``@@`` hunk headers and unchanged context lines are
    left out. Changing this would change every patch-id, so the behaviour
    is preserved here and documented instead.

    Args:
        root: Absolute repo root (for object store reads).
        base_manifest: Parent commit manifest (path → object_id).
        target_manifest: This commit manifest (path → object_id).
        stable: When True, strip trailing whitespace from each hashed
            line so cosmetic whitespace differences are ignored.

    Returns:
        The hex digest passed through ``long_id`` (described elsewhere in
        this module as a ``sha256:``-prefixed string).
    """
    digest = hashlib.sha256()

    base_paths = set(base_manifest)
    target_paths = set(target_manifest)
    # Symmetric difference = added or removed; the comprehension adds paths
    # present on both sides whose object ID changed.
    changed = sorted(
        (base_paths ^ target_paths)
        | {
            p for p in base_paths & target_paths
            if base_manifest[p] != target_manifest[p]
        }
    )

    for path in changed:
        diff = difflib.unified_diff(
            _object_lines(root, base_manifest, path),
            _object_lines(root, target_manifest, path),
            fromfile=f"a/{path}",
            tofile=f"b/{path}",
            lineterm="",
        )
        for line in diff:
            # Drops "@@" hunk headers and " "-prefixed context lines.
            if not line.startswith(("+", "-")):
                continue
            if stable:
                line = line.rstrip()
            digest.update(line.encode("utf-8", errors="replace"))
            digest.update(b"\n")

    return long_id(digest.hexdigest())
165
def _resolve_commit(root: pathlib.Path, treeish: str) -> CommitRecord:
    """Turn *treeish* into the CommitRecord it names.

    ``HEAD`` (matched case-insensitively) resolves through the current
    branch. Anything else is first tried as a branch name and, when no such
    ref exists, handed to ``resolve_commit_ref``.

    Args:
        root: Absolute repo root.
        treeish: Branch name, commit ID, or ``"HEAD"``.

    Returns:
        CommitRecord.

    Raises:
        SystemExit(USER_ERROR): the repo has no commits, the ref is
            unknown, or any other exception occurred during resolution.
    """
    try:
        current_branch = read_current_branch(root)

        if treeish.upper() == "HEAD":
            head_id = get_head_commit_id(root, current_branch)
            if not head_id:
                print("❌ Repository has no commits yet.", file=sys.stderr)
                raise SystemExit(ExitCode.USER_ERROR)
            record = read_commit(root, head_id)
        else:
            # A branch ref wins over commit-ID style resolution.
            tip_id = read_ref(_ref_path(root, treeish))
            record = (
                resolve_commit_ref(root, current_branch, treeish)
                if tip_id is None
                else read_commit(root, tip_id)
            )

        if record is not None:
            return record

        print(
            f"❌ '{sanitize_display(treeish)}' is not a known branch or commit ID.",
            file=sys.stderr,
        )
        raise SystemExit(ExitCode.USER_ERROR)
    except Exception as exc:
        # SystemExit derives from BaseException, so the exits raised above
        # pass straight through this handler untouched.
        print(f"❌ Failed to resolve '{sanitize_display(treeish)}': {exc}", file=sys.stderr)
        raise SystemExit(ExitCode.USER_ERROR)
210
211 # ---------------------------------------------------------------------------
212 # Registration
213 # ---------------------------------------------------------------------------
214
def register(
    subparsers: "argparse._SubParsersAction[argparse.ArgumentParser]",
) -> None:
    """Attach the ``patch-id`` subcommand and its options to *subparsers*.

    The module docstring is used as the long ``--help`` description and is
    rendered verbatim via ``RawDescriptionHelpFormatter``. ``run`` is
    installed as the handler through the ``func`` default.
    """
    cmd = subparsers.add_parser(
        "patch-id",
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        help="Compute a stable content-based hash of a commit's diff.",
    )

    # Optional positional: which commit to fingerprint.
    cmd.add_argument(
        "treeish",
        nargs="?",
        default="HEAD",
        metavar="REF",
        help="Commit ID, branch name, or HEAD (default: HEAD).",
    )

    stable_help = (
        "Strip trailing whitespace from each diff line before hashing "
        "so cosmetic whitespace changes don't produce a new patch-id."
    )
    cmd.add_argument("--stable", dest="stable", action="store_true", help=stable_help)

    cmd.add_argument(
        "--json", "-j",
        dest="json_out",
        action="store_true",
        help="Emit machine-readable JSON on stdout.",
    )

    cmd.set_defaults(func=run)
248
249 # ---------------------------------------------------------------------------
250 # Run
251 # ---------------------------------------------------------------------------
252
def run(args: argparse.Namespace) -> None:
    """Compute the patch-id for a given commit.

    Hashes the diff between a commit and its parent using a content-stable
    algorithm. Identical patches applied to different bases produce the same
    patch-id — useful for deduplication and cherry-pick detection.

    Only ``commit.parent_commit_id`` is consulted for the base. When the
    commit has no parent, or the parent commit / snapshot cannot be read,
    the base manifest is empty and every file in the commit counts as added.

    Agent quickstart
    ----------------
    ::

        muse patch-id --json
        muse patch-id HEAD~3 --json
        muse patch-id feat/billing --stable --json

    JSON fields
    -----------
    commit_id       Commit ID that was analysed.
    patch_id        Content-stable patch fingerprint (sha256: prefixed).
    subject         First line of the commit message.
    files_changed   Number of files that changed relative to the parent.
    stable          ``true`` when ``--stable`` was passed.

    Exit codes
    ----------
    0   Success.
    1   Bad ref, ANSI / control characters in ref, or empty repository.
    2   Not inside a Muse repository.

    Args:
        args: Parsed CLI namespace providing ``treeish``, ``stable`` and
            ``json_out``.

    Raises:
        SystemExit(USER_ERROR): the ref contains control characters or
            cannot be resolved.
    """
    elapsed = start_timer()
    treeish: str = args.treeish or "HEAD"
    stable: bool = args.stable
    json_out: bool = args.json_out

    root = require_repo()

    # ── Reject ANSI / control characters in the ref ───────────────────────────
    # Covers C0 controls (< 0x20, includes ESC), DEL (0x7F) and the C1 range
    # (0x80–0x9F), which contains U+009B — the single-character CSI that some
    # terminals treat like "ESC [".
    if any(ord(c) < 0x20 or 0x7F <= ord(c) <= 0x9F for c in treeish):
        print(
            f"❌ Invalid ref '{sanitize_display(treeish)}': control characters not allowed.",
            file=sys.stderr,
        )
        raise SystemExit(ExitCode.USER_ERROR)

    # ── Resolve the commit ────────────────────────────────────────────────────
    commit = _resolve_commit(root, treeish)

    # ── Get parent manifest ───────────────────────────────────────────────────
    # Falls back to an empty manifest when there is no readable parent.
    base_manifest: Manifest = {}
    if commit.parent_commit_id:
        parent = read_commit(root, commit.parent_commit_id)
        if parent:
            snap = read_snapshot(root, parent.snapshot_id)
            if snap:
                base_manifest = dict(snap.manifest)

    # ── Get this commit's manifest ────────────────────────────────────────────
    snap = read_snapshot(root, commit.snapshot_id)
    target_manifest: Manifest = dict(snap.manifest) if snap else {}

    # ── Count changed files (added + removed + modified) ──────────────────────
    base_paths = set(base_manifest)
    target_paths = set(target_manifest)
    files_changed = len(
        (target_paths - base_paths)
        | (base_paths - target_paths)
        | {p for p in base_paths & target_paths if base_manifest[p] != target_manifest[p]}
    )

    # ── Compute patch-id ──────────────────────────────────────────────────────
    patch_id = _compute_patch_id(root, base_manifest, target_manifest, stable=stable)

    # ── Output ───────────────────────────────────────────────────────────────
    if json_out:
        print(_json.dumps(_PatchIdJson(
            **make_envelope(elapsed),
            commit_id=commit.commit_id,
            patch_id=patch_id,
            subject=commit.message.splitlines()[0] if commit.message else "",
            files_changed=files_changed,
            stable=stable,
        )))
    else:
        print(f"{patch_id} {commit.commit_id}")
File History 3 commits
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 23 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 29 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor 30 days ago