patch_id.py
python
sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b
fix: try fetch/presign before fetch/mpack to avoid Cloudfla…
Sonnet 4.6
patch
8 days ago
| 1 | """``muse patch-id [<ref>]`` — content-based commit identity. |
| 2 | |
| 3 | Computes a stable SHA-256 hash of a commit's diff content — independent of |
| 4 | commit ID, author, timestamp, branch, or merge history. Two commits that |
| 5 | make the same logical change produce the same patch-id, enabling reliable |
| 6 | cherry-pick detection and duplicate patch identification. |
| 7 | |
| 8 | Algorithm |
| 9 | --------- |
| 10 | 1. Resolve the commit and its parent's snapshot manifests. |
| 11 | 2. For each file changed between parent and commit (sorted alphabetically), |
| 12 | compute the unified diff. |
| 13 | 3. Keep only the diff lines that start with ``+`` or ``-`` (``@@`` hunk headers |
| 14 | and unchanged context lines are skipped; the ``---``/``+++`` file headers also match and are hashed). |
| 15 | 4. Feed those lines — in sorted-file order — into a SHA-256 digest. |
| 16 | 5. The 64-char hex digest is the patch-id. |
| 17 | |
| 18 | With ``--stable``, each content line is stripped of trailing whitespace before |
| 19 | hashing so that cosmetic whitespace changes don't produce a new patch-id. |
| 20 | |
| 21 | Output formats |
| 22 | -------------- |
| 23 | Default text:: |
| 24 | |
| 25 | <patch_id> <commit_id> |
| 26 | |
| 27 | JSON (``--json``):: |
| 28 | |
| 29 | { |
| 30 | "commit_id": "<sha256>", |
| 31 | "patch_id": "<64-char hex>", |
| 32 | "subject": "feat: add something", |
| 33 | "files_changed": 3, |
| 34 | "stable": false, |
| 35 | "duration_ms": 2.1, |
| 36 | "exit_code": 0 |
| 37 | } |
| 38 | |
| 39 | Exit codes:: |
| 40 | |
| 41 | 0 — success |
| 42 | 1 — user error: bad ref, ANSI in ref, empty repo |
| 43 | 2 — not a Muse repository |
| 44 | |
| 45 | Examples:: |
| 46 | |
| 47 | muse patch-id HEAD |
| 48 | muse patch-id HEAD --json |
| 49 | muse patch-id HEAD --stable |
| 50 | muse patch-id <commit-id> --json |
| 51 | muse patch-id main --json |
| 52 | """ |
| 53 | |
| 54 | import argparse |
| 55 | import difflib |
| 56 | import hashlib |
| 57 | import json as _json |
| 58 | import logging |
| 59 | import pathlib |
| 60 | import sys |
| 61 | from typing import TypedDict |
| 62 | |
| 63 | from muse.core.errors import ExitCode |
| 64 | from muse.core.object_store import read_object |
| 65 | from muse.core.repo import require_repo |
| 66 | from muse.core.refs import read_ref |
| 67 | from muse.core.refs import ( |
| 68 | get_head_commit_id, |
| 69 | read_current_branch, |
| 70 | ) |
| 71 | from muse.core.commits import ( |
| 72 | CommitRecord, |
| 73 | read_commit, |
| 74 | resolve_commit_ref, |
| 75 | ) |
| 76 | from muse.core.snapshots import read_snapshot |
| 77 | from muse.core.envelope import EnvelopeJson, make_envelope |
| 78 | from muse.core.validation import sanitize_display |
| 79 | from muse.core.types import Manifest, long_id |
| 80 | from muse.core.paths import ref_path as _ref_path |
| 81 | from muse.core.timing import start_timer |
| 82 | |
| 83 | logger = logging.getLogger(__name__) |
| 84 | |
| 85 | # --------------------------------------------------------------------------- |
| 86 | # Wire-format TypedDicts |
| 87 | # --------------------------------------------------------------------------- |
| 88 | |
class _PatchIdJson(EnvelopeJson):
    """Wire format of the object printed by ``muse patch-id --json``.

    Adds the patch-id specific keys below to whatever keys ``EnvelopeJson``
    already declares.
    """

    # ID of the commit that was analysed.
    commit_id: str
    # Digest computed from the commit's diff against its parent.
    patch_id: str
    # First line of the commit message; empty string when there is no message.
    subject: str
    # Number of added + removed + modified paths relative to the parent.
    files_changed: int
    # True when ``--stable`` was passed on the command line.
    stable: bool
| 96 | |
| 97 | # --------------------------------------------------------------------------- |
| 98 | # Internal helpers |
| 99 | # --------------------------------------------------------------------------- |
| 100 | |
def _compute_patch_id(
    root: pathlib.Path,
    base_manifest: Manifest,
    target_manifest: Manifest,
    *,
    stable: bool,
) -> str:
    """Hash the line-level diff between two manifests into a patch-id.

    Every path that was added, removed, or whose object ID differs between
    the two manifests is diffed with :func:`difflib.unified_diff`, visiting
    paths in sorted order.  Each emitted diff line that begins with ``+`` or
    ``-`` is fed into one SHA-256 digest, followed by a newline byte.

    ``@@`` hunk headers and unchanged context lines never pass that prefix
    test.  The ``--- a/<path>`` and ``+++ b/<path>`` header lines *do* pass
    it, so the changed paths contribute to the digest as well.

    Blobs are decoded as UTF-8 with replacement characters for invalid
    bytes.  A path that is absent from one side, or whose object reads back
    empty/falsy, is treated as having no lines on that side.

    Args:
        root: Repository root handed to ``read_object``.
        base_manifest: Parent-side mapping of path to object ID.
        target_manifest: Commit-side mapping of path to object ID.
        stable: When True, trailing whitespace is stripped from each
            selected diff line just before it is hashed.

    Returns:
        The hex digest as formatted by ``long_id`` (expected to be the
        ``sha256:``-prefixed form).
    """
    old_paths = set(base_manifest)
    new_paths = set(target_manifest)

    # Symmetric difference covers added and removed paths; the generator
    # below adds paths present on both sides whose object IDs differ.
    touched = old_paths ^ new_paths
    touched.update(
        path
        for path in old_paths & new_paths
        if base_manifest[path] != target_manifest[path]
    )

    digest = hashlib.sha256()

    for path in sorted(touched):
        # Load the base side first, then the target side.
        sides = []
        for manifest in (base_manifest, target_manifest):
            blob = read_object(root, manifest[path]) if path in manifest else None
            if blob:
                sides.append(blob.decode("utf-8", errors="replace").splitlines())
            else:
                sides.append([])
        old_lines, new_lines = sides

        diff_lines = difflib.unified_diff(
            old_lines,
            new_lines,
            fromfile=f"a/{path}",
            tofile=f"b/{path}",
            lineterm="",
        )
        for entry in diff_lines:
            if not entry.startswith(("+", "-")):
                continue
            if stable:
                entry = entry.rstrip()
            # One update per line: the encoded text plus its newline terminator.
            digest.update(entry.encode("utf-8", errors="replace") + b"\n")

    return long_id(digest.hexdigest())
| 165 | |
def _resolve_commit(root: pathlib.Path, treeish: str) -> CommitRecord:
    """Turn *treeish* into the commit record it names.

    Resolution order:

    1. ``HEAD`` (any letter case) resolves to the head commit of the
       current branch.
    2. Otherwise *treeish* is tried as a branch name via its ref file.
    3. If no such ref exists, it is handed to ``resolve_commit_ref``.

    Args:
        root: Repository root.
        treeish: Branch name, commit ID, or ``"HEAD"``.

    Returns:
        The resolved commit record.

    Raises:
        SystemExit: With ``ExitCode.USER_ERROR`` when the repository has no
            commits, the ref cannot be resolved, or any lookup raises.  A
            diagnostic is printed to stderr first.
    """
    try:
        current_branch = read_current_branch(root)

        if treeish.upper() != "HEAD":
            # A branch ref takes precedence over any other interpretation.
            tip_id = read_ref(_ref_path(root, treeish))
            record = (
                resolve_commit_ref(root, current_branch, treeish)
                if tip_id is None
                else read_commit(root, tip_id)
            )
        else:
            head_id = get_head_commit_id(root, current_branch)
            if not head_id:
                print("❌ Repository has no commits yet.", file=sys.stderr)
                raise SystemExit(ExitCode.USER_ERROR)
            record = read_commit(root, head_id)

        if record is None:
            print(
                f"❌ '{sanitize_display(treeish)}' is not a known branch or commit ID.",
                file=sys.stderr,
            )
            raise SystemExit(ExitCode.USER_ERROR)

        return record
    # SystemExit derives from BaseException, so the exits raised above pass
    # straight through this handler untouched.
    except Exception as exc:
        print(f"❌ Failed to resolve '{sanitize_display(treeish)}': {exc}", file=sys.stderr)
        raise SystemExit(ExitCode.USER_ERROR)
| 210 | |
| 211 | # --------------------------------------------------------------------------- |
| 212 | # Registration |
| 213 | # --------------------------------------------------------------------------- |
| 214 | |
def register(
    subparsers: "argparse._SubParsersAction[argparse.ArgumentParser]",
) -> None:
    """Attach the ``patch-id`` subcommand to *subparsers*.

    The subcommand accepts one optional positional ``REF`` (stored as
    ``treeish``, defaulting to ``"HEAD"``) plus the ``--stable`` and
    ``--json``/``-j`` flags, and dispatches to :func:`run`.  The module
    docstring is used verbatim as the long description, which is why the
    raw-description formatter is selected.
    """
    stable_help = (
        "Strip trailing whitespace from each diff line before hashing "
        "so cosmetic whitespace changes don't produce a new patch-id."
    )

    cmd = subparsers.add_parser(
        "patch-id",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__,
        help="Compute a stable content-based hash of a commit's diff.",
    )

    # Positional first, then the flags: the order is kept so the generated
    # usage/help text is unchanged.
    cmd.add_argument(
        "treeish",
        nargs="?",
        default="HEAD",
        metavar="REF",
        help="Commit ID, branch name, or HEAD (default: HEAD).",
    )
    cmd.add_argument(
        "--stable",
        dest="stable",
        action="store_true",
        help=stable_help,
    )
    cmd.add_argument(
        "--json", "-j",
        dest="json_out",
        action="store_true",
        help="Emit machine-readable JSON on stdout.",
    )

    cmd.set_defaults(func=run)
| 248 | |
| 249 | # --------------------------------------------------------------------------- |
| 250 | # Run |
| 251 | # --------------------------------------------------------------------------- |
| 252 | |
def run(args: argparse.Namespace) -> None:
    """Compute and print the patch-id for a given commit.

    Hashes the diff between a commit and its parent using a content-stable
    algorithm.  Identical patches applied to different bases produce the same
    patch-id — useful for deduplication and cherry-pick detection.

    A commit without a parent is diffed against an empty manifest.  If the
    parent commit, the parent snapshot, or the commit's own snapshot cannot
    be loaded, the missing side also falls back to an empty manifest and a
    warning is logged, because the resulting patch-id then describes a
    whole-tree add/delete rather than the real change.

    Agent quickstart
    ----------------
    ::

        muse patch-id --json
        muse patch-id HEAD~3 --json    # relative refs: assumed handled by resolve_commit_ref
        muse patch-id feat/billing --stable --json

    JSON fields
    -----------
    commit_id       Commit ID that was analysed.
    patch_id        Content-stable patch fingerprint (sha256: prefixed).
    subject         First line of the commit message.
    files_changed   Number of files that changed relative to the parent.
    stable          ``true`` when ``--stable`` was passed.

    Exit codes
    ----------
    0   Success.
    1   Bad ref, control characters in ref, or empty repository.
    2   Not inside a Muse repository.

    Args:
        args: Parsed CLI namespace providing ``treeish``, ``stable`` and
            ``json_out``.

    Raises:
        SystemExit: With ``ExitCode.USER_ERROR`` when the ref contains
            control characters or cannot be resolved.
    """
    elapsed = start_timer()
    treeish: str = args.treeish or "HEAD"
    stable: bool = args.stable
    json_out: bool = args.json_out

    root = require_repo()

    # ── Reject ANSI / control characters in the ref ───────────────────────────
    # C0 controls (< 0x20, which includes ESC), DEL (0x7F) and the C1 range
    # (0x80-0x9F, which includes the 8-bit CSI 0x9B) are all refused; checking
    # only ``< 32`` would let DEL and C1 sequences through.
    if any(ord(ch) < 0x20 or 0x7F <= ord(ch) <= 0x9F for ch in treeish):
        print(
            f"❌ Invalid ref '{sanitize_display(treeish)}': control characters not allowed.",
            file=sys.stderr,
        )
        raise SystemExit(ExitCode.USER_ERROR)

    # ── Resolve the commit ────────────────────────────────────────────────────
    commit = _resolve_commit(root, treeish)

    # ── Get parent manifest ───────────────────────────────────────────────────
    base_manifest: Manifest = {}
    if commit.parent_commit_id:
        parent = read_commit(root, commit.parent_commit_id)
        parent_snap = read_snapshot(root, parent.snapshot_id) if parent else None
        if parent_snap:
            base_manifest = dict(parent_snap.manifest)
        else:
            # Keep the best-effort fallback, but make the degraded result visible.
            logger.warning(
                "patch-id: parent %s of commit %s could not be loaded; "
                "diffing against an empty manifest",
                commit.parent_commit_id,
                commit.commit_id,
            )

    # ── Get this commit's manifest ────────────────────────────────────────────
    target_manifest: Manifest = {}
    target_snap = read_snapshot(root, commit.snapshot_id)
    if target_snap:
        target_manifest = dict(target_snap.manifest)
    else:
        logger.warning(
            "patch-id: snapshot %s of commit %s could not be loaded; "
            "treating the commit as an empty manifest",
            commit.snapshot_id,
            commit.commit_id,
        )

    # ── Count changed files (added + removed + modified) ──────────────────────
    base_paths = set(base_manifest)
    target_paths = set(target_manifest)
    modified = {
        p for p in base_paths & target_paths
        if base_manifest[p] != target_manifest[p]
    }
    # Symmetric difference = paths present on exactly one side (added/removed).
    files_changed = len((base_paths ^ target_paths) | modified)

    # ── Compute patch-id ──────────────────────────────────────────────────────
    patch_id = _compute_patch_id(root, base_manifest, target_manifest, stable=stable)

    # ── Output ───────────────────────────────────────────────────────────────
    if json_out:
        print(_json.dumps(_PatchIdJson(
            **make_envelope(elapsed),
            commit_id=commit.commit_id,
            patch_id=patch_id,
            subject=commit.message.splitlines()[0] if commit.message else "",
            files_changed=files_changed,
            stable=stable,
        )))
    else:
        print(f"{patch_id} {commit.commit_id}")
File History
1 commit
sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b
fix: try fetch/presign before fetch/mpack to avoid Cloudfla…
Sonnet 4.6
patch
8 days ago