snapshot.py
python
sha256:06dba78c2a78e251b580422dd1fd547f3c8357ff18f7709a860873b2d24dbbbf
chore: bump version to 0.2.0rc14
Sonnet 4.6
patch
1 day ago
| 1 | """Pure filesystem snapshot logic for ``muse commit``. |
| 2 | |
| 3 | Nothing here touches the database. Filesystem access is limited to reading |
| 4 | files under ``workdir`` and refreshing the stat cache during a walk. The |
| 5 | functions are kept separate so they can be unit-tested without a database. |
| 6 | |
| 7 | ID derivation contract (deterministic, no random components): |
| 8 | |
| 9 | object_id = "sha256:" + sha256(file_bytes).hexdigest() |
| 10 | |
| 11 | snapshot_id = "sha256:" + sha256(f"snapshot {len(payload)}NUL" + payload).hexdigest() |
| 12 | payload = NUL.join(sorted(f"{path}NUL{strip(oid)}" for path, oid in manifest.items()) |
| 13 | + [f"dirNUL{d}" for d in sorted(directories)]) |
| 14 | # strip() removes any leading "sha256:" prefix; directory entries are optional |
| 15 | |
| 16 | commit_id = "sha256:" + sha256(f"commit {len(payload)}NUL" + payload).hexdigest() |
| 17 | payload = NUL.join([NUL.join(sorted(strip(p) for p in parent_ids)), strip(snapshot_id), |
| 18 | message, committed_at_iso, author, signer_public_key]) |
| 19 | # strip() removes any leading "sha256:" prefix |
| 20 | |
| 21 | All three functions normalize their inputs by stripping any ``sha256:`` prefix |
| 22 | before hashing. This makes the IDs stable regardless of whether callers pass |
| 23 | canonical ``sha256:<hex>`` or legacy bare-hex strings — the resulting ID is |
| 24 | always identical. |
| 25 | |
| 26 | The null byte (\\x00) is used as the field separator because it is: |
| 27 | - Illegal in POSIX filenames (preventing separator-injection attacks from |
| 28 | crafted file paths). |
| 29 | - Absent from SHA-256 hex strings (preventing injection via object IDs). |
| 30 | - Absent from ISO-8601 timestamps and typical message text. |
| 31 | |
| 32 | This replaces the previous ``|`` / ``:`` separator scheme which allowed two |
| 33 | distinct manifests or commit inputs to produce the same hash if filenames |
| 34 | contained those characters. |
| 35 | |
| 36 | Symlinks in the working tree are excluded from snapshots. Following a |
| 37 | symlink that points outside state/ would silently commit the contents |
| 38 | of arbitrary filesystem paths. |
| 39 | |
| 40 | Exclusion policy |
| 41 | ---------------- |
| 42 | Dotfiles and dot-directories are **tracked by default** — ``.cursorrules``, |
| 43 | ``.editorconfig``, ``.eslintrc`` are intentional project configuration that |
| 44 | collaborators need. Exclusion is driven entirely by ``.museignore`` plus the |
| 45 | built-in secrets blocklist below. The only hard-coded directory skips are |
| 46 | ``.muse/`` itself (internal VCS storage), nested Muse repositories, and a |
| 47 | performance-only list of directories that are universally noise |
| 48 | (``node_modules/``, ``__pycache__/``, ``.venv/`` etc.). |
| 49 | """ |
| 50 | |
| 51 | import fnmatch |
| 52 | import os |
| 53 | import pathlib |
| 54 | import re |
| 55 | import stat as _stat |
| 56 | |
| 57 | import hashlib as _hashlib |
| 58 | |
| 59 | from muse.core.types import Manifest, blob_id, hash_file, load_json_file, split_id |
| 60 | from muse.core.paths import repo_json_path as _repo_json_path |
| 61 | from muse.core.ignore import is_ignored, load_force_track_paths, load_ignore_config, resolve_patterns |
| 62 | from muse.core.stat_cache import load_cache |
| 63 | |
# Directory names removed from ``dirnames`` before ``os.walk`` descends, so
# nothing beneath them is ever visited.  Membership is tested by bare
# directory name at every depth of the tree.  A frozenset keeps that test
# O(1) inside the hot walk loop.
_ALWAYS_PRUNE_DIRS: frozenset[str] = frozenset(
    (
        # Version-control internals.
        ".muse",
        ".git",
        # Dependency and virtual-environment trees.
        "node_modules",
        ".venv",
        "venv",
        ".tox",
        ".nox",
        # Interpreter, linter and test-runner caches.
        "__pycache__",
        ".mypy_cache",
        ".ruff_cache",
        ".pytest_cache",
        # Coverage and build output.
        ".coverage",
        "htmlcov",
        "dist",
        "build",
    )
)
| 87 | |
# Built-in blocklist of secret-bearing (and OS-metadata) filenames.
# ``load_ignore_patterns`` places this list ahead of the user's patterns, so
# it is in effect whether or not the user has written any ignore rules.
#
# ``.env.example`` is deliberately absent: it is the conventional name for a
# credential-free template and must stay trackable.  That is why the real
# environment files are spelled out one by one instead of using a ``.env*``
# wildcard, which would catch the template too.
_BUILTIN_SECRET_PATTERNS: list[str] = [
    # Environment files.
    ".env",
    ".env.local",
    ".env.development",
    ".env.staging",
    ".env.production",
    ".env.prod",
    ".envrc",
    # Key and certificate material.
    "*.pem",
    "*.key",
    "*.p12",
    "*.pfx",
    # Operating-system metadata files.
    ".DS_Store",
    "Thumbs.db",
]
| 111 | |
def _build_filename_filter(patterns: list[str]) -> re.Pattern[str] | None:
    """Compile the basename-level ignore patterns into one alternation regex.

    Each pattern is reduced to its body by stripping leading ``!`` negation
    markers and trailing ``/`` characters.  Bodies that still contain a ``/``
    (for example ``docs/*.md``) describe a path rather than a single name and
    are left out.  Every remaining body is translated with
    :func:`fnmatch.translate` and the results are OR-ed together.

    The returned regex is meant to be applied to a bare filename with
    ``search``: a miss means no basename-level pattern can apply to that
    file, so one regex call stands in for one ``fnmatch`` call per pattern.
    A hit is only a hint; the translated globs are anchored at the end of
    the name but not at the start, so the caller must confirm with the full
    ignore check.

    Args:
        patterns: Raw ignore patterns, possibly negated or directory-style.

    Returns:
        The compiled alternation, or ``None`` when *patterns* is empty or
        contributes no basename-level pattern.
    """

    def _basename_glob(raw: str) -> str | None:
        # Drop the negation marker and any directory suffix; a slash that
        # survives means the pattern is path-level and cannot be prefiltered.
        glob = raw.lstrip("!").rstrip("/")
        return None if "/" in glob else glob

    regex_parts = [
        fnmatch.translate(glob)
        for glob in map(_basename_glob, patterns)
        if glob is not None
    ]
    if regex_parts:
        return re.compile("(?:" + "|".join(regex_parts) + ")")
    return None
| 143 | |
def load_ignore_patterns(workdir: pathlib.Path) -> list[str]:
    """Return the built-in blocklist followed by the user's ignore patterns.

    The active domain is read from the repository's JSON metadata file (the
    path given by ``_repo_json_path``).  ``"code"`` is used when that file
    does not exist, does not hold a JSON object, or has no string
    ``"domain"`` entry.  The ignore configuration for *workdir* is then
    resolved for that domain and appended to ``_BUILTIN_SECRET_PATTERNS``.

    The function is public so other commands can reuse the same rules
    without repeating the domain detection.

    Args:
        workdir: Root of the working tree.

    Returns:
        A new list: built-in patterns first, then the resolved user patterns.
    """
    repo_json = _repo_json_path(workdir)
    repo_meta = load_json_file(repo_json) if repo_json.exists() else None
    declared = repo_meta.get("domain") if isinstance(repo_meta, dict) else None
    domain = declared if isinstance(declared, str) else "code"

    resolved = resolve_patterns(load_ignore_config(workdir), domain)
    return _BUILTIN_SECRET_PATTERNS + resolved
| 166 | |
# Field separator (a single NUL character) used when joining the parts of the
# snapshot and commit identity payloads built in this module.
_SEP: str = "\x00"
| 168 | |
def build_snapshot_manifest(workdir: pathlib.Path) -> Manifest:
    """Build the ``{rel_path: object_id}`` manifest for *workdir*.

    This is the preferred public name; the work is done by
    :func:`walk_workdir`, whose result is returned unchanged.
    """
    manifest = walk_workdir(workdir)
    return manifest
| 175 | |
def directories_from_manifest(files: Manifest) -> list[str]:
    """List every ancestor directory implied by the paths in *files*.

    Each key of *files* is treated as a ``/``-separated relative path; all
    of its proper prefixes (``a`` and ``a/b`` for ``a/b/c.txt``) are
    collected.  Directories that contain no file cannot be derived this way
    because nothing in *files* mentions them.

    Useful when a manifest has been computed without a filesystem walk (for
    example by merging two manifests) and a matching directory list is
    needed.

    Args:
        files: Manifest whose keys are POSIX-style relative file paths.

    Returns:
        Sorted list of unique directory paths.
    """
    found: set[str] = set()
    for path in files:
        # Peel one trailing component at a time until no separator is left.
        parent, slash, _leaf = path.rpartition("/")
        while slash:
            found.add(parent)
            parent, slash, _leaf = parent.rpartition("/")
    return sorted(found)
| 198 | |
def walk_workdir_with_dirs(
    workdir: pathlib.Path,
) -> tuple[Manifest, list[str]]:
    """Walk *workdir* once and return ``(files_manifest, sorted_directories)``.

    Directory handling:
        * A subdirectory is never entered when its name is in
          :data:`_ALWAYS_PRUNE_DIRS` or when it contains a ``.muse``
          directory of its own (a nested repository).
        * Every other non-root directory that the walk enters is recorded,
          relative to *workdir*, with ``/`` separators.

    File handling, in order:
        1. Entries that cannot be ``lstat``-ed, and anything that is not a
           regular file (symlinks included, since ``lstat`` does not follow
           them), are skipped.
        2. Paths listed by ``load_force_track_paths`` are always recorded;
           no ignore pattern is consulted for them.
        3. When no ignore pattern contains a ``/`` and the bare filename
           misses the combined basename regex, the file is recorded without
           calling ``is_ignored``.
        4. Otherwise the file is recorded unless ``is_ignored`` rejects it.

    The object ID stored for each file is whatever the stat cache's
    ``get_cached`` returns for it.  After the walk the cache is pruned to
    the recorded paths and saved.

    Args:
        workdir: Root of the working tree.

    Returns:
        The file manifest and the sorted list of directories entered.
    """
    patterns = load_ignore_patterns(workdir)
    forced = load_force_track_paths(workdir)
    stat_cache = load_cache(workdir)

    root = str(workdir)
    rel_start = len(root) + 1  # skips "<root><separator>"
    foreign_sep = os.sep != "/"

    basename_filter = _build_filename_filter(patterns)
    has_path_patterns = any("/" in pat.lstrip("!") for pat in patterns)
    # The basename regex can only vouch for a file when every pattern is
    # basename-level; a single path-level pattern disables the shortcut.
    quick_accept = None if has_path_patterns else basename_filter

    tracked: Manifest = {}
    entered: list[str] = []

    for current, subdirs, names in os.walk(root, followlinks=False):
        # Editing ``subdirs`` in place is what stops os.walk from descending.
        kept: list[str] = []
        for sub in subdirs:
            if sub in _ALWAYS_PRUNE_DIRS:
                continue
            if os.path.isdir(os.path.join(current, sub, ".muse")):
                continue  # nested repository: it owns its own snapshot
            kept.append(sub)
        subdirs[:] = kept

        if current != root:
            rel_dir = current[rel_start:]
            entered.append(rel_dir.replace(os.sep, "/") if foreign_sep else rel_dir)

        for name in names:
            full = os.path.join(current, name)
            try:
                info = os.lstat(full)
            except OSError:
                continue  # vanished or unreadable between listing and stat
            if not _stat.S_ISREG(info.st_mode):
                continue

            rel = full[rel_start:]
            if foreign_sep:
                rel = rel.replace(os.sep, "/")

            if rel not in forced:
                might_match = quick_accept is None or quick_accept.search(name) is not None
                if might_match and is_ignored(rel, patterns):
                    continue

            tracked[rel] = stat_cache.get_cached(
                rel, full, info.st_mtime, info.st_size, info.st_ino
            )

    stat_cache.prune(set(tracked))
    stat_cache.save()
    return tracked, sorted(entered)
| 274 | |
def walk_workdir(workdir: pathlib.Path) -> Manifest:
    """Walk *workdir* and return only the ``{rel_path: object_id}`` manifest.

    Thin wrapper over :func:`walk_workdir_with_dirs` that drops the
    directory list.  Callers that need both should call that function
    directly rather than walking the tree twice.

    What the underlying walk leaves out (silently):
        - Symlinks and any other non-regular file.
        - Paths rejected by the ignore patterns, which include the built-in
          secrets blocklist.
        - Everything beneath a directory named in ``_ALWAYS_PRUNE_DIRS`` or
          beneath a nested repository (a directory holding its own
          ``.muse``).

    Dotfiles and dot-directories get no special treatment: they are tracked
    unless one of the rules above excludes them.  Paths in the result use
    ``/`` separators on every host OS.

    Pruned directories are never descended into, and object IDs come from
    the stat cache.
    """
    manifest, _directories = walk_workdir_with_dirs(workdir)
    return manifest
| 315 | |
def snapshot_identity_bytes(
    manifest: Manifest,
    directories: list[str] | None = None,
) -> bytes:
    """Build the canonical byte payload that a snapshot ID is derived from.

    Layout, with every element joined by the NUL separator:
        * one ``path NUL id`` entry per manifest item, sorted as strings,
          where ``id`` is the second element returned by ``split_id``;
        * then, only when *directories* is non-empty, one ``dir NUL path``
          entry per directory in sorted order.

    Sorting makes the payload independent of the iteration order of the
    inputs.

    NOTE(review): a file literally named ``dir`` produces an entry of the
    same shape as a directory entry; presumably harmless in practice, but
    worth confirming against the ID format's collision guarantees.

    Args:
        manifest: Mapping of relative file path to object ID.
        directories: Optional directory paths to bind into the identity.

    Returns:
        The encoded payload.
    """
    entries = [f"{path}{_SEP}{split_id(oid)[1]}" for path, oid in manifest.items()]
    entries.sort()
    if directories:
        # The literal "dir" tag marks directory entries.
        entries += [f"dir{_SEP}{d}" for d in sorted(directories)]
    payload = _SEP.join(entries)
    return payload.encode()
| 339 | |
| 340 | |
def compute_snapshot_id(
    manifest: Manifest,
    directories: list[str] | None = None,
) -> str:
    """Return the ``sha256:``-prefixed ID of a snapshot.

    The digest covers a typed header followed by the canonical payload from
    :func:`snapshot_identity_bytes`: the word ``snapshot``, a space, the
    payload length in bytes, a NUL byte, then the payload itself.

    Args:
        manifest: Mapping of relative file path to object ID.
        directories: Optional directory paths bound into the identity.

    Returns:
        ``"sha256:"`` followed by the hex digest.
    """
    canonical = snapshot_identity_bytes(manifest, directories)
    hasher = _hashlib.sha256(f"snapshot {len(canonical)}\0".encode())
    hasher.update(canonical)
    return "sha256:" + hasher.hexdigest()
| 353 | |
def detect_directory_renames(
    deleted_dirs: set[str],
    added_dirs: set[str],
    last_manifest: Manifest,
    current_manifest: Manifest,
) -> list[tuple[str, str]]:
    """Return ``[(old_dir, new_dir)]`` pairs inferred from two manifests.

    ``old_dir`` is reported as renamed to ``new_dir`` when the files beneath
    ``old_dir`` in *last_manifest* and the files beneath ``new_dir`` in
    *current_manifest* have exactly the same relative paths and object IDs.

    Matching rules:
        * Deleted directories are processed in sorted order, and each one
          is compared against the added directories in sorted order.
        * The first added directory with identical contents wins; it is
          then unavailable to later deleted directories, so every added
          directory appears in at most one pair.
        * A deleted directory with no files in *last_manifest* is skipped,
          because there is no content to match on.

    Args:
        deleted_dirs: Directories present before and absent now.
        added_dirs: Directories absent before and present now.
        last_manifest: ``{path: object_id}`` of the previous snapshot.
        current_manifest: ``{path: object_id}`` of the working tree.

    Returns:
        Rename pairs, ordered by old directory name.
    """

    def _contents(manifest: Manifest, directory: str) -> dict[str, str]:
        # Files under *directory*, keyed by their path relative to it.
        prefix = f"{directory}/"
        cut = len(prefix)
        return {
            path[cut:]: oid
            for path, oid in manifest.items()
            if path.startswith(prefix)
        }

    renames: list[tuple[str, str]] = []
    if not deleted_dirs or not added_dirs:
        return renames

    candidates = sorted(added_dirs)
    # Each added directory's contents are extracted from current_manifest at
    # most once, instead of once per (old_dir, new_dir) comparison.
    new_contents: dict[str, dict[str, str]] = {}
    claimed: set[str] = set()

    for old_dir in sorted(deleted_dirs):
        old_files = _contents(last_manifest, old_dir)
        if not old_files:
            continue  # empty directory: nothing to match by content

        for new_dir in candidates:
            if new_dir in claimed:
                continue
            new_files = new_contents.get(new_dir)
            if new_files is None:
                new_files = new_contents[new_dir] = _contents(current_manifest, new_dir)
            if new_files == old_files:
                renames.append((old_dir, new_dir))
                claimed.add(new_dir)
                break

    return renames
| 402 | |
def diff_workdir_vs_snapshot(
    workdir: pathlib.Path,
    last_manifest: Manifest,
    last_directories: list[str] | None = None,
) -> tuple[set[str], set[str], set[str], set[str], set[str], set[str]]:
    """Compare *workdir* against the manifest of the previous commit.

    Returns ``(added, modified, deleted, untracked, added_dirs,
    deleted_dirs)``:

    - ``added``: files in *workdir* absent from *last_manifest*.
    - ``modified``: files present in both with a different object ID.
    - ``deleted``: files in *last_manifest* that the walk no longer
      reports, except those that still exist on disk and are now matched by
      an ignore pattern (they were untracked on purpose, not removed).
    - ``untracked``: non-empty only when *last_manifest* is empty, in which
      case it holds every file found and ``added`` / ``modified`` /
      ``deleted`` are empty.
    - ``added_dirs``: directories found in *workdir* that are not in
      *last_directories*.
    - ``deleted_dirs``: directories in *last_directories* that were not
      found in *workdir*.

    When *workdir* does not exist, every previously known file and
    directory is reported as deleted.  All paths use ``/`` separators.

    Args:
        workdir: Root of the working tree.
        last_manifest: ``{path: object_id}`` of the previous snapshot.
        last_directories: Directory list of the previous snapshot, if any.
    """
    last_paths = set(last_manifest)
    last_dirs = set(last_directories or [])

    if not workdir.exists():
        return set(), set(), last_paths, set(), set(), last_dirs

    current_manifest, current_dirs = walk_workdir_with_dirs(workdir)
    current_paths = set(current_manifest)

    # Directory differences are computed the same way on every path, so the
    # documented meaning of added_dirs / deleted_dirs also holds when the
    # previous manifest has no files but does carry a directory list.
    current_dirs_set = set(current_dirs)
    added_dirs = current_dirs_set - last_dirs
    deleted_dirs = last_dirs - current_dirs_set

    if not last_paths:
        return set(), set(), set(), current_paths, added_dirs, deleted_dirs

    added = current_paths - last_paths
    deleted = last_paths - current_paths
    modified = {
        p for p in current_paths & last_paths
        if current_manifest[p] != last_manifest[p]
    }

    # A file that was tracked before, is now ignored, and is still on disk
    # has left tracking intentionally.  Only files genuinely missing from
    # the working tree count as deleted.
    if deleted:
        ignore_patterns = load_ignore_patterns(workdir)
        deleted = {
            p for p in deleted
            if not (is_ignored(p, ignore_patterns) and (workdir / p).exists())
        }

    return added, modified, deleted, set(), added_dirs, deleted_dirs
| 460 | |
def commit_identity_bytes(
    parent_ids: list[str],
    snapshot_id: str,
    message: str,
    committed_at_iso: str,
    author: str = "",
    signer_public_key: str = "",
) -> bytes:
    """Build the canonical byte payload that a commit ID is derived from.

    Six fields are joined with the NUL separator, in this order:
    parents, snapshot ID, message, commit timestamp, author, signer public
    key.  The parents field is itself the NUL-joined, sorted list of parent
    IDs, so the order in which parents are supplied does not matter.

    Parent IDs and the snapshot ID are reduced to the second element
    returned by ``split_id`` before joining.  An empty *snapshot_id*
    contributes an empty field without going through ``split_id``.

    Args:
        parent_ids: IDs of the parent commits (any order).
        snapshot_id: ID of the snapshot this commit points at; may be empty.
        message: Commit message.
        committed_at_iso: Commit timestamp string.
        author: Author identity; empty by default.
        signer_public_key: Signer's public key; empty by default.

    Returns:
        The encoded payload.
    """
    parents_field = _SEP.join(sorted(split_id(p)[1] for p in parent_ids))
    if snapshot_id:
        snapshot_field = split_id(snapshot_id)[1]
    else:
        snapshot_field = ""

    fields = (
        parents_field,
        snapshot_field,
        message,
        committed_at_iso,
        author,
        signer_public_key,
    )
    return _SEP.join(fields).encode()
| 491 | |
| 492 | |
def compute_commit_id(
    parent_ids: list[str],
    snapshot_id: str,
    message: str,
    committed_at_iso: str,
    author: str = "",
    signer_public_key: str = "",
) -> str:
    """Return the ``sha256:``-prefixed ID of a commit.

    The digest covers a typed header followed by the canonical payload from
    :func:`commit_identity_bytes`: the word ``commit``, a space, the payload
    length in bytes, a NUL byte, then the payload itself.  ``author`` and
    ``signer_public_key`` are part of that payload, so changing either one
    changes the resulting ID.

    Args:
        parent_ids: IDs of the parent commits (any order).
        snapshot_id: ID of the snapshot this commit points at.
        message: Commit message.
        committed_at_iso: Commit timestamp string.
        author: Author identity; empty by default.
        signer_public_key: Signer's public key; empty by default.

    Returns:
        ``"sha256:"`` followed by the hex digest.
    """
    canonical = commit_identity_bytes(
        parent_ids,
        snapshot_id,
        message,
        committed_at_iso,
        author,
        signer_public_key,
    )
    hasher = _hashlib.sha256(f"commit {len(canonical)}\0".encode())
    hasher.update(canonical)
    return "sha256:" + hasher.hexdigest()
File History
1 commit
sha256:06dba78c2a78e251b580422dd1fd547f3c8357ff18f7709a860873b2d24dbbbf
chore: bump version to 0.2.0rc14
Sonnet 4.6
patch
1 day ago