provenance.py
python
sha256:06dba78c2a78e251b580422dd1fd547f3c8357ff18f7709a860873b2d24dbbbf
chore: bump version to 0.2.0rc14
Sonnet 4.6
patch
15 hours ago
| 1 | """Agent identity and commit signing for the Muse VCS. |
| 2 | |
| 3 | Every commit in Muse can carry cryptographic provenance metadata that |
| 4 | identifies *who* or *what* produced it — a human author, an autonomous AI |
| 5 | agent, or a specific toolchain run. |
| 6 | |
| 7 | Signing model |
| 8 | ------------- |
| 9 | Signatures use **Ed25519** with a key derived in memory from the BIP39 |
| 10 | mnemonic stored in the OS keychain (same key used for MSign request |
| 11 | authentication — no PEM file is ever written to disk). This is an |
| 12 | asymmetric scheme: the private key signs; the public key (embedded in the |
| 13 | commit record) verifies. Any party with the commit record can verify the |
| 14 | signature without access to the private key or any external service. |
| 15 | |
| 16 | Signature format |
| 17 | ---------------- |
| 18 | The signed input is :func:`provenance_payload` — a SHA-256 hex digest that |
| 19 | binds the commit content identity (``commit_id``) to authorship claims |
| 20 | (``author``, ``agent_id``, ``model_id``, ``toolchain_id``, ``prompt_hash``) and the ``committed_at`` timestamp. |
| 21 | |
| 22 | ``CommitRecord.signature`` |
| 23 | ``ed25519:``-prefixed base64url Ed25519 signature (no padding; 86 characters after the prefix). |
| 24 | |
| 25 | ``CommitRecord.signer_public_key`` |
| 26 | ``ed25519:``-prefixed base64url encoding of the raw Ed25519 public key (32 bytes → 43 chars after the prefix). |
| 27 | Embedded in the commit record so that verification is fully offline. |
| 28 | |
| 29 | ``CommitRecord.signer_key_id`` |
| 30 | First 16 hex characters of SHA-256(raw public key bytes). Short enough |
| 31 | to log, long enough for practical uniqueness. |
| 32 | |
| 33 | Key management |
| 34 | -------------- |
| 35 | Keys are the same Ed25519 keypairs used for MSign HTTP authentication. |
| 36 | Generate and register a keypair with:: |
| 37 | |
| 38 | muse auth keygen --hub https://localhost:1337 |
| 39 | muse auth register --hub https://localhost:1337 --handle <handle> |
| 40 | |
| 41 | Usage |
| 42 | ----- |
| 43 | :: |
| 44 | |
| 45 | from muse.core.provenance import ( |
| 46 | make_agent_identity, provenance_payload, sign_commit_ed25519, |
| 47 | verify_commit_ed25519, sign_commit_record, |
| 48 | ) |
| 49 | from muse.core.types import public_key_fingerprint |
| 50 | from cryptography.hazmat.primitives.asymmetric.ed25519 import Ed25519PrivateKey |
| 51 | |
| 52 | private_key: Ed25519PrivateKey = ... |
| 53 | payload = provenance_payload(commit_id, agent_id="my-agent", ...) |
| 54 | sig = sign_commit_ed25519(payload, private_key) |
| 55 | pub_bytes = private_key.public_key().public_bytes(Raw, Raw) |
| 56 | assert verify_commit_ed25519(payload, sig, pub_bytes) |
| 57 | """ |
| 58 | |
| 59 | import hashlib |
| 60 | import logging |
| 61 | import pathlib |
| 62 | |
| 63 | from typing import TYPE_CHECKING, TypedDict |
| 64 | |
| 65 | from muse.core.types import DEFAULT_SIGN_ALGO, blob_id, decode_pubkey, decode_sig, encode_pubkey, encode_sig, short_id, sig_algo |
| 66 | |
| 67 | if TYPE_CHECKING: |
| 68 | from cryptography.hazmat.primitives.asymmetric.ed25519 import Ed25519PrivateKey |
| 69 | |
| 70 | logger = logging.getLogger(__name__) |
| 71 | |
# Field separator for the provenance payload: a single NUL byte, the same
# convention hash_commit in ids.py uses.  It is meant to keep a field value
# from being read as a field boundary; note that values are not checked for
# embedded NULs in this module.
_PROV_SEP = "\x00"

# Format tag prepended to every provenance payload before hashing.
#   v1 (untagged): commit_id + author + agent_id + model_id + toolchain_id + prompt_hash
#   v2 (this tag): adds the tag itself and binds the committed_at timestamp
_PROV_VERSION_PREFIX = "muse-provenance-v2\n"

# ---------------------------------------------------------------------------
# Provenance signing payload
# ---------------------------------------------------------------------------


def provenance_payload(
    commit_id: str,
    *,
    author: str = "",
    agent_id: str = "",
    model_id: str = "",
    toolchain_id: str = "",
    prompt_hash: str = "",
    committed_at: str = "",
) -> str:
    """Return the SHA-256 hex digest of the canonical provenance payload.

    The digest ties the commit's content identity (``commit_id``) to its
    authorship claims (``author``, ``agent_id``, ``model_id``,
    ``toolchain_id``, ``prompt_hash``) and to the ``committed_at``
    timestamp.  :func:`sign_commit_ed25519` signs this digest rather than
    the bare ``commit_id``; a verifier rebuilds it from the stored record
    fields and hands it to :func:`verify_commit_ed25519`.

    ``branch``, ``repo_id`` and ``metadata`` are deliberately left out:
    they are mutable by design (after a merge a commit is reachable from
    several branches), so a change to them is not an integrity violation.

    Canonical pre-image, hashed after the default ``str.encode()``::

        muse-provenance-v2\\n
        <commit_id>\\x00<author>\\x00<agent_id>\\x00<model_id>\\x00<toolchain_id>
        \\x00<prompt_hash>\\x00<committed_at>

    Fields are joined with a NUL byte (``\\x00``).  The values themselves
    are not validated here, so callers are expected to pass strings that do
    not contain NUL.

    Args:
        commit_id: ``sha256:``-prefixed commit ID (canonical content identity).
        author: Display author name / email.
        agent_id: Stable agent identifier.
        model_id: Model name/version (empty for humans).
        toolchain_id: Toolchain producing the commit.
        prompt_hash: ``sha256:``-prefixed SHA-256 of the instruction prompt
            (privacy-preserving).
        committed_at: ISO-8601 timestamp (e.g. ``"2026-04-08T12:00:00+00:00"``).
            An empty string is accepted for unsigned commits.

    Returns:
        64-character lowercase hex SHA-256 digest of the combined payload.
    """
    # Field order is part of the signed format — do not reorder.
    ordered_fields = (
        commit_id,
        author,
        agent_id,
        model_id,
        toolchain_id,
        prompt_hash,
        committed_at,
    )
    canonical = _PROV_VERSION_PREFIX + _PROV_SEP.join(ordered_fields)
    hasher = hashlib.sha256()
    hasher.update(canonical.encode())
    return hasher.hexdigest()
| 136 | |
| 137 | # --------------------------------------------------------------------------- |
| 138 | # Agent identity |
| 139 | # --------------------------------------------------------------------------- |
| 140 | |
class AgentIdentity(TypedDict, total=False):
    """Identity record describing the human or AI agent behind a commit.

    Declared with ``total=False``, so every key may be omitted.  This lets
    partial provenance (for example, only ``agent_id`` is known) be
    expressed without inventing placeholder values.

    Keys:
        agent_id: Stable, human-readable identifier picked by the agent or
            its operator; should be unique within a team
            (e.g. ``"counterpoint-bot-v1"``).
        model_id: Model identifier for AI agents (e.g. ``"claude-opus-4"``);
            empty for human authors.
        toolchain_id: Build system or IDE that produced the commit
            (e.g. ``"cursor-agent-v2"``).
        prompt_hash: ``sha256:``-prefixed SHA-256 of the instruction/prompt
            that triggered the session.  Only the hash is recorded, never
            the prompt text.
        execution_context_hash: ``sha256:``-prefixed SHA-256 of any extra
            execution context (system prompt, environment config, etc.).
    """

    # Who acted.
    agent_id: str
    # Which model and toolchain were involved.
    model_id: str
    toolchain_id: str
    # Hashes standing in for sensitive free-text inputs.
    prompt_hash: str
    execution_context_hash: str
| 169 | |
def make_agent_identity(
    agent_id: str,
    *,
    model_id: str = "",
    toolchain_id: str = "",
    prompt: str = "",
    execution_context: str = "",
) -> AgentIdentity:
    """Assemble an :class:`AgentIdentity`, hashing the sensitive inputs.

    ``prompt`` and ``execution_context`` never enter the record verbatim:
    each is encoded and passed through ``blob_id``, and only that result is
    stored.

    Args:
        agent_id: Stable agent identifier string.
        model_id: Model name/version (empty for humans).
        toolchain_id: Toolchain producing the commit.
        prompt: Raw instruction text to hash (not stored).
        execution_context: Additional context to hash (not stored).

    Returns:
        An :class:`AgentIdentity` that always carries ``agent_id`` (even
        when it is empty); every other key is present only when the
        corresponding argument is truthy.
    """
    record: AgentIdentity = {"agent_id": agent_id}

    # Plain metadata: copied through as given.
    if model_id:
        record["model_id"] = model_id
    if toolchain_id:
        record["toolchain_id"] = toolchain_id

    # Sensitive text: only the hash of the encoded text is kept.
    if prompt:
        prompt_bytes = prompt.encode()
        record["prompt_hash"] = blob_id(prompt_bytes)
    if execution_context:
        context_bytes = execution_context.encode()
        record["execution_context_hash"] = blob_id(context_bytes)

    return record
| 203 | |
| 204 | # --------------------------------------------------------------------------- |
| 205 | # Ed25519 signing and verification |
| 206 | # --------------------------------------------------------------------------- |
| 207 | |
def sign_commit_ed25519(payload: str, private_key: "Ed25519PrivateKey") -> str:
    """Sign *payload* with an Ed25519 private key.

    The ``private_key`` annotation is written as a string because
    ``Ed25519PrivateKey`` is imported only under ``TYPE_CHECKING``; an
    unquoted name would be looked up when the function is defined on
    interpreters that evaluate annotations eagerly, and fail with
    ``NameError``.

    Args:
        payload: Hex SHA-256 provenance payload from :func:`provenance_payload`.
        private_key: ``Ed25519PrivateKey`` instance from the ``cryptography``
            package.

    Returns:
        ``"ed25519:<base64url>"`` — algorithm prefix + signature, no padding.
        The prefix makes the stored value self-describing, analogous to how
        object IDs carry a ``sha256:`` prefix.  Verifiers dispatch on the
        prefix instead of a separate ``signature_format`` integer field.
    """
    # The signature covers the encoded hex digest string, not the raw digest bytes.
    signature_bytes = private_key.sign(payload.encode())
    return encode_sig(DEFAULT_SIGN_ALGO, signature_bytes)
| 222 | |
def verify_commit_ed25519(payload: str, signature: str, public_key_bytes: bytes) -> bool:
    """Verify an Ed25519 *signature* over *payload* using *public_key_bytes*.

    *signature* must be in the canonical ``"ed25519:<base64url>"`` format
    produced by :func:`sign_commit_ed25519`.  Any other prefix (or no prefix)
    returns ``False`` — there is no fallback to bare base64url.

    Verification fails closed: every error while building the key or
    checking the signature yields ``False``.  Errors other than a plain
    signature mismatch are additionally logged at debug level so that a
    malformed key or a backend problem can be told apart from a bad
    signature when troubleshooting.

    Args:
        payload: Hex SHA-256 provenance payload from :func:`provenance_payload`.
        signature: ``"ed25519:<base64url>"`` — algorithm-prefixed signature.
        public_key_bytes: Raw 32-byte Ed25519 public key.

    Returns:
        ``True`` when the signature is valid, ``False`` otherwise (including
        unknown algorithm prefix, malformed base64, or cryptographic failure).
    """
    # Kept at function scope, as in the original implementation.
    from cryptography.exceptions import InvalidSignature
    from cryptography.hazmat.primitives.asymmetric.ed25519 import Ed25519PublicKey

    # Reject anything that is not tagged with the default signing algorithm.
    if sig_algo(signature) != DEFAULT_SIGN_ALGO:
        return False

    try:
        _, sig_bytes = decode_sig(signature)
    except ValueError:
        return False

    try:
        public_key = Ed25519PublicKey.from_public_bytes(public_key_bytes)
        public_key.verify(sig_bytes, payload.encode())
    except InvalidSignature:
        # The expected outcome for a tampered record or a wrong key.
        return False
    except Exception as exc:
        # Still fail closed, but leave a trace: this is not a simple mismatch
        # (e.g. key bytes of the wrong length or type).
        logger.debug(
            "Ed25519 verification aborted: %s: %s", type(exc).__name__, exc
        )
        return False
    return True
| 258 | |
def encode_public_key(private_key: "Ed25519PrivateKey") -> tuple[bytes, str]:
    """Extract and encode the public key from an Ed25519 private key.

    The ``private_key`` annotation is written as a string because
    ``Ed25519PrivateKey`` is imported only under ``TYPE_CHECKING``; an
    unquoted name would be looked up when the function is defined on
    interpreters that evaluate annotations eagerly, and fail with
    ``NameError``.

    Args:
        private_key: ``Ed25519PrivateKey`` instance.

    Returns:
        ``(raw_bytes, "ed25519:<b64url>")`` — the 32-byte raw public key and
        its algorithm-prefixed base64url encoding (no padding).  The prefix
        makes the stored value self-describing, consistent with the
        ``sha256:``-prefixed object IDs used throughout Muse.
    """
    from cryptography.hazmat.primitives.serialization import Encoding, PublicFormat

    # Raw/Raw serialises the key as bare bytes, with no PEM/DER wrapping.
    raw_bytes = private_key.public_key().public_bytes(Encoding.Raw, PublicFormat.Raw)
    return raw_bytes, encode_pubkey(DEFAULT_SIGN_ALGO, raw_bytes)
| 276 | |
| 277 | # --------------------------------------------------------------------------- |
| 278 | # Convenience: sign a commit's provenance payload and return the signature fields |
| 279 | # --------------------------------------------------------------------------- |
| 280 | |
def sign_commit_record(
    commit_id: str,
    agent_id: str,
    private_key: "Ed25519PrivateKey",
    *,
    author: str = "",
    model_id: str = "",
    toolchain_id: str = "",
    prompt_hash: str = "",
    committed_at: str = "",
) -> tuple[str, str, str] | None:
    """Sign the provenance payload (v2) for *commit_id* with *private_key*.

    Computes :func:`provenance_payload` from the supplied fields so the
    signature covers both the content identity (``commit_id``) and the
    authorship claims, including the ``committed_at`` timestamp.  Nothing
    is mutated: the caller stores the returned values on the commit record.

    The ``private_key`` annotation is written as a string because
    ``Ed25519PrivateKey`` is imported only under ``TYPE_CHECKING``; an
    unquoted name would be looked up when the function is defined on
    interpreters that evaluate annotations eagerly, and fail with
    ``NameError``.

    Args:
        commit_id: ``sha256:``-prefixed commit ID, passed unchanged to
            :func:`provenance_payload`.
        agent_id: Stable agent identifier (metadata only; key is from hub identity).
        private_key: ``Ed25519PrivateKey`` instance.
        author: Display author name / email.
        model_id: Model name/version (empty for humans).
        toolchain_id: Toolchain producing the commit.
        prompt_hash: ``sha256:``-prefixed SHA-256 of the instruction prompt.
        committed_at: ISO-8601 timestamp — binds the wall-clock time to the signature.

    Returns:
        ``(signature, public_key, key_fingerprint)`` where ``signature`` is
        the value returned by :func:`sign_commit_ed25519`, ``public_key`` is
        the encoded key returned by :func:`encode_public_key`, and
        ``key_fingerprint`` is ``public_key_fingerprint`` of the raw public
        key bytes.  This implementation always returns the tuple; ``None``
        remains in the annotation only for interface compatibility.
    """
    # Function-scope import, as in the original; hoisted to the top of the
    # body so it is not buried between statements.
    from muse.core.types import public_key_fingerprint

    payload = provenance_payload(
        commit_id,
        author=author,
        agent_id=agent_id,
        model_id=model_id,
        toolchain_id=toolchain_id,
        prompt_hash=prompt_hash,
        committed_at=committed_at,
    )
    signature = sign_commit_ed25519(payload, private_key)
    raw_public_key, encoded_public_key = encode_public_key(private_key)
    fingerprint = public_key_fingerprint(raw_public_key)
    logger.debug("✅ Ed25519-signed commit %s with key %s", short_id(commit_id), fingerprint)
    return signature, encoded_public_key, fingerprint
File History
1 commit
sha256:06dba78c2a78e251b580422dd1fd547f3c8357ff18f7709a860873b2d24dbbbf
chore: bump version to 0.2.0rc14
Sonnet 4.6
patch
15 hours ago