gabriel / muse public
provenance.py python
325 lines 12.6 KB
Raw
sha256:06dba78c2a78e251b580422dd1fd547f3c8357ff18f7709a860873b2d24dbbbf chore: bump version to 0.2.0rc14 Sonnet 4.6 patch 15 hours ago
1 """Agent identity and commit signing for the Muse VCS.
2
3 Every commit in Muse can carry cryptographic provenance metadata that
4 identifies *who* or *what* produced it — a human author, an autonomous AI
5 agent, or a specific toolchain run.
6
7 Signing model
8 -------------
9 Signatures use **Ed25519** with a key derived in memory from the BIP39
10 mnemonic stored in the OS keychain (same key used for MSign request
11 authentication — no PEM file is ever written to disk). This is an
12 asymmetric scheme: the private key signs; the public key (embedded in the
13 commit record) verifies. Any party with the commit record can verify the
14 signature without access to the private key or any external service.
15
16 Signature format
17 ----------------
18 The signed input is :func:`provenance_payload` — a SHA-256 hex digest that
19 binds the commit content identity (``commit_id``) to authorship claims (``author``,
20 ``agent_id``, ``model_id``, ``toolchain_id``, ``prompt_hash``) and to ``committed_at``.
21
22 ``CommitRecord.signature``
23 ``ed25519:``-prefixed base64url Ed25519 signature (no padding; 86 characters after the prefix).
24
25 ``CommitRecord.signer_public_key``
26 ``ed25519:``-prefixed base64url raw Ed25519 public key (32 bytes → 43 chars after the prefix).
27 Embedded in the commit record so that verification is fully offline.
28
29 ``CommitRecord.signer_key_id``
30 First 16 hex characters of SHA-256(raw public key bytes). Short enough
31 to log, long enough for practical uniqueness.
32
33 Key management
34 --------------
35 Keys are the same Ed25519 keypairs used for MSign HTTP authentication.
36 Generate and register a keypair with::
37
38 muse auth keygen --hub https://localhost:1337
39 muse auth register --hub https://localhost:1337 --handle <handle>
40
41 Usage
42 -----
43 ::
44
45 from muse.core.provenance import (
46 encode_public_key, make_agent_identity, provenance_payload,
47 sign_commit_ed25519, sign_commit_record, verify_commit_ed25519,
48 )
49 from muse.core.types import public_key_fingerprint
50 from cryptography.hazmat.primitives.asymmetric.ed25519 import Ed25519PrivateKey
51
52 private_key: Ed25519PrivateKey = ...
53 payload = provenance_payload(commit_id, agent_id="my-agent", ...)
54 sig = sign_commit_ed25519(payload, private_key)
55 pub_bytes, pub_encoded = encode_public_key(private_key)
56 assert verify_commit_ed25519(payload, sig, pub_bytes)
57 """
58
59 import hashlib
60 import logging
61 import pathlib
62
63 from typing import TYPE_CHECKING, TypedDict
64
65 from muse.core.types import DEFAULT_SIGN_ALGO, blob_id, decode_pubkey, decode_sig, encode_pubkey, encode_sig, short_id, sig_algo
66
67 if TYPE_CHECKING:
68 from cryptography.hazmat.primitives.asymmetric.ed25519 import Ed25519PrivateKey
69
70 logger = logging.getLogger(__name__)
71
# Null-byte field separator — same convention as hash_commit in ids.py.
# Joining on NUL keeps field boundaries unambiguous for values that do not
# themselves contain a NUL; values are not validated here.
_PROV_SEP = "\x00"

# Version tag prepended to the provenance payload before hashing.
# v2 adds: version prefix + committed_at timestamp binding.
# v1 (no prefix): commit_id + author + agent_id + model_id + toolchain_id + prompt_hash
_PROV_VERSION_PREFIX = "muse-provenance-v2\n"

# ---------------------------------------------------------------------------
# Provenance signing payload
# ---------------------------------------------------------------------------

def provenance_payload(
    commit_id: str,
    *,
    author: str = "",
    agent_id: str = "",
    model_id: str = "",
    toolchain_id: str = "",
    prompt_hash: str = "",
    committed_at: str = "",
) -> str:
    """Return the SHA-256 hex digest of the canonical (v2) provenance payload.

    The digest ties the commit's content identity (``commit_id``) to the
    authorship claims (``author``, ``agent_id``, ``model_id``,
    ``toolchain_id``, ``prompt_hash``) and to the ``committed_at``
    timestamp. It is the message handed to :func:`sign_commit_ed25519`; a
    verifier rebuilds it from the stored record fields and passes it to
    :func:`verify_commit_ed25519`.

    ``branch``, ``repo_id`` and ``metadata`` are deliberately not part of
    the payload: they are mutable by design, so changing them is not an
    integrity violation.

    Canonical payload, hashed as its default (UTF-8) encoding::

        muse-provenance-v2\\n
        <commit_id>\\x00<author>\\x00<agent_id>\\x00<model_id>\\x00<toolchain_id>
        \\x00<prompt_hash>\\x00<committed_at>

    All seven fields are always present, in this fixed order; an omitted
    field contributes an empty string between its separators.

    Args:
        commit_id: ``sha256:``-prefixed commit ID (canonical content identity).
        author: Display author name / email.
        agent_id: Stable agent identifier.
        model_id: Model name/version (empty for humans).
        toolchain_id: Toolchain producing the commit.
        prompt_hash: ``sha256:``-prefixed SHA-256 of the instruction prompt.
        committed_at: ISO-8601 timestamp (e.g. ``"2026-04-08T12:00:00+00:00"``).
            Empty string is accepted for unsigned commits.

    Returns:
        64-character lowercase hex SHA-256 digest of the combined payload.
    """
    # Field order is part of the signed format — never reorder.
    ordered_fields = (
        commit_id,
        author,
        agent_id,
        model_id,
        toolchain_id,
        prompt_hash,
        committed_at,
    )
    canonical = f"{_PROV_VERSION_PREFIX}{_PROV_SEP.join(ordered_fields)}"

    hasher = hashlib.sha256()
    hasher.update(canonical.encode())
    return hasher.hexdigest()
136
137 # ---------------------------------------------------------------------------
138 # Agent identity
139 # ---------------------------------------------------------------------------
140
class AgentIdentity(TypedDict, total=False):
    """Provenance descriptor for the human or AI agent behind a commit.

    Declared with ``total=False``, so every key may be omitted: partial
    provenance (for instance, only ``agent_id`` is known) is expressed by
    leaving keys out rather than filling in dummy values.

    Keys:
        agent_id: Stable human-readable identifier chosen by the agent or
            its operator. Should be unique within a team
            (e.g. ``"counterpoint-bot-v1"``).
        model_id: Model identifier for AI agents (e.g. ``"claude-opus-4"``).
            Empty for human authors.
        toolchain_id: Build system or IDE that produced the commit
            (e.g. ``"cursor-agent-v2"``).
        prompt_hash: ``sha256:``-prefixed SHA-256 of the instruction/prompt
            that triggered the session. Privacy-preserving: only the hash
            is recorded, not the prompt text.
        execution_context_hash: ``sha256:``-prefixed SHA-256 of any
            additional execution context (system prompt, environment
            config, etc.).
    """

    # Who or what produced the commit.
    agent_id: str
    # Which model the agent ran on (AI agents only).
    model_id: str
    # Which tool or IDE drove the commit.
    toolchain_id: str
    # Hash of the triggering prompt; the prompt itself is not kept.
    prompt_hash: str
    # Hash of any extra execution context.
    execution_context_hash: str
169
def make_agent_identity(
    agent_id: str,
    *,
    model_id: str = "",
    toolchain_id: str = "",
    prompt: str = "",
    execution_context: str = "",
) -> AgentIdentity:
    """Assemble an :class:`AgentIdentity`, hashing the sensitive inputs.

    ``prompt`` and ``execution_context`` never enter the result as text:
    each is encoded and passed through ``blob_id``, and only that hash is
    stored, so the raw instruction text does not end up in the commit
    record.

    Args:
        agent_id: Stable agent identifier string. Always stored, even when
            it is empty.
        model_id: Model name/version (empty for humans).
        toolchain_id: Toolchain producing the commit.
        prompt: Raw instruction text to hash (not stored).
        execution_context: Additional context to hash (not stored).

    Returns:
        An :class:`AgentIdentity` holding ``agent_id`` plus one key for
        each optional argument that was non-empty.
    """
    identity: AgentIdentity = {"agent_id": agent_id}

    # Plain descriptors are copied verbatim, but only when supplied.
    if model_id:
        identity["model_id"] = model_id
    if toolchain_id:
        identity["toolchain_id"] = toolchain_id

    # Free-text inputs are reduced to a content hash before storage.
    if prompt:
        prompt_bytes = prompt.encode()
        identity["prompt_hash"] = blob_id(prompt_bytes)
    if execution_context:
        context_bytes = execution_context.encode()
        identity["execution_context_hash"] = blob_id(context_bytes)

    return identity
203
204 # ---------------------------------------------------------------------------
205 # Ed25519 signing and verification
206 # ---------------------------------------------------------------------------
207
def sign_commit_ed25519(payload: str, private_key: "Ed25519PrivateKey") -> str:
    """Sign *payload* with an Ed25519 private key.

    The message that gets signed is the encoded text of *payload* (the hex
    digest string itself), not the raw bytes the hex represents.

    Args:
        payload: Hex SHA-256 provenance payload from :func:`provenance_payload`.
        private_key: ``Ed25519PrivateKey`` instance from the ``cryptography`` package.

    Returns:
        The signature encoded by ``encode_sig`` with ``DEFAULT_SIGN_ALGO`` —
        ``"ed25519:<base64url>"``: algorithm prefix + signature, no padding.
        The prefix makes the stored value self-describing, analogous to how
        object IDs carry a ``sha256:`` prefix, so verifiers dispatch on the
        prefix instead of a separate ``signature_format`` field.
    """
    # The annotation is quoted because Ed25519PrivateKey is imported only
    # under TYPE_CHECKING; an unquoted name cannot be resolved at runtime.
    message = payload.encode()
    raw_signature = private_key.sign(message)
    return encode_sig(DEFAULT_SIGN_ALGO, raw_signature)
222
def verify_commit_ed25519(payload: str, signature: str, public_key_bytes: bytes) -> bool:
    """Verify an Ed25519 *signature* over *payload* using *public_key_bytes*.

    *signature* must be in the canonical ``"ed25519:<base64url>"`` format
    produced by :func:`sign_commit_ed25519`. Any other prefix (or no prefix)
    returns ``False`` — there is no fallback to bare base64url.

    This function never raises for bad input: every failure is reported as
    ``False``. Failures other than a plain signature mismatch are also
    logged at debug level so they can be told apart when diagnosing.

    Args:
        payload: Hex SHA-256 provenance payload from :func:`provenance_payload`.
        signature: ``"ed25519:<base64url>"`` — algorithm-prefixed signature.
        public_key_bytes: Raw 32-byte Ed25519 public key.

    Returns:
        ``True`` when the signature is valid, ``False`` otherwise (including
        unknown algorithm prefix, malformed base64, or cryptographic failure).
    """
    from cryptography.exceptions import InvalidSignature
    from cryptography.hazmat.primitives.asymmetric.ed25519 import Ed25519PublicKey

    # Both parsing helpers sit inside the guard, so a ValueError from either
    # one is reported as "not verified" instead of escaping to the caller.
    try:
        if sig_algo(signature) != DEFAULT_SIGN_ALGO:
            return False
        _, sig_bytes = decode_sig(signature)
    except ValueError:
        return False

    try:
        public_key = Ed25519PublicKey.from_public_bytes(public_key_bytes)
        public_key.verify(sig_bytes, payload.encode())
    except InvalidSignature:
        # Well-formed inputs whose signature simply does not match.
        return False
    except Exception:
        # Deliberate best-effort: anything else (e.g. a key of the wrong
        # length or type) still means "not verified", but leave a trace.
        logger.debug("Ed25519 verification failed with an unexpected error", exc_info=True)
        return False
    return True
258
def encode_public_key(private_key: "Ed25519PrivateKey") -> tuple[bytes, str]:
    """Extract and encode the public key from an Ed25519 private key.

    Args:
        private_key: ``Ed25519PrivateKey`` instance.

    Returns:
        ``(raw_bytes, "ed25519:<b64url>")`` — the raw public key bytes
        (``Encoding.Raw`` / ``PublicFormat.Raw``, 32 bytes for Ed25519) and
        their encoding via ``encode_pubkey`` with ``DEFAULT_SIGN_ALGO``
        (algorithm-prefixed base64url, no padding). The prefix makes the
        stored value self-describing, consistent with the
        ``sha256:``-prefixed object IDs used throughout Muse.
    """
    # The annotation is quoted because Ed25519PrivateKey is imported only
    # under TYPE_CHECKING; an unquoted name cannot be resolved at runtime.
    from cryptography.hazmat.primitives.serialization import Encoding, PublicFormat

    raw_public = private_key.public_key().public_bytes(Encoding.Raw, PublicFormat.Raw)
    return raw_public, encode_pubkey(DEFAULT_SIGN_ALGO, raw_public)
276
277 # ---------------------------------------------------------------------------
278 # Convenience: compute the signature fields for a commit record
279 # ---------------------------------------------------------------------------
280
281 def sign_commit_record(
282 commit_id: str,
283 agent_id: str,
284 private_key: Ed25519PrivateKey,
285 *,
286 author: str = "",
287 model_id: str = "",
288 toolchain_id: str = "",
289 prompt_hash: str = "",
290 committed_at: str = "",
291 ) -> tuple[str, str, str] | None:
292 """Sign the provenance payload (v2) for *commit_id* with *private_key*.
293
294 Computes :func:`provenance_payload` from the supplied fields so the
295 signature covers both the content identity (``commit_id``) and the
296 authorship claims, including the ``committed_at`` timestamp.
297
298 Args:
299 commit_id: SHA-256 hex commit ID.
300 agent_id: Stable agent identifier (metadata only; key is from hub identity).
301 private_key: ``Ed25519PrivateKey`` instance.
302 author: Display author name / email.
303 model_id: Model name/version (empty for humans).
304 toolchain_id: Toolchain producing the commit.
305 prompt_hash: ``sha256:``-prefixed SHA-256 of the instruction prompt.
306 committed_at: ISO-8601 timestamp — binds the wall-clock time to the signature.
307
308 Returns:
309 ``(signature_b64, public_key_b64, key_fingerprint)`` on success.
310 """
311 payload = provenance_payload(
312 commit_id,
313 author=author,
314 agent_id=agent_id,
315 model_id=model_id,
316 toolchain_id=toolchain_id,
317 prompt_hash=prompt_hash,
318 committed_at=committed_at,
319 )
320 sig = sign_commit_ed25519(payload, private_key)
321 raw_bytes, pub_b64 = encode_public_key(private_key)
322 from muse.core.types import public_key_fingerprint
323 fprint = public_key_fingerprint(raw_bytes)
324 logger.debug("✅ Ed25519-signed commit %s with key %s", short_id(commit_id), fprint)
325 return sig, pub_b64, fprint
File History 1 commit
sha256:06dba78c2a78e251b580422dd1fd547f3c8357ff18f7709a860873b2d24dbbbf chore: bump version to 0.2.0rc14 Sonnet 4.6 patch 15 hours ago