gabriel / muse public
identity.py python
592 lines 23.4 KB
Raw
1 """Global identity store — ``~/.muse/identity.toml``.
2
3 Ed25519 key-pair credentials are kept here, separate from per-repository
4 configuration. This means keys are never accidentally committed to
5 version control, and a single identity can authenticate across all
6 repositories on the same hub.
7
8 Why global, not per-repo
9 -------------------------
10 Muse makes identity a first-class, machine-scoped concept.
11 The repository knows *where* the hub is (``[hub] url`` in config.toml).
12 The machine knows *who you are* (this file). The two concerns are
13 deliberately separated.
14
15 Identity types
16 --------------
17 ``type = "human"``
18 A person. Authenticated via Ed25519 key pair.
19
20 ``type = "agent"``
21 An autonomous process. The ``capabilities`` field reflects what the
22 agent is allowed to do, enabling self-inspection before attempting an
23 operation. The ``provisioned_by`` field records the human handle that
24 authorised this agent, establishing the trust chain at provisioning time.
25
26 File format
27 -----------
28 TOML with one section per hub hostname or compound ``hostname#agent_id`` key::
29
30 ["localhost:1337"]
31 type = "human"
32 handle = "gabriel"
33 key_path = "/Users/gabriel/.muse/keys/localhost:1337.pem"
34 algorithm = "ed25519"
35 fingerprint = "<sha256hex>"
36
37 ["localhost:1337#agentception-abc123"]
38 type = "agent"
39 handle = "agentception-abc123"
40 key_path = "/Users/gabriel/.muse/keys/localhost:1337__agentception-abc123.pem"
41 algorithm = "ed25519"
42 fingerprint = "<sha256hex>"
43 provisioned_by = "gabriel"
44 capabilities = ["push", "pull"]
45
46 Compound key format
47 -------------------
48 ``"hostname#agent_id"`` — the ``#`` separator is not a valid hostname
49 character so it unambiguously separates the hub from the agent handle.
50 Human entries use the bare hostname; agent entries append ``#<agent_id>``.
51
52 Security model
53 --------------
54 - ``~/.muse/`` is created with mode 0o700 (user-only directory).
55 - ``~/.muse/identity.toml`` is written with mode 0o600 **from the first
56 byte** — using ``os.open()`` + ``os.fchmod()`` before any data is written,
57 eliminating the TOCTOU window that ``write_text()`` + ``chmod()`` creates.
58 - Writes are atomic: data goes to a temp file in the same directory, then
59 ``os.replace()`` renames it over the target. A kill signal during write
60 leaves the old file intact, never a partial file.
61 - Symlink guard: if the target path is already a symlink, write is refused.
62 This blocks symlink-based credential-overwrite attacks.
63 - The file is never read or written as part of a repository snapshot.
64 """
65
66 import contextlib
67 import fcntl
68 import logging
69 import os
70 import pathlib
71 import stat
72 import tempfile
73 import tomllib
74 from collections.abc import Generator
75 from typing import TypedDict
76
77 from muse.core.paths import user_muse_dir as _user_muse_dir, user_identity_toml_path as _user_identity_toml_path
78
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Both locations are resolved once, at import time, through the project's
# path helpers; every function below reads these module-level values.
_IDENTITY_DIR = _user_muse_dir()
_IDENTITY_FILE = _user_identity_toml_path()

# Maps a TOML section key (bare hostname, or ``hostname#agent_id``) to its entry.
type _IdentityMap = dict[str, "IdentityEntry"]
85
86 # ---------------------------------------------------------------------------
87 # Types
88 # ---------------------------------------------------------------------------
89
class IdentityEntry(TypedDict, total=False):
    """One authenticated identity, keyed by hub hostname in identity.toml.

    Every field is optional (``total=False``); readers must use ``.get()``.

    Human entry (``~/.muse/identity.toml``)::

        ["localhost:1337"]
        type = "human"
        handle = "gabriel"
        key_path = "/Users/gabriel/.muse/keys/localhost:1337.pem"
        algorithm = "ed25519"
        fingerprint = "<sha256hex>"

    Agent entry (compound key ``hostname#agent_id``)::

        ["localhost:1337#agentception-abc123"]
        type = "agent"
        handle = "agentception-abc123"
        key_path = "/Users/gabriel/.muse/keys/localhost:1337__agentception-abc123.pem"
        algorithm = "ed25519"
        fingerprint = "<sha256hex>"
        provisioned_by = "gabriel"
        capabilities = ["push", "pull"]
    """

    type: str  # "human" | "agent"
    handle: str  # hub-assigned handle, e.g. "gabriel" or "agentception-abc123"
    # Path to a PEM key file.  Declared because ``_load_all`` copies it from
    # the TOML file when present; the serialiser in this module does not
    # emit it.
    key_path: str
    algorithm: str  # "ed25519" | "ml-dsa-65"
    fingerprint: str  # sha256:<64-hex> fingerprint of public key
    capabilities: list[str]  # agent capability strings (empty for humans)
    provisioned_by: str  # for agents: handle of the human who provisioned this key
    # HD wallet provenance (present only when key was derived via BIP39/SLIP-0010)
    hd_path: str  # SLIP-0010 derivation path
    provisioned_by_fingerprint: str  # agent keys: sha256:<64-hex> fingerprint of operator key
    # In-memory only — never persisted to TOML
    mnemonic: str  # injected at load time from keychain; not written to disk
125
126 # ---------------------------------------------------------------------------
127 # Path helper
128 # ---------------------------------------------------------------------------
129
def get_identity_path() -> pathlib.Path:
    """Return the location of the global identity file.

    The value is the module-level path computed once at import time
    (``~/.muse/identity.toml``); nothing is read from or written to disk.
    """
    identity_file = _IDENTITY_FILE
    return identity_file
133
134 def _identity_key(hostname: str, agent_id: str | None = None) -> str:
135 """Build the TOML section key for a given hostname and optional agent_id.
136
137 Human identities: ``"localhost:1337"``
138 Agent identities: ``"localhost:1337#agentception-abc123"``
139
140 The ``#`` character is not valid in a hostname so it unambiguously marks
141 the boundary between hub and agent handle.
142 """
143 if agent_id:
144 return f"{hostname}#{agent_id}"
145 return hostname
146
147 # ---------------------------------------------------------------------------
148 # URL → hostname normalisation
149 # ---------------------------------------------------------------------------
150
def hostname_from_url(url: str) -> str:
    """Normalise *url* to a lowercase ``host[:port]`` suitable for use as a dict key.

    Security properties
    -------------------
    - Strips the scheme (``https://``), so different scheme representations of
      the same host resolve to the same key.
    - Cuts the authority at the first ``/``, ``?`` or ``#`` **before** looking
      for userinfo, so an ``@`` inside the path (``/@gabriel/repo``) is never
      mistaken for a ``user@host`` separator, and a URL fragment can never
      leak a ``#`` into the key (``#`` is reserved for ``hostname#agent_id``).
    - Strips userinfo (``user:password@``) — embedded credentials in a URL are
      never stored as part of the hostname key.
    - Normalises to lowercase — DNS is case-insensitive, so ``MUSEHUB.AI``
      and ``musehub.ai`` are the same host and must resolve to the same entry.

    Examples::

        "https://musehub.ai/repos/x"          → "musehub.ai"
        "https://admin:secret@musehub.ai"     → "musehub.ai"
        "https://musehub.ai/@gabriel/repo"    → "musehub.ai"
        "https://musehub.ai?tab=keys#top"     → "musehub.ai"
        "MUSEHUB.AI"                          → "musehub.ai"
        "https://musehub.ai"                  → "musehub.ai"
        "musehub.ai:8443"                     → "musehub.ai:8443"

    Args:
        url: Full hub URL or a bare ``host[:port]`` string.

    Returns:
        The lowercase ``host[:port]`` component of *url*.
    """
    remainder = url.strip()

    # Remove the scheme.  "://" only counts as a scheme separator when it
    # precedes the authority; one that appears later (e.g. inside a query
    # string) must not be used as the split point.
    scheme, separator, after_scheme = remainder.partition("://")
    if separator and not any(ch in scheme for ch in "/?#"):
        remainder = after_scheme

    # Keep only the authority: everything before the first path, query or
    # fragment delimiter.
    authority = remainder
    for delimiter in "/?#":
        authority = authority.split(delimiter, 1)[0]

    # Remove userinfo (user:password@) — never embed credentials in the key.
    authority = authority.rsplit("@", 1)[-1]

    # Normalise to lowercase — DNS is case-insensitive.
    return authority.lower()
182
183 # ---------------------------------------------------------------------------
184 # TOML serialiser (write-side — stdlib tomllib is read-only)
185 # ---------------------------------------------------------------------------
186
187 def _toml_escape(value: str) -> str:
188 """Escape a string value for embedding in a TOML double-quoted string."""
189 return value.replace("\\", "\\\\").replace('"', '\\"')
190
def _dump_identity(identities: _IdentityMap) -> str:
    """Render a section-key → entry mapping as TOML text.

    Sections are emitted in sorted key order.  Each section header is always
    quoted so that dotted names (e.g. ``musehub.ai``) stay literal keys rather
    than becoming nested tables, and every string is passed through
    ``_toml_escape``.  Fields that are missing or empty are omitted.  The
    ``mnemonic`` field is never emitted — it lives only in the OS keychain.

    Args:
        identities: Mapping of section key to identity entry.

    Returns:
        TOML text; the empty string when *identities* is empty.
    """
    out: list[str] = []

    def emit(name: str, value: str) -> None:
        # Write ``name = "value"`` only for non-empty values.
        if value:
            out.append(f'{name} = "{_toml_escape(value)}"')

    for section in sorted(identities):
        record = identities[section]
        out.append(f'["{_toml_escape(section)}"]')

        emit("type", record.get("type", ""))
        emit("handle", record.get("handle", ""))
        emit("algorithm", record.get("algorithm", ""))
        emit("fingerprint", record.get("fingerprint", ""))
        emit("provisioned_by", record.get("provisioned_by", ""))

        capabilities = record.get("capabilities") or []
        if capabilities:
            quoted = ", ".join(f'"{_toml_escape(cap)}"' for cap in capabilities)
            out.append(f"capabilities = [{quoted}]")

        # HD wallet provenance — only written when present.
        emit("hd_path", record.get("hd_path", ""))
        emit("provisioned_by_fingerprint", record.get("provisioned_by_fingerprint", ""))

        # Blank line after every section.
        out.append("")

    return "\n".join(out)
232
233 # ---------------------------------------------------------------------------
234 # Load / save
235 # ---------------------------------------------------------------------------
236
def _load_all(path: pathlib.Path) -> _IdentityMap:
    """Read every identity entry stored in the TOML file at *path*.

    Only values of the expected type are copied: string fields must be
    ``str`` and ``capabilities`` must be a list (non-string items inside it
    are dropped).  Top-level values that are not tables are skipped.  The
    ``mnemonic`` field is never read from the file.

    Args:
        path: Location of the identity file.

    Returns:
        Mapping of section key to entry.  An empty dict when *path* is not a
        regular file or when reading/parsing fails (a warning is logged).
    """
    if not path.is_file():
        return {}

    try:
        with path.open("rb") as stream:
            document = tomllib.load(stream)
    except Exception as exc:  # noqa: BLE001
        # Log only the exception *type*, never its message — a TOML parse
        # error surfaced by tomllib includes the offending line, which can
        # contain a fragment of the token being written when the file is corrupt.
        logger.warning(
            "⚠️ Failed to parse identity file %s (%s — run `muse auth register` to re-authenticate)",
            path,
            type(exc).__name__,
        )
        return {}

    # String fields, split around ``capabilities`` so the resulting entry
    # keeps the same key order as the on-disk layout.
    fields_before_caps = ("type", "handle", "key_path", "algorithm", "fingerprint")
    fields_after_caps = ("provisioned_by", "hd_path", "provisioned_by_fingerprint")

    loaded: _IdentityMap = {}
    for section, table in document.items():
        if not isinstance(table, dict):
            continue

        entry: IdentityEntry = {}
        for name in fields_before_caps:
            value = table.get(name)
            if isinstance(value, str):
                entry[name] = value  # type: ignore[literal-required]

        capabilities = table.get("capabilities")
        if isinstance(capabilities, list):
            entry["capabilities"] = [str(cap) for cap in capabilities if isinstance(cap, str)]

        # mnemonic is never read from TOML — it comes from the keychain at call time
        for name in fields_after_caps:
            value = table.get(name)
            if isinstance(value, str):
                entry[name] = value  # type: ignore[literal-required]

        loaded[section] = entry

    return loaded
291
@contextlib.contextmanager
def _identity_write_lock() -> Generator[None, None, None]:
    """Acquire an exclusive advisory write-lock on the identity store.

    Uses a dedicated lock file (``.identity.lock`` inside the identity
    directory) so that the lock survives the atomic rename of
    ``identity.toml`` itself.

    Advisory (cooperative) locking protects all Muse processes that use this
    lock against concurrent read-modify-write races.  Direct file edits by
    external tools bypass the lock — that is acceptable; the user is then
    responsible for data consistency.

    POSIX-only (``fcntl.flock``).  The lock is blocking with no timeout;
    CLI commands are short-lived and lock contention is expected to be brief.

    Yields:
        ``None`` while the exclusive lock is held.
    """
    lock_path = _IDENTITY_DIR / ".identity.lock"
    # This is the first place the identity directory gets created on the
    # save path, so request owner-only permissions (0o700) here instead of
    # leaving it at the umask default until a later chmod.
    _IDENTITY_DIR.mkdir(mode=stat.S_IRWXU, parents=True, exist_ok=True)
    # Create the lock file with owner-only permissions; O_CLOEXEC prevents
    # child processes from inheriting the file descriptor.
    lock_fd = os.open(
        str(lock_path),
        os.O_CREAT | os.O_WRONLY | os.O_CLOEXEC,
        stat.S_IRUSR | stat.S_IWUSR,
    )
    try:
        fcntl.flock(lock_fd, fcntl.LOCK_EX)
        try:
            yield
        finally:
            fcntl.flock(lock_fd, fcntl.LOCK_UN)
    finally:
        # Always release the descriptor, even if locking itself failed.
        os.close(lock_fd)
324
def _save_all(identities: _IdentityMap, path: pathlib.Path) -> None:
    """Write *identities* to *path* securely.

    Security guarantees
    -------------------
    1. **Symlink guard** — refuses to write if *path* is already a symlink,
       preventing an attacker from pre-placing a symlink to a file they want
       overwritten.
    2. **0o700 directory** — the parent directory is created with, and then
       explicitly set to, owner-only permissions so other local users cannot
       list or traverse it.
    3. **0o600 from byte zero** — the temp file is ``fchmod``-ed to 0o600
       *before* any data is written, eliminating the TOCTOU window that
       ``write_text()`` + ``chmod()`` creates.
    4. **Atomic, durable rename** — the data is flushed and ``fsync``-ed, then
       ``os.replace()`` swaps the temp file over the target atomically; an
       interruption during the write leaves the old file intact.

    Args:
        identities: Complete mapping to persist (replaces the file contents).
        path: Destination file.

    Raises:
        OSError: If *path* is a symlink, or if any file-system operation
            other than the best-effort directory ``chmod`` fails.
    """
    dir_path = path.parent

    # 1. Create the directory with owner-only permissions (0o700).  The mode
    #    passed to mkdir only applies when the directory is newly created;
    #    the chmod below tightens a pre-existing one.
    dir_path.mkdir(mode=stat.S_IRWXU, parents=True, exist_ok=True)
    try:
        os.chmod(dir_path, stat.S_IRWXU)  # 0o700
    except OSError as exc:
        logger.warning("⚠️ Could not set permissions on %s: %s", dir_path, exc)

    # 2. Symlink guard — never follow a symlink placed at the target path.
    if path.is_symlink():
        raise OSError(
            f"Security: {path} is a symlink. "
            "Refusing to write credentials to a symlink target."
        )

    text = _dump_identity(identities)

    # 3. Write to a temp file in the same directory (same fs → atomic rename).
    #    Set 0o600 via fchmod *before* writing any data.
    fd, tmp_path_str = tempfile.mkstemp(dir=dir_path, prefix=".identity-tmp-")
    tmp_path = pathlib.Path(tmp_path_str)
    try:
        try:
            os.fchmod(fd, stat.S_IRUSR | stat.S_IWUSR)  # 0o600 before any data
        except BaseException:
            # The descriptor is not yet owned by a file object, so close it
            # here or it would leak.
            os.close(fd)
            raise
        with os.fdopen(fd, "w", encoding="utf-8") as fh:
            fh.write(text)
            # Push the bytes to disk before the rename makes them visible,
            # so a crash cannot leave an empty file at the target path.
            fh.flush()
            os.fsync(fh.fileno())
        # 4. Atomic rename — old file stays intact if we crash before this.
        os.replace(tmp_path, path)
    except BaseException:
        # BaseException so the temp file is also removed on Ctrl-C / exit.
        try:
            tmp_path.unlink(missing_ok=True)
        except OSError:
            pass
        raise
375
376 # ---------------------------------------------------------------------------
377 # Public API
378 # ---------------------------------------------------------------------------
379
def load_identity(hub_url: str, agent_id: str | None = None) -> IdentityEntry | None:
    """Look up the stored identity for *hub_url* (and optional *agent_id*).

    The URL is first reduced to a hostname, so ``https://musehub.ai/repos/x``
    and ``musehub.ai`` resolve to the same entry.

    For human (non-agent) lookups the mnemonic is fetched from the OS
    keychain and, when one is available, added to a copy of the entry under
    the ``mnemonic`` key.  The mnemonic is never read from the TOML file.

    Args:
        hub_url: Hub URL or bare hostname.
        agent_id: Agent handle (e.g. ``"agentception-abc123"``).  When
            supplied the lookup key is ``"hostname#agent_id"``, i.e. the
            agent's dedicated entry rather than the human's.

    Returns:
        :class:`IdentityEntry` if an identity is stored, else ``None``.
    """
    lookup_key = _identity_key(hostname_from_url(hub_url), agent_id)
    stored = _load_all(_IDENTITY_FILE).get(lookup_key)

    # Nothing stored, or an agent entry: no keychain lookup is performed.
    if stored is None or agent_id:
        return stored

    from muse.core.keychain import load as kc_load

    phrase = kc_load()
    if not phrase:
        return stored

    enriched = stored.copy()
    enriched["mnemonic"] = phrase
    return enriched
413
def save_identity(
    hub_url: str,
    entry: IdentityEntry,
    agent_id: str | None = None,
    mnemonic: str | None = None,
) -> None:
    """Persist *entry* as the identity for *hub_url* (and optional *agent_id*).

    The read-modify-write cycle runs under the exclusive advisory lock so
    concurrent ``muse auth register`` calls (e.g. from parallel agents)
    cannot overwrite each other's entries.

    A non-empty *mnemonic* is handed to the OS keychain when one is
    available; otherwise a warning is logged unless the environment variable
    ``MUSE_KEYCHAIN_BACKEND`` is set to ``disabled``.  A ``"mnemonic"`` key
    is always removed from the data written to TOML; the caller's *entry* is
    copied first and never mutated.

    Args:
        hub_url: Hub URL or bare hostname.
        entry: Identity data to store.  Should not contain a ``"mnemonic"``
            key — pass *mnemonic* as the separate keyword argument.
        agent_id: When supplied, the entry is stored under the compound key
            ``"hostname#agent_id"`` rather than the bare hostname.
        mnemonic: BIP39 mnemonic phrase, kept out of the TOML file.
    """
    if mnemonic:
        from muse.core.keychain import store as kc_store, is_available as kc_avail

        if kc_avail():
            kc_store(mnemonic)
        elif os.environ.get("MUSE_KEYCHAIN_BACKEND") != "disabled":
            logger.warning(
                "⚠️ Keychain is unavailable — mnemonic is ephemeral and will not "
                "survive process exit. Install a keyring backend or check your OS keychain "
                "configuration. Run `muse auth keygen` again once the keychain is operational.",
            )

    # Work on a copy whenever a mnemonic is involved, so it can be stripped
    # without touching the caller's dict.
    if mnemonic or "mnemonic" in entry:  # type: ignore[operator]
        entry = dict(entry)  # type: ignore[assignment]
        entry.pop("mnemonic", None)  # type: ignore[misc]

    key = _identity_key(hostname_from_url(hub_url), agent_id)
    with _identity_write_lock():
        current = _load_all(_IDENTITY_FILE)
        current[key] = entry
        _save_all(current, _IDENTITY_FILE)
    logger.info("✅ Identity for %s saved", key)
466
def clear_identity(hub_url: str, agent_id: str | None = None) -> bool:
    """Delete the stored identity for *hub_url* (and optional *agent_id*).

    The read-modify-write cycle runs under the exclusive advisory lock.
    When no matching entry exists, the file is not rewritten.

    Args:
        hub_url: Hub URL or bare hostname.
        agent_id: When supplied, removes the agent's compound-key entry.

    Returns:
        ``True`` if an entry was removed, ``False`` if no entry existed.
    """
    key = _identity_key(hostname_from_url(hub_url), agent_id)
    with _identity_write_lock():
        store = _load_all(_IDENTITY_FILE)
        try:
            del store[key]
        except KeyError:
            return False
        _save_all(store, _IDENTITY_FILE)
    logger.info("✅ Identity for %s cleared", key)
    return True
490
def resolve_signing_identity(
    hub_url: str,
    agent_id: str | None = None,
) -> "tuple[str, object] | None":
    """Return ``(handle, private_key)`` for *hub_url*, or ``None``.

    The private key is derived at call time from the mnemonic held in the OS
    keychain, using the entry's ``hd_path``.

    Resolution order when *agent_id* is provided:
        1. Agent-specific entry (``"hostname#agent_id"``) — used when it
           exists and a key can be derived from it.
        2. Human entry (bare hostname) — fallback.

    When *agent_id* is ``None``, only the human (bare-hostname) entry is tried.

    Returns ``None`` when no identity is configured, the entry lacks a
    ``handle`` or ``hd_path``, the keychain has no mnemonic, or converting
    the derived key material raises.

    Args:
        hub_url: Hub URL or bare hostname.
        agent_id: Optional agent handle to look up a dedicated agent key first.

    Returns:
        ``(handle, private_key)`` tuple, or ``None``.
    """
    from muse.core.keychain import load as kc_load
    from muse.core.bip39 import mnemonic_to_seed
    from muse.core.slip010 import derive_path, to_ed25519_private_key

    def _derive_from(entry: "IdentityEntry") -> "tuple[str, object] | None":
        # Both the handle and the derivation path are required.
        handle = entry.get("handle", "")
        hd_path = entry.get("hd_path", "")
        if not (handle and hd_path):
            return None
        phrase = kc_load()
        if not phrase:
            return None
        derived = derive_path(mnemonic_to_seed(phrase), hd_path)
        try:
            return handle, to_ed25519_private_key(derived)
        except Exception:
            return None
        finally:
            # Wipe the intermediate key material whether or not conversion succeeded.
            derived.zero()

    # 1. Try agent-specific entry when an agent_id is given.
    if agent_id:
        agent_entry = load_identity(hub_url, agent_id=agent_id)
        agent_result = _derive_from(agent_entry) if agent_entry is not None else None
        if agent_result is not None:
            logger.debug("✅ Agent signing key resolved: handle=%s", agent_result[0])
            return agent_result

    # 2. Fall back to the human entry.
    human_entry = load_identity(hub_url)
    human_result = _derive_from(human_entry) if human_entry is not None else None
    if human_result is None:
        return None

    if agent_id:
        logger.debug(
            "⚠️ No dedicated key for agent '%s' — signing with human key (handle=%s). "
            "Run `muse auth keygen --agent-id %s` to provision a dedicated agent key.",
            agent_id, human_result[0], agent_id,
        )
    return human_result
563
def list_all_identities() -> _IdentityMap:
    """Return every stored identity, keyed by its TOML section key.

    Keys are bare hostnames for human entries and ``hostname#agent_id`` for
    agent entries.  The result is an empty dict when the identity file does
    not exist.
    """
    everything = _load_all(_IDENTITY_FILE)
    return everything
570
def clear_all_identities() -> list[str]:
    """Remove every stored identity with a single write.

    The read-modify-write cycle runs under the exclusive advisory lock, so
    no cooperating process can interleave with it.  One lock → read → write
    cycle is performed regardless of how many identities are stored, unlike
    calling :func:`clear_identity` once per entry.  When nothing is stored,
    the file is not rewritten.

    Returns:
        Sorted list of the section keys that were removed.  Empty list if
        the identity file did not exist or contained no entries.
    """
    with _identity_write_lock():
        removed = sorted(_load_all(_IDENTITY_FILE))
        if not removed:
            return []
        _save_all({}, _IDENTITY_FILE)
    logger.info("✅ All identities cleared (%d hub(s))", len(removed))
    return removed
File History 1 commit