bip39.py
python
sha256:a73c3f57b665e8c0be2c9e977b3ebefdb7ae8d46f196986d911c6a8f5d8b8d49
docs: update store.py references to focused module paths
Sonnet 4.6
30 days ago
| 1 | """muse.core.bip39 — BIP39 mnemonic generation, validation, and seed derivation. |
| 2 | |
| 3 | BIP39 defines a standard for converting a random bit-string into a human-readable |
| 4 | word sequence (the *mnemonic*) and then into a cryptographic seed via |
| 5 | PBKDF2-HMAC-SHA512. That seed feeds into HD wallet derivation (SLIP-0010 for |
| 6 | Ed25519, BIP32 for secp256k1). |
| 7 | |
| 8 | The mnemonic IS the root secret — whoever holds it controls every key derived |
| 9 | from it. Write it down on paper. Never store it digitally without encryption. |
| 10 | |
| 11 | Supported strengths |
| 12 | ------------------- |
| 13 | All five BIP39 entropy levels are supported: |
| 14 | |
| 15 | .. list-table:: |
| 16 | :widths: 15 15 70 |
| 17 | :header-rows: 1 |
| 18 | |
| 19 | * - Bits |
| 20 | - Words |
| 21 | - Constant / use case |
| 22 | * - 128 |
| 23 | - 12 |
| 24 | - :data:`STRENGTH_STANDARD` — standard security, matches most hardware wallets |
| 25 | * - 160 |
| 26 | - 15 |
| 27 | - :data:`STRENGTH_LOW` — slightly higher entropy than 12-word |
| 28 | * - 192 |
| 29 | - 18 |
| 30 | - :data:`STRENGTH_MEDIUM` — strong middle ground |
| 31 | * - 224 |
| 32 | - 21 |
| 33 | - :data:`STRENGTH_HIGH` — high security without the full 24-word burden |
| 34 | * - 256 |
| 35 | - 24 |
| 36 | - :data:`STRENGTH_PARANOID` — maximum entropy for highest-value root identities |
| 37 | |
| 38 | Supported languages |
| 39 | ------------------- |
| 40 | The 12 wordlists bundled with ``mnemonic`` are exposed (see :data:`FUNCTIONAL_LANGUAGES` for caveats): |
| 41 | |
| 42 | ``"english"``, ``"spanish"``, ``"french"``, ``"italian"``, ``"portuguese"``, |
| 43 | ``"czech"``, ``"japanese"``, ``"korean"``, ``"chinese_simplified"``, |
| 44 | ``"chinese_traditional"``, ``"russian"``, ``"turkish"`` |
| 45 | |
| 46 | Language is a *generation and validation* concern only. Seed derivation |
| 47 | (PBKDF2-HMAC-SHA512) needs no wordlist lookup: it hashes the NFKD-normalised |
| 48 | mnemonic text itself, so the same entropy rendered in two different languages |
| 49 | produces two different seeds. |
| 50 | |
| 51 | Language detection |
| 52 | ------------------ |
| 53 | Pass ``language="auto"`` to :func:`validate_mnemonic` to auto-detect the |
| 54 | language from the words. Detection is performed by the ``mnemonic`` library |
| 55 | using wordlist membership; if no single language matches, validation returns ``False``. |
| 56 | |
| 57 | Implementation |
| 58 | -------------- |
| 59 | Delegates all entropy generation, wordlist lookup, checksum computation, and |
| 60 | PBKDF2 derivation to the ``mnemonic`` package (official Trezor implementation, |
| 61 | pure Python, production-grade). This module is a typed façade that: |
| 62 | |
| 63 | - Provides all five entropy strengths as named constants |
| 64 | - Exposes all 12 official BIP39 language wordlists |
| 65 | - Enforces NFKD normalisation per spec (hardware-wallet interoperability) |
| 66 | - Offers language auto-detection for validation |
| 67 | - Raises :class:`Bip39Error` instead of bare exceptions |
| 68 | |
| 69 | Security properties |
| 70 | ------------------- |
| 71 | - ``generate_mnemonic()`` reads from the OS CSPRNG (``os.urandom`` via |
| 72 | ``secrets`` inside the ``mnemonic`` library) — never the ``random`` module. |
| 73 | - Passphrase support: BIP39 allows an optional passphrase ("25th word"). |
| 74 | When used, a different seed is derived from the same mnemonic. The |
| 75 | passphrase is **never stored** — it must be supplied on every derivation. |
| 76 | Loss of the passphrase means permanent loss of access; back it up separately. |
| 77 | - For Japanese mnemonics the separator is ideographic space (U+3000); NFKD |
| 78 | normalisation handles this transparently. |
| 79 | |
| 80 | References |
| 81 | ---------- |
| 82 | - BIP39 specification: https://github.com/bitcoin/bips/blob/master/bip-0039.mediawiki |
| 83 | - Trezor ``mnemonic`` library: https://github.com/trezor/python-mnemonic |
| 84 | |
| 85 | Examples |
| 86 | -------- |
| 87 | :: |
| 88 | |
| 89 | from muse.core.bip39 import ( |
| 90 | generate_mnemonic, validate_mnemonic, mnemonic_to_seed, |
| 91 | STRENGTH_STANDARD, STRENGTH_PARANOID, |
| 92 | ) |
| 93 | |
| 94 | # Generate a new 12-word English mnemonic (128-bit entropy) |
| 95 | words = generate_mnemonic() |
| 96 | |
| 97 | # 24-word paranoid-security mnemonic |
| 98 | words_24 = generate_mnemonic(strength=STRENGTH_PARANOID) |
| 99 | |
| 100 | # Japanese 12-word mnemonic |
| 101 | words_ja = generate_mnemonic(language="japanese") |
| 102 | |
| 103 | # Validate — language auto-detected |
| 104 | assert validate_mnemonic(words_ja) |
| 105 | |
| 106 | # Derive the 512-bit seed (input to HD derivation) |
| 107 | seed = mnemonic_to_seed(words) # no passphrase |
| 108 | seed = mnemonic_to_seed(words, "my secret") # with BIP39 passphrase |
| 109 | """ |
| 110 | |
| 111 | import unicodedata |
| 112 | from typing import Literal |
| 113 | |
| 114 | from mnemonic import Mnemonic as _Mnemonic |
| 115 | |
# Names exported by ``from muse.core.bip39 import *``, grouped by role.
__all__ = [
    # Exception type
    "Bip39Error",
    # Entropy strengths
    "Bip39Strength",
    "STRENGTH_STANDARD",
    "STRENGTH_LOW",
    "STRENGTH_MEDIUM",
    "STRENGTH_HIGH",
    "STRENGTH_PARANOID",
    # Language lists
    "SUPPORTED_LANGUAGES",
    "FUNCTIONAL_LANGUAGES",
    # Functions
    "generate_mnemonic",
    "validate_mnemonic",
    "mnemonic_to_seed",
    "detect_language",
    "word_count",
]
| 132 | |
| 133 | # --------------------------------------------------------------------------- |
| 134 | # Strength constants |
| 135 | # --------------------------------------------------------------------------- |
| 136 | |
# Entropy strengths are expressed in bits. Each constant carries its own
# ``Literal`` type so that it type-checks against ``Bip39Strength``.

#: 128 bits of entropy, encoded as 12 words. This is the default strength.
STRENGTH_STANDARD: Literal[128] = 128

#: 160 bits of entropy, encoded as 15 words. Despite the name, this is
#: stronger than :data:`STRENGTH_STANDARD`.
STRENGTH_LOW: Literal[160] = 160

#: 192 bits of entropy, encoded as 18 words.
STRENGTH_MEDIUM: Literal[192] = 192

#: 224 bits of entropy, encoded as 21 words.
STRENGTH_HIGH: Literal[224] = 224

#: 256 bits of entropy, encoded as 24 words. The largest supported strength.
STRENGTH_PARANOID: Literal[256] = 256

#: Type alias accepted wherever an entropy strength is expected.
Bip39Strength = Literal[128, 160, 192, 224, 256]

#: Lookup table: entropy bits -> number of words in the resulting mnemonic.
#: Membership in this table is also what defines a "supported" strength.
_WORDS_FOR_STRENGTH: dict[int, int] = {
    STRENGTH_STANDARD: 12,
    STRENGTH_LOW: 15,
    STRENGTH_MEDIUM: 18,
    STRENGTH_HIGH: 21,
    STRENGTH_PARANOID: 24,
}
| 163 | |
| 164 | # --------------------------------------------------------------------------- |
| 165 | # Language constants |
| 166 | # --------------------------------------------------------------------------- |
| 167 | |
#: Value of the ``language`` argument that requests auto-detection in
#: :func:`validate_mnemonic`.
_LANG_AUTO = "auto"

#: One ``Mnemonic`` instance per language name, filled on first use by
#: :func:`_get_mnemonic`.
_MNEMONIC_CACHE: dict[str, _Mnemonic] = {}

#: Every language identifier reported by the installed ``mnemonic`` package.
#: Being listed here does not guarantee a fully working wordlist; prefer
#: :data:`FUNCTIONAL_LANGUAGES` when a language must generate, validate, and
#: be auto-detected.
SUPPORTED_LANGUAGES: list[str] = _Mnemonic.list_languages()

#: :data:`SUPPORTED_LANGUAGES` in its original order, minus ``"turkish"`` and
#: ``"russian"``.
#: NOTE(review): these two were excluded because their wordlist data was
#: recorded as incomplete for the library version this module was written
#: against. Re-check the exclusion when upgrading ``mnemonic``.
FUNCTIONAL_LANGUAGES: list[str] = [
    name
    for name in SUPPORTED_LANGUAGES
    if name != "turkish" and name != "russian"
]
| 188 | |
def _get_mnemonic(language: str) -> _Mnemonic:
    """Look up, or lazily build, the ``Mnemonic`` instance for *language*.

    Instances are memoised in ``_MNEMONIC_CACHE`` so each wordlist is
    constructed at most once per process.

    Raises
    ------
    Bip39Error
        If *language* is not cached and is not in ``SUPPORTED_LANGUAGES``.
    """
    try:
        return _MNEMONIC_CACHE[language]
    except KeyError:
        # Not built yet; fall through. Raising outside this handler keeps
        # the KeyError out of the Bip39Error's exception context.
        pass

    if language not in SUPPORTED_LANGUAGES:
        raise Bip39Error(
            f"Unsupported BIP39 language: {language!r}. "
            f"Supported: {sorted(SUPPORTED_LANGUAGES)}"
        )

    instance = _MNEMONIC_CACHE[language] = _Mnemonic(language)
    return instance
| 199 | |
| 200 | # --------------------------------------------------------------------------- |
| 201 | # Errors |
| 202 | # --------------------------------------------------------------------------- |
| 203 | |
class Bip39Error(ValueError):
    """Error type for every failure reported by this module.

    It derives from :class:`ValueError`, so existing ``except ValueError``
    handlers keep catching it; catch ``Bip39Error`` itself to handle only
    BIP39 failures.

    This module raises it when:

    - the requested entropy strength is not 128, 160, 192, 224, or 256;
    - the requested language is not a supported wordlist name;
    - the language of a phrase cannot be detected.

    Examples
    --------
    ::

        try:
            generate_mnemonic(strength=64)
        except Bip39Error as exc:
            print(f"bad strength: {exc}")
    """
| 225 | |
| 226 | # --------------------------------------------------------------------------- |
| 227 | # Public API |
| 228 | # --------------------------------------------------------------------------- |
| 229 | |
def generate_mnemonic(
    strength: Bip39Strength = STRENGTH_STANDARD,
    language: str = "english",
) -> str:
    """Create a fresh BIP39 mnemonic phrase.

    *strength* is validated here; the phrase itself is produced by the
    cached ``Mnemonic`` instance for *language*.

    Parameters
    ----------
    strength:
        Entropy size in bits: 128, 160, 192, 224, or 256 (see the
        ``STRENGTH_*`` constants). Defaults to :data:`STRENGTH_STANDARD`.
    language:
        Wordlist name; must be one of :data:`SUPPORTED_LANGUAGES`.
        Defaults to ``"english"``.

    Returns
    -------
    str
        The phrase returned by ``Mnemonic.generate`` for the requested
        strength and language.

    Raises
    ------
    Bip39Error
        If *strength* is not supported (checked first), or *language* is
        not in :data:`SUPPORTED_LANGUAGES`.

    Notes
    -----
    Entropy is drawn inside the ``mnemonic`` library, not in this function.
    NOTE(review): the module documentation states that the library reads the
    OS CSPRNG and that Japanese phrases are joined with U+3000. Confirm both
    against the installed library version.

    Examples
    --------
    ::

        words = generate_mnemonic()                                   # 12 words, English
        words_24 = generate_mnemonic(strength=STRENGTH_PARANOID)      # 24 words, English
        words_ja = generate_mnemonic(language="japanese")             # 12 words, Japanese
        words_es = generate_mnemonic(strength=STRENGTH_HIGH, language="spanish")
    """
    if strength in _WORDS_FOR_STRENGTH:
        generator = _get_mnemonic(language)
        return generator.generate(strength=strength)

    raise Bip39Error(
        f"Unsupported BIP39 strength: {strength}. "
        f"Must be one of {sorted(_WORDS_FOR_STRENGTH)}."
    )
| 286 | |
def validate_mnemonic(words: str, language: str = _LANG_AUTO) -> bool:
    """Report whether *words* is a valid BIP39 mnemonic.

    The phrase is whitespace-normalised and then handed to
    ``Mnemonic.check`` for the chosen (or detected) language, which performs
    the wordlist and checksum verification.

    Parameters
    ----------
    words:
        Phrase to check. Leading and trailing whitespace is dropped and
        every internal whitespace run is collapsed to a single ASCII space
        before checking.
    language:
        Wordlist to validate against. The default ``"auto"`` detects the
        language with :func:`detect_language`; any other value is used
        as-is and detection is skipped.

    Returns
    -------
    bool
        ``True`` if the library's check passes. ``False`` if it fails, or
        if auto-detection cannot determine a language.

    Raises
    ------
    Bip39Error
        If an explicit *language* is given that is not in
        :data:`SUPPORTED_LANGUAGES`. Only detection failures are turned
        into ``False``.

    Examples
    --------
    ::

        assert validate_mnemonic("abandon " * 11 + "about")
        assert not validate_mnemonic("abandon " * 12)          # bad checksum

        words_ja = generate_mnemonic(language="japanese")
        assert validate_mnemonic(words_ja)                     # auto-detected
        assert validate_mnemonic(words_ja, "japanese")         # explicit
    """
    # split() with no argument already ignores leading/trailing whitespace.
    phrase = " ".join(words.split())

    if language != _LANG_AUTO:
        checker = _get_mnemonic(language)
    else:
        try:
            detected = detect_language(phrase)
        except Bip39Error:
            # An undetectable language means the phrase cannot be valid.
            return False
        checker = _get_mnemonic(detected)

    return bool(checker.check(phrase))
| 335 | |
def mnemonic_to_seed(words: str, passphrase: str = "") -> bytearray:
    """Derive the 512-bit BIP39 root seed from a mnemonic and optional passphrase.

    No wordlist is consulted: the seed is computed from the *text* of the
    mnemonic. The same entropy written in two different languages is
    therefore two different passwords and yields two different seeds.

    The derivation defined by BIP39 is::

        seed = PBKDF2-HMAC-SHA512(
            password   = NFKD(mnemonic),
            salt       = "mnemonic" + NFKD(passphrase),
            iterations = 2048,
            dklen      = 64,   # 512 bits
        )

    This function performs the whitespace and NFKD normalisation and
    delegates the key stretching to ``Mnemonic.to_seed``.

    Parameters
    ----------
    words:
        Mnemonic phrase. It is **not** validated here, so any text produces
        a seed; call :func:`validate_mnemonic` first. Leading and trailing
        whitespace is dropped and internal whitespace runs are collapsed to
        one ASCII space before normalisation.
    passphrase:
        Optional BIP39 extension passphrase ("25th word"). Default: ``""``.
        It is NFKD-normalised but otherwise used verbatim, with no
        whitespace trimming. A different passphrase gives a different seed.

    Returns
    -------
    bytearray
        A mutable copy of the bytes returned by ``Mnemonic.to_seed``
        (64 bytes per the BIP39 derivation above). Being mutable, it can be
        overwritten in place by the caller once it is no longer needed.

    Examples
    --------
    ::

        seed = mnemonic_to_seed("abandon " * 11 + "about")
        assert len(seed) == 64

        # With passphrase, a completely different seed:
        seed2 = mnemonic_to_seed("abandon " * 11 + "about", passphrase="TREZOR")
        assert seed != seed2
    """
    # split() with no argument ignores leading/trailing whitespace and treats
    # any Unicode whitespace run (including U+3000) as one separator.
    sentence = unicodedata.normalize("NFKD", " ".join(words.split()))
    extension = unicodedata.normalize("NFKD", passphrase)
    return bytearray(_Mnemonic.to_seed(sentence, extension))
| 399 | |
def detect_language(words: str) -> str:
    """Identify which wordlist language a mnemonic phrase is written in.

    The phrase is whitespace-normalised and passed to
    ``Mnemonic.detect_language``; any exception raised there is re-raised
    as :class:`Bip39Error` with the original attached as its cause.

    Parameters
    ----------
    words:
        Mnemonic phrase. Leading and trailing whitespace is dropped and
        internal whitespace runs are collapsed to one ASCII space.

    Returns
    -------
    str
        The language name reported by the library, e.g. ``"english"``.

    Raises
    ------
    Bip39Error
        If the library cannot determine a language for the phrase.

    Examples
    --------
    ::

        words_fr = generate_mnemonic(language="french")
        assert detect_language(words_fr) == "french"

        detect_language("not bip39 words")   # raises Bip39Error
    """
    phrase = " ".join(words.split())
    try:
        language = _Mnemonic.detect_language(phrase)
    except Exception as exc:
        # Broad on purpose: this is the façade boundary, and every library
        # failure is reported through the module's own error type.
        raise Bip39Error(
            f"Cannot detect BIP39 language for the given mnemonic: {exc}"
        ) from exc
    return language
| 439 | |
def word_count(strength: Bip39Strength = STRENGTH_STANDARD) -> int:
    """Map an entropy *strength* to the length of its mnemonic in words.

    Parameters
    ----------
    strength:
        Entropy size in bits: 128, 160, 192, 224, or 256. Defaults to
        :data:`STRENGTH_STANDARD`.

    Returns
    -------
    int
        12, 15, 18, 21, or 24 for 128, 160, 192, 224, or 256 bits
        respectively.

    Raises
    ------
    Bip39Error
        If *strength* is not one of the supported values.

    Examples
    --------
    ::

        assert word_count(128) == 12
        assert word_count(256) == 24
    """
    # Table values are always ints, so None can only mean "key absent".
    count = _WORDS_FOR_STRENGTH.get(strength)
    if count is None:
        raise Bip39Error(
            f"Unsupported BIP39 strength: {strength}. "
            f"Must be one of {sorted(_WORDS_FOR_STRENGTH)}."
        )
    return count
File History
2 commits
sha256:a73c3f57b665e8c0be2c9e977b3ebefdb7ae8d46f196986d911c6a8f5d8b8d49
docs: update store.py references to focused module paths
Sonnet 4.6
30 days ago
sha256:b6cae4448122b2cc690d913be26f7e0a539f11855b8d288bd48be43eb532b5b2
refactor: migrate all source callers off muse.core.store re…
Sonnet 4.6
minor
⚠
30 days ago