"""Tests for muse.core.bip39 — BIP39 mnemonic generation, validation, and seed derivation. Test categories --------------- - Unit: individual function contracts, argument validation, return types - Integration: round-trip (generate → validate → seed), known-vector verification - Stress: entropy quality across 1 000 independent mnemonics - Security: no module-level CSPRNG, NFKD normalization, passphrase isolation - Data integrity: official BIP39 test vectors from the Trezor reference suite Official test vectors --------------------- BIP39 specifies a set of test vectors (mnemonic + passphrase → seed). The vectors used here come from the Trezor python-mnemonic test suite, which is the same library we delegate to. Each vector is independently verifiable at https://github.com/trezor/python-mnemonic/blob/master/vectors.json The vectors include the standard "TREZOR" passphrase as well as the empty passphrase case, covering both branches of the PBKDF2 derivation. """ from __future__ import annotations import unicodedata from typing import NamedTuple import pytest from muse.core.bip39 import ( FUNCTIONAL_LANGUAGES, STRENGTH_HIGH, STRENGTH_LOW, STRENGTH_MEDIUM, STRENGTH_PARANOID, STRENGTH_STANDARD, SUPPORTED_LANGUAGES, Bip39Error, detect_language, generate_mnemonic, mnemonic_to_seed, validate_mnemonic, word_count, ) # --------------------------------------------------------------------------- # Official BIP39 test vectors (subset from Trezor reference) # --------------------------------------------------------------------------- # Format: (mnemonic, passphrase, expected_seed_hex) # Source: https://github.com/trezor/python-mnemonic/blob/master/vectors.json _BIP39_VECTORS = [ # ── Official Trezor BIP39 test vectors ─────────────────────────────── # Source: https://github.com/trezor/python-mnemonic/blob/master/vectors.json # All seeds verified against the live mnemonic library and cross-checked # against the BIP39 specification. ( "abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon about", "TREZOR", "c55257c360c07c72029aebc1b53c05ed0362ada38ead3e3e9efa3708e53495531f09a6987599d18264c1e1c92f2cf141630c7a3c4ab7c81b2f001698e7463b04", ), ( "legal winner thank year wave sausage worth useful legal winner thank yellow", "TREZOR", "2e8905819b8723fe2c1d161860e5ee1830318dbf49a83bd451cfb8440c28bd6fa457fe1296106559a3c80937a1c1069be3a3a5bd381ee6260e8d9739fce1f607", ), ( "letter advice cage absurd amount doctor acoustic avoid letter advice cage above", "TREZOR", "d71de856f81a8acc65e6fc851a38d4d7ec216fd0796d0a6827a3ad6ed5511a30fa280f12eb2e47ed2ac03b5c462a0358d18d69fe4f985ec81778c1b370b652a8", ), ( "zoo zoo zoo zoo zoo zoo zoo zoo zoo zoo zoo wrong", "TREZOR", "ac27495480225222079d7be181583751e86f571027b0497b5b5d11218e0a8a13332572917f0f8e5a589620c6f15b11c61dee327651a14c34e18231052e48c069", ), # 24-word (256-bit entropy) ( "abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon " "abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon art", "TREZOR", "bda85446c68413707090a52022edd26a1c9462295029f2e60cd7c4f2bbd3097170af7a4d73245cafa9c3cca8d561a7c3de6f5d4a10be8ed2a5e608d68f92fcc8", ), # Empty passphrase ( "abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon about", "", "5eb00bbddcf069084889a8ab9155568165f5c453ccb85e70811aaed6f6da5fc19a5ac40b389cd370d086206dec8aa6c43daea6690f20ad3d8d48b2d2ce9e38e4", ), ] class _Vector(NamedTuple): mnemonic: str passphrase: str expected_hex: str _VECTORS = [_Vector(*v) for v in _BIP39_VECTORS] # --------------------------------------------------------------------------- # Unit — generate_mnemonic # --------------------------------------------------------------------------- class TestGenerateMnemonic: # ── word counts for all five strengths ─────────────────────────────── @pytest.mark.parametrize("strength,expected_words", [ (STRENGTH_STANDARD, 12), (STRENGTH_LOW, 15), (STRENGTH_MEDIUM, 18), (STRENGTH_HIGH, 21), (STRENGTH_PARANOID, 24), ]) def test_word_count_for_strength(self, strength: int, expected_words: int) -> None: words = generate_mnemonic(strength=strength) # type: ignore[arg-type] assert len(words.split()) == expected_words def test_default_returns_12_words(self) -> None: assert len(generate_mnemonic().split()) == 12 def test_returns_string(self) -> None: assert isinstance(generate_mnemonic(), str) def test_english_all_lowercase(self) -> None: words = generate_mnemonic() assert words == words.lower() def test_no_leading_trailing_whitespace(self) -> None: words = generate_mnemonic() assert words == words.strip() def test_english_single_space_separator(self) -> None: assert " " not in generate_mnemonic() def test_generated_passes_validation(self) -> None: assert validate_mnemonic(generate_mnemonic()) is True def test_all_strengths_pass_validation(self) -> None: for strength in (STRENGTH_STANDARD, STRENGTH_LOW, STRENGTH_MEDIUM, STRENGTH_HIGH, STRENGTH_PARANOID): words = generate_mnemonic(strength=strength) # type: ignore[arg-type] assert validate_mnemonic(words), f"Failed for strength={strength}" # ── language coverage (functional languages only) ──────────────────── @pytest.mark.parametrize("language", FUNCTIONAL_LANGUAGES) def test_functional_languages_generate_and_validate(self, language: str) -> None: words = generate_mnemonic(language=language) assert validate_mnemonic(words), f"Validation failed for language={language}" @pytest.mark.parametrize("language", FUNCTIONAL_LANGUAGES) def test_functional_languages_detect_correctly(self, language: str) -> None: words = generate_mnemonic(language=language) assert detect_language(words) == language def test_japanese_uses_ideographic_separator(self) -> None: words = generate_mnemonic(language="japanese") # Japanese BIP39 uses U+3000 (ideographic space) as separator assert "\u3000" in words def test_invalid_language_raises(self) -> None: with pytest.raises(Bip39Error, match="Unsupported BIP39 language"): generate_mnemonic(language="klingon") # ── invalid strengths ──────────────────────────────────────────────── def test_invalid_strength_raises(self) -> None: with pytest.raises(Bip39Error, match="Unsupported BIP39 strength"): generate_mnemonic(strength=64) # type: ignore[arg-type] def test_invalid_strength_100_raises(self) -> None: with pytest.raises(Bip39Error): generate_mnemonic(strength=100) # type: ignore[arg-type] # --------------------------------------------------------------------------- # Unit — validate_mnemonic # --------------------------------------------------------------------------- class TestValidateMnemonic: def test_known_valid_12_word(self) -> None: assert validate_mnemonic("abandon abandon abandon abandon abandon abandon " "abandon abandon abandon abandon abandon about") is True def test_known_valid_24_word(self) -> None: assert validate_mnemonic( "abandon abandon abandon abandon abandon abandon abandon abandon " "abandon abandon abandon abandon abandon abandon abandon abandon " "abandon abandon abandon abandon abandon abandon abandon art" ) is True def test_bad_checksum_returns_false(self) -> None: # 12 "abandon"s has a bad checksum (last word should be "about") assert validate_mnemonic("abandon " * 12) is False def test_unknown_word_returns_false(self) -> None: assert validate_mnemonic("notaword " * 11 + "about") is False def test_empty_string_returns_false(self) -> None: assert validate_mnemonic("") is False def test_too_few_words_returns_false(self) -> None: assert validate_mnemonic("abandon") is False def test_strips_and_normalizes_whitespace(self) -> None: # extra spaces should be tolerated spaced = " abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon about " assert validate_mnemonic(spaced) is True def test_returns_bool_not_truthy(self) -> None: result = validate_mnemonic("abandon abandon abandon abandon abandon abandon " "abandon abandon abandon abandon abandon about") assert result is True bad = validate_mnemonic("not a valid mnemonic phrase at all here") assert bad is False # --------------------------------------------------------------------------- # Unit — mnemonic_to_seed # --------------------------------------------------------------------------- class TestMnemonicToSeed: def test_returns_64_bytes(self) -> None: seed = mnemonic_to_seed("abandon abandon abandon abandon abandon abandon " "abandon abandon abandon abandon abandon about") assert isinstance(seed, (bytes, bytearray)) assert len(seed) == 64 def test_empty_passphrase_default(self) -> None: words = "abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon about" seed_explicit = mnemonic_to_seed(words, passphrase="") seed_default = mnemonic_to_seed(words) assert seed_explicit == seed_default def test_different_passphrase_different_seed(self) -> None: words = "abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon about" s1 = mnemonic_to_seed(words, passphrase="") s2 = mnemonic_to_seed(words, passphrase="horse") assert s1 != s2 def test_deterministic(self) -> None: words = "abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon about" assert mnemonic_to_seed(words) == mnemonic_to_seed(words) def test_nfkd_normalization_passphrase(self) -> None: """Both composed and decomposed unicode should produce the same seed.""" words = "abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon about" composed = "caf\u00e9" # é as single code point (NFC) decomposed = "cafe\u0301" # é as e + combining accent (NFD) # After NFKD both normalise to the same sequence assert ( unicodedata.normalize("NFKD", composed) == unicodedata.normalize("NFKD", decomposed) ) seed_composed = mnemonic_to_seed(words, passphrase=composed) seed_decomposed = mnemonic_to_seed(words, passphrase=decomposed) assert seed_composed == seed_decomposed def test_nfkd_normalization_mnemonic(self) -> None: """Mnemonic whitespace normalisation should be transparent.""" words_normal = "abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon about" words_extra_space = "abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon about" seed_normal = mnemonic_to_seed(words_normal) seed_extra = mnemonic_to_seed(words_extra_space) assert seed_normal == seed_extra # --------------------------------------------------------------------------- # Unit — word_count # --------------------------------------------------------------------------- class TestWordCount: @pytest.mark.parametrize("strength,expected", [ (STRENGTH_STANDARD, 12), (STRENGTH_LOW, 15), (STRENGTH_MEDIUM, 18), (STRENGTH_HIGH, 21), (STRENGTH_PARANOID, 24), ]) def test_all_strengths(self, strength: int, expected: int) -> None: assert word_count(strength) == expected # type: ignore[arg-type] def test_default(self) -> None: assert word_count() == 12 def test_invalid_raises(self) -> None: with pytest.raises(Bip39Error): word_count(64) # type: ignore[arg-type] class TestDetectLanguage: @pytest.mark.parametrize("language", FUNCTIONAL_LANGUAGES) def test_detect_all_languages(self, language: str) -> None: words = generate_mnemonic(language=language) assert detect_language(words) == language def test_unknown_words_raises(self) -> None: with pytest.raises(Bip39Error): detect_language("zzz qqq xxx invalid nonsense words here blah") def test_empty_raises(self) -> None: with pytest.raises(Bip39Error): detect_language("") class TestSupportedLanguages: def test_is_list_of_strings(self) -> None: assert isinstance(SUPPORTED_LANGUAGES, list) assert all(isinstance(l, str) for l in SUPPORTED_LANGUAGES) def test_includes_english(self) -> None: assert "english" in SUPPORTED_LANGUAGES def test_includes_japanese(self) -> None: assert "japanese" in SUPPORTED_LANGUAGES def test_includes_korean(self) -> None: assert "korean" in SUPPORTED_LANGUAGES def test_at_least_10_languages(self) -> None: assert len(SUPPORTED_LANGUAGES) >= 10 # --------------------------------------------------------------------------- # Data integrity — official BIP39 test vectors # --------------------------------------------------------------------------- class TestOfficialVectors: """Verify mnemonic_to_seed against official Trezor BIP39 test vectors. These vectors are independently verifiable and cover both 12-word (128-bit) and 24-word (256-bit) entropy, with and without passphrase. """ @pytest.mark.parametrize("vector", _VECTORS, ids=[v.mnemonic[:20] for v in _VECTORS]) def test_seed_matches_vector(self, vector: _Vector) -> None: seed = mnemonic_to_seed(vector.mnemonic, passphrase=vector.passphrase) # Strip whitespace from hex — test vectors may have formatting expected = bytes.fromhex(vector.expected_hex.replace(" ", "").replace("\n", "")) assert seed == expected, ( f"Seed mismatch for mnemonic '{vector.mnemonic[:30]}...' " f"with passphrase '{vector.passphrase}'" ) def test_all_vectors_produce_64_bytes(self) -> None: for v in _VECTORS: seed = mnemonic_to_seed(v.mnemonic, passphrase=v.passphrase) assert len(seed) == 64, f"Expected 64 bytes, got {len(seed)}" # --------------------------------------------------------------------------- # Integration — round-trip # --------------------------------------------------------------------------- class TestRoundTrip: def test_generate_validate_seed_pipeline(self) -> None: mnemonic = generate_mnemonic() assert validate_mnemonic(mnemonic) seed = mnemonic_to_seed(mnemonic) assert len(seed) == 64 assert isinstance(seed, (bytes, bytearray)) def test_paranoid_generate_validate_seed_pipeline(self) -> None: mnemonic = generate_mnemonic(strength=STRENGTH_PARANOID) assert validate_mnemonic(mnemonic) seed = mnemonic_to_seed(mnemonic) assert len(seed) == 64 def test_seed_is_deterministic_across_calls(self) -> None: mnemonic = generate_mnemonic() seed1 = mnemonic_to_seed(mnemonic) seed2 = mnemonic_to_seed(mnemonic) assert seed1 == seed2 # --------------------------------------------------------------------------- # Stress — entropy quality # --------------------------------------------------------------------------- class TestEntropyQuality: """Verify that generated mnemonics are not duplicated across a large sample. A collision in 1 000 independent 128-bit entropy samples is so unlikely (probability ≈ 1/(2^128)) that any collision indicates a CSPRNG failure. """ def test_no_duplicates_in_1000_standard_mnemonics(self) -> None: seen: set[str] = set() for _ in range(1_000): m = generate_mnemonic(strength=STRENGTH_STANDARD) assert m not in seen, "Duplicate mnemonic generated — CSPRNG failure" seen.add(m) def test_no_duplicates_in_100_paranoid_mnemonics(self) -> None: seen: set[str] = set() for _ in range(100): m = generate_mnemonic(strength=STRENGTH_PARANOID) assert m not in seen, "Duplicate paranoid mnemonic generated — CSPRNG failure" seen.add(m) def test_seeds_differ_across_distinct_mnemonics(self) -> None: seeds: set[bytes] = set() for _ in range(100): m = generate_mnemonic() seeds.add(bytes(mnemonic_to_seed(m))) assert len(seeds) == 100, "Seed collision detected — derivation is not injective" # --------------------------------------------------------------------------- # Security # --------------------------------------------------------------------------- class TestSecurity: def test_passphrase_isolation(self) -> None: """Every distinct passphrase should yield a completely different seed.""" words = "abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon about" passphrases = ["", "a", "ab", "abc", "TREZOR", "hunter2", " ", "\t", "日本語"] seeds = [bytes(mnemonic_to_seed(words, p)) for p in passphrases] assert len(set(seeds)) == len(passphrases), "Passphrase collision — derivation is broken" def test_single_word_change_completely_changes_seed(self) -> None: """Changing one word should produce a completely different, unrelated seed.""" base = "abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon about" alt = "zoo zoo zoo zoo zoo zoo zoo zoo zoo zoo zoo wrong" seed_base = mnemonic_to_seed(base) seed_alt = mnemonic_to_seed(alt) # Seeds should differ in most bytes — definitely not equal assert seed_base != seed_alt # At least 50% of bytes should differ (approximate avalanche) diff = sum(a != b for a, b in zip(seed_base, seed_alt)) assert diff >= 20, f"Only {diff}/64 bytes differ — suspiciously low avalanche" def test_mnemonic_to_seed_does_not_raise_on_invalid_mnemonic(self) -> None: """BIP39 spec: seed derivation succeeds even for invalid mnemonics. Validation is separate. mnemonic_to_seed should not raise on invalid input — it just produces a seed with no well-defined derivation. The caller is responsible for calling validate_mnemonic first. """ # Should not raise seed = mnemonic_to_seed("these are not bip39 words at all") assert len(seed) == 64 # --------------------------------------------------------------------------- # Performance # --------------------------------------------------------------------------- class TestPerformance: """Timing budgets for BIP39 operations. These tests enforce upper bounds on latency. mnemonic_to_seed is intentionally the slowest operation (PBKDF2 with 2 048 iterations is designed to be slow for brute-force resistance); all other operations must complete in well under 100 ms. """ def test_generate_mnemonic_under_100ms(self) -> None: import time start = time.perf_counter() for _ in range(10): generate_mnemonic() elapsed = (time.perf_counter() - start) / 10 assert elapsed < 0.1, f"generate_mnemonic averaged {elapsed*1000:.1f}ms — too slow" def test_validate_mnemonic_under_10ms(self) -> None: import time words = generate_mnemonic() start = time.perf_counter() for _ in range(20): validate_mnemonic(words) elapsed = (time.perf_counter() - start) / 20 assert elapsed < 0.01, f"validate_mnemonic averaged {elapsed*1000:.1f}ms — too slow" def test_mnemonic_to_seed_under_500ms(self) -> None: """PBKDF2 budget: intentionally slow by spec, but must complete < 500ms.""" import time words = "abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon about" start = time.perf_counter() mnemonic_to_seed(words, "TREZOR") elapsed = time.perf_counter() - start assert elapsed < 0.5, f"mnemonic_to_seed took {elapsed*1000:.1f}ms — exceeds 500ms budget" def test_detect_language_under_50ms(self) -> None: import time words = generate_mnemonic() start = time.perf_counter() for _ in range(20): detect_language(words) elapsed = (time.perf_counter() - start) / 20 assert elapsed < 0.05, f"detect_language averaged {elapsed*1000:.1f}ms — too slow" # --------------------------------------------------------------------------- # Docstrings # --------------------------------------------------------------------------- class TestDocstrings: """Every public symbol in muse.core.bip39 must have a docstring. Docstrings are part of the contract — they are the first thing an agent or engineer reads when integrating this module. A missing docstring is a missing specification. """ def test_module_has_docstring(self) -> None: import muse.core.bip39 as mod assert mod.__doc__, "muse.core.bip39 module has no docstring" @pytest.mark.parametrize("name", [ "Bip39Error", "generate_mnemonic", "validate_mnemonic", "mnemonic_to_seed", "detect_language", "word_count", ]) def test_public_symbol_has_docstring(self, name: str) -> None: import muse.core.bip39 as mod obj = getattr(mod, name) assert obj.__doc__, f"muse.core.bip39.{name} has no docstring"