mnemonic.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294
  1. # Originally from: https://github.com/trezor/python-mnemonic
  2. #
  3. # Copyright (c) 2013 Pavol Rusnak
  4. # Copyright (c) 2017 mruddy
  5. #
  6. # Permission is hereby granted, free of charge, to any person obtaining a copy of
  7. # this software and associated documentation files (the "Software"), to deal in
  8. # the Software without restriction, including without limitation the rights to
  9. # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10. # of the Software, and to permit persons to whom the Software is furnished to do
  11. # so, subject to the following conditions:
  12. #
  13. # The above copyright notice and this permission notice shall be included in all
  14. # copies or substantial portions of the Software.
  15. #
  16. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
  20. # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  21. # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  22. #
  23. import os
  24. from pathlib import (
  25. Path,
  26. )
  27. import secrets
  28. from typing import (
  29. Dict,
  30. List,
  31. Union,
  32. )
  33. import warnings
  34. from bitarray import (
  35. bitarray,
  36. )
  37. from bitarray.util import (
  38. ba2int,
  39. int2ba,
  40. )
  41. from eth_utils import (
  42. ValidationError,
  43. )
  44. from eth_account.types import (
  45. Language,
  46. )
  47. from ._utils import (
  48. pbkdf2_hmac_sha512,
  49. sha256,
  50. unicode_decompose_string,
  51. )
  52. VALID_ENTROPY_SIZES = [16, 20, 24, 28, 32]
  53. VALID_WORD_COUNTS = [12, 15, 18, 21, 24]
  54. WORDLIST_DIR = Path(__file__).parent / "wordlist"
  55. WORDLIST_LEN = 2048
  56. _cached_wordlists: Dict[str, List[str]] = dict()
  57. def get_wordlist(language: str) -> List[str]:
  58. if language in _cached_wordlists.keys():
  59. return _cached_wordlists[language]
  60. with open(WORDLIST_DIR / f"{language}.txt", encoding="utf-8") as f:
  61. wordlist = [w.strip() for w in f.readlines()]
  62. if len(wordlist) != WORDLIST_LEN:
  63. raise ValidationError(
  64. f"Wordlist should contain {WORDLIST_LEN} words, "
  65. f"but it contains {len(wordlist)} words."
  66. )
  67. _cached_wordlists[language] = wordlist
  68. return wordlist
  69. class Mnemonic:
  70. r"""
  71. Creates and validates BIP39 mnemonics.
  72. .. doctest:: python
  73. >>> from eth_account.hdaccount import Language, Mnemonic
  74. >>> # Create a new Mnemonic instance with Czech language
  75. >>> cz_mnemonic = Mnemonic(Language.CZECH)
  76. >>> # English is the default language
  77. >>> en_mnemonic = Mnemonic()
  78. >>> # List available languages
  79. >>> available_languages = Mnemonic.list_languages()
  80. >>> print(available_languages)
  81. ['chinese_simplified', 'chinese_traditional', 'czech', 'english', 'french', 'italian', 'japanese', 'korean', 'spanish']
  82. >>> # List available enumerated languages
  83. >>> available_languages = Mnemonic.list_languages_enum()
  84. >>> print(available_languages)
  85. [<Language.CHINESE_SIMPLIFIED: 'chinese_simplified'>, <Language.CHINESE_TRADITIONAL: 'chinese_traditional'>, <Language.CZECH: 'czech'>, <Language.ENGLISH: 'english'>, <Language.FRENCH: 'french'>, <Language.ITALIAN: 'italian'>, <Language.JAPANESE: 'japanese'>, <Language.KOREAN: 'korean'>, <Language.SPANISH: 'spanish'>]
  86. >>> # Generate a new mnemonic phrase
  87. >>> mnemonic_phrase = en_mnemonic.generate()
  88. >>> print(mnemonic_phrase) # doctest: +SKIP
  89. 'cabin raise oven oven knock fantasy flock letter click empty skate volcano'
  90. >>> # Validate a mnemonic phrase
  91. >>> is_valid = en_mnemonic.is_mnemonic_valid(mnemonic_phrase)
  92. >>> print(is_valid)
  93. True
  94. >>> # Convert mnemonic phrase to seed
  95. >>> seed = en_mnemonic.to_seed(mnemonic_phrase, passphrase="optional passphrase")
  96. >>> print(seed) # doctest: +SKIP
  97. b'\x97ii\x07\x12\xf0$\x81\x98\xb6?\x07\x08t7\x18d\x87\xe1\x7f\xbe\xbaL\xb4i%\xeb\x12\xce\xe2h\x1c\xb2\x19\x13\xfb9wtoV\x9c\xb8\xdf;5\xba4X\xa3\xd6b`|\xdc\xb1\x10\xb0\xeeS\x86\x95\xd75'
  98. """ # noqa: E501
  99. def __init__(self, raw_language: Union[Language, str] = Language.ENGLISH):
  100. if isinstance(raw_language, str):
  101. warnings.warn(
  102. "The language parameter should be a Language enum, not a string. "
  103. "This will be enforced in a future version.",
  104. DeprecationWarning,
  105. stacklevel=2,
  106. )
  107. language = raw_language.lower().replace(" ", "_")
  108. languages = Mnemonic.list_languages()
  109. if language not in languages:
  110. raise ValidationError(
  111. f"Invalid language choice '{language}', must be one of {languages}"
  112. )
  113. else:
  114. language = raw_language.value
  115. self.language = language
  116. self.wordlist = get_wordlist(self.language)
  117. @staticmethod
  118. def list_languages() -> List[str]:
  119. """
  120. Returns a list of languages available for the seed phrase
  121. """
  122. return sorted(Path(f).stem for f in WORDLIST_DIR.rglob("*.txt"))
  123. @staticmethod
  124. def list_languages_enum() -> List[Language]:
  125. """
  126. Returns a list of Language objects available for the seed phrase
  127. """
  128. return sorted(Language(Path(f).stem) for f in WORDLIST_DIR.rglob("*.txt"))
  129. @classmethod
  130. def detect_language(cls, raw_mnemonic: str) -> Language:
  131. mnemonic = unicode_decompose_string(raw_mnemonic)
  132. words = set(mnemonic.split(" "))
  133. matching_languages = {
  134. lang
  135. for lang in Mnemonic.list_languages()
  136. if len(words.intersection(cls(Language(lang)).wordlist)) == len(words)
  137. }
  138. # No language had all words match it, so the language can't be fully determined
  139. if len(matching_languages) < 1:
  140. raise ValidationError(f"Language not detected for word(s): {raw_mnemonic}")
  141. # If both chinese simplified and chinese traditional match (because one is a
  142. # subset of the other) then return simplified. This doesn't hold for
  143. # other languages.
  144. if len(matching_languages) == 2 and all(
  145. "chinese" in lang for lang in matching_languages
  146. ):
  147. return Language.CHINESE_SIMPLIFIED
  148. # Because certain wordlists share some similar words, if we detect multiple
  149. # languages that the provided mnemonic word(s) could be valid in, we have
  150. # to throw
  151. if len(matching_languages) > 1:
  152. raise ValidationError(
  153. f"Word(s) are valid in multiple languages: {raw_mnemonic}"
  154. )
  155. (language,) = matching_languages
  156. return Language(language)
  157. def generate(self, num_words: int = 12) -> str:
  158. """
  159. Generate a new mnemonic with the specified number of words.
  160. """
  161. if num_words not in VALID_WORD_COUNTS:
  162. raise ValidationError(
  163. f"Invalid choice for number of words: {num_words}, should be one of "
  164. f"{VALID_WORD_COUNTS}"
  165. )
  166. return self.to_mnemonic(os.urandom(4 * num_words // 3)) # 4/3 bytes per word
  167. def to_mnemonic(self, entropy: bytes) -> str:
  168. entropy_size = len(entropy)
  169. if entropy_size not in VALID_ENTROPY_SIZES:
  170. raise ValidationError(
  171. f"Invalid data length {len(entropy)}, should be one of "
  172. f"{VALID_ENTROPY_SIZES}"
  173. )
  174. bits = bitarray()
  175. bits.frombytes(entropy)
  176. checksum = bitarray()
  177. checksum.frombytes(sha256(entropy))
  178. # Add enough bits from the checksum to make it modulo 11 (2**11 = 2048)
  179. bits.extend(checksum[: entropy_size // 4])
  180. indices = tuple(
  181. ba2int(bits[i * 11 : (i + 1) * 11]) for i in range(len(bits) // 11)
  182. )
  183. words = tuple(self.wordlist[idx] for idx in indices)
  184. if self.language == "japanese": # Japanese must be joined by ideographic space.
  185. phrase = "\u3000".join(words)
  186. else:
  187. phrase = " ".join(words)
  188. return phrase
  189. def is_mnemonic_valid(self, mnemonic: str) -> bool:
  190. """
  191. Checks if mnemonic is valid
  192. :param str mnemonic: Mnemonic string
  193. """
  194. words = unicode_decompose_string(mnemonic).split(" ")
  195. num_words = len(words)
  196. if num_words not in VALID_WORD_COUNTS:
  197. return False
  198. try:
  199. indices = tuple(self.wordlist.index(w) for w in words)
  200. except ValueError:
  201. return False
  202. encoded_seed = bitarray()
  203. for idx in indices:
  204. # Build bitarray from tightly packing indices (which are 11-bits integers)
  205. encoded_seed.extend(int2ba(idx, length=11))
  206. entropy_size = 4 * num_words // 3
  207. # Checksum the raw entropy bits
  208. checksum = bitarray()
  209. checksum.frombytes(sha256(encoded_seed[: entropy_size * 8].tobytes()))
  210. computed_checksum = checksum[: len(encoded_seed) - entropy_size * 8].tobytes()
  211. # Extract the stored checksum bits
  212. stored_checksum = encoded_seed[entropy_size * 8 :].tobytes()
  213. # Check that the stored matches the relevant slice of the actual checksum
  214. # NOTE: Use secrets.compare_digest for protection again timing attacks
  215. return secrets.compare_digest(stored_checksum, computed_checksum)
  216. def expand_word(self, prefix: str) -> str:
  217. if prefix in self.wordlist:
  218. return prefix
  219. else:
  220. matches: List[str] = [
  221. word for word in self.wordlist if word.startswith(prefix)
  222. ]
  223. if len(matches) == 1: # matched exactly one word in the wordlist
  224. return matches[0]
  225. else:
  226. # exact match not found.
  227. # this is not a validation routine, just return the input
  228. return prefix
  229. def expand(self, mnemonic: str) -> str:
  230. return " ".join(map(self.expand_word, mnemonic.split(" ")))
  231. @classmethod
  232. def to_seed(cls, checked_mnemonic: str, passphrase: str = "") -> bytes:
  233. """
  234. :param str checked_mnemonic: Must be a correct, fully-expanded BIP39 seed phrase
  235. :param str passphrase: Encryption passphrase used to secure the mnemonic
  236. :returns bytes: 64 bytes of raw seed material from PRNG
  237. """
  238. mnemonic = unicode_decompose_string(checked_mnemonic)
  239. # NOTE: This domain separater ("mnemonic") is added per BIP39 spec
  240. # to the passphrase
  241. # https://github.com/bitcoin/bips/blob/master/bip-0039.mediawiki#from-mnemonic-to-seed # blocklint: URL pragma # noqa: E501
  242. salt = "mnemonic" + unicode_decompose_string(passphrase)
  243. # From BIP39:
  244. # To create a binary seed from the mnemonic, we use the PBKDF2 function with a
  245. # mnemonic sentence (in UTF-8 NFKD) used as the password and the string
  246. # "mnemonic" and passphrase (again in UTF-8 NFKD) used as the salt.
  247. stretched = pbkdf2_hmac_sha512(mnemonic, salt)
  248. return stretched[:64]