codec.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354
  1. import collections
  2. from eth_utils import (
  3. big_endian_to_int,
  4. int_to_big_endian,
  5. is_bytes,
  6. )
  7. from rlp.exceptions import (
  8. DecodingError,
  9. EncodingError,
  10. )
  11. from rlp.sedes import (
  12. big_endian_int,
  13. binary,
  14. boolean,
  15. text,
  16. )
  17. from rlp.sedes.binary import (
  18. Binary as BinaryClass,
  19. )
  20. from rlp.sedes.lists import (
  21. List,
  22. is_sedes,
  23. is_sequence,
  24. )
  25. from rlp.sedes.serializable import (
  26. Serializable,
  27. )
  28. from rlp.utils import (
  29. ALL_BYTES,
  30. )
  31. try:
  32. import rusty_rlp
  33. except ImportError:
  34. import logging
  35. from rlp.atomic import (
  36. Atomic,
  37. )
  38. logger = logging.getLogger("rlp.codec")
  39. logger.debug(
  40. "Consider installing rusty-rlp to improve pyrlp performance with a rust based"
  41. "backend. Not currently functional for Python 3.11"
  42. )
  43. def encode_raw(item):
  44. r"""RLP encode (a nested sequence of) :class:`Atomic`\s."""
  45. if isinstance(item, Atomic):
  46. if len(item) == 1 and item[0] < 128:
  47. return item
  48. payload = item
  49. prefix_offset = 128 # string
  50. elif not isinstance(item, str) and isinstance(item, collections.abc.Sequence):
  51. payload = b"".join(encode_raw(x) for x in item)
  52. prefix_offset = 192 # list
  53. else:
  54. msg = f"Cannot encode object of type {type(item).__name__}"
  55. raise EncodingError(msg, item)
  56. try:
  57. prefix = length_prefix(len(payload), prefix_offset)
  58. except ValueError:
  59. raise EncodingError("Item too big to encode", item)
  60. return prefix + payload
  61. def decode_raw(item, strict, _):
  62. try:
  63. result, per_item_rlp, end = consume_item(item, 0)
  64. except IndexError:
  65. raise DecodingError("RLP string too short", item)
  66. if end != len(item) and strict:
  67. msg = f"RLP string ends with {len(item) - end} superfluous bytes"
  68. raise DecodingError(msg, item)
  69. return result, per_item_rlp
  70. else:
  71. def decode_raw(item, strict, preserve_per_item_rlp):
  72. try:
  73. return rusty_rlp.decode_raw(item, strict, preserve_per_item_rlp)
  74. except (TypeError, rusty_rlp.DecodingError) as e:
  75. raise DecodingError(e, item)
  76. def encode_raw(obj):
  77. try:
  78. if isinstance(obj, bytearray):
  79. obj = bytes(obj)
  80. return rusty_rlp.encode_raw(obj)
  81. except rusty_rlp.EncodingError as e:
  82. raise EncodingError(e, obj)
  83. def encode(obj, sedes=None, infer_serializer=True, cache=True):
  84. """
  85. Encode a Python object in RLP format.
  86. By default, the object is serialized in a suitable way first (using
  87. :func:`rlp.infer_sedes`) and then encoded. Serialization can be explicitly
  88. suppressed by setting `infer_serializer` to ``False`` and not passing an
  89. alternative as `sedes`.
  90. If `obj` has an attribute :attr:`_cached_rlp` (as, notably,
  91. :class:`rlp.Serializable`) and its value is not `None`, this value is
  92. returned bypassing serialization and encoding, unless `sedes` is given (as
  93. the cache is assumed to refer to the standard serialization which can be
  94. replaced by specifying `sedes`).
  95. If `obj` is a :class:`rlp.Serializable` and `cache` is true, the result of
  96. the encoding will be stored in :attr:`_cached_rlp` if it is empty.
  97. :param sedes: an object implementing a function ``serialize(obj)`` which will be
  98. used to serialize ``obj`` before encoding, or ``None`` to use the
  99. infered one (if any)
  100. :param infer_serializer: if ``True`` an appropriate serializer will be selected
  101. using :func:`rlp.infer_sedes` to serialize `obj` before
  102. encoding
  103. :param cache: cache the return value in `obj._cached_rlp` if possible
  104. (default `True`)
  105. :returns: the RLP encoded item
  106. :raises: :exc:`rlp.EncodingError` in the rather unlikely case that the item is too
  107. big to encode (will not happen)
  108. :raises: :exc:`rlp.SerializationError` if the serialization fails
  109. """
  110. if isinstance(obj, Serializable):
  111. cached_rlp = obj._cached_rlp
  112. if sedes is None and cached_rlp:
  113. return cached_rlp
  114. else:
  115. really_cache = cache and sedes is None
  116. else:
  117. really_cache = False
  118. if sedes:
  119. item = sedes.serialize(obj)
  120. elif infer_serializer:
  121. item = infer_sedes(obj).serialize(obj)
  122. else:
  123. item = obj
  124. result = encode_raw(item)
  125. if really_cache:
  126. obj._cached_rlp = result
  127. return result
  128. LONG_LENGTH = 256**8
  129. def length_prefix(length, offset):
  130. """
  131. Construct the prefix to lists or strings denoting their length.
  132. :param length: the length of the item in bytes
  133. :param offset: ``0x80`` when encoding raw bytes, ``0xc0`` when encoding a
  134. list
  135. """
  136. if length < 56:
  137. return ALL_BYTES[offset + length]
  138. elif length < LONG_LENGTH:
  139. length_string = int_to_big_endian(length)
  140. return ALL_BYTES[offset + 56 - 1 + len(length_string)] + length_string
  141. else:
  142. raise ValueError("Length greater than 256**8")
  143. SHORT_STRING = 128 + 56
  144. def consume_length_prefix(rlp, start):
  145. """
  146. Read a length prefix from an RLP string.
  147. :param rlp: the rlp byte string to read from
  148. :param start: the position at which to start reading
  149. :returns: a tuple ``(prefix, type, length, end)``, where ``type`` is either ``str``
  150. or ``list`` depending on the type of the following payload,
  151. ``length`` is the length of the payload in bytes, and ``end`` is
  152. the position of the first payload byte in the rlp string
  153. """
  154. b0 = rlp[start]
  155. if b0 < 128: # single byte
  156. return (b"", bytes, 1, start)
  157. elif b0 < SHORT_STRING: # short string
  158. if b0 - 128 == 1 and rlp[start + 1] < 128:
  159. raise DecodingError(
  160. "Encoded as short string although single byte was possible", rlp
  161. )
  162. return (rlp[start : start + 1], bytes, b0 - 128, start + 1)
  163. elif b0 < 192: # long string
  164. ll = b0 - 183 # - (128 + 56 - 1)
  165. if rlp[start + 1 : start + 2] == b"\x00":
  166. raise DecodingError("Length starts with zero bytes", rlp)
  167. len_prefix = rlp[start + 1 : start + 1 + ll]
  168. l = big_endian_to_int(len_prefix) # noqa: E741
  169. if l < 56:
  170. raise DecodingError("Long string prefix used for short string", rlp)
  171. return (rlp[start : start + 1] + len_prefix, bytes, l, start + 1 + ll)
  172. elif b0 < 192 + 56: # short list
  173. return (rlp[start : start + 1], list, b0 - 192, start + 1)
  174. else: # long list
  175. ll = b0 - 192 - 56 + 1
  176. if rlp[start + 1 : start + 2] == b"\x00":
  177. raise DecodingError("Length starts with zero bytes", rlp)
  178. len_prefix = rlp[start + 1 : start + 1 + ll]
  179. l = big_endian_to_int(len_prefix) # noqa: E741
  180. if l < 56:
  181. raise DecodingError("Long list prefix used for short list", rlp)
  182. return (rlp[start : start + 1] + len_prefix, list, l, start + 1 + ll)
  183. def consume_payload(rlp, prefix, start, type_, length):
  184. """
  185. Read the payload of an item from an RLP string.
  186. :param rlp: the rlp string to read from
  187. :param type_: the type of the payload (``bytes`` or ``list``)
  188. :param start: the position at which to start reading
  189. :param length: the length of the payload in bytes
  190. :returns: a tuple ``(item, per_item_rlp, end)``, where ``item`` is
  191. the read item, per_item_rlp is a list containing the RLP
  192. encoding of each item and ``end`` is the position of the
  193. first unprocessed byte
  194. """
  195. if type_ is bytes:
  196. item = rlp[start : start + length]
  197. return (item, [prefix + item], start + length)
  198. elif type_ is list:
  199. items = []
  200. per_item_rlp = []
  201. list_rlp = prefix
  202. next_item_start = start
  203. end = next_item_start + length
  204. while next_item_start < end:
  205. p, t, l, s = consume_length_prefix(rlp, next_item_start)
  206. item, item_rlp, next_item_start = consume_payload(rlp, p, s, t, l)
  207. per_item_rlp.append(item_rlp)
  208. # When the item returned above is a single element, item_rlp will also
  209. # contain a single element, but when it's a list, the first element will be
  210. # the RLP of the whole List, which is what we want here.
  211. list_rlp += item_rlp[0]
  212. items.append(item)
  213. per_item_rlp.insert(0, list_rlp)
  214. if next_item_start > end:
  215. raise DecodingError(
  216. "List length prefix announced a too small " "length", rlp
  217. )
  218. return (items, per_item_rlp, next_item_start)
  219. else:
  220. raise TypeError("Type must be either list or bytes")
  221. def consume_item(rlp, start):
  222. """
  223. Read an item from an RLP string.
  224. :param rlp: the rlp string to read from
  225. :param start: the position at which to start reading
  226. :returns: a tuple ``(item, per_item_rlp, end)``, where ``item`` is
  227. the read item, per_item_rlp is a list containing the RLP
  228. encoding of each item and ``end`` is the position of the
  229. first unprocessed byte
  230. """
  231. p, t, l, s = consume_length_prefix(rlp, start)
  232. return consume_payload(rlp, p, s, t, l)
  233. def decode(rlp, sedes=None, strict=True, recursive_cache=False, **kwargs):
  234. """
  235. Decode an RLP encoded object.
  236. If the deserialized result `obj` has an attribute :attr:`_cached_rlp` (e.g. if
  237. `sedes` is a subclass of :class:`rlp.Serializable`) it will be set to `rlp`, which
  238. will improve performance on subsequent :func:`rlp.encode` calls. Bear in mind
  239. however that `obj` needs to make sure that this value is updated whenever one of its
  240. fields changes or prevent such changes entirely (:class:`rlp.sedes.Serializable`
  241. does the latter).
  242. :param sedes: an object implementing a function ``deserialize(code)`` which will be
  243. applied after decoding, or ``None`` if no deserialization should be
  244. performed
  245. :param `**kwargs`: additional keyword arguments that will be passed to the
  246. deserializer
  247. :param strict: if false inputs that are longer than necessary don't cause an
  248. exception
  249. :returns: the decoded and maybe deserialized Python object
  250. :raises: :exc:`rlp.DecodingError` if the input string does not end after the root
  251. item and `strict` is true
  252. :raises: :exc:`rlp.DeserializationError` if the deserialization fails
  253. """
  254. if not is_bytes(rlp):
  255. raise DecodingError(
  256. "Can only decode RLP bytes, got type %s" % type(rlp).__name__, rlp
  257. )
  258. item, per_item_rlp = decode_raw(rlp, strict, recursive_cache)
  259. if len(per_item_rlp) == 0:
  260. per_item_rlp = [rlp]
  261. if sedes:
  262. obj = sedes.deserialize(item, **kwargs)
  263. if is_sequence(obj) or hasattr(obj, "_cached_rlp"):
  264. _apply_rlp_cache(obj, per_item_rlp, recursive_cache)
  265. return obj
  266. else:
  267. return item
  268. def _apply_rlp_cache(obj, split_rlp, recursive):
  269. item_rlp = split_rlp.pop(0)
  270. if isinstance(obj, (int, bool, str, bytes, bytearray)):
  271. return
  272. elif hasattr(obj, "_cached_rlp"):
  273. obj._cached_rlp = item_rlp
  274. if not recursive:
  275. return
  276. for sub in obj:
  277. if isinstance(sub, (int, bool, str, bytes, bytearray)):
  278. split_rlp.pop(0)
  279. else:
  280. sub_rlp = split_rlp.pop(0)
  281. _apply_rlp_cache(sub, sub_rlp, recursive)
  282. def infer_sedes(obj):
  283. """
  284. Try to find a sedes objects suitable for a given Python object.
  285. The sedes objects considered are `obj`'s class, `big_endian_int` and
  286. `binary`. If `obj` is a sequence, a :class:`rlp.sedes.List` will be
  287. constructed recursively.
  288. :param obj: the python object for which to find a sedes object
  289. :raises: :exc:`TypeError` if no appropriate sedes could be found
  290. """
  291. if is_sedes(obj.__class__):
  292. return obj.__class__
  293. elif not isinstance(obj, bool) and isinstance(obj, int) and obj >= 0:
  294. return big_endian_int
  295. elif BinaryClass.is_valid_type(obj):
  296. return binary
  297. elif not isinstance(obj, str) and isinstance(obj, collections.abc.Sequence):
  298. return List(map(infer_sedes, obj))
  299. elif isinstance(obj, bool):
  300. return boolean
  301. elif isinstance(obj, str):
  302. return text
  303. msg = f"Did not find sedes handling type {type(obj).__name__}"
  304. raise TypeError(msg)