decoding.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639
  1. import abc
  2. import decimal
  3. import io
  4. from typing import (
  5. Any,
  6. Generator,
  7. )
  8. from eth_utils import (
  9. big_endian_to_int,
  10. to_normalized_address,
  11. to_tuple,
  12. )
  13. from eth_abi.base import (
  14. BaseCoder,
  15. parse_tuple_type_str,
  16. parse_type_str,
  17. )
  18. from eth_abi.exceptions import (
  19. InsufficientDataBytes,
  20. InvalidPointer,
  21. NonEmptyPaddingBytes,
  22. )
  23. from eth_abi.utils.numeric import (
  24. TEN,
  25. abi_decimal_context,
  26. ceil32,
  27. )
  class ContextFramesBytesIO(io.BytesIO):
      """
      A byte stream that keeps a stack of contextual frames.

      Nested decoding with :py:class:``HeadTailDecoder`` needs this because an
      offset stored in a head section is only meaningful relative to the
      encoded object that contains it. To turn such an offset into an absolute
      stream position it must be combined with the position of the enclosing
      object, which is what the frame stack records.

      Example::

          type:  (int,(int,int[]))
          value: (1,(2,[3,3]))

      The inner tuple contains the dynamic type ``int[]`` and is therefore
      dynamic itself, so its encoding lives in the tail section of the outer
      tuple. The inner tuple in turn has its own tail section holding the
      encoding of ``[3,3]``. With the 32-byte words shortened to their
      significant byte, the full encoding is::

          offset  data   frame(s)      meaning
          ------  ----   -----------   ----------------------------------------
               0  0x01   global
              32  0x40   global        offset of object A in global frame (64)
              64  0x02   global + A    start of object A (64 w/offset 0 = 64)
              96  0x40   global + A    offset of object B in A's frame (64)
             128  0x02   global + A    start of object B (64 w/offset 64 = 128)
             160  0x03   global + A
             192  0x03   global + A

      Object B's offset is encoded as 64, which is its distance from the
      start of object A; its absolute position is 128. The offset can only be
      resolved once it is placed in the context of the enclosing frame
      (object A).
      """

      def __init__(self, *args, **kwargs):
          super().__init__(*args, **kwargs)

          # Stack of ``(offset, return_position)`` pairs, innermost frame last.
          self._frames = []
          # Running sum of the offsets of every frame currently on the stack.
          self._total_offset = 0

      def seek_in_frame(self, pos: int, *args: Any, **kwargs: Any) -> None:
          """
          Seek to ``pos`` measured from the combined offset of all active
          frames. Extra arguments are forwarded to ``seek`` unchanged.
          """
          absolute_pos = self._total_offset + pos
          self.seek(absolute_pos, *args, **kwargs)

      def push_frame(self, offset: int) -> None:
          """
          Open a new frame ``offset`` bytes into the current one.

          The cursor position at the time of the call is remembered as the
          frame's return position, then the cursor is moved to the start of
          the new frame.
          """
          return_pos = self.tell()
          self._frames.append((offset, return_pos))
          self._total_offset += offset

          self.seek_in_frame(0)

      def pop_frame(self):
          """
          Close the innermost frame and move the cursor back to the return
          position recorded when that frame was pushed.

          Raises ``IndexError`` if no frame is open.
          """
          if not self._frames:
              raise IndexError("no frames to pop")

          offset, return_pos = self._frames.pop()
          self._total_offset -= offset

          self.seek(return_pos)
  class BaseDecoder(BaseCoder, metaclass=abc.ABCMeta):
      """
      Common parent of every decoder class. Derive from this class to write a
      custom decoder; subclasses must also provide
      :any:`BaseCoder.from_type_str`.
      """

      # Read by ``ByteStringDecoder.read_data_from_stream`` to decide whether
      # the length and padding checks are enforced.
      strict = True

      @abc.abstractmethod
      def decode(self, stream: ContextFramesBytesIO) -> Any:  # pragma: no cover
          """
          Turn the bytes available in ``stream`` into a python value.
          Implementations should raise :any:`exceptions.DecodingError` when no
          python value can be decoded from the stream.
          """

      def __call__(self, stream: ContextFramesBytesIO) -> Any:
          # Calling a decoder instance is shorthand for ``decode``.
          decoded = self.decode(stream)
          return decoded
  class HeadTailDecoder(BaseDecoder):
      """
      Decodes one dynamic element of a dynamic container (a dynamic array, or
      a sized array or tuple holding dynamic elements). Such an element is
      stored as a pointer (offset) in the container's head section plus the
      actual value in the container's tail section.
      """

      is_dynamic = True

      # Decoder applied to the value found at the pointed-to location.
      tail_decoder = None

      def validate(self):
          """Raise ``ValueError`` unless a ``tail_decoder`` was supplied."""
          super().validate()

          if self.tail_decoder is None:
              raise ValueError("No `tail_decoder` set")

      def decode(self, stream: ContextFramesBytesIO) -> Any:
          """
          Read the 32-byte offset at the cursor, decode the value it points
          to, and leave the cursor just after the offset word.
          """
          # Reading the offset advances the cursor by 32 bytes
          tail_offset = decode_uint_256(stream)

          # Enter a frame that starts at the value's location
          stream.push_frame(tail_offset)

          # Explicit None check (also narrows the type for mypy)
          tail_decoder = self.tail_decoder
          if tail_decoder is None:
              raise AssertionError("`tail_decoder` is None")

          decoded = tail_decoder(stream)

          # Leave the frame, restoring the cursor to just after the offset
          stream.pop_frame()

          return decoded
  class TupleDecoder(BaseDecoder):
      """
      Decodes a tuple by applying one decoder per component, in order.
      """

      # Sequence of component decoders; required.
      decoders = None

      def __init__(self, **kwargs):
          super().__init__(**kwargs)

          # Dynamic components are reached through a pointer in the head, so
          # each of them is wrapped in a ``HeadTailDecoder``.
          wrapped = []
          for component in self.decoders:
              if getattr(component, "is_dynamic", False):
                  component = HeadTailDecoder(tail_decoder=component)
              wrapped.append(component)
          self.decoders = tuple(wrapped)

          # The tuple is dynamic as soon as one component is.
          self.is_dynamic = any(getattr(d, "is_dynamic", False) for d in self.decoders)

      def validate(self):
          """Raise ``ValueError`` unless ``decoders`` was supplied."""
          super().validate()

          if self.decoders is None:
              raise ValueError("No `decoders` set")

      def validate_pointers(self, stream: ContextFramesBytesIO) -> None:
          """
          Check that every pointer in the head targets a legal position in the
          stream, then rewind the stream to where it was on entry.

          Raises ``InvalidPointer`` if a pointer targets the offsets section
          itself or a position at or beyond the end of the stream.
          """
          head_start = stream.tell()

          # Number of 32-byte words counted for the head: ``array_size`` for a
          # decoder exposing that attribute, one word for any other decoder.
          head_words = 0
          for decoder in self.decoders:
              head_words += getattr(decoder, "array_size", 1)

          head_end = head_start + 32 * head_words
          stream_length = len(stream.getbuffer())

          for decoder in self.decoders:
              if not isinstance(decoder, HeadTailDecoder):
                  # not a pointer: let the decoder itself advance the stream
                  decoder(stream)
                  continue

              # the next 32 bytes are a pointer
              pointer = decode_uint_256(stream)
              target = head_start + pointer
              if not (head_end <= target < stream_length):
                  # the data would sit inside the offsets section or past the
                  # end of the stream; neither is valid
                  raise InvalidPointer(
                      "Invalid pointer in tuple at location "
                      f"{stream.tell() - 32} in payload"
                  )

          # rewind so that the real decoding starts from the original position
          stream.seek(head_start)

      @to_tuple
      def decode(self, stream: ContextFramesBytesIO) -> Generator[Any, None, None]:
          """Validate the head pointers, then yield each decoded component."""
          self.validate_pointers(stream)

          yield from (decode_component(stream) for decode_component in self.decoders)

      @parse_tuple_type_str
      def from_type_str(cls, abi_type, registry):
          """
          Build a tuple decoder by looking up a decoder in ``registry`` for
          each component of ``abi_type``.
          """
          component_decoders = tuple(
              registry.get_decoder(component.to_type_str())
              for component in abi_type.components
          )

          return cls(decoders=component_decoders)
  class SingleDecoder(BaseDecoder):
      """
      Template for decoders producing a single value: read raw bytes, split
      them into data and padding, convert the data with ``decoder_fn``, then
      validate the padding.
      """

      # Callable converting the data bytes into the decoded value; required.
      decoder_fn = None

      def validate(self):
          """Raise ``ValueError`` unless ``decoder_fn`` was supplied."""
          super().validate()

          if self.decoder_fn is None:
              raise ValueError("No `decoder_fn` set")

      def validate_padding_bytes(self, value, padding_bytes):
          """Hook for checking the padding; subclasses must override it."""
          raise NotImplementedError("Must be implemented by subclasses")

      def decode(self, stream):
          """Run the read / split / convert / validate pipeline on ``stream``."""
          payload, padding = self.split_data_and_padding(
              self.read_data_from_stream(stream)
          )

          convert = self.decoder_fn
          if convert is None:
              raise AssertionError("`decoder_fn` is None")

          decoded = convert(payload)
          self.validate_padding_bytes(decoded, padding)

          return decoded

      def read_data_from_stream(self, stream):
          """Hook for reading the raw bytes; subclasses must override it."""
          raise NotImplementedError("Must be implemented by subclasses")

      def split_data_and_padding(self, raw_data):
          """Default split: everything is data and the padding is empty."""
          no_padding = b""
          return raw_data, no_padding
  class BaseArrayDecoder(BaseDecoder):
      """
      Shared behaviour of the sized and dynamic array decoders.
      """

      # Decoder applied to each array element; required.
      item_decoder = None

      def __init__(self, **kwargs):
          super().__init__(**kwargs)

          # Dynamic elements are stored behind pointers, so they are decoded
          # through a head-tail decoder
          element_decoder = self.item_decoder
          if element_decoder.is_dynamic:
              self.item_decoder = HeadTailDecoder(tail_decoder=element_decoder)

      def validate(self):
          """Raise ``ValueError`` unless ``item_decoder`` was supplied."""
          super().validate()

          if self.item_decoder is None:
              raise ValueError("No `item_decoder` set")

      @parse_type_str(with_arrlist=True)
      def from_type_str(cls, abi_type, registry):
          """
          Return a ``SizedArrayDecoder`` or ``DynamicArrayDecoder`` depending
          on whether the last array dimension of ``abi_type`` has a size.
          """
          item_decoder = registry.get_decoder(abi_type.item_type.to_type_str())

          last_dimension = abi_type.arrlist[-1]

          if len(last_dimension) != 1:
              # No size given: the array dimension is dynamic
              return DynamicArrayDecoder(item_decoder=item_decoder)

          # Exactly one entry: the array dimension is fixed
          return SizedArrayDecoder(
              array_size=last_dimension[0],
              item_decoder=item_decoder,
          )

      def validate_pointers(self, stream: ContextFramesBytesIO, array_size: int) -> None:
          """
          Check that every element pointer targets a legal position in the
          stream, then rewind the stream to where it was on entry. Nothing is
          read when the elements are not decoded through a ``HeadTailDecoder``.

          Raises ``InvalidPointer`` if a pointer targets the offsets section
          itself or a position at or beyond the end of the stream.
          """
          if not isinstance(self.item_decoder, HeadTailDecoder):
              return

          head_start = stream.tell()
          head_end = head_start + 32 * array_size
          stream_length = len(stream.getbuffer())

          for _ in range(array_size):
              pointer = decode_uint_256(stream)
              target = head_start + pointer

              if not (head_end <= target < stream_length):
                  # the data would sit inside the offsets section or past the
                  # end of the stream; neither is valid
                  raise InvalidPointer(
                      "Invalid pointer in array at location "
                      f"{stream.tell() - 32} in payload"
                  )

          stream.seek(head_start)
  class SizedArrayDecoder(BaseArrayDecoder):
      """
      Decodes an array whose length is fixed by its type.
      """

      # Number of elements to decode.
      array_size = None

      def __init__(self, **kwargs):
          super().__init__(**kwargs)

          # A sized array is dynamic exactly when its elements are.
          self.is_dynamic = self.item_decoder.is_dynamic

      @to_tuple
      def decode(self, stream):
          """Validate the element pointers, then yield ``array_size`` items."""
          decode_item = self.item_decoder
          if decode_item is None:
              raise AssertionError("`item_decoder` is None")

          length = self.array_size
          self.validate_pointers(stream, length)

          yield from (decode_item(stream) for _ in range(length))
  class DynamicArrayDecoder(BaseArrayDecoder):
      """
      Decodes an array whose length is stored in the stream as a 32-byte word
      in front of the elements.
      """

      # A dynamic array is dynamic whatever its elements are
      is_dynamic = True

      @to_tuple
      def decode(self, stream):
          """Read the element count, then yield that many decoded items."""
          item_count = decode_uint_256(stream)

          # Open a frame 32 bytes into the current one so that positions used
          # while decoding the elements are measured from there.
          stream.push_frame(32)

          decode_item = self.item_decoder
          if decode_item is None:
              raise AssertionError("`item_decoder` is None")

          self.validate_pointers(stream, item_count)

          yield from (decode_item(stream) for _ in range(item_count))

          stream.pop_frame()
  class FixedByteSizeDecoder(SingleDecoder):
      """
      Decodes a value occupying a fixed number of bytes in the stream.

      ``data_byte_size`` bytes are read; ``value_bit_size // 8`` of them carry
      the value and the rest is padding. The padding precedes the value when
      ``is_big_endian`` is true and follows it otherwise.
      """

      # Callable converting the value bytes into the decoded value.
      decoder_fn = None
      # Width of the value itself, in bits (must be a multiple of 8).
      value_bit_size = None
      # Number of bytes consumed from the stream for each value.
      data_byte_size = None
      # True: padding comes first; False: padding comes last.
      is_big_endian = None

      def validate(self):
          """
          Check that the decoder is fully and consistently configured.

          Raises ``ValueError`` when a required attribute is missing, when
          ``value_bit_size`` is not a multiple of 8, or when the value is wider
          than ``data_byte_size`` bytes.
          """
          super().validate()

          if self.value_bit_size is None:
              raise ValueError("`value_bit_size` may not be None")
          if self.data_byte_size is None:
              raise ValueError("`data_byte_size` may not be None")
          # NOTE: `SingleDecoder.validate` (called above) already rejects a
          # missing `decoder_fn`; this check is kept as a defensive duplicate.
          if self.decoder_fn is None:
              raise ValueError("`decoder_fn` may not be None")
          if self.is_big_endian is None:
              raise ValueError("`is_big_endian` may not be None")

          if self.value_bit_size % 8 != 0:
              # f-string so that the offending size actually appears in the
              # message instead of the literal placeholder text
              raise ValueError(
                  f"Invalid value bit size: {self.value_bit_size}. "
                  "Must be a multiple of 8"
              )

          if self.value_bit_size > self.data_byte_size * 8:
              raise ValueError("Value byte size exceeds data size")

      def read_data_from_stream(self, stream):
          """
          Read exactly ``data_byte_size`` bytes from ``stream``.

          Raises ``InsufficientDataBytes`` if the stream yields fewer bytes.
          """
          data = stream.read(self.data_byte_size)

          if len(data) != self.data_byte_size:
              raise InsufficientDataBytes(
                  f"Tried to read {self.data_byte_size} bytes, "
                  f"only got {len(data)} bytes."
              )

          return data

      def split_data_and_padding(self, raw_data):
          """
          Split ``raw_data`` into ``(data, padding_bytes)`` according to
          ``is_big_endian``.
          """
          value_byte_size = self._get_value_byte_size()
          padding_size = self.data_byte_size - value_byte_size

          if self.is_big_endian:
              # padding first, value last
              padding_bytes = raw_data[:padding_size]
              data = raw_data[padding_size:]
          else:
              # value first, padding last
              data = raw_data[:value_byte_size]
              padding_bytes = raw_data[value_byte_size:]

          return data, padding_bytes

      def validate_padding_bytes(self, value, padding_bytes):
          """
          Require the padding to consist solely of zero bytes.

          Raises ``NonEmptyPaddingBytes`` otherwise. ``value`` is unused here.
          """
          value_byte_size = self._get_value_byte_size()
          padding_size = self.data_byte_size - value_byte_size

          if padding_bytes != b"\x00" * padding_size:
              raise NonEmptyPaddingBytes(
                  f"Padding bytes were not empty: {repr(padding_bytes)}"
              )

      def _get_value_byte_size(self):
          """Return the width of the value in whole bytes."""
          return self.value_bit_size // 8
  class Fixed32ByteSizeDecoder(FixedByteSizeDecoder):
      """
      Fixed-size decoder that consumes 32 bytes from the stream per value.
      """

      data_byte_size = 32
  class BooleanDecoder(Fixed32ByteSizeDecoder):
      """
      Decodes a boolean stored in the last byte of a 32-byte word.
      """

      value_bit_size = 8
      is_big_endian = True

      @staticmethod
      def decoder_fn(data):
          """
          Map the value byte to ``True`` / ``False``; any byte other than
          ``0x01`` or ``0x00`` raises ``NonEmptyPaddingBytes``.
          """
          if data == b"\x01":
              return True
          if data == b"\x00":
              return False

          raise NonEmptyPaddingBytes(
              f"Boolean must be either 0x0 or 0x1. Got: {repr(data)}"
          )

      @parse_type_str("bool")
      def from_type_str(cls, abi_type, registry):
          # No configuration is taken from the parsed type.
          return cls()
  class AddressDecoder(Fixed32ByteSizeDecoder):
      """
      Decodes a 20-byte address stored at the end of a 32-byte word; the
      value bytes are converted with ``to_normalized_address``.
      """

      # 20 bytes expressed in bits
      value_bit_size = 20 * 8
      is_big_endian = True
      decoder_fn = staticmethod(to_normalized_address)

      @parse_type_str("address")
      def from_type_str(cls, abi_type, registry):
          # No configuration is taken from the parsed type.
          return cls()
  362. #
  363. # Unsigned Integer Decoders
  364. #
  class UnsignedIntegerDecoder(Fixed32ByteSizeDecoder):
      """
      Decodes an unsigned integer stored big-endian in a 32-byte word; the
      value bytes are converted with ``big_endian_to_int``.
      """

      is_big_endian = True
      decoder_fn = staticmethod(big_endian_to_int)

      @parse_type_str("uint")
      def from_type_str(cls, abi_type, registry):
          # ``abi_type.sub`` is passed straight through as the bit width.
          bit_size = abi_type.sub
          return cls(value_bit_size=bit_size)
  # Module-level 256-bit unsigned integer decoder. The other decoders in this
  # module call it to read offsets, array sizes and byte-string lengths.
  decode_uint_256 = UnsignedIntegerDecoder(value_bit_size=256)
  372. #
  373. # Signed Integer Decoders
  374. #
  class SignedIntegerDecoder(Fixed32ByteSizeDecoder):
      """
      Decodes a two's-complement signed integer stored big-endian in a
      32-byte word.
      """

      is_big_endian = True

      def decoder_fn(self, data):
          """Interpret ``data`` as a two's-complement integer."""
          unsigned = big_endian_to_int(data)

          # Values below 2**(bits - 1) have the sign bit clear
          if unsigned < 2 ** (self.value_bit_size - 1):
              return unsigned

          # Sign bit set: shift down by 2**bits to get the negative value
          return unsigned - 2**self.value_bit_size

      def validate_padding_bytes(self, value, padding_bytes):
          """
          Require sign-extended padding: ``0x00`` bytes for a non-negative
          value, ``0xff`` bytes for a negative one.

          Raises ``NonEmptyPaddingBytes`` otherwise.
          """
          padding_size = self.data_byte_size - self._get_value_byte_size()

          fill_byte = b"\x00" if value >= 0 else b"\xff"
          expected_padding_bytes = fill_byte * padding_size

          if padding_bytes != expected_padding_bytes:
              raise NonEmptyPaddingBytes(
                  f"Padding bytes were not empty: {repr(padding_bytes)}"
              )

      @parse_type_str("int")
      def from_type_str(cls, abi_type, registry):
          # ``abi_type.sub`` is passed straight through as the bit width.
          bit_size = abi_type.sub
          return cls(value_bit_size=bit_size)
  397. #
  398. # Bytes1..32
  399. #
  class BytesDecoder(Fixed32ByteSizeDecoder):
      """
      Decodes a fixed-length byte value stored at the start of a 32-byte
      word; the padding follows the value.
      """

      is_big_endian = False

      @staticmethod
      def decoder_fn(data):
          # The value bytes are returned unchanged.
          return data

      @parse_type_str("bytes")
      def from_type_str(cls, abi_type, registry):
          # ``abi_type.sub`` is multiplied by 8 to obtain the bit width.
          bit_size = abi_type.sub * 8
          return cls(value_bit_size=bit_size)
  class BaseFixedDecoder(Fixed32ByteSizeDecoder):
      """
      Shared configuration and validation for the fixed-point decoders.
      """

      # Power of ten by which the subclasses divide the decoded integer.
      frac_places = None
      is_big_endian = True

      def validate(self):
          """
          Raise ``ValueError`` unless ``frac_places`` is set and lies in the
          range (0, 80].
          """
          super().validate()

          if self.frac_places is None:
              raise ValueError("must specify `frac_places`")

          if self.frac_places > 80 or self.frac_places <= 0:
              raise ValueError("`frac_places` must be in range (0, 80]")
  class UnsignedFixedDecoder(BaseFixedDecoder):
      """
      Decodes an unsigned fixed-point number into a ``decimal.Decimal``.
      """

      def decoder_fn(self, data):
          """
          Read ``data`` as a big-endian integer and divide it by
          ``TEN**frac_places`` under ``abi_decimal_context``.
          """
          unscaled = big_endian_to_int(data)

          with decimal.localcontext(abi_decimal_context):
              return decimal.Decimal(unscaled) / TEN**self.frac_places

      @parse_type_str("ufixed")
      def from_type_str(cls, abi_type, registry):
          # ``abi_type.sub`` unpacks into (bit width, fractional places).
          bit_size, places = abi_type.sub

          return cls(value_bit_size=bit_size, frac_places=places)
  class SignedFixedDecoder(BaseFixedDecoder):
      """
      Decodes a two's-complement signed fixed-point number into a
      ``decimal.Decimal``.
      """

      def decoder_fn(self, data):
          """
          Read ``data`` as a two's-complement integer and divide it by
          ``TEN**frac_places`` under ``abi_decimal_context``.
          """
          unscaled = big_endian_to_int(data)

          # Sign bit set: shift down by 2**bits to get the negative value
          if unscaled >= 2 ** (self.value_bit_size - 1):
              unscaled = unscaled - 2**self.value_bit_size

          with decimal.localcontext(abi_decimal_context):
              return decimal.Decimal(unscaled) / TEN**self.frac_places

      def validate_padding_bytes(self, value, padding_bytes):
          """
          Require sign-extended padding: ``0x00`` bytes for a non-negative
          value, ``0xff`` bytes for a negative one.

          Raises ``NonEmptyPaddingBytes`` otherwise.
          """
          padding_size = self.data_byte_size - self._get_value_byte_size()

          fill_byte = b"\x00" if value >= 0 else b"\xff"
          expected_padding_bytes = fill_byte * padding_size

          if padding_bytes != expected_padding_bytes:
              raise NonEmptyPaddingBytes(
                  f"Padding bytes were not empty: {repr(padding_bytes)}"
              )

      @parse_type_str("fixed")
      def from_type_str(cls, abi_type, registry):
          # ``abi_type.sub`` unpacks into (bit width, fractional places).
          bit_size, places = abi_type.sub

          return cls(value_bit_size=bit_size, frac_places=places)
  452. #
  453. # String and Bytes
  454. #
  class ByteStringDecoder(SingleDecoder):
      """
      Decodes a variable-length byte string: a 32-byte length word followed
      by the data, padded up to ``ceil32`` of the length.
      """

      is_dynamic = True

      @staticmethod
      def decoder_fn(data):
          # The data bytes are returned unchanged.
          return data

      def read_data_from_stream(self, stream):
          """
          Read the length word and the padded data, returning only the data.

          When ``self.strict`` is true, raises ``InsufficientDataBytes`` if
          fewer than the padded number of bytes could be read and
          ``NonEmptyPaddingBytes`` if the padding is not all zero bytes. When
          it is false, neither check is performed.
          """
          length = decode_uint_256(stream)
          padded_length = ceil32(length)

          chunk = stream.read(padded_length)

          if self.strict:
              received = len(chunk)
              if received < padded_length:
                  raise InsufficientDataBytes(
                      f"Tried to read {padded_length} bytes, only got {received} bytes"
                  )

              padding_bytes = chunk[length:]
              expected_padding = b"\x00" * (padded_length - length)
              if padding_bytes != expected_padding:
                  raise NonEmptyPaddingBytes(
                      f"Padding bytes were not empty: {repr(padding_bytes)}"
                  )

          return chunk[:length]

      def validate_padding_bytes(self, value, padding_bytes):
          # Intentionally a no-op: padding is checked while reading the data.
          pass

      @parse_type_str("bytes")
      def from_type_str(cls, abi_type, registry):
          # No configuration is taken from the parsed type.
          return cls()
  class StringDecoder(ByteStringDecoder):
      """
      Decodes a variable-length byte string and interprets it as UTF-8 text.
      """

      def __init__(self, handle_string_errors="strict"):
          # Passed as the ``errors`` argument of ``bytes.decode``; must be set
          # before the parent initialiser runs, as in the original order.
          self.bytes_errors = handle_string_errors
          super().__init__()

      @parse_type_str("string")
      def from_type_str(cls, abi_type, registry):
          # No configuration is taken from the parsed type.
          return cls()

      def decode(self, stream):
          """
          Same pipeline as the parent class, except that the configured error
          handling mode is forwarded to ``decoder_fn``.
          """
          payload, padding = self.split_data_and_padding(
              self.read_data_from_stream(stream)
          )

          text = self.decoder_fn(payload, self.bytes_errors)
          self.validate_padding_bytes(text, padding)

          return text

      @staticmethod
      def decoder_fn(data, handle_string_errors="strict"):
          """Decode ``data`` as UTF-8 using the given error handling mode."""
          decoded_text = data.decode("utf-8", errors=handle_string_errors)
          return decoded_text