lazy.py 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185
  1. from collections.abc import (
  2. Iterable,
  3. Sequence,
  4. )
  5. from .atomic import (
  6. Atomic,
  7. )
  8. from .codec import (
  9. consume_length_prefix,
  10. consume_payload,
  11. )
  12. from .exceptions import (
  13. DecodingError,
  14. )
  15. def decode_lazy(rlp, sedes=None, **sedes_kwargs):
  16. """
  17. Decode an RLP encoded object in a lazy fashion.
  18. If the encoded object is a bytestring, this function acts similar to
  19. :func:`rlp.decode`. If it is a list however, a :class:`LazyList` is
  20. returned instead. This object will decode the string lazily, avoiding
  21. both horizontal and vertical traversing as much as possible.
  22. The way `sedes` is applied depends on the decoded object: If it is a string
  23. `sedes` deserializes it as a whole; if it is a list, each element is
  24. deserialized individually. In both cases, `sedes_kwargs` are passed on.
  25. Note that, if a deserializer is used, only "horizontal" but not
  26. "vertical lazyness" can be preserved.
  27. :param rlp: the RLP string to decode
  28. :param sedes: an object implementing a method ``deserialize(code)`` which
  29. is used as described above, or ``None`` if no
  30. deserialization should be performed
  31. :param `**sedes_kwargs`: additional keyword arguments that will be passed
  32. to the deserializers
  33. :returns: either the already decoded and deserialized object (if encoded as
  34. a string) or an instance of :class:`rlp.LazyList`
  35. """
  36. item, end = consume_item_lazy(rlp, 0)
  37. if end != len(rlp):
  38. raise DecodingError("RLP length prefix announced wrong length", rlp)
  39. if isinstance(item, LazyList):
  40. item.sedes = sedes
  41. item.sedes_kwargs = sedes_kwargs
  42. return item
  43. elif sedes:
  44. return sedes.deserialize(item, **sedes_kwargs)
  45. else:
  46. return item
  47. def consume_item_lazy(rlp, start):
  48. """
  49. Read an item from an RLP string lazily.
  50. If the length prefix announces a string, the string is read; if it
  51. announces a list, a :class:`LazyList` is created.
  52. :param rlp: the rlp string to read from
  53. :param start: the position at which to start reading
  54. :returns: a tuple ``(item, end)`` where ``item`` is the read string or a
  55. :class:`LazyList` and ``end`` is the position of the first
  56. unprocessed byte.
  57. """
  58. p, t, l, s = consume_length_prefix(rlp, start)
  59. if t is bytes:
  60. item, _, end = consume_payload(rlp, p, s, bytes, l)
  61. return item, end
  62. else:
  63. assert t is list
  64. return LazyList(rlp, s, s + l), s + l
  65. class LazyList(Sequence):
  66. """
  67. A RLP encoded list which decodes itself when necessary.
  68. Both indexing with positive indices and iterating are supported.
  69. Getting the length with :func:`len` is possible as well but requires full
  70. horizontal encoding.
  71. :param rlp: the rlp string in which the list is encoded
  72. :param start: the position of the first payload byte of the encoded list
  73. :param end: the position of the last payload byte of the encoded list
  74. :param sedes: a sedes object which deserializes each element of the list,
  75. or ``None`` for no deserialization
  76. :param `**sedes_kwargs`: keyword arguments which will be passed on to the
  77. deserializer
  78. """
  79. def __init__(self, rlp, start, end, sedes=None, **sedes_kwargs):
  80. self.rlp = rlp
  81. self.start = start
  82. self.end = end
  83. self.index = start
  84. self._elements = []
  85. self._len = None
  86. self.sedes = sedes
  87. self.sedes_kwargs = sedes_kwargs
  88. def next(self):
  89. if self.index == self.end:
  90. self._len = len(self._elements)
  91. raise StopIteration
  92. assert self.index < self.end
  93. item, end = consume_item_lazy(self.rlp, self.index)
  94. self.index = end
  95. if self.sedes:
  96. item = self.sedes.deserialize(item, **self.sedes_kwargs)
  97. self._elements.append(item)
  98. return item
  99. def __getitem__(self, i):
  100. if isinstance(i, slice):
  101. if i.step is not None:
  102. raise TypeError("Step not supported")
  103. start = i.start
  104. stop = i.stop
  105. else:
  106. start = i
  107. stop = i + 1
  108. if stop is None:
  109. stop = self.end - 1
  110. try:
  111. while len(self._elements) < stop:
  112. self.next()
  113. except StopIteration:
  114. assert self.index == self.end
  115. raise IndexError("Index %s out of range" % i)
  116. if isinstance(i, slice):
  117. return self._elements[start:stop]
  118. else:
  119. return self._elements[start]
  120. def __len__(self):
  121. if not self._len:
  122. try:
  123. while True:
  124. self.next()
  125. except StopIteration:
  126. self._len = len(self._elements)
  127. return self._len
  128. def peek(rlp, index, sedes=None):
  129. """
  130. Get a specific element from an rlp encoded nested list.
  131. This function uses :func:`rlp.decode_lazy` and, thus, decodes only the
  132. necessary parts of the string.
  133. Usage example::
  134. >>> import rlp
  135. >>> rlpdata = rlp.encode([1, 2, [3, [4, 5]]])
  136. >>> rlp.peek(rlpdata, 0, rlp.sedes.big_endian_int)
  137. 1
  138. >>> rlp.peek(rlpdata, [2, 0], rlp.sedes.big_endian_int)
  139. 3
  140. :param rlp: the rlp string
  141. :param index: the index of the element to peek at (can be a list for
  142. nested data)
  143. :param sedes: a sedes used to deserialize the peeked at object, or `None`
  144. if no deserialization should be performed
  145. :raises: :exc:`IndexError` if `index` is invalid (out of range or too many
  146. levels)
  147. """
  148. ll = decode_lazy(rlp)
  149. if not isinstance(index, Iterable):
  150. index = [index]
  151. for i in index:
  152. if isinstance(ll, Atomic):
  153. raise IndexError("Too many indices given")
  154. ll = ll[i]
  155. if sedes:
  156. return sedes.deserialize(ll)
  157. else:
  158. return ll