fallback.py 32 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929
  1. """Fallback pure Python implementation of msgpack"""
  2. import struct
  3. import sys
  4. from datetime import datetime as _DateTime
  5. if hasattr(sys, "pypy_version_info"):
  6. from __pypy__ import newlist_hint
  7. from __pypy__.builders import BytesBuilder
  8. _USING_STRINGBUILDER = True
  9. class BytesIO:
  10. def __init__(self, s=b""):
  11. if s:
  12. self.builder = BytesBuilder(len(s))
  13. self.builder.append(s)
  14. else:
  15. self.builder = BytesBuilder()
  16. def write(self, s):
  17. if isinstance(s, memoryview):
  18. s = s.tobytes()
  19. elif isinstance(s, bytearray):
  20. s = bytes(s)
  21. self.builder.append(s)
  22. def getvalue(self):
  23. return self.builder.build()
  24. else:
  25. from io import BytesIO
  26. _USING_STRINGBUILDER = False
  27. def newlist_hint(size):
  28. return []
  29. from .exceptions import BufferFull, ExtraData, FormatError, OutOfData, StackError
  30. from .ext import ExtType, Timestamp
  31. EX_SKIP = 0
  32. EX_CONSTRUCT = 1
  33. EX_READ_ARRAY_HEADER = 2
  34. EX_READ_MAP_HEADER = 3
  35. TYPE_IMMEDIATE = 0
  36. TYPE_ARRAY = 1
  37. TYPE_MAP = 2
  38. TYPE_RAW = 3
  39. TYPE_BIN = 4
  40. TYPE_EXT = 5
  41. DEFAULT_RECURSE_LIMIT = 511
  42. def _check_type_strict(obj, t, type=type, tuple=tuple):
  43. if type(t) is tuple:
  44. return type(obj) in t
  45. else:
  46. return type(obj) is t
  47. def _get_data_from_buffer(obj):
  48. view = memoryview(obj)
  49. if view.itemsize != 1:
  50. raise ValueError("cannot unpack from multi-byte object")
  51. return view
  52. def unpackb(packed, **kwargs):
  53. """
  54. Unpack an object from `packed`.
  55. Raises ``ExtraData`` when *packed* contains extra bytes.
  56. Raises ``ValueError`` when *packed* is incomplete.
  57. Raises ``FormatError`` when *packed* is not valid msgpack.
  58. Raises ``StackError`` when *packed* contains too nested.
  59. Other exceptions can be raised during unpacking.
  60. See :class:`Unpacker` for options.
  61. """
  62. unpacker = Unpacker(None, max_buffer_size=len(packed), **kwargs)
  63. unpacker.feed(packed)
  64. try:
  65. ret = unpacker._unpack()
  66. except OutOfData:
  67. raise ValueError("Unpack failed: incomplete input")
  68. except RecursionError:
  69. raise StackError
  70. if unpacker._got_extradata():
  71. raise ExtraData(ret, unpacker._get_extradata())
  72. return ret
  73. _NO_FORMAT_USED = ""
  74. _MSGPACK_HEADERS = {
  75. 0xC4: (1, _NO_FORMAT_USED, TYPE_BIN),
  76. 0xC5: (2, ">H", TYPE_BIN),
  77. 0xC6: (4, ">I", TYPE_BIN),
  78. 0xC7: (2, "Bb", TYPE_EXT),
  79. 0xC8: (3, ">Hb", TYPE_EXT),
  80. 0xC9: (5, ">Ib", TYPE_EXT),
  81. 0xCA: (4, ">f"),
  82. 0xCB: (8, ">d"),
  83. 0xCC: (1, _NO_FORMAT_USED),
  84. 0xCD: (2, ">H"),
  85. 0xCE: (4, ">I"),
  86. 0xCF: (8, ">Q"),
  87. 0xD0: (1, "b"),
  88. 0xD1: (2, ">h"),
  89. 0xD2: (4, ">i"),
  90. 0xD3: (8, ">q"),
  91. 0xD4: (1, "b1s", TYPE_EXT),
  92. 0xD5: (2, "b2s", TYPE_EXT),
  93. 0xD6: (4, "b4s", TYPE_EXT),
  94. 0xD7: (8, "b8s", TYPE_EXT),
  95. 0xD8: (16, "b16s", TYPE_EXT),
  96. 0xD9: (1, _NO_FORMAT_USED, TYPE_RAW),
  97. 0xDA: (2, ">H", TYPE_RAW),
  98. 0xDB: (4, ">I", TYPE_RAW),
  99. 0xDC: (2, ">H", TYPE_ARRAY),
  100. 0xDD: (4, ">I", TYPE_ARRAY),
  101. 0xDE: (2, ">H", TYPE_MAP),
  102. 0xDF: (4, ">I", TYPE_MAP),
  103. }
  104. class Unpacker:
  105. """Streaming unpacker.
  106. Arguments:
  107. :param file_like:
  108. File-like object having `.read(n)` method.
  109. If specified, unpacker reads serialized data from it and `.feed()` is not usable.
  110. :param int read_size:
  111. Used as `file_like.read(read_size)`. (default: `min(16*1024, max_buffer_size)`)
  112. :param bool use_list:
  113. If true, unpack msgpack array to Python list.
  114. Otherwise, unpack to Python tuple. (default: True)
  115. :param bool raw:
  116. If true, unpack msgpack raw to Python bytes.
  117. Otherwise, unpack to Python str by decoding with UTF-8 encoding (default).
  118. :param int timestamp:
  119. Control how timestamp type is unpacked:
  120. 0 - Timestamp
  121. 1 - float (Seconds from the EPOCH)
  122. 2 - int (Nanoseconds from the EPOCH)
  123. 3 - datetime.datetime (UTC).
  124. :param bool strict_map_key:
  125. If true (default), only str or bytes are accepted for map (dict) keys.
  126. :param object_hook:
  127. When specified, it should be callable.
  128. Unpacker calls it with a dict argument after unpacking msgpack map.
  129. (See also simplejson)
  130. :param object_pairs_hook:
  131. When specified, it should be callable.
  132. Unpacker calls it with a list of key-value pairs after unpacking msgpack map.
  133. (See also simplejson)
  134. :param str unicode_errors:
  135. The error handler for decoding unicode. (default: 'strict')
  136. This option should be used only when you have msgpack data which
  137. contains invalid UTF-8 string.
  138. :param int max_buffer_size:
  139. Limits size of data waiting unpacked. 0 means 2**32-1.
  140. The default value is 100*1024*1024 (100MiB).
  141. Raises `BufferFull` exception when it is insufficient.
  142. You should set this parameter when unpacking data from untrusted source.
  143. :param int max_str_len:
  144. Deprecated, use *max_buffer_size* instead.
  145. Limits max length of str. (default: max_buffer_size)
  146. :param int max_bin_len:
  147. Deprecated, use *max_buffer_size* instead.
  148. Limits max length of bin. (default: max_buffer_size)
  149. :param int max_array_len:
  150. Limits max length of array.
  151. (default: max_buffer_size)
  152. :param int max_map_len:
  153. Limits max length of map.
  154. (default: max_buffer_size//2)
  155. :param int max_ext_len:
  156. Deprecated, use *max_buffer_size* instead.
  157. Limits max size of ext type. (default: max_buffer_size)
  158. Example of streaming deserialize from file-like object::
  159. unpacker = Unpacker(file_like)
  160. for o in unpacker:
  161. process(o)
  162. Example of streaming deserialize from socket::
  163. unpacker = Unpacker()
  164. while True:
  165. buf = sock.recv(1024**2)
  166. if not buf:
  167. break
  168. unpacker.feed(buf)
  169. for o in unpacker:
  170. process(o)
  171. Raises ``ExtraData`` when *packed* contains extra bytes.
  172. Raises ``OutOfData`` when *packed* is incomplete.
  173. Raises ``FormatError`` when *packed* is not valid msgpack.
  174. Raises ``StackError`` when *packed* contains too nested.
  175. Other exceptions can be raised during unpacking.
  176. """
  177. def __init__(
  178. self,
  179. file_like=None,
  180. *,
  181. read_size=0,
  182. use_list=True,
  183. raw=False,
  184. timestamp=0,
  185. strict_map_key=True,
  186. object_hook=None,
  187. object_pairs_hook=None,
  188. list_hook=None,
  189. unicode_errors=None,
  190. max_buffer_size=100 * 1024 * 1024,
  191. ext_hook=ExtType,
  192. max_str_len=-1,
  193. max_bin_len=-1,
  194. max_array_len=-1,
  195. max_map_len=-1,
  196. max_ext_len=-1,
  197. ):
  198. if unicode_errors is None:
  199. unicode_errors = "strict"
  200. if file_like is None:
  201. self._feeding = True
  202. else:
  203. if not callable(file_like.read):
  204. raise TypeError("`file_like.read` must be callable")
  205. self.file_like = file_like
  206. self._feeding = False
  207. #: array of bytes fed.
  208. self._buffer = bytearray()
  209. #: Which position we currently reads
  210. self._buff_i = 0
  211. # When Unpacker is used as an iterable, between the calls to next(),
  212. # the buffer is not "consumed" completely, for efficiency sake.
  213. # Instead, it is done sloppily. To make sure we raise BufferFull at
  214. # the correct moments, we have to keep track of how sloppy we were.
  215. # Furthermore, when the buffer is incomplete (that is: in the case
  216. # we raise an OutOfData) we need to rollback the buffer to the correct
  217. # state, which _buf_checkpoint records.
  218. self._buf_checkpoint = 0
  219. if not max_buffer_size:
  220. max_buffer_size = 2**31 - 1
  221. if max_str_len == -1:
  222. max_str_len = max_buffer_size
  223. if max_bin_len == -1:
  224. max_bin_len = max_buffer_size
  225. if max_array_len == -1:
  226. max_array_len = max_buffer_size
  227. if max_map_len == -1:
  228. max_map_len = max_buffer_size // 2
  229. if max_ext_len == -1:
  230. max_ext_len = max_buffer_size
  231. self._max_buffer_size = max_buffer_size
  232. if read_size > self._max_buffer_size:
  233. raise ValueError("read_size must be smaller than max_buffer_size")
  234. self._read_size = read_size or min(self._max_buffer_size, 16 * 1024)
  235. self._raw = bool(raw)
  236. self._strict_map_key = bool(strict_map_key)
  237. self._unicode_errors = unicode_errors
  238. self._use_list = use_list
  239. if not (0 <= timestamp <= 3):
  240. raise ValueError("timestamp must be 0..3")
  241. self._timestamp = timestamp
  242. self._list_hook = list_hook
  243. self._object_hook = object_hook
  244. self._object_pairs_hook = object_pairs_hook
  245. self._ext_hook = ext_hook
  246. self._max_str_len = max_str_len
  247. self._max_bin_len = max_bin_len
  248. self._max_array_len = max_array_len
  249. self._max_map_len = max_map_len
  250. self._max_ext_len = max_ext_len
  251. self._stream_offset = 0
  252. if list_hook is not None and not callable(list_hook):
  253. raise TypeError("`list_hook` is not callable")
  254. if object_hook is not None and not callable(object_hook):
  255. raise TypeError("`object_hook` is not callable")
  256. if object_pairs_hook is not None and not callable(object_pairs_hook):
  257. raise TypeError("`object_pairs_hook` is not callable")
  258. if object_hook is not None and object_pairs_hook is not None:
  259. raise TypeError("object_pairs_hook and object_hook are mutually exclusive")
  260. if not callable(ext_hook):
  261. raise TypeError("`ext_hook` is not callable")
  262. def feed(self, next_bytes):
  263. assert self._feeding
  264. view = _get_data_from_buffer(next_bytes)
  265. if len(self._buffer) - self._buff_i + len(view) > self._max_buffer_size:
  266. raise BufferFull
  267. # Strip buffer before checkpoint before reading file.
  268. if self._buf_checkpoint > 0:
  269. del self._buffer[: self._buf_checkpoint]
  270. self._buff_i -= self._buf_checkpoint
  271. self._buf_checkpoint = 0
  272. # Use extend here: INPLACE_ADD += doesn't reliably typecast memoryview in jython
  273. self._buffer.extend(view)
  274. view.release()
  275. def _consume(self):
  276. """Gets rid of the used parts of the buffer."""
  277. self._stream_offset += self._buff_i - self._buf_checkpoint
  278. self._buf_checkpoint = self._buff_i
  279. def _got_extradata(self):
  280. return self._buff_i < len(self._buffer)
  281. def _get_extradata(self):
  282. return self._buffer[self._buff_i :]
  283. def read_bytes(self, n):
  284. ret = self._read(n, raise_outofdata=False)
  285. self._consume()
  286. return ret
  287. def _read(self, n, raise_outofdata=True):
  288. # (int) -> bytearray
  289. self._reserve(n, raise_outofdata=raise_outofdata)
  290. i = self._buff_i
  291. ret = self._buffer[i : i + n]
  292. self._buff_i = i + len(ret)
  293. return ret
  294. def _reserve(self, n, raise_outofdata=True):
  295. remain_bytes = len(self._buffer) - self._buff_i - n
  296. # Fast path: buffer has n bytes already
  297. if remain_bytes >= 0:
  298. return
  299. if self._feeding:
  300. self._buff_i = self._buf_checkpoint
  301. raise OutOfData
  302. # Strip buffer before checkpoint before reading file.
  303. if self._buf_checkpoint > 0:
  304. del self._buffer[: self._buf_checkpoint]
  305. self._buff_i -= self._buf_checkpoint
  306. self._buf_checkpoint = 0
  307. # Read from file
  308. remain_bytes = -remain_bytes
  309. if remain_bytes + len(self._buffer) > self._max_buffer_size:
  310. raise BufferFull
  311. while remain_bytes > 0:
  312. to_read_bytes = max(self._read_size, remain_bytes)
  313. read_data = self.file_like.read(to_read_bytes)
  314. if not read_data:
  315. break
  316. assert isinstance(read_data, bytes)
  317. self._buffer += read_data
  318. remain_bytes -= len(read_data)
  319. if len(self._buffer) < n + self._buff_i and raise_outofdata:
  320. self._buff_i = 0 # rollback
  321. raise OutOfData
  322. def _read_header(self):
  323. typ = TYPE_IMMEDIATE
  324. n = 0
  325. obj = None
  326. self._reserve(1)
  327. b = self._buffer[self._buff_i]
  328. self._buff_i += 1
  329. if b & 0b10000000 == 0:
  330. obj = b
  331. elif b & 0b11100000 == 0b11100000:
  332. obj = -1 - (b ^ 0xFF)
  333. elif b & 0b11100000 == 0b10100000:
  334. n = b & 0b00011111
  335. typ = TYPE_RAW
  336. if n > self._max_str_len:
  337. raise ValueError(f"{n} exceeds max_str_len({self._max_str_len})")
  338. obj = self._read(n)
  339. elif b & 0b11110000 == 0b10010000:
  340. n = b & 0b00001111
  341. typ = TYPE_ARRAY
  342. if n > self._max_array_len:
  343. raise ValueError(f"{n} exceeds max_array_len({self._max_array_len})")
  344. elif b & 0b11110000 == 0b10000000:
  345. n = b & 0b00001111
  346. typ = TYPE_MAP
  347. if n > self._max_map_len:
  348. raise ValueError(f"{n} exceeds max_map_len({self._max_map_len})")
  349. elif b == 0xC0:
  350. obj = None
  351. elif b == 0xC2:
  352. obj = False
  353. elif b == 0xC3:
  354. obj = True
  355. elif 0xC4 <= b <= 0xC6:
  356. size, fmt, typ = _MSGPACK_HEADERS[b]
  357. self._reserve(size)
  358. if len(fmt) > 0:
  359. n = struct.unpack_from(fmt, self._buffer, self._buff_i)[0]
  360. else:
  361. n = self._buffer[self._buff_i]
  362. self._buff_i += size
  363. if n > self._max_bin_len:
  364. raise ValueError(f"{n} exceeds max_bin_len({self._max_bin_len})")
  365. obj = self._read(n)
  366. elif 0xC7 <= b <= 0xC9:
  367. size, fmt, typ = _MSGPACK_HEADERS[b]
  368. self._reserve(size)
  369. L, n = struct.unpack_from(fmt, self._buffer, self._buff_i)
  370. self._buff_i += size
  371. if L > self._max_ext_len:
  372. raise ValueError(f"{L} exceeds max_ext_len({self._max_ext_len})")
  373. obj = self._read(L)
  374. elif 0xCA <= b <= 0xD3:
  375. size, fmt = _MSGPACK_HEADERS[b]
  376. self._reserve(size)
  377. if len(fmt) > 0:
  378. obj = struct.unpack_from(fmt, self._buffer, self._buff_i)[0]
  379. else:
  380. obj = self._buffer[self._buff_i]
  381. self._buff_i += size
  382. elif 0xD4 <= b <= 0xD8:
  383. size, fmt, typ = _MSGPACK_HEADERS[b]
  384. if self._max_ext_len < size:
  385. raise ValueError(f"{size} exceeds max_ext_len({self._max_ext_len})")
  386. self._reserve(size + 1)
  387. n, obj = struct.unpack_from(fmt, self._buffer, self._buff_i)
  388. self._buff_i += size + 1
  389. elif 0xD9 <= b <= 0xDB:
  390. size, fmt, typ = _MSGPACK_HEADERS[b]
  391. self._reserve(size)
  392. if len(fmt) > 0:
  393. (n,) = struct.unpack_from(fmt, self._buffer, self._buff_i)
  394. else:
  395. n = self._buffer[self._buff_i]
  396. self._buff_i += size
  397. if n > self._max_str_len:
  398. raise ValueError(f"{n} exceeds max_str_len({self._max_str_len})")
  399. obj = self._read(n)
  400. elif 0xDC <= b <= 0xDD:
  401. size, fmt, typ = _MSGPACK_HEADERS[b]
  402. self._reserve(size)
  403. (n,) = struct.unpack_from(fmt, self._buffer, self._buff_i)
  404. self._buff_i += size
  405. if n > self._max_array_len:
  406. raise ValueError(f"{n} exceeds max_array_len({self._max_array_len})")
  407. elif 0xDE <= b <= 0xDF:
  408. size, fmt, typ = _MSGPACK_HEADERS[b]
  409. self._reserve(size)
  410. (n,) = struct.unpack_from(fmt, self._buffer, self._buff_i)
  411. self._buff_i += size
  412. if n > self._max_map_len:
  413. raise ValueError(f"{n} exceeds max_map_len({self._max_map_len})")
  414. else:
  415. raise FormatError("Unknown header: 0x%x" % b)
  416. return typ, n, obj
  417. def _unpack(self, execute=EX_CONSTRUCT):
  418. typ, n, obj = self._read_header()
  419. if execute == EX_READ_ARRAY_HEADER:
  420. if typ != TYPE_ARRAY:
  421. raise ValueError("Expected array")
  422. return n
  423. if execute == EX_READ_MAP_HEADER:
  424. if typ != TYPE_MAP:
  425. raise ValueError("Expected map")
  426. return n
  427. # TODO should we eliminate the recursion?
  428. if typ == TYPE_ARRAY:
  429. if execute == EX_SKIP:
  430. for i in range(n):
  431. # TODO check whether we need to call `list_hook`
  432. self._unpack(EX_SKIP)
  433. return
  434. ret = newlist_hint(n)
  435. for i in range(n):
  436. ret.append(self._unpack(EX_CONSTRUCT))
  437. if self._list_hook is not None:
  438. ret = self._list_hook(ret)
  439. # TODO is the interaction between `list_hook` and `use_list` ok?
  440. return ret if self._use_list else tuple(ret)
  441. if typ == TYPE_MAP:
  442. if execute == EX_SKIP:
  443. for i in range(n):
  444. # TODO check whether we need to call hooks
  445. self._unpack(EX_SKIP)
  446. self._unpack(EX_SKIP)
  447. return
  448. if self._object_pairs_hook is not None:
  449. ret = self._object_pairs_hook(
  450. (self._unpack(EX_CONSTRUCT), self._unpack(EX_CONSTRUCT)) for _ in range(n)
  451. )
  452. else:
  453. ret = {}
  454. for _ in range(n):
  455. key = self._unpack(EX_CONSTRUCT)
  456. if self._strict_map_key and type(key) not in (str, bytes):
  457. raise ValueError("%s is not allowed for map key" % str(type(key)))
  458. if isinstance(key, str):
  459. key = sys.intern(key)
  460. ret[key] = self._unpack(EX_CONSTRUCT)
  461. if self._object_hook is not None:
  462. ret = self._object_hook(ret)
  463. return ret
  464. if execute == EX_SKIP:
  465. return
  466. if typ == TYPE_RAW:
  467. if self._raw:
  468. obj = bytes(obj)
  469. else:
  470. obj = obj.decode("utf_8", self._unicode_errors)
  471. return obj
  472. if typ == TYPE_BIN:
  473. return bytes(obj)
  474. if typ == TYPE_EXT:
  475. if n == -1: # timestamp
  476. ts = Timestamp.from_bytes(bytes(obj))
  477. if self._timestamp == 1:
  478. return ts.to_unix()
  479. elif self._timestamp == 2:
  480. return ts.to_unix_nano()
  481. elif self._timestamp == 3:
  482. return ts.to_datetime()
  483. else:
  484. return ts
  485. else:
  486. return self._ext_hook(n, bytes(obj))
  487. assert typ == TYPE_IMMEDIATE
  488. return obj
  489. def __iter__(self):
  490. return self
  491. def __next__(self):
  492. try:
  493. ret = self._unpack(EX_CONSTRUCT)
  494. self._consume()
  495. return ret
  496. except OutOfData:
  497. self._consume()
  498. raise StopIteration
  499. except RecursionError:
  500. raise StackError
  501. next = __next__
  502. def skip(self):
  503. self._unpack(EX_SKIP)
  504. self._consume()
  505. def unpack(self):
  506. try:
  507. ret = self._unpack(EX_CONSTRUCT)
  508. except RecursionError:
  509. raise StackError
  510. self._consume()
  511. return ret
  512. def read_array_header(self):
  513. ret = self._unpack(EX_READ_ARRAY_HEADER)
  514. self._consume()
  515. return ret
  516. def read_map_header(self):
  517. ret = self._unpack(EX_READ_MAP_HEADER)
  518. self._consume()
  519. return ret
  520. def tell(self):
  521. return self._stream_offset
  522. class Packer:
  523. """
  524. MessagePack Packer
  525. Usage::
  526. packer = Packer()
  527. astream.write(packer.pack(a))
  528. astream.write(packer.pack(b))
  529. Packer's constructor has some keyword arguments:
  530. :param default:
  531. When specified, it should be callable.
  532. Convert user type to builtin type that Packer supports.
  533. See also simplejson's document.
  534. :param bool use_single_float:
  535. Use single precision float type for float. (default: False)
  536. :param bool autoreset:
  537. Reset buffer after each pack and return its content as `bytes`. (default: True).
  538. If set this to false, use `bytes()` to get content and `.reset()` to clear buffer.
  539. :param bool use_bin_type:
  540. Use bin type introduced in msgpack spec 2.0 for bytes.
  541. It also enables str8 type for unicode. (default: True)
  542. :param bool strict_types:
  543. If set to true, types will be checked to be exact. Derived classes
  544. from serializable types will not be serialized and will be
  545. treated as unsupported type and forwarded to default.
  546. Additionally tuples will not be serialized as lists.
  547. This is useful when trying to implement accurate serialization
  548. for python types.
  549. :param bool datetime:
  550. If set to true, datetime with tzinfo is packed into Timestamp type.
  551. Note that the tzinfo is stripped in the timestamp.
  552. You can get UTC datetime with `timestamp=3` option of the Unpacker.
  553. :param str unicode_errors:
  554. The error handler for encoding unicode. (default: 'strict')
  555. DO NOT USE THIS!! This option is kept for very specific usage.
  556. :param int buf_size:
  557. Internal buffer size. This option is used only for C implementation.
  558. """
  559. def __init__(
  560. self,
  561. *,
  562. default=None,
  563. use_single_float=False,
  564. autoreset=True,
  565. use_bin_type=True,
  566. strict_types=False,
  567. datetime=False,
  568. unicode_errors=None,
  569. buf_size=None,
  570. ):
  571. self._strict_types = strict_types
  572. self._use_float = use_single_float
  573. self._autoreset = autoreset
  574. self._use_bin_type = use_bin_type
  575. self._buffer = BytesIO()
  576. self._datetime = bool(datetime)
  577. self._unicode_errors = unicode_errors or "strict"
  578. if default is not None and not callable(default):
  579. raise TypeError("default must be callable")
  580. self._default = default
  581. def _pack(
  582. self,
  583. obj,
  584. nest_limit=DEFAULT_RECURSE_LIMIT,
  585. check=isinstance,
  586. check_type_strict=_check_type_strict,
  587. ):
  588. default_used = False
  589. if self._strict_types:
  590. check = check_type_strict
  591. list_types = list
  592. else:
  593. list_types = (list, tuple)
  594. while True:
  595. if nest_limit < 0:
  596. raise ValueError("recursion limit exceeded")
  597. if obj is None:
  598. return self._buffer.write(b"\xc0")
  599. if check(obj, bool):
  600. if obj:
  601. return self._buffer.write(b"\xc3")
  602. return self._buffer.write(b"\xc2")
  603. if check(obj, int):
  604. if 0 <= obj < 0x80:
  605. return self._buffer.write(struct.pack("B", obj))
  606. if -0x20 <= obj < 0:
  607. return self._buffer.write(struct.pack("b", obj))
  608. if 0x80 <= obj <= 0xFF:
  609. return self._buffer.write(struct.pack("BB", 0xCC, obj))
  610. if -0x80 <= obj < 0:
  611. return self._buffer.write(struct.pack(">Bb", 0xD0, obj))
  612. if 0xFF < obj <= 0xFFFF:
  613. return self._buffer.write(struct.pack(">BH", 0xCD, obj))
  614. if -0x8000 <= obj < -0x80:
  615. return self._buffer.write(struct.pack(">Bh", 0xD1, obj))
  616. if 0xFFFF < obj <= 0xFFFFFFFF:
  617. return self._buffer.write(struct.pack(">BI", 0xCE, obj))
  618. if -0x80000000 <= obj < -0x8000:
  619. return self._buffer.write(struct.pack(">Bi", 0xD2, obj))
  620. if 0xFFFFFFFF < obj <= 0xFFFFFFFFFFFFFFFF:
  621. return self._buffer.write(struct.pack(">BQ", 0xCF, obj))
  622. if -0x8000000000000000 <= obj < -0x80000000:
  623. return self._buffer.write(struct.pack(">Bq", 0xD3, obj))
  624. if not default_used and self._default is not None:
  625. obj = self._default(obj)
  626. default_used = True
  627. continue
  628. raise OverflowError("Integer value out of range")
  629. if check(obj, (bytes, bytearray)):
  630. n = len(obj)
  631. if n >= 2**32:
  632. raise ValueError("%s is too large" % type(obj).__name__)
  633. self._pack_bin_header(n)
  634. return self._buffer.write(obj)
  635. if check(obj, str):
  636. obj = obj.encode("utf-8", self._unicode_errors)
  637. n = len(obj)
  638. if n >= 2**32:
  639. raise ValueError("String is too large")
  640. self._pack_raw_header(n)
  641. return self._buffer.write(obj)
  642. if check(obj, memoryview):
  643. n = obj.nbytes
  644. if n >= 2**32:
  645. raise ValueError("Memoryview is too large")
  646. self._pack_bin_header(n)
  647. return self._buffer.write(obj)
  648. if check(obj, float):
  649. if self._use_float:
  650. return self._buffer.write(struct.pack(">Bf", 0xCA, obj))
  651. return self._buffer.write(struct.pack(">Bd", 0xCB, obj))
  652. if check(obj, (ExtType, Timestamp)):
  653. if check(obj, Timestamp):
  654. code = -1
  655. data = obj.to_bytes()
  656. else:
  657. code = obj.code
  658. data = obj.data
  659. assert isinstance(code, int)
  660. assert isinstance(data, bytes)
  661. L = len(data)
  662. if L == 1:
  663. self._buffer.write(b"\xd4")
  664. elif L == 2:
  665. self._buffer.write(b"\xd5")
  666. elif L == 4:
  667. self._buffer.write(b"\xd6")
  668. elif L == 8:
  669. self._buffer.write(b"\xd7")
  670. elif L == 16:
  671. self._buffer.write(b"\xd8")
  672. elif L <= 0xFF:
  673. self._buffer.write(struct.pack(">BB", 0xC7, L))
  674. elif L <= 0xFFFF:
  675. self._buffer.write(struct.pack(">BH", 0xC8, L))
  676. else:
  677. self._buffer.write(struct.pack(">BI", 0xC9, L))
  678. self._buffer.write(struct.pack("b", code))
  679. self._buffer.write(data)
  680. return
  681. if check(obj, list_types):
  682. n = len(obj)
  683. self._pack_array_header(n)
  684. for i in range(n):
  685. self._pack(obj[i], nest_limit - 1)
  686. return
  687. if check(obj, dict):
  688. return self._pack_map_pairs(len(obj), obj.items(), nest_limit - 1)
  689. if self._datetime and check(obj, _DateTime) and obj.tzinfo is not None:
  690. obj = Timestamp.from_datetime(obj)
  691. default_used = 1
  692. continue
  693. if not default_used and self._default is not None:
  694. obj = self._default(obj)
  695. default_used = 1
  696. continue
  697. if self._datetime and check(obj, _DateTime):
  698. raise ValueError(f"Cannot serialize {obj!r} where tzinfo=None")
  699. raise TypeError(f"Cannot serialize {obj!r}")
  700. def pack(self, obj):
  701. try:
  702. self._pack(obj)
  703. except:
  704. self._buffer = BytesIO() # force reset
  705. raise
  706. if self._autoreset:
  707. ret = self._buffer.getvalue()
  708. self._buffer = BytesIO()
  709. return ret
  710. def pack_map_pairs(self, pairs):
  711. self._pack_map_pairs(len(pairs), pairs)
  712. if self._autoreset:
  713. ret = self._buffer.getvalue()
  714. self._buffer = BytesIO()
  715. return ret
  716. def pack_array_header(self, n):
  717. if n >= 2**32:
  718. raise ValueError
  719. self._pack_array_header(n)
  720. if self._autoreset:
  721. ret = self._buffer.getvalue()
  722. self._buffer = BytesIO()
  723. return ret
  724. def pack_map_header(self, n):
  725. if n >= 2**32:
  726. raise ValueError
  727. self._pack_map_header(n)
  728. if self._autoreset:
  729. ret = self._buffer.getvalue()
  730. self._buffer = BytesIO()
  731. return ret
  732. def pack_ext_type(self, typecode, data):
  733. if not isinstance(typecode, int):
  734. raise TypeError("typecode must have int type.")
  735. if not 0 <= typecode <= 127:
  736. raise ValueError("typecode should be 0-127")
  737. if not isinstance(data, bytes):
  738. raise TypeError("data must have bytes type")
  739. L = len(data)
  740. if L > 0xFFFFFFFF:
  741. raise ValueError("Too large data")
  742. if L == 1:
  743. self._buffer.write(b"\xd4")
  744. elif L == 2:
  745. self._buffer.write(b"\xd5")
  746. elif L == 4:
  747. self._buffer.write(b"\xd6")
  748. elif L == 8:
  749. self._buffer.write(b"\xd7")
  750. elif L == 16:
  751. self._buffer.write(b"\xd8")
  752. elif L <= 0xFF:
  753. self._buffer.write(b"\xc7" + struct.pack("B", L))
  754. elif L <= 0xFFFF:
  755. self._buffer.write(b"\xc8" + struct.pack(">H", L))
  756. else:
  757. self._buffer.write(b"\xc9" + struct.pack(">I", L))
  758. self._buffer.write(struct.pack("B", typecode))
  759. self._buffer.write(data)
  760. def _pack_array_header(self, n):
  761. if n <= 0x0F:
  762. return self._buffer.write(struct.pack("B", 0x90 + n))
  763. if n <= 0xFFFF:
  764. return self._buffer.write(struct.pack(">BH", 0xDC, n))
  765. if n <= 0xFFFFFFFF:
  766. return self._buffer.write(struct.pack(">BI", 0xDD, n))
  767. raise ValueError("Array is too large")
  768. def _pack_map_header(self, n):
  769. if n <= 0x0F:
  770. return self._buffer.write(struct.pack("B", 0x80 + n))
  771. if n <= 0xFFFF:
  772. return self._buffer.write(struct.pack(">BH", 0xDE, n))
  773. if n <= 0xFFFFFFFF:
  774. return self._buffer.write(struct.pack(">BI", 0xDF, n))
  775. raise ValueError("Dict is too large")
  776. def _pack_map_pairs(self, n, pairs, nest_limit=DEFAULT_RECURSE_LIMIT):
  777. self._pack_map_header(n)
  778. for k, v in pairs:
  779. self._pack(k, nest_limit - 1)
  780. self._pack(v, nest_limit - 1)
  781. def _pack_raw_header(self, n):
  782. if n <= 0x1F:
  783. self._buffer.write(struct.pack("B", 0xA0 + n))
  784. elif self._use_bin_type and n <= 0xFF:
  785. self._buffer.write(struct.pack(">BB", 0xD9, n))
  786. elif n <= 0xFFFF:
  787. self._buffer.write(struct.pack(">BH", 0xDA, n))
  788. elif n <= 0xFFFFFFFF:
  789. self._buffer.write(struct.pack(">BI", 0xDB, n))
  790. else:
  791. raise ValueError("Raw is too large")
  792. def _pack_bin_header(self, n):
  793. if not self._use_bin_type:
  794. return self._pack_raw_header(n)
  795. elif n <= 0xFF:
  796. return self._buffer.write(struct.pack(">BB", 0xC4, n))
  797. elif n <= 0xFFFF:
  798. return self._buffer.write(struct.pack(">BH", 0xC5, n))
  799. elif n <= 0xFFFFFFFF:
  800. return self._buffer.write(struct.pack(">BI", 0xC6, n))
  801. else:
  802. raise ValueError("Bin is too large")
  803. def bytes(self):
  804. """Return internal buffer contents as bytes object"""
  805. return self._buffer.getvalue()
  806. def reset(self):
  807. """Reset internal buffer.
  808. This method is useful only when autoreset=False.
  809. """
  810. self._buffer = BytesIO()
  811. def getbuffer(self):
  812. """Return view of internal buffer."""
  813. if _USING_STRINGBUILDER:
  814. return memoryview(self.bytes())
  815. else:
  816. return self._buffer.getbuffer()