serializer.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405
  1. import sys
  2. import math
  3. from collections.abc import Mapping, Sequence, Set
  4. from datetime import datetime
  5. from sentry_sdk.utils import (
  6. AnnotatedValue,
  7. capture_internal_exception,
  8. disable_capture_event,
  9. format_timestamp,
  10. safe_repr,
  11. strip_string,
  12. )
  13. from typing import TYPE_CHECKING
  14. if TYPE_CHECKING:
  15. from types import TracebackType
  16. from typing import Any
  17. from typing import Callable
  18. from typing import ContextManager
  19. from typing import Dict
  20. from typing import List
  21. from typing import Optional
  22. from typing import Type
  23. from typing import Union
  24. from sentry_sdk._types import NotImplementedType
  25. Span = Dict[str, Any]
  26. ReprProcessor = Callable[[Any, Dict[str, Any]], Union[NotImplementedType, str]]
  27. Segment = Union[str, int]
  28. # Bytes are technically not strings in Python 3, but we can serialize them
  29. serializable_str_types = (str, bytes, bytearray, memoryview)
  30. # Maximum length of JSON-serialized event payloads that can be safely sent
  31. # before the server may reject the event due to its size. This is not intended
  32. # to reflect actual values defined server-side, but rather only be an upper
  33. # bound for events sent by the SDK.
  34. #
  35. # Can be overwritten if wanting to send more bytes, e.g. with a custom server.
  36. # When changing this, keep in mind that events may be a little bit larger than
  37. # this value due to attached metadata, so keep the number conservative.
  38. MAX_EVENT_BYTES = 10**6
  39. # Maximum depth and breadth of databags. Excess data will be trimmed. If
  40. # max_request_body_size is "always", request bodies won't be trimmed.
  41. MAX_DATABAG_DEPTH = 5
  42. MAX_DATABAG_BREADTH = 10
  43. CYCLE_MARKER = "<cyclic>"
  44. global_repr_processors = [] # type: List[ReprProcessor]
  45. def add_global_repr_processor(processor):
  46. # type: (ReprProcessor) -> None
  47. global_repr_processors.append(processor)
  48. sequence_types = [Sequence, Set] # type: List[type]
  49. def add_repr_sequence_type(ty):
  50. # type: (type) -> None
  51. sequence_types.append(ty)
  52. class Memo:
  53. __slots__ = ("_ids", "_objs")
  54. def __init__(self):
  55. # type: () -> None
  56. self._ids = {} # type: Dict[int, Any]
  57. self._objs = [] # type: List[Any]
  58. def memoize(self, obj):
  59. # type: (Any) -> ContextManager[bool]
  60. self._objs.append(obj)
  61. return self
  62. def __enter__(self):
  63. # type: () -> bool
  64. obj = self._objs[-1]
  65. if id(obj) in self._ids:
  66. return True
  67. else:
  68. self._ids[id(obj)] = obj
  69. return False
  70. def __exit__(
  71. self,
  72. ty, # type: Optional[Type[BaseException]]
  73. value, # type: Optional[BaseException]
  74. tb, # type: Optional[TracebackType]
  75. ):
  76. # type: (...) -> None
  77. self._ids.pop(id(self._objs.pop()), None)
def serialize(event, **kwargs):
    # type: (Dict[str, Any], **Any) -> Dict[str, Any]
    """
    A very smart serializer that takes a dict and emits a json-friendly dict.
    Currently used for serializing the final Event and also prematurely while fetching the stack
    local variables for each frame in a stacktrace.

    It works internally with 'databags' which are arbitrary data structures like Mapping, Sequence and Set.
    The algorithm itself is a recursive graph walk down the data structures it encounters.

    It has the following responsibilities:
    * Trimming databags and keeping them within MAX_DATABAG_BREADTH and MAX_DATABAG_DEPTH.
    * Calling safe_repr() on objects appropriately to keep them informative and readable in the final payload.
    * Annotating the payload with the _meta field whenever trimming happens.

    The keyword arguments listed below are popped from `kwargs`; whatever
    remains is forwarded unchanged to the root `_serialize_node()` call.

    :param event: The object to serialize (the root of the walk).
    :param max_request_body_size: If set to "always", will never trim request bodies.
    :param max_value_length: The max length to strip strings to, defaults to sentry_sdk.consts.DEFAULT_MAX_VALUE_LENGTH
    :param is_vars: If we're serializing vars early, we want to repr() things that are JSON-serializable to make their type more apparent. For example, it's useful to see the difference between a unicode-string and a bytestring when viewing a stacktrace.
    :param custom_repr: A custom repr function that runs before safe_repr on the object to be serialized. If it returns None or throws internally, we will fallback to safe_repr.
    :returns: The serialized value. When it is a dict, `is_vars` is false and
        annotations were recorded, the annotations are attached under "_meta".
    """
    # Per-call state shared by the nested helpers below.
    memo = Memo()
    # Segments (mapping keys / sequence indexes) from the root to the current node.
    path = []  # type: List[Segment]
    # Stack of nested meta dicts mirroring `path`; index 0 is the root node.
    meta_stack = []  # type: List[Dict[str, Any]]

    keep_request_bodies = kwargs.pop("max_request_body_size", None) == "always"  # type: bool
    max_value_length = kwargs.pop("max_value_length", None)  # type: Optional[int]
    is_vars = kwargs.pop("is_vars", False)
    custom_repr = kwargs.pop("custom_repr", None)  # type: Callable[..., Optional[str]]

    def _safe_repr_wrapper(value):
        # type: (Any) -> str
        """
        Return `custom_repr(value)` when a custom repr is configured and it
        yields a truthy result; otherwise (falsy result or any exception)
        return `safe_repr(value)`.
        """
        try:
            repr_value = None
            if custom_repr is not None:
                repr_value = custom_repr(value)
            return repr_value or safe_repr(value)
        except Exception:
            return safe_repr(value)

    def _annotate(**meta):
        # type: (**Any) -> None
        """
        Merge `meta` into the "" entry of the meta node for the current path,
        creating the chain of intermediate nodes on demand.
        """
        # Grow the stack until it holds one node per path segment plus the root.
        while len(meta_stack) <= len(path):
            try:
                segment = path[len(meta_stack) - 1]
                node = meta_stack[-1].setdefault(str(segment), {})
            except IndexError:
                # Only reachable while meta_stack is empty: create the root node.
                node = {}

            meta_stack.append(node)

        meta_stack[-1].setdefault("", {}).update(meta)

    def _is_databag():
        # type: () -> Optional[bool]
        """
        A databag is any value that we need to trim.
        True for stuff like vars, request bodies, breadcrumbs and extra.

        :returns: `True` for "yes", `False` for "no", `None` for "maybe soon".
        """
        try:
            if is_vars:
                return True

            is_request_body = _is_request_body()
            if is_request_body in (True, None):
                return is_request_body

            p0 = path[0]
            if p0 == "breadcrumbs" and path[1] == "values":
                # Evaluated only for its IndexError: the path must be at least
                # three segments long before this counts as a databag.
                path[2]
                return True

            if p0 == "extra":
                return True

        except IndexError:
            # The path is still too short to decide.
            return None

        return False

    def _is_span_attribute():
        # type: () -> Optional[bool]
        """
        Tell whether the current path is at or below `spans.<segment>.data`.

        :returns: `True`/`False`, or `None` when the path is too short to decide.
        """
        try:
            if path[0] == "spans" and path[2] == "data":
                return True
        except IndexError:
            return None

        return False

    def _is_request_body():
        # type: () -> Optional[bool]
        """
        Tell whether the current path is at or below `request.data`.

        :returns: `True`/`False`, or `None` when the path is too short to decide.
        """
        try:
            if path[0] == "request" and path[1] == "data":
                return True
        except IndexError:
            return None

        return False

    def _serialize_node(
        obj,  # type: Any
        is_databag=None,  # type: Optional[bool]
        is_request_body=None,  # type: Optional[bool]
        should_repr_strings=None,  # type: Optional[bool]
        segment=None,  # type: Optional[Segment]
        remaining_breadth=None,  # type: Optional[Union[int, float]]
        remaining_depth=None,  # type: Optional[Union[int, float]]
    ):
        # type: (...) -> Any
        """
        Serialize one node: maintain `path`, detect cycles through `memo`,
        delegate the actual work to `_serialize_node_impl()` and turn any
        failure into a placeholder instead of propagating it.

        :param segment: Key or index of `obj` inside its parent; `None` for
            the root, in which case `path` is left untouched.
        """
        if segment is not None:
            path.append(segment)

        try:
            with memo.memoize(obj) as result:
                # `result` is True when obj is already being visited further up.
                if result:
                    return CYCLE_MARKER

                return _serialize_node_impl(
                    obj,
                    is_databag=is_databag,
                    is_request_body=is_request_body,
                    should_repr_strings=should_repr_strings,
                    remaining_depth=remaining_depth,
                    remaining_breadth=remaining_breadth,
                )
        except BaseException:
            # NOTE(review): this also catches KeyboardInterrupt/SystemExit;
            # the error is handed to capture_internal_exception, not re-raised.
            capture_internal_exception(sys.exc_info())

            if is_databag:
                return "<failed to serialize, use init(debug=True) to see error logs>"

            return None
        finally:
            if segment is not None:
                path.pop()
                # Drop meta nodes that belonged to the subtree just left.
                del meta_stack[len(path) + 1 :]

    def _flatten_annotated(obj):
        # type: (Any) -> Any
        """
        Unwrap an `AnnotatedValue`: record its metadata for the current path
        and return the wrapped value. Any other object is returned unchanged.
        """
        if isinstance(obj, AnnotatedValue):
            _annotate(**obj.metadata)
            obj = obj.value
        return obj

    def _serialize_node_impl(
        obj,
        is_databag,
        is_request_body,
        should_repr_strings,
        remaining_depth,
        remaining_breadth,
    ):
        # type: (Any, Optional[bool], Optional[bool], Optional[bool], Optional[Union[float, int]], Optional[Union[float, int]]) -> Any
        """
        Convert `obj` into a JSON-friendly value, recursing into mappings and
        sequences through `_serialize_node()`. Arguments passed as `None` are
        resolved here from the current path and the enclosing settings.
        """
        # Annotated values are never repr()'d as strings.
        if isinstance(obj, AnnotatedValue):
            should_repr_strings = False
        if should_repr_strings is None:
            should_repr_strings = is_vars

        if is_databag is None:
            is_databag = _is_databag()

        if is_request_body is None:
            is_request_body = _is_request_body()

        if is_databag:
            if is_request_body and keep_request_bodies:
                # Request bodies that must be kept are never trimmed.
                remaining_depth = float("inf")
                remaining_breadth = float("inf")
            else:
                if remaining_depth is None:
                    remaining_depth = MAX_DATABAG_DEPTH
                if remaining_breadth is None:
                    remaining_breadth = MAX_DATABAG_BREADTH

        obj = _flatten_annotated(obj)

        # Depth budget exhausted: annotate and stop descending.
        if remaining_depth is not None and remaining_depth <= 0:
            _annotate(rem=[["!limit", "x"]])
            if is_databag:
                return _flatten_annotated(
                    strip_string(_safe_repr_wrapper(obj), max_length=max_value_length)
                )
            return None

        # Give registered repr processors the first chance; the first one that
        # does not return NotImplemented decides the result.
        is_span_attribute = _is_span_attribute()
        if (is_databag or is_span_attribute) and global_repr_processors:
            hints = {"memo": memo, "remaining_depth": remaining_depth}
            for processor in global_repr_processors:
                result = processor(obj, hints)
                if result is not NotImplemented:
                    return _flatten_annotated(result)

        # Looked up on the type, so it is called with obj passed explicitly.
        sentry_repr = getattr(type(obj), "__sentry_repr__", None)

        if obj is None or isinstance(obj, (bool, int, float)):
            # inf/nan are always turned into their repr, even outside vars mode.
            if should_repr_strings or (
                isinstance(obj, float) and (math.isinf(obj) or math.isnan(obj))
            ):
                return _safe_repr_wrapper(obj)
            else:
                return obj

        elif callable(sentry_repr):
            return sentry_repr(obj)

        elif isinstance(obj, datetime):
            return (
                str(format_timestamp(obj))
                if not should_repr_strings
                else _safe_repr_wrapper(obj)
            )

        elif isinstance(obj, Mapping):
            # Create temporary copy here to avoid calling too much code that
            # might mutate our dictionary while we're still iterating over it.
            obj = dict(obj.items())

            rv_dict = {}  # type: Dict[str, Any]
            i = 0

            for k, v in obj.items():
                # Breadth budget exhausted: record the full length and stop.
                if remaining_breadth is not None and i >= remaining_breadth:
                    _annotate(len=len(obj))
                    break

                # Keys are always emitted as strings.
                str_k = str(k)
                v = _serialize_node(
                    v,
                    segment=str_k,
                    should_repr_strings=should_repr_strings,
                    is_databag=is_databag,
                    is_request_body=is_request_body,
                    remaining_depth=(
                        remaining_depth - 1 if remaining_depth is not None else None
                    ),
                    remaining_breadth=remaining_breadth,
                )
                rv_dict[str_k] = v
                i += 1

            return rv_dict

        # String-like types are excluded so they are not walked item by item.
        elif not isinstance(obj, serializable_str_types) and isinstance(
            obj, tuple(sequence_types)
        ):
            rv_list = []

            for i, v in enumerate(obj):
                # Breadth budget exhausted: record the full length and stop.
                if remaining_breadth is not None and i >= remaining_breadth:
                    _annotate(len=len(obj))
                    break

                rv_list.append(
                    _serialize_node(
                        v,
                        segment=i,
                        should_repr_strings=should_repr_strings,
                        is_databag=is_databag,
                        is_request_body=is_request_body,
                        remaining_depth=(
                            remaining_depth - 1 if remaining_depth is not None else None
                        ),
                        remaining_breadth=remaining_breadth,
                    )
                )

            return rv_list

        # Everything else ends up as a string.
        if should_repr_strings:
            obj = _safe_repr_wrapper(obj)
        else:
            if isinstance(obj, bytes) or isinstance(obj, bytearray):
                obj = obj.decode("utf-8", "replace")

            if not isinstance(obj, str):
                obj = _safe_repr_wrapper(obj)

        # Span descriptions (path `spans.<segment>.description`) are returned
        # without being passed through strip_string.
        is_span_description = (
            len(path) == 3 and path[0] == "spans" and path[-1] == "description"
        )
        if is_span_description:
            return obj

        # _flatten_annotated records the metadata if strip_string hands back an
        # AnnotatedValue (presumably when it truncates; confirm in
        # sentry_sdk.utils).
        return _flatten_annotated(strip_string(obj, max_length=max_value_length))

    #
    # Start of serialize() function
    #
    # NOTE(review): the flag is set to False afterwards rather than restored
    # to its previous value; presumably it stops events from being captured
    # while serializing (confirm in sentry_sdk.utils).
    disable_capture_event.set(True)
    try:
        serialized_event = _serialize_node(event, **kwargs)
        # Annotations are attached only to a dict result outside vars mode.
        if not is_vars and meta_stack and isinstance(serialized_event, dict):
            serialized_event["_meta"] = meta_stack[0]

        return serialized_event
    finally:
        disable_capture_event.set(False)