utils.py 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195
  1. import os
  2. from collections import deque
  3. from sentry_sdk._compat import PY311
  4. from sentry_sdk.utils import filename_for_module
  5. from typing import TYPE_CHECKING
  6. if TYPE_CHECKING:
  7. from sentry_sdk._lru_cache import LRUCache
  8. from types import FrameType
  9. from typing import Deque
  10. from typing import List
  11. from typing import Optional
  12. from typing import Sequence
  13. from typing import Tuple
  14. from typing_extensions import TypedDict
  15. ThreadId = str
  16. ProcessedStack = List[int]
  17. ProcessedFrame = TypedDict(
  18. "ProcessedFrame",
  19. {
  20. "abs_path": str,
  21. "filename": Optional[str],
  22. "function": str,
  23. "lineno": int,
  24. "module": Optional[str],
  25. },
  26. )
  27. ProcessedThreadMetadata = TypedDict(
  28. "ProcessedThreadMetadata",
  29. {"name": str},
  30. )
  31. FrameId = Tuple[
  32. str, # abs_path
  33. int, # lineno
  34. str, # function
  35. ]
  36. FrameIds = Tuple[FrameId, ...]
  37. # The exact value of this id is not very meaningful. The purpose
  38. # of this id is to give us a compact and unique identifier for a
  39. # raw stack that can be used as a key to a dictionary so that it
  40. # can be used during the sampled format generation.
  41. StackId = Tuple[int, int]
  42. ExtractedStack = Tuple[StackId, FrameIds, List[ProcessedFrame]]
  43. ExtractedSample = Sequence[Tuple[ThreadId, ExtractedStack]]
  44. # The default sampling frequency to use. This is set at 101 in order to
  45. # mitigate the effects of lockstep sampling.
  46. DEFAULT_SAMPLING_FREQUENCY = 101
  47. # We want to impose a stack depth limit so that samples aren't too large.
  48. MAX_STACK_DEPTH = 128
  49. if PY311:
  50. def get_frame_name(frame):
  51. # type: (FrameType) -> str
  52. return frame.f_code.co_qualname
  53. else:
  54. def get_frame_name(frame):
  55. # type: (FrameType) -> str
  56. f_code = frame.f_code
  57. co_varnames = f_code.co_varnames
  58. # co_name only contains the frame name. If the frame was a method,
  59. # the class name will NOT be included.
  60. name = f_code.co_name
  61. # if it was a method, we can get the class name by inspecting
  62. # the f_locals for the `self` argument
  63. try:
  64. if (
  65. # the co_varnames start with the frame's positional arguments
  66. # and we expect the first to be `self` if its an instance method
  67. co_varnames and co_varnames[0] == "self" and "self" in frame.f_locals
  68. ):
  69. for cls in type(frame.f_locals["self"]).__mro__:
  70. if name in cls.__dict__:
  71. return "{}.{}".format(cls.__name__, name)
  72. except (AttributeError, ValueError):
  73. pass
  74. # if it was a class method, (decorated with `@classmethod`)
  75. # we can get the class name by inspecting the f_locals for the `cls` argument
  76. try:
  77. if (
  78. # the co_varnames start with the frame's positional arguments
  79. # and we expect the first to be `cls` if its a class method
  80. co_varnames and co_varnames[0] == "cls" and "cls" in frame.f_locals
  81. ):
  82. for cls in frame.f_locals["cls"].__mro__:
  83. if name in cls.__dict__:
  84. return "{}.{}".format(cls.__name__, name)
  85. except (AttributeError, ValueError):
  86. pass
  87. # nothing we can do if it is a staticmethod (decorated with @staticmethod)
  88. # we've done all we can, time to give up and return what we have
  89. return name
  90. def frame_id(raw_frame):
  91. # type: (FrameType) -> FrameId
  92. return (raw_frame.f_code.co_filename, raw_frame.f_lineno, get_frame_name(raw_frame))
  93. def extract_frame(fid, raw_frame, cwd):
  94. # type: (FrameId, FrameType, str) -> ProcessedFrame
  95. abs_path = raw_frame.f_code.co_filename
  96. try:
  97. module = raw_frame.f_globals["__name__"]
  98. except Exception:
  99. module = None
  100. # namedtuples can be many times slower when initialing
  101. # and accessing attribute so we opt to use a tuple here instead
  102. return {
  103. # This originally was `os.path.abspath(abs_path)` but that had
  104. # a large performance overhead.
  105. #
  106. # According to docs, this is equivalent to
  107. # `os.path.normpath(os.path.join(os.getcwd(), path))`.
  108. # The `os.getcwd()` call is slow here, so we precompute it.
  109. #
  110. # Additionally, since we are using normalized path already,
  111. # we skip calling `os.path.normpath` entirely.
  112. "abs_path": os.path.join(cwd, abs_path),
  113. "module": module,
  114. "filename": filename_for_module(module, abs_path) or None,
  115. "function": fid[2],
  116. "lineno": raw_frame.f_lineno,
  117. }
  118. def extract_stack(
  119. raw_frame, # type: Optional[FrameType]
  120. cache, # type: LRUCache
  121. cwd, # type: str
  122. max_stack_depth=MAX_STACK_DEPTH, # type: int
  123. ):
  124. # type: (...) -> ExtractedStack
  125. """
  126. Extracts the stack starting the specified frame. The extracted stack
  127. assumes the specified frame is the top of the stack, and works back
  128. to the bottom of the stack.
  129. In the event that the stack is more than `MAX_STACK_DEPTH` frames deep,
  130. only the first `MAX_STACK_DEPTH` frames will be returned.
  131. """
  132. raw_frames = deque(maxlen=max_stack_depth) # type: Deque[FrameType]
  133. while raw_frame is not None:
  134. f_back = raw_frame.f_back
  135. raw_frames.append(raw_frame)
  136. raw_frame = f_back
  137. frame_ids = tuple(frame_id(raw_frame) for raw_frame in raw_frames)
  138. frames = []
  139. for i, fid in enumerate(frame_ids):
  140. frame = cache.get(fid)
  141. if frame is None:
  142. frame = extract_frame(fid, raw_frames[i], cwd)
  143. cache.set(fid, frame)
  144. frames.append(frame)
  145. # Instead of mapping the stack into frame ids and hashing
  146. # that as a tuple, we can directly hash the stack.
  147. # This saves us from having to generate yet another list.
  148. # Additionally, using the stack as the key directly is
  149. # costly because the stack can be large, so we pre-hash
  150. # the stack, and use the hash as the key as this will be
  151. # needed a few times to improve performance.
  152. #
  153. # To Reduce the likelihood of hash collisions, we include
  154. # the stack depth. This means that only stacks of the same
  155. # depth can suffer from hash collisions.
  156. stack_id = len(raw_frames), hash(frame_ids)
  157. return stack_id, frame_ids, frames