wsgi.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311
  1. import sys
  2. from functools import partial
  3. import sentry_sdk
  4. from sentry_sdk._werkzeug import get_host, _get_headers
  5. from sentry_sdk.api import continue_trace
  6. from sentry_sdk.consts import OP
  7. from sentry_sdk.scope import should_send_default_pii
  8. from sentry_sdk.integrations._wsgi_common import (
  9. DEFAULT_HTTP_METHODS_TO_CAPTURE,
  10. _filter_headers,
  11. nullcontext,
  12. )
  13. from sentry_sdk.sessions import track_session
  14. from sentry_sdk.scope import use_isolation_scope
  15. from sentry_sdk.tracing import Transaction, TransactionSource
  16. from sentry_sdk.utils import (
  17. ContextVar,
  18. capture_internal_exceptions,
  19. event_from_exception,
  20. reraise,
  21. )
  22. from typing import TYPE_CHECKING
  23. if TYPE_CHECKING:
  24. from typing import Callable
  25. from typing import Dict
  26. from typing import Iterator
  27. from typing import Any
  28. from typing import Tuple
  29. from typing import Optional
  30. from typing import TypeVar
  31. from typing import Protocol
  32. from sentry_sdk.utils import ExcInfo
  33. from sentry_sdk._types import Event, EventProcessor
  34. WsgiResponseIter = TypeVar("WsgiResponseIter")
  35. WsgiResponseHeaders = TypeVar("WsgiResponseHeaders")
  36. WsgiExcInfo = TypeVar("WsgiExcInfo")
  37. class StartResponse(Protocol):
  38. def __call__(self, status, response_headers, exc_info=None): # type: ignore
  39. # type: (str, WsgiResponseHeaders, Optional[WsgiExcInfo]) -> WsgiResponseIter
  40. pass
  41. _wsgi_middleware_applied = ContextVar("sentry_wsgi_middleware_applied")
  42. def wsgi_decoding_dance(s, charset="utf-8", errors="replace"):
  43. # type: (str, str, str) -> str
  44. return s.encode("latin1").decode(charset, errors)
  45. def get_request_url(environ, use_x_forwarded_for=False):
  46. # type: (Dict[str, str], bool) -> str
  47. """Return the absolute URL without query string for the given WSGI
  48. environment."""
  49. script_name = environ.get("SCRIPT_NAME", "").rstrip("/")
  50. path_info = environ.get("PATH_INFO", "").lstrip("/")
  51. path = f"{script_name}/{path_info}"
  52. return "%s://%s/%s" % (
  53. environ.get("wsgi.url_scheme"),
  54. get_host(environ, use_x_forwarded_for),
  55. wsgi_decoding_dance(path).lstrip("/"),
  56. )
  57. class SentryWsgiMiddleware:
  58. __slots__ = (
  59. "app",
  60. "use_x_forwarded_for",
  61. "span_origin",
  62. "http_methods_to_capture",
  63. )
  64. def __init__(
  65. self,
  66. app, # type: Callable[[Dict[str, str], Callable[..., Any]], Any]
  67. use_x_forwarded_for=False, # type: bool
  68. span_origin="manual", # type: str
  69. http_methods_to_capture=DEFAULT_HTTP_METHODS_TO_CAPTURE, # type: Tuple[str, ...]
  70. ):
  71. # type: (...) -> None
  72. self.app = app
  73. self.use_x_forwarded_for = use_x_forwarded_for
  74. self.span_origin = span_origin
  75. self.http_methods_to_capture = http_methods_to_capture
  76. def __call__(self, environ, start_response):
  77. # type: (Dict[str, str], Callable[..., Any]) -> _ScopedResponse
  78. if _wsgi_middleware_applied.get(False):
  79. return self.app(environ, start_response)
  80. _wsgi_middleware_applied.set(True)
  81. try:
  82. with sentry_sdk.isolation_scope() as scope:
  83. with track_session(scope, session_mode="request"):
  84. with capture_internal_exceptions():
  85. scope.clear_breadcrumbs()
  86. scope._name = "wsgi"
  87. scope.add_event_processor(
  88. _make_wsgi_event_processor(
  89. environ, self.use_x_forwarded_for
  90. )
  91. )
  92. method = environ.get("REQUEST_METHOD", "").upper()
  93. transaction = None
  94. if method in self.http_methods_to_capture:
  95. transaction = continue_trace(
  96. environ,
  97. op=OP.HTTP_SERVER,
  98. name="generic WSGI request",
  99. source=TransactionSource.ROUTE,
  100. origin=self.span_origin,
  101. )
  102. transaction_context = (
  103. sentry_sdk.start_transaction(
  104. transaction,
  105. custom_sampling_context={"wsgi_environ": environ},
  106. )
  107. if transaction is not None
  108. else nullcontext()
  109. )
  110. with transaction_context:
  111. try:
  112. response = self.app(
  113. environ,
  114. partial(
  115. _sentry_start_response, start_response, transaction
  116. ),
  117. )
  118. except BaseException:
  119. reraise(*_capture_exception())
  120. finally:
  121. _wsgi_middleware_applied.set(False)
  122. return _ScopedResponse(scope, response)
  123. def _sentry_start_response( # type: ignore
  124. old_start_response, # type: StartResponse
  125. transaction, # type: Optional[Transaction]
  126. status, # type: str
  127. response_headers, # type: WsgiResponseHeaders
  128. exc_info=None, # type: Optional[WsgiExcInfo]
  129. ):
  130. # type: (...) -> WsgiResponseIter
  131. with capture_internal_exceptions():
  132. status_int = int(status.split(" ", 1)[0])
  133. if transaction is not None:
  134. transaction.set_http_status(status_int)
  135. if exc_info is None:
  136. # The Django Rest Framework WSGI test client, and likely other
  137. # (incorrect) implementations, cannot deal with the exc_info argument
  138. # if one is present. Avoid providing a third argument if not necessary.
  139. return old_start_response(status, response_headers)
  140. else:
  141. return old_start_response(status, response_headers, exc_info)
  142. def _get_environ(environ):
  143. # type: (Dict[str, str]) -> Iterator[Tuple[str, str]]
  144. """
  145. Returns our explicitly included environment variables we want to
  146. capture (server name, port and remote addr if pii is enabled).
  147. """
  148. keys = ["SERVER_NAME", "SERVER_PORT"]
  149. if should_send_default_pii():
  150. # make debugging of proxy setup easier. Proxy headers are
  151. # in headers.
  152. keys += ["REMOTE_ADDR"]
  153. for key in keys:
  154. if key in environ:
  155. yield key, environ[key]
  156. def get_client_ip(environ):
  157. # type: (Dict[str, str]) -> Optional[Any]
  158. """
  159. Infer the user IP address from various headers. This cannot be used in
  160. security sensitive situations since the value may be forged from a client,
  161. but it's good enough for the event payload.
  162. """
  163. try:
  164. return environ["HTTP_X_FORWARDED_FOR"].split(",")[0].strip()
  165. except (KeyError, IndexError):
  166. pass
  167. try:
  168. return environ["HTTP_X_REAL_IP"]
  169. except KeyError:
  170. pass
  171. return environ.get("REMOTE_ADDR")
  172. def _capture_exception():
  173. # type: () -> ExcInfo
  174. """
  175. Captures the current exception and sends it to Sentry.
  176. Returns the ExcInfo tuple to it can be reraised afterwards.
  177. """
  178. exc_info = sys.exc_info()
  179. e = exc_info[1]
  180. # SystemExit(0) is the only uncaught exception that is expected behavior
  181. should_skip_capture = isinstance(e, SystemExit) and e.code in (0, None)
  182. if not should_skip_capture:
  183. event, hint = event_from_exception(
  184. exc_info,
  185. client_options=sentry_sdk.get_client().options,
  186. mechanism={"type": "wsgi", "handled": False},
  187. )
  188. sentry_sdk.capture_event(event, hint=hint)
  189. return exc_info
  190. class _ScopedResponse:
  191. """
  192. Users a separate scope for each response chunk.
  193. This will make WSGI apps more tolerant against:
  194. - WSGI servers streaming responses from a different thread/from
  195. different threads than the one that called start_response
  196. - close() not being called
  197. - WSGI servers streaming responses interleaved from the same thread
  198. """
  199. __slots__ = ("_response", "_scope")
  200. def __init__(self, scope, response):
  201. # type: (sentry_sdk.scope.Scope, Iterator[bytes]) -> None
  202. self._scope = scope
  203. self._response = response
  204. def __iter__(self):
  205. # type: () -> Iterator[bytes]
  206. iterator = iter(self._response)
  207. while True:
  208. with use_isolation_scope(self._scope):
  209. try:
  210. chunk = next(iterator)
  211. except StopIteration:
  212. break
  213. except BaseException:
  214. reraise(*_capture_exception())
  215. yield chunk
  216. def close(self):
  217. # type: () -> None
  218. with use_isolation_scope(self._scope):
  219. try:
  220. self._response.close() # type: ignore
  221. except AttributeError:
  222. pass
  223. except BaseException:
  224. reraise(*_capture_exception())
  225. def _make_wsgi_event_processor(environ, use_x_forwarded_for):
  226. # type: (Dict[str, str], bool) -> EventProcessor
  227. # It's a bit unfortunate that we have to extract and parse the request data
  228. # from the environ so eagerly, but there are a few good reasons for this.
  229. #
  230. # We might be in a situation where the scope never gets torn down
  231. # properly. In that case we will have an unnecessary strong reference to
  232. # all objects in the environ (some of which may take a lot of memory) when
  233. # we're really just interested in a few of them.
  234. #
  235. # Keeping the environment around for longer than the request lifecycle is
  236. # also not necessarily something uWSGI can deal with:
  237. # https://github.com/unbit/uwsgi/issues/1950
  238. client_ip = get_client_ip(environ)
  239. request_url = get_request_url(environ, use_x_forwarded_for)
  240. query_string = environ.get("QUERY_STRING")
  241. method = environ.get("REQUEST_METHOD")
  242. env = dict(_get_environ(environ))
  243. headers = _filter_headers(dict(_get_headers(environ)))
  244. def event_processor(event, hint):
  245. # type: (Event, Dict[str, Any]) -> Event
  246. with capture_internal_exceptions():
  247. # if the code below fails halfway through we at least have some data
  248. request_info = event.setdefault("request", {})
  249. if should_send_default_pii():
  250. user_info = event.setdefault("user", {})
  251. if client_ip:
  252. user_info.setdefault("ip_address", client_ip)
  253. request_info["url"] = request_url
  254. request_info["query_string"] = query_string
  255. request_info["method"] = method
  256. request_info["env"] = env
  257. request_info["headers"] = headers
  258. return event
  259. return event_processor