# monitor.py (3.6 KB)
  1. import os
  2. import time
  3. from threading import Thread, Lock
  4. import sentry_sdk
  5. from sentry_sdk.utils import logger
  6. from typing import TYPE_CHECKING
  7. if TYPE_CHECKING:
  8. from typing import Optional
# Upper bound for the monitor's downsample factor: Monitor.set_downsample_factor()
# only increments the factor while it is still below this value.
MAX_DOWNSAMPLE_FACTOR = 10
  10. class Monitor:
  11. """
  12. Performs health checks in a separate thread once every interval seconds
  13. and updates the internal state. Other parts of the SDK only read this state
  14. and act accordingly.
  15. """
  16. name = "sentry.monitor"
  17. def __init__(self, transport, interval=10):
  18. # type: (sentry_sdk.transport.Transport, float) -> None
  19. self.transport = transport # type: sentry_sdk.transport.Transport
  20. self.interval = interval # type: float
  21. self._healthy = True
  22. self._downsample_factor = 0 # type: int
  23. self._thread = None # type: Optional[Thread]
  24. self._thread_lock = Lock()
  25. self._thread_for_pid = None # type: Optional[int]
  26. self._running = True
  27. def _ensure_running(self):
  28. # type: () -> None
  29. """
  30. Check that the monitor has an active thread to run in, or create one if not.
  31. Note that this might fail (e.g. in Python 3.12 it's not possible to
  32. spawn new threads at interpreter shutdown). In that case self._running
  33. will be False after running this function.
  34. """
  35. if self._thread_for_pid == os.getpid() and self._thread is not None:
  36. return None
  37. with self._thread_lock:
  38. if self._thread_for_pid == os.getpid() and self._thread is not None:
  39. return None
  40. def _thread():
  41. # type: (...) -> None
  42. while self._running:
  43. time.sleep(self.interval)
  44. if self._running:
  45. self.run()
  46. thread = Thread(name=self.name, target=_thread)
  47. thread.daemon = True
  48. try:
  49. thread.start()
  50. except RuntimeError:
  51. # Unfortunately at this point the interpreter is in a state that no
  52. # longer allows us to spawn a thread and we have to bail.
  53. self._running = False
  54. return None
  55. self._thread = thread
  56. self._thread_for_pid = os.getpid()
  57. return None
  58. def run(self):
  59. # type: () -> None
  60. self.check_health()
  61. self.set_downsample_factor()
  62. def set_downsample_factor(self):
  63. # type: () -> None
  64. if self._healthy:
  65. if self._downsample_factor > 0:
  66. logger.debug(
  67. "[Monitor] health check positive, reverting to normal sampling"
  68. )
  69. self._downsample_factor = 0
  70. else:
  71. if self.downsample_factor < MAX_DOWNSAMPLE_FACTOR:
  72. self._downsample_factor += 1
  73. logger.debug(
  74. "[Monitor] health check negative, downsampling with a factor of %d",
  75. self._downsample_factor,
  76. )
  77. def check_health(self):
  78. # type: () -> None
  79. """
  80. Perform the actual health checks,
  81. currently only checks if the transport is rate-limited.
  82. TODO: augment in the future with more checks.
  83. """
  84. self._healthy = self.transport.is_healthy()
  85. def is_healthy(self):
  86. # type: () -> bool
  87. self._ensure_running()
  88. return self._healthy
  89. @property
  90. def downsample_factor(self):
  91. # type: () -> int
  92. self._ensure_running()
  93. return self._downsample_factor
  94. def kill(self):
  95. # type: () -> None
  96. self._running = False