# litellm.py — Sentry integration for LiteLLM.
  1. from typing import TYPE_CHECKING
  2. import sentry_sdk
  3. from sentry_sdk import consts
  4. from sentry_sdk.ai.monitoring import record_token_usage
  5. from sentry_sdk.ai.utils import get_start_span_function, set_data_normalized
  6. from sentry_sdk.consts import SPANDATA
  7. from sentry_sdk.integrations import DidNotEnable, Integration
  8. from sentry_sdk.scope import should_send_default_pii
  9. from sentry_sdk.utils import event_from_exception
  10. if TYPE_CHECKING:
  11. from typing import Any, Dict
  12. from datetime import datetime
  13. try:
  14. import litellm # type: ignore[import-not-found]
  15. except ImportError:
  16. raise DidNotEnable("LiteLLM not installed")
  17. def _get_metadata_dict(kwargs):
  18. # type: (Dict[str, Any]) -> Dict[str, Any]
  19. """Get the metadata dictionary from the kwargs."""
  20. litellm_params = kwargs.setdefault("litellm_params", {})
  21. # we need this weird little dance, as metadata might be set but may be None initially
  22. metadata = litellm_params.get("metadata")
  23. if metadata is None:
  24. metadata = {}
  25. litellm_params["metadata"] = metadata
  26. return metadata
  27. def _input_callback(kwargs):
  28. # type: (Dict[str, Any]) -> None
  29. """Handle the start of a request."""
  30. integration = sentry_sdk.get_client().get_integration(LiteLLMIntegration)
  31. if integration is None:
  32. return
  33. # Get key parameters
  34. full_model = kwargs.get("model", "")
  35. try:
  36. model, provider, _, _ = litellm.get_llm_provider(full_model)
  37. except Exception:
  38. model = full_model
  39. provider = "unknown"
  40. messages = kwargs.get("messages", [])
  41. operation = "chat" if messages else "embeddings"
  42. # Start a new span/transaction
  43. span = get_start_span_function()(
  44. op=(
  45. consts.OP.GEN_AI_CHAT
  46. if operation == "chat"
  47. else consts.OP.GEN_AI_EMBEDDINGS
  48. ),
  49. name=f"{operation} {model}",
  50. origin=LiteLLMIntegration.origin,
  51. )
  52. span.__enter__()
  53. # Store span for later
  54. _get_metadata_dict(kwargs)["_sentry_span"] = span
  55. # Set basic data
  56. set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, provider)
  57. set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, operation)
  58. # Record messages if allowed
  59. if messages and should_send_default_pii() and integration.include_prompts:
  60. set_data_normalized(
  61. span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages, unpack=False
  62. )
  63. # Record other parameters
  64. params = {
  65. "model": SPANDATA.GEN_AI_REQUEST_MODEL,
  66. "stream": SPANDATA.GEN_AI_RESPONSE_STREAMING,
  67. "max_tokens": SPANDATA.GEN_AI_REQUEST_MAX_TOKENS,
  68. "presence_penalty": SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY,
  69. "frequency_penalty": SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY,
  70. "temperature": SPANDATA.GEN_AI_REQUEST_TEMPERATURE,
  71. "top_p": SPANDATA.GEN_AI_REQUEST_TOP_P,
  72. }
  73. for key, attribute in params.items():
  74. value = kwargs.get(key)
  75. if value is not None:
  76. set_data_normalized(span, attribute, value)
  77. # Record LiteLLM-specific parameters
  78. litellm_params = {
  79. "api_base": kwargs.get("api_base"),
  80. "api_version": kwargs.get("api_version"),
  81. "custom_llm_provider": kwargs.get("custom_llm_provider"),
  82. }
  83. for key, value in litellm_params.items():
  84. if value is not None:
  85. set_data_normalized(span, f"gen_ai.litellm.{key}", value)
  86. def _success_callback(kwargs, completion_response, start_time, end_time):
  87. # type: (Dict[str, Any], Any, datetime, datetime) -> None
  88. """Handle successful completion."""
  89. span = _get_metadata_dict(kwargs).get("_sentry_span")
  90. if span is None:
  91. return
  92. integration = sentry_sdk.get_client().get_integration(LiteLLMIntegration)
  93. if integration is None:
  94. return
  95. try:
  96. # Record model information
  97. if hasattr(completion_response, "model"):
  98. set_data_normalized(
  99. span, SPANDATA.GEN_AI_RESPONSE_MODEL, completion_response.model
  100. )
  101. # Record response content if allowed
  102. if should_send_default_pii() and integration.include_prompts:
  103. if hasattr(completion_response, "choices"):
  104. response_messages = []
  105. for choice in completion_response.choices:
  106. if hasattr(choice, "message"):
  107. if hasattr(choice.message, "model_dump"):
  108. response_messages.append(choice.message.model_dump())
  109. elif hasattr(choice.message, "dict"):
  110. response_messages.append(choice.message.dict())
  111. else:
  112. # Fallback for basic message objects
  113. msg = {}
  114. if hasattr(choice.message, "role"):
  115. msg["role"] = choice.message.role
  116. if hasattr(choice.message, "content"):
  117. msg["content"] = choice.message.content
  118. if hasattr(choice.message, "tool_calls"):
  119. msg["tool_calls"] = choice.message.tool_calls
  120. response_messages.append(msg)
  121. if response_messages:
  122. set_data_normalized(
  123. span, SPANDATA.GEN_AI_RESPONSE_TEXT, response_messages
  124. )
  125. # Record token usage
  126. if hasattr(completion_response, "usage"):
  127. usage = completion_response.usage
  128. record_token_usage(
  129. span,
  130. input_tokens=getattr(usage, "prompt_tokens", None),
  131. output_tokens=getattr(usage, "completion_tokens", None),
  132. total_tokens=getattr(usage, "total_tokens", None),
  133. )
  134. finally:
  135. # Always finish the span and clean up
  136. span.__exit__(None, None, None)
  137. def _failure_callback(kwargs, exception, start_time, end_time):
  138. # type: (Dict[str, Any], Exception, datetime, datetime) -> None
  139. """Handle request failure."""
  140. span = _get_metadata_dict(kwargs).get("_sentry_span")
  141. if span is None:
  142. return
  143. try:
  144. # Capture the exception
  145. event, hint = event_from_exception(
  146. exception,
  147. client_options=sentry_sdk.get_client().options,
  148. mechanism={"type": "litellm", "handled": False},
  149. )
  150. sentry_sdk.capture_event(event, hint=hint)
  151. finally:
  152. # Always finish the span and clean up
  153. span.__exit__(type(exception), exception, None)
  154. class LiteLLMIntegration(Integration):
  155. """
  156. LiteLLM integration for Sentry.
  157. This integration automatically captures LiteLLM API calls and sends them to Sentry
  158. for monitoring and error tracking. It supports all 100+ LLM providers that LiteLLM
  159. supports, including OpenAI, Anthropic, Google, Cohere, and many others.
  160. Features:
  161. - Automatic exception capture for all LiteLLM calls
  162. - Token usage tracking across all providers
  163. - Provider detection and attribution
  164. - Input/output message capture (configurable)
  165. - Streaming response support
  166. - Cost tracking integration
  167. Usage:
  168. ```python
  169. import litellm
  170. import sentry_sdk
  171. # Initialize Sentry with the LiteLLM integration
  172. sentry_sdk.init(
  173. dsn="your-dsn",
  174. send_default_pii=True
  175. integrations=[
  176. sentry_sdk.integrations.LiteLLMIntegration(
  177. include_prompts=True # Set to False to exclude message content
  178. )
  179. ]
  180. )
  181. # All LiteLLM calls will now be monitored
  182. response = litellm.completion(
  183. model="gpt-3.5-turbo",
  184. messages=[{"role": "user", "content": "Hello!"}]
  185. )
  186. ```
  187. Configuration:
  188. - include_prompts (bool): Whether to include prompts and responses in spans.
  189. Defaults to True. Set to False to exclude potentially sensitive data.
  190. """
  191. identifier = "litellm"
  192. origin = f"auto.ai.{identifier}"
  193. def __init__(self, include_prompts=True):
  194. # type: (LiteLLMIntegration, bool) -> None
  195. self.include_prompts = include_prompts
  196. @staticmethod
  197. def setup_once():
  198. # type: () -> None
  199. """Set up LiteLLM callbacks for monitoring."""
  200. litellm.input_callback = litellm.input_callback or []
  201. if _input_callback not in litellm.input_callback:
  202. litellm.input_callback.append(_input_callback)
  203. litellm.success_callback = litellm.success_callback or []
  204. if _success_callback not in litellm.success_callback:
  205. litellm.success_callback.append(_success_callback)
  206. litellm.failure_callback = litellm.failure_callback or []
  207. if _failure_callback not in litellm.failure_callback:
  208. litellm.failure_callback.append(_failure_callback)