From 985bd27e68b5a7e5f1fc15827a198c13ce7b7132 Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Wed, 10 Apr 2024 16:54:04 -0400 Subject: [PATCH 01/41] Add the ability to put measurements on a specific span --- sentry_sdk/tracing.py | 37 ++++++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/sentry_sdk/tracing.py b/sentry_sdk/tracing.py index 6e82d839db..f8eb6a4131 100644 --- a/sentry_sdk/tracing.py +++ b/sentry_sdk/tracing.py @@ -155,6 +155,28 @@ def add(self, span): self.spans.append(span) +class MeasurementValue: + """A measurement is an indexed, numeric value on a span or transaction. + It can be something like score.total to represent a webvital value, or ai.total_tokens + to represent the number of tokens used during that span's execution. + """ + + __slots__ = "value", "unit" + + def __init__( + self, + value, # type: float, + unit=None, # type: Optional[MeasurementUnit] + ): + self.value = value + self.unit = unit + + def to_json(self): + rv = {"value": self.value} + if self.unit is not None: + rv["unit"] = self.unit + + class Span: """A span holds timing information of a block of code. Spans can have multiple child spans thus forming a span tree. @@ -189,6 +211,7 @@ class Span: "sampled", "op", "description", + "_measurements", "start_timestamp", "_start_timestamp_monotonic_ns", "status", @@ -229,6 +252,7 @@ def __init__( self.status = status self.hub = hub self.scope = scope + self._measurements = {} # type: Dict[str, MeasurementValue] self._tags = {} # type: MutableMapping[str, str] self._data = {} # type: Dict[str, Any] self._containing_transaction = containing_transaction @@ -488,6 +512,10 @@ def set_status(self, value): # type: (str) -> None self.status = value + def set_measurement(self, name, value, unit=""): + # type: (str, float, MeasurementUnit) -> None + self._measurements[name] = MeasurementValue(value, unit) + def set_thread(self, thread_id, thread_name): # type: (Optional[int], Optional[str]) -> None @@ -598,6 +626,9 @@ def to_json(self): if metrics_summary: rv["_metrics_summary"] = metrics_summary + if len(self._measurements) > 0: + rv["measurements"] = {k: v.to_json() for k, v in self._measurements.items()} + tags = self._tags if tags: rv["tags"] = tags @@ -674,7 +705,7 @@ def __init__( self.source = source self.sample_rate = None # type: Optional[float] self.parent_sampled = parent_sampled - self._measurements = {} # type: Dict[str, Any] + self._measurements = {} # type: Dict[str, MeasurementValue] self._contexts = {} # type: Dict[str, Any] self._profile = None # type: Optional[sentry_sdk.profiler.Profile] self._baggage = baggage @@ -816,7 +847,7 @@ def finish(self, hub=None, end_timestamp=None): event["profile"] = self._profile self._profile = None - event["measurements"] = self._measurements + event["measurements"] = {k: v.to_json() for k, v in self._measurements.items()} # This is here since `to_json` is not invoked. This really should # be gone when we switch to onlyspans. 
@@ -829,7 +860,7 @@ def finish(self, hub=None, end_timestamp=None): def set_measurement(self, name, value, unit=""): # type: (str, float, MeasurementUnit) -> None - self._measurements[name] = {"value": value, "unit": unit} + self._measurements[name] = MeasurementValue(value, unit) def set_context(self, key, value): # type: (str, Any) -> None From 068f0e3130dedc0a6c4b1d327918f454398dbd06 Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Wed, 10 Apr 2024 16:59:15 -0400 Subject: [PATCH 02/41] Fix linting issues --- sentry_sdk/tracing.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/sentry_sdk/tracing.py b/sentry_sdk/tracing.py index f8eb6a4131..f57e756d42 100644 --- a/sentry_sdk/tracing.py +++ b/sentry_sdk/tracing.py @@ -163,18 +163,17 @@ class MeasurementValue: __slots__ = "value", "unit" - def __init__( - self, - value, # type: float, - unit=None, # type: Optional[MeasurementUnit] - ): + def __init__(self, value, unit=None): + # type: (float, Optional[MeasurementUnit]) -> None self.value = value self.unit = unit def to_json(self): + # type: () -> dict[str, Any] rv = {"value": self.value} if self.unit is not None: rv["unit"] = self.unit + return rv class Span: From 53b89e2e08a462abf07d7effeaa4a8bf57b71a88 Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Wed, 10 Apr 2024 17:02:40 -0400 Subject: [PATCH 03/41] Use typed dict for performance --- sentry_sdk/tracing.py | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/sentry_sdk/tracing.py b/sentry_sdk/tracing.py index f57e756d42..5bbdc0facb 100644 --- a/sentry_sdk/tracing.py +++ b/sentry_sdk/tracing.py @@ -155,25 +155,14 @@ def add(self, span): self.spans.append(span) -class MeasurementValue: +class MeasurementValue(TypedDict): """A measurement is an indexed, numeric value on a span or transaction. It can be something like score.total to represent a webvital value, or ai.total_tokens to represent the number of tokens used during that span's execution. """ - __slots__ = "value", "unit" - - def __init__(self, value, unit=None): - # type: (float, Optional[MeasurementUnit]) -> None - self.value = value - self.unit = unit - - def to_json(self): - # type: () -> dict[str, Any] - rv = {"value": self.value} - if self.unit is not None: - rv["unit"] = self.unit - return rv + value: float + unit: Optional[MeasurementUnit] class Span: @@ -513,7 +502,7 @@ def set_status(self, value): def set_measurement(self, name, value, unit=""): # type: (str, float, MeasurementUnit) -> None - self._measurements[name] = MeasurementValue(value, unit) + self._measurements[name] = {"value": value, "unit": unit} def set_thread(self, thread_id, thread_name): # type: (Optional[int], Optional[str]) -> None @@ -626,7 +615,7 @@ def to_json(self): rv["_metrics_summary"] = metrics_summary if len(self._measurements) > 0: - rv["measurements"] = {k: v.to_json() for k, v in self._measurements.items()} + rv["measurements"] = self._measurements tags = self._tags if tags: @@ -846,7 +835,7 @@ def finish(self, hub=None, end_timestamp=None): event["profile"] = self._profile self._profile = None - event["measurements"] = {k: v.to_json() for k, v in self._measurements.items()} + event["measurements"] = self._measurements # This is here since `to_json` is not invoked. This really should # be gone when we switch to onlyspans. 
@@ -859,7 +848,7 @@ def finish(self, hub=None, end_timestamp=None): def set_measurement(self, name, value, unit=""): # type: (str, float, MeasurementUnit) -> None - self._measurements[name] = MeasurementValue(value, unit) + self._measurements[name] = {"value": value, "unit": unit} def set_context(self, key, value): # type: (str, Any) -> None From 16dcf0542809e109b160f768427faf27666133a8 Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Wed, 10 Apr 2024 17:09:43 -0400 Subject: [PATCH 04/41] Move typed dict into _types --- sentry_sdk/_types.py | 70 ++++++++++++++++++++++++------------------- sentry_sdk/tracing.py | 13 +------- 2 files changed, 40 insertions(+), 43 deletions(-) diff --git a/sentry_sdk/_types.py b/sentry_sdk/_types.py index 1577dbde4f..7117213944 100644 --- a/sentry_sdk/_types.py +++ b/sentry_sdk/_types.py @@ -28,6 +28,45 @@ # "critical" is an alias of "fatal" recognized by Relay LogLevelStr = Literal["fatal", "critical", "error", "warning", "info", "debug"] + DurationUnit = Literal[ + "nanosecond", + "microsecond", + "millisecond", + "second", + "minute", + "hour", + "day", + "week", + ] + + InformationUnit = Literal[ + "bit", + "byte", + "kilobyte", + "kibibyte", + "megabyte", + "mebibyte", + "gigabyte", + "gibibyte", + "terabyte", + "tebibyte", + "petabyte", + "pebibyte", + "exabyte", + "exbibyte", + ] + + FractionUnit = Literal["ratio", "percent"] + MeasurementUnit = Union[DurationUnit, InformationUnit, FractionUnit, str] + + MeasurementValue = TypedDict( + "MeasurementValue", + { + "value": float, + "unit": Optional[MeasurementUnit], + }, + ) + Event = TypedDict( "Event", { @@ -118,37 +157,6 @@ ] SessionStatus = Literal["ok", "exited", "crashed", "abnormal"] - DurationUnit = Literal[ - "nanosecond", - "microsecond", - "millisecond", - "second", - "minute", - "hour", - "day", - "week", - ] - - InformationUnit = Literal[ - "bit", - "byte", - "kilobyte", - "kibibyte", - "megabyte", - "mebibyte", - "gigabyte", - "gibibyte", - "terabyte", - "tebibyte", - "petabyte", - "pebibyte", - "exabyte", - "exbibyte", - ] - - FractionUnit = Literal["ratio", "percent"] - MeasurementUnit = Union[DurationUnit, InformationUnit, FractionUnit, str] - ProfilerMode = Literal["sleep", "thread", "gevent", "unknown"] # Type of the metric. diff --git a/sentry_sdk/tracing.py b/sentry_sdk/tracing.py index 5bbdc0facb..7cfc8e4fbb 100644 --- a/sentry_sdk/tracing.py +++ b/sentry_sdk/tracing.py @@ -10,8 +10,7 @@ logger, nanosecond_time, ) -from sentry_sdk._types import TYPE_CHECKING - +from sentry_sdk._types import TYPE_CHECKING, MeasurementValue if TYPE_CHECKING: from collections.abc import Callable, Mapping, MutableMapping @@ -155,16 +154,6 @@ def add(self, span): self.spans.append(span) -class MeasurementValue(TypedDict): - """A measurement is an indexed, numeric value on a span or transaction. - It can be something like score.total to represent a webvital value, or ai.total_tokens - to represent the number of tokens used during that span's execution. - """ - - value: float - unit: Optional[MeasurementUnit] - - class Span: """A span holds timing information of a block of code. Spans can have multiple child spans thus forming a span tree. 
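Note: taken together, patches 01-04 make `set_measurement` available on individual spans rather than only on the enclosing transaction, with the `MeasurementValue` shape typed in `sentry_sdk._types`. Below is a minimal illustrative sketch of the resulting API; it is not part of the patch series, and the DSN, transaction/span names, and measurement keys are placeholders:

    import sentry_sdk

    sentry_sdk.init(dsn="...", traces_sample_rate=1.0)

    with sentry_sdk.start_transaction(op="task", name="process-batch") as transaction:
        # Transactions could already carry measurements before this series.
        transaction.set_measurement("batch.size", 128.0, "")
        with transaction.start_child(op="ai.run", description="llm call") as span:
            # New in patch 01: the measurement is stored on this specific span
            # and serialized under the span's "measurements" key by to_json().
            span.set_measurement("ai.total_tokens.used", 30.0, "")
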
From cb3a2371b8baf6301dbc1508062727a038768944 Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Wed, 10 Apr 2024 17:11:29 -0400 Subject: [PATCH 05/41] Add type to measurements in event --- sentry_sdk/_types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sentry_sdk/_types.py b/sentry_sdk/_types.py index 7117213944..fd0747eef3 100644 --- a/sentry_sdk/_types.py +++ b/sentry_sdk/_types.py @@ -88,7 +88,7 @@ "level": LogLevelStr, "logentry": Mapping[str, object], "logger": str, - "measurements": dict[str, object], + "measurements": dict[str, MeasurementValue], "message": str, "modules": dict[str, str], "monitor_config": Mapping[str, object], From 8550914a96dd425696b2ae56853bcf9d5b568dc9 Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Wed, 10 Apr 2024 17:13:36 -0400 Subject: [PATCH 06/41] Fix test --- sentry_sdk/tracing.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sentry_sdk/tracing.py b/sentry_sdk/tracing.py index 7cfc8e4fbb..36aab2896c 100644 --- a/sentry_sdk/tracing.py +++ b/sentry_sdk/tracing.py @@ -10,7 +10,7 @@ logger, nanosecond_time, ) -from sentry_sdk._types import TYPE_CHECKING, MeasurementValue +from sentry_sdk._types import TYPE_CHECKING if TYPE_CHECKING: from collections.abc import Callable, Mapping, MutableMapping @@ -31,7 +31,12 @@ R = TypeVar("R") import sentry_sdk.profiler - from sentry_sdk._types import Event, MeasurementUnit, SamplingContext + from sentry_sdk._types import ( + Event, + MeasurementUnit, + SamplingContext, + MeasurementValue, + ) class SpanKwargs(TypedDict, total=False): trace_id: str From eb30752af32b3506db0d3153c9cb7edc12809763 Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Tue, 26 Mar 2024 13:58:05 -0400 Subject: [PATCH 07/41] Initial work for langchain integration --- mypy.ini | 2 + .../split-tox-gh-actions.py | 1 + sentry_sdk/consts.py | 42 ++++ sentry_sdk/integrations/__init__.py | 1 + sentry_sdk/integrations/langchain.py | 190 ++++++++++++++++++ sentry_sdk/integrations/openai.py | 26 ++- setup.py | 1 + tox.ini | 10 + 8 files changed, 259 insertions(+), 14 deletions(-) create mode 100644 sentry_sdk/integrations/langchain.py diff --git a/mypy.ini b/mypy.ini index c1444d61e5..844e140de2 100644 --- a/mypy.ini +++ b/mypy.ini @@ -48,6 +48,8 @@ ignore_missing_imports = True ignore_missing_imports = True [mypy-asgiref.*] ignore_missing_imports = True +[mypy-langchain_core.*] +ignore_missing_imports = True [mypy-executing.*] ignore_missing_imports = True [mypy-asttokens.*] diff --git a/scripts/split-tox-gh-actions/split-tox-gh-actions.py b/scripts/split-tox-gh-actions/split-tox-gh-actions.py index 6b456c5544..288725d2c5 100755 --- a/scripts/split-tox-gh-actions/split-tox-gh-actions.py +++ b/scripts/split-tox-gh-actions/split-tox-gh-actions.py @@ -70,6 +70,7 @@ "beam", "celery", "huey", + "langchain", "openai", "rq", ], diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index b72701daed..515739de12 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -91,6 +91,48 @@ class SPANDATA: See: https://develop.sentry.dev/sdk/performance/span-data-conventions/ """ + AI_INPUT_MESSAGES = "ai.input_messages" + """ + The input messages to an LLM call. 
+    Example: [{"role": "user", "message": "hello"}]
+    """
+
+    AI_COMPLETION_TOKENS_USED = "ai.completion_tokens.used"
+    """
+    The number of tokens used to respond to an AI model request
+    Example: 10
+    """
+
+    AI_PROMPT_TOKENS_USED = "ai.prompt_tokens.used"
+    """
+    The number of tokens used to process the input text to an AI model request
+    Example: 20
+    """
+
+    AI_TOTAL_TOKENS_USED = "ai.total_tokens.used"
+    """
+    The number of tokens used in total to process an AI model request
+    Example: 30
+    """
+
+    AI_MODEL_ID = "ai.model_id"
+    """
+    The unique descriptor of the model being executed
+    Example: gpt-4
+    """
+
+    AI_STREAMING = "ai.streaming"
+    """
+    Whether or not the AI model call's response was streamed back asynchronously
+    Example: true
+    """
+
+    AI_RESPONSES = "ai.responses"
+    """
+    The responses to an AI model call. Always as a list.
+    Example: ["hello", "world"]
+    """
+
     DB_NAME = "db.name"
     """
     The name of the database being accessed. For commands that switch the database, this should be set to the target database (even if the command fails).
diff --git a/sentry_sdk/integrations/__init__.py b/sentry_sdk/integrations/__init__.py
index b0ec5e2d3e..f692e88294 100644
--- a/sentry_sdk/integrations/__init__.py
+++ b/sentry_sdk/integrations/__init__.py
@@ -85,6 +85,7 @@ def iter_default_integrations(with_auto_enabling_integrations):
     "sentry_sdk.integrations.graphene.GrapheneIntegration",
     "sentry_sdk.integrations.httpx.HttpxIntegration",
     "sentry_sdk.integrations.huey.HueyIntegration",
+    "sentry_sdk.integrations.langchain.LangchainIntegration",
     "sentry_sdk.integrations.loguru.LoguruIntegration",
     "sentry_sdk.integrations.openai.OpenAIIntegration",
     "sentry_sdk.integrations.pymongo.PyMongoIntegration",
diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py
new file mode 100644
index 0000000000..e423c213e0
--- /dev/null
+++ b/sentry_sdk/integrations/langchain.py
@@ -0,0 +1,190 @@
+from functools import wraps
+
+from sentry_sdk._types import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from typing import Any, List, Callable, Dict, Union
+from sentry_sdk.hub import Hub
+from sentry_sdk.integrations import DidNotEnable, Integration
+from sentry_sdk.utils import logger, capture_internal_exceptions, event_from_exception
+
+try:
+    from langchain_core.agents import AgentAction, AgentFinish
+    from langchain_core.messages import BaseMessage
+    from langchain_core.outputs import LLMResult
+    from langchain_core.callbacks import (
+        manager,
+        BaseCallbackHandler,
+    )
+except ImportError:
+    raise DidNotEnable("langchain not installed")
+
+try:
+    import tiktoken  # type: ignore
+
+    enc = tiktoken.get_encoding("cl100k_base")
+
+    def count_tokens(s):
+        # type: (str) -> int
+        return len(enc.encode_ordinary(s))
+
+    logger.debug("[langchain] using tiktoken to count tokens")
+except ImportError:
+    logger.info(
+        "The Sentry Python SDK requires 'tiktoken' in order to measure token usage from some Langchain APIs. "
+        "Please install 'tiktoken' if you aren't receiving token usage in Sentry. "
+        "See https://docs.sentry.io/platforms/python/integrations/langchain/ for more information."
+ ) + + def count_tokens(s): + # type: (str) -> int + return 0 + + +class LangchainIntegration(Integration): + identifier = "langchain" + + def __init__(self, include_prompts=False): + # type: (LangchainIntegration, bool) -> None + self.include_prompts = include_prompts + + @staticmethod + def setup_once(): + # type: () -> None + manager._configure = _wrap_configure(manager._configure) + + +def _capture_exception(hub, exc, type="langchain"): + # type: (Hub, Any, str) -> None + + if hub.client is not None: + event, hint = event_from_exception( + exc, + client_options=hub.client.options, + mechanism={"type": type, "handled": False}, + ) + hub.capture_event(event, hint=hint) + + +# TODO types +class SentryLangchainCallback(BaseCallbackHandler): + """Base callback handler that can be used to handle callbacks from langchain.""" + + def on_llm_start(self, serialized, prompts, **kwargs): + # type: (Dict[str, Any], List[str], **Any) -> Any + """Run when LLM starts running.""" + print("on_llm_start") + + def on_chat_model_start(self, serialized, messages, **kwargs): + # type: (Dict[str, Any], List[List[BaseMessage]], **Any) -> Any + """Run when Chat Model starts running.""" + print("on_chat_model_start") + + def on_llm_new_token(self, token, **kwargs): + # type: (str, **Any) -> Any + """Run on new LLM token. Only available when streaming is enabled.""" + print("new token") + + def on_llm_end(self, response, **kwargs): + # type: (LLMResult, **Any) -> Any + """Run when LLM ends running.""" + print("llm end") + + def on_llm_error(self, error, **kwargs): + # type: (Union[Exception, KeyboardInterrupt], **Any) -> Any + """Run when LLM errors.""" + hub = Hub.current + if hub: + _capture_exception(hub, error, "langchain-llm") + + def on_chain_start(self, serialized, inputs, **kwargs): + # type: (Dict[str, Any], Dict[str, Any], **kwargs) -> Any + """Run when chain starts running.""" + print("chain start: ", serialized) + + def on_chain_end(self, outputs, **kwargs): + # type: (Dict[str, Any], **Any) -> Any + """Run when chain ends running.""" + print("chain end") + + def on_chain_error(self, error, **kwargs): + # type: (Union[Exception, KeyboardInterrupt], **Any) -> Any + """Run when chain errors.""" + hub = Hub.current + if hub: + _capture_exception(hub, error, "langchain-chain") + + def on_tool_start(self, serialized, input_str, **kwargs): + # type: (Dict[str, Any], str, **Any) -> Any + """Run when tool starts running.""" + print("tool_start") + + def on_tool_end(self, output, **kwargs): + # type: (str, **Any) -> Any + """Run when tool ends running.""" + print("tool_end", output) + + def on_tool_error(self, error, **kwargs): + # type: (Union[Exception, KeyboardInterrupt], **Any) -> Any + """Run when tool errors.""" + hub = Hub.current + if hub: + _capture_exception(hub, error, "langchain-tool") + + def on_text(self, text, **kwargs): + # type: (str, Any) -> Any + """Run on arbitrary text.""" + print("text: ", text) + + def on_agent_action(self, action, **kwargs): + # type: (AgentAction, **Any) -> Any + """Run on agent action.""" + print("agent_action", action) + + def on_agent_finish(self, finish, **kwargs): + # type: (AgentFinish, **Any) -> Any + """Run on agent end.""" + print("agent_finish", finish) + + +def _wrap_configure(f): + # type: (Callable[..., Any]) -> Callable[..., Any] + + @wraps(f) + def new_configure(*args, **kwargs): + # type: (*Any, **Any) -> Any + + with capture_internal_exceptions(): + new_callbacks = [] + if "local_callbacks" in kwargs: + existing_callbacks = kwargs["local_callbacks"] + 
kwargs["local_callbacks"] = new_callbacks + elif len(args) > 2: + existing_callbacks = args[2] + args = ( + args[0], + args[1], + new_callbacks, + ) + args[3:] + else: + existing_callbacks = [] + + if existing_callbacks: + if isinstance(existing_callbacks, list): + for cb in existing_callbacks: + new_callbacks.append(cb) + elif isinstance(existing_callbacks, BaseCallbackHandler): + new_callbacks.append(existing_callbacks) + else: + logger.warn("Unknown callback type: %s", existing_callbacks) + + already_added = False + for callback in new_callbacks: + if isinstance(callback, SentryLangchainCallback): + already_added = True + + if not already_added: + new_callbacks.append(SentryLangchainCallback()) + return f(*args, **kwargs) + + return new_configure diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 0d77a27ec0..4460eb8828 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -2,6 +2,7 @@ from sentry_sdk import consts from sentry_sdk._types import TYPE_CHECKING +from sentry_sdk.consts import SPANDATA if TYPE_CHECKING: from typing import Any, Iterable, List, Optional, Callable, Iterator @@ -48,11 +49,6 @@ def count_tokens(s): return 0 -COMPLETION_TOKENS_USED = "ai.completion_tоkens.used" -PROMPT_TOKENS_USED = "ai.prompt_tоkens.used" -TOTAL_TOKENS_USED = "ai.total_tоkens.used" - - class OpenAIIntegration(Integration): identifier = "openai" @@ -138,11 +134,11 @@ def _calculate_chat_completion_usage( total_tokens = prompt_tokens + completion_tokens if completion_tokens != 0: - set_data_normalized(span, COMPLETION_TOKENS_USED, completion_tokens) + set_data_normalized(span, SPANDATA.AI_COMPLETION_TOKENS_USED, completion_tokens) if prompt_tokens != 0: - set_data_normalized(span, PROMPT_TOKENS_USED, prompt_tokens) + set_data_normalized(span, SPANDATA.AI_PROMPT_TOKENS_USED, prompt_tokens) if total_tokens != 0: - set_data_normalized(span, TOTAL_TOKENS_USED, total_tokens) + set_data_normalized(span, SPANDATA.AI_TOTAL_TOKENS_USED, total_tokens) def _wrap_chat_completion_create(f): @@ -181,10 +177,10 @@ def new_chat_completion(*args, **kwargs): with capture_internal_exceptions(): if should_send_default_pii() and integration.include_prompts: - set_data_normalized(span, "ai.input_messages", messages) + set_data_normalized(span, SPANDATA.AI_INPUT_MESSAGES, messages) - set_data_normalized(span, "ai.model_id", model) - set_data_normalized(span, "ai.streaming", streaming) + set_data_normalized(span, SPANDATA.AI_MODEL_ID, model) + set_data_normalized(span, SPANDATA.AI_STREAMING, streaming) if hasattr(res, "choices"): if should_send_default_pii() and integration.include_prompts: @@ -224,7 +220,9 @@ def new_iterator(): should_send_default_pii() and integration.include_prompts ): - set_data_normalized(span, "ai.responses", all_responses) + set_data_normalized( + span, SPANDATA.AI_RESPONSES, all_responses + ) _calculate_chat_completion_usage( messages, res, span, all_responses ) @@ -288,8 +286,8 @@ def new_embeddings_create(*args, **kwargs): if total_tokens == 0: total_tokens = prompt_tokens - set_data_normalized(span, PROMPT_TOKENS_USED, prompt_tokens) - set_data_normalized(span, TOTAL_TOKENS_USED, total_tokens) + set_data_normalized(span, SPANDATA.AI_PROMPT_TOKENS_USED, prompt_tokens) + set_data_normalized(span, SPANDATA.AI_TOTAL_TOKENS_USED, total_tokens) return response diff --git a/setup.py b/setup.py index 037a621ddf..bef9842119 100644 --- a/setup.py +++ b/setup.py @@ -59,6 +59,7 @@ def get_file_text(file_name): "grpcio": 
["grpcio>=1.21.1"], "httpx": ["httpx>=0.16.0"], "huey": ["huey>=2"], + "langchain": ["langchain>=0.0.210"], "loguru": ["loguru>=0.5"], "openai": ["openai>=1.0.0", "tiktoken>=0.3.0"], "opentelemetry": ["opentelemetry-distro>=0.35b0"], diff --git a/tox.ini b/tox.ini index e193de52b1..37abd54277 100644 --- a/tox.ini +++ b/tox.ini @@ -140,6 +140,11 @@ envlist = {py3.6,py3.11,py3.12}-huey-v{2.0} {py3.6,py3.11,py3.12}-huey-latest + # Langchain + {py3.9,3.11,3.12}-langchain-0.0 + {py3.9,3.11,3.12}-langchain-0.1 + {py3.9,3.11,3.12}-langchain-latest + # Loguru {py3.6,py3.11,py3.12}-loguru-v{0.5} {py3.6,py3.11,py3.12}-loguru-latest @@ -437,6 +442,11 @@ deps = huey-v2.0: huey~=2.0.0 huey-latest: huey + # Langchain + langchain-0.0: langchain~=0.0.210 + langchain-0.1: langchain~=0.1.11 + langchain-latest: langchain + # Loguru loguru-v0.5: loguru~=0.5.0 loguru-latest: loguru From 26a9b9a78c1edb0493a374506f90302755d35c49 Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Tue, 26 Mar 2024 14:01:44 -0400 Subject: [PATCH 08/41] Add more langchain tox targets --- .github/workflows/test-integrations-data-processing.yml | 8 ++++++++ tox.ini | 4 ++++ 2 files changed, 12 insertions(+) diff --git a/.github/workflows/test-integrations-data-processing.yml b/.github/workflows/test-integrations-data-processing.yml index ebcd89efea..1f618bd93d 100644 --- a/.github/workflows/test-integrations-data-processing.yml +++ b/.github/workflows/test-integrations-data-processing.yml @@ -58,6 +58,10 @@ jobs: run: | set -x # print commands that are executed ./scripts/runtox.sh "py${{ matrix.python-version }}-huey-latest" --cov=tests --cov=sentry_sdk --cov-report= --cov-branch + - name: Test langchain latest + run: | + set -x # print commands that are executed + ./scripts/runtox.sh "py${{ matrix.python-version }}-langchain-latest" --cov=tests --cov=sentry_sdk --cov-report= --cov-branch - name: Test openai latest run: | set -x # print commands that are executed @@ -114,6 +118,10 @@ jobs: run: | set -x # print commands that are executed ./scripts/runtox.sh --exclude-latest "py${{ matrix.python-version }}-huey" --cov=tests --cov=sentry_sdk --cov-report= --cov-branch + - name: Test langchain pinned + run: | + set -x # print commands that are executed + ./scripts/runtox.sh --exclude-latest "py${{ matrix.python-version }}-langchain" --cov=tests --cov=sentry_sdk --cov-report= --cov-branch - name: Test openai pinned run: | set -x # print commands that are executed diff --git a/tox.ini b/tox.ini index 37abd54277..96f75e84b1 100644 --- a/tox.ini +++ b/tox.ini @@ -444,8 +444,12 @@ deps = # Langchain langchain-0.0: langchain~=0.0.210 + langchain-0.0: tiktoken~=0.6.0 langchain-0.1: langchain~=0.1.11 + langchain-0.1: tiktoken~=0.6.0 langchain-latest: langchain + langchain-latest: tiktoken~=0.6.0 + langchain-notiktoken: langchain # Loguru loguru-v0.5: loguru~=0.5.0 From a491d3f26ca2caa3b1bd428f88eba321e4455f62 Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Tue, 26 Mar 2024 15:53:54 -0400 Subject: [PATCH 09/41] Add an LRU cache of spans and send things upstream --- sentry_sdk/consts.py | 2 + sentry_sdk/integrations/langchain.py | 195 +++++++++++++++++++-------- 2 files changed, 140 insertions(+), 57 deletions(-) diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index 515739de12..789563a905 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -287,6 +287,8 @@ class OP: MIDDLEWARE_STARLITE_SEND = "middleware.starlite.send" OPENAI_CHAT_COMPLETIONS_CREATE = "ai.chat_completions.create.openai" OPENAI_EMBEDDINGS_CREATE = 
"ai.embeddings.create.openai" + LANGCHAIN_INFERENCE = "ai.inference.langchain" + LANGCHAIN_CHAT_COMPLETIONS_CREATE = "ai.chat_completions.create.langchain" QUEUE_SUBMIT_ARQ = "queue.submit.arq" QUEUE_TASK_ARQ = "queue.task.arq" QUEUE_SUBMIT_CELERY = "queue.submit.celery" diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index e423c213e0..f9478baf30 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -1,10 +1,14 @@ +from collections import OrderedDict from functools import wraps +import sentry_sdk from sentry_sdk._types import TYPE_CHECKING +from sentry_sdk.consts import OP +from sentry_sdk.tracing import Span if TYPE_CHECKING: - from typing import Any, List, Callable, Dict, Union -from sentry_sdk.hub import Hub + from typing import Any, List, Callable, Dict, Union, Optional + from uuid import UUID from sentry_sdk.integrations import DidNotEnable, Integration from sentry_sdk.utils import logger, capture_internal_exceptions, event_from_exception @@ -44,9 +48,13 @@ def count_tokens(s): class LangchainIntegration(Integration): identifier = "langchain" - def __init__(self, include_prompts=False): + # The most number of spans (e.g., LLM calls) that can be processed at the same time. + max_spans = 1024 + + def __init__(self, include_prompts=False, max_spans=1024): # type: (LangchainIntegration, bool) -> None self.include_prompts = include_prompts + self.max_spans = max_spans @staticmethod def setup_once(): @@ -54,97 +62,168 @@ def setup_once(): manager._configure = _wrap_configure(manager._configure) -def _capture_exception(hub, exc, type="langchain"): - # type: (Hub, Any, str) -> None +def _capture_exception(exc, type="langchain"): + # type: (Any, str) -> None - if hub.client is not None: - event, hint = event_from_exception( - exc, - client_options=hub.client.options, - mechanism={"type": type, "handled": False}, - ) - hub.capture_event(event, hint=hint) + event, hint = event_from_exception( + exc, + client_options=sentry_sdk.get_client().options, + mechanism={"type": type, "handled": False}, + ) + sentry_sdk.capture_event(event, hint=hint) + + +class WatchedSpan: + span = None # type: Span + num_tokens = 0 # type: int + + def __init__(self, span, num_tokens=0): + # type: (Span, int) -> None + self.span = span + self.num_tokens = num_tokens -# TODO types class SentryLangchainCallback(BaseCallbackHandler): """Base callback handler that can be used to handle callbacks from langchain.""" - def on_llm_start(self, serialized, prompts, **kwargs): - # type: (Dict[str, Any], List[str], **Any) -> Any + span_map = OrderedDict() # type: OrderedDict[UUID, WatchedSpan] + + max_span_map_size = 0 + + def __init__(self, max_span_map_size): + self.max_span_map_size = max_span_map_size + + def gc_span_map(self): + while len(self.span_map) > self.max_span_map_size: + self.span_map.popitem(last=False) + + def on_llm_start( + self, + serialized, + prompts, + *, + run_id, + tags=None, + parent_run_id=None, + metadata=None, + name=None, + **kwargs, + ): + # type: (Dict[str, Any], List[str], *Any, UUID, Optional[List[str]], Optional[UUID], Optional[Dict[str, Any]], Optional[str], **Any) -> Any """Run when LLM starts running.""" - print("on_llm_start") + if not run_id: + return + span = sentry_sdk.start_span( + op=OP.LANGCHAIN_INFERENCE, description="Langchain LLM call" + ) + self.span_map[run_id] = WatchedSpan(span) + self.gc_span_map() + span.__enter__() - def on_chat_model_start(self, serialized, messages, **kwargs): - # type: 
(Dict[str, Any], List[List[BaseMessage]], **Any) -> Any + def on_chat_model_start(self, serialized, messages, *, run_id, **kwargs): + # type: (Dict[str, Any], List[List[BaseMessage]], *Any, UUID, **Any) -> Any """Run when Chat Model starts running.""" - print("on_chat_model_start") + if not run_id: + return + span = sentry_sdk.start_span( + op=OP.LANGCHAIN_INFERENCE, description="Langchain LLM call" + ) + self.span_map[run_id] = WatchedSpan(span) + self.gc_span_map() + span.__enter__() - def on_llm_new_token(self, token, **kwargs): - # type: (str, **Any) -> Any + def on_llm_new_token(self, token, *, run_id, **kwargs): + # type: (str, *Any, UUID, **Any) -> Any """Run on new LLM token. Only available when streaming is enabled.""" - print("new token") - - def on_llm_end(self, response, **kwargs): - # type: (LLMResult, **Any) -> Any + if not run_id or not self.span_map[run_id]: + return + span_data = self.span_map[run_id] + if not span_data: + return + span_data.num_tokens += count_tokens(token) + + def on_llm_end(self, response, *, run_id, **kwargs): + # type: (LLMResult, *Any, UUID, **Any) -> Any """Run when LLM ends running.""" + if not run_id: + return + + span_data = self.span_map[run_id] + if not span_data: + return + span_data.span.__exit__(None, None, None) + print("llm end") - def on_llm_error(self, error, **kwargs): - # type: (Union[Exception, KeyboardInterrupt], **Any) -> Any + def on_llm_error(self, error, *, run_id, **kwargs): + # type: (Union[Exception, KeyboardInterrupt], *Any, UUID, **Any) -> Any """Run when LLM errors.""" - hub = Hub.current - if hub: - _capture_exception(hub, error, "langchain-llm") + _capture_exception(error, "langchain-llm") - def on_chain_start(self, serialized, inputs, **kwargs): - # type: (Dict[str, Any], Dict[str, Any], **kwargs) -> Any + def on_chain_start(self, serialized, inputs, *, run_id, **kwargs): + # type: (Dict[str, Any], Dict[str, Any], *Any, UUID, **Any) -> Any """Run when chain starts running.""" - print("chain start: ", serialized) + if not run_id: + return + span = sentry_sdk.start_span( + op=OP.LANGCHAIN_INFERENCE, description="Langchain chain execution" + ) + self.span_map[run_id] = WatchedSpan(span) + self.gc_span_map() + span.__enter__() - def on_chain_end(self, outputs, **kwargs): - # type: (Dict[str, Any], **Any) -> Any + def on_chain_end(self, outputs, *, run_id, **kwargs): + # type: (Dict[str, Any], *Any, UUID, **Any) -> Any """Run when chain ends running.""" + if not run_id or not self.span_map[run_id]: + return + + span_data = self.span_map[run_id] + if not span_data: + return + span_data.span.__exit__(None, None, None) print("chain end") def on_chain_error(self, error, **kwargs): # type: (Union[Exception, KeyboardInterrupt], **Any) -> Any """Run when chain errors.""" - hub = Hub.current - if hub: - _capture_exception(hub, error, "langchain-chain") + _capture_exception(error, "langchain-chain") - def on_tool_start(self, serialized, input_str, **kwargs): - # type: (Dict[str, Any], str, **Any) -> Any + def on_tool_start(self, serialized, input_str, *, run_id, **kwargs): + # type: (Dict[str, Any], str, *Any, UUID, **Any) -> Any """Run when tool starts running.""" + if not run_id: + return print("tool_start") - def on_tool_end(self, output, **kwargs): - # type: (str, **Any) -> Any + def on_tool_end(self, output, *, run_id, **kwargs): + # type: (str, *Any, UUID, **Any) -> Any """Run when tool ends running.""" + if not run_id or not self.span_map[run_id]: + return + + span_data = self.span_map[run_id] + if not span_data: + return + 
span_data.span.__exit__(None, None, None) print("tool_end", output) def on_tool_error(self, error, **kwargs): # type: (Union[Exception, KeyboardInterrupt], **Any) -> Any """Run when tool errors.""" - hub = Hub.current - if hub: - _capture_exception(hub, error, "langchain-tool") + _capture_exception(error, "langchain-tool") - def on_text(self, text, **kwargs): - # type: (str, Any) -> Any - """Run on arbitrary text.""" - print("text: ", text) - - def on_agent_action(self, action, **kwargs): - # type: (AgentAction, **Any) -> Any + def on_agent_action(self, action, *, run_id, **kwargs): + # type: (AgentAction, *Any, UUID, **Any) -> Any """Run on agent action.""" - print("agent_action", action) + if not run_id: + return - def on_agent_finish(self, finish, **kwargs): - # type: (AgentFinish, **Any) -> Any + def on_agent_finish(self, finish, *, run_id, **kwargs): + # type: (AgentFinish, *Any, UUID, **Any) -> Any """Run on agent end.""" - print("agent_finish", finish) + if not run_id: + return def _wrap_configure(f): @@ -154,6 +233,8 @@ def _wrap_configure(f): def new_configure(*args, **kwargs): # type: (*Any, **Any) -> Any + integration = sentry_sdk.get_client().get_integration(LangchainIntegration) + with capture_internal_exceptions(): new_callbacks = [] if "local_callbacks" in kwargs: @@ -184,7 +265,7 @@ def new_configure(*args, **kwargs): already_added = True if not already_added: - new_callbacks.append(SentryLangchainCallback()) + new_callbacks.append(SentryLangchainCallback(integration.max_spans)) return f(*args, **kwargs) return new_configure From 442942ea8940ed747c9ae7ad39afd937dedf24b1 Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Wed, 27 Mar 2024 12:46:49 -0400 Subject: [PATCH 10/41] Start writing the tests, PII gates --- sentry_sdk/consts.py | 2 +- sentry_sdk/integrations/langchain.py | 185 ++++++++++-------- tests/integrations/langchain/__init__.py | 3 + .../integrations/langchain/test_langchain.py | 39 ++++ tests/integrations/openai/test_openai.py | 24 +-- tox.ini | 4 - 6 files changed, 158 insertions(+), 99 deletions(-) create mode 100644 tests/integrations/langchain/__init__.py create mode 100644 tests/integrations/langchain/test_langchain.py diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index 789563a905..bb19e56b55 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -287,7 +287,7 @@ class OP: MIDDLEWARE_STARLITE_SEND = "middleware.starlite.send" OPENAI_CHAT_COMPLETIONS_CREATE = "ai.chat_completions.create.openai" OPENAI_EMBEDDINGS_CREATE = "ai.embeddings.create.openai" - LANGCHAIN_INFERENCE = "ai.inference.langchain" + LANGCHAIN_RUN = "ai.run.langchain" LANGCHAIN_CHAT_COMPLETIONS_CREATE = "ai.chat_completions.create.langchain" QUEUE_SUBMIT_ARQ = "queue.submit.arq" QUEUE_TASK_ARQ = "queue.task.arq" diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index f9478baf30..d054300bff 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -3,17 +3,17 @@ import sentry_sdk from sentry_sdk._types import TYPE_CHECKING -from sentry_sdk.consts import OP +from sentry_sdk.consts import OP, SPANDATA +from sentry_sdk.scope import should_send_default_pii from sentry_sdk.tracing import Span if TYPE_CHECKING: from typing import Any, List, Callable, Dict, Union, Optional from uuid import UUID from sentry_sdk.integrations import DidNotEnable, Integration -from sentry_sdk.utils import logger, capture_internal_exceptions, event_from_exception +from sentry_sdk.utils import logger, 
capture_internal_exceptions try: - from langchain_core.agents import AgentAction, AgentFinish from langchain_core.messages import BaseMessage from langchain_core.outputs import LLMResult from langchain_core.callbacks import ( @@ -23,27 +23,6 @@ except ImportError: raise DidNotEnable("langchain not installed") -try: - import tiktoken # type: ignore - - enc = tiktoken.get_encoding("cl100k_base") - - def count_tokens(s): - # type: (str) -> int - return len(enc.encode_ordinary(s)) - - logger.debug("[langchain] using tiktoken to count tokens") -except ImportError: - logger.info( - "The Sentry Python SDK requires 'tiktoken' in order to measure token usage from some Langchain APIs" - "Please install 'tiktoken' if you aren't receiving token usage in Sentry." - "See https://docs.sentry.io/platforms/python/integrations/langchain/ for more information." - ) - - def count_tokens(s): - # type: (str) -> int - return 0 - class LangchainIntegration(Integration): identifier = "langchain" @@ -52,7 +31,7 @@ class LangchainIntegration(Integration): max_spans = 1024 def __init__(self, include_prompts=False, max_spans=1024): - # type: (LangchainIntegration, bool) -> None + # type: (LangchainIntegration, bool, int) -> None self.include_prompts = include_prompts self.max_spans = max_spans @@ -62,17 +41,6 @@ def setup_once(): manager._configure = _wrap_configure(manager._configure) -def _capture_exception(exc, type="langchain"): - # type: (Any, str) -> None - - event, hint = event_from_exception( - exc, - client_options=sentry_sdk.get_client().options, - mechanism={"type": type, "handled": False}, - ) - sentry_sdk.capture_event(event, hint=hint) - - class WatchedSpan: span = None # type: Span num_tokens = 0 # type: int @@ -90,12 +58,42 @@ class SentryLangchainCallback(BaseCallbackHandler): max_span_map_size = 0 - def __init__(self, max_span_map_size): + def __init__(self, max_span_map_size, include_prompts): self.max_span_map_size = max_span_map_size + self.include_prompts = include_prompts def gc_span_map(self): while len(self.span_map) > self.max_span_map_size: - self.span_map.popitem(last=False) + self.span_map.popitem(last=False)[1].span.__exit__(None, None, None) + + def _handle_error(self, run_id, error): + # type: (str, Any) -> None + if not run_id or not self.span_map[run_id]: + return + + span_data = self.span_map[run_id] + if not span_data: + return + sentry_sdk.capture_exception(error, span_data.span.scope) + span_data.span.__exit__(None, None, None) + del self.span_map[run_id] + + def _create_span(self, run_id, parent_id, **kwargs): + # type: (UUID, Optional[UUID], **Any) -> WatchedSpan + + span = None # type: Optional[Span] + if parent_id: + parent_span = self.span_map[parent_id] # type: Optional[WatchedSpan] + if parent_span: + span = parent_span.span.start_child(**kwargs) + if span is None: + span = sentry_sdk.start_span(**kwargs) + + span.__enter__() + watched_span = WatchedSpan(span) + self.span_map[run_id] = watched_span + self.gc_span_map() + return watched_span def on_llm_start( self, @@ -106,31 +104,36 @@ def on_llm_start( tags=None, parent_run_id=None, metadata=None, - name=None, **kwargs, ): - # type: (Dict[str, Any], List[str], *Any, UUID, Optional[List[str]], Optional[UUID], Optional[Dict[str, Any]], Optional[str], **Any) -> Any + # type: (Dict[str, Any], List[str], *Any, UUID, Optional[List[str]], Optional[UUID], Optional[Dict[str, Any]], **Any) -> Any """Run when LLM starts running.""" if not run_id: return - span = sentry_sdk.start_span( - op=OP.LANGCHAIN_INFERENCE, 
description="Langchain LLM call" + watched_span = self._create_span( + run_id, + kwargs.get("parent_run_id"), + op=OP.LANGCHAIN_RUN, + description=kwargs.get("name", "Langchain LLM call"), ) - self.span_map[run_id] = WatchedSpan(span) - self.gc_span_map() - span.__enter__() + if should_send_default_pii() and self.include_prompts: + watched_span.span.set_data(SPANDATA.AI_INPUT_MESSAGES, prompts) def on_chat_model_start(self, serialized, messages, *, run_id, **kwargs): # type: (Dict[str, Any], List[List[BaseMessage]], *Any, UUID, **Any) -> Any """Run when Chat Model starts running.""" if not run_id: return - span = sentry_sdk.start_span( - op=OP.LANGCHAIN_INFERENCE, description="Langchain LLM call" + + watched_span = self._create_span( + run_id, + kwargs.get("parent_run_id"), + op=OP.LANGCHAIN_CHAT_COMPLETIONS_CREATE, + description=kwargs.get("name", kwargs.get("name", "Langchain Chat Model")), ) - self.span_map[run_id] = WatchedSpan(span) - self.gc_span_map() - span.__enter__() + # TODO model ids + if should_send_default_pii() and self.include_prompts: + watched_span.span.set_data(SPANDATA.AI_INPUT_MESSAGES, messages) def on_llm_new_token(self, token, *, run_id, **kwargs): # type: (str, *Any, UUID, **Any) -> Any @@ -140,7 +143,7 @@ def on_llm_new_token(self, token, *, run_id, **kwargs): span_data = self.span_map[run_id] if not span_data: return - span_data.num_tokens += count_tokens(token) + span_data.num_tokens += 1 def on_llm_end(self, response, *, run_id, **kwargs): # type: (LLMResult, *Any, UUID, **Any) -> Any @@ -148,29 +151,52 @@ def on_llm_end(self, response, *, run_id, **kwargs): if not run_id: return + token_usage = ( + response.llm_output.get("token_usage") if response.llm_output else None + ) + span_data = self.span_map[run_id] if not span_data: return - span_data.span.__exit__(None, None, None) - print("llm end") + if should_send_default_pii() and self.include_prompts: + span_data.span.set_data(SPANDATA.AI_RESPONSES, response.generations) + + if token_usage: + span_data.span.set_data( + SPANDATA.AI_PROMPT_TOKENS_USED, token_usage.get("prompt_tokens") + ) + span_data.span.set_data( + SPANDATA.AI_COMPLETION_TOKENS_USED, token_usage.get("completion_tokens") + ) + span_data.span.set_data( + SPANDATA.AI_TOTAL_TOKENS_USED, token_usage.get("total_tokens") + ) + elif span_data.num_tokens: + span_data.span.set_data( + SPANDATA.AI_COMPLETION_TOKENS_USED, span_data.num_tokens + ) + span_data.span.set_data(SPANDATA.AI_TOTAL_TOKENS_USED, span_data.num_tokens) + + span_data.span.__exit__(None, None, None) + del self.span_map[run_id] def on_llm_error(self, error, *, run_id, **kwargs): # type: (Union[Exception, KeyboardInterrupt], *Any, UUID, **Any) -> Any """Run when LLM errors.""" - _capture_exception(error, "langchain-llm") + self._handle_error(run_id, error) def on_chain_start(self, serialized, inputs, *, run_id, **kwargs): # type: (Dict[str, Any], Dict[str, Any], *Any, UUID, **Any) -> Any """Run when chain starts running.""" if not run_id: return - span = sentry_sdk.start_span( - op=OP.LANGCHAIN_INFERENCE, description="Langchain chain execution" + self._create_span( + run_id, + kwargs.get("parent_run_id"), + op=OP.LANGCHAIN_RUN, + description=kwargs.get("name", "Chain execution"), ) - self.span_map[run_id] = WatchedSpan(span) - self.gc_span_map() - span.__enter__() def on_chain_end(self, outputs, *, run_id, **kwargs): # type: (Dict[str, Any], *Any, UUID, **Any) -> Any @@ -182,19 +208,28 @@ def on_chain_end(self, outputs, *, run_id, **kwargs): if not span_data: return 
span_data.span.__exit__(None, None, None) - print("chain end") + del self.span_map[run_id] - def on_chain_error(self, error, **kwargs): - # type: (Union[Exception, KeyboardInterrupt], **Any) -> Any + def on_chain_error(self, error, *, run_id, **kwargs): + # type: (Union[Exception, KeyboardInterrupt], *Any, UUID, **Any) -> Any """Run when chain errors.""" - _capture_exception(error, "langchain-chain") + self._handle_error(run_id, error) def on_tool_start(self, serialized, input_str, *, run_id, **kwargs): # type: (Dict[str, Any], str, *Any, UUID, **Any) -> Any """Run when tool starts running.""" if not run_id: return - print("tool_start") + watched_span = self._create_span( + run_id, + kwargs.get("parent_run_id"), + op=OP.LANGCHAIN_RUN, + description=kwargs.get("name", "AI tool usage"), + ) + if should_send_default_pii() and self.include_prompts: + watched_span.span.set_data( + SPANDATA.AI_INPUT_MESSAGES, kwargs.get("inputs", [input_str]) + ) def on_tool_end(self, output, *, run_id, **kwargs): # type: (str, *Any, UUID, **Any) -> Any @@ -205,25 +240,15 @@ def on_tool_end(self, output, *, run_id, **kwargs): span_data = self.span_map[run_id] if not span_data: return + if should_send_default_pii() and self.include_prompts: + span_data.span.set_data(SPANDATA.AI_RESPONSES, [output]) span_data.span.__exit__(None, None, None) - print("tool_end", output) + del self.span_map[run_id] - def on_tool_error(self, error, **kwargs): - # type: (Union[Exception, KeyboardInterrupt], **Any) -> Any + def on_tool_error(self, error, *args, run_id, **kwargs): + # type: (Union[Exception, KeyboardInterrupt], *Any, UUID, **Any) -> Any """Run when tool errors.""" - _capture_exception(error, "langchain-tool") - - def on_agent_action(self, action, *, run_id, **kwargs): - # type: (AgentAction, *Any, UUID, **Any) -> Any - """Run on agent action.""" - if not run_id: - return - - def on_agent_finish(self, finish, *, run_id, **kwargs): - # type: (AgentFinish, *Any, UUID, **Any) -> Any - """Run on agent end.""" - if not run_id: - return + self._handle_error(run_id, error) def _wrap_configure(f): diff --git a/tests/integrations/langchain/__init__.py b/tests/integrations/langchain/__init__.py new file mode 100644 index 0000000000..a286454a56 --- /dev/null +++ b/tests/integrations/langchain/__init__.py @@ -0,0 +1,3 @@ +import pytest + +pytest.importorskip("langchain_core") diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py new file mode 100644 index 0000000000..048a51c729 --- /dev/null +++ b/tests/integrations/langchain/test_langchain.py @@ -0,0 +1,39 @@ +import pytest + +from sentry_sdk import start_transaction +from sentry_sdk.consts import SPANDATA +from sentry_sdk.integrations.langchain import LangchainIntegration + + +@pytest.mark.parametrize( + "send_default_pii, include_prompts", + [(True, True), (True, False), (False, True), (False, False)], +) +def test_nonstreaming_chat_completion( + sentry_init, capture_events, send_default_pii, include_prompts +): + sentry_init( + integrations=[LangchainIntegration(include_prompts=include_prompts)], + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + ) + events = capture_events() + + with start_transaction(): + pass + + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "ai.chat_completions.create.openai" + + if send_default_pii and include_prompts: + assert "hello" in span["data"]["ai.input_messages"][0]["content"] + assert "the model response" in 
span["data"]["ai.responses"][0]["content"] + else: + assert "ai.input_messages" not in span["data"] + assert "ai.responses" not in span["data"] + + assert span["data"][SPANDATA.AI_COMPLETION_TOKENS_USED] == 10 + assert span["data"][SPANDATA.AI_PROMPT_TOKENS_USED] == 20 + assert span["data"][SPANDATA.AI_TOTAL_TOKENS_USED] == 30 diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 074d859274..91634b4b37 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -7,12 +7,8 @@ from openai.types.create_embedding_response import Usage as EmbeddingTokenUsage from sentry_sdk import start_transaction -from sentry_sdk.integrations.openai import ( - OpenAIIntegration, - COMPLETION_TOKENS_USED, - PROMPT_TOKENS_USED, - TOTAL_TOKENS_USED, -) +from sentry_sdk.consts import SPANDATA +from sentry_sdk.integrations.openai import OpenAIIntegration from unittest import mock # python 3.3 and above @@ -78,9 +74,9 @@ def test_nonstreaming_chat_completion( assert "ai.input_messages" not in span["data"] assert "ai.responses" not in span["data"] - assert span["data"][COMPLETION_TOKENS_USED] == 10 - assert span["data"][PROMPT_TOKENS_USED] == 20 - assert span["data"][TOTAL_TOKENS_USED] == 30 + assert span["data"][SPANDATA.AI_COMPLETION_TOKENS_USED] == 10 + assert span["data"][SPANDATA.AI_PROMPT_TOKENS_USED] == 20 + assert span["data"][SPANDATA.AI_TOTAL_TOKENS_USED] == 30 # noinspection PyTypeChecker @@ -160,9 +156,9 @@ def test_streaming_chat_completion( try: import tiktoken # type: ignore # noqa # pylint: disable=unused-import - assert span["data"][COMPLETION_TOKENS_USED] == 2 - assert span["data"][PROMPT_TOKENS_USED] == 1 - assert span["data"][TOTAL_TOKENS_USED] == 3 + assert span["data"][SPANDATA.AI_COMPLETION_TOKENS_USED] == 2 + assert span["data"][SPANDATA.AI_PROMPT_TOKENS_USED] == 1 + assert span["data"][SPANDATA.AI_TOTAL_TOKENS_USED] == 3 except ImportError: pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly @@ -227,5 +223,5 @@ def test_embeddings_create( else: assert "ai.input_messages" not in span["data"] - assert span["data"][PROMPT_TOKENS_USED] == 20 - assert span["data"][TOTAL_TOKENS_USED] == 30 + assert span["data"][SPANDATA.AI_PROMPT_TOKENS_USED] == 20 + assert span["data"][SPANDATA.AI_TOTAL_TOKENS_USED] == 30 diff --git a/tox.ini b/tox.ini index 96f75e84b1..37abd54277 100644 --- a/tox.ini +++ b/tox.ini @@ -444,12 +444,8 @@ deps = # Langchain langchain-0.0: langchain~=0.0.210 - langchain-0.0: tiktoken~=0.6.0 langchain-0.1: langchain~=0.1.11 - langchain-0.1: tiktoken~=0.6.0 langchain-latest: langchain - langchain-latest: tiktoken~=0.6.0 - langchain-notiktoken: langchain # Loguru loguru-v0.5: loguru~=0.5.0 From f6f27d7e4087e460fa92d68a0856e66ca151746b Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Wed, 27 Mar 2024 16:01:31 -0400 Subject: [PATCH 11/41] Finish test for langchain --- sentry_sdk/consts.py | 1 + sentry_sdk/integrations/_ai_common.py | 30 +++ sentry_sdk/integrations/langchain.py | 219 ++++++++++-------- sentry_sdk/integrations/openai.py | 23 +- .../integrations/langchain/test_langchain.py | 154 ++++++++++-- 5 files changed, 299 insertions(+), 128 deletions(-) create mode 100644 sentry_sdk/integrations/_ai_common.py diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index bb19e56b55..c696f78ebf 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -288,6 +288,7 @@ class OP: OPENAI_CHAT_COMPLETIONS_CREATE = 
"ai.chat_completions.create.openai" OPENAI_EMBEDDINGS_CREATE = "ai.embeddings.create.openai" LANGCHAIN_RUN = "ai.run.langchain" + LANGCHAIN_TOOL = "ai.tool.langchain" LANGCHAIN_CHAT_COMPLETIONS_CREATE = "ai.chat_completions.create.langchain" QUEUE_SUBMIT_ARQ = "queue.submit.arq" QUEUE_TASK_ARQ = "queue.task.arq" diff --git a/sentry_sdk/integrations/_ai_common.py b/sentry_sdk/integrations/_ai_common.py new file mode 100644 index 0000000000..4a416cfa46 --- /dev/null +++ b/sentry_sdk/integrations/_ai_common.py @@ -0,0 +1,30 @@ +from sentry_sdk._types import TYPE_CHECKING + +if TYPE_CHECKING: + from typing import Any + +from sentry_sdk.tracing import Span +from sentry_sdk.utils import logger + + +def _normalize_data(data): + # type: (Any) -> Any + + # convert pydantic data (e.g. OpenAI v1+) to json compatible format + if hasattr(data, "model_dump"): + try: + return data.model_dump() + except Exception as e: + logger.warning("Could not convert pydantic data to JSON: %s", e) + return data + if isinstance(data, list): + return list(_normalize_data(x) for x in data) + if isinstance(data, dict): + return {k: _normalize_data(v) for (k, v) in data.items()} + return data + + +def set_data_normalized(span, key, value): + # type: (Span, str, Any) -> None + normalized = _normalize_data(value) + span.set_data(key, normalized) diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index d054300bff..21519db79d 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -4,6 +4,7 @@ import sentry_sdk from sentry_sdk._types import TYPE_CHECKING from sentry_sdk.consts import OP, SPANDATA +from sentry_sdk.integrations._ai_common import set_data_normalized from sentry_sdk.scope import should_send_default_pii from sentry_sdk.tracing import Span @@ -78,8 +79,14 @@ def _handle_error(self, run_id, error): span_data.span.__exit__(None, None, None) del self.span_map[run_id] + def _normalize_langchain_message(self, message): + # type: (BaseMessage) -> dict + parsed = {"content": message.content, "role": message.type} + parsed.update(message.additional_kwargs) + return parsed + def _create_span(self, run_id, parent_id, **kwargs): - # type: (UUID, Optional[UUID], **Any) -> WatchedSpan + # type: (UUID, Optional[UUID], **Any) -> Span span = None # type: Optional[Span] if parent_id: @@ -93,7 +100,7 @@ def _create_span(self, run_id, parent_id, **kwargs): watched_span = WatchedSpan(span) self.span_map[run_id] = watched_span self.gc_span_map() - return watched_span + return span def on_llm_start( self, @@ -108,16 +115,17 @@ def on_llm_start( ): # type: (Dict[str, Any], List[str], *Any, UUID, Optional[List[str]], Optional[UUID], Optional[Dict[str, Any]], **Any) -> Any """Run when LLM starts running.""" - if not run_id: - return - watched_span = self._create_span( - run_id, - kwargs.get("parent_run_id"), - op=OP.LANGCHAIN_RUN, - description=kwargs.get("name", "Langchain LLM call"), - ) - if should_send_default_pii() and self.include_prompts: - watched_span.span.set_data(SPANDATA.AI_INPUT_MESSAGES, prompts) + with capture_internal_exceptions(): + if not run_id: + return + span = self._create_span( + run_id, + kwargs.get("parent_run_id"), + op=OP.LANGCHAIN_RUN, + description=kwargs.get("name") or "Langchain LLM call", + ) + if should_send_default_pii() and self.include_prompts: + set_data_normalized(span, SPANDATA.AI_INPUT_MESSAGES, prompts) def on_chat_model_start(self, serialized, messages, *, run_id, **kwargs): # type: (Dict[str, Any], 
List[List[BaseMessage]], *Any, UUID, **Any) -> Any @@ -125,90 +133,109 @@ def on_chat_model_start(self, serialized, messages, *, run_id, **kwargs): if not run_id: return - watched_span = self._create_span( - run_id, - kwargs.get("parent_run_id"), - op=OP.LANGCHAIN_CHAT_COMPLETIONS_CREATE, - description=kwargs.get("name", kwargs.get("name", "Langchain Chat Model")), - ) - # TODO model ids - if should_send_default_pii() and self.include_prompts: - watched_span.span.set_data(SPANDATA.AI_INPUT_MESSAGES, messages) + with capture_internal_exceptions(): + span = self._create_span( + run_id, + kwargs.get("parent_run_id"), + op=OP.LANGCHAIN_CHAT_COMPLETIONS_CREATE, + description=kwargs.get("name") or "Langchain Chat Model", + ) + # TODO model ids + if should_send_default_pii() and self.include_prompts: + span.set_data( + SPANDATA.AI_INPUT_MESSAGES, + [ + [self._normalize_langchain_message(x) for x in list_] + for list_ in messages + ], + ) def on_llm_new_token(self, token, *, run_id, **kwargs): # type: (str, *Any, UUID, **Any) -> Any """Run on new LLM token. Only available when streaming is enabled.""" - if not run_id or not self.span_map[run_id]: - return - span_data = self.span_map[run_id] - if not span_data: - return - span_data.num_tokens += 1 + with capture_internal_exceptions(): + if not run_id or not self.span_map[run_id]: + return + span_data = self.span_map[run_id] + if not span_data: + return + span_data.num_tokens += 1 def on_llm_end(self, response, *, run_id, **kwargs): # type: (LLMResult, *Any, UUID, **Any) -> Any """Run when LLM ends running.""" - if not run_id: - return - - token_usage = ( - response.llm_output.get("token_usage") if response.llm_output else None - ) - - span_data = self.span_map[run_id] - if not span_data: - return - - if should_send_default_pii() and self.include_prompts: - span_data.span.set_data(SPANDATA.AI_RESPONSES, response.generations) + with capture_internal_exceptions(): + if not run_id: + return - if token_usage: - span_data.span.set_data( - SPANDATA.AI_PROMPT_TOKENS_USED, token_usage.get("prompt_tokens") - ) - span_data.span.set_data( - SPANDATA.AI_COMPLETION_TOKENS_USED, token_usage.get("completion_tokens") + token_usage = ( + response.llm_output.get("token_usage") if response.llm_output else None ) - span_data.span.set_data( - SPANDATA.AI_TOTAL_TOKENS_USED, token_usage.get("total_tokens") - ) - elif span_data.num_tokens: - span_data.span.set_data( - SPANDATA.AI_COMPLETION_TOKENS_USED, span_data.num_tokens - ) - span_data.span.set_data(SPANDATA.AI_TOTAL_TOKENS_USED, span_data.num_tokens) - span_data.span.__exit__(None, None, None) - del self.span_map[run_id] + span_data = self.span_map[run_id] + if not span_data: + return + + if should_send_default_pii() and self.include_prompts: + set_data_normalized( + span_data.span, + SPANDATA.AI_RESPONSES, + [[x.text for x in list_] for list_ in response.generations], + ) + + if token_usage: + span_data.span.set_data( + SPANDATA.AI_PROMPT_TOKENS_USED, token_usage.get("prompt_tokens") + ) + span_data.span.set_data( + SPANDATA.AI_COMPLETION_TOKENS_USED, + token_usage.get("completion_tokens"), + ) + span_data.span.set_data( + SPANDATA.AI_TOTAL_TOKENS_USED, token_usage.get("total_tokens") + ) + elif span_data.num_tokens: + span_data.span.set_data( + SPANDATA.AI_COMPLETION_TOKENS_USED, span_data.num_tokens + ) + span_data.span.set_data( + SPANDATA.AI_TOTAL_TOKENS_USED, span_data.num_tokens + ) + + span_data.span.__exit__(None, None, None) + del self.span_map[run_id] def on_llm_error(self, error, *, run_id, **kwargs): # 
type: (Union[Exception, KeyboardInterrupt], *Any, UUID, **Any) -> Any """Run when LLM errors.""" - self._handle_error(run_id, error) + with capture_internal_exceptions(): + self._handle_error(run_id, error) def on_chain_start(self, serialized, inputs, *, run_id, **kwargs): # type: (Dict[str, Any], Dict[str, Any], *Any, UUID, **Any) -> Any """Run when chain starts running.""" - if not run_id: - return - self._create_span( - run_id, - kwargs.get("parent_run_id"), - op=OP.LANGCHAIN_RUN, - description=kwargs.get("name", "Chain execution"), - ) + with capture_internal_exceptions(): + if not run_id: + return + self._create_span( + run_id, + kwargs.get("parent_run_id"), + op=OP.LANGCHAIN_RUN, + description=kwargs.get("name") or "Chain execution", + ) def on_chain_end(self, outputs, *, run_id, **kwargs): # type: (Dict[str, Any], *Any, UUID, **Any) -> Any """Run when chain ends running.""" - if not run_id or not self.span_map[run_id]: - return + with capture_internal_exceptions(): + if not run_id or not self.span_map[run_id]: + return - span_data = self.span_map[run_id] - if not span_data: - return - span_data.span.__exit__(None, None, None) - del self.span_map[run_id] + span_data = self.span_map[run_id] + if not span_data: + return + span_data.span.__exit__(None, None, None) + del self.span_map[run_id] def on_chain_error(self, error, *, run_id, **kwargs): # type: (Union[Exception, KeyboardInterrupt], *Any, UUID, **Any) -> Any @@ -218,32 +245,34 @@ def on_chain_error(self, error, *, run_id, **kwargs): def on_tool_start(self, serialized, input_str, *, run_id, **kwargs): # type: (Dict[str, Any], str, *Any, UUID, **Any) -> Any """Run when tool starts running.""" - if not run_id: - return - watched_span = self._create_span( - run_id, - kwargs.get("parent_run_id"), - op=OP.LANGCHAIN_RUN, - description=kwargs.get("name", "AI tool usage"), - ) - if should_send_default_pii() and self.include_prompts: - watched_span.span.set_data( - SPANDATA.AI_INPUT_MESSAGES, kwargs.get("inputs", [input_str]) + with capture_internal_exceptions(): + if not run_id: + return + span = self._create_span( + run_id, + kwargs.get("parent_run_id"), + op=OP.LANGCHAIN_TOOL, + description=kwargs.get("name") or "AI tool usage", ) + if should_send_default_pii() and self.include_prompts: + set_data_normalized( + span, SPANDATA.AI_INPUT_MESSAGES, kwargs.get("inputs", [input_str]) + ) def on_tool_end(self, output, *, run_id, **kwargs): # type: (str, *Any, UUID, **Any) -> Any """Run when tool ends running.""" - if not run_id or not self.span_map[run_id]: - return + with capture_internal_exceptions(): + if not run_id or not self.span_map[run_id]: + return - span_data = self.span_map[run_id] - if not span_data: - return - if should_send_default_pii() and self.include_prompts: - span_data.span.set_data(SPANDATA.AI_RESPONSES, [output]) - span_data.span.__exit__(None, None, None) - del self.span_map[run_id] + span_data = self.span_map[run_id] + if not span_data: + return + if should_send_default_pii() and self.include_prompts: + set_data_normalized(span_data.span, SPANDATA.AI_RESPONSES, [output]) + span_data.span.__exit__(None, None, None) + del self.span_map[run_id] def on_tool_error(self, error, *args, run_id, **kwargs): # type: (Union[Exception, KeyboardInterrupt], *Any, UUID, **Any) -> Any @@ -290,7 +319,11 @@ def new_configure(*args, **kwargs): already_added = True if not already_added: - new_callbacks.append(SentryLangchainCallback(integration.max_spans)) + new_callbacks.append( + SentryLangchainCallback( + integration.max_spans, 
integration.include_prompts + ) + ) return f(*args, **kwargs) return new_configure diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 4460eb8828..81f0d0c332 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -3,6 +3,7 @@ from sentry_sdk import consts from sentry_sdk._types import TYPE_CHECKING from sentry_sdk.consts import SPANDATA +from sentry_sdk.integrations._ai_common import set_data_normalized if TYPE_CHECKING: from typing import Any, Iterable, List, Optional, Callable, Iterator @@ -73,28 +74,6 @@ def _capture_exception(exc): sentry_sdk.capture_event(event, hint=hint) -def _normalize_data(data): - # type: (Any) -> Any - - # convert pydantic data (e.g. OpenAI v1+) to json compatible format - if hasattr(data, "model_dump"): - try: - return data.model_dump() - except Exception as e: - logger.warning("Could not convert pydantic data to JSON: %s", e) - return data - if isinstance(data, list): - return list(_normalize_data(x) for x in data) - if isinstance(data, dict): - return {k: _normalize_data(v) for (k, v) in data.items()} - return data - - -def set_data_normalized(span, key, value): - # type: (Span, str, Any) -> None - span.set_data(key, _normalize_data(value)) - - def _calculate_chat_completion_usage( messages, response, span, streaming_message_responses=None ): diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 048a51c729..3489157463 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -1,15 +1,45 @@ +from pathlib import Path +from typing import List, Optional, Any, Iterator +from unittest.mock import Mock + import pytest +from langchain_community.chat_models import ChatOpenAI +from langchain_core.callbacks import CallbackManagerForLLMRun +from langchain_core.messages import BaseMessage, AIMessageChunk +from langchain_core.outputs import ChatGenerationChunk from sentry_sdk import start_transaction -from sentry_sdk.consts import SPANDATA from sentry_sdk.integrations.langchain import LangchainIntegration +from langchain.agents import tool, AgentExecutor, create_openai_tools_agent +from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder + + +@tool +def get_word_length(word: str) -> int: + """Returns the length of a word.""" + return len(word) + + +global stream_result_mock # type: Mock + + +class MockOpenAI(ChatOpenAI): + def _stream( + self, + messages: List[BaseMessage], + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> Iterator[ChatGenerationChunk]: + for x in stream_result_mock(): + yield x @pytest.mark.parametrize( "send_default_pii, include_prompts", [(True, True), (True, False), (False, True), (False, False)], ) -def test_nonstreaming_chat_completion( +def test_langchain_agent( sentry_init, capture_events, send_default_pii, include_prompts ): sentry_init( @@ -19,21 +49,119 @@ def test_nonstreaming_chat_completion( ) events = capture_events() + prompt = ChatPromptTemplate.from_messages( + [ + ( + "system", + "You are very powerful assistant, but don't know current events", + ), + ("user", "{input}"), + MessagesPlaceholder(variable_name="agent_scratchpad"), + ] + ) + global stream_result_mock + stream_result_mock = Mock( + side_effect=[ + [ + ChatGenerationChunk( + type="ChatGenerationChunk", + message=AIMessageChunk( + content="", + additional_kwargs={ + "tool_calls": [ + { + "index": 0, + 
"id": "call_BbeyNhCKa6kYLYzrD40NGm3b", + "function": { + "arguments": "", + "name": "get_word_length", + }, + "type": "function", + } + ] + }, + ), + ), + ChatGenerationChunk( + type="ChatGenerationChunk", + message=AIMessageChunk( + content="", + additional_kwargs={ + "tool_calls": [ + { + "index": 0, + "id": None, + "function": { + "arguments": '{"word": "eudca"}', + "name": None, + }, + "type": None, + } + ] + }, + ), + ), + ChatGenerationChunk( + type="ChatGenerationChunk", + message=AIMessageChunk(content="5"), + generation_info={"finish_reason": "function_call"}, + ), + ], + [ + ChatGenerationChunk( + text="The word eudca has 5 letters.", + type="ChatGenerationChunk", + message=AIMessageChunk(content="The word eudca has 5 letters."), + ), + ChatGenerationChunk( + type="ChatGenerationChunk", + generation_info={"finish_reason": "stop"}, + message=AIMessageChunk(content=""), + ), + ], + ] + ) + llm = MockOpenAI( + model_name="gpt-3.5-turbo", + temperature=0, + openai_api_key=open(Path.home() / "open_ai_key").read().strip(), + ) + agent = create_openai_tools_agent(llm, [get_word_length], prompt) + + agent_executor = AgentExecutor(agent=agent, tools=[get_word_length], verbose=True) + with start_transaction(): - pass + list(agent_executor.stream({"input": "How many letters in the word eudca"})) tx = events[0] assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "ai.chat_completions.create.openai" + chat_spans = list( + x for x in tx["spans"] if x["op"] == "ai.chat_completions.create.langchain" + ) + tool_exec_span = next(x for x in tx["spans"] if x["op"] == "ai.tool.langchain") + + assert len(chat_spans) == 2 + + # We can't guarantee anything about the "shape" of the langchain execution graph + assert len(list(x for x in tx["spans"] if x["op"] == "ai.run.langchain")) > 0 if send_default_pii and include_prompts: - assert "hello" in span["data"]["ai.input_messages"][0]["content"] - assert "the model response" in span["data"]["ai.responses"][0]["content"] + assert ( + "You are very powerful" + in chat_spans[0]["data"]["ai.input_messages"][0][0]["content"] + ) + assert "5" in chat_spans[0]["data"]["ai.responses"][0][0] + assert "word" in tool_exec_span["data"]["ai.input_messages"] + assert "5" in tool_exec_span["data"]["ai.responses"][0] + assert ( + "You are very powerful" + in chat_spans[1]["data"]["ai.input_messages"][0][0]["content"] + ) + assert "5" in chat_spans[1]["data"]["ai.responses"][0][0] else: - assert "ai.input_messages" not in span["data"] - assert "ai.responses" not in span["data"] - - assert span["data"][SPANDATA.AI_COMPLETION_TOKENS_USED] == 10 - assert span["data"][SPANDATA.AI_PROMPT_TOKENS_USED] == 20 - assert span["data"][SPANDATA.AI_TOTAL_TOKENS_USED] == 30 + assert "ai.input_messages" not in chat_spans[0].get("data", {}) + assert "ai.responses" not in chat_spans[0].get("data", {}) + assert "ai.input_messages" not in chat_spans[1].get("data", {}) + assert "ai.responses" not in chat_spans[1].get("data", {}) + assert "ai.input_messages" not in tool_exec_span.get("data", {}) + assert "ai.responses" not in tool_exec_span.get("data", {}) From 40f21192ab8a1831839f2c2505753ccd389f8bc8 Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Wed, 27 Mar 2024 16:10:45 -0400 Subject: [PATCH 12/41] Remove variadic **Any --- sentry_sdk/integrations/langchain.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index 21519db79d..9d213e3a6b 
100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -86,7 +86,7 @@ def _normalize_langchain_message(self, message): return parsed def _create_span(self, run_id, parent_id, **kwargs): - # type: (UUID, Optional[UUID], **Any) -> Span + # type: (UUID, Optional[UUID], Any) -> Span span = None # type: Optional[Span] if parent_id: @@ -113,7 +113,7 @@ def on_llm_start( metadata=None, **kwargs, ): - # type: (Dict[str, Any], List[str], *Any, UUID, Optional[List[str]], Optional[UUID], Optional[Dict[str, Any]], **Any) -> Any + # type: (Dict[str, Any], List[str], Any, UUID, Optional[List[str]], Optional[UUID], Optional[Dict[str, Any]], Any) -> Any """Run when LLM starts running.""" with capture_internal_exceptions(): if not run_id: @@ -128,7 +128,7 @@ def on_llm_start( set_data_normalized(span, SPANDATA.AI_INPUT_MESSAGES, prompts) def on_chat_model_start(self, serialized, messages, *, run_id, **kwargs): - # type: (Dict[str, Any], List[List[BaseMessage]], *Any, UUID, **Any) -> Any + # type: (Dict[str, Any], List[List[BaseMessage]], Any, UUID, Any) -> Any """Run when Chat Model starts running.""" if not run_id: return @@ -151,7 +151,7 @@ def on_chat_model_start(self, serialized, messages, *, run_id, **kwargs): ) def on_llm_new_token(self, token, *, run_id, **kwargs): - # type: (str, *Any, UUID, **Any) -> Any + # type: (str, Any, UUID, Any) -> Any """Run on new LLM token. Only available when streaming is enabled.""" with capture_internal_exceptions(): if not run_id or not self.span_map[run_id]: @@ -162,7 +162,7 @@ def on_llm_new_token(self, token, *, run_id, **kwargs): span_data.num_tokens += 1 def on_llm_end(self, response, *, run_id, **kwargs): - # type: (LLMResult, *Any, UUID, **Any) -> Any + # type: (LLMResult, Any, UUID, Any) -> Any """Run when LLM ends running.""" with capture_internal_exceptions(): if not run_id: @@ -206,13 +206,13 @@ def on_llm_end(self, response, *, run_id, **kwargs): del self.span_map[run_id] def on_llm_error(self, error, *, run_id, **kwargs): - # type: (Union[Exception, KeyboardInterrupt], *Any, UUID, **Any) -> Any + # type: (Union[Exception, KeyboardInterrupt], Any, UUID, Any) -> Any """Run when LLM errors.""" with capture_internal_exceptions(): self._handle_error(run_id, error) def on_chain_start(self, serialized, inputs, *, run_id, **kwargs): - # type: (Dict[str, Any], Dict[str, Any], *Any, UUID, **Any) -> Any + # type: (Dict[str, Any], Dict[str, Any], Any, UUID, Any) -> Any """Run when chain starts running.""" with capture_internal_exceptions(): if not run_id: @@ -225,7 +225,7 @@ def on_chain_start(self, serialized, inputs, *, run_id, **kwargs): ) def on_chain_end(self, outputs, *, run_id, **kwargs): - # type: (Dict[str, Any], *Any, UUID, **Any) -> Any + # type: (Dict[str, Any], Any, UUID, Any) -> Any """Run when chain ends running.""" with capture_internal_exceptions(): if not run_id or not self.span_map[run_id]: @@ -238,12 +238,12 @@ def on_chain_end(self, outputs, *, run_id, **kwargs): del self.span_map[run_id] def on_chain_error(self, error, *, run_id, **kwargs): - # type: (Union[Exception, KeyboardInterrupt], *Any, UUID, **Any) -> Any + # type: (Union[Exception, KeyboardInterrupt], Any, UUID, Any) -> Any """Run when chain errors.""" self._handle_error(run_id, error) def on_tool_start(self, serialized, input_str, *, run_id, **kwargs): - # type: (Dict[str, Any], str, *Any, UUID, **Any) -> Any + # type: (Dict[str, Any], str, Any, UUID, Any) -> Any """Run when tool starts running.""" with capture_internal_exceptions(): if not 
run_id: @@ -260,7 +260,7 @@ def on_tool_start(self, serialized, input_str, *, run_id, **kwargs): ) def on_tool_end(self, output, *, run_id, **kwargs): - # type: (str, *Any, UUID, **Any) -> Any + # type: (str, Any, UUID, Any) -> Any """Run when tool ends running.""" with capture_internal_exceptions(): if not run_id or not self.span_map[run_id]: @@ -275,7 +275,7 @@ def on_tool_end(self, output, *, run_id, **kwargs): del self.span_map[run_id] def on_tool_error(self, error, *args, run_id, **kwargs): - # type: (Union[Exception, KeyboardInterrupt], *Any, UUID, **Any) -> Any + # type: (Union[Exception, KeyboardInterrupt], Any, UUID, Any) -> Any """Run when tool errors.""" self._handle_error(run_id, error) @@ -285,7 +285,7 @@ def _wrap_configure(f): @wraps(f) def new_configure(*args, **kwargs): - # type: (*Any, **Any) -> Any + # type: (Any, Any) -> Any integration = sentry_sdk.get_client().get_integration(LangchainIntegration) From a8b6ff04665e71e8f26e710e145abf1963696b2e Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Wed, 27 Mar 2024 16:18:46 -0400 Subject: [PATCH 13/41] Fix some type issues --- sentry_sdk/integrations/langchain.py | 35 +++++++++++++++------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index 9d213e3a6b..a6c05f761f 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -52,7 +52,7 @@ def __init__(self, span, num_tokens=0): self.num_tokens = num_tokens -class SentryLangchainCallback(BaseCallbackHandler): +class SentryLangchainCallback(BaseCallbackHandler): # type: ignore[misc] """Base callback handler that can be used to handle callbacks from langchain.""" span_map = OrderedDict() # type: OrderedDict[UUID, WatchedSpan] @@ -60,15 +60,18 @@ class SentryLangchainCallback(BaseCallbackHandler): max_span_map_size = 0 def __init__(self, max_span_map_size, include_prompts): + # type: (int, bool) -> None self.max_span_map_size = max_span_map_size self.include_prompts = include_prompts def gc_span_map(self): + # type: () -> None + while len(self.span_map) > self.max_span_map_size: self.span_map.popitem(last=False)[1].span.__exit__(None, None, None) def _handle_error(self, run_id, error): - # type: (str, Any) -> None + # type: (UUID, Any) -> None if not run_id or not self.span_map[run_id]: return @@ -80,13 +83,13 @@ def _handle_error(self, run_id, error): del self.span_map[run_id] def _normalize_langchain_message(self, message): - # type: (BaseMessage) -> dict + # type: (BaseMessage) -> Any parsed = {"content": message.content, "role": message.type} parsed.update(message.additional_kwargs) return parsed def _create_span(self, run_id, parent_id, **kwargs): - # type: (UUID, Optional[UUID], Any) -> Span + # type: (SentryLangchainCallback, UUID, Optional[Any], Dict[str, Any]) -> Span span = None # type: Optional[Span] if parent_id: @@ -113,7 +116,7 @@ def on_llm_start( metadata=None, **kwargs, ): - # type: (Dict[str, Any], List[str], Any, UUID, Optional[List[str]], Optional[UUID], Optional[Dict[str, Any]], Any) -> Any + # type: (SentryLangchainCallback, Dict[str, Any], List[str], UUID, Optional[List[str]], Optional[UUID], Optional[Dict[str, Any]], Dict[str, Any]) -> Any """Run when LLM starts running.""" with capture_internal_exceptions(): if not run_id: @@ -128,7 +131,7 @@ def on_llm_start( set_data_normalized(span, SPANDATA.AI_INPUT_MESSAGES, prompts) def on_chat_model_start(self, serialized, messages, *, run_id, **kwargs): - # type: (Dict[str, Any], 
List[List[BaseMessage]], Any, UUID, Any) -> Any + # type: (SentryLangchainCallback, Dict[str, Any], List[List[BaseMessage]], UUID, Dict[str, Any]) -> Any """Run when Chat Model starts running.""" if not run_id: return @@ -151,7 +154,7 @@ def on_chat_model_start(self, serialized, messages, *, run_id, **kwargs): ) def on_llm_new_token(self, token, *, run_id, **kwargs): - # type: (str, Any, UUID, Any) -> Any + # type: (SentryLangchainCallback, str, UUID, Dict[str, Any]) -> Any """Run on new LLM token. Only available when streaming is enabled.""" with capture_internal_exceptions(): if not run_id or not self.span_map[run_id]: @@ -162,7 +165,7 @@ def on_llm_new_token(self, token, *, run_id, **kwargs): span_data.num_tokens += 1 def on_llm_end(self, response, *, run_id, **kwargs): - # type: (LLMResult, Any, UUID, Any) -> Any + # type: (SentryLangchainCallback, LLMResult, UUID, Dict[str, Any]) -> Any """Run when LLM ends running.""" with capture_internal_exceptions(): if not run_id: @@ -206,13 +209,13 @@ def on_llm_end(self, response, *, run_id, **kwargs): del self.span_map[run_id] def on_llm_error(self, error, *, run_id, **kwargs): - # type: (Union[Exception, KeyboardInterrupt], Any, UUID, Any) -> Any + # type: (SentryLangchainCallback, Union[Exception, KeyboardInterrupt], UUID, Dict[str, Any]) -> Any """Run when LLM errors.""" with capture_internal_exceptions(): self._handle_error(run_id, error) def on_chain_start(self, serialized, inputs, *, run_id, **kwargs): - # type: (Dict[str, Any], Dict[str, Any], Any, UUID, Any) -> Any + # type: (SentryLangchainCallback, Dict[str, Any], Dict[str, Any], UUID, Dict[str, Any]) -> Any """Run when chain starts running.""" with capture_internal_exceptions(): if not run_id: @@ -225,7 +228,7 @@ def on_chain_start(self, serialized, inputs, *, run_id, **kwargs): ) def on_chain_end(self, outputs, *, run_id, **kwargs): - # type: (Dict[str, Any], Any, UUID, Any) -> Any + # type: (SentryLangchainCallback, Dict[str, Any], UUID, Dict[str, Any]) -> Any """Run when chain ends running.""" with capture_internal_exceptions(): if not run_id or not self.span_map[run_id]: @@ -238,12 +241,12 @@ def on_chain_end(self, outputs, *, run_id, **kwargs): del self.span_map[run_id] def on_chain_error(self, error, *, run_id, **kwargs): - # type: (Union[Exception, KeyboardInterrupt], Any, UUID, Any) -> Any + # type: (SentryLangchainCallback, Union[Exception, KeyboardInterrupt], UUID, Dict[str, Any]) -> Any """Run when chain errors.""" self._handle_error(run_id, error) def on_tool_start(self, serialized, input_str, *, run_id, **kwargs): - # type: (Dict[str, Any], str, Any, UUID, Any) -> Any + # type: (SentryLangchainCallback, Dict[str, Any], str, UUID, Dict[str, Any]) -> Any """Run when tool starts running.""" with capture_internal_exceptions(): if not run_id: @@ -260,7 +263,7 @@ def on_tool_start(self, serialized, input_str, *, run_id, **kwargs): ) def on_tool_end(self, output, *, run_id, **kwargs): - # type: (str, Any, UUID, Any) -> Any + # type: (SentryLangchainCallback, str, UUID, Dict[str, Any]) -> Any """Run when tool ends running.""" with capture_internal_exceptions(): if not run_id or not self.span_map[run_id]: @@ -275,7 +278,7 @@ def on_tool_end(self, output, *, run_id, **kwargs): del self.span_map[run_id] def on_tool_error(self, error, *args, run_id, **kwargs): - # type: (Union[Exception, KeyboardInterrupt], Any, UUID, Any) -> Any + # type: (SentryLangchainCallback, Union[Exception, KeyboardInterrupt], UUID, Dict[str, Any]) -> Any """Run when tool errors.""" 
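        # _handle_error (defined near the top of this class) closes the watched
        # span and removes the run from span_map, so errored runs do not leave
        # stale entries behind.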
self._handle_error(run_id, error) @@ -290,7 +293,7 @@ def new_configure(*args, **kwargs): integration = sentry_sdk.get_client().get_integration(LangchainIntegration) with capture_internal_exceptions(): - new_callbacks = [] + new_callbacks = [] # type: List[BaseCallbackHandler] if "local_callbacks" in kwargs: existing_callbacks = kwargs["local_callbacks"] kwargs["local_callbacks"] = new_callbacks From 5d581221088c0f5a98e5e9e4c0ea617128490a78 Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Wed, 27 Mar 2024 16:34:01 -0400 Subject: [PATCH 14/41] Fix kwargs types? --- sentry_sdk/integrations/langchain.py | 24 ++++++++++++------------ tox.ini | 1 + 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index a6c05f761f..a5ed7817ab 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -89,7 +89,7 @@ def _normalize_langchain_message(self, message): return parsed def _create_span(self, run_id, parent_id, **kwargs): - # type: (SentryLangchainCallback, UUID, Optional[Any], Dict[str, Any]) -> Span + # type: (SentryLangchainCallback, UUID, Optional[Any], Any) -> Span span = None # type: Optional[Span] if parent_id: @@ -116,7 +116,7 @@ def on_llm_start( metadata=None, **kwargs, ): - # type: (SentryLangchainCallback, Dict[str, Any], List[str], UUID, Optional[List[str]], Optional[UUID], Optional[Dict[str, Any]], Dict[str, Any]) -> Any + # type: (SentryLangchainCallback, Dict[str, Any], List[str], UUID, Optional[List[str]], Optional[UUID], Optional[Dict[str, Any]], Any) -> Any """Run when LLM starts running.""" with capture_internal_exceptions(): if not run_id: @@ -131,7 +131,7 @@ def on_llm_start( set_data_normalized(span, SPANDATA.AI_INPUT_MESSAGES, prompts) def on_chat_model_start(self, serialized, messages, *, run_id, **kwargs): - # type: (SentryLangchainCallback, Dict[str, Any], List[List[BaseMessage]], UUID, Dict[str, Any]) -> Any + # type: (SentryLangchainCallback, Dict[str, Any], List[List[BaseMessage]], UUID, Any) -> Any """Run when Chat Model starts running.""" if not run_id: return @@ -154,7 +154,7 @@ def on_chat_model_start(self, serialized, messages, *, run_id, **kwargs): ) def on_llm_new_token(self, token, *, run_id, **kwargs): - # type: (SentryLangchainCallback, str, UUID, Dict[str, Any]) -> Any + # type: (SentryLangchainCallback, str, UUID, Any) -> Any """Run on new LLM token. 
Only available when streaming is enabled.""" with capture_internal_exceptions(): if not run_id or not self.span_map[run_id]: @@ -165,7 +165,7 @@ def on_llm_new_token(self, token, *, run_id, **kwargs): span_data.num_tokens += 1 def on_llm_end(self, response, *, run_id, **kwargs): - # type: (SentryLangchainCallback, LLMResult, UUID, Dict[str, Any]) -> Any + # type: (SentryLangchainCallback, LLMResult, UUID, Any) -> Any """Run when LLM ends running.""" with capture_internal_exceptions(): if not run_id: @@ -209,13 +209,13 @@ def on_llm_end(self, response, *, run_id, **kwargs): del self.span_map[run_id] def on_llm_error(self, error, *, run_id, **kwargs): - # type: (SentryLangchainCallback, Union[Exception, KeyboardInterrupt], UUID, Dict[str, Any]) -> Any + # type: (SentryLangchainCallback, Union[Exception, KeyboardInterrupt], UUID, Any) -> Any """Run when LLM errors.""" with capture_internal_exceptions(): self._handle_error(run_id, error) def on_chain_start(self, serialized, inputs, *, run_id, **kwargs): - # type: (SentryLangchainCallback, Dict[str, Any], Dict[str, Any], UUID, Dict[str, Any]) -> Any + # type: (SentryLangchainCallback, Dict[str, Any], Dict[str, Any], UUID, Any) -> Any """Run when chain starts running.""" with capture_internal_exceptions(): if not run_id: @@ -228,7 +228,7 @@ def on_chain_start(self, serialized, inputs, *, run_id, **kwargs): ) def on_chain_end(self, outputs, *, run_id, **kwargs): - # type: (SentryLangchainCallback, Dict[str, Any], UUID, Dict[str, Any]) -> Any + # type: (SentryLangchainCallback, Dict[str, Any], UUID, Any) -> Any """Run when chain ends running.""" with capture_internal_exceptions(): if not run_id or not self.span_map[run_id]: @@ -241,12 +241,12 @@ def on_chain_end(self, outputs, *, run_id, **kwargs): del self.span_map[run_id] def on_chain_error(self, error, *, run_id, **kwargs): - # type: (SentryLangchainCallback, Union[Exception, KeyboardInterrupt], UUID, Dict[str, Any]) -> Any + # type: (SentryLangchainCallback, Union[Exception, KeyboardInterrupt], UUID, Any) -> Any """Run when chain errors.""" self._handle_error(run_id, error) def on_tool_start(self, serialized, input_str, *, run_id, **kwargs): - # type: (SentryLangchainCallback, Dict[str, Any], str, UUID, Dict[str, Any]) -> Any + # type: (SentryLangchainCallback, Dict[str, Any], str, UUID, Any) -> Any """Run when tool starts running.""" with capture_internal_exceptions(): if not run_id: @@ -263,7 +263,7 @@ def on_tool_start(self, serialized, input_str, *, run_id, **kwargs): ) def on_tool_end(self, output, *, run_id, **kwargs): - # type: (SentryLangchainCallback, str, UUID, Dict[str, Any]) -> Any + # type: (SentryLangchainCallback, str, UUID, Any) -> Any """Run when tool ends running.""" with capture_internal_exceptions(): if not run_id or not self.span_map[run_id]: @@ -278,7 +278,7 @@ def on_tool_end(self, output, *, run_id, **kwargs): del self.span_map[run_id] def on_tool_error(self, error, *args, run_id, **kwargs): - # type: (SentryLangchainCallback, Union[Exception, KeyboardInterrupt], UUID, Dict[str, Any]) -> Any + # type: (SentryLangchainCallback, Union[Exception, KeyboardInterrupt], UUID, Any) -> Any """Run when tool errors.""" self._handle_error(run_id, error) diff --git a/tox.ini b/tox.ini index 37abd54277..c20f613491 100644 --- a/tox.ini +++ b/tox.ini @@ -615,6 +615,7 @@ setenv = graphene: TESTPATH=tests/integrations/graphene httpx: TESTPATH=tests/integrations/httpx huey: TESTPATH=tests/integrations/huey + langchain: TESTPATH=tests/integrations/langchain loguru: 
TESTPATH=tests/integrations/loguru openai: TESTPATH=tests/integrations/openai opentelemetry: TESTPATH=tests/integrations/opentelemetry From fb15ba80a3ef2c8500bdd705498581706637006e Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Wed, 27 Mar 2024 16:39:30 -0400 Subject: [PATCH 15/41] Fix API key in tests --- tests/integrations/langchain/test_langchain.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 3489157463..6ce98444bc 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -1,4 +1,3 @@ -from pathlib import Path from typing import List, Optional, Any, Iterator from unittest.mock import Mock @@ -124,7 +123,7 @@ def test_langchain_agent( llm = MockOpenAI( model_name="gpt-3.5-turbo", temperature=0, - openai_api_key=open(Path.home() / "open_ai_key").read().strip(), + openai_api_key="badkey", ) agent = create_openai_tools_agent(llm, [get_word_length], prompt) From e5527a05b7bba4677ec345a6184ece948f65d151 Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Wed, 27 Mar 2024 16:42:18 -0400 Subject: [PATCH 16/41] Require OpenAI for tests --- tox.ini | 1 + 1 file changed, 1 insertion(+) diff --git a/tox.ini b/tox.ini index c20f613491..944751a3b2 100644 --- a/tox.ini +++ b/tox.ini @@ -443,6 +443,7 @@ deps = huey-latest: huey # Langchain + langchain: openai~=1.0.0 langchain-0.0: langchain~=0.0.210 langchain-0.1: langchain~=0.1.11 langchain-latest: langchain From b393622e3a6ddda57987a2d11c74c979aec4481b Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Thu, 28 Mar 2024 00:47:15 -0400 Subject: [PATCH 17/41] Remove langchain 0.0 --- tox.ini | 1 - 1 file changed, 1 deletion(-) diff --git a/tox.ini b/tox.ini index 944751a3b2..6d768a138c 100644 --- a/tox.ini +++ b/tox.ini @@ -444,7 +444,6 @@ deps = # Langchain langchain: openai~=1.0.0 - langchain-0.0: langchain~=0.0.210 langchain-0.1: langchain~=0.1.11 langchain-latest: langchain From 959d81014b34cefce27c4a7ffac59baa7cb36eaf Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Thu, 28 Mar 2024 00:57:42 -0400 Subject: [PATCH 18/41] Add exception test --- .../integrations/langchain/test_langchain.py | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 6ce98444bc..23e5e4d6cb 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -164,3 +164,41 @@ def test_langchain_agent( assert "ai.responses" not in chat_spans[1].get("data", {}) assert "ai.input_messages" not in tool_exec_span.get("data", {}) assert "ai.responses" not in tool_exec_span.get("data", {}) + + +def test_langchain_error(sentry_init, capture_events): + sentry_init( + integrations=[LangchainIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + prompt = ChatPromptTemplate.from_messages( + [ + ( + "system", + "You are very powerful assistant, but don't know current events", + ), + ("user", "{input}"), + MessagesPlaceholder(variable_name="agent_scratchpad"), + ] + ) + global stream_result_mock + stream_result_mock = Mock(side_effect=Exception("API rate limit error")) + llm = MockOpenAI( + model_name="gpt-3.5-turbo", + temperature=0, + openai_api_key="badkey", + ) + agent = create_openai_tools_agent(llm, [get_word_length], prompt) + + agent_executor = 
AgentExecutor(agent=agent, tools=[get_word_length], verbose=True)
+
+    with start_transaction(), pytest.raises(Exception):
+        list(agent_executor.stream({"input": "How many letters in the word eudca"}))
+
+    for event in events:
+        print(event)
+    error = events[0]
+    assert error["level"] == "error"

From 6ca6bcba652d3fe58477a66f8dff603f9fe00de1 Mon Sep 17 00:00:00 2001
From: Colin Chartier
Date: Thu, 28 Mar 2024 01:01:44 -0400
Subject: [PATCH 19/41] Remove langchain 0.0

---
 tox.ini | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tox.ini b/tox.ini
index 6d768a138c..b637a8c35a 100644
--- a/tox.ini
+++ b/tox.ini
@@ -141,7 +141,6 @@ envlist =
     {py3.6,py3.11,py3.12}-huey-latest

     # Langchain
-    {py3.9,3.11,3.12}-langchain-0.0
     {py3.9,3.11,3.12}-langchain-0.1
     {py3.9,3.11,3.12}-langchain-latest

From 02e09189ab00b56201fe7be7a27620634692c20b Mon Sep 17 00:00:00 2001
From: Colin Chartier
Date: Thu, 28 Mar 2024 12:11:03 -0400
Subject: [PATCH 20/41] Add tiktoken to langchain

---
 sentry_sdk/integrations/langchain.py | 75 +++++++++++++++++++++++-----
 tox.ini | 9 ++--
 2 files changed, 66 insertions(+), 18 deletions(-)

diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py
index a5ed7817ab..49952a3825 100644
--- a/sentry_sdk/integrations/langchain.py
+++ b/sentry_sdk/integrations/langchain.py
@@ -25,13 +25,47 @@
     raise DidNotEnable("langchain not installed")


+try:
+    import tiktoken  # type: ignore
+
+    enc = tiktoken.get_encoding("cl100k_base")
+
+    def count_tokens(s):
+        # type: (str) -> int
+        return len(enc.encode_ordinary(s))
+
+    logger.debug("[langchain] using tiktoken to count tokens")
+except ImportError:
+    logger.info(
+        "The Sentry Python SDK requires 'tiktoken' in order to measure token usage from streaming langchain calls. "
+        "Please install 'tiktoken' if you aren't receiving accurate token usage in Sentry. "
+        "See https://docs.sentry.io/platforms/python/integrations/langchain/ for more information."
+    )
+
+    def count_tokens(s):
+        # type: (str) -> int
+        return 1
+
+
+DATA_FIELDS = [
+    "temperature",
+    "top_p",
+    "top_k",
+    "function_call",
+    "functions",
+    "tools",
+    "response_format",
+    "logit_bias",
+]
+
+
 class LangchainIntegration(Integration):
     identifier = "langchain"

     # The maximum number of spans (e.g., LLM calls) that can be processed at the same time.
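    # Runs beyond this limit are evicted oldest-first by
    # SentryLangchainCallback.gc_span_map(), which pops OrderedDict entries
    # with popitem(last=False) and closes their spans, keeping memory bounded.
    #
    # A minimal usage sketch, mirroring the tests later in this series (the
    # DSN here is a placeholder, not a real project key):
    #
    #     import sentry_sdk
    #     from sentry_sdk.integrations.langchain import LangchainIntegration
    #
    #     sentry_sdk.init(
    #         dsn="https://examplePublicKey@o0.ingest.sentry.io/0",
    #         traces_sample_rate=1.0,
    #         send_default_pii=True,
    #         integrations=[LangchainIntegration(include_prompts=True)],
    #     )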
max_spans = 1024 - def __init__(self, include_prompts=False, max_spans=1024): + def __init__(self, include_prompts=True, max_spans=1024): # type: (LangchainIntegration, bool, int) -> None self.include_prompts = include_prompts self.max_spans = max_spans @@ -44,12 +78,12 @@ def setup_once(): class WatchedSpan: span = None # type: Span - num_tokens = 0 # type: int + num_completion_tokens = 0 # type: int + num_prompt_tokens = 0 # type: int - def __init__(self, span, num_tokens=0): - # type: (Span, int) -> None + def __init__(self, span): + # type: (Span) -> None self.span = span - self.num_tokens = num_tokens class SentryLangchainCallback(BaseCallbackHandler): # type: ignore[misc] @@ -121,6 +155,8 @@ def on_llm_start( with capture_internal_exceptions(): if not run_id: return + params = kwargs.get("invocation_params", {}) + params.update(serialized.get("kwargs", {})) span = self._create_span( run_id, kwargs.get("parent_run_id"), @@ -152,6 +188,11 @@ def on_chat_model_start(self, serialized, messages, *, run_id, **kwargs): for list_ in messages ], ) + for list_ in messages: + for message in list_: + self.span_map[run_id].num_prompt_tokens += count_tokens( + message.content + ) + count_tokens(message.type) def on_llm_new_token(self, token, *, run_id, **kwargs): # type: (SentryLangchainCallback, str, UUID, Any) -> Any @@ -162,7 +203,7 @@ def on_llm_new_token(self, token, *, run_id, **kwargs): span_data = self.span_map[run_id] if not span_data: return - span_data.num_tokens += 1 + span_data.num_completion_tokens += count_tokens(token) def on_llm_end(self, response, *, run_id, **kwargs): # type: (SentryLangchainCallback, LLMResult, UUID, Any) -> Any @@ -197,13 +238,21 @@ def on_llm_end(self, response, *, run_id, **kwargs): span_data.span.set_data( SPANDATA.AI_TOTAL_TOKENS_USED, token_usage.get("total_tokens") ) - elif span_data.num_tokens: - span_data.span.set_data( - SPANDATA.AI_COMPLETION_TOKENS_USED, span_data.num_tokens - ) - span_data.span.set_data( - SPANDATA.AI_TOTAL_TOKENS_USED, span_data.num_tokens - ) + else: + if span_data.num_completion_tokens: + span_data.span.set_data( + SPANDATA.AI_COMPLETION_TOKENS_USED, + span_data.num_completion_tokens, + ) + if span_data.num_prompt_tokens: + span_data.span.set_data( + SPANDATA.AI_PROMPT_TOKENS_USED, span_data.num_prompt_tokens + ) + if span_data.num_prompt_tokens and span_data.num_completion_tokens: + span_data.span.set_data( + SPANDATA.AI_TOTAL_TOKENS_USED, + span_data.num_prompt_tokens + span_data.num_completion_tokens, + ) span_data.span.__exit__(None, None, None) del self.span_map[run_id] diff --git a/tox.ini b/tox.ini index b637a8c35a..48a260e17e 100644 --- a/tox.ini +++ b/tox.ini @@ -143,6 +143,7 @@ envlist = # Langchain {py3.9,3.11,3.12}-langchain-0.1 {py3.9,3.11,3.12}-langchain-latest + {py3.9,3.11,3.12}-langchain-notiktoken # Loguru {py3.6,py3.11,py3.12}-loguru-v{0.5} @@ -153,11 +154,6 @@ envlist = {py3.9,py3.11,py3.12}-openai-latest {py3.9,py3.11,py3.12}-openai-notiktoken - # OpenAI - {py3.9,py3.11,py3.12}-openai-v1 - {py3.9,py3.11,py3.12}-openai-latest - {py3.9,py3.11,py3.12}-openai-notiktoken - # OpenTelemetry (OTel) {py3.7,py3.9,py3.11,py3.12}-opentelemetry @@ -444,7 +440,10 @@ deps = # Langchain langchain: openai~=1.0.0 langchain-0.1: langchain~=0.1.11 + langchain-0.1: tiktoken~=0.6.0 langchain-latest: langchain + langchain-latest: tiktoken~=0.6.0 + langchain-notiktoken: langchain # Loguru loguru-v0.5: loguru~=0.5.0 From 0638f43a4414c0bdec15c704021bad0deea5795a Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Thu, 28 Mar 2024 
14:51:15 -0400
Subject: [PATCH 21/41] Add much more metadata to LLM calls

---
 sentry_sdk/consts.py | 43 ++++++++++++++++++++++++++++
 sentry_sdk/integrations/langchain.py | 23 ++++++++-------
 2 files changed, 56 insertions(+), 10 deletions(-)

diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py
index c696f78ebf..c4de674311 100644
--- a/sentry_sdk/consts.py
+++ b/sentry_sdk/consts.py
@@ -127,6 +127,49 @@ class SPANDATA:
     Example: true
     """

+    AI_TEMPERATURE = "ai.temperature"
+    """
+    For an AI model call, the temperature parameter. Temperature essentially controls how random the output will be.
+    Example: 0.5
+    """
+
+    AI_TOP_P = "ai.top_p"
+    """
+    For an AI model call, the top_p parameter. Top_p essentially controls how random the output will be.
+    Example: 0.5
+    """
+
+    AI_TOP_K = "ai.top_k"
+    """
+    For an AI model call, the top_k parameter. Top_k essentially controls how random the output will be.
+    Example: 35
+    """
+
+    AI_FUNCTION_CALL = "ai.function_call"
+    """
+    For an AI model call, the function that was called. This is deprecated for OpenAI, and replaced by tool_calls
+    """
+
+    AI_TOOL_CALLS = "ai.tool_calls"
+    """
+    For an AI model call, the tool calls that the model made; this replaces function_call for OpenAI
+    """
+
+    AI_TOOLS = "ai.tools"
+    """
+    For an AI model call, the functions that are available
+    """
+
+    AI_RESPONSE_FORMAT = "ai.response_format"
+    """
+    For an AI model call, the format of the response
+    """
+
+    AI_LOGIT_BIAS = "ai.logit_bias"
+    """
+    For an AI model call, the logit bias
+    """
+
     AI_RESPONSES = "ai.responses"
     """
     The responses to an AI model call. Always as a list.

diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py
index 49952a3825..67c7f8cfcc 100644
--- a/sentry_sdk/integrations/langchain.py
+++ b/sentry_sdk/integrations/langchain.py
@@ -47,16 +47,16 @@ def count_tokens(s):
         return 1


-DATA_FIELDS = [
-    "temperature",
-    "top_p",
-    "top_k",
-    "function_call",
-    "functions",
-    "tools",
-    "response_format",
-    "logit_bias",
-]
+DATA_FIELDS = {
+    "temperature": SPANDATA.AI_TEMPERATURE,
+    "top_p": SPANDATA.AI_TOP_P,
+    "top_k": SPANDATA.AI_TOP_K,
+    "function_call": SPANDATA.AI_FUNCTION_CALL,
+    "tool_calls": SPANDATA.AI_TOOL_CALLS,
+    "tools": SPANDATA.AI_TOOLS,
+    "response_format": SPANDATA.AI_RESPONSE_FORMAT,
+    "logit_bias": SPANDATA.AI_LOGIT_BIAS,
+}


 class LangchainIntegration:
@@ -165,6 +165,9 @@ def on_llm_start(
             )
             if should_send_default_pii() and self.include_prompts:
                 set_data_normalized(span, SPANDATA.AI_INPUT_MESSAGES, prompts)
+            for k, v in DATA_FIELDS.items():
+                if k in params:
+                    span.set_data(v, params[k])

     def on_chat_model_start(self, serialized, messages, *, run_id, **kwargs):
         # type: (SentryLangchainCallback, Dict[str, Any], List[List[BaseMessage]], UUID, Any) -> Any

From 2005565894f4cd0f4208dc89817aad65185f3503 Mon Sep 17 00:00:00 2001
From: Colin Chartier
Date: Thu, 28 Mar 2024 15:11:39 -0400
Subject: [PATCH 22/41] Instrument agents too

---
 sentry_sdk/consts.py | 7 ++++
 sentry_sdk/integrations/langchain.py | 63 ++++++++++++++++++++++------
 2 files changed, 58 insertions(+), 12 deletions(-)

diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py
index c4de674311..f42c969089 100644
--- a/sentry_sdk/consts.py
+++ b/sentry_sdk/consts.py
@@ -121,6 +121,12 @@ class SPANDATA:
     Example: gpt-4
     """

+    AI_METADATA = "ai.metadata"
+    """
+    Extra metadata passed to an AI pipeline step.
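+    In this integration it is populated from the `metadata` kwarg that langchain passes to callback hooks such as on_chain_start.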
+ Example: {"executed_function": "add_integers"} + """ + AI_STREAMING = "ai.streaming" """ Whether or not the AI model call's repsonse was streamed back asynchronously @@ -332,6 +338,7 @@ class OP: OPENAI_EMBEDDINGS_CREATE = "ai.embeddings.create.openai" LANGCHAIN_RUN = "ai.run.langchain" LANGCHAIN_TOOL = "ai.tool.langchain" + LANGCHAIN_AGENT = "ai.agent.langchain" LANGCHAIN_CHAT_COMPLETIONS_CREATE = "ai.chat_completions.create.langchain" QUEUE_SUBMIT_ARQ = "queue.submit.arq" QUEUE_TASK_ARQ = "queue.task.arq" diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index 67c7f8cfcc..52a710aff4 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -1,6 +1,8 @@ from collections import OrderedDict from functools import wraps +from langchain_core.agents import AgentAction, AgentFinish + import sentry_sdk from sentry_sdk._types import TYPE_CHECKING from sentry_sdk.consts import OP, SPANDATA @@ -106,7 +108,7 @@ def gc_span_map(self): def _handle_error(self, run_id, error): # type: (UUID, Any) -> None - if not run_id or not self.span_map[run_id]: + if not run_id or not run_id not in self.span_map: return span_data = self.span_map[run_id] @@ -155,8 +157,8 @@ def on_llm_start( with capture_internal_exceptions(): if not run_id: return - params = kwargs.get("invocation_params", {}) - params.update(serialized.get("kwargs", {})) + all_params = kwargs.get("invocation_params", {}) + all_params.update(serialized.get("kwargs", {})) span = self._create_span( run_id, kwargs.get("parent_run_id"), @@ -166,16 +168,17 @@ def on_llm_start( if should_send_default_pii() and self.include_prompts: set_data_normalized(span, SPANDATA.AI_INPUT_MESSAGES, prompts) for k, v in DATA_FIELDS.items(): - if k in params: - span.set_data(v, params[k]) + if k in all_params: + set_data_normalized(span, v, all_params[k]) def on_chat_model_start(self, serialized, messages, *, run_id, **kwargs): # type: (SentryLangchainCallback, Dict[str, Any], List[List[BaseMessage]], UUID, Any) -> Any """Run when Chat Model starts running.""" - if not run_id: - return - with capture_internal_exceptions(): + if not run_id: + return + all_params = kwargs.get("invocation_params", {}) + all_params.update(serialized.get("kwargs", {})) span = self._create_span( run_id, kwargs.get("parent_run_id"), @@ -191,6 +194,9 @@ def on_chat_model_start(self, serialized, messages, *, run_id, **kwargs): for list_ in messages ], ) + for k, v in DATA_FIELDS.items(): + if k in all_params: + set_data_normalized(span, v, all_params[k]) for list_ in messages: for message in list_: self.span_map[run_id].num_prompt_tokens += count_tokens( @@ -201,7 +207,7 @@ def on_llm_new_token(self, token, *, run_id, **kwargs): # type: (SentryLangchainCallback, str, UUID, Any) -> Any """Run on new LLM token. 
Only available when streaming is enabled.""" with capture_internal_exceptions(): - if not run_id or not self.span_map[run_id]: + if not run_id or run_id not in self.span_map: return span_data = self.span_map[run_id] if not span_data: @@ -272,18 +278,21 @@ def on_chain_start(self, serialized, inputs, *, run_id, **kwargs): with capture_internal_exceptions(): if not run_id: return - self._create_span( + span = self._create_span( run_id, kwargs.get("parent_run_id"), op=OP.LANGCHAIN_RUN, description=kwargs.get("name") or "Chain execution", ) + metadata = kwargs.get("metadata") + if metadata: + set_data_normalized(span, SPANDATA.AI_METADATA, metadata) def on_chain_end(self, outputs, *, run_id, **kwargs): # type: (SentryLangchainCallback, Dict[str, Any], UUID, Any) -> Any """Run when chain ends running.""" with capture_internal_exceptions(): - if not run_id or not self.span_map[run_id]: + if not run_id or run_id not in self.span_map: return span_data = self.span_map[run_id] @@ -297,6 +306,36 @@ def on_chain_error(self, error, *, run_id, **kwargs): """Run when chain errors.""" self._handle_error(run_id, error) + def on_agent_action(self, action, *, run_id, **kwargs): + # type: (SentryLangchainCallback, AgentAction, UUID, Any) -> Any + with capture_internal_exceptions(): + if not run_id: + return + span = self._create_span( + run_id, + kwargs.get("parent_run_id"), + op=OP.LANGCHAIN_AGENT, + description=action.tool or "AI tool usage", + ) + if action.tool_input and should_send_default_pii() and self.include_prompts: + set_data_normalized(span, SPANDATA.AI_INPUT_MESSAGES, action.tool_input) + + def on_agent_finish(self, finish, *, run_id, **kwargs): + # type: (SentryLangchainCallback, AgentFinish, UUID, Any) -> Any + with capture_internal_exceptions(): + if not run_id: + return + + span_data = self.span_map[run_id] + if not span_data: + return + if should_send_default_pii() and self.include_prompts: + set_data_normalized( + span_data.span, SPANDATA.AI_RESPONSES, finish.return_values.items() + ) + span_data.span.__exit__(None, None, None) + del self.span_map[run_id] + def on_tool_start(self, serialized, input_str, *, run_id, **kwargs): # type: (SentryLangchainCallback, Dict[str, Any], str, UUID, Any) -> Any """Run when tool starts running.""" @@ -318,7 +357,7 @@ def on_tool_end(self, output, *, run_id, **kwargs): # type: (SentryLangchainCallback, str, UUID, Any) -> Any """Run when tool ends running.""" with capture_internal_exceptions(): - if not run_id or not self.span_map[run_id]: + if not run_id or run_id not in self.span_map: return span_data = self.span_map[run_id] From 518a44f5431b5d8a6c07fd5d1d7d88bf52e811ae Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Thu, 28 Mar 2024 15:32:18 -0400 Subject: [PATCH 23/41] Send tags as well --- sentry_sdk/consts.py | 6 ++++++ sentry_sdk/integrations/langchain.py | 9 ++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index f42c969089..b10d4ed3ec 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -127,6 +127,12 @@ class SPANDATA: Example: {"executed_function": "add_integers"} """ + AI_TAGS = "ai.tags" + """ + Tags that describe an AI pipeline step. 
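+    In this integration these come from langchain's `tags` invocation parameter, mapped via the DATA_FIELDS table in the langchain integration.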
+    Example: {"executed_function": "add_integers"}
+    """
+
     AI_STREAMING = "ai.streaming"
     """
     Whether or not the AI model call's response was streamed back asynchronously

diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py
index 52a710aff4..ae931677c9 100644
--- a/sentry_sdk/integrations/langchain.py
+++ b/sentry_sdk/integrations/langchain.py
@@ -58,6 +58,7 @@ def count_tokens(s):
     "tools": SPANDATA.AI_TOOLS,
     "response_format": SPANDATA.AI_RESPONSE_FORMAT,
     "logit_bias": SPANDATA.AI_LOGIT_BIAS,
+    "tags": SPANDATA.AI_TAGS,
 }

@@ -346,12 +347,18 @@ def on_tool_start(self, serialized, input_str, *, run_id, **kwargs):
                 run_id,
                 kwargs.get("parent_run_id"),
                 op=OP.LANGCHAIN_TOOL,
-                description=kwargs.get("name") or "AI tool usage",
+                description=serialized.get("name")
+                or kwargs.get("name")
+                or "AI tool usage",
             )
             if should_send_default_pii() and self.include_prompts:
                 set_data_normalized(
                     span, SPANDATA.AI_INPUT_MESSAGES, kwargs.get("inputs", [input_str])
                 )
+            if kwargs.get("metadata"):
+                set_data_normalized(
+                    span, SPANDATA.AI_METADATA, kwargs.get("metadata")
+                )

     def on_tool_end(self, output, *, run_id, **kwargs):
         # type: (SentryLangchainCallback, str, UUID, Any) -> Any

From 67e4d95b9f1b91e1fa842c6550730d2c6600b1c4 Mon Sep 17 00:00:00 2001
From: Colin Chartier
Date: Thu, 28 Mar 2024 15:45:37 -0400
Subject: [PATCH 24/41] Send model ids for langchain inference

---
 sentry_sdk/integrations/langchain.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py
index ae931677c9..f7efc567d8 100644
--- a/sentry_sdk/integrations/langchain.py
+++ b/sentry_sdk/integrations/langchain.py
@@ -186,7 +186,13 @@ def on_chat_model_start(self, serialized, messages, *, run_id, **kwargs):
                 op=OP.LANGCHAIN_CHAT_COMPLETIONS_CREATE,
                 description=kwargs.get("name") or "Langchain Chat Model",
             )
-            # TODO model ids
+            model = all_params.get(
+                "model", all_params.get("model_name", all_params.get("model_id"))
+            )
+            if not model and "anthropic" in all_params.get("_type", ""):
+                model = "claude-2"
+            if model:
+                span.set_data(SPANDATA.AI_MODEL_ID, model)
             if should_send_default_pii() and self.include_prompts:
                 span.set_data(
                     SPANDATA.AI_INPUT_MESSAGES,

From 2b1db20824d1a3f050ef11a478e67cd71ade9d02 Mon Sep 17 00:00:00 2001
From: Colin Chartier
Date: Thu, 28 Mar 2024 15:49:36 -0400
Subject: [PATCH 25/41] import gate

---
 sentry_sdk/integrations/langchain.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py
index f7efc567d8..f0d701a24c 100644
--- a/sentry_sdk/integrations/langchain.py
+++ b/sentry_sdk/integrations/langchain.py
@@ -1,8 +1,6 @@
 from collections import OrderedDict
 from functools import wraps

-from langchain_core.agents import AgentAction, AgentFinish
-
 import sentry_sdk
 from sentry_sdk._types import TYPE_CHECKING
 from sentry_sdk.consts import OP, SPANDATA
@@ -23,6 +21,7 @@
         manager,
         BaseCallbackHandler,
     )
+    from langchain_core.agents import AgentAction, AgentFinish
 except ImportError:
     raise DidNotEnable("langchain not installed")

From 175297332f76a8397e401f55649e802892ecb216 Mon Sep 17 00:00:00 2001
From: Colin Chartier
Date: Thu, 28 Mar 2024 15:57:02 -0400
Subject: [PATCH 26/41] fix the bug

---
 sentry_sdk/integrations/langchain.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py
index f0d701a24c..d57881f453 100644
--- 
a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -108,7 +108,7 @@ def gc_span_map(self): def _handle_error(self, run_id, error): # type: (UUID, Any) -> None - if not run_id or not run_id not in self.span_map: + if not run_id or run_id not in self.span_map: return span_data = self.span_map[run_id] From c29600aa7945dfcc2c799311e488ced17f9f1284 Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Thu, 28 Mar 2024 16:03:46 -0400 Subject: [PATCH 27/41] Remove empty dimensions from AI fields --- sentry_sdk/integrations/_ai_common.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sentry_sdk/integrations/_ai_common.py b/sentry_sdk/integrations/_ai_common.py index 4a416cfa46..42d46304e4 100644 --- a/sentry_sdk/integrations/_ai_common.py +++ b/sentry_sdk/integrations/_ai_common.py @@ -18,6 +18,8 @@ def _normalize_data(data): logger.warning("Could not convert pydantic data to JSON: %s", e) return data if isinstance(data, list): + if len(data) == 1: + return _normalize_data(data[0]) # remove empty dimensions return list(_normalize_data(x) for x in data) if isinstance(data, dict): return {k: _normalize_data(v) for (k, v) in data.items()} From 1a8623c5e6a63e766bc5c96660bcd27aee6749c3 Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Thu, 28 Mar 2024 17:49:48 -0400 Subject: [PATCH 28/41] Fix tests for removed dimensions --- sentry_sdk/integrations/langchain.py | 3 ++- tests/integrations/langchain/test_langchain.py | 10 ++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index d57881f453..78e16ffd42 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -193,7 +193,8 @@ def on_chat_model_start(self, serialized, messages, *, run_id, **kwargs): if model: span.set_data(SPANDATA.AI_MODEL_ID, model) if should_send_default_pii() and self.include_prompts: - span.set_data( + set_data_normalized( + span, SPANDATA.AI_INPUT_MESSAGES, [ [self._normalize_langchain_message(x) for x in list_] diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 23e5e4d6cb..14e01a9a15 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -147,16 +147,16 @@ def test_langchain_agent( if send_default_pii and include_prompts: assert ( "You are very powerful" - in chat_spans[0]["data"]["ai.input_messages"][0][0]["content"] + in chat_spans[0]["data"]["ai.input_messages"][0]["content"] ) - assert "5" in chat_spans[0]["data"]["ai.responses"][0][0] + assert "5" in chat_spans[0]["data"]["ai.responses"][0] assert "word" in tool_exec_span["data"]["ai.input_messages"] assert "5" in tool_exec_span["data"]["ai.responses"][0] assert ( "You are very powerful" - in chat_spans[1]["data"]["ai.input_messages"][0][0]["content"] + in chat_spans[1]["data"]["ai.input_messages"][0]["content"] ) - assert "5" in chat_spans[1]["data"]["ai.responses"][0][0] + assert "5" in chat_spans[1]["data"]["ai.responses"] else: assert "ai.input_messages" not in chat_spans[0].get("data", {}) assert "ai.responses" not in chat_spans[0].get("data", {}) @@ -198,7 +198,5 @@ def test_langchain_error(sentry_init, capture_events): with start_transaction(), pytest.raises(Exception): list(agent_executor.stream({"input": "How many letters in the word eudca"})) - for event in events: - print(event) error = events[0] assert error["level"] == "error" From 51ea2c278474e5dea63747aa8bc2696169c8b63d 
Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Mon, 1 Apr 2024 12:14:48 -0400 Subject: [PATCH 29/41] Fix openai tests --- tests/integrations/openai/test_openai.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 91634b4b37..9c35795a87 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -68,8 +68,8 @@ def test_nonstreaming_chat_completion( assert span["op"] == "ai.chat_completions.create.openai" if send_default_pii and include_prompts: - assert "hello" in span["data"]["ai.input_messages"][0]["content"] - assert "the model response" in span["data"]["ai.responses"][0]["content"] + assert "hello" in span["data"]["ai.input_messages"]["content"] + assert "the model response" in span["data"]["ai.responses"]["content"] else: assert "ai.input_messages" not in span["data"] assert "ai.responses" not in span["data"] @@ -147,8 +147,8 @@ def test_streaming_chat_completion( assert span["op"] == "ai.chat_completions.create.openai" if send_default_pii and include_prompts: - assert "hello" in span["data"]["ai.input_messages"][0]["content"] - assert "hello world" in span["data"]["ai.responses"][0] + assert "hello" in span["data"]["ai.input_messages"]["content"] + assert "hello world" in span["data"]["ai.responses"] else: assert "ai.input_messages" not in span["data"] assert "ai.responses" not in span["data"] @@ -219,7 +219,7 @@ def test_embeddings_create( span = tx["spans"][0] assert span["op"] == "ai.embeddings.create.openai" if send_default_pii and include_prompts: - assert "hello" in span["data"]["ai.input_messages"][0] + assert "hello" in span["data"]["ai.input_messages"] else: assert "ai.input_messages" not in span["data"] From e0f270a93be1fd7b19bc4888f9e36676ce8ee4d5 Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Wed, 3 Apr 2024 11:12:45 -0400 Subject: [PATCH 30/41] Record metrics for AI tokens used --- sentry_sdk/integrations/_ai_common.py | 27 +++++++++++++++++++++- sentry_sdk/integrations/langchain.py | 33 ++++++++------------------- sentry_sdk/integrations/openai.py | 22 ++++++------------ 3 files changed, 43 insertions(+), 39 deletions(-) diff --git a/sentry_sdk/integrations/_ai_common.py b/sentry_sdk/integrations/_ai_common.py index 42d46304e4..53fab37e45 100644 --- a/sentry_sdk/integrations/_ai_common.py +++ b/sentry_sdk/integrations/_ai_common.py @@ -1,7 +1,9 @@ +from sentry_sdk import metrics from sentry_sdk._types import TYPE_CHECKING +from sentry_sdk.consts import SPANDATA if TYPE_CHECKING: - from typing import Any + from typing import Any, Optional from sentry_sdk.tracing import Span from sentry_sdk.utils import logger @@ -30,3 +32,26 @@ def set_data_normalized(span, key, value): # type: (Span, str, Any) -> None normalized = _normalize_data(value) span.set_data(key, normalized) + + +def record_token_usage( + span, prompt_tokens=None, completion_tokens=None, total_tokens=None +): + # type: (Span, Optional[int], Optional[int], Optional[int]) -> None + if prompt_tokens is not None: + span.set_data(SPANDATA.AI_PROMPT_TOKENS_USED, prompt_tokens) + metrics.incr(SPANDATA.AI_PROMPT_TOKENS_USED, value=prompt_tokens, unit="tokens") + if completion_tokens is not None: + span.set_data(SPANDATA.AI_COMPLETION_TOKENS_USED, completion_tokens) + metrics.incr( + SPANDATA.AI_COMPLETION_TOKENS_USED, value=completion_tokens, unit="tokens" + ) + if ( + total_tokens is None + and prompt_tokens is not None + and completion_tokens is not 
None + ): + total_tokens = prompt_tokens + completion_tokens + if total_tokens is not None: + span.set_data(SPANDATA.AI_TOTAL_TOKENS_USED, total_tokens) + metrics.incr(SPANDATA.AI_TOTAL_TOKENS_USED, value=total_tokens, unit="tokens") diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index 78e16ffd42..669cc56e87 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -4,7 +4,7 @@ import sentry_sdk from sentry_sdk._types import TYPE_CHECKING from sentry_sdk.consts import OP, SPANDATA -from sentry_sdk.integrations._ai_common import set_data_normalized +from sentry_sdk.integrations._ai_common import set_data_normalized, record_token_usage from sentry_sdk.scope import should_send_default_pii from sentry_sdk.tracing import Span @@ -244,31 +244,18 @@ def on_llm_end(self, response, *, run_id, **kwargs): ) if token_usage: - span_data.span.set_data( - SPANDATA.AI_PROMPT_TOKENS_USED, token_usage.get("prompt_tokens") - ) - span_data.span.set_data( - SPANDATA.AI_COMPLETION_TOKENS_USED, + record_token_usage( + span_data.span, + token_usage.get("prompt_tokens"), token_usage.get("completion_tokens"), - ) - span_data.span.set_data( - SPANDATA.AI_TOTAL_TOKENS_USED, token_usage.get("total_tokens") + token_usage.get("total_tokens"), ) else: - if span_data.num_completion_tokens: - span_data.span.set_data( - SPANDATA.AI_COMPLETION_TOKENS_USED, - span_data.num_completion_tokens, - ) - if span_data.num_prompt_tokens: - span_data.span.set_data( - SPANDATA.AI_PROMPT_TOKENS_USED, span_data.num_prompt_tokens - ) - if span_data.num_prompt_tokens and span_data.num_completion_tokens: - span_data.span.set_data( - SPANDATA.AI_TOTAL_TOKENS_USED, - span_data.num_prompt_tokens + span_data.num_completion_tokens, - ) + record_token_usage( + span_data.span, + span_data.num_prompt_tokens, + span_data.num_completion_tokens, + ) span_data.span.__exit__(None, None, None) del self.span_map[run_id] diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 81f0d0c332..e3d6d8e39d 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -3,7 +3,7 @@ from sentry_sdk import consts from sentry_sdk._types import TYPE_CHECKING from sentry_sdk.consts import SPANDATA -from sentry_sdk.integrations._ai_common import set_data_normalized +from sentry_sdk.integrations._ai_common import set_data_normalized, record_token_usage if TYPE_CHECKING: from typing import Any, Iterable, List, Optional, Callable, Iterator @@ -109,15 +109,11 @@ def _calculate_chat_completion_usage( if hasattr(choice, "message"): completion_tokens += count_tokens(choice.message) - if total_tokens == 0: - total_tokens = prompt_tokens + completion_tokens - - if completion_tokens != 0: - set_data_normalized(span, SPANDATA.AI_COMPLETION_TOKENS_USED, completion_tokens) - if prompt_tokens != 0: - set_data_normalized(span, SPANDATA.AI_PROMPT_TOKENS_USED, prompt_tokens) - if total_tokens != 0: - set_data_normalized(span, SPANDATA.AI_TOTAL_TOKENS_USED, total_tokens) + if prompt_tokens == 0: + prompt_tokens = None + if completion_tokens == 0: + completion_tokens = None + record_token_usage(span, prompt_tokens, completion_tokens, total_tokens) def _wrap_chat_completion_create(f): @@ -262,11 +258,7 @@ def new_embeddings_create(*args, **kwargs): if prompt_tokens == 0: prompt_tokens = count_tokens(kwargs["input"] or "") - if total_tokens == 0: - total_tokens = prompt_tokens - - set_data_normalized(span, SPANDATA.AI_PROMPT_TOKENS_USED, prompt_tokens) - 
set_data_normalized(span, SPANDATA.AI_TOTAL_TOKENS_USED, total_tokens) + record_token_usage(span, prompt_tokens, None, total_tokens or prompt_tokens) return response From f270c87de3ef402f37f5933070947db92d5336bf Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Wed, 3 Apr 2024 11:58:40 -0400 Subject: [PATCH 31/41] Fix tests and linting --- sentry_sdk/integrations/langchain.py | 2 +- sentry_sdk/integrations/openai.py | 6 +++--- tests/integrations/langchain/test_langchain.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index 669cc56e87..651a3d7068 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -364,7 +364,7 @@ def on_tool_end(self, output, *, run_id, **kwargs): if not span_data: return if should_send_default_pii() and self.include_prompts: - set_data_normalized(span_data.span, SPANDATA.AI_RESPONSES, [output]) + set_data_normalized(span_data.span, SPANDATA.AI_RESPONSES, output) span_data.span.__exit__(None, None, None) del self.span_map[run_id] diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index e3d6d8e39d..462788ce19 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -78,9 +78,9 @@ def _calculate_chat_completion_usage( messages, response, span, streaming_message_responses=None ): # type: (Iterable[ChatCompletionMessageParam], Any, Span, Optional[List[str]]) -> None - completion_tokens = 0 - prompt_tokens = 0 - total_tokens = 0 + completion_tokens = 0 # type: Optional[int] + prompt_tokens = 0 # type: Optional[int] + total_tokens = 0 # type: Optional[int] if hasattr(response, "usage"): if hasattr(response.usage, "completion_tokens") and isinstance( response.usage.completion_tokens, int diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 14e01a9a15..b3ca452267 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -149,9 +149,9 @@ def test_langchain_agent( "You are very powerful" in chat_spans[0]["data"]["ai.input_messages"][0]["content"] ) - assert "5" in chat_spans[0]["data"]["ai.responses"][0] + assert "5" in chat_spans[0]["data"]["ai.responses"] assert "word" in tool_exec_span["data"]["ai.input_messages"] - assert "5" in tool_exec_span["data"]["ai.responses"][0] + assert 5 == int(tool_exec_span["data"]["ai.responses"]) assert ( "You are very powerful" in chat_spans[1]["data"]["ai.input_messages"][0]["content"] From 58588cfb6c7dad97af0c8581d03cc9786047d1f1 Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Wed, 3 Apr 2024 12:31:04 -0400 Subject: [PATCH 32/41] Fix another test --- sentry_sdk/integrations/openai.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 462788ce19..b6ad215d09 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -113,6 +113,8 @@ def _calculate_chat_completion_usage( prompt_tokens = None if completion_tokens == 0: completion_tokens = None + if total_tokens == 0: + total_tokens = None record_token_usage(span, prompt_tokens, completion_tokens, total_tokens) From 64962e04ec9393354278b67dd16129c60a69e46d Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Wed, 10 Apr 2024 16:25:16 -0400 Subject: [PATCH 33/41] Add a new opcode for top level langchain runs --- sentry_sdk/consts.py | 1 + 
sentry_sdk/integrations/langchain.py | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index b10d4ed3ec..b2d5476e0a 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -342,6 +342,7 @@ class OP: MIDDLEWARE_STARLITE_SEND = "middleware.starlite.send" OPENAI_CHAT_COMPLETIONS_CREATE = "ai.chat_completions.create.openai" OPENAI_EMBEDDINGS_CREATE = "ai.embeddings.create.openai" + LANGCHAIN_PIPELINE = "ai.pipeline.langchain" LANGCHAIN_RUN = "ai.run.langchain" LANGCHAIN_TOOL = "ai.tool.langchain" LANGCHAIN_AGENT = "ai.agent.langchain" diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index 651a3d7068..798be2ed06 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -275,7 +275,11 @@ def on_chain_start(self, serialized, inputs, *, run_id, **kwargs): span = self._create_span( run_id, kwargs.get("parent_run_id"), - op=OP.LANGCHAIN_RUN, + op=( + OP.LANGCHAIN_RUN + if kwargs.get("parent_run_id") is not None + else OP.LANGCHAIN_PIPELINE + ), description=kwargs.get("name") or "Chain execution", ) metadata = kwargs.get("metadata") From f65bd46a3fa6dfa7bc41007087b08c110f04480d Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Thu, 11 Apr 2024 20:44:12 -0400 Subject: [PATCH 34/41] Switch to a metric for total tokens used --- sentry_sdk/consts.py | 18 ------------------ sentry_sdk/integrations/_ai_common.py | 13 +++---------- sentry_sdk/integrations/langchain.py | 3 +++ sentry_sdk/integrations/openai.py | 5 ++++- tests/integrations/openai/test_openai.py | 17 ++++++++--------- 5 files changed, 18 insertions(+), 38 deletions(-) diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index b2d5476e0a..19595ed7fa 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -97,24 +97,6 @@ class SPANDATA: Example: [{"role": "user", "message": "hello"}] """ - AI_COMPLETION_TOKENS_USED = "ai.completion_tokens.used" """ - The number of tokens used to respond to an AI model request - Example: 10 - """ - - AI_PROMPT_TOKENS_USED = "ai.prompt_tokens.used" - """ - The number of tokens used to process the input text to an AI model request - Example: 20 - """ - - AI_TOTAL_TOKENS_USED = "ai.total_tokens.used" - """ - The number of tokens used in total to process an AI model request - Example: 30 - """ - AI_MODEL_ID = "ai.model_id" """ The unique descriptor of the model being executed diff --git a/sentry_sdk/integrations/_ai_common.py b/sentry_sdk/integrations/_ai_common.py index 53fab37e45..5b25d1fc69 100644 --- a/sentry_sdk/integrations/_ai_common.py +++ b/sentry_sdk/integrations/_ai_common.py @@ -1,6 +1,4 @@ -from sentry_sdk import metrics from sentry_sdk._types import TYPE_CHECKING -from sentry_sdk.consts import SPANDATA if TYPE_CHECKING: from typing import Any, Optional @@ -39,13 +37,9 @@ def record_token_usage( ): # type: (Span, Optional[int], Optional[int], Optional[int]) -> None if prompt_tokens is not None: - span.set_data(SPANDATA.AI_PROMPT_TOKENS_USED, prompt_tokens) - metrics.incr(SPANDATA.AI_PROMPT_TOKENS_USED, value=prompt_tokens, unit="tokens") + span.set_measurement("ai_prompt_tokens_used", value=prompt_tokens) if completion_tokens is not None: - span.set_data(SPANDATA.AI_COMPLETION_TOKENS_USED, completion_tokens) - metrics.incr( - SPANDATA.AI_COMPLETION_TOKENS_USED, value=completion_tokens, unit="tokens" - ) + span.set_measurement("ai_completion_tokens_used", value=completion_tokens) if ( total_tokens is None and prompt_tokens is not None and completion_tokens is not None @@
-53,5 +47,4 @@ def record_token_usage( ): total_tokens = prompt_tokens + completion_tokens if total_tokens is not None: - span.set_data(SPANDATA.AI_TOTAL_TOKENS_USED, total_tokens) - metrics.incr(SPANDATA.AI_TOTAL_TOKENS_USED, value=total_tokens, unit="tokens") + span.set_measurement("ai_total_tokens_used", total_tokens) diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index 798be2ed06..345b172d17 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -127,6 +127,9 @@ def _normalize_langchain_message(self, message): def _create_span(self, run_id, parent_id, **kwargs): # type: (SentryLangchainCallback, UUID, Optional[Any], Any) -> Span + if "origin" not in kwargs: + kwargs["origin"] = "auto.ai.langchain" + span = None # type: Optional[Span] if parent_id: parent_span = self.span_map[parent_id] # type: Optional[WatchedSpan] diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index b6ad215d09..ffb8a391fa 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -140,7 +140,9 @@ def new_chat_completion(*args, **kwargs): streaming = kwargs.get("stream") span = sentry_sdk.start_span( - op=consts.OP.OPENAI_CHAT_COMPLETIONS_CREATE, description="Chat Completion" + op=consts.OP.OPENAI_CHAT_COMPLETIONS_CREATE, + origin="auto.ai.openai", + description="Chat Completion", ) span.__enter__() try: @@ -223,6 +225,7 @@ def new_embeddings_create(*args, **kwargs): # type: (*Any, **Any) -> Any with sentry_sdk.start_span( op=consts.OP.OPENAI_EMBEDDINGS_CREATE, + origin="auto.ai.openai", description="OpenAI Embedding Creation", ) as span: integration = sentry_sdk.get_client().get_integration(OpenAIIntegration) diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 9c35795a87..f14ae82333 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -7,7 +7,6 @@ from openai.types.create_embedding_response import Usage as EmbeddingTokenUsage from sentry_sdk import start_transaction -from sentry_sdk.consts import SPANDATA from sentry_sdk.integrations.openai import OpenAIIntegration from unittest import mock # python 3.3 and above @@ -74,9 +73,9 @@ def test_nonstreaming_chat_completion( assert "ai.input_messages" not in span["data"] assert "ai.responses" not in span["data"] - assert span["data"][SPANDATA.AI_COMPLETION_TOKENS_USED] == 10 - assert span["data"][SPANDATA.AI_PROMPT_TOKENS_USED] == 20 - assert span["data"][SPANDATA.AI_TOTAL_TOKENS_USED] == 30 + assert span["measurements"]["ai_completion_tokens_used"]["value"] == 10 + assert span["measurements"]["ai_prompt_tokens_used"]["value"] == 20 + assert span["measurements"]["ai_total_tokens_used"]["value"] == 30 # noinspection PyTypeChecker @@ -156,9 +155,9 @@ def test_streaming_chat_completion( try: import tiktoken # type: ignore # noqa # pylint: disable=unused-import - assert span["data"][SPANDATA.AI_COMPLETION_TOKENS_USED] == 2 - assert span["data"][SPANDATA.AI_PROMPT_TOKENS_USED] == 1 - assert span["data"][SPANDATA.AI_TOTAL_TOKENS_USED] == 3 + assert span["measurements"]["ai_completion_tokens_used"]["value"] == 2 + assert span["measurements"]["ai_prompt_tokens_used"]["value"] == 1 + assert span["measurements"]["ai_total_tokens_used"]["value"] == 3 except ImportError: pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly @@ -223,5 +222,5 @@ def test_embeddings_create( else: assert 
"ai.input_messages" not in span["data"] - assert span["data"][SPANDATA.AI_PROMPT_TOKENS_USED] == 20 - assert span["data"][SPANDATA.AI_TOTAL_TOKENS_USED] == 30 + assert span["measurements"]["ai_prompt_tokens_used"]["value"] == 20 + assert span["measurements"]["ai_total_tokens_used"]["value"] == 30 From e187911aee4820ac3bdf29fa09feef7089673d7d Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Thu, 18 Apr 2024 14:06:19 +0200 Subject: [PATCH 35/41] Fixed langchain test matrix --- tox.ini | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tox.ini b/tox.ini index 48a260e17e..f7c84d818b 100644 --- a/tox.ini +++ b/tox.ini @@ -141,9 +141,9 @@ envlist = {py3.6,py3.11,py3.12}-huey-latest # Langchain - {py3.9,3.11,3.12}-langchain-0.1 - {py3.9,3.11,3.12}-langchain-latest - {py3.9,3.11,3.12}-langchain-notiktoken + {py3.9,py3.11,py3.12}-langchain-0.1 + {py3.9,py3.11,py3.12}-langchain-latest + {py3.9,py3.11,py3.12}-langchain-notiktoken # Loguru {py3.6,py3.11,py3.12}-loguru-v{0.5} From 27db63ba502662fdf629e028bded1ff3f51dc514 Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Fri, 19 Apr 2024 15:41:06 -0400 Subject: [PATCH 36/41] Avoid double counting tokens with explicit blocklist --- sentry_sdk/integrations/langchain.py | 110 +++++++++++------- .../integrations/langchain/test_langchain.py | 27 ++++- 2 files changed, 89 insertions(+), 48 deletions(-) diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index 345b172d17..0cebe1ec17 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -60,6 +60,10 @@ def count_tokens(s): "tags": SPANDATA.AI_TAGS, } +# To avoid double collecting tokens, we do *not* measure +# token counts for models for which we have an explicit integration +NO_COLLECT_TOKEN_MODELS = ["openai-chat"] + class LangchainIntegration(Integration): identifier = "langchain" @@ -82,6 +86,8 @@ class WatchedSpan: span = None # type: Span num_completion_tokens = 0 # type: int num_prompt_tokens = 0 # type: int + no_collect_tokens = False # type: bool + children = [] # type: List[WatchedSpan] def __init__(self, span): # type: (Span) -> None @@ -104,7 +110,8 @@ def gc_span_map(self): # type: () -> None while len(self.span_map) > self.max_span_map_size: - self.span_map.popitem(last=False)[1].span.__exit__(None, None, None) + run_id, watched_span = self.span_map.popitem(last=False) + self._exit_span(watched_span, run_id) def _handle_error(self, run_id, error): # type: (UUID, Any) -> None @@ -125,24 +132,30 @@ def _normalize_langchain_message(self, message): return parsed def _create_span(self, run_id, parent_id, **kwargs): - # type: (SentryLangchainCallback, UUID, Optional[Any], Any) -> Span + # type: (SentryLangchainCallback, UUID, Optional[Any], Any) -> WatchedSpan if "origin" not in kwargs: kwargs["origin"] = "auto.ai.langchain" - span = None # type: Optional[Span] + watched_span = None # type: Optional[WatchedSpan] if parent_id: parent_span = self.span_map[parent_id] # type: Optional[WatchedSpan] if parent_span: - span = parent_span.span.start_child(**kwargs) - if span is None: - span = sentry_sdk.start_span(**kwargs) + watched_span = WatchedSpan(parent_span.span.start_child(**kwargs)) + parent_span.children.append(watched_span) + if watched_span is None: + watched_span = WatchedSpan(sentry_sdk.start_span(**kwargs)) - span.__enter__() - watched_span = WatchedSpan(span) + watched_span.span.__enter__() self.span_map[run_id] = watched_span self.gc_span_map() - return span + return watched_span + + def 
_exit_span(self, span_data, run_id): + # type: (SentryLangchainCallback, WatchedSpan, UUID) -> None + + span_data.span.__exit__(None, None, None) + del self.span_map[run_id] def on_llm_start( self, @@ -162,12 +175,13 @@ def on_llm_start( return all_params = kwargs.get("invocation_params", {}) all_params.update(serialized.get("kwargs", {})) - span = self._create_span( + watched_span = self._create_span( run_id, kwargs.get("parent_run_id"), op=OP.LANGCHAIN_RUN, description=kwargs.get("name") or "Langchain LLM call", ) + span = watched_span.span if should_send_default_pii() and self.include_prompts: set_data_normalized(span, SPANDATA.AI_INPUT_MESSAGES, prompts) for k, v in DATA_FIELDS.items(): @@ -182,15 +196,19 @@ def on_chat_model_start(self, serialized, messages, *, run_id, **kwargs): return all_params = kwargs.get("invocation_params", {}) all_params.update(serialized.get("kwargs", {})) - span = self._create_span( + watched_span = self._create_span( run_id, kwargs.get("parent_run_id"), op=OP.LANGCHAIN_CHAT_COMPLETIONS_CREATE, description=kwargs.get("name") or "Langchain Chat Model", ) + span = watched_span.span model = all_params.get( "model", all_params.get("model_name", all_params.get("model_id")) ) + watched_span.no_collect_tokens = any( + x in all_params.get("_type", "") for x in NO_COLLECT_TOKEN_MODELS + ) if not model and "anthropic" in all_params.get("_type"): model = "claude-2" if model: @@ -207,11 +225,12 @@ def on_chat_model_start(self, serialized, messages, *, run_id, **kwargs): for k, v in DATA_FIELDS.items(): if k in all_params: set_data_normalized(span, v, all_params[k]) - for list_ in messages: - for message in list_: - self.span_map[run_id].num_prompt_tokens += count_tokens( - message.content - ) + count_tokens(message.type) + if not watched_span.no_collect_tokens: + for list_ in messages: + for message in list_: + self.span_map[run_id].num_prompt_tokens += count_tokens( + message.content + ) + count_tokens(message.type) def on_llm_new_token(self, token, *, run_id, **kwargs): # type: (SentryLangchainCallback, str, UUID, Any) -> Any @@ -220,7 +239,7 @@ def on_llm_new_token(self, token, *, run_id, **kwargs): if not run_id or run_id not in self.span_map: return span_data = self.span_map[run_id] - if not span_data: + if not span_data or span_data.no_collect_tokens: return span_data.num_completion_tokens += count_tokens(token) @@ -246,22 +265,22 @@ def on_llm_end(self, response, *, run_id, **kwargs): [[x.text for x in list_] for list_ in response.generations], ) - if token_usage: - record_token_usage( - span_data.span, - token_usage.get("prompt_tokens"), - token_usage.get("completion_tokens"), - token_usage.get("total_tokens"), - ) - else: - record_token_usage( - span_data.span, - span_data.num_prompt_tokens, - span_data.num_completion_tokens, - ) + if not span_data.no_collect_tokens: + if token_usage: + record_token_usage( + span_data.span, + token_usage.get("prompt_tokens"), + token_usage.get("completion_tokens"), + token_usage.get("total_tokens"), + ) + else: + record_token_usage( + span_data.span, + span_data.num_prompt_tokens, + span_data.num_completion_tokens, + ) - span_data.span.__exit__(None, None, None) - del self.span_map[run_id] + self._exit_span(span_data, run_id) def on_llm_error(self, error, *, run_id, **kwargs): # type: (SentryLangchainCallback, Union[Exception, KeyboardInterrupt], UUID, Any) -> Any @@ -275,7 +294,7 @@ def on_chain_start(self, serialized, inputs, *, run_id, **kwargs): with capture_internal_exceptions(): if not run_id: return - span = 
self._create_span( + watched_span = self._create_span( run_id, kwargs.get("parent_run_id"), op=( @@ -287,7 +306,7 @@ def on_chain_start(self, serialized, inputs, *, run_id, **kwargs): ) metadata = kwargs.get("metadata") if metadata: - set_data_normalized(span, SPANDATA.AI_METADATA, metadata) + set_data_normalized(watched_span.span, SPANDATA.AI_METADATA, metadata) def on_chain_end(self, outputs, *, run_id, **kwargs): # type: (SentryLangchainCallback, Dict[str, Any], UUID, Any) -> Any @@ -299,8 +318,7 @@ def on_chain_end(self, outputs, *, run_id, **kwargs): span_data = self.span_map[run_id] if not span_data: return - span_data.span.__exit__(None, None, None) - del self.span_map[run_id] + self._exit_span(span_data, run_id) def on_chain_error(self, error, *, run_id, **kwargs): # type: (SentryLangchainCallback, Union[Exception, KeyboardInterrupt], UUID, Any) -> Any @@ -312,14 +330,16 @@ def on_agent_action(self, action, *, run_id, **kwargs): with capture_internal_exceptions(): if not run_id: return - span = self._create_span( + watched_span = self._create_span( run_id, kwargs.get("parent_run_id"), op=OP.LANGCHAIN_AGENT, description=action.tool or "AI tool usage", ) if action.tool_input and should_send_default_pii() and self.include_prompts: - set_data_normalized(span, SPANDATA.AI_INPUT_MESSAGES, action.tool_input) + set_data_normalized( + watched_span.span, SPANDATA.AI_INPUT_MESSAGES, action.tool_input + ) def on_agent_finish(self, finish, *, run_id, **kwargs): # type: (SentryLangchainCallback, AgentFinish, UUID, Any) -> Any @@ -334,8 +354,7 @@ def on_agent_finish(self, finish, *, run_id, **kwargs): set_data_normalized( span_data.span, SPANDATA.AI_RESPONSES, finish.return_values.items() ) - span_data.span.__exit__(None, None, None) - del self.span_map[run_id] + self._exit_span(span_data, run_id) def on_tool_start(self, serialized, input_str, *, run_id, **kwargs): # type: (SentryLangchainCallback, Dict[str, Any], str, UUID, Any) -> Any @@ -343,7 +362,7 @@ def on_tool_start(self, serialized, input_str, *, run_id, **kwargs): with capture_internal_exceptions(): if not run_id: return - span = self._create_span( + watched_span = self._create_span( run_id, kwargs.get("parent_run_id"), op=OP.LANGCHAIN_TOOL, @@ -353,11 +372,13 @@ def on_tool_start(self, serialized, input_str, *, run_id, **kwargs): ) if should_send_default_pii() and self.include_prompts: set_data_normalized( - span, SPANDATA.AI_INPUT_MESSAGES, kwargs.get("inputs", [input_str]) + watched_span.span, + SPANDATA.AI_INPUT_MESSAGES, + kwargs.get("inputs", [input_str]), ) if kwargs.get("metadata"): set_data_normalized( - span, SPANDATA.AI_METADATA, kwargs.get("metadata") + watched_span.span, SPANDATA.AI_METADATA, kwargs.get("metadata") ) def on_tool_end(self, output, *, run_id, **kwargs): @@ -372,8 +393,7 @@ def on_tool_end(self, output, *, run_id, **kwargs): return if should_send_default_pii() and self.include_prompts: set_data_normalized(span_data.span, SPANDATA.AI_RESPONSES, output) - span_data.span.__exit__(None, None, None) - del self.span_map[run_id] + self._exit_span(span_data, run_id) def on_tool_error(self, error, *args, run_id, **kwargs): # type: (SentryLangchainCallback, Union[Exception, KeyboardInterrupt], UUID, Any) -> Any diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index b3ca452267..6498cefbaf 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -20,6 +20,7 @@ def get_word_length(word: str) -> int: global 
stream_result_mock # type: Mock +global llm_type # type: str class MockOpenAI(ChatOpenAI): @@ -33,14 +34,26 @@ def _stream( for x in stream_result_mock(): yield x + @property + def _llm_type(self) -> str: + return llm_type + @pytest.mark.parametrize( - "send_default_pii, include_prompts", - [(True, True), (True, False), (False, True), (False, False)], + "send_default_pii, include_prompts, use_unknown_llm_type", + [ + (True, True, False), + (True, False, False), + (False, True, False), + (False, False, True), + ], ) def test_langchain_agent( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_events, send_default_pii, include_prompts, use_unknown_llm_type ): + global llm_type + llm_type = "acme-llm" if use_unknown_llm_type else "openai-chat" + sentry_init( integrations=[LangchainIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, @@ -144,6 +157,14 @@ def test_langchain_agent( # We can't guarantee anything about the "shape" of the langchain execution graph assert len(list(x for x in tx["spans"] if x["op"] == "ai.run.langchain")) > 0 + if use_unknown_llm_type: + assert "ai_prompt_tokens_used" in chat_spans[0]["measurements"] + assert "ai_total_tokens_used" in chat_spans[0]["measurements"] + else: + # important: to avoid double counting, we do *not* measure + # tokens used if we have an explicit integration (e.g. OpenAI) + assert "measurements" not in chat_spans[0] + if send_default_pii and include_prompts: assert ( "You are very powerful" From 55606488b674606997a45b9eefa1efc9b84eee1b Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Mon, 22 Apr 2024 16:33:03 -0400 Subject: [PATCH 37/41] Add preliminary AI analytics SDK --- sentry_sdk/ai_analytics.py | 78 +++++++++++++++++++++++++++ sentry_sdk/integrations/_ai_common.py | 20 +------ sentry_sdk/integrations/langchain.py | 15 ++++-- sentry_sdk/integrations/openai.py | 5 +- 4 files changed, 92 insertions(+), 26 deletions(-) create mode 100644 sentry_sdk/ai_analytics.py diff --git a/sentry_sdk/ai_analytics.py b/sentry_sdk/ai_analytics.py new file mode 100644 index 0000000000..ebdbc56c54 --- /dev/null +++ b/sentry_sdk/ai_analytics.py @@ -0,0 +1,78 @@ +from functools import wraps + +from sentry_sdk import start_span +from sentry_sdk.tracing import Span +from sentry_sdk.utils import ContextVar +from sentry_sdk._types import TYPE_CHECKING + +if TYPE_CHECKING: + from typing import Optional, Callable, Any + +_ai_pipeline_name = ContextVar("ai_pipeline_name", default=None) + + +def set_ai_pipeline_name(name): + # type: (Optional[str]) -> None + _ai_pipeline_name.set(name) + + +def get_ai_pipeline_name(): + # type: () -> Optional[str] + return _ai_pipeline_name.get() + + +def ai_pipeline(description, op="ai.pipeline", **span_kwargs): + # type: (str, str, Any) -> Callable[..., Any] + def decorator(f): + # type: (Callable[..., Any]) -> Callable[..., Any] + @wraps(f) + def wrapped(*args, **kwargs): + # type: (Any, Any) -> Any + with start_span(description=description, op=op, **span_kwargs): + _ai_pipeline_name.set(description) + res = f(*args, **kwargs) + _ai_pipeline_name.set(None) + return res + + return wrapped + + return decorator + + +def ai_run(description, op="ai.run", **span_kwargs): + # type: (str, str, Any) -> Callable[..., Any] + def decorator(f): + # type: (Callable[..., Any]) -> Callable[..., Any] + @wraps(f) + def wrapped(*args, **kwargs): + # type: (Any, Any) -> Any + with start_span(description=description, op=op, **span_kwargs) as span: + curr_pipeline = _ai_pipeline_name.get() + if 
curr_pipeline: + span.set_data("ai.pipeline.name", curr_pipeline) + return f(*args, **kwargs) + + return wrapped + + return decorator + + +def record_token_usage( + span, prompt_tokens=None, completion_tokens=None, total_tokens=None +): + # type: (Span, Optional[int], Optional[int], Optional[int]) -> None + ai_pipeline_name = get_ai_pipeline_name() + if ai_pipeline_name: + span.set_data("ai.pipeline.name", ai_pipeline_name) + if prompt_tokens is not None: + span.set_measurement("ai_prompt_tokens_used", value=prompt_tokens) + if completion_tokens is not None: + span.set_measurement("ai_completion_tokens_used", value=completion_tokens) + if ( + total_tokens is None + and prompt_tokens is not None + and completion_tokens is not None + ): + total_tokens = prompt_tokens + completion_tokens + if total_tokens is not None: + span.set_measurement("ai_total_tokens_used", total_tokens) diff --git a/sentry_sdk/integrations/_ai_common.py b/sentry_sdk/integrations/_ai_common.py index 5b25d1fc69..42d46304e4 100644 --- a/sentry_sdk/integrations/_ai_common.py +++ b/sentry_sdk/integrations/_ai_common.py @@ -1,7 +1,7 @@ from sentry_sdk._types import TYPE_CHECKING if TYPE_CHECKING: - from typing import Any, Optional + from typing import Any from sentry_sdk.tracing import Span from sentry_sdk.utils import logger @@ -30,21 +30,3 @@ def set_data_normalized(span, key, value): # type: (Span, str, Any) -> None normalized = _normalize_data(value) span.set_data(key, normalized) - - -def record_token_usage( - span, prompt_tokens=None, completion_tokens=None, total_tokens=None -): - # type: (Span, Optional[int], Optional[int], Optional[int]) -> None - if prompt_tokens is not None: - span.set_measurement("ai_prompt_tokens_used", value=prompt_tokens) - if completion_tokens is not None: - span.set_measurement("ai_completion_tokens_used", value=completion_tokens) - if ( - total_tokens is None - and prompt_tokens is not None - and completion_tokens is not None - ): - total_tokens = prompt_tokens + completion_tokens - if total_tokens is not None: - span.set_measurement("ai_total_tokens_used", total_tokens) diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index 0cebe1ec17..f3058fe087 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -3,8 +3,9 @@ import sentry_sdk from sentry_sdk._types import TYPE_CHECKING +from sentry_sdk.ai_analytics import set_ai_pipeline_name, record_token_usage from sentry_sdk.consts import OP, SPANDATA -from sentry_sdk.integrations._ai_common import set_data_normalized, record_token_usage +from sentry_sdk.integrations._ai_common import set_data_normalized from sentry_sdk.scope import should_send_default_pii from sentry_sdk.tracing import Span @@ -88,6 +89,7 @@ class WatchedSpan: num_prompt_tokens = 0 # type: int no_collect_tokens = False # type: bool children = [] # type: List[WatchedSpan] + is_pipeline = False # type: bool def __init__(self, span): # type: (Span) -> None @@ -134,9 +136,6 @@ def _normalize_langchain_message(self, message): def _create_span(self, run_id, parent_id, **kwargs): # type: (SentryLangchainCallback, UUID, Optional[Any], Any) -> WatchedSpan - if "origin" not in kwargs: - kwargs["origin"] = "auto.ai.langchain" - watched_span = None # type: Optional[WatchedSpan] if parent_id: parent_span = self.span_map[parent_id] # type: Optional[WatchedSpan] @@ -146,6 +145,11 @@ def _create_span(self, run_id, parent_id, **kwargs): if watched_span is None: watched_span = 
WatchedSpan(sentry_sdk.start_span(**kwargs)) + if kwargs.get("op", "").startswith("ai.pipeline."): + if kwargs.get("description"): + set_ai_pipeline_name(kwargs.get("description")) + watched_span.is_pipeline = True + watched_span.span.__enter__() self.span_map[run_id] = watched_span self.gc_span_map() @@ -154,6 +158,9 @@ def _create_span(self, run_id, parent_id, **kwargs): def _exit_span(self, span_data, run_id): # type: (SentryLangchainCallback, WatchedSpan, UUID) -> None + if span_data.is_pipeline: + set_ai_pipeline_name(None) + span_data.span.__exit__(None, None, None) del self.span_map[run_id] diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index ffb8a391fa..ae5c9e70ac 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -2,8 +2,9 @@ from sentry_sdk import consts from sentry_sdk._types import TYPE_CHECKING +from sentry_sdk.ai_analytics import record_token_usage from sentry_sdk.consts import SPANDATA -from sentry_sdk.integrations._ai_common import set_data_normalized, record_token_usage +from sentry_sdk.integrations._ai_common import set_data_normalized if TYPE_CHECKING: from typing import Any, Iterable, List, Optional, Callable, Iterator @@ -141,7 +142,6 @@ def new_chat_completion(*args, **kwargs): span = sentry_sdk.start_span( op=consts.OP.OPENAI_CHAT_COMPLETIONS_CREATE, - origin="auto.ai.openai", description="Chat Completion", ) span.__enter__() @@ -225,7 +225,6 @@ def new_embeddings_create(*args, **kwargs): # type: (*Any, **Any) -> Any with sentry_sdk.start_span( op=consts.OP.OPENAI_EMBEDDINGS_CREATE, - origin="auto.ai.openai", description="OpenAI Embedding Creation", ) as span: integration = sentry_sdk.get_client().get_integration(OpenAIIntegration) From 3295b043d17bec0bfde228470fe66635c4f6f8de Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Tue, 23 Apr 2024 13:48:14 -0400 Subject: [PATCH 38/41] Merge decorators together into "ai_track" --- sentry_sdk/ai_analytics.py | 32 ++++++++++---------------------- 1 file changed, 10 insertions(+), 22 deletions(-) diff --git a/sentry_sdk/ai_analytics.py b/sentry_sdk/ai_analytics.py index ebdbc56c54..2fd39a4f98 100644 --- a/sentry_sdk/ai_analytics.py +++ b/sentry_sdk/ai_analytics.py @@ -21,36 +21,24 @@ def get_ai_pipeline_name(): return _ai_pipeline_name.get() -def ai_pipeline(description, op="ai.pipeline", **span_kwargs): - # type: (str, str, Any) -> Callable[..., Any] - def decorator(f): - # type: (Callable[..., Any]) -> Callable[..., Any] - @wraps(f) - def wrapped(*args, **kwargs): - # type: (Any, Any) -> Any - with start_span(description=description, op=op, **span_kwargs): - _ai_pipeline_name.set(description) - res = f(*args, **kwargs) - _ai_pipeline_name.set(None) - return res - - return wrapped - - return decorator - - -def ai_run(description, op="ai.run", **span_kwargs): - # type: (str, str, Any) -> Callable[..., Any] +def ai_track(description, **span_kwargs): + # type: (str, Any) -> Callable[..., Any] def decorator(f): # type: (Callable[..., Any]) -> Callable[..., Any] @wraps(f) def wrapped(*args, **kwargs): # type: (Any, Any) -> Any + curr_pipeline = _ai_pipeline_name.get() + op = span_kwargs.get("op", "ai.run" if curr_pipeline else "ai.pipeline") with start_span(description=description, op=op, **span_kwargs) as span: - curr_pipeline = _ai_pipeline_name.get() if curr_pipeline: span.set_data("ai.pipeline.name", curr_pipeline) - return f(*args, **kwargs) + return f(*args, **kwargs) + else: + _ai_pipeline_name.set(description) + res = f(*args, **kwargs) + 
_ai_pipeline_name.set(None) + return res return wrapped From 879e42ac6d6d68d570d5716405ed6388a3eb314f Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Thu, 25 Apr 2024 10:40:35 -0400 Subject: [PATCH 39/41] Add exception handling to AI monitoring --- sentry_sdk/{ai_analytics.py => ai_monitoring.py} | 15 +++++++++++++-- sentry_sdk/integrations/langchain.py | 2 +- sentry_sdk/integrations/openai.py | 2 +- 3 files changed, 15 insertions(+), 4 deletions(-) rename sentry_sdk/{ai_analytics.py => ai_monitoring.py} (77%) diff --git a/sentry_sdk/ai_analytics.py b/sentry_sdk/ai_monitoring.py similarity index 77% rename from sentry_sdk/ai_analytics.py rename to sentry_sdk/ai_monitoring.py index 2fd39a4f98..f5f9cd7aad 100644 --- a/sentry_sdk/ai_analytics.py +++ b/sentry_sdk/ai_monitoring.py @@ -1,5 +1,6 @@ from functools import wraps +import sentry_sdk.utils from sentry_sdk import start_span from sentry_sdk.tracing import Span from sentry_sdk.utils import ContextVar @@ -36,8 +37,18 @@ def wrapped(*args, **kwargs): return f(*args, **kwargs) else: _ai_pipeline_name.set(description) - res = f(*args, **kwargs) - _ai_pipeline_name.set(None) + try: + res = f(*args, **kwargs) + except Exception as e: + event, hint = sentry_sdk.utils.event_from_exception( + e, + client_options=sentry_sdk.get_client().options, + mechanism={"type": "ai_monitoring", "handled": False}, + ) + sentry_sdk.capture_event(event, hint=hint) + raise e from None + finally: + _ai_pipeline_name.set(None) return res return wrapped diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index f3058fe087..3e8ed89fff 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -3,7 +3,7 @@ import sentry_sdk from sentry_sdk._types import TYPE_CHECKING -from sentry_sdk.ai_analytics import set_ai_pipeline_name, record_token_usage +from sentry_sdk.ai_monitoring import set_ai_pipeline_name, record_token_usage from sentry_sdk.consts import OP, SPANDATA from sentry_sdk.integrations._ai_common import set_data_normalized from sentry_sdk.scope import should_send_default_pii diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index ae5c9e70ac..5af79c4d93 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -2,7 +2,7 @@ from sentry_sdk import consts from sentry_sdk._types import TYPE_CHECKING -from sentry_sdk.ai_analytics import record_token_usage +from sentry_sdk.ai_monitoring import record_token_usage from sentry_sdk.consts import SPANDATA from sentry_sdk.integrations._ai_common import set_data_normalized From 1643bdb2e07aa2edd5172fa465eac7d7a436e5a7 Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Fri, 26 Apr 2024 10:18:47 -0400 Subject: [PATCH 40/41] Move stuff around --- sentry_sdk/ai/__init__.py | 0 sentry_sdk/{ai_monitoring.py => ai/monitoring.py} | 0 sentry_sdk/{integrations/_ai_common.py => ai/utils.py} | 0 sentry_sdk/integrations/langchain.py | 4 ++-- sentry_sdk/integrations/openai.py | 4 ++-- 5 files changed, 4 insertions(+), 4 deletions(-) create mode 100644 sentry_sdk/ai/__init__.py rename sentry_sdk/{ai_monitoring.py => ai/monitoring.py} (100%) rename sentry_sdk/{integrations/_ai_common.py => ai/utils.py} (100%) diff --git a/sentry_sdk/ai/__init__.py b/sentry_sdk/ai/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/sentry_sdk/ai_monitoring.py b/sentry_sdk/ai/monitoring.py similarity index 100% rename from sentry_sdk/ai_monitoring.py rename to sentry_sdk/ai/monitoring.py diff --git 
a/sentry_sdk/integrations/_ai_common.py b/sentry_sdk/ai/utils.py similarity index 100% rename from sentry_sdk/integrations/_ai_common.py rename to sentry_sdk/ai/utils.py diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index 3e8ed89fff..35e955b958 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -3,9 +3,9 @@ import sentry_sdk from sentry_sdk._types import TYPE_CHECKING -from sentry_sdk.ai_monitoring import set_ai_pipeline_name, record_token_usage +from sentry_sdk.ai.monitoring import set_ai_pipeline_name, record_token_usage from sentry_sdk.consts import OP, SPANDATA -from sentry_sdk.integrations._ai_common import set_data_normalized +from sentry_sdk.ai.utils import set_data_normalized from sentry_sdk.scope import should_send_default_pii from sentry_sdk.tracing import Span diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 5af79c4d93..20147b342f 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -2,9 +2,9 @@ from sentry_sdk import consts from sentry_sdk._types import TYPE_CHECKING -from sentry_sdk.ai_monitoring import record_token_usage +from sentry_sdk.ai.monitoring import record_token_usage from sentry_sdk.consts import SPANDATA -from sentry_sdk.integrations._ai_common import set_data_normalized +from sentry_sdk.ai.utils import set_data_normalized if TYPE_CHECKING: from typing import Any, Iterable, List, Optional, Callable, Iterator From d8a8ca740cb3e56e272d071b53ccabf8b28d4a74 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Tue, 30 Apr 2024 16:20:31 +0200 Subject: [PATCH 41/41] trigger ci
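
Taken together, patches 37 through 40 leave a small public API for AI monitoring: an ai_track decorator that opens an "ai.pipeline" span for the outermost decorated call and "ai.run" spans for nested calls, plus a record_token_usage helper that attaches token counts to a span as measurements. The following is a minimal usage sketch, not part of the patch series itself: it assumes the module layout after patch 40 (sentry_sdk.ai.monitoring); the DSN, span names, and fake model call are placeholders, and sentry_sdk.get_current_span() is assumed to be available in this SDK version.

import sentry_sdk
from sentry_sdk.ai.monitoring import ai_track, record_token_usage

sentry_sdk.init(
    dsn="https://examplePublicKey@o0.ingest.sentry.io/0",  # placeholder DSN
    traces_sample_rate=1.0,
)


@ai_track("My AI pipeline")
def my_pipeline(text):
    # type: (str) -> str
    # The outermost decorated call gets an "ai.pipeline" span and sets the
    # pipeline-name context variable for everything called beneath it.
    return my_model_call(text)


@ai_track("My model call")
def my_model_call(text):
    # type: (str) -> str
    # A pipeline is already active here, so this call gets an "ai.run" span
    # with "ai.pipeline.name" recorded in its span data.
    response = "placeholder model output"  # stand-in for a real model call
    # Token counts recorded here become span measurements named
    # ai_prompt_tokens_used, ai_completion_tokens_used and ai_total_tokens_used
    # (the total is derived from the other two when not given).
    record_token_usage(
        sentry_sdk.get_current_span(),  # assumed helper; any active Span works
        prompt_tokens=21,
        completion_tokens=9,
    )
    return response


with sentry_sdk.start_transaction(op="ai-inference", name="my-pipeline-request"):
    my_pipeline("Some long input text ...")

Spans are only collected inside an active transaction, which is why the decorated calls are wrapped in start_transaction here. With the OpenAI or langchain integrations enabled, the equivalent spans and measurements are created automatically, and the langchain callback skips token counting for models listed in NO_COLLECT_TOKEN_MODELS to avoid double-counting.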