wip: route Cohere input span data through a declarative set_input_span_data config

shellmayr · shellmayr · commit 65f92305af09 · 2026-03-04T13:58:43.000+01:00
diff --git a/sentry_sdk/ai/span_config.py b/sentry_sdk/ai/span_config.py
@@ -0,0 +1,68 @@
+import sentry_sdk
+from sentry_sdk.consts import SPANDATA
+from sentry_sdk.ai.utils import (
+    set_data_normalized,
+    normalize_message_roles,
+    truncate_and_annotate_messages,
+)
+from sentry_sdk.scope import should_send_default_pii
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from typing import Any, Dict
+    from sentry_sdk.tracing import Span
+
+
+def set_input_span_data(span, kwargs, integration, config):
+    # type: (Span, Dict[str, Any], Any, Dict[str, Any]) -> None
+    """
+    Populate a span's input attributes as described by ``config``.
+
+    Args:
+        span: span that receives the attributes.
+        kwargs: keyword arguments of the instrumented call.
+        integration: object exposing ``include_prompts``.
+        config: declarative description; recognised keys are listed below.
+
+    Config keys:
+        system, operation: required; stored as GEN_AI_SYSTEM and
+            GEN_AI_OPERATION_NAME.
+        params: kwargs key -> span attr, copied whenever the key is present.
+        pii_params: same shape as params, copied only when PII may be sent.
+        extract_messages: callable(kwargs) -> messages; a falsy result is skipped.
+        message_target: span attr for messages (default GEN_AI_REQUEST_MESSAGES).
+        truncation_fn: callable(messages, span, scope); defaults to
+            truncate_and_annotate_messages, an explicit None disables it.
+        is_given: optional predicate used to drop NotGiven-style sentinels.
+        extra_static: span attr -> value pairs, written last.
+    """
+
+    def copy_kwargs(mapping):
+        # type: (Dict[str, Any]) -> None
+        accept = config.get("is_given")
+        for name, attr in mapping.items():
+            if name in kwargs and (accept is None or accept(kwargs[name])):
+                set_data_normalized(span, attr, kwargs[name])
+
+    set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, config["system"])
+    set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, config["operation"])
+    copy_kwargs(config.get("params", {}))
+
+    # Prompt content and PII-bearing kwargs need both the global PII switch
+    # and the integration's own include_prompts flag.
+    if should_send_default_pii() and integration.include_prompts:
+        extractor = config.get("extract_messages")
+        payload = extractor(kwargs) if extractor is not None else None
+        if payload:
+            payload = normalize_message_roles(payload)
+            shorten = config.get("truncation_fn", truncate_and_annotate_messages)
+            if shorten is not None:
+                payload = shorten(payload, span, sentry_sdk.get_current_scope())
+            if payload is not None:
+                attr = config.get("message_target", SPANDATA.GEN_AI_REQUEST_MESSAGES)
+                set_data_normalized(span, attr, payload, unpack=False)
+        copy_kwargs(config.get("pii_params", {}))
+
+    for attr, value in config.get("extra_static", {}).items():
+        set_data_normalized(span, attr, value)
diff --git a/sentry_sdk/integrations/cohere/__init__.py b/sentry_sdk/integrations/cohere/__init__.py
@@ -3,7 +3,7 @@
 
 from sentry_sdk.ai.monitoring import record_token_usage
 from sentry_sdk.consts import OP, SPANDATA
-from sentry_sdk.ai.utils import set_data_normalized
+from sentry_sdk.ai.span_config import set_input_span_data
 
 from typing import TYPE_CHECKING
 
@@ -13,7 +13,6 @@
     from typing import Any, Callable
 
 import sentry_sdk
-from sentry_sdk.scope import should_send_default_pii
 from sentry_sdk.integrations import DidNotEnable, Integration
 from sentry_sdk.utils import capture_internal_exceptions, event_from_exception, reraise
 
@@ -43,6 +42,16 @@ def _normalize_embedding_input(texts):
     return [texts]
 
 
+COHERE_EMBED_CONFIG = {
+    "system": "cohere",
+    "operation": "embeddings",
+    "params": {"model": SPANDATA.GEN_AI_REQUEST_MODEL},  # set regardless of PII
+    "extract_messages": lambda kw: _normalize_embedding_input(kw["texts"]) if "texts" in kw else None,
+    "message_target": SPANDATA.GEN_AI_EMBEDDINGS_INPUT,  # instead of request messages
+    "truncation_fn": None,  # explicit None: set_input_span_data skips truncation
+}
+
+
 class CohereIntegration(Integration):
     identifier = "cohere"
     origin = f"auto.ai.{identifier}"
@@ -91,22 +100,7 @@ def new_embed(*args, **kwargs):
             name=f"embeddings {model}".strip(),
             origin=CohereIntegration.origin,
         ) as span:
-            set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, "cohere")
-            set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "embeddings")
-
-            if "texts" in kwargs and (
-                should_send_default_pii() and integration.include_prompts
-            ):
-                set_data_normalized(
-                    span,
-                    SPANDATA.GEN_AI_EMBEDDINGS_INPUT,
-                    _normalize_embedding_input(kwargs["texts"]),
-                )
-
-            if "model" in kwargs:
-                set_data_normalized(
-                    span, SPANDATA.GEN_AI_REQUEST_MODEL, kwargs["model"]
-                )
+            set_input_span_data(span, kwargs, integration, COHERE_EMBED_CONFIG)
 
             try:
                 res = f(*args, **kwargs)
diff --git a/sentry_sdk/integrations/cohere/v1.py b/sentry_sdk/integrations/cohere/v1.py
@@ -3,11 +3,8 @@
 
 from sentry_sdk.ai.monitoring import record_token_usage
 from sentry_sdk.consts import OP, SPANDATA
-from sentry_sdk.ai.utils import (
-    set_data_normalized,
-    normalize_message_roles,
-    truncate_and_annotate_messages,
-)
+from sentry_sdk.ai.utils import set_data_normalized
+from sentry_sdk.ai.span_config import set_input_span_data
 
 from typing import TYPE_CHECKING
 
@@ -48,6 +45,32 @@
     "preamble": SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS,
 }
 
+
+def _extract_messages_v1(kwargs):
+    # type: (dict[str, Any]) -> list[dict[str, str]]
+    """Build role/content dicts from V1 ``chat_history`` plus ``message``.
+
+    ``None`` history or role is tolerated; a falsy ``message`` is skipped.
+    """
+    messages = []
+    # .get(key, []) would still return None for an explicit chat_history=None.
+    for x in kwargs.get("chat_history") or []:
+        role = getattr(x, "role", None) or ""
+        messages.append({"role": role.lower(), "content": getattr(x, "message", "")})
+    message = kwargs.get("message")
+    if message:
+        messages.append({"role": "user", "content": message})
+    return messages
+
+
+COHERE_V1_CHAT_CONFIG = {
+    "system": "cohere",
+    "operation": "chat",
+    "params": COLLECTED_CHAT_PARAMS,  # copied whenever the kwarg is present
+    "pii_params": COLLECTED_PII_CHAT_PARAMS,  # copied only when PII may be sent
+    "extract_messages": _extract_messages_v1,  # runs only when PII may be sent
+}
+
 COLLECTED_CHAT_RESP_ATTRS = {
     "generation_id": SPANDATA.GEN_AI_RESPONSE_ID,
     "finish_reason": SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS,
@@ -77,36 +100,6 @@ def _wrap_chat(f, streaming):
     if not _has_chat_types:
         return f
 
-    def collect_chat_response_fields(span, res, include_pii):
-        # type: (Span, NonStreamedChatResponse, bool) -> None
-        if include_pii:
-            if hasattr(res, "text"):
-                set_data_normalized(
-                    span,
-                    SPANDATA.GEN_AI_RESPONSE_TEXT,
-                    [res.text],
-                )
-            for attr, spandata_key in COLLECTED_PII_CHAT_RESP_ATTRS.items():
-                if hasattr(res, attr):
-                    set_data_normalized(span, spandata_key, getattr(res, attr))
-
-        for attr, spandata_key in COLLECTED_CHAT_RESP_ATTRS.items():
-            if hasattr(res, attr):
-                set_data_normalized(span, spandata_key, getattr(res, attr))
-
-        if hasattr(res, "meta"):
-            if hasattr(res.meta, "billed_units"):
-                record_token_usage(
-                    span,
-                    input_tokens=res.meta.billed_units.input_tokens,
-                    output_tokens=res.meta.billed_units.output_tokens,
-                )
-            elif hasattr(res.meta, "tokens"):
-                record_token_usage(
-                    span,
-                    input_tokens=res.meta.tokens.input_tokens,
-                    output_tokens=res.meta.tokens.output_tokens,
-                )
 
     @wraps(f)
     def new_chat(*args, **kwargs):
@@ -120,7 +113,6 @@ def new_chat(*args, **kwargs):
         ):
             return f(*args, **kwargs)
 
-        message = kwargs.get("message")
         model = kwargs.get("model", "")
 
         with sentry_sdk.start_span(
@@ -137,41 +129,10 @@ def new_chat(*args, **kwargs):
                 reraise(*exc_info)
 
             with capture_internal_exceptions():
-                set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, "cohere")
-                set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "chat")
-                if model:
-                    set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MODEL, model)
-
-                if should_send_default_pii() and integration.include_prompts:
-                    messages = []
-                    for x in kwargs.get("chat_history", []):
-                        messages.append(
-                            {
-                                "role": getattr(x, "role", "").lower(),
-                                "content": getattr(x, "message", ""),
-                            }
-                        )
-                    messages.append({"role": "user", "content": message})
-                    messages = normalize_message_roles(messages)
-                    scope = sentry_sdk.get_current_scope()
-                    messages_data = truncate_and_annotate_messages(
-                        messages, span, scope
-                    )
-                    if messages_data is not None:
-                        set_data_normalized(
-                            span,
-                            SPANDATA.GEN_AI_REQUEST_MESSAGES,
-                            messages_data,
-                            unpack=False,
-                        )
-                    for k, v in COLLECTED_PII_CHAT_PARAMS.items():
-                        if k in kwargs:
-                            set_data_normalized(span, v, kwargs[k])
-
-                for k, v in COLLECTED_CHAT_PARAMS.items():
-                    if k in kwargs:
-                        set_data_normalized(span, v, kwargs[k])
-                set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_STREAMING, streaming)
+                set_input_span_data(span, kwargs, integration, {
+                    **COHERE_V1_CHAT_CONFIG,
+                    "extra_static": {SPANDATA.GEN_AI_RESPONSE_STREAMING: streaming},
+                })
 
                 if streaming:
                     old_iterator = res
@@ -203,4 +164,34 @@ def new_iterator():
                     set_data_normalized(span, "unknown_response", True)
                 return res
 
+    def collect_chat_response_fields(span, res, include_pii):
+        # type: (Span, NonStreamedChatResponse, bool) -> None
+        """Copy response attributes and token usage from ``res`` onto ``span``.
+
+        PII fields are written only when ``include_pii`` is true; usage comes
+        from ``meta.billed_units``, else ``meta.tokens``.
+        """
+        if include_pii:
+            if hasattr(res, "text"):
+                set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_TEXT, [res.text])
+            for attr, spandata_key in COLLECTED_PII_CHAT_RESP_ATTRS.items():
+                if hasattr(res, attr):
+                    set_data_normalized(span, spandata_key, getattr(res, attr))
+
+        for attr, spandata_key in COLLECTED_CHAT_RESP_ATTRS.items():
+            if hasattr(res, attr):
+                set_data_normalized(span, spandata_key, getattr(res, attr))
+
+        # hasattr() is also true for a field that is present but None, so test
+        # the values themselves before dereferencing the usage object.
+        meta = getattr(res, "meta", None)
+        usage = getattr(meta, "billed_units", None)
+        if usage is None:
+            usage = getattr(meta, "tokens", None)
+        if usage is not None:
+            record_token_usage(
+                span,
+                input_tokens=usage.input_tokens,
+                output_tokens=usage.output_tokens,
+            )
     return new_chat
diff --git a/sentry_sdk/integrations/cohere/v2.py b/sentry_sdk/integrations/cohere/v2.py
@@ -3,11 +3,8 @@
 
 from sentry_sdk.ai.monitoring import record_token_usage
 from sentry_sdk.consts import OP, SPANDATA
-from sentry_sdk.ai.utils import (
-    set_data_normalized,
-    normalize_message_roles,
-    truncate_and_annotate_messages,
-)
+from sentry_sdk.ai.utils import set_data_normalized
+from sentry_sdk.ai.span_config import set_input_span_data
 
 from typing import TYPE_CHECKING
 
@@ -101,6 +98,15 @@ def _extract_messages_v2(messages):
     return result
 
 
+COHERE_V2_CHAT_CONFIG = {
+    "system": "cohere",
+    "operation": "chat",
+    "params": COLLECTED_CHAT_PARAMS,  # copied whenever the kwarg is present
+    "pii_params": {"tools": SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS},  # PII-gated
+    "extract_messages": lambda kw: _extract_messages_v2(kw.get("messages", [])),
+}
+
+
 def _record_token_usage_v2(span, usage):
     # type: (Span, Any) -> None
     """Extract and record token usage from a V2 Usage object."""
@@ -180,36 +186,13 @@ def new_chat(*args, **kwargs):
                 reraise(*exc_info)
 
             with capture_internal_exceptions():
-                set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, "cohere")
-                set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "chat")
+                extra = {SPANDATA.GEN_AI_RESPONSE_STREAMING: streaming}
                 if model:
-                    set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MODEL, model)
-
-                if should_send_default_pii() and integration.include_prompts:
-                    messages = _extract_messages_v2(kwargs.get("messages", []))
-                    messages = normalize_message_roles(messages)
-                    scope = sentry_sdk.get_current_scope()
-                    messages_data = truncate_and_annotate_messages(
-                        messages, span, scope
-                    )
-                    if messages_data is not None:
-                        set_data_normalized(
-                            span,
-                            SPANDATA.GEN_AI_REQUEST_MESSAGES,
-                            messages_data,
-                            unpack=False,
-                        )
-                    if "tools" in kwargs:
-                        set_data_normalized(
-                            span,
-                            SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS,
-                            kwargs["tools"],
-                        )
-
-                for k, v in COLLECTED_CHAT_PARAMS.items():
-                    if k in kwargs:
-                        set_data_normalized(span, v, kwargs[k])
-                set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_STREAMING, streaming)
+                    extra[SPANDATA.GEN_AI_REQUEST_MODEL] = model  # request-side model
+                set_input_span_data(span, kwargs, integration, {
+                    **COHERE_V2_CHAT_CONFIG,
+                    "extra_static": extra,
+                })
 
                 if streaming:
                     old_iterator = res