NOTE(review): this patch was reflowed from a whitespace-collapsed copy. Blank
context lines and Python indentation were reconstructed from the hunk line
counts; verify them against the base revision before applying.
NOTE(review): facade.py (last hunk) and test_facade.py (last hunk) carry one
change beyond the original patch: attribution header names are matched
case-insensitively, with a regression test. The "index" post-image hashes for
those two files are therefore stale and should be regenerated.

diff --git a/packages/data-designer-config/src/data_designer/config/utils/constants.py b/packages/data-designer-config/src/data_designer/config/utils/constants.py
index fb1b5a084..7c7d5bfc1 100644
--- a/packages/data-designer-config/src/data_designer/config/utils/constants.py
+++ b/packages/data-designer-config/src/data_designer/config/utils/constants.py
@@ -298,6 +298,17 @@ class NordColor(Enum):
 
 OPENROUTER_API_KEY_ENV_VAR_NAME = "OPENROUTER_API_KEY"
 
+ATTRIBUTION_TITLE = "NeMo Data Designer"
+ATTRIBUTION_REFERER = "https://github.com/NVIDIA-NeMo/DataDesigner"
+
+OPENROUTER_ATTRIBUTION_HEADERS: dict[str, str] = {
+    "HTTP-Referer": ATTRIBUTION_REFERER,
+    "X-OpenRouter-Title": ATTRIBUTION_TITLE,
+    "X-OpenRouter-Categories": "programming-app",
+}
+
+# OpenRouter attribution is injected in the engine so telemetry opt-out can
+# suppress it cleanly for both default and existing provider configs.
 PREDEFINED_PROVIDERS = [
     {
         "name": NVIDIA_PROVIDER_NAME,
diff --git a/packages/data-designer-config/tests/config/test_default_model_settings.py b/packages/data-designer-config/tests/config/test_default_model_settings.py
index 338c2de8a..c609cf20a 100644
--- a/packages/data-designer-config/tests/config/test_default_model_settings.py
+++ b/packages/data-designer-config/tests/config/test_default_model_settings.py
@@ -100,14 +100,17 @@ def test_get_builtin_model_providers():
     assert builtin_model_providers[0].endpoint == "https://integrate.api.nvidia.com/v1"
     assert builtin_model_providers[0].provider_type == "openai"
     assert builtin_model_providers[0].api_key == "NVIDIA_API_KEY"
+    assert builtin_model_providers[0].extra_headers is None
     assert builtin_model_providers[1].name == "openai"
     assert builtin_model_providers[1].endpoint == "https://api.openai.com/v1"
     assert builtin_model_providers[1].provider_type == "openai"
     assert builtin_model_providers[1].api_key == "OPENAI_API_KEY"
+    assert builtin_model_providers[1].extra_headers is None
     assert builtin_model_providers[2].name == "openrouter"
     assert builtin_model_providers[2].endpoint == "https://openrouter.ai/api/v1"
     assert builtin_model_providers[2].provider_type == "openai"
     assert builtin_model_providers[2].api_key == "OPENROUTER_API_KEY"
+    assert builtin_model_providers[2].extra_headers is None
 
 
 def test_get_default_model_configs_path_exists(tmp_path: Path):
diff --git a/packages/data-designer-engine/src/data_designer/engine/models/facade.py b/packages/data-designer-engine/src/data_designer/engine/models/facade.py
index ec1fd6b7a..3083133a7 100644
--- a/packages/data-designer-engine/src/data_designer/engine/models/facade.py
+++ b/packages/data-designer-engine/src/data_designer/engine/models/facade.py
@@ -10,6 +10,11 @@
 from typing import TYPE_CHECKING, Any
 
 from data_designer.config.models import GenerationType, ModelConfig, ModelProvider
+from data_designer.config.utils.constants import (
+    ATTRIBUTION_TITLE,
+    OPENROUTER_ATTRIBUTION_HEADERS,
+    OPENROUTER_PROVIDER_NAME,
+)
 from data_designer.config.utils.image_helpers import is_image_diffusion_model
 from data_designer.engine.mcp.errors import MCPConfigurationError
 from data_designer.engine.model_provider import ModelProviderRegistry
@@ -30,6 +35,7 @@
     get_exception_primary_cause,
 )
 from data_designer.engine.models.parsers.errors import ParserException
+from data_designer.engine.models.telemetry import TELEMETRY_ENABLED
 from data_designer.engine.models.usage import ImageUsageStats, ModelUsageStats, RequestUsageStats, TokenUsageStats
 from data_designer.engine.models.utils import ChatMessage, prompt_to_messages
 
@@ -156,7 +162,25 @@ def consolidate_kwargs(self, **kwargs: Any) -> dict[str, Any]:
         if self.model_provider.extra_body:
             kwargs["extra_body"] = {**kwargs.get("extra_body", {}), **self.model_provider.extra_body}
         if self.model_provider.extra_headers:
-            kwargs["extra_headers"] = {**kwargs.get("extra_headers", {}), **self.model_provider.extra_headers}
+            kwargs["extra_headers"] = {**(kwargs.get("extra_headers") or {}), **self.model_provider.extra_headers}
+        # Inject framework-level attribution headers when telemetry is enabled.
+        # This runs after the caller/provider merge and only fills in header names
+        # that are still absent, so user-supplied or provider-level values win.
+        if TELEMETRY_ENABLED:
+            headers = kwargs.get("extra_headers") or {}
+            attribution = {"X-Title": ATTRIBUTION_TITLE}
+            # OpenRouter-specific headers are added here (not in the provider
+            # defaults) so attribution also works for existing users who have
+            # ``extra_headers: null`` in their provider config.
+            if self.model_provider.name == OPENROUTER_PROVIDER_NAME:
+                attribution.update(OPENROUTER_ATTRIBUTION_HEADERS)
+            # HTTP header names are case-insensitive: compare case-folded so that a
+            # caller-supplied "x-title" suppresses our "X-Title" instead of both
+            # being sent.
+            present = {name.lower() for name in headers}
+            missing = {name: value for name, value in attribution.items() if name.lower() not in present}
+            if missing:
+                kwargs["extra_headers"] = {**missing, **headers}
         return kwargs
 
     # --- completion / acompletion ---
diff --git a/packages/data-designer-engine/tests/engine/models/test_facade.py b/packages/data-designer-engine/tests/engine/models/test_facade.py
index 84e91325e..ee9ace2ce 100644
--- a/packages/data-designer-engine/tests/engine/models/test_facade.py
+++ b/packages/data-designer-engine/tests/engine/models/test_facade.py
@@ -202,13 +202,21 @@ def test_usage_stats_property(stub_model_facade: ModelFacade) -> None:
 
 
 def test_consolidate_kwargs(stub_model_configs: list[Any], stub_model_facade: ModelFacade) -> None:
-    # Model config generate kwargs are used as base, and purpose is removed
+    # Model config generate kwargs are used as base, and purpose is removed.
+    # When telemetry is enabled (default), X-Title is injected.
     result = stub_model_facade.consolidate_kwargs(purpose="test")
-    assert result == stub_model_configs[0].inference_parameters.generate_kwargs
+    assert result == {
+        **stub_model_configs[0].inference_parameters.generate_kwargs,
+        "extra_headers": {"X-Title": "NeMo Data Designer"},
+    }
 
     # kwargs overrides model config generate kwargs
     result = stub_model_facade.consolidate_kwargs(temperature=0.01, purpose="test")
-    assert result == {**stub_model_configs[0].inference_parameters.generate_kwargs, "temperature": 0.01}
+    assert result == {
+        **stub_model_configs[0].inference_parameters.generate_kwargs,
+        "temperature": 0.01,
+        "extra_headers": {"X-Title": "NeMo Data Designer"},
+    }
 
     # Provider extra_body overrides all other kwargs
     stub_model_facade.model_provider.extra_body = {"foo_provider": "bar_provider"}
@@ -216,6 +224,7 @@ def test_consolidate_kwargs(stub_model_configs: list[Any], stub_model_facade: Mo
     assert result == {
         **stub_model_configs[0].inference_parameters.generate_kwargs,
         "extra_body": {"foo_provider": "bar_provider", "foo": "bar"},
+        "extra_headers": {"X-Title": "NeMo Data Designer"},
     }
 
     # Provider extra_headers merges with caller headers (provider takes precedence)
@@ -224,8 +233,134 @@ def test_consolidate_kwargs(stub_model_configs: list[Any], stub_model_facade: Mo
     result = stub_model_facade.consolidate_kwargs(extra_headers={"hello": "caller", "X-Trace-ID": "abc"})
     assert result == {
         **stub_model_configs[0].inference_parameters.generate_kwargs,
-        "extra_headers": {"hello": "world", "hola": "mundo", "X-Trace-ID": "abc"},
+        "extra_headers": {"X-Title": "NeMo Data Designer", "hello": "world", "hola": "mundo", "X-Trace-ID": "abc"},
+    }
+
+
+@patch("data_designer.engine.models.facade.TELEMETRY_ENABLED", False)
+def test_consolidate_kwargs_telemetry_disabled(stub_model_configs: list[Any], stub_model_facade: ModelFacade) -> None:
+    """Framework attribution headers are omitted when telemetry is disabled."""
+    result = stub_model_facade.consolidate_kwargs()
+    assert "extra_headers" not in result
+
+    # Provider extra_headers still applied even with telemetry off
+    stub_model_facade.model_provider.extra_headers = {"Custom": "header"}
+    result = stub_model_facade.consolidate_kwargs()
+    assert result["extra_headers"] == {"Custom": "header"}
+
+
+def test_consolidate_kwargs_user_x_title_override(
+    stub_model_configs: list[Any], stub_model_facade: ModelFacade
+) -> None:
+    """User-supplied X-Title takes precedence over the framework default."""
+    stub_model_facade.model_provider.extra_headers = {"X-Title": "My Custom App"}
+    result = stub_model_facade.consolidate_kwargs()
+    assert result["extra_headers"]["X-Title"] == "My Custom App"
+
+    stub_model_facade.model_provider.extra_headers = None
+    result = stub_model_facade.consolidate_kwargs(extra_headers={"X-Title": "Caller App"})
+    assert result["extra_headers"]["X-Title"] == "Caller App"
+
+
+def test_consolidate_kwargs_with_explicit_none_extra_headers(
+    stub_model_configs: list[Any], stub_model_facade: ModelFacade
+) -> None:
+    """Explicit None extra_headers does not break provider merges or framework attribution."""
+    stub_model_facade.model_provider.extra_headers = {"hello": "world"}
+    result = stub_model_facade.consolidate_kwargs(extra_headers=None)
+    assert result["extra_headers"] == {"X-Title": "NeMo Data Designer", "hello": "world"}
+
+
+def test_consolidate_kwargs_openrouter_attribution(
+    stub_model_configs: list[Any], stub_model_facade: ModelFacade
+) -> None:
+    """OpenRouter-specific attribution headers are injected when provider is openrouter."""
+    stub_model_facade.model_provider.name = "openrouter"
+    stub_model_facade.model_provider.extra_headers = None
+    result = stub_model_facade.consolidate_kwargs()
+    assert result["extra_headers"] == {
+        "X-Title": "NeMo Data Designer",
+        "HTTP-Referer": "https://github.com/NVIDIA-NeMo/DataDesigner",
+        "X-OpenRouter-Title": "NeMo Data Designer",
+        "X-OpenRouter-Categories": "programming-app",
+    }
+
+
+def test_consolidate_kwargs_openrouter_user_override_preserved(
+    stub_model_configs: list[Any], stub_model_facade: ModelFacade
+) -> None:
+    """User-supplied OpenRouter headers take precedence over framework defaults."""
+    stub_model_facade.model_provider.name = "openrouter"
+    stub_model_facade.model_provider.extra_headers = None
+    result = stub_model_facade.consolidate_kwargs(
+        extra_headers={"X-OpenRouter-Title": "Custom App", "X-Custom": "value"}
+    )
+    # User-supplied X-OpenRouter-Title should NOT be overwritten
+    assert result["extra_headers"]["X-OpenRouter-Title"] == "Custom App"
+    assert result["extra_headers"]["X-Custom"] == "value"
+    # Framework defaults still fill in missing keys
+    assert result["extra_headers"]["HTTP-Referer"] == "https://github.com/NVIDIA-NeMo/DataDesigner"
+    assert result["extra_headers"]["X-OpenRouter-Categories"] == "programming-app"
+    assert result["extra_headers"]["X-Title"] == "NeMo Data Designer"
+
+
+def test_consolidate_kwargs_openrouter_provider_headers_preserved(
+    stub_model_configs: list[Any], stub_model_facade: ModelFacade
+) -> None:
+    """Provider-level OpenRouter headers override programmatic injection."""
+    stub_model_facade.model_provider.name = "openrouter"
+    stub_model_facade.model_provider.extra_headers = {
+        "HTTP-Referer": "https://custom-site.example.com",
+        "X-OpenRouter-Title": "Provider Title",
     }
+    result = stub_model_facade.consolidate_kwargs()
+    # Provider-level values take precedence
+    assert result["extra_headers"]["HTTP-Referer"] == "https://custom-site.example.com"
+    assert result["extra_headers"]["X-OpenRouter-Title"] == "Provider Title"
+    # Framework still fills in what's missing
+    assert result["extra_headers"]["X-OpenRouter-Categories"] == "programming-app"
+    assert result["extra_headers"]["X-Title"] == "NeMo Data Designer"
+
+
+@patch("data_designer.engine.models.facade.TELEMETRY_ENABLED", False)
+def test_consolidate_kwargs_openrouter_no_attribution_when_telemetry_off(
+    stub_model_configs: list[Any], stub_model_facade: ModelFacade
+) -> None:
+    """OpenRouter attribution headers are NOT injected when telemetry is disabled."""
+    stub_model_facade.model_provider.name = "openrouter"
+    stub_model_facade.model_provider.extra_headers = None
+    result = stub_model_facade.consolidate_kwargs()
+    assert "extra_headers" not in result
+
+
+def test_consolidate_kwargs_non_openrouter_no_openrouter_headers(
+    stub_model_configs: list[Any], stub_model_facade: ModelFacade
+) -> None:
+    """Non-openrouter providers do NOT get OpenRouter-specific headers."""
+    stub_model_facade.model_provider.name = "nvidia"
+    stub_model_facade.model_provider.extra_headers = None
+    result = stub_model_facade.consolidate_kwargs()
+    assert result["extra_headers"] == {"X-Title": "NeMo Data Designer"}
+    assert "HTTP-Referer" not in result["extra_headers"]
+    assert "X-OpenRouter-Title" not in result["extra_headers"]
+    assert "X-OpenRouter-Categories" not in result["extra_headers"]
+
+
+def test_consolidate_kwargs_attribution_header_names_case_insensitive(
+    stub_model_configs: list[Any], stub_model_facade: ModelFacade
+) -> None:
+    """Caller headers that differ only in case still suppress the framework defaults."""
+    stub_model_facade.model_provider.name = "openrouter"
+    stub_model_facade.model_provider.extra_headers = None
+    result = stub_model_facade.consolidate_kwargs(
+        extra_headers={"x-title": "Mine", "http-referer": "https://example.com"}
+    )
+    assert result["extra_headers"] == {
+        "x-title": "Mine",
+        "http-referer": "https://example.com",
+        "X-OpenRouter-Title": "NeMo Data Designer",
+        "X-OpenRouter-Categories": "programming-app",
+    }
 
 
 @pytest.mark.parametrize(