diff --git a/app/desktop/studio_server/provider_api.py b/app/desktop/studio_server/provider_api.py index 8162dc9bb..e681dcc63 100644 --- a/app/desktop/studio_server/provider_api.py +++ b/app/desktop/studio_server/provider_api.py @@ -181,6 +181,7 @@ class EmbeddingModelDetails(BaseModel): max_input_tokens: int | None supports_custom_dimensions: bool suggested_for_chunk_embedding: bool + supports_instructions: bool class EmbeddingProvider(BaseModel): @@ -402,6 +403,7 @@ async def get_available_embedding_models() -> List[EmbeddingProvider]: max_input_tokens=provider.max_input_tokens, supports_custom_dimensions=provider.supports_custom_dimensions, suggested_for_chunk_embedding=provider.suggested_for_chunk_embedding, + supports_instructions=provider.supports_instructions, ) ) @@ -1495,6 +1497,7 @@ async def available_ollama_embedding_models() -> EmbeddingProvider | None: max_input_tokens=ollama_provider.max_input_tokens, supports_custom_dimensions=ollama_provider.supports_custom_dimensions, suggested_for_chunk_embedding=ollama_provider.suggested_for_chunk_embedding, + supports_instructions=ollama_provider.supports_instructions, ) ) diff --git a/app/desktop/studio_server/test_provider_api.py b/app/desktop/studio_server/test_provider_api.py index 242135c25..ae508003e 100644 --- a/app/desktop/studio_server/test_provider_api.py +++ b/app/desktop/studio_server/test_provider_api.py @@ -3042,6 +3042,7 @@ async def test_get_embedding_providers(app, client): "n_dimensions": 1536, "max_input_tokens": 8192, "supports_custom_dimensions": True, + "supports_instructions": False, "suggested_for_chunk_embedding": True, } ], @@ -3056,6 +3057,7 @@ async def test_get_embedding_providers(app, client): "n_dimensions": 1536, "max_input_tokens": None, "supports_custom_dimensions": False, + "supports_instructions": False, "suggested_for_chunk_embedding": False, } ], @@ -3070,6 +3072,7 @@ async def test_get_embedding_providers(app, client): "n_dimensions": 1536, "max_input_tokens": 8192, "supports_custom_dimensions": False, + "supports_instructions": False, "suggested_for_chunk_embedding": True, } ], @@ -3275,6 +3278,7 @@ def test_available_embedding_models_endpoint_includes_ollama(client): "n_dimensions": 768, "max_input_tokens": 8192, "supports_custom_dimensions": False, + "supports_instructions": False, "suggested_for_chunk_embedding": True, } ], diff --git a/app/web_ui/src/lib/api_schema.d.ts b/app/web_ui/src/lib/api_schema.d.ts index 754759f8a..cc381c41e 100644 --- a/app/web_ui/src/lib/api_schema.d.ts +++ b/app/web_ui/src/lib/api_schema.d.ts @@ -3914,6 +3914,8 @@ export interface components { supports_custom_dimensions: boolean; /** Suggested For Chunk Embedding */ suggested_for_chunk_embedding: boolean; + /** Supports Instructions */ + supports_instructions: boolean; }; /** * EmbeddingModelName @@ -3925,6 +3927,11 @@ export interface components { EmbeddingProperties: { /** Dimensions */ dimensions?: number; + /** + * Instructions + * @description Optional instructions to prepend/format embedding inputs. + */ + instructions?: string; }; /** EmbeddingProvider */ EmbeddingProvider: { diff --git a/app/web_ui/src/routes/(app)/docs/rag_configs/[project_id]/create_rag_config/create_embedding_form.svelte b/app/web_ui/src/routes/(app)/docs/rag_configs/[project_id]/create_rag_config/create_embedding_form.svelte index d2b9f26b0..bb6ad9219 100644 --- a/app/web_ui/src/routes/(app)/docs/rag_configs/[project_id]/create_rag_config/create_embedding_form.svelte +++ b/app/web_ui/src/routes/(app)/docs/rag_configs/[project_id]/create_rag_config/create_embedding_form.svelte @@ -26,6 +26,7 @@ let description: string = "" let selectedModel: EmbeddingOptionValue | null = null let customDimensions: number | null = null + let customInstructions: string = "" let embeddingModels: OptionGroup[] = [] export let keyboard_submit: boolean = false @@ -35,6 +36,7 @@ n_dimensions: number | null max_input_tokens: number | null supports_custom_dimensions: boolean | null + supports_instructions: boolean | null } const dispatch = createEventDispatcher<{ @@ -69,6 +71,7 @@ n_dimensions: model.n_dimensions, max_input_tokens: model.max_input_tokens, supports_custom_dimensions: model.supports_custom_dimensions, + supports_instructions: model.supports_instructions, suggested_for_chunk_embedding: model.suggested_for_chunk_embedding, }, @@ -88,10 +91,17 @@ function get_properties( selectedModel: EmbeddingOptionValue, ): Record | EmbeddingProperties { + const properties: EmbeddingProperties = {} + if (customDimensions && selectedModel.supports_custom_dimensions) { - return { dimensions: customDimensions } + properties.dimensions = customDimensions + } + + if (customInstructions.trim() && selectedModel.supports_instructions) { + properties.instructions = customInstructions.trim() } - return {} + + return properties } async function create_embedding_config() { @@ -203,6 +213,19 @@ })} /> {/if} + + {#if selectedModel && selectedModel.supports_instructions} + + {/if} {/if} diff --git a/libs/core/kiln_ai/adapters/embedding/litellm_embedding_adapter.py b/libs/core/kiln_ai/adapters/embedding/litellm_embedding_adapter.py index b31f07225..27aec33eb 100644 --- a/libs/core/kiln_ai/adapters/embedding/litellm_embedding_adapter.py +++ b/libs/core/kiln_ai/adapters/embedding/litellm_embedding_adapter.py @@ -31,6 +31,10 @@ class EmbeddingOptions(BaseModel): default=None, description="The number of dimensions to return for embeddings. Some models support requesting vectors of different dimensions.", ) + instructions: str | None = Field( + default=None, + description="Custom instructions for embedding generation. Some models support custom instructions to guide the embedding process.", + ) def validate_map_to_embeddings( @@ -132,6 +136,17 @@ async def _generate_embeddings(self, input_texts: List[str]) -> EmbeddingResult: usage=combined_usage, ) + def _apply_instructions_to_texts( + self, input_texts: List[str], instructions: str | None + ) -> List[str]: + """Apply instructions to input texts in the format expected by the model.""" + if not instructions: + return input_texts + + # Format according to Qwen3-Embedding documentation + # "Instruct: [instructions]\nQuery: [text]" + return [f"Instruct: {instructions}\nQuery: {text}" for text in input_texts] + async def _generate_embeddings_for_batch( self, input_texts: List[str] ) -> EmbeddingResult: @@ -140,6 +155,13 @@ async def _generate_embeddings_for_batch( f"Too many input texts, max batch size is {MAX_BATCH_SIZE}, got {len(input_texts)}" ) + # Validate once and reuse the same instructions everywhere + options = self.build_options() + # Apply instructions to input texts if present (validated) + processed_texts = self._apply_instructions_to_texts( + input_texts, options.instructions + ) + completion_kwargs: Dict[str, Any] = {} if self.litellm_core_config.additional_body_options: completion_kwargs.update(self.litellm_core_config.additional_body_options) @@ -152,10 +174,15 @@ async def _generate_embeddings_for_batch( self.litellm_core_config.default_headers ) + # Get options excluding instructions since they're applied to text + embedding_options = options.model_dump( + exclude_none=True, exclude={"instructions"} + ) + response = await litellm.aembedding( model=self.litellm_model_id, - input=input_texts, - **self.build_options().model_dump(exclude_none=True), + input=processed_texts, + **embedding_options, **completion_kwargs, ) @@ -174,8 +201,16 @@ def build_options(self) -> EmbeddingOptions: if not isinstance(dimensions, int) or dimensions <= 0: raise ValueError("Dimensions must be a positive integer") + instructions = self.embedding_config.properties.get("instructions", None) + if instructions is not None: + if not isinstance(instructions, str) or len(instructions.strip()) == 0: + raise ValueError("Instructions must be a non-empty string") + if len(instructions) > 1000: + raise ValueError("Instructions must be less than 1000 characters") + return EmbeddingOptions( dimensions=dimensions, + instructions=instructions, ) @cached_property diff --git a/libs/core/kiln_ai/adapters/embedding/test_litellm_embedding_adapter.py b/libs/core/kiln_ai/adapters/embedding/test_litellm_embedding_adapter.py index 37874a68f..05682a4e5 100644 --- a/libs/core/kiln_ai/adapters/embedding/test_litellm_embedding_adapter.py +++ b/libs/core/kiln_ai/adapters/embedding/test_litellm_embedding_adapter.py @@ -606,6 +606,59 @@ def test_embedding_config_inheritance( ) assert adapter.embedding_config == mock_embedding_config + def test_apply_instructions_to_texts_no_instructions(self, mock_litellm_adapter): + """Test _apply_instructions_to_texts when no instructions are provided.""" + input_texts = ["text1", "text2", "text3"] + result = mock_litellm_adapter._apply_instructions_to_texts(input_texts, None) + assert result == input_texts + + def test_apply_instructions_to_texts_with_instructions(self, mock_litellm_adapter): + """Test _apply_instructions_to_texts when instructions are provided.""" + input_texts = ["What is AI?", "How does ML work?"] + result = mock_litellm_adapter._apply_instructions_to_texts( + input_texts, "Use semantic similarity" + ) + + expected = [ + "Instruct: Use semantic similarity\nQuery: What is AI?", + "Instruct: Use semantic similarity\nQuery: How does ML work?", + ] + assert result == expected + + def test_apply_instructions_to_texts_empty_instructions(self, mock_litellm_adapter): + """Test _apply_instructions_to_texts when instructions are empty string.""" + input_texts = ["text1", "text2"] + result = mock_litellm_adapter._apply_instructions_to_texts(input_texts, "") + # Empty instructions should be treated as no instructions + assert result == input_texts + + @pytest.mark.paid + @pytest.mark.parametrize( + "provider,model_name", + get_all_embedding_models_and_providers(), + ) + async def test_paid_generate_embeddings_with_custom_instructions_supported( + self, provider, model_name, mock_litellm_core_config + ): + model_provider = built_in_embedding_models_from_provider(provider, model_name) + assert model_provider is not None + if not model_provider.supports_instructions: + pytest.skip("Model does not support custom instructions. Skipping.") + + # generate embedding with instructions + adapter = embedding_adapter_from_type( + EmbeddingConfig( + name="paid-embedding", + model_provider_name=provider, + model_name=model_name, + properties={"instructions": "Focus on the color being mentioned"}, + ) + ) + text = ["Kiln is an open-source evaluation platform for LLMs."] + result = await adapter.generate_embeddings(text) + assert len(result.embeddings) == 1 + assert isinstance(result.embeddings[0].vector, list) + async def test_generate_embeddings_method_integration(self, mock_litellm_adapter): """Test the public embed method integration.""" mock_response = AsyncMock(spec=EmbeddingResponse) diff --git a/libs/core/kiln_ai/adapters/ml_embedding_model_list.py b/libs/core/kiln_ai/adapters/ml_embedding_model_list.py index f3324686a..b88e243c3 100644 --- a/libs/core/kiln_ai/adapters/ml_embedding_model_list.py +++ b/libs/core/kiln_ai/adapters/ml_embedding_model_list.py @@ -113,6 +113,11 @@ class KilnEmbeddingModelProvider(BaseModel): description="Whether the model supports setting a custom output dimension. If true, the user can set the output dimension in the UI.", ) + supports_instructions: bool = Field( + default=False, + description="Whether the model supports custom instructions for embedding generation. If true, the user can provide custom instructions in the UI.", + ) + suggested_for_chunk_embedding: bool = Field( default=False, description="Whether the model is particularly good for chunk embedding.", @@ -301,6 +306,7 @@ class KilnEmbeddingModel(BaseModel): # because litellm rejects the param: # https://github.com/BerriAI/litellm/issues/11940 supports_custom_dimensions=False, + supports_instructions=True, ollama_model_aliases=[ # 8b is default "qwen3-embedding", @@ -313,6 +319,7 @@ class KilnEmbeddingModel(BaseModel): max_input_tokens=32_000, # the model itself does support custom dimensions, but not working supports_custom_dimensions=False, + supports_instructions=True, ), KilnEmbeddingModelProvider( name=ModelProviderName.siliconflow_cn, @@ -323,6 +330,7 @@ class KilnEmbeddingModel(BaseModel): # because litellm rejects the param: # https://github.com/BerriAI/litellm/issues/11940 supports_custom_dimensions=False, + supports_instructions=True, ), KilnEmbeddingModelProvider( name=ModelProviderName.openrouter, @@ -331,6 +339,7 @@ class KilnEmbeddingModel(BaseModel): max_input_tokens=32_000, # litellm rejecting - but model itself supports it supports_custom_dimensions=False, + supports_instructions=True, ), ], ), @@ -349,6 +358,7 @@ class KilnEmbeddingModel(BaseModel): # because litellm rejects the param: # https://github.com/BerriAI/litellm/issues/11940 supports_custom_dimensions=False, + supports_instructions=True, ), KilnEmbeddingModelProvider( name=ModelProviderName.siliconflow_cn, @@ -359,6 +369,7 @@ class KilnEmbeddingModel(BaseModel): # because litellm rejects the param: # https://github.com/BerriAI/litellm/issues/11940 supports_custom_dimensions=False, + supports_instructions=True, ), KilnEmbeddingModelProvider( name=ModelProviderName.openrouter, @@ -367,6 +378,7 @@ class KilnEmbeddingModel(BaseModel): max_input_tokens=32_000, # litellm rejecting - but model itself supports it supports_custom_dimensions=False, + supports_instructions=True, ), ], ), @@ -385,6 +397,7 @@ class KilnEmbeddingModel(BaseModel): # because litellm rejects the param: # https://github.com/BerriAI/litellm/issues/11940 supports_custom_dimensions=False, + supports_instructions=True, ), KilnEmbeddingModelProvider( name=ModelProviderName.siliconflow_cn, @@ -395,6 +408,7 @@ class KilnEmbeddingModel(BaseModel): # because litellm rejects the param: # https://github.com/BerriAI/litellm/issues/11940 supports_custom_dimensions=False, + supports_instructions=True, ), KilnEmbeddingModelProvider( name=ModelProviderName.openrouter, @@ -403,6 +417,7 @@ class KilnEmbeddingModel(BaseModel): max_input_tokens=32_000, # litellm rejecting - but model itself supports it supports_custom_dimensions=False, + supports_instructions=True, ), ], ), diff --git a/libs/core/kiln_ai/datamodel/embedding.py b/libs/core/kiln_ai/datamodel/embedding.py index 5aadf1b77..466f728c0 100644 --- a/libs/core/kiln_ai/datamodel/embedding.py +++ b/libs/core/kiln_ai/datamodel/embedding.py @@ -1,4 +1,4 @@ -from typing import TYPE_CHECKING, List, Union +from typing import TYPE_CHECKING, Annotated, List, Union from pydantic import BaseModel, Field, PositiveInt from typing_extensions import TypedDict @@ -13,6 +13,13 @@ class EmbeddingProperties(TypedDict, total=False): dimensions: PositiveInt + instructions: Annotated[ + str, + Field( + description="Optional instructions to prepend/format embedding inputs.", + max_length=1000, + ), + ] class EmbeddingConfig(KilnParentedModel):