Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions app/desktop/studio_server/provider_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ class EmbeddingModelDetails(BaseModel):
max_input_tokens: int | None
supports_custom_dimensions: bool
suggested_for_chunk_embedding: bool
supports_instructions: bool


class EmbeddingProvider(BaseModel):
Expand Down Expand Up @@ -402,6 +403,7 @@ async def get_available_embedding_models() -> List[EmbeddingProvider]:
max_input_tokens=provider.max_input_tokens,
supports_custom_dimensions=provider.supports_custom_dimensions,
suggested_for_chunk_embedding=provider.suggested_for_chunk_embedding,
supports_instructions=provider.supports_instructions,
)
)

Expand Down Expand Up @@ -1495,6 +1497,7 @@ async def available_ollama_embedding_models() -> EmbeddingProvider | None:
max_input_tokens=ollama_provider.max_input_tokens,
supports_custom_dimensions=ollama_provider.supports_custom_dimensions,
suggested_for_chunk_embedding=ollama_provider.suggested_for_chunk_embedding,
supports_instructions=ollama_provider.supports_instructions,
)
)

Expand Down
4 changes: 4 additions & 0 deletions app/desktop/studio_server/test_provider_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -3042,6 +3042,7 @@ async def test_get_embedding_providers(app, client):
"n_dimensions": 1536,
"max_input_tokens": 8192,
"supports_custom_dimensions": True,
"supports_instructions": False,
"suggested_for_chunk_embedding": True,
}
],
Expand All @@ -3056,6 +3057,7 @@ async def test_get_embedding_providers(app, client):
"n_dimensions": 1536,
"max_input_tokens": None,
"supports_custom_dimensions": False,
"supports_instructions": False,
"suggested_for_chunk_embedding": False,
}
],
Expand All @@ -3070,6 +3072,7 @@ async def test_get_embedding_providers(app, client):
"n_dimensions": 1536,
"max_input_tokens": 8192,
"supports_custom_dimensions": False,
"supports_instructions": False,
"suggested_for_chunk_embedding": True,
}
],
Expand Down Expand Up @@ -3275,6 +3278,7 @@ def test_available_embedding_models_endpoint_includes_ollama(client):
"n_dimensions": 768,
"max_input_tokens": 8192,
"supports_custom_dimensions": False,
"supports_instructions": False,
"suggested_for_chunk_embedding": True,
}
],
Expand Down
7 changes: 7 additions & 0 deletions app/web_ui/src/lib/api_schema.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3914,6 +3914,8 @@ export interface components {
supports_custom_dimensions: boolean;
/** Suggested For Chunk Embedding */
suggested_for_chunk_embedding: boolean;
/** Supports Instructions */
supports_instructions: boolean;
};
/**
* EmbeddingModelName
Expand All @@ -3925,6 +3927,11 @@ export interface components {
EmbeddingProperties: {
/** Dimensions */
dimensions?: number;
/**
* Instructions
* @description Optional instructions to prepend/format embedding inputs.
*/
instructions?: string;
};
/** EmbeddingProvider */
EmbeddingProvider: {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
let description: string = ""
let selectedModel: EmbeddingOptionValue | null = null
let customDimensions: number | null = null
let customInstructions: string = ""
let embeddingModels: OptionGroup[] = []
export let keyboard_submit: boolean = false

Expand All @@ -35,6 +36,7 @@
n_dimensions: number | null
max_input_tokens: number | null
supports_custom_dimensions: boolean | null
supports_instructions: boolean | null
}

const dispatch = createEventDispatcher<{
Expand Down Expand Up @@ -69,6 +71,7 @@
n_dimensions: model.n_dimensions,
max_input_tokens: model.max_input_tokens,
supports_custom_dimensions: model.supports_custom_dimensions,
supports_instructions: model.supports_instructions,
suggested_for_chunk_embedding:
model.suggested_for_chunk_embedding,
},
Expand All @@ -88,10 +91,17 @@
function get_properties(
selectedModel: EmbeddingOptionValue,
): Record<never, never> | EmbeddingProperties {
const properties: EmbeddingProperties = {}

if (customDimensions && selectedModel.supports_custom_dimensions) {
return { dimensions: customDimensions }
properties.dimensions = customDimensions
}

if (customInstructions.trim() && selectedModel.supports_instructions) {
properties.instructions = customInstructions.trim()
}
return {}

return properties
}

async function create_embedding_config() {
Expand Down Expand Up @@ -203,6 +213,19 @@
})}
/>
{/if}

{#if selectedModel && selectedModel.supports_instructions}
<FormElement
label="Custom Instructions"
description="Optional instructions to guide the embedding generation process."
info_description="Not all models support instructions. Leave blank if unsure."
optional={true}
inputType="textarea"
id="custom_instructions"
bind:value={customInstructions}
placeholder="Enter custom instructions for the embedding model..."
/>
{/if}
</Collapse>
</FormContainer>
{/if}
39 changes: 37 additions & 2 deletions libs/core/kiln_ai/adapters/embedding/litellm_embedding_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ class EmbeddingOptions(BaseModel):
default=None,
description="The number of dimensions to return for embeddings. Some models support requesting vectors of different dimensions.",
)
instructions: str | None = Field(
default=None,
description="Custom instructions for embedding generation. Some models support custom instructions to guide the embedding process.",
)


def validate_map_to_embeddings(
Expand Down Expand Up @@ -132,6 +136,17 @@ async def _generate_embeddings(self, input_texts: List[str]) -> EmbeddingResult:
usage=combined_usage,
)

def _apply_instructions_to_texts(
self, input_texts: List[str], instructions: str | None
) -> List[str]:
"""Apply instructions to input texts in the format expected by the model."""
if not instructions:
return input_texts

# Format according to Qwen3-Embedding documentation
# "Instruct: [instructions]\nQuery: [text]"
return [f"Instruct: {instructions}\nQuery: {text}" for text in input_texts]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Their docs suggest that the instruction wrapper should only be used for the actual query (rather than for the documents themselves).

Their README shows this:

# Each query must come with a one-sentence instruction that describes the task
task = 'Given a web search query, retrieve relevant passages that answer the query'

queries = [
    get_detailed_instruct(task, 'What is the capital of China?'),
    get_detailed_instruct(task, 'Explain gravity')
]
# No need to add instruction for retrieval documents
documents = [
    "The capital of China is Beijing.",
    "Gravity is a force that attracts two bodies towards each other. It gives weight to physical objects and is responsible for the movement of planets around the sun."
]
input_texts = queries + documents

tokenizer = AutoTokenizer.from_pretrained('Qwen/Qwen3-Embedding-0.6B', padding_side='left')
model = AutoModel.from_pretrained('Qwen/Qwen3-Embedding-0.6B')

That suggests we should only apply the instructions during retrieval (when embedding the query) and not during embedding of the document chunks that we index.

Much of the current logic from the PR can be preserved, and we can add a flag to the embedding methods here to specify whether or not to apply the instructions (if any), have it default to False, and pass in True from here during retrieval:

query_embedding_result = await embedding_adapter.generate_embeddings(
  [query],
  apply_embedding_instructions=True,
)

@klamas1 - thoughts?

Looping in @tawnymanticore

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for this contribution @klamas1! great idea.

re: should we be conditioning queries or documents on instructions...

agree with @leonardmq. from that readme they are conditioning queries on the instruction so that it will have a better hit rate against documents. their example shows this

conditioned_query = "Instruct: Given a web search query, retrieve relevant passages that answer the query \nQuery:What is the capital of China?"
document = "The capital of China is Beijing"

conditioned_query.dot(document) --> high

so for the indexing portion of RAG, we should be embedding the documents as normal. then building a custom query function that gets some instructions as conditionals.

now that's what Qwen recommends anyways. does the inverse work to save on runtime compute? maaaaybeeee? Would depend if Qwen specifically fine-tuned with an Instruct/Query setup. if they did, then it must be conditoned_query.dot(document) at runtime. if this is zero shot then the inverse may be possible with the following framing

("What is the capital of China?").dot("Instruct: Given a retrieval passage, what was the original web search query? \n Passage: The capital of China is Beijing"). This is probably all pretty testable in a Python notebook or something.

Copy link
Author

@klamas1 klamas1 Jan 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In fact, in some cases instructions are also needed when embedding documents — for example, clustering — but that is indeed a corner case.
I will add a flag to the search function.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Later, I also want to try HyDE (https://aclanthology.org/2023.acl-long.99/).

I'm diving into this so deeply because my task is very specific; my documents are deeply nested, branched YAML configs with consistency across branches.

Maybe I'm going in the wrong direction, but if you have any ideas, I'd be happy to explore them.


async def _generate_embeddings_for_batch(
self, input_texts: List[str]
) -> EmbeddingResult:
Expand All @@ -140,6 +155,13 @@ async def _generate_embeddings_for_batch(
f"Too many input texts, max batch size is {MAX_BATCH_SIZE}, got {len(input_texts)}"
)

# Validate once and reuse the same instructions everywhere
options = self.build_options()
# Apply instructions to input texts if present (validated)
processed_texts = self._apply_instructions_to_texts(
input_texts, options.instructions
)

completion_kwargs: Dict[str, Any] = {}
if self.litellm_core_config.additional_body_options:
completion_kwargs.update(self.litellm_core_config.additional_body_options)
Expand All @@ -152,10 +174,15 @@ async def _generate_embeddings_for_batch(
self.litellm_core_config.default_headers
)

# Get options excluding instructions since they're applied to text
embedding_options = options.model_dump(
exclude_none=True, exclude={"instructions"}
)

response = await litellm.aembedding(
model=self.litellm_model_id,
input=input_texts,
**self.build_options().model_dump(exclude_none=True),
input=processed_texts,
**embedding_options,
**completion_kwargs,
)

Expand All @@ -174,8 +201,16 @@ def build_options(self) -> EmbeddingOptions:
if not isinstance(dimensions, int) or dimensions <= 0:
raise ValueError("Dimensions must be a positive integer")

instructions = self.embedding_config.properties.get("instructions", None)
if instructions is not None:
if not isinstance(instructions, str) or len(instructions.strip()) == 0:
raise ValueError("Instructions must be a non-empty string")
if len(instructions) > 1000:
raise ValueError("Instructions must be less than 1000 characters")

return EmbeddingOptions(
dimensions=dimensions,
instructions=instructions,
)

@cached_property
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -606,6 +606,59 @@ def test_embedding_config_inheritance(
)
assert adapter.embedding_config == mock_embedding_config

def test_apply_instructions_to_texts_no_instructions(self, mock_litellm_adapter):
"""Test _apply_instructions_to_texts when no instructions are provided."""
input_texts = ["text1", "text2", "text3"]
result = mock_litellm_adapter._apply_instructions_to_texts(input_texts, None)
assert result == input_texts

def test_apply_instructions_to_texts_with_instructions(self, mock_litellm_adapter):
"""Test _apply_instructions_to_texts when instructions are provided."""
input_texts = ["What is AI?", "How does ML work?"]
result = mock_litellm_adapter._apply_instructions_to_texts(
input_texts, "Use semantic similarity"
)

expected = [
"Instruct: Use semantic similarity\nQuery: What is AI?",
"Instruct: Use semantic similarity\nQuery: How does ML work?",
]
assert result == expected

def test_apply_instructions_to_texts_empty_instructions(self, mock_litellm_adapter):
"""Test _apply_instructions_to_texts when instructions are empty string."""
input_texts = ["text1", "text2"]
result = mock_litellm_adapter._apply_instructions_to_texts(input_texts, "")
# Empty instructions should be treated as no instructions
assert result == input_texts

@pytest.mark.paid
@pytest.mark.parametrize(
    "provider,model_name",
    get_all_embedding_models_and_providers(),
)
async def test_paid_generate_embeddings_with_custom_instructions_supported(
    self, provider, model_name, mock_litellm_core_config
):
    # Live (paid) smoke test: instruction-aware models must still produce an
    # embedding when custom instructions are set in the config properties.
    # NOTE(review): mock_litellm_core_config appears unused in this body —
    # confirm whether the fixture is needed for environment setup.
    model_provider = built_in_embedding_models_from_provider(provider, model_name)
    assert model_provider is not None
    # Only instruction-capable providers are exercised; others are skipped.
    if not model_provider.supports_instructions:
        pytest.skip("Model does not support custom instructions. Skipping.")

    # generate embedding with instructions
    adapter = embedding_adapter_from_type(
        EmbeddingConfig(
            name="paid-embedding",
            model_provider_name=provider,
            model_name=model_name,
            properties={"instructions": "Focus on the color being mentioned"},
        )
    )
    text = ["Kiln is an open-source evaluation platform for LLMs."]
    result = await adapter.generate_embeddings(text)
    # One input text should yield exactly one embedding vector (a list of floats).
    assert len(result.embeddings) == 1
    assert isinstance(result.embeddings[0].vector, list)

async def test_generate_embeddings_method_integration(self, mock_litellm_adapter):
"""Test the public embed method integration."""
mock_response = AsyncMock(spec=EmbeddingResponse)
Expand Down
15 changes: 15 additions & 0 deletions libs/core/kiln_ai/adapters/ml_embedding_model_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,11 @@ class KilnEmbeddingModelProvider(BaseModel):
description="Whether the model supports setting a custom output dimension. If true, the user can set the output dimension in the UI.",
)

supports_instructions: bool = Field(
default=False,
description="Whether the model supports custom instructions for embedding generation. If true, the user can provide custom instructions in the UI.",
)

suggested_for_chunk_embedding: bool = Field(
default=False,
description="Whether the model is particularly good for chunk embedding.",
Expand Down Expand Up @@ -301,6 +306,7 @@ class KilnEmbeddingModel(BaseModel):
# because litellm rejects the param:
# https://github.com/BerriAI/litellm/issues/11940
supports_custom_dimensions=False,
supports_instructions=True,
ollama_model_aliases=[
# 8b is default
"qwen3-embedding",
Expand All @@ -313,6 +319,7 @@ class KilnEmbeddingModel(BaseModel):
max_input_tokens=32_000,
# the model itself does support custom dimensions, but not working
supports_custom_dimensions=False,
supports_instructions=True,
),
KilnEmbeddingModelProvider(
name=ModelProviderName.siliconflow_cn,
Expand All @@ -323,6 +330,7 @@ class KilnEmbeddingModel(BaseModel):
# because litellm rejects the param:
# https://github.com/BerriAI/litellm/issues/11940
supports_custom_dimensions=False,
supports_instructions=True,
),
KilnEmbeddingModelProvider(
name=ModelProviderName.openrouter,
Expand All @@ -331,6 +339,7 @@ class KilnEmbeddingModel(BaseModel):
max_input_tokens=32_000,
# litellm rejecting - but model itself supports it
supports_custom_dimensions=False,
supports_instructions=True,
),
],
),
Expand All @@ -349,6 +358,7 @@ class KilnEmbeddingModel(BaseModel):
# because litellm rejects the param:
# https://github.com/BerriAI/litellm/issues/11940
supports_custom_dimensions=False,
supports_instructions=True,
),
KilnEmbeddingModelProvider(
name=ModelProviderName.siliconflow_cn,
Expand All @@ -359,6 +369,7 @@ class KilnEmbeddingModel(BaseModel):
# because litellm rejects the param:
# https://github.com/BerriAI/litellm/issues/11940
supports_custom_dimensions=False,
supports_instructions=True,
),
KilnEmbeddingModelProvider(
name=ModelProviderName.openrouter,
Expand All @@ -367,6 +378,7 @@ class KilnEmbeddingModel(BaseModel):
max_input_tokens=32_000,
# litellm rejecting - but model itself supports it
supports_custom_dimensions=False,
supports_instructions=True,
),
],
),
Expand All @@ -385,6 +397,7 @@ class KilnEmbeddingModel(BaseModel):
# because litellm rejects the param:
# https://github.com/BerriAI/litellm/issues/11940
supports_custom_dimensions=False,
supports_instructions=True,
),
KilnEmbeddingModelProvider(
name=ModelProviderName.siliconflow_cn,
Expand All @@ -395,6 +408,7 @@ class KilnEmbeddingModel(BaseModel):
# because litellm rejects the param:
# https://github.com/BerriAI/litellm/issues/11940
supports_custom_dimensions=False,
supports_instructions=True,
),
KilnEmbeddingModelProvider(
name=ModelProviderName.openrouter,
Expand All @@ -403,6 +417,7 @@ class KilnEmbeddingModel(BaseModel):
max_input_tokens=32_000,
# litellm rejecting - but model itself supports it
supports_custom_dimensions=False,
supports_instructions=True,
),
],
),
Expand Down
9 changes: 8 additions & 1 deletion libs/core/kiln_ai/datamodel/embedding.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import TYPE_CHECKING, List, Union
from typing import TYPE_CHECKING, Annotated, List, Union

from pydantic import BaseModel, Field, PositiveInt
from typing_extensions import TypedDict
Expand All @@ -13,6 +13,13 @@

class EmbeddingProperties(TypedDict, total=False):
    """Optional per-config properties for an embedding model; all keys may be omitted."""

    # Requested output dimensionality; must be a positive integer.
    dimensions: PositiveInt
    # Free-form instruction text prepended to embedding inputs for
    # instruction-aware models; pydantic enforces a 1000-character cap.
    instructions: Annotated[
        str,
        Field(
            description="Optional instructions to prepend/format embedding inputs.",
            max_length=1000,
        ),
    ]


class EmbeddingConfig(KilnParentedModel):
Expand Down