Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 1 addition & 38 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ exclude = [
# All files now have type annotations! 🎉
#
# ============================================================================
# Section 2: Files that need strict typing issues fixed (131 files)
# Section 2: Files that need strict typing issues fixed (104 files)
# ============================================================================
# These files have some type hints but fail strict type checking due to
# incomplete annotations, Any usage, or other strict mode violations.
Expand All @@ -335,43 +335,6 @@ exclude = [
"^src/llama_stack/core/testing_context\\.py$",
"^src/llama_stack/core/utils/exec\\.py$",
"^src/llama_stack/core/utils/serialize\\.py$",
# CLI files (8 files)
"^src/llama_stack/cli/stack/_list_deps\\.py$",
"^src/llama_stack/cli/stack/list_apis\\.py$",
"^src/llama_stack/cli/stack/list_deps\\.py$",
"^src/llama_stack/cli/stack/list_providers\\.py$",
"^src/llama_stack/cli/stack/run\\.py$",
"^src/llama_stack/cli/stack/utils\\.py$",
"^src/llama_stack/cli/subcommand\\.py$",
"^src/llama_stack/cli/utils\\.py$",
# Providers - Inline (27 files)
"^src/llama_stack/providers/inline/batches/reference/__init__\\.py$",
"^src/llama_stack/providers/inline/batches/reference/batches\\.py$",
"^src/llama_stack/providers/inline/eval/builtin/__init__\\.py$",
"^src/llama_stack/providers/inline/eval/builtin/eval\\.py$",
"^src/llama_stack/providers/inline/file_processor/pypdf/__init__\\.py$",
"^src/llama_stack/providers/inline/file_processor/pypdf/adapter\\.py$",
"^src/llama_stack/providers/inline/file_processor/pypdf/pypdf\\.py$",
"^src/llama_stack/providers/inline/files/localfs/__init__\\.py$",
"^src/llama_stack/providers/inline/inference/sentence_transformers/__init__\\.py$",
"^src/llama_stack/providers/inline/inference/sentence_transformers/config\\.py$",
"^src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers\\.py$",
"^src/llama_stack/providers/inline/inference/transformers/__init__\\.py$",
"^src/llama_stack/providers/inline/inference/transformers/config\\.py$",
"^src/llama_stack/providers/inline/inference/transformers/transformers\\.py$",
"^src/llama_stack/providers/inline/safety/prompt_guard/__init__\\.py$",
"^src/llama_stack/providers/inline/safety/prompt_guard/config\\.py$",
"^src/llama_stack/providers/inline/safety/prompt_guard/prompt_guard\\.py$",
"^src/llama_stack/providers/inline/tool_runtime/file_search/__init__\\.py$",
"^src/llama_stack/providers/inline/tool_runtime/file_search/context_retriever\\.py$",
"^src/llama_stack/providers/inline/tool_runtime/file_search/file_search\\.py$",
"^src/llama_stack/providers/inline/vector_io/chroma/__init__\\.py$",
"^src/llama_stack/providers/inline/vector_io/faiss/__init__\\.py$",
"^src/llama_stack/providers/inline/vector_io/faiss/faiss\\.py$",
"^src/llama_stack/providers/inline/vector_io/milvus/__init__\\.py$",
"^src/llama_stack/providers/inline/vector_io/qdrant/__init__\\.py$",
"^src/llama_stack/providers/inline/vector_io/sqlite_vec/__init__\\.py$",
"^src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec\\.py$",
# Providers - Remote (43 files)
"^src/llama_stack/providers/remote/eval/nvidia/__init__\\.py$",
"^src/llama_stack/providers/remote/eval/nvidia/config\\.py$",
Expand Down
32 changes: 19 additions & 13 deletions src/llama_stack/providers/inline/eval/builtin/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import json
from collections.abc import AsyncIterator
from typing import Any

from tqdm import tqdm
Expand Down Expand Up @@ -33,6 +34,7 @@
RunEvalRequest,
ScoreRequest,
Scoring,
SystemMessage,
)

from .config import BuiltinEvalConfig
Expand Down Expand Up @@ -63,9 +65,9 @@ def __init__(
self.responses_api = responses_api

# TODO: assume sync job, will need jobs API for async scheduling
self.jobs = {}
self.jobs: dict[str, EvaluateResponse] = {}

self.benchmarks = {}
self.benchmarks: dict[str, Benchmark] = {}

async def initialize(self) -> None:
self.kvstore = await kvstore_impl(self.config.kvstore)
Expand All @@ -74,8 +76,8 @@ async def initialize(self) -> None:
end_key = f"{EVAL_TASKS_PREFIX}\xff"
stored_benchmarks = await self.kvstore.values_in_range(start_key, end_key)

for benchmark in stored_benchmarks:
benchmark = Benchmark.model_validate_json(benchmark)
for benchmark_json in stored_benchmarks:
benchmark = Benchmark.model_validate_json(benchmark_json)
self.benchmarks[benchmark.identifier] = benchmark

# No-op shutdown: the visible body is just `...`, so no resources are released here.
async def shutdown(self) -> None: ...
Expand Down Expand Up @@ -132,7 +134,7 @@ async def _run_model_generation(
) -> list[dict[str, Any]]:
candidate = request.benchmark_config.eval_candidate
assert candidate.sampling_params.max_tokens is not None, "SamplingParams.max_tokens must be provided"
sampling_params = {"max_tokens": candidate.sampling_params.max_tokens}
sampling_params: dict[str, Any] = {"max_tokens": candidate.sampling_params.max_tokens}

generations = []
for x in tqdm(input_rows):
Expand All @@ -141,33 +143,37 @@ async def _run_model_generation(
sampling_params["stop"] = candidate.sampling_params.stop

input_content = json.loads(x[ColumnName.completion_input.value])
params = OpenAICompletionRequestWithExtraBody(
completion_params = OpenAICompletionRequestWithExtraBody(
model=candidate.model,
prompt=input_content,
**sampling_params,
)
response = await self.inference_api.openai_completion(params)
generations.append({ColumnName.generated_answer.value: response.choices[0].text})
completion_response = await self.inference_api.openai_completion(completion_params)
assert not isinstance(completion_response, AsyncIterator), "Streaming not supported in eval"
generations.append({ColumnName.generated_answer.value: completion_response.choices[0].text})
elif ColumnName.chat_completion_input.value in x:
chat_completion_input_json = json.loads(x[ColumnName.chat_completion_input.value])
input_messages = [
OpenAIUserMessageParam(**x) for x in chat_completion_input_json if x["role"] == "user"
]

messages = []
messages: list[SystemMessage | OpenAISystemMessageParam | OpenAIUserMessageParam] = []
if candidate.system_message:
messages.append(candidate.system_message)

messages += [OpenAISystemMessageParam(**x) for x in chat_completion_input_json if x["role"] == "system"]

messages += input_messages
params = OpenAIChatCompletionRequestWithExtraBody(
chat_params = OpenAIChatCompletionRequestWithExtraBody(
model=candidate.model,
messages=messages,
messages=messages, # type: ignore[arg-type]
**sampling_params,
)
response = await self.inference_api.openai_chat_completion(params)
generations.append({ColumnName.generated_answer.value: response.choices[0].message.content})
chat_response = await self.inference_api.openai_chat_completion(chat_params)
assert not isinstance(chat_response, AsyncIterator), "Streaming not supported in eval"
content = chat_response.choices[0].message.content
assert content is not None, "Expected content in chat response"
generations.append({ColumnName.generated_answer.value: content})
else:
raise ValueError("Invalid input row")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@
ModelType,
OpenAIChatCompletion,
OpenAIChatCompletionChunk,
OpenAIChatCompletionChunkWithReasoning,
OpenAIChatCompletionRequestWithExtraBody,
OpenAIChatCompletionWithReasoning,
OpenAICompletion,
OpenAICompletionRequestWithExtraBody,
)
Expand All @@ -39,7 +41,9 @@ class SentenceTransformersInferenceImpl(
def __init__(self, config: SentenceTransformersInferenceConfig) -> None:
    # Store the provider configuration; NOTE(review): async setup appears to
    # happen later in initialize() — confirm against the full class.
    self.config = config

async def openai_chat_completions_with_reasoning(self, params: OpenAIChatCompletionRequestWithExtraBody) -> None:
async def openai_chat_completions_with_reasoning(
    self, params: OpenAIChatCompletionRequestWithExtraBody
) -> OpenAIChatCompletionWithReasoning | AsyncIterator[OpenAIChatCompletionChunkWithReasoning]:
    """Reasoning-enabled chat completions are not supported by this provider.

    Always raises NotImplementedError. The return annotation presumably mirrors
    the inference interface contract declared elsewhere — not visible here.

    :raises NotImplementedError: unconditionally.
    """
    raise NotImplementedError("SentenceTransformers provider does not support reasoning in chat completions")

async def initialize(self) -> None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,11 @@
ModelType,
OpenAIChatCompletion,
OpenAIChatCompletionChunk,
OpenAIChatCompletionChunkWithReasoning,
OpenAIChatCompletionContentPartImageParam,
OpenAIChatCompletionContentPartTextParam,
OpenAIChatCompletionRequestWithExtraBody,
OpenAIChatCompletionWithReasoning,
OpenAICompletion,
OpenAICompletionRequestWithExtraBody,
OpenAIEmbeddingsRequestWithExtraBody,
Expand Down Expand Up @@ -56,7 +58,9 @@ class TransformersInferenceImpl(
def __init__(self, config: TransformersInferenceConfig) -> None:
    # Store the provider configuration; NOTE(review): async setup appears to
    # happen later in initialize() — confirm against the full class.
    self.config = config

async def openai_chat_completions_with_reasoning(self, params: OpenAIChatCompletionRequestWithExtraBody) -> None:
async def openai_chat_completions_with_reasoning(
    self, params: OpenAIChatCompletionRequestWithExtraBody
) -> OpenAIChatCompletionWithReasoning | AsyncIterator[OpenAIChatCompletionChunkWithReasoning]:
    """Reasoning-enabled chat completions are not supported by this provider.

    Always raises NotImplementedError. The return annotation presumably mirrors
    the inference interface contract declared elsewhere — not visible here.

    :raises NotImplementedError: unconditionally.
    """
    raise NotImplementedError("Transformers provider does not support reasoning in chat completions")

async def initialize(self) -> None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
class SentenceTransformerEmbeddingMixin:
"""Mixin providing OpenAI-compatible embeddings via sentence-transformers models."""

model_store: ModelStore
model_store: ModelStore | None

async def openai_embeddings(
self,
Expand Down
Loading