diff --git a/.github/workflows/integration-vector-io-tests.yml b/.github/workflows/integration-vector-io-tests.yml
index 1e17176ba7..57e8382140 100644
--- a/.github/workflows/integration-vector-io-tests.yml
+++ b/.github/workflows/integration-vector-io-tests.yml
@@ -215,7 +215,7 @@ jobs:
INFINISPAN_PASSWORD: ${{ matrix.vector-io-provider == 'remote::infinispan' && 'password' || '' }}
run: |
uv run --no-sync \
- pytest -sv --stack-config="files=inline::localfs,inference=inline::sentence-transformers?trust_remote_code=true,vector_io=${{ matrix.vector-io-provider }}" \
+ pytest -sv --stack-config="files=inline::localfs,inference=inline::sentence-transformers?trust_remote_code=true,vector_io=${{ matrix.vector-io-provider }},file_processors=inline::pypdf" \
tests/integration/vector_io
- name: Check Storage and Memory Available After Tests
diff --git a/docs/docs/providers/file_processors/inline_pypdf.mdx b/docs/docs/providers/file_processors/inline_pypdf.mdx
index 7bea49f330..bd7b08ccc5 100644
--- a/docs/docs/providers/file_processors/inline_pypdf.mdx
+++ b/docs/docs/providers/file_processors/inline_pypdf.mdx
@@ -16,7 +16,6 @@ PyPDF-based file processor for extracting text content from documents.
|-------|------|----------|---------|-------------|
| `default_chunk_size_tokens` | `int` | No | 800 | Default chunk size in tokens when chunking_strategy type is 'auto' |
| `default_chunk_overlap_tokens` | `int` | No | 400 | Default chunk overlap in tokens when chunking_strategy type is 'auto' |
-| `max_file_size_bytes` | `int` | No | 104857600 | Maximum file size in bytes for uploaded files (default 100MB) |
| `extract_metadata` | `bool` | No | True | Whether to extract PDF metadata (title, author, etc.) |
| `clean_text` | `bool` | No | True | Whether to clean extracted text (remove extra whitespace, normalize line breaks) |
diff --git a/src/llama_stack/providers/inline/file_processor/pypdf/__init__.py b/src/llama_stack/providers/inline/file_processor/pypdf/__init__.py
index 8262d17bfb..c8f8fa6ef1 100644
--- a/src/llama_stack/providers/inline/file_processor/pypdf/__init__.py
+++ b/src/llama_stack/providers/inline/file_processor/pypdf/__init__.py
@@ -17,7 +17,7 @@ async def get_provider_impl(config: PyPDFFileProcessorConfig, deps: dict[Api, An
assert isinstance(config, PyPDFFileProcessorConfig), f"Unexpected config type: {type(config)}"
- files_api = deps.get(Api.files)
+ files_api = deps[Api.files]
impl = PyPDFFileProcessorAdapter(config, files_api)
return impl
diff --git a/src/llama_stack/providers/inline/file_processor/pypdf/adapter.py b/src/llama_stack/providers/inline/file_processor/pypdf/adapter.py
index 62f469ef1d..6bdbe93bc0 100644
--- a/src/llama_stack/providers/inline/file_processor/pypdf/adapter.py
+++ b/src/llama_stack/providers/inline/file_processor/pypdf/adapter.py
@@ -16,7 +16,7 @@
class PyPDFFileProcessorAdapter:
"""Adapter for PyPDF file processor."""
- def __init__(self, config: PyPDFFileProcessorConfig, files_api=None) -> None:
+ def __init__(self, config: PyPDFFileProcessorConfig, files_api) -> None:
self.config = config
self.files_api = files_api
self.processor = PyPDFFileProcessor(config, files_api)
diff --git a/src/llama_stack/providers/inline/file_processor/pypdf/config.py b/src/llama_stack/providers/inline/file_processor/pypdf/config.py
index 64cf6c3bf7..829d38fccc 100644
--- a/src/llama_stack/providers/inline/file_processor/pypdf/config.py
+++ b/src/llama_stack/providers/inline/file_processor/pypdf/config.py
@@ -28,12 +28,6 @@ class PyPDFFileProcessorConfig(BaseModel):
description="Default chunk overlap in tokens when chunking_strategy type is 'auto'",
)
- max_file_size_bytes: int = Field(
- default=100 * 1024 * 1024,
- ge=1,
- description="Maximum file size in bytes for uploaded files (default 100MB)",
- )
-
# PDF-specific options
extract_metadata: bool = Field(default=True, description="Whether to extract PDF metadata (title, author, etc.)")
diff --git a/src/llama_stack/providers/inline/file_processor/pypdf/pypdf.py b/src/llama_stack/providers/inline/file_processor/pypdf/pypdf.py
index bbe9b98959..b656bb04aa 100644
--- a/src/llama_stack/providers/inline/file_processor/pypdf/pypdf.py
+++ b/src/llama_stack/providers/inline/file_processor/pypdf/pypdf.py
@@ -5,10 +5,12 @@
# the root directory of this source tree.
import io
+import mimetypes
import time
import uuid
from typing import Any
+import chardet
from fastapi import HTTPException, UploadFile
from pypdf import PdfReader
@@ -33,7 +35,7 @@
class PyPDFFileProcessor:
"""PyPDF-based file processor for PDF documents."""
- def __init__(self, config: PyPDFFileProcessorConfig, files_api=None) -> None:
+ def __init__(self, config: PyPDFFileProcessorConfig, files_api) -> None:
self.config = config
self.files_api = files_api
@@ -44,9 +46,8 @@ async def process_file(
options: dict[str, Any] | None = None,
chunking_strategy: VectorStoreChunkingStrategy | None = None,
) -> ProcessFileResponse:
- """Process a PDF file and return chunks."""
+ """Process a file and return chunks. Supports PDF and text-based files."""
- # Validate input
if not file and not file_id:
raise ValueError("Either file or file_id must be provided")
if file and file_id:
@@ -54,62 +55,64 @@ async def process_file(
start_time = time.time()
- # Get PDF content
+ # Upload size limits are enforced by the router layer (upload_safety.py).
+ # The provider trusts that `file` has already been bounded-read and
+ # `file_id` references a file accepted by the Files API.
if file:
- # Read from uploaded file
content = await file.read()
- if len(content) > self.config.max_file_size_bytes:
- raise ValueError(
- f"File size {len(content)} bytes exceeds maximum of {self.config.max_file_size_bytes} bytes"
- )
filename = file.filename or f"{uuid.uuid4()}.pdf"
elif file_id:
- # Get file from file storage using Files API
- if not self.files_api:
- raise ValueError("Files API not available - cannot process file_id")
-
- # Get file metadata
file_info = await self.files_api.openai_retrieve_file(RetrieveFileRequest(file_id=file_id))
filename = file_info.filename
- # Get file content
content_response = await self.files_api.openai_retrieve_file_content(
RetrieveFileContentRequest(file_id=file_id)
)
content = content_response.body
+ mime_type, _ = mimetypes.guess_type(filename)
+ mime_category = mime_type.split("/")[0] if (mime_type and "/" in mime_type) else None
+
+ if mime_type == "application/pdf":
+ return self._process_pdf(content, filename, file_id, chunking_strategy, start_time)
+ elif mime_category == "text":
+ return self._process_text(content, filename, file_id, chunking_strategy, start_time)
+ else:
+ # Attempt text decoding as a fallback for unknown types
+ log.warning("Unknown mime type, attempting text extraction", mime_type=mime_type, filename=filename)
+ return self._process_text(content, filename, file_id, chunking_strategy, start_time)
+
+ def _process_pdf(
+ self,
+ content: bytes,
+ filename: str,
+ file_id: str | None,
+ chunking_strategy: VectorStoreChunkingStrategy | None,
+ start_time: float,
+ ) -> ProcessFileResponse:
+ """Process a PDF file."""
pdf_bytes = io.BytesIO(content)
reader = PdfReader(pdf_bytes)
if reader.is_encrypted:
raise HTTPException(status_code=422, detail="Password-protected PDFs are not supported")
- # Extract text from PDF
text_content, failed_pages = self._extract_pdf_text(reader)
- # Clean text if configured
if self.config.clean_text:
text_content = self._clean_text(text_content)
- # Extract metadata if configured
pdf_metadata = {}
if self.config.extract_metadata:
pdf_metadata = self._extract_pdf_metadata(reader)
document_id = str(uuid.uuid4())
-
- # Prepare document metadata (include filename and file_id)
- document_metadata = {
- "filename": filename,
- **pdf_metadata,
- }
+ document_metadata: dict[str, Any] = {"filename": filename, **pdf_metadata}
if file_id:
document_metadata["file_id"] = file_id
processing_time_ms = int((time.time() - start_time) * 1000)
-
- # Create response metadata
- response_metadata = {
+ response_metadata: dict[str, Any] = {
"processor": "pypdf",
"processing_time_ms": processing_time_ms,
"page_count": pdf_metadata.get("page_count", 0),
@@ -120,13 +123,48 @@ async def process_file(
if failed_pages:
response_metadata["failed_pages"] = failed_pages
- # Handle empty text - return empty chunks with metadata
if not text_content or not text_content.strip():
return ProcessFileResponse(chunks=[], metadata=response_metadata)
- # Create chunks for non-empty text
chunks = self._create_chunks(text_content, document_id, chunking_strategy, document_metadata)
+ return ProcessFileResponse(chunks=chunks, metadata=response_metadata)
+
+    def _process_text(
+        self,
+        content: bytes,
+        filename: str,
+        file_id: str | None,
+        chunking_strategy: VectorStoreChunkingStrategy | None,
+        start_time: float,
+    ) -> ProcessFileResponse:
+        """Decode a text-based file (txt, csv, md, etc.) and split it into chunks.
+
+        Uses the chardet-detected encoding (UTF-8 when none is detected); if that
+        decode fails, falls back to UTF-8 with replacement characters. Returns the
+        chunks plus processor metadata; ``chunks`` is empty for blank text.
+        """
+        encoding = chardet.detect(content)["encoding"] or "utf-8"
+        try:
+            text_content = content.decode(encoding)
+        except (UnicodeDecodeError, LookupError):
+            # LookupError: chardet reported a codec name this Python build lacks.
+            text_content = content.decode("utf-8", errors="replace")
+        if self.config.clean_text:
+            text_content = self._clean_text(text_content)
+
+        document_metadata: dict[str, Any] = {"filename": filename}
+        if file_id:
+            document_metadata["file_id"] = file_id
+        response_metadata: dict[str, Any] = {
+            "processor": "text",
+            "processing_time_ms": int((time.time() - start_time) * 1000),
+            "extraction_method": "text",
+            "file_size_bytes": len(content),
+        }
+        if not text_content or not text_content.strip():
+            return ProcessFileResponse(chunks=[], metadata=response_metadata)
+        chunks = self._create_chunks(text_content, str(uuid.uuid4()), chunking_strategy, document_metadata)
         return ProcessFileResponse(chunks=chunks, metadata=response_metadata)
def _extract_pdf_text(self, reader: PdfReader) -> tuple[str, list[str]]:
@@ -140,8 +178,9 @@ def _extract_pdf_text(self, reader: PdfReader) -> tuple[str, list[str]]:
except Exception as e:
failed_pages.append(f"page {page_num + 1}: {e}")
continue
- if page_text and page_text.strip():
- text_parts.append(page_text)
+ if page_text:
+ if not self.config.clean_text or page_text.strip():
+ text_parts.append(page_text)
return "\n".join(text_parts), failed_pages
@@ -209,6 +248,9 @@ def _create_chunks(
elif chunking_strategy.type == "static":
chunk_size = chunking_strategy.static.max_chunk_size_tokens
overlap_size = chunking_strategy.static.chunk_overlap_tokens
+ elif chunking_strategy.type == "contextual":
+ chunk_size = chunking_strategy.contextual.max_chunk_size_tokens
+ overlap_size = chunking_strategy.contextual.chunk_overlap_tokens
else:
chunk_size = self.config.default_chunk_size_tokens
overlap_size = self.config.default_chunk_overlap_tokens
diff --git a/src/llama_stack/providers/inline/vector_io/faiss/faiss.py b/src/llama_stack/providers/inline/vector_io/faiss/faiss.py
index aeda6d97bb..bd0803cf5e 100644
--- a/src/llama_stack/providers/inline/vector_io/faiss/faiss.py
+++ b/src/llama_stack/providers/inline/vector_io/faiss/faiss.py
@@ -344,7 +344,7 @@ async def query_hybrid(
class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
- """Vector I/O adapter using FAISS for in-memory vector similarity search."""
+ """VectorIO adapter that uses FAISS for similarity search and vector storage."""
def __init__(
self,
diff --git a/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py b/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py
index c4bc1c6b4a..73a398d8eb 100644
--- a/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py
+++ b/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py
@@ -290,7 +290,7 @@ async def query_hybrid(
class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
- """Vector I/O adapter for remote Weaviate instances."""
+ """VectorIO adapter that uses Weaviate for similarity search and vector storage."""
def __init__(
self,
diff --git a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
index a09ddc3bf7..7c1a00728d 100644
--- a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
+++ b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
@@ -25,7 +25,6 @@
)
from llama_stack.providers.utils.memory.vector_store import (
content_from_data_and_mime_type,
- make_overlapped_chunks,
validate_tiktoken_encoding,
)
from llama_stack.providers.utils.vector_io.filters import parse_filter
@@ -926,13 +925,7 @@ async def openai_attach_file_to_vector_store(
)
return vector_store_file_object
- if isinstance(chunking_strategy, VectorStoreChunkingStrategyStatic):
- max_chunk_size_tokens = chunking_strategy.static.max_chunk_size_tokens
- chunk_overlap_tokens = chunking_strategy.static.chunk_overlap_tokens
- elif isinstance(chunking_strategy, VectorStoreChunkingStrategyContextual):
- max_chunk_size_tokens = chunking_strategy.contextual.max_chunk_size_tokens
- chunk_overlap_tokens = chunking_strategy.contextual.chunk_overlap_tokens
- # Fail fast on missing model_id before entering the file-processing try/except
+ if isinstance(chunking_strategy, VectorStoreChunkingStrategyContextual):
ctx = chunking_strategy.contextual
if not ctx.model_id and not self.vector_stores_config.contextual_retrieval_params.model:
raise ValueError(
@@ -940,9 +933,6 @@ async def openai_attach_file_to_vector_store(
"Provide it in chunking_strategy.contextual or configure a default "
"in contextual_retrieval_params.model on the server."
)
- else:
- max_chunk_size_tokens = DEFAULT_CHUNK_SIZE_TOKENS
- chunk_overlap_tokens = DEFAULT_CHUNK_OVERLAP_TOKENS
try:
file_response = await self.files_api.openai_retrieve_file(RetrieveFileRequest(file_id=file_id))
@@ -956,49 +946,37 @@ async def openai_attach_file_to_vector_store(
chunk_attributes["filename"] = file_response.filename
chunk_attributes["file_id"] = file_id
- # Try using FileProcessor API if available
- if hasattr(self, "file_processor_api") and self.file_processor_api:
- try:
- logger.debug("Using FileProcessor API to process file", file_id=file_id)
- pf_resp = await self.file_processor_api.process_file(
- ProcessFileRequest(file_id=file_id, chunking_strategy=chunking_strategy)
- )
-
- chunks = []
- for chunk in pf_resp.chunks:
- # Enhance chunk metadata with file info and attributes
- enhanced_metadata = chunk.metadata.copy() if chunk.metadata else {}
- enhanced_metadata.update(chunk_attributes)
-
- # Ensure document_id consistency
- if chunk.chunk_metadata:
- chunk.chunk_metadata.document_id = file_id
-
- # Create enhanced chunk
- enhanced_chunk = Chunk(
- content=chunk.content,
- chunk_id=chunk.chunk_id,
- metadata=enhanced_metadata,
- chunk_metadata=chunk.chunk_metadata,
- )
- chunks.append(enhanced_chunk)
+ if not self.file_processor_api:
+ raise RuntimeError(
+ "FileProcessor API is required for file processing but is not configured. "
+ "Please ensure a file_processors provider is registered in your stack configuration."
+ )
- logger.debug("FileProcessor generated chunks for file", chunks_count=len(chunks), file_id=file_id)
+ logger.debug("Using FileProcessor API to process file", file_id=file_id)
+ pf_resp = await self.file_processor_api.process_file(
+ ProcessFileRequest(file_id=file_id, chunking_strategy=chunking_strategy)
+ )
- except Exception as e:
- logger.warning(
- "FileProcessor failed for file, falling back to legacy chunking", file_id=file_id, error=str(e)
- )
- # Fall back to legacy chunking path
- chunks = await self._legacy_chunk_file(
- file_id, file_response, max_chunk_size_tokens, chunk_overlap_tokens, chunk_attributes
- )
- else:
- logger.debug("FileProcessor API not available, using legacy chunking for file", file_id=file_id)
- # Legacy chunking path when FileProcessor not available
- chunks = await self._legacy_chunk_file(
- file_id, file_response, max_chunk_size_tokens, chunk_overlap_tokens, chunk_attributes
+ chunks = []
+ for chunk in pf_resp.chunks:
+ # Enhance chunk metadata with file info and attributes
+ enhanced_metadata = chunk.metadata.copy() if chunk.metadata else {}
+ enhanced_metadata.update(chunk_attributes)
+
+ # Ensure document_id consistency
+ if chunk.chunk_metadata:
+ chunk.chunk_metadata.document_id = file_id
+
+ # Create enhanced chunk
+ enhanced_chunk = Chunk(
+ content=chunk.content,
+ chunk_id=chunk.chunk_id,
+ metadata=enhanced_metadata,
+ chunk_metadata=chunk.chunk_metadata,
)
+ chunks.append(enhanced_chunk)
+
+ logger.debug("FileProcessor generated chunks for file", chunk_count=len(chunks), file_id=file_id)
if isinstance(chunking_strategy, VectorStoreChunkingStrategyContextual):
mime_type, _ = mimetypes.guess_type(file_response.filename)
@@ -1089,35 +1067,6 @@ async def openai_attach_file_to_vector_store(
return vector_store_file_object
- async def _legacy_chunk_file(
- self,
- file_id: str,
- file_response: OpenAIFileObject,
- max_chunk_size_tokens: int,
- chunk_overlap_tokens: int,
- chunk_attributes: dict[str, Any],
- ) -> list[Chunk]:
- """Legacy file chunking method using content extraction and make_overlapped_chunks."""
-
- mime_type, _ = mimetypes.guess_type(file_response.filename)
- if not self.files_api:
- raise ValueError("Files API not available")
- content_response = await self.files_api.openai_retrieve_file_content(
- RetrieveFileContentRequest(file_id=file_id)
- )
-
- content = content_from_data_and_mime_type(content_response.body, mime_type)
-
- chunks = make_overlapped_chunks(
- file_id, # Use file_id as document_id for stability
- content,
- max_chunk_size_tokens,
- chunk_overlap_tokens,
- chunk_attributes,
- )
-
- return chunks
-
async def openai_list_files_in_vector_store(
self,
vector_store_id: str,
diff --git a/src/llama_stack/providers/utils/memory/vector_store.py b/src/llama_stack/providers/utils/memory/vector_store.py
index e1bc93f564..5e2c81f0f0 100644
--- a/src/llama_stack/providers/utils/memory/vector_store.py
+++ b/src/llama_stack/providers/utils/memory/vector_store.py
@@ -286,7 +286,6 @@ class VectorStoreWithIndex:
vector_store: VectorStore
index: EmbeddingIndex
inference_api: Inference
- file_processor_api: Any = None
vector_stores_config: VectorStoresConfig | None = None
async def insert_chunks(
diff --git a/src/llama_stack/testing/api_recorder.py b/src/llama_stack/testing/api_recorder.py
index 0499d53039..e528536f0a 100644
--- a/src/llama_stack/testing/api_recorder.py
+++ b/src/llama_stack/testing/api_recorder.py
@@ -69,6 +69,9 @@ class APIRecordingMode(StrEnum):
_FLOAT_IN_STRING_PATTERN = re.compile(r"(-?\d+\.\d{4,})")
+_FILE_SEARCH_SCORE_PATTERN = re.compile(r"score:\s*[-+]?\d*\.?\d+(?:[eE][-+]?\d+)?")
+_FILE_SEARCH_ATTRIBUTES_PATTERN = re.compile(r",?\s*attributes:\s*\{[^}]*\}")
+
def _normalize_numeric_literal_strings(value: str) -> str:
"""Round any long decimal literals embedded in strings for stable hashing."""
@@ -80,6 +83,17 @@ def _replace(match: re.Match[str]) -> str:
return _FLOAT_IN_STRING_PATTERN.sub(_replace, value)
+def _normalize_file_search_metadata(value: str) -> str:
+    """Make file_search result text hash-stable across runs.
+
+    Every ``score: <number>`` is rewritten to a fixed placeholder and every
+    ``attributes: {...}`` fragment is dropped, since both vary between runs
+    for identical documents and would otherwise break replay hash matching.
+    """
+    without_scores = _FILE_SEARCH_SCORE_PATTERN.sub("score: __NORMALIZED__", value)
+    return _FILE_SEARCH_ATTRIBUTES_PATTERN.sub("", without_scores)
+
+
def _normalize_body_for_hash(value: Any, exclude_stream_options: bool = False, *, _is_root: bool = True) -> Any:
"""Recursively normalize a JSON-like value to improve hash stability."""
@@ -104,6 +118,7 @@ def _normalize_body_for_hash(value: Any, exclude_stream_options: bool = False, *
if isinstance(value, float):
return round(value, 5)
if isinstance(value, str):
+ value = _normalize_file_search_metadata(value)
return _normalize_numeric_literal_strings(value)
return value
@@ -204,25 +219,6 @@ def normalize_tool_request(provider_name: str, tool_name: str, kwargs: dict[str,
return hashlib.sha256(normalized_json.encode()).hexdigest()
-def normalize_file_processor_request(request: Any) -> str:
- """Create a normalized hash of a file processor request for consistent matching."""
- test_id = get_test_context()
- normalized: dict[str, Any] = {
- "test_id": test_id,
- "api": "file_processors",
- "file_id": getattr(request, "file_id", None),
- }
- chunking = getattr(request, "chunking_strategy", None)
- if chunking and hasattr(chunking, "model_dump"):
- normalized["chunking_strategy"] = chunking.model_dump(mode="json")
- options = getattr(request, "options", None)
- if options:
- normalized["options"] = options
-
- normalized_json = json.dumps(normalized, sort_keys=True)
- return hashlib.sha256(normalized_json.encode()).hexdigest()
-
-
def normalize_http_request(url: str, method: str, payload: dict[str, Any]) -> str:
"""Create a normalized hash of an HTTP request for consistent matching.
@@ -735,47 +731,14 @@ async def _patched_tool_invoke_method(
async def _patched_file_processor_method(original_method, provider_name: str, self, request, file=None):
- """Patched version of file processor process_file method for recording/replay.
+ """Patched version of file processor process_file method.
- Only intercepts calls that reference a file_id (internal calls from the
- OpenAIVectorStoreMixin). Direct HTTP uploads to the file-processors
- endpoint have file_id=None and are passed through unmodified.
+ File processors are local, deterministic operations (no network calls),
+ so this wrapper always awaits the real method; provider_name is unused.
+ Recording/replaying them is unreliable because file_id values are randomly
+ generated per test run, making hash-based lookup fail on replay.
"""
- global _current_mode, _current_storage
-
- file_id = getattr(request, "file_id", None)
- if _current_mode == APIRecordingMode.LIVE or _current_storage is None or not file_id:
- return await original_method(self, request, file)
-
- request_hash = normalize_file_processor_request(request)
-
- if _current_mode in (APIRecordingMode.REPLAY, APIRecordingMode.RECORD_IF_MISSING):
- recording = _current_storage.find_recording(request_hash)
- if recording:
- return recording["response"]["body"]
- elif _current_mode == APIRecordingMode.REPLAY:
- raise RuntimeError(
- f"Recording not found for {provider_name}.process_file | file_id: {getattr(request, 'file_id', None)}\n"
- f"\n"
- f"Run './scripts/integration-tests.sh --inference-mode record-if-missing' with required API keys to generate."
- )
-
- if _current_mode in (APIRecordingMode.RECORD, APIRecordingMode.RECORD_IF_MISSING):
- result = await original_method(self, request, file)
-
- request_data = {
- "test_id": get_test_context(),
- "provider": provider_name,
- "api": "file_processors",
- "file_id": getattr(request, "file_id", None),
- }
- response_data = {"body": result, "is_streaming": False}
-
- _current_storage.store_recording(request_hash, request_data, response_data)
- return result
-
- else:
- raise AssertionError(f"Invalid mode: {_current_mode}")
+ return await original_method(self, request, file)
def _patched_aiohttp_post(original_post, session_self, url: str, **kwargs):
diff --git a/tests/integration/file_processors/test_pypdf_processor.py b/tests/integration/file_processors/test_pypdf_processor.py
index 0aacd499df..0f6b2937c3 100644
--- a/tests/integration/file_processors/test_pypdf_processor.py
+++ b/tests/integration/file_processors/test_pypdf_processor.py
@@ -7,12 +7,14 @@
import io
import uuid
from pathlib import Path
+from unittest.mock import AsyncMock
import pytest
from fastapi import UploadFile
from llama_stack.providers.inline.file_processor.pypdf import PyPDFFileProcessorConfig
from llama_stack.providers.inline.file_processor.pypdf.pypdf import PyPDFFileProcessor
+from llama_stack_api.common.errors import OpenAIFileObjectNotFoundError
from llama_stack_api.vector_io import (
VectorStoreChunkingStrategyAuto,
VectorStoreChunkingStrategyStatic,
@@ -121,7 +123,7 @@ def config(self) -> PyPDFFileProcessorConfig:
@pytest.fixture
def processor(self, config: PyPDFFileProcessorConfig) -> PyPDFFileProcessor:
"""PyPDF processor instance for testing."""
- return PyPDFFileProcessor(config, files_api=None)
+ return PyPDFFileProcessor(config, files_api=AsyncMock())
@pytest.fixture
def test_pdf_path(self) -> Path:
@@ -256,7 +258,7 @@ async def test_metadata_extraction(self, processor: PyPDFFileProcessor, upload_f
async def test_text_cleaning(self):
"""Test text cleaning functionality."""
config = PyPDFFileProcessorConfig(clean_text=True)
- processor = PyPDFFileProcessor(config, files_api=None)
+ processor = PyPDFFileProcessor(config, files_api=AsyncMock())
# Test the text cleaning method directly
raw_text = " This has multiple spaces\n\n\nand extra\n\n newlines "
@@ -269,7 +271,7 @@ async def test_no_text_cleaning(self, upload_file: UploadFile):
"""Test processing without text cleaning."""
upload_file.file.seek(0) # Rewind stream before use
config = PyPDFFileProcessorConfig(clean_text=False)
- processor = PyPDFFileProcessor(config, files_api=None)
+ processor = PyPDFFileProcessor(config, files_api=AsyncMock())
response = await processor.process_file(file=upload_file, chunking_strategy=None)
@@ -282,7 +284,7 @@ async def test_no_metadata_extraction(self, upload_file: UploadFile):
"""Test processing without metadata extraction."""
upload_file.file.seek(0) # Rewind stream before use
config = PyPDFFileProcessorConfig(extract_metadata=False)
- processor = PyPDFFileProcessor(config, files_api=None)
+ processor = PyPDFFileProcessor(config, files_api=AsyncMock())
response = await processor.process_file(file=upload_file, chunking_strategy=None)
@@ -307,10 +309,16 @@ async def test_input_validation(self, processor: PyPDFFileProcessor):
with pytest.raises(ValueError, match="Cannot provide both file and file_id"):
await processor.process_file(file=upload_file, file_id="test_id")
- async def test_file_id_without_files_api(self, processor: PyPDFFileProcessor):
- """Test processing file_id without files API."""
- with pytest.raises(ValueError, match="Files API not available"):
- await processor.process_file(file_id="test_file_id")
+    async def test_nonexistent_file_id_raises_error(self):
+        """A file_id the Files API cannot find raises OpenAIFileObjectNotFoundError."""
+        missing_id = "nonexistent_id"
+        files_api = AsyncMock()
+        files_api.openai_retrieve_file.side_effect = OpenAIFileObjectNotFoundError(missing_id)
+
+        processor = PyPDFFileProcessor(PyPDFFileProcessorConfig(), files_api=files_api)
+
+        with pytest.raises(OpenAIFileObjectNotFoundError, match="not found"):
+            await processor.process_file(file_id=missing_id)
async def test_minimal_pdf_processing(self, processor: PyPDFFileProcessor):
"""Test processing a minimal PDF with no extractable text."""
diff --git a/tests/integration/vector_io/recordings/68fc1c10759e8ecc7d230c8db94dba29261b4539d9e845839fea7507cc8ed143.json b/tests/integration/vector_io/recordings/68fc1c10759e8ecc7d230c8db94dba29261b4539d9e845839fea7507cc8ed143.json
new file mode 100644
index 0000000000..b4df8eed05
--- /dev/null
+++ b/tests/integration/vector_io/recordings/68fc1c10759e8ecc7d230c8db94dba29261b4539d9e845839fea7507cc8ed143.json
@@ -0,0 +1,67 @@
+{
+ "test_id": "tests/integration/vector_io/test_openai_vector_stores.py::test_openai_vector_store_contextual_chunking[client_with_models-vector_io=sqlite-vec-txt=ollama/llama3.2:3b-instruct-fp16:emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768]",
+ "request": {
+ "method": "POST",
+ "url": "http://0.0.0.0:11434/v1/v1/chat/completions",
+ "headers": {},
+ "body": {
+ "model": "llama3.2:3b-instruct-fp16",
+ "messages": [
+ {
+ "role": "system",
+ "content": "\n# Technical Overview of Machine Learning Systems\n\n## Introduction to Neural Networks\n\nNeural networks are computational models inspired by biological neural networks.\nThey consist of interconnected nodes called neurons organized in layers.\nEach connection has a weight that is adjusted during training.\n\n## Gradient Descent Optimization\n\nThe backpropagation algorithm computes gradients for each layer.\nThese gradients are used to update weights using gradient descent.\nThe learning rate controls the step size during optimization.\n\n## Data Preprocessing\n\nRaw data must be normalized before training.\nFeature scaling ensures all inputs have similar ranges.\nData augmentation can increase the effective training set size.\n\n\nHere is the chunk we want to situate within the whole document\n"
+ },
+ {
+ "role": "user",
+ "content": "# Technical Overview of Machine Learning Systems\n## Introduction to Neural Networks\nNeural networks are computational models inspired by biological neural networks.\nThey consist of interconnected nodes called neurons organized in layers.\nEach connection has a weight that is adjusted during training.\n## Gradient Descent Optimization\nThe backpropagation algorithm computes gradients for each layer.\nThese gradients are used to update weights using gradient descent.\nThe learning rate controls the step size during optimization.\n## Data Preprocessing\nRaw data must be normalized before training.\nFeature scaling ensures all inputs have similar ranges.\nData augmentation can increase the effective training set size.\n\nPlease give a short succinct description to situate this chunk of text within the overall document for the purposes of improving search retrieval of the chunk. Answer only with the succinct description and nothing else."
+ }
+ ],
+ "max_tokens": 256,
+ "stream": false,
+ "temperature": 0.0
+ },
+ "endpoint": "/v1/chat/completions",
+ "model": "llama3.2:3b-instruct-fp16",
+ "provider_metadata": {
+ "openai_sdk_version": "2.5.0"
+ }
+ },
+ "response": {
+ "body": {
+ "__type__": "openai.types.chat.chat_completion.ChatCompletion",
+ "__data__": {
+ "id": "rec-68fc1c10759e",
+ "choices": [
+ {
+ "finish_reason": "stop",
+ "index": 0,
+ "logprobs": null,
+ "message": {
+ "content": "\"Key components of neural network architecture, including structure, optimization, data preparation, and training processes.\"",
+ "refusal": null,
+ "role": "assistant",
+ "annotations": null,
+ "audio": null,
+ "function_call": null,
+ "tool_calls": null
+ }
+ }
+ ],
+ "created": 0,
+ "model": "llama3.2:3b-instruct-fp16",
+ "object": "chat.completion",
+ "service_tier": null,
+ "system_fingerprint": "fp_ollama",
+ "usage": {
+ "completion_tokens": 21,
+ "prompt_tokens": 325,
+ "total_tokens": 346,
+ "completion_tokens_details": null,
+ "prompt_tokens_details": null
+ }
+ }
+ },
+ "is_streaming": false
+ },
+ "id_normalization_mapping": {}
+}
diff --git a/tests/integration/vector_io/recordings/6b40ec65cd837781105c826d302b4c1cb4f8ad40911d91dfd368e4b8e3bd2e29.json b/tests/integration/vector_io/recordings/6b40ec65cd837781105c826d302b4c1cb4f8ad40911d91dfd368e4b8e3bd2e29.json
new file mode 100644
index 0000000000..b8d497cd40
--- /dev/null
+++ b/tests/integration/vector_io/recordings/6b40ec65cd837781105c826d302b4c1cb4f8ad40911d91dfd368e4b8e3bd2e29.json
@@ -0,0 +1,67 @@
+{
+ "test_id": "tests/integration/vector_io/test_openai_vector_stores.py::test_openai_vector_store_contextual_vs_static_chunks[client_with_models-vector_io=sqlite-vec-txt=ollama/llama3.2:3b-instruct-fp16:emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768]",
+ "request": {
+ "method": "POST",
+ "url": "http://0.0.0.0:11434/v1/v1/chat/completions",
+ "headers": {},
+ "body": {
+ "model": "llama3.2:3b-instruct-fp16",
+ "messages": [
+ {
+ "role": "system",
+ "content": "\n# Technical Overview of Machine Learning Systems\n\n## Introduction to Neural Networks\n\nNeural networks are computational models inspired by biological neural networks.\nThey consist of interconnected nodes called neurons organized in layers.\nEach connection has a weight that is adjusted during training.\n\n## Gradient Descent Optimization\n\nThe backpropagation algorithm computes gradients for each layer.\nThese gradients are used to update weights using gradient descent.\nThe learning rate controls the step size during optimization.\n\n## Data Preprocessing\n\nRaw data must be normalized before training.\nFeature scaling ensures all inputs have similar ranges.\nData augmentation can increase the effective training set size.\n\n\nHere is the chunk we want to situate within the whole document\n"
+ },
+ {
+ "role": "user",
+ "content": "# Technical Overview of Machine Learning Systems\n## Introduction to Neural Networks\nNeural networks are computational models inspired by biological neural networks.\nThey consist of interconnected nodes called neurons organized in layers.\nEach connection has a weight that is adjusted during training.\n## Gradient Descent Optimization\nThe backpropagation algorithm computes gradients for each layer.\nThese gradients are used to update weights using gradient descent.\nThe learning rate controls the step size during optimization.\n## Data Preprocessing\nRaw data must be normalized before training.\nFeature scaling ensures all inputs have similar ranges.\nData augmentation can increase the effective training set size.\n\nPlease give a short succinct description to situate this chunk of text within the overall document for the purposes of improving search retrieval of the chunk. Answer only with the succinct description and nothing else."
+ }
+ ],
+ "max_tokens": 256,
+ "stream": false,
+ "temperature": 0.0
+ },
+ "endpoint": "/v1/chat/completions",
+ "model": "llama3.2:3b-instruct-fp16",
+ "provider_metadata": {
+ "openai_sdk_version": "2.5.0"
+ }
+ },
+ "response": {
+ "body": {
+ "__type__": "openai.types.chat.chat_completion.ChatCompletion",
+ "__data__": {
+ "id": "rec-6b40ec65cd83",
+ "choices": [
+ {
+ "finish_reason": "stop",
+ "index": 0,
+ "logprobs": null,
+ "message": {
+ "content": "\"Key components of neural network architecture, including structure, optimization, data preparation, and training processes.\"",
+ "refusal": null,
+ "role": "assistant",
+ "annotations": null,
+ "audio": null,
+ "function_call": null,
+ "tool_calls": null
+ }
+ }
+ ],
+ "created": 0,
+ "model": "llama3.2:3b-instruct-fp16",
+ "object": "chat.completion",
+ "service_tier": null,
+ "system_fingerprint": "fp_ollama",
+ "usage": {
+ "completion_tokens": 21,
+ "prompt_tokens": 325,
+ "total_tokens": 346,
+ "completion_tokens_details": null,
+ "prompt_tokens_details": null
+ }
+ }
+ },
+ "is_streaming": false
+ },
+ "id_normalization_mapping": {}
+}
diff --git a/tests/integration/vector_io/recordings/d864e69a414033d9e2c2080b78f409041d750c78e07d7c3c2c0e1794c17ae547.json b/tests/integration/vector_io/recordings/d864e69a414033d9e2c2080b78f409041d750c78e07d7c3c2c0e1794c17ae547.json
new file mode 100644
index 0000000000..6897b5aa3f
--- /dev/null
+++ b/tests/integration/vector_io/recordings/d864e69a414033d9e2c2080b78f409041d750c78e07d7c3c2c0e1794c17ae547.json
@@ -0,0 +1,67 @@
+{
+ "test_id": "tests/integration/vector_io/test_openai_vector_stores.py::test_openai_vector_store_contextual_chunking[client_with_models-vector_io=faiss-txt=ollama/llama3.2:3b-instruct-fp16:emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768]",
+ "request": {
+ "method": "POST",
+ "url": "http://0.0.0.0:11434/v1/v1/chat/completions",
+ "headers": {},
+ "body": {
+ "model": "llama3.2:3b-instruct-fp16",
+ "messages": [
+ {
+ "role": "system",
+ "content": "\n# Technical Overview of Machine Learning Systems\n\n## Introduction to Neural Networks\n\nNeural networks are computational models inspired by biological neural networks.\nThey consist of interconnected nodes called neurons organized in layers.\nEach connection has a weight that is adjusted during training.\n\n## Gradient Descent Optimization\n\nThe backpropagation algorithm computes gradients for each layer.\nThese gradients are used to update weights using gradient descent.\nThe learning rate controls the step size during optimization.\n\n## Data Preprocessing\n\nRaw data must be normalized before training.\nFeature scaling ensures all inputs have similar ranges.\nData augmentation can increase the effective training set size.\n\n\nHere is the chunk we want to situate within the whole document\n"
+ },
+ {
+ "role": "user",
+ "content": "# Technical Overview of Machine Learning Systems\n## Introduction to Neural Networks\nNeural networks are computational models inspired by biological neural networks.\nThey consist of interconnected nodes called neurons organized in layers.\nEach connection has a weight that is adjusted during training.\n## Gradient Descent Optimization\nThe backpropagation algorithm computes gradients for each layer.\nThese gradients are used to update weights using gradient descent.\nThe learning rate controls the step size during optimization.\n## Data Preprocessing\nRaw data must be normalized before training.\nFeature scaling ensures all inputs have similar ranges.\nData augmentation can increase the effective training set size.\n\nPlease give a short succinct description to situate this chunk of text within the overall document for the purposes of improving search retrieval of the chunk. Answer only with the succinct description and nothing else."
+ }
+ ],
+ "max_tokens": 256,
+ "stream": false,
+ "temperature": 0.0
+ },
+ "endpoint": "/v1/chat/completions",
+ "model": "llama3.2:3b-instruct-fp16",
+ "provider_metadata": {
+ "openai_sdk_version": "2.5.0"
+ }
+ },
+ "response": {
+ "body": {
+ "__type__": "openai.types.chat.chat_completion.ChatCompletion",
+ "__data__": {
+ "id": "rec-d864e69a4140",
+ "choices": [
+ {
+ "finish_reason": "stop",
+ "index": 0,
+ "logprobs": null,
+ "message": {
+ "content": "\"Key components of neural network architecture, including structure, optimization, data preparation, and training processes.\"",
+ "refusal": null,
+ "role": "assistant",
+ "annotations": null,
+ "audio": null,
+ "function_call": null,
+ "tool_calls": null
+ }
+ }
+ ],
+ "created": 0,
+ "model": "llama3.2:3b-instruct-fp16",
+ "object": "chat.completion",
+ "service_tier": null,
+ "system_fingerprint": "fp_ollama",
+ "usage": {
+ "completion_tokens": 21,
+ "prompt_tokens": 325,
+ "total_tokens": 346,
+ "completion_tokens_details": null,
+ "prompt_tokens_details": null
+ }
+ }
+ },
+ "is_streaming": false
+ },
+ "id_normalization_mapping": {}
+}
diff --git a/tests/integration/vector_io/recordings/fd68b6cb2dfae2b48ffd4c8be06e3e0215da6bb3f33315f12b6ccc2665f19ca7.json b/tests/integration/vector_io/recordings/fd68b6cb2dfae2b48ffd4c8be06e3e0215da6bb3f33315f12b6ccc2665f19ca7.json
new file mode 100644
index 0000000000..bf176b8ca1
--- /dev/null
+++ b/tests/integration/vector_io/recordings/fd68b6cb2dfae2b48ffd4c8be06e3e0215da6bb3f33315f12b6ccc2665f19ca7.json
@@ -0,0 +1,67 @@
+{
+ "test_id": "tests/integration/vector_io/test_openai_vector_stores.py::test_openai_vector_store_contextual_vs_static_chunks[client_with_models-vector_io=faiss-txt=ollama/llama3.2:3b-instruct-fp16:emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768]",
+ "request": {
+ "method": "POST",
+ "url": "http://0.0.0.0:11434/v1/v1/chat/completions",
+ "headers": {},
+ "body": {
+ "model": "llama3.2:3b-instruct-fp16",
+ "messages": [
+ {
+ "role": "system",
+ "content": "\n# Technical Overview of Machine Learning Systems\n\n## Introduction to Neural Networks\n\nNeural networks are computational models inspired by biological neural networks.\nThey consist of interconnected nodes called neurons organized in layers.\nEach connection has a weight that is adjusted during training.\n\n## Gradient Descent Optimization\n\nThe backpropagation algorithm computes gradients for each layer.\nThese gradients are used to update weights using gradient descent.\nThe learning rate controls the step size during optimization.\n\n## Data Preprocessing\n\nRaw data must be normalized before training.\nFeature scaling ensures all inputs have similar ranges.\nData augmentation can increase the effective training set size.\n\n\nHere is the chunk we want to situate within the whole document\n"
+ },
+ {
+ "role": "user",
+ "content": "# Technical Overview of Machine Learning Systems\n## Introduction to Neural Networks\nNeural networks are computational models inspired by biological neural networks.\nThey consist of interconnected nodes called neurons organized in layers.\nEach connection has a weight that is adjusted during training.\n## Gradient Descent Optimization\nThe backpropagation algorithm computes gradients for each layer.\nThese gradients are used to update weights using gradient descent.\nThe learning rate controls the step size during optimization.\n## Data Preprocessing\nRaw data must be normalized before training.\nFeature scaling ensures all inputs have similar ranges.\nData augmentation can increase the effective training set size.\n\nPlease give a short succinct description to situate this chunk of text within the overall document for the purposes of improving search retrieval of the chunk. Answer only with the succinct description and nothing else."
+ }
+ ],
+ "max_tokens": 256,
+ "stream": false,
+ "temperature": 0.0
+ },
+ "endpoint": "/v1/chat/completions",
+ "model": "llama3.2:3b-instruct-fp16",
+ "provider_metadata": {
+ "openai_sdk_version": "2.5.0"
+ }
+ },
+ "response": {
+ "body": {
+ "__type__": "openai.types.chat.chat_completion.ChatCompletion",
+ "__data__": {
+ "id": "rec-fd68b6cb2dfa",
+ "choices": [
+ {
+ "finish_reason": "stop",
+ "index": 0,
+ "logprobs": null,
+ "message": {
+ "content": "\"Key components of neural network architecture, including structure, optimization, data preparation, and training processes.\"",
+ "refusal": null,
+ "role": "assistant",
+ "annotations": null,
+ "audio": null,
+ "function_call": null,
+ "tool_calls": null
+ }
+ }
+ ],
+ "created": 0,
+ "model": "llama3.2:3b-instruct-fp16",
+ "object": "chat.completion",
+ "service_tier": null,
+ "system_fingerprint": "fp_ollama",
+ "usage": {
+ "completion_tokens": 21,
+ "prompt_tokens": 325,
+ "total_tokens": 346,
+ "completion_tokens_details": null,
+ "prompt_tokens_details": null
+ }
+ }
+ },
+ "is_streaming": false
+ },
+ "id_normalization_mapping": {}
+}
diff --git a/tests/unit/distribution/test_api_recordings.py b/tests/unit/distribution/test_api_recordings.py
index 2165e82654..1b300522e6 100644
--- a/tests/unit/distribution/test_api_recordings.py
+++ b/tests/unit/distribution/test_api_recordings.py
@@ -172,6 +172,113 @@ def test_request_normalization_edge_cases(self):
)
assert hash7 == hash8
+ def test_file_search_score_normalization(self):
+ """Test that file_search scores are normalized for stable hashing.
+
+ Vector search returns non-deterministic scores that vary between runs.
+ The score values must be replaced entirely (not just rounded) so that
+ replay hashes match regardless of the exact score returned.
+ """
+ url = "http://test/v1/chat/completions"
+
+ body_a = {
+ "messages": [
+ {
+ "role": "tool",
+ "content": "document_id: file-123, score: 0.8523456789",
+ }
+ ]
+ }
+ body_b = {
+ "messages": [
+ {
+ "role": "tool",
+ "content": "document_id: file-123, score: 0.4217891234",
+ }
+ ]
+ }
+ hash_a = normalize_inference_request("POST", url, {}, body_a)
+ hash_b = normalize_inference_request("POST", url, {}, body_b)
+ assert hash_a == hash_b
+
+ def test_file_search_score_normalization_short_decimals(self):
+ """Scores with few decimal places also get normalized."""
+ url = "http://test/v1/chat/completions"
+
+ body_a = {"messages": [{"role": "tool", "content": "document_id: file-1, score: 0.85"}]}
+ body_b = {"messages": [{"role": "tool", "content": "document_id: file-1, score: 0.42"}]}
+ hash_a = normalize_inference_request("POST", url, {}, body_a)
+ hash_b = normalize_inference_request("POST", url, {}, body_b)
+ assert hash_a == hash_b
+
+ def test_file_search_attributes_normalization(self):
+ """Test that differing file_search attribute dicts do not change the request hash."""
+ url = "http://test/v1/chat/completions"
+
+ body_a = {
+ "messages": [
+ {
+ "role": "tool",
+ "content": "document_id: file-123, score: 0.85, attributes: {'document_id': 'file-123', 'source': 'a.txt'}",
+ }
+ ]
+ }
+ body_b = {
+ "messages": [
+ {
+ "role": "tool",
+ "content": "document_id: file-123, score: 0.85, attributes: {'document_id': 'file-456', 'source': 'b.txt'}",
+ }
+ ]
+ }
+ hash_a = normalize_inference_request("POST", url, {}, body_a)
+ hash_b = normalize_inference_request("POST", url, {}, body_b)
+ assert hash_a == hash_b
+
+ def test_file_search_full_metadata_normalization(self):
+ """Score and attributes that differ together, ahead of identical chunk text, hash the same."""
+ url = "http://test/v1/chat/completions"
+
+ body_a = {
+ "messages": [
+ {"role": "user", "content": "What is in the document?"},
+ {
+ "role": "tool",
+ "content": (
+ "document_id: file-100, score: 0.9321"
+ ", attributes: {'document_id': 'file-100', 'region': 'us'}"
+ "\nThe document discusses quarterly earnings."
+ ),
+ },
+ ]
+ }
+ body_b = {
+ "messages": [
+ {"role": "user", "content": "What is in the document?"},
+ {
+ "role": "tool",
+ "content": (
+ "document_id: file-100, score: 0.6178"
+ ", attributes: {'document_id': 'file-100', 'region': 'eu'}"
+ "\nThe document discusses quarterly earnings."
+ ),
+ },
+ ]
+ }
+ hash_a = normalize_inference_request("POST", url, {}, body_a)
+ hash_b = normalize_inference_request("POST", url, {}, body_b)
+ assert hash_a == hash_b
+
+ def test_non_file_search_content_still_differs(self):
+ """Ensure normalization does not collapse genuinely different requests."""
+ url = "http://test/v1/chat/completions"
+
+ body_a = {"messages": [{"role": "user", "content": "What is machine learning?"}]}
+ body_b = {"messages": [{"role": "user", "content": "What is deep learning?"}]}
+ hash_a = normalize_inference_request("POST", url, {}, body_a)
+ hash_b = normalize_inference_request("POST", url, {}, body_b)
+ assert hash_a != hash_b
+
def test_response_storage(self, temp_storage_dir):
"""Test the ResponseStorage class."""
temp_storage_dir = temp_storage_dir / "test_response_storage"
diff --git a/tests/unit/providers/utils/memory/test_openai_vector_store_mixin.py b/tests/unit/providers/utils/memory/test_openai_vector_store_mixin.py
new file mode 100644
index 0000000000..e272cc8b49
--- /dev/null
+++ b/tests/unit/providers/utils/memory/test_openai_vector_store_mixin.py
@@ -0,0 +1,129 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
+from llama_stack_api import (
+ VectorStoreChunkingStrategyAuto,
+)
+from llama_stack_api.vector_io.models import OpenAIAttachFileRequest
+
+
+def _make_store_info():
+ """Build a minimal in-memory vector store dict matching the mixin's expectations."""
+ return {
+ "file_ids": [],
+ "file_counts": {"total": 0, "completed": 0, "cancelled": 0, "failed": 0, "in_progress": 0},
+ "metadata": {},
+ }
+
+
+class MockVectorStoreMixin(OpenAIVectorStoreMixin):
+ """Concrete OpenAIVectorStoreMixin subclass whose storage hooks are no-ops, for testing."""
+
+ def __init__(self, inference_api, files_api, kvstore=None, file_processor_api=None):
+ super().__init__(
+ inference_api=inference_api,
+ files_api=files_api,
+ kvstore=kvstore,
+ file_processor_api=file_processor_api,
+ )
+
+ async def register_vector_store(self, vector_store):
+ pass
+
+ async def unregister_vector_store(self, vector_store_id):
+ pass
+
+ async def insert_chunks(self, request):
+ pass
+
+ async def query_chunks(self, request):
+ pass
+
+ async def delete_chunks(self, request):
+ pass
+
+
+class TestOpenAIVectorStoreMixin:
+ """Unit tests for OpenAIVectorStoreMixin."""
+
+ @pytest.fixture
+ def mock_files_api(self):
+ mock = AsyncMock()
+ mock.openai_retrieve_file = AsyncMock()
+ mock.openai_retrieve_file.return_value = MagicMock(filename="test.pdf")
+ return mock
+
+ @pytest.fixture
+ def mock_inference_api(self):
+ return AsyncMock()
+
+ @pytest.fixture
+ def mock_kvstore(self):
+ kv = AsyncMock()
+ kv.set = AsyncMock()
+ kv.get = AsyncMock(return_value=None)
+ return kv
+
+ async def test_missing_file_processor_api_returns_failed_status(
+ self, mock_inference_api, mock_files_api, mock_kvstore
+ ):
+ """Test that missing file_processor_api marks the file as failed with a clear error."""
+ mixin = MockVectorStoreMixin(
+ inference_api=mock_inference_api,
+ files_api=mock_files_api,
+ kvstore=mock_kvstore,
+ file_processor_api=None,
+ )
+
+ vector_store_id = "test_vector_store"
+ file_id = "test_file_id"
+ mixin.openai_vector_stores[vector_store_id] = _make_store_info()
+
+ result = await mixin.openai_attach_file_to_vector_store(
+ vector_store_id=vector_store_id,
+ request=OpenAIAttachFileRequest(
+ file_id=file_id,
+ chunking_strategy=VectorStoreChunkingStrategyAuto(),
+ ),
+ )
+
+ assert result.status == "failed"
+ assert result.last_error is not None
+ assert "FileProcessor API is required" in result.last_error.message
+
+ async def test_file_processor_api_configured_succeeds(self, mock_inference_api, mock_files_api, mock_kvstore):
+ """Test that a configured file_processor_api does not trigger the missing-FileProcessor error."""
+ mock_file_processor_api = AsyncMock()
+ mock_file_processor_api.process_file = AsyncMock()
+ mock_file_processor_api.process_file.return_value = MagicMock(chunks=[], metadata={"processor": "pypdf"})
+
+ mixin = MockVectorStoreMixin(
+ inference_api=mock_inference_api,
+ files_api=mock_files_api,
+ kvstore=mock_kvstore,
+ file_processor_api=mock_file_processor_api,
+ )
+
+ vector_store_id = "test_vector_store"
+ file_id = "test_file_id"
+ mixin.openai_vector_stores[vector_store_id] = _make_store_info()
+
+ result = await mixin.openai_attach_file_to_vector_store(
+ vector_store_id=vector_store_id,
+ request=OpenAIAttachFileRequest(
+ file_id=file_id,
+ chunking_strategy=VectorStoreChunkingStrategyAuto(),
+ ),
+ )
+
+ # Other failures are tolerated here; only the missing-FileProcessor error is ruled out
+ if result.last_error:
+ assert "FileProcessor API is required" not in result.last_error.message