feat: support for Solido RAG endpoints (#396)

ajcasagrande · web-flow · commit 141c244eec77 · 2025-10-27T20:58:49.000-07:00
diff --git a/docs/genai-perf-feature-comparison.md b/docs/genai-perf-feature-comparison.md
@@ -51,6 +51,7 @@ This comparison matrix shows the supported CLI options between GenAI-Perf and AI
 | **template** | Template-based inference endpoints | ✅ | ❌ | |
 | **tensorrtllm_engine** | TensorRT-LLM engine direct access | ✅ | ❌ | |
 | **vision** | Computer vision model endpoints | ✅ | ❌ | |
+| **solido_rag** | SOLIDO RAG endpoint | 🟡 | ✅ | |
 
 ---
 
diff --git a/src/aiperf/common/enums/plugin_enums.py b/src/aiperf/common/enums/plugin_enums.py
@@ -29,6 +29,7 @@ class EndpointType(CaseInsensitiveStrEnum):
     EMBEDDINGS = "embeddings"
     HF_TEI_RANKINGS = "hf_tei_rankings"
     NIM_RANKINGS = "nim_rankings"
+    SOLIDO_RAG = "solido_rag"
 
 
 class TransportType(CaseInsensitiveStrEnum):
diff --git a/src/aiperf/common/models/__init__.py b/src/aiperf/common/models/__init__.py
@@ -77,6 +77,7 @@
     ParsedResponseRecord,
     ProcessRecordsResult,
     ProfileResults,
+    RAGSources,
     RankingsResponseData,
     RawRecordInfo,
     ReasoningResponseData,
@@ -158,6 +159,7 @@
     "ProcessTelemetryResult",
     "ProcessingStats",
     "ProfileResults",
+    "RAGSources",
     "RankingsResponseData",
     "RawRecordInfo",
     "ReasoningResponseData",
diff --git a/src/aiperf/common/models/record_models.py b/src/aiperf/common/models/record_models.py
@@ -11,6 +11,7 @@
 from pydantic import (
     BaseModel,
     Field,
+    RootModel,
     SerializeAsAny,
 )
 from typing_extensions import Self
@@ -575,17 +576,17 @@ def get_text(self) -> str:
         return "".join([self.reasoning or "", self.content or ""])
 
 
+class RAGSources(RootModel[dict[str, Any] | list[Any]]):
+    """RAG sources can be either a dictionary or list format."""
+
+
 class EmbeddingResponseData(BaseResponseData):
     """Parsed embedding response data."""
 
     embeddings: list[list[float]] = Field(
         ..., description="The embedding vectors from the response."
     )
 
-    def get_text(self) -> str:
-        """Get the text of the response (empty for embeddings)."""
-        return ""
-
 
 class RankingsResponseData(BaseResponseData):
     """Parsed rankings response data."""
@@ -594,10 +595,6 @@ class RankingsResponseData(BaseResponseData):
         ..., description="The rankings results from the response."
     )
 
-    def get_text(self) -> str:
-        """Get the text of the response (empty for rankings)."""
-        return ""
-
 
 class ParsedResponse(AIPerfBaseModel):
     """Parsed response from a inference client."""
@@ -610,6 +607,11 @@ class ParsedResponse(AIPerfBaseModel):
         | RankingsResponseData
         | BaseResponseData
     ] = Field(..., description="The parsed response data.")
+    sources: RAGSources | None = Field(
+        default=None,
+        description="The sources used in the RAG query of the response. This can be a dictionary of source documents, "
+        "a list of sources, or None. Only applicable to responses with RAG response data.",
+    )
 
 
 class ParsedResponseRecord(AIPerfBaseModel):
diff --git a/src/aiperf/endpoints/__init__.py b/src/aiperf/endpoints/__init__.py
@@ -25,6 +25,9 @@
 from aiperf.endpoints.openai_embeddings import (
     EmbeddingsEndpoint,
 )
+from aiperf.endpoints.solido_rag import (
+    SolidoEndpoint,
+)
 
 __all__ = [
     "BaseEndpoint",
@@ -35,4 +38,5 @@
     "EmbeddingsEndpoint",
     "HFTeiRankingsEndpoint",
     "NIMRankingsEndpoint",
+    "SolidoEndpoint",
 ]
diff --git a/src/aiperf/endpoints/solido_rag.py b/src/aiperf/endpoints/solido_rag.py
@@ -0,0 +1,122 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+from __future__ import annotations
+
+from typing import Any
+
+from aiperf.common.decorators import implements_protocol
+from aiperf.common.enums import EndpointType
+from aiperf.common.factories import EndpointFactory
+from aiperf.common.models import (
+    ParsedResponse,
+)
+from aiperf.common.models.metadata import EndpointMetadata
+from aiperf.common.models.record_models import RAGSources, RequestInfo, TextResponseData
+from aiperf.common.protocols import EndpointProtocol, InferenceServerResponse
+from aiperf.common.types import JsonObject, RequestOutputT
+from aiperf.endpoints.base_endpoint import BaseEndpoint
+
+
+@implements_protocol(EndpointProtocol)
+@EndpointFactory.register(EndpointType.SOLIDO_RAG)
+class SolidoEndpoint(BaseEndpoint):
+    """SOLIDO RAG endpoint.
+
+    SOLIDO is a RAG (Retrieval-Augmented Generation) endpoint that processes
+    queries with filters and inference model specifications. Supports streaming
+    responses.
+    """
+
+    @classmethod
+    def metadata(cls) -> EndpointMetadata:
+        """Return SOLIDO endpoint metadata."""
+        return EndpointMetadata(
+            endpoint_path="/rag/api/prompt",
+            supports_streaming=True,
+            produces_tokens=True,
+            tokenizes_input=True,
+            metrics_title="SOLIDO RAG Metrics",
+        )
+
+    def format_payload(self, request_info: RequestInfo) -> RequestOutputT:
+        """Format SOLIDO RAG request payload from RequestInfo.
+
+        Args:
+            request_info: Request context including model endpoint, metadata, and turns
+
+        Returns:
+            SOLIDO API payload with query, filters, and inference_model fields
+        """
+        if not request_info.turns:
+            raise ValueError("SOLIDO endpoint requires at least one turn.")
+
+        turn = request_info.turns[-1]
+        model_endpoint = request_info.model_endpoint
+
+        # Extract query text from turn
+        query = [content for text in turn.texts for content in text.contents if content]
+
+        # Default filters for SOLIDO
+        filters = {"family": "Solido", "tool": "SDE"}
+
+        # Use the model name from the turn or model endpoint
+        inference_model = turn.model or model_endpoint.primary_model_name
+
+        payload: dict[str, Any] = {
+            "query": query,
+            "filters": filters,
+            "inference_model": inference_model,
+        }
+
+        if model_endpoint.endpoint.extra:
+            payload.update(model_endpoint.endpoint.extra)
+
+        self.debug(lambda: f"Formatted SOLIDO payload: {payload}")
+        return payload
+
+    def parse_response(
+        self, response: InferenceServerResponse
+    ) -> ParsedResponse | None:
+        """Parse SOLIDO API response.
+
+        Args:
+            response: Raw response from inference server
+
+        Returns:
+            Parsed response with extracted content or None if parsing fails
+        """
+        json_obj = response.get_json()
+        if not json_obj:
+            self.debug(lambda: f"No JSON in response: {response.get_raw()}")
+            return None
+
+        data, sources = self._extract_solido_response_data(json_obj)
+        return (
+            ParsedResponse(perf_ns=response.perf_ns, data=data, sources=sources)
+            if data
+            else None
+        )
+
+    def _extract_solido_response_data(
+        self, json_obj: JsonObject
+    ) -> tuple[TextResponseData | None, RAGSources | None]:
+        """Extract the generated text and RAG sources from a SOLIDO JSON response.
+
+        Args:
+            json_obj: Deserialized SOLIDO response
+
+        Returns:
+            (data, sources) pair; (None, None) when "content" is missing or empty
+        """
+        # SOLIDO responses contain a "content" field with the generated text
+        content = json_obj.get("content")
+        if not content:
+            self.debug(lambda: f"No content found in SOLIDO response: {json_obj}")
+            return None, None
+
+        sources = json_obj.get("sources")
+        if not sources:
+            self.debug(lambda: f"No sources found in SOLIDO response: {json_obj}")
+
+        return self.make_text_response_data(content), sources
diff --git a/tests/endpoints/test_solido_rag.py b/tests/endpoints/test_solido_rag.py

Original file line number	Diff line number	Diff line change
`@@ -25,6 +25,9 @@`
`25`	`25`	`from aiperf.endpoints.openai_embeddings import (`
`26`	`26`	`EmbeddingsEndpoint,`
`27`	`27`	`)`
	`28`	`+from aiperf.endpoints.solido_rag import (`
	`29`	`+ SolidoEndpoint,`
	`30`	`+)`
`28`	`31`
`29`	`32`	`__all__ = [`
`30`	`33`	`"BaseEndpoint",`
`@@ -35,4 +38,5 @@`
`35`	`38`	`"EmbeddingsEndpoint",`
`36`	`39`	`"HFTeiRankingsEndpoint",`
`37`	`40`	`"NIMRankingsEndpoint",`
	`41`	`+ "SolidoEndpoint",`
`38`	`42`	`]`