Skip to content

Commit 141c244

Browse files
authored
feat: support for Solido RAG endpoints (#396)
1 parent 59e73bd commit 141c244

File tree

7 files changed

+509
-8
lines changed

7 files changed

+509
-8
lines changed

docs/genai-perf-feature-comparison.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ This comparison matrix shows the supported CLI options between GenAI-Perf and AI
5151
| **template** | Template-based inference endpoints ||| |
5252
| **tensorrtllm_engine** | TensorRT-LLM engine direct access ||| |
5353
| **vision** | Computer vision model endpoints ||| |
54+
| **solido_rag** | SOLIDO RAG endpoint | 🟡 || |
5455

5556
---
5657

src/aiperf/common/enums/plugin_enums.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ class EndpointType(CaseInsensitiveStrEnum):
2929
EMBEDDINGS = "embeddings"
3030
HF_TEI_RANKINGS = "hf_tei_rankings"
3131
NIM_RANKINGS = "nim_rankings"
32+
SOLIDO_RAG = "solido_rag"
3233

3334

3435
class TransportType(CaseInsensitiveStrEnum):

src/aiperf/common/models/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@
7777
ParsedResponseRecord,
7878
ProcessRecordsResult,
7979
ProfileResults,
80+
RAGSources,
8081
RankingsResponseData,
8182
RawRecordInfo,
8283
ReasoningResponseData,
@@ -158,6 +159,7 @@
158159
"ProcessTelemetryResult",
159160
"ProcessingStats",
160161
"ProfileResults",
162+
"RAGSources",
161163
"RankingsResponseData",
162164
"RawRecordInfo",
163165
"ReasoningResponseData",

src/aiperf/common/models/record_models.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from pydantic import (
1212
BaseModel,
1313
Field,
14+
RootModel,
1415
SerializeAsAny,
1516
)
1617
from typing_extensions import Self
@@ -575,17 +576,17 @@ def get_text(self) -> str:
575576
return "".join([self.reasoning or "", self.content or ""])
576577

577578

579+
class RAGSources(RootModel[dict[str, Any] | list[Any]]):
    """Root model wrapping the sources reported with a RAG response.

    The wrapped root value is either a dictionary or a list.
    """
581+
582+
578583
class EmbeddingResponseData(BaseResponseData):
579584
"""Parsed embedding response data."""
580585

581586
embeddings: list[list[float]] = Field(
582587
..., description="The embedding vectors from the response."
583588
)
584589

585-
def get_text(self) -> str:
586-
"""Get the text of the response (empty for embeddings)."""
587-
return ""
588-
589590

590591
class RankingsResponseData(BaseResponseData):
591592
"""Parsed rankings response data."""
@@ -594,10 +595,6 @@ class RankingsResponseData(BaseResponseData):
594595
..., description="The rankings results from the response."
595596
)
596597

597-
def get_text(self) -> str:
598-
"""Get the text of the response (empty for rankings)."""
599-
return ""
600-
601598

602599
class ParsedResponse(AIPerfBaseModel):
603600
"""Parsed response from an inference client."""
@@ -610,6 +607,11 @@ class ParsedResponse(AIPerfBaseModel):
610607
| RankingsResponseData
611608
| BaseResponseData
612609
] = Field(..., description="The parsed response data.")
610+
sources: RAGSources | None = Field(
611+
default=None,
612+
description="The sources used in the RAG query of the response. This can be a dictionary of source documents, "
613+
"a list of sources, or None. Only applicable to responses with RAG response data.",
614+
)
613615

614616

615617
class ParsedResponseRecord(AIPerfBaseModel):

src/aiperf/endpoints/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@
2525
from aiperf.endpoints.openai_embeddings import (
2626
EmbeddingsEndpoint,
2727
)
28+
from aiperf.endpoints.solido_rag import (
29+
SolidoEndpoint,
30+
)
2831

2932
__all__ = [
3033
"BaseEndpoint",
@@ -35,4 +38,5 @@
3538
"EmbeddingsEndpoint",
3639
"HFTeiRankingsEndpoint",
3740
"NIMRankingsEndpoint",
41+
"SolidoEndpoint",
3842
]

src/aiperf/endpoints/solido_rag.py

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
from __future__ import annotations
5+
6+
from typing import Any
7+
8+
from aiperf.common.decorators import implements_protocol
9+
from aiperf.common.enums import EndpointType
10+
from aiperf.common.factories import EndpointFactory
11+
from aiperf.common.models import (
12+
ParsedResponse,
13+
)
14+
from aiperf.common.models.metadata import EndpointMetadata
15+
from aiperf.common.models.record_models import RAGSources, RequestInfo, TextResponseData
16+
from aiperf.common.protocols import EndpointProtocol, InferenceServerResponse
17+
from aiperf.common.types import JsonObject, RequestOutputT
18+
from aiperf.endpoints.base_endpoint import BaseEndpoint
19+
20+
21+
@implements_protocol(EndpointProtocol)
@EndpointFactory.register(EndpointType.SOLIDO_RAG)
class SolidoEndpoint(BaseEndpoint):
    """SOLIDO RAG endpoint.

    SOLIDO is a RAG (Retrieval-Augmented Generation) endpoint that processes
    queries with filters and inference model specifications. Supports streaming
    responses.
    """

    @classmethod
    def metadata(cls) -> EndpointMetadata:
        """Return SOLIDO endpoint metadata."""
        return EndpointMetadata(
            endpoint_path="/rag/api/prompt",
            supports_streaming=True,
            produces_tokens=True,
            tokenizes_input=True,
            metrics_title="SOLIDO RAG Metrics",
        )

    def format_payload(self, request_info: RequestInfo) -> RequestOutputT:
        """Format SOLIDO RAG request payload from RequestInfo.

        Args:
            request_info: Request context including model endpoint, metadata, and turns

        Returns:
            SOLIDO API payload with query, filters, and inference_model fields,
            plus any extra endpoint parameters merged on top.

        Raises:
            ValueError: If the request contains no turns.
        """
        if not request_info.turns:
            raise ValueError("SOLIDO endpoint requires at least one turn.")

        # Only the most recent turn is sent to the endpoint.
        turn = request_info.turns[-1]
        model_endpoint = request_info.model_endpoint

        # Collect every non-empty text content of the turn; the query is sent
        # as a list of strings.
        query = [content for text in turn.texts for content in text.contents if content]

        # Default filters for SOLIDO
        filters = {"family": "Solido", "tool": "SDE"}

        # Use the model name from the turn or model endpoint
        inference_model = turn.model or model_endpoint.primary_model_name

        payload: dict[str, Any] = {
            "query": query,
            "filters": filters,
            "inference_model": inference_model,
        }

        # Extra endpoint parameters are applied last, so they can override any
        # of the defaults above (including "filters").
        if model_endpoint.endpoint.extra:
            payload.update(model_endpoint.endpoint.extra)

        self.debug(lambda: f"Formatted SOLIDO payload: {payload}")
        return payload

    def parse_response(
        self, response: InferenceServerResponse
    ) -> ParsedResponse | None:
        """Parse SOLIDO API response.

        Args:
            response: Raw response from inference server

        Returns:
            Parsed response with extracted content (and sources, when present),
            or None if the response has no JSON body or no content.
        """
        json_obj = response.get_json()
        if not json_obj:
            self.debug(lambda: f"No JSON in response: {response.get_raw()}")
            return None

        data, sources = self._extract_solido_response_data(json_obj)
        if not data:
            return None
        return ParsedResponse(perf_ns=response.perf_ns, data=data, sources=sources)

    def _extract_solido_response_data(
        self, json_obj: JsonObject
    ) -> tuple[TextResponseData | None, RAGSources | None]:
        """Extract content and sources from SOLIDO JSON response.

        Args:
            json_obj: Deserialized SOLIDO response

        Returns:
            A ``(data, sources)`` pair. ``data`` is None when the response has
            no content, in which case ``sources`` is None as well. ``sources``
            is None when the response carries no sources or carries them in an
            unsupported (non-dict, non-list) shape.
        """
        # SOLIDO responses contain a "content" field with the generated text
        content = json_obj.get("content")
        if not content:
            self.debug(lambda: f"No content found in SOLIDO response: {json_obj}")
            return None, None

        raw_sources = json_obj.get("sources")
        if not raw_sources:
            self.debug(lambda: f"No sources found in SOLIDO response: {json_obj}")

        # Wrap only the shapes RAGSources accepts, so a malformed "sources"
        # value cannot make the whole response fail validation downstream.
        sources: RAGSources | None = None
        if isinstance(raw_sources, (dict, list)):
            sources = RAGSources.model_validate(raw_sources)
        elif raw_sources is not None:
            self.debug(
                lambda: "Ignoring SOLIDO sources of unsupported type: "
                f"{type(raw_sources).__name__}"
            )

        return self.make_text_response_data(content), sources

0 commit comments

Comments
 (0)