116 changes: 116 additions & 0 deletions serving/deploy/utils.py
@@ -0,0 +1,116 @@
import sys
import requests
from urllib3.exceptions import InsecureRequestWarning
from transformers import DistilBertTokenizer

requests.packages.urllib3.disable_warnings(category=InsecureRequestWarning)

tokenizer = DistilBertTokenizer.from_pretrained(
    "distilbert-base-uncased-finetuned-sst-2-english"
)

GREEN = "\033[92m"
RESET = "\033[0m"
BOLD = "\033[1m"


def tokenize(text: str):
    tokens = tokenizer(
        text, truncation=True, padding="max_length", max_length=128, return_tensors="pt"
    )

    # Extract input IDs and attention mask
    input_ids = tokens["input_ids"].tolist()[0]
    attention_mask = tokens["attention_mask"].tolist()[0]
    return {"input_ids": input_ids, "attention_mask": attention_mask}


def old_prepare_distilbert_request(tokens):
    """
    DEPRECATED: TensorFlow Serving v1 API format (legacy).
    This format is still supported by OpenVINO Model Server for backward
    compatibility, but the KServe V2 API is recommended for new deployments.

    Endpoint: /v1/models/<model>:predict
    """
    return {
        "instances": [
            {
                "input_ids": tokens["input_ids"],
                "attention_mask": tokens["attention_mask"],
            }
        ]
    }


def prepare_distilbert_request(tokens):
    """
    KServe V2 API format (recommended).
    This is the current standard API for model inference in RHOAI 2.25.

    Endpoint: /v2/models/<model>/infer
    """
    return {
        "inputs": [
            {
                "name": "input_ids",
                "shape": [1, 128],
                "datatype": "INT64",
                "data": tokens["input_ids"],
            },
            {
                "name": "attention_mask",
                "shape": [1, 128],
                "datatype": "INT64",
                "data": tokens["attention_mask"],
            },
        ]
    }


def prepare_diabetes_request():
    """Build a KServe V2 request with a sample eight-feature input row for the diabetes model."""
    return {
        "inputs": [
            {"name": "dense_input", "shape": [1, 8], "datatype": "FP32", "data": [6.0, 110.0, 65.0, 15.0, 1.0, 45.7, 0.627, 50.0]}
        ]
    }


def send_inference_request(url, body, token=None):
    headers = {"Content-Type": "application/json"}
    if token is not None:
        headers["Authorization"] = f"Bearer {token}"
    # verify=False disables TLS certificate verification; the corresponding
    # InsecureRequestWarning is silenced at import time above.
    return requests.post(url, json=body, headers=headers, verify=False)
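

# A minimal usage sketch (the URL and token names are the example values used in
# the validation notebook below): the KServe V2 response wraps results in an
# "outputs" list, and the scores live in a flat "data" array.
#
#     response = send_inference_request(
#         "https://diabetes-serving-deploy.apps.ocp4.example.com/v2/models/diabetes/infer",
#         prepare_diabetes_request(),
#         token=diabetes_auth_token,
#     )
#     probability_of_diabetes = response.json()["outputs"][0]["data"][1]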


def print_curl_request(url, query):
    print(
        f'\n{BOLD}{GREEN}Inference request for {url}, using "{query}" as input.{RESET}\n'
    )
    # Tokenize the input text
    tokens = tokenize(query)

    # Define request in KServe V2 format and print
    body = f"""'{{"inputs": [
    {{
        "name": "input_ids",
        "shape": [1, 128],
        "datatype": "INT64",
        "data": [{", ".join([str(i) for i in tokens["input_ids"]])}]
    }},
    {{
        "name": "attention_mask",
        "shape": [1, 128],
        "datatype": "INT64",
        "data": [{", ".join([str(i) for i in tokens["attention_mask"]])}]
    }}
]}}'
"""
    request = f'curl -X POST -k {url} \\\n  -H "Content-Type: application/json" \\\n  -d {body}'
    print(request)


if __name__ == "__main__":
    if len(sys.argv) != 3:
        sys.exit(f"Usage: {sys.argv[0]} <inference-url> <text>")
    url = sys.argv[1]
    query = sys.argv[2]
    print_curl_request(url, query)
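
The module can also be driven directly from Python; a minimal sketch, assuming the example DistilBERT route used in the notebook below:

# Prints a copy-pasteable curl command for the KServe V2 /infer endpoint;
# the text is tokenized locally and rendered into the request body.
from utils import print_curl_request

print_curl_request(
    "https://distilbert-serving-deploy.apps.ocp4.example.com/v2/models/distilbert/infer",
    "OpenShift AI is great!",
)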
85 changes: 85 additions & 0 deletions serving/deploy/validate_model_servers.ipynb
@@ -0,0 +1,85 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!pip -qq install transformers==4.46.3\n",
"import utils"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Paste the authentication token from the RHOAI Models tab.\n",
"# In the raw deployment mode there is one different token for each model deployment\n",
"diabetes_auth_token = \"paste-token-here\"\n",
"distilbert_auth_token = \"paste-token-here\"\n",
"diabetes_url = \"https://diabetes-serving-deploy.apps.ocp4.example.com/v2/models/diabetes/infer\"\n",
"distilbert_url = \"https://distilbert-serving-deploy.apps.ocp4.example.com/v2/models/distilbert/infer\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"1. Validate that the diabetes model responds using the KServe V2 API."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(\"\\nValidating diabetes model...\\n\")\n",
"diabetes_request = utils.prepare_diabetes_request()\n",
"print(f\"Diabetes request:\\n {diabetes_request}\")\n",
"response = utils.send_inference_request(diabetes_url, diabetes_request, diabetes_auth_token)\n",
"output = response.json()[\"outputs\"][0]\n",
"diabetes_probability = output[\"data\"][1]\n",
"print(f\"Probability of diabetes: {100 * diabetes_probability:.2f}%\")\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"2. Validate that the DistilBERT model performs sentiment analysis using the KServe V2 API."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": "prompt = \"OpenShift AI is great!\"\n\nprint(f\"\\nPerforming sentiment analysis on '{prompt}' ...\\n\")\ntokens = utils.tokenize(prompt)\nprint(f\"Tokens:\\n {tokens}\")\n\n# Prepare request in KServe V2 API format\ndistilbert_request = utils.prepare_distilbert_request(tokens)\nprint(f\"\\nDistilBERT request (KServe V2 format):\\n {distilbert_request}\\n\")\n\nresponse = utils.send_inference_request(distilbert_url, distilbert_request, distilbert_auth_token)\n\n# Parse the response\noutput = response.json()[\"outputs\"][0]\nlogits = output[\"data\"]\n\n# DistilBERT outputs two scores: [negative_score, positive_score]\nnegative_score = logits[0]\npositive_score = logits[1]\n\n# Determine sentiment based on which score is higher\nif positive_score > negative_score:\n sentiment = \"POSITIVE\"\n confidence = positive_score\nelse:\n sentiment = \"NEGATIVE\"\n confidence = negative_score\n\nprint(f\"\\nSentiment: {sentiment}\")\nprint(f\"Confidence score: {confidence:.2f}\")\n"
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.20"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
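
For reference, a sketch of the response shape that the notebook cells parse. The field values and the output tensor name are placeholders, but the outputs[0]["data"] indexing matches the cells above:

# Illustrative KServe V2 /infer response for the DistilBERT model
# (placeholder values; the actual output name depends on the exported model).
example_response = {
    "model_name": "distilbert",
    "outputs": [
        {
            "name": "logits",
            "shape": [1, 2],
            "datatype": "FP32",
            "data": [-2.1, 2.4],  # [negative_score, positive_score]
        }
    ],
}

negative_score, positive_score = example_response["outputs"][0]["data"]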