116 changes: 116 additions & 0 deletions serving/deploy/utils.py
@@ -0,0 +1,116 @@
import sys
import requests
from urllib3.exceptions import InsecureRequestWarning
from transformers import DistilBertTokenizer

requests.packages.urllib3.disable_warnings(category=InsecureRequestWarning)

tokenizer = DistilBertTokenizer.from_pretrained(
    "distilbert-base-uncased-finetuned-sst-2-english"
)

GREEN = "\033[92m"
RESET = "\033[0m"
BOLD = "\033[1m"


def tokenize(text: str):
    tokens = tokenizer(
        text, truncation=True, padding="max_length", max_length=128, return_tensors="pt"
    )

    # Extract input IDs and attention mask
    input_ids = tokens["input_ids"].tolist()[0]
    attention_mask = tokens["attention_mask"].tolist()[0]
    return {"input_ids": input_ids, "attention_mask": attention_mask}


def old_prepare_distilbert_request(tokens):
    """
    DEPRECATED: TensorFlow Serving v1 API format (legacy).
    This format is still supported by OpenVINO Model Server for backward
    compatibility, but the KServe V2 API is recommended for new deployments.

    Endpoint: /v1/models/<model>:predict
    """
    return {
        "instances": [
            {
                "input_ids": tokens["input_ids"],
                "attention_mask": tokens["attention_mask"],
            }
        ]
    }


def prepare_distilbert_request(tokens):
    """
    KServe V2 API format (recommended).
    This is the current standard API for model inference in RHOAI 2.25.

    Endpoint: /v2/models/<model>/infer
    """
    return {
        "inputs": [
            {
                "name": "input_ids",
                "shape": [1, 128],
                "datatype": "INT64",
                "data": tokens["input_ids"],
            },
            {
                "name": "attention_mask",
                "shape": [1, 128],
                "datatype": "INT64",
                "data": tokens["attention_mask"],
            },
        ]
    }


def prepare_diabetes_request():
    """Build a KServe V2 request with a sample eight-feature input row for the diabetes model."""
    return {
        "inputs": [
            {"name": "dense_input", "shape": [1, 8], "datatype": "FP32", "data": [6.0, 110.0, 65.0, 15.0, 1.0, 45.7, 0.627, 50.0]}
        ]
    }


def send_inference_request(url, body, token=None):
    headers = {"Content-Type": "application/json"}
    if token is not None:
        headers["Authorization"] = f"Bearer {token}"
    # verify=False disables TLS certificate verification; the corresponding
    # InsecureRequestWarning is silenced at import time above.
    return requests.post(url, json=body, headers=headers, verify=False)
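

# A minimal usage sketch (the URL and token names are the example values used in
# the validation notebook below): the KServe V2 response wraps results in an
# "outputs" list, and the scores live in a flat "data" array.
#
#     response = send_inference_request(
#         "https://diabetes-serving-deploy.apps.ocp4.example.com/v2/models/diabetes/infer",
#         prepare_diabetes_request(),
#         token=diabetes_auth_token,
#     )
#     probability_of_diabetes = response.json()["outputs"][0]["data"][1]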


def print_curl_request(url, query):
    print(
        f'\n{BOLD}{GREEN}Inference request for {url}, using "{query}" as input.{RESET}\n'
    )
    # Tokenize the input text
    tokens = tokenize(query)

    # Define request in KServe V2 format and print
    body = f"""'{{"inputs": [
    {{
        "name": "input_ids",
        "shape": [1, 128],
        "datatype": "INT64",
        "data": [{", ".join([str(i) for i in tokens["input_ids"]])}]
    }},
    {{
        "name": "attention_mask",
        "shape": [1, 128],
        "datatype": "INT64",
        "data": [{", ".join([str(i) for i in tokens["attention_mask"]])}]
    }}
]}}'
"""
    request = f'curl -X POST -k {url} \\\n  -H "Content-Type: application/json" \\\n  -d {body}'
    print(request)


if __name__ == "__main__":
    if len(sys.argv) != 3:
        sys.exit(f"Usage: {sys.argv[0]} <inference-url> <text>")
    url = sys.argv[1]
    query = sys.argv[2]
    print_curl_request(url, query)
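
The module can also be driven directly from Python; a minimal sketch, assuming the example DistilBERT route used in the notebook below:

# Prints a copy-pasteable curl command for the KServe V2 /infer endpoint;
# the text is tokenized locally and rendered into the request body.
from utils import print_curl_request

print_curl_request(
    "https://distilbert-serving-deploy.apps.ocp4.example.com/v2/models/distilbert/infer",
    "OpenShift AI is great!",
)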
85 changes: 85 additions & 0 deletions serving/deploy/validate_model_servers.ipynb
@@ -0,0 +1,85 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!pip -qq install transformers==4.46.3\n",
"import utils"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Paste the authentication token from the RHOAI Models tab.\n",
"# In the raw deployment mode there is one different token for each model deployment\n",
"diabetes_auth_token = \"paste-token-here\"\n",
"distilbert_auth_token = \"paste-token-here\"\n",
"diabetes_url = \"https://diabetes-serving-deploy.apps.ocp4.example.com/v2/models/diabetes/infer\"\n",
"distilbert_url = \"https://distilbert-serving-deploy.apps.ocp4.example.com/v2/models/distilbert/infer\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"1. Validate that the diabetes model responds using the KServe V2 API."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(\"\\nValidating diabetes model...\\n\")\n",
"diabetes_request = utils.prepare_diabetes_request()\n",
"print(f\"Diabetes request:\\n {diabetes_request}\")\n",
"response = utils.send_inference_request(diabetes_url, diabetes_request, diabetes_auth_token)\n",
"output = response.json()[\"outputs\"][0]\n",
"diabetes_probability = output[\"data\"][1]\n",
"print(f\"Probability of diabetes: {100 * diabetes_probability:.2f}%\")\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"2. Validate that the DistilBERT model performs sentiment analysis using the KServe V2 API."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": "prompt = \"OpenShift AI is great!\"\n\nprint(f\"\\nPerforming sentiment analysis on '{prompt}' ...\\n\")\ntokens = utils.tokenize(prompt)\nprint(f\"Tokens:\\n {tokens}\")\n\n# Prepare request in KServe V2 API format\ndistilbert_request = utils.prepare_distilbert_request(tokens)\nprint(f\"\\nDistilBERT request (KServe V2 format):\\n {distilbert_request}\\n\")\n\nresponse = utils.send_inference_request(distilbert_url, distilbert_request, distilbert_auth_token)\n\n# Parse the response\noutput = response.json()[\"outputs\"][0]\nlogits = output[\"data\"]\n\n# DistilBERT outputs two scores: [negative_score, positive_score]\nnegative_score = logits[0]\npositive_score = logits[1]\n\n# Determine sentiment based on which score is higher\nif positive_score > negative_score:\n sentiment = \"POSITIVE\"\n confidence = positive_score\nelse:\n sentiment = \"NEGATIVE\"\n confidence = negative_score\n\nprint(f\"\\nSentiment: {sentiment}\")\nprint(f\"Confidence score: {confidence:.2f}\")\n"
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.20"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
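
For reference, a sketch of the response shape that the notebook cells parse. The field values and the output tensor name are placeholders, but the outputs[0]["data"] indexing matches the cells above:

# Illustrative KServe V2 /infer response for the DistilBERT model
# (placeholder values; the actual output name depends on the exported model).
example_response = {
    "model_name": "distilbert",
    "outputs": [
        {
            "name": "logits",
            "shape": [1, 2],
            "datatype": "FP32",
            "data": [-2.1, 2.4],  # [negative_score, positive_score]
        }
    ],
}

negative_score, positive_score = example_response["outputs"][0]["data"]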