diff --git a/serving/deploy/utils.py b/serving/deploy/utils.py
new file mode 100644
index 0000000..6b1a6a5
--- /dev/null
+++ b/serving/deploy/utils.py
@@ -0,0 +1,116 @@
+import sys
+import requests
+from urllib3.exceptions import InsecureRequestWarning
+from transformers import DistilBertTokenizer
+
+requests.packages.urllib3.disable_warnings(category=InsecureRequestWarning)
+
+tokenizer = DistilBertTokenizer.from_pretrained(
+    "distilbert-base-uncased-finetuned-sst-2-english"
+)
+
+GREEN = "\033[92m"
+RESET = "\033[0m"
+BOLD = "\033[1m"
+
+
+def tokenize(text: str):
+    tokens = tokenizer(
+        text, truncation=True, padding="max_length", max_length=128, return_tensors="pt"
+    )
+
+    # Extract input IDs and attention mask
+    input_ids = tokens["input_ids"].tolist()[0]
+    attention_mask = tokens["attention_mask"].tolist()[0]
+    return {"input_ids": input_ids, "attention_mask": attention_mask}
+
+
+def old_prepare_distilbert_request(tokens):
+    """
+    DEPRECATED: TensorFlow Serving v1 API format (legacy)
+    This format is still supported by OpenVINO Model Server for backward compatibility,
+    but KServe V2 API is recommended for new deployments.
+
+    Endpoint: /v1/models/<model-name>:predict
+    """
+    return {
+        "instances": [
+            {
+                "input_ids": tokens["input_ids"],
+                "attention_mask": tokens["attention_mask"],
+            }
+        ]
+    }
+
+
+def prepare_distilbert_request(tokens):
+    """
+    KServe V2 API format (recommended)
+    This is the current standard API for model inference in RHOAI 2.25.
+
+    Endpoint: /v2/models/<model-name>/infer
+    """
+    return {
+        "inputs": [
+            {
+                "name": "input_ids",
+                "shape": [1, 128],
+                "datatype": "INT64",
+                "data": tokens["input_ids"]
+            },
+            {
+                "name": "attention_mask",
+                "shape": [1, 128],
+                "datatype": "INT64",
+                "data": tokens["attention_mask"]
+            }
+        ]
+    }
+
+
+def prepare_diabetes_request():
+    return {
+        "inputs": [
+            {"name": "dense_input", "shape": [1, 8], "datatype": "FP32", "data": [6.0, 110.0, 65.0, 15.0, 1.0, 45.7, 0.627, 50.0]}
+        ]
+    }
+
+
+def send_inference_request(url, body, token=None):
+    headers = {"Content-Type": "application/json"}
+    if token is not None:
+        headers["Authorization"] = f"Bearer {token}"
+    return requests.post(url, json=body, headers=headers, verify=False)
+
+
+def print_curl_request(url, query):
+    print(
+        f'\n{BOLD}{GREEN}Inference request for the {url} url, using "{query}" as input.{RESET}\n'
+    )
+    # Tokenize the input text
+    tokens = tokenize(query)
+
+    # Define request in KServe V2 format and print
+    body = f"""'{{"inputs": [
+    {{
+        "name": "input_ids",
+        "shape": [1, 128],
+        "datatype": "INT64",
+        "data": [{", ".join([str(i) for i in tokens["input_ids"]])}]
+    }},
+    {{
+        "name": "attention_mask",
+        "shape": [1, 128],
+        "datatype": "INT64",
+        "data": [{", ".join([str(i) for i in tokens["attention_mask"]])}]
+    }}
+    ]}}'
+    """
+    request = f'curl -X POST -k {url} \\ \n -H "Content-Type: application/json" \\ \n -d {body}'
+    print(request)
+
+
+if __name__ == "__main__":
+    query = sys.argv[2]
+    url = sys.argv[1]
+    print_curl_request(url, query)
diff --git a/serving/deploy/validate_model_servers.ipynb b/serving/deploy/validate_model_servers.ipynb
new file mode 100644
index 0000000..2939d31
--- /dev/null
+++ b/serving/deploy/validate_model_servers.ipynb
@@ -0,0 +1,85 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip -qq install transformers==4.46.3\n",
+    "import utils"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count":
null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Paste the authentication token from the RHOAI Models tab.\n",
+    "# In the raw deployment mode there is one different token for each model deployment\n",
+    "diabetes_auth_token = \"paste-token-here\"\n",
+    "distilbert_auth_token = \"paste-token-here\"\n",
+    "diabetes_url = \"https://diabetes-serving-deploy.apps.ocp4.example.com/v2/models/diabetes/infer\"\n",
+    "distilbert_url = \"https://distilbert-serving-deploy.apps.ocp4.example.com/v2/models/distilbert/infer\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "1. Validate that the diabetes model responds using the KServe V2 API."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"\\nValidating diabetes model...\\n\")\n",
+    "diabetes_request = utils.prepare_diabetes_request()\n",
+    "print(f\"Diabetes request:\\n {diabetes_request}\")\n",
+    "response = utils.send_inference_request(diabetes_url, diabetes_request, diabetes_auth_token)\n",
+    "output = response.json()[\"outputs\"][0]\n",
+    "diabetes_probability = output[\"data\"][1]\n",
+    "print(f\"Probability of diabetes: {100 * diabetes_probability:.2f}%\")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "2. Validate that the DistilBERT model performs sentiment analysis using the KServe V2 API."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": "prompt = \"OpenShift AI is great!\"\n\nprint(f\"\\nPerforming sentiment analysis on '{prompt}' ...\\n\")\ntokens = utils.tokenize(prompt)\nprint(f\"Tokens:\\n {tokens}\")\n\n# Prepare request in KServe V2 API format\ndistilbert_request = utils.prepare_distilbert_request(tokens)\nprint(f\"\\nDistilBERT request (KServe V2 format):\\n {distilbert_request}\\n\")\n\nresponse = utils.send_inference_request(distilbert_url, distilbert_request, distilbert_auth_token)\n\n# Parse the response\noutput = response.json()[\"outputs\"][0]\nlogits = output[\"data\"]\n\n# DistilBERT outputs two scores: [negative_score, positive_score]\nnegative_score = logits[0]\npositive_score = logits[1]\n\n# Determine sentiment based on which score is higher\nif positive_score > negative_score:\n    sentiment = \"POSITIVE\"\n    confidence = positive_score\nelse:\n    sentiment = \"NEGATIVE\"\n    confidence = negative_score\n\nprint(f\"\\nSentiment: {sentiment}\")\nprint(f\"Confidence score: {confidence:.2f}\")\n"
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.20"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
\ No newline at end of file