
Commit 9cf39e4

committed: add example-runs folder
Signed-off-by: sallyom <[email protected]>
1 parent 678adea commit 9cf39e4

File tree

9 files changed: +371 -0 lines changed


.gitignore

+3
@@ -178,3 +178,6 @@ cython_debug/
# Project specific files
*.json
*.yaml

# Allow specific YAML files
!docs/guides/k8s/*.yaml
+44
@@ -0,0 +1,44 @@
# GuideLLM Example Analysis

This directory contains an example analysis script for GuideLLM performance testing.

## Running Benchmarks in Kubernetes

To run comprehensive GuideLLM benchmarks in Kubernetes, follow the instructions in [k8s/README.md](../k8s/README.md). This will help you:

- Set up the necessary Kubernetes environment
- Configure benchmark parameters
- Execute the benchmarks
- Collect performance data

## Analyzing Results

### Using the Analysis Script

The [analyze_benchmarks.py](./analyze_benchmarks.py) script processes the benchmark YAML output and generates visualizations and statistics. To use it:

1. Install the required dependencies:

   ```bash
   pip install -r requirements.txt
   ```

2. Ensure the GuideLLM benchmark YAML file from the Kubernetes guidellm-job pod is copied to your local environment:

   ```bash
   # From the k8s/README.md instructions
   kubectl cp <pod-name>:/path/to/benchmark.yaml ./llama32-3b.yaml
   ```

3. Run the analysis script (make sure the YAML file is in the same directory):

   ```bash
   python analyze_benchmarks.py
   ```

The script will:

- Process the benchmark YAML file
- Generate visualizations in the `benchmark_plots` directory
- Create a CSV file with processed metrics
- Print summary statistics
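
If you want to dig further than the printed summary, a minimal sketch along these lines should work; it only assumes the `benchmark_processed_data.csv` file and the columns that `analyze_benchmarks.py` writes.

```python
# Hypothetical follow-up analysis of the CSV produced by analyze_benchmarks.py.
import pandas as pd

df = pd.read_csv("benchmark_processed_data.csv")

# Compare mean and tail latency across concurrency levels for one metric.
latency = df[df["metric"] == "request_latency"]
print(latency[["concurrency", "mean", "p95", "p99"]].sort_values("concurrency"))
```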
@@ -0,0 +1,140 @@
import yaml
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

def process_benchmark_yaml(yaml_file):
    """Process the benchmark YAML file and return a DataFrame with the data."""
    with open(yaml_file, 'r') as f:
        data = yaml.safe_load(f)

    # Extract concurrency levels from the benchmark configuration
    concurrency_levels = data['benchmarks'][0]['args']['profile']['measured_concurrencies']

    # Process metrics for each concurrency level
    processed_data = []
    for i, benchmark in enumerate(data['benchmarks']):
        if 'metrics' in benchmark:
            metrics = benchmark['metrics']
            concurrency = concurrency_levels[i] if i < len(concurrency_levels) else 1.0

            # Extract successful metrics
            for metric_name, metric_data in metrics.items():
                if 'successful' in metric_data:
                    successful = metric_data['successful']
                    processed_data.append({
                        'concurrency': concurrency,
                        'metric': metric_name,
                        'count': successful.get('count', 0),
                        'mean': successful.get('mean', 0),
                        'median': successful.get('median', 0),
                        'min': successful.get('min', 0),
                        'max': successful.get('max', 0),
                        'std_dev': successful.get('std_dev', 0),
                        'p95': successful.get('percentiles', {}).get('p95', 0),
                        'p99': successful.get('percentiles', {}).get('p99', 0)
                    })

    # Convert to DataFrame
    df = pd.DataFrame(processed_data)
    return df

def create_visualizations(df):
    """Create visualizations for the benchmark data."""
    # Create plots directory if it doesn't exist
    plot_dir = Path('benchmark_plots')
    plot_dir.mkdir(exist_ok=True)

    # Set style
    plt.style.use('default')

    # Sort by concurrency for better visualization
    df = df.sort_values('concurrency')

    # Create visualizations for each metric
    metrics_to_plot = [
        'request_latency',
        'time_to_first_token_ms',
        'tokens_per_second',
        'inter_token_latency_ms'
    ]

    for metric in metrics_to_plot:
        metric_df = df[df['metric'] == metric]
        if not metric_df.empty:
            # Mean vs Median
            plt.figure(figsize=(12, 6))
            plt.plot(metric_df['concurrency'], metric_df['mean'], 'b-', label='Mean')
            plt.plot(metric_df['concurrency'], metric_df['median'], 'r--', label='Median')
            plt.title(f'{metric.replace("_", " ").title()} vs Concurrency')
            plt.xlabel('Concurrency Level')
            plt.ylabel('Value')
            plt.legend()
            plt.grid(True)
            plt.tight_layout()
            plt.savefig(plot_dir / f'{metric}_mean_median.png')
            plt.close()

            # Min-Max Range
            plt.figure(figsize=(12, 6))
            plt.fill_between(metric_df['concurrency'],
                             metric_df['min'],
                             metric_df['max'],
                             alpha=0.3,
                             label='Min-Max Range')
            plt.plot(metric_df['concurrency'], metric_df['mean'], 'b-', label='Mean')
            plt.title(f'{metric.replace("_", " ").title()} Range vs Concurrency')
            plt.xlabel('Concurrency Level')
            plt.ylabel('Value')
            plt.legend()
            plt.grid(True)
            plt.tight_layout()
            plt.savefig(plot_dir / f'{metric}_range.png')
            plt.close()

            # Percentiles
            plt.figure(figsize=(12, 6))
            plt.plot(metric_df['concurrency'], metric_df['p95'], 'g--', label='95th Percentile')
            plt.plot(metric_df['concurrency'], metric_df['p99'], 'r--', label='99th Percentile')
            plt.plot(metric_df['concurrency'], metric_df['mean'], 'b-', label='Mean')
            plt.title(f'{metric.replace("_", " ").title()} Percentiles vs Concurrency')
            plt.xlabel('Concurrency Level')
            plt.ylabel('Value')
            plt.legend()
            plt.grid(True)
            plt.tight_layout()
            plt.savefig(plot_dir / f'{metric}_percentiles.png')
            plt.close()

def main():
    # Process the YAML file
    df = process_benchmark_yaml('llama32-3b.yaml')

    # Create visualizations
    create_visualizations(df)

    # Print summary statistics by concurrency level
    print("\nSummary Statistics by Concurrency Level:")
    for concurrency in sorted(df['concurrency'].unique()):
        print(f"\nConcurrency Level: {concurrency:.2f}")
        subset = df[df['concurrency'] == concurrency]

        for metric in subset['metric'].unique():
            metric_data = subset[subset['metric'] == metric]
            print(f"\n{metric.replace('_', ' ').title()}:")
            print(f"Count: {metric_data['count'].iloc[0]}")
            print(f"Mean: {metric_data['mean'].iloc[0]:.2f}")
            print(f"Median: {metric_data['median'].iloc[0]:.2f}")
            print(f"Min: {metric_data['min'].iloc[0]:.2f}")
            print(f"Max: {metric_data['max'].iloc[0]:.2f}")
            print(f"Std Dev: {metric_data['std_dev'].iloc[0]:.2f}")
            print(f"95th Percentile: {metric_data['p95'].iloc[0]:.2f}")
            print(f"99th Percentile: {metric_data['p99'].iloc[0]:.2f}")

    # Save processed data
    df.to_csv('benchmark_processed_data.csv', index=False)
    print("\nProcessed data saved to benchmark_processed_data.csv")

if __name__ == "__main__":
    main()
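
For orientation, the report structure the script expects can be read off the accessors above; the sketch below only shows the fields the script reads, with invented values, and a real GuideLLM report contains many more fields.

```yaml
# Illustrative sketch of the input shape analyze_benchmarks.py assumes.
benchmarks:
- args:
    profile:
      measured_concurrencies: [1.0, 4.3, 16.7]
  metrics:
    request_latency:
      successful:
        count: 120
        mean: 2.1
        median: 1.9
        min: 0.8
        max: 6.4
        std_dev: 0.7
        percentiles:
          p95: 3.9
          p99: 5.5
```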
@@ -0,0 +1,4 @@
pyyaml>=6.0
pandas>=2.0.0
matplotlib>=3.7.0
seaborn>=0.12.0

docs/guides/k8s/Dockerfile

+12
@@ -0,0 +1,12 @@
FROM registry.access.redhat.com/ubi9/python-312:9.5-1744198409

RUN pip install --upgrade pip && \
    pip install git+https://github.com/neuralmagic/guidellm.git@main

# Replace these env vars in the guidellm-job.yaml
ENV TARGET=http://localhost:8000/v1 \
    MODEL=neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w4a16 \
    DATA_TYPE=emulated \
    DATA=prompt_tokens=512,generated_tokens=128

ENTRYPOINT ["guidellm"]
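
If you need to rebuild this image yourself, a hedged sketch of the workflow is below; the registry and tag are placeholders, and the job in this commit currently points at `quay.io/sallyom/guidellm:latest` with a TODO to replace it.

```bash
# Sketch: build and publish the benchmark image, then point the Job at it.
podman build -t <your-registry>/guidellm:latest -f Dockerfile .
podman push <your-registry>/guidellm:latest
# Update the `image:` field of the guidellm container in guidellm-job.yaml accordingly.
```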

docs/guides/k8s/README.md

+53
@@ -0,0 +1,53 @@
## Run GuideLLM with a Kubernetes Job

Here's an example of running `guidellm` against `meta-llama/Llama-3.2-3B-Instruct` that has been deployed with
[llm-d-deployer](https://github.com/neuralmagic/llm-d-deployer/blob/main/quickstart/README-minikube.md).
Replace the `--target` and the references to `Llama-3.2-3B` in [guidellm-job.yaml](./guidellm-job.yaml) to evaluate any served LLM.
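
For reference, the relevant defaults set in this commit's [guidellm-job.yaml](./guidellm-job.yaml) `env` block are shown below; edit these values to point at a different deployment or model.

```yaml
# Excerpt from guidellm-job.yaml: the values to change for another served LLM.
- name: TARGET
  value: "http://llm-d-inference-gateway.llm-d.svc.cluster.local:80/v1"
- name: MODEL
  value: "meta-llama/Llama-3.2-3B-Instruct"
- name: DATA
  value: "prompt_tokens=512,output_tokens=128"
```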

### Run evaluation

```bash
# Update the claim-name in accessor-pod.yaml and guidellm-job.yaml if using a different pvc-name
kubectl apply -f pvc.yaml
kubectl apply -f guidellm-job.yaml
```

> **📝 NOTE:** [Dockerfile](./Dockerfile) was used to build the image for the guidellm-job pod.

> **📝 NOTE:** The HF_TOKEN is passed to the job, but it is not necessary if you use the same PVC as the one storing your model.
> GuideLLM uses the model's tokenizer/processor files in its evaluation. You can pass a path instead with `--tokenizer=/path/to/model`.
> This eliminates the need for GuideLLM to download the files from Hugging Face.

The job logs show formatted tables that summarize the results, and a large YAML report file is also written to the output volume. The evaluation for this model takes roughly 20-30 minutes.
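
While the job runs, one way to watch its progress is sketched below; the job and container names come from [guidellm-job.yaml](./guidellm-job.yaml), and the timeout is only a guess based on the runtime noted above.

```bash
# Follow the benchmark logs and optionally block until the job completes.
kubectl get pods -l job-name=run-guidellm
kubectl logs -f job/run-guidellm -c guidellm
kubectl wait --for=condition=complete job/run-guidellm --timeout=40m
```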

### Extract GuideLLM Report

```bash
kubectl apply -f accessor-pod.yaml

# Wait for the pod to be ready
kubectl wait --for=condition=Ready pod/guidellm-accessor

# Copy the report file from the pod (the accessor pod mounts the volume as read-only)
kubectl cp guidellm-accessor:/app/data/guidellm-reports.tgz ./guidellm-reports.tgz
```

Extract the report:

```bash
tar -xvf guidellm-reports.tgz
```

You will now have a local file `./guidellm-reports/llama32-3b.yaml`.

You can remove the accessor pod with:

```bash
kubectl delete pod guidellm-accessor
```

### Gather Insights from GuideLLM Report

You can follow the ["Analyzing Results" section](../example-analysis/README.md#analyzing-results) to gain insights from your LLM
deployments using the GuideLLM report.

docs/guides/k8s/accessor-pod.yaml

+20
@@ -0,0 +1,20 @@
apiVersion: v1
kind: Pod
metadata:
  name: guidellm-accessor
spec:
  containers:
  - command:
    - sleep
    - "3600"
    image: registry.access.redhat.com/ubi9/ubi
    name: accessor
    volumeMounts:
    - mountPath: /app/data
      name: output
      readOnly: true
  volumes:
  - name: output
    persistentVolumeClaim:
      claimName: guidellm-output-pvc
      readOnly: true

docs/guides/k8s/guidellm-job.yaml

+84
@@ -0,0 +1,84 @@
# This job takes ~25min to complete.
# This will create a very large yaml file. To extract the file, run:
# oc apply -f accessor-pod.yaml
# mkdir ./guidellm-reports
# kubectl cp guidellm-accessor:/app/data/guidellm-reports.tgz ./guidellm-reports/guidellm-reports.tgz
# You will now have a local ./guidellm-reports/guidellm-reports.tgz; to extract it, run:
# tar -xvf guidellm-reports.tgz
# You will now have a local file ./guidellm-reports/llama32-3b.yaml
apiVersion: batch/v1
kind: Job
metadata:
  name: run-guidellm
spec:
  template:
    spec:
      containers:
      - name: guidellm
        # TODO: replace this image
        image: quay.io/sallyom/guidellm:latest
        imagePullPolicy: IfNotPresent
        securityContext:
          allowPrivilegeEscalation: false
          capabilities:
            drop:
            - ALL
          runAsNonRoot: true
          seccompProfile:
            type: RuntimeDefault
        args:
        - benchmark
        - --target=$(TARGET)
        - --data=$(DATA)
        - --rate-type=sweep
        - --model=$(MODEL)
        - --output-path=/app/data/llama32-3b.yaml
        env:
        # HF_TOKEN is not necessary if you share/use the model PVC. Guidellm needs to access the tokenizer file.
        # You can provide a path to the tokenizer file by passing `--tokenizer=/path/to/model`. If you do not
        # pass the tokenizer path, Guidellm will get the tokenizer file(s) from Huggingface.
        - name: HF_TOKEN
          valueFrom:
            secretKeyRef:
              key: HF_TOKEN
              name: huggingface-secret
        - name: TARGET
          value: "http://llm-d-inference-gateway.llm-d.svc.cluster.local:80/v1"
        - name: DATA_TYPE
          value: "emulated"
        - name: DATA
          value: "prompt_tokens=512,output_tokens=128"
        - name: MODEL
          value: "meta-llama/Llama-3.2-3B-Instruct"
        volumeMounts:
        - name: output
          mountPath: /app/data
      - name: extract
        image: registry.access.redhat.com/ubi9/ubi
        securityContext:
          allowPrivilegeEscalation: false
          capabilities:
            drop:
            - ALL
          runAsNonRoot: true
          seccompProfile:
            type: RuntimeDefault
        command: ["sh", "-c"]
        args:
        - |
          echo "Waiting for guidellm container to complete...";
          while [ ! -f /app/data/llama32-3b.yaml ]; do
            sleep 60;
          done;
          echo "Guidellm completed, packing reports...";
          cd /app/data && \
          tar czf guidellm-reports.tgz *.yaml && \
          rm /app/data/llama32-3b.yaml
        volumeMounts:
        - name: output
          mountPath: /app/data
      restartPolicy: Never
      volumes:
      - name: output
        persistentVolumeClaim:
          claimName: guidellm-output-pvc
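
The job references a `huggingface-secret` secret with an `HF_TOKEN` key; a minimal sketch for creating it is below. The token value is a placeholder, and the secret can be skipped if you rely on a shared model PVC and `--tokenizer` as described in the comments above.

```bash
# Create the secret consumed by the HF_TOKEN env var in guidellm-job.yaml.
kubectl create secret generic huggingface-secret --from-literal=HF_TOKEN=<your-hf-token>
```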

docs/guides/k8s/pvc.yaml

+11
@@ -0,0 +1,11 @@
# Example PVC - update to match your cluster
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: guidellm-output-pvc
spec:
  accessModes:
  - ReadWriteOnce
  resources:
    requests:
      storage: 2Gi
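
Before starting the job, it can be worth checking that the claim exists and binds; note that with a `WaitForFirstConsumer` storage class it will only bind once the job pod is scheduled.

```bash
kubectl get pvc guidellm-output-pvc
```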
