File tree 2 files changed +70
-0
lines changed
2 files changed +70
-0
lines changed Original file line number Diff line number Diff line change
1
# GuideLLM benchmark image.
# Installs GuideLLM from its Git repository and runs /app/run_benchmark.sh
# as an unprivileged user.
FROM python:3.11-slim

LABEL org.opencontainers.image.source="https://github.com/neuralmagic/guidellm" \
      org.opencontainers.image.description="GuideLLM Benchmark Container"

# System dependencies: git is required by the `pip install git+https://...`
# step below, curl by the HEALTHCHECK. --no-install-recommends keeps optional
# packages out of the image; ca-certificates is listed explicitly so HTTPS
# keeps working without them. The apt lists are removed in the same layer so
# they never reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        git \
    && rm -rf /var/lib/apt/lists/*

# Create non-root user with a fixed UID
RUN useradd -m -u 1000 guidellm

# Set working directory (created automatically if missing)
WORKDIR /app

# Install GuideLLM from the repository's default branch.
# --no-cache-dir stops pip's download cache from being baked into the layer.
RUN pip install --no-cache-dir git+https://github.com/neuralmagic/guidellm.git

# Copy the benchmark script, make it executable, and hand /app to the
# non-root user in a single layer.
COPY run_benchmark.sh /app/
RUN chmod +x /app/run_benchmark.sh \
    && chown -R guidellm:guidellm /app

# Switch to non-root user for everything that runs in the container
USER guidellm

# Probe the benchmark target's /health endpoint. TARGET is the same variable
# run_benchmark.sh reads (default http://localhost:8000); the shell-form CMD
# lets it expand at runtime, so an overridden TARGET is probed instead of a
# hard-coded localhost address.
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
    CMD curl -f "${TARGET:-http://localhost:8000}/health" || exit 1

# Exec-form entrypoint: the script is started directly, without a wrapping shell
ENTRYPOINT ["/app/run_benchmark.sh"]
Original file line number Diff line number Diff line change
1
#!/bin/bash
#
# Entrypoint for the GuideLLM benchmark container.
#
# Builds a `guidellm benchmark` invocation from environment variables and
# replaces this shell with it.
#
# Environment variables (all optional; defaults shown):
#   TARGET         http://localhost:8000
#   MODEL          neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w4a16
#   RATE_TYPE      sweep
#   DATA           prompt_tokens=256,output_tokens=128
#   MAX_REQUESTS   100
#   MAX_SECONDS    (unset: --max-seconds is not passed)
#   OUTPUT_PATH    /results/guidellm_benchmark_results  (path without extension)
#   OUTPUT_FORMAT  json  (can be json, yaml, or yml; used as the file extension)

set -euo pipefail

# Benchmark parameters
TARGET="${TARGET:-http://localhost:8000}"
MODEL="${MODEL:-neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w4a16}"
RATE_TYPE="${RATE_TYPE:-sweep}"
DATA="${DATA:-prompt_tokens=256,output_tokens=128}"
MAX_REQUESTS="${MAX_REQUESTS:-100}"
MAX_SECONDS="${MAX_SECONDS:-}"

# Output configuration
OUTPUT_PATH="${OUTPUT_PATH:-/results/guidellm_benchmark_results}"
OUTPUT_FORMAT="${OUTPUT_FORMAT:-json}"

# Build the command as an argv array rather than a string passed to `eval`,
# so values containing spaces, quotes, or shell metacharacters are handed to
# guidellm verbatim and are never re-parsed by the shell.
cmd=(
    guidellm benchmark
    --target "${TARGET}"
    --model "${MODEL}"
    --rate-type "${RATE_TYPE}"
    --data "${DATA}"
    # MAX_REQUESTS and OUTPUT_PATH always have a value here: `:-` substitutes
    # the default for both unset and empty variables.
    --max-requests "${MAX_REQUESTS}"
)

# MAX_SECONDS has an empty default, so the flag is only added when it is set.
if [[ -n "${MAX_SECONDS}" ]]; then
    cmd+=(--max-seconds "${MAX_SECONDS}")
fi

# The output file is OUTPUT_PATH with OUTPUT_FORMAT appended as its extension.
cmd+=(--output-path "${OUTPUT_PATH}.${OUTPUT_FORMAT}")

# Log the exact argv, shell-quoted so it can be copy-pasted.
printf 'Running command:'
printf ' %q' "${cmd[@]}"
printf '\n'

# exec replaces the shell so guidellm becomes the container's main process
# and receives signals (e.g. SIGTERM from `docker stop`) directly.
exec "${cmd[@]}"
You can’t perform that action at this time.
0 commit comments