-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathDockerfile.gpu
More file actions
83 lines (67 loc) · 2.63 KB
/
Dockerfile.gpu
File metadata and controls
83 lines (67 loc) · 2.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# ==============================
# Stage 1: builder
# Rust toolchain image used to compile the service and to fetch the
# ONNX Runtime GPU libraries that the runtime stage copies out.
# ==============================
FROM rust:1.89-bookworm AS builder

# Native build prerequisites, listed alphabetically for easy diffing.
# wget and tar are used later in this stage to download and unpack the
# ONNX Runtime release archive.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        cmake \
        libssl-dev \
        pkg-config \
        protobuf-compiler \
        tar \
        wget \
    && rm -rf /var/lib/apt/lists/*
WORKDIR /app

# --- Download and unpack the ONNX Runtime GPU release ---
# ORT_VERSION selects the GitHub release to fetch.
# ORT_SHA256 is optional: when supplied (--build-arg ORT_SHA256=<hex digest>)
# the archive is verified before it is unpacked; when left empty the check is
# skipped, which matches the previous behaviour.
ARG ORT_VERSION=1.22.0
ARG ORT_SHA256=""
# The archive unpacks to /app/onnxruntime-linux-x64-gpu-${ORT_VERSION}/, the
# path the runtime stage copies the shared libraries from, so the extraction
# location must not change. The tarball is removed in the same layer and the
# download/extract steps are kept quiet so the build log stays readable.
RUN set -eu; \
    archive="onnxruntime-linux-x64-gpu-${ORT_VERSION}.tgz"; \
    wget --no-verbose "https://github.com/microsoft/onnxruntime/releases/download/v${ORT_VERSION}/${archive}"; \
    if [ -n "${ORT_SHA256:-}" ]; then \
        echo "${ORT_SHA256}  ${archive}" | sha256sum -c -; \
    fi; \
    tar -xzf "${archive}"; \
    rm "${archive}"
# --- Compile the application ---
# Pass 1 (dependency warm-up): only the manifests plus a stub main.rs are
# present, so this layer compiles the dependency graph and stays cached until
# Cargo.toml or Cargo.lock change.
COPY Cargo.toml Cargo.lock ./
RUN mkdir src \
    && echo "fn main() {}" > src/main.rs \
    && cargo build --release

# Pass 2 (real build): bring in the actual sources, the build script and the
# proto definitions. Touching main.rs bumps its mtime so cargo rebuilds the
# crate instead of reusing the stub artifact from pass 1.
COPY src ./src
COPY build.rs ./build.rs
COPY proto ./proto
RUN touch src/main.rs \
    && cargo build --release
# ==============================
# Stage 2: runtime
# CUDA runtime image that receives the compiled binary and the ONNX Runtime
# shared libraries from the builder stage.
# ==============================
FROM nvidia/cuda:12.3.2-runtime-ubuntu22.04 AS runtime

WORKDIR /app

# Runtime packages plus cuDNN:
#   1. install ca-certificates and wget,
#   2. register NVIDIA's CUDA apt repository through the cuda-keyring package
#      (the .deb is deleted again once installed),
#   3. refresh the package index and install cuDNN 9 for CUDA 12, the cuDNN
#      line that ONNX Runtime v1.22 needs,
#   4. drop the apt lists to keep the layer small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        wget \
    && keyring=cuda-keyring_1.1-1_all.deb \
    && wget "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/${keyring}" \
    && dpkg -i "${keyring}" \
    && rm "${keyring}" \
    && apt-get update \
    && apt-get install -y --no-install-recommends libcudnn9-cuda-12 \
    && rm -rf /var/lib/apt/lists/*
# Ship the compiled service binary from the builder stage.
COPY --from=builder /app/target/release/gemma-embedder-rust /app/gemma-embedder-rust

# Ship the ONNX Runtime shared objects unpacked in the builder stage.
# The trailing slash makes COPY treat /onnxruntime/lib/ as a directory and
# create it (and its parents) on demand, so no separate mkdir layer is needed.
COPY --from=builder /app/onnxruntime-linux-x64-gpu-*/lib/*.so /onnxruntime/lib/

# Put the ONNX Runtime and CUDA library directories first on the loader path,
# then append whatever LD_LIBRARY_PATH the base image already defined instead
# of discarding it; the inherited entries can be what makes the host-injected
# NVIDIA driver libraries discoverable. The ${VAR:+...} form adds the
# separator only when the inherited value is non-empty, so the path never ends
# in a stray ':' (which the loader would treat as the current directory).
ENV LD_LIBRARY_PATH=/onnxruntime/lib:/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
# Install the model download helper under /usr/local/bin; the container start
# command invokes it by its bare name, download_models.sh.
COPY download_models.sh /usr/local/bin/download_models.sh
# Set the executable bit explicitly rather than relying on the file mode the
# script happens to have in the build context.
RUN chmod +x /usr/local/bin/download_models.sh
# Default runtime configuration, grouped in a single instruction. Each value
# can be overridden at launch with `docker run -e NAME=value`.
# NOTE(review): presumably read by gemma-embedder-rust and/or
# download_models.sh; confirm the exact meaning and units of each setting
# against those sources.
ENV EXECUTION_PROVIDER=gpu \
    MODEL_VARIANT=q4 \
    MAX_TOKENS=2048 \
    MAX_BATCH_SIZE=32 \
    MAX_WAIT_MS=5
# Ports the service is expected to listen on. EXPOSE only records metadata;
# publish the ports with `docker run -p` (or the orchestrator equivalent).
# NOTE(review): 50051 looks like a gRPC port and 3000 an HTTP port; confirm
# against the application.
EXPOSE 3000/tcp
EXPOSE 50051/tcp

# NOTE(review): this stage declares no USER, so the process runs as the base
# image's default user (presumably root). Consider a dedicated non-root user
# once the paths download_models.sh writes to are confirmed.
#
# Start-up sequence, run through `sh -c` because it needs shell features:
#   1. run download_models.sh; the service is only started if it succeeds,
#   2. set MODEL_PATH to the .onnx file(s) found under ./model,
#   3. exec the service so it replaces the shell process and receives
#      container stop signals directly.
CMD ["sh", "-c", "download_models.sh && MODEL_PATH=$(ls model/*.onnx) exec ./gemma-embedder-rust"]