# gemma-embedder-rust/Dockerfile.gpu (repository: goravaa/gemma-embedder-rust, branch: main)

# ------------------------------
# Stage 1: Builder
# ------------------------------
FROM rust:1.89-bookworm AS builder

# Build-time packages for this stage (kept in alphabetical order);
# wget and tar are used to fetch and unpack the ONNX Runtime archive.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        cmake \
        libssl-dev \
        pkg-config \
        protobuf-compiler \
        tar \
        wget \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# --- Manually download and extract ONNX Runtime (GPU build) ---
# The archive unpacks to /app/onnxruntime-linux-x64-gpu-<version>/; the runtime
# stage copies the shared libraries out of its lib/ directory.
# Select another release with: --build-arg ORT_VERSION=<x.y.z>
# The archive is deleted in the same RUN that downloads it so it is not kept
# in the layer, and wget/tar run quietly to keep the build log readable.
ARG ORT_VERSION=1.22.0
RUN ort_archive="onnxruntime-linux-x64-gpu-${ORT_VERSION}.tgz" \
    && wget --no-verbose "https://github.com/microsoft/onnxruntime/releases/download/v${ORT_VERSION}/${ort_archive}" \
    && tar -xzf "${ort_archive}" \
    && rm "${ort_archive}"


# Build the application in two passes:
#   1. with only the manifests and a stub main.rs, so the dependency build
#      lands in a layer that is reused until Cargo.toml/Cargo.lock change;
#   2. with the real sources, build script and proto definitions.
COPY Cargo.toml Cargo.lock ./
RUN mkdir src \
    && echo "fn main() {}" > src/main.rs \
    && cargo build --release

COPY proto ./proto
COPY build.rs ./build.rs
COPY src ./src
# Bump main.rs's mtime before the second build — presumably so cargo does not
# treat the stub build's output as up to date.
RUN touch src/main.rs \
    && cargo build --release

# ------------------------------
# Stage 2: Runtime
# ------------------------------
FROM nvidia/cuda:12.3.2-runtime-ubuntu22.04 AS runtime

WORKDIR /app

# Runtime dependencies plus cuDNN.
# The NVIDIA cuda-keyring package is installed first so apt can pull from the
# CUDA repository; libcudnn9-cuda-12 (cuDNN 9 for CUDA 12) is then installed
# because that is what ONNX Runtime v1.22 requires.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        wget \
    && keyring_deb=cuda-keyring_1.1-1_all.deb \
    && wget "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/${keyring_deb}" \
    && dpkg -i "${keyring_deb}" \
    && rm "${keyring_deb}" \
    && apt-get update \
    && apt-get install -y --no-install-recommends libcudnn9-cuda-12 \
    && rm -rf /var/lib/apt/lists/*

# Copy the compiled application binary from the builder stage.
COPY --from=builder /app/target/release/gemma-embedder-rust /app/gemma-embedder-rust

# Copy the ONNX Runtime shared libraries that the builder stage extracted into
# a dedicated directory. COPY creates /onnxruntime/lib itself, so no separate
# `RUN mkdir` layer is needed.
COPY --from=builder /app/onnxruntime-linux-x64-gpu-*/lib/*.so /onnxruntime/lib/

# Put the ONNX Runtime and CUDA library directories first on the dynamic
# loader's search path. Any LD_LIBRARY_PATH inherited from the base image is
# appended rather than overwritten; the ${VAR:+...} form adds nothing (and no
# trailing colon) when the variable is unset.
ENV LD_LIBRARY_PATH=/onnxruntime/lib:/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}

# Install the model download script under /usr/local/bin and mark it executable.
COPY download_models.sh /usr/local/bin/download_models.sh
RUN chmod +x /usr/local/bin/download_models.sh

# Default application settings, grouped in one instruction; each value can be
# overridden when the container is started (e.g. `docker run -e MODEL_VARIANT=...`).
ENV EXECUTION_PROVIDER=gpu \
    MODEL_VARIANT=q4 \
    MAX_TOKENS=2048 \
    MAX_BATCH_SIZE=32 \
    MAX_WAIT_MS=5

# Declare the application's ports. EXPOSE is documentation only; it does not
# publish them — use `docker run -p` for that.
# NOTE(review): presumably 3000 is HTTP and 50051 is gRPC — confirm against the app.
EXPOSE 3000 50051

# Start-up command. It is wrapped in `sh -c` because it needs shell features
# (the `&&` chain and the $(...) command substitution):
#   1. run download_models.sh;
#   2. set MODEL_PATH to the output of `ls model/*.onnx`, resolved relative to
#      the working directory;
#   3. `exec` the server binary so it replaces the shell process.
# NOTE(review): if the glob matches more than one file, MODEL_PATH will contain
# all of them separated by newlines — confirm download_models.sh leaves exactly
# one .onnx file in model/.
# NOTE(review): this stage sets no USER, so the process runs as the base
# image's default user (presumably root) — confirm this is intended.
CMD ["sh", "-c", "download_models.sh && MODEL_PATH=$(ls model/*.onnx) exec ./gemma-embedder-rust"]