@@ -75,12 +75,50 @@ jobs:
           which tornado || { echo "::error::tornado unavailable during GPULlama3 build"; exit 1; }
           tornado --version
           make
-      - name: Run Test Inference
+
+  test-models:
+    runs-on: self-hosted
+    needs: build-and-run
+
+    strategy:
+      fail-fast: false
+      matrix:
+        model:
+          - /opt/models/DeepSeek-R1-Distill-Qwen-1.5B-F16.gguf
+          - /opt/models/DeepSeek-R1-Distill-Qwen-1.5B-Q8_0.gguf
+          - /opt/models/Llama-3.2-1B-Instruct-F16.gguf
+          - /opt/models/Llama-3.2-1B-Instruct-Q8_0.gguf
+          - /opt/models/Llama-3.2-3B-Instruct-F16.gguf
+          - /opt/models/Llama-3.2-3B-Instruct-Q8_0.gguf
+          - /opt/models/Mistral-7B-Instruct-v0.3.fp16.gguf
+          - /opt/models/Mistral-7B-Instruct-v0.3.Q8_0.gguf
+          - /opt/models/Phi-3-mini-4k-instruct-fp16.gguf
+          - /opt/models/Phi-3-mini-4k-instruct-Q8_0.gguf
+          - /opt/models/Qwen2.5-0.5B-Instruct-f16.gguf
+          - /opt/models/Qwen2.5-0.5B-Instruct-Q8_0.gguf
+          - /opt/models/qwen2.5-1.5b-instruct-fp16.gguf
+          - /opt/models/qwen2.5-1.5b-instruct-q8_0.gguf
+          - /opt/models/Qwen3-0.6B-f16.gguf
+          - /opt/models/Qwen3-0.6B-Q8_0.gguf
+          - /opt/models/Qwen3-4B-f16.gguf
+          - /opt/models/Qwen3-4B-Q8_0.gguf
+
+    env:
+      JAVA_HOME: /opt/jenkins/jdks/graal-23.1.0/jdk-21.0.3
+      TORNADO_SDK: ${{ needs.build-and-run.outputs.tornado_sdk }}
+
+    steps:
+      - name: Checkout GPULlama3
+        uses: actions/checkout@v4
+
+      - name: Run inference for ${{ matrix.model }}
         run: |
           set -x
           cd ${{ github.workspace }}
+
           export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
-          which tornado || { echo "::error::tornado not found at runtime"; exit 1; }
+          echo "Using Tornado SDK: $TORNADO_SDK"
+
           ./llama-tornado --gpu --opencl \
-            --model /home/michalis/models/Llama-3.2-1B-Instruct-F16.gguf \
+            --model "${{ matrix.model }}" \
             --prompt "Say hello"
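
The new test-models job picks up the SDK location from ${{ needs.build-and-run.outputs.tornado_sdk }}, but the side of build-and-run that publishes that output sits outside this hunk. Below is a minimal sketch of how such a job output is typically wired in GitHub Actions; the step id "sdk", the step name, and the runner label on build-and-run are illustrative assumptions, not part of this PR, and it presumes the build job already has TORNADO_SDK set in its environment.

# Hypothetical sketch, not part of this diff: one way build-and-run could expose
# the SDK path that test-models consumes via needs.build-and-run.outputs.tornado_sdk.
jobs:
  build-and-run:
    runs-on: self-hosted                 # assumed to match the test-models runner
    outputs:
      tornado_sdk: ${{ steps.sdk.outputs.tornado_sdk }}
    steps:
      # ... existing TornadoVM / GPULlama3 build steps ...
      - name: Publish Tornado SDK path   # illustrative step, not taken from the PR
        id: sdk
        run: echo "tornado_sdk=$TORNADO_SDK" >> "$GITHUB_OUTPUT"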