From 5a9dc3e4a1a59dab1ef0f05faad4784a6a529a17 Mon Sep 17 00:00:00 2001
From: Roy Shilkrot <roy.shil@gmail.com>
Date: Mon, 3 Feb 2025 13:00:10 -0500
Subject: [PATCH 1/2] CUDA: update compilation flags for improved performance

---
 src/ggml-cuda/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/ggml-cuda/CMakeLists.txt b/src/ggml-cuda/CMakeLists.txt
index 119fd39b8..54989d3cd 100644
--- a/src/ggml-cuda/CMakeLists.txt
+++ b/src/ggml-cuda/CMakeLists.txt
@@ -96,7 +96,7 @@ if (CUDAToolkit_FOUND)
 
     set(CUDA_CXX_FLAGS "")
 
-    set(CUDA_FLAGS -use_fast_math)
+    set(CUDA_FLAGS -use_fast_math --threads=0 --split-compile=0)
 
     if (GGML_FATAL_WARNINGS)
         list(APPEND CUDA_FLAGS -Werror all-warnings)

From 9172f862bfe6aa3498e6481ec4dc5803e6d4bf38 Mon Sep 17 00:00:00 2001
From: Roy Shilkrot <roy.shil@gmail.com>
Date: Mon, 3 Feb 2025 13:25:49 -0500
Subject: [PATCH 2/2] CMake: add option for CUDA compile threads and update
 flags

---
 CMakeLists.txt               | 1 +
 src/ggml-cuda/CMakeLists.txt | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7c069e420..63a191411 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -152,6 +152,7 @@ option(GGML_CUDA_NO_PEER_COPY               "ggml: do not use peer to peer copie
 option(GGML_CUDA_NO_VMM                     "ggml: do not try to use CUDA VMM"                OFF)
 option(GGML_CUDA_FA_ALL_QUANTS              "ggml: compile all quants for FlashAttention"     OFF)
 option(GGML_CUDA_GRAPHS                     "ggml: use CUDA graphs (llama.cpp only)"          ${GGML_CUDA_GRAPHS_DEFAULT})
+option(GGML_CUDA_COMPILE_THREADS            "ggml: CUDA compile threads (0 - auto)"           0)
 
 option(GGML_HIP                             "ggml: use HIP"                                   OFF)
 option(GGML_HIP_GRAPHS                      "ggml: use HIP graph, experimental, slow"         OFF)
diff --git a/src/ggml-cuda/CMakeLists.txt b/src/ggml-cuda/CMakeLists.txt
index 54989d3cd..721f9aeae 100644
--- a/src/ggml-cuda/CMakeLists.txt
+++ b/src/ggml-cuda/CMakeLists.txt
@@ -96,7 +96,7 @@ if (CUDAToolkit_FOUND)
 
     set(CUDA_CXX_FLAGS "")
 
-    set(CUDA_FLAGS -use_fast_math --threads=0 --split-compile=0)
+    set(CUDA_FLAGS -use_fast_math --threads=${GGML_CUDA_COMPILE_THREADS} --split-compile=${GGML_CUDA_COMPILE_THREADS})
 
     if (GGML_FATAL_WARNINGS)
         list(APPEND CUDA_FLAGS -Werror all-warnings)