From 5a9dc3e4a1a59dab1ef0f05faad4784a6a529a17 Mon Sep 17 00:00:00 2001
From: Roy Shilkrot
Date: Mon, 3 Feb 2025 13:00:10 -0500
Subject: [PATCH 1/2] CUDA: update compilation flags for improved performance

Pass --threads=0 and --split-compile=0 to nvcc so that it parallelises
its compilation steps and optimisation passes across the CPUs of the
build machine. This affects build time only, not the generated code.

---
 src/ggml-cuda/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/ggml-cuda/CMakeLists.txt b/src/ggml-cuda/CMakeLists.txt
index 119fd39b8..54989d3cd 100644
--- a/src/ggml-cuda/CMakeLists.txt
+++ b/src/ggml-cuda/CMakeLists.txt
@@ -96,7 +96,7 @@ if (CUDAToolkit_FOUND)
 
     set(CUDA_CXX_FLAGS "")
 
-    set(CUDA_FLAGS -use_fast_math)
+    set(CUDA_FLAGS -use_fast_math --threads=0 --split-compile=0)
 
     if (GGML_FATAL_WARNINGS)
         list(APPEND CUDA_FLAGS -Werror all-warnings)

From 9172f862bfe6aa3498e6481ec4dc5803e6d4bf38 Mon Sep 17 00:00:00 2001
From: Roy Shilkrot
Date: Mon, 3 Feb 2025 13:25:49 -0500
Subject: [PATCH 2/2] CMake: add option for CUDA compile threads and update flags

GGML_CUDA_COMPILE_THREADS is declared as a STRING cache entry instead of
with option(): option() only models booleans and stores a default of 0
as OFF, which would expand to --threads=OFF --split-compile=OFF on the
nvcc command line. The default of 0 keeps the behaviour of the previous
commit (nvcc picks the thread count itself).

---
 CMakeLists.txt               | 1 +
 src/ggml-cuda/CMakeLists.txt | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7c069e420..63a191411 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -152,6 +152,7 @@ option(GGML_CUDA_NO_PEER_COPY "ggml: do not use peer to peer copie
 option(GGML_CUDA_NO_VMM "ggml: do not try to use CUDA VMM" OFF)
 option(GGML_CUDA_FA_ALL_QUANTS "ggml: compile all quants for FlashAttention" OFF)
 option(GGML_CUDA_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" ${GGML_CUDA_GRAPHS_DEFAULT})
+set(GGML_CUDA_COMPILE_THREADS "0" CACHE STRING "ggml: CUDA compile threads (0 - auto)")
 
 option(GGML_HIP "ggml: use HIP" OFF)
 option(GGML_HIP_GRAPHS "ggml: use HIP graph, experimental, slow" OFF)
diff --git a/src/ggml-cuda/CMakeLists.txt b/src/ggml-cuda/CMakeLists.txt
index 54989d3cd..721f9aeae 100644
--- a/src/ggml-cuda/CMakeLists.txt
+++ b/src/ggml-cuda/CMakeLists.txt
@@ -96,7 +96,7 @@ if (CUDAToolkit_FOUND)
 
     set(CUDA_CXX_FLAGS "")
 
-    set(CUDA_FLAGS -use_fast_math --threads=0 --split-compile=0)
+    set(CUDA_FLAGS -use_fast_math --threads=${GGML_CUDA_COMPILE_THREADS} --split-compile=${GGML_CUDA_COMPILE_THREADS})
 
     if (GGML_FATAL_WARNINGS)
         list(APPEND CUDA_FLAGS -Werror all-warnings)