diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7c069e420..63a191411 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -152,6 +152,9 @@ option(GGML_CUDA_NO_PEER_COPY "ggml: do not use peer to peer copie
 option(GGML_CUDA_NO_VMM "ggml: do not try to use CUDA VMM" OFF)
 option(GGML_CUDA_FA_ALL_QUANTS "ggml: compile all quants for FlashAttention" OFF)
 option(GGML_CUDA_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" ${GGML_CUDA_GRAPHS_DEFAULT})
+# Numeric setting, so it is declared as a STRING cache entry: option() is boolean-only
+# and would store the initial 0 as OFF, which would then expand to "--threads=OFF".
+set(GGML_CUDA_COMPILE_THREADS "0" CACHE STRING "ggml: CUDA compile threads (0 - auto)")
 
 option(GGML_HIP "ggml: use HIP" OFF)
 option(GGML_HIP_GRAPHS "ggml: use HIP graph, experimental, slow" OFF)
diff --git a/src/ggml-cuda/CMakeLists.txt b/src/ggml-cuda/CMakeLists.txt
index 54989d3cd..721f9aeae 100644
--- a/src/ggml-cuda/CMakeLists.txt
+++ b/src/ggml-cuda/CMakeLists.txt
@@ -96,7 +96,7 @@ if (CUDAToolkit_FOUND)
 
     set(CUDA_CXX_FLAGS "")
 
-    set(CUDA_FLAGS -use_fast_math --threads=0 --split-compile=0)
+    set(CUDA_FLAGS -use_fast_math --threads=${GGML_CUDA_COMPILE_THREADS} --split-compile=${GGML_CUDA_COMPILE_THREADS})
 
     if (GGML_FATAL_WARNINGS)
         list(APPEND CUDA_FLAGS -Werror all-warnings)