Merge branch 'master' of https://github.com/NVlabs/gbrl

NVlabs · Jun 6, 2024 · bbbbe36 · bbbbe36
2 parents 873db0e + 32a5545
commit bbbbe36
Show file tree

Hide file tree

Showing 6 changed files with 73 additions and 45 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -112,17 +112,17 @@ if(APPLE)
     find_package(OpenMP)
     if(NOT OpenMP_FOUND)
       # Try again with extra path info; required for libomp 15+ from Homebrew
-      execute_process(COMMAND brew --prefix libomp
-                      OUTPUT_VARIABLE HOMEBREW_LIBOMP_PREFIX
-                      OUTPUT_STRIP_TRAILING_WHITESPACE)
-      set(OpenMP_C_FLAGS
+        execute_process(COMMAND brew --prefix libomp
+                        OUTPUT_VARIABLE HOMEBREW_LIBOMP_PREFIX
+                        OUTPUT_STRIP_TRAILING_WHITESPACE)
+        set(OpenMP_C_FLAGS
         "-Xpreprocessor -fopenmp -I${HOMEBREW_LIBOMP_PREFIX}/include")
-      set(OpenMP_CXX_FLAGS
+        set(OpenMP_CXX_FLAGS
         "-Xpreprocessor -fopenmp -I${HOMEBREW_LIBOMP_PREFIX}/include")      
-      set(OpenMP_C_LIB_NAMES omp)
-      set(OpenMP_CXX_LIB_NAMES omp)
-      set(OpenMP_omp_LIBRARY ${HOMEBREW_LIBOMP_PREFIX}/lib/libomp.dylib)
-      find_package(OpenMP REQUIRED)  
+        set(OpenMP_C_LIB_NAMES omp)
+        set(OpenMP_CXX_LIB_NAMES omp)
+        set(OpenMP_omp_LIBRARY ${HOMEBREW_LIBOMP_PREFIX}/lib/libomp.dylib)
+        find_package(OpenMP REQUIRED)  
     endif()
 else()
     find_package(OpenMP REQUIRED)
@@ -131,6 +131,13 @@ else()
     set(OpenMP_omp_LIBRARY ${OpenMP_omp_LIBRARY})
 endif()
 
+# Ensure OpenMP flags and directories are applied
+if (OpenMP_FOUND)
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
+    set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
+endif()
+
 # Include directories
 include_directories(${pybind11_INCLUDE_DIRS} ${CMAKE_SOURCE_DIR}/gbrl/src/cpp)
 
@@ -192,7 +199,6 @@ elseif (WIN32)
     target_link_libraries(gbrl_cpp PRIVATE OpenMP::OpenMP_CXX)
     if (USE_CUDA)
         set(cuda_lib_path "${CUDAToolkit_ROOT_DIR}/lib/x64")
-        # target_link_libraries(gbrl_cpp PRIVATE ${cuda_lib_path}/cudart.lib)
         target_link_libraries(gbrl_cpp PRIVATE CUDA::cudart)
     endif()
 endif()

diff --git a/README.md b/README.md
@@ -9,38 +9,12 @@ GBRL is a Python-based GBT library designed and optimized for reinforcement lear
 
 
 ## Getting started
-
-### Dependencies 
-#### MAC OS 
-```
-llvm
-openmp
-```
-
-Make sure to run:
-```
-brew install libomp
-brew install llvm
- ```
-
-xcode command line tools should be installed installed 
-
-### Installation
+GBRL is installed via
 ```
 pip install gbrl
-``` 
-
-Verify that GPU is visible by running
 ```
-import gbrl
-
-gbrl.cuda_available()
-```
-
-GBRL can be compiled and installed with a CPU version only even on CUDA capable machines by setting `CPU_ONLY=1` as an environment variable. 
 
-*OPTIONAL*  
-For tree visualization make sure graphviz is installed before compilation. 
+For further installation details and dependencies, see the documentation.
 
 ***Usage Example see `tutorial.ipynb`***
 

diff --git a/docs/quickstart.rst b/docs/quickstart.rst
@@ -7,4 +7,50 @@ Install GBRL via pip:
    
    pip install gbrl
 
-GBRL can be compiled and installed with a CPU version only even on CUDA capable machines by setting `CPU_ONLY=1` as an environment variable.
+The CPU-only version is installed with the following command:
+
+.. code-block:: console
+
+   CPU_ONLY=1 pip install gbrl
+
+Dependencies 
+============ 
+
+MAC OS
+~~~~~~
+
+GBRL is dependent on LLVM and OpenMP. 
+
+These dependencies can be installed via Homebrew:
+
+.. code-block:: console
+
+   brew install libomp llvm
+
+
+Once installed, make sure that the appropriate environment variables are set:
+
+.. code-block:: bash
+
+   export PATH="$(brew --prefix llvm)/bin:$PATH"
+   export LDFLAGS="-L$(brew --prefix libomp)/lib -L$(brew --prefix llvm)/lib -L$(brew --prefix llvm)/lib/c++ -Wl,-rpath,$(brew --prefix llvm)/lib/c++"
+   export CPPFLAGS="-I$(brew --prefix libomp)/include -I$(brew --prefix llvm)/include"
+   export CC="$(brew --prefix llvm)/bin/clang"
+   export CXX="$(brew --prefix llvm)/bin/clang++"
+   export DYLD_LIBRARY_PATH="$(brew --prefix llvm)/lib:$(brew --prefix libomp)/lib" 
+
+CUDA
+~~~~ 
+
+.. code-block:: python
+   # Verify that GPU is visible by running
+   import gbrl
+
+   print(gbrl.cuda_available())
+
+
+Graphviz
+~~~~~~~~
+
+*OPTIONAL*  
+For tree visualization make sure graphviz is installed before compilation. 
diff --git a/gbrl/src/cpp/CMakeLists.txt b/gbrl/src/cpp/CMakeLists.txt
@@ -29,11 +29,11 @@ if (CUDAToolkit_FOUND AND NOT APPLE)
 endif()
 
 if (CMAKE_SYSTEM_NAME STREQUAL "Linux")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -std=c++14 ${OpenMP_CXX_FLAGS} -Wall -Wpedantic -Wextra -march=native")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -std=c++14 -Wall -Wpedantic -Wextra -march=native")
 elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -std=c++14 ${OpenMP_C_FLAGS} -Wall -Wpedantic -Wextra")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -std=c++14 -Wall -Wpedantic -Wextra")
 elseif (WIN32)
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2 /std:c++14 ${OpenMP_CXX_FLAGS} /W3")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2 /std:c++14 /W3")
 endif()
 
 if (CMAKE_BUILD_TYPE STREQUAL "Debug")

diff --git a/gbrl/src/cpp/gbrl.cpp b/gbrl/src/cpp/gbrl.cpp
@@ -999,7 +999,9 @@ void GBRL::plot_tree(int tree_idx, const std::string &filename){
     }
 #endif
 #else
-throw std::runtime_error("GBRL compiled without Graphviz! Cannot plot model");
+    (void)tree_idx;
+    (void)filename;
+    throw std::runtime_error("GBRL compiled without Graphviz! Cannot plot model");
 #endif 
 }
 

diff --git a/gbrl/src/cpp/loss.cpp b/gbrl/src/cpp/loss.cpp
@@ -25,7 +25,7 @@ float MultiRMSE::get_loss_and_gradients(const float *raw_preds, const float *raw
         int thread_id = omp_get_thread_num();
         int start_idx = thread_id * elements_per_thread;
         int end_idx = (thread_id == n_threads - 1) ? n_elements : start_idx + elements_per_thread;
-#ifndef _MSC_VER
+#if !defined(_MSC_VER) && !defined(__APPLE__)
     #pragma omp simd
 #endif
         for (int i = start_idx; i < end_idx; ++i){
@@ -57,7 +57,7 @@ float MultiRMSE::get_loss(const float *raw_preds, const float *raw_targets, cons
             int end_idx = (thread_id == n_threads - 1) ? n_samples : start_idx + samples_per_thread;
             for (int sample_idx = start_idx; sample_idx < end_idx; ++sample_idx){
                 row = sample_idx * output_dim;
-#ifndef _MSC_VER
+#if !defined(_MSC_VER) && !defined(__APPLE__)
     #pragma omp simd
 #endif 
                 for (int d = 0; d < output_dim; ++d){