Skip to content

Commit

Permalink
Removed OpenMP SIMD pragmas for MSVC builds
Browse files: browse the repository at this point in the history
  • Loading branch information
benja263 committed Jun 5, 2024
1 parent d7dab06 commit d3d6e68
Show file tree
Hide file tree
Showing 10 changed files with 233 additions and 67 deletions.
8 changes: 2 additions & 6 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -125,11 +125,6 @@ if(APPLE)
find_package(OpenMP REQUIRED)
endif()
else()
# Set OpenMP runtime for MSVC
if (MSVC)
set(OpenMP_RUNTIME_MSVC experimental)
message(STATUS "OpenMP_RUNTIME_MSVC set to ${OpenMP_RUNTIME_MSVC}")
endif()
find_package(OpenMP REQUIRED)
set(OpenMP_CXX_FLAGS ${OpenMP_CXX_FLAGS})
set(OpenMP_CXX_LIB_NAMES ${OpenMP_CXX_LIB_NAMES})
Expand Down Expand Up @@ -197,7 +192,8 @@ elseif (WIN32)
target_link_libraries(gbrl_cpp PRIVATE OpenMP::OpenMP_CXX)
if (USE_CUDA)
set(cuda_lib_path "${CUDAToolkit_ROOT_DIR}/lib/x64")
target_link_libraries(gbrl_cpp PRIVATE ${cuda_lib_path}/cudart.lib)
# target_link_libraries(gbrl_cpp PRIVATE ${cuda_lib_path}/cudart.lib)
target_link_libraries(gbrl_cpp PRIVATE CUDA::cudart)
endif()
endif()

Expand Down
3 changes: 1 addition & 2 deletions gbrl/src/cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,7 @@ if (CMAKE_SYSTEM_NAME STREQUAL "Linux")
elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -std=c++14 ${OpenMP_C_FLAGS} -Wall -Wpedantic -Wextra")
elseif (WIN32)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /std:c++14 ${OpenMP_CXX_FLAGS} /O2 /W3 ")
message(STATUS ${OpenMP_CXX_FLAGS} )
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2 /std:c++14 ${OpenMP_CXX_FLAGS} /W3")
endif()

if (CMAKE_BUILD_TYPE STREQUAL "Debug")
Expand Down
8 changes: 6 additions & 2 deletions gbrl/src/cpp/fitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,9 @@ float Fitter::fit_cpu(dataSet *dataset, const float* targets, ensembleData *edat
if (metadata->split_score_func == L2){
float *mean_grads = calculate_mean(build_grads, batch_n_samples, output_dim, par_th);
float *std = calculate_var_and_center(build_grads, mean_grads, batch_n_samples, output_dim, par_th);
#pragma omp simd
#ifndef _MSC_VER
#pragma omp simd
#endif
for (int i = 0; i < output_dim; ++i)
std[i] = sqrtf(std[i]);
divide_mat_by_vec_inplace(build_grads, std, batch_dataset.n_samples, metadata->output_dim, metadata->par_th);
Expand Down Expand Up @@ -527,7 +529,9 @@ void Fitter::calc_leaf_value(dataSet *dataset, ensembleData *edata, ensembleMeta
}
if (passed){
idx = i*output_dim;
#pragma omp simd
#ifndef _MSC_VER
#pragma omp simd
#endif
for (int d = 0; d < output_dim; ++d)
edata->values[leaf_idx*output_dim + d] += grads[idx + d];
count += 1;
Expand Down
8 changes: 6 additions & 2 deletions gbrl/src/cpp/loss.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ float MultiRMSE::get_loss_and_gradients(const float *raw_preds, const float *raw
int thread_id = omp_get_thread_num();
int start_idx = thread_id * elements_per_thread;
int end_idx = (thread_id == n_threads - 1) ? n_elements : start_idx + elements_per_thread;
#pragma omp simd
#ifndef _MSC_VER
#pragma omp simd
#endif
for (int i = start_idx; i < end_idx; ++i){
row = i / output_dim;
col = i % output_dim;
Expand Down Expand Up @@ -55,7 +57,9 @@ float MultiRMSE::get_loss(const float *raw_preds, const float *raw_targets, cons
int end_idx = (thread_id == n_threads - 1) ? n_samples : start_idx + samples_per_thread;
for (int sample_idx = start_idx; sample_idx < end_idx; ++sample_idx){
row = sample_idx * output_dim;
#pragma omp simd
#ifndef _MSC_VER
#pragma omp simd
#endif
for (int d = 0; d < output_dim; ++d){
grad_value = raw_preds[row + d] - raw_targets[row + d];
losses[thread_id] += (grad_value * grad_value);
Expand Down
Loading

0 comments on commit d3d6e68

Please sign in to comment.