diff --git a/src/cuda/helpers.h b/src/cuda/helpers.h index b176805c2..09ff12c20 100644 --- a/src/cuda/helpers.h +++ b/src/cuda/helpers.h @@ -16,6 +16,11 @@ __device__ inline void __syncwarp(uint32_t mask){} //TODO: 6.1 should have this #include "ctranslate2/types.h" +#include +#include +#include +#include + #include "utils.h" #ifdef CT2_USE_HIP diff --git a/src/cuda/primitives.cu b/src/cuda/primitives.cu index b620d6b89..46515c39f 100644 --- a/src/cuda/primitives.cu +++ b/src/cuda/primitives.cu @@ -18,18 +18,22 @@ #define CUBLAS_COMPUTE_32I HIPBLAS_COMPUTE_32I #define CUDA_R_32F HIP_R_32F #define CUDA_R_16BF HIP_R_16BF -#define cublasGemmEx hipblasGemmEx_v2 +#define cublasGemmEx hipblasGemmEx #define CUDA_R_8I HIP_R_8I #define CUDA_R_32I HIP_R_32I #define CUBLAS_GEMM_DEFAULT_TENSOR_OP HIPBLAS_GEMM_DEFAULT #define cublasSgemmStridedBatched hipblasSgemmStridedBatched -#define cublasGemmStridedBatchedEx hipblasGemmStridedBatchedEx_v2 +#define cublasGemmStridedBatchedEx hipblasGemmStridedBatchedEx #else #include #include #endif #include +#include +#include +#include +#include #include "cuda/helpers.h" #include "type_dispatch.h" @@ -517,7 +521,7 @@ namespace ctranslate2 { } // cuBLAS assumes column-major storage, so swap a and b accordingly. - CUBLAS_CHECK(cublasGemmEx(cuda::get_cublas_handle(), + CUBLAS_CHECK(hipblasGemmEx(cuda::get_cublas_handle(), transpose_b ? CUBLAS_OP_T : CUBLAS_OP_N, transpose_a ? CUBLAS_OP_T : CUBLAS_OP_N, n, m, k, @@ -572,7 +576,7 @@ namespace ctranslate2 { int32_t beta_i = beta; // cuBLAS assumes column-major storage, so swap a and b accordingly. - CUBLAS_CHECK(cublasGemmEx(cuda::get_cublas_handle(), + CUBLAS_CHECK(hipblasGemmEx(cuda::get_cublas_handle(), transpose_b ? CUBLAS_OP_T : CUBLAS_OP_N, transpose_a ? CUBLAS_OP_T : CUBLAS_OP_N, n, m, k, @@ -632,7 +636,7 @@ namespace ctranslate2 { } // cuBLAS assumes column-major storage, so swap a and b accordingly. - CUBLAS_CHECK(cublasGemmStridedBatchedEx(cuda::get_cublas_handle(), + CUBLAS_CHECK(hipblasGemmStridedBatchedEx(cuda::get_cublas_handle(), transpose_b ? CUBLAS_OP_T : CUBLAS_OP_N, transpose_a ? CUBLAS_OP_T : CUBLAS_OP_N, n, m, k, diff --git a/src/ops/gumbel_max_gpu.cu b/src/ops/gumbel_max_gpu.cu index bb0c0e939..d368b890b 100644 --- a/src/ops/gumbel_max_gpu.cu +++ b/src/ops/gumbel_max_gpu.cu @@ -1,5 +1,8 @@ #include "ctranslate2/ops/gumbel_max.h" +#include +#include + #include "type_dispatch.h" #include "cuda/helpers.h" #include "cuda/random.h"