Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/cuda/helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#ifdef CT2_USE_HIP
#include <hip/hip_fp16.h>
#include <hip/hip_bf16.h>
#include <hip/hip_runtime.h>

#define __nv_bfloat16 __hip_bfloat16
// Stand-in for __syncwarp in HIP builds (CT2_USE_HIP): accepts a lane mask and
// intentionally does nothing with it.
// TODO: 6.1 should have this but it doesn't?
__device__ inline void __syncwarp(uint32_t /*mask*/) {
}
Expand Down Expand Up @@ -414,7 +415,7 @@ namespace ctranslate2 {
// They help define row-wise reduction where each block handles a single row.

#ifdef CT2_USE_HIP
// Warp width used by the row-wise reduction helpers when building for HIP.
// NOTE(review): two definitions of C10_WARP_SIZE appear back to back below. This
// looks like a diff rendered without +/- markers (presumably the literal 64 is the
// removed line and `warpSize` the added one) — confirm against the actual header,
// since redefining a macro with a different replacement list is ill-formed.
// NOTE(review): `warpSize` is presumably the device built-in; if it is not a
// compile-time constant, any use of C10_WARP_SIZE in a constant expression
// (array bound, template argument) would stop compiling — verify the use sites.
#define C10_WARP_SIZE 64 //TODO: detect arch to set 32 for rdna
#define C10_WARP_SIZE warpSize
#else
// Non-HIP builds use a fixed width of 32.
#define C10_WARP_SIZE 32
#endif
Expand Down
2 changes: 1 addition & 1 deletion src/ops/conv1d_gpu.cu
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@ namespace ctranslate2 {
size_t workspace_size = 0;
void* workspace = nullptr;
CUDNN_CHECK(miopenConvolutionForwardGetWorkSpaceSize(handle,
input_desc,
weight_desc,
input_desc,
conv_desc,
output_desc,
&workspace_size));
Expand Down