diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index 047f413d39ba5..1dff268f3ca95 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -968,6 +968,11 @@ void CudaToolChain::addClangTargetOptions( "--nvptx-prec-sqrtf32=0"}); CC1Args.append({"-mllvm", "-enable-memcpyopt-without-libcalls"}); + + if (DriverArgs.hasFlag(options::OPT_fsycl_id_queries_fit_in_int, + options::OPT_fno_sycl_id_queries_fit_in_int, false)) + CC1Args.append( + {"-mllvm", "-nvvm-reflect-add=__CUDA_ID_QUERIES_FIT_IN_INT=1"}); } else { CC1Args.append({"-fcuda-is-device", "-mllvm", "-enable-memcpyopt-without-libcalls", diff --git a/clang/test/Driver/sycl-nvptx-id-queries-fit-in-int.cpp b/clang/test/Driver/sycl-nvptx-id-queries-fit-in-int.cpp new file mode 100644 index 0000000000000..7f040322c7a0e --- /dev/null +++ b/clang/test/Driver/sycl-nvptx-id-queries-fit-in-int.cpp @@ -0,0 +1,16 @@ +// REQUIRES: nvptx-registered-target + +// RUN: %clang -### -nocudalib \ +// RUN: -fsycl -fsycl-targets=nvptx64-nvidia-cuda %s 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-DEFAULT %s + +// RUN: %clang -### -nocudalib \ +// RUN: -fsycl -fsycl-targets=nvptx64-nvidia-cuda -fno-sycl-id-queries-fit-in-int %s 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-DEFAULT %s + +// RUN: %clang -### -nocudalib \ +// RUN: -fsycl -fsycl-targets=nvptx64-nvidia-cuda -fsycl-id-queries-fit-in-int %s 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-INT %s + +// CHECK-INT: "-mllvm" "-nvvm-reflect-add=__CUDA_ID_QUERIES_FIT_IN_INT=1" +// CHECK-DEFAULT-NOT: "-nvvm-reflect-add=__CUDA_ID_QUERIES_FIT_IN_INT=1" diff --git a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_global_id.cl b/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_global_id.cl index a4b3c0df76b23..c87d2679fd28e 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_global_id.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_global_id.cl @@ -8,17 +8,31 @@ #include +extern int 
__nvvm_reflect_ocl(constant char *); + /* Reading note: this text is unified-diff payload with its line breaks collapsed. A + that directly follows a ; or { or } or another + is a diff added-line marker, not an arithmetic operator. */ /* Returns the x component of the global invocation id as size_t, computed as WorkgroupId_x * WorkgroupSize_x + LocalInvocationId_x + GlobalOffset_x. */ _CLC_DEF _CLC_OVERLOAD size_t __spirv_GlobalInvocationId_x() { + if (__nvvm_reflect_ocl("__CUDA_ID_QUERIES_FIT_IN_INT")) { + /* Reflect flag is nonzero: every term is cast to uint first, so the product and sums are evaluated in uint arithmetic and only the final result is converted to size_t. The driver hunk earlier in this patch sets the flag to 1 only when -fsycl-id-queries-fit-in-int is enabled. NOTE(review): presumably the point is cheaper narrow arithmetic on NVPTX; confirm with the patch author. */ return (uint)__spirv_WorkgroupId_x() * (uint)__spirv_WorkgroupSize_x() + + (uint)__spirv_LocalInvocationId_x() + (uint)__spirv_GlobalOffset_x(); + } /* Default path: same formula with no narrowing casts on the operands. */ return __spirv_WorkgroupId_x() * __spirv_WorkgroupSize_x() + __spirv_LocalInvocationId_x() + __spirv_GlobalOffset_x(); } /* Returns the y component of the global invocation id as size_t; same structure as the x variant, using the _y queries. */ _CLC_DEF _CLC_OVERLOAD size_t __spirv_GlobalInvocationId_y() { + if (__nvvm_reflect_ocl("__CUDA_ID_QUERIES_FIT_IN_INT")) { + /* Reflect flag is nonzero: operands are cast to uint before the multiply and adds. */ return (uint)__spirv_WorkgroupId_y() * (uint)__spirv_WorkgroupSize_y() + + (uint)__spirv_LocalInvocationId_y() + (uint)__spirv_GlobalOffset_y(); + } /* Default path: operands are used without narrowing casts. */ return __spirv_WorkgroupId_y() * __spirv_WorkgroupSize_y() + __spirv_LocalInvocationId_y() + __spirv_GlobalOffset_y(); } /* Returns the z component of the global invocation id as size_t; same structure as the x variant, using the _z queries. */ _CLC_DEF _CLC_OVERLOAD size_t __spirv_GlobalInvocationId_z() { + if (__nvvm_reflect_ocl("__CUDA_ID_QUERIES_FIT_IN_INT")) { + /* Reflect flag is nonzero: operands are cast to uint before the multiply and adds. */ return (uint)__spirv_WorkgroupId_z() * (uint)__spirv_WorkgroupSize_z() + + (uint)__spirv_LocalInvocationId_z() + (uint)__spirv_GlobalOffset_z(); + } /* Default path: operands are used without narrowing casts. */ return __spirv_WorkgroupId_z() * __spirv_WorkgroupSize_z() + __spirv_LocalInvocationId_z() + __spirv_GlobalOffset_z(); }