diff --git a/clang/include/clang/Basic/BuiltinsSPIRV.td b/clang/include/clang/Basic/BuiltinsSPIRV.td index cc0c2f960f8d2..8fdd1ba6af3b5 100644 --- a/clang/include/clang/Basic/BuiltinsSPIRV.td +++ b/clang/include/clang/Basic/BuiltinsSPIRV.td @@ -37,3 +37,9 @@ def SPIRVFaceForward : Builtin { let Attributes = [NoThrow, Const, CustomTypeChecking]; let Prototype = "void(...)"; } + +def SPIRVGlobalInvoc : Builtin { + let Spellings = ["__builtin_spirv_global_invocation_id"]; + let Attributes = [NoThrow, Const]; + let Prototype = "size_t(int)"; +} \ No newline at end of file diff --git a/clang/lib/Basic/Targets/SPIR.cpp b/clang/lib/Basic/Targets/SPIR.cpp index 5c076f694dfa4..c404cd2ae4a39 100644 --- a/clang/lib/Basic/Targets/SPIR.cpp +++ b/clang/lib/Basic/Targets/SPIR.cpp @@ -35,7 +35,7 @@ static constexpr Builtin::Info BuiltinInfos[] = { static_assert(std::size(BuiltinInfos) == NumBuiltins); llvm::SmallVector -SPIRVTargetInfo::getTargetBuiltins() const { +BaseSPIRTargetInfo::getTargetBuiltins() const { return {{&BuiltinStrings, BuiltinInfos}}; } diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h index 0d82886bb139b..0652e51207a81 100644 --- a/clang/lib/Basic/Targets/SPIR.h +++ b/clang/lib/Basic/Targets/SPIR.h @@ -167,9 +167,7 @@ class LLVM_LIBRARY_VISIBILITY BaseSPIRTargetInfo : public TargetInfo { // memcpy as per section 3 of the SPIR spec. bool useFP16ConversionIntrinsics() const override { return false; } - llvm::SmallVector getTargetBuiltins() const override { - return {}; - } + llvm::SmallVector getTargetBuiltins() const override; std::string_view getClobbers() const override { return ""; } @@ -429,7 +427,6 @@ class LLVM_LIBRARY_VISIBILITY SPIRVTargetInfo : public BaseSPIRVTargetInfo { "v256:256-v512:512-v1024:1024-n8:16:32:64-G10"); } - llvm::SmallVector getTargetBuiltins() const override; void getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const override; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 4f1a36f6983c2..f95525cc176fa 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -126,8 +126,9 @@ static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, case llvm::Triple::spirv: return CGF->EmitSPIRVBuiltinExpr(BuiltinID, E); case llvm::Triple::spirv64: + case llvm::Triple::spir64: if (CGF->getTarget().getTriple().getOS() != llvm::Triple::OSType::AMDHSA) - return nullptr; + return CGF->EmitSPIRVBuiltinExpr(BuiltinID, E); return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E); default: return nullptr; diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp index a708b3aea129d..197556cb3c513 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.cpp +++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp @@ -368,16 +368,17 @@ llvm::Value *CGHLSLRuntime::emitInputSemantic(IRBuilder<> &B, } if (D.hasAttr()) { llvm::Function *ThreadIDIntrinsic = - CGM.getIntrinsic(getThreadIdIntrinsic()); + CGM.getIntrinsic(getThreadIdIntrinsic(), CGM.Int32Ty); return buildVectorInput(B, ThreadIDIntrinsic, Ty); } if (D.hasAttr()) { llvm::Function *GroupThreadIDIntrinsic = - CGM.getIntrinsic(getGroupThreadIdIntrinsic()); + CGM.getIntrinsic(getGroupThreadIdIntrinsic(), CGM.Int32Ty); return buildVectorInput(B, GroupThreadIDIntrinsic, Ty); } if (D.hasAttr()) { - llvm::Function *GroupIDIntrinsic = CGM.getIntrinsic(getGroupIdIntrinsic()); + llvm::Function *GroupIDIntrinsic = + CGM.getIntrinsic(getGroupIdIntrinsic(), CGM.Int32Ty); return buildVectorInput(B, GroupIDIntrinsic, Ty); } assert(false && "Unhandled parameter attribute"); diff --git a/clang/lib/CodeGen/TargetBuiltins/SPIR.cpp b/clang/lib/CodeGen/TargetBuiltins/SPIR.cpp index 26f8eb1fd07f8..6a95c163719cf 100644 --- a/clang/lib/CodeGen/TargetBuiltins/SPIR.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/SPIR.cpp @@ -83,6 +83,12 @@ Value *CodeGenFunction::EmitSPIRVBuiltinExpr(unsigned BuiltinID, /*ReturnType=*/N->getType(), Intrinsic::spv_faceforward, ArrayRef{N, I, Ng}, /*FMFSource=*/nullptr, "spv.faceforward"); } + case SPIRV::BI__builtin_spirv_global_invocation_id: + return Builder.CreateIntrinsic( + /*ReturnType=*/getTypes().ConvertType(E->getType()), + Intrinsic::spv_thread_id, + ArrayRef{EmitScalarExpr(E->getArg(0))}, nullptr, + "spv.thread.id"); } return nullptr; } diff --git a/clang/test/CodeGenSYCL/Inputs/sycl.hpp b/clang/test/CodeGenSYCL/Inputs/sycl.hpp index 4245fb0f658c9..19a3eb03d1063 100644 --- a/clang/test/CodeGenSYCL/Inputs/sycl.hpp +++ b/clang/test/CodeGenSYCL/Inputs/sycl.hpp @@ -564,7 +564,7 @@ ATTR_SYCL_KERNEL void kernel_single_task_2017(KernelType kernelFunc) { // #Kerne } template -ATTR_SYCL_KERNEL void +__attribute__((sycl_kernel)) void kernel_parallel_for(const KernelType &KernelFunc) { KernelFunc(id()); } diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td index 8d984d6ce58df..a60252f6e0886 100644 --- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td +++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td @@ -59,10 +59,24 @@ let TargetPrefix = "spv" in { NoCapture>, ImmArg>]>; - // The following intrinsic(s) are mirrored from IntrinsicsDirectX.td for HLSL support. - def int_spv_thread_id : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem, IntrWillReturn]>; - def int_spv_group_id : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem, IntrWillReturn]>; - def int_spv_thread_id_in_group : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem, IntrWillReturn]>; + // Ideally we should use the SPIR-V terminology for SPIR-V intrinsics. + def int_spv_thread_id : Intrinsic<[llvm_anyint_ty], [llvm_i32_ty], [IntrNoMem, IntrWillReturn]>; + def int_spv_group_id : Intrinsic<[llvm_anyint_ty], [llvm_i32_ty], [IntrNoMem, IntrWillReturn]>; + def int_spv_thread_id_in_group : Intrinsic<[llvm_anyint_ty], [llvm_i32_ty], [IntrNoMem, IntrWillReturn]>; + def int_spv_workgroup_size : Intrinsic<[llvm_anyint_ty], [llvm_i32_ty], [IntrNoMem, IntrWillReturn]>; + def int_spv_global_size : Intrinsic<[llvm_anyint_ty], [llvm_i32_ty], [IntrNoMem, IntrWillReturn]>; + def int_spv_global_offset : Intrinsic<[llvm_anyint_ty], [llvm_i32_ty], [IntrNoMem, IntrWillReturn]>; + def int_spv_num_workgroups : Intrinsic<[llvm_anyint_ty], [llvm_i32_ty], [IntrNoMem, IntrWillReturn]>; + def int_spv_subgroup_size : ClangBuiltin<"__builtin_spirv_subgroup_size">, + Intrinsic<[llvm_i32_ty], [], [NoUndef, IntrNoMem, IntrWillReturn]>; + def int_spv_num_subgroups : ClangBuiltin<"__builtin_spirv_num_subgroups">, + Intrinsic<[llvm_i32_ty], [], [NoUndef, IntrNoMem, IntrWillReturn]>; + def int_spv_subgroup_id : ClangBuiltin<"__builtin_spirv_subgroup_id">, + Intrinsic<[llvm_i32_ty], [], [NoUndef, IntrNoMem, IntrWillReturn]>; + def int_spv_subgroup_local_invocation_id : ClangBuiltin<"__builtin_spirv_subgroup_local_invocation_id">, + Intrinsic<[llvm_i32_ty], [], [NoUndef, IntrNoMem, IntrWillReturn]>; + def int_spv_subgroup_max_size : ClangBuiltin<"__builtin_spirv_subgroup_max_size">, + Intrinsic<[llvm_i32_ty], [], [NoUndef, IntrNoMem, IntrWillReturn]>; def int_spv_flattened_thread_id_in_group : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrWillReturn]>; def int_spv_all : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty], [IntrNoMem]>; def int_spv_any : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty], [IntrNoMem]>; diff --git a/llvm/lib/IR/Intrinsics.cpp b/llvm/lib/IR/Intrinsics.cpp index dabb5fe006b3c..7b21e0ee27992 100644 --- a/llvm/lib/IR/Intrinsics.cpp +++ b/llvm/lib/IR/Intrinsics.cpp @@ -27,6 +27,7 @@ #include "llvm/IR/IntrinsicsR600.h" #include "llvm/IR/IntrinsicsRISCV.h" #include "llvm/IR/IntrinsicsS390.h" +#include "llvm/IR/IntrinsicsSPIRV.h" #include "llvm/IR/IntrinsicsVE.h" #include "llvm/IR/IntrinsicsX86.h" #include "llvm/IR/IntrinsicsXCore.h" diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index a910c882af1b6..2b955c5d96bc5 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -3056,6 +3056,32 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg, // a `LocalInvocationIndex` builtin variable return loadBuiltinInputID(SPIRV::BuiltIn::LocalInvocationIndex, ResVReg, ResType, I); + case Intrinsic::spv_workgroup_size: + return loadVec3BuiltinInputID(SPIRV::BuiltIn::WorkgroupSize, ResVReg, + ResType, I); + case Intrinsic::spv_global_size: + return loadVec3BuiltinInputID(SPIRV::BuiltIn::GlobalSize, ResVReg, ResType, + I); + case Intrinsic::spv_global_offset: + return loadVec3BuiltinInputID(SPIRV::BuiltIn::GlobalOffset, ResVReg, + ResType, I); + case Intrinsic::spv_num_workgroups: + return loadVec3BuiltinInputID(SPIRV::BuiltIn::NumWorkgroups, ResVReg, + ResType, I); + case Intrinsic::spv_subgroup_size: + return loadBuiltinInputID(SPIRV::BuiltIn::SubgroupSize, ResVReg, ResType, + I); + case Intrinsic::spv_num_subgroups: + return loadBuiltinInputID(SPIRV::BuiltIn::NumSubgroups, ResVReg, ResType, + I); + case Intrinsic::spv_subgroup_id: + return loadBuiltinInputID(SPIRV::BuiltIn::SubgroupId, ResVReg, ResType, I); + case Intrinsic::spv_subgroup_local_invocation_id: + return loadBuiltinInputID(SPIRV::BuiltIn::SubgroupLocalInvocationId, + ResVReg, ResType, I); + case Intrinsic::spv_subgroup_max_size: + return loadBuiltinInputID(SPIRV::BuiltIn::SubgroupMaxSize, ResVReg, ResType, + I); case Intrinsic::spv_fdot: return selectFloatDot(ResVReg, ResType, I); case Intrinsic::spv_udot: @@ -3983,13 +4009,13 @@ bool SPIRVInstructionSelector::selectLog10(Register ResVReg, // Generate the instructions to load 3-element vector builtin input // IDs/Indices. // Like: GlobalInvocationId, LocalInvocationId, etc.... + bool SPIRVInstructionSelector::loadVec3BuiltinInputID( SPIRV::BuiltIn::BuiltIn BuiltInValue, Register ResVReg, const SPIRVType *ResType, MachineInstr &I) const { MachineIRBuilder MIRBuilder(I); - const SPIRVType *U32Type = GR.getOrCreateSPIRVIntegerType(32, MIRBuilder); const SPIRVType *Vec3Ty = - GR.getOrCreateSPIRVVectorType(U32Type, 3, MIRBuilder, false); + GR.getOrCreateSPIRVVectorType(ResType, 3, MIRBuilder, false); const SPIRVType *PtrType = GR.getOrCreateSPIRVPointerType( Vec3Ty, MIRBuilder, SPIRV::StorageClass::Input); diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/SV_DispatchThreadID.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/SV_DispatchThreadID.ll index 2b2ce0974216c..d0d411d2f981d 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/SV_DispatchThreadID.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/SV_DispatchThreadID.ll @@ -37,21 +37,21 @@ entry: ; CHECK: %[[#load:]] = OpLoad %[[#v3int]] %[[#GlobalInvocationId]] ; CHECK: %[[#load0:]] = OpCompositeExtract %[[#int]] %[[#load]] 0 - %0 = call i32 @llvm.spv.thread.id(i32 0) + %0 = call i32 @llvm.spv.thread.id.i32(i32 0) ; CHECK: %[[#tempvar:]] = OpCompositeInsert %[[#v3int]] %[[#load0]] %[[#tempvar]] 0 %1 = insertelement <3 x i32> poison, i32 %0, i64 0 ; CHECK: %[[#load:]] = OpLoad %[[#v3int]] %[[#GlobalInvocationId]] ; CHECK: %[[#load1:]] = OpCompositeExtract %[[#int]] %[[#load]] 1 - %2 = call i32 @llvm.spv.thread.id(i32 1) + %2 = call i32 @llvm.spv.thread.id.i32(i32 1) ; CHECK: %[[#tempvar:]] = OpCompositeInsert %[[#v3int]] %[[#load1]] %[[#tempvar]] 1 %3 = insertelement <3 x i32> %1, i32 %2, i64 1 ; CHECK: %[[#load:]] = OpLoad %[[#v3int]] %[[#GlobalInvocationId]] ; CHECK: %[[#load2:]] = OpCompositeExtract %[[#int]] %[[#load]] 2 - %4 = call i32 @llvm.spv.thread.id(i32 2) + %4 = call i32 @llvm.spv.thread.id.i32(i32 2) ; CHECK: %[[#tempvar:]] = OpCompositeInsert %[[#v3int]] %[[#load2]] %[[#tempvar]] 2 %5 = insertelement <3 x i32> %3, i32 %4, i64 2 @@ -61,7 +61,7 @@ entry: } ; Function Attrs: nounwind willreturn memory(none) -declare i32 @llvm.spv.thread.id(i32) #2 +declare i32 @llvm.spv.thread.id.i32(i32) #2 attributes #0 = { noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } attributes #1 = { norecurse "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/SV_GroupID.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/SV_GroupID.ll index bb7650810e989..5b9a7bc02d486 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/SV_GroupID.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/SV_GroupID.ll @@ -21,21 +21,21 @@ entry: ; CHECK: %[[#load:]] = OpLoad %[[#v3int]] %[[#WorkgroupId]] ; CHECK: %[[#load0:]] = OpCompositeExtract %[[#int]] %[[#load]] 0 - %1 = call i32 @llvm.spv.group.id(i32 0) + %1 = call i32 @llvm.spv.group.id.i32(i32 0) ; CHECK: %[[#tempvar:]] = OpCompositeInsert %[[#v3int]] %[[#load0]] %[[#tempvar]] %2 = insertelement <3 x i32> poison, i32 %1, i64 0 ; CHECK: %[[#load:]] = OpLoad %[[#v3int]] %[[#WorkgroupId]] ; CHECK: %[[#load1:]] = OpCompositeExtract %[[#int]] %[[#load]] 1 - %3 = call i32 @llvm.spv.group.id(i32 1) + %3 = call i32 @llvm.spv.group.id.i32(i32 1) ; CHECK: %[[#tempvar:]] = OpCompositeInsert %[[#v3int]] %[[#load1]] %[[#tempvar]] 1 %4 = insertelement <3 x i32> %2, i32 %3, i64 1 ; CHECK: %[[#load:]] = OpLoad %[[#v3int]] %[[#WorkgroupId]] ; CHECK: %[[#load2:]] = OpCompositeExtract %[[#int]] %[[#load]] 2 - %5 = call i32 @llvm.spv.group.id(i32 2) + %5 = call i32 @llvm.spv.group.id.i32(i32 2) ; CHECK: %[[#tempvar:]] = OpCompositeInsert %[[#v3int]] %[[#load2]] %[[#tempvar]] 2 %6 = insertelement <3 x i32> %4, i32 %5, i64 2 @@ -45,7 +45,7 @@ entry: } ; Function Attrs: nounwind willreturn memory(none) -declare i32 @llvm.spv.group.id(i32) #3 +declare i32 @llvm.spv.group.id.i32(i32) #3 attributes #1 = { convergent noinline norecurse "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } attributes #3 = { nounwind willreturn memory(none) } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/SV_GroupThreadID.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/SV_GroupThreadID.ll index 4e31d3fb77411..f058a539a2263 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/SV_GroupThreadID.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/SV_GroupThreadID.ll @@ -37,21 +37,21 @@ entry: ; CHECK: %[[#load:]] = OpLoad %[[#v3int]] %[[#LocalInvocationId]] ; CHECK: %[[#load0:]] = OpCompositeExtract %[[#int]] %[[#load]] 0 - %0 = call i32 @llvm.spv.thread.id.in.group(i32 0) + %0 = call i32 @llvm.spv.thread.id.in.group.i32(i32 0) ; CHECK: %[[#tempvar:]] = OpCompositeInsert %[[#v3int]] %[[#load0]] %[[#tempvar]] 0 %1 = insertelement <3 x i32> poison, i32 %0, i64 0 ; CHECK: %[[#load:]] = OpLoad %[[#v3int]] %[[#LocalInvocationId]] ; CHECK: %[[#load1:]] = OpCompositeExtract %[[#int]] %[[#load]] 1 - %2 = call i32 @llvm.spv.thread.id.in.group(i32 1) + %2 = call i32 @llvm.spv.thread.id.in.group.i32(i32 1) ; CHECK: %[[#tempvar:]] = OpCompositeInsert %[[#v3int]] %[[#load1]] %[[#tempvar]] 1 %3 = insertelement <3 x i32> %1, i32 %2, i64 1 ; CHECK: %[[#load:]] = OpLoad %[[#v3int]] %[[#LocalInvocationId]] ; CHECK: %[[#load2:]] = OpCompositeExtract %[[#int]] %[[#load]] 2 - %4 = call i32 @llvm.spv.thread.id.in.group(i32 2) + %4 = call i32 @llvm.spv.thread.id.in.group.i32(i32 2) ; CHECK: %[[#tempvar:]] = OpCompositeInsert %[[#v3int]] %[[#load2]] %[[#tempvar]] 2 %5 = insertelement <3 x i32> %3, i32 %4, i64 2 @@ -61,7 +61,7 @@ entry: } ; Function Attrs: nounwind willreturn memory(none) -declare i32 @llvm.spv.thread.id.in.group(i32) #2 +declare i32 @llvm.spv.thread.id.in.group.i32(i32) #2 attributes #0 = { noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } attributes #1 = { norecurse "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } diff --git a/sycl/include/sycl/__spirv/spirv_vars.hpp b/sycl/include/sycl/__spirv/spirv_vars.hpp index 285f5e37e37e0..3f564dff34d67 100644 --- a/sycl/include/sycl/__spirv/spirv_vars.hpp +++ b/sycl/include/sycl/__spirv/spirv_vars.hpp @@ -58,7 +58,6 @@ __DPCPP_SYCL_EXTERNAL uint32_t __spirv_SubgroupLocalInvocationId(); typedef size_t size_t_vec __attribute__((ext_vector_type(3))); __SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInGlobalSize; -__SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInGlobalInvocationId; __SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInWorkgroupSize; __SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInNumWorkgroups; __SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInLocalInvocationId; @@ -78,13 +77,13 @@ __SPIRV_VAR_QUALIFIERS __ocl_vec_t __spirv_BuiltInSubgroupLeMask; __SPIRV_VAR_QUALIFIERS __ocl_vec_t __spirv_BuiltInSubgroupLtMask; __DPCPP_SYCL_EXTERNAL inline size_t __spirv_GlobalInvocationId_x() { - return __spirv_BuiltInGlobalInvocationId.x; + return __builtin_spirv_global_invocation_id(0); } __DPCPP_SYCL_EXTERNAL inline size_t __spirv_GlobalInvocationId_y() { - return __spirv_BuiltInGlobalInvocationId.y; + return __builtin_spirv_global_invocation_id(1); } __DPCPP_SYCL_EXTERNAL inline size_t __spirv_GlobalInvocationId_z() { - return __spirv_BuiltInGlobalInvocationId.z; + return __builtin_spirv_global_invocation_id(2); } __DPCPP_SYCL_EXTERNAL inline size_t __spirv_GlobalSize_x() {