diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 0747eda3addb4..6a10a02d877a8 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1607,6 +1607,13 @@ def SYCLDevice : InheritableAttr { let Documentation = [SYCLDeviceDocs]; } +def SYCLDeviceOnly : InheritableAttr { + let Spellings = [GNU<"sycl_device_only">]; + let Subjects = SubjectList<[Function]>; + let LangOpts = [SYCLIsDevice, SilentlyIgnoreSYCLIsHost]; + let Documentation = [SYCLDeviceOnlyDocs]; +} + def SYCLGlobalVar : InheritableAttr { let Spellings = [GNU<"sycl_global_var">]; let Subjects = SubjectList<[GlobalStorageNonLocalVar], ErrorDiag>; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 5234c7ee02fff..000a0e522e8cc 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -4518,6 +4518,20 @@ implicitly inherit this attribute. }]; } +def SYCLDeviceOnlyDocs : Documentation { + let Category = DocCatFunction; + let Heading = "sycl_device_only"; + let Content = [{ +This attribute can only be applied to functions and indicates that the function +is only available for the device. It allows functions marked with it to +overload existing functions without the attribute, in which case the overload +with the attribute will be used on the device side and the overload without +will be used on the host side. Note: as opposed to ``sycl_device`` this does +not mark the function as being exported, both attributes can be used together +if needed. + }]; +} + def RISCVInterruptDocs : Documentation { let Category = DocCatFunction; let Heading = "interrupt (RISC-V)"; diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index fae15742b52ab..5a5cf72091a88 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -3729,6 +3729,13 @@ unsigned FunctionDecl::getBuiltinID(bool ConsiderWrapperFunctions) const { !(BuiltinID == Builtin::BIprintf || BuiltinID == Builtin::BImalloc)) return 0; + // SYCL doesn't have a device-side standard library. SYCLDeviceOnlyAttr may + // be used to provide device-side definitions of standard functions, so + // anything with that attribute shouldn't be treated as a builtin. + if (Context.getLangOpts().isSYCL() && hasAttr()) { + return 0; + } + // As AMDGCN implementation of OpenMP does not have a device-side standard // library, none of the predefined library functions except printf and malloc // should be treated as a builtin i.e. 0 should be returned for them. diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index c7db24b8d95ff..57522a049bb00 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -2782,10 +2782,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, GenerateIntrinsics = ConstWithoutErrnoOrExceptions && ErrnoOverridenToFalseWithOpt; } - bool IsSYCLDeviceWithoutIntrinsics = - getLangOpts().SYCLIsDevice && - (getTarget().getTriple().isNVPTX() || getTarget().getTriple().isAMDGCN()); - if (GenerateIntrinsics && !IsSYCLDeviceWithoutIntrinsics) { + if (GenerateIntrinsics) { switch (BuiltinIDIfNoAsmLabel) { case Builtin::BIacos: case Builtin::BIacosf: @@ -3885,7 +3882,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_modf: case Builtin::BI__builtin_modff: case Builtin::BI__builtin_modfl: - if (Builder.getIsFPConstrained() || IsSYCLDeviceWithoutIntrinsics) + if (Builder.getIsFPConstrained()) break; // TODO: Emit constrained modf intrinsic once one exists. return RValue::get(emitModfBuiltin(*this, E, Intrinsic::modf)); case Builtin::BI__builtin_isgreater: diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index b2b1b72454f80..4a077f3fbf5de 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -4323,6 +4323,12 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { } } + // Don't emit 'sycl_device_only' function in SYCL host compilation. + if (LangOpts.SYCLIsHost && isa(Global) && + Global->hasAttr()) { + return; + } + if (LangOpts.OpenMP) { // If this is OpenMP, check if it is legal to emit this global normally. if (OpenMPRuntime && OpenMPRuntime->emitTargetGlobal(GD)) @@ -4412,6 +4418,46 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { } } + // When using SYCLDeviceOnlyAttr, there can be two functions with the same + // mangling, the host function and the device overload. So when compiling for + // device we need to make sure we're selecting the SYCLDeviceOnlyAttr + // overload and dropping the host overload. + if (LangOpts.SYCLIsDevice) { + StringRef MangledName = getMangledName(GD); + auto DDI = DeferredDecls.find(MangledName); + // If we have an existing declaration with the same mangling for this + // symbol it may be a SYCLDeviceOnlyAttr case. + if (DDI != DeferredDecls.end()) { + auto *PreviousGlobal = cast(DDI->second.getDecl()); + + // If the host declaration was already processed and the device only + // declaration is also a sycl external declaration, remove the host + // variant and skip. The device only variant will be generated later + // as it's marked sycl external. + if (!PreviousGlobal->hasAttr() && + Global->hasAttr() && + Global->hasAttr()) { + DeferredDecls.erase(DDI); + return; + } + + // If the host declaration was already processed, replace it with the + // device only declaration. + if (!PreviousGlobal->hasAttr() && + Global->hasAttr()) { + DeferredDecls[MangledName] = GD; + return; + } + + // If the device only declaration was already processed, skip the + // host declaration. + if (PreviousGlobal->hasAttr() && + !Global->hasAttr()) { + return; + } + } + } + // clang::ParseAST ensures that we emit the SYCL devices at the end, so // anything that is a device (or indirectly called) will be handled later. if (LangOpts.SYCLIsDevice && MustBeEmitted(Global)) { diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 4bdab4b65d375..32c800c247728 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -7354,6 +7354,10 @@ static bool isIncompleteDeclExternC(Sema &S, const T *D) { if (S.getLangOpts().CUDA && (D->template hasAttr() || D->template hasAttr())) return false; + + // So does SYCL's device_only attribute. + if (S.getLangOpts().isSYCL() && D->template hasAttr()) + return false; } return D->isExternC(); } diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index e6f6c547113be..b076cb60db269 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -7224,6 +7224,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL, case ParsedAttr::AT_SYCLDevice: S.SYCL().handleSYCLDeviceAttr(D, AL); break; + case ParsedAttr::AT_SYCLDeviceOnly: + handleSimpleAttribute(S, D, AL); + break; case ParsedAttr::AT_SYCLScope: S.SYCL().handleSYCLScopeAttr(D, AL); break; diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 15294a11d4ecd..91c9106be81c1 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -1629,6 +1629,14 @@ static bool IsOverloadOrOverrideImpl(Sema &SemaRef, FunctionDecl *New, } } + // Allow overloads with SYCLDeviceOnlyAttr + if (SemaRef.getLangOpts().isSYCL()) { + if (Old->hasAttr() != + New->hasAttr()) { + return true; + } + } + // The signatures match; this is not an overload. return false; } @@ -11020,6 +11028,15 @@ bool clang::isBetterOverloadCandidate( S.CUDA().IdentifyPreference(Caller, Cand2.Function); } + // In SYCL device compilation mode prefer the overload with the + // SYCLDeviceOnly attribute. + if (S.getLangOpts().SYCLIsDevice && Cand1.Function && Cand2.Function) { + if (Cand1.Function->hasAttr() != + Cand2.Function->hasAttr()) { + return Cand1.Function->hasAttr(); + } + } + // General member function overloading is handled above, so this only handles // constructors with address spaces. // This only handles address spaces since C++ has no other @@ -11374,6 +11391,15 @@ OverloadingResult OverloadCandidateSet::BestViableFunctionImpl( if (S.getLangOpts().CUDA) CudaExcludeWrongSideCandidates(S, Candidates); + // In SYCL host compilation remove candidates marked SYCLDeviceOnly. + if (S.getLangOpts().SYCLIsHost) { + auto IsDeviceCand = [&](const OverloadCandidate *Cand) { + return Cand->Viable && Cand->Function && + Cand->Function->hasAttr(); + }; + llvm::erase_if(Candidates, IsDeviceCand); + } + Best = end(); for (auto *Cand : Candidates) { Cand->Best = false; diff --git a/clang/test/CodeGenSYCL/sycl-device-only.cpp b/clang/test/CodeGenSYCL/sycl-device-only.cpp new file mode 100644 index 0000000000000..6196f5b6744bb --- /dev/null +++ b/clang/test/CodeGenSYCL/sycl-device-only.cpp @@ -0,0 +1,64 @@ +// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECKD +// RUN: %clang_cc1 -fsycl-is-host -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECKH +// Test code generation for sycl_device_only attribute. + +// Verify that the device overload is used on device. +// +// CHECK-LABEL: _Z3fooi +// CHECKH: %add = add nsw i32 %0, 10 +// CHECKD: %add = add nsw i32 %0, 20 +int foo(int a) { return a + 10; } +__attribute__((sycl_device_only)) int foo(int a) { return a + 20; } + +// Use a `sycl_device` function as entry point +__attribute__((sycl_device)) int bar(int b) { return foo(b); } + +// Verify that the order of declaration doesn't change the behavior. +// +// CHECK-LABEL: _Z3fooswapi +// CHECKH: %add = add nsw i32 %0, 10 +// CHECKD: %add = add nsw i32 %0, 20 +__attribute__((sycl_device_only)) int fooswap(int a) { return a + 20; } +int fooswap(int a) { return a + 10; } + +// Use a `sycl_device` function as entry point. +__attribute__((sycl_device)) int barswap(int b) { return fooswap(b); } + +// Verify that in extern C the attribute enables mangling. +extern "C" { +// CHECK-LABEL: _Z3fooci +// CHECKH: %add = add nsw i32 %0, 10 +// CHECKD: %add = add nsw i32 %0, 20 +int fooc(int a) { return a + 10; } +__attribute__((sycl_device_only)) int fooc(int a) { return a + 20; } + +// Use a `sycl_device` function as entry point. +__attribute__((sycl_device)) int barc(int b) { return fooc(b); } +} + +// Verify that both attributes can work together. +// +// CHECK-LABEL: _Z3fooai +// CHECKH: %add = add nsw i32 %0, 10 +// CHECKD: %add = add nsw i32 %0, 20 +int fooa(int a) { return a + 10; } +__attribute__((sycl_device_only, sycl_device)) int fooa(int a) { + return a + 20; +} + +// Use a `sycl_device` function as entry point. +__attribute__((sycl_device)) int bara(int b) { return fooa(b); } + +// Verify that the order of declaration doesn't change the behavior when using +// both attributes. +// +// CHECK-LABEL: _Z3fooaswapi +// CHECKH: %add = add nsw i32 %0, 10 +// CHECKD: %add = add nsw i32 %0, 20 +__attribute__((sycl_device_only, sycl_device)) int fooaswap(int a) { + return a + 20; +} +int fooaswap(int a) { return a + 10; } + +// Use a `sycl_device` function as entry point. +__attribute__((sycl_device)) int baraswap(int b) { return fooaswap(b); } diff --git a/clang/test/CodeGenSYCL/sycl-libdevice-cmath.cpp b/clang/test/CodeGenSYCL/sycl-libdevice-cmath.cpp deleted file mode 100644 index 5c282449dc851..0000000000000 --- a/clang/test/CodeGenSYCL/sycl-libdevice-cmath.cpp +++ /dev/null @@ -1,144 +0,0 @@ -// SYCL compilation uses libdevice in order to implement platform specific -// versions of funcs like cosf, logf, etc. In order for the libdevice funcs -// to be used, we need to make sure that llvm intrinsics such as llvm.cos.f32 -// are not emitted since many backends do not have lowerings for such -// intrinsics. This allows the driver to link in the libdevice definitions for -// cosf etc. later in the driver flow. - -// RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-cuda -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-cuda -ffast-math -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 %s -fsycl-is-device -triple amdgcn-amd-amdhsa -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 %s -fsycl-is-device -triple amdgcn-amd-amdhsa -ffast-math -emit-llvm -o - | FileCheck %s - -#include "Inputs/sycl.hpp" - -extern "C" { -float scalbnf(float x, int n); -float logf(float x); -float expf(float x); -float frexpf(float x, int *exp); -float ldexpf(float x, int exp); -float log10f(float x); -float modff(float x, float *intpart); -float exp2f(float x); -float expm1f(float x); -int ilogbf(float x); -float log1pf(float x); -float log2f(float x); -float logbf(float x); -float sqrtf(float x); -float cbrtf(float x); -float hypotf(float x, float y); -float erff(float x); -float erfcf(float x); -float tgammaf(float x); -float lgammaf(float x); -float fmodf(float x, float y); -float remainderf(float x, float y); -float remquof(float x, float y, int *q); -float nextafterf(float x, float y); -float fdimf(float x, float y); -float fmaf(float x, float y, float z); -float sinf(float x); -float cosf(float x); -float tanf(float x); -float powf(float x, float y); -float acosf(float x); -float asinf(float x); -float atanf(float x); -float atan2f(float x, float y); -float coshf(float x); -float sinhf(float x); -float tanhf(float x); -float acoshf(float x); -float asinhf(float x); -float atanhf(float x); -}; - -// CHECK-NOT: llvm.abs. -// CHECK-NOT: llvm.scalbnf. -// CHECK-NOT: llvm.log. -// CHECK-NOT: llvm.exp. -// CHECK-NOT: llvm.frexp. -// CHECK-NOT: llvm.ldexp. -// CHECK-NOT: llvm.log10. -// CHECK-NOT: llvm.mod. -// CHECK-NOT: llvm.exp2. -// CHECK-NOT: llvm.expm1. -// CHECK-NOT: llvm.ilogb. -// CHECK-NOT: llvm.log1p. -// CHECK-NOT: llvm.log2. -// CHECK-NOT: llvm.logb. -// CHECK-NOT: llvm.sqrt. -// CHECK-NOT: llvm.cbrt. -// CHECK-NOT: llvm.hypot. -// CHECK-NOT: llvm.erf. -// CHECK-NOT: llvm.erfc. -// CHECK-NOT: llvm.tgamma. -// CHECK-NOT: llvm.lgamma. -// CHECK-NOT: llvm.fmod. -// CHECK-NOT: llvm.remainder. -// CHECK-NOT: llvm.remquo. -// CHECK-NOT: llvm.nextafter. -// CHECK-NOT: llvm.fdim. -// CHECK-NOT: llvm.fma. -// CHECK-NOT: llvm.sin. -// CHECK-NOT: llvm.cos. -// CHECK-NOT: llvm.tan. -// CHECK-NOT: llvm.pow. -// CHECK-NOT: llvm.acos. -// CHECK-NOT: llvm.asin. -// CHECK-NOT: llvm.atan. -// CHECK-NOT: llvm.atan2. -// CHECK-NOT: llvm.cosh. -// CHECK-NOT: llvm.sinh. -// CHECK-NOT: llvm.tanh. -// CHECK-NOT: llvm.acosh. -// CHECK-NOT: llvm.asinh. -// CHECK-NOT: llvm.atanh. -void sycl_kernel(float *a, int *b) { - sycl::queue{}.submit([&](sycl::handler &cgh) { - cgh.single_task([=]() { - a[0] = scalbnf(a[0], b[0]); - a[0] = logf(a[0]); - a[0] = expf(a[0]); - a[0] = frexpf(a[0], b); - a[0] = ldexpf(a[0], b[0]); - a[0] = log10f(a[0]); - a[0] = modff(a[0], a); - a[0] = exp2f(a[0]); - a[0] = expm1f(a[0]); - a[0] = ilogbf(a[0]); - a[0] = log1pf(a[0]); - a[0] = log2f(a[0]); - a[0] = logbf(a[0]); - a[0] = sqrtf(a[0]); - a[0] = cbrtf(a[0]); - a[0] = hypotf(a[0], a[0]); - a[0] = erff(a[0]); - a[0] = erfcf(a[0]); - a[0] = tgammaf(a[0]); - a[0] = lgammaf(a[0]); - a[0] = fmodf(a[0], a[0]); - a[0] = remainderf(a[0], a[0]); - a[0] = remquof(a[0], a[0], b); - a[0] = nextafterf(a[0], a[0]); - a[0] = fdimf(a[0], a[0]); - a[0] = fmaf(a[0], a[0], a[0]); - a[0] = sinf(a[0]); - a[0] = cosf(a[0]); - a[0] = tanf(a[0]); - a[0] = powf(a[0], a[0]); - a[0] = acosf(a[0]); - a[0] = asinf(a[0]); - a[0] = atanf(a[0]); - a[0] = atan2f(a[0], a[0]); - a[0] = coshf(a[0]); - a[0] = sinhf(a[0]); - a[0] = tanhf(a[0]); - a[0] = acoshf(a[0]); - a[0] = asinhf(a[0]); - a[0] = atanhf(a[0]); - }); - }); -} diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test index d04cf791e88bf..c23d9acb98826 100644 --- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test +++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test @@ -190,6 +190,7 @@ // CHECK-NEXT: SYCLDeviceGlobal (SubjectMatchRule_record) // CHECK-NEXT: SYCLDeviceHas (SubjectMatchRule_function) // CHECK-NEXT: SYCLDeviceIndirectlyCallable (SubjectMatchRule_function) +// CHECK-NEXT: SYCLDeviceOnly (SubjectMatchRule_function) // CHECK-NEXT: SYCLGlobalVariableAllowed (SubjectMatchRule_record) // CHECK-NEXT: SYCLIntelDisableLoopPipelining (SubjectMatchRule_function) // CHECK-NEXT: SYCLIntelInitiationInterval (SubjectMatchRule_function) diff --git a/libdevice/cmath_wrapper.cpp b/libdevice/cmath_wrapper.cpp index d59395b2d0994..a084a86883767 100644 --- a/libdevice/cmath_wrapper.cpp +++ b/libdevice/cmath_wrapper.cpp @@ -8,8 +8,7 @@ #include "device_math.h" -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \ - defined(__AMDGCN__) +#if defined(__SPIR__) || defined(__SPIRV__) DEVICE_EXTERN_C_INLINE int abs(int x) { return __devicelib_abs(x); } @@ -190,25 +189,7 @@ float asinhf(float x) { return __devicelib_asinhf(x); } DEVICE_EXTERN_C_INLINE float atanhf(float x) { return __devicelib_atanhf(x); } -#ifdef __NVPTX__ -extern "C" SYCL_EXTERNAL float __nv_nearbyintf(float); -DEVICE_EXTERN_C_INLINE -float nearbyintf(float x) { return __nv_nearbyintf(x); } - -extern "C" SYCL_EXTERNAL float __nv_rintf(float); -DEVICE_EXTERN_C_INLINE -float rintf(float x) { return __nv_rintf(x); } -#elif defined(__AMDGCN__) -extern "C" SYCL_EXTERNAL float __ocml_nearbyint_f32(float); -DEVICE_EXTERN_C_INLINE -float nearbyintf(float x) { return __ocml_nearbyint_f32(x); } - -extern "C" SYCL_EXTERNAL float __ocml_rint_f32(float); -DEVICE_EXTERN_C_INLINE -float rintf(float x) { return __ocml_rint_f32(x); } -#else DEVICE_EXTERN_C_INLINE float rintf(float x) { return __spirv_ocl_rint(x); } -#endif -#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__ +#endif // __SPIR__ || __SPIRV__ diff --git a/libdevice/cmath_wrapper_fp64.cpp b/libdevice/cmath_wrapper_fp64.cpp index 720982799ea71..855317bcf3f4b 100644 --- a/libdevice/cmath_wrapper_fp64.cpp +++ b/libdevice/cmath_wrapper_fp64.cpp @@ -9,8 +9,7 @@ #include "device_math.h" -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \ - defined(__AMDGCN__) +#if defined(__SPIR__) || defined(__SPIRV__) // All exported functions in math and complex device libraries are weak // reference. If users provide their own math or complex functions(with @@ -180,26 +179,8 @@ double atanh(double x) { return __devicelib_atanh(x); } DEVICE_EXTERN_C_INLINE double scalbn(double x, int exp) { return __devicelib_scalbn(x, exp); } -#ifdef __NVPTX__ -extern "C" SYCL_EXTERNAL double __nv_nearbyint(double); -DEVICE_EXTERN_C_INLINE -double nearbyint(double x) { return __nv_nearbyint(x); } - -extern "C" SYCL_EXTERNAL double __nv_rint(double); -DEVICE_EXTERN_C_INLINE -double rint(double x) { return __nv_rint(x); } -#elif defined(__AMDGCN__) -extern "C" SYCL_EXTERNAL double __ocml_nearbyint_f64(double); -DEVICE_EXTERN_C_INLINE -double nearbyint(double x) { return __ocml_nearbyint_f64(x); } - -extern "C" SYCL_EXTERNAL double __ocml_rint_f64(double); -DEVICE_EXTERN_C_INLINE -double rint(double x) { return __ocml_rint_f64(x); } -#else DEVICE_EXTERN_C_INLINE double rint(double x) { return __spirv_ocl_rint(x); } -#endif #if defined(_MSC_VER) #include @@ -508,4 +489,4 @@ double _Sinh(double x, double y) { // compute y * sinh(x), |y| <= 1 } } #endif // defined(_WIN32) -#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__ +#endif // __SPIR__ || __SPIRV__ diff --git a/libdevice/fallback-cmath.cpp b/libdevice/fallback-cmath.cpp index 97cb4cf67b4c7..d930ea014ac24 100644 --- a/libdevice/fallback-cmath.cpp +++ b/libdevice/fallback-cmath.cpp @@ -8,8 +8,7 @@ #include "device_math.h" -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \ - defined(__AMDGCN__) +#if defined(__SPIR__) || defined(__SPIRV__) // To support fallback device libraries on-demand loading, please update the // DeviceLibFuncMap in llvm/tools/sycl-post-link/sycl-post-link.cpp if you add @@ -215,4 +214,4 @@ float __devicelib_asinhf(float x) { return __spirv_ocl_asinh(x); } DEVICE_EXTERN_C_INLINE float __devicelib_atanhf(float x) { return __spirv_ocl_atanh(x); } -#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__ +#endif // __SPIR__ || __SPIRV__ diff --git a/sycl/include/sycl/stl_wrappers/cmath b/sycl/include/sycl/stl_wrappers/cmath index c25eadf6394a1..eeb8261523d1a 100644 --- a/sycl/include/sycl/stl_wrappers/cmath +++ b/sycl/include/sycl/stl_wrappers/cmath @@ -1,3 +1,11 @@ +//==------------------------ cmath -----------------------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + #pragma once // Include real STL header - the next one from the include search @@ -16,6 +24,10 @@ // *** *** +#if defined(__NVPTX__) || defined(__AMDGCN__) +#include "cmath-fallback.h" +#endif + #include #ifdef __SYCL_DEVICE_ONLY__ @@ -105,6 +117,8 @@ extern __DPCPP_SYCL_EXTERNAL double atanh(double x); extern __DPCPP_SYCL_EXTERNAL double frexp(double x, int *exp); extern __DPCPP_SYCL_EXTERNAL double ldexp(double x, int exp); extern __DPCPP_SYCL_EXTERNAL double hypot(double x, double y); +extern __DPCPP_SYCL_EXTERNAL float rintf(float x); +extern __DPCPP_SYCL_EXTERNAL double rint(double x); } #ifdef __GLIBC__ diff --git a/sycl/include/sycl/stl_wrappers/cmath-fallback.h b/sycl/include/sycl/stl_wrappers/cmath-fallback.h new file mode 100644 index 0000000000000..abf1a16aae8b8 --- /dev/null +++ b/sycl/include/sycl/stl_wrappers/cmath-fallback.h @@ -0,0 +1,541 @@ +//==------------- cmath-fallback.h -----------------------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __CMATH_FALLBACK_H__ +#define __CMATH_FALLBACK_H__ + +// This header defines device-side overloads of functions based on +// their equivalent __spirv_ built-ins. + +#ifdef __SYCL_DEVICE_ONLY__ + +// The 'sycl_device_only' attribute enables device-side overloading. +#define __DPCPP_SYCL_DEVICE __attribute__((sycl_device_only, always_inline)) +#define __DPCPP_SYCL_DEVICE_C \ + extern "C" __attribute__((sycl_device_only, always_inline)) + +// Promotion templates: the C++ standard library provides overloads that allow +// arguments of math functions to be promoted. Any floating-point argument is +// allowed to accept any integer type, which should then be promoted to double. +// When multiple floating point arguments are available passing arguments with +// different precision should promote to the larger type. The template helpers +// below provide the machinery to define these promoting overloads. +template ::value || + std::is_floating_point::value)> +struct __dpcpp_promote { +private: + // Integer types are promoted to double. + template + static typename std::enable_if::value, double>::type + test(); + + // Floating point types are used as-is. + template + static typename std::enable_if::value, U>::type + test(); + +public: + // We rely on dummy templated methods and decltype to select the right type + // based on the input T. + typedef decltype(test()) type; +}; + +// Variant without ::type to allow SFINAE for non-promotable types. +template struct __dpcpp_promote {}; + +// With a single paramter we only need to promote integers. +template +using __dpcpp_promote_1 = std::enable_if::value, double>; + +// With two or three parameters we need to promote integers and possibly +// floating point types. We rely on operator+ with decltype to deduce the +// overall promotion type. This is only needed if at least one of the parameter +// is an integer, or if there's multiple different floating point types. +template +using __dpcpp_promote_2 = + std::enable_if::value || std::is_integral::value || + std::is_integral::value, + decltype(typename __dpcpp_promote::type(0) + + typename __dpcpp_promote::type(0))>; + +template +using __dpcpp_promote_3 = + std::enable_if::value && std::is_same::value) || + std::is_integral::value || + std::is_integral::value || std::is_integral::value, + decltype(typename __dpcpp_promote::type(0) + + typename __dpcpp_promote::type(0) + + typename __dpcpp_promote::type(0))>; + +// For each math built-in we need to define float and double overloads, an +// extern "C" float variant with the 'f' suffix, and a version that promotes +// integers or mixed precision floating-point parameters. +// +// TODO: Consider targets that don't have double support. +// TODO: Enable long double support where possible. +// +// The following two macros provide an easy way to define these overloads for +// basic built-ins with one or two floating-point parameters. +#define __DPCPP_SPIRV_MAP_UNARY(NAME) \ + __DPCPP_SYCL_DEVICE_C float NAME##f(float x) { \ + return __spirv_ocl_##NAME(x); \ + } \ + __DPCPP_SYCL_DEVICE float NAME(float x) { return __spirv_ocl_##NAME(x); } \ + __DPCPP_SYCL_DEVICE double NAME(double x) { return __spirv_ocl_##NAME(x); } \ + template \ + __DPCPP_SYCL_DEVICE typename __dpcpp_promote_1::type NAME(T x) { \ + return __spirv_ocl_##NAME((double)x); \ + } + +#define __DPCPP_SPIRV_MAP_BINARY(NAME) \ + __DPCPP_SYCL_DEVICE_C float NAME##f(float x, float y) { \ + return __spirv_ocl_##NAME(x, y); \ + } \ + __DPCPP_SYCL_DEVICE float NAME(float x, float y) { \ + return __spirv_ocl_##NAME(x, y); \ + } \ + __DPCPP_SYCL_DEVICE double NAME(double x, double y) { \ + return __spirv_ocl_##NAME(x, y); \ + } \ + template \ + __DPCPP_SYCL_DEVICE __dpcpp_promote_2::type NAME(T x, U y) { \ + typedef typename __dpcpp_promote_2::type type; \ + return __spirv_ocl_##NAME((type)x, (type)y); \ + } + +/// +// FIXME: Move this to a cstdlib fallback header. + +__DPCPP_SYCL_DEVICE div_t div(int x, int y) { return {x / y, x % y}; } +__DPCPP_SYCL_DEVICE ldiv_t ldiv(long x, long y) { return {x / y, x % y}; } +__DPCPP_SYCL_DEVICE lldiv_t ldiv(long long x, long long y) { + return {x / y, x % y}; +} + +__DPCPP_SYCL_DEVICE long long abs(long long n) { return n < 0 ? -n : n; } +__DPCPP_SYCL_DEVICE_C long long llabs(long long n) { return n < 0 ? -n : n; } +__DPCPP_SYCL_DEVICE long abs(long n) { return n < 0 ? -n : n; } +__DPCPP_SYCL_DEVICE int abs(int n) { return n < 0 ? -n : n; } +__DPCPP_SYCL_DEVICE_C long labs(long n) { return n < 0 ? -n : n; } + +/// Basic operations +// + +__DPCPP_SYCL_DEVICE float abs(float x) { return x < 0 ? -x : x; } +__DPCPP_SYCL_DEVICE double abs(double x) { return x < 0 ? -x : x; } +__DPCPP_SYCL_DEVICE float fabs(float x) { return x < 0 ? -x : x; } +__DPCPP_SYCL_DEVICE_C float fabsf(float x) { return x < 0 ? -x : x; } +__DPCPP_SYCL_DEVICE double fabs(double x) { return x < 0 ? -x : x; } +template +__DPCPP_SYCL_DEVICE typename __dpcpp_promote_1::type fabs(T x) { + return x < 0 ? -x : x; +} + +__DPCPP_SPIRV_MAP_BINARY(fmod); +__DPCPP_SPIRV_MAP_BINARY(remainder); + +__DPCPP_SYCL_DEVICE_C float remquof(float x, float y, int *q) { + return __spirv_ocl_remquo(x, y, q); +} +__DPCPP_SYCL_DEVICE float remquo(float x, float y, int *q) { + return __spirv_ocl_remquo(x, y, q); +} +__DPCPP_SYCL_DEVICE double remquo(double x, double y, int *q) { + return __spirv_ocl_remquo(x, y, q); +} +template +__DPCPP_SYCL_DEVICE typename __dpcpp_promote_2::type remquo(T x, U y, + int *q) { + typedef typename __dpcpp_promote_2::type type; + return __spirv_ocl_remquo((type)x, (type)y, q); +} + +__DPCPP_SYCL_DEVICE_C float fmaf(float x, float y, float z) { + return __spirv_ocl_fma(x, y, z); +} +__DPCPP_SYCL_DEVICE float fma(float x, float y, float z) { + return __spirv_ocl_fma(x, y, z); +} +__DPCPP_SYCL_DEVICE double fma(double x, double y, double z) { + return __spirv_ocl_fma(x, y, z); +} +template +__DPCPP_SYCL_DEVICE typename __dpcpp_promote_3::type fma(T x, U y, + V z) { + typedef typename __dpcpp_promote_3::type type; + return __spirv_ocl_fma((type)x, (type)y, (type)z); +} + +__DPCPP_SPIRV_MAP_BINARY(fmax); +__DPCPP_SPIRV_MAP_BINARY(fmin); +__DPCPP_SPIRV_MAP_BINARY(fdim); +// unsupported: nan + +/// Exponential functions +// + +__DPCPP_SPIRV_MAP_UNARY(exp); +__DPCPP_SPIRV_MAP_UNARY(exp2); +__DPCPP_SPIRV_MAP_UNARY(expm1); +__DPCPP_SPIRV_MAP_UNARY(log); +__DPCPP_SPIRV_MAP_UNARY(log10); +__DPCPP_SPIRV_MAP_UNARY(log2); +__DPCPP_SPIRV_MAP_UNARY(log1p); + +/// Power functions +// + +__DPCPP_SPIRV_MAP_BINARY(pow); +__DPCPP_SPIRV_MAP_UNARY(sqrt); +__DPCPP_SPIRV_MAP_UNARY(cbrt); +__DPCPP_SPIRV_MAP_BINARY(hypot); + +/// Trigonometric functions +// + +__DPCPP_SPIRV_MAP_UNARY(sin); +__DPCPP_SPIRV_MAP_UNARY(cos); +__DPCPP_SPIRV_MAP_UNARY(tan); +__DPCPP_SPIRV_MAP_UNARY(asin); +__DPCPP_SPIRV_MAP_UNARY(acos); +__DPCPP_SPIRV_MAP_UNARY(atan); +__DPCPP_SPIRV_MAP_BINARY(atan2); + +/// Hyperbolic functions +// + +__DPCPP_SPIRV_MAP_UNARY(sinh); +__DPCPP_SPIRV_MAP_UNARY(cosh); +__DPCPP_SPIRV_MAP_UNARY(tanh); +__DPCPP_SPIRV_MAP_UNARY(asinh); +__DPCPP_SPIRV_MAP_UNARY(acosh); +__DPCPP_SPIRV_MAP_UNARY(atanh); + +/// Error and gamma functions +// + +__DPCPP_SPIRV_MAP_UNARY(erf); +__DPCPP_SPIRV_MAP_UNARY(erfc); +__DPCPP_SPIRV_MAP_UNARY(tgamma); +__DPCPP_SPIRV_MAP_UNARY(lgamma); + +/// Nearest integer floating-point operations +// + +__DPCPP_SPIRV_MAP_UNARY(ceil); +__DPCPP_SPIRV_MAP_UNARY(floor); +__DPCPP_SPIRV_MAP_UNARY(trunc); +__DPCPP_SPIRV_MAP_UNARY(round); +// unsupported: lround, llround (no spirv mapping) +__DPCPP_SPIRV_MAP_UNARY(rint); +// unsupported: lrint, llrint (no spirv mapping) + +// unsupported (partially, no spirv mapping): nearbyint +#if defined(__NVPTX__) +extern "C" SYCL_EXTERNAL float __nv_nearbyintf(float); +extern "C" SYCL_EXTERNAL double __nv_nearbyint(double); +__DPCPP_SYCL_DEVICE_C float nearbyintf(float x) { return __nv_nearbyintf(x); } +__DPCPP_SYCL_DEVICE float nearbyint(float x) { return __nv_nearbyintf(x); } +__DPCPP_SYCL_DEVICE double nearbyint(double x) { return __nv_nearbyintf(x); } +#elif defined(__AMDGCN__) +extern "C" SYCL_EXTERNAL float __ocml_nearbyint_f32(float); +extern "C" SYCL_EXTERNAL double __ocml_nearbyint_f64(double); +__DPCPP_SYCL_DEVICE_C float nearbyintf(float x) { + return __ocml_nearbyint_f32(x); +} +__DPCPP_SYCL_DEVICE float nearbyint(float x) { return __ocml_nearbyint_f32(x); } +__DPCPP_SYCL_DEVICE double nearbyint(double x) { + return __ocml_nearbyint_f64(x); +} +#endif + +/// Floating-point manipulation functions +// + +__DPCPP_SYCL_DEVICE_C float frexpf(float x, int *exp) { + return __spirv_ocl_frexp(x, exp); +} +__DPCPP_SYCL_DEVICE float frexp(float x, int *exp) { + return __spirv_ocl_frexp(x, exp); +} +__DPCPP_SYCL_DEVICE double frexp(double x, int *exp) { + return __spirv_ocl_frexp(x, exp); +} +template +__DPCPP_SYCL_DEVICE typename __dpcpp_promote_1::type frexp(T x, int *exp) { + return __spirv_ocl_frexp((double)x, exp); +} + +__DPCPP_SYCL_DEVICE_C float ldexpf(float x, int exp) { + return __spirv_ocl_ldexp(x, exp); +} +__DPCPP_SYCL_DEVICE float ldexp(float x, int exp) { + return __spirv_ocl_ldexp(x, exp); +} +__DPCPP_SYCL_DEVICE double ldexp(double x, int exp) { + return __spirv_ocl_ldexp(x, exp); +} +template +__DPCPP_SYCL_DEVICE typename __dpcpp_promote_1::type ldexp(T x, int exp) { + return __spirv_ocl_ldexp((double)x, exp); +} + +__DPCPP_SYCL_DEVICE_C float modff(float x, float *intpart) { + return __spirv_ocl_modf(x, intpart); +} +__DPCPP_SYCL_DEVICE float modf(float x, float *intpart) { + return __spirv_ocl_modf(x, intpart); +} +__DPCPP_SYCL_DEVICE double modf(double x, double *intpart) { + return __spirv_ocl_modf(x, intpart); +} +// modf only supports integer x when the intpart is double. +template +__DPCPP_SYCL_DEVICE typename __dpcpp_promote_1::type modf(T x, + double *intpart) { + return __spirv_ocl_modf((double)x, intpart); +} + +__DPCPP_SYCL_DEVICE_C float scalbnf(float x, int exp) { + return __spirv_ocl_ldexp(x, exp); +} +__DPCPP_SYCL_DEVICE float scalbn(float x, int exp) { + return __spirv_ocl_ldexp(x, exp); +} +__DPCPP_SYCL_DEVICE double scalbn(double x, int exp) { + return __spirv_ocl_ldexp(x, exp); +} +template +__DPCPP_SYCL_DEVICE typename __dpcpp_promote_1::type scalbn(T x, int exp) { + return __spirv_ocl_ldexp((double)x, exp); +} + +__DPCPP_SYCL_DEVICE_C float scalblnf(float x, long exp) { + return __spirv_ocl_ldexp(x, (int)exp); +} +__DPCPP_SYCL_DEVICE float scalbln(float x, long exp) { + return __spirv_ocl_ldexp(x, (int)exp); +} +__DPCPP_SYCL_DEVICE double scalbln(double x, long exp) { + return __spirv_ocl_ldexp(x, (int)exp); +} +template +__DPCPP_SYCL_DEVICE typename __dpcpp_promote_1::type scalbln(T x, long exp) { + return __spirv_ocl_ldexp((double)x, (int)exp); +} + +__DPCPP_SYCL_DEVICE_C int ilogbf(float x) { return __spirv_ocl_ilogb(x); } +__DPCPP_SYCL_DEVICE int ilogb(float x) { return __spirv_ocl_ilogb(x); } +__DPCPP_SYCL_DEVICE int ilogb(double x) { return __spirv_ocl_ilogb(x); } +// ilogb needs a special template since its signature doesn't include the +// promoted type anywhere, so it needs to be specialized differently. +template ::value, + bool>::type = true> +__DPCPP_SYCL_DEVICE int ilogb(T x) { + return __spirv_ocl_ilogb((double)x); +} + +__DPCPP_SPIRV_MAP_UNARY(logb); +__DPCPP_SPIRV_MAP_BINARY(nextafter); +// unsupported: nextforward +__DPCPP_SPIRV_MAP_BINARY(copysign); + +/// Classification and comparison +// + +// unsupported: fpclassify +// unsupported: isfinite +// unsupported: isinf +// unsupported: isnan +// unsupported: isnormal +// unsupported: signbit +// unsupported: isgreater +// unsupported: isgreaterequal +// unsupported: isless +// unsupported: islessequal +// unsupported: islessgreated +// unsupported: isunordered + +// Now drag all of the overloads we've just defined in the std namespace. For +// the overloads to work properly we need to ensure our namespace matches +// exactly the one of the system C++ library. +#ifdef _LIBCPP_BEGIN_NAMESPACE_STD +_LIBCPP_BEGIN_NAMESPACE_STD +#else +namespace std { +#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION +_GLIBCXX_BEGIN_NAMESPACE_VERSION +#endif +#endif + +// +using ::div; +using ::labs; +using ::ldiv; +using ::llabs; +using ::lldiv; + +// Basic operations +using ::abs; +using ::fabs; +using ::fabsf; +using ::fdim; +using ::fdimf; +using ::fma; +using ::fmaf; +using ::fmax; +using ::fmaxf; +using ::fmin; +using ::fminf; +using ::fmod; +using ::fmodf; +using ::remainder; +using ::remainderf; +using ::remquo; +using ::remquof; +// using ::nan; +// using ::nanf; + +// Exponential functions +using ::exp; +using ::exp2; +using ::exp2f; +using ::expf; +using ::expm1; +using ::expm1f; +using ::log; +using ::log10; +using ::log10f; +using ::log1p; +using ::log1pf; +using ::log2; +using ::log2f; +using ::logf; + +// Power functions +using ::cbrt; +using ::cbrtf; +using ::hypot; +using ::hypotf; +using ::pow; +using ::powf; +using ::sqrt; +using ::sqrtf; + +// Trigonometric functions +using ::acos; +using ::acosf; +using ::asin; +using ::asinf; +using ::atan; +using ::atan2; +using ::atan2f; +using ::atanf; +using ::cos; +using ::cosf; +using ::sin; +using ::sinf; +using ::tan; +using ::tanf; + +// Hyperbloic functions +using ::acosh; +using ::acoshf; +using ::asinh; +using ::asinhf; +using ::atanh; +using ::atanhf; +using ::cosh; +using ::coshf; +using ::sinh; +using ::sinhf; +using ::tanh; +using ::tanhf; + +// Error and gamma functions +using ::erf; +using ::erfc; +using ::erfcf; +using ::erff; +using ::lgamma; +using ::lgammaf; +using ::tgamma; +using ::tgammaf; + +// Nearest integer floating-point operations +using ::ceil; +using ::ceilf; +using ::floor; +using ::floorf; +using ::round; +using ::roundf; +using ::trunc; +using ::truncf; +// using ::lround; +// using ::llround; +using ::rint; +using ::rintf; +// using ::lrint; +// using ::llrint; + +#if defined(__NVPTX__) || defined(__AMDGCN__) +using ::nearbyint; +using ::nearbyintf; +#endif + +// Floating-point manipulation functions +using ::frexp; +using ::frexpf; +using ::ilogb; +using ::ilogbf; +using ::ldexp; +using ::ldexpf; +using ::logb; +using ::logbf; +using ::modf; +using ::modff; +using ::nextafter; +using ::nextafterf; +using ::scalbln; +using ::scalblnf; +using ::scalbn; +using ::scalbnf; +// using ::nextforward +// using ::nextforwardf +using ::copysign; +using ::copysignf; + +// Classification and comparison +// using ::fpclassify; +// using ::isfinite; +// using ::isgreater; +// using ::isgreaterequal; +// using ::isinf; +// using ::isless; +// using ::islessequal; +// using ::islessgreater; +// using ::isnan; +// using ::isnormal; +// using ::isunordered; +// using ::signbit; + +#ifdef _LIBCPP_END_NAMESPACE_STD +_LIBCPP_END_NAMESPACE_STD +#else +#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION +_GLIBCXX_END_NAMESPACE_VERSION +#endif +} // namespace std +#endif + +#undef __DPCPP_SPIRV_MAP_BINARY +#undef __DPCPP_SPIRV_MAP_UNARY +#undef __DPCPP_SYCL_DEVICE_C +#undef __DPCPP_SYCL_DEVICE +#endif +#endif diff --git a/sycl/test-e2e/DeviceLib/cmath_fp64_test.cpp b/sycl/test-e2e/DeviceLib/cmath_fp64_test.cpp index bda9ce2ff1ced..833b3aea6203a 100644 --- a/sycl/test-e2e/DeviceLib/cmath_fp64_test.cpp +++ b/sycl/test-e2e/DeviceLib/cmath_fp64_test.cpp @@ -21,13 +21,13 @@ namespace s = sycl; constexpr s::access::mode sycl_read = s::access::mode::read; constexpr s::access::mode sycl_write = s::access::mode::write; -#define TEST_NUM 74 +#define TEST_NUM 75 double ref[TEST_NUM] = { - 6, 100, 0.5, 1.0, 0, 0, -2, 1, 2, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, - 0, 1, 1, 0.5, 0, 2, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 1, 0, 1, - 2, 0, 1, 2, 5, 0, 0, 0, 0, 0.5, 0.5, NAN, NAN, 2, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + 1.0, 6, 100, 0.5, 1.0, 0, 0, -2, 1, 2, 1, 1, 1, 0, 1, 1, 0, 0, 0, + 0, 0, 1, 1, 0.5, 0, 2, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 1, 0, + 1, 2, 0, 1, 2, 5, 0, 0, 0, 0, 0.5, 0.5, NAN, NAN, 2, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; double refIptr = 1; @@ -62,6 +62,8 @@ template void device_cmath_test(s::queue &deviceQueue) { T minus_infinity = -INFINITY; double subnormal; *((uint64_t *)&subnormal) = 0xFFFFFFFFFFFFFULL; + + res_access[i++] = std::rint(0.9); res_access[i++] = std::scalbln(1.5, 2); res_access[i++] = sycl::exp10(2.0); res_access[i++] = sycl::rsqrt(4.0); diff --git a/sycl/test-e2e/DeviceLib/cmath_test.cpp b/sycl/test-e2e/DeviceLib/cmath_test.cpp index 6a453e56f704f..c5c58f09023d4 100644 --- a/sycl/test-e2e/DeviceLib/cmath_test.cpp +++ b/sycl/test-e2e/DeviceLib/cmath_test.cpp @@ -24,13 +24,13 @@ namespace s = sycl; constexpr s::access::mode sycl_read = s::access::mode::read; constexpr s::access::mode sycl_write = s::access::mode::write; -#define TEST_NUM 70 +#define TEST_NUM 71 -float ref[TEST_NUM] = {100, 0.5, 1.0, 0, 0, -2, 1, 2, 1, 1, 0, 1, 1, 0, - 0, 0, 0, 0, 1, 1, 0.5, 0, 0, 1, 0, 2, 0, 0, - 0, 0, 0, 1, 0, 1, 2, 0, 1, 2, 5, 0, 0, 0, - 0, 0.5, 0.5, NAN, NAN, 2, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; +float ref[TEST_NUM] = {1.0f, 100, 0.5, 1.0, 0, 0, -2, 1, 2, 1, 1, 0, 1, 1, 0, + 0, 0, 0, 0, 1, 1, 0.5, 0, 0, 1, 0, 2, 0, 0, 0, + 0, 0, 1, 0, 1, 2, 0, 1, 2, 5, 0, 0, 0, 0, 0.5, + 0.5, NAN, NAN, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; float refIptr = 1; @@ -60,6 +60,7 @@ template void device_cmath_test_1(s::queue &deviceQueue) { float subnormal; *((uint32_t *)&subnormal) = 0x7FFFFF; + res_access[i++] = std::rint(0.9f); res_access[i++] = sycl::exp10(2.0f); res_access[i++] = sycl::rsqrt(4.0f); res_access[i++] = std::trunc(1.2f); diff --git a/sycl/test-e2e/DeviceLib/math_fp64_test.cpp b/sycl/test-e2e/DeviceLib/math_fp64_test.cpp index 86dad1b3c0d3f..5f160b9f2a7b1 100644 --- a/sycl/test-e2e/DeviceLib/math_fp64_test.cpp +++ b/sycl/test-e2e/DeviceLib/math_fp64_test.cpp @@ -20,12 +20,12 @@ namespace s = sycl; constexpr s::access::mode sycl_read = s::access::mode::read; constexpr s::access::mode sycl_write = s::access::mode::write; -#define TEST_NUM 61 +#define TEST_NUM 62 double ref_val[TEST_NUM] = { - 1, 0, 0, 0, 0, 0, 0, 1, 1, 0.5, 0, 2, 0, 0, 1, 0, 2, 0, 0, 0, 0, - 0, 1, 0, 1, 2, 0, 1, 2, 5, 0, 0, 0, 0, 0.5, 0.5, NAN, NAN, 2, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + 1.0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0.5, 0, 2, 0, 0, 1, 0, 2, 0, 0, 0, + 0, 0, 1, 0, 1, 2, 0, 1, 2, 5, 0, 0, 0, 0, 0.5, 0.5, NAN, NAN, 2, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; double refIptr = 1; @@ -61,6 +61,7 @@ void device_math_test(s::queue &deviceQueue) { double subnormal; *((uint64_t *)&subnormal) = 0xFFFFFFFFFFFFFULL; + res_access[i++] = rint(0.9); res_access[i++] = cos(0.0); res_access[i++] = sin(0.0); res_access[i++] = log(1.0); diff --git a/sycl/test-e2e/DeviceLib/math_test.cpp b/sycl/test-e2e/DeviceLib/math_test.cpp index 029409b617473..c9a98f468225d 100644 --- a/sycl/test-e2e/DeviceLib/math_test.cpp +++ b/sycl/test-e2e/DeviceLib/math_test.cpp @@ -18,12 +18,12 @@ namespace s = sycl; constexpr s::access::mode sycl_read = s::access::mode::read; constexpr s::access::mode sycl_write = s::access::mode::write; -#define TEST_NUM 59 +#define TEST_NUM 60 float ref_val[TEST_NUM] = { - 1, 0, 0, 0, 0, 0, 0, 1, 1, 0.5, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, - 1, 0, 1, 2, 0, 1, 2, 5, 0, 0, 0, 0, 0.5, 0.5, NAN, NAN, 2, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + 1.0f, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0.5, 0, 0, 1, 0, 2, 0, 0, 0, 0, + 0, 1, 0, 1, 2, 0, 1, 2, 5, 0, 0, 0, 0, 0.5, 0.5, NAN, NAN, 2, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; float refIptr = 1; @@ -53,6 +53,7 @@ void device_math_test(s::queue &deviceQueue) { float subnormal; *((uint32_t *)&subnormal) = 0x7FFFFF; + res_access[i++] = rintf(0.9); res_access[i++] = cosf(0.0f); res_access[i++] = sinf(0.0f); res_access[i++] = logf(1.0f); diff --git a/sycl/test/check_device_code/math-builtins/cmath-fallback.cpp b/sycl/test/check_device_code/math-builtins/cmath-fallback.cpp new file mode 100644 index 0000000000000..1da771bfc1861 --- /dev/null +++ b/sycl/test/check_device_code/math-builtins/cmath-fallback.cpp @@ -0,0 +1,481 @@ +// REQUIRES: cuda +// Note: This isn't really target specific and should be switched to spir when +// it's enabled for it. + +// RUN: %clangxx -fsycl -fsycl-targets=nvptx64-nvidia-cuda -S -Xclang -emit-llvm -fsycl-device-only %s -o - | FileCheck %s + +#include + +// CHECK-LABEL: entry +__attribute__((sycl_device)) void entry(float *fp, double *dp, int *ip, + long *lp, long long *llp, float *rf, + double *rd, int *ri) { + // Use an incrementing index to prevent the compiler from optimizing some + // calls that would store to the same address. + int idx = 0; + + // For each supported standard math built-in, we test that the following + // overloads are properly mapped to __spirv_ built-ins: + // + // * Float only. + // * Float only with 'f' suffix. + // * Double only. + // * Integer promotion. + // * Mixed floating point promotion (when applicable). + // + // CHECK: __spirv_ocl_fmodff + rf[idx++] = std::fmod(fp[0], fp[1]); + // CHECK: __spirv_ocl_fmodff + rf[idx++] = std::fmodf(fp[2], fp[1]); + // CHECK: __spirv_ocl_fmoddd + rd[idx++] = std::fmod(dp[0], dp[1]); + // CHECK: __spirv_ocl_fmoddd + rd[idx++] = std::fmod(fp[0], ip[1]); + // CHECK: __spirv_ocl_fmoddd + rd[idx++] = std::fmod(fp[0], dp[1]); + + // CHECK: __spirv_ocl_remainderff + rf[idx++] = std::remainder(fp[0], fp[1]); + // CHECK: __spirv_ocl_remainderff + rf[idx++] = std::remainderf(fp[2], fp[1]); + // CHECK: __spirv_ocl_remainderdd + rd[idx++] = std::remainder(dp[0], dp[1]); + // CHECK: __spirv_ocl_remainderdd + rd[idx++] = std::remainder(fp[0], ip[1]); + // CHECK: __spirv_ocl_remainderdd + rd[idx++] = std::remainder(fp[0], dp[1]); + + // CHECK: __spirv_ocl_remquoff + rf[idx++] = std::remquo(fp[0], fp[1], ip); + // CHECK: __spirv_ocl_remquoff + rf[idx++] = std::remquof(fp[2], fp[1], ip); + // CHECK: __spirv_ocl_remquodd + rd[idx++] = std::remquo(dp[0], dp[1], ip); + // CHECK: __spirv_ocl_remquodd + rd[idx++] = std::remquo(fp[0], ip[1], ip); + // CHECK: __spirv_ocl_remquodd + rd[idx++] = std::remquo(fp[0], dp[1], ip); + + // CHECK: __spirv_ocl_fmaff + rf[idx++] = std::fma(fp[0], fp[1], fp[2]); + // CHECK: __spirv_ocl_fmaff + rf[idx++] = std::fmaf(fp[3], fp[1], fp[2]); + // CHECK: __spirv_ocl_fmadd + rd[idx++] = std::fma(dp[0], dp[1], dp[2]); + // CHECK: __spirv_ocl_fmadd + rd[idx++] = std::fma(fp[0], ip[1], fp[2]); + // CHECK: __spirv_ocl_fmadd + rd[idx++] = std::fma(fp[0], dp[1], fp[2]); + + // CHECK: __spirv_ocl_fmaxff + rf[idx++] = std::fmax(fp[0], fp[1]); + // CHECK: __spirv_ocl_fmaxff + rf[idx++] = std::fmaxf(fp[2], fp[1]); + // CHECK: __spirv_ocl_fmaxdd + rd[idx++] = std::fmax(dp[0], dp[1]); + // CHECK: __spirv_ocl_fmaxdd + rd[idx++] = std::fmax(fp[0], ip[1]); + // CHECK: __spirv_ocl_fmaxdd + rd[idx++] = std::fmax(fp[0], dp[1]); + + // CHECK: __spirv_ocl_fminff + rf[idx++] = std::fmin(fp[0], fp[1]); + // CHECK: __spirv_ocl_fminff + rf[idx++] = std::fminf(fp[2], fp[1]); + // CHECK: __spirv_ocl_fmindd + rd[idx++] = std::fmin(dp[0], dp[1]); + // CHECK: __spirv_ocl_fmindd + rd[idx++] = std::fmin(fp[0], ip[1]); + // CHECK: __spirv_ocl_fmindd + rd[idx++] = std::fmin(fp[0], dp[1]); + + // CHECK: __spirv_ocl_fdimff + rf[idx++] = std::fdim(fp[0], fp[1]); + // CHECK: __spirv_ocl_fdimff + rf[idx++] = std::fdimf(fp[2], fp[1]); + // CHECK: __spirv_ocl_fdimdd + rd[idx++] = std::fdim(dp[0], dp[1]); + // CHECK: __spirv_ocl_fdimdd + rd[idx++] = std::fdim(fp[0], ip[1]); + // CHECK: __spirv_ocl_fdimdd + rd[idx++] = std::fdim(fp[0], dp[1]); + + // CHECK: __spirv_ocl_expf + rf[idx++] = std::exp(fp[0]); + // CHECK: __spirv_ocl_expf + rf[idx++] = std::expf(fp[1]); + // CHECK: __spirv_ocl_expd + rd[idx++] = std::exp(dp[0]); + // CHECK: __spirv_ocl_expd + rd[idx++] = std::exp(ip[0]); + + // CHECK: __spirv_ocl_exp2f + rf[idx++] = std::exp2(fp[0]); + // CHECK: __spirv_ocl_exp2f + rf[idx++] = std::exp2f(fp[1]); + // CHECK: __spirv_ocl_exp2d + rd[idx++] = std::exp2(dp[0]); + // CHECK: __spirv_ocl_exp2d + rd[idx++] = std::exp2(ip[0]); + + // CHECK: __spirv_ocl_expm1f + rf[idx++] = std::expm1(fp[0]); + // CHECK: __spirv_ocl_expm1f + rf[idx++] = std::expm1f(fp[1]); + // CHECK: __spirv_ocl_expm1d + rd[idx++] = std::expm1(dp[0]); + // CHECK: __spirv_ocl_expm1d + rd[idx++] = std::expm1(ip[0]); + + // CHECK: __spirv_ocl_logf + rf[idx++] = std::log(fp[0]); + // CHECK: __spirv_ocl_logf + rf[idx++] = std::logf(fp[1]); + // CHECK: __spirv_ocl_logd + rd[idx++] = std::log(dp[0]); + // CHECK: __spirv_ocl_logd + rd[idx++] = std::log(ip[0]); + + // CHECK: __spirv_ocl_log10f + rf[idx++] = std::log10(fp[0]); + // CHECK: __spirv_ocl_log10f + rf[idx++] = std::log10f(fp[1]); + // CHECK: __spirv_ocl_log10d + rd[idx++] = std::log10(dp[0]); + // CHECK: __spirv_ocl_log10d + rd[idx++] = std::log10(ip[0]); + + // CHECK: __spirv_ocl_log2f + rf[idx++] = std::log2(fp[0]); + // CHECK: __spirv_ocl_log2f + rf[idx++] = std::log2f(fp[1]); + // CHECK: __spirv_ocl_log2d + rd[idx++] = std::log2(dp[0]); + // CHECK: __spirv_ocl_log2d + rd[idx++] = std::log2(ip[0]); + + // CHECK: __spirv_ocl_log1pf + rf[idx++] = std::log1p(fp[0]); + // CHECK: __spirv_ocl_log1pf + rf[idx++] = std::log1pf(fp[1]); + // CHECK: __spirv_ocl_log1pd + rd[idx++] = std::log1p(dp[0]); + // CHECK: __spirv_ocl_log1pd + rd[idx++] = std::log1p(ip[0]); + + // CHECK: __spirv_ocl_powff + rf[idx++] = std::pow(fp[0], fp[1]); + // CHECK: __spirv_ocl_powff + rf[idx++] = std::powf(fp[2], fp[1]); + // CHECK: __spirv_ocl_powdd + rd[idx++] = std::pow(dp[0], dp[1]); + // CHECK: __spirv_ocl_powdd + rd[idx++] = std::pow(ip[0], fp[1]); + // CHECK: __spirv_ocl_powdd + rd[idx++] = std::pow(dp[0], fp[1]); + + // CHECK: __spirv_ocl_sqrtf + rf[idx++] = std::sqrt(fp[0]); + // CHECK: __spirv_ocl_sqrtf + rf[idx++] = std::sqrtf(fp[1]); + // CHECK: __spirv_ocl_sqrtd + rd[idx++] = std::sqrt(dp[0]); + // CHECK: __spirv_ocl_sqrtd + rd[idx++] = std::sqrt(ip[0]); + + // CHECK: __spirv_ocl_cbrtf + rf[idx++] = std::cbrt(fp[0]); + // CHECK: __spirv_ocl_cbrtf + rf[idx++] = std::cbrtf(fp[1]); + // CHECK: __spirv_ocl_cbrtd + rd[idx++] = std::cbrt(dp[0]); + // CHECK: __spirv_ocl_cbrtd + rd[idx++] = std::cbrt(ip[0]); + + // CHECK: __spirv_ocl_hypotff + rf[idx++] = std::hypot(fp[0], fp[1]); + // CHECK: __spirv_ocl_hypotff + rf[idx++] = std::hypotf(fp[2], fp[1]); + // CHECK: __spirv_ocl_hypotdd + rd[idx++] = std::hypot(dp[0], dp[1]); + // CHECK: __spirv_ocl_hypotdd + rd[idx++] = std::hypot(ip[0], fp[1]); + // CHECK: __spirv_ocl_hypotdd + rd[idx++] = std::hypot(dp[0], fp[1]); + + // CHECK: __spirv_ocl_sinf + rf[idx++] = std::sin(fp[0]); + // CHECK: __spirv_ocl_sinf + rf[idx++] = std::sinf(fp[1]); + // CHECK: __spirv_ocl_sind + rd[idx++] = std::sin(dp[0]); + // CHECK: __spirv_ocl_sind + rd[idx++] = std::sin(ip[0]); + + // CHECK: __spirv_ocl_cosf + rf[idx++] = std::cos(fp[0]); + // CHECK: __spirv_ocl_cosf + rf[idx++] = std::cosf(fp[1]); + // CHECK: __spirv_ocl_cosd + rd[idx++] = std::cos(dp[0]); + // CHECK: __spirv_ocl_cosd + rd[idx++] = std::cos(ip[0]); + + // CHECK: __spirv_ocl_tanf + rf[idx++] = std::tan(fp[0]); + // CHECK: __spirv_ocl_tanf + rf[idx++] = std::tanf(fp[1]); + // CHECK: __spirv_ocl_tand + rd[idx++] = std::tan(dp[0]); + // CHECK: __spirv_ocl_tand + rd[idx++] = std::tan(ip[0]); + + // CHECK: __spirv_ocl_asinf + rf[idx++] = std::asin(fp[0]); + // CHECK: __spirv_ocl_asinf + rf[idx++] = std::asinf(fp[1]); + // CHECK: __spirv_ocl_asind + rd[idx++] = std::asin(dp[0]); + // CHECK: __spirv_ocl_asind + rd[idx++] = std::asin(ip[0]); + + // CHECK: __spirv_ocl_acosf + rf[idx++] = std::acos(fp[0]); + // CHECK: __spirv_ocl_acosf + rf[idx++] = std::acosf(fp[1]); + // CHECK: __spirv_ocl_acosd + rd[idx++] = std::acos(dp[0]); + // CHECK: __spirv_ocl_acosd + rd[idx++] = std::acos(ip[0]); + + // CHECK: __spirv_ocl_atanf + rf[idx++] = std::atan(fp[0]); + // CHECK: __spirv_ocl_atanf + rf[idx++] = std::atanf(fp[1]); + // CHECK: __spirv_ocl_atand + rd[idx++] = std::atan(dp[0]); + // CHECK: __spirv_ocl_atand + rd[idx++] = std::atan(ip[0]); + + // CHECK: __spirv_ocl_atan2ff + rf[idx++] = std::atan2(fp[0], fp[1]); + // CHECK: __spirv_ocl_atan2ff + rf[idx++] = std::atan2f(fp[2], fp[1]); + // CHECK: __spirv_ocl_atan2dd + rd[idx++] = std::atan2(dp[0], dp[1]); + // CHECK: __spirv_ocl_atan2dd + rd[idx++] = std::atan2(ip[0], fp[1]); + // CHECK: __spirv_ocl_atan2dd + rd[idx++] = std::atan2(dp[0], fp[1]); + + // CHECK: __spirv_ocl_sinhf + rf[idx++] = std::sinh(fp[0]); + // CHECK: __spirv_ocl_sinhf + rf[idx++] = std::sinhf(fp[1]); + // CHECK: __spirv_ocl_sinhd + rd[idx++] = std::sinh(dp[0]); + // CHECK: __spirv_ocl_sinhd + rd[idx++] = std::sinh(ip[0]); + + // CHECK: __spirv_ocl_coshf + rf[idx++] = std::cosh(fp[0]); + // CHECK: __spirv_ocl_coshf + rf[idx++] = std::coshf(fp[1]); + // CHECK: __spirv_ocl_coshd + rd[idx++] = std::cosh(dp[0]); + // CHECK: __spirv_ocl_coshd + rd[idx++] = std::cosh(ip[0]); + + // CHECK: __spirv_ocl_tanhf + rf[idx++] = std::tanh(fp[0]); + // CHECK: __spirv_ocl_tanhf + rf[idx++] = std::tanhf(fp[1]); + // CHECK: __spirv_ocl_tanhd + rd[idx++] = std::tanh(dp[0]); + // CHECK: __spirv_ocl_tanhd + rd[idx++] = std::tanh(ip[0]); + + // CHECK: __spirv_ocl_asinhf + rf[idx++] = std::asinh(fp[0]); + // CHECK: __spirv_ocl_asinhf + rf[idx++] = std::asinhf(fp[1]); + // CHECK: __spirv_ocl_asinhd + rd[idx++] = std::asinh(dp[0]); + // CHECK: __spirv_ocl_asinhd + rd[idx++] = std::asinh(ip[0]); + + // CHECK: __spirv_ocl_acoshf + rf[idx++] = std::acosh(fp[0]); + // CHECK: __spirv_ocl_acoshf + rf[idx++] = std::acoshf(fp[1]); + // CHECK: __spirv_ocl_acoshd + rd[idx++] = std::acosh(dp[0]); + // CHECK: __spirv_ocl_acoshd + rd[idx++] = std::acosh(ip[0]); + + // CHECK: __spirv_ocl_atanhf + rf[idx++] = std::atanh(fp[0]); + // CHECK: __spirv_ocl_atanhf + rf[idx++] = std::atanhf(fp[1]); + // CHECK: __spirv_ocl_atanhd + rd[idx++] = std::atanh(dp[0]); + // CHECK: __spirv_ocl_atanhd + rd[idx++] = std::atanh(ip[0]); + + // CHECK: __spirv_ocl_erff + rf[idx++] = std::erf(fp[0]); + // CHECK: __spirv_ocl_erff + rf[idx++] = std::erff(fp[1]); + // CHECK: __spirv_ocl_erfd + rd[idx++] = std::erf(dp[0]); + // CHECK: __spirv_ocl_erfd + rd[idx++] = std::erf(ip[0]); + + // CHECK: __spirv_ocl_erfcf + rf[idx++] = std::erfc(fp[0]); + // CHECK: __spirv_ocl_erfcf + rf[idx++] = std::erfcf(fp[1]); + // CHECK: __spirv_ocl_erfcd + rd[idx++] = std::erfc(dp[0]); + // CHECK: __spirv_ocl_erfcd + rd[idx++] = std::erfc(ip[0]); + + // CHECK: __spirv_ocl_tgammaf + rf[idx++] = std::tgamma(fp[0]); + // CHECK: __spirv_ocl_tgammaf + rf[idx++] = std::tgammaf(fp[1]); + // CHECK: __spirv_ocl_tgammad + rd[idx++] = std::tgamma(dp[0]); + // CHECK: __spirv_ocl_tgammad + rd[idx++] = std::tgamma(ip[0]); + + // CHECK: __spirv_ocl_lgammaf + rf[idx++] = std::lgamma(fp[0]); + // CHECK: __spirv_ocl_lgammaf + rf[idx++] = std::lgammaf(fp[1]); + // CHECK: __spirv_ocl_lgammad + rd[idx++] = std::lgamma(dp[0]); + // CHECK: __spirv_ocl_lgammad + rd[idx++] = std::lgamma(ip[0]); + + // CHECK: __spirv_ocl_ceilf + rf[idx++] = std::ceil(fp[0]); + // CHECK: __spirv_ocl_ceilf + rf[idx++] = std::ceilf(fp[1]); + // CHECK: __spirv_ocl_ceild + rd[idx++] = std::ceil(dp[0]); + // CHECK: __spirv_ocl_ceild + rd[idx++] = std::ceil(ip[0]); + + // CHECK: __spirv_ocl_floorf + rf[idx++] = std::floor(fp[0]); + // CHECK: __spirv_ocl_floorf + rf[idx++] = std::floorf(fp[1]); + // CHECK: __spirv_ocl_floord + rd[idx++] = std::floor(dp[0]); + // CHECK: __spirv_ocl_floord + rd[idx++] = std::floor(ip[0]); + + // CHECK: __spirv_ocl_truncf + rf[idx++] = std::trunc(fp[0]); + // CHECK: __spirv_ocl_truncf + rf[idx++] = std::truncf(fp[1]); + // CHECK: __spirv_ocl_truncd + rd[idx++] = std::trunc(dp[0]); + // CHECK: __spirv_ocl_truncd + rd[idx++] = std::trunc(ip[0]); + + // CHECK: __spirv_ocl_roundf + rf[idx++] = std::round(fp[0]); + // CHECK: __spirv_ocl_roundf + rf[idx++] = std::roundf(fp[1]); + // CHECK: __spirv_ocl_roundd + rd[idx++] = std::round(dp[0]); + // CHECK: __spirv_ocl_roundd + rd[idx++] = std::round(ip[0]); + + // CHECK: __spirv_ocl_rintf + rf[idx++] = std::rint(fp[0]); + // CHECK: __spirv_ocl_rintf + rf[idx++] = std::rintf(fp[1]); + // CHECK: __spirv_ocl_rintd + rd[idx++] = std::rint(dp[0]); + // CHECK: __spirv_ocl_rintd + rd[idx++] = std::rint(ip[0]); + + // CHECK: __spirv_ocl_frexpf + rf[idx++] = std::frexp(fp[0], ip); + // CHECK: __spirv_ocl_frexpf + rf[idx++] = std::frexpf(fp[1], ip); + // CHECK: __spirv_ocl_frexpd + rd[idx++] = std::frexp(dp[0], ip); + // CHECK: __spirv_ocl_frexpd + rd[idx++] = std::frexp(ip[0], ip); + + // CHECK: __spirv_ocl_ldexpf + rf[idx++] = std::ldexp(fp[0], ip[0]); + // CHECK: __spirv_ocl_ldexpf + rf[idx++] = std::ldexpf(fp[1], ip[0]); + // CHECK: __spirv_ocl_ldexpd + rd[idx++] = std::ldexp(dp[0], ip[0]); + // CHECK: __spirv_ocl_ldexpd + rd[idx++] = std::ldexp(ip[0], ip[0]); + + // CHECK: __spirv_ocl_modff + rf[idx++] = std::modf(fp[0], fp); + // CHECK: __spirv_ocl_modff + rf[idx++] = std::modff(fp[1], fp); + // CHECK: __spirv_ocl_modfd + rd[idx++] = std::modf(dp[0], dp); + // CHECK: __spirv_ocl_modfd + rd[idx++] = std::modf(ip[0], dp); + + // CHECK: __spirv_ocl_ldexpf + rf[idx++] = std::scalbn(fp[0], ip[0]); + // CHECK: __spirv_ocl_ldexpf + rf[idx++] = std::scalbnf(fp[1], ip[0]); + // CHECK: __spirv_ocl_ldexpd + rd[idx++] = std::scalbn(dp[0], ip[0]); + // CHECK: __spirv_ocl_ldexpd + rd[idx++] = std::scalbn(ip[0], ip[0]); + + // CHECK: __spirv_ocl_ilogbf + ri[idx++] = std::ilogb(fp[0]); + // CHECK: __spirv_ocl_ilogbf + ri[idx++] = std::ilogbf(fp[1]); + // CHECK: __spirv_ocl_ilogbd + ri[idx++] = std::ilogb(dp[0]); + // CHECK: __spirv_ocl_ilogbd + ri[idx++] = std::ilogb(ip[0]); + + // CHECK: __spirv_ocl_logbf + rf[idx++] = std::logb(fp[0]); + // CHECK: __spirv_ocl_logbf + rf[idx++] = std::logbf(fp[1]); + // CHECK: __spirv_ocl_logbd + rd[idx++] = std::logb(dp[0]); + // CHECK: __spirv_ocl_logbd + rd[idx++] = std::logb(ip[0]); + + // CHECK: __spirv_ocl_nextafterf + rf[idx++] = std::nextafter(fp[0], fp[1]); + // CHECK: __spirv_ocl_nextafterf + rf[idx++] = std::nextafterf(fp[2], fp[1]); + // CHECK: __spirv_ocl_nextafterd + rd[idx++] = std::nextafter(dp[0], dp[1]); + // CHECK: __spirv_ocl_nextafterd + rd[idx++] = std::nextafter(ip[0], fp[1]); + // CHECK: __spirv_ocl_nextafterd + rd[idx++] = std::nextafter(dp[0], fp[1]); + + // CHECK: __spirv_ocl_copysignf + rf[idx++] = std::copysign(fp[0], fp[1]); + // CHECK: __spirv_ocl_copysignf + rf[idx++] = std::copysignf(fp[2], fp[1]); + // CHECK: __spirv_ocl_copysignd + rd[idx++] = std::copysign(dp[0], dp[1]); + // CHECK: __spirv_ocl_copysignd + rd[idx++] = std::copysign(ip[0], fp[1]); + // CHECK: __spirv_ocl_copysignd + rd[idx++] = std::copysign(dp[0], fp[1]); +}