From b139fa48cfa3d5247d8194f2e375d6f7c23328ad Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Tue, 27 May 2025 17:23:48 +0100 Subject: [PATCH 01/24] [SYCL][NVPTX][AMDGCN] Move devicelib cmath to header This patch experiments with moving standard library math built-ins from libdevice into headers. This is based on the way clang handles this for CUDA and HIP. In these languages you can define device functions as overloads. This allows re-defining standard library functions specifically for the device in a header, so that we can provide a device specific implementations of certain built-ins while still using the regular standard library headers. By default SYCL doesn't do overloads for device functions, so this patch introduces a new `sycl_device_only` attribute, this attribute will make a function device only and allow it to overload with existing functions. --- clang/include/clang/Basic/Attr.td | 7 + clang/lib/AST/Decl.cpp | 5 + clang/lib/CodeGen/CGBuiltin.cpp | 7 +- clang/lib/Sema/SemaDeclAttr.cpp | 3 + clang/lib/Sema/SemaOverload.cpp | 27 ++ libdevice/cmath_wrapper.cpp | 5 +- libdevice/cmath_wrapper_fp64.cpp | 5 +- libdevice/fallback-cmath.cpp | 5 +- sycl/include/sycl/stl_wrappers/cmath | 4 + .../sycl/stl_wrappers/cmath-fallback.h | 283 ++++++++++++++++++ 10 files changed, 337 insertions(+), 14 deletions(-) create mode 100644 sycl/include/sycl/stl_wrappers/cmath-fallback.h diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 0747eda3addb4..8c2eddcc1487d 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1607,6 +1607,13 @@ def SYCLDevice : InheritableAttr { let Documentation = [SYCLDeviceDocs]; } +def SYCLDeviceOnly : InheritableAttr { + let Spellings = [GNU<"sycl_device_only">]; + let Subjects = SubjectList<[Function]>; + let LangOpts = [SYCLIsDevice]; + let Documentation = [Undocumented]; +} + def SYCLGlobalVar : InheritableAttr { let Spellings = [GNU<"sycl_global_var">]; let Subjects = SubjectList<[GlobalStorageNonLocalVar], ErrorDiag>; diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index fae15742b52ab..62d1f0d9ba9fe 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -3729,6 +3729,11 @@ unsigned FunctionDecl::getBuiltinID(bool ConsiderWrapperFunctions) const { !(BuiltinID == Builtin::BIprintf || BuiltinID == Builtin::BImalloc)) return 0; + if (Context.getLangOpts().isSYCL() && hasAttr() && + !(BuiltinID == Builtin::BIprintf || BuiltinID == Builtin::BImalloc)) { + return 0; + } + // As AMDGCN implementation of OpenMP does not have a device-side standard // library, none of the predefined library functions except printf and malloc // should be treated as a builtin i.e. 0 should be returned for them. diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index c7db24b8d95ff..57522a049bb00 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -2782,10 +2782,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, GenerateIntrinsics = ConstWithoutErrnoOrExceptions && ErrnoOverridenToFalseWithOpt; } - bool IsSYCLDeviceWithoutIntrinsics = - getLangOpts().SYCLIsDevice && - (getTarget().getTriple().isNVPTX() || getTarget().getTriple().isAMDGCN()); - if (GenerateIntrinsics && !IsSYCLDeviceWithoutIntrinsics) { + if (GenerateIntrinsics) { switch (BuiltinIDIfNoAsmLabel) { case Builtin::BIacos: case Builtin::BIacosf: @@ -3885,7 +3882,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_modf: case Builtin::BI__builtin_modff: case Builtin::BI__builtin_modfl: - if (Builder.getIsFPConstrained() || IsSYCLDeviceWithoutIntrinsics) + if (Builder.getIsFPConstrained()) break; // TODO: Emit constrained modf intrinsic once one exists. return RValue::get(emitModfBuiltin(*this, E, Intrinsic::modf)); case Builtin::BI__builtin_isgreater: diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index e6f6c547113be..b076cb60db269 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -7224,6 +7224,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL, case ParsedAttr::AT_SYCLDevice: S.SYCL().handleSYCLDeviceAttr(D, AL); break; + case ParsedAttr::AT_SYCLDeviceOnly: + handleSimpleAttribute(S, D, AL); + break; case ParsedAttr::AT_SYCLScope: S.SYCL().handleSYCLScopeAttr(D, AL); break; diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 15294a11d4ecd..da9473914c98e 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -1629,6 +1629,14 @@ static bool IsOverloadOrOverrideImpl(Sema &SemaRef, FunctionDecl *New, } } + // Allow overloads with SYCLDeviceOnlyAttr + if (SemaRef.getLangOpts().isSYCL()) { + if (hasExplicitAttr(Old) != + hasExplicitAttr(New)) { + return true; + } + } + // The signatures match; this is not an overload. return false; } @@ -11020,6 +11028,16 @@ bool clang::isBetterOverloadCandidate( S.CUDA().IdentifyPreference(Caller, Cand2.Function); } + // In SYCL device compilation mode prefer the overload with the + // SYCLDeviceOnly attribute. + if (S.getLangOpts().isSYCL() && S.getLangOpts().SYCLIsDevice && + Cand1.Function && Cand2.Function) { + if (hasExplicitAttr(Cand1.Function) != + hasExplicitAttr(Cand2.Function)) { + return hasExplicitAttr(Cand1.Function); + } + } + // General member function overloading is handled above, so this only handles // constructors with address spaces. // This only handles address spaces since C++ has no other @@ -11374,6 +11392,15 @@ OverloadingResult OverloadCandidateSet::BestViableFunctionImpl( if (S.getLangOpts().CUDA) CudaExcludeWrongSideCandidates(S, Candidates); + // In SYCL host compilation remove candidates marked SYCLDeviceOnly. + if (S.getLangOpts().isSYCL() && !S.getLangOpts().SYCLIsDevice) { + auto IsDeviceCand = [&](const OverloadCandidate *Cand) { + return Cand->Viable && Cand->Function && + hasExplicitAttr(Cand->Function); + }; + llvm::erase_if(Candidates, IsDeviceCand); + } + Best = end(); for (auto *Cand : Candidates) { Cand->Best = false; diff --git a/libdevice/cmath_wrapper.cpp b/libdevice/cmath_wrapper.cpp index d59395b2d0994..3c6c1b97fa1c5 100644 --- a/libdevice/cmath_wrapper.cpp +++ b/libdevice/cmath_wrapper.cpp @@ -8,8 +8,7 @@ #include "device_math.h" -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \ - defined(__AMDGCN__) +#if defined(__SPIR__) || defined(__SPIRV__) DEVICE_EXTERN_C_INLINE int abs(int x) { return __devicelib_abs(x); } @@ -211,4 +210,4 @@ DEVICE_EXTERN_C_INLINE float rintf(float x) { return __spirv_ocl_rint(x); } #endif -#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__ +#endif // __SPIR__ || __SPIRV__ diff --git a/libdevice/cmath_wrapper_fp64.cpp b/libdevice/cmath_wrapper_fp64.cpp index 720982799ea71..81ba3e710ec6d 100644 --- a/libdevice/cmath_wrapper_fp64.cpp +++ b/libdevice/cmath_wrapper_fp64.cpp @@ -9,8 +9,7 @@ #include "device_math.h" -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \ - defined(__AMDGCN__) +#if defined(__SPIR__) || defined(__SPIRV__) // All exported functions in math and complex device libraries are weak // reference. If users provide their own math or complex functions(with @@ -508,4 +507,4 @@ double _Sinh(double x, double y) { // compute y * sinh(x), |y| <= 1 } } #endif // defined(_WIN32) -#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__ +#endif // __SPIR__ || __SPIRV__ diff --git a/libdevice/fallback-cmath.cpp b/libdevice/fallback-cmath.cpp index 97cb4cf67b4c7..d930ea014ac24 100644 --- a/libdevice/fallback-cmath.cpp +++ b/libdevice/fallback-cmath.cpp @@ -8,8 +8,7 @@ #include "device_math.h" -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \ - defined(__AMDGCN__) +#if defined(__SPIR__) || defined(__SPIRV__) // To support fallback device libraries on-demand loading, please update the // DeviceLibFuncMap in llvm/tools/sycl-post-link/sycl-post-link.cpp if you add @@ -215,4 +214,4 @@ float __devicelib_asinhf(float x) { return __spirv_ocl_asinh(x); } DEVICE_EXTERN_C_INLINE float __devicelib_atanhf(float x) { return __spirv_ocl_atanh(x); } -#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__ +#endif // __SPIR__ || __SPIRV__ diff --git a/sycl/include/sycl/stl_wrappers/cmath b/sycl/include/sycl/stl_wrappers/cmath index c25eadf6394a1..8c626e4b06606 100644 --- a/sycl/include/sycl/stl_wrappers/cmath +++ b/sycl/include/sycl/stl_wrappers/cmath @@ -16,6 +16,10 @@ // *** *** +#if defined(__NVPTX__) || defined(__AMDGCN__) +#include "cmath-fallback.h" +#endif + #include #ifdef __SYCL_DEVICE_ONLY__ diff --git a/sycl/include/sycl/stl_wrappers/cmath-fallback.h b/sycl/include/sycl/stl_wrappers/cmath-fallback.h new file mode 100644 index 0000000000000..2ade9be9aea6c --- /dev/null +++ b/sycl/include/sycl/stl_wrappers/cmath-fallback.h @@ -0,0 +1,283 @@ +#ifndef __CMATH_FALLBACK_H__ +#define __CMATH_FALLBACK_H__ + +#ifdef __SYCL_DEVICE_ONLY__ + +#define __DPCPP_SYCL_DEVICE __attribute__((sycl_device_only, always_inline)) + +#define __DPCPP_SPIRV_MAP_UNARY(NAME, TYPE) \ + __DPCPP_SYCL_DEVICE TYPE NAME(TYPE x) { return __spirv_ocl_##NAME(x); } + +__DPCPP_SYCL_DEVICE long long abs(long long n) { return n < 0 ? -n : n; } +__DPCPP_SYCL_DEVICE long abs(long n) { return n < 0 ? -n : n; } +__DPCPP_SYCL_DEVICE float abs(float x) { return x < 0 ? -x : x; } +__DPCPP_SYCL_DEVICE double abs(double x) { return x < 0 ? -x : x; } +__DPCPP_SYCL_DEVICE float fabs(float x) { return x < 0 ? -x : x; } +__DPCPP_SYCL_DEVICE double fabs(double x) { return x < 0 ? -x : x; } +__DPCPP_SPIRV_MAP_UNARY(acos, double); +__DPCPP_SPIRV_MAP_UNARY(acos, float); +__DPCPP_SPIRV_MAP_UNARY(acosh, double); +__DPCPP_SPIRV_MAP_UNARY(acosh, float); +__DPCPP_SPIRV_MAP_UNARY(asin, double); +__DPCPP_SPIRV_MAP_UNARY(asin, float); +__DPCPP_SPIRV_MAP_UNARY(asinh, double); +__DPCPP_SPIRV_MAP_UNARY(asinh, float); +__DPCPP_SYCL_DEVICE double scalbn(double x, int exp) { + return __spirv_ocl_ldexp(x, exp); +} +__DPCPP_SYCL_DEVICE float scalbn(float x, int exp) { + return __spirv_ocl_ldexp(x, exp); +} +__DPCPP_SYCL_DEVICE double scalbln(double x, long int exp) { + return __spirv_ocl_ldexp(x, (int)exp); +} +__DPCPP_SYCL_DEVICE float scalbln(float x, long int exp) { + return __spirv_ocl_ldexp(x, (int)exp); +} +__DPCPP_SYCL_DEVICE double atan2(double x, double y) { + return __spirv_ocl_atan2(x, y); +} +__DPCPP_SYCL_DEVICE float atan2(float x, float y) { + return __spirv_ocl_atan2(x, y); +} +__DPCPP_SPIRV_MAP_UNARY(atan, double); +__DPCPP_SPIRV_MAP_UNARY(atan, float); +__DPCPP_SPIRV_MAP_UNARY(atanh, double); +__DPCPP_SPIRV_MAP_UNARY(atanh, float); +__DPCPP_SPIRV_MAP_UNARY(cbrt, double); +__DPCPP_SPIRV_MAP_UNARY(cbrt, float); +__DPCPP_SPIRV_MAP_UNARY(ceil, double); +__DPCPP_SPIRV_MAP_UNARY(ceil, float); +__DPCPP_SPIRV_MAP_UNARY(cos, double); +__DPCPP_SPIRV_MAP_UNARY(cos, float); +__DPCPP_SPIRV_MAP_UNARY(cosh, double); +__DPCPP_SPIRV_MAP_UNARY(cosh, float); +__DPCPP_SPIRV_MAP_UNARY(erfc, double); +__DPCPP_SPIRV_MAP_UNARY(erfc, float); +__DPCPP_SPIRV_MAP_UNARY(erf, double); +__DPCPP_SPIRV_MAP_UNARY(erf, float); +__DPCPP_SPIRV_MAP_UNARY(exp2, double); +__DPCPP_SPIRV_MAP_UNARY(exp2, float); +__DPCPP_SPIRV_MAP_UNARY(exp, double); +__DPCPP_SPIRV_MAP_UNARY(exp, float); +__DPCPP_SPIRV_MAP_UNARY(expm1, double); +__DPCPP_SPIRV_MAP_UNARY(expm1, float); +__DPCPP_SYCL_DEVICE double fdim(double x, double y) { + return __spirv_ocl_fdim(x, y); +} +__DPCPP_SYCL_DEVICE float fdim(float x, float y) { + return __spirv_ocl_fdim(x, y); +} +__DPCPP_SPIRV_MAP_UNARY(floor, double); +__DPCPP_SPIRV_MAP_UNARY(floor, float); +__DPCPP_SYCL_DEVICE double fma(double x, double y, double z) { + return __spirv_ocl_fma(x, y, z); +} +__DPCPP_SYCL_DEVICE float fma(float x, float y, float z) { + return __spirv_ocl_fma(x, y, z); +} +__DPCPP_SYCL_DEVICE double fmax(double x, double y) { + return __spirv_ocl_fmax(x, y); +} +__DPCPP_SYCL_DEVICE float fmax(float x, float y) { + return __spirv_ocl_fmax(x, y); +} +__DPCPP_SYCL_DEVICE double fmin(double x, double y) { + return __spirv_ocl_fmin(x, y); +} +__DPCPP_SYCL_DEVICE float fmin(float x, float y) { + return __spirv_ocl_fmin(x, y); +} +__DPCPP_SYCL_DEVICE double fmod(double x, double y) { + return __spirv_ocl_fmod(x, y); +} +__DPCPP_SYCL_DEVICE float fmod(float x, float y) { + return __spirv_ocl_fmod(x, y); +} +__DPCPP_SYCL_DEVICE double frexp(double x, int *exp) { + return __spirv_ocl_frexp(x, exp); +} +__DPCPP_SYCL_DEVICE float frexp(float x, int *exp) { + return __spirv_ocl_frexp(x, exp); +} +__DPCPP_SYCL_DEVICE double hypot(double x, double y) { + return __spirv_ocl_hypot(x, y); +} +__DPCPP_SYCL_DEVICE float hypot(float x, float y) { + return __spirv_ocl_hypot(x, y); +} +__DPCPP_SYCL_DEVICE int ilogb(double x) { return __spirv_ocl_ilogb(x); } +__DPCPP_SYCL_DEVICE int ilogb(float x) { return __spirv_ocl_ilogb(x); } +__DPCPP_SYCL_DEVICE double ldexp(double x, int exp) { + return __spirv_ocl_ldexp(x, exp); +} +__DPCPP_SYCL_DEVICE float ldexp(float x, int exp) { + return __spirv_ocl_ldexp(x, exp); +} +__DPCPP_SPIRV_MAP_UNARY(lgamma, double); +__DPCPP_SPIRV_MAP_UNARY(lgamma, float); +__DPCPP_SPIRV_MAP_UNARY(log10, double); +__DPCPP_SPIRV_MAP_UNARY(log10, float); +__DPCPP_SPIRV_MAP_UNARY(log1p, double); +__DPCPP_SPIRV_MAP_UNARY(log1p, float); +__DPCPP_SPIRV_MAP_UNARY(log2, double); +__DPCPP_SPIRV_MAP_UNARY(log2, float); +__DPCPP_SPIRV_MAP_UNARY(logb, double); +__DPCPP_SPIRV_MAP_UNARY(logb, float); +__DPCPP_SPIRV_MAP_UNARY(log, double); +__DPCPP_SPIRV_MAP_UNARY(log, float); +__DPCPP_SYCL_DEVICE double modf(double x, double *intpart) { + return __spirv_ocl_modf(x, intpart); +} +__DPCPP_SYCL_DEVICE float modf(float x, float *intpart) { + return __spirv_ocl_modf(x, intpart); +} +__DPCPP_SYCL_DEVICE double nextafter(double x, double y) { + return __spirv_ocl_nextafter(x, y); +} +__DPCPP_SYCL_DEVICE float nextafter(float x, float y) { + return __spirv_ocl_nextafter(x, y); +} +__DPCPP_SYCL_DEVICE double pow(double x, double y) { + return __spirv_ocl_pow(x, y); +} +__DPCPP_SYCL_DEVICE float pow(float x, float y) { + return __spirv_ocl_pow(x, y); +} +__DPCPP_SYCL_DEVICE double remainder(double x, double y) { + return __spirv_ocl_remainder(x, y); +} +__DPCPP_SYCL_DEVICE float remainder(float x, float y) { + return __spirv_ocl_remainder(x, y); +} +__DPCPP_SYCL_DEVICE double remquo(double x, double y, int *q) { + return __spirv_ocl_remquo(x, y, q); +} +__DPCPP_SYCL_DEVICE float remquo(float x, float y, int *q) { + return __spirv_ocl_remquo(x, y, q); +} +__DPCPP_SPIRV_MAP_UNARY(rint, double); +__DPCPP_SPIRV_MAP_UNARY(rint, float); +__DPCPP_SPIRV_MAP_UNARY(round, double); +__DPCPP_SPIRV_MAP_UNARY(round, float); +__DPCPP_SPIRV_MAP_UNARY(sin, double); +__DPCPP_SPIRV_MAP_UNARY(sin, float); +__DPCPP_SPIRV_MAP_UNARY(sinh, double); +__DPCPP_SPIRV_MAP_UNARY(sinh, float); +__DPCPP_SPIRV_MAP_UNARY(sqrt, double); +__DPCPP_SPIRV_MAP_UNARY(sqrt, float); +__DPCPP_SPIRV_MAP_UNARY(tan, double); +__DPCPP_SPIRV_MAP_UNARY(tan, float); +__DPCPP_SPIRV_MAP_UNARY(tanh, double); +__DPCPP_SPIRV_MAP_UNARY(tanh, float); +__DPCPP_SPIRV_MAP_UNARY(tgamma, double); +__DPCPP_SPIRV_MAP_UNARY(tgamma, float); +__DPCPP_SPIRV_MAP_UNARY(trunc, double); +__DPCPP_SPIRV_MAP_UNARY(trunc, float); + +__DPCPP_SYCL_DEVICE div_t div(int x, int y) { return {x / y, x % y}; } + +__DPCPP_SYCL_DEVICE ldiv_t ldiv(long x, long y) { return {x / y, x % y}; } + +__DPCPP_SYCL_DEVICE lldiv_t ldiv(long long x, long long y) { + return {x / y, x % y}; +} + +#ifdef _LIBCPP_BEGIN_NAMESPACE_STD +_LIBCPP_BEGIN_NAMESPACE_STD +#else +namespace std { +#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION +_GLIBCXX_BEGIN_NAMESPACE_VERSION +#endif +#endif + +using ::abs; +using ::acos; +using ::acosh; +using ::asin; +using ::asinh; +using ::atan; +using ::atan2; +using ::atanh; +using ::cbrt; +using ::ceil; +using ::div; +using ::ldiv; +using ::lldiv; +// using ::copysign; +using ::cos; +using ::cosh; +using ::erf; +using ::erfc; +using ::exp; +using ::exp2; +using ::expm1; +using ::fabs; +using ::fdim; +using ::floor; +using ::fma; +using ::fmax; +using ::fmin; +using ::fmod; +// using ::fpclassify; +using ::frexp; +using ::hypot; +using ::ilogb; +// using ::isfinite; +// using ::isgreater; +// using ::isgreaterequal; +// using ::isinf; +// using ::isless; +// using ::islessequal; +// using ::islessgreater; +// using ::isnan; +// using ::isnormal; +// using ::isunordered; +// using ::labs; +using ::ldexp; +using ::lgamma; +// using ::llabs; +// using ::llrint; +using ::log; +using ::log10; +using ::log1p; +using ::log2; +using ::logb; +// using ::lrint; +// using ::lround; +// using ::llround; +using ::modf; +// using ::nan; +// using ::nanf; +// using ::nearbyint; +using ::nextafter; +using ::pow; +using ::remainder; +using ::remquo; +using ::rint; +using ::round; +using ::scalbln; +using ::scalbn; +// using ::signbit; +using ::sin; +using ::sinh; +using ::sqrt; +using ::tan; +using ::tanh; +using ::tgamma; +using ::trunc; + +#ifdef _LIBCPP_END_NAMESPACE_STD +_LIBCPP_END_NAMESPACE_STD +#else +#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION +_GLIBCXX_END_NAMESPACE_VERSION +#endif +} // namespace std +#endif + +#undef __DPCPP_SPIRV_MAP_UNARY +#undef __DPCPP_SYCL_DEVICE +#endif +#endif From e942076bc94eda9f870868b1c94993c5c4e1423b Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Thu, 29 May 2025 10:42:58 +0100 Subject: [PATCH 02/24] [SYCL] Fixup attribute handling We don't support malloc in SYCL, silence warnings for host compilation with `sycl_device_only`. Fix failing clang test with new attribute. --- clang/include/clang/Basic/Attr.td | 2 +- clang/lib/AST/Decl.cpp | 2 +- clang/test/Misc/pragma-attribute-supported-attributes-list.test | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 8c2eddcc1487d..27a190f5c1ef9 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1610,7 +1610,7 @@ def SYCLDevice : InheritableAttr { def SYCLDeviceOnly : InheritableAttr { let Spellings = [GNU<"sycl_device_only">]; let Subjects = SubjectList<[Function]>; - let LangOpts = [SYCLIsDevice]; + let LangOpts = [SYCLIsDevice, SilentlyIgnoreSYCLIsHost]; let Documentation = [Undocumented]; } diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index 62d1f0d9ba9fe..5182a0f342015 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -3730,7 +3730,7 @@ unsigned FunctionDecl::getBuiltinID(bool ConsiderWrapperFunctions) const { return 0; if (Context.getLangOpts().isSYCL() && hasAttr() && - !(BuiltinID == Builtin::BIprintf || BuiltinID == Builtin::BImalloc)) { + BuiltinID != Builtin::BIprintf) { return 0; } diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test index d04cf791e88bf..c23d9acb98826 100644 --- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test +++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test @@ -190,6 +190,7 @@ // CHECK-NEXT: SYCLDeviceGlobal (SubjectMatchRule_record) // CHECK-NEXT: SYCLDeviceHas (SubjectMatchRule_function) // CHECK-NEXT: SYCLDeviceIndirectlyCallable (SubjectMatchRule_function) +// CHECK-NEXT: SYCLDeviceOnly (SubjectMatchRule_function) // CHECK-NEXT: SYCLGlobalVariableAllowed (SubjectMatchRule_record) // CHECK-NEXT: SYCLIntelDisableLoopPipelining (SubjectMatchRule_function) // CHECK-NEXT: SYCLIntelInitiationInterval (SubjectMatchRule_function) From d192f332518c2ea88ce641eb8f0a5a75fd37a989 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Thu, 29 May 2025 10:54:02 +0100 Subject: [PATCH 03/24] [SYCL] Use hasAttr instead of hasExplicitAttr --- clang/lib/Sema/SemaOverload.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index da9473914c98e..7cb24f06ae1e2 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -1631,8 +1631,8 @@ static bool IsOverloadOrOverrideImpl(Sema &SemaRef, FunctionDecl *New, // Allow overloads with SYCLDeviceOnlyAttr if (SemaRef.getLangOpts().isSYCL()) { - if (hasExplicitAttr(Old) != - hasExplicitAttr(New)) { + if (Old->hasAttr() != + New->hasAttr()) { return true; } } @@ -11032,9 +11032,9 @@ bool clang::isBetterOverloadCandidate( // SYCLDeviceOnly attribute. if (S.getLangOpts().isSYCL() && S.getLangOpts().SYCLIsDevice && Cand1.Function && Cand2.Function) { - if (hasExplicitAttr(Cand1.Function) != - hasExplicitAttr(Cand2.Function)) { - return hasExplicitAttr(Cand1.Function); + if (Cand1.Function->hasAttr() != + Cand2.Function->hasAttr()) { + return Cand1.Function->hasAttr(); } } @@ -11396,7 +11396,7 @@ OverloadingResult OverloadCandidateSet::BestViableFunctionImpl( if (S.getLangOpts().isSYCL() && !S.getLangOpts().SYCLIsDevice) { auto IsDeviceCand = [&](const OverloadCandidate *Cand) { return Cand->Viable && Cand->Function && - hasExplicitAttr(Cand->Function); + Cand->Function->hasAttr(); }; llvm::erase_if(Candidates, IsDeviceCand); } From a82cebca4e767e91c03b40ad71ab9c11c112fcf5 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Thu, 29 May 2025 15:48:31 +0100 Subject: [PATCH 04/24] [SYCL] Update fallback header --- .../sycl/stl_wrappers/cmath-fallback.h | 276 ++++++++++-------- 1 file changed, 150 insertions(+), 126 deletions(-) diff --git a/sycl/include/sycl/stl_wrappers/cmath-fallback.h b/sycl/include/sycl/stl_wrappers/cmath-fallback.h index 2ade9be9aea6c..93d0cb5e912f4 100644 --- a/sycl/include/sycl/stl_wrappers/cmath-fallback.h +++ b/sycl/include/sycl/stl_wrappers/cmath-fallback.h @@ -3,177 +3,157 @@ #ifdef __SYCL_DEVICE_ONLY__ -#define __DPCPP_SYCL_DEVICE __attribute__((sycl_device_only, always_inline)) +#define __DPCPP_SYCL_DEVICE \ + __attribute__((sycl_device_only, always_inline, overloadable)) +#define __DPCPP_SYCL_DEVICE_C \ + extern "C" __attribute__((sycl_device_only, always_inline, overloadable)) -#define __DPCPP_SPIRV_MAP_UNARY(NAME, TYPE) \ - __DPCPP_SYCL_DEVICE TYPE NAME(TYPE x) { return __spirv_ocl_##NAME(x); } +#define __DPCPP_SPIRV_MAP_UNARY(NAME) \ + __DPCPP_SYCL_DEVICE_C float NAME##f(float x) { \ + return __spirv_ocl_##NAME(x); \ + } \ + __DPCPP_SYCL_DEVICE float NAME(float x) { return __spirv_ocl_##NAME(x); } \ + __DPCPP_SYCL_DEVICE double NAME(double x) { return __spirv_ocl_##NAME(x); } + +#define __DPCPP_SPIRV_MAP_BINARY(NAME) \ + __DPCPP_SYCL_DEVICE_C float NAME##f(float x, float y) { \ + return __spirv_ocl_##NAME(x, y); \ + } \ + __DPCPP_SYCL_DEVICE float NAME(float x, float y) { \ + return __spirv_ocl_##NAME(x, y); \ + } \ + __DPCPP_SYCL_DEVICE double NAME(double x, double y) { \ + return __spirv_ocl_##NAME(x, y); \ + } __DPCPP_SYCL_DEVICE long long abs(long long n) { return n < 0 ? -n : n; } +__DPCPP_SYCL_DEVICE_C long long llabs(long long n) { return n < 0 ? -n : n; } __DPCPP_SYCL_DEVICE long abs(long n) { return n < 0 ? -n : n; } +__DPCPP_SYCL_DEVICE_C long labs(long n) { return n < 0 ? -n : n; } __DPCPP_SYCL_DEVICE float abs(float x) { return x < 0 ? -x : x; } __DPCPP_SYCL_DEVICE double abs(double x) { return x < 0 ? -x : x; } __DPCPP_SYCL_DEVICE float fabs(float x) { return x < 0 ? -x : x; } +__DPCPP_SYCL_DEVICE_C float fabsf(float x) { return x < 0 ? -x : x; } __DPCPP_SYCL_DEVICE double fabs(double x) { return x < 0 ? -x : x; } -__DPCPP_SPIRV_MAP_UNARY(acos, double); -__DPCPP_SPIRV_MAP_UNARY(acos, float); -__DPCPP_SPIRV_MAP_UNARY(acosh, double); -__DPCPP_SPIRV_MAP_UNARY(acosh, float); -__DPCPP_SPIRV_MAP_UNARY(asin, double); -__DPCPP_SPIRV_MAP_UNARY(asin, float); -__DPCPP_SPIRV_MAP_UNARY(asinh, double); -__DPCPP_SPIRV_MAP_UNARY(asinh, float); -__DPCPP_SYCL_DEVICE double scalbn(double x, int exp) { + +__DPCPP_SPIRV_MAP_UNARY(acos); +__DPCPP_SPIRV_MAP_UNARY(acosh); +__DPCPP_SPIRV_MAP_UNARY(asin); +__DPCPP_SPIRV_MAP_UNARY(asinh); + +__DPCPP_SYCL_DEVICE_C float scalbnf(float x, int exp) { return __spirv_ocl_ldexp(x, exp); } __DPCPP_SYCL_DEVICE float scalbn(float x, int exp) { return __spirv_ocl_ldexp(x, exp); } -__DPCPP_SYCL_DEVICE double scalbln(double x, long int exp) { +__DPCPP_SYCL_DEVICE double scalbn(double x, int exp) { + return __spirv_ocl_ldexp(x, exp); +} + +__DPCPP_SYCL_DEVICE_C float scalblnf(float x, long int exp) { return __spirv_ocl_ldexp(x, (int)exp); } __DPCPP_SYCL_DEVICE float scalbln(float x, long int exp) { return __spirv_ocl_ldexp(x, (int)exp); } -__DPCPP_SYCL_DEVICE double atan2(double x, double y) { - return __spirv_ocl_atan2(x, y); -} -__DPCPP_SYCL_DEVICE float atan2(float x, float y) { - return __spirv_ocl_atan2(x, y); -} -__DPCPP_SPIRV_MAP_UNARY(atan, double); -__DPCPP_SPIRV_MAP_UNARY(atan, float); -__DPCPP_SPIRV_MAP_UNARY(atanh, double); -__DPCPP_SPIRV_MAP_UNARY(atanh, float); -__DPCPP_SPIRV_MAP_UNARY(cbrt, double); -__DPCPP_SPIRV_MAP_UNARY(cbrt, float); -__DPCPP_SPIRV_MAP_UNARY(ceil, double); -__DPCPP_SPIRV_MAP_UNARY(ceil, float); -__DPCPP_SPIRV_MAP_UNARY(cos, double); -__DPCPP_SPIRV_MAP_UNARY(cos, float); -__DPCPP_SPIRV_MAP_UNARY(cosh, double); -__DPCPP_SPIRV_MAP_UNARY(cosh, float); -__DPCPP_SPIRV_MAP_UNARY(erfc, double); -__DPCPP_SPIRV_MAP_UNARY(erfc, float); -__DPCPP_SPIRV_MAP_UNARY(erf, double); -__DPCPP_SPIRV_MAP_UNARY(erf, float); -__DPCPP_SPIRV_MAP_UNARY(exp2, double); -__DPCPP_SPIRV_MAP_UNARY(exp2, float); -__DPCPP_SPIRV_MAP_UNARY(exp, double); -__DPCPP_SPIRV_MAP_UNARY(exp, float); -__DPCPP_SPIRV_MAP_UNARY(expm1, double); -__DPCPP_SPIRV_MAP_UNARY(expm1, float); -__DPCPP_SYCL_DEVICE double fdim(double x, double y) { - return __spirv_ocl_fdim(x, y); -} -__DPCPP_SYCL_DEVICE float fdim(float x, float y) { - return __spirv_ocl_fdim(x, y); +__DPCPP_SYCL_DEVICE double scalbln(double x, long int exp) { + return __spirv_ocl_ldexp(x, (int)exp); } -__DPCPP_SPIRV_MAP_UNARY(floor, double); -__DPCPP_SPIRV_MAP_UNARY(floor, float); -__DPCPP_SYCL_DEVICE double fma(double x, double y, double z) { + +__DPCPP_SPIRV_MAP_BINARY(atan2); +__DPCPP_SPIRV_MAP_UNARY(atan); +__DPCPP_SPIRV_MAP_UNARY(atanh); +__DPCPP_SPIRV_MAP_UNARY(cbrt); +__DPCPP_SPIRV_MAP_UNARY(ceil); +__DPCPP_SPIRV_MAP_UNARY(cos); +__DPCPP_SPIRV_MAP_UNARY(cosh); +__DPCPP_SPIRV_MAP_UNARY(erfc); +__DPCPP_SPIRV_MAP_UNARY(erf); +__DPCPP_SPIRV_MAP_UNARY(exp2); +__DPCPP_SPIRV_MAP_UNARY(exp); +__DPCPP_SPIRV_MAP_UNARY(expm1); +__DPCPP_SPIRV_MAP_BINARY(fdim); +__DPCPP_SPIRV_MAP_UNARY(floor); + +__DPCPP_SYCL_DEVICE_C float fmaf(float x, float y, float z) { return __spirv_ocl_fma(x, y, z); } __DPCPP_SYCL_DEVICE float fma(float x, float y, float z) { return __spirv_ocl_fma(x, y, z); } -__DPCPP_SYCL_DEVICE double fmax(double x, double y) { - return __spirv_ocl_fmax(x, y); -} -__DPCPP_SYCL_DEVICE float fmax(float x, float y) { - return __spirv_ocl_fmax(x, y); -} -__DPCPP_SYCL_DEVICE double fmin(double x, double y) { - return __spirv_ocl_fmin(x, y); -} -__DPCPP_SYCL_DEVICE float fmin(float x, float y) { - return __spirv_ocl_fmin(x, y); -} -__DPCPP_SYCL_DEVICE double fmod(double x, double y) { - return __spirv_ocl_fmod(x, y); -} -__DPCPP_SYCL_DEVICE float fmod(float x, float y) { - return __spirv_ocl_fmod(x, y); +__DPCPP_SYCL_DEVICE double fma(double x, double y, double z) { + return __spirv_ocl_fma(x, y, z); } -__DPCPP_SYCL_DEVICE double frexp(double x, int *exp) { + +__DPCPP_SPIRV_MAP_BINARY(fmax); +__DPCPP_SPIRV_MAP_BINARY(fmin); +__DPCPP_SPIRV_MAP_BINARY(fmod); + +__DPCPP_SYCL_DEVICE_C float frexpf(float x, int *exp) { return __spirv_ocl_frexp(x, exp); } __DPCPP_SYCL_DEVICE float frexp(float x, int *exp) { return __spirv_ocl_frexp(x, exp); } -__DPCPP_SYCL_DEVICE double hypot(double x, double y) { - return __spirv_ocl_hypot(x, y); -} -__DPCPP_SYCL_DEVICE float hypot(float x, float y) { - return __spirv_ocl_hypot(x, y); +__DPCPP_SYCL_DEVICE double frexp(double x, int *exp) { + return __spirv_ocl_frexp(x, exp); } -__DPCPP_SYCL_DEVICE int ilogb(double x) { return __spirv_ocl_ilogb(x); } + +__DPCPP_SPIRV_MAP_BINARY(hypot); +__DPCPP_SYCL_DEVICE_C int ilogbf(float x) { return __spirv_ocl_ilogb(x); } __DPCPP_SYCL_DEVICE int ilogb(float x) { return __spirv_ocl_ilogb(x); } -__DPCPP_SYCL_DEVICE double ldexp(double x, int exp) { +__DPCPP_SYCL_DEVICE int ilogb(double x) { return __spirv_ocl_ilogb(x); } + +__DPCPP_SYCL_DEVICE_C float ldexpf(float x, int exp) { return __spirv_ocl_ldexp(x, exp); } __DPCPP_SYCL_DEVICE float ldexp(float x, int exp) { return __spirv_ocl_ldexp(x, exp); } -__DPCPP_SPIRV_MAP_UNARY(lgamma, double); -__DPCPP_SPIRV_MAP_UNARY(lgamma, float); -__DPCPP_SPIRV_MAP_UNARY(log10, double); -__DPCPP_SPIRV_MAP_UNARY(log10, float); -__DPCPP_SPIRV_MAP_UNARY(log1p, double); -__DPCPP_SPIRV_MAP_UNARY(log1p, float); -__DPCPP_SPIRV_MAP_UNARY(log2, double); -__DPCPP_SPIRV_MAP_UNARY(log2, float); -__DPCPP_SPIRV_MAP_UNARY(logb, double); -__DPCPP_SPIRV_MAP_UNARY(logb, float); -__DPCPP_SPIRV_MAP_UNARY(log, double); -__DPCPP_SPIRV_MAP_UNARY(log, float); -__DPCPP_SYCL_DEVICE double modf(double x, double *intpart) { +__DPCPP_SYCL_DEVICE double ldexp(double x, int exp) { + return __spirv_ocl_ldexp(x, exp); +} + +__DPCPP_SPIRV_MAP_UNARY(lgamma); +__DPCPP_SPIRV_MAP_UNARY(log10); +__DPCPP_SPIRV_MAP_UNARY(log1p); +__DPCPP_SPIRV_MAP_UNARY(log2); +__DPCPP_SPIRV_MAP_UNARY(logb); +__DPCPP_SPIRV_MAP_UNARY(log); + +__DPCPP_SYCL_DEVICE_C float modff(float x, float *intpart) { return __spirv_ocl_modf(x, intpart); } __DPCPP_SYCL_DEVICE float modf(float x, float *intpart) { return __spirv_ocl_modf(x, intpart); } -__DPCPP_SYCL_DEVICE double nextafter(double x, double y) { - return __spirv_ocl_nextafter(x, y); -} -__DPCPP_SYCL_DEVICE float nextafter(float x, float y) { - return __spirv_ocl_nextafter(x, y); -} -__DPCPP_SYCL_DEVICE double pow(double x, double y) { - return __spirv_ocl_pow(x, y); -} -__DPCPP_SYCL_DEVICE float pow(float x, float y) { - return __spirv_ocl_pow(x, y); -} -__DPCPP_SYCL_DEVICE double remainder(double x, double y) { - return __spirv_ocl_remainder(x, y); -} -__DPCPP_SYCL_DEVICE float remainder(float x, float y) { - return __spirv_ocl_remainder(x, y); +__DPCPP_SYCL_DEVICE double modf(double x, double *intpart) { + return __spirv_ocl_modf(x, intpart); } -__DPCPP_SYCL_DEVICE double remquo(double x, double y, int *q) { + +__DPCPP_SPIRV_MAP_BINARY(nextafter); +__DPCPP_SPIRV_MAP_BINARY(pow); +__DPCPP_SPIRV_MAP_BINARY(remainder); + +__DPCPP_SYCL_DEVICE_C float remquof(float x, float y, int *q) { return __spirv_ocl_remquo(x, y, q); } __DPCPP_SYCL_DEVICE float remquo(float x, float y, int *q) { return __spirv_ocl_remquo(x, y, q); } -__DPCPP_SPIRV_MAP_UNARY(rint, double); -__DPCPP_SPIRV_MAP_UNARY(rint, float); -__DPCPP_SPIRV_MAP_UNARY(round, double); -__DPCPP_SPIRV_MAP_UNARY(round, float); -__DPCPP_SPIRV_MAP_UNARY(sin, double); -__DPCPP_SPIRV_MAP_UNARY(sin, float); -__DPCPP_SPIRV_MAP_UNARY(sinh, double); -__DPCPP_SPIRV_MAP_UNARY(sinh, float); -__DPCPP_SPIRV_MAP_UNARY(sqrt, double); -__DPCPP_SPIRV_MAP_UNARY(sqrt, float); -__DPCPP_SPIRV_MAP_UNARY(tan, double); -__DPCPP_SPIRV_MAP_UNARY(tan, float); -__DPCPP_SPIRV_MAP_UNARY(tanh, double); -__DPCPP_SPIRV_MAP_UNARY(tanh, float); -__DPCPP_SPIRV_MAP_UNARY(tgamma, double); -__DPCPP_SPIRV_MAP_UNARY(tgamma, float); -__DPCPP_SPIRV_MAP_UNARY(trunc, double); -__DPCPP_SPIRV_MAP_UNARY(trunc, float); +__DPCPP_SYCL_DEVICE double remquo(double x, double y, int *q) { + return __spirv_ocl_remquo(x, y, q); +} +__DPCPP_SPIRV_MAP_UNARY(rint); +__DPCPP_SPIRV_MAP_UNARY(round); +__DPCPP_SPIRV_MAP_UNARY(sin); +__DPCPP_SPIRV_MAP_UNARY(sinh); +__DPCPP_SPIRV_MAP_UNARY(sqrt); +__DPCPP_SPIRV_MAP_UNARY(tan); +__DPCPP_SPIRV_MAP_UNARY(tanh); +__DPCPP_SPIRV_MAP_UNARY(tgamma); +__DPCPP_SPIRV_MAP_UNARY(trunc); __DPCPP_SYCL_DEVICE div_t div(int x, int y) { return {x / y, x % y}; } @@ -194,32 +174,54 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION using ::abs; using ::acos; +using ::acosf; using ::acosh; +using ::acoshf; using ::asin; +using ::asinf; using ::asinh; +using ::asinhf; using ::atan; using ::atan2; +using ::atan2f; +using ::atanf; using ::atanh; +using ::atanhf; using ::cbrt; +using ::cbrtf; using ::ceil; +using ::ceilf; using ::div; +using ::labs; using ::ldiv; +using ::llabs; using ::lldiv; // using ::copysign; using ::cos; +using ::cosf; using ::cosh; +using ::coshf; using ::erf; using ::erfc; +using ::erfcf; +using ::erff; using ::exp; using ::exp2; +using ::exp2f; +using ::expf; using ::expm1; +using ::expm1f; using ::fabs; +using ::fabsf; using ::fdim; +using ::fdimf; using ::floor; -using ::fma; -using ::fmax; -using ::fmin; +using ::floorf; +using ::fmaf; +using ::fmaxf; +using ::fminf; using ::fmod; +using ::fmodf; // using ::fpclassify; using ::frexp; using ::hypot; @@ -237,36 +239,58 @@ using ::ilogb; // using ::labs; using ::ldexp; using ::lgamma; +using ::lgammaf; // using ::llabs; // using ::llrint; using ::log; using ::log10; +using ::log10f; using ::log1p; +using ::log1pf; using ::log2; +using ::log2f; using ::logb; +using ::logbf; +using ::logf; // using ::lrint; // using ::lround; // using ::llround; using ::modf; +using ::modff; // using ::nan; // using ::nanf; // using ::nearbyint; using ::nextafter; +using ::nextafterf; using ::pow; +using ::powf; using ::remainder; +using ::remainderf; using ::remquo; +using ::remquof; using ::rint; +using ::rintf; using ::round; +using ::roundf; using ::scalbln; +using ::scalblnf; using ::scalbn; +using ::scalbnf; // using ::signbit; using ::sin; +using ::sinf; using ::sinh; +using ::sinhf; using ::sqrt; +using ::sqrtf; using ::tan; +using ::tanf; using ::tanh; +using ::tanhf; using ::tgamma; +using ::tgammaf; using ::trunc; +using ::truncf; #ifdef _LIBCPP_END_NAMESPACE_STD _LIBCPP_END_NAMESPACE_STD From b3574fb95b1ff26838379834a61e42313ced1807 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Thu, 29 May 2025 15:50:09 +0100 Subject: [PATCH 05/24] [SYCL] Remove sycl-libdevice-cmath.cpp test This test was relying on the hack preventing LLVM intrinsics from being emitted so it doesn't work at all with the new approach. --- .../test/CodeGenSYCL/sycl-libdevice-cmath.cpp | 144 ------------------ 1 file changed, 144 deletions(-) delete mode 100644 clang/test/CodeGenSYCL/sycl-libdevice-cmath.cpp diff --git a/clang/test/CodeGenSYCL/sycl-libdevice-cmath.cpp b/clang/test/CodeGenSYCL/sycl-libdevice-cmath.cpp deleted file mode 100644 index 5c282449dc851..0000000000000 --- a/clang/test/CodeGenSYCL/sycl-libdevice-cmath.cpp +++ /dev/null @@ -1,144 +0,0 @@ -// SYCL compilation uses libdevice in order to implement platform specific -// versions of funcs like cosf, logf, etc. In order for the libdevice funcs -// to be used, we need to make sure that llvm intrinsics such as llvm.cos.f32 -// are not emitted since many backends do not have lowerings for such -// intrinsics. This allows the driver to link in the libdevice definitions for -// cosf etc. later in the driver flow. - -// RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-cuda -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-cuda -ffast-math -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 %s -fsycl-is-device -triple amdgcn-amd-amdhsa -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 %s -fsycl-is-device -triple amdgcn-amd-amdhsa -ffast-math -emit-llvm -o - | FileCheck %s - -#include "Inputs/sycl.hpp" - -extern "C" { -float scalbnf(float x, int n); -float logf(float x); -float expf(float x); -float frexpf(float x, int *exp); -float ldexpf(float x, int exp); -float log10f(float x); -float modff(float x, float *intpart); -float exp2f(float x); -float expm1f(float x); -int ilogbf(float x); -float log1pf(float x); -float log2f(float x); -float logbf(float x); -float sqrtf(float x); -float cbrtf(float x); -float hypotf(float x, float y); -float erff(float x); -float erfcf(float x); -float tgammaf(float x); -float lgammaf(float x); -float fmodf(float x, float y); -float remainderf(float x, float y); -float remquof(float x, float y, int *q); -float nextafterf(float x, float y); -float fdimf(float x, float y); -float fmaf(float x, float y, float z); -float sinf(float x); -float cosf(float x); -float tanf(float x); -float powf(float x, float y); -float acosf(float x); -float asinf(float x); -float atanf(float x); -float atan2f(float x, float y); -float coshf(float x); -float sinhf(float x); -float tanhf(float x); -float acoshf(float x); -float asinhf(float x); -float atanhf(float x); -}; - -// CHECK-NOT: llvm.abs. -// CHECK-NOT: llvm.scalbnf. -// CHECK-NOT: llvm.log. -// CHECK-NOT: llvm.exp. -// CHECK-NOT: llvm.frexp. -// CHECK-NOT: llvm.ldexp. -// CHECK-NOT: llvm.log10. -// CHECK-NOT: llvm.mod. -// CHECK-NOT: llvm.exp2. -// CHECK-NOT: llvm.expm1. -// CHECK-NOT: llvm.ilogb. -// CHECK-NOT: llvm.log1p. -// CHECK-NOT: llvm.log2. -// CHECK-NOT: llvm.logb. -// CHECK-NOT: llvm.sqrt. -// CHECK-NOT: llvm.cbrt. -// CHECK-NOT: llvm.hypot. -// CHECK-NOT: llvm.erf. -// CHECK-NOT: llvm.erfc. -// CHECK-NOT: llvm.tgamma. -// CHECK-NOT: llvm.lgamma. -// CHECK-NOT: llvm.fmod. -// CHECK-NOT: llvm.remainder. -// CHECK-NOT: llvm.remquo. -// CHECK-NOT: llvm.nextafter. -// CHECK-NOT: llvm.fdim. -// CHECK-NOT: llvm.fma. -// CHECK-NOT: llvm.sin. -// CHECK-NOT: llvm.cos. -// CHECK-NOT: llvm.tan. -// CHECK-NOT: llvm.pow. -// CHECK-NOT: llvm.acos. -// CHECK-NOT: llvm.asin. -// CHECK-NOT: llvm.atan. -// CHECK-NOT: llvm.atan2. -// CHECK-NOT: llvm.cosh. -// CHECK-NOT: llvm.sinh. -// CHECK-NOT: llvm.tanh. -// CHECK-NOT: llvm.acosh. -// CHECK-NOT: llvm.asinh. -// CHECK-NOT: llvm.atanh. -void sycl_kernel(float *a, int *b) { - sycl::queue{}.submit([&](sycl::handler &cgh) { - cgh.single_task([=]() { - a[0] = scalbnf(a[0], b[0]); - a[0] = logf(a[0]); - a[0] = expf(a[0]); - a[0] = frexpf(a[0], b); - a[0] = ldexpf(a[0], b[0]); - a[0] = log10f(a[0]); - a[0] = modff(a[0], a); - a[0] = exp2f(a[0]); - a[0] = expm1f(a[0]); - a[0] = ilogbf(a[0]); - a[0] = log1pf(a[0]); - a[0] = log2f(a[0]); - a[0] = logbf(a[0]); - a[0] = sqrtf(a[0]); - a[0] = cbrtf(a[0]); - a[0] = hypotf(a[0], a[0]); - a[0] = erff(a[0]); - a[0] = erfcf(a[0]); - a[0] = tgammaf(a[0]); - a[0] = lgammaf(a[0]); - a[0] = fmodf(a[0], a[0]); - a[0] = remainderf(a[0], a[0]); - a[0] = remquof(a[0], a[0], b); - a[0] = nextafterf(a[0], a[0]); - a[0] = fdimf(a[0], a[0]); - a[0] = fmaf(a[0], a[0], a[0]); - a[0] = sinf(a[0]); - a[0] = cosf(a[0]); - a[0] = tanf(a[0]); - a[0] = powf(a[0], a[0]); - a[0] = acosf(a[0]); - a[0] = asinf(a[0]); - a[0] = atanf(a[0]); - a[0] = atan2f(a[0], a[0]); - a[0] = coshf(a[0]); - a[0] = sinhf(a[0]); - a[0] = tanhf(a[0]); - a[0] = acoshf(a[0]); - a[0] = asinhf(a[0]); - a[0] = atanhf(a[0]); - }); - }); -} From 3d2aa445e8c7a965a7c1416966f6968742e8af39 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Thu, 29 May 2025 17:51:17 +0100 Subject: [PATCH 06/24] [SYCL] Add missing abs --- sycl/include/sycl/stl_wrappers/cmath-fallback.h | 1 + 1 file changed, 1 insertion(+) diff --git a/sycl/include/sycl/stl_wrappers/cmath-fallback.h b/sycl/include/sycl/stl_wrappers/cmath-fallback.h index 93d0cb5e912f4..a433bbe43bd1d 100644 --- a/sycl/include/sycl/stl_wrappers/cmath-fallback.h +++ b/sycl/include/sycl/stl_wrappers/cmath-fallback.h @@ -29,6 +29,7 @@ __DPCPP_SYCL_DEVICE long long abs(long long n) { return n < 0 ? -n : n; } __DPCPP_SYCL_DEVICE_C long long llabs(long long n) { return n < 0 ? -n : n; } __DPCPP_SYCL_DEVICE long abs(long n) { return n < 0 ? -n : n; } +__DPCPP_SYCL_DEVICE int abs(int n) { return n < 0 ? -n : n; } __DPCPP_SYCL_DEVICE_C long labs(long n) { return n < 0 ? -n : n; } __DPCPP_SYCL_DEVICE float abs(float x) { return x < 0 ? -x : x; } __DPCPP_SYCL_DEVICE double abs(double x) { return x < 0 ? -x : x; } From 8ce1d93ba39b9792e380e0d2093107e4d3c15bf5 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Mon, 9 Jun 2025 15:23:38 +0100 Subject: [PATCH 07/24] [SYCL] Fix overloadble requirement for sycl_device_only --- clang/lib/Sema/SemaDecl.cpp | 4 ++++ sycl/include/sycl/stl_wrappers/cmath-fallback.h | 5 ++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 49eef7e6f05e2..3ebffbec4c4c8 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -7354,6 +7354,10 @@ static bool isIncompleteDeclExternC(Sema &S, const T *D) { if (S.getLangOpts().CUDA && (D->template hasAttr() || D->template hasAttr())) return false; + + // So does SYCL's device_only attribute. + if (S.getLangOpts().isSYCL() && D->template hasAttr()) + return false; } return D->isExternC(); } diff --git a/sycl/include/sycl/stl_wrappers/cmath-fallback.h b/sycl/include/sycl/stl_wrappers/cmath-fallback.h index a433bbe43bd1d..25e8f543154f1 100644 --- a/sycl/include/sycl/stl_wrappers/cmath-fallback.h +++ b/sycl/include/sycl/stl_wrappers/cmath-fallback.h @@ -3,10 +3,9 @@ #ifdef __SYCL_DEVICE_ONLY__ -#define __DPCPP_SYCL_DEVICE \ - __attribute__((sycl_device_only, always_inline, overloadable)) +#define __DPCPP_SYCL_DEVICE __attribute__((sycl_device_only, always_inline)) #define __DPCPP_SYCL_DEVICE_C \ - extern "C" __attribute__((sycl_device_only, always_inline, overloadable)) + extern "C" __attribute__((sycl_device_only, always_inline)) #define __DPCPP_SPIRV_MAP_UNARY(NAME) \ __DPCPP_SYCL_DEVICE_C float NAME##f(float x) { \ From d3b29882a73606931ce70d203548c2779d564b67 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Mon, 9 Jun 2025 17:25:24 +0100 Subject: [PATCH 08/24] [SYCL] Add device only docs --- clang/include/clang/Basic/Attr.td | 2 +- clang/include/clang/Basic/AttrDocs.td | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 27a190f5c1ef9..6a10a02d877a8 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1611,7 +1611,7 @@ def SYCLDeviceOnly : InheritableAttr { let Spellings = [GNU<"sycl_device_only">]; let Subjects = SubjectList<[Function]>; let LangOpts = [SYCLIsDevice, SilentlyIgnoreSYCLIsHost]; - let Documentation = [Undocumented]; + let Documentation = [SYCLDeviceOnlyDocs]; } def SYCLGlobalVar : InheritableAttr { diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 5234c7ee02fff..17594e5710419 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -4518,6 +4518,17 @@ implicitly inherit this attribute. }]; } +def SYCLDeviceOnlyDocs : Documentation { + let Category = DocCatFunction; + let Heading = "sycl_device_only"; + let Content = [{ +This attribute can only be applied to functions and indicates that the function +is for the device only. This attribute allows to provide a device specific +overload of an existing function. All ``sycl_device_only`` function callees +implicitly inherit this attribute. + }]; +} + def RISCVInterruptDocs : Documentation { let Category = DocCatFunction; let Heading = "interrupt (RISC-V)"; From 350aec6b0c55ff0b14e4b1883517ca150624fcb8 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Mon, 9 Jun 2025 17:46:00 +0100 Subject: [PATCH 09/24] [SYCL] Add initial test for sycl-device-only --- clang/test/CodeGenSYCL/sycl-device-only.cpp | 34 +++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 clang/test/CodeGenSYCL/sycl-device-only.cpp diff --git a/clang/test/CodeGenSYCL/sycl-device-only.cpp b/clang/test/CodeGenSYCL/sycl-device-only.cpp new file mode 100644 index 0000000000000..86db368b75af1 --- /dev/null +++ b/clang/test/CodeGenSYCL/sycl-device-only.cpp @@ -0,0 +1,34 @@ +// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECKD +// RUN: %clang_cc1 -fsycl-is-host -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECKH +// Test code generation for sycl_device_only attribute. + +// Verify that the device overload is used on device. +// +// CHECK-LABEL: _Z3fooi +// CHECKH: %add = add nsw i32 %0, 10 +// CHECKD: %add = add nsw i32 %0, 20 +int foo(int a) { return a + 10; } + +#ifdef __SYCL_DEVICE_ONLY__ +__attribute__((sycl_device_only)) int foo(int a) { return a + 20; } +#endif + +__attribute__((sycl_device)) int bar(int b) { + return foo(b); +} + + +// Verify that in extern C the attribute enables mangling. +extern "C" { +// CHECK-LABEL: _Z3fooci +// CHECKH: %add = add nsw i32 %0, 10 +// CHECKD: %add = add nsw i32 %0, 20 +int fooc(int a) { return a + 10; } +#ifdef __SYCL_DEVICE_ONLY__ +__attribute__((sycl_device_only)) int fooc(int a) { return a + 20; } +#endif + +__attribute__((sycl_device)) int barc(int b) { + return fooc(b); +} +} From 2c119aed1575141544a8fe32ae2f4bfd18c2b852 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Tue, 10 Jun 2025 17:14:45 +0100 Subject: [PATCH 10/24] [SYCL] Add diagnostic for host side sycl_device_only --- clang/include/clang/Basic/DiagnosticSemaKinds.td | 4 ++++ clang/lib/Sema/SemaDeclAttr.cpp | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index d2583e96b2922..e7bdacf906a11 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -8048,6 +8048,10 @@ def err_sycl_device_global_not_publicly_accessible: Error< def err_sycl_device_global_array : Error< "'device_global' array is not allowed">; +def err_sycl_device_only_attr + : Error<"'sycl_device_only' functions are not allowed in host side. Please " + "guard them with __SYCL_DEVICE_ONLY__.">; + def err_unexpected_interface : Error< "unexpected interface name %0: expected expression">; def err_ref_non_value : Error<"%0 does not refer to a value">; diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index b076cb60db269..b61b256186f55 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -8053,6 +8053,12 @@ void Sema::ProcessDeclAttributeList( D->setInvalidDecl(); } + // Do not permit 'sycl_device_only' functions in host code + if (getLangOpts().SYCLIsHost && D->hasAttr()) { + Diag(D->getLocation(), diag::err_sycl_device_only_attr); + D->setInvalidDecl(); + } + // Do this check after processing D's attributes because the attribute // objc_method_family can change whether the given method is in the init // family, and it can be applied after objc_designated_initializer. This is a From 605eed6c777a9197a5df22733b3036d96a8baebe Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Wed, 11 Jun 2025 11:07:11 +0100 Subject: [PATCH 11/24] [SYCL] Block sycl_device_only emission on host side --- clang/lib/CodeGen/CodeGenModule.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 9478b836e1fd0..b5a485ee28ca9 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -4323,6 +4323,12 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { } } + // Don't emit 'sycl_device_only' function in SYCL host compilation. + if (LangOpts.SYCLIsHost && isa(Global) && + Global->hasAttr()) { + return; + } + if (LangOpts.OpenMP) { // If this is OpenMP, check if it is legal to emit this global normally. if (OpenMPRuntime && OpenMPRuntime->emitTargetGlobal(GD)) From 172e7f7e5d99002a34f990cc0b79eabbf759f1c2 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Wed, 11 Jun 2025 11:07:44 +0100 Subject: [PATCH 12/24] Revert "[SYCL] Add diagnostic for host side sycl_device_only" This reverts commit af224a08d299adee6263c0cdf953ab7f8eee568f. --- clang/include/clang/Basic/DiagnosticSemaKinds.td | 4 ---- clang/lib/Sema/SemaDeclAttr.cpp | 6 ------ 2 files changed, 10 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index e7bdacf906a11..d2583e96b2922 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -8048,10 +8048,6 @@ def err_sycl_device_global_not_publicly_accessible: Error< def err_sycl_device_global_array : Error< "'device_global' array is not allowed">; -def err_sycl_device_only_attr - : Error<"'sycl_device_only' functions are not allowed in host side. Please " - "guard them with __SYCL_DEVICE_ONLY__.">; - def err_unexpected_interface : Error< "unexpected interface name %0: expected expression">; def err_ref_non_value : Error<"%0 does not refer to a value">; diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index b61b256186f55..b076cb60db269 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -8053,12 +8053,6 @@ void Sema::ProcessDeclAttributeList( D->setInvalidDecl(); } - // Do not permit 'sycl_device_only' functions in host code - if (getLangOpts().SYCLIsHost && D->hasAttr()) { - Diag(D->getLocation(), diag::err_sycl_device_only_attr); - D->setInvalidDecl(); - } - // Do this check after processing D's attributes because the attribute // objc_method_family can change whether the given method is in the init // family, and it can be applied after objc_designated_initializer. This is a From bb9fc66b465110d1e61f7f4b489a90d43234c2c0 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Wed, 11 Jun 2025 11:56:13 +0100 Subject: [PATCH 13/24] [SYCL] Cleanup documentation and comments --- clang/include/clang/Basic/AttrDocs.td | 7 ++++--- clang/test/CodeGenSYCL/sycl-device-only.cpp | 8 ++------ 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 17594e5710419..55d889f3efa1f 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -4523,9 +4523,10 @@ def SYCLDeviceOnlyDocs : Documentation { let Heading = "sycl_device_only"; let Content = [{ This attribute can only be applied to functions and indicates that the function -is for the device only. This attribute allows to provide a device specific -overload of an existing function. All ``sycl_device_only`` function callees -implicitly inherit this attribute. +is only available for the device. It allows functions marked with it to +overload existing functions without the attribute, in which case the overload +with the attribute will be used on the device side and the overload without +will be used on the host side. }]; } diff --git a/clang/test/CodeGenSYCL/sycl-device-only.cpp b/clang/test/CodeGenSYCL/sycl-device-only.cpp index 86db368b75af1..32331665f5ece 100644 --- a/clang/test/CodeGenSYCL/sycl-device-only.cpp +++ b/clang/test/CodeGenSYCL/sycl-device-only.cpp @@ -8,26 +8,22 @@ // CHECKH: %add = add nsw i32 %0, 10 // CHECKD: %add = add nsw i32 %0, 20 int foo(int a) { return a + 10; } - -#ifdef __SYCL_DEVICE_ONLY__ __attribute__((sycl_device_only)) int foo(int a) { return a + 20; } -#endif +// Use a `sycl_device` function as entry point __attribute__((sycl_device)) int bar(int b) { return foo(b); } - // Verify that in extern C the attribute enables mangling. extern "C" { // CHECK-LABEL: _Z3fooci // CHECKH: %add = add nsw i32 %0, 10 // CHECKD: %add = add nsw i32 %0, 20 int fooc(int a) { return a + 10; } -#ifdef __SYCL_DEVICE_ONLY__ __attribute__((sycl_device_only)) int fooc(int a) { return a + 20; } -#endif +// Use a `sycl_device` function as entry point __attribute__((sycl_device)) int barc(int b) { return fooc(b); } From 06474cbb34b8c91f46dad2af40bd8b4fc7bfd31e Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Thu, 12 Jun 2025 18:04:08 +0100 Subject: [PATCH 14/24] [SYCL] Fix attribute emission handling --- clang/include/clang/Basic/AttrDocs.td | 4 ++- clang/lib/CodeGen/CodeGenModule.cpp | 37 +++++++++++++++++++++ clang/test/CodeGenSYCL/sycl-device-only.cpp | 36 ++++++++++++++++++++ 3 files changed, 76 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 55d889f3efa1f..000a0e522e8cc 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -4526,7 +4526,9 @@ This attribute can only be applied to functions and indicates that the function is only available for the device. It allows functions marked with it to overload existing functions without the attribute, in which case the overload with the attribute will be used on the device side and the overload without -will be used on the host side. +will be used on the host side. Note: as opposed to ``sycl_device`` this does +not mark the function as being exported, both attributes can be used together +if needed. }]; } diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index b5a485ee28ca9..3850247242fb3 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -4417,6 +4417,43 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { } } + + // When using SYCLDeviceOnlyAttr, there can be two functions with the same + // mangling, the host function and the device overload. So when compiling for + // device we need to make sure we're selecting the SYCLDeviceOnlyAttr + // overload and dropping the host overload. + if (LangOpts.SYCLIsDevice) { + StringRef MangledName = getMangledName(GD); + auto DDI = DeferredDecls.find(MangledName); + // If we have an existing declaration with the same mangling for this + // symbol it may be a SYCLDeviceOnlyAttr case. + if (DDI != DeferredDecls.end()) { + auto *G = cast(DeferredDecls[MangledName].getDecl()); + + if (!G->hasAttr() && + Global->hasAttr() && + Global->hasAttr()) { + // If the host declaration was already processed and the device only + // declaration is also a sycl external declaration, remove the host + // variant and skip. The device only variant will be generated later + // as it's marked sycl external. + DeferredDecls.erase(DDI); + return; + } else if (!G->hasAttr() && + Global->hasAttr()) { + // If the host declaration was already processed, replace it with the + // device only declaration. + DeferredDecls[MangledName] = GD; + return; + } else if (!Global->hasAttr() && + G->hasAttr()) { + // If the device only declaration was already processed, skip the + // host declaration. + return; + } + } + } + // clang::ParseAST ensures that we emit the SYCL devices at the end, so // anything that is a device (or indirectly called) will be handled later. if (LangOpts.SYCLIsDevice && MustBeEmitted(Global)) { diff --git a/clang/test/CodeGenSYCL/sycl-device-only.cpp b/clang/test/CodeGenSYCL/sycl-device-only.cpp index 32331665f5ece..51e76eb5deeee 100644 --- a/clang/test/CodeGenSYCL/sycl-device-only.cpp +++ b/clang/test/CodeGenSYCL/sycl-device-only.cpp @@ -15,6 +15,19 @@ __attribute__((sycl_device)) int bar(int b) { return foo(b); } +// Verify that the order of declaration doesn't change the behavior. +// +// CHECK-LABEL: _Z3fooswapi +// CHECKH: %add = add nsw i32 %0, 10 +// CHECKD: %add = add nsw i32 %0, 20 +__attribute__((sycl_device_only)) int fooswap(int a) { return a + 20; } +int fooswap(int a) { return a + 10; } + +// Use a `sycl_device` function as entry point +__attribute__((sycl_device)) int barswap(int b) { + return fooswap(b); +} + // Verify that in extern C the attribute enables mangling. extern "C" { // CHECK-LABEL: _Z3fooci @@ -28,3 +41,26 @@ __attribute__((sycl_device)) int barc(int b) { return fooc(b); } } + +// Check that both attributes can work together +// CHECK-LABEL: _Z3fooai +// CHECKH: %add = add nsw i32 %0, 10 +// CHECKD: %add = add nsw i32 %0, 20 +int fooa(int a) { return a + 10; } +__attribute__((sycl_device_only, sycl_device)) int fooa(int a) { return a + 20; } + +// Use a `sycl_device` function as entry point +__attribute__((sycl_device)) int bara(int b) { + return fooa(b); +} + +// CHECK-LABEL: _Z3fooaswapi +// CHECKH: %add = add nsw i32 %0, 10 +// CHECKD: %add = add nsw i32 %0, 20 +__attribute__((sycl_device_only, sycl_device)) int fooaswap(int a) { return a + 20; } +int fooaswap(int a) { return a + 10; } + +// Use a `sycl_device` function as entry point +__attribute__((sycl_device)) int baraswap(int b) { + return fooaswap(b); +} From 59c6edfdb48e50fbe1986132cd1acecf6094fe25 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Thu, 12 Jun 2025 18:10:32 +0100 Subject: [PATCH 15/24] [SYCL] Fix formatting --- clang/lib/CodeGen/CodeGenModule.cpp | 57 ++++++++++----------- clang/test/CodeGenSYCL/sycl-device-only.cpp | 28 ++++------ 2 files changed, 39 insertions(+), 46 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 3850247242fb3..43078e9e29545 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -4417,41 +4417,40 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { } } - // When using SYCLDeviceOnlyAttr, there can be two functions with the same // mangling, the host function and the device overload. So when compiling for // device we need to make sure we're selecting the SYCLDeviceOnlyAttr // overload and dropping the host overload. if (LangOpts.SYCLIsDevice) { - StringRef MangledName = getMangledName(GD); - auto DDI = DeferredDecls.find(MangledName); - // If we have an existing declaration with the same mangling for this - // symbol it may be a SYCLDeviceOnlyAttr case. - if (DDI != DeferredDecls.end()) { - auto *G = cast(DeferredDecls[MangledName].getDecl()); - - if (!G->hasAttr() && - Global->hasAttr() && - Global->hasAttr()) { - // If the host declaration was already processed and the device only - // declaration is also a sycl external declaration, remove the host - // variant and skip. The device only variant will be generated later - // as it's marked sycl external. - DeferredDecls.erase(DDI); - return; - } else if (!G->hasAttr() && - Global->hasAttr()) { - // If the host declaration was already processed, replace it with the - // device only declaration. - DeferredDecls[MangledName] = GD; - return; - } else if (!Global->hasAttr() && - G->hasAttr()) { - // If the device only declaration was already processed, skip the - // host declaration. - return; - } + StringRef MangledName = getMangledName(GD); + auto DDI = DeferredDecls.find(MangledName); + // If we have an existing declaration with the same mangling for this + // symbol it may be a SYCLDeviceOnlyAttr case. + if (DDI != DeferredDecls.end()) { + auto *G = cast(DeferredDecls[MangledName].getDecl()); + + if (!G->hasAttr() && + Global->hasAttr() && + Global->hasAttr()) { + // If the host declaration was already processed and the device only + // declaration is also a sycl external declaration, remove the host + // variant and skip. The device only variant will be generated later + // as it's marked sycl external. + DeferredDecls.erase(DDI); + return; + } else if (!G->hasAttr() && + Global->hasAttr()) { + // If the host declaration was already processed, replace it with the + // device only declaration. + DeferredDecls[MangledName] = GD; + return; + } else if (!Global->hasAttr() && + G->hasAttr()) { + // If the device only declaration was already processed, skip the + // host declaration. + return; } + } } // clang::ParseAST ensures that we emit the SYCL devices at the end, so diff --git a/clang/test/CodeGenSYCL/sycl-device-only.cpp b/clang/test/CodeGenSYCL/sycl-device-only.cpp index 51e76eb5deeee..f9a97cb803fae 100644 --- a/clang/test/CodeGenSYCL/sycl-device-only.cpp +++ b/clang/test/CodeGenSYCL/sycl-device-only.cpp @@ -11,9 +11,7 @@ int foo(int a) { return a + 10; } __attribute__((sycl_device_only)) int foo(int a) { return a + 20; } // Use a `sycl_device` function as entry point -__attribute__((sycl_device)) int bar(int b) { - return foo(b); -} +__attribute__((sycl_device)) int bar(int b) { return foo(b); } // Verify that the order of declaration doesn't change the behavior. // @@ -24,9 +22,7 @@ __attribute__((sycl_device_only)) int fooswap(int a) { return a + 20; } int fooswap(int a) { return a + 10; } // Use a `sycl_device` function as entry point -__attribute__((sycl_device)) int barswap(int b) { - return fooswap(b); -} +__attribute__((sycl_device)) int barswap(int b) { return fooswap(b); } // Verify that in extern C the attribute enables mangling. extern "C" { @@ -37,9 +33,7 @@ int fooc(int a) { return a + 10; } __attribute__((sycl_device_only)) int fooc(int a) { return a + 20; } // Use a `sycl_device` function as entry point -__attribute__((sycl_device)) int barc(int b) { - return fooc(b); -} +__attribute__((sycl_device)) int barc(int b) { return fooc(b); } } // Check that both attributes can work together @@ -47,20 +41,20 @@ __attribute__((sycl_device)) int barc(int b) { // CHECKH: %add = add nsw i32 %0, 10 // CHECKD: %add = add nsw i32 %0, 20 int fooa(int a) { return a + 10; } -__attribute__((sycl_device_only, sycl_device)) int fooa(int a) { return a + 20; } +__attribute__((sycl_device_only, sycl_device)) int fooa(int a) { + return a + 20; +} // Use a `sycl_device` function as entry point -__attribute__((sycl_device)) int bara(int b) { - return fooa(b); -} +__attribute__((sycl_device)) int bara(int b) { return fooa(b); } // CHECK-LABEL: _Z3fooaswapi // CHECKH: %add = add nsw i32 %0, 10 // CHECKD: %add = add nsw i32 %0, 20 -__attribute__((sycl_device_only, sycl_device)) int fooaswap(int a) { return a + 20; } +__attribute__((sycl_device_only, sycl_device)) int fooaswap(int a) { + return a + 20; +} int fooaswap(int a) { return a + 10; } // Use a `sycl_device` function as entry point -__attribute__((sycl_device)) int baraswap(int b) { - return fooaswap(b); -} +__attribute__((sycl_device)) int baraswap(int b) { return fooaswap(b); } From 1a6e0f56d9b5a4ccde42ad4a2066e84af8815f61 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Fri, 13 Jun 2025 10:09:11 +0100 Subject: [PATCH 16/24] [SYCL] Rename variable --- clang/lib/CodeGen/CodeGenModule.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 43078e9e29545..5c03aacd25c22 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -4427,9 +4427,10 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { // If we have an existing declaration with the same mangling for this // symbol it may be a SYCLDeviceOnlyAttr case. if (DDI != DeferredDecls.end()) { - auto *G = cast(DeferredDecls[MangledName].getDecl()); + auto *PreviousGlobal = + cast(DeferredDecls[MangledName].getDecl()); - if (!G->hasAttr() && + if (!PreviousGlobal->hasAttr() && Global->hasAttr() && Global->hasAttr()) { // If the host declaration was already processed and the device only @@ -4438,14 +4439,14 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { // as it's marked sycl external. DeferredDecls.erase(DDI); return; - } else if (!G->hasAttr() && + } else if (!PreviousGlobal->hasAttr() && Global->hasAttr()) { // If the host declaration was already processed, replace it with the // device only declaration. DeferredDecls[MangledName] = GD; return; - } else if (!Global->hasAttr() && - G->hasAttr()) { + } else if (PreviousGlobal->hasAttr() && + !Global->hasAttr()) { // If the device only declaration was already processed, skip the // host declaration. return; From 5df06cf1d8784fef80652f9d4e6169793b3ae4d6 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Fri, 13 Jun 2025 14:58:37 +0100 Subject: [PATCH 17/24] [SYCL] More fallback header improvements --- libdevice/cmath_wrapper.cpp | 18 ------------ libdevice/cmath_wrapper_fp64.cpp | 18 ------------ .../sycl/stl_wrappers/cmath-fallback.h | 29 +++++++++++++++++-- 3 files changed, 27 insertions(+), 38 deletions(-) diff --git a/libdevice/cmath_wrapper.cpp b/libdevice/cmath_wrapper.cpp index 3c6c1b97fa1c5..a084a86883767 100644 --- a/libdevice/cmath_wrapper.cpp +++ b/libdevice/cmath_wrapper.cpp @@ -189,25 +189,7 @@ float asinhf(float x) { return __devicelib_asinhf(x); } DEVICE_EXTERN_C_INLINE float atanhf(float x) { return __devicelib_atanhf(x); } -#ifdef __NVPTX__ -extern "C" SYCL_EXTERNAL float __nv_nearbyintf(float); -DEVICE_EXTERN_C_INLINE -float nearbyintf(float x) { return __nv_nearbyintf(x); } - -extern "C" SYCL_EXTERNAL float __nv_rintf(float); -DEVICE_EXTERN_C_INLINE -float rintf(float x) { return __nv_rintf(x); } -#elif defined(__AMDGCN__) -extern "C" SYCL_EXTERNAL float __ocml_nearbyint_f32(float); -DEVICE_EXTERN_C_INLINE -float nearbyintf(float x) { return __ocml_nearbyint_f32(x); } - -extern "C" SYCL_EXTERNAL float __ocml_rint_f32(float); -DEVICE_EXTERN_C_INLINE -float rintf(float x) { return __ocml_rint_f32(x); } -#else DEVICE_EXTERN_C_INLINE float rintf(float x) { return __spirv_ocl_rint(x); } -#endif #endif // __SPIR__ || __SPIRV__ diff --git a/libdevice/cmath_wrapper_fp64.cpp b/libdevice/cmath_wrapper_fp64.cpp index 81ba3e710ec6d..855317bcf3f4b 100644 --- a/libdevice/cmath_wrapper_fp64.cpp +++ b/libdevice/cmath_wrapper_fp64.cpp @@ -179,26 +179,8 @@ double atanh(double x) { return __devicelib_atanh(x); } DEVICE_EXTERN_C_INLINE double scalbn(double x, int exp) { return __devicelib_scalbn(x, exp); } -#ifdef __NVPTX__ -extern "C" SYCL_EXTERNAL double __nv_nearbyint(double); -DEVICE_EXTERN_C_INLINE -double nearbyint(double x) { return __nv_nearbyint(x); } - -extern "C" SYCL_EXTERNAL double __nv_rint(double); -DEVICE_EXTERN_C_INLINE -double rint(double x) { return __nv_rint(x); } -#elif defined(__AMDGCN__) -extern "C" SYCL_EXTERNAL double __ocml_nearbyint_f64(double); -DEVICE_EXTERN_C_INLINE -double nearbyint(double x) { return __ocml_nearbyint_f64(x); } - -extern "C" SYCL_EXTERNAL double __ocml_rint_f64(double); -DEVICE_EXTERN_C_INLINE -double rint(double x) { return __ocml_rint_f64(x); } -#else DEVICE_EXTERN_C_INLINE double rint(double x) { return __spirv_ocl_rint(x); } -#endif #if defined(_MSC_VER) #include diff --git a/sycl/include/sycl/stl_wrappers/cmath-fallback.h b/sycl/include/sycl/stl_wrappers/cmath-fallback.h index 25e8f543154f1..5ff5cb8461575 100644 --- a/sycl/include/sycl/stl_wrappers/cmath-fallback.h +++ b/sycl/include/sycl/stl_wrappers/cmath-fallback.h @@ -36,6 +36,7 @@ __DPCPP_SYCL_DEVICE float fabs(float x) { return x < 0 ? -x : x; } __DPCPP_SYCL_DEVICE_C float fabsf(float x) { return x < 0 ? -x : x; } __DPCPP_SYCL_DEVICE double fabs(double x) { return x < 0 ? -x : x; } +__DPCPP_SPIRV_MAP_BINARY(copysign); __DPCPP_SPIRV_MAP_UNARY(acos); __DPCPP_SPIRV_MAP_UNARY(acosh); __DPCPP_SPIRV_MAP_UNARY(asin); @@ -163,6 +164,21 @@ __DPCPP_SYCL_DEVICE lldiv_t ldiv(long long x, long long y) { return {x / y, x % y}; } +#if defined(__NVPTX__) +extern "C" SYCL_EXTERNAL float __nv_nearbyintf(float); +extern "C" SYCL_EXTERNAL double __nv_nearbyint(double); +__DPCPP_SYCL_DEVICE_C float nearbyintf(float x) { return __nv_nearbyintf(x); } +__DPCPP_SYCL_DEVICE float nearbyint(float x) { return __nv_nearbyintf(x); } +__DPCPP_SYCL_DEVICE double nearbyint(double x) { return __nv_nearbyintf(x); } +#elif defined(__AMDGCN__) +extern "C" SYCL_EXTERNAL float __ocml_nearbyint_f32(float); +extern "C" SYCL_EXTERNAL double __ocml_nearbyint_f64(double); +__DPCPP_SYCL_DEVICE_C float nearbyintf(float x) { return __ocml_nearbyint_f32(x); } +__DPCPP_SYCL_DEVICE float nearbyint(float x) { return __ocml_nearbyint_f32(x); } +__DPCPP_SYCL_DEVICE double nearbyint(double x) { return __ocml_nearbyint_f64(x); } +#endif + + #ifdef _LIBCPP_BEGIN_NAMESPACE_STD _LIBCPP_BEGIN_NAMESPACE_STD #else @@ -172,6 +188,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION #endif #endif +#if defined(__NVPTX__) || defined(__AMDGCN__) +using ::nearbyint; +using ::nearbyintf; +#endif + using ::abs; using ::acos; using ::acosf; @@ -196,7 +217,8 @@ using ::labs; using ::ldiv; using ::llabs; using ::lldiv; -// using ::copysign; +using ::copysign; +using ::copysignf; using ::cos; using ::cosf; using ::cosh; @@ -225,7 +247,9 @@ using ::fmodf; // using ::fpclassify; using ::frexp; using ::hypot; +using ::hypotf; using ::ilogb; +using ::ilogbf; // using ::isfinite; // using ::isgreater; // using ::isgreaterequal; @@ -238,11 +262,13 @@ using ::ilogb; // using ::isunordered; // using ::labs; using ::ldexp; +using ::ldexpf; using ::lgamma; using ::lgammaf; // using ::llabs; // using ::llrint; using ::log; +using ::logf; using ::log10; using ::log10f; using ::log1p; @@ -251,7 +277,6 @@ using ::log2; using ::log2f; using ::logb; using ::logbf; -using ::logf; // using ::lrint; // using ::lround; // using ::llround; From e0cd39995818ee40933d207fc5006d6a30db4e20 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Fri, 13 Jun 2025 15:02:55 +0100 Subject: [PATCH 18/24] [SYCL] Add nearbyint and rint to devicelib tests --- sycl/test-e2e/DeviceLib/cmath_fp64_test.cpp | 13 ++++++++----- sycl/test-e2e/DeviceLib/cmath_test.cpp | 14 ++++++++------ sycl/test-e2e/DeviceLib/math_fp64_test.cpp | 13 ++++++++----- sycl/test-e2e/DeviceLib/math_test.cpp | 13 ++++++++----- 4 files changed, 32 insertions(+), 21 deletions(-) diff --git a/sycl/test-e2e/DeviceLib/cmath_fp64_test.cpp b/sycl/test-e2e/DeviceLib/cmath_fp64_test.cpp index bda9ce2ff1ced..a7c030de66c15 100644 --- a/sycl/test-e2e/DeviceLib/cmath_fp64_test.cpp +++ b/sycl/test-e2e/DeviceLib/cmath_fp64_test.cpp @@ -21,13 +21,13 @@ namespace s = sycl; constexpr s::access::mode sycl_read = s::access::mode::read; constexpr s::access::mode sycl_write = s::access::mode::write; -#define TEST_NUM 74 +#define TEST_NUM 76 double ref[TEST_NUM] = { - 6, 100, 0.5, 1.0, 0, 0, -2, 1, 2, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, - 0, 1, 1, 0.5, 0, 2, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 1, 0, 1, - 2, 0, 1, 2, 5, 0, 0, 0, 0, 0.5, 0.5, NAN, NAN, 2, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + 1.0, 1.0, 6, 100, 0.5, 1.0, 0, 0, -2, 1, 2, 1, 1, 1, 0, 1, 1, 0, 0, + 0, 0, 0, 1, 1, 0.5, 0, 2, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 1, + 0, 1, 2, 0, 1, 2, 5, 0, 0, 0, 0, 0.5, 0.5, NAN, NAN, 2, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; double refIptr = 1; @@ -62,6 +62,9 @@ template void device_cmath_test(s::queue &deviceQueue) { T minus_infinity = -INFINITY; double subnormal; *((uint64_t *)&subnormal) = 0xFFFFFFFFFFFFFULL; + + res_access[i++] = std::nearbyint(0.9); + res_access[i++] = std::rint(0.9); res_access[i++] = std::scalbln(1.5, 2); res_access[i++] = sycl::exp10(2.0); res_access[i++] = sycl::rsqrt(4.0); diff --git a/sycl/test-e2e/DeviceLib/cmath_test.cpp b/sycl/test-e2e/DeviceLib/cmath_test.cpp index 6a453e56f704f..0c1687e26044c 100644 --- a/sycl/test-e2e/DeviceLib/cmath_test.cpp +++ b/sycl/test-e2e/DeviceLib/cmath_test.cpp @@ -24,13 +24,13 @@ namespace s = sycl; constexpr s::access::mode sycl_read = s::access::mode::read; constexpr s::access::mode sycl_write = s::access::mode::write; -#define TEST_NUM 70 +#define TEST_NUM 72 -float ref[TEST_NUM] = {100, 0.5, 1.0, 0, 0, -2, 1, 2, 1, 1, 0, 1, 1, 0, - 0, 0, 0, 0, 1, 1, 0.5, 0, 0, 1, 0, 2, 0, 0, - 0, 0, 0, 1, 0, 1, 2, 0, 1, 2, 5, 0, 0, 0, - 0, 0.5, 0.5, NAN, NAN, 2, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; +float ref[TEST_NUM] = { + 1.0f, 1.0f, 100, 0.5, 1.0, 0, 0, -2, 1, 2, 1, 1, 0, 1, 1, 0, 0, 0, + 0, 0, 1, 1, 0.5, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 1, 0, 1, + 2, 0, 1, 2, 5, 0, 0, 0, 0, 0.5, 0.5, NAN, NAN, 2, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; float refIptr = 1; @@ -60,6 +60,8 @@ template void device_cmath_test_1(s::queue &deviceQueue) { float subnormal; *((uint32_t *)&subnormal) = 0x7FFFFF; + res_access[i++] = std::nearbyint(0.9f); + res_access[i++] = std::rint(0.9f); res_access[i++] = sycl::exp10(2.0f); res_access[i++] = sycl::rsqrt(4.0f); res_access[i++] = std::trunc(1.2f); diff --git a/sycl/test-e2e/DeviceLib/math_fp64_test.cpp b/sycl/test-e2e/DeviceLib/math_fp64_test.cpp index 86dad1b3c0d3f..099f420c3aeb6 100644 --- a/sycl/test-e2e/DeviceLib/math_fp64_test.cpp +++ b/sycl/test-e2e/DeviceLib/math_fp64_test.cpp @@ -20,12 +20,13 @@ namespace s = sycl; constexpr s::access::mode sycl_read = s::access::mode::read; constexpr s::access::mode sycl_write = s::access::mode::write; -#define TEST_NUM 61 +#define TEST_NUM 63 -double ref_val[TEST_NUM] = { - 1, 0, 0, 0, 0, 0, 0, 1, 1, 0.5, 0, 2, 0, 0, 1, 0, 2, 0, 0, 0, 0, - 0, 1, 0, 1, 2, 0, 1, 2, 5, 0, 0, 0, 0, 0.5, 0.5, NAN, NAN, 2, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; +double ref_val[TEST_NUM] = {1.0, 1.0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0.5, 0, + 2, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 1, 0, + 1, 2, 0, 1, 2, 5, 0, 0, 0, 0, 0.5, 0.5, NAN, + NAN, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; double refIptr = 1; @@ -61,6 +62,8 @@ void device_math_test(s::queue &deviceQueue) { double subnormal; *((uint64_t *)&subnormal) = 0xFFFFFFFFFFFFFULL; + res_access[i++] = nearbyint(0.9); + res_access[i++] = rint(0.9); res_access[i++] = cos(0.0); res_access[i++] = sin(0.0); res_access[i++] = log(1.0); diff --git a/sycl/test-e2e/DeviceLib/math_test.cpp b/sycl/test-e2e/DeviceLib/math_test.cpp index 029409b617473..d7f7532c09a69 100644 --- a/sycl/test-e2e/DeviceLib/math_test.cpp +++ b/sycl/test-e2e/DeviceLib/math_test.cpp @@ -18,12 +18,13 @@ namespace s = sycl; constexpr s::access::mode sycl_read = s::access::mode::read; constexpr s::access::mode sycl_write = s::access::mode::write; -#define TEST_NUM 59 +#define TEST_NUM 61 -float ref_val[TEST_NUM] = { - 1, 0, 0, 0, 0, 0, 0, 1, 1, 0.5, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, - 1, 0, 1, 2, 0, 1, 2, 5, 0, 0, 0, 0, 0.5, 0.5, NAN, NAN, 2, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; +float ref_val[TEST_NUM] = {1.0f, 1.0f, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0.5, 0, + 0, 1, 0, 2, 0, 0, 0, 0, 0, 1, 0, 1, 2, + 0, 1, 2, 5, 0, 0, 0, 0, 0.5, 0.5, NAN, NAN, 2, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0}; float refIptr = 1; @@ -53,6 +54,8 @@ void device_math_test(s::queue &deviceQueue) { float subnormal; *((uint32_t *)&subnormal) = 0x7FFFFF; + res_access[i++] = nearbyintf(0.9); + res_access[i++] = rintf(0.9); res_access[i++] = cosf(0.0f); res_access[i++] = sinf(0.0f); res_access[i++] = logf(1.0f); From 8affa7a55f69068d7982975a410ba442f97d67d4 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Fri, 13 Jun 2025 16:32:17 +0100 Subject: [PATCH 19/24] [SYCL] Fix formatting --- .../sycl/stl_wrappers/cmath-fallback.h | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/sycl/include/sycl/stl_wrappers/cmath-fallback.h b/sycl/include/sycl/stl_wrappers/cmath-fallback.h index 5ff5cb8461575..8f074257bd17f 100644 --- a/sycl/include/sycl/stl_wrappers/cmath-fallback.h +++ b/sycl/include/sycl/stl_wrappers/cmath-fallback.h @@ -173,12 +173,15 @@ __DPCPP_SYCL_DEVICE double nearbyint(double x) { return __nv_nearbyintf(x); } #elif defined(__AMDGCN__) extern "C" SYCL_EXTERNAL float __ocml_nearbyint_f32(float); extern "C" SYCL_EXTERNAL double __ocml_nearbyint_f64(double); -__DPCPP_SYCL_DEVICE_C float nearbyintf(float x) { return __ocml_nearbyint_f32(x); } +__DPCPP_SYCL_DEVICE_C float nearbyintf(float x) { + return __ocml_nearbyint_f32(x); +} __DPCPP_SYCL_DEVICE float nearbyint(float x) { return __ocml_nearbyint_f32(x); } -__DPCPP_SYCL_DEVICE double nearbyint(double x) { return __ocml_nearbyint_f64(x); } +__DPCPP_SYCL_DEVICE double nearbyint(double x) { + return __ocml_nearbyint_f64(x); +} #endif - #ifdef _LIBCPP_BEGIN_NAMESPACE_STD _LIBCPP_BEGIN_NAMESPACE_STD #else @@ -212,17 +215,13 @@ using ::cbrt; using ::cbrtf; using ::ceil; using ::ceilf; -using ::div; -using ::labs; -using ::ldiv; -using ::llabs; -using ::lldiv; using ::copysign; using ::copysignf; using ::cos; using ::cosf; using ::cosh; using ::coshf; +using ::div; using ::erf; using ::erfc; using ::erfcf; @@ -244,6 +243,10 @@ using ::fmaxf; using ::fminf; using ::fmod; using ::fmodf; +using ::labs; +using ::ldiv; +using ::llabs; +using ::lldiv; // using ::fpclassify; using ::frexp; using ::hypot; @@ -268,7 +271,6 @@ using ::lgammaf; // using ::llabs; // using ::llrint; using ::log; -using ::logf; using ::log10; using ::log10f; using ::log1p; @@ -277,6 +279,7 @@ using ::log2; using ::log2f; using ::logb; using ::logbf; +using ::logf; // using ::lrint; // using ::lround; // using ::llround; From 8d9733db16b7ebfe5b5a44bb469c1ad7bea5fcea Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Fri, 13 Jun 2025 17:29:15 +0100 Subject: [PATCH 20/24] [SYCL] Add SYCL_EXTERNAL to neabyint and rint --- sycl/include/sycl/stl_wrappers/cmath | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sycl/include/sycl/stl_wrappers/cmath b/sycl/include/sycl/stl_wrappers/cmath index 8c626e4b06606..e6c3fdfca7a03 100644 --- a/sycl/include/sycl/stl_wrappers/cmath +++ b/sycl/include/sycl/stl_wrappers/cmath @@ -109,6 +109,10 @@ extern __DPCPP_SYCL_EXTERNAL double atanh(double x); extern __DPCPP_SYCL_EXTERNAL double frexp(double x, int *exp); extern __DPCPP_SYCL_EXTERNAL double ldexp(double x, int exp); extern __DPCPP_SYCL_EXTERNAL double hypot(double x, double y); +extern __DPCPP_SYCL_EXTERNAL float nearbyintf(float x); +extern __DPCPP_SYCL_EXTERNAL double nearbyint(double x); +extern __DPCPP_SYCL_EXTERNAL float rintf(float x); +extern __DPCPP_SYCL_EXTERNAL double rint(double x); } #ifdef __GLIBC__ From ec84f57a6b0ae6f140d7022571a62be76ba7b745 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Mon, 16 Jun 2025 13:29:26 +0100 Subject: [PATCH 21/24] [SYCL][E2E] Remove nearbyint from test and stl wrapper This doesn't map to a spir-v built-in --- sycl/include/sycl/stl_wrappers/cmath | 2 -- sycl/test-e2e/DeviceLib/cmath_fp64_test.cpp | 11 +++++------ sycl/test-e2e/DeviceLib/cmath_test.cpp | 13 ++++++------- sycl/test-e2e/DeviceLib/math_fp64_test.cpp | 12 +++++------- sycl/test-e2e/DeviceLib/math_test.cpp | 12 +++++------- 5 files changed, 21 insertions(+), 29 deletions(-) diff --git a/sycl/include/sycl/stl_wrappers/cmath b/sycl/include/sycl/stl_wrappers/cmath index e6c3fdfca7a03..4240e7458d0b0 100644 --- a/sycl/include/sycl/stl_wrappers/cmath +++ b/sycl/include/sycl/stl_wrappers/cmath @@ -109,8 +109,6 @@ extern __DPCPP_SYCL_EXTERNAL double atanh(double x); extern __DPCPP_SYCL_EXTERNAL double frexp(double x, int *exp); extern __DPCPP_SYCL_EXTERNAL double ldexp(double x, int exp); extern __DPCPP_SYCL_EXTERNAL double hypot(double x, double y); -extern __DPCPP_SYCL_EXTERNAL float nearbyintf(float x); -extern __DPCPP_SYCL_EXTERNAL double nearbyint(double x); extern __DPCPP_SYCL_EXTERNAL float rintf(float x); extern __DPCPP_SYCL_EXTERNAL double rint(double x); diff --git a/sycl/test-e2e/DeviceLib/cmath_fp64_test.cpp b/sycl/test-e2e/DeviceLib/cmath_fp64_test.cpp index a7c030de66c15..833b3aea6203a 100644 --- a/sycl/test-e2e/DeviceLib/cmath_fp64_test.cpp +++ b/sycl/test-e2e/DeviceLib/cmath_fp64_test.cpp @@ -21,13 +21,13 @@ namespace s = sycl; constexpr s::access::mode sycl_read = s::access::mode::read; constexpr s::access::mode sycl_write = s::access::mode::write; -#define TEST_NUM 76 +#define TEST_NUM 75 double ref[TEST_NUM] = { - 1.0, 1.0, 6, 100, 0.5, 1.0, 0, 0, -2, 1, 2, 1, 1, 1, 0, 1, 1, 0, 0, - 0, 0, 0, 1, 1, 0.5, 0, 2, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 1, - 0, 1, 2, 0, 1, 2, 5, 0, 0, 0, 0, 0.5, 0.5, NAN, NAN, 2, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + 1.0, 6, 100, 0.5, 1.0, 0, 0, -2, 1, 2, 1, 1, 1, 0, 1, 1, 0, 0, 0, + 0, 0, 1, 1, 0.5, 0, 2, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 1, 0, + 1, 2, 0, 1, 2, 5, 0, 0, 0, 0, 0.5, 0.5, NAN, NAN, 2, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; double refIptr = 1; @@ -63,7 +63,6 @@ template void device_cmath_test(s::queue &deviceQueue) { double subnormal; *((uint64_t *)&subnormal) = 0xFFFFFFFFFFFFFULL; - res_access[i++] = std::nearbyint(0.9); res_access[i++] = std::rint(0.9); res_access[i++] = std::scalbln(1.5, 2); res_access[i++] = sycl::exp10(2.0); diff --git a/sycl/test-e2e/DeviceLib/cmath_test.cpp b/sycl/test-e2e/DeviceLib/cmath_test.cpp index 0c1687e26044c..c5c58f09023d4 100644 --- a/sycl/test-e2e/DeviceLib/cmath_test.cpp +++ b/sycl/test-e2e/DeviceLib/cmath_test.cpp @@ -24,13 +24,13 @@ namespace s = sycl; constexpr s::access::mode sycl_read = s::access::mode::read; constexpr s::access::mode sycl_write = s::access::mode::write; -#define TEST_NUM 72 +#define TEST_NUM 71 -float ref[TEST_NUM] = { - 1.0f, 1.0f, 100, 0.5, 1.0, 0, 0, -2, 1, 2, 1, 1, 0, 1, 1, 0, 0, 0, - 0, 0, 1, 1, 0.5, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 1, 0, 1, - 2, 0, 1, 2, 5, 0, 0, 0, 0, 0.5, 0.5, NAN, NAN, 2, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; +float ref[TEST_NUM] = {1.0f, 100, 0.5, 1.0, 0, 0, -2, 1, 2, 1, 1, 0, 1, 1, 0, + 0, 0, 0, 0, 1, 1, 0.5, 0, 0, 1, 0, 2, 0, 0, 0, + 0, 0, 1, 0, 1, 2, 0, 1, 2, 5, 0, 0, 0, 0, 0.5, + 0.5, NAN, NAN, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; float refIptr = 1; @@ -60,7 +60,6 @@ template void device_cmath_test_1(s::queue &deviceQueue) { float subnormal; *((uint32_t *)&subnormal) = 0x7FFFFF; - res_access[i++] = std::nearbyint(0.9f); res_access[i++] = std::rint(0.9f); res_access[i++] = sycl::exp10(2.0f); res_access[i++] = sycl::rsqrt(4.0f); diff --git a/sycl/test-e2e/DeviceLib/math_fp64_test.cpp b/sycl/test-e2e/DeviceLib/math_fp64_test.cpp index 099f420c3aeb6..5f160b9f2a7b1 100644 --- a/sycl/test-e2e/DeviceLib/math_fp64_test.cpp +++ b/sycl/test-e2e/DeviceLib/math_fp64_test.cpp @@ -20,13 +20,12 @@ namespace s = sycl; constexpr s::access::mode sycl_read = s::access::mode::read; constexpr s::access::mode sycl_write = s::access::mode::write; -#define TEST_NUM 63 +#define TEST_NUM 62 -double ref_val[TEST_NUM] = {1.0, 1.0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0.5, 0, - 2, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 1, 0, - 1, 2, 0, 1, 2, 5, 0, 0, 0, 0, 0.5, 0.5, NAN, - NAN, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; +double ref_val[TEST_NUM] = { + 1.0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0.5, 0, 2, 0, 0, 1, 0, 2, 0, 0, 0, + 0, 0, 1, 0, 1, 2, 0, 1, 2, 5, 0, 0, 0, 0, 0.5, 0.5, NAN, NAN, 2, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; double refIptr = 1; @@ -62,7 +61,6 @@ void device_math_test(s::queue &deviceQueue) { double subnormal; *((uint64_t *)&subnormal) = 0xFFFFFFFFFFFFFULL; - res_access[i++] = nearbyint(0.9); res_access[i++] = rint(0.9); res_access[i++] = cos(0.0); res_access[i++] = sin(0.0); diff --git a/sycl/test-e2e/DeviceLib/math_test.cpp b/sycl/test-e2e/DeviceLib/math_test.cpp index d7f7532c09a69..c9a98f468225d 100644 --- a/sycl/test-e2e/DeviceLib/math_test.cpp +++ b/sycl/test-e2e/DeviceLib/math_test.cpp @@ -18,13 +18,12 @@ namespace s = sycl; constexpr s::access::mode sycl_read = s::access::mode::read; constexpr s::access::mode sycl_write = s::access::mode::write; -#define TEST_NUM 61 +#define TEST_NUM 60 -float ref_val[TEST_NUM] = {1.0f, 1.0f, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0.5, 0, - 0, 1, 0, 2, 0, 0, 0, 0, 0, 1, 0, 1, 2, - 0, 1, 2, 5, 0, 0, 0, 0, 0.5, 0.5, NAN, NAN, 2, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0}; +float ref_val[TEST_NUM] = { + 1.0f, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0.5, 0, 0, 1, 0, 2, 0, 0, 0, 0, + 0, 1, 0, 1, 2, 0, 1, 2, 5, 0, 0, 0, 0, 0.5, 0.5, NAN, NAN, 2, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; float refIptr = 1; @@ -54,7 +53,6 @@ void device_math_test(s::queue &deviceQueue) { float subnormal; *((uint32_t *)&subnormal) = 0x7FFFFF; - res_access[i++] = nearbyintf(0.9); res_access[i++] = rintf(0.9); res_access[i++] = cosf(0.0f); res_access[i++] = sinf(0.0f); From 9db7bde66616a24228eb1132424cc6bf09340b75 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Wed, 18 Jun 2025 18:05:34 +0100 Subject: [PATCH 22/24] [SYCL] Don't leak macros from cmath-fallback.h --- sycl/include/sycl/stl_wrappers/cmath-fallback.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sycl/include/sycl/stl_wrappers/cmath-fallback.h b/sycl/include/sycl/stl_wrappers/cmath-fallback.h index 8f074257bd17f..a056e501dd1c5 100644 --- a/sycl/include/sycl/stl_wrappers/cmath-fallback.h +++ b/sycl/include/sycl/stl_wrappers/cmath-fallback.h @@ -329,7 +329,9 @@ _GLIBCXX_END_NAMESPACE_VERSION } // namespace std #endif +#undef __DPCPP_SPIRV_MAP_BINARY #undef __DPCPP_SPIRV_MAP_UNARY +#undef __DPCPP_SYCL_DEVICE_C #undef __DPCPP_SYCL_DEVICE #endif #endif From d8a273b9193bdc1a085a806cebbddecb1a73fcfe Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Wed, 25 Jun 2025 14:52:42 +0100 Subject: [PATCH 23/24] [SYCL] Fix if/else/return formatting --- clang/lib/CodeGen/CodeGenModule.cpp | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 5c03aacd25c22..310e0c72ffb1e 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -4430,25 +4430,29 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { auto *PreviousGlobal = cast(DeferredDecls[MangledName].getDecl()); + // If the host declaration was already processed and the device only + // declaration is also a sycl external declaration, remove the host + // variant and skip. The device only variant will be generated later + // as it's marked sycl external. if (!PreviousGlobal->hasAttr() && Global->hasAttr() && Global->hasAttr()) { - // If the host declaration was already processed and the device only - // declaration is also a sycl external declaration, remove the host - // variant and skip. The device only variant will be generated later - // as it's marked sycl external. DeferredDecls.erase(DDI); return; - } else if (!PreviousGlobal->hasAttr() && - Global->hasAttr()) { - // If the host declaration was already processed, replace it with the - // device only declaration. + } + + // If the host declaration was already processed, replace it with the + // device only declaration. + if (!PreviousGlobal->hasAttr() && + Global->hasAttr()) { DeferredDecls[MangledName] = GD; return; - } else if (PreviousGlobal->hasAttr() && - !Global->hasAttr()) { - // If the device only declaration was already processed, skip the - // host declaration. + } + + // If the device only declaration was already processed, skip the + // host declaration. + if (PreviousGlobal->hasAttr() && + !Global->hasAttr()) { return; } } From f53840b3b640db793adff7aa54bf91a6ab675a95 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Wed, 25 Jun 2025 14:53:18 +0100 Subject: [PATCH 24/24] [SYCL] Cleanup use of isSYCL() in SemaOverload --- clang/lib/Sema/SemaOverload.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 7cb24f06ae1e2..91c9106be81c1 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -11030,8 +11030,7 @@ bool clang::isBetterOverloadCandidate( // In SYCL device compilation mode prefer the overload with the // SYCLDeviceOnly attribute. - if (S.getLangOpts().isSYCL() && S.getLangOpts().SYCLIsDevice && - Cand1.Function && Cand2.Function) { + if (S.getLangOpts().SYCLIsDevice && Cand1.Function && Cand2.Function) { if (Cand1.Function->hasAttr() != Cand2.Function->hasAttr()) { return Cand1.Function->hasAttr(); @@ -11393,7 +11392,7 @@ OverloadingResult OverloadCandidateSet::BestViableFunctionImpl( CudaExcludeWrongSideCandidates(S, Candidates); // In SYCL host compilation remove candidates marked SYCLDeviceOnly. - if (S.getLangOpts().isSYCL() && !S.getLangOpts().SYCLIsDevice) { + if (S.getLangOpts().SYCLIsHost) { auto IsDeviceCand = [&](const OverloadCandidate *Cand) { return Cand->Viable && Cand->Function && Cand->Function->hasAttr();