Skip to content

Commit 2f0abc6

Browse files
author
Martin Wehking
authored
Create one bitcode library for AMD (#15055)
Enable compilation of libdevice for AMD by adding AMDGCN to the macro-guarded code parts of libdevice, which enables e.g. the standard library math functions. Add a compilation workflow for AMD to SYCLLibdevice.cmake. Follow the compilation mechanism used for NVPTX (56a6ae2) and create a single bitcode library file. Do not select builtin LLVM intrinsics for AMDGCN by default, to ensure that stdlib functions can be found when linking against libdevice. Ensure that the clang tests check the correctness of the new clang driver actions and check that the driver still links the device code against the itt device libraries when device library linkage has been excluded. Fix a compilation error of the Intel math function libraries for MSVC when targeting AMD: include "device.h" before including "device_imf.hpp" to avoid the inclusion of <type_traits>, which failed with a redefinition-of-symbols error.
1 parent 340e133 commit 2f0abc6

40 files changed

+175
-67
lines changed

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2738,7 +2738,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
27382738
ConstWithoutErrnoOrExceptions && ErrnoOverridenToFalseWithOpt;
27392739
}
27402740
if (GenerateIntrinsics &&
2741-
!(getLangOpts().SYCLIsDevice && getTarget().getTriple().isNVPTX())) {
2741+
!(getLangOpts().SYCLIsDevice && (getTarget().getTriple().isNVPTX() ||
2742+
getTarget().getTriple().isAMDGCN()))) {
27422743
switch (BuiltinIDIfNoAsmLabel) {
27432744
case Builtin::BIacos:
27442745
case Builtin::BIacosf:

clang/lib/Driver/Driver.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5555,7 +5555,7 @@ class OffloadingActionBuilder final {
55555555
// AOT compilation.
55565556
bool SYCLDeviceLibLinked = false;
55575557
Action *NativeCPULib = nullptr;
5558-
if (IsSPIR || IsNVPTX || IsSYCLNativeCPU) {
5558+
if (IsSPIR || IsNVPTX || IsAMDGCN || IsSYCLNativeCPU) {
55595559
bool UseJitLink =
55605560
IsSPIR &&
55615561
Args.hasFlag(options::OPT_fsycl_device_lib_jit_link,

clang/lib/Driver/ToolChains/SYCL.cpp

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -165,9 +165,9 @@ static bool selectBfloatLibs(const llvm::Triple &Triple, const Compilation &C,
165165

166166
// spir64 target is actually JIT compilation, so we defer selection of
167167
// bfloat16 libraries to runtime. For AOT we need libraries, but skip
168-
// for Nvidia.
169-
NeedLibs =
170-
Triple.getSubArch() != llvm::Triple::NoSubArch && !Triple.isNVPTX();
168+
// for Nvidia and AMD.
169+
NeedLibs = Triple.getSubArch() != llvm::Triple::NoSubArch &&
170+
!Triple.isNVPTX() && !Triple.isAMDGCN();
171171
UseNative = false;
172172
if (NeedLibs && Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen &&
173173
C.hasOffloadToolChain<Action::OFK_SYCL>()) {
@@ -212,9 +212,9 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple,
212212
SmallVector<std::string, 8> LibraryList;
213213
const llvm::opt::ArgList &Args = C.getArgs();
214214

215-
// For NVPTX we only use one single bitcode library and ignore
215+
// For NVPTX and AMDGCN we only use one single bitcode library and ignore
216216
// manually specified SYCL device libraries.
217-
bool IgnoreSingleLibs = TargetTriple.isNVPTX();
217+
bool IgnoreSingleLibs = TargetTriple.isNVPTX() || TargetTriple.isAMDGCN();
218218

219219
struct DeviceLibOptInfo {
220220
StringRef DeviceLibName;
@@ -278,6 +278,9 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple,
278278
if (TargetTriple.isNVPTX() && IgnoreSingleLibs)
279279
LibraryList.push_back(Args.MakeArgString("devicelib--cuda.bc"));
280280

281+
if (TargetTriple.isAMDGCN() && IgnoreSingleLibs)
282+
LibraryList.push_back(Args.MakeArgString("devicelib--amd.bc"));
283+
281284
if (IgnoreSingleLibs)
282285
return LibraryList;
283286

clang/test/CodeGenSYCL/sycl-libdevice-cmath.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77

88
// RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-cuda -emit-llvm -o - | FileCheck %s
99
// RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-cuda -ffast-math -emit-llvm -o - | FileCheck %s
10+
// RUN: %clang_cc1 %s -fsycl-is-device -triple amdgcn-amd-amdhsa -emit-llvm -o - | FileCheck %s
11+
// RUN: %clang_cc1 %s -fsycl-is-device -triple amdgcn-amd-amdhsa -ffast-math -emit-llvm -o - | FileCheck %s
1012

1113
#include "Inputs/sycl.hpp"
1214

clang/test/Driver/Inputs/SYCL/lib/devicelib--amd.bc

Whitespace-only changes.
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
// Tests specific to `-fsycl-targets=amdgcn-amd-amdhsa`
2+
// Verify that the correct devicelib linking actions are spawned by the driver.
3+
// Check also if the correct warnings are generated.
4+
5+
// UNSUPPORTED: system-windows
6+
7+
// Check if internal libraries are still linked against when linkage of all
8+
// device libs is manually excluded.
9+
// RUN: %clangxx -ccc-print-phases -std=c++11 -fsycl -fno-sycl-device-lib=all --sysroot=%S/Inputs/SYCL \
10+
// RUN: -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906 %s 2>&1 \
11+
// RUN: | FileCheck -check-prefix=CHK-NO-DEVLIB %s
12+
13+
// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib--amd.bc", ir, (device-sycl, gfx906)
14+
// CHK-NO-DEVLIB: [[LIB1:[0-9]+]]: input, "{{.*}}libsycl-itt-user-wrappers.bc", ir, (device-sycl, gfx906)
15+
// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib--amd.bc", ir, (device-sycl, gfx906)
16+
// CHK-NO-DEVLIB: [[LIB2:[0-9]+]]: input, "{{.*}}libsycl-itt-compiler-wrappers.bc", ir, (device-sycl, gfx906)
17+
// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib--amd.bc", ir, (device-sycl, gfx906)
18+
// CHK-NO-DEVLIB: [[LIB3:[0-9]+]]: input, "{{.*}}libsycl-itt-stubs.bc", ir, (device-sycl, gfx906)
19+
// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib--amd.bc", ir, (device-sycl, gfx906)
20+
// CHK-NO-DEVLIB: {{[0-9]+}}: linker, {{{.*}}[[LIB1]], [[LIB2]], [[LIB3]]{{.*}}}, ir, (device-sycl, gfx906)
21+
22+
// Check that the -fsycl-device-lib flag has no effect when "all" is specified.
23+
// RUN: %clangxx -ccc-print-phases -std=c++11 -fsycl -fsycl-device-lib=all --sysroot=%S/Inputs/SYCL \
24+
// RUN: -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906 %s 2>&1 \
25+
// RUN: | FileCheck -check-prefix=CHK-ALL %s
26+
27+
// Check that the -fsycl-device-lib flag has no effect when subsets of libs
28+
// are specified.
29+
// RUN: %clangxx -ccc-print-phases -std=c++11 --sysroot=%S/Inputs/SYCL \
30+
// RUN: -fsycl -fsycl-device-lib=libc,libm-fp32,libm-fp64,libimf-fp32,libimf-fp64,libimf-bf16,libm-bfloat16 \
31+
// RUN: -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906 %s 2>&1 \
32+
// RUN: | FileCheck -check-prefix=CHK-ALL %s
33+
34+
// Check that -fno-sycl-device-lib is ignored when it does not contain "all".
35+
// A warning should be printed that the flag got ignored.
36+
// RUN: %clangxx -ccc-print-phases -std=c++11 -fsycl --sysroot=%S/Inputs/SYCL \
37+
// RUN: -fno-sycl-device-lib=libc,libm-fp32,libm-fp64,libimf-fp32,libimf-fp64,libimf-bf16,libm-bfloat16 \
38+
// RUN: -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906 %s 2>&1 \
39+
// RUN: | FileCheck -check-prefixes=CHK-UNUSED-WARN,CHK-ALL %s
40+
41+
// CHK-UNUSED-WARN: warning: argument unused during compilation: '-fno-sycl-device-lib='
42+
// CHK-ALL: [[DEVLIB:[0-9]+]]: input, "{{.*}}devicelib--amd.bc", ir, (device-sycl, gfx906)
43+
// CHK-ALL: {{[0-9]+}}: linker, {{{.*}}[[DEVLIB]]{{.*}}}, ir, (device-sycl, gfx906)
44+

clang/test/Driver/sycl-offload-amdgcn.cpp

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525

2626
/// Check phases w/out specifying a compute capability.
2727
// RUN: %clangxx -ccc-print-phases -std=c++11 -target x86_64-unknown-linux-gnu -fsycl \
28-
// RUN: -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906 %s 2>&1 \
28+
// RUN: -fsycl-targets=amdgcn-amd-amdhsa -fsycl-device-lib=all -Xsycl-target-backend --offload-arch=gfx906 %s 2>&1 \
2929
// RUN: | FileCheck -check-prefix=CHK-PHASES-NO-CC %s
3030
// CHK-PHASES-NO-CC: 0: input, "{{.*}}", c++, (host-sycl)
3131
// CHK-PHASES-NO-CC: 1: preprocessor, {0}, c++-cpp-output, (host-sycl)
@@ -37,17 +37,19 @@
3737
// CHK-PHASES-NO-CC: 7: backend, {6}, assembler, (host-sycl)
3838
// CHK-PHASES-NO-CC: 8: assembler, {7}, object, (host-sycl)
3939
// CHK-PHASES-NO-CC: 9: linker, {4}, ir, (device-sycl, gfx906)
40-
// CHK-PHASES-NO-CC: 10: sycl-post-link, {9}, ir, (device-sycl, gfx906)
41-
// CHK-PHASES-NO-CC: 11: file-table-tform, {10}, ir, (device-sycl, gfx906)
42-
// CHK-PHASES-NO-CC: 12: backend, {11}, assembler, (device-sycl, gfx906)
43-
// CHK-PHASES-NO-CC: 13: assembler, {12}, object, (device-sycl, gfx906)
44-
// CHK-PHASES-NO-CC: 14: linker, {13}, image, (device-sycl, gfx906)
45-
// CHK-PHASES-NO-CC: 15: linker, {14}, hip-fatbin, (device-sycl, gfx906)
46-
// CHK-PHASES-NO-CC: 16: foreach, {11, 15}, hip-fatbin, (device-sycl, gfx906)
47-
// CHK-PHASES-NO-CC: 17: file-table-tform, {10, 16}, tempfiletable, (device-sycl, gfx906)
48-
// CHK-PHASES-NO-CC: 18: clang-offload-wrapper, {17}, object, (device-sycl, gfx906)
49-
// CHK-PHASES-NO-CC: 19: offload, "device-sycl (amdgcn-amd-amdhsa:gfx906)" {18}, object
50-
// CHK-PHASES-NO-CC: 20: linker, {8, 19}, image, (host-sycl)
40+
// CHK-PHASES-NO-CC: 10: input, "{{.*}}devicelib--amd.bc", ir, (device-sycl, gfx906)
41+
// CHK-PHASES-NO-CC: 11: linker, {9, 10}, ir, (device-sycl, gfx906)
42+
// CHK-PHASES-NO-CC: 12: sycl-post-link, {11}, ir, (device-sycl, gfx906)
43+
// CHK-PHASES-NO-CC: 13: file-table-tform, {12}, ir, (device-sycl, gfx906)
44+
// CHK-PHASES-NO-CC: 14: backend, {13}, assembler, (device-sycl, gfx906)
45+
// CHK-PHASES-NO-CC: 15: assembler, {14}, object, (device-sycl, gfx906)
46+
// CHK-PHASES-NO-CC: 16: linker, {15}, image, (device-sycl, gfx906)
47+
// CHK-PHASES-NO-CC: 17: linker, {16}, hip-fatbin, (device-sycl, gfx906)
48+
// CHK-PHASES-NO-CC: 18: foreach, {13, 17}, hip-fatbin, (device-sycl, gfx906)
49+
// CHK-PHASES-NO-CC: 19: file-table-tform, {12, 18}, tempfiletable, (device-sycl, gfx906)
50+
// CHK-PHASES-NO-CC: 20: clang-offload-wrapper, {19}, object, (device-sycl, gfx906)
51+
// CHK-PHASES-NO-CC: 21: offload, "device-sycl (amdgcn-amd-amdhsa:gfx906)" {20}, object
52+
// CHK-PHASES-NO-CC: 22: linker, {8, 21}, image, (host-sycl)
5153

5254
/// Check that we only unbundle an archive once.
5355
// RUN: %clangxx -### -target x86_64-unknown-linux-gnu -fsycl -nogpulib \

libdevice/cmake/modules/SYCLLibdevice.cmake

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ foreach(filetype IN LISTS filetypes)
6565
add_dependencies(libsycldevice libsycldevice-${filetype})
6666
endforeach()
6767

68-
# For NVPTX each device libary is compiled into a single bitcode
68+
# For NVPTX and AMDGCN each device library is compiled into a single bitcode
6969
# file and all files created this way are linked into one large bitcode
7070
# library.
7171
# Additional compilation options are needed for compiling each device library.
@@ -76,6 +76,13 @@ if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD)
7676
"-Xsycl-target-backend" "--cuda-gpu-arch=sm_50" "-nocudalib")
7777
set(opt_flags_cuda "-O3" "--nvvm-reflect-enable=false")
7878
endif()
79+
if("AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD)
80+
list(APPEND devicelib_arch amd)
81+
set(compile_opts_amd "-nogpulib" "-fsycl-targets=amdgcn-amd-amdhsa"
82+
"-Xsycl-target-backend" "--offload-arch=gfx940")
83+
set(opt_flags_amd "-O3" "--amdgpu-oclc-reflect-enable=false")
84+
endif()
85+
7986

8087
set(spv_device_compile_opts -fsycl-device-only -fsycl-device-obj=spirv)
8188
set(bc_device_compile_opts -fsycl-device-only -fsycl-device-obj=llvmir)
@@ -444,7 +451,7 @@ foreach(dtype IN ITEMS bf16 fp32 fp64)
444451
endforeach()
445452
endforeach()
446453

447-
# Add device fallback imf libraries for the CUDA target.
454+
# Add device fallback imf libraries for the NVPTX and AMD targets.
448455
# The output files are bitcode.
449456
foreach(arch IN LISTS devicelib_arch)
450457
foreach(dtype IN ITEMS bf16 fp32 fp64)
@@ -464,7 +471,7 @@ foreach(arch IN LISTS devicelib_arch)
464471
endforeach()
465472
endforeach()
466473

467-
# Create one large bitcode file for the CUDA targets.
474+
# Create one large bitcode file for the CUDA and AMD targets.
468475
# Use all the files collected in the respective global properties.
469476
foreach(arch IN LISTS devicelib_arch)
470477
get_property(BC_DEVICE_LIBS_${arch} GLOBAL PROPERTY BC_DEVICE_LIBS_${arch})

libdevice/cmath_wrapper.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88

99
#include "device_math.h"
1010

11-
#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__)
11+
#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \
12+
defined(__AMDGCN__)
1213

1314
DEVICE_EXTERN_C_INLINE
1415
int abs(int x) { return __devicelib_abs(x); }
@@ -199,4 +200,4 @@ DEVICE_EXTERN_C_INLINE
199200
float rintf(float x) { return __nv_rintf(x); }
200201
#endif // __NVPTX__
201202

202-
#endif // __SPIR__ || __SPIRV__ || __NVPTX__
203+
#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__

libdevice/cmath_wrapper_fp64.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@
99

1010
#include "device_math.h"
1111

12-
#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__)
12+
#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \
13+
defined(__AMDGCN__)
1314

1415
// All exported functions in math and complex device libraries are weak
1516
// reference. If users provide their own math or complex functions(with
@@ -496,4 +497,4 @@ double _Sinh(double x, double y) { // compute y * sinh(x), |y| <= 1
496497
}
497498
}
498499
#endif // defined(_WIN32)
499-
#endif // __SPIR__ || __SPIRV__ || __NVPTX__
500+
#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__

libdevice/crt_wrapper.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@
1717
DeviceGlobal<uint64_t[RAND_NEXT_LEN]> RandNext;
1818
#endif
1919

20-
#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__)
20+
#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \
21+
defined(__AMDGCN__)
2122
DEVICE_EXTERN_C_INLINE
2223
void *memcpy(void *dest, const void *src, size_t n) {
2324
return __devicelib_memcpy(dest, src, n);
@@ -126,4 +127,4 @@ void __assert_fail(const char *expr, const char *file, unsigned int line,
126127
__spirv_LocalInvocationId_z());
127128
}
128129
#endif
129-
#endif // __SPIR__ || __SPIRV__ || __NVPTX__
130+
#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__

libdevice/device.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@
1515
#define EXTERN_C
1616
#endif // __cplusplus
1717

18-
#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__)
18+
#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \
19+
defined(__AMDGCN__)
1920
#ifdef __SYCL_DEVICE_ONLY__
2021
#define DEVICE_EXTERNAL SYCL_EXTERNAL __attribute__((weak))
2122
#else // __SYCL_DEVICE_ONLY__
@@ -27,7 +28,7 @@
2728
DEVICE_EXTERNAL EXTERN_C __attribute__((always_inline))
2829
#define DEVICE_EXTERN_C_NOINLINE \
2930
DEVICE_EXTERNAL EXTERN_C __attribute__((noinline))
30-
#endif // __SPIR__ || __SPIRV__ || __NVPTX__
31+
#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__
3132

3233
#if defined(__SPIR__) || defined(__SPIRV__) || defined(__LIBDEVICE_HOST_IMPL__)
3334
#define __LIBDEVICE_IMF_ENABLED__

libdevice/device_math.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@
1010
#define __LIBDEVICE_DEVICE_MATH_H__
1111

1212
#include "device.h"
13-
#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__)
13+
#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \
14+
defined(__AMDGCN__)
1415
#include <cstdint>
1516

1617
typedef struct {
@@ -367,5 +368,5 @@ float __devicelib_scalbnf(float x, int n);
367368
DEVICE_EXTERN_C
368369
double __devicelib_scalbn(double x, int exp);
369370

370-
#endif // __SPIR__ || __SPIRV__ || __NVPTX__
371+
#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__
371372
#endif // __LIBDEVICE_DEVICE_MATH_H__

libdevice/fallback-cassert.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ DEVICE_EXTERN_C void __devicelib_assert_fail(const char *expr, const char *file,
100100
}
101101
#endif // __SPIR__ || __SPIRV__
102102

103-
#ifdef __NVPTX__
103+
#if defined(__NVPTX__) || defined(__AMDGCN__)
104104

105105
DEVICE_EXTERN_C void __assertfail(const char *__message, const char *__file,
106106
unsigned __line, const char *__function,
@@ -119,4 +119,4 @@ DEVICE_EXTERN_C void _wassert(const char *_Message, const char *_File,
119119
__assertfail(_Message, _File, _Line, 0, 1);
120120
}
121121

122-
#endif
122+
#endif // __NVPTX__ || __AMDGCN__

libdevice/fallback-cmath-fp64.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@
99

1010
#include "device_math.h"
1111

12-
#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__)
12+
#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \
13+
defined(__AMDGCN__)
1314

1415
// To support fallback device libraries on-demand loading, please update the
1516
// DeviceLibFuncMap in llvm/tools/sycl-post-link/sycl-post-link.cpp if you add
@@ -193,4 +194,4 @@ DEVICE_EXTERN_C_INLINE
193194
double __devicelib_scalbn(double x, int exp) {
194195
return __spirv_ocl_ldexp(x, exp);
195196
}
196-
#endif // __SPIR__ || __SPIRV__ || __NVPTX__
197+
#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__

libdevice/fallback-cmath.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88

99
#include "device_math.h"
1010

11-
#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__)
11+
#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \
12+
defined(__AMDGCN__)
1213

1314
// To support fallback device libraries on-demand loading, please update the
1415
// DeviceLibFuncMap in llvm/tools/sycl-post-link/sycl-post-link.cpp if you add
@@ -209,4 +210,4 @@ float __devicelib_asinhf(float x) { return __spirv_ocl_asinh(x); }
209210
DEVICE_EXTERN_C_INLINE
210211
float __devicelib_atanhf(float x) { return __spirv_ocl_atanh(x); }
211212

212-
#endif // __SPIR__ || __SPIRV__ || __NVPTX__
213+
#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__

libdevice/fallback-cstring.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@
99
#include "wrapper.h"
1010
#include <cstdint>
1111

12-
#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__)
12+
#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \
13+
defined(__AMDGCN__)
1314

1415
static void *__devicelib_memcpy_uint8_aligned(void *dest, const void *src,
1516
size_t n) {
@@ -202,4 +203,4 @@ int __devicelib_memcmp(const void *s1, const void *s2, size_t n) {
202203

203204
return head_cmp;
204205
}
205-
#endif // __SPIR__ || __SPIRV__ || __NVPTX__
206+
#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__

libdevice/imf/imf_fp32_dl.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,12 @@
1111
/// overhead in these deep learning frameworks.
1212
//===----------------------------------------------------------------------===//
1313

14-
#include "../device_imf.hpp"
14+
#include "../device.h"
15+
1516
#ifdef __LIBDEVICE_IMF_ENABLED__
1617

18+
#include "../device_imf.hpp"
19+
1720
DEVICE_EXTERN_C_INLINE int32_t __devicelib_imf_abs(int32_t x) {
1821
return (x >= 0) ? x : -x;
1922
}

libdevice/imf/imf_fp64_dl.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,12 @@
1111
/// overhead in these deep learning frameworks.
1212
//===----------------------------------------------------------------------===//
1313

14-
#include "../device_imf.hpp"
14+
#include "../device.h"
15+
1516
#ifdef __LIBDEVICE_IMF_ENABLED__
1617

18+
#include "../device_imf.hpp"
19+
1720
DEVICE_EXTERN_C_INLINE double __devicelib_imf_fabs(double x) {
1821
return __fabs(x);
1922
}

libdevice/imf/imf_inline_bf16.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,12 @@
55
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
66
//
77
//===----------------------------------------------------------------------===//
8-
#include "../device_imf.hpp"
8+
#include "../device.h"
99

1010
#ifdef __LIBDEVICE_IMF_ENABLED__
11+
12+
#include "../device_imf.hpp"
13+
1114
DEVICE_EXTERN_C_INLINE
1215
_iml_bf16_internal __devicelib_imf_fmabf16(_iml_bf16_internal a,
1316
_iml_bf16_internal b,

libdevice/imf/imf_inline_fp32.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,13 @@
55
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
66
//
77
//===----------------------------------------------------------------------===//
8-
#include "../device_imf.hpp"
8+
9+
#include "../device.h"
10+
911
#ifdef __LIBDEVICE_IMF_ENABLED__
1012

13+
#include "../device_imf.hpp"
14+
1115
DEVICE_EXTERN_C_INLINE _iml_half_internal __devicelib_imf_fmaf16(
1216
_iml_half_internal a, _iml_half_internal b, _iml_half_internal c) {
1317
_iml_half ha(a), hb(b), hc(c);

0 commit comments

Comments
 (0)