Skip to content

Commit a6b5ece

Browse files
shiltian and rampitec authored
[AMDGPU] Add support for v_exp_bf16 on gfx1250 (#149229)
Co-authored-by: Mekhanoshin, Stanislav <[email protected]>
1 parent 145b6cd commit a6b5ece

25 files changed

+1855
-0
lines changed

clang/include/clang/Basic/BuiltinsAMDGPU.def

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -673,6 +673,7 @@ TARGET_BUILTIN(__builtin_amdgcn_tanh_bf16, "yy", "nc", "bf16-trans-insts")
673673
TARGET_BUILTIN(__builtin_amdgcn_rcp_bf16, "yy", "nc", "bf16-trans-insts")
674674
TARGET_BUILTIN(__builtin_amdgcn_rsq_bf16, "yy", "nc", "bf16-trans-insts")
675675
TARGET_BUILTIN(__builtin_amdgcn_log_bf16, "yy", "nc", "bf16-trans-insts")
676+
TARGET_BUILTIN(__builtin_amdgcn_exp2_bf16, "yy", "nc", "bf16-trans-insts")
676677

677678
TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_fp8, "hiIi", "nc", "gfx1250-insts")
678679
TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_bf8, "hiIi", "nc", "gfx1250-insts")

clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -439,6 +439,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
439439
case AMDGPU::BI__builtin_amdgcn_log_bf16:
440440
return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_log);
441441
case AMDGPU::BI__builtin_amdgcn_exp2f:
442+
case AMDGPU::BI__builtin_amdgcn_exp2_bf16:
442443
return emitBuiltinWithOneOverloadedType<1>(*this, E,
443444
Intrinsic::amdgcn_exp2);
444445
case AMDGPU::BI__builtin_amdgcn_log_clampf:

clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -118,6 +118,25 @@ void test_log_bf16(global __bf16* out, __bf16 a)
118118
*out = __builtin_amdgcn_log_bf16(a);
119119
}
120120

121+
// CHECK-LABEL: @test_exp2_bf16(
122+
// CHECK-NEXT: entry:
123+
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
124+
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2, addrspace(5)
125+
// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
126+
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
127+
// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
128+
// CHECK-NEXT: store bfloat [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 2
129+
// CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR_ASCAST]], align 2
130+
// CHECK-NEXT: [[TMP1:%.*]] = call bfloat @llvm.amdgcn.exp2.bf16(bfloat [[TMP0]])
131+
// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
132+
// CHECK-NEXT: store bfloat [[TMP1]], ptr addrspace(1) [[TMP2]], align 2
133+
// CHECK-NEXT: ret void
134+
//
135+
void test_exp2_bf16(global __bf16* out, __bf16 a)
136+
{
137+
*out = __builtin_amdgcn_exp2_bf16(a);
138+
}
139+
121140
// CHECK-LABEL: @test_cvt_f16_fp8(
122141
// CHECK-NEXT: entry:
123142
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -533,6 +533,7 @@ defm V_RCP_BF16 : VOP1Inst_t16 <"v_rcp_bf16", VOP_BF16_BF16, AMDGPUrcp>;
533533
defm V_SQRT_BF16 : VOP1Inst_t16 <"v_sqrt_bf16", VOP_BF16_BF16, any_amdgcn_sqrt>;
534534
defm V_RSQ_BF16 : VOP1Inst_t16 <"v_rsq_bf16", VOP_BF16_BF16, AMDGPUrsq>;
535535
defm V_LOG_BF16 : VOP1Inst_t16 <"v_log_bf16", VOP_BF16_BF16, AMDGPUlogf16>;
536+
defm V_EXP_BF16 : VOP1Inst_t16 <"v_exp_bf16", VOP_BF16_BF16, AMDGPUexpf16>;
536537
}
537538
} // End TRANS = 1, SchedRW = [WriteTrans32]
538539
defm V_FREXP_MANT_F16 : VOP1Inst_t16 <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>;
@@ -1145,6 +1146,7 @@ defm V_RCP_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x079>;
11451146
defm V_SQRT_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x07a>;
11461147
defm V_RSQ_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x07b>;
11471148
defm V_LOG_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x07c>;
1149+
defm V_EXP_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x07d>;
11481150

11491151
//===----------------------------------------------------------------------===//
11501152
// GFX10.

llvm/test/CodeGen/AMDGPU/bf16-math.ll

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -25,4 +25,27 @@ define amdgpu_ps void @llvm_log2_bf16_s(ptr addrspace(1) %out, bfloat inreg %src
2525
ret void
2626
}
2727

28+
define amdgpu_ps void @llvm_exp2_bf16_v(ptr addrspace(1) %out, bfloat %src) {
29+
; GCN-LABEL: llvm_exp2_bf16_v:
30+
; GCN: ; %bb.0:
31+
; GCN-NEXT: v_exp_bf16_e32 v2, v2
32+
; GCN-NEXT: global_store_b16 v[0:1], v2, off
33+
; GCN-NEXT: s_endpgm
34+
%exp = call bfloat @llvm.exp2.bf16(bfloat %src)
35+
store bfloat %exp, ptr addrspace(1) %out, align 2
36+
ret void
37+
}
38+
39+
define amdgpu_ps void @llvm_exp2_bf16_s(ptr addrspace(1) %out, bfloat inreg %src) {
40+
; GCN-LABEL: llvm_exp2_bf16_s:
41+
; GCN: ; %bb.0:
42+
; GCN-NEXT: v_exp_bf16_e32 v2, s0
43+
; GCN-NEXT: global_store_b16 v[0:1], v2, off
44+
; GCN-NEXT: s_endpgm
45+
%exp = call bfloat @llvm.exp2.bf16(bfloat %src)
46+
store bfloat %exp, ptr addrspace(1) %out, align 2
47+
ret void
48+
}
49+
2850
declare bfloat @llvm.log2.bf16(bfloat)
51+
declare bfloat @llvm.exp2.bf16(bfloat)
Lines changed: 33 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,33 @@
1+
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GCN %s
2+
; xUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GCN %s
3+
4+
; FIXME: GlobalISel does not work with bf16
5+
6+
declare bfloat @llvm.amdgcn.exp2.bf16(bfloat) #0
7+
8+
; GCN-LABEL: {{^}}exp_bf16:
9+
; GCN: v_exp_bf16_e32 {{v[0-9]+}}, {{s[0-9]+}}
10+
define amdgpu_kernel void @exp_bf16(ptr addrspace(1) %out, bfloat %src) #1 {
11+
%exp = call bfloat @llvm.amdgcn.exp2.bf16(bfloat %src) #0
12+
store bfloat %exp, ptr addrspace(1) %out, align 2
13+
ret void
14+
}
15+
16+
; GCN-LABEL: {{^}}exp_bf16_constant_4
17+
; GCN: v_exp_bf16_e32 v0, 4.0
18+
define amdgpu_kernel void @exp_bf16_constant_4(ptr addrspace(1) %out) #1 {
19+
%exp = call bfloat @llvm.amdgcn.exp2.bf16(bfloat 4.0) #0
20+
store bfloat %exp, ptr addrspace(1) %out, align 2
21+
ret void
22+
}
23+
24+
; GCN-LABEL: {{^}}exp_bf16_constant_100
25+
; GCN: v_exp_bf16_e32 {{v[0-9]+}}, 0x42c8
26+
define amdgpu_kernel void @exp_bf16_constant_100(ptr addrspace(1) %out) #1 {
27+
%exp = call bfloat @llvm.amdgcn.exp2.bf16(bfloat 100.0) #0
28+
store bfloat %exp, ptr addrspace(1) %out, align 2
29+
ret void
30+
}
31+
32+
attributes #0 = { nounwind readnone }
33+
attributes #1 = { nounwind }

0 commit comments

Comments
 (0)