Skip to content

[AMDGPU] Add the code generation support for llvm.[sin/cos].bf16 #149631

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -620,7 +620,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,

// BF16 - VOP1 Actions.
if (Subtarget->hasBF16TransInsts())
- setOperationAction(ISD::FDIV, MVT::bf16, Custom);
+ setOperationAction({ISD::FCOS, ISD::FSIN, ISD::FDIV}, MVT::bf16, Custom);

setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, MVT::f16, Promote);
setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, MVT::bf16, Promote);
Expand Down
38 changes: 38 additions & 0 deletions llvm/test/CodeGen/AMDGPU/llvm.cos.bf16.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 %s -o - | FileCheck -check-prefixes=GCN %s
; xUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s

; FIXME: GlobalISel does not work with bf16 yet, so the -global-isel=1 invocation above is disabled (spelled xUN so lit does not execute it).

declare bfloat @llvm.cos.bf16(bfloat) #0

; Kernel test: calls llvm.cos.bf16 on the constant 4.0 and stores the bfloat
; result through %out. The autogenerated assertions below expect a single
; v_cos_bf16_e32 whose source operand is the literal 0x3f23, not the bf16
; encoding of 4.0, followed by a 16-bit global store of the result.
; NOTE(review): 0x3f23 decodes to about 0.637 as bf16, which matches
; 4.0 / (2*pi) -- presumably the lowering pre-scales the argument by 1/(2*pi)
; and the multiply is constant-folded; confirm against the trig lowering in
; SIISelLowering.cpp.
define amdgpu_kernel void @cos_bf16_constant_4(ptr addrspace(1) %out) #1 {
; GCN-LABEL: cos_bf16_constant_4:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GCN-NEXT: v_cos_bf16_e32 v0, 0x3f23
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_wait_kmcnt 0x0
; GCN-NEXT: global_store_b16 v1, v0, s[0:1]
; GCN-NEXT: s_endpgm
%cos = call bfloat @llvm.cos.bf16(bfloat 4.0) #0
store bfloat %cos, ptr addrspace(1) %out, align 2
ret void
}

; Kernel test: same shape as the constant-4 cosine test but with the input
; 100.0. The autogenerated assertions below expect a single v_cos_bf16_e32
; whose source operand is the literal 0x417f, followed by a 16-bit global
; store of the result through %out.
; NOTE(review): 0x417f decodes to about 15.94 as bf16, the nearest bf16 value
; to 100.0 / (2*pi) ~= 15.92 -- presumably the same 1/(2*pi) pre-scaling,
; constant-folded; confirm against the trig lowering in SIISelLowering.cpp.
define amdgpu_kernel void @cos_bf16_constant_100(ptr addrspace(1) %out) #1 {
; GCN-LABEL: cos_bf16_constant_100:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GCN-NEXT: v_cos_bf16_e32 v0, 0x417f
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_wait_kmcnt 0x0
; GCN-NEXT: global_store_b16 v1, v0, s[0:1]
; GCN-NEXT: s_endpgm
%cos = call bfloat @llvm.cos.bf16(bfloat 100.0) #0
store bfloat %cos, ptr addrspace(1) %out, align 2
ret void
}

attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
38 changes: 38 additions & 0 deletions llvm/test/CodeGen/AMDGPU/llvm.sin.bf16.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 %s -o - | FileCheck -check-prefixes=GCN %s
; xUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s

; FIXME: GlobalISel does not work with bf16 yet, so the -global-isel=1 invocation above is disabled (spelled xUN so lit does not execute it).

declare bfloat @llvm.sin.bf16(bfloat) #0

; Kernel test: calls llvm.sin.bf16 on the constant 4.0 and stores the bfloat
; result through %out. The autogenerated assertions below expect a single
; v_sin_bf16_e32 whose source operand is the literal 0x3f23, not the bf16
; encoding of 4.0, followed by a 16-bit global store of the result.
; NOTE(review): 0x3f23 decodes to about 0.637 as bf16, which matches
; 4.0 / (2*pi) -- presumably the lowering pre-scales the argument by 1/(2*pi)
; and the multiply is constant-folded; confirm against the trig lowering in
; SIISelLowering.cpp.
define amdgpu_kernel void @sin_bf16_constant_4(ptr addrspace(1) %out) #1 {
; GCN-LABEL: sin_bf16_constant_4:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GCN-NEXT: v_sin_bf16_e32 v0, 0x3f23
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_wait_kmcnt 0x0
; GCN-NEXT: global_store_b16 v1, v0, s[0:1]
; GCN-NEXT: s_endpgm
%sin = call bfloat @llvm.sin.bf16(bfloat 4.0) #0
store bfloat %sin, ptr addrspace(1) %out, align 2
ret void
}

; Kernel test: same shape as the constant-4 sine test but with the input
; 100.0. The autogenerated assertions below expect a single v_sin_bf16_e32
; whose source operand is the literal 0x417f, followed by a 16-bit global
; store of the result through %out.
; NOTE(review): 0x417f decodes to about 15.94 as bf16, the nearest bf16 value
; to 100.0 / (2*pi) ~= 15.92 -- presumably the same 1/(2*pi) pre-scaling,
; constant-folded; confirm against the trig lowering in SIISelLowering.cpp.
define amdgpu_kernel void @sin_bf16_constant_100(ptr addrspace(1) %out) #1 {
; GCN-LABEL: sin_bf16_constant_100:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GCN-NEXT: v_sin_bf16_e32 v0, 0x417f
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_wait_kmcnt 0x0
; GCN-NEXT: global_store_b16 v1, v0, s[0:1]
; GCN-NEXT: s_endpgm
%sin = call bfloat @llvm.sin.bf16(bfloat 100.0) #0
store bfloat %sin, ptr addrspace(1) %out, align 2
ret void
}

attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
Loading