-
Notifications
You must be signed in to change notification settings - Fork 14.5k
[Clang] Add elementwise maximumnum/minimumnum builtin functions #149775
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
|
@@ -4108,6 +4108,22 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, | |||
return RValue::get(Result); | ||||
} | ||||
|
||||
// Handles __builtin_elementwise_maximumnum: call arguments 0 and 1 are each
// emitted with EmitScalarExpr and then combined through the
// Intrinsic::maximumnum binary intrinsic. The produced value is named
// "elt.maximumnum" and returned wrapped in an RValue.
case Builtin::BI__builtin_elementwise_maximumnum: { | ||||
Value *Op0 = EmitScalarExpr(E->getArg(0)); | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we ever ensure we actually HAVE 2 arguments? Both of these refer to the 1st arg, but the prototype accepts 0 or 1. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
It is checked at llvm-project/clang/lib/Sema/SemaChecking.cpp Line 15756 in 4d48996
sorry I don't get it. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, I stopped chasing down the call-stack 1 too few :) I see it now in I was pointing out that we refer to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I see, thanks @erichkeane |
||||
Value *Op1 = EmitScalarExpr(E->getArg(1)); | ||||
Value *Result = Builder.CreateBinaryIntrinsic( | ||||
Intrinsic::maximumnum, Op0, Op1, nullptr, "elt.maximumnum"); | ||||
return RValue::get(Result); | ||||
} | ||||
|
||||
case Builtin::BI__builtin_elementwise_minimumnum: {
  // Emit the two call operands in argument order, then combine them with the
  // minimumnum binary intrinsic; the emitted value is named "elt.minimumnum".
  Value *LHS = EmitScalarExpr(E->getArg(0));
  Value *RHS = EmitScalarExpr(E->getArg(1));
  return RValue::get(Builder.CreateBinaryIntrinsic(
      Intrinsic::minimumnum, LHS, RHS, nullptr, "elt.minimumnum"));
}
|
||||
case Builtin::BI__builtin_reduce_max: { | ||||
auto GetIntrinsicID = [this](QualType QT) { | ||||
if (auto *VecTy = QT->getAs<VectorType>()) | ||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,171 @@ | ||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 | ||
// RUN: %clang_cc1 -x c++ -std=c++20 -disable-llvm-passes -O3 -triple x86_64 %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK | ||
|
||
typedef _Float16 half8 __attribute__((ext_vector_type(8))); | ||
typedef __bf16 bf16x8 __attribute__((ext_vector_type(8))); | ||
typedef float float4 __attribute__((ext_vector_type(4))); | ||
typedef double double2 __attribute__((ext_vector_type(2))); | ||
typedef long double ldouble2 __attribute__((ext_vector_type(2))); | ||
|
||
// CHECK-LABEL: define dso_local noundef <8 x half> @_Z7pfmin16Dv8_DF16_S_( | ||
// CHECK-SAME: <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] { | ||
// CHECK-NEXT: [[ENTRY:.*:]] | ||
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x half>, align 16 | ||
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x half>, align 16 | ||
// CHECK-NEXT: store <8 x half> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2:![0-9]+]] | ||
// CHECK-NEXT: store <8 x half> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] | ||
// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] | ||
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] | ||
// CHECK-NEXT: [[ELT_MINIMUMNUM:%.*]] = call <8 x half> @llvm.minimumnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]]) | ||
// CHECK-NEXT: ret <8 x half> [[ELT_MINIMUMNUM]] | ||
// | ||
// Test input: applies __builtin_elementwise_minimumnum to two half8 operands.
// The autogenerated assertions above expect a call to @llvm.minimumnum.v8f16.
half8 pfmin16(half8 a, half8 b) { | ||
return __builtin_elementwise_minimumnum(a, b); | ||
} | ||
// CHECK-LABEL: define dso_local noundef <8 x bfloat> @_Z8pfmin16bDv8_DF16bS_( | ||
// CHECK-SAME: <8 x bfloat> noundef [[A:%.*]], <8 x bfloat> noundef [[B:%.*]]) #[[ATTR0]] { | ||
// CHECK-NEXT: [[ENTRY:.*:]] | ||
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x bfloat>, align 16 | ||
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x bfloat>, align 16 | ||
// CHECK-NEXT: store <8 x bfloat> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] | ||
// CHECK-NEXT: store <8 x bfloat> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] | ||
// CHECK-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] | ||
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x bfloat>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] | ||
// CHECK-NEXT: [[ELT_MINIMUMNUM:%.*]] = call <8 x bfloat> @llvm.minimumnum.v8bf16(<8 x bfloat> [[TMP0]], <8 x bfloat> [[TMP1]]) | ||
// CHECK-NEXT: ret <8 x bfloat> [[ELT_MINIMUMNUM]] | ||
// | ||
// Test input: applies __builtin_elementwise_minimumnum to two bf16x8 operands.
// The autogenerated assertions above expect a call to @llvm.minimumnum.v8bf16.
bf16x8 pfmin16b(bf16x8 a, bf16x8 b) { | ||
return __builtin_elementwise_minimumnum(a, b); | ||
} | ||
// CHECK-LABEL: define dso_local noundef <4 x float> @_Z7pfmin32Dv4_fS_( | ||
// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] { | ||
// CHECK-NEXT: [[ENTRY:.*:]] | ||
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x float>, align 16 | ||
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x float>, align 16 | ||
// CHECK-NEXT: store <4 x float> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] | ||
// CHECK-NEXT: store <4 x float> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] | ||
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] | ||
// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] | ||
// CHECK-NEXT: [[ELT_MINIMUMNUM:%.*]] = call <4 x float> @llvm.minimumnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) | ||
// CHECK-NEXT: ret <4 x float> [[ELT_MINIMUMNUM]] | ||
// | ||
// Test input: applies __builtin_elementwise_minimumnum to two float4 operands.
// The autogenerated assertions above expect a call to @llvm.minimumnum.v4f32.
float4 pfmin32(float4 a, float4 b) { | ||
return __builtin_elementwise_minimumnum(a, b); | ||
} | ||
// CHECK-LABEL: define dso_local noundef <2 x double> @_Z7pfmin64Dv2_dS_( | ||
// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] { | ||
// CHECK-NEXT: [[ENTRY:.*:]] | ||
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16 | ||
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x double>, align 16 | ||
// CHECK-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] | ||
// CHECK-NEXT: store <2 x double> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] | ||
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] | ||
// CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] | ||
// CHECK-NEXT: [[ELT_MINIMUMNUM:%.*]] = call <2 x double> @llvm.minimumnum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) | ||
// CHECK-NEXT: ret <2 x double> [[ELT_MINIMUMNUM]] | ||
// | ||
// Test input: applies __builtin_elementwise_minimumnum to two double2 operands.
// The autogenerated assertions above expect a call to @llvm.minimumnum.v2f64.
double2 pfmin64(double2 a, double2 b) { | ||
return __builtin_elementwise_minimumnum(a, b); | ||
} | ||
// CHECK-LABEL: define dso_local noundef <2 x x86_fp80> @_Z7pfmin80Dv2_eS_( | ||
// CHECK-SAME: ptr noundef byval(<2 x x86_fp80>) align 32 [[TMP0:%.*]], ptr noundef byval(<2 x x86_fp80>) align 32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] { | ||
// CHECK-NEXT: [[ENTRY:.*:]] | ||
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x x86_fp80>, align 32 | ||
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x x86_fp80>, align 32 | ||
// CHECK-NEXT: [[A:%.*]] = load <2 x x86_fp80>, ptr [[TMP0]], align 32, !tbaa [[TBAA2]] | ||
// CHECK-NEXT: [[B:%.*]] = load <2 x x86_fp80>, ptr [[TMP1]], align 32, !tbaa [[TBAA2]] | ||
// CHECK-NEXT: store <2 x x86_fp80> [[A]], ptr [[A_ADDR]], align 32, !tbaa [[TBAA2]] | ||
// CHECK-NEXT: store <2 x x86_fp80> [[B]], ptr [[B_ADDR]], align 32, !tbaa [[TBAA2]] | ||
// CHECK-NEXT: [[TMP2:%.*]] = load <2 x x86_fp80>, ptr [[A_ADDR]], align 32, !tbaa [[TBAA2]] | ||
// CHECK-NEXT: [[TMP3:%.*]] = load <2 x x86_fp80>, ptr [[B_ADDR]], align 32, !tbaa [[TBAA2]] | ||
// CHECK-NEXT: [[ELT_MINIMUMNUM:%.*]] = call <2 x x86_fp80> @llvm.minimumnum.v2f80(<2 x x86_fp80> [[TMP2]], <2 x x86_fp80> [[TMP3]]) | ||
// CHECK-NEXT: ret <2 x x86_fp80> [[ELT_MINIMUMNUM]] | ||
// | ||
// Test input: applies __builtin_elementwise_minimumnum to two ldouble2
// (2 x long double) operands. The autogenerated assertions above expect the
// arguments to arrive as byval pointers and a call to @llvm.minimumnum.v2f80.
ldouble2 pfmin80(ldouble2 a, ldouble2 b) { | ||
return __builtin_elementwise_minimumnum(a, b); | ||
} | ||
|
||
// CHECK-LABEL: define dso_local noundef <8 x half> @_Z7pfmax16Dv8_DF16_S_( | ||
// CHECK-SAME: <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { | ||
// CHECK-NEXT: [[ENTRY:.*:]] | ||
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x half>, align 16 | ||
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x half>, align 16 | ||
// CHECK-NEXT: store <8 x half> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] | ||
// CHECK-NEXT: store <8 x half> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] | ||
// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] | ||
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] | ||
// CHECK-NEXT: [[ELT_MAXIMUMNUM:%.*]] = call <8 x half> @llvm.maximumnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]]) | ||
// CHECK-NEXT: ret <8 x half> [[ELT_MAXIMUMNUM]] | ||
// | ||
// Test input: applies __builtin_elementwise_maximumnum to two half8 operands.
// The autogenerated assertions above expect a call to @llvm.maximumnum.v8f16.
half8 pfmax16(half8 a, half8 b) { | ||
return __builtin_elementwise_maximumnum(a, b); | ||
} | ||
// CHECK-LABEL: define dso_local noundef <8 x bfloat> @_Z8pfmax16bDv8_DF16bS_( | ||
// CHECK-SAME: <8 x bfloat> noundef [[A:%.*]], <8 x bfloat> noundef [[B:%.*]]) #[[ATTR0]] { | ||
// CHECK-NEXT: [[ENTRY:.*:]] | ||
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x bfloat>, align 16 | ||
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x bfloat>, align 16 | ||
// CHECK-NEXT: store <8 x bfloat> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] | ||
// CHECK-NEXT: store <8 x bfloat> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] | ||
// CHECK-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] | ||
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x bfloat>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] | ||
// CHECK-NEXT: [[ELT_MAXIMUMNUM:%.*]] = call <8 x bfloat> @llvm.maximumnum.v8bf16(<8 x bfloat> [[TMP0]], <8 x bfloat> [[TMP1]]) | ||
// CHECK-NEXT: ret <8 x bfloat> [[ELT_MAXIMUMNUM]] | ||
// | ||
// Test input: applies __builtin_elementwise_maximumnum to two bf16x8 operands.
// The autogenerated assertions above expect a call to @llvm.maximumnum.v8bf16.
bf16x8 pfmax16b(bf16x8 a, bf16x8 b) { | ||
return __builtin_elementwise_maximumnum(a, b); | ||
} | ||
// CHECK-LABEL: define dso_local noundef <4 x float> @_Z7pfmax32Dv4_fS_( | ||
// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] { | ||
// CHECK-NEXT: [[ENTRY:.*:]] | ||
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x float>, align 16 | ||
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x float>, align 16 | ||
// CHECK-NEXT: store <4 x float> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] | ||
// CHECK-NEXT: store <4 x float> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] | ||
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] | ||
// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] | ||
// CHECK-NEXT: [[ELT_MAXIMUMNUM:%.*]] = call <4 x float> @llvm.maximumnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) | ||
// CHECK-NEXT: ret <4 x float> [[ELT_MAXIMUMNUM]] | ||
// | ||
// Test input: applies __builtin_elementwise_maximumnum to two float4 operands.
// The autogenerated assertions above expect a call to @llvm.maximumnum.v4f32.
float4 pfmax32(float4 a, float4 b) { | ||
return __builtin_elementwise_maximumnum(a, b); | ||
} | ||
// CHECK-LABEL: define dso_local noundef <2 x double> @_Z7pfmax64Dv2_dS_( | ||
// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] { | ||
// CHECK-NEXT: [[ENTRY:.*:]] | ||
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16 | ||
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x double>, align 16 | ||
// CHECK-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] | ||
// CHECK-NEXT: store <2 x double> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] | ||
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] | ||
// CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] | ||
// CHECK-NEXT: [[ELT_MAXIMUMNUM:%.*]] = call <2 x double> @llvm.maximumnum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) | ||
// CHECK-NEXT: ret <2 x double> [[ELT_MAXIMUMNUM]] | ||
// | ||
// Test input: applies __builtin_elementwise_maximumnum to two double2 operands.
// The autogenerated assertions above expect a call to @llvm.maximumnum.v2f64.
double2 pfmax64(double2 a, double2 b) { | ||
return __builtin_elementwise_maximumnum(a, b); | ||
} | ||
|
||
// CHECK-LABEL: define dso_local noundef <2 x x86_fp80> @_Z7pfmax80Dv2_eS_(
// CHECK-SAME: ptr noundef byval(<2 x x86_fp80>) align 32 [[TMP0:%.*]], ptr noundef byval(<2 x x86_fp80>) align 32 [[TMP1:%.*]]) #[[ATTR2]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x x86_fp80>, align 32
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x x86_fp80>, align 32
// CHECK-NEXT: [[A:%.*]] = load <2 x x86_fp80>, ptr [[TMP0]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[B:%.*]] = load <2 x x86_fp80>, ptr [[TMP1]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: store <2 x x86_fp80> [[A]], ptr [[A_ADDR]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: store <2 x x86_fp80> [[B]], ptr [[B_ADDR]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = load <2 x x86_fp80>, ptr [[A_ADDR]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = load <2 x x86_fp80>, ptr [[B_ADDR]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[ELT_MAXIMUMNUM:%.*]] = call <2 x x86_fp80> @llvm.maximumnum.v2f80(<2 x x86_fp80> [[TMP2]], <2 x x86_fp80> [[TMP3]])
// CHECK-NEXT: ret <2 x x86_fp80> [[ELT_MAXIMUMNUM]]
//
// Test input: applies __builtin_elementwise_maximumnum to two ldouble2
// (2 x long double) operands, so the x86_fp80 maximumnum lowering is covered
// alongside the minimumnum case exercised by pfmin80. The assertions above
// expect byval pointer arguments and a call to @llvm.maximumnum.v2f80.
ldouble2 pfmax80(ldouble2 a, ldouble2 b) {
  return __builtin_elementwise_maximumnum(a, b);
}
|
||
//. | ||
// CHECK: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} | ||
// CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} | ||
// CHECK: [[META4]] = !{!"Simple C++ TBAA"} | ||
//. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm confused. :-)
We have `__builtin_elementwise_min`, `__builtin_elementwise_minnum`, `__builtin_elementwise_minimum`, and now we're adding `__builtin_elementwise_minimumnum`?
I think the docs need to be expanded a bit to help understand what the difference is between the four choices. Same for maximum.
Why do we need four different builtins to select the lesser of two values?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ugh, I see now, these are the elementwise variants of the existing C math library functions. Carry on. :-D
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yes, the difference is explained in the link http://llvm.org/docs/LangRef.html#llvm-min-intrinsics-comparation