-
Notifications
You must be signed in to change notification settings - Fork 14.6k
[POWERPC] Fixes an error in the handling of the MTVSRBMI instruction for big-endian #151565
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-backend-powerpc Author: zhijian lin (diggerlin) ChangesThe patch fixed a bug introduced patch [PowePC] using MTVSRBMI instruction instead of constant pool in power10+. The issue arose because the layout of vector register elements differs between little-endian and big-endian modes — specifically, the elements appear in reverse order. This led to incorrect behavior when loading constants using MTVSRBMI in big-endian configurations. Full diff: https://github.com/llvm/llvm-project/pull/151565.diff 2 Files Affected:
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 459525ed4ee9a..0579630ee3cde 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -9586,12 +9586,14 @@ static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
return false;
}
-bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN) {
+bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN,
+ bool IsLittleEndian) {
assert(BVN.getNumOperands() > 0 && "Unexpected 0-size build vector");
BitMask.clearAllBits();
EVT VT = BVN.getValueType(0);
- APInt ConstValue(VT.getSizeInBits(), 0);
+ unsigned VTSize = VT.getSizeInBits();
+ APInt ConstValue(VTSize, 0);
unsigned EltWidth = VT.getScalarSizeInBits();
@@ -9602,7 +9604,8 @@ bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN) {
if (!CN)
return false;
- ConstValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth), BitPos);
+ ConstValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth),
+ IsLittleEndian ? BitPos : VTSize - EltWidth - BitPos);
BitPos += EltWidth;
}
@@ -9633,7 +9636,8 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// we do not convert it to MTVSRBMI.
// The xxleqv instruction sets a vector with all ones.
// The xxlxor instruction sets a vector with all zeros.
- if (isValidMtVsrBmi(BitMask, *BVN) && BitMask != 0 && BitMask != 0xffff) {
+ if (isValidMtVsrBmi(BitMask, *BVN, Subtarget.isLittleEndian()) &&
+ BitMask != 0 && BitMask != 0xffff) {
SDValue SDConstant = DAG.getTargetConstant(BitMask, dl, MVT::i32);
MachineSDNode *MSDNode =
DAG.getMachineNode(PPC::MTVSRBMI, dl, MVT::v16i8, SDConstant);
diff --git a/llvm/test/CodeGen/PowerPC/mtvsrbmi.ll b/llvm/test/CodeGen/PowerPC/mtvsrbmi.ll
index 232014db9a012..a9503f77c3090 100644
--- a/llvm/test/CodeGen/PowerPC/mtvsrbmi.ll
+++ b/llvm/test/CodeGen/PowerPC/mtvsrbmi.ll
@@ -2,22 +2,87 @@
; Verify whether the generated assembly for the following function includes the mtvsrbmi instruction.
; vector unsigned char v00FF()
; {
-; vector unsigned char x = { 0xFF, 0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0 };
-; return x;
+; vector unsigned char x = { 0xFF, 0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0 };
+; return x;
+; }
+; vector unsigned short short00FF()
+; {
+; vector unsigned short x = { 0xFF, 0,0,0, 0,0,0,0};
+; return x;
+; }
+; vector unsigned int int00FF()
+; {
+; vector unsigned int x = { 0xFF, 0,0,0};
+; return x;
+; }
+; vector unsigned long long longlong00FF()
+; {
+; vector unsigned long long x = { 0xFF, 0};
+; return x;
; }
; RUN: llc < %s -ppc-asm-full-reg-names -mtriple=powerpc-ibm-aix -mcpu=pwr10 -verify-machineinstrs \
-; RUN: | FileCheck %s --check-prefix=CHECK
+; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-BE
+
+; RUN: llc < %s -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-gnu-linux -mcpu=pwr10 -verify-machineinstrs \
+; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-LE
+
+; CHECK-NOT: .byte 255
+; CHECK-NOT: .byte 0
define dso_local noundef range(i8 -1, 1) <16 x i8> @_Z5v00FFv() {
-; CHECK-NOT: L..CPI0_0:
-; CHECK-NOT: .byte 255 # 0xff
-; CHECK-NOT: .byte 0 # 0x0
-
-; CHECK-LABEL: _Z5v00FFv:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: mtvsrbmi v2, 1
-; CHECK-NEXT: blr
+; CHECK-BE-LABEL: _Z5v00FFv:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: mtvsrbmi v2, 32768
+; CHECK-BE-NEXT: blr
+;
+; CHECK-LE-LABEL: _Z5v00FFv:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: mtvsrbmi v2, 1
+; CHECK-LE-NEXT: blr
+
entry:
ret <16 x i8> <i8 -1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
}
+
+define dso_local noundef range(i16 0, 256) <8 x i16> @_Z9short00FFv() {
+; CHECK-BE-LABEL: _Z9short00FFv:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: mtvsrbmi v2, 16384
+; CHECK-BE-NEXT: blr
+;
+; CHECK-LE-LABEL: _Z9short00FFv:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: mtvsrbmi v2, 1
+; CHECK-LE-NEXT: blr
+entry:
+ ret <8 x i16> <i16 255, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
+}
+
+define dso_local noundef range(i32 0, 256) <4 x i32> @_Z7int00FFv() {
+; CHECK-BE-LABEL: _Z7int00FFv:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: mtvsrbmi v2, 4096
+; CHECK-BE-NEXT: blr
+;
+; CHECK-LE-LABEL: _Z7int00FFv:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: mtvsrbmi v2, 1
+; CHECK-LE-NEXT: blr
+entry:
+ ret <4 x i32> <i32 255, i32 0, i32 0, i32 0>
+}
+
+define dso_local noundef range(i64 0, 256) <2 x i64> @_Z12longlong00FFv() {
+; CHECK-BE-LABEL: _Z12longlong00FFv:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: mtvsrbmi v2, 256
+; CHECK-BE-NEXT: blr
+;
+; CHECK-LE-LABEL: _Z12longlong00FFv:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: mtvsrbmi v2, 1
+; CHECK-LE-NEXT: blr
+entry:
+ ret <2 x i64> <i64 255, i64 0>
+}
|
; CHECK-NOT: .byte 255 | ||
; CHECK-NOT: .byte 0 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are these auto generated?
The patch fixed a bug introduced patch [PowePC] using MTVSRBMI instruction instead of constant pool in power10+.
The issue arose because the layout of vector register elements differs between little-endian and big-endian modes — specifically, the elements appear in reverse order. This led to incorrect behavior when loading constants using MTVSRBMI in big-endian configurations.