-
Notifications
You must be signed in to change notification settings - Fork 14.5k
[LegalizeTypes] Expand 128-bit UDIV/UREM by constant via Chunk Addition #146238
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7981,8 +7981,6 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N, | |
|
||
// If (1 << HBitWidth) % divisor == 1, we can add the two halves together and | ||
// then add in the carry. | ||
// TODO: If we can't split it in half, we might be able to split into 3 or | ||
// more pieces using a smaller bit width. | ||
if (HalfMaxPlus1.urem(Divisor).isOne()) { | ||
assert(!LL == !LH && "Expected both input halves or no input halves!"); | ||
if (!LL) | ||
|
@@ -8030,6 +8028,80 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N, | |
DAG.getConstant(0, dl, HiLoVT)); | ||
Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry); | ||
} | ||
|
||
} else { | ||
// If we cannot split in two halves. Let's look for a smaller chunk | ||
// width where (1 << ChunkWidth) mod Divisor == 1. | ||
// This ensures that the sum of all such chunks modulo Divisor | ||
// is equivalent to the original value modulo Divisor. | ||
const APInt &Divisor = CN->getAPIntValue(); | ||
unsigned BitWidth = VT.getScalarSizeInBits(); | ||
unsigned BestChunkWidth = 0; | ||
|
||
// We restrict to small chunk sizes (e.g., ≤ 32 bits) to ensure that all | ||
// operations remain legal on most targets. | ||
unsigned MaxChunk = 32; | ||
for (int i = MaxChunk; i >= 1; --i) { | ||
APInt ChunkMaxPlus1 = APInt::getOneBitSet(BitWidth, i); | ||
if (ChunkMaxPlus1.urem(Divisor).isOne()) { | ||
BestChunkWidth = i; | ||
break; | ||
} | ||
} | ||
|
||
// If we found a good chunk width, slice the number and sum the pieces. | ||
if (BestChunkWidth > 0) { | ||
EVT ChunkVT = EVT::getIntegerVT(*DAG.getContext(), BestChunkWidth); | ||
|
||
if (!LL) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Braces, but I'm not really sure why this is conditional in the first place. |
||
std::tie(LL, LH) = | ||
DAG.SplitScalar(N->getOperand(0), dl, HiLoVT, HiLoVT); | ||
SDValue In = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You only need the BUILD_PAIR if LL is set. Otherwise, you can use N->getOperand(0) as In directly. |
||
|
||
SmallVector<SDValue, 8> Parts; | ||
// Split into fixed-size chunks | ||
for (unsigned i = 0; i < BitWidth; i += BestChunkWidth) { | ||
SDValue Shift = DAG.getShiftAmountConstant(i, VT, dl); | ||
SDValue Chunk = DAG.getNode(ISD::SRL, dl, VT, In, Shift); | ||
Chunk = DAG.getNode(ISD::TRUNCATE, dl, ChunkVT, Chunk); | ||
Parts.push_back(Chunk); | ||
} | ||
if (Parts.empty()) | ||
return false; | ||
Sum = Parts[0]; | ||
|
||
// Use uaddo_carry if we can, otherwise use a compare to detect overflow. | ||
// same logic as used in above if condition. | ||
SDValue Carry = DAG.getConstant(0, dl, ChunkVT); | ||
EVT SetCCType = | ||
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ChunkVT); | ||
for (unsigned i = 1; i < Parts.size(); ++i) { | ||
if (isOperationLegalOrCustom(ISD::UADDO_CARRY, ChunkVT)) { | ||
SDVTList VTList = DAG.getVTList(ChunkVT, SetCCType); | ||
SDValue UAdd = DAG.getNode(ISD::UADDO, dl, VTList, Sum, Parts[i]); | ||
Sum = DAG.getNode(ISD::UADDO_CARRY, dl, VTList, UAdd, Carry, | ||
UAdd.getValue(1)); | ||
} else { | ||
SDValue Add = DAG.getNode(ISD::ADD, dl, ChunkVT, Sum, Parts[i]); | ||
SDValue NewCarry = DAG.getSetCC(dl, SetCCType, Add, Sum, ISD::SETULT); | ||
|
||
if (getBooleanContents(ChunkVT) == | ||
TargetLoweringBase::ZeroOrOneBooleanContent) | ||
NewCarry = DAG.getZExtOrTrunc(NewCarry, dl, ChunkVT); | ||
else | ||
NewCarry = DAG.getSelect(dl, ChunkVT, NewCarry, | ||
DAG.getConstant(1, dl, ChunkVT), | ||
DAG.getConstant(0, dl, ChunkVT)); | ||
Comment on lines
+8088
to
+8094
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You're doing the zext in either case, so just do the zext. It doesn't depend on the boolean contents. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This pattern is repeated in multiple places in the type legalizer. I suspect the getZExtOrTrunc pattern gives better results, but we should confirm. |
||
|
||
Sum = DAG.getNode(ISD::ADD, dl, ChunkVT, Add, Carry); | ||
Carry = NewCarry; | ||
} | ||
} | ||
|
||
Sum = DAG.getNode(ISD::ZERO_EXTEND, dl, HiLoVT, Sum); | ||
} else { | ||
return false; | ||
} | ||
} | ||
|
||
// If we didn't find a sum, we can't do the expansion. | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should choose a maximum size based on the set of legal types and operations instead of just guessing that 32 is good