1212// useful for targets like x86_64 that cannot lower fp conversions
1313// with more than 128 bits.
1414//
15+ // This pass also expands div/rem instructions with a bitwidth above a
16+ // threshold into a call to auto-generated functions. This is useful
17+ // for targets like x86_64 that cannot lower divisions with more than
18+ // 128 bits or targets like x86_32 that cannot lower divisions with
19+ // more than 64 bits.
20+ //
1521// ===----------------------------------------------------------------------===//
1622
1723#include " llvm/CodeGen/ExpandFp.h"
3541#include " llvm/Support/ErrorHandling.h"
3642#include " llvm/Target/TargetMachine.h"
3743#include " llvm/Transforms/Utils/BasicBlockUtils.h"
44+ #include " llvm/Transforms/Utils/IntegerDivision.h"
45+ #include < llvm/Support/Casting.h>
3846#include < optional>
3947
4048#define DEBUG_TYPE " expand-fp"
@@ -47,7 +55,28 @@ static cl::opt<unsigned>
4755 cl::desc(" fp convert instructions on integers with "
4856 " more than <N> bits are expanded." ));
4957
// Hidden command-line option giving the bit-width threshold for div/rem
// expansion. It is initialised to IntegerType::MAX_INT_BITS; runImpl only
// lets it replace the target-reported maximum when it holds a different value.
58+ static cl::opt<unsigned >
59+ ExpandDivRemBits (" expand-div-rem-bits" , cl::Hidden,
60+ cl::init (llvm::IntegerType::MAX_INT_BITS),
61+ cl::desc(" div and rem instructions on integers with "
62+ " more than <N> bits are expanded." ));
63+
5064namespace {
65+ bool isConstantPowerOfTwo (llvm::Value *V, bool SignedOp) {
66+ auto *C = dyn_cast<ConstantInt>(V);
67+ if (!C)
68+ return false ;
69+
70+ APInt Val = C->getValue ();
71+ if (SignedOp && Val.isNegative ())
72+ Val = -Val;
73+ return Val.isPowerOf2 ();
74+ }
75+
76+ bool isSigned (unsigned int Opcode) {
77+ return Opcode == Instruction::SDiv || Opcode == Instruction::SRem;
78+ }
79+
5180// / This class implements a precise expansion of the frem instruction.
5281// / The generated code is based on the fmod implementation in the AMD device
5382// / libs.
@@ -995,11 +1024,17 @@ static bool runImpl(Function &F, const TargetLowering &TLI,
9951024 if (ExpandFpConvertBits != llvm::IntegerType::MAX_INT_BITS)
9961025 MaxLegalFpConvertBitWidth = ExpandFpConvertBits;
9971026
1027+ unsigned MaxLegalDivRemBitWidth = TLI.getMaxDivRemBitWidthSupported ();
1028+ if (ExpandDivRemBits != llvm::IntegerType::MAX_INT_BITS)
1029+ MaxLegalDivRemBitWidth = ExpandDivRemBits;
1030+
9981031 bool DisableExpandLargeFp =
9991032 MaxLegalFpConvertBitWidth >= llvm::IntegerType::MAX_INT_BITS;
1033+ bool DisableExpandLargeDivRem =
1034+ MaxLegalDivRemBitWidth >= llvm::IntegerType::MAX_INT_BITS;
10001035 bool DisableFrem = !FRemExpander::shouldExpandAnyFremType (TLI);
10011036
1002- if (DisableExpandLargeFp && DisableFrem)
1037+ if (DisableExpandLargeFp && DisableFrem && DisableExpandLargeDivRem )
10031038 return false ;
10041039
10051040 auto ShouldHandleInst = [&](Instruction &I) {
@@ -1021,6 +1056,16 @@ static bool runImpl(Function &F, const TargetLowering &TLI,
10211056 return !DisableExpandLargeFp &&
10221057 cast<IntegerType>(I.getOperand (0 )->getType ()->getScalarType ())
10231058 ->getIntegerBitWidth () > MaxLegalFpConvertBitWidth;
1059+ case Instruction::UDiv:
1060+ case Instruction::SDiv:
1061+ case Instruction::URem:
1062+ case Instruction::SRem:
1063+ return !DisableExpandLargeDivRem &&
1064+ cast<IntegerType>(Ty->getScalarType ())->getIntegerBitWidth () >
1065+ MaxLegalDivRemBitWidth
1066+ // The backend has peephole optimizations for powers of two.
1067+ // TODO: We don't consider vectors here.
1068+ && !isConstantPowerOfTwo (I.getOperand (1 ), isSigned (I.getOpcode ()));
10241069 }
10251070
10261071 return false ;
@@ -1064,6 +1109,15 @@ static bool runImpl(Function &F, const TargetLowering &TLI,
10641109 case Instruction::SIToFP:
10651110 expandIToFP (I);
10661111 break ;
1112+
1113+ case Instruction::UDiv:
1114+ case Instruction::SDiv:
1115+ expandDivision (cast<BinaryOperator>(I));
1116+ break ;
1117+ case Instruction::URem:
1118+ case Instruction::SRem:
1119+ expandRemainder (cast<BinaryOperator>(I));
1120+ break ;
10671121 }
10681122 }
10691123
0 commit comments