diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 69ae4f80297d5..ba4f23b2d9191 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3510,6 +3510,13 @@ class LLVM_ABI TargetLoweringBase {
     return false;
   }
 
+  /// True if the target allows transformations of in-bounds pointer
+  /// arithmetic that cause out-of-bounds intermediate results.
+  virtual bool canTransformPtrArithOutOfBounds(const Function &F,
+                                               EVT PtrVT) const {
+    return false;
+  }
+
   /// Does this target support complex deinterleaving
   virtual bool isComplexDeinterleavingSupported() const { return false; }
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 08dab7c697b99..3626ac45a4860 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2689,59 +2689,82 @@ SDValue DAGCombiner::visitPTRADD(SDNode *N) {
   if (PtrVT == IntVT && isNullConstant(N0))
     return N1;
 
-  if (N0.getOpcode() != ISD::PTRADD ||
-      reassociationCanBreakAddressingModePattern(ISD::PTRADD, DL, N, N0, N1))
-    return SDValue();
-
-  SDValue X = N0.getOperand(0);
-  SDValue Y = N0.getOperand(1);
-  SDValue Z = N1;
-  bool N0OneUse = N0.hasOneUse();
-  bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
-  bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
-
-  // (ptradd (ptradd x, y), z) -> (ptradd x, (add y, z)) if:
-  //   * y is a constant and (ptradd x, y) has one use; or
-  //   * y and z are both constants.
-  if ((YIsConstant && N0OneUse) || (YIsConstant && ZIsConstant)) {
-    // If both additions in the original were NUW, the new ones are as well.
-    SDNodeFlags Flags =
-        (N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
-    SDValue Add = DAG.getNode(ISD::ADD, DL, IntVT, {Y, Z}, Flags);
-    AddToWorklist(Add.getNode());
-    return DAG.getMemBasePlusOffset(X, Add, DL, Flags);
+  if (N0.getOpcode() == ISD::PTRADD &&
+      !reassociationCanBreakAddressingModePattern(ISD::PTRADD, DL, N, N0, N1)) {
+    SDValue X = N0.getOperand(0);
+    SDValue Y = N0.getOperand(1);
+    SDValue Z = N1;
+    bool N0OneUse = N0.hasOneUse();
+    bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
+    bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
+
+    // (ptradd (ptradd x, y), z) -> (ptradd x, (add y, z)) if:
+    //   * y is a constant and (ptradd x, y) has one use; or
+    //   * y and z are both constants.
+    if ((YIsConstant && N0OneUse) || (YIsConstant && ZIsConstant)) {
+      // If both additions in the original were NUW, the new ones are as well.
+      SDNodeFlags Flags =
+          (N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
+      SDValue Add = DAG.getNode(ISD::ADD, DL, IntVT, {Y, Z}, Flags);
+      AddToWorklist(Add.getNode());
+      return DAG.getMemBasePlusOffset(X, Add, DL, Flags);
+    }
+  }
+
+  // The following combines can turn in-bounds pointer arithmetic out of bounds.
+  // That is problematic for settings like AArch64's CPA, which checks that
+  // intermediate results of pointer arithmetic remain in bounds. The target
+  // therefore needs to opt in to enable them.
+  if (!TLI.canTransformPtrArithOutOfBounds(
+          DAG.getMachineFunction().getFunction(), PtrVT))
+    return SDValue();
+
+  if (N0.getOpcode() == ISD::PTRADD && N1.getOpcode() == ISD::Constant) {
+    // Fold (ptradd (ptradd GA, v), c) -> (ptradd (ptradd GA, c), v) with
+    // global address GA and constant c, such that c can be folded into GA.
+    SDValue GAValue = N0.getOperand(0);
+    if (const GlobalAddressSDNode *GA =
+            dyn_cast<GlobalAddressSDNode>(GAValue)) {
+      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+      if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
+        // If both additions in the original were NUW, reassociation preserves
+        // that.
+        SDNodeFlags Flags =
+            (N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
+        SDValue Inner = DAG.getMemBasePlusOffset(GAValue, N1, DL, Flags);
+        AddToWorklist(Inner.getNode());
+        return DAG.getMemBasePlusOffset(Inner, N0.getOperand(1), DL, Flags);
+      }
+    }
   }
 
-  // TODO: There is another possible fold here that was proven useful.
-  // It would be this:
-  //
-  // (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y) if:
-  //   * (ptradd x, y) has one use; and
-  //   * y is a constant; and
-  //   * z is not a constant.
-  //
-  // In some cases, specifically in AArch64's FEAT_CPA, it exposes the
-  // opportunity to select more complex instructions such as SUBPT and
-  // MSUBPT. However, a hypothetical corner case has been found that we could
-  // not avoid. Consider this (pseudo-POSIX C):
-  //
-  //   char *foo(char *x, int z) {return (x + LARGE_CONSTANT) + z;}
-  //   char *p = mmap(LARGE_CONSTANT);
-  //   char *q = foo(p, -LARGE_CONSTANT);
-  //
-  // Then x + LARGE_CONSTANT is one-past-the-end, so valid, and a
-  // further + z takes it back to the start of the mapping, so valid,
-  // regardless of the address mmap gave back. However, if mmap gives you an
-  // address < LARGE_CONSTANT (ignoring high bits), x - LARGE_CONSTANT will
-  // borrow from the high bits (with the subsequent + z carrying back into
-  // the high bits to give you a well-defined pointer) and thus trip
-  // FEAT_CPA's pointer corruption checks.
-  //
-  // We leave this fold as an opportunity for future work, addressing the
-  // corner case for FEAT_CPA, as well as reconciling the solution with the
-  // more general application of pointer arithmetic in other future targets.
-  // For now each architecture that wants this fold must implement it in the
-  // target-specific code (see e.g. SITargetLowering::performPtrAddCombine)
+  if (N1.getOpcode() == ISD::ADD && N1.hasOneUse()) {
+    // (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant,
+    // y is not, and (add y, z) is used only once.
+    // (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant,
+    // z is not, and (add y, z) is used only once.
+    // The goal is to move constant offsets to the outermost ptradd, to create
+    // more opportunities to fold offsets into memory instructions.
+    // Together with another combine above, this also implements
+    // (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y).
+    SDValue X = N0;
+    SDValue Y = N1.getOperand(0);
+    SDValue Z = N1.getOperand(1);
+    bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
+    bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
+
+    // If both additions in the original were NUW, reassociation preserves that.
+    SDNodeFlags ReassocFlags =
+        (N->getFlags() & N1->getFlags()) & SDNodeFlags::NoUnsignedWrap;
+
+    if (ZIsConstant != YIsConstant) {
+      if (YIsConstant)
+        std::swap(Y, Z);
+      SDValue Inner = DAG.getMemBasePlusOffset(X, Y, DL, ReassocFlags);
+      AddToWorklist(Inner.getNode());
+      return DAG.getMemBasePlusOffset(Inner, Z, DL, ReassocFlags);
+    }
+  }
 
   return SDValue();
 }
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 52c811d27e804..822bab88c8a09 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -10602,6 +10602,11 @@ bool SITargetLowering::shouldPreservePtrArith(const Function &F,
   return UseSelectionDAGPTRADD && PtrVT == MVT::i64;
 }
 
+bool SITargetLowering::canTransformPtrArithOutOfBounds(const Function &F,
+                                                       EVT PtrVT) const {
+  return true;
+}
+
 // The raw.(t)buffer and struct.(t)buffer intrinsics have two offset args:
 // offset (the offset that is included in bounds checking and swizzling, to be
 // split between the instruction's voffset and immoffset fields) and soffset
@@ -15131,65 +15136,17 @@ SDValue SITargetLowering::performPtrAddCombine(SDNode *N,
     return Folded;
   }
 
-  if (N0.getOpcode() == ISD::PTRADD && N1.getOpcode() == ISD::Constant) {
-    // Fold (ptradd (ptradd GA, v), c) -> (ptradd (ptradd GA, c) v) with
-    // global address GA and constant c, such that c can be folded into GA.
-    SDValue GAValue = N0.getOperand(0);
-    if (const GlobalAddressSDNode *GA =
-            dyn_cast<GlobalAddressSDNode>(GAValue)) {
-      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-      if (DCI.isBeforeLegalizeOps() && TLI.isOffsetFoldingLegal(GA)) {
-        // If both additions in the original were NUW, reassociation preserves
-        // that.
-        SDNodeFlags Flags =
-            (N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
-        SDValue Inner = DAG.getMemBasePlusOffset(GAValue, N1, DL, Flags);
-        DCI.AddToWorklist(Inner.getNode());
-        return DAG.getMemBasePlusOffset(Inner, N0.getOperand(1), DL, Flags);
-      }
-    }
-  }
-
   if (N1.getOpcode() != ISD::ADD || !N1.hasOneUse())
     return SDValue();
 
-  // (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant,
-  // y is not, and (add y, z) is used only once.
-  // (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant,
-  // z is not, and (add y, z) is used only once.
-  // The goal is to move constant offsets to the outermost ptradd, to create
-  // more opportunities to fold offsets into memory instructions.
-  // Together with the generic combines in DAGCombiner.cpp, this also
-  // implements (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y)).
-  //
-  // This transform is here instead of in the general DAGCombiner as it can
-  // turn in-bounds pointer arithmetic out-of-bounds, which is problematic for
-  // AArch64's CPA.
   SDValue X = N0;
   SDValue Y = N1.getOperand(0);
   SDValue Z = N1.getOperand(1);
   bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
   bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
 
-  // If both additions in the original were NUW, reassociation preserves that.
-  SDNodeFlags ReassocFlags =
-      (N->getFlags() & N1->getFlags()) & SDNodeFlags::NoUnsignedWrap;
-
-  if (ZIsConstant != YIsConstant) {
-    if (YIsConstant)
-      std::swap(Y, Z);
-    SDValue Inner = DAG.getMemBasePlusOffset(X, Y, DL, ReassocFlags);
-    DCI.AddToWorklist(Inner.getNode());
-    return DAG.getMemBasePlusOffset(Inner, Z, DL, ReassocFlags);
-  }
-
-  // If one of Y and Z is constant, they have been handled above. If both were
-  // constant, the addition would have been folded in SelectionDAG::getNode
-  // already. This ensures that the generic DAG combines won't undo the
-  // following reassociation.
-  assert(!YIsConstant && !ZIsConstant);
-
-  if (!X->isDivergent() && Y->isDivergent() != Z->isDivergent()) {
+  if (!YIsConstant && !ZIsConstant && !X->isDivergent() &&
+      Y->isDivergent() != Z->isDivergent()) {
     // Reassociate (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if x and
     // y are uniform and z isn't.
     // Reassociate (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if x and
@@ -15200,6 +15157,9 @@ SDValue SITargetLowering::performPtrAddCombine(SDNode *N,
     // reassociate.
     if (Y->isDivergent())
       std::swap(Y, Z);
+    // If both additions in the original were NUW, reassociation preserves that.
+    SDNodeFlags ReassocFlags =
+        (N->getFlags() & N1->getFlags()) & SDNodeFlags::NoUnsignedWrap;
     SDValue UniformInner = DAG.getMemBasePlusOffset(X, Y, DL, ReassocFlags);
     DCI.AddToWorklist(UniformInner.getNode());
     return DAG.getMemBasePlusOffset(UniformInner, Z, DL, ReassocFlags);
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index c66f300ec4cb1..c5bc959c48e60 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -264,6 +264,9 @@ class SITargetLowering final : public AMDGPUTargetLowering {
 
   bool shouldPreservePtrArith(const Function &F, EVT PtrVT) const override;
 
+  bool canTransformPtrArithOutOfBounds(const Function &F,
+                                       EVT PtrVT) const override;
+
 private:
   // Analyze a combined offset from an amdgcn_s_buffer_load intrinsic and store
   // the three offsets (voffset, soffset and instoffset) into the SDValue[3]
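Note: the new combines in DAGCombiner::visitPTRADD are gated behind TargetLoweringBase::canTransformPtrArithOutOfBounds, which defaults to false, so only targets that opt in (currently AMDGPU) are affected. Below is a minimal sketch of how another target could opt in; the class name MyTargetLowering and the unconditional "return true" are illustrative assumptions for the example, not part of this patch.

  // Hypothetical target lowering, for illustration only. Opting in allows
  // DAGCombiner to reassociate in-bounds pointer arithmetic even when an
  // intermediate value may be out of bounds, e.g.
  //   (ptradd x, (add y, c)) -> (ptradd (ptradd x, y), c).
  class MyTargetLowering : public TargetLowering {
  public:
    bool canTransformPtrArithOutOfBounds(const Function &F,
                                         EVT PtrVT) const override {
      // Assumed safe here: this target has no CPA-like checks on intermediate
      // pointer values. A more cautious target could instead gate on
      // attributes of F or on PtrVT, as AMDGPU's shouldPreservePtrArith does.
      return true;
    }
  };

Targets that leave the hook at its default keep the existing conservative behavior, since these folds were previously reachable only through AMDGPU's target-specific performPtrAddCombine.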