diff --git a/llvm/include/llvm/Analysis/Delinearization.h b/llvm/include/llvm/Analysis/Delinearization.h
index eb775babd6061..dca423235b3c0 100644
--- a/llvm/include/llvm/Analysis/Delinearization.h
+++ b/llvm/include/llvm/Analysis/Delinearization.h
@@ -112,6 +112,32 @@ void delinearize(ScalarEvolution &SE, const SCEV *Expr,
                  SmallVectorImpl<const SCEV *> &Subscripts,
                  SmallVectorImpl<const SCEV *> &Sizes, const SCEV *ElementSize);
 
+/// Split this SCEVAddRecExpr into two vectors of SCEVs representing the
+/// subscripts and sizes of an access to a fixed size array. This is a special
+/// case of delinearization for fixed size arrays.
+///
+/// The delinearization is a 2 step process: the first step estimates the sizes
+/// of each dimension of the array. The second step computes the access
+/// functions for the delinearized array:
+///
+/// 1. Compute the array size
+/// 2. Compute the access function: same as normal delinearization
+///
+/// Different from the normal delinearization, this function assumes that NO
+/// terms exist in the \p Expr. In other words, it assumes that all the step
+/// values are constant.
+///
+/// If the dimensions cannot be inferred or no subscript can be computed, both
+/// \p Subscripts and \p Sizes are left empty.
+///
+/// This function is intended to replace getIndexExpressionsFromGEP and
+/// tryDelinearizeFixedSizeImpl. Both rely on the GEP source element type, so
+/// they will be removed in the future.
+void delinearizeFixedSizeArray(ScalarEvolution &SE, const SCEV *Expr,
+                               SmallVectorImpl<const SCEV *> &Subscripts,
+                               SmallVectorImpl<const SCEV *> &Sizes,
+                               const SCEV *ElementSize);
+
 /// Gathers the individual index expressions from a GEP instruction.
 ///
 /// This function optimistically assumes the GEP references into a fixed size
diff --git a/llvm/lib/Analysis/Delinearization.cpp b/llvm/lib/Analysis/Delinearization.cpp
index 329bd35530c72..5399a4f2e3f7f 100644
--- a/llvm/lib/Analysis/Delinearization.cpp
+++ b/llvm/lib/Analysis/Delinearization.cpp
@@ -24,6 +24,7 @@
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/PassManager.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 
@@ -32,6 +33,11 @@ using namespace llvm;
 #define DL_NAME "delinearize"
 #define DEBUG_TYPE DL_NAME
 
+static cl::opt<bool> UseFixedSizeArrayHeuristic(
+    "delinearize-use-fixed-size-array-heuristic", cl::init(false), cl::Hidden,
+    cl::desc("When printing analysis, use the heuristic for fixed-size arrays "
+             "if the default delinearization fails."));
+
 // Return true when S contains at least an undef value.
 static inline bool containsUndefs(const SCEV *S) {
   return SCEVExprContains(S, [](const SCEV *S) {
@@ -480,6 +486,195 @@ void llvm::delinearize(ScalarEvolution &SE, const SCEV *Expr,
   });
 }
 
+/// Returns the value of \p S as an APInt if \p S is a SCEVConstant, and
+/// std::nullopt otherwise.
+static std::optional<APInt> tryIntoAPInt(const SCEV *S) {
+  if (const auto *Const = dyn_cast<SCEVConstant>(S))
+    return Const->getAPInt();
+  return std::nullopt;
+}
+
+/// Collects the absolute values of constant steps for all induction variables.
+/// Returns true if we can prove that all step recurrences are constants and \p
+/// Expr is divisible by \p ElementSize. Each step recurrence is stored in \p
+/// Steps after being divided by \p ElementSize.
+///
+/// Steps are kept as uint64_t, the type produced by APInt::tryZExtValue, so
+/// that large strides are not silently truncated.
+static bool collectConstantAbsSteps(ScalarEvolution &SE, const SCEV *Expr,
+                                    SmallVectorImpl<uint64_t> &Steps,
+                                    uint64_t ElementSize) {
+  // End of recursion. The constant value also must be a multiple of
+  // ElementSize.
+  if (const auto *Const = dyn_cast<SCEVConstant>(Expr)) {
+    const uint64_t Mod = Const->getAPInt().urem(ElementSize);
+    return Mod == 0;
+  }
+
+  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Expr);
+  if (!AR || !AR->isAffine())
+    return false;
+
+  const SCEV *Step = AR->getStepRecurrence(SE);
+  std::optional<APInt> StepAPInt = tryIntoAPInt(Step);
+  if (!StepAPInt)
+    return false;
+
+  // Only the magnitude of the step matters for inferring dimension sizes, so
+  // work on the absolute value.
+  APInt Q;
+  uint64_t R;
+  APInt::udivrem(StepAPInt->abs(), ElementSize, Q, R);
+  if (R != 0)
+    return false;
+
+  // Bail out when the step is too large.
+  std::optional<uint64_t> StepVal = Q.tryZExtValue();
+  if (!StepVal)
+    return false;
+
+  Steps.push_back(*StepVal);
+  return collectConstantAbsSteps(SE, AR->getStart(), Steps, ElementSize);
+}
+
+/// Infers the dimension sizes of a fixed size array from the constant steps of
+/// \p Expr. On success, returns true and \p Sizes holds the inferred size of
+/// every dimension except the outermost one, followed by the element size. On
+/// failure, returns false and \p Sizes holds no usable result.
+static bool findFixedSizeArrayDimensions(ScalarEvolution &SE, const SCEV *Expr,
+                                         SmallVectorImpl<uint64_t> &Sizes,
+                                         const SCEV *ElementSize) {
+  if (!ElementSize)
+    return false;
+
+  std::optional<APInt> ElementSizeAPInt = tryIntoAPInt(ElementSize);
+  if (!ElementSizeAPInt || *ElementSizeAPInt == 0)
+    return false;
+
+  std::optional<uint64_t> ElementSizeConst = ElementSizeAPInt->tryZExtValue();
+
+  // Early exit when ElementSize is not a positive constant.
+  if (!ElementSizeConst)
+    return false;
+
+  if (!collectConstantAbsSteps(SE, Expr, Sizes, *ElementSizeConst) ||
+      Sizes.empty()) {
+    Sizes.clear();
+    return false;
+  }
+
+  // At this point, Sizes contains the absolute step recurrences for all
+  // induction variables. Each step recurrence must be a multiple of the size of
+  // the array element. Assuming that each value represents the size of an
+  // array for each dimension, attempts to restore the length of each dimension
+  // by dividing the step recurrence by the next smaller value. For example, if
+  // we have the following AddRec SCEV:
+  //
+  //   AddRec: {{{0,+,2048}<%for.i>,+,256}<%for.j>,+,8}<%for.k> (ElementSize=8)
+  //
+  // Then Sizes will become [256, 32, 1] after sorting. We don't know the size
+  // of the outermost dimension, the next dimension will be computed as 256 / 32
+  // = 8, and the last dimension will be computed as 32 / 1 = 32. This yields
+  // Arr[UnknownSize][8][32] with elements of size 8 bytes, where Arr is a base
+  // pointer.
+  //
+  // TODO: Catch more cases, e.g., when a step recurrence is not divisible by
+  // the next smaller one, like A[i][3*j].
+  llvm::sort(Sizes.rbegin(), Sizes.rend());
+  Sizes.erase(llvm::unique(Sizes), Sizes.end());
+  for (unsigned I = 0; I + 1 < Sizes.size(); I++) {
+    uint64_t PrevSize = Sizes[I + 1];
+    if (Sizes[I] % PrevSize) {
+      Sizes.clear();
+      return false;
+    }
+    Sizes[I] /= PrevSize;
+  }
+
+  // The last element should be ElementSize.
+  Sizes.back() = *ElementSizeConst;
+  return true;
+}
+
+/// Splits the SCEV into two vectors of SCEVs representing the subscripts and
+/// sizes of an array access, assuming that the array is a fixed size array.
+///
+/// E.g., if we have code like the following:
+///
+///  double A[42][8][32];
+///  for i
+///    for j
+///      for k
+///        use A[i][j][k]
+///
+/// The access function will be represented as an AddRec SCEV like:
+///
+///  AddRec: {{{0,+,2048}<%for.i>,+,256}<%for.j>,+,8}<%for.k> (ElementSize=8)
+///
+/// Then findFixedSizeArrayDimensions infers the size of each dimension of the
+/// array based on the fact that the value of the step recurrence is a multiple
+/// of the size of the corresponding array element. In the above example, it
+/// results in the following:
+///
+///  CHECK: ArrayDecl[UnknownSize][8][32] with elements of 8 bytes.
+///
+/// Finally each subscript will be computed as follows:
+///
+///  CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>]
+///
+/// Note that this function doesn't check the range of possible values for each
+/// subscript, so the caller should perform additional boundary checks if
+/// necessary.
+///
+/// Also note that this function doesn't guarantee that the original array size
+/// is restored "correctly". For example, in the following case:
+///
+///  double A[42][4][64];
+///  double B[42][8][32];
+///  for i
+///    for j
+///      for k
+///        use A[i][j][k]
+///        use B[i][2*j][k]
+///
+/// The access function for both accesses will be the same:
+///
+///  AddRec: {{{0,+,2048}<%for.i>,+,512}<%for.j>,+,8}<%for.k> (ElementSize=8)
+///
+/// The array sizes for both A and B will be computed as
+/// ArrayDecl[UnknownSize][4][64], which matches for A, but not for B.
+///
+/// On failure (the dimensions cannot be inferred, or no subscript can be
+/// computed), both \p Subscripts and \p Sizes are left empty.
+///
+/// TODO: At the moment, this function can handle only simple cases. For
+/// example, we cannot handle a case where a step recurrence is not divisible
+/// by the next smaller step recurrence, e.g., A[i][3*j].
+void llvm::delinearizeFixedSizeArray(ScalarEvolution &SE, const SCEV *Expr,
+                                     SmallVectorImpl<const SCEV *> &Subscripts,
+                                     SmallVectorImpl<const SCEV *> &Sizes,
+                                     const SCEV *ElementSize) {
+  // First step: find the fixed array size.
+  SmallVector<uint64_t, 4> ConstSizes;
+  if (!findFixedSizeArrayDimensions(SE, Expr, ConstSizes, ElementSize)) {
+    Subscripts.clear();
+    Sizes.clear();
+    return;
+  }
+
+  // Convert the constant size to SCEV.
+  for (uint64_t Size : ConstSizes)
+    Sizes.push_back(SE.getConstant(Expr->getType(), Size));
+
+  // Second step: compute the access functions for each subscript.
+  computeAccessFunctions(SE, Expr, Subscripts, Sizes);
+
+  // Without subscripts the sizes are meaningless; drop them so that a failure
+  // is always reported as two empty vectors.
+  if (Subscripts.empty())
+    Sizes.clear();
+}
+
 bool llvm::getIndexExpressionsFromGEP(ScalarEvolution &SE,
                                       const GetElementPtrInst *GEP,
                                       SmallVectorImpl<const SCEV *> &Subscripts,
@@ -586,9 +781,21 @@ void printDelinearization(raw_ostream &O, Function *F, LoopInfo *LI,
       O << "AccessFunction: " << *AccessFn << "\n";
 
       SmallVector<const SCEV *, 3> Subscripts, Sizes;
+
+      auto IsDelinearizationFailed = [&]() {
+        return Subscripts.size() == 0 || Sizes.size() == 0 ||
+               Subscripts.size() != Sizes.size();
+      };
+
       delinearize(*SE, AccessFn, Subscripts, Sizes, SE->getElementSize(&Inst));
-      if (Subscripts.size() == 0 || Sizes.size() == 0 ||
-          Subscripts.size() != Sizes.size()) {
+      if (UseFixedSizeArrayHeuristic && IsDelinearizationFailed()) {
+        Subscripts.clear();
+        Sizes.clear();
+        delinearizeFixedSizeArray(*SE, AccessFn, Subscripts, Sizes,
+                                  SE->getElementSize(&Inst));
+      }
+
+      if (IsDelinearizationFailed()) {
         O << "failed to delinearize\n";
         continue;
       }
diff --git a/llvm/test/Analysis/Delinearization/fixed_size_array.ll b/llvm/test/Analysis/Delinearization/fixed_size_array.ll
new file mode 100644
index 0000000000000..f37c943272f6d
--- /dev/null
+++ b/llvm/test/Analysis/Delinearization/fixed_size_array.ll
@@ -0,0 +1,446 @@
+; RUN: opt < %s -passes='print<delinearization>' -disable-output -delinearize-use-fixed-size-array-heuristic 2>&1 | FileCheck %s
+
+; void f(int A[][8][32]) {
+;   for (i = 0; i < 42; i++)
+;     for (j = 0; j < 8; j++)
+;       for (k = 0; k < 32; k++)
+;         A[i][j][k] = 1;
+; }
+
+; CHECK: Delinearization on function a_i_j_k:
+; CHECK: Base offset: %a
+; CHECK-NEXT: ArrayDecl[UnknownSize][8][32] with elements of 4 bytes.
+; CHECK-NEXT: ArrayRef[{0,+,1}<%for.i.header>][{0,+,1}<%for.j.header>][{0,+,1}<%for.k>]
+define void @a_i_j_k(ptr %a) {
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+  br label %for.j.header
+
+for.j.header:
+  %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
+  br label %for.k
+
+for.k:
+  %k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k ]
+  %idx = getelementptr [8 x [32 x i32]], ptr %a, i32 %i, i32 %j, i32 %k ; &A[i][j][k]
+  store i32 1, ptr %idx
+  %k.inc = add i32 %k, 1
+  %cmp.k = icmp slt i32 %k.inc, 32
+  br i1 %cmp.k, label %for.k, label %for.j.latch
+
+for.j.latch:
+  %j.inc = add i32 %j, 1
+  %cmp.j = icmp slt i32 %j.inc, 8
+  br i1 %cmp.j, label %for.j.header, label %for.i.latch
+
+for.i.latch:
+  %i.inc = add i32 %i, 1
+  %cmp.i = icmp slt i32 %i.inc, 42
+  br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+  ret void
+}
+
+; void f(int A[][8][32]) {
+;   for (i = 0; i < 42; i++)
+;     for (j = 0; j < 8; j++)
+;       for (k = 0; k < 32; k++)
+;         A[i][7-j][k] = 1;
+; }
+
+; CHECK: Delinearization on function a_i_nj_k:
+; CHECK: Base offset: %a
+; CHECK-NEXT: ArrayDecl[UnknownSize][8][32] with elements of 4 bytes.
+; CHECK-NEXT: ArrayRef[{0,+,1}<%for.i.header>][{7,+,-1}<%for.j.header>][{0,+,1}<%for.k>]
+define void @a_i_nj_k(ptr %a) {
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+  br label %for.j.header
+
+for.j.header:
+  %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
+  %j.subscript = sub i32 7, %j ; 7 - j: the middle subscript runs backwards
+  br label %for.k
+
+for.k:
+  %k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k ]
+  %idx = getelementptr [8 x [32 x i32]], ptr %a, i32 %i, i32 %j.subscript, i32 %k ; &A[i][7-j][k]
+  store i32 1, ptr %idx
+  %k.inc = add i32 %k, 1
+  %cmp.k = icmp slt i32 %k.inc, 32
+  br i1 %cmp.k, label %for.k, label %for.j.latch
+
+for.j.latch:
+  %j.inc = add i32 %j, 1
+  %cmp.j = icmp slt i32 %j.inc, 8
+  br i1 %cmp.j, label %for.j.header, label %for.i.latch
+
+for.i.latch:
+  %i.inc = add i32 %i, 1
+  %cmp.i = icmp slt i32 %i.inc, 42
+  br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+  ret void
+}
+
+; In the following code, the access functions for both stores are represented
+; in the same way in SCEV, so the delinearization results are also the same. We
+; don't have any type information about the underlying objects.
+;
+; void f(int A[][4][64], int B[][8][32]) {
+;   for (i = 0; i < 42; i++)
+;     for (j = 0; j < 4; j++)
+;       for (k = 0; k < 32; k++) {
+;         A[i][j][k] = 1;
+;         B[i][2*j][k] = 1;
+;       }
+; }
+
+; CHECK: Delinearization on function a_ijk_b_i2jk:
+; CHECK: Base offset: %a
+; CHECK-NEXT: ArrayDecl[UnknownSize][4][64] with elements of 4 bytes.
+; CHECK-NEXT: ArrayRef[{0,+,1}<%for.i.header>][{0,+,1}<%for.j.header>][{0,+,1}<%for.k>]
+; CHECK: Base offset: %b
+; CHECK-NEXT: ArrayDecl[UnknownSize][4][64] with elements of 4 bytes.
+; CHECK-NEXT: ArrayRef[{0,+,1}<%for.i.header>][{0,+,1}<%for.j.header>][{0,+,1}<%for.k>]
+define void @a_ijk_b_i2jk(ptr %a, ptr %b) {
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+  br label %for.j.header
+
+for.j.header:
+  %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
+  %j2 = shl i32 %j, 1 ; 2*j
+  br label %for.k
+
+for.k:
+  %k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k ]
+  %a.idx = getelementptr [4 x [64 x i32]], ptr %a, i32 %i, i32 %j, i32 %k ; &A[i][j][k]
+  %b.idx = getelementptr [8 x [32 x i32]], ptr %b, i32 %i, i32 %j2, i32 %k ; &B[i][2*j][k]
+  store i32 1, ptr %a.idx
+  store i32 1, ptr %b.idx
+  %k.inc = add i32 %k, 1
+  %cmp.k = icmp slt i32 %k.inc, 32
+  br i1 %cmp.k, label %for.k, label %for.j.latch
+
+for.j.latch:
+  %j.inc = add i32 %j, 1
+  %cmp.j = icmp slt i32 %j.inc, 4
+  br i1 %cmp.j, label %for.j.header, label %for.i.latch
+
+for.i.latch:
+  %i.inc = add i32 %i, 1
+  %cmp.i = icmp slt i32 %i.inc, 42
+  br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+  ret void
+}
+
+; The type information of the underlying object is not available, so the
+; inferred shape ([4][64]) differs from the declared one ([8][32]).
+;
+; void f(int A[][8][32]) {
+;   for (i = 0; i < 42; i++)
+;     for (j = 0; j < 3; j++)
+;       for (k = 0; k < 32; k++)
+;         A[i][2*j+1][k] = 1;
+; }
+
+; CHECK: Delinearization on function a_i_2j1_k:
+; CHECK: Base offset: %a
+; CHECK-NEXT: ArrayDecl[UnknownSize][4][64] with elements of 4 bytes.
+; CHECK-NEXT: ArrayRef[{0,+,1}<%for.i.header>][{0,+,1}<%for.j.header>][{32,+,1}<%for.k>]
+define void @a_i_2j1_k(ptr %a) {
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+  br label %for.j.header
+
+for.j.header:
+  %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
+  %j2 = shl i32 %j, 1 ; 2*j
+  %j.subscript = add i32 %j2, 1 ; 2*j + 1
+  br label %for.k
+
+for.k:
+  %k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k ]
+  %idx = getelementptr [8 x [32 x i32]], ptr %a, i32 %i, i32 %j.subscript, i32 %k ; &A[i][2*j+1][k]
+  store i32 1, ptr %idx
+  %k.inc = add i32 %k, 1
+  %cmp.k = icmp slt i32 %k.inc, 32
+  br i1 %cmp.k, label %for.k, label %for.j.latch
+
+for.j.latch:
+  %j.inc = add i32 %j, 1
+  %cmp.j = icmp slt i32 %j.inc, 3
+  br i1 %cmp.j, label %for.j.header, label %for.i.latch
+
+for.i.latch:
+  %i.inc = add i32 %i, 1
+  %cmp.i = icmp slt i32 %i.inc, 42
+  br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+  ret void
+}
+
+; Fail to delinearize because the step recurrence of the j-loop is not
+; divisible by that of the k-loop.
+;
+; void f(int A[][8][32]) {
+;   for (i = 0; i < 42; i++)
+;     for (j = 0; j < 8; j++)
+;       for (k = 0; k < 10; k++)
+;         A[i][j][3*k] = 1;
+; }
+
+; CHECK: Delinearization on function a_i_j_3k:
+; CHECK: AccessFunction: {{...}}0,+,1024}<%for.i.header>,+,128}<%for.j.header>,+,12}<%for.k>
+; CHECK-NEXT: failed to delinearize
+define void @a_i_j_3k(ptr %a) {
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+  br label %for.j.header
+
+for.j.header:
+  %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
+  br label %for.k
+
+for.k:
+  %k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k ]
+  %k.subscript = mul i32 %k, 3 ; 3*k
+  %idx = getelementptr [8 x [32 x i32]], ptr %a, i32 %i, i32 %j, i32 %k.subscript ; &A[i][j][3*k]
+  store i32 1, ptr %idx
+  %k.inc = add i32 %k, 1
+  %cmp.k = icmp slt i32 %k.inc, 10
+  br i1 %cmp.k, label %for.k, label %for.j.latch
+
+for.j.latch:
+  %j.inc = add i32 %j, 1
+  %cmp.j = icmp slt i32 %j.inc, 8
+  br i1 %cmp.j, label %for.j.header, label %for.i.latch
+
+for.i.latch:
+  %i.inc = add i32 %i, 1
+  %cmp.i = icmp slt i32 %i.inc, 42
+  br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+  ret void
+}
+
+; Fail to delinearize because i is used in multiple subscripts that are not adjacent.
+;
+; void f(int A[][8][32]) {
+;   for (i = 0; i < 32; i++)
+;     for (j = 0; j < 4; j++)
+;       for (k = 0; k < 4; k++)
+;         A[i][j+k][i] = 1;
+; }
+
+; CHECK: Delinearization on function a_i_jk_i:
+; CHECK: AccessFunction: {{...}}0,+,1028}<%for.i.header>,+,128}<%for.j.header>,+,128}<%for.k>
+; CHECK-NEXT: failed to delinearize
+define void @a_i_jk_i(ptr %a) {
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+  br label %for.j.header
+
+for.j.header:
+  %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
+  br label %for.k
+
+for.k:
+  %k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k ]
+  %jk = add i32 %j, %k ; j + k
+  %idx = getelementptr [8 x [32 x i32]], ptr %a, i32 %i, i32 %jk, i32 %i ; &A[i][j+k][i]
+  store i32 1, ptr %idx
+  %k.inc = add i32 %k, 1
+  %cmp.k = icmp slt i32 %k.inc, 4
+  br i1 %cmp.k, label %for.k, label %for.j.latch
+
+for.j.latch:
+  %j.inc = add i32 %j, 1
+  %cmp.j = icmp slt i32 %j.inc, 4
+  br i1 %cmp.j, label %for.j.header, label %for.i.latch
+
+for.i.latch:
+  %i.inc = add i32 %i, 1
+  %cmp.i = icmp slt i32 %i.inc, 32
+  br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+  ret void
+}
+
+; Can delinearize, but the result is different from the original array size. In
+; this case, the outermost two dimensions are merged into one.
+;
+; void f(int A[][8][32]) {
+;   for (i = 0; i < 8; i++)
+;     for (j = 0; j < 10; j++)
+;       for (k = 0; k < 10; k++)
+;         A[i][i][j+k] = 1;
+; }
+
+; CHECK: Delinearization on function a_i_i_jk:
+; CHECK: Base offset: %a
+; CHECK-NEXT: ArrayDecl[UnknownSize][288] with elements of 4 bytes.
+; CHECK-NEXT: ArrayRef[{0,+,1}<%for.i.header>][{{..}}0,+,1}<%for.j.header>,+,1}<%for.k>]
+define void @a_i_i_jk(ptr %a) {
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+  br label %for.j.header
+
+for.j.header:
+  %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
+  br label %for.k
+
+for.k:
+  %k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k ]
+  %jk = add i32 %j, %k ; j + k
+  %idx = getelementptr [8 x [32 x i32]], ptr %a, i32 %i, i32 %i, i32 %jk ; &A[i][i][j+k]
+  store i32 1, ptr %idx
+  %k.inc = add i32 %k, 1
+  %cmp.k = icmp slt i32 %k.inc, 10
+  br i1 %cmp.k, label %for.k, label %for.j.latch
+
+for.j.latch:
+  %j.inc = add i32 %j, 1
+  %cmp.j = icmp slt i32 %j.inc, 10
+  br i1 %cmp.j, label %for.j.header, label %for.i.latch
+
+for.i.latch:
+  %i.inc = add i32 %i, 1
+  %cmp.i = icmp slt i32 %i.inc, 8
+  br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+  ret void
+}
+
+; void f(int A[][8][32]) {
+;   for (i = 0; i < 8; i++)
+;     for (j = 0; j < 4; j++)
+;       for (k = 0; k < 4; k++)
+;         for (l = 0; l < 32; l++)
+;           A[i][j+k][l] = 1;
+; }
+
+; CHECK: Delinearization on function a_i_jk_l:
+; CHECK: Base offset: %a
+; CHECK-NEXT: ArrayDecl[UnknownSize][8][32] with elements of 4 bytes.
+; CHECK-NEXT: ArrayRef[{0,+,1}<%for.i.header>][{{..}}0,+,1}<%for.j.header>,+,1}<%for.k.header>][{0,+,1}<%for.l>]
+
+define void @a_i_jk_l(ptr %a) {
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+  br label %for.j.header
+
+for.j.header:
+  %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
+  br label %for.k.header
+
+for.k.header:
+  %k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k.latch ]
+  %jk = add i32 %j, %k ; j + k
+  br label %for.l
+
+for.l:
+  %l = phi i32 [ 0, %for.k.header ], [ %l.inc, %for.l ]
+  %idx = getelementptr [8 x [32 x i32]], ptr %a, i32 %i, i32 %jk, i32 %l ; &A[i][j+k][l]
+  store i32 1, ptr %idx
+  %l.inc = add i32 %l, 1
+  %cmp.l = icmp slt i32 %l.inc, 32
+  br i1 %cmp.l, label %for.l, label %for.k.latch
+
+for.k.latch:
+  %k.inc = add i32 %k, 1
+  %cmp.k = icmp slt i32 %k.inc, 4
+  br i1 %cmp.k, label %for.k.header, label %for.j.latch
+
+for.j.latch:
+  %j.inc = add i32 %j, 1
+  %cmp.j = icmp slt i32 %j.inc, 4
+  br i1 %cmp.j, label %for.j.header, label %for.i.latch
+
+for.i.latch:
+  %i.inc = add i32 %i, 1
+  %cmp.i = icmp slt i32 %i.inc, 8
+  br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+  ret void
+}
+
+; Reject if the address is not a multiple of the element size.
+;
+; void f(int *A) {
+;   for (i = 0; i < 42; i++)
+;     for (j = 0; j < 8; j++)
+;       for (k = 0; k < 32; k++)
+;         *((int *)((char *)A + i*256 + j*32 + k)) = 1;
+; }
+
+; CHECK: Delinearization on function non_divisible_by_element_size:
+; CHECK: AccessFunction: {{...}}0,+,256}<%for.i.header>,+,32}<%for.j.header>,+,1}<%for.k>
+; CHECK-NEXT: failed to delinearize
+define void @non_divisible_by_element_size(ptr %a) {
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+  br label %for.j.header
+
+for.j.header:
+  %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
+  br label %for.k
+
+for.k:
+  %k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k ]
+  %idx = getelementptr [8 x [32 x i8]], ptr %a, i32 %i, i32 %j, i32 %k ; byte offset i*256 + j*32 + k
+  store i32 1, ptr %idx ; i32 store through an i8-indexed address
+  %k.inc = add i32 %k, 1
+  %cmp.k = icmp slt i32 %k.inc, 32
+  br i1 %cmp.k, label %for.k, label %for.j.latch
+
+for.j.latch:
+  %j.inc = add i32 %j, 1
+  %cmp.j = icmp slt i32 %j.inc, 8
+  br i1 %cmp.j, label %for.j.header, label %for.i.latch
+
+for.i.latch:
+  %i.inc = add i32 %i, 1
+  %cmp.i = icmp slt i32 %i.inc, 42
+  br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+  ret void
+}