diff --git a/llvm/include/llvm/Analysis/Delinearization.h b/llvm/include/llvm/Analysis/Delinearization.h
index eb775babd6061..dca423235b3c0 100644
--- a/llvm/include/llvm/Analysis/Delinearization.h
+++ b/llvm/include/llvm/Analysis/Delinearization.h
@@ -112,6 +112,29 @@ void delinearize(ScalarEvolution &SE, const SCEV *Expr,
                  SmallVectorImpl<const SCEV *> &Subscripts,
                  SmallVectorImpl<const SCEV *> &Sizes, const SCEV *ElementSize);
 
+/// Split this SCEVAddRecExpr into two vectors of SCEVs representing the
+/// subscripts and sizes of an access to a fixed size array. This is a special
+/// case of delinearization for fixed size arrays.
+///
+/// The delinearization is a 2 step process: the first step estimates the sizes
+/// of each dimension of the array. The second step computes the access
+/// functions for the delinearized array:
+///
+/// 1. Compute the array size
+/// 2. Compute the access function: same as normal delinearization
+///
+/// Different from the normal delinearization, this function assumes that NO
+/// terms exist in the \p Expr. In other words, it assumes that the all step
+/// values are constant.
+///
+/// This function is intended to replace getIndexExpressionsFromGEP and
+/// tryDelinearizeFixedSizeImpl. They rely on the GEP source element type so
+/// that they will be removed in the future.
+void delinearizeFixedSizeArray(ScalarEvolution &SE, const SCEV *Expr,
+                               SmallVectorImpl<const SCEV *> &Subscripts,
+                               SmallVectorImpl<const SCEV *> &Sizes,
+                               const SCEV *ElementSize);
+
 /// Gathers the individual index expressions from a GEP instruction.
 ///
 /// This function optimistically assumes the GEP references into a fixed size
diff --git a/llvm/lib/Analysis/Delinearization.cpp b/llvm/lib/Analysis/Delinearization.cpp
index 329bd35530c72..5399a4f2e3f7f 100644
--- a/llvm/lib/Analysis/Delinearization.cpp
+++ b/llvm/lib/Analysis/Delinearization.cpp
@@ -24,6 +24,7 @@
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/PassManager.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 
@@ -32,6 +33,11 @@ using namespace llvm;
 #define DL_NAME "delinearize"
 #define DEBUG_TYPE DL_NAME
 
+static cl::opt<bool> UseFixedSizeArrayHeuristic(
+    "delinearize-use-fixed-size-array-heuristic", cl::init(false), cl::Hidden,
+    cl::desc("When printing analysis, use the heuristic for fixed-size arrays "
+             "if the default delinearizetion fails."));
+
 // Return true when S contains at least an undef value.
 static inline bool containsUndefs(const SCEV *S) {
   return SCEVExprContains(S, [](const SCEV *S) {
@@ -480,6 +486,179 @@ void llvm::delinearize(ScalarEvolution &SE, const SCEV *Expr,
   });
 }
 
+static std::optional<APInt> tryIntoAPInt(const SCEV *S) {
+  if (const auto *Const = dyn_cast<SCEVConstant>(S))
+    return Const->getAPInt();
+  return std::nullopt;
+}
+
+/// Collects the absolute values of constant steps for all induction variables.
+/// Returns true if we can prove that all step recurrences are constants and \p
+/// Expr is divisible by \p ElementSize. Each step recurrence is stored in \p
+/// Steps after divided by \p ElementSize.
+static bool collectConstantAbsSteps(ScalarEvolution &SE, const SCEV *Expr,
+                                    SmallVectorImpl<unsigned> &Steps,
+                                    unsigned ElementSize) {
+  // End of recursion. The constant value also must be a multiple of
+  // ElementSize.
+  if (const auto *Const = dyn_cast<SCEVConstant>(Expr)) {
+    const unsigned Mod = Const->getAPInt().urem(ElementSize);
+    return Mod == 0;
+  }
+
+  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Expr);
+  if (!AR || !AR->isAffine())
+    return false;
+
+  const SCEV *Step = AR->getStepRecurrence(SE);
+  std::optional<APInt> StepAPInt = tryIntoAPInt(Step);
+  if (!StepAPInt)
+    return false;
+
+  APInt Q;
+  uint64_t R;
+  APInt::udivrem(StepAPInt->abs(), ElementSize, Q, R);
+  if (R != 0)
+    return false;
+
+  // Bail out when the step is too large.
+  std::optional<unsigned> StepVal = Q.tryZExtValue();
+  if (!StepVal)
+    return false;
+
+  Steps.push_back(*StepVal);
+  return collectConstantAbsSteps(SE, AR->getStart(), Steps, ElementSize);
+}
+
+static bool findFixedSizeArrayDimensions(ScalarEvolution &SE, const SCEV *Expr,
+                                         SmallVectorImpl<unsigned> &Sizes,
+                                         const SCEV *ElementSize) {
+  if (!ElementSize)
+    return false;
+
+  std::optional<APInt> ElementSizeAPInt = tryIntoAPInt(ElementSize);
+  if (!ElementSizeAPInt || *ElementSizeAPInt == 0)
+    return false;
+
+  std::optional<unsigned> ElementSizeConst = ElementSizeAPInt->tryZExtValue();
+
+  // Early exit when ElementSize is not a positive constant.
+  if (!ElementSizeConst)
+    return false;
+
+  if (!collectConstantAbsSteps(SE, Expr, Sizes, *ElementSizeConst) ||
+      Sizes.empty()) {
+    Sizes.clear();
+    return false;
+  }
+
+  // At this point, Sizes contains the absolute step recurrences for all
+  // induction variables. Each step recurrence must be a multiple of the size of
+  // the array element. Assuming that the each value represents the size of an
+  // array for each dimension, attempts to restore the length of each dimension
+  // by dividing the step recurrence by the next smaller value. For example, if
+  // we have the following AddRec SCEV:
+  //
+  //   AddRec: {{{0,+,2048}<%for.i>,+,256}<%for.j>,+,8}<%for.k> (ElementSize=8)
+  //
+  // Then Sizes will become [256, 32, 1] after sorted. We don't know the size of
+  // the outermost dimension, the next dimension will be computed as 256 / 32 =
+  // 8, and the last dimension will be computed as 32 / 1 = 32. Thus it results
+  // in like Arr[UnknownSize][8][32] with elements of size 8 bytes, where Arr is
+  // a base pointer.
+  //
+  // TODO: Catch more cases, e.g., when a step recurrence is not divisible by
+  // the next smaller one, like A[i][3*j].
+  llvm::sort(Sizes.rbegin(), Sizes.rend());
+  Sizes.erase(llvm::unique(Sizes), Sizes.end());
+  for (unsigned I = 0; I + 1 < Sizes.size(); I++) {
+    unsigned PrevSize = Sizes[I + 1];
+    if (Sizes[I] % PrevSize) {
+      Sizes.clear();
+      return false;
+    }
+    Sizes[I] /= PrevSize;
+  }
+
+  // The last element should be ElementSize.
+  Sizes.back() = *ElementSizeConst;
+  return true;
+}
+
+/// Splits the SCEV into two vectors of SCEVs representing the subscripts and
+/// sizes of an array access, assuming that the array is a fixed size array.
+///
+/// E.g., if we have the code like as follows:
+///
+///  double A[42][8][32];
+///  for i
+///    for j
+///      for k
+///        use A[i][j][k]
+///
+/// The access function will be represented as an AddRec SCEV like:
+///
+///  AddRec: {{{0,+,2048}<%for.i>,+,256}<%for.j>,+,8}<%for.k> (ElementSize=8)
+///
+/// Then findFixedSizeArrayDimensions infers the size of each dimension of the
+/// array based on the fact that the value of the step recurrence is a multiple
+/// of the size of the corresponding array element. In the above example, it
+/// results in the following:
+///
+///  CHECK: ArrayDecl[UnknownSize][8][32] with elements of 8 bytes.
+///
+/// Finally each subscript will be computed as follows:
+///
+///  CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>]
+///
+/// Note that this function doesn't check the range of possible values for each
+/// subscript, so the caller should perform additional boundary checks if
+/// necessary.
+///
+/// Also note that this function doesn't guarantee that the original array size
+/// is restored "correctly". For example, in the following case:
+///
+///  double A[42][4][64];
+///  double B[42][8][32];
+///  for i
+///    for j
+///      for k
+///        use A[i][j][k]
+///        use B[i][2*j][k]
+///
+/// The access function for both accesses will be the same:
+///
+///  AddRec: {{{0,+,2048}<%for.i>,+,512}<%for.j>,+,8}<%for.k> (ElementSize=8)
+///
+/// The array sizes for both A and B will be computed as
+/// ArrayDecl[UnknownSize][4][64], which matches for A, but not for B.
+///
+/// TODO: At the moment, this function can handle only simple cases. For
+/// example, we cannot handle a case where a step recurrence is not divisible
+/// by the next smaller step recurrence, e.g., A[i][3*j].
+void llvm::delinearizeFixedSizeArray(ScalarEvolution &SE, const SCEV *Expr,
+                                     SmallVectorImpl<const SCEV *> &Subscripts,
+                                     SmallVectorImpl<const SCEV *> &Sizes,
+                                     const SCEV *ElementSize) {
+
+  // First step: find the fixed array size.
+  SmallVector<unsigned, 4> ConstSizes;
+  if (!findFixedSizeArrayDimensions(SE, Expr, ConstSizes, ElementSize)) {
+    Sizes.clear();
+    return;
+  }
+
+  // Convert the constant size to SCEV.
+  for (unsigned Size : ConstSizes)
+    Sizes.push_back(SE.getConstant(Expr->getType(), Size));
+
+  // Second step: compute the access functions for each subscript.
+  computeAccessFunctions(SE, Expr, Subscripts, Sizes);
+
+  if (Subscripts.empty())
+    return;
+}
+
 bool llvm::getIndexExpressionsFromGEP(ScalarEvolution &SE,
                                       const GetElementPtrInst *GEP,
                                       SmallVectorImpl<const SCEV *> &Subscripts,
@@ -586,9 +765,21 @@ void printDelinearization(raw_ostream &O, Function *F, LoopInfo *LI,
       O << "AccessFunction: " << *AccessFn << "\n";
 
       SmallVector<const SCEV *, 3> Subscripts, Sizes;
+
+      auto IsDelinearizationFailed = [&]() {
+        return Subscripts.size() == 0 || Sizes.size() == 0 ||
+               Subscripts.size() != Sizes.size();
+      };
+
       delinearize(*SE, AccessFn, Subscripts, Sizes, SE->getElementSize(&Inst));
-      if (Subscripts.size() == 0 || Sizes.size() == 0 ||
-          Subscripts.size() != Sizes.size()) {
+      if (UseFixedSizeArrayHeuristic && IsDelinearizationFailed()) {
+        Subscripts.clear();
+        Sizes.clear();
+        delinearizeFixedSizeArray(*SE, AccessFn, Subscripts, Sizes,
+                                  SE->getElementSize(&Inst));
+      }
+
+      if (IsDelinearizationFailed()) {
         O << "failed to delinearize\n";
         continue;
       }
diff --git a/llvm/test/Analysis/Delinearization/fixed_size_array.ll b/llvm/test/Analysis/Delinearization/fixed_size_array.ll
new file mode 100644
index 0000000000000..f37c943272f6d
--- /dev/null
+++ b/llvm/test/Analysis/Delinearization/fixed_size_array.ll
@@ -0,0 +1,446 @@
+; RUN: opt < %s -passes='print<delinearization>' -disable-output -delinearize-use-fixed-size-array-heuristic 2>&1 | FileCheck %s
+
+; void f(int A[][8][32]) {
+;   for (i = 0; i < 42; i++)
+;    for (j = 0; j < 8; j++)
+;     for (k = 0; k < 32; k++)
+;       A[i][j][k] = 1;
+; }
+
+; CHECK:      Delinearization on function a_i_j_k:
+; CHECK:      Base offset: %a
+; CHECK-NEXT: ArrayDecl[UnknownSize][8][32] with elements of 4 bytes.
+; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{0,+,1}<nuw><nsw><%for.j.header>][{0,+,1}<nuw><nsw><%for.k>]
+define void @a_i_j_k(ptr %a) {
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+  br label %for.j.header
+
+for.j.header:
+  %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
+  br label %for.k
+
+for.k:
+  %k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k ]
+  %idx = getelementptr [8 x [32 x i32]], ptr %a, i32 %i, i32 %j, i32 %k
+  store i32 1, ptr %idx
+  %k.inc = add i32 %k, 1
+  %cmp.k = icmp slt i32 %k.inc, 32
+  br i1 %cmp.k, label %for.k, label %for.j.latch
+
+for.j.latch:
+  %j.inc = add i32 %j, 1
+  %cmp.j = icmp slt i32 %j.inc, 8
+  br i1 %cmp.j, label %for.j.header, label %for.i.latch
+
+for.i.latch:
+  %i.inc = add i32 %i, 1
+  %cmp.i = icmp slt i32 %i.inc, 42
+  br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+  ret void
+}
+
+; void f(int A[][8][32]) {
+;   for (i = 0; i < 42; i++)
+;    for (j = 0; j < 8; j++)
+;     for (k = 0; k < 32; k++)
+;       A[i][7-j][k] = 1;
+; }
+
+; CHECK:      Delinearization on function a_i_nj_k:
+; CHECK:      Base offset: %a
+; CHECK-NEXT: ArrayDecl[UnknownSize][8][32] with elements of 4 bytes.
+; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{7,+,-1}<nsw><%for.j.header>][{0,+,1}<nuw><nsw><%for.k>]
+define void @a_i_nj_k(ptr %a) {
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+  br label %for.j.header
+
+for.j.header:
+  %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
+  %j.subscript = sub i32 7, %j
+  br label %for.k
+
+for.k:
+  %k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k ]
+  %idx = getelementptr [8 x [32 x i32]], ptr %a, i32 %i, i32 %j.subscript, i32 %k
+  store i32 1, ptr %idx
+  %k.inc = add i32 %k, 1
+  %cmp.k = icmp slt i32 %k.inc, 32
+  br i1 %cmp.k, label %for.k, label %for.j.latch
+
+for.j.latch:
+  %j.inc = add i32 %j, 1
+  %cmp.j = icmp slt i32 %j.inc, 8
+  br i1 %cmp.j, label %for.j.header, label %for.i.latch
+
+for.i.latch:
+  %i.inc = add i32 %i, 1
+  %cmp.i = icmp slt i32 %i.inc, 42
+  br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+  ret void
+}
+
+; In the following code, the access functions for both stores are represented
+; in the same way in SCEV, so the delinearization results are also the same. We
+; don't have any type information of the underlying objects.
+;
+; void f(int A[][4][64], int B[][8][32]) {
+;   for (i = 0; i < 42; i++)
+;    for (j = 0; j < 4; j++)
+;     for (k = 0; k < 32; k++) {
+;       A[i][j][k] = 1;
+;       B[i][2*j][k] = 1;
+;     }
+; }
+
+; CHECK:      Delinearization on function a_ijk_b_i2jk:
+; CHECK:      Base offset: %a
+; CHECK-NEXT: ArrayDecl[UnknownSize][4][64] with elements of 4 bytes.
+; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{0,+,1}<nuw><nsw><%for.j.header>][{0,+,1}<nuw><nsw><%for.k>]
+; CHECK:      Base offset: %b
+; CHECK-NEXT: ArrayDecl[UnknownSize][4][64] with elements of 4 bytes.
+; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{0,+,1}<nuw><nsw><%for.j.header>][{0,+,1}<nuw><nsw><%for.k>]
+define void @a_ijk_b_i2jk(ptr %a, ptr %b) {
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+  br label %for.j.header
+
+for.j.header:
+  %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
+  %j2 = shl i32 %j, 1
+  br label %for.k
+
+for.k:
+  %k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k ]
+  %a.idx = getelementptr [4 x [64 x i32]], ptr %a, i32 %i, i32 %j, i32 %k
+  %b.idx = getelementptr [8 x [32 x i32]], ptr %b, i32 %i, i32 %j2, i32 %k
+  store i32 1, ptr %a.idx
+  store i32 1, ptr %b.idx
+  %k.inc = add i32 %k, 1
+  %cmp.k = icmp slt i32 %k.inc, 32
+  br i1 %cmp.k, label %for.k, label %for.j.latch
+
+for.j.latch:
+  %j.inc = add i32 %j, 1
+  %cmp.j = icmp slt i32 %j.inc, 4
+  br i1 %cmp.j, label %for.j.header, label %for.i.latch
+
+for.i.latch:
+  %i.inc = add i32 %i, 1
+  %cmp.i = icmp slt i32 %i.inc, 42
+  br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+  ret void
+}
+
+; The type information of the underlying object is not available, so the
+; delinearization result is different from it.
+;
+; void f(int A[][8][32]) {
+;   for (i = 0; i < 42; i++)
+;    for (j = 0; j < 3; j++)
+;     for (k = 0; k < 32; k++)
+;       A[i][2*j+1][k] = 1;
+; }
+
+; CHECK:      Delinearization on function a_i_2j1_k:
+; CHECK:      Base offset: %a
+; CHECK-NEXT: ArrayDecl[UnknownSize][4][64] with elements of 4 bytes.
+; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{0,+,1}<nuw><%for.j.header>][{32,+,1}<nw><%for.k>]
+define void @a_i_2j1_k(ptr %a) {
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+  br label %for.j.header
+
+for.j.header:
+  %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
+  %j2 = shl i32 %j, 1
+  %j.subscript = add i32 %j2, 1
+  br label %for.k
+
+for.k:
+  %k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k ]
+  %idx = getelementptr [8 x [32 x i32]], ptr %a, i32 %i, i32 %j.subscript, i32 %k
+  store i32 1, ptr %idx
+  %k.inc = add i32 %k, 1
+  %cmp.k = icmp slt i32 %k.inc, 32
+  br i1 %cmp.k, label %for.k, label %for.j.latch
+
+for.j.latch:
+  %j.inc = add i32 %j, 1
+  %cmp.j = icmp slt i32 %j.inc, 3
+  br i1 %cmp.j, label %for.j.header, label %for.i.latch
+
+for.i.latch:
+  %i.inc = add i32 %i, 1
+  %cmp.i = icmp slt i32 %i.inc, 42
+  br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+  ret void
+}
+
+; Fail to delinearize because the step recurrence of the j-loop is not
+; divisible by that of the k-loop.
+;
+; void f(int A[][8][32]) {
+;   for (i = 0; i < 42; i++)
+;    for (j = 0; j < 8; j++)
+;     for (k = 0; k < 10; k++)
+;       A[i][j][3*k] = 1;
+; }
+
+; CHECK:      Delinearization on function a_i_j_3k:
+; CHECK:      AccessFunction: {{...}}0,+,1024}<nuw><nsw><%for.i.header>,+,128}<nw><%for.j.header>,+,12}<nw><%for.k>
+; CHECK-NEXT: failed to delinearize
+define void @a_i_j_3k(ptr %a) {
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+  br label %for.j.header
+
+for.j.header:
+  %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
+  br label %for.k
+
+for.k:
+  %k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k ]
+  %k.subscript = mul i32 %k, 3
+  %idx = getelementptr [8 x [32 x i32]], ptr %a, i32 %i, i32 %j, i32 %k.subscript
+  store i32 1, ptr %idx
+  %k.inc = add i32 %k, 1
+  %cmp.k = icmp slt i32 %k.inc, 10
+  br i1 %cmp.k, label %for.k, label %for.j.latch
+
+for.j.latch:
+  %j.inc = add i32 %j, 1
+  %cmp.j = icmp slt i32 %j.inc, 8
+  br i1 %cmp.j, label %for.j.header, label %for.i.latch
+
+for.i.latch:
+  %i.inc = add i32 %i, 1
+  %cmp.i = icmp slt i32 %i.inc, 42
+  br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+  ret void
+}
+
+; Fail to delinearize because i is used in multiple subscripts that are not adjacent.
+;
+; void f(int A[][8][32]) {
+;   for (i = 0; i < 32; i++)
+;    for (j = 0; j < 4; j++)
+;     for (k = 0; k < 4; k++)
+;       A[i][j+k][i] = 1;
+; }
+
+; CHECK:      Delinearization on function a_i_jk_i:
+; CHECK:      AccessFunction: {{...}}0,+,1028}<%for.i.header>,+,128}<nw><%for.j.header>,+,128}<nw><%for.k>
+; CHECK-NEXT: failed to delinearize
+define void @a_i_jk_i(ptr %a) {
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+  br label %for.j.header
+
+for.j.header:
+  %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
+  br label %for.k
+
+for.k:
+  %k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k ]
+  %jk = add i32 %j, %k
+  %idx = getelementptr [8 x [32 x i32]], ptr %a, i32 %i, i32 %jk, i32 %i
+  store i32 1, ptr %idx
+  %k.inc = add i32 %k, 1
+  %cmp.k = icmp slt i32 %k.inc, 4
+  br i1 %cmp.k, label %for.k, label %for.j.latch
+
+for.j.latch:
+  %j.inc = add i32 %j, 1
+  %cmp.j = icmp slt i32 %j.inc, 4
+  br i1 %cmp.j, label %for.j.header, label %for.i.latch
+
+for.i.latch:
+  %i.inc = add i32 %i, 1
+  %cmp.i = icmp slt i32 %i.inc, 32
+  br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+  ret void
+}
+
+; Can delinearize, but the result is different from the original array size. In
+; this case, the outermost two dimensions are melded into one.
+;
+; void f(int A[][8][32]) {
+;   for (i = 0; i < 8; i++)
+;    for (j = 0; j < 10; j++)
+;     for (k = 0; k < 10; k++)
+;       A[i][i][j+k] = 1;
+; }
+
+; CHECK:      Delinearization on function a_i_i_jk:
+; CHECK:      Base offset: %a
+; CHECK-NEXT: ArrayDecl[UnknownSize][288] with elements of 4 bytes.
+; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{{..}}0,+,1}<nuw><nsw><%for.j.header>,+,1}<nuw><nsw><%for.k>]
+define void @a_i_i_jk(ptr %a) {
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+  br label %for.j.header
+
+for.j.header:
+  %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
+  br label %for.k
+
+for.k:
+  %k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k ]
+  %jk = add i32 %j, %k
+  %idx = getelementptr [8 x [32 x i32]], ptr %a, i32 %i, i32 %i, i32 %jk
+  store i32 1, ptr %idx
+  %k.inc = add i32 %k, 1
+  %cmp.k = icmp slt i32 %k.inc, 10
+  br i1 %cmp.k, label %for.k, label %for.j.latch
+
+for.j.latch:
+  %j.inc = add i32 %j, 1
+  %cmp.j = icmp slt i32 %j.inc, 10
+  br i1 %cmp.j, label %for.j.header, label %for.i.latch
+
+for.i.latch:
+  %i.inc = add i32 %i, 1
+  %cmp.i = icmp slt i32 %i.inc, 8
+  br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+  ret void
+}
+
+; void f(int A[][8][32]) {
+;   for (i = 0; i < 8; i++)
+;    for (j = 0; j < 4; j++)
+;     for (k = 0; k < 4; k++)
+;       for (l = 0; l < 32; l++)
+;         A[i][j+k][l] = 1;
+; }
+
+; CHECK:      Delinearization on function a_i_jk_l:
+; CHECK:      Base offset: %a
+; CHECK-NEXT: ArrayDecl[UnknownSize][8][32] with elements of 4 bytes.
+; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{{..}}0,+,1}<nuw><nsw><%for.j.header>,+,1}<nuw><nsw><%for.k.header>][{0,+,1}<nuw><nsw><%for.l>]
+
+define void @a_i_jk_l(ptr %a) {
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+  br label %for.j.header
+
+for.j.header:
+  %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
+  br label %for.k.header
+
+for.k.header:
+  %k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k.latch ]
+  %jk = add i32 %j, %k
+  br label %for.l
+
+for.l:
+  %l = phi i32 [ 0, %for.k.header ], [ %l.inc, %for.l ]
+  %idx = getelementptr [8 x [32 x i32]], ptr %a, i32 %i, i32 %jk, i32 %l
+  store i32 1, ptr %idx
+  %l.inc = add i32 %l, 1
+  %cmp.l = icmp slt i32 %l.inc, 32
+  br i1 %cmp.l, label %for.l, label %for.k.latch
+
+for.k.latch:
+  %k.inc = add i32 %k, 1
+  %cmp.k = icmp slt i32 %k.inc, 4
+  br i1 %cmp.k, label %for.k.header, label %for.j.latch
+
+for.j.latch:
+  %j.inc = add i32 %j, 1
+  %cmp.j = icmp slt i32 %j.inc, 4
+  br i1 %cmp.j, label %for.j.header, label %for.i.latch
+
+for.i.latch:
+  %i.inc = add i32 %i, 1
+  %cmp.i = icmp slt i32 %i.inc, 8
+  br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+  ret void
+}
+
+; Reject if the address is not a multiple of the element size.
+;
+; void f(int *A) {
+;   for (i = 0; i < 42; i++)
+;    for (j = 0; j < 8; j++)
+;     for (k = 0; k < 32; k++)
+;       *((int *)((char *)A + i*256 + j*32 + k)) = 1;
+; }
+
+; CHECK:      Delinearization on function non_divisible_by_element_size:
+; CHECK:      AccessFunction: {{...}}0,+,256}<nuw><nsw><%for.i.header>,+,32}<nw><%for.j.header>,+,1}<nw><%for.k>
+; CHECK-NEXT: failed to delinearize
+define void @non_divisible_by_element_size(ptr %a) {
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+  br label %for.j.header
+
+for.j.header:
+  %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
+  br label %for.k
+
+for.k:
+  %k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k ]
+  %idx = getelementptr [8 x [32 x i8]], ptr %a, i32 %i, i32 %j, i32 %k
+  store i32 1, ptr %idx
+  %k.inc = add i32 %k, 1
+  %cmp.k = icmp slt i32 %k.inc, 32
+  br i1 %cmp.k, label %for.k, label %for.j.latch
+
+for.j.latch:
+  %j.inc = add i32 %j, 1
+  %cmp.j = icmp slt i32 %j.inc, 8
+  br i1 %cmp.j, label %for.j.header, label %for.i.latch
+
+for.i.latch:
+  %i.inc = add i32 %i, 1
+  %cmp.i = icmp slt i32 %i.inc, 42
+  br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+  ret void
+}