|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 |
1 | 2 | ; RUN: opt -passes=loop-vectorize \ |
2 | 3 | ; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \ |
3 | | -; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s --check-prefix=IF-EVL |
4 | | - |
5 | | -; RUN: opt -passes=loop-vectorize \ |
6 | | -; RUN: -prefer-predicate-over-epilogue=scalar-epilogue \ |
7 | | -; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s --check-prefix=NO-VP |
| 4 | +; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s |
8 | 5 |
|
9 | 6 | target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128" |
10 | 7 |
|
11 | 8 | define void @reverse_store(ptr %a, i64 %n) !dbg !4 { |
| 9 | +; CHECK-LABEL: define void @reverse_store( |
| 10 | +; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] !dbg [[DBG4:![0-9]+]] { |
| 11 | +; CHECK-NEXT: [[ENTRY:.*:]] |
| 12 | +; CHECK-NEXT: #dbg_value(ptr [[A]], [[META11:![0-9]+]], !DIExpression(), [[META15:![0-9]+]]) |
| 13 | +; CHECK-NEXT: #dbg_value(i64 [[N]], [[META12:![0-9]+]], !DIExpression(), [[META15]]) |
| 14 | +; CHECK-NEXT: #dbg_value(i64 [[N]], [[META13:![0-9]+]], !DIExpression(DW_OP_constu, 1, DW_OP_minus, DW_OP_stack_value), [[META16:![0-9]+]]) |
| 15 | +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1, !dbg [[DBG17:![0-9]+]] |
| 16 | +; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[N]], i64 1), !dbg [[DBG17]] |
| 17 | +; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[UMIN]], !dbg [[DBG17]] |
| 18 | +; CHECK-NEXT: br label %[[VECTOR_PH:.*]], !dbg [[DBG17]] |
| 19 | +; CHECK: [[VECTOR_PH]]: |
| 20 | +; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64() |
| 21 | +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[N]], i64 0 |
| 22 | +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer |
| 23 | +; CHECK-NEXT: [[TMP3:%.*]] = mul nsw <vscale x 4 x i64> [[TMP2]], splat (i64 -1) |
| 24 | +; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw <vscale x 4 x i64> [[BROADCAST_SPLAT]], [[TMP3]] |
| 25 | +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]], !dbg [[DBG17]] |
| 26 | +; CHECK: [[VECTOR_BODY]]: |
| 27 | +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| 28 | +; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP1]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| 29 | +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) |
| 30 | +; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64 |
| 31 | +; CHECK-NEXT: [[TMP6:%.*]] = mul nsw i64 -1, [[TMP5]] |
| 32 | +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP6]], i64 0 |
| 33 | +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer |
| 34 | +; CHECK-NEXT: [[TMP7:%.*]] = add nsw <vscale x 4 x i64> [[VEC_IND]], splat (i64 -1), !dbg [[DBG18:![0-9]+]] |
| 35 | +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <vscale x 4 x i64> [[TMP7]], i32 0 |
| 36 | +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP8]], !dbg [[DBG19:![0-9]+]] |
| 37 | +; CHECK-NEXT: [[TMP10:%.*]] = trunc nuw nsw <vscale x 4 x i64> [[TMP7]] to <vscale x 4 x i32>, !dbg [[DBG21:![0-9]+]] |
| 38 | +; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP4]] to i64, !dbg [[DBG21]] |
| 39 | +; CHECK-NEXT: [[TMP12:%.*]] = mul i64 0, [[TMP11]], !dbg [[DBG21]] |
| 40 | +; CHECK-NEXT: [[TMP13:%.*]] = sub i64 [[TMP11]], 1, !dbg [[DBG21]] |
| 41 | +; CHECK-NEXT: [[TMP14:%.*]] = mul i64 -1, [[TMP13]], !dbg [[DBG21]] |
| 42 | +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP9]], i64 [[TMP12]], !dbg [[DBG21]] |
| 43 | +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP15]], i64 [[TMP14]], !dbg [[DBG21]] |
| 44 | +; CHECK-NEXT: [[VP_REVERSE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[TMP10]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP4]]), !dbg [[DBG21]] |
| 45 | +; CHECK-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_REVERSE]], ptr align 4 [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP4]]), !dbg [[DBG21]] |
| 46 | +; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP5]] |
| 47 | +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT2]] |
| 48 | +; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 |
| 49 | +; CHECK-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] |
| 50 | +; CHECK: [[MIDDLE_BLOCK]]: |
| 51 | +; CHECK-NEXT: br label %[[FOR_COND_CLEANUP:.*]] |
| 52 | +; CHECK: [[FOR_COND_CLEANUP]]: |
| 53 | +; CHECK-NEXT: ret void, !dbg [[DBG25:![0-9]+]] |
| 54 | +; |
12 | 55 | entry: |
13 | 56 | #dbg_value(ptr %a, !11, !DIExpression(), !15) |
14 | 57 | #dbg_value(i64 %n, !12, !DIExpression(), !15) |
@@ -62,3 +105,29 @@ for.body: ; preds = %entry, %for.body |
62 | 105 | !25 = distinct !{!25, !17, !26, !27} |
63 | 106 | !26 = !DILocation(line: 3, column: 14, scope: !14) |
64 | 107 | !27 = !{!"llvm.loop.vectorize.enable", i1 true} |
| 108 | +; |
| 109 | +; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: [[META1:![0-9]+]], producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) |
| 110 | +; CHECK: [[META1]] = !DIFile(filename: "{{.*}}dbg-tail-folding-by-evl.cpp", directory: {{.*}}) |
| 111 | +; CHECK: [[DBG4]] = distinct !DISubprogram(name: "reverse_store", linkageName: "_Z13reverse_storePil", scope: [[META1]], file: [[META1]], line: 1, type: [[META5:![0-9]+]], scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]], retainedNodes: [[META10:![0-9]+]]) |
| 112 | +; CHECK: [[META5]] = !DISubroutineType(types: [[META6:![0-9]+]]) |
| 113 | +; CHECK: [[META6]] = !{null, [[META7:![0-9]+]], [[META9:![0-9]+]]} |
| 114 | +; CHECK: [[META7]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: [[META8:![0-9]+]], size: 64) |
| 115 | +; CHECK: [[META8]] = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) |
| 116 | +; CHECK: [[META9]] = !DIBasicType(name: "long", size: 64, encoding: DW_ATE_signed) |
| 117 | +; CHECK: [[META10]] = !{[[META11]], [[META12]], [[META13]]} |
| 118 | +; CHECK: [[META11]] = !DILocalVariable(name: "a", arg: 1, scope: [[DBG4]], file: [[META1]], line: 1, type: [[META7]]) |
| 119 | +; CHECK: [[META12]] = !DILocalVariable(name: "n", arg: 2, scope: [[DBG4]], file: [[META1]], line: 1, type: [[META9]]) |
| 120 | +; CHECK: [[META13]] = !DILocalVariable(name: "i", scope: [[META14:![0-9]+]], file: [[META1]], line: 2, type: [[META8]]) |
| 121 | +; CHECK: [[META14]] = distinct !DILexicalBlock(scope: [[DBG4]], file: [[META1]], line: 2, column: 5) |
| 122 | +; CHECK: [[META15]] = !DILocation(line: 0, scope: [[DBG4]]) |
| 123 | +; CHECK: [[META16]] = !DILocation(line: 0, scope: [[META14]]) |
| 124 | +; CHECK: [[DBG17]] = !DILocation(line: 2, column: 5, scope: [[META14]]) |
| 125 | +; CHECK: [[DBG18]] = !DILocation(line: 2, scope: [[META14]]) |
| 126 | +; CHECK: [[DBG19]] = !DILocation(line: 3, column: 7, scope: [[META20:![0-9]+]]) |
| 127 | +; CHECK: [[META20]] = distinct !DILexicalBlock(scope: [[META14]], file: [[META1]], line: 2, column: 5) |
| 128 | +; CHECK: [[DBG21]] = !DILocation(line: 3, column: 12, scope: [[META20]]) |
| 129 | +; CHECK: [[LOOP22]] = distinct !{[[LOOP22]], [[META23:![0-9]+]], [[META24:![0-9]+]]} |
| 130 | +; CHECK: [[META23]] = !{!"llvm.loop.isvectorized", i32 1} |
| 131 | +; CHECK: [[META24]] = !{!"llvm.loop.unroll.runtime.disable"} |
| 132 | +; CHECK: [[DBG25]] = !DILocation(line: 4, column: 1, scope: [[DBG4]]) |
| 133 | +;. |
0 commit comments