-
Notifications
You must be signed in to change notification settings - Fork 15.5k
[SCEV] Fix cache poisoning from depth-limited truncate expressions #172234
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||
|---|---|---|---|---|
| @@ -0,0 +1,223 @@ | ||||
| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 | ||||
| ; RUN: opt %s -passes="loop(loop-idiom,indvars,loop-deletion,loop-unroll-full)" -S | FileCheck %s | ||||
| ; REQUIRES: asserts | ||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Does not need asserts.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I added an assertion in IndVarSimplify to ensure that the truncated expression is a SCEVAddRecExpr.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The original issue caused the following assertion to fail:
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You only need to add |
||||
|
|
||||
| target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" | ||||
| target triple = "x86_64-unknown-linux-gnu" | ||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do not use a target triple. |
||||
|
|
||||
| ; Loop nest with no function exit (there is no ret): for.cond2 counts the i32 | ||||
| ; value %i5.0 from 0 while it is ult %conv5, and the five inner loops | ||||
| ; (for.cond9 .. for.cond37) are controlled by the i1 arguments %cmp13 .. %cmp41. | ||||
| ; The innermost body zero-extends %i0.018 and %i5.0 to i64 and feeds them, with | ||||
| ; a volatile i64 load from null, through a chain of mul/add instructions. | ||||
| ; The CHECK lines expect %i5.0 to become the i64 phi INDVARS_IV and the | ||||
| ; innermost loop to be peeled once (llvm.loop.peeled.count = 1). | ||||
| define void @_ZNK4Test29TestViewOperator_LeftAndRightIiN6Kokkos6SerialELj7EEclEmRi(i32 %conv5, i1 %cmp13, i1 %cmp20, i1 %cmp27, i1 %cmp34, i1 %cmp41) local_unnamed_addr { | ||||
| ; CHECK-LABEL: define void @_ZNK4Test29TestViewOperator_LeftAndRightIiN6Kokkos6SerialELj7EEclEmRi( | ||||
| ; CHECK-SAME: i32 [[CONV5:%.*]], i1 [[CMP13:%.*]], i1 [[CMP20:%.*]], i1 [[CMP27:%.*]], i1 [[CMP34:%.*]], i1 [[CMP41:%.*]]) local_unnamed_addr { | ||||
| ; CHECK-NEXT: [[ENTRY:.*:]] | ||||
| ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[CONV5]], 1 | ||||
| ; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 | ||||
| ; CHECK-NEXT: br label %[[FOR_COND:.*]] | ||||
| ; CHECK: [[FOR_COND_LOOPEXIT:.*]]: | ||||
| ; CHECK-NEXT: br label %[[FOR_COND]] | ||||
| ; CHECK: [[FOR_COND]]: | ||||
| ; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[CONV5]] to i64 | ||||
| ; CHECK-NEXT: br label %[[FOR_COND2:.*]] | ||||
| ; CHECK: [[FOR_COND2]]: | ||||
| ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_COND_CLEANUP14:.*]] ], [ 0, %[[FOR_COND]] ] | ||||
| ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV]], [[TMP2]] | ||||
| ; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_COND9_PREHEADER:.*]], label %[[FOR_COND_LOOPEXIT]] | ||||
| ; CHECK: [[FOR_COND9_PREHEADER]]: | ||||
| ; CHECK-NEXT: br label %[[FOR_COND9:.*]] | ||||
| ; CHECK: [[FOR_COND9_LOOPEXIT:.*]]: | ||||
| ; CHECK-NEXT: br label %[[FOR_COND9]] | ||||
| ; CHECK: [[FOR_COND9]]: | ||||
| ; CHECK-NEXT: br i1 [[CMP13]], label %[[FOR_COND16_PREHEADER:.*]], label %[[FOR_COND_CLEANUP14]] | ||||
| ; CHECK: [[FOR_COND16_PREHEADER]]: | ||||
| ; CHECK-NEXT: br label %[[FOR_COND16:.*]] | ||||
| ; CHECK: [[FOR_COND_CLEANUP14]]: | ||||
| ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 | ||||
| ; CHECK-NEXT: br label %[[FOR_COND2]] | ||||
| ; CHECK: [[FOR_COND16_LOOPEXIT:.*]]: | ||||
| ; CHECK-NEXT: br label %[[FOR_COND16]] | ||||
| ; CHECK: [[FOR_COND16]]: | ||||
| ; CHECK-NEXT: br i1 [[CMP20]], label %[[FOR_COND23_PREHEADER:.*]], label %[[FOR_COND9_LOOPEXIT]] | ||||
| ; CHECK: [[FOR_COND23_PREHEADER]]: | ||||
| ; CHECK-NEXT: br label %[[FOR_COND23:.*]] | ||||
| ; CHECK: [[FOR_COND23_LOOPEXIT:.*]]: | ||||
| ; CHECK-NEXT: br label %[[FOR_COND23]] | ||||
| ; CHECK: [[FOR_COND23]]: | ||||
| ; CHECK-NEXT: br i1 [[CMP27]], label %[[FOR_COND30_PREHEADER:.*]], label %[[FOR_COND16_LOOPEXIT]] | ||||
| ; CHECK: [[FOR_COND30_PREHEADER]]: | ||||
| ; CHECK-NEXT: br label %[[FOR_COND30:.*]] | ||||
| ; CHECK: [[FOR_COND30_LOOPEXIT_LOOPEXIT:.*]]: | ||||
| ; CHECK-NEXT: br label %[[FOR_COND30_LOOPEXIT:.*]] | ||||
| ; CHECK: [[FOR_COND30_LOOPEXIT]]: | ||||
| ; CHECK-NEXT: br label %[[FOR_COND30]] | ||||
| ; CHECK: [[FOR_COND30]]: | ||||
| ; CHECK-NEXT: br i1 [[CMP34]], label %[[FOR_COND37_PREHEADER:.*]], label %[[FOR_COND23_LOOPEXIT]] | ||||
| ; CHECK: [[FOR_COND37_PREHEADER]]: | ||||
| ; CHECK-NEXT: br label %[[FOR_COND37_PEEL_BEGIN:.*]] | ||||
| ; CHECK: [[FOR_COND37_PEEL_BEGIN]]: | ||||
| ; CHECK-NEXT: br label %[[FOR_COND37_PEEL:.*]] | ||||
| ; CHECK: [[FOR_COND37_PEEL]]: | ||||
| ; CHECK-NEXT: br i1 [[CMP41]], label %[[FOR_BODY43_PEEL:.*]], label %[[FOR_COND30_LOOPEXIT]] | ||||
| ; CHECK: [[FOR_BODY43_PEEL]]: | ||||
| ; CHECK-NEXT: [[CONV45_PEEL:%.*]] = zext i32 0 to i64 | ||||
| ; CHECK-NEXT: [[CALL31_I_I_PEEL:%.*]] = load volatile i64, ptr null, align 8 | ||||
| ; CHECK-NEXT: [[MUL79_I_I_PEEL:%.*]] = mul i64 [[CALL31_I_I_PEEL]], [[INDVARS_IV]] | ||||
| ; CHECK-NEXT: [[DOTIDX1_PEEL:%.*]] = add i64 [[CONV45_PEEL]], [[MUL79_I_I_PEEL]] | ||||
| ; CHECK-NEXT: [[SUB_PTR_LHS_CAST_PEEL:%.*]] = shl i64 [[DOTIDX1_PEEL]], 2 | ||||
| ; CHECK-NEXT: [[SUB_PTR_DIV_PEEL:%.*]] = ashr exact i64 [[SUB_PTR_LHS_CAST_PEEL]], 1 | ||||
| ; CHECK-NEXT: [[CMP55_PEEL:%.*]] = icmp sgt i64 0, 0 | ||||
| ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP55_PEEL]]) | ||||
| ; CHECK-NEXT: br label %[[FOR_COND37_PEEL_NEXT:.*]] | ||||
| ; CHECK: [[FOR_COND37_PEEL_NEXT]]: | ||||
| ; CHECK-NEXT: br label %[[FOR_COND37_PEEL_NEXT1:.*]] | ||||
| ; CHECK: [[FOR_COND37_PEEL_NEXT1]]: | ||||
| ; CHECK-NEXT: br label %[[FOR_COND37_PREHEADER_PEEL_NEWPH:.*]] | ||||
| ; CHECK: [[FOR_COND37_PREHEADER_PEEL_NEWPH]]: | ||||
| ; CHECK-NEXT: br label %[[FOR_COND37:.*]] | ||||
| ; CHECK: [[FOR_COND37]]: | ||||
| ; CHECK-NEXT: [[OFFSET_619:%.*]] = phi i64 [ [[SUB_PTR_DIV:%.*]], %[[FOR_BODY43:.*]] ], [ [[SUB_PTR_DIV_PEEL]], %[[FOR_COND37_PREHEADER_PEEL_NEWPH]] ] | ||||
| ; CHECK-NEXT: br i1 [[CMP41]], label %[[FOR_BODY43]], label %[[FOR_COND30_LOOPEXIT_LOOPEXIT]] | ||||
| ; CHECK: [[FOR_BODY43]]: | ||||
| ; CHECK-NEXT: [[CALL31_I_I:%.*]] = load volatile i64, ptr null, align 8 | ||||
| ; CHECK-NEXT: [[ADD33_I_I:%.*]] = add i64 [[INDVARS_IV]], [[CALL31_I_I]] | ||||
| ; CHECK-NEXT: [[MUL42_I_I:%.*]] = mul i64 [[TMP1]], [[ADD33_I_I]] | ||||
| ; CHECK-NEXT: [[ADD43_I_I:%.*]] = add i64 [[MUL42_I_I]], 1 | ||||
| ; CHECK-NEXT: [[MUL52_I_I:%.*]] = mul i64 [[TMP1]], [[ADD43_I_I]] | ||||
| ; CHECK-NEXT: [[ADD53_I_I:%.*]] = add i64 [[MUL52_I_I]], 1 | ||||
| ; CHECK-NEXT: [[MUL62_I_I:%.*]] = mul i64 [[TMP1]], [[ADD53_I_I]] | ||||
| ; CHECK-NEXT: [[ADD63_I_I:%.*]] = add i64 [[MUL62_I_I]], 1 | ||||
| ; CHECK-NEXT: [[MUL72_I_I:%.*]] = mul i64 [[INDVARS_IV]], [[ADD63_I_I]] | ||||
| ; CHECK-NEXT: [[MUL79_I_I:%.*]] = mul i64 [[CALL31_I_I]], [[MUL72_I_I]] | ||||
| ; CHECK-NEXT: [[DOTIDX1:%.*]] = add i64 [[TMP1]], [[MUL79_I_I]] | ||||
| ; CHECK-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = shl i64 [[DOTIDX1]], 2 | ||||
| ; CHECK-NEXT: [[SUB_PTR_DIV]] = ashr exact i64 [[SUB_PTR_LHS_CAST]], 1 | ||||
| ; CHECK-NEXT: [[CMP55:%.*]] = icmp sgt i64 [[OFFSET_619]], 0 | ||||
| ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP55]]) | ||||
| ; CHECK-NEXT: br label %[[FOR_COND37]], !llvm.loop [[LOOP0:![0-9]+]] | ||||
| ; | ||||
| entry: | ||||
| br label %for.cond | ||||
|
|
||||
| for.cond: ; preds = %for.cond2, %entry | ||||
| br label %for.cond2 | ||||
|
|
||||
| for.cond2: ; preds = %for.cond.cleanup14, %for.cond | ||||
| %i5.0 = phi i32 [ 0, %for.cond ], [ %inc70, %for.cond.cleanup14 ] | ||||
| %cmp6 = icmp ult i32 %i5.0, %conv5 | ||||
| br i1 %cmp6, label %for.cond9, label %for.cond | ||||
|
|
||||
| for.cond9: ; preds = %for.cond16, %for.cond2 | ||||
| br i1 %cmp13, label %for.cond16, label %for.cond.cleanup14 | ||||
|
|
||||
| for.cond.cleanup14: ; preds = %for.cond9 | ||||
| %inc70 = add i32 %i5.0, 1 | ||||
| br label %for.cond2 | ||||
|
|
||||
| for.cond16: ; preds = %for.cond23, %for.cond9 | ||||
| br i1 %cmp20, label %for.cond23, label %for.cond9 | ||||
|
|
||||
| for.cond23: ; preds = %for.cond30, %for.cond16 | ||||
| br i1 %cmp27, label %for.cond30, label %for.cond16 | ||||
|
|
||||
| for.cond30: ; preds = %for.cond37, %for.cond23 | ||||
| br i1 %cmp34, label %for.cond37, label %for.cond23 | ||||
|
|
||||
| for.cond37: ; preds = %for.body43, %for.cond30 | ||||
| %i0.018 = phi i32 [ %inc, %for.body43 ], [ 0, %for.cond30 ] | ||||
| %offset.619 = phi i64 [ %sub.ptr.div, %for.body43 ], [ 0, %for.cond30 ] | ||||
| br i1 %cmp41, label %for.body43, label %for.cond30 | ||||
|
|
||||
| for.body43: ; preds = %for.cond37 | ||||
| %conv45 = zext i32 %i0.018 to i64 | ||||
| %conv50 = zext i32 %i5.0 to i64 | ||||
| %call31.i.i = load volatile i64, ptr null, align 8 | ||||
| %add33.i.i = add i64 %conv50, %call31.i.i | ||||
| %mul42.i.i = mul i64 %conv45, %add33.i.i | ||||
| %add43.i.i = add i64 %mul42.i.i, 1 | ||||
| %mul52.i.i = mul i64 %conv45, %add43.i.i | ||||
| %add53.i.i = add i64 %mul52.i.i, 1 | ||||
| %mul62.i.i = mul i64 %conv45, %add53.i.i | ||||
| %add63.i.i = add i64 %mul62.i.i, 1 | ||||
| %mul72.i.i = mul i64 %conv50, %add63.i.i | ||||
| %mul79.i.i = mul i64 %call31.i.i, %mul72.i.i | ||||
| %.idx1 = add i64 %conv45, %mul79.i.i | ||||
| %sub.ptr.lhs.cast = shl i64 %.idx1, 2 | ||||
| %sub.ptr.div = ashr exact i64 %sub.ptr.lhs.cast, 1 | ||||
| %cmp55 = icmp sgt i64 %offset.619, 0 | ||||
| call void @llvm.assume(i1 %cmp55) | ||||
| ; NOTE: %inc is %conv5 + 1 (loop-invariant), not %i0.018 + 1, so the phi | ||||
| ; %i0.018 is 0 on the first iteration and %conv5 + 1 on every later one. | ||||
| %inc = add i32 %conv5, 1 | ||||
| br label %for.cond37 | ||||
| } | ||||
|
|
||||
| ; Straight-line function with no loops: a volatile i64 load from null feeds a | ||||
| ; chain of mul/add on %i0 and %i5, and the result indexes a getelementptr i32 | ||||
| ; off a null base pointer. The CHECK lines expect the same instruction | ||||
| ; sequence as the input. | ||||
| define ptr @_ZNK6Kokkos11DynRankViewIiJNS_10LayoutLeftENS_6SerialEEEclEmmmmmmm(i64 %i0, i64 %i5) local_unnamed_addr { | ||||
| ; CHECK-LABEL: define ptr @_ZNK6Kokkos11DynRankViewIiJNS_10LayoutLeftENS_6SerialEEEclEmmmmmmm( | ||||
| ; CHECK-SAME: i64 [[I0:%.*]], i64 [[I5:%.*]]) local_unnamed_addr { | ||||
| ; CHECK-NEXT: [[ENTRY:.*:]] | ||||
| ; CHECK-NEXT: [[CALL31_I:%.*]] = load volatile i64, ptr null, align 8 | ||||
| ; CHECK-NEXT: [[ADD33_I:%.*]] = add i64 [[I5]], [[CALL31_I]] | ||||
| ; CHECK-NEXT: [[MUL42_I:%.*]] = mul i64 [[I0]], [[ADD33_I]] | ||||
| ; CHECK-NEXT: [[ADD43_I:%.*]] = add i64 [[MUL42_I]], 1 | ||||
| ; CHECK-NEXT: [[MUL52_I:%.*]] = mul i64 [[I0]], [[ADD43_I]] | ||||
| ; CHECK-NEXT: [[ADD53_I:%.*]] = add i64 [[MUL52_I]], 1 | ||||
| ; CHECK-NEXT: [[MUL62_I:%.*]] = mul i64 [[I0]], [[ADD53_I]] | ||||
| ; CHECK-NEXT: [[ADD63_I:%.*]] = add i64 [[MUL62_I]], 1 | ||||
| ; CHECK-NEXT: [[MUL72_I:%.*]] = mul i64 [[I5]], [[ADD63_I]] | ||||
| ; CHECK-NEXT: [[MUL79_I:%.*]] = mul i64 [[CALL31_I]], [[MUL72_I]] | ||||
| ; CHECK-NEXT: [[ADD80_I:%.*]] = add i64 [[I0]], [[MUL79_I]] | ||||
| ; CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr i32, ptr null, i64 [[ADD80_I]] | ||||
| ; CHECK-NEXT: ret ptr [[ARRAYIDX_I]] | ||||
| ; | ||||
| entry: | ||||
| %call31.i = load volatile i64, ptr null, align 8 | ||||
| %add33.i = add i64 %i5, %call31.i | ||||
| %mul42.i = mul i64 %i0, %add33.i | ||||
| %add43.i = add i64 %mul42.i, 1 | ||||
| %mul52.i = mul i64 %i0, %add43.i | ||||
| %add53.i = add i64 %mul52.i, 1 | ||||
| %mul62.i = mul i64 %i0, %add53.i | ||||
| %add63.i = add i64 %mul62.i, 1 | ||||
| %mul72.i = mul i64 %i5, %add63.i | ||||
| %mul79.i = mul i64 %call31.i, %mul72.i | ||||
| %add80.i = add i64 %i0, %mul79.i | ||||
| %arrayidx.i = getelementptr i32, ptr null, i64 %add80.i | ||||
| ret ptr %arrayidx.i | ||||
| } | ||||
|
|
||||
| ; Straight-line function with no loops: a volatile i64 load from null feeds a | ||||
| ; chain of mul/add on %index_offsets and %index_offsets9, and the final sum is | ||||
| ; returned as an i64. The CHECK lines expect the same instruction sequence as | ||||
| ; the input. | ||||
| define i64 @_ZNK6Kokkos4ViewIPPPPPPPiJNS_10LayoutLeftENS_6SerialEEE14compute_offsetIJLm0ELm1ELm2ELm3ELm4ELm5ELm6EEJmmmmmmmEEEDaSt16integer_sequenceImJXspT_EEEDpT0_(i64 %index_offsets, i64 %index_offsets9) local_unnamed_addr { | ||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This does not look like a reduced test case -- these functions don't look relevant. |
||||
| ; CHECK-LABEL: define i64 @_ZNK6Kokkos4ViewIPPPPPPPiJNS_10LayoutLeftENS_6SerialEEE14compute_offsetIJLm0ELm1ELm2ELm3ELm4ELm5ELm6EEJmmmmmmmEEEDaSt16integer_sequenceImJXspT_EEEDpT0_( | ||||
| ; CHECK-SAME: i64 [[INDEX_OFFSETS:%.*]], i64 [[INDEX_OFFSETS9:%.*]]) local_unnamed_addr { | ||||
| ; CHECK-NEXT: [[ENTRY:.*:]] | ||||
| ; CHECK-NEXT: [[CALL31:%.*]] = load volatile i64, ptr null, align 8 | ||||
| ; CHECK-NEXT: [[ADD33:%.*]] = add i64 [[INDEX_OFFSETS9]], [[CALL31]] | ||||
| ; CHECK-NEXT: [[MUL42:%.*]] = mul i64 [[INDEX_OFFSETS]], [[ADD33]] | ||||
| ; CHECK-NEXT: [[ADD43:%.*]] = add i64 [[MUL42]], 1 | ||||
| ; CHECK-NEXT: [[MUL52:%.*]] = mul i64 [[INDEX_OFFSETS]], [[ADD43]] | ||||
| ; CHECK-NEXT: [[ADD53:%.*]] = add i64 [[MUL52]], 1 | ||||
| ; CHECK-NEXT: [[MUL62:%.*]] = mul i64 [[INDEX_OFFSETS]], [[ADD53]] | ||||
| ; CHECK-NEXT: [[ADD63:%.*]] = add i64 [[MUL62]], 1 | ||||
| ; CHECK-NEXT: [[MUL72:%.*]] = mul i64 [[INDEX_OFFSETS9]], [[ADD63]] | ||||
| ; CHECK-NEXT: [[MUL79:%.*]] = mul i64 [[CALL31]], [[MUL72]] | ||||
| ; CHECK-NEXT: [[ADD80:%.*]] = add i64 [[INDEX_OFFSETS]], [[MUL79]] | ||||
| ; CHECK-NEXT: ret i64 [[ADD80]] | ||||
| ; | ||||
| entry: | ||||
| %call31 = load volatile i64, ptr null, align 8 | ||||
| %add33 = add i64 %index_offsets9, %call31 | ||||
| %mul42 = mul i64 %index_offsets, %add33 | ||||
| %add43 = add i64 %mul42, 1 | ||||
| %mul52 = mul i64 %index_offsets, %add43 | ||||
| %add53 = add i64 %mul52, 1 | ||||
| %mul62 = mul i64 %index_offsets, %add53 | ||||
| %add63 = add i64 %mul62, 1 | ||||
| %mul72 = mul i64 %index_offsets9, %add63 | ||||
| %mul79 = mul i64 %call31, %mul72 | ||||
| %add80 = add i64 %index_offsets, %mul79 | ||||
| ret i64 %add80 | ||||
| } | ||||
|
|
||||
| ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) | ||||
| declare void @llvm.assume(i1 noundef) #0 | ||||
|
|
||||
| attributes #0 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) } | ||||
| ;. | ||||
| ; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]} | ||||
| ; CHECK: [[META1]] = !{!"llvm.loop.peeled.count", i32 1} | ||||
| ;. | ||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The fix stops caching depth-limited truncate expressions, which prevents cache poisoning. However, the same caching issue exists in getZeroExtendExprImpl (line 1603) and getSignExtendExprImpl (line 1943): depth-limited, unsimplified zero/sign-extend expressions are still inserted into the UniqueSCEVs cache. As a result, truncate expressions are now handled correctly, but zero- and sign-extend expressions can still cause the same cache poisoning, where a depth-limited unsimplified expression prevents later queries at a lower depth from being simplified properly.