5 changes: 4 additions & 1 deletion llvm/lib/Analysis/ScalarEvolution.cpp
@@ -1172,9 +1172,12 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, Type *Ty,
    return getTruncateOrZeroExtend(SZ->getOperand(), Ty, Depth + 1);

  if (Depth > MaxCastDepth) {
    // Avoid caching depth-limited unsimplified results. Later queries at lower
    // depth should be able to simplify (e.g., truncate of AddRec -> AddRec).
    // Caching here would return this unsimplified SCEVTruncateExpr for future
    // queries, preventing proper simplification.
    SCEV *S =
        new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator), Op, Ty);
    UniqueSCEVs.InsertNode(S, IP);
    registerUser(S, Op);
    return S;
  }
Comment on lines 1174 to 1183
Copilot AI Dec 14, 2025

The fix removes caching of depth-limited truncate expressions to prevent cache poisoning. However, the same caching issue exists in getZeroExtendExprImpl (line 1603) and getSignExtendExprImpl (line 1943), where depth-limited unsimplified zero/sign extend expressions are still being inserted into UniqueSCEVs cache. This inconsistency means that while truncate expressions are now properly handled, zero and sign extend expressions can still cause the same cache poisoning problem, where a depth-limited unsimplified expression prevents later queries at lower depth from properly simplifying.

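To illustrate the point about getZeroExtendExprImpl and getSignExtendExprImpl, here is a rough sketch of how the same no-caching pattern could look in the zero-extend path. The exact shape of that depth-limit branch, and whether registerUser should remain once the UniqueSCEVs insertion is dropped, are assumptions here; this is not part of the patch.

// Hypothetical sketch, not part of this change: mirror the getTruncateExpr
// fix in getZeroExtendExprImpl's depth-limit branch.
if (Depth > MaxCastDepth) {
  // Skip UniqueSCEVs.InsertNode() so a later query at lower depth can still
  // simplify this expression (e.g. zext of an AddRec into an AddRec).
  SCEV *S = new (SCEVAllocator)
      SCEVZeroExtendExpr(ID.Intern(SCEVAllocator), Op, Ty);
  registerUser(S, Op); // kept to mirror the truncate path as shown above
  return S;
}

The sign-extend path would get the analogous treatment with SCEVSignExtendExpr.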
8 changes: 6 additions & 2 deletions llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -1033,8 +1033,12 @@ static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
      SE->getTypeSizeInBits(AR->getType()) >
          SE->getTypeSizeInBits(ExitCount->getType())) {
    const SCEV *IVInit = AR->getStart();
    if (!isa<SCEVConstant>(IVInit) || !isa<SCEVConstant>(ExitCount))
      AR = cast<SCEVAddRecExpr>(SE->getTruncateExpr(AR, ExitCount->getType()));
    if (!isa<SCEVConstant>(IVInit) || !isa<SCEVConstant>(ExitCount)) {
      const SCEV *TruncExpr = SE->getTruncateExpr(AR, ExitCount->getType());
      assert(isa<SCEVAddRecExpr>(TruncExpr) &&
             "TruncateExpr should be an SCEVAddRecExpr");
      AR = cast<SCEVAddRecExpr>(TruncExpr);
    }
  }

  const SCEVAddRecExpr *ARBase = UsePostInc ? AR->getPostIncExpr(*SE) : AR;
223 changes: 223 additions & 0 deletions llvm/test/Transforms/IndVarSimplify/scev-update-loop-opt.ll
@@ -0,0 +1,223 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt %s -passes="loop(loop-idiom,indvars,loop-deletion,loop-unroll-full)" -S | FileCheck %s
; REQUIRES: asserts
Contributor

Does not need asserts.

Contributor Author

I added an assertion in IndVarSimplify to ensure that the truncated expression is a SCEVAddRecExpr.

Contributor Author

The original issue caused the following assertion to fail:

assert(ExitCnt->getType()->isPointerTy() ==

Contributor

You only need to add REQUIRES: asserts if the test will fail in a build without assertions, e.g. because you're using -debug. It's not intended for tests that happened to assert in the past.


target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
Contributor

Do not use triple.


define void @_ZNK4Test29TestViewOperator_LeftAndRightIiN6Kokkos6SerialELj7EEclEmRi(i32 %conv5, i1 %cmp13, i1 %cmp20, i1 %cmp27, i1 %cmp34, i1 %cmp41) local_unnamed_addr {
; CHECK-LABEL: define void @_ZNK4Test29TestViewOperator_LeftAndRightIiN6Kokkos6SerialELj7EEclEmRi(
; CHECK-SAME: i32 [[CONV5:%.*]], i1 [[CMP13:%.*]], i1 [[CMP20:%.*]], i1 [[CMP27:%.*]], i1 [[CMP34:%.*]], i1 [[CMP41:%.*]]) local_unnamed_addr {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[CONV5]], 1
; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
; CHECK-NEXT: br label %[[FOR_COND:.*]]
; CHECK: [[FOR_COND_LOOPEXIT:.*]]:
; CHECK-NEXT: br label %[[FOR_COND]]
; CHECK: [[FOR_COND]]:
; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[CONV5]] to i64
; CHECK-NEXT: br label %[[FOR_COND2:.*]]
; CHECK: [[FOR_COND2]]:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_COND_CLEANUP14:.*]] ], [ 0, %[[FOR_COND]] ]
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV]], [[TMP2]]
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_COND9_PREHEADER:.*]], label %[[FOR_COND_LOOPEXIT]]
; CHECK: [[FOR_COND9_PREHEADER]]:
; CHECK-NEXT: br label %[[FOR_COND9:.*]]
; CHECK: [[FOR_COND9_LOOPEXIT:.*]]:
; CHECK-NEXT: br label %[[FOR_COND9]]
; CHECK: [[FOR_COND9]]:
; CHECK-NEXT: br i1 [[CMP13]], label %[[FOR_COND16_PREHEADER:.*]], label %[[FOR_COND_CLEANUP14]]
; CHECK: [[FOR_COND16_PREHEADER]]:
; CHECK-NEXT: br label %[[FOR_COND16:.*]]
; CHECK: [[FOR_COND_CLEANUP14]]:
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: br label %[[FOR_COND2]]
; CHECK: [[FOR_COND16_LOOPEXIT:.*]]:
; CHECK-NEXT: br label %[[FOR_COND16]]
; CHECK: [[FOR_COND16]]:
; CHECK-NEXT: br i1 [[CMP20]], label %[[FOR_COND23_PREHEADER:.*]], label %[[FOR_COND9_LOOPEXIT]]
; CHECK: [[FOR_COND23_PREHEADER]]:
; CHECK-NEXT: br label %[[FOR_COND23:.*]]
; CHECK: [[FOR_COND23_LOOPEXIT:.*]]:
; CHECK-NEXT: br label %[[FOR_COND23]]
; CHECK: [[FOR_COND23]]:
; CHECK-NEXT: br i1 [[CMP27]], label %[[FOR_COND30_PREHEADER:.*]], label %[[FOR_COND16_LOOPEXIT]]
; CHECK: [[FOR_COND30_PREHEADER]]:
; CHECK-NEXT: br label %[[FOR_COND30:.*]]
; CHECK: [[FOR_COND30_LOOPEXIT_LOOPEXIT:.*]]:
; CHECK-NEXT: br label %[[FOR_COND30_LOOPEXIT:.*]]
; CHECK: [[FOR_COND30_LOOPEXIT]]:
; CHECK-NEXT: br label %[[FOR_COND30]]
; CHECK: [[FOR_COND30]]:
; CHECK-NEXT: br i1 [[CMP34]], label %[[FOR_COND37_PREHEADER:.*]], label %[[FOR_COND23_LOOPEXIT]]
; CHECK: [[FOR_COND37_PREHEADER]]:
; CHECK-NEXT: br label %[[FOR_COND37_PEEL_BEGIN:.*]]
; CHECK: [[FOR_COND37_PEEL_BEGIN]]:
; CHECK-NEXT: br label %[[FOR_COND37_PEEL:.*]]
; CHECK: [[FOR_COND37_PEEL]]:
; CHECK-NEXT: br i1 [[CMP41]], label %[[FOR_BODY43_PEEL:.*]], label %[[FOR_COND30_LOOPEXIT]]
; CHECK: [[FOR_BODY43_PEEL]]:
; CHECK-NEXT: [[CONV45_PEEL:%.*]] = zext i32 0 to i64
; CHECK-NEXT: [[CALL31_I_I_PEEL:%.*]] = load volatile i64, ptr null, align 8
; CHECK-NEXT: [[MUL79_I_I_PEEL:%.*]] = mul i64 [[CALL31_I_I_PEEL]], [[INDVARS_IV]]
; CHECK-NEXT: [[DOTIDX1_PEEL:%.*]] = add i64 [[CONV45_PEEL]], [[MUL79_I_I_PEEL]]
; CHECK-NEXT: [[SUB_PTR_LHS_CAST_PEEL:%.*]] = shl i64 [[DOTIDX1_PEEL]], 2
; CHECK-NEXT: [[SUB_PTR_DIV_PEEL:%.*]] = ashr exact i64 [[SUB_PTR_LHS_CAST_PEEL]], 1
; CHECK-NEXT: [[CMP55_PEEL:%.*]] = icmp sgt i64 0, 0
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP55_PEEL]])
; CHECK-NEXT: br label %[[FOR_COND37_PEEL_NEXT:.*]]
; CHECK: [[FOR_COND37_PEEL_NEXT]]:
; CHECK-NEXT: br label %[[FOR_COND37_PEEL_NEXT1:.*]]
; CHECK: [[FOR_COND37_PEEL_NEXT1]]:
; CHECK-NEXT: br label %[[FOR_COND37_PREHEADER_PEEL_NEWPH:.*]]
; CHECK: [[FOR_COND37_PREHEADER_PEEL_NEWPH]]:
; CHECK-NEXT: br label %[[FOR_COND37:.*]]
; CHECK: [[FOR_COND37]]:
; CHECK-NEXT: [[OFFSET_619:%.*]] = phi i64 [ [[SUB_PTR_DIV:%.*]], %[[FOR_BODY43:.*]] ], [ [[SUB_PTR_DIV_PEEL]], %[[FOR_COND37_PREHEADER_PEEL_NEWPH]] ]
; CHECK-NEXT: br i1 [[CMP41]], label %[[FOR_BODY43]], label %[[FOR_COND30_LOOPEXIT_LOOPEXIT]]
; CHECK: [[FOR_BODY43]]:
; CHECK-NEXT: [[CALL31_I_I:%.*]] = load volatile i64, ptr null, align 8
; CHECK-NEXT: [[ADD33_I_I:%.*]] = add i64 [[INDVARS_IV]], [[CALL31_I_I]]
; CHECK-NEXT: [[MUL42_I_I:%.*]] = mul i64 [[TMP1]], [[ADD33_I_I]]
; CHECK-NEXT: [[ADD43_I_I:%.*]] = add i64 [[MUL42_I_I]], 1
; CHECK-NEXT: [[MUL52_I_I:%.*]] = mul i64 [[TMP1]], [[ADD43_I_I]]
; CHECK-NEXT: [[ADD53_I_I:%.*]] = add i64 [[MUL52_I_I]], 1
; CHECK-NEXT: [[MUL62_I_I:%.*]] = mul i64 [[TMP1]], [[ADD53_I_I]]
; CHECK-NEXT: [[ADD63_I_I:%.*]] = add i64 [[MUL62_I_I]], 1
; CHECK-NEXT: [[MUL72_I_I:%.*]] = mul i64 [[INDVARS_IV]], [[ADD63_I_I]]
; CHECK-NEXT: [[MUL79_I_I:%.*]] = mul i64 [[CALL31_I_I]], [[MUL72_I_I]]
; CHECK-NEXT: [[DOTIDX1:%.*]] = add i64 [[TMP1]], [[MUL79_I_I]]
; CHECK-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = shl i64 [[DOTIDX1]], 2
; CHECK-NEXT: [[SUB_PTR_DIV]] = ashr exact i64 [[SUB_PTR_LHS_CAST]], 1
; CHECK-NEXT: [[CMP55:%.*]] = icmp sgt i64 [[OFFSET_619]], 0
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP55]])
; CHECK-NEXT: br label %[[FOR_COND37]], !llvm.loop [[LOOP0:![0-9]+]]
;
entry:
br label %for.cond

for.cond: ; preds = %for.cond2, %entry
br label %for.cond2

for.cond2: ; preds = %for.cond.cleanup14, %for.cond
%i5.0 = phi i32 [ 0, %for.cond ], [ %inc70, %for.cond.cleanup14 ]
%cmp6 = icmp ult i32 %i5.0, %conv5
br i1 %cmp6, label %for.cond9, label %for.cond

for.cond9: ; preds = %for.cond16, %for.cond2
br i1 %cmp13, label %for.cond16, label %for.cond.cleanup14

for.cond.cleanup14: ; preds = %for.cond9
%inc70 = add i32 %i5.0, 1
br label %for.cond2

for.cond16: ; preds = %for.cond23, %for.cond9
br i1 %cmp20, label %for.cond23, label %for.cond9

for.cond23: ; preds = %for.cond30, %for.cond16
br i1 %cmp27, label %for.cond30, label %for.cond16

for.cond30: ; preds = %for.cond37, %for.cond23
br i1 %cmp34, label %for.cond37, label %for.cond23

for.cond37: ; preds = %for.body43, %for.cond30
%i0.018 = phi i32 [ %inc, %for.body43 ], [ 0, %for.cond30 ]
%offset.619 = phi i64 [ %sub.ptr.div, %for.body43 ], [ 0, %for.cond30 ]
br i1 %cmp41, label %for.body43, label %for.cond30

for.body43: ; preds = %for.cond37
%conv45 = zext i32 %i0.018 to i64
%conv50 = zext i32 %i5.0 to i64
%call31.i.i = load volatile i64, ptr null, align 8
%add33.i.i = add i64 %conv50, %call31.i.i
%mul42.i.i = mul i64 %conv45, %add33.i.i
%add43.i.i = add i64 %mul42.i.i, 1
%mul52.i.i = mul i64 %conv45, %add43.i.i
%add53.i.i = add i64 %mul52.i.i, 1
%mul62.i.i = mul i64 %conv45, %add53.i.i
%add63.i.i = add i64 %mul62.i.i, 1
%mul72.i.i = mul i64 %conv50, %add63.i.i
%mul79.i.i = mul i64 %call31.i.i, %mul72.i.i
%.idx1 = add i64 %conv45, %mul79.i.i
%sub.ptr.lhs.cast = shl i64 %.idx1, 2
%sub.ptr.div = ashr exact i64 %sub.ptr.lhs.cast, 1
%cmp55 = icmp sgt i64 %offset.619, 0
call void @llvm.assume(i1 %cmp55)
%inc = add i32 %conv5, 1
br label %for.cond37
}

define ptr @_ZNK6Kokkos11DynRankViewIiJNS_10LayoutLeftENS_6SerialEEEclEmmmmmmm(i64 %i0, i64 %i5) local_unnamed_addr {
; CHECK-LABEL: define ptr @_ZNK6Kokkos11DynRankViewIiJNS_10LayoutLeftENS_6SerialEEEclEmmmmmmm(
; CHECK-SAME: i64 [[I0:%.*]], i64 [[I5:%.*]]) local_unnamed_addr {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[CALL31_I:%.*]] = load volatile i64, ptr null, align 8
; CHECK-NEXT: [[ADD33_I:%.*]] = add i64 [[I5]], [[CALL31_I]]
; CHECK-NEXT: [[MUL42_I:%.*]] = mul i64 [[I0]], [[ADD33_I]]
; CHECK-NEXT: [[ADD43_I:%.*]] = add i64 [[MUL42_I]], 1
; CHECK-NEXT: [[MUL52_I:%.*]] = mul i64 [[I0]], [[ADD43_I]]
; CHECK-NEXT: [[ADD53_I:%.*]] = add i64 [[MUL52_I]], 1
; CHECK-NEXT: [[MUL62_I:%.*]] = mul i64 [[I0]], [[ADD53_I]]
; CHECK-NEXT: [[ADD63_I:%.*]] = add i64 [[MUL62_I]], 1
; CHECK-NEXT: [[MUL72_I:%.*]] = mul i64 [[I5]], [[ADD63_I]]
; CHECK-NEXT: [[MUL79_I:%.*]] = mul i64 [[CALL31_I]], [[MUL72_I]]
; CHECK-NEXT: [[ADD80_I:%.*]] = add i64 [[I0]], [[MUL79_I]]
; CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr i32, ptr null, i64 [[ADD80_I]]
; CHECK-NEXT: ret ptr [[ARRAYIDX_I]]
;
entry:
%call31.i = load volatile i64, ptr null, align 8
%add33.i = add i64 %i5, %call31.i
%mul42.i = mul i64 %i0, %add33.i
%add43.i = add i64 %mul42.i, 1
%mul52.i = mul i64 %i0, %add43.i
%add53.i = add i64 %mul52.i, 1
%mul62.i = mul i64 %i0, %add53.i
%add63.i = add i64 %mul62.i, 1
%mul72.i = mul i64 %i5, %add63.i
%mul79.i = mul i64 %call31.i, %mul72.i
%add80.i = add i64 %i0, %mul79.i
%arrayidx.i = getelementptr i32, ptr null, i64 %add80.i
ret ptr %arrayidx.i
}

define i64 @_ZNK6Kokkos4ViewIPPPPPPPiJNS_10LayoutLeftENS_6SerialEEE14compute_offsetIJLm0ELm1ELm2ELm3ELm4ELm5ELm6EEJmmmmmmmEEEDaSt16integer_sequenceImJXspT_EEEDpT0_(i64 %index_offsets, i64 %index_offsets9) local_unnamed_addr {
Contributor

This does not look like a reduced test case -- these functions don't look relevant.

; CHECK-LABEL: define i64 @_ZNK6Kokkos4ViewIPPPPPPPiJNS_10LayoutLeftENS_6SerialEEE14compute_offsetIJLm0ELm1ELm2ELm3ELm4ELm5ELm6EEJmmmmmmmEEEDaSt16integer_sequenceImJXspT_EEEDpT0_(
; CHECK-SAME: i64 [[INDEX_OFFSETS:%.*]], i64 [[INDEX_OFFSETS9:%.*]]) local_unnamed_addr {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[CALL31:%.*]] = load volatile i64, ptr null, align 8
; CHECK-NEXT: [[ADD33:%.*]] = add i64 [[INDEX_OFFSETS9]], [[CALL31]]
; CHECK-NEXT: [[MUL42:%.*]] = mul i64 [[INDEX_OFFSETS]], [[ADD33]]
; CHECK-NEXT: [[ADD43:%.*]] = add i64 [[MUL42]], 1
; CHECK-NEXT: [[MUL52:%.*]] = mul i64 [[INDEX_OFFSETS]], [[ADD43]]
; CHECK-NEXT: [[ADD53:%.*]] = add i64 [[MUL52]], 1
; CHECK-NEXT: [[MUL62:%.*]] = mul i64 [[INDEX_OFFSETS]], [[ADD53]]
; CHECK-NEXT: [[ADD63:%.*]] = add i64 [[MUL62]], 1
; CHECK-NEXT: [[MUL72:%.*]] = mul i64 [[INDEX_OFFSETS9]], [[ADD63]]
; CHECK-NEXT: [[MUL79:%.*]] = mul i64 [[CALL31]], [[MUL72]]
; CHECK-NEXT: [[ADD80:%.*]] = add i64 [[INDEX_OFFSETS]], [[MUL79]]
; CHECK-NEXT: ret i64 [[ADD80]]
;
entry:
%call31 = load volatile i64, ptr null, align 8
%add33 = add i64 %index_offsets9, %call31
%mul42 = mul i64 %index_offsets, %add33
%add43 = add i64 %mul42, 1
%mul52 = mul i64 %index_offsets, %add43
%add53 = add i64 %mul52, 1
%mul62 = mul i64 %index_offsets, %add53
%add63 = add i64 %mul62, 1
%mul72 = mul i64 %index_offsets9, %add63
%mul79 = mul i64 %call31, %mul72
%add80 = add i64 %index_offsets, %mul79
ret i64 %add80
}

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write)
declare void @llvm.assume(i1 noundef) #0

attributes #0 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
;.
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
; CHECK: [[META1]] = !{!"llvm.loop.peeled.count", i32 1}
;.