From 30334ac01be4fca5627edeeadfd4a4a8f47ef639 Mon Sep 17 00:00:00 2001 From: Abhishek Kaushik Date: Sun, 22 Jun 2025 14:15:03 +0530 Subject: [PATCH 1/5] [Reland][ValueTracking] Improve Bitcast handling to match SDAG --- llvm/lib/Analysis/ValueTracking.cpp | 27 +++++++++++++++++-- .../InstCombine/X86/x86-vector-shifts.ll | 4 +-- .../InstCombine/bitcast-known-bits.ll | 21 +++++---------- 3 files changed, 34 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index a17417cb5189c..2c4d55eea1dda 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -1346,6 +1346,8 @@ static void computeKnownBitsFromOperator(const Operator *I, isa(I->getType())) break; + unsigned NumElts = DemandedElts.getBitWidth(); + bool IsLE = Q.DL.isLittleEndian(); // Look through a cast from narrow vector elements to wider type. // Examples: v4i32 -> v2i64, v3i8 -> v24 unsigned SubBitWidth = SrcVecTy->getScalarSizeInBits(); @@ -1364,7 +1366,6 @@ static void computeKnownBitsFromOperator(const Operator *I, // // The known bits of each sub-element are then inserted into place // (dependent on endian) to form the full result of known bits. - unsigned NumElts = DemandedElts.getBitWidth(); unsigned SubScale = BitWidth / SubBitWidth; APInt SubDemandedElts = APInt::getZero(NumElts * SubScale); for (unsigned i = 0; i != NumElts; ++i) { @@ -1376,10 +1377,32 @@ static void computeKnownBitsFromOperator(const Operator *I, for (unsigned i = 0; i != SubScale; ++i) { computeKnownBits(I->getOperand(0), SubDemandedElts.shl(i), KnownSrc, Q, Depth + 1); - unsigned ShiftElt = Q.DL.isLittleEndian() ? i : SubScale - 1 - i; + unsigned ShiftElt = IsLE ? i : SubScale - 1 - i; Known.insertBits(KnownSrc, ShiftElt * SubBitWidth); } } + // Look through a cast from wider vector elements to narrow type. + // Examples: v2i64 -> v4i32 + if (SubBitWidth % BitWidth == 0) { + unsigned SubScale = SubBitWidth / BitWidth; + KnownBits KnownSrc(SubBitWidth); + APInt SubDemandedElts = + APIntOps::ScaleBitMask(DemandedElts, NumElts / SubScale); + computeKnownBits(I->getOperand(0), SubDemandedElts, KnownSrc, Q, + Depth + 1); + + Known.Zero.setAllBits(); + Known.One.setAllBits(); + for (unsigned i = 0; i != NumElts; ++i) { + if (DemandedElts[i]) { + unsigned Shifts = IsLE ? i : NumElts - 1 - i; + unsigned Offset = (Shifts % SubScale) * BitWidth; + Known = Known.intersectWith(KnownSrc.extractBits(BitWidth, Offset)); + if (Known.isUnknown()) + break; + } + } + } break; } case Instruction::SExt: { diff --git a/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll b/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll index db56080a3ea2b..cc252ae53803b 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll @@ -3732,7 +3732,6 @@ define <4 x i64> @test_avx2_psrl_0() { ret <4 x i64> %16 } -; FIXME: Failure to peek through bitcasts to ensure psllq shift amount is within bounds. define <2 x i64> @PR125228(<2 x i64> %v, <2 x i64> %s) { ; CHECK-LABEL: @PR125228( ; CHECK-NEXT: [[MASK:%.*]] = and <2 x i64> [[S:%.*]], splat (i64 63) @@ -3741,7 +3740,8 @@ define <2 x i64> @PR125228(<2 x i64> %v, <2 x i64> %s) { ; CHECK-NEXT: [[CAST:%.*]] = bitcast <2 x i64> [[MASK]] to <16 x i8> ; CHECK-NEXT: [[PSRLDQ:%.*]] = shufflevector <16 x i8> [[CAST]], <16 x i8> poison, <16 x i32> ; CHECK-NEXT: [[CAST3:%.*]] = bitcast <16 x i8> [[PSRLDQ]] to <2 x i64> -; CHECK-NEXT: [[SLL1:%.*]] = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> [[V]], <2 x i64> [[CAST3]]) +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[CAST3]], <2 x i64> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[SLL1:%.*]] = shl <2 x i64> [[V]], [[TMP2]] ; CHECK-NEXT: [[SHUFP_UNCASTED:%.*]] = shufflevector <2 x i64> [[SLL0]], <2 x i64> [[SLL1]], <2 x i32> ; CHECK-NEXT: ret <2 x i64> [[SHUFP_UNCASTED]] ; diff --git a/llvm/test/Transforms/InstCombine/bitcast-known-bits.ll b/llvm/test/Transforms/InstCombine/bitcast-known-bits.ll index 3e47e775e3a28..65b43df752f76 100644 --- a/llvm/test/Transforms/InstCombine/bitcast-known-bits.ll +++ b/llvm/test/Transforms/InstCombine/bitcast-known-bits.ll @@ -12,8 +12,7 @@ define <16 x i8> @knownbits_bitcast_masked_shift(<16 x i8> %arg1, <16 x i8> %arg ; CHECK-NEXT: [[BITCAST4:%.*]] = bitcast <16 x i8> [[OR]] to <8 x i16> ; CHECK-NEXT: [[SHL5:%.*]] = shl nuw <8 x i16> [[BITCAST4]], splat (i16 2) ; CHECK-NEXT: [[BITCAST6:%.*]] = bitcast <8 x i16> [[SHL5]] to <16 x i8> -; CHECK-NEXT: [[AND7:%.*]] = and <16 x i8> [[BITCAST6]], splat (i8 -52) -; CHECK-NEXT: ret <16 x i8> [[AND7]] +; CHECK-NEXT: ret <16 x i8> [[BITCAST6]] ; %and = and <16 x i8> %arg1, splat (i8 3) %and3 = and <16 x i8> %arg2, splat (i8 48) @@ -33,8 +32,7 @@ define <16 x i8> @knownbits_shuffle_masked_nibble_shift(<16 x i8> %arg) { ; CHECK-NEXT: [[BITCAST1:%.*]] = bitcast <16 x i8> [[SHUFFLEVECTOR]] to <8 x i16> ; CHECK-NEXT: [[SHL:%.*]] = shl nuw <8 x i16> [[BITCAST1]], splat (i16 4) ; CHECK-NEXT: [[BITCAST2:%.*]] = bitcast <8 x i16> [[SHL]] to <16 x i8> -; CHECK-NEXT: [[AND3:%.*]] = and <16 x i8> [[BITCAST2]], splat (i8 -16) -; CHECK-NEXT: ret <16 x i8> [[AND3]] +; CHECK-NEXT: ret <16 x i8> [[BITCAST2]] ; %and = and <16 x i8> %arg, splat (i8 15) %shufflevector = shufflevector <16 x i8> %and, <16 x i8> poison, <16 x i32> @@ -53,8 +51,7 @@ define <16 x i8> @knownbits_reverse_shuffle_masked_shift(<16 x i8> %arg) { ; CHECK-NEXT: [[BITCAST1:%.*]] = bitcast <16 x i8> [[SHUFFLEVECTOR]] to <8 x i16> ; CHECK-NEXT: [[SHL:%.*]] = shl nuw <8 x i16> [[BITCAST1]], splat (i16 4) ; CHECK-NEXT: [[BITCAST2:%.*]] = bitcast <8 x i16> [[SHL]] to <16 x i8> -; CHECK-NEXT: [[AND3:%.*]] = and <16 x i8> [[BITCAST2]], splat (i8 -16) -; CHECK-NEXT: ret <16 x i8> [[AND3]] +; CHECK-NEXT: ret <16 x i8> [[BITCAST2]] ; %and = and <16 x i8> %arg, splat (i8 15) %shufflevector = shufflevector <16 x i8> %and, <16 x i8> poison, <16 x i32> @@ -70,8 +67,7 @@ define <16 x i8> @knownbits_extract_bit(<8 x i16> %arg) { ; CHECK-SAME: <8 x i16> [[ARG:%.*]]) { ; CHECK-NEXT: [[LSHR:%.*]] = lshr <8 x i16> [[ARG]], splat (i16 15) ; CHECK-NEXT: [[BITCAST1:%.*]] = bitcast <8 x i16> [[LSHR]] to <16 x i8> -; CHECK-NEXT: [[AND:%.*]] = and <16 x i8> [[BITCAST1]], splat (i8 1) -; CHECK-NEXT: ret <16 x i8> [[AND]] +; CHECK-NEXT: ret <16 x i8> [[BITCAST1]] ; %lshr = lshr <8 x i16> %arg, splat (i16 15) %bitcast1 = bitcast <8 x i16> %lshr to <16 x i8> @@ -88,7 +84,8 @@ define { i32, i1 } @knownbits_popcount_add_with_overflow(<2 x i64> %arg1, <2 x i ; CHECK-NEXT: [[CALL9:%.*]] = tail call range(i64 0, 65) <2 x i64> @llvm.ctpop.v2i64(<2 x i64> [[ARG2]]) ; CHECK-NEXT: [[BITCAST10:%.*]] = bitcast <2 x i64> [[CALL9]] to <4 x i32> ; CHECK-NEXT: [[EXTRACTELEMENT11:%.*]] = extractelement <4 x i32> [[BITCAST10]], i64 0 -; CHECK-NEXT: [[TMP1:%.*]] = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[EXTRACTELEMENT]], i32 [[EXTRACTELEMENT11]]) +; CHECK-NEXT: [[CALL12:%.*]] = add nuw nsw i32 [[EXTRACTELEMENT]], [[EXTRACTELEMENT11]] +; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { i32, i1 } { i32 poison, i1 false }, i32 [[CALL12]], 0 ; CHECK-NEXT: ret { i32, i1 } [[TMP1]] ; %call = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %arg1) @@ -110,11 +107,7 @@ define <16 x i8> @knownbits_shuffle_add_shift_v32i8(<16 x i8> %arg1, <8 x i16> % ; CHECK-NEXT: [[BITCAST11:%.*]] = bitcast <8 x i16> [[SHL10]] to <16 x i8> ; CHECK-NEXT: [[ADD12:%.*]] = add <16 x i8> [[BITCAST11]], [[BITCAST7]] ; CHECK-NEXT: [[ADD14:%.*]] = add <16 x i8> [[ADD12]], [[ARG1]] -; CHECK-NEXT: [[BITCAST14:%.*]] = bitcast <16 x i8> [[ADD12]] to <8 x i16> -; CHECK-NEXT: [[SHL15:%.*]] = shl <8 x i16> [[BITCAST14]], splat (i16 8) -; CHECK-NEXT: [[BITCAST16:%.*]] = bitcast <8 x i16> [[SHL15]] to <16 x i8> -; CHECK-NEXT: [[ADD13:%.*]] = add <16 x i8> [[ADD14]], [[BITCAST16]] -; CHECK-NEXT: ret <16 x i8> [[ADD13]] +; CHECK-NEXT: ret <16 x i8> [[ADD14]] ; %shl6 = shl <8 x i16> %arg2, splat (i16 8) %bitcast7 = bitcast <8 x i16> %shl6 to <16 x i8> From b9521a6c90f6d4cc36bc382b9af1b42105648a75 Mon Sep 17 00:00:00 2001 From: Abhishek Kaushik Date: Thu, 26 Jun 2025 23:24:40 +0530 Subject: [PATCH 2/5] Add miscompile test --- .../Transforms/Inline/bitcast-knownbits.ll | 45 +++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 llvm/test/Transforms/Inline/bitcast-knownbits.ll diff --git a/llvm/test/Transforms/Inline/bitcast-knownbits.ll b/llvm/test/Transforms/Inline/bitcast-knownbits.ll new file mode 100644 index 0000000000000..07c677126c039 --- /dev/null +++ b/llvm/test/Transforms/Inline/bitcast-knownbits.ll @@ -0,0 +1,45 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -passes=always-inline -S | FileCheck %s + +define <2 x i64> @vpx_lpf_horizontal_4_sse2(<2 x i64> %0) { +; CHECK-LABEL: define <2 x i64> @vpx_lpf_horizontal_4_sse2( +; CHECK-SAME: <2 x i64> [[TMP0:%.*]]) { +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i8> [[TMP2]], <16 x i8> zeroinitializer, <16 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <8 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = ashr <8 x i16> [[TMP5]], splat (i16 1) +; CHECK-NEXT: ret <2 x i64> zeroinitializer +; + %2 = call fastcc <2 x i64> @_mm_unpackhi_epi8(<2 x i64> %0) + %3 = call fastcc <2 x i64> @_mm_srai_epi16(<2 x i64> %2) + ret <2 x i64> %3 +} + +define fastcc <2 x i64> @_mm_unpackhi_epi8(<2 x i64> %0) #0 { +; CHECK-LABEL: define fastcc <2 x i64> @_mm_unpackhi_epi8( +; CHECK-SAME: <2 x i64> [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i8> [[TMP2]], <16 x i8> zeroinitializer, <16 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x i64> +; CHECK-NEXT: ret <2 x i64> [[TMP4]] +; + %2 = bitcast <2 x i64> %0 to <16 x i8> + %3 = shufflevector <16 x i8> %2, <16 x i8> zeroinitializer, <16 x i32> + %4 = bitcast <16 x i8> %3 to <2 x i64> + ret <2 x i64> %4 +} + +define fastcc <2 x i64> @_mm_srai_epi16(<2 x i64> %0) #0 { +; CHECK-LABEL: define fastcc <2 x i64> @_mm_srai_epi16( +; CHECK-SAME: <2 x i64> [[TMP0:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP0]] to <8 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = ashr <8 x i16> [[TMP2]], splat (i16 1) +; CHECK-NEXT: ret <2 x i64> zeroinitializer +; + %2 = bitcast <2 x i64> %0 to <8 x i16> + %3 = ashr <8 x i16> %2, splat (i16 1) + ret <2 x i64> zeroinitializer +} + +attributes #0 = { alwaysinline } From 6e4ff4f689e8f610fee55dbe2d6a00ebf7017269 Mon Sep 17 00:00:00 2001 From: Abhishek Kaushik Date: Tue, 29 Jul 2025 22:04:23 +0530 Subject: [PATCH 3/5] Remove inline test and add instsimplify test --- .../Transforms/Inline/bitcast-knownbits.ll | 45 ------------------- .../InstSimplify/shift-knownbits.ll | 32 +++++++++++++ 2 files changed, 32 insertions(+), 45 deletions(-) delete mode 100644 llvm/test/Transforms/Inline/bitcast-knownbits.ll diff --git a/llvm/test/Transforms/Inline/bitcast-knownbits.ll b/llvm/test/Transforms/Inline/bitcast-knownbits.ll deleted file mode 100644 index 07c677126c039..0000000000000 --- a/llvm/test/Transforms/Inline/bitcast-knownbits.ll +++ /dev/null @@ -1,45 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt < %s -passes=always-inline -S | FileCheck %s - -define <2 x i64> @vpx_lpf_horizontal_4_sse2(<2 x i64> %0) { -; CHECK-LABEL: define <2 x i64> @vpx_lpf_horizontal_4_sse2( -; CHECK-SAME: <2 x i64> [[TMP0:%.*]]) { -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8> -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i8> [[TMP2]], <16 x i8> zeroinitializer, <16 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x i64> -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <8 x i16> -; CHECK-NEXT: [[TMP6:%.*]] = ashr <8 x i16> [[TMP5]], splat (i16 1) -; CHECK-NEXT: ret <2 x i64> zeroinitializer -; - %2 = call fastcc <2 x i64> @_mm_unpackhi_epi8(<2 x i64> %0) - %3 = call fastcc <2 x i64> @_mm_srai_epi16(<2 x i64> %2) - ret <2 x i64> %3 -} - -define fastcc <2 x i64> @_mm_unpackhi_epi8(<2 x i64> %0) #0 { -; CHECK-LABEL: define fastcc <2 x i64> @_mm_unpackhi_epi8( -; CHECK-SAME: <2 x i64> [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8> -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i8> [[TMP2]], <16 x i8> zeroinitializer, <16 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x i64> -; CHECK-NEXT: ret <2 x i64> [[TMP4]] -; - %2 = bitcast <2 x i64> %0 to <16 x i8> - %3 = shufflevector <16 x i8> %2, <16 x i8> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i8> %3 to <2 x i64> - ret <2 x i64> %4 -} - -define fastcc <2 x i64> @_mm_srai_epi16(<2 x i64> %0) #0 { -; CHECK-LABEL: define fastcc <2 x i64> @_mm_srai_epi16( -; CHECK-SAME: <2 x i64> [[TMP0:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP0]] to <8 x i16> -; CHECK-NEXT: [[TMP3:%.*]] = ashr <8 x i16> [[TMP2]], splat (i16 1) -; CHECK-NEXT: ret <2 x i64> zeroinitializer -; - %2 = bitcast <2 x i64> %0 to <8 x i16> - %3 = ashr <8 x i16> %2, splat (i16 1) - ret <2 x i64> zeroinitializer -} - -attributes #0 = { alwaysinline } diff --git a/llvm/test/Transforms/InstSimplify/shift-knownbits.ll b/llvm/test/Transforms/InstSimplify/shift-knownbits.ll index 3917172e3b752..c1316041d3d08 100644 --- a/llvm/test/Transforms/InstSimplify/shift-knownbits.ll +++ b/llvm/test/Transforms/InstSimplify/shift-knownbits.ll @@ -499,3 +499,35 @@ define <1 x i64> @bitcast_noshift_vector_wrong_type(<2 x float> %v1, <1 x i64> % %r = shl <1 x i64> %v2, %b ret <1 x i64> %r } + +; Test that verifies correct handling of known bits when bitcasting from a smaller vector +; to a larger one (e.g., <2 x i32> to <8 x i8>). Previously, only the subscale portion +; (e.g., 4 elements) was checked instead of the full demanded vector width (8 elements), +; leading to incorrect known bits and removal of the `ashr` instruction. + +define <8 x i8> @bitcast_knownbits_subscale_miscompile(i32 %x) { +; CHECK-LABEL: @bitcast_knownbits_subscale_miscompile( +; CHECK-NEXT: [[MASKED:%.*]] = and i32 [[X:%.*]], -256 +; CHECK-NEXT: [[BITCAST:%.*]] = bitcast i32 [[MASKED]] to <4 x i8> +; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <4 x i8> [[BITCAST]], i32 3 +; CHECK-NEXT: [[COND:%.*]] = icmp eq i8 [[EXTRACT]], -113 +; CHECK-NEXT: call void @llvm.assume(i1 [[COND]]) +; CHECK-NEXT: [[INSERT:%.*]] = insertelement <2 x i32> poison, i32 [[MASKED]], i32 0 +; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <2 x i32> [[INSERT]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[VEC:%.*]] = bitcast <2 x i32> [[SPLAT]] to <8 x i8> +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <8 x i8> [[VEC]], <8 x i8> zeroinitializer, <8 x i32> +; CHECK-NEXT: [[SHR:%.*]] = ashr <8 x i8> [[SHUF]], splat (i8 1) +; CHECK-NEXT: ret <8 x i8> [[SHR]] +; + %masked = and i32 %x, u0xFFFFFF00 + %bitcast = bitcast i32 %masked to <4 x i8> + %extract = extractelement <4 x i8> %bitcast, i32 3 + %cond = icmp eq i8 %extract, u0x8F + call void @llvm.assume(i1 %cond) + %insert = insertelement <2 x i32> poison, i32 %masked, i32 0 + %splat = shufflevector <2 x i32> %insert, <2 x i32> poison, <2 x i32> splat (i32 0) + %vec = bitcast <2 x i32> %splat to <8 x i8> + %shuf = shufflevector <8 x i8> %vec, <8 x i8> zeroinitializer, <8 x i32> + %shr = ashr <8 x i8> %shuf, splat (i8 1) + ret <8 x i8> %shr +} From 9c02704b06483508ea60f9596337d10f96a44f2a Mon Sep 17 00:00:00 2001 From: Abhishek Kaushik Date: Sun, 3 Aug 2025 16:04:48 +0530 Subject: [PATCH 4/5] Update shift-knownbits.ll --- .../InstSimplify/shift-knownbits.ll | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/llvm/test/Transforms/InstSimplify/shift-knownbits.ll b/llvm/test/Transforms/InstSimplify/shift-knownbits.ll index c1316041d3d08..68b13287e8c96 100644 --- a/llvm/test/Transforms/InstSimplify/shift-knownbits.ll +++ b/llvm/test/Transforms/InstSimplify/shift-knownbits.ll @@ -505,14 +505,16 @@ define <1 x i64> @bitcast_noshift_vector_wrong_type(<2 x float> %v1, <1 x i64> % ; (e.g., 4 elements) was checked instead of the full demanded vector width (8 elements), ; leading to incorrect known bits and removal of the `ashr` instruction. +; Test that verifies correct handling of known bits when bitcasting from a smaller vector +; to a larger one (e.g., <2 x i32> to <8 x i8>). Previously, only the subscale portion +; (e.g., 4 elements) was checked instead of the full demanded vector width (8 elements), +; leading to incorrect known bits and removal of the `ashr` instruction. + define <8 x i8> @bitcast_knownbits_subscale_miscompile(i32 %x) { ; CHECK-LABEL: @bitcast_knownbits_subscale_miscompile( ; CHECK-NEXT: [[MASKED:%.*]] = and i32 [[X:%.*]], -256 -; CHECK-NEXT: [[BITCAST:%.*]] = bitcast i32 [[MASKED]] to <4 x i8> -; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <4 x i8> [[BITCAST]], i32 3 -; CHECK-NEXT: [[COND:%.*]] = icmp eq i8 [[EXTRACT]], -113 -; CHECK-NEXT: call void @llvm.assume(i1 [[COND]]) -; CHECK-NEXT: [[INSERT:%.*]] = insertelement <2 x i32> poison, i32 [[MASKED]], i32 0 +; CHECK-NEXT: [[SETBITS:%.*]] = or i32 [[MASKED]], -16777216 +; CHECK-NEXT: [[INSERT:%.*]] = insertelement <2 x i32> poison, i32 [[SETBITS]], i32 0 ; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <2 x i32> [[INSERT]], <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[VEC:%.*]] = bitcast <2 x i32> [[SPLAT]] to <8 x i8> ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <8 x i8> [[VEC]], <8 x i8> zeroinitializer, <8 x i32> @@ -520,11 +522,8 @@ define <8 x i8> @bitcast_knownbits_subscale_miscompile(i32 %x) { ; CHECK-NEXT: ret <8 x i8> [[SHR]] ; %masked = and i32 %x, u0xFFFFFF00 - %bitcast = bitcast i32 %masked to <4 x i8> - %extract = extractelement <4 x i8> %bitcast, i32 3 - %cond = icmp eq i8 %extract, u0x8F - call void @llvm.assume(i1 %cond) - %insert = insertelement <2 x i32> poison, i32 %masked, i32 0 + %setbits = or i32 %masked, u0xFF000000 + %insert = insertelement <2 x i32> poison, i32 %setbits, i32 0 %splat = shufflevector <2 x i32> %insert, <2 x i32> poison, <2 x i32> splat (i32 0) %vec = bitcast <2 x i32> %splat to <8 x i8> %shuf = shufflevector <8 x i8> %vec, <8 x i8> zeroinitializer, <8 x i32> From ac4766b81f0a48b090fd6b5d6baf3c30347affa8 Mon Sep 17 00:00:00 2001 From: Abhishek Kaushik Date: Sun, 3 Aug 2025 16:08:49 +0530 Subject: [PATCH 5/5] Update shift-knownbits.ll --- llvm/test/Transforms/InstSimplify/shift-knownbits.ll | 5 ----- 1 file changed, 5 deletions(-) diff --git a/llvm/test/Transforms/InstSimplify/shift-knownbits.ll b/llvm/test/Transforms/InstSimplify/shift-knownbits.ll index 68b13287e8c96..940a41bf6483f 100644 --- a/llvm/test/Transforms/InstSimplify/shift-knownbits.ll +++ b/llvm/test/Transforms/InstSimplify/shift-knownbits.ll @@ -505,11 +505,6 @@ define <1 x i64> @bitcast_noshift_vector_wrong_type(<2 x float> %v1, <1 x i64> % ; (e.g., 4 elements) was checked instead of the full demanded vector width (8 elements), ; leading to incorrect known bits and removal of the `ashr` instruction. -; Test that verifies correct handling of known bits when bitcasting from a smaller vector -; to a larger one (e.g., <2 x i32> to <8 x i8>). Previously, only the subscale portion -; (e.g., 4 elements) was checked instead of the full demanded vector width (8 elements), -; leading to incorrect known bits and removal of the `ashr` instruction. - define <8 x i8> @bitcast_knownbits_subscale_miscompile(i32 %x) { ; CHECK-LABEL: @bitcast_knownbits_subscale_miscompile( ; CHECK-NEXT: [[MASKED:%.*]] = and i32 [[X:%.*]], -256