[InstCombine] Fold reconstruction across select #145102

macsencasaus · 2025-06-20T21:12:06Z

Closes #144020
https://alive2.llvm.org/ce/z/E85DRW

github-actions · 2025-06-20T21:12:24Z

Thank you for submitting a Pull Request (PR) to the LLVM Project!

This PR will be automatically labeled and the relevant teams will be notified.

If you wish to, you can add reviewers by using the "Reviewers" section on this page.

If this is not working for you, it is probably because you do not have write permissions for the repository. In which case you can instead tag reviewers by name in a comment by using @ followed by their GitHub username.

If you have received no comments on your PR for a week, you can request a review by "ping"ing the PR by adding a comment “Ping”. The common courtesy "ping" rate is once a week. Please remember that you are asking for valuable time from other developers.

If you have further questions, they may be answered by the LLVM GitHub User Guide.

You can also ask questions in a comment on this PR, on the LLVM Discord or on the forums.

llvmbot · 2025-06-20T21:12:54Z

@llvm/pr-subscribers-llvm-transforms

Author: Macsen Casaus (macsencasaus)

Changes

Closes #144020
https://alive2.llvm.org/ce/z/E85DRW

Full diff: https://github.com/llvm/llvm-project/pull/145102.diff

2 Files Affected:

(modified) llvm/lib/Transforms/InstCombine/InstructionCombining.cpp (+35)
(added) llvm/test/Transforms/InstCombine/select-reconstruction.ll (+93)

diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 4fe900e9421f8..d23441a4a8129 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1349,6 +1349,37 @@ Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(BinaryOperator &I,
     return nullptr;
   };
 
+  // Special case for reconstructing a value across a select:
+  //   (Cond ? V1 : (X & Mask)) | zext (Cond ? V2 : trunc X)
+  //   -> Cond ? (V1 | zext V2) : ((X & Mask) | zext trunc X)
+  // Each arm is first offered to simplifyBinOp; an explicit `or` is only
+  // built when that returns null.
+  //
+  // V1 / Masked are the true / false arms of the select operand and ZExtSel
+  // is the other operand of I. Returns the replacement select, or nullptr if
+  // the pattern does not match.
+  auto foldReconstruction = [&](Value *V1, Value *Masked,
+                                Value *ZExtSel) -> Value * {
+    // The arms below are rebuilt with Builder.CreateOr, and the call sites
+    // pass the select arms first regardless of which side of I the select was
+    // on. Both are only sound when I itself is an `or`, so reject every other
+    // opcode instead of silently emitting the wrong operation.
+    if (Opcode != Instruction::Or)
+      return nullptr;
+
+    Value *X;
+    if (!match(Masked, m_OneUse(m_And(m_Value(X), m_Constant()))))
+      return nullptr;
+
+    Value *V2, *Trunc;
+    if (!match(ZExtSel, m_ZExt(m_OneUse(m_Select(m_Specific(Cond), m_Value(V2),
+                                                 m_Value(Trunc))))))
+      return nullptr;
+
+    // The false arm of the narrow select must be a truncation of the same X
+    // that feeds the mask.
+    if (!match(Trunc, m_Trunc(m_Specific(X))))
+      return nullptr;
+
+    // Named *Arm so they do not shadow the enclosing function's True/False.
+    Value *ZExtTrue = Builder.CreateZExt(V2, V1->getType());
+    Value *TrueArm = simplifyBinOp(Opcode, V1, ZExtTrue, FMF, Q);
+    if (!TrueArm)
+      TrueArm = Builder.CreateOr(V1, ZExtTrue);
+
+    Value *ZExtFalse = Builder.CreateZExt(Trunc, V1->getType());
+    Value *FalseArm = simplifyBinOp(Opcode, Masked, ZExtFalse, FMF, Q);
+    if (!FalseArm)
+      FalseArm = Builder.CreateOr(Masked, ZExtFalse);
+
+    return Builder.CreateSelect(Cond, TrueArm, FalseArm, I.getName());
+  };
+
   if (LHSIsSelect && RHSIsSelect && A == D) {
     // (A ? B : C) op (A ? E : F) -> A ? (B op E) : (C op F)
     Cond = A;
@@ -1368,6 +1399,8 @@ Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(BinaryOperator &I,
     False = simplifyBinOp(Opcode, C, RHS, FMF, Q);
     if (Value *NewSel = foldAddNegate(B, C, RHS))
       return NewSel;
+    if (Value *NewSel = foldReconstruction(B, C, RHS))
+      return NewSel;
   } else if (RHSIsSelect && RHS->hasOneUse()) {
     // X op (D ? E : F) -> D ? (X op E) : (X op F)
     Cond = D;
@@ -1375,6 +1408,8 @@ Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(BinaryOperator &I,
     False = simplifyBinOp(Opcode, LHS, F, FMF, Q);
     if (Value *NewSel = foldAddNegate(E, F, LHS))
       return NewSel;
+    if (Value *NewSel = foldReconstruction(E, F, LHS))
+      return NewSel;
   }
 
   if (!True || !False)
diff --git a/llvm/test/Transforms/InstCombine/select-reconstruction.ll b/llvm/test/Transforms/InstCombine/select-reconstruction.ll
new file mode 100644
index 0000000000000..eb918ed4f40d0
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/select-reconstruction.ll
@@ -0,0 +1,93 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+; Baseline case: the low 8 bits of %arg0 (trunc) and the remaining high bits
+; (and -256) are each selected against 0 under the same condition and then
+; recombined with `or disjoint`. The CHECK lines expect a single
+; select between 0 and the original %arg0.
+define i40 @select_reconstruction_i40(i40 %arg0) {
+; CHECK-LABEL: define i40 @select_reconstruction_i40(
+; CHECK-SAME: i40 [[ARG0:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i40 [[ARG0]] to i8
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 2
+; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i40 0, i40 [[ARG0]]
+; CHECK-NEXT:    ret i40 [[TMP3]]
+;
+  %1 = trunc i40 %arg0 to i8
+  %2 = icmp eq i8 %1, 2
+  %3 = and i40 %arg0, -256
+  %4 = select i1 %2, i8 0, i8 %1
+  %5 = select i1 %2, i40 0, i40 %3
+  %6 = zext i8 %4 to i40
+  %7 = or disjoint i40 %5, %6
+  ret i40 %7
+}
+
+; Same shape as the baseline i40 case, but the truncated byte is compared
+; against the function argument %arg1 instead of a constant. The CHECK lines
+; expect the same single-select result.
+define i40 @select_reconstruction_any_cmp_val(i40 %arg0, i8 %arg1) {
+; CHECK-LABEL: define i40 @select_reconstruction_any_cmp_val(
+; CHECK-SAME: i40 [[ARG0:%.*]], i8 [[ARG1:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i40 [[ARG0]] to i8
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[ARG1]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i40 0, i40 [[ARG0]]
+; CHECK-NEXT:    ret i40 [[TMP3]]
+;
+  %1 = trunc i40 %arg0 to i8
+  %2 = icmp eq i8 %1, %arg1
+  %3 = and i40 %arg0, -256
+  %4 = select i1 %2, i8 0, i8 %1
+  %5 = select i1 %2, i40 0, i40 %3
+  %6 = zext i8 %4 to i40
+  %7 = or disjoint i40 %5, %6
+  ret i40 %7
+}
+
+; Mask that does not line up with the truncated width: -257 clears only
+; bit 8, so the `and` cannot disappear. The CHECK lines expect the two
+; selects to be merged while the `and i40 ..., -257` stays in the false arm.
+; NOTE(review): %3 keeps bits 0-7 of %arg0 and %6 can carry the same bits, so
+; the `disjoint` flag on the `or` looks stronger than the inputs justify —
+; confirm it is intended.
+define i40 @select_reconstruction_257_mask(i40 %arg0) {
+; CHECK-LABEL: define i40 @select_reconstruction_257_mask(
+; CHECK-SAME: i40 [[ARG0:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i40 [[ARG0]] to i8
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 2
+; CHECK-NEXT:    [[TMP3:%.*]] = and i40 [[ARG0]], -257
+; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP2]], i40 0, i40 [[TMP3]]
+; CHECK-NEXT:    ret i40 [[TMP4]]
+;
+  %1 = trunc i40 %arg0 to i8
+  %2 = icmp eq i8 %1, 2
+  %3 = and i40 %arg0, -257
+  %4 = select i1 %2, i8 0, i8 %1
+  %5 = select i1 %2, i40 0, i40 %3
+  %6 = zext i8 %4 to i40
+  %7 = or disjoint i40 %5, %6
+  ret i40 %7
+}
+
+; 16-bit variant of the baseline case: the low half is taken with
+; `trunc ... to i16` and the high bits with `and ..., -65536` (~0xFFFF), so
+; the two `or disjoint` operands really are disjoint. The CHECK lines expect
+; a single select between 0 and the original %arg0.
+define i40 @select_reconstruction_i16_mask(i40 %arg0) {
+; CHECK-LABEL: define i40 @select_reconstruction_i16_mask(
+; CHECK-SAME: i40 [[ARG0:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i40 [[ARG0]] to i16
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i16 [[TMP1]], 2
+; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i40 0, i40 [[ARG0]]
+; CHECK-NEXT:    ret i40 [[TMP3]]
+;
+  %1 = trunc i40 %arg0 to i16
+  %2 = icmp eq i16 %1, 2
+  %3 = and i40 %arg0, -65536
+  %4 = select i1 %2, i16 0, i16 %1
+  %5 = select i1 %2, i40 0, i40 %3
+  %6 = zext i16 %4 to i40
+  %7 = or disjoint i40 %5, %6
+  ret i40 %7
+}
+
+; Vector variant (<2 x i32> value, <2 x i8> low part) with the comparison
+; against the argument %arg1. Unlike the scalar tests in this diff, the final
+; `or` carries no `disjoint` flag. The CHECK lines expect a single vector
+; select between zeroinitializer and the original %arg0.
+define <2 x i32> @select_reconstruction_vec_any_cmp_val(<2 x i32> %arg0, <2 x i8> %arg1) {
+; CHECK-LABEL: define <2 x i32> @select_reconstruction_vec_any_cmp_val(
+; CHECK-SAME: <2 x i32> [[ARG0:%.*]], <2 x i8> [[ARG1:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <2 x i32> [[ARG0]] to <2 x i8>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <2 x i8> [[ARG1]], [[TMP1]]
+; CHECK-NEXT:    [[TMP7:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> zeroinitializer, <2 x i32> [[ARG0]]
+; CHECK-NEXT:    ret <2 x i32> [[TMP7]]
+;
+  %1 = trunc <2 x i32> %arg0 to <2 x i8>
+  %2 = icmp eq <2 x i8> %1, %arg1
+  %3 = and <2 x i32> %arg0, <i32 -256, i32 -256>
+  %4 = select <2 x i1> %2, <2 x i8> <i8 0, i8 0>, <2 x i8> %1
+  %5 = select <2 x i1> %2, <2 x i32> <i32 0, i32 0>, <2 x i32> %3
+  %6 = zext <2 x i8> %4 to <2 x i32>
+  %7 = or <2 x i32> %5, %6
+  ret <2 x i32> %7
+}

macsencasaus · 2025-06-20T21:16:00Z

@dtcxzyw

dtcxzyw

Miscompilation reproducer: https://alive2.llvm.org/ce/z/qu9FYr

llvm/test/Transforms/InstCombine/select-reconstruction.ll

- only apply on OR reconstruction
- rename values in test

dtcxzyw · 2025-06-22T08:32:32Z

llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

+      return nullptr;
+
+    Value *X;
+    if (!match(Masked, m_OneUse(m_And(m_Value(X), m_Constant()))))


Suggested change

if (!match(Masked, m_OneUse(m_And(m_Value(X), m_Constant()))))

if (!match(Masked, m_OneUse(m_And(m_Value(X), m_APInt(*C)))))

Missing check for the mask. It should be APInt::getBitsSetFrom(X->getType()->getScalarSizeInBits(), Trunc->getType()->getScalarSizeInBits()).

I thought of this, but it would fail to simplify a case like

llvm-project/llvm/test/Transforms/InstCombine/select-reconstruction.ll

Lines 40 to 57 in 6f0a946

define i40 @select_reconstruction_257_mask(i40 %arg0) {

; CHECK-LABEL: define i40 @select_reconstruction_257_mask(

; CHECK-SAME: i40 [[ARG0:%.*]]) {

; CHECK-NEXT: [[TMP1:%.*]] = trunc i40 [[ARG0]] to i8

; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 2

; CHECK-NEXT: [[TMP3:%.*]] = and i40 [[ARG0]], -257

; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], i40 0, i40 [[TMP3]]

; CHECK-NEXT: ret i40 [[TMP4]]

;

%1 = trunc i40 %arg0 to i8

%2 = icmp eq i8 %1, 2

%3 = and i40 %arg0, -257

%4 = select i1 %2, i8 0, i8 %1

%5 = select i1 %2, i40 0, i40 %3

%6 = zext i8 %4 to i40

%7 = or disjoint i40 %5, %6

ret i40 %7

}

I can do it anyway if you believe this case is not relevant.

Yeah it should work for all constants. It even holds if we replace the and with any other instructions. I'd like to focus on the original motivating issue if you don't have a better idea to generalize the pattern. With this constraint, we can simplify ((X & Mask) op zext trunc X) into X directly, instead of creating a temporary instruction and relying on later optimizations.

To generalize, this optimization already occurs when the condition is arbitrary:
https://godbolt.org/z/8EPEP81TP

It just doesn't work when the condition is an icmp because of this check:

llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp

Lines 178 to 186 in cfcb788

// We are casting a select. Try to fold the cast into the select if the

// select does not have a compare instruction with matching operand types

// or the select is likely better done in a narrow type.

// Creating a select with operands that are different sizes than its

// condition may inhibit other folds and lead to worse codegen.

auto *Cmp = dyn_cast<CmpInst>(Sel->getCondition());

if (!Cmp || Cmp->getOperand(0)->getType() != Sel->getType() ||

(CI.getOpcode() == Instruction::Trunc &&

shouldChangeType(CI.getSrcTy(), CI.getType()))) {

I didn't know how to implement the optimization from here, but this may be an area where a more general optimization might occur.

That is exactly what I said in #145102 (comment).

dtcxzyw · 2025-06-22T08:44:35Z

@nikic
Do you have a better idea to generalize this pattern? Always folding %zext_low into the select addresses the issue: https://godbolt.org/z/xoKex34TE But we cannot do that as select i8 is more canonical than select i40.

llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

dtcxzyw

LGTM. Please wait for additional approval from other reviewers.

macsencasaus added 2 commits June 20, 2025 14:59

pre-commit test

683da5c

[InstCombine] Fold reconstruction across select

6f0a946

macsencasaus requested a review from nikic as a code owner June 20, 2025 21:12

llvmbot added llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes llvm:transforms labels Jun 20, 2025

dtcxzyw mentioned this pull request Jun 21, 2025

Fuzz PR145102 dtcxzyw/llvm-mutation-based-fuzz-service#65

Closed

dtcxzyw requested changes Jun 21, 2025

View reviewed changes

llvm/test/Transforms/InstCombine/select-reconstruction.ll Outdated Show resolved Hide resolved

address review comments

9542631

- only apply on OR reconstruction
- rename values in test

macsencasaus requested a review from dtcxzyw June 21, 2025 16:20

This was referenced Jun 21, 2025

Fuzz PR145102 dtcxzyw/llvm-fuzz-service#86

Closed

Task submission dtcxzyw/llvm-opt-benchmark#1312

Open

zyw-bot mentioned this pull request Jun 21, 2025

pre-commit: PR145102 dtcxzyw/llvm-opt-benchmark#2485

Closed

dtcxzyw reviewed Jun 22, 2025

View reviewed changes

dtcxzyw reviewed Jun 23, 2025

View reviewed changes

llvm/lib/Transforms/InstCombine/InstructionCombining.cpp Outdated Show resolved Hide resolved

llvm/lib/Transforms/InstCombine/InstructionCombining.cpp Outdated Show resolved Hide resolved

match correct mask only

f5bcc06

dtcxzyw approved these changes Jun 23, 2025

View reviewed changes

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[InstCombine] Fold reconstruction across select #145102

[InstCombine] Fold reconstruction across select #145102

macsencasaus commented Jun 20, 2025

Uh oh!

github-actions bot commented Jun 20, 2025

Uh oh!

llvmbot commented Jun 20, 2025

Uh oh!

macsencasaus commented Jun 20, 2025

Uh oh!

dtcxzyw left a comment

Uh oh!

Uh oh!

dtcxzyw Jun 22, 2025

Uh oh!

macsencasaus Jun 22, 2025

Uh oh!

dtcxzyw Jun 23, 2025

Uh oh!

macsencasaus Jun 23, 2025

Uh oh!

dtcxzyw Jun 23, 2025

Uh oh!

dtcxzyw commented Jun 22, 2025

Uh oh!

Uh oh!

Uh oh!

dtcxzyw left a comment

Uh oh!

Uh oh!

	if (!match(Masked, m_OneUse(m_And(m_Value(X), m_Constant()))))
	if (!match(Masked, m_OneUse(m_And(m_Value(X), m_APInt(*C)))))

	define i40 @select_reconstruction_257_mask(i40 %arg0) {
	; CHECK-LABEL: define i40 @select_reconstruction_257_mask(
	; CHECK-SAME: i40 [[ARG0:%.*]]) {
	; CHECK-NEXT: [[TMP1:%.*]] = trunc i40 [[ARG0]] to i8
	; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 2
	; CHECK-NEXT: [[TMP3:%.*]] = and i40 [[ARG0]], -257
	; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], i40 0, i40 [[TMP3]]
	; CHECK-NEXT: ret i40 [[TMP4]]
	;
	%1 = trunc i40 %arg0 to i8
	%2 = icmp eq i8 %1, 2
	%3 = and i40 %arg0, -257
	%4 = select i1 %2, i8 0, i8 %1
	%5 = select i1 %2, i40 0, i40 %3
	%6 = zext i8 %4 to i40
	%7 = or disjoint i40 %5, %6
	ret i40 %7
	}

	// We are casting a select. Try to fold the cast into the select if the
	// select does not have a compare instruction with matching operand types
	// or the select is likely better done in a narrow type.
	// Creating a select with operands that are different sizes than its
	// condition may inhibit other folds and lead to worse codegen.
	auto *Cmp = dyn_cast<CmpInst>(Sel->getCondition());
	if (!Cmp || Cmp->getOperand(0)->getType() != Sel->getType() ||
	(CI.getOpcode() == Instruction::Trunc &&
	shouldChangeType(CI.getSrcTy(), CI.getType()))) {

[InstCombine] Fold reconstruction across select #145102

Are you sure you want to change the base?

[InstCombine] Fold reconstruction across select #145102

Conversation

macsencasaus commented Jun 20, 2025

Uh oh!

github-actions bot commented Jun 20, 2025

Uh oh!

llvmbot commented Jun 20, 2025

Uh oh!

macsencasaus commented Jun 20, 2025

Uh oh!

dtcxzyw left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

dtcxzyw Jun 22, 2025

Choose a reason for hiding this comment

Uh oh!

macsencasaus Jun 22, 2025

Choose a reason for hiding this comment

Uh oh!

dtcxzyw Jun 23, 2025

Choose a reason for hiding this comment

Uh oh!

macsencasaus Jun 23, 2025

Choose a reason for hiding this comment

Uh oh!

dtcxzyw Jun 23, 2025

Choose a reason for hiding this comment

Uh oh!

dtcxzyw commented Jun 22, 2025

Uh oh!

Uh oh!

Uh oh!

dtcxzyw left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!