@@ -4273,4 +4273,265 @@ define i32 @fold_zext_nneg_add_const_fail2(i8 %x) {
}
declare void @llvm.assume(i1)
+ declare i32 @llvm.ctlz.i32(i32, i1)
+
+ ; Ceiling division by power-of-2: (x >> log2(N)) + ((x & (N-1)) != 0) -> (x + (N-1)) >> log2(N)
+ ; This is only valid when x + (N-1) doesn't overflow.
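+ ; e.g. N = 8, x = 13: (13 >> 3) + ((13 & 7) != 0) = 1 + 1 = 2 = ceil(13 / 8), and (13 + 7) >> 3 = 20 >> 3 = 2.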
+
+ ; Test with known range that prevents overflow
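+ ; With %x in [0, 100), %x + 7 is at most 106, so the rewritten add cannot
+ ; overflow and can carry the nuw/nsw flags checked below.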
+ define i32 @ceil_div_by_8_known_range(i32 range(i32 0, 100) %x) {
+ ; CHECK-LABEL: @ceil_div_by_8_known_range(
+ ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i32 [[X:%.*]], 7
+ ; CHECK-NEXT: [[R:%.*]] = lshr i32 [[TMP1]], 3
+ ; CHECK-NEXT: ret i32 [[R]]
+ ;
+ %shr = lshr i32 %x, 3
+ %and = and i32 %x, 7
+ %cmp = icmp ne i32 %and, 0
+ %ext = zext i1 %cmp to i32
+ %r = add i32 %shr, %ext
+ ret i32 %r
+ }
+
+ ; Test with the exact IR from the original testcase
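+ ; The range(i32 0, 33) on the ctlz call bounds %ctlz to [0, 32], so %sub = 32 - %ctlz
+ ; is also in [0, 32] and %sub + 7 <= 39 cannot overflow; the fold then merges the
+ ; constants into the (39 - %ctlz) >> 3 form checked below.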
+ define i32 @ceil_div_from_clz(i32 %v) {
+ ; CHECK-LABEL: @ceil_div_from_clz(
+ ; CHECK-NEXT: [[CTLZ:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[V:%.*]], i1 false)
+ ; CHECK-NEXT: [[TMP1:%.*]] = sub nuw nsw i32 39, [[CTLZ]]
+ ; CHECK-NEXT: [[R:%.*]] = lshr i32 [[TMP1]], 3
+ ; CHECK-NEXT: ret i32 [[R]]
+ ;
+ %ctlz = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 %v, i1 false)
+ %sub = sub nuw nsw i32 32, %ctlz
+ %shr = lshr i32 %sub, 3
+ %and = and i32 %sub, 7
+ %cmp = icmp ne i32 %and, 0
+ %ext = zext i1 %cmp to i32
+ %r = add nuw nsw i32 %shr, %ext
+ ret i32 %r
+ }
+
+ ; Vector version with known range
+ define <2 x i32> @ceil_div_by_8_vec_range(<2 x i32> range(i32 0, 1000) %x) {
+ ; CHECK-LABEL: @ceil_div_by_8_vec_range(
+ ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw <2 x i32> [[X:%.*]], splat (i32 7)
+ ; CHECK-NEXT: [[R:%.*]] = lshr <2 x i32> [[TMP1]], splat (i32 3)
+ ; CHECK-NEXT: ret <2 x i32> [[R]]
+ ;
+ %shr = lshr <2 x i32> %x, <i32 3, i32 3>
+ %and = and <2 x i32> %x, <i32 7, i32 7>
+ %cmp = icmp ne <2 x i32> %and, <i32 0, i32 0>
+ %ext = zext <2 x i1> %cmp to <2 x i32>
+ %r = add <2 x i32> %shr, %ext
+ ret <2 x i32> %r
+ }
+
+ ; Ceiling division by 16 with known range
+ define i16 @ceil_div_by_16_i16(i16 range(i16 0, 1000) %x) {
+ ; CHECK-LABEL: @ceil_div_by_16_i16(
+ ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i16 [[X:%.*]], 15
+ ; CHECK-NEXT: [[R:%.*]] = lshr i16 [[TMP1]], 4
+ ; CHECK-NEXT: ret i16 [[R]]
+ ;
+ %shr = lshr i16 %x, 4
+ %and = and i16 %x, 15
+ %cmp = icmp ne i16 %and, 0
+ %ext = zext i1 %cmp to i16
+ %r = add i16 %shr, %ext
+ ret i16 %r
+ }
+
+ ; Negative test: no overflow guarantee - should NOT optimize
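+ ; e.g. x = -1 (all ones): the original sequence gives 0x1FFFFFFF + 1, but x + 7
+ ; wraps to 6 and 6 >> 3 = 0, so rewriting to (x + 7) >> 3 would be a miscompile.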
+ define i32 @ceil_div_by_8_no_overflow_info(i32 %x) {
+ ; CHECK-LABEL: @ceil_div_by_8_no_overflow_info(
+ ; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X:%.*]], 3
+ ; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], 7
+ ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
+ ; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[CMP]] to i32
+ ; CHECK-NEXT: [[R:%.*]] = add nuw nsw i32 [[SHR]], [[EXT]]
+ ; CHECK-NEXT: ret i32 [[R]]
+ ;
+ %shr = lshr i32 %x, 3
+ %and = and i32 %x, 7
+ %cmp = icmp ne i32 %and, 0
+ %ext = zext i1 %cmp to i32
+ %r = add i32 %shr, %ext
+ ret i32 %r
+ }
+
+ ; Negative test: nuw on the final add doesn't help
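+ ; nuw on %shr + %ext always holds here (%shr <= 0x1FFFFFFF and %ext <= 1), so it
+ ; carries no information about whether %x + 7 overflows.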
+ define i32 @ceil_div_by_8_only_nuw_on_add(i32 %x) {
+ ; CHECK-LABEL: @ceil_div_by_8_only_nuw_on_add(
+ ; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X:%.*]], 3
+ ; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], 7
+ ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
+ ; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[CMP]] to i32
+ ; CHECK-NEXT: [[R:%.*]] = add nuw nsw i32 [[SHR]], [[EXT]]
+ ; CHECK-NEXT: ret i32 [[R]]
+ ;
+ %shr = lshr i32 %x, 3
+ %and = and i32 %x, 7
+ %cmp = icmp ne i32 %and, 0
+ %ext = zext i1 %cmp to i32
+ %r = add nuw i32 %shr, %ext ; nuw here doesn't prove x+7 won't overflow
+ ret i32 %r
+ }
+
+ ; Negative test: wrong mask
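+ ; With mask 6 the low bit of the remainder is ignored, e.g. x = 1 gives %and = 0,
+ ; so this is not a ceiling-division pattern and must be left alone.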
+ define i32 @ceil_div_wrong_mask(i32 range(i32 0, 100) %x) {
+ ; CHECK-LABEL: @ceil_div_wrong_mask(
+ ; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X:%.*]], 3
+ ; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], 6
+ ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
+ ; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[CMP]] to i32
+ ; CHECK-NEXT: [[R:%.*]] = add nuw nsw i32 [[SHR]], [[EXT]]
+ ; CHECK-NEXT: ret i32 [[R]]
+ ;
+ %shr = lshr i32 %x, 3
+ %and = and i32 %x, 6 ; Wrong mask: should be 7
+ %cmp = icmp ne i32 %and, 0
+ %ext = zext i1 %cmp to i32
+ %r = add i32 %shr, %ext
+ ret i32 %r
+ }
+
+ ; Negative test: wrong shift amount
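+ ; A shift by 4 would need mask 15; e.g. x = 8 gives %shr = 0 and %and = 0, but
+ ; ceil(8 / 16) = 1, so this sequence is not a ceiling division.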
+ define i32 @ceil_div_wrong_shift(i32 range(i32 0, 100) %x) {
+ ; CHECK-LABEL: @ceil_div_wrong_shift(
+ ; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X:%.*]], 4
+ ; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], 7
+ ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
+ ; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[CMP]] to i32
+ ; CHECK-NEXT: [[R:%.*]] = add nuw nsw i32 [[SHR]], [[EXT]]
+ ; CHECK-NEXT: ret i32 [[R]]
+ ;
+ %shr = lshr i32 %x, 4 ; Shift by 4, but mask is 7 (should be 15)
+ %and = and i32 %x, 7
+ %cmp = icmp ne i32 %and, 0
+ %ext = zext i1 %cmp to i32
+ %r = add i32 %shr, %ext
+ ret i32 %r
+ }
+
+ ; Negative test: wrong comparison
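+ ; With eq the adjustment is added exactly when x is a multiple of 8, e.g. x = 8
+ ; yields 1 + 1 = 2 instead of ceil(8 / 8) = 1, so no fold applies.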
+ define i32 @ceil_div_wrong_cmp(i32 range(i32 0, 100) %x) {
+ ; CHECK-LABEL: @ceil_div_wrong_cmp(
+ ; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X:%.*]], 3
+ ; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], 7
+ ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+ ; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[CMP]] to i32
+ ; CHECK-NEXT: [[R:%.*]] = add nuw nsw i32 [[SHR]], [[EXT]]
+ ; CHECK-NEXT: ret i32 [[R]]
+ ;
+ %shr = lshr i32 %x, 3
+ %and = and i32 %x, 7
+ %cmp = icmp eq i32 %and, 0 ; Wrong: should be ne
+ %ext = zext i1 %cmp to i32
+ %r = add i32 %shr, %ext
+ ret i32 %r
+ }
+
+ ; Multi-use test: all intermediate values have uses
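+ ; Every intermediate value is kept alive by a call, so the rewritten form would be
+ ; extra code rather than a replacement; the pattern is expected to stay as-is.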
+ define i32 @ceil_div_multi_use(i32 range(i32 0, 100) %x) {
+ ; CHECK-LABEL: @ceil_div_multi_use(
+ ; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X:%.*]], 3
+ ; CHECK-NEXT: call void @use_i32(i32 [[SHR]])
+ ; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], 7
+ ; CHECK-NEXT: call void @use_i32(i32 [[AND]])
+ ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
+ ; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[CMP]] to i32
+ ; CHECK-NEXT: call void @use_i32(i32 [[EXT]])
+ ; CHECK-NEXT: [[R:%.*]] = add nuw nsw i32 [[SHR]], [[EXT]]
+ ; CHECK-NEXT: ret i32 [[R]]
+ ;
+ %shr = lshr i32 %x, 3
+ call void @use_i32(i32 %shr)
+ %and = and i32 %x, 7
+ call void @use_i32(i32 %and)
+ %cmp = icmp ne i32 %and, 0
+ %ext = zext i1 %cmp to i32
+ call void @use_i32(i32 %ext)
+ %r = add i32 %shr, %ext
+ ret i32 %r
+ }
+
+ ; Commuted test: add operands are swapped
+ define i32 @ceil_div_commuted(i32 range(i32 0, 100) %x) {
+ ; CHECK-LABEL: @ceil_div_commuted(
+ ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i32 [[X:%.*]], 7
+ ; CHECK-NEXT: [[R:%.*]] = lshr i32 [[TMP1]], 3
+ ; CHECK-NEXT: ret i32 [[R]]
+ ;
+ %shr = lshr i32 %x, 3
+ %and = and i32 %x, 7
+ %cmp = icmp ne i32 %and, 0
+ %ext = zext i1 %cmp to i32
+ %r = add i32 %ext, %shr ; Operands swapped
+ ret i32 %r
+ }
+
+ ; Commuted with multi-use
+ define i32 @ceil_div_commuted_multi_use(i32 range(i32 0, 100) %x) {
+ ; CHECK-LABEL: @ceil_div_commuted_multi_use(
+ ; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X:%.*]], 3
+ ; CHECK-NEXT: call void @use_i32(i32 [[SHR]])
+ ; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], 7
+ ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
+ ; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[CMP]] to i32
+ ; CHECK-NEXT: call void @use_i32(i32 [[EXT]])
+ ; CHECK-NEXT: [[R:%.*]] = add nuw nsw i32 [[SHR]], [[EXT]]
+ ; CHECK-NEXT: ret i32 [[R]]
+ ;
+ %shr = lshr i32 %x, 3
+ call void @use_i32(i32 %shr)
+ %and = and i32 %x, 7
+ %cmp = icmp ne i32 %and, 0
+ %ext = zext i1 %cmp to i32
+ call void @use_i32(i32 %ext)
+ %r = add i32 %ext, %shr ; Operands swapped
+ ret i32 %r
+ }
+
+ ; Multi-use test where only the zext has multiple uses - should still optimize
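+ ; The extra use is only on the zext chain, which survives the rewrite anyway; the
+ ; lshr and add still fold away, so the transform remains profitable.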
+ define i32 @ceil_div_zext_multi_use(i32 range(i32 0, 100) %x) {
+ ; CHECK-LABEL: @ceil_div_zext_multi_use(
+ ; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 7
+ ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
+ ; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[CMP]] to i32
+ ; CHECK-NEXT: call void @use_i32(i32 [[EXT]])
+ ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i32 [[X]], 7
+ ; CHECK-NEXT: [[R:%.*]] = lshr i32 [[TMP1]], 3
+ ; CHECK-NEXT: ret i32 [[R]]
+ ;
+ %shr = lshr i32 %x, 3
+ %and = and i32 %x, 7
+ %cmp = icmp ne i32 %and, 0
+ %ext = zext i1 %cmp to i32
+ call void @use_i32(i32 %ext)
+ %r = add i32 %shr, %ext
+ ret i32 %r
+ }
+
+ ; Multi-use with vector type
+ define <2 x i32> @ceil_div_vec_multi_use(<2 x i32> range(i32 0, 1000) %x) {
+ ; CHECK-LABEL: @ceil_div_vec_multi_use(
+ ; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 3)
+ ; CHECK-NEXT: call void @use_vec(<2 x i32> [[SHR]])
+ ; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X]], splat (i32 7)
+ ; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[AND]], zeroinitializer
+ ; CHECK-NEXT: [[EXT:%.*]] = zext <2 x i1> [[CMP]] to <2 x i32>
+ ; CHECK-NEXT: [[R:%.*]] = add nuw nsw <2 x i32> [[SHR]], [[EXT]]
+ ; CHECK-NEXT: ret <2 x i32> [[R]]
+ ;
+ %shr = lshr <2 x i32> %x, <i32 3, i32 3>
+ call void @use_vec(<2 x i32> %shr)
+ %and = and <2 x i32> %x, <i32 7, i32 7>
+ %cmp = icmp ne <2 x i32> %and, <i32 0, i32 0>
+ %ext = zext <2 x i1> %cmp to <2 x i32>
+ %r = add <2 x i32> %shr, %ext
+ ret <2 x i32> %r
+ }
+
+ declare void @use_i32(i32)
+ declare void @use_vec(<2 x i32>)
declare void @fake_func(i32)