Skip to content

Commit d75d057

Browse files
committed
Use a v2f32 fadd in the test to keep the bitcast pattern in instruction selection (isel)
1 parent 4bd3d3f commit d75d057

File tree

1 file changed

+43
-18
lines changed

1 file changed

+43
-18
lines changed

llvm/test/CodeGen/NVPTX/f32x2-convert-i32x2.ll

Lines changed: 43 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -20,26 +20,49 @@ declare <2 x i32> @return_i32x2(i32 %0)
2020

2121
; Test with v2i32.
2222
define ptx_kernel void @store_i32x2(i32 %0, ptr %p) {
23-
; CHECK-LABEL: store_i32x2(
24-
; CHECK: {
25-
; CHECK-NEXT: .reg .b32 %r<6>;
26-
; CHECK-NEXT: .reg .b64 %rd<2>;
27-
; CHECK-EMPTY:
28-
; CHECK-NEXT: // %bb.0:
29-
; CHECK-NEXT: ld.param.b64 %rd1, [store_i32x2_param_1];
30-
; CHECK-NEXT: ld.param.b32 %r1, [store_i32x2_param_0];
31-
; CHECK-NEXT: { // callseq 0, 0
32-
; CHECK-NEXT: .param .b32 param0;
33-
; CHECK-NEXT: .param .align 8 .b8 retval0[8];
34-
; CHECK-NEXT: st.param.b32 [param0], %r1;
35-
; CHECK-NEXT: call.uni (retval0), return_i32x2, (param0);
36-
; CHECK-NEXT: ld.param.v2.b32 {%r2, %r3}, [retval0];
37-
; CHECK-NEXT: } // callseq 0
38-
; CHECK-NEXT: st.v2.b32 [%rd1], {%r2, %r3};
39-
; CHECK-NEXT: ret;
23+
; CHECK-SM90A-LABEL: store_i32x2(
24+
; CHECK-SM90A: {
25+
; CHECK-SM90A-NEXT: .reg .b32 %r<6>;
26+
; CHECK-SM90A-NEXT: .reg .b64 %rd<2>;
27+
; CHECK-SM90A-EMPTY:
28+
; CHECK-SM90A-NEXT: // %bb.0:
29+
; CHECK-SM90A-NEXT: ld.param.b64 %rd1, [store_i32x2_param_1];
30+
; CHECK-SM90A-NEXT: ld.param.b32 %r1, [store_i32x2_param_0];
31+
; CHECK-SM90A-NEXT: { // callseq 0, 0
32+
; CHECK-SM90A-NEXT: .param .b32 param0;
33+
; CHECK-SM90A-NEXT: .param .align 8 .b8 retval0[8];
34+
; CHECK-SM90A-NEXT: st.param.b32 [param0], %r1;
35+
; CHECK-SM90A-NEXT: call.uni (retval0), return_i32x2, (param0);
36+
; CHECK-SM90A-NEXT: ld.param.v2.b32 {%r2, %r3}, [retval0];
37+
; CHECK-SM90A-NEXT: } // callseq 0
38+
; CHECK-SM90A-NEXT: add.rn.f32 %r4, %r3, %r3;
39+
; CHECK-SM90A-NEXT: add.rn.f32 %r5, %r2, %r2;
40+
; CHECK-SM90A-NEXT: st.v2.b32 [%rd1], {%r5, %r4};
41+
; CHECK-SM90A-NEXT: ret;
42+
;
43+
; CHECK-SM100-LABEL: store_i32x2(
44+
; CHECK-SM100: {
45+
; CHECK-SM100-NEXT: .reg .b32 %r<4>;
46+
; CHECK-SM100-NEXT: .reg .b64 %rd<4>;
47+
; CHECK-SM100-EMPTY:
48+
; CHECK-SM100-NEXT: // %bb.0:
49+
; CHECK-SM100-NEXT: ld.param.b64 %rd1, [store_i32x2_param_1];
50+
; CHECK-SM100-NEXT: ld.param.b32 %r1, [store_i32x2_param_0];
51+
; CHECK-SM100-NEXT: { // callseq 0, 0
52+
; CHECK-SM100-NEXT: .param .b32 param0;
53+
; CHECK-SM100-NEXT: .param .align 8 .b8 retval0[8];
54+
; CHECK-SM100-NEXT: st.param.b32 [param0], %r1;
55+
; CHECK-SM100-NEXT: call.uni (retval0), return_i32x2, (param0);
56+
; CHECK-SM100-NEXT: ld.param.v2.b32 {%r2, %r3}, [retval0];
57+
; CHECK-SM100-NEXT: } // callseq 0
58+
; CHECK-SM100-NEXT: mov.b64 %rd2, {%r2, %r3};
59+
; CHECK-SM100-NEXT: add.rn.f32x2 %rd3, %rd2, %rd2;
60+
; CHECK-SM100-NEXT: st.b64 [%rd1], %rd3;
61+
; CHECK-SM100-NEXT: ret;
4062
%v = call <2 x i32> @return_i32x2(i32 %0)
4163
%v.f32x2 = bitcast <2 x i32> %v to <2 x float>
42-
store <2 x float> %v.f32x2, ptr %p, align 8
64+
%res = fadd <2 x float> %v.f32x2, %v.f32x2
65+
store <2 x float> %res, ptr %p, align 8
4366
ret void
4467
}
4568

@@ -93,3 +116,5 @@ define ptx_kernel void @inlineasm(ptr %p) {
93116
store <2 x float> %mul, ptr %p, align 8
94117
ret void
95118
}
119+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
120+
; CHECK: {{.*}}

0 commit comments

Comments
 (0)