@@ -20,26 +20,49 @@ declare <2 x i32> @return_i32x2(i32 %0)
20
20
21
21
; Test with v2i32.
22
22
define ptx_kernel void @store_i32x2 (i32 %0 , ptr %p ) {
23
- ; CHECK-LABEL: store_i32x2(
24
- ; CHECK: {
25
- ; CHECK-NEXT: .reg .b32 %r<6>;
26
- ; CHECK-NEXT: .reg .b64 %rd<2>;
27
- ; CHECK-EMPTY:
28
- ; CHECK-NEXT: // %bb.0:
29
- ; CHECK-NEXT: ld.param.b64 %rd1, [store_i32x2_param_1];
30
- ; CHECK-NEXT: ld.param.b32 %r1, [store_i32x2_param_0];
31
- ; CHECK-NEXT: { // callseq 0, 0
32
- ; CHECK-NEXT: .param .b32 param0;
33
- ; CHECK-NEXT: .param .align 8 .b8 retval0[8];
34
- ; CHECK-NEXT: st.param.b32 [param0], %r1;
35
- ; CHECK-NEXT: call.uni (retval0), return_i32x2, (param0);
36
- ; CHECK-NEXT: ld.param.v2.b32 {%r2, %r3}, [retval0];
37
- ; CHECK-NEXT: } // callseq 0
38
- ; CHECK-NEXT: st.v2.b32 [%rd1], {%r2, %r3};
39
- ; CHECK-NEXT: ret;
23
+ ; CHECK-SM90A-LABEL: store_i32x2(
24
+ ; CHECK-SM90A: {
25
+ ; CHECK-SM90A-NEXT: .reg .b32 %r<6>;
26
+ ; CHECK-SM90A-NEXT: .reg .b64 %rd<2>;
27
+ ; CHECK-SM90A-EMPTY:
28
+ ; CHECK-SM90A-NEXT: // %bb.0:
29
+ ; CHECK-SM90A-NEXT: ld.param.b64 %rd1, [store_i32x2_param_1];
30
+ ; CHECK-SM90A-NEXT: ld.param.b32 %r1, [store_i32x2_param_0];
31
+ ; CHECK-SM90A-NEXT: { // callseq 0, 0
32
+ ; CHECK-SM90A-NEXT: .param .b32 param0;
33
+ ; CHECK-SM90A-NEXT: .param .align 8 .b8 retval0[8];
34
+ ; CHECK-SM90A-NEXT: st.param.b32 [param0], %r1;
35
+ ; CHECK-SM90A-NEXT: call.uni (retval0), return_i32x2, (param0);
36
+ ; CHECK-SM90A-NEXT: ld.param.v2.b32 {%r2, %r3}, [retval0];
37
+ ; CHECK-SM90A-NEXT: } // callseq 0
38
+ ; CHECK-SM90A-NEXT: add.rn.f32 %r4, %r3, %r3;
39
+ ; CHECK-SM90A-NEXT: add.rn.f32 %r5, %r2, %r2;
40
+ ; CHECK-SM90A-NEXT: st.v2.b32 [%rd1], {%r5, %r4};
41
+ ; CHECK-SM90A-NEXT: ret;
42
+ ;
43
+ ; CHECK-SM100-LABEL: store_i32x2(
44
+ ; CHECK-SM100: {
45
+ ; CHECK-SM100-NEXT: .reg .b32 %r<4>;
46
+ ; CHECK-SM100-NEXT: .reg .b64 %rd<4>;
47
+ ; CHECK-SM100-EMPTY:
48
+ ; CHECK-SM100-NEXT: // %bb.0:
49
+ ; CHECK-SM100-NEXT: ld.param.b64 %rd1, [store_i32x2_param_1];
50
+ ; CHECK-SM100-NEXT: ld.param.b32 %r1, [store_i32x2_param_0];
51
+ ; CHECK-SM100-NEXT: { // callseq 0, 0
52
+ ; CHECK-SM100-NEXT: .param .b32 param0;
53
+ ; CHECK-SM100-NEXT: .param .align 8 .b8 retval0[8];
54
+ ; CHECK-SM100-NEXT: st.param.b32 [param0], %r1;
55
+ ; CHECK-SM100-NEXT: call.uni (retval0), return_i32x2, (param0);
56
+ ; CHECK-SM100-NEXT: ld.param.v2.b32 {%r2, %r3}, [retval0];
57
+ ; CHECK-SM100-NEXT: } // callseq 0
58
+ ; CHECK-SM100-NEXT: mov.b64 %rd2, {%r2, %r3};
59
+ ; CHECK-SM100-NEXT: add.rn.f32x2 %rd3, %rd2, %rd2;
60
+ ; CHECK-SM100-NEXT: st.b64 [%rd1], %rd3;
61
+ ; CHECK-SM100-NEXT: ret;
40
62
%v = call <2 x i32 > @return_i32x2 (i32 %0 )
41
63
%v.f32x2 = bitcast <2 x i32 > %v to <2 x float >
42
- store <2 x float > %v.f32x2 , ptr %p , align 8
64
+ %res = fadd <2 x float > %v.f32x2 , %v.f32x2
65
+ store <2 x float > %res , ptr %p , align 8
43
66
ret void
44
67
}
45
68
@@ -93,3 +116,5 @@ define ptx_kernel void @inlineasm(ptr %p) {
93
116
store <2 x float > %mul , ptr %p , align 8
94
117
ret void
95
118
}
119
+ ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
120
+ ; CHECK: {{.*}}
0 commit comments