@@ -20,28 +20,28 @@ namespace libyuv {
20
20
extern " C" {
21
21
#endif
22
22
23
- #define ILVLH_B (in0, in1, in2, in3, out0, out1, out2, out3 ) \
24
- { \
25
- DUP2_ARG2 (__lsx_vilvl_b, in1, in0, in3, in2, out0, out2); \
26
- DUP2_ARG2 (__lsx_vilvh_b, in1, in0, in3, in2, out1, out3); \
23
+ #define ILVLH_B (in0, in1, in2, in3, out0, out1, out2, out3 ) \
24
+ { \
25
+ DUP2_ARG2 (__lsx_vilvl_b, in1, in0, in3, in2, out0, out2); \
26
+ DUP2_ARG2 (__lsx_vilvh_b, in1, in0, in3, in2, out1, out3); \
27
27
}
28
28
29
- #define ILVLH_H (in0, in1, in2, in3, out0, out1, out2, out3 ) \
30
- { \
31
- DUP2_ARG2 (__lsx_vilvl_h, in1, in0, in3, in2, out0, out2); \
32
- DUP2_ARG2 (__lsx_vilvh_h, in1, in0, in3, in2, out1, out3); \
29
+ #define ILVLH_H (in0, in1, in2, in3, out0, out1, out2, out3 ) \
30
+ { \
31
+ DUP2_ARG2 (__lsx_vilvl_h, in1, in0, in3, in2, out0, out2); \
32
+ DUP2_ARG2 (__lsx_vilvh_h, in1, in0, in3, in2, out1, out3); \
33
33
}
34
34
35
- #define ILVLH_W (in0, in1, in2, in3, out0, out1, out2, out3 ) \
36
- { \
37
- DUP2_ARG2 (__lsx_vilvl_w, in1, in0, in3, in2, out0, out2); \
38
- DUP2_ARG2 (__lsx_vilvh_w, in1, in0, in3, in2, out1, out3); \
35
+ #define ILVLH_W (in0, in1, in2, in3, out0, out1, out2, out3 ) \
36
+ { \
37
+ DUP2_ARG2 (__lsx_vilvl_w, in1, in0, in3, in2, out0, out2); \
38
+ DUP2_ARG2 (__lsx_vilvh_w, in1, in0, in3, in2, out1, out3); \
39
39
}
40
40
41
- #define ILVLH_D (in0, in1, in2, in3, out0, out1, out2, out3 ) \
42
- { \
43
- DUP2_ARG2 (__lsx_vilvl_d, in1, in0, in3, in2, out0, out2); \
44
- DUP2_ARG2 (__lsx_vilvh_d, in1, in0, in3, in2, out1, out3); \
41
+ #define ILVLH_D (in0, in1, in2, in3, out0, out1, out2, out3 ) \
42
+ { \
43
+ DUP2_ARG2 (__lsx_vilvl_d, in1, in0, in3, in2, out0, out2); \
44
+ DUP2_ARG2 (__lsx_vilvh_d, in1, in0, in3, in2, out1, out3); \
45
45
}
46
46
47
47
#define LSX_ST_4 (_dst0, _dst1, _dst2, _dst3, _dst, _stride, _stride2, \
@@ -54,11 +54,11 @@ extern "C" {
54
54
_dst += _stride4; \
55
55
}
56
56
57
- #define LSX_ST_2 (_dst0, _dst1, _dst, _stride, _stride2 ) \
58
- { \
59
- __lsx_vst (_dst0, _dst, 0 ); \
60
- __lsx_vstx (_dst1, _dst, _stride); \
61
- _dst += _stride2; \
57
+ #define LSX_ST_2 (_dst0, _dst1, _dst, _stride, _stride2 ) \
58
+ { \
59
+ __lsx_vst (_dst0, _dst, 0 ); \
60
+ __lsx_vstx (_dst1, _dst, _stride); \
61
+ _dst += _stride2; \
62
62
}
63
63
64
64
void TransposeWx16_C (const uint8_t * src,
@@ -84,15 +84,14 @@ void TransposeUVWx16_C(const uint8_t* src,
84
84
dst_stride_a, (dst_b + 8 ), dst_stride_b, width);
85
85
}
86
86
87
-
88
87
void TransposeWx16_LSX (const uint8_t * src,
89
88
int src_stride,
90
89
uint8_t * dst,
91
90
int dst_stride,
92
91
int width) {
93
92
int x;
94
93
int len = width / 16 ;
95
- uint8_t * s;
94
+ uint8_t * s;
96
95
int src_stride2 = src_stride << 1 ;
97
96
int src_stride3 = src_stride + src_stride2;
98
97
int src_stride4 = src_stride2 << 1 ;
@@ -139,23 +138,23 @@ void TransposeWx16_LSX(const uint8_t* src,
139
138
res8 = __lsx_vilvl_w (reg4, reg0);
140
139
res9 = __lsx_vilvh_w (reg4, reg0);
141
140
ILVLH_D (res0, res8, res1, res9, dst0, dst1, dst2, dst3);
142
- LSX_ST_4 (dst0, dst1, dst2, dst3, dst, dst_stride, dst_stride2,
143
- dst_stride3, dst_stride4);
141
+ LSX_ST_4 (dst0, dst1, dst2, dst3, dst, dst_stride, dst_stride2, dst_stride3,
142
+ dst_stride4);
144
143
res8 = __lsx_vilvl_w (reg5, reg1);
145
144
res9 = __lsx_vilvh_w (reg5, reg1);
146
145
ILVLH_D (res2, res8, res3, res9, dst0, dst1, dst2, dst3);
147
- LSX_ST_4 (dst0, dst1, dst2, dst3, dst, dst_stride, dst_stride2,
148
- dst_stride3, dst_stride4);
146
+ LSX_ST_4 (dst0, dst1, dst2, dst3, dst, dst_stride, dst_stride2, dst_stride3,
147
+ dst_stride4);
149
148
res8 = __lsx_vilvl_w (reg6, reg2);
150
149
res9 = __lsx_vilvh_w (reg6, reg2);
151
150
ILVLH_D (res4, res8, res5, res9, dst0, dst1, dst2, dst3);
152
- LSX_ST_4 (dst0, dst1, dst2, dst3, dst, dst_stride, dst_stride2,
153
- dst_stride3, dst_stride4);
151
+ LSX_ST_4 (dst0, dst1, dst2, dst3, dst, dst_stride, dst_stride2, dst_stride3,
152
+ dst_stride4);
154
153
res8 = __lsx_vilvl_w (reg7, reg3);
155
154
res9 = __lsx_vilvh_w (reg7, reg3);
156
155
ILVLH_D (res6, res8, res7, res9, dst0, dst1, dst2, dst3);
157
- LSX_ST_4 (dst0, dst1, dst2, dst3, dst, dst_stride, dst_stride2,
158
- dst_stride3, dst_stride4);
156
+ LSX_ST_4 (dst0, dst1, dst2, dst3, dst, dst_stride, dst_stride2, dst_stride3,
157
+ dst_stride4);
159
158
src += 16 ;
160
159
}
161
160
}
0 commit comments