Skip to content

Commit 804980b

Browse files
fbarchard authored and libyuv LUCI CQ committed
DetilePlane and unittest for NEON
Bug: libyuv:915, b/215425056 Change-Id: Iccab1ed3f6d385f02895d44faa94d198ad79d693 Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3424820 Reviewed-by: Justin Green <[email protected]> Reviewed-by: Frank Barchard <[email protected]> Commit-Queue: Frank Barchard <[email protected]>
1 parent 2c6bfc0 commit 804980b

20 files changed

+1708
-1591
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ pin-log.txt
1212
/native_client
1313
/net
1414
/out
15+
/unit_test/out
1516
/source/out
1617
/sde-avx-sse-transition-out.txt
1718
/testing

README.chromium

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Name: libyuv
22
URL: http://code.google.com/p/libyuv/
3-
Version: 1809
3+
Version: 1810
44
License: BSD
55
License File: LICENSE
66

include/libyuv/loongson_intrinsics.h

+812-828
Large diffs are not rendered by default.

include/libyuv/planar_functions.h

+10
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,16 @@ void SetPlane(uint8_t* dst_y,
8383
int height,
8484
uint32_t value);
8585

86+
// Convert a plane stored as tiles of 16 x tile_height bytes to a linear plane.
87+
LIBYUV_API
88+
void DetilePlane(const uint8_t* src_y,
89+
int src_stride_y,
90+
uint8_t* dst_y,
91+
int dst_stride_y,
92+
int width,
93+
int height,
94+
int tile_height);
95+
8696
// Split interleaved UV plane into separate U and V planes.
8797
LIBYUV_API
8898
void SplitUVPlane(const uint8_t* src_uv,

include/libyuv/row.h

+14-9
Original file line numberDiff line numberDiff line change
@@ -400,8 +400,8 @@ extern "C" {
400400

401401
// The following are available for AVX512 clang x64 platforms:
402402
// TODO(fbarchard): Port to x86
403-
#if !defined(LIBYUV_DISABLE_X86) && \
404-
defined(__x86_64__) && (defined(CLANG_HAS_AVX512))
403+
#if !defined(LIBYUV_DISABLE_X86) && defined(__x86_64__) && \
404+
(defined(CLANG_HAS_AVX512))
405405
#define HAS_I422TOARGBROW_AVX512BW
406406
#endif
407407

@@ -536,7 +536,7 @@ extern "C" {
536536
#define HAS_SCALESUMSAMPLES_NEON
537537
#define HAS_GAUSSROW_F32_NEON
538538
#define HAS_GAUSSCOL_F32_NEON
539-
539+
#define HAS_DETILEROW_NEON
540540
#endif
541541
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
542542
#define HAS_ABGRTOUVROW_MSA
@@ -1768,7 +1768,9 @@ void ARGBMirrorRow_Any_NEON(const uint8_t* src_ptr,
17681768
uint8_t* dst_ptr,
17691769
int width);
17701770
void ARGBMirrorRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
1771-
void ARGBMirrorRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
1771+
void ARGBMirrorRow_Any_LASX(const uint8_t* src_ptr,
1772+
uint8_t* dst_ptr,
1773+
int width);
17721774

17731775
void RGB24MirrorRow_SSSE3(const uint8_t* src_rgb24,
17741776
uint8_t* dst_rgb24,
@@ -1828,7 +1830,15 @@ void SplitUVRow_Any_LSX(const uint8_t* src_ptr,
18281830
uint8_t* dst_u,
18291831
uint8_t* dst_v,
18301832
int width);
1833+
void DetileRow_C(const uint8_t* src,
1834+
ptrdiff_t src_tile_stride,
1835+
uint8_t* dst,
1836+
int width);
18311837

1838+
void DetileRow_NEON(const uint8_t* src,
1839+
ptrdiff_t src_tile_stride,
1840+
uint8_t* dst,
1841+
int width);
18321842
void MergeUVRow_C(const uint8_t* src_u,
18331843
const uint8_t* src_v,
18341844
uint8_t* dst_uv,
@@ -2802,7 +2812,6 @@ void ARGBToARGB4444Row_LASX(const uint8_t* src_argb,
28022812
uint8_t* dst_rgb,
28032813
int width);
28042814

2805-
28062815
void ARGBToRGBARow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
28072816
void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
28082817
void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
@@ -4097,7 +4106,6 @@ void ARGBToARGB4444Row_Any_LASX(const uint8_t* src_ptr,
40974106
uint8_t* dst_ptr,
40984107
int width);
40994108

4100-
41014109
void I444ToARGBRow_Any_NEON(const uint8_t* y_buf,
41024110
const uint8_t* u_buf,
41034111
const uint8_t* v_buf,
@@ -4878,7 +4886,6 @@ void ARGBQuantizeRow_LSX(uint8_t* dst_argb,
48784886
int interval_offset,
48794887
int width);
48804888

4881-
48824889
void ARGBShadeRow_C(const uint8_t* src_argb,
48834890
uint8_t* dst_argb,
48844891
int width,
@@ -4912,7 +4919,6 @@ void ComputeCumulativeSumRow_SSE2(const uint8_t* row,
49124919
const int32_t* previous_cumsum,
49134920
int width);
49144921

4915-
49164922
void CumulativeSumToAverageRow_C(const int32_t* tl,
49174923
const int32_t* bl,
49184924
int w,
@@ -5259,7 +5265,6 @@ float ScaleSumSamples_NEON(const float* src,
52595265
void ScaleSamples_C(const float* src, float* dst, float scale, int width);
52605266
void ScaleSamples_NEON(const float* src, float* dst, float scale, int width);
52615267

5262-
52635268
void GaussRow_F32_NEON(const float* src, float* dst, int width);
52645269
void GaussRow_F32_C(const float* src, float* dst, int width);
52655270

include/libyuv/scale_row.h

-1
Original file line numberDiff line numberDiff line change
@@ -1564,7 +1564,6 @@ void ScaleRowDown34_1_Box_Any_MSA(const uint8_t* src_ptr,
15641564
uint8_t* dst_ptr,
15651565
int dst_width);
15661566

1567-
15681567
void ScaleRowDown2_LSX(const uint8_t* src_ptr,
15691568
ptrdiff_t src_stride,
15701569
uint8_t* dst,

include/libyuv/version.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,6 @@
1111
#ifndef INCLUDE_LIBYUV_VERSION_H_
1212
#define INCLUDE_LIBYUV_VERSION_H_
1313

14-
#define LIBYUV_VERSION 1809
14+
#define LIBYUV_VERSION 1810
1515

1616
#endif // INCLUDE_LIBYUV_VERSION_H_

source/convert.cc

+1-2
Original file line numberDiff line numberDiff line change
@@ -2448,8 +2448,7 @@ int RGB565ToI420(const uint8_t* src_rgb565,
24482448
}
24492449
}
24502450
// MSA version does direct RGB565 to YUV.
2451-
#elif (defined(HAS_RGB565TOYROW_MSA) \
2452-
|| defined(HAS_RGB565TOYROW_LSX))
2451+
#elif (defined(HAS_RGB565TOYROW_MSA) || defined(HAS_RGB565TOYROW_LSX))
24532452
#if defined(HAS_RGB565TOYROW_MSA) && defined(HAS_RGB565TOUVROW_MSA)
24542453
if (TestCpuFlag(kCpuHasMSA)) {
24552454
RGB565ToUVRow = RGB565ToUVRow_Any_MSA;

source/convert_argb.cc

+6-3
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,8 @@ int I420ToARGBMatrix(const uint8_t* src_y,
9090
}
9191
#endif
9292
#if defined(HAS_I422TOARGBROW_AVX512BW)
93-
if (TestCpuFlag(kCpuHasAVX512BW | kCpuHasAVX512VL) == (kCpuHasAVX512BW | kCpuHasAVX512VL)) {
93+
if (TestCpuFlag(kCpuHasAVX512BW | kCpuHasAVX512VL) ==
94+
(kCpuHasAVX512BW | kCpuHasAVX512VL)) {
9495
I422ToARGBRow = I422ToARGBRow_Any_AVX512BW;
9596
if (IS_ALIGNED(width, 32)) {
9697
I422ToARGBRow = I422ToARGBRow_AVX512BW;
@@ -329,7 +330,8 @@ int I422ToARGBMatrix(const uint8_t* src_y,
329330
}
330331
#endif
331332
#if defined(HAS_I422TOARGBROW_AVX512BW)
332-
if (TestCpuFlag(kCpuHasAVX512BW | kCpuHasAVX512VL) == (kCpuHasAVX512BW | kCpuHasAVX512VL)) {
333+
if (TestCpuFlag(kCpuHasAVX512BW | kCpuHasAVX512VL) ==
334+
(kCpuHasAVX512BW | kCpuHasAVX512VL)) {
333335
I422ToARGBRow = I422ToARGBRow_Any_AVX512BW;
334336
if (IS_ALIGNED(width, 32)) {
335337
I422ToARGBRow = I422ToARGBRow_AVX512BW;
@@ -5094,7 +5096,8 @@ int I420ToRGB565Dither(const uint8_t* src_y,
50945096
}
50955097
#endif
50965098
#if defined(HAS_I422TOARGBROW_AVX512BW)
5097-
if (TestCpuFlag(kCpuHasAVX512BW | kCpuHasAVX512VL) == (kCpuHasAVX512BW | kCpuHasAVX512VL)) {
5099+
if (TestCpuFlag(kCpuHasAVX512BW | kCpuHasAVX512VL) ==
5100+
(kCpuHasAVX512BW | kCpuHasAVX512VL)) {
50985101
I422ToARGBRow = I422ToARGBRow_Any_AVX512BW;
50995102
if (IS_ALIGNED(width, 32)) {
51005103
I422ToARGBRow = I422ToARGBRow_AVX512BW;

source/cpu_id.cc

+5-9
Original file line numberDiff line numberDiff line change
@@ -193,25 +193,21 @@ LIBYUV_API SAFEBUFFERS int MipsCpuCaps(const char* cpuinfo_name) {
193193

194194
// TODO(fbarchard): Consider read_loongarch_ir().
195195
#define LOONGARCH_CFG2 0x2
196-
#define LOONGARCH_CFG2_LSX (1 << 6)
197-
#define LOONGARCH_CFG2_LASX (1 << 7)
196+
#define LOONGARCH_CFG2_LSX (1 << 6)
197+
#define LOONGARCH_CFG2_LASX (1 << 7)
198198

199199
#if defined(__loongarch__) && defined(__linux__)
200200
LIBYUV_API SAFEBUFFERS int LoongarchCpuCaps(void) {
201201
int flag = 0x0;
202202
uint32_t cfg2 = 0;
203203

204-
__asm__ volatile(
205-
"cpucfg %0, %1 \n\t"
206-
: "+&r"(cfg2)
207-
: "r"(LOONGARCH_CFG2)
208-
);
204+
__asm__ volatile("cpucfg %0, %1 \n\t" : "+&r"(cfg2) : "r"(LOONGARCH_CFG2));
209205

210206
if (cfg2 & LOONGARCH_CFG2_LSX)
211-
flag |= kCpuHasLSX;
207+
flag |= kCpuHasLSX;
212208

213209
if (cfg2 & LOONGARCH_CFG2_LASX)
214-
flag |= kCpuHasLASX;
210+
flag |= kCpuHasLASX;
215211
return flag;
216212
}
217213
#endif

source/planar_functions.cc

+47
Original file line numberDiff line numberDiff line change
@@ -853,6 +853,53 @@ int NV21ToNV12(const uint8_t* src_y,
853853
return 0;
854854
}
855855

856+
// Detile a plane of data
857+
// tile width is 16 and assumed.
858+
// tile_height is 16 or 32 for MM21.
859+
// src_stride_y is bytes per row of source ignoring tiling. e.g. 640
860+
// TODO: More detile row functions.
861+
862+
LIBYUV_API
863+
void DetilePlane(const uint8_t* src_y,
864+
int src_stride_y,
865+
uint8_t* dst_y,
866+
int dst_stride_y,
867+
int width,
868+
int height,
869+
int tile_height) {
870+
const ptrdiff_t src_tile_stride = 16 * tile_height;
871+
int y;
872+
void (*DetileRow)(const uint8_t* src, ptrdiff_t src_tile_stride, uint8_t* dst,
873+
int width) = DetileRow_C;
874+
assert(src_stride_y >= 0);
875+
assert(tile_height > 0);
876+
assert(src_stride_y > 0);
877+
878+
// Negative height means invert the image.
879+
if (height < 0) {
880+
height = -height;
881+
dst_y = dst_y + (height - 1) * dst_stride_y;
882+
dst_stride_y = -dst_stride_y;
883+
}
884+
885+
#if defined(HAS_DETILEROW_NEON)
886+
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
887+
DetileRow = DetileRow_NEON;
888+
}
889+
#endif
890+
891+
// Detile plane
892+
for (y = 0; y < height; ++y) {
893+
DetileRow(src_y, src_tile_stride, dst_y, width);
894+
dst_y += dst_stride_y;
895+
src_y += 16;
896+
// Advance to next row of tiles.
897+
if ((y & (tile_height - 1)) == (tile_height - 1)) {
898+
src_y = src_y - src_tile_stride + src_stride_y * tile_height;
899+
}
900+
}
901+
}
902+
856903
// Support function for NV12 etc RGB channels.
857904
// Width and height are plane sizes (typically half pixel width).
858905
LIBYUV_API

source/rotate_lsx.cc

+30-31
Original file line numberDiff line numberDiff line change
@@ -20,28 +20,28 @@ namespace libyuv {
2020
extern "C" {
2121
#endif
2222

23-
#define ILVLH_B(in0, in1, in2, in3, out0, out1, out2, out3) \
24-
{ \
25-
DUP2_ARG2(__lsx_vilvl_b, in1, in0, in3, in2, out0, out2); \
26-
DUP2_ARG2(__lsx_vilvh_b, in1, in0, in3, in2, out1, out3); \
23+
#define ILVLH_B(in0, in1, in2, in3, out0, out1, out2, out3) \
24+
{ \
25+
DUP2_ARG2(__lsx_vilvl_b, in1, in0, in3, in2, out0, out2); \
26+
DUP2_ARG2(__lsx_vilvh_b, in1, in0, in3, in2, out1, out3); \
2727
}
2828

29-
#define ILVLH_H(in0, in1, in2, in3, out0, out1, out2, out3) \
30-
{ \
31-
DUP2_ARG2(__lsx_vilvl_h, in1, in0, in3, in2, out0, out2); \
32-
DUP2_ARG2(__lsx_vilvh_h, in1, in0, in3, in2, out1, out3); \
29+
#define ILVLH_H(in0, in1, in2, in3, out0, out1, out2, out3) \
30+
{ \
31+
DUP2_ARG2(__lsx_vilvl_h, in1, in0, in3, in2, out0, out2); \
32+
DUP2_ARG2(__lsx_vilvh_h, in1, in0, in3, in2, out1, out3); \
3333
}
3434

35-
#define ILVLH_W(in0, in1, in2, in3, out0, out1, out2, out3) \
36-
{ \
37-
DUP2_ARG2(__lsx_vilvl_w, in1, in0, in3, in2, out0, out2); \
38-
DUP2_ARG2(__lsx_vilvh_w, in1, in0, in3, in2, out1, out3); \
35+
#define ILVLH_W(in0, in1, in2, in3, out0, out1, out2, out3) \
36+
{ \
37+
DUP2_ARG2(__lsx_vilvl_w, in1, in0, in3, in2, out0, out2); \
38+
DUP2_ARG2(__lsx_vilvh_w, in1, in0, in3, in2, out1, out3); \
3939
}
4040

41-
#define ILVLH_D(in0, in1, in2, in3, out0, out1, out2, out3) \
42-
{ \
43-
DUP2_ARG2(__lsx_vilvl_d, in1, in0, in3, in2, out0, out2); \
44-
DUP2_ARG2(__lsx_vilvh_d, in1, in0, in3, in2, out1, out3); \
41+
#define ILVLH_D(in0, in1, in2, in3, out0, out1, out2, out3) \
42+
{ \
43+
DUP2_ARG2(__lsx_vilvl_d, in1, in0, in3, in2, out0, out2); \
44+
DUP2_ARG2(__lsx_vilvh_d, in1, in0, in3, in2, out1, out3); \
4545
}
4646

4747
#define LSX_ST_4(_dst0, _dst1, _dst2, _dst3, _dst, _stride, _stride2, \
@@ -54,11 +54,11 @@ extern "C" {
5454
_dst += _stride4; \
5555
}
5656

57-
#define LSX_ST_2(_dst0, _dst1, _dst, _stride, _stride2) \
58-
{ \
59-
__lsx_vst(_dst0, _dst, 0); \
60-
__lsx_vstx(_dst1, _dst, _stride); \
61-
_dst += _stride2; \
57+
#define LSX_ST_2(_dst0, _dst1, _dst, _stride, _stride2) \
58+
{ \
59+
__lsx_vst(_dst0, _dst, 0); \
60+
__lsx_vstx(_dst1, _dst, _stride); \
61+
_dst += _stride2; \
6262
}
6363

6464
void TransposeWx16_C(const uint8_t* src,
@@ -84,15 +84,14 @@ void TransposeUVWx16_C(const uint8_t* src,
8484
dst_stride_a, (dst_b + 8), dst_stride_b, width);
8585
}
8686

87-
8887
void TransposeWx16_LSX(const uint8_t* src,
8988
int src_stride,
9089
uint8_t* dst,
9190
int dst_stride,
9291
int width) {
9392
int x;
9493
int len = width / 16;
95-
uint8_t *s;
94+
uint8_t* s;
9695
int src_stride2 = src_stride << 1;
9796
int src_stride3 = src_stride + src_stride2;
9897
int src_stride4 = src_stride2 << 1;
@@ -139,23 +138,23 @@ void TransposeWx16_LSX(const uint8_t* src,
139138
res8 = __lsx_vilvl_w(reg4, reg0);
140139
res9 = __lsx_vilvh_w(reg4, reg0);
141140
ILVLH_D(res0, res8, res1, res9, dst0, dst1, dst2, dst3);
142-
LSX_ST_4(dst0, dst1, dst2, dst3, dst, dst_stride, dst_stride2,
143-
dst_stride3, dst_stride4);
141+
LSX_ST_4(dst0, dst1, dst2, dst3, dst, dst_stride, dst_stride2, dst_stride3,
142+
dst_stride4);
144143
res8 = __lsx_vilvl_w(reg5, reg1);
145144
res9 = __lsx_vilvh_w(reg5, reg1);
146145
ILVLH_D(res2, res8, res3, res9, dst0, dst1, dst2, dst3);
147-
LSX_ST_4(dst0, dst1, dst2, dst3, dst, dst_stride, dst_stride2,
148-
dst_stride3, dst_stride4);
146+
LSX_ST_4(dst0, dst1, dst2, dst3, dst, dst_stride, dst_stride2, dst_stride3,
147+
dst_stride4);
149148
res8 = __lsx_vilvl_w(reg6, reg2);
150149
res9 = __lsx_vilvh_w(reg6, reg2);
151150
ILVLH_D(res4, res8, res5, res9, dst0, dst1, dst2, dst3);
152-
LSX_ST_4(dst0, dst1, dst2, dst3, dst, dst_stride, dst_stride2,
153-
dst_stride3, dst_stride4);
151+
LSX_ST_4(dst0, dst1, dst2, dst3, dst, dst_stride, dst_stride2, dst_stride3,
152+
dst_stride4);
154153
res8 = __lsx_vilvl_w(reg7, reg3);
155154
res9 = __lsx_vilvh_w(reg7, reg3);
156155
ILVLH_D(res6, res8, res7, res9, dst0, dst1, dst2, dst3);
157-
LSX_ST_4(dst0, dst1, dst2, dst3, dst, dst_stride, dst_stride2,
158-
dst_stride3, dst_stride4);
156+
LSX_ST_4(dst0, dst1, dst2, dst3, dst, dst_stride, dst_stride2, dst_stride3,
157+
dst_stride4);
159158
src += 16;
160159
}
161160
}

source/row_common.cc

+15
Original file line numberDiff line numberDiff line change
@@ -2659,6 +2659,21 @@ void RGB24MirrorRow_C(const uint8_t* src_rgb24, uint8_t* dst_rgb24, int width) {
26592659
}
26602660
}
26612661

2662+
// Copy one row of `width` bytes out of a run of tiles into linear memory.
//
//   src              first byte of this row inside the first tile.
//   src_tile_stride  bytes from one tile to the next tile to its right.
//   dst              destination row; receives exactly `width` bytes.
//   width            bytes to copy. Zero or negative copies nothing.
//
// Each tile contributes 16 consecutive bytes; when width is not a multiple
// of 16 the final tile contributes only the remaining (width & 15) bytes.
void DetileRow_C(const uint8_t* src,
                 ptrdiff_t src_tile_stride,
                 uint8_t* dst,
                 int width) {
  int x;
  // Without this guard a negative width skips the loop but still has a
  // non-zero (width & 15), which would copy stray bytes.
  if (width <= 0) {
    return;
  }
  // Whole 16-byte segments, one per tile.
  for (x = 0; x < width - 15; x += 16) {
    memcpy(dst, src, 16);
    dst += 16;
    src += src_tile_stride;
  }
  // Partial trailing segment.
  if (width & 15) {
    memcpy(dst, src, (size_t)(width & 15));
  }
}
2676+
26622677
void SplitUVRow_C(const uint8_t* src_uv,
26632678
uint8_t* dst_u,
26642679
uint8_t* dst_v,

0 commit comments

Comments
 (0)