Skip to content

Commit f145aa2

Browse files
committed
Add SME2 detect
Bug: None
Change-Id: I36e576de1cf468049faaf3923b6c21fc9ad14271
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/6401373
Reviewed-by: George Steed <[email protected]>
1 parent 64ac2d8 commit f145aa2

File tree

4 files changed

+20
-6
lines changed

4 files changed

+20
-6
lines changed

include/libyuv/cpu_id.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ static const int kCpuHasNeonI8MM = 0x400;
2929
static const int kCpuHasSVE = 0x800;
3030
static const int kCpuHasSVE2 = 0x1000;
3131
static const int kCpuHasSME = 0x2000;
32+
static const int kCpuHasSME2 = 0x4000;
3233

3334
// These flags are only valid on RISCV processors.
3435
static const int kCpuHasRISCV = 0x4;

source/cpu_id.cc

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -182,11 +182,12 @@ LIBYUV_API SAFEBUFFERS int ArmCpuCaps(const char* cpuinfo_name) {
182182
#ifdef __linux__
183183
// Define hwcap values ourselves: building with an old auxv header where these
184184
// hwcap values are not defined should not prevent features from being enabled.
185-
#define YUV_AARCH64_HWCAP_ASIMDDP (1 << 20)
186-
#define YUV_AARCH64_HWCAP_SVE (1 << 22)
187-
#define YUV_AARCH64_HWCAP2_SVE2 (1 << 1)
188-
#define YUV_AARCH64_HWCAP2_I8MM (1 << 13)
189-
#define YUV_AARCH64_HWCAP2_SME (1 << 23)
185+
#define YUV_AARCH64_HWCAP_ASIMDDP (1UL << 20)
186+
#define YUV_AARCH64_HWCAP_SVE (1UL << 22)
187+
#define YUV_AARCH64_HWCAP2_SVE2 (1UL << 1)
188+
#define YUV_AARCH64_HWCAP2_I8MM (1UL << 13)
189+
#define YUV_AARCH64_HWCAP2_SME (1UL << 23)
190+
#define YUV_AARCH64_HWCAP2_SME2 (1UL << 37)
190191

191192
// For AArch64, but public to allow testing on any CPU.
192193
LIBYUV_API SAFEBUFFERS int AArch64CpuCaps(unsigned long hwcap,
@@ -210,6 +211,9 @@ LIBYUV_API SAFEBUFFERS int AArch64CpuCaps(unsigned long hwcap,
210211
features |= kCpuHasSVE2;
211212
if (hwcap2 & YUV_AARCH64_HWCAP2_SME) {
212213
features |= kCpuHasSME;
214+
if (hwcap2 & YUV_AARCH64_HWCAP2_SME2) {
215+
features |= kCpuHasSME2;
216+
}
213217
}
214218
}
215219
}
@@ -256,8 +260,11 @@ LIBYUV_API SAFEBUFFERS int AArch64CpuCaps() {
256260
features |= kCpuHasNeonDotProd;
257261
if (have_feature("hw.optional.arm.FEAT_I8MM")) {
258262
features |= kCpuHasNeonI8MM;
259-
if (have_feature("hw.optional.arm.FEAT_SME2")) {
263+
if (have_feature("hw.optional.arm.FEAT_SME")) {
260264
features |= kCpuHasSME;
265+
if (have_feature("hw.optional.arm.FEAT_SME2")) {
266+
features |= kCpuHasSME2;
267+
}
261268
}
262269
}
263270
}

unit_test/cpu_test.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,13 +101,15 @@ TEST_F(LibYUVBaseTest, TestCpuHas) {
101101
int has_sve = TestCpuFlag(kCpuHasSVE);
102102
int has_sve2 = TestCpuFlag(kCpuHasSVE2);
103103
int has_sme = TestCpuFlag(kCpuHasSME);
104+
int has_sme2 = TestCpuFlag(kCpuHasSME2);
104105
printf("Has Arm 0x%x\n", has_arm);
105106
printf("Has Neon 0x%x\n", has_neon);
106107
printf("Has Neon DotProd 0x%x\n", has_neon_dotprod);
107108
printf("Has Neon I8MM 0x%x\n", has_neon_i8mm);
108109
printf("Has SVE 0x%x\n", has_sve);
109110
printf("Has SVE2 0x%x\n", has_sve2);
110111
printf("Has SME 0x%x\n", has_sme);
112+
printf("Has SME2 0x%x\n", has_sme2);
111113

112114
#if defined(__aarch64__)
113115
// Read and print the SVE and SME vector lengths.
@@ -381,6 +383,8 @@ TEST_F(LibYUVBaseTest, TestLinuxAArch64) {
381383
// Check for SME feature detection.
382384
expected |= kCpuHasSME;
383385
EXPECT_EQ(expected, AArch64CpuCaps(0x3fffffffU, 0x82f3ffU));
386+
387+
// TODO: Add an SME2 feature detection check using hwcap values from an Apple M4.
384388
}
385389
#endif
386390

util/cpuid.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,13 +61,15 @@ int main(int argc, const char* argv[]) {
6161
int has_sve = TestCpuFlag(kCpuHasSVE);
6262
int has_sve2 = TestCpuFlag(kCpuHasSVE2);
6363
int has_sme = TestCpuFlag(kCpuHasSME);
64+
int has_sme2 = TestCpuFlag(kCpuHasSME2);
6465
printf("Has Arm 0x%x\n", has_arm);
6566
printf("Has Neon 0x%x\n", has_neon);
6667
printf("Has Neon DotProd 0x%x\n", has_neon_dotprod);
6768
printf("Has Neon I8MM 0x%x\n", has_neon_i8mm);
6869
printf("Has SVE 0x%x\n", has_sve);
6970
printf("Has SVE2 0x%x\n", has_sve2);
7071
printf("Has SME 0x%x\n", has_sme);
72+
printf("Has SME2 0x%x\n", has_sme2);
7173

7274
#if __aarch64__
7375
// Read and print the SVE and SME vector lengths.

0 commit comments

Comments
 (0)