From 24e7b25dc349f280ad80c4de576a876f288e0291 Mon Sep 17 00:00:00 2001 From: Dylan Fleming Date: Mon, 8 Dec 2025 11:56:56 +0000 Subject: [PATCH 1/6] Initial commit for mathvec directories Created a mathvec/ file structure that mirrors the structure of the math/ directory, and added a dummy expf implementation. --- libc/CMakeLists.txt | 2 ++ libc/config/linux/aarch64/entrypoints.txt | 7 ++++ libc/config/linux/x86_64/entrypoints.txt | 7 ++++ libc/lib/CMakeLists.txt | 6 ++-- libc/src/CMakeLists.txt | 5 +++ libc/src/__support/CMakeLists.txt | 3 ++ libc/src/__support/mathvec/CMakeLists.txt | 24 +++++++++++++ libc/src/__support/mathvec/expf.h | 33 +++++++++++++++++ libc/src/mathvec/CMakeLists.txt | 43 +++++++++++++++++++++++ libc/src/mathvec/expf.h | 21 +++++++++++ libc/src/mathvec/generic/CMakeLists.txt | 12 +++++++ libc/src/mathvec/generic/expf.cpp | 18 ++++++++++ 12 files changed, 178 insertions(+), 3 deletions(-) create mode 100644 libc/src/__support/mathvec/CMakeLists.txt create mode 100644 libc/src/__support/mathvec/expf.h create mode 100644 libc/src/mathvec/CMakeLists.txt create mode 100644 libc/src/mathvec/expf.h create mode 100644 libc/src/mathvec/generic/CMakeLists.txt create mode 100644 libc/src/mathvec/generic/expf.cpp diff --git a/libc/CMakeLists.txt b/libc/CMakeLists.txt index 4e6b4195a9c5e..2d474f08841ca 100644 --- a/libc/CMakeLists.txt +++ b/libc/CMakeLists.txt @@ -350,6 +350,7 @@ include(LLVMLibCRules) set(TARGET_LLVMLIBC_ENTRYPOINTS "") set(TARGET_LIBC_ENTRYPOINTS "") set(TARGET_LIBM_ENTRYPOINTS "") +set(TARGET_LIBMVEC_ENTRYPOINTS "") set(TARGET_LLVMLIBC_REMOVED_ENTRYPOINTS "") # Check entrypoints.txt @@ -380,6 +381,7 @@ foreach(removed_entrypoint IN LISTS TARGET_LLVMLIBC_REMOVED_ENTRYPOINTS) list(REMOVE_ITEM TARGET_LLVMLIBC_ENTRYPOINTS ${removed_entrypoint}) list(REMOVE_ITEM TARGET_LIBC_ENTRYPOINTS ${removed_entrypoint}) list(REMOVE_ITEM TARGET_LIBM_ENTRYPOINTS ${removed_entrypoint}) + list(REMOVE_ITEM TARGET_LIBMVEC_ENTRYPOINTS ${removed_entrypoint}) 
endforeach() set(TARGET_ENTRYPOINT_NAME_LIST "") diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt index 970c825bbfc96..d0db8b0acff2b 100644 --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -1178,7 +1178,14 @@ if(LLVM_LIBC_FULL_BUILD) ) endif() +if(LIBC_COMPILER_HAS_EXT_VECTOR_TYPE) + set(TARGET_LIBMVEC_ENTRYPOINTS + libc.src.mathvec.expf + ) +endif() + set(TARGET_LLVMLIBC_ENTRYPOINTS ${TARGET_LIBC_ENTRYPOINTS} ${TARGET_LIBM_ENTRYPOINTS} + ${TARGET_LIBMVEC_ENTRYPOINTS} ) diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 9399b284fa2da..7c5e649968223 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -1396,7 +1396,14 @@ if(LLVM_LIBC_FULL_BUILD) ) endif() +if(LIBC_COMPILER_HAS_EXT_VECTOR_TYPE) + set(TARGET_LIBMVEC_ENTRYPOINTS + libc.src.mathvec.expf + ) +endif() + set(TARGET_LLVMLIBC_ENTRYPOINTS ${TARGET_LIBC_ENTRYPOINTS} ${TARGET_LIBM_ENTRYPOINTS} + ${TARGET_LIBMVEC_ENTRYPOINTS} ) diff --git a/libc/lib/CMakeLists.txt b/libc/lib/CMakeLists.txt index ce0b07fb6cb49..19751a9cc0736 100644 --- a/libc/lib/CMakeLists.txt +++ b/libc/lib/CMakeLists.txt @@ -2,10 +2,10 @@ set(libc_archive_targets "") set(libc_archive_names "") set(libc_archive_entrypoint_lists "") if(LLVM_LIBC_FULL_BUILD) - list(APPEND libc_archive_names c m) - list(APPEND libc_archive_targets libc libm) + list(APPEND libc_archive_names c m mvec) + list(APPEND libc_archive_targets libc libm libmvec) list(APPEND libc_archive_entrypoint_lists - TARGET_LIBC_ENTRYPOINTS TARGET_LIBM_ENTRYPOINTS) + TARGET_LIBC_ENTRYPOINTS TARGET_LIBM_ENTRYPOINTS TARGET_LIBMVEC_ENTRYPOINTS) else() list(APPEND libc_archive_names llvmlibc) list(APPEND libc_archive_targets libc) diff --git a/libc/src/CMakeLists.txt b/libc/src/CMakeLists.txt index b2afe0a33acee..8f005a700a80f 100644 --- a/libc/src/CMakeLists.txt +++ 
b/libc/src/CMakeLists.txt @@ -29,6 +29,11 @@ if(${LIBC_TARGET_OS} STREQUAL "linux") add_subdirectory(termios) endif() +if(LIBC_COMPILER_HAS_EXT_VECTOR_TYPE) + message(STATUS "Vector math enabled") + add_subdirectory(mathvec) +endif() + if(NOT LLVM_LIBC_FULL_BUILD) return() endif() diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt index c7f127d6934a0..1ebd7dd2621a2 100644 --- a/libc/src/__support/CMakeLists.txt +++ b/libc/src/__support/CMakeLists.txt @@ -416,3 +416,6 @@ if(NOT (LIBC_TARGET_OS_IS_DARWIN)) endif() add_subdirectory(math) +if(LIBC_COMPILER_HAS_EXT_VECTOR_TYPE) + add_subdirectory(mathvec) +endif() diff --git a/libc/src/__support/mathvec/CMakeLists.txt b/libc/src/__support/mathvec/CMakeLists.txt new file mode 100644 index 0000000000000..ad66dd1aadbaa --- /dev/null +++ b/libc/src/__support/mathvec/CMakeLists.txt @@ -0,0 +1,24 @@ +add_header_library( + common_constants + HDRS + common_constants.h + DEPENDS +) + +add_header_library( + expf_utils + HDRS + expf_utils.h + DEPENDS + libc.src.__support.CPP.simd + libc.src.__support.mathvec.common_constants +) + +add_header_library( + expf + HDRS + expf.h + DEPENDS + libc.src.__support.CPP.simd + libc.src.__support.math.expf +) \ No newline at end of file diff --git a/libc/src/__support/mathvec/expf.h b/libc/src/__support/mathvec/expf.h new file mode 100644 index 0000000000000..1328df27aced6 --- /dev/null +++ b/libc/src/__support/mathvec/expf.h @@ -0,0 +1,33 @@ +//===-- Implementation header for SIMD expf ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_MATHVEC_EXPF_H +#define LLVM_LIBC_SRC___SUPPORT_MATHVEC_EXPF_H + +#include "src/__support/CPP/simd.h" +#include "src/__support/math/expf.h" + +namespace LIBC_NAMESPACE_DECL { + +namespace mathvec { + +template +LIBC_INLINE static constexpr cpp::simd expf(cpp::simd x) { + cpp::simd ret = 0.0f; + + for (size_t i = 0; i < N; i++) + ret[i] = math::expf(x[i]); + + return ret; +} + +} // namespace mathvec + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC___SUPPORT_MATHVEC_EXPF_H diff --git a/libc/src/mathvec/CMakeLists.txt b/libc/src/mathvec/CMakeLists.txt new file mode 100644 index 0000000000000..dff1dd3b70b51 --- /dev/null +++ b/libc/src/mathvec/CMakeLists.txt @@ -0,0 +1,43 @@ +add_subdirectory(generic) +if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_ARCHITECTURE}) + add_subdirectory(${LIBC_TARGET_ARCHITECTURE}) +endif() + +function(add_vector_math_entrypoint_object name) + # We prefer machine specific implementation if available. Hence we check + # that first and return early if we are able to add an alias target for the + # machine specific implementation. + get_fq_target_name("${LIBC_TARGET_ARCHITECTURE}.${name}" fq_machine_specific_target_name) + if(TARGET ${fq_machine_specific_target_name}) + add_entrypoint_object( + ${name} + ALIAS + DEPENDS + .${LIBC_TARGET_ARCHITECTURE}.${name} + ) + return() + endif() + + get_fq_target_name("generic.${name}" fq_generic_target_name) + if(TARGET ${fq_generic_target_name}) + add_entrypoint_object( + ${name} + ALIAS + DEPENDS + .generic.${name} + ) + return() + endif() + + # Add a dummy entrypoint object for missing implementations. They will be skipped + # anyway as there will be no entry for them in the target entrypoints list. 
+ add_entrypoint_object( + ${name} + SRCS + dummy_srcs + HDRS + dummy_hdrs + ) +endfunction() + +add_vector_math_entrypoint_object(expf) diff --git a/libc/src/mathvec/expf.h b/libc/src/mathvec/expf.h new file mode 100644 index 0000000000000..257fb485838af --- /dev/null +++ b/libc/src/mathvec/expf.h @@ -0,0 +1,21 @@ +//===-- Implementation header for SIMD expf ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATHVEC_EXPF_H +#define LLVM_LIBC_SRC_MATHVEC_EXPF_H + +#include "src/__support/CPP/simd.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +cpp::simd expf(cpp::simd x); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATHVEC_EXPF_H diff --git a/libc/src/mathvec/generic/CMakeLists.txt b/libc/src/mathvec/generic/CMakeLists.txt new file mode 100644 index 0000000000000..769794d6d5eee --- /dev/null +++ b/libc/src/mathvec/generic/CMakeLists.txt @@ -0,0 +1,12 @@ +add_entrypoint_object( + expf + SRCS + expf.cpp + HDRS + ../expf.h + DEPENDS + libc.src.__support.CPP.simd + FLAGS + ROUND_OPT + FMA_OPT +) diff --git a/libc/src/mathvec/generic/expf.cpp b/libc/src/mathvec/generic/expf.cpp new file mode 100644 index 0000000000000..2e113d17e8680 --- /dev/null +++ b/libc/src/mathvec/generic/expf.cpp @@ -0,0 +1,18 @@ +//===-- Single-precision SIMD e^x vector function -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/mathvec/expf.h" +#include "src/__support/mathvec/expf.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(cpp::simd, expf, (cpp::simd x)) { + return mathvec::expf(x); +} + +} // namespace LIBC_NAMESPACE_DECL From 609371f7f3684c75c28ca16d18933319060440df Mon Sep 17 00:00:00 2001 From: Pierre Blanchard Date: Fri, 5 Sep 2025 07:19:49 +0000 Subject: [PATCH 2/6] Add simple vector math tester Simplistic checks based on a wrapper duplicating a scalar input. Introduce SIMD matcher (ultimately should be moved to test utils/) - Comparison with exception is supported. - Test all rounding modes in one line. - Control lanes A potential improvement could be made later: load vector in test over range instead of duplicating input. That would reduce run time of vector testing. --- libc/test/UnitTest/SIMDMatcher.h | 61 ++++++++++++ libc/test/src/CMakeLists.txt | 4 + libc/test/src/mathvec/AddTest.h | 87 +++++++++++++++++ libc/test/src/mathvec/CMakeLists.txt | 14 +++ libc/test/src/mathvec/expf_test.cpp | 139 +++++++++++++++++++++++++++ 5 files changed, 305 insertions(+) create mode 100644 libc/test/UnitTest/SIMDMatcher.h create mode 100644 libc/test/src/mathvec/AddTest.h create mode 100644 libc/test/src/mathvec/CMakeLists.txt create mode 100644 libc/test/src/mathvec/expf_test.cpp diff --git a/libc/test/UnitTest/SIMDMatcher.h b/libc/test/UnitTest/SIMDMatcher.h new file mode 100644 index 0000000000000..542d311ddbd89 --- /dev/null +++ b/libc/test/UnitTest/SIMDMatcher.h @@ -0,0 +1,61 @@ +//===-- SIMDMatchers.h ------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TEST_UNITTEST_SIMDMATCHER_H +#define LLVM_LIBC_TEST_UNITTEST_SIMDMATCHER_H + +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/architectures.h" +#include "test/UnitTest/FPMatcher.h" +#include "test/UnitTest/Test.h" + +#include "hdr/math_macros.h" + +#define EXPECT_SIMD_EQ(REF, RES) \ + for (size_t i = 0; \ + i < LIBC_NAMESPACE::cpp::internal::native_vector_size; i++) { \ + EXPECT_FP_EQ(REF[i], RES[i]); \ + } + +#define EXPECT_SIMD_EQ_WITH_EXCEPTION(REF, RES, EXCEPTION) \ + for (size_t i = 0; \ + i < LIBC_NAMESPACE::cpp::internal::native_vector_size; i++) { \ + EXPECT_FP_EQ_WITH_EXCEPTION(REF[i], RES[i], EXCEPTION); \ + } + +#define EXPECT_SIMD_EQ_ROUNDING_MODE(expected, actual, rounding_mode) \ + do { \ + using namespace LIBC_NAMESPACE::fputil::testing; \ + ForceRoundingMode __r((rounding_mode)); \ + if (__r.success) { \ + EXPECT_SIMD_EQ((expected), (actual)) \ + } \ + } while (0) + +#define EXPECT_SIMD_EQ_ROUNDING_NEAREST(expected, actual) \ + EXPECT_SIMD_EQ_ROUNDING_MODE((expected), (actual), RoundingMode::Nearest) + +#define EXPECT_SIMD_EQ_ROUNDING_UPWARD(expected, actual) \ + EXPECT_SIMD_EQ_ROUNDING_MODE((expected), (actual), RoundingMode::Upward) + +#define EXPECT_SIMD_EQ_ROUNDING_DOWNWARD(expected, actual) \ + EXPECT_SIMD_EQ_ROUNDING_MODE((expected), (actual), RoundingMode::Downward) + +#define EXPECT_SIMD_EQ_ROUNDING_TOWARD_ZERO(expected, actual) \ + EXPECT_SIMD_EQ_ROUNDING_MODE((expected), (actual), RoundingMode::TowardZero) + +#define EXPECT_SIMD_EQ_ALL_ROUNDING(expected, actual) \ + do { \ + EXPECT_SIMD_EQ_ROUNDING_NEAREST((expected), (actual)); \ + EXPECT_SIMD_EQ_ROUNDING_UPWARD((expected), (actual)); \ + EXPECT_SIMD_EQ_ROUNDING_DOWNWARD((expected), (actual)); \ + EXPECT_SIMD_EQ_ROUNDING_TOWARD_ZERO((expected), 
(actual)); \ + } while (0) + +#endif // LLVM_LIBC_TEST_UNITTEST_SIMDMATCHER_H diff --git a/libc/test/src/CMakeLists.txt b/libc/test/src/CMakeLists.txt index 0c6ec9f07a9b7..5e61c739c066a 100644 --- a/libc/test/src/CMakeLists.txt +++ b/libc/test/src/CMakeLists.txt @@ -87,6 +87,10 @@ if(${LIBC_TARGET_OS} STREQUAL "linux") add_subdirectory(termios) endif() +if(LIBC_COMPILER_HAS_EXT_VECTOR_TYPE) + add_subdirectory(mathvec) +endif() + if(NOT LLVM_LIBC_FULL_BUILD) return() endif() diff --git a/libc/test/src/mathvec/AddTest.h b/libc/test/src/mathvec/AddTest.h new file mode 100644 index 0000000000000..2f7d0ec6904e3 --- /dev/null +++ b/libc/test/src/mathvec/AddTest.h @@ -0,0 +1,87 @@ +//===-- Utility class to test different flavors of float add ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TEST_SRC_MATHVEC_ADDTEST_H +#define LLVM_LIBC_TEST_SRC_MATHVEC_ADDTEST_H + +#include "src/__support/CPP/algorithm.h" +#include "test/UnitTest/FEnvSafeTest.h" +#include "test/UnitTest/FPMatcher.h" +#include "test/UnitTest/Test.h" +#include "utils/MPFRWrapper/MPFRUtils.h" + +namespace mpfr = LIBC_NAMESPACE::testing::mpfr; + +template +class AddTest : public LIBC_NAMESPACE::testing::FEnvSafeTest { + + struct InConstants { + DECLARE_SPECIAL_CONSTANTS(InType) + }; + + using InFPBits = typename InConstants::FPBits; + using InStorageType = typename InConstants::StorageType; + + static constexpr InStorageType IN_MAX_NORMAL_U = + InFPBits::max_normal().uintval(); + static constexpr InStorageType IN_MIN_NORMAL_U = + InFPBits::min_normal().uintval(); + static constexpr InStorageType IN_MAX_SUBNORMAL_U = + InFPBits::max_subnormal().uintval(); + static constexpr InStorageType IN_MIN_SUBNORMAL_U = + 
InFPBits::min_subnormal().uintval(); + +public: + using AddFunc = OutType (*)(InType, InType); + + void test_subnormal_range(AddFunc func) { + constexpr int COUNT = 100'001; + constexpr InStorageType STEP = LIBC_NAMESPACE::cpp::max( + static_cast((IN_MAX_SUBNORMAL_U - IN_MIN_SUBNORMAL_U) / + COUNT), + InStorageType(1)); + for (InStorageType i = IN_MIN_SUBNORMAL_U; i <= IN_MAX_SUBNORMAL_U; + i += STEP) { + InType x = InFPBits(i).get_val(); + InType y = InFPBits(static_cast(IN_MAX_SUBNORMAL_U - i)) + .get_val(); + mpfr::BinaryInput input{x, y}; + EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Add, input, func(x, y), + 0.5); + } + } + + void test_normal_range(AddFunc func) { + constexpr int COUNT = 100'001; + constexpr InStorageType STEP = LIBC_NAMESPACE::cpp::max( + static_cast((IN_MAX_NORMAL_U - IN_MIN_NORMAL_U) / COUNT), + InStorageType(1)); + for (InStorageType i = IN_MIN_NORMAL_U; i <= IN_MAX_NORMAL_U; i += STEP) { + InType x = InFPBits(i).get_val(); + InType y = + InFPBits(static_cast(IN_MAX_NORMAL_U - i)).get_val(); + mpfr::BinaryInput input{x, y}; + EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Add, input, func(x, y), + 0.5); + } + } +}; + +#define LIST_ADD_TESTS(OutType, InType, func) \ + using LlvmLibcAddTest = AddTest; \ + TEST_F(LlvmLibcAddTest, SubnormalRange) { test_subnormal_range(&func); } \ + TEST_F(LlvmLibcAddTest, NormalRange) { test_normal_range(&func); } + +#define LIST_ADD_SAME_TYPE_TESTS(suffix, OutType, InType, func) \ + using LlvmLibcAddTest##suffix = AddTest; \ + TEST_F(LlvmLibcAddTest##suffix, SubnormalRange) { \ + test_subnormal_range(&func); \ + } \ + TEST_F(LlvmLibcAddTest##suffix, NormalRange) { test_normal_range(&func); } + +#endif // LLVM_LIBC_TEST_SRC_MATHVEC_ADDTEST_H diff --git a/libc/test/src/mathvec/CMakeLists.txt b/libc/test/src/mathvec/CMakeLists.txt new file mode 100644 index 0000000000000..78f4b16fc4af5 --- /dev/null +++ b/libc/test/src/mathvec/CMakeLists.txt @@ -0,0 +1,14 @@ +add_custom_target(libc-mathvec-unittests) + 
+add_fp_unittest( + expf_test + SUITE + libc-mathvec-unittests + SRCS + expf_test.cpp + DEPENDS + libc.src.math.expf + libc.src.mathvec.expf + libc.src.__support.FPUtil.fp_bits + libc.src.__support.CPP.simd +) diff --git a/libc/test/src/mathvec/expf_test.cpp b/libc/test/src/mathvec/expf_test.cpp new file mode 100644 index 0000000000000..5bb8486adbcdf --- /dev/null +++ b/libc/test/src/mathvec/expf_test.cpp @@ -0,0 +1,139 @@ +//===-- Unittests for expf ------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "hdr/math_macros.h" +#include "src/__support/CPP/simd.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/math/expf.h" +#include "src/mathvec/expf.h" +#include "test/UnitTest/SIMDMatcher.h" +#include "test/UnitTest/Test.h" + +#include "hdr/stdint_proxy.h" + +using LlvmLibcVecExpfTest = LIBC_NAMESPACE::testing::FPTest; + +// Wrappers + +// In order to test vector we can either duplicate a scalar input +// or do something more elaborate. In any case that requires a wrapper +// since the function call is written in this file. + +// Run reference on a vector with lanes duplicated from a scalar input. + +// with control lane +static LIBC_NAMESPACE::cpp::simd wrap_ref_vexpf(float x, float control) { + LIBC_NAMESPACE::cpp::simd v(x); + v[0] = control; + constexpr size_t N = LIBC_NAMESPACE::cpp::internal::native_vector_size; + for (size_t i = 0; i < N; i++) { + v[i] = LIBC_NAMESPACE::expf(v[i]); + } + return v; +} + +// without control lane +static LIBC_NAMESPACE::cpp::simd wrap_ref_vexpf(float x) { + return wrap_ref_vexpf(x, x); +} + +// Run implementation on a vector with lanes duplicated from a scalar input. 
+ +// with control lane +static LIBC_NAMESPACE::cpp::simd wrap_vexpf(float x, float control) { + LIBC_NAMESPACE::cpp::simd v(x); + v[0] = control; + return LIBC_NAMESPACE::expf(v); +} + +// without control lane +static LIBC_NAMESPACE::cpp::simd wrap_vexpf(float x) { + return wrap_vexpf(x, x); +} + +TEST_F(LlvmLibcVecExpfTest, SpecialNumbers) { + EXPECT_SIMD_EQ(LIBC_NAMESPACE::cpp::splat(aNaN), wrap_vexpf(aNaN)); + + EXPECT_SIMD_EQ(LIBC_NAMESPACE::cpp::splat(inf), wrap_vexpf(inf)); + + EXPECT_SIMD_EQ(LIBC_NAMESPACE::cpp::splat(0.0f), wrap_vexpf(neg_inf)); + + EXPECT_SIMD_EQ(LIBC_NAMESPACE::cpp::splat(1.0f), wrap_vexpf(0.0f)); + + EXPECT_SIMD_EQ(LIBC_NAMESPACE::cpp::splat(1.0f), wrap_vexpf(-0.0f)); +} + +TEST_F(LlvmLibcVecExpfTest, Overflow) { + // Exception still supported with current impl and test, but might not endup + // being tested. + EXPECT_SIMD_EQ_WITH_EXCEPTION(LIBC_NAMESPACE::cpp::splat(inf), + wrap_vexpf(FPBits(0x7f7fffffU).get_val()), + FE_OVERFLOW); + + EXPECT_SIMD_EQ_WITH_EXCEPTION(LIBC_NAMESPACE::cpp::splat(inf), + wrap_vexpf(FPBits(0x42cffff8U).get_val()), + FE_OVERFLOW); + + EXPECT_SIMD_EQ_WITH_EXCEPTION(LIBC_NAMESPACE::cpp::splat(inf), + wrap_vexpf(FPBits(0x42d00008U).get_val()), + FE_OVERFLOW); +} + +TEST_F(LlvmLibcVecExpfTest, Underflow) { + // Exception still supported with current impl and test, but eventually won't + // be tested. + EXPECT_SIMD_EQ_WITH_EXCEPTION(LIBC_NAMESPACE::cpp::splat(0.0f), + wrap_vexpf(FPBits(0xff7fffffU).get_val()), + FE_UNDERFLOW); + + float x = FPBits(0xc2cffff8U).get_val(); + EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x, 1.0), wrap_vexpf(x, 1.0)); + + x = FPBits(0xc2d00008U).get_val(); + EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x, 1.0), wrap_vexpf(x, 1.0)); +} + +// Test with inputs which are the borders of underflow/overflow but still +// produce valid results without setting errno. +// Is this still relevant to vector function? 
+TEST_F(LlvmLibcVecExpfTest, Borderline) { + float x; + + x = FPBits(0x42affff8U).get_val(); + // Do we need ASSERT? If so it needs a version for all rounding modes + EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x, 1.0), wrap_vexpf(x, 1.0)); + + x = FPBits(0x42b00008U).get_val(); + EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x, 1.0), wrap_vexpf(x, 1.0)); + + x = FPBits(0xc2affff8U).get_val(); + EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x, 1.0), wrap_vexpf(x, 1.0)); + + x = FPBits(0xc2b00008U).get_val(); + EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x, 1.0), wrap_vexpf(x, 1.0)); + + x = FPBits(0xc236bd8cU).get_val(); + EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x, 1.0), wrap_vexpf(x, 1.0)); +} + +TEST_F(LlvmLibcVecExpfTest, InFloatRange) { + constexpr uint32_t COUNT = 100'000; + constexpr uint32_t STEP = UINT32_MAX / COUNT; + for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) { + float x = FPBits(v).get_val(); + if (FPBits(v).is_nan() || FPBits(v).is_inf()) + continue; + EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x), wrap_vexpf(x)); + EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x, aNaN), wrap_vexpf(x, aNaN)); + EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x, inf), wrap_vexpf(x, inf)); + EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x, -inf), + wrap_vexpf(x, neg_inf)); + EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x, 0.0), wrap_vexpf(x, 0.0)); + EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x, -0.0), wrap_vexpf(x, -0.0)); + } +} From c8f7e7e54fb9eaeef6833333101d97d1a7466d87 Mon Sep 17 00:00:00 2001 From: Dylan Fleming Date: Thu, 11 Dec 2025 19:28:55 +0000 Subject: [PATCH 3/6] Add CR (RN only) vector expf Implements a fully vectorised expf routine that is correctly rounded in round-to-nearest mode. It serves as an example of what a correctly rounded vector routine looks like when written with the LLVM libc SIMD types. 
--- libc/src/__support/mathvec/CMakeLists.txt | 15 ++++- libc/src/__support/mathvec/common_constants.h | 40 ++++++++++++++ libc/src/__support/mathvec/expf.h | 55 +++++++++++++++++-- libc/src/__support/mathvec/expf_utils.h | 29 ++++++++++ libc/src/__support/mathvec/vector_utils.h | 46 ++++++++++++++++ libc/test/src/mathvec/expf_test.cpp | 52 ++++++++---------- 6 files changed, 200 insertions(+), 37 deletions(-) create mode 100644 libc/src/__support/mathvec/common_constants.h create mode 100644 libc/src/__support/mathvec/expf_utils.h create mode 100644 libc/src/__support/mathvec/vector_utils.h diff --git a/libc/src/__support/mathvec/CMakeLists.txt b/libc/src/__support/mathvec/CMakeLists.txt index ad66dd1aadbaa..4b6e31ad29929 100644 --- a/libc/src/__support/mathvec/CMakeLists.txt +++ b/libc/src/__support/mathvec/CMakeLists.txt @@ -19,6 +19,17 @@ add_header_library( HDRS expf.h DEPENDS + libc.src.__support.common libc.src.__support.CPP.simd - libc.src.__support.math.expf -) \ No newline at end of file + libc.src.__support.FPUtil.FPBits + libc.src.__support.mathvec.expf_utils + libc.src.__support.mathvec.vector_utils +) + +add_header_library( + vector_utils + HDRS + vector_utils.h + DEPENDS + libc.src.__support.CPP.simd +) diff --git a/libc/src/__support/mathvec/common_constants.h b/libc/src/__support/mathvec/common_constants.h new file mode 100644 index 0000000000000..c235d6842e5b0 --- /dev/null +++ b/libc/src/__support/mathvec/common_constants.h @@ -0,0 +1,40 @@ +//===-- Common constants for mathvec functions ------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_MATHVEC_COMMON_CONSTANTS_H +#define LLVM_LIBC_SRC___SUPPORT_MATHVEC_COMMON_CONSTANTS_H + +namespace LIBC_NAMESPACE_DECL { + +namespace common_constants_internal { + +// Lookup table for mantissas of 2^(i / 64) with i = 0, ..., 63. +static constexpr uint64_t EXP_MANTISSA[64] = { + 0x0000000000000, 0x02c9a3e778061, 0x059b0d3158574, 0x0874518759bc8, + 0x0b5586cf9890f, 0x0e3ec32d3d1a2, 0x11301d0125b51, 0x1429aaea92de0, + 0x172b83c7d517b, 0x1a35beb6fcb75, 0x1d4873168b9aa, 0x2063b88628cd6, + 0x2387a6e756238, 0x26b4565e27cdd, 0x29e9df51fdee1, 0x2d285a6e4030b, + 0x306fe0a31b715, 0x33c08b26416ff, 0x371a7373aa9cb, 0x3a7db34e59ff7, + 0x3dea64c123422, 0x4160a21f72e2a, 0x44e086061892d, 0x486a2b5c13cd0, + 0x4bfdad5362a27, 0x4f9b2769d2ca7, 0x5342b569d4f82, 0x56f4736b527da, + 0x5ab07dd485429, 0x5e76f15ad2148, 0x6247eb03a5585, 0x6623882552225, + 0x6a09e667f3bcd, 0x6dfb23c651a2f, 0x71f75e8ec5f74, 0x75feb564267c9, + 0x7a11473eb0187, 0x7e2f336cf4e62, 0x82589994cce13, 0x868d99b4492ed, + 0x8ace5422aa0db, 0x8f1ae99157736, 0x93737b0cdc5e5, 0x97d829fde4e50, + 0x9c49182a3f090, 0xa0c667b5de565, 0xa5503b23e255d, 0xa9e6b5579fdbf, + 0xae89f995ad3ad, 0xb33a2b84f15fb, 0xb7f76f2fb5e47, 0xbcc1e904bc1d2, + 0xc199bdd85529c, 0xc67f12e57d14b, 0xcb720dcef9069, 0xd072d4a07897c, + 0xd5818dcfba487, 0xda9e603db3285, 0xdfc97337b9b5f, 0xe502ee78b3ff6, + 0xea4afa2a490da, 0xefa1bee615a27, 0xf50765b6e4540, 0xfa7c1819e90d8, +}; + +} // namespace common_constants_internal + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC___SUPPORT_MATHVEC_COMMON_CONSTANTS_H diff --git a/libc/src/__support/mathvec/expf.h b/libc/src/__support/mathvec/expf.h index 1328df27aced6..6b5abb4a0c32f 100644 --- a/libc/src/__support/mathvec/expf.h +++ b/libc/src/__support/mathvec/expf.h @@ -10,20 +10,63 @@ #define 
LLVM_LIBC_SRC___SUPPORT_MATHVEC_EXPF_H #include "src/__support/CPP/simd.h" -#include "src/__support/math/expf.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/common.h" +#include "src/__support/mathvec/expf_utils.h" +#include "src/__support/mathvec/vector_utils.h" namespace LIBC_NAMESPACE_DECL { namespace mathvec { template -LIBC_INLINE static constexpr cpp::simd expf(cpp::simd x) { - cpp::simd ret = 0.0f; +LIBC_INLINE cpp::simd inline_exp(cpp::simd x) { + static constexpr cpp::simd shift = 0x1.800000000ffc0p+46; - for (size_t i = 0; i < N; i++) - ret[i] = math::expf(x[i]); + auto z = shift + x * 0x1.71547652b82fep+0; + auto n = z - shift; - return ret; + auto r = x; + r = r - n * 0x1.62e42fefa3800p-1; + r = r - n * 0x1.ef35793c76730p-45; + + /* y = exp(r) - 1 ~= r + C0 r^2 + C1 r^3 + C2 r^4 + C3 r^5. */ + static constexpr cpp::simd c0 = 0x1.fffffffffdbcdp-2; + static constexpr cpp::simd c1 = 0x1.555555555444cp-3; + static constexpr cpp::simd c2 = 0x1.555573c6a9f7dp-5; + static constexpr cpp::simd c3 = 0x1.1111266d28935p-7; + + auto r2 = r * r; + auto p01 = c0 + r * c1; + auto p23 = c2 + r * c3; + auto p04 = p01 + r2 * p23; + auto y = r + p04 * r2; + + auto u = reinterpret_cast>(z); + auto s = exp_lookup(u); + return s + s * y; +} + +template +LIBC_INLINE cpp::simd expf(cpp::simd x) { + using FPBits = typename fputil::FPBits; + cpp::simd ret; + + auto is_inf = cpp::simd_cast(x >= 0x1.62e38p+9); + auto is_zero = cpp::simd_cast(x <= -0x1.628c2ap+9); + auto is_special = is_inf | is_zero; + + auto special_res = + cpp::select(is_inf, cpp::simd(FPBits::inf().get_val()), + cpp::simd(0.0f)); + + auto [lo, hi] = vector_float_to_double(x); + + auto lo_res = inline_exp(lo); + auto hi_res = inline_exp(hi); + + ret = vector_double_to_float(lo_res, hi_res); + return cpp::select(is_special, special_res, ret); } } // namespace mathvec diff --git a/libc/src/__support/mathvec/expf_utils.h b/libc/src/__support/mathvec/expf_utils.h new file mode 100644 index 
0000000000000..2650bcba9bca7 --- /dev/null +++ b/libc/src/__support/mathvec/expf_utils.h @@ -0,0 +1,29 @@ +//===-- Common utils for exp function ---------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_MATHVEC_EXP_UTILS_H +#define LLVM_LIBC_SRC___SUPPORT_MATHVEC_EXP_UTILS_H + +#include "src/__support/CPP/simd.h" +#include "src/__support/mathvec/common_constants.h" + +namespace LIBC_NAMESPACE_DECL { + +template +LIBC_INLINE cpp::simd exp_lookup(cpp::simd u) { + auto index = u & cpp::simd(0x3f); + auto mantissa = cpp::gather>( + true, index, common_constants_internal::EXP_MANTISSA); + auto exponent = (u >> 6) << 52; + auto result = mantissa | exponent; + return reinterpret_cast>(result); +} + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC___SUPPORT_MATHVEC_EXP_UTILS_H diff --git a/libc/src/__support/mathvec/vector_utils.h b/libc/src/__support/mathvec/vector_utils.h new file mode 100644 index 0000000000000..d2138f148a0ee --- /dev/null +++ b/libc/src/__support/mathvec/vector_utils.h @@ -0,0 +1,46 @@ +//===-- Common utils for SIMD functions -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_MATHVEC_VECTOR_UTILS_H +#define LLVM_LIBC_SRC___SUPPORT_MATHVEC_VECTOR_UTILS_H + +#include "src/__support/CPP/simd.h" +#include + +namespace LIBC_NAMESPACE_DECL { + +// Casts a simd into two simd +template +LIBC_INLINE constexpr auto vector_float_to_double(cpp::simd v) { + static_assert(N % 2 == 0, "vector size must be even"); + constexpr size_t H = N / 2; + + auto parts = cpp::split(v); + auto lo_f = cpp::get<0>(parts); + auto hi_f = cpp::get<1>(parts); + + auto lo_d = cpp::simd_cast(lo_f); + auto hi_d = cpp::simd_cast(hi_f); + + return cpp::make_tuple(lo_d, hi_d); +} + +// Casts two simd into a simd +template +LIBC_INLINE constexpr auto vector_double_to_float(cpp::simd lo_d, + cpp::simd hi_d) { + + auto lo_f = cpp::simd_cast(lo_d); + auto hi_f = cpp::simd_cast(hi_d); + + return cpp::concat(lo_f, hi_f); +} + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC___SUPPORT_MATHVEC_VECTOR_UTILS_H diff --git a/libc/test/src/mathvec/expf_test.cpp b/libc/test/src/mathvec/expf_test.cpp index 5bb8486adbcdf..e599a9e7ed9c4 100644 --- a/libc/test/src/mathvec/expf_test.cpp +++ b/libc/test/src/mathvec/expf_test.cpp @@ -69,33 +69,28 @@ TEST_F(LlvmLibcVecExpfTest, SpecialNumbers) { } TEST_F(LlvmLibcVecExpfTest, Overflow) { - // Exception still supported with current impl and test, but might not endup - // being tested. 
- EXPECT_SIMD_EQ_WITH_EXCEPTION(LIBC_NAMESPACE::cpp::splat(inf), - wrap_vexpf(FPBits(0x7f7fffffU).get_val()), - FE_OVERFLOW); - - EXPECT_SIMD_EQ_WITH_EXCEPTION(LIBC_NAMESPACE::cpp::splat(inf), - wrap_vexpf(FPBits(0x42cffff8U).get_val()), - FE_OVERFLOW); - - EXPECT_SIMD_EQ_WITH_EXCEPTION(LIBC_NAMESPACE::cpp::splat(inf), - wrap_vexpf(FPBits(0x42d00008U).get_val()), - FE_OVERFLOW); + // Fails if tested with exceptions + EXPECT_SIMD_EQ(LIBC_NAMESPACE::cpp::splat(inf), + wrap_vexpf(FPBits(0x7f7fffffU).get_val())); + + EXPECT_SIMD_EQ(LIBC_NAMESPACE::cpp::splat(inf), + wrap_vexpf(FPBits(0x42cffff8U).get_val())); + + EXPECT_SIMD_EQ(LIBC_NAMESPACE::cpp::splat(inf), + wrap_vexpf(FPBits(0x42d00008U).get_val())); } TEST_F(LlvmLibcVecExpfTest, Underflow) { - // Exception still supported with current impl and test, but eventually won't - // be tested. + // Passes if tested with exceptions ? EXPECT_SIMD_EQ_WITH_EXCEPTION(LIBC_NAMESPACE::cpp::splat(0.0f), wrap_vexpf(FPBits(0xff7fffffU).get_val()), FE_UNDERFLOW); float x = FPBits(0xc2cffff8U).get_val(); - EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x, 1.0), wrap_vexpf(x, 1.0)); + EXPECT_SIMD_EQ(wrap_ref_vexpf(x, 1.0), wrap_vexpf(x, 1.0)); x = FPBits(0xc2d00008U).get_val(); - EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x, 1.0), wrap_vexpf(x, 1.0)); + EXPECT_SIMD_EQ(wrap_ref_vexpf(x, 1.0), wrap_vexpf(x, 1.0)); } // Test with inputs which are the borders of underflow/overflow but still @@ -106,19 +101,19 @@ TEST_F(LlvmLibcVecExpfTest, Borderline) { x = FPBits(0x42affff8U).get_val(); // Do we need ASSERT? 
If so it needs a version for all rounding modes - EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x, 1.0), wrap_vexpf(x, 1.0)); + EXPECT_SIMD_EQ(wrap_ref_vexpf(x, 1.0), wrap_vexpf(x, 1.0)); x = FPBits(0x42b00008U).get_val(); - EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x, 1.0), wrap_vexpf(x, 1.0)); + EXPECT_SIMD_EQ(wrap_ref_vexpf(x, 1.0), wrap_vexpf(x, 1.0)); x = FPBits(0xc2affff8U).get_val(); - EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x, 1.0), wrap_vexpf(x, 1.0)); + EXPECT_SIMD_EQ(wrap_ref_vexpf(x, 1.0), wrap_vexpf(x, 1.0)); x = FPBits(0xc2b00008U).get_val(); - EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x, 1.0), wrap_vexpf(x, 1.0)); + EXPECT_SIMD_EQ(wrap_ref_vexpf(x, 1.0), wrap_vexpf(x, 1.0)); x = FPBits(0xc236bd8cU).get_val(); - EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x, 1.0), wrap_vexpf(x, 1.0)); + EXPECT_SIMD_EQ(wrap_ref_vexpf(x, 1.0), wrap_vexpf(x, 1.0)); } TEST_F(LlvmLibcVecExpfTest, InFloatRange) { @@ -128,12 +123,11 @@ TEST_F(LlvmLibcVecExpfTest, InFloatRange) { float x = FPBits(v).get_val(); if (FPBits(v).is_nan() || FPBits(v).is_inf()) continue; - EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x), wrap_vexpf(x)); - EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x, aNaN), wrap_vexpf(x, aNaN)); - EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x, inf), wrap_vexpf(x, inf)); - EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x, -inf), - wrap_vexpf(x, neg_inf)); - EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x, 0.0), wrap_vexpf(x, 0.0)); - EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x, -0.0), wrap_vexpf(x, -0.0)); + EXPECT_SIMD_EQ(wrap_ref_vexpf(x), wrap_vexpf(x)); + EXPECT_SIMD_EQ(wrap_ref_vexpf(x, aNaN), wrap_vexpf(x, aNaN)); + EXPECT_SIMD_EQ(wrap_ref_vexpf(x, inf), wrap_vexpf(x, inf)); + EXPECT_SIMD_EQ(wrap_ref_vexpf(x, -inf), wrap_vexpf(x, neg_inf)); + EXPECT_SIMD_EQ(wrap_ref_vexpf(x, 0.0), wrap_vexpf(x, 0.0)); + EXPECT_SIMD_EQ(wrap_ref_vexpf(x, -0.0), wrap_vexpf(x, -0.0)); } } From ddd9fbf938b96800236f641dd24bc9c1516a49a3 Mon Sep 17 00:00:00 2001 From: Dylan Fleming Date: Wed, 
7 Jan 2026 22:18:42 +0000 Subject: [PATCH 4/6] Removed vector_utils, Improved Gathers, and fixed bitcast. Removes vector_utils.h and instead relies on simd_cast and the compiler to handle oversized vectors correctly. Added a check for gather instruction support, with an explicit scalar-lookup loop as the fallback. Changed the incorrect reinterpret_cast to bit_cast. Removed a leftover debug message from a CMakeLists.txt file. --- libc/src/CMakeLists.txt | 1 - libc/src/__support/CPP/simd.h | 10 ++++ .../macros/properties/cpu_features.h | 4 ++ libc/src/__support/mathvec/CMakeLists.txt | 8 ---- libc/src/__support/mathvec/expf.h | 21 +++------ libc/src/__support/mathvec/expf_utils.h | 2 +- libc/src/__support/mathvec/vector_utils.h | 46 ------------------- 7 files changed, 22 insertions(+), 70 deletions(-) delete mode 100644 libc/src/__support/mathvec/vector_utils.h diff --git a/libc/src/CMakeLists.txt b/libc/src/CMakeLists.txt index 8f005a700a80f..8a0acccaed708 100644 --- a/libc/src/CMakeLists.txt +++ b/libc/src/CMakeLists.txt @@ -30,7 +30,6 @@ if(${LIBC_TARGET_OS} STREQUAL "linux") endif() if(LIBC_COMPILER_HAS_EXT_VECTOR_TYPE) - message(STATUS "Vector math enabled") add_subdirectory(mathvec) endif() diff --git a/libc/src/__support/CPP/simd.h b/libc/src/__support/CPP/simd.h index 422d2f4c8433d..fd34a13c323b3 100644 --- a/libc/src/__support/CPP/simd.h +++ b/libc/src/__support/CPP/simd.h @@ -325,8 +325,18 @@ LIBC_INLINE constexpr static T gather(simd> mask, Idx idx, const void *base, bool aligned = false) { if (aligned) base = __builtin_assume_aligned(base, alignof(T)); +#if defined(LIBC_TARGET_CPU_HAS_GATHER) return __builtin_masked_gather( mask, idx, reinterpret_cast *>(base)); +#else + T result; + for (size_t i = 0; i < simd_size_v; ++i) { + if (mask[i]) + result[i] = + *(reinterpret_cast *>(base) + idx[i]); + } + return result; +#endif } template = 0> LIBC_INLINE constexpr static void scatter(simd> mask, diff --git a/libc/src/__support/macros/properties/cpu_features.h 
b/libc/src/__support/macros/properties/cpu_features.h index 1fe20d9b23a34..0ad0ab5d4e523 100644 --- a/libc/src/__support/macros/properties/cpu_features.h +++ b/libc/src/__support/macros/properties/cpu_features.h @@ -56,6 +56,10 @@ #define LIBC_TARGET_CPU_HAS_AVX512BW #endif +#if defined(__AVX512F__) || defined(__AVX2__) +#define LIBC_TARGET_CPU_HAS_GATHER +#endif + #if defined(__ARM_FP) #if (__ARM_FP & 0x2) #define LIBC_TARGET_CPU_HAS_ARM_FPU_HALF diff --git a/libc/src/__support/mathvec/CMakeLists.txt b/libc/src/__support/mathvec/CMakeLists.txt index 4b6e31ad29929..865e53f2658a9 100644 --- a/libc/src/__support/mathvec/CMakeLists.txt +++ b/libc/src/__support/mathvec/CMakeLists.txt @@ -25,11 +25,3 @@ add_header_library( libc.src.__support.mathvec.expf_utils libc.src.__support.mathvec.vector_utils ) - -add_header_library( - vector_utils - HDRS - vector_utils.h - DEPENDS - libc.src.__support.CPP.simd -) diff --git a/libc/src/__support/mathvec/expf.h b/libc/src/__support/mathvec/expf.h index 6b5abb4a0c32f..3b92cba609c9b 100644 --- a/libc/src/__support/mathvec/expf.h +++ b/libc/src/__support/mathvec/expf.h @@ -13,7 +13,6 @@ #include "src/__support/FPUtil/FPBits.h" #include "src/__support/common.h" #include "src/__support/mathvec/expf_utils.h" -#include "src/__support/mathvec/vector_utils.h" namespace LIBC_NAMESPACE_DECL { @@ -50,22 +49,16 @@ LIBC_INLINE cpp::simd inline_exp(cpp::simd x) { template LIBC_INLINE cpp::simd expf(cpp::simd x) { using FPBits = typename fputil::FPBits; - cpp::simd ret; - auto is_inf = cpp::simd_cast(x >= 0x1.62e38p+9); - auto is_zero = cpp::simd_cast(x <= -0x1.628c2ap+9); - auto is_special = is_inf | is_zero; + cpp::simd is_inf = x >= 0x1.62e38p+9; + cpp::simd is_zero = x <= -0x1.628c2ap+9; + cpp::simd is_special = is_inf | is_zero; - auto special_res = - cpp::select(is_inf, cpp::simd(FPBits::inf().get_val()), - cpp::simd(0.0f)); + cpp::simd special_res = is_inf ? 
FPBits::inf().get_val() : 0.0f; - auto [lo, hi] = vector_float_to_double(x); - - auto lo_res = inline_exp(lo); - auto hi_res = inline_exp(hi); - - ret = vector_double_to_float(lo_res, hi_res); + cpp::simd x_d = cpp::simd_cast(x); + cpp::simd y = inline_exp(x_d); + cpp::simd ret = cpp::simd_cast(y); return cpp::select(is_special, special_res, ret); } diff --git a/libc/src/__support/mathvec/expf_utils.h b/libc/src/__support/mathvec/expf_utils.h index 2650bcba9bca7..8c1fbb239dbd0 100644 --- a/libc/src/__support/mathvec/expf_utils.h +++ b/libc/src/__support/mathvec/expf_utils.h @@ -21,7 +21,7 @@ LIBC_INLINE cpp::simd exp_lookup(cpp::simd u) { true, index, common_constants_internal::EXP_MANTISSA); auto exponent = (u >> 6) << 52; auto result = mantissa | exponent; - return reinterpret_cast>(result); + return cpp::bit_cast>(result); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/__support/mathvec/vector_utils.h b/libc/src/__support/mathvec/vector_utils.h deleted file mode 100644 index d2138f148a0ee..0000000000000 --- a/libc/src/__support/mathvec/vector_utils.h +++ /dev/null @@ -1,46 +0,0 @@ -//===-- Common utils for SIMD functions -------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIBC_SRC___SUPPORT_MATHVEC_VECTOR_UTILS_H -#define LLVM_LIBC_SRC___SUPPORT_MATHVEC_VECTOR_UTILS_H - -#include "src/__support/CPP/simd.h" -#include - -namespace LIBC_NAMESPACE_DECL { - -// Casts a simd into two simd -template -LIBC_INLINE constexpr auto vector_float_to_double(cpp::simd v) { - static_assert(N % 2 == 0, "vector size must be even"); - constexpr size_t H = N / 2; - - auto parts = cpp::split(v); - auto lo_f = cpp::get<0>(parts); - auto hi_f = cpp::get<1>(parts); - - auto lo_d = cpp::simd_cast(lo_f); - auto hi_d = cpp::simd_cast(hi_f); - - return cpp::make_tuple(lo_d, hi_d); -} - -// Casts two simd into a simd -template -LIBC_INLINE constexpr auto vector_double_to_float(cpp::simd lo_d, - cpp::simd hi_d) { - - auto lo_f = cpp::simd_cast(lo_d); - auto hi_f = cpp::simd_cast(hi_d); - - return cpp::concat(lo_f, hi_f); -} - -} // namespace LIBC_NAMESPACE_DECL - -#endif // LLVM_LIBC_SRC___SUPPORT_MATHVEC_VECTOR_UTILS_H From 33d73471e27b77a21735bf9f656f18ce5e987bd0 Mon Sep 17 00:00:00 2001 From: Dylan Fleming Date: Thu, 8 Jan 2026 21:26:44 +0000 Subject: [PATCH 5/6] Updated coefficients and addressed comments Updated coefficients used in expf, and added sollya comments to reproduce them. Made inline_exp and exp_lookup static to fix ABI warning. Reduced number of auto types used. 
--- libc/src/__support/CPP/simd.h | 10 ----- libc/src/__support/mathvec/expf.h | 57 ++++++++++++++++--------- libc/src/__support/mathvec/expf_utils.h | 10 ++--- 3 files changed, 41 insertions(+), 36 deletions(-) diff --git a/libc/src/__support/CPP/simd.h b/libc/src/__support/CPP/simd.h index fd34a13c323b3..422d2f4c8433d 100644 --- a/libc/src/__support/CPP/simd.h +++ b/libc/src/__support/CPP/simd.h @@ -325,18 +325,8 @@ LIBC_INLINE constexpr static T gather(simd> mask, Idx idx, const void *base, bool aligned = false) { if (aligned) base = __builtin_assume_aligned(base, alignof(T)); -#if defined(LIBC_TARGET_CPU_HAS_GATHER) return __builtin_masked_gather( mask, idx, reinterpret_cast *>(base)); -#else - T result; - for (size_t i = 0; i < simd_size_v; ++i) { - if (mask[i]) - result[i] = - *(reinterpret_cast *>(base) + idx[i]); - } - return result; -#endif } template = 0> LIBC_INLINE constexpr static void scatter(simd> mask, diff --git a/libc/src/__support/mathvec/expf.h b/libc/src/__support/mathvec/expf.h index 3b92cba609c9b..0df7cbb7d0c2f 100644 --- a/libc/src/__support/mathvec/expf.h +++ b/libc/src/__support/mathvec/expf.h @@ -19,30 +19,45 @@ namespace LIBC_NAMESPACE_DECL { namespace mathvec { template -LIBC_INLINE cpp::simd inline_exp(cpp::simd x) { +LIBC_INLINE static cpp::simd inline_exp(cpp::simd x) { static constexpr cpp::simd shift = 0x1.800000000ffc0p+46; - auto z = shift + x * 0x1.71547652b82fep+0; - auto n = z - shift; - - auto r = x; - r = r - n * 0x1.62e42fefa3800p-1; - r = r - n * 0x1.ef35793c76730p-45; + // inv_ln2 = round(1/log(2), D, RN); + static constexpr cpp::simd inv_ln2 = 0x1.71547652b82fep+0; + cpp::simd z = shift + x * inv_ln2; + cpp::simd n = z - shift; + + // ln2_hi = round(log(2), D, RN); + // ln2_lo = round(log(2) - ln2_hi, D, RN); + static constexpr cpp::simd ln2_hi = 0x1.62e42fefa39efp-1; + static constexpr cpp::simd ln2_lo = 0x1.abc9e3b39803fp-56; + + cpp::simd r = x; + r = r - n * ln2_hi; + r = r - n * ln2_lo; + + // Coefficients of exp 
approximation, generated by Sollya with: + // poly = 1 + x; + // for i from 2 to 5 do { + // r = remez(exp(x)-poly(x), 5-i, [-log(2)/128;log(2)/128], x^i, 1e-10); + // c = coeff(roundcoefficients(r, [|D ...|]), 0); + // poly = poly + x^i*c; + // c; + // }; + static constexpr cpp::simd c0 = 0x1.fffffffffdbcep-2; + static constexpr cpp::simd c1 = 0x1.55555555543c2p-3; + static constexpr cpp::simd c2 = 0x1.555573c64f2e3p-5; + static constexpr cpp::simd c3 = 0x1.111126b4eff73p-7; /* y = exp(r) - 1 ~= r + C0 r^2 + C1 r^3 + C2 r^4 + C3 r^5. */ - static constexpr cpp::simd c0 = 0x1.fffffffffdbcdp-2; - static constexpr cpp::simd c1 = 0x1.555555555444cp-3; - static constexpr cpp::simd c2 = 0x1.555573c6a9f7dp-5; - static constexpr cpp::simd c3 = 0x1.1111266d28935p-7; - - auto r2 = r * r; - auto p01 = c0 + r * c1; - auto p23 = c2 + r * c3; - auto p04 = p01 + r2 * p23; - auto y = r + p04 * r2; - - auto u = reinterpret_cast>(z); - auto s = exp_lookup(u); + cpp::simd r2 = r * r; + cpp::simd p01 = c0 + r * c1; + cpp::simd p23 = c2 + r * c3; + cpp::simd p04 = p01 + r2 * p23; + cpp::simd y = r + p04 * r2; + + cpp::simd u = cpp::bit_cast>(z); + cpp::simd s = exp_lookup(u); return s + s * y; } @@ -59,7 +74,7 @@ LIBC_INLINE cpp::simd expf(cpp::simd x) { cpp::simd x_d = cpp::simd_cast(x); cpp::simd y = inline_exp(x_d); cpp::simd ret = cpp::simd_cast(y); - return cpp::select(is_special, special_res, ret); + return is_special ? 
special_res : ret; } } // namespace mathvec diff --git a/libc/src/__support/mathvec/expf_utils.h b/libc/src/__support/mathvec/expf_utils.h index 8c1fbb239dbd0..25f520ecca1d7 100644 --- a/libc/src/__support/mathvec/expf_utils.h +++ b/libc/src/__support/mathvec/expf_utils.h @@ -15,12 +15,12 @@ namespace LIBC_NAMESPACE_DECL { template -LIBC_INLINE cpp::simd exp_lookup(cpp::simd u) { - auto index = u & cpp::simd(0x3f); - auto mantissa = cpp::gather>( +LIBC_INLINE static cpp::simd exp_lookup(cpp::simd u) { + cpp::simd index = u & cpp::simd(0x3f); + cpp::simd mantissa = cpp::gather>( true, index, common_constants_internal::EXP_MANTISSA); - auto exponent = (u >> 6) << 52; - auto result = mantissa | exponent; + cpp::simd exponent = (u >> 6) << 52; + cpp::simd result = mantissa | exponent; return cpp::bit_cast>(result); } From 4a482b695b3c4d1b99a89ed67ccf773a907c3f3f Mon Sep 17 00:00:00 2001 From: Dylan Fleming Date: Thu, 8 Jan 2026 22:52:59 +0000 Subject: [PATCH 6/6] Made cpp::bit_cast static --- libc/src/__support/CPP/bit.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libc/src/__support/CPP/bit.h b/libc/src/__support/CPP/bit.h index 88d4362e6758c..ccf623a0298d9 100644 --- a/libc/src/__support/CPP/bit.h +++ b/libc/src/__support/CPP/bit.h @@ -39,7 +39,7 @@ LIBC_INLINE static void inline_copy(const char *from, char *to) { // This implementation of bit_cast requires trivially-constructible To, to avoid // UB in the implementation. template -LIBC_INLINE constexpr cpp::enable_if_t< +LIBC_INLINE static constexpr cpp::enable_if_t< (sizeof(To) == sizeof(From)) && cpp::is_trivially_constructible::value && cpp::is_trivially_copyable::value &&