From 24e7b25dc349f280ad80c4de576a876f288e0291 Mon Sep 17 00:00:00 2001
From: Dylan Fleming <Dylan.Fleming@arm.com>
Date: Mon, 8 Dec 2025 11:56:56 +0000
Subject: [PATCH 1/6] Initial commit for mathvec directories

Created a mathvec/ file structure that mirrors the
structure of the math/ directory, and added a dummy
expf implementation.
---
 libc/CMakeLists.txt                       |  2 ++
 libc/config/linux/aarch64/entrypoints.txt |  7 ++++
 libc/config/linux/x86_64/entrypoints.txt  |  7 ++++
 libc/lib/CMakeLists.txt                   |  6 ++--
 libc/src/CMakeLists.txt                   |  5 +++
 libc/src/__support/CMakeLists.txt         |  3 ++
 libc/src/__support/mathvec/CMakeLists.txt | 24 +++++++++++++
 libc/src/__support/mathvec/expf.h         | 33 +++++++++++++++++
 libc/src/mathvec/CMakeLists.txt           | 43 +++++++++++++++++++++++
 libc/src/mathvec/expf.h                   | 21 +++++++++++
 libc/src/mathvec/generic/CMakeLists.txt   | 12 +++++++
 libc/src/mathvec/generic/expf.cpp         | 18 ++++++++++
 12 files changed, 178 insertions(+), 3 deletions(-)
 create mode 100644 libc/src/__support/mathvec/CMakeLists.txt
 create mode 100644 libc/src/__support/mathvec/expf.h
 create mode 100644 libc/src/mathvec/CMakeLists.txt
 create mode 100644 libc/src/mathvec/expf.h
 create mode 100644 libc/src/mathvec/generic/CMakeLists.txt
 create mode 100644 libc/src/mathvec/generic/expf.cpp

diff --git a/libc/CMakeLists.txt b/libc/CMakeLists.txt
index 4e6b4195a9c5e..2d474f08841ca 100644
--- a/libc/CMakeLists.txt
+++ b/libc/CMakeLists.txt
@@ -350,6 +350,7 @@ include(LLVMLibCRules)
 set(TARGET_LLVMLIBC_ENTRYPOINTS "")
 set(TARGET_LIBC_ENTRYPOINTS "")
 set(TARGET_LIBM_ENTRYPOINTS "")
+set(TARGET_LIBMVEC_ENTRYPOINTS "")
 set(TARGET_LLVMLIBC_REMOVED_ENTRYPOINTS "")
 
 # Check entrypoints.txt
@@ -380,6 +381,7 @@ foreach(removed_entrypoint IN LISTS TARGET_LLVMLIBC_REMOVED_ENTRYPOINTS)
   list(REMOVE_ITEM TARGET_LLVMLIBC_ENTRYPOINTS ${removed_entrypoint})
   list(REMOVE_ITEM TARGET_LIBC_ENTRYPOINTS ${removed_entrypoint})
   list(REMOVE_ITEM TARGET_LIBM_ENTRYPOINTS ${removed_entrypoint})
+  list(REMOVE_ITEM TARGET_LIBMVEC_ENTRYPOINTS ${removed_entrypoint})
 endforeach()
 
 set(TARGET_ENTRYPOINT_NAME_LIST "")
diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index 970c825bbfc96..d0db8b0acff2b 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -1178,7 +1178,14 @@ if(LLVM_LIBC_FULL_BUILD)
   )
 endif()
 
+if(LIBC_COMPILER_HAS_EXT_VECTOR_TYPE)
+  set(TARGET_LIBMVEC_ENTRYPOINTS
+    libc.src.mathvec.expf
+  )
+endif()
+
 set(TARGET_LLVMLIBC_ENTRYPOINTS
   ${TARGET_LIBC_ENTRYPOINTS}
   ${TARGET_LIBM_ENTRYPOINTS}
+  ${TARGET_LIBMVEC_ENTRYPOINTS}
 )
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 9399b284fa2da..7c5e649968223 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -1396,7 +1396,14 @@ if(LLVM_LIBC_FULL_BUILD)
   )
 endif()
 
+if(LIBC_COMPILER_HAS_EXT_VECTOR_TYPE)
+  set(TARGET_LIBMVEC_ENTRYPOINTS
+    libc.src.mathvec.expf
+  )
+endif()
+
 set(TARGET_LLVMLIBC_ENTRYPOINTS
   ${TARGET_LIBC_ENTRYPOINTS}
   ${TARGET_LIBM_ENTRYPOINTS}
+  ${TARGET_LIBMVEC_ENTRYPOINTS}
 )
diff --git a/libc/lib/CMakeLists.txt b/libc/lib/CMakeLists.txt
index ce0b07fb6cb49..19751a9cc0736 100644
--- a/libc/lib/CMakeLists.txt
+++ b/libc/lib/CMakeLists.txt
@@ -2,10 +2,10 @@ set(libc_archive_targets "")
 set(libc_archive_names "")
 set(libc_archive_entrypoint_lists "")
 if(LLVM_LIBC_FULL_BUILD)
-  list(APPEND libc_archive_names c m)
-  list(APPEND libc_archive_targets libc libm)
+  list(APPEND libc_archive_names c m mvec)
+  list(APPEND libc_archive_targets libc libm libmvec)
   list(APPEND libc_archive_entrypoint_lists
-       TARGET_LIBC_ENTRYPOINTS TARGET_LIBM_ENTRYPOINTS)
+       TARGET_LIBC_ENTRYPOINTS TARGET_LIBM_ENTRYPOINTS TARGET_LIBMVEC_ENTRYPOINTS)
 else()
   list(APPEND libc_archive_names llvmlibc)
   list(APPEND libc_archive_targets libc)
diff --git a/libc/src/CMakeLists.txt b/libc/src/CMakeLists.txt
index b2afe0a33acee..8f005a700a80f 100644
--- a/libc/src/CMakeLists.txt
+++ b/libc/src/CMakeLists.txt
@@ -29,6 +29,11 @@ if(${LIBC_TARGET_OS} STREQUAL "linux")
   add_subdirectory(termios)
 endif()
 
+if(LIBC_COMPILER_HAS_EXT_VECTOR_TYPE)
+  message(STATUS "Vector math enabled")
+  add_subdirectory(mathvec)
+endif()
+
 if(NOT LLVM_LIBC_FULL_BUILD)
   return()
 endif()
diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt
index c7f127d6934a0..1ebd7dd2621a2 100644
--- a/libc/src/__support/CMakeLists.txt
+++ b/libc/src/__support/CMakeLists.txt
@@ -416,3 +416,6 @@ if(NOT (LIBC_TARGET_OS_IS_DARWIN))
 endif()
 
 add_subdirectory(math)
+if(LIBC_COMPILER_HAS_EXT_VECTOR_TYPE)
+  add_subdirectory(mathvec)
+endif()
diff --git a/libc/src/__support/mathvec/CMakeLists.txt b/libc/src/__support/mathvec/CMakeLists.txt
new file mode 100644
index 0000000000000..ad66dd1aadbaa
--- /dev/null
+++ b/libc/src/__support/mathvec/CMakeLists.txt
@@ -0,0 +1,24 @@
+add_header_library(
+  common_constants
+  HDRS
+    common_constants.h
+  DEPENDS
+)
+
+add_header_library(
+  expf_utils
+  HDRS
+    expf_utils.h
+  DEPENDS
+  libc.src.__support.CPP.simd
+  libc.src.__support.mathvec.common_constants
+)
+
+add_header_library(
+  expf
+  HDRS
+    expf.h
+  DEPENDS
+    libc.src.__support.CPP.simd
+    libc.src.__support.math.expf
+)
\ No newline at end of file
diff --git a/libc/src/__support/mathvec/expf.h b/libc/src/__support/mathvec/expf.h
new file mode 100644
index 0000000000000..1328df27aced6
--- /dev/null
+++ b/libc/src/__support/mathvec/expf.h
@@ -0,0 +1,33 @@
+//===-- Implementation header for SIMD expf ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATHVEC_EXPF_H
+#define LLVM_LIBC_SRC___SUPPORT_MATHVEC_EXPF_H
+
+#include "src/__support/CPP/simd.h"
+#include "src/__support/math/expf.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace mathvec {
+
+template <size_t N>
+LIBC_INLINE static constexpr cpp::simd<float, N> expf(cpp::simd<float, N> x) {
+  cpp::simd<float, N> ret = 0.0f;
+
+  for (size_t i = 0; i < N; i++)
+    ret[i] = math::expf(x[i]);
+
+  return ret;
+}
+
+} // namespace mathvec
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MATHVEC_EXPF_H
diff --git a/libc/src/mathvec/CMakeLists.txt b/libc/src/mathvec/CMakeLists.txt
new file mode 100644
index 0000000000000..dff1dd3b70b51
--- /dev/null
+++ b/libc/src/mathvec/CMakeLists.txt
@@ -0,0 +1,43 @@
+add_subdirectory(generic)
+if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_ARCHITECTURE})
+  add_subdirectory(${LIBC_TARGET_ARCHITECTURE})
+endif()
+
+function(add_vector_math_entrypoint_object name)
+  # We prefer machine specific implementation if available. Hence we check
+  # that first and return early if we are able to add an alias target for the
+  # machine specific implementation.
+  get_fq_target_name("${LIBC_TARGET_ARCHITECTURE}.${name}" fq_machine_specific_target_name)
+  if(TARGET ${fq_machine_specific_target_name})
+    add_entrypoint_object(
+      ${name}
+      ALIAS
+      DEPENDS
+        .${LIBC_TARGET_ARCHITECTURE}.${name}
+    )
+    return()
+  endif()
+
+  get_fq_target_name("generic.${name}" fq_generic_target_name)
+  if(TARGET ${fq_generic_target_name})
+    add_entrypoint_object(
+      ${name}
+      ALIAS
+      DEPENDS
+        .generic.${name}
+    )
+    return()
+  endif()
+
+  # Add a dummy entrypoint object for missing implementations. They will be skipped
+  # anyway as there will be no entry for them in the target entrypoints list.
+  add_entrypoint_object(
+    ${name}
+    SRCS
+      dummy_srcs
+    HDRS
+      dummy_hdrs
+  )
+endfunction()
+
+add_vector_math_entrypoint_object(expf)
diff --git a/libc/src/mathvec/expf.h b/libc/src/mathvec/expf.h
new file mode 100644
index 0000000000000..257fb485838af
--- /dev/null
+++ b/libc/src/mathvec/expf.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for SIMD expf ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATHVEC_EXPF_H
+#define LLVM_LIBC_SRC_MATHVEC_EXPF_H
+
+#include "src/__support/CPP/simd.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+cpp::simd<float> expf(cpp::simd<float> x);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_MATHVEC_EXPF_H
diff --git a/libc/src/mathvec/generic/CMakeLists.txt b/libc/src/mathvec/generic/CMakeLists.txt
new file mode 100644
index 0000000000000..769794d6d5eee
--- /dev/null
+++ b/libc/src/mathvec/generic/CMakeLists.txt
@@ -0,0 +1,12 @@
+add_entrypoint_object(
+  expf
+  SRCS
+    expf.cpp
+  HDRS
+    ../expf.h
+  DEPENDS
+    libc.src.__support.CPP.simd
+  FLAGS
+    ROUND_OPT
+    FMA_OPT
+)
diff --git a/libc/src/mathvec/generic/expf.cpp b/libc/src/mathvec/generic/expf.cpp
new file mode 100644
index 0000000000000..2e113d17e8680
--- /dev/null
+++ b/libc/src/mathvec/generic/expf.cpp
@@ -0,0 +1,18 @@
+//===-- Single-precision SIMD e^x vector function -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/mathvec/expf.h"
+#include "src/__support/mathvec/expf.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(cpp::simd<float>, expf, (cpp::simd<float> x)) {
+  return mathvec::expf(x);
+}
+
+} // namespace LIBC_NAMESPACE_DECL

From 609371f7f3684c75c28ca16d18933319060440df Mon Sep 17 00:00:00 2001
From: Pierre Blanchard <pierre.blanchard@arm.com>
Date: Fri, 5 Sep 2025 07:19:49 +0000
Subject: [PATCH 2/6] Add simple vector math tester

Simplistic checks based on a wrapper duplicating a scalar input.

Introduce SIMD matcher (ultimately should be moved to test utils/)
- Comparison with exception is supported.
- Test all rounding modes in one line.
- Control lanes

A potential improvement could be made later: load vector in test
over range instead of duplicating input. That would reduce run
time of vector testing.
---
 libc/test/UnitTest/SIMDMatcher.h     |  61 ++++++++++++
 libc/test/src/CMakeLists.txt         |   4 +
 libc/test/src/mathvec/AddTest.h      |  87 +++++++++++++++++
 libc/test/src/mathvec/CMakeLists.txt |  14 +++
 libc/test/src/mathvec/expf_test.cpp  | 139 +++++++++++++++++++++++++++
 5 files changed, 305 insertions(+)
 create mode 100644 libc/test/UnitTest/SIMDMatcher.h
 create mode 100644 libc/test/src/mathvec/AddTest.h
 create mode 100644 libc/test/src/mathvec/CMakeLists.txt
 create mode 100644 libc/test/src/mathvec/expf_test.cpp

diff --git a/libc/test/UnitTest/SIMDMatcher.h b/libc/test/UnitTest/SIMDMatcher.h
new file mode 100644
index 0000000000000..542d311ddbd89
--- /dev/null
+++ b/libc/test/UnitTest/SIMDMatcher.h
@@ -0,0 +1,61 @@
+//===-- SIMDMatchers.h ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_TEST_UNITTEST_SIMDMATCHER_H
+#define LLVM_LIBC_TEST_UNITTEST_SIMDMATCHER_H
+
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/properties/architectures.h"
+#include "test/UnitTest/FPMatcher.h"
+#include "test/UnitTest/Test.h"
+
+#include "hdr/math_macros.h"
+
+#define EXPECT_SIMD_EQ(REF, RES)                                               \
+  for (size_t i = 0;                                                           \
+       i < LIBC_NAMESPACE::cpp::internal::native_vector_size<float>; i++) {    \
+    EXPECT_FP_EQ(REF[i], RES[i]);                                              \
+  }
+
+#define EXPECT_SIMD_EQ_WITH_EXCEPTION(REF, RES, EXCEPTION)                     \
+  for (size_t i = 0;                                                           \
+       i < LIBC_NAMESPACE::cpp::internal::native_vector_size<float>; i++) {    \
+    EXPECT_FP_EQ_WITH_EXCEPTION(REF[i], RES[i], EXCEPTION);                    \
+  }
+
+#define EXPECT_SIMD_EQ_ROUNDING_MODE(expected, actual, rounding_mode)          \
+  do {                                                                         \
+    using namespace LIBC_NAMESPACE::fputil::testing;                           \
+    ForceRoundingMode __r((rounding_mode));                                    \
+    if (__r.success) {                                                         \
+      EXPECT_SIMD_EQ((expected), (actual))                                     \
+    }                                                                          \
+  } while (0)
+
+#define EXPECT_SIMD_EQ_ROUNDING_NEAREST(expected, actual)                      \
+  EXPECT_SIMD_EQ_ROUNDING_MODE((expected), (actual), RoundingMode::Nearest)
+
+#define EXPECT_SIMD_EQ_ROUNDING_UPWARD(expected, actual)                       \
+  EXPECT_SIMD_EQ_ROUNDING_MODE((expected), (actual), RoundingMode::Upward)
+
+#define EXPECT_SIMD_EQ_ROUNDING_DOWNWARD(expected, actual)                     \
+  EXPECT_SIMD_EQ_ROUNDING_MODE((expected), (actual), RoundingMode::Downward)
+
+#define EXPECT_SIMD_EQ_ROUNDING_TOWARD_ZERO(expected, actual)                  \
+  EXPECT_SIMD_EQ_ROUNDING_MODE((expected), (actual), RoundingMode::TowardZero)
+
+#define EXPECT_SIMD_EQ_ALL_ROUNDING(expected, actual)                          \
+  do {                                                                         \
+    EXPECT_SIMD_EQ_ROUNDING_NEAREST((expected), (actual));                     \
+    EXPECT_SIMD_EQ_ROUNDING_UPWARD((expected), (actual));                      \
+    EXPECT_SIMD_EQ_ROUNDING_DOWNWARD((expected), (actual));                    \
+    EXPECT_SIMD_EQ_ROUNDING_TOWARD_ZERO((expected), (actual));                 \
+  } while (0)
+
+#endif // LLVM_LIBC_TEST_UNITTEST_SIMDMATCHER_H
diff --git a/libc/test/src/CMakeLists.txt b/libc/test/src/CMakeLists.txt
index 0c6ec9f07a9b7..5e61c739c066a 100644
--- a/libc/test/src/CMakeLists.txt
+++ b/libc/test/src/CMakeLists.txt
@@ -87,6 +87,10 @@ if(${LIBC_TARGET_OS} STREQUAL "linux")
   add_subdirectory(termios)
 endif()
 
+if(LIBC_COMPILER_HAS_EXT_VECTOR_TYPE)
+  add_subdirectory(mathvec)
+endif()
+
 if(NOT LLVM_LIBC_FULL_BUILD)
   return()
 endif()
diff --git a/libc/test/src/mathvec/AddTest.h b/libc/test/src/mathvec/AddTest.h
new file mode 100644
index 0000000000000..2f7d0ec6904e3
--- /dev/null
+++ b/libc/test/src/mathvec/AddTest.h
@@ -0,0 +1,87 @@
+//===-- Utility class to test different flavors of float add ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_TEST_SRC_MATHVEC_ADDTEST_H
+#define LLVM_LIBC_TEST_SRC_MATHVEC_ADDTEST_H
+
+#include "src/__support/CPP/algorithm.h"
+#include "test/UnitTest/FEnvSafeTest.h"
+#include "test/UnitTest/FPMatcher.h"
+#include "test/UnitTest/Test.h"
+#include "utils/MPFRWrapper/MPFRUtils.h"
+
+namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
+
+template <typename OutType, typename InType>
+class AddTest : public LIBC_NAMESPACE::testing::FEnvSafeTest {
+
+  struct InConstants {
+    DECLARE_SPECIAL_CONSTANTS(InType)
+  };
+
+  using InFPBits = typename InConstants::FPBits;
+  using InStorageType = typename InConstants::StorageType;
+
+  static constexpr InStorageType IN_MAX_NORMAL_U =
+      InFPBits::max_normal().uintval();
+  static constexpr InStorageType IN_MIN_NORMAL_U =
+      InFPBits::min_normal().uintval();
+  static constexpr InStorageType IN_MAX_SUBNORMAL_U =
+      InFPBits::max_subnormal().uintval();
+  static constexpr InStorageType IN_MIN_SUBNORMAL_U =
+      InFPBits::min_subnormal().uintval();
+
+public:
+  using AddFunc = OutType (*)(InType, InType);
+
+  void test_subnormal_range(AddFunc func) {
+    constexpr int COUNT = 100'001;
+    constexpr InStorageType STEP = LIBC_NAMESPACE::cpp::max(
+        static_cast<InStorageType>((IN_MAX_SUBNORMAL_U - IN_MIN_SUBNORMAL_U) /
+                                   COUNT),
+        InStorageType(1));
+    for (InStorageType i = IN_MIN_SUBNORMAL_U; i <= IN_MAX_SUBNORMAL_U;
+         i += STEP) {
+      InType x = InFPBits(i).get_val();
+      InType y = InFPBits(static_cast<InStorageType>(IN_MAX_SUBNORMAL_U - i))
+                     .get_val();
+      mpfr::BinaryInput<InType> input{x, y};
+      EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Add, input, func(x, y),
+                                     0.5);
+    }
+  }
+
+  void test_normal_range(AddFunc func) {
+    constexpr int COUNT = 100'001;
+    constexpr InStorageType STEP = LIBC_NAMESPACE::cpp::max(
+        static_cast<InStorageType>((IN_MAX_NORMAL_U - IN_MIN_NORMAL_U) / COUNT),
+        InStorageType(1));
+    for (InStorageType i = IN_MIN_NORMAL_U; i <= IN_MAX_NORMAL_U; i += STEP) {
+      InType x = InFPBits(i).get_val();
+      InType y =
+          InFPBits(static_cast<InStorageType>(IN_MAX_NORMAL_U - i)).get_val();
+      mpfr::BinaryInput<InType> input{x, y};
+      EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Add, input, func(x, y),
+                                     0.5);
+    }
+  }
+};
+
+#define LIST_ADD_TESTS(OutType, InType, func)                                  \
+  using LlvmLibcAddTest = AddTest<OutType, InType>;                            \
+  TEST_F(LlvmLibcAddTest, SubnormalRange) { test_subnormal_range(&func); }     \
+  TEST_F(LlvmLibcAddTest, NormalRange) { test_normal_range(&func); }
+
+#define LIST_ADD_SAME_TYPE_TESTS(suffix, OutType, InType, func)                \
+  using LlvmLibcAddTest##suffix = AddTest<OutType, InType>;                    \
+  TEST_F(LlvmLibcAddTest##suffix, SubnormalRange) {                            \
+    test_subnormal_range(&func);                                               \
+  }                                                                            \
+  TEST_F(LlvmLibcAddTest##suffix, NormalRange) { test_normal_range(&func); }
+
+#endif // LLVM_LIBC_TEST_SRC_MATHVEC_ADDTEST_H
diff --git a/libc/test/src/mathvec/CMakeLists.txt b/libc/test/src/mathvec/CMakeLists.txt
new file mode 100644
index 0000000000000..78f4b16fc4af5
--- /dev/null
+++ b/libc/test/src/mathvec/CMakeLists.txt
@@ -0,0 +1,14 @@
+add_custom_target(libc-mathvec-unittests)
+
+add_fp_unittest(
+  expf_test
+  SUITE
+    libc-mathvec-unittests
+  SRCS
+    expf_test.cpp
+  DEPENDS
+    libc.src.math.expf
+    libc.src.mathvec.expf
+    libc.src.__support.FPUtil.fp_bits
+    libc.src.__support.CPP.simd
+)
diff --git a/libc/test/src/mathvec/expf_test.cpp b/libc/test/src/mathvec/expf_test.cpp
new file mode 100644
index 0000000000000..5bb8486adbcdf
--- /dev/null
+++ b/libc/test/src/mathvec/expf_test.cpp
@@ -0,0 +1,139 @@
+//===-- Unittests for expf ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/math_macros.h"
+#include "src/__support/CPP/simd.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/math/expf.h"
+#include "src/mathvec/expf.h"
+#include "test/UnitTest/SIMDMatcher.h"
+#include "test/UnitTest/Test.h"
+
+#include "hdr/stdint_proxy.h"
+
+using LlvmLibcVecExpfTest = LIBC_NAMESPACE::testing::FPTest<float>;
+
+// Wrappers
+
+// In order to test vector we can either duplicate a scalar input
+// or do something more elaborate. In any case that requires a wrapper
+// since the function call is written in this file.
+
+// Run reference on a vector with lanes duplicated from a scalar input.
+
+// with control lane
+static LIBC_NAMESPACE::cpp::simd<float> wrap_ref_vexpf(float x, float control) {
+  LIBC_NAMESPACE::cpp::simd<float> v(x);
+  v[0] = control;
+  constexpr size_t N = LIBC_NAMESPACE::cpp::internal::native_vector_size<float>;
+  for (size_t i = 0; i < N; i++) {
+    v[i] = LIBC_NAMESPACE::expf(v[i]);
+  }
+  return v;
+}
+
+// without control lane
+static LIBC_NAMESPACE::cpp::simd<float> wrap_ref_vexpf(float x) {
+  return wrap_ref_vexpf(x, x);
+}
+
+// Run implementation on a vector with lanes duplicated from a scalar input.
+
+// with control lane
+static LIBC_NAMESPACE::cpp::simd<float> wrap_vexpf(float x, float control) {
+  LIBC_NAMESPACE::cpp::simd<float> v(x);
+  v[0] = control;
+  return LIBC_NAMESPACE::expf(v);
+}
+
+// without control lane
+static LIBC_NAMESPACE::cpp::simd<float> wrap_vexpf(float x) {
+  return wrap_vexpf(x, x);
+}
+
+TEST_F(LlvmLibcVecExpfTest, SpecialNumbers) {
+  EXPECT_SIMD_EQ(LIBC_NAMESPACE::cpp::splat(aNaN), wrap_vexpf(aNaN));
+
+  EXPECT_SIMD_EQ(LIBC_NAMESPACE::cpp::splat(inf), wrap_vexpf(inf));
+
+  EXPECT_SIMD_EQ(LIBC_NAMESPACE::cpp::splat(0.0f), wrap_vexpf(neg_inf));
+
+  EXPECT_SIMD_EQ(LIBC_NAMESPACE::cpp::splat(1.0f), wrap_vexpf(0.0f));
+
+  EXPECT_SIMD_EQ(LIBC_NAMESPACE::cpp::splat(1.0f), wrap_vexpf(-0.0f));
+}
+
+TEST_F(LlvmLibcVecExpfTest, Overflow) {
+  // Exception still supported with current impl and test, but might not endup
+  // being tested.
+  EXPECT_SIMD_EQ_WITH_EXCEPTION(LIBC_NAMESPACE::cpp::splat(inf),
+                                wrap_vexpf(FPBits(0x7f7fffffU).get_val()),
+                                FE_OVERFLOW);
+
+  EXPECT_SIMD_EQ_WITH_EXCEPTION(LIBC_NAMESPACE::cpp::splat(inf),
+                                wrap_vexpf(FPBits(0x42cffff8U).get_val()),
+                                FE_OVERFLOW);
+
+  EXPECT_SIMD_EQ_WITH_EXCEPTION(LIBC_NAMESPACE::cpp::splat(inf),
+                                wrap_vexpf(FPBits(0x42d00008U).get_val()),
+                                FE_OVERFLOW);
+}
+
+TEST_F(LlvmLibcVecExpfTest, Underflow) {
+  // Exception still supported with current impl and test, but eventually won't
+  // be tested.
+  EXPECT_SIMD_EQ_WITH_EXCEPTION(LIBC_NAMESPACE::cpp::splat(0.0f),
+                                wrap_vexpf(FPBits(0xff7fffffU).get_val()),
+                                FE_UNDERFLOW);
+
+  float x = FPBits(0xc2cffff8U).get_val();
+  EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x, 1.0), wrap_vexpf(x, 1.0));
+
+  x = FPBits(0xc2d00008U).get_val();
+  EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x, 1.0), wrap_vexpf(x, 1.0));
+}
+
+// Test with inputs which are the borders of underflow/overflow but still
+// produce valid results without setting errno.
+// Is this still relevant to vector function?
+TEST_F(LlvmLibcVecExpfTest, Borderline) {
+  float x;
+
+  x = FPBits(0x42affff8U).get_val();
+  // Do we need ASSERT? If so it needs a version for all rounding modes
+  EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x, 1.0), wrap_vexpf(x, 1.0));
+
+  x = FPBits(0x42b00008U).get_val();
+  EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x, 1.0), wrap_vexpf(x, 1.0));
+
+  x = FPBits(0xc2affff8U).get_val();
+  EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x, 1.0), wrap_vexpf(x, 1.0));
+
+  x = FPBits(0xc2b00008U).get_val();
+  EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x, 1.0), wrap_vexpf(x, 1.0));
+
+  x = FPBits(0xc236bd8cU).get_val();
+  EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x, 1.0), wrap_vexpf(x, 1.0));
+}
+
+TEST_F(LlvmLibcVecExpfTest, InFloatRange) {
+  constexpr uint32_t COUNT = 100'000;
+  constexpr uint32_t STEP = UINT32_MAX / COUNT;
+  for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) {
+    float x = FPBits(v).get_val();
+    if (FPBits(v).is_nan() || FPBits(v).is_inf())
+      continue;
+    EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x), wrap_vexpf(x));
+    EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x, aNaN), wrap_vexpf(x, aNaN));
+    EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x, inf), wrap_vexpf(x, inf));
+    EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x, -inf),
+                                wrap_vexpf(x, neg_inf));
+    EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x, 0.0), wrap_vexpf(x, 0.0));
+    EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x, -0.0), wrap_vexpf(x, -0.0));
+  }
+}

From c8f7e7e54fb9eaeef6833333101d97d1a7466d87 Mon Sep 17 00:00:00 2001
From: Dylan Fleming <Dylan.Fleming@arm.com>
Date: Thu, 11 Dec 2025 19:28:55 +0000
Subject: [PATCH 3/6] Add CR (RN only) vector expf

Implements a fully vectorised expf routine, that is
correctly rounded for Round-to-nearest. As an example
for what a correctly rounded vector routine looks like
using LLVM LIBC SIMD types.
---
 libc/src/__support/mathvec/CMakeLists.txt     | 15 ++++-
 libc/src/__support/mathvec/common_constants.h | 40 ++++++++++++++
 libc/src/__support/mathvec/expf.h             | 55 +++++++++++++++++--
 libc/src/__support/mathvec/expf_utils.h       | 29 ++++++++++
 libc/src/__support/mathvec/vector_utils.h     | 46 ++++++++++++++++
 libc/test/src/mathvec/expf_test.cpp           | 52 ++++++++----------
 6 files changed, 200 insertions(+), 37 deletions(-)
 create mode 100644 libc/src/__support/mathvec/common_constants.h
 create mode 100644 libc/src/__support/mathvec/expf_utils.h
 create mode 100644 libc/src/__support/mathvec/vector_utils.h

diff --git a/libc/src/__support/mathvec/CMakeLists.txt b/libc/src/__support/mathvec/CMakeLists.txt
index ad66dd1aadbaa..4b6e31ad29929 100644
--- a/libc/src/__support/mathvec/CMakeLists.txt
+++ b/libc/src/__support/mathvec/CMakeLists.txt
@@ -19,6 +19,17 @@ add_header_library(
   HDRS
     expf.h
   DEPENDS
+    libc.src.__support.common
     libc.src.__support.CPP.simd
-    libc.src.__support.math.expf
-)
\ No newline at end of file
+    libc.src.__support.FPUtil.FPBits
+    libc.src.__support.mathvec.expf_utils
+    libc.src.__support.mathvec.vector_utils
+)
+
+add_header_library(
+  vector_utils
+  HDRS
+    vector_utils.h
+  DEPENDS
+    libc.src.__support.CPP.simd
+)
diff --git a/libc/src/__support/mathvec/common_constants.h b/libc/src/__support/mathvec/common_constants.h
new file mode 100644
index 0000000000000..c235d6842e5b0
--- /dev/null
+++ b/libc/src/__support/mathvec/common_constants.h
@@ -0,0 +1,40 @@
+//===-- Common constants for mathvec functions ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATHVEC_COMMON_CONSTANTS_H
+#define LLVM_LIBC_SRC___SUPPORT_MATHVEC_COMMON_CONSTANTS_H
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace common_constants_internal {
+
+// Lookup table for mantissas of 2^(i / 64) with i = 0, ..., 63.
+static constexpr uint64_t EXP_MANTISSA[64] = {
+    0x0000000000000, 0x02c9a3e778061, 0x059b0d3158574, 0x0874518759bc8,
+    0x0b5586cf9890f, 0x0e3ec32d3d1a2, 0x11301d0125b51, 0x1429aaea92de0,
+    0x172b83c7d517b, 0x1a35beb6fcb75, 0x1d4873168b9aa, 0x2063b88628cd6,
+    0x2387a6e756238, 0x26b4565e27cdd, 0x29e9df51fdee1, 0x2d285a6e4030b,
+    0x306fe0a31b715, 0x33c08b26416ff, 0x371a7373aa9cb, 0x3a7db34e59ff7,
+    0x3dea64c123422, 0x4160a21f72e2a, 0x44e086061892d, 0x486a2b5c13cd0,
+    0x4bfdad5362a27, 0x4f9b2769d2ca7, 0x5342b569d4f82, 0x56f4736b527da,
+    0x5ab07dd485429, 0x5e76f15ad2148, 0x6247eb03a5585, 0x6623882552225,
+    0x6a09e667f3bcd, 0x6dfb23c651a2f, 0x71f75e8ec5f74, 0x75feb564267c9,
+    0x7a11473eb0187, 0x7e2f336cf4e62, 0x82589994cce13, 0x868d99b4492ed,
+    0x8ace5422aa0db, 0x8f1ae99157736, 0x93737b0cdc5e5, 0x97d829fde4e50,
+    0x9c49182a3f090, 0xa0c667b5de565, 0xa5503b23e255d, 0xa9e6b5579fdbf,
+    0xae89f995ad3ad, 0xb33a2b84f15fb, 0xb7f76f2fb5e47, 0xbcc1e904bc1d2,
+    0xc199bdd85529c, 0xc67f12e57d14b, 0xcb720dcef9069, 0xd072d4a07897c,
+    0xd5818dcfba487, 0xda9e603db3285, 0xdfc97337b9b5f, 0xe502ee78b3ff6,
+    0xea4afa2a490da, 0xefa1bee615a27, 0xf50765b6e4540, 0xfa7c1819e90d8,
+};
+
+} // namespace common_constants_internal
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MATHVEC_COMMON_CONSTANTS_H
diff --git a/libc/src/__support/mathvec/expf.h b/libc/src/__support/mathvec/expf.h
index 1328df27aced6..6b5abb4a0c32f 100644
--- a/libc/src/__support/mathvec/expf.h
+++ b/libc/src/__support/mathvec/expf.h
@@ -10,20 +10,63 @@
 #define LLVM_LIBC_SRC___SUPPORT_MATHVEC_EXPF_H
 
 #include "src/__support/CPP/simd.h"
-#include "src/__support/math/expf.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/common.h"
+#include "src/__support/mathvec/expf_utils.h"
+#include "src/__support/mathvec/vector_utils.h"
 
 namespace LIBC_NAMESPACE_DECL {
 
 namespace mathvec {
 
 template <size_t N>
-LIBC_INLINE static constexpr cpp::simd<float, N> expf(cpp::simd<float, N> x) {
-  cpp::simd<float, N> ret = 0.0f;
+LIBC_INLINE cpp::simd<double, N> inline_exp(cpp::simd<double, N> x) {
+  static constexpr cpp::simd<double, N> shift = 0x1.800000000ffc0p+46;
 
-  for (size_t i = 0; i < N; i++)
-    ret[i] = math::expf(x[i]);
+  auto z = shift + x * 0x1.71547652b82fep+0;
+  auto n = z - shift;
 
-  return ret;
+  auto r = x;
+  r = r - n * 0x1.62e42fefa3800p-1;
+  r = r - n * 0x1.ef35793c76730p-45;
+
+  /* y = exp(r) - 1 ~= r + C0 r^2 + C1 r^3 + C2 r^4 + C3 r^5.  */
+  static constexpr cpp::simd<double, N> c0 = 0x1.fffffffffdbcdp-2;
+  static constexpr cpp::simd<double, N> c1 = 0x1.555555555444cp-3;
+  static constexpr cpp::simd<double, N> c2 = 0x1.555573c6a9f7dp-5;
+  static constexpr cpp::simd<double, N> c3 = 0x1.1111266d28935p-7;
+
+  auto r2 = r * r;
+  auto p01 = c0 + r * c1;
+  auto p23 = c2 + r * c3;
+  auto p04 = p01 + r2 * p23;
+  auto y = r + p04 * r2;
+
+  auto u = reinterpret_cast<cpp::simd<uint64_t, N>>(z);
+  auto s = exp_lookup(u);
+  return s + s * y;
+}
+
+template <size_t N>
+LIBC_INLINE cpp::simd<float, N> expf(cpp::simd<float, N> x) {
+  using FPBits = typename fputil::FPBits<float>;
+  cpp::simd<float, N> ret;
+
+  auto is_inf = cpp::simd_cast<bool>(x >= 0x1.62e38p+9);
+  auto is_zero = cpp::simd_cast<bool>(x <= -0x1.628c2ap+9);
+  auto is_special = is_inf | is_zero;
+
+  auto special_res =
+      cpp::select(is_inf, cpp::simd<float, N>(FPBits::inf().get_val()),
+                  cpp::simd<float>(0.0f));
+
+  auto [lo, hi] = vector_float_to_double(x);
+
+  auto lo_res = inline_exp(lo);
+  auto hi_res = inline_exp(hi);
+
+  ret = vector_double_to_float(lo_res, hi_res);
+  return cpp::select(is_special, special_res, ret);
 }
 
 } // namespace mathvec
diff --git a/libc/src/__support/mathvec/expf_utils.h b/libc/src/__support/mathvec/expf_utils.h
new file mode 100644
index 0000000000000..2650bcba9bca7
--- /dev/null
+++ b/libc/src/__support/mathvec/expf_utils.h
@@ -0,0 +1,29 @@
+//===-- Common utils for exp function ---------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATHVEC_EXP_UTILS_H
+#define LLVM_LIBC_SRC___SUPPORT_MATHVEC_EXP_UTILS_H
+
+#include "src/__support/CPP/simd.h"
+#include "src/__support/mathvec/common_constants.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+template <size_t N>
+LIBC_INLINE cpp::simd<double, N> exp_lookup(cpp::simd<uint64_t, N> u) {
+  auto index = u & cpp::simd<uint64_t, N>(0x3f);
+  auto mantissa = cpp::gather<cpp::simd<uint64_t, N>>(
+      true, index, common_constants_internal::EXP_MANTISSA);
+  auto exponent = (u >> 6) << 52;
+  auto result = mantissa | exponent;
+  return reinterpret_cast<cpp::simd<double, N>>(result);
+}
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MATHVEC_EXP_UTILS_H
diff --git a/libc/src/__support/mathvec/vector_utils.h b/libc/src/__support/mathvec/vector_utils.h
new file mode 100644
index 0000000000000..d2138f148a0ee
--- /dev/null
+++ b/libc/src/__support/mathvec/vector_utils.h
@@ -0,0 +1,46 @@
+//===-- Common utils for SIMD functions -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATHVEC_VECTOR_UTILS_H
+#define LLVM_LIBC_SRC___SUPPORT_MATHVEC_VECTOR_UTILS_H
+
+#include "src/__support/CPP/simd.h"
+#include <tuple>
+
+namespace LIBC_NAMESPACE_DECL {
+
+// Casts a simd<float, N> into two simd<double, N/2>
+template <size_t N>
+LIBC_INLINE constexpr auto vector_float_to_double(cpp::simd<float, N> v) {
+  static_assert(N % 2 == 0, "vector size must be even");
+  constexpr size_t H = N / 2;
+
+  auto parts = cpp::split<H, H>(v);
+  auto lo_f = cpp::get<0>(parts);
+  auto hi_f = cpp::get<1>(parts);
+
+  auto lo_d = cpp::simd_cast<double, float, H>(lo_f);
+  auto hi_d = cpp::simd_cast<double, float, H>(hi_f);
+
+  return cpp::make_tuple(lo_d, hi_d);
+}
+
+// Casts two simd<double, N> into a simd<float, 2N>
+template <size_t N>
+LIBC_INLINE constexpr auto vector_double_to_float(cpp::simd<double, N> lo_d,
+                                                  cpp::simd<double, N> hi_d) {
+
+  auto lo_f = cpp::simd_cast<float, double, N>(lo_d);
+  auto hi_f = cpp::simd_cast<float, double, N>(hi_d);
+
+  return cpp::concat(lo_f, hi_f);
+}
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MATHVEC_VECTOR_UTILS_H
diff --git a/libc/test/src/mathvec/expf_test.cpp b/libc/test/src/mathvec/expf_test.cpp
index 5bb8486adbcdf..e599a9e7ed9c4 100644
--- a/libc/test/src/mathvec/expf_test.cpp
+++ b/libc/test/src/mathvec/expf_test.cpp
@@ -69,33 +69,28 @@ TEST_F(LlvmLibcVecExpfTest, SpecialNumbers) {
 }
 
 TEST_F(LlvmLibcVecExpfTest, Overflow) {
-  // Exception still supported with current impl and test, but might not endup
-  // being tested.
-  EXPECT_SIMD_EQ_WITH_EXCEPTION(LIBC_NAMESPACE::cpp::splat(inf),
-                                wrap_vexpf(FPBits(0x7f7fffffU).get_val()),
-                                FE_OVERFLOW);
-
-  EXPECT_SIMD_EQ_WITH_EXCEPTION(LIBC_NAMESPACE::cpp::splat(inf),
-                                wrap_vexpf(FPBits(0x42cffff8U).get_val()),
-                                FE_OVERFLOW);
-
-  EXPECT_SIMD_EQ_WITH_EXCEPTION(LIBC_NAMESPACE::cpp::splat(inf),
-                                wrap_vexpf(FPBits(0x42d00008U).get_val()),
-                                FE_OVERFLOW);
+  // Fails if tested with exceptions
+  EXPECT_SIMD_EQ(LIBC_NAMESPACE::cpp::splat(inf),
+                 wrap_vexpf(FPBits(0x7f7fffffU).get_val()));
+
+  EXPECT_SIMD_EQ(LIBC_NAMESPACE::cpp::splat(inf),
+                 wrap_vexpf(FPBits(0x42cffff8U).get_val()));
+
+  EXPECT_SIMD_EQ(LIBC_NAMESPACE::cpp::splat(inf),
+                 wrap_vexpf(FPBits(0x42d00008U).get_val()));
 }
 
 TEST_F(LlvmLibcVecExpfTest, Underflow) {
-  // Exception still supported with current impl and test, but eventually won't
-  // be tested.
+  // Passes if tested with exceptions ?
   EXPECT_SIMD_EQ_WITH_EXCEPTION(LIBC_NAMESPACE::cpp::splat(0.0f),
                                 wrap_vexpf(FPBits(0xff7fffffU).get_val()),
                                 FE_UNDERFLOW);
 
   float x = FPBits(0xc2cffff8U).get_val();
-  EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x, 1.0), wrap_vexpf(x, 1.0));
+  EXPECT_SIMD_EQ(wrap_ref_vexpf(x, 1.0), wrap_vexpf(x, 1.0));
 
   x = FPBits(0xc2d00008U).get_val();
-  EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x, 1.0), wrap_vexpf(x, 1.0));
+  EXPECT_SIMD_EQ(wrap_ref_vexpf(x, 1.0), wrap_vexpf(x, 1.0));
 }
 
 // Test with inputs which are the borders of underflow/overflow but still
@@ -106,19 +101,19 @@ TEST_F(LlvmLibcVecExpfTest, Borderline) {
 
   x = FPBits(0x42affff8U).get_val();
   // Do we need ASSERT? If so it needs a version for all rounding modes
-  EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x, 1.0), wrap_vexpf(x, 1.0));
+  EXPECT_SIMD_EQ(wrap_ref_vexpf(x, 1.0), wrap_vexpf(x, 1.0));
 
   x = FPBits(0x42b00008U).get_val();
-  EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x, 1.0), wrap_vexpf(x, 1.0));
+  EXPECT_SIMD_EQ(wrap_ref_vexpf(x, 1.0), wrap_vexpf(x, 1.0));
 
   x = FPBits(0xc2affff8U).get_val();
-  EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x, 1.0), wrap_vexpf(x, 1.0));
+  EXPECT_SIMD_EQ(wrap_ref_vexpf(x, 1.0), wrap_vexpf(x, 1.0));
 
   x = FPBits(0xc2b00008U).get_val();
-  EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x, 1.0), wrap_vexpf(x, 1.0));
+  EXPECT_SIMD_EQ(wrap_ref_vexpf(x, 1.0), wrap_vexpf(x, 1.0));
 
   x = FPBits(0xc236bd8cU).get_val();
-  EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x, 1.0), wrap_vexpf(x, 1.0));
+  EXPECT_SIMD_EQ(wrap_ref_vexpf(x, 1.0), wrap_vexpf(x, 1.0));
 }
 
 TEST_F(LlvmLibcVecExpfTest, InFloatRange) {
@@ -128,12 +123,11 @@ TEST_F(LlvmLibcVecExpfTest, InFloatRange) {
     float x = FPBits(v).get_val();
     if (FPBits(v).is_nan() || FPBits(v).is_inf())
       continue;
-    EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x), wrap_vexpf(x));
-    EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x, aNaN), wrap_vexpf(x, aNaN));
-    EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x, inf), wrap_vexpf(x, inf));
-    EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x, -inf),
-                                wrap_vexpf(x, neg_inf));
-    EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x, 0.0), wrap_vexpf(x, 0.0));
-    EXPECT_SIMD_EQ_ALL_ROUNDING(wrap_ref_vexpf(x, -0.0), wrap_vexpf(x, -0.0));
+    EXPECT_SIMD_EQ(wrap_ref_vexpf(x), wrap_vexpf(x));
+    EXPECT_SIMD_EQ(wrap_ref_vexpf(x, aNaN), wrap_vexpf(x, aNaN));
+    EXPECT_SIMD_EQ(wrap_ref_vexpf(x, inf), wrap_vexpf(x, inf));
+    EXPECT_SIMD_EQ(wrap_ref_vexpf(x, -inf), wrap_vexpf(x, neg_inf));
+    EXPECT_SIMD_EQ(wrap_ref_vexpf(x, 0.0), wrap_vexpf(x, 0.0));
+    EXPECT_SIMD_EQ(wrap_ref_vexpf(x, -0.0), wrap_vexpf(x, -0.0));
   }
 }

From ddd9fbf938b96800236f641dd24bc9c1516a49a3 Mon Sep 17 00:00:00 2001
From: Dylan Fleming <Dylan.Fleming@arm.com>
Date: Wed, 7 Jan 2026 22:18:42 +0000
Subject: [PATCH 4/6] Removed vector_utils, Improved Gathers, and fixed
 bitcast.

Removes vector_utils.h, and instead relies on simd_cast/compiler
to correctly handle oversized vectors.

Added check for gather instruction support, and implemented
an explicit loop over scalar lookup fallback.

Changed incorrect reinterpret_cast to bit_cast.

Removed leftover debug message from a CMakeList file.
---
 libc/src/CMakeLists.txt                       |  1 -
 libc/src/__support/CPP/simd.h                 | 10 ++++
 .../macros/properties/cpu_features.h          |  4 ++
 libc/src/__support/mathvec/CMakeLists.txt     |  8 ----
 libc/src/__support/mathvec/expf.h             | 21 +++------
 libc/src/__support/mathvec/expf_utils.h       |  2 +-
 libc/src/__support/mathvec/vector_utils.h     | 46 -------------------
 7 files changed, 22 insertions(+), 70 deletions(-)
 delete mode 100644 libc/src/__support/mathvec/vector_utils.h

diff --git a/libc/src/CMakeLists.txt b/libc/src/CMakeLists.txt
index 8f005a700a80f..8a0acccaed708 100644
--- a/libc/src/CMakeLists.txt
+++ b/libc/src/CMakeLists.txt
@@ -30,7 +30,6 @@ if(${LIBC_TARGET_OS} STREQUAL "linux")
 endif()
 
 if(LIBC_COMPILER_HAS_EXT_VECTOR_TYPE)
-  message(STATUS "Vector math enabled")
   add_subdirectory(mathvec)
 endif()
 
diff --git a/libc/src/__support/CPP/simd.h b/libc/src/__support/CPP/simd.h
index 422d2f4c8433d..fd34a13c323b3 100644
--- a/libc/src/__support/CPP/simd.h
+++ b/libc/src/__support/CPP/simd.h
@@ -325,8 +325,18 @@ LIBC_INLINE constexpr static T gather(simd<bool, simd_size_v<T>> mask, Idx idx,
                                       const void *base, bool aligned = false) {
   if (aligned)
     base = __builtin_assume_aligned(base, alignof(T));
+#if defined(LIBC_TARGET_CPU_HAS_GATHER)
   return __builtin_masked_gather(
       mask, idx, reinterpret_cast<const simd_element_type_t<T> *>(base));
+#else
+  T result;
+  for (size_t i = 0; i < simd_size_v<T>; ++i) {
+    if (mask[i])
+      result[i] =
+          *(reinterpret_cast<const simd_element_type_t<T> *>(base) + idx[i]);
+  }
+  return result;
+#endif
 }
 template <typename T, typename Idx, internal::enable_if_simd_t<T> = 0>
 LIBC_INLINE constexpr static void scatter(simd<bool, simd_size_v<T>> mask,
diff --git a/libc/src/__support/macros/properties/cpu_features.h b/libc/src/__support/macros/properties/cpu_features.h
index 1fe20d9b23a34..0ad0ab5d4e523 100644
--- a/libc/src/__support/macros/properties/cpu_features.h
+++ b/libc/src/__support/macros/properties/cpu_features.h
@@ -56,6 +56,10 @@
 #define LIBC_TARGET_CPU_HAS_AVX512BW
 #endif
 
+#if defined(__AVX512F__) || defined(__AVX2__)
+#define LIBC_TARGET_CPU_HAS_GATHER
+#endif
+
 #if defined(__ARM_FP)
 #if (__ARM_FP & 0x2)
 #define LIBC_TARGET_CPU_HAS_ARM_FPU_HALF
diff --git a/libc/src/__support/mathvec/CMakeLists.txt b/libc/src/__support/mathvec/CMakeLists.txt
index 4b6e31ad29929..865e53f2658a9 100644
--- a/libc/src/__support/mathvec/CMakeLists.txt
+++ b/libc/src/__support/mathvec/CMakeLists.txt
@@ -25,11 +25,3 @@ add_header_library(
     libc.src.__support.mathvec.expf_utils
     libc.src.__support.mathvec.vector_utils
 )
-
-add_header_library(
-  vector_utils
-  HDRS
-    vector_utils.h
-  DEPENDS
-    libc.src.__support.CPP.simd
-)
diff --git a/libc/src/__support/mathvec/expf.h b/libc/src/__support/mathvec/expf.h
index 6b5abb4a0c32f..3b92cba609c9b 100644
--- a/libc/src/__support/mathvec/expf.h
+++ b/libc/src/__support/mathvec/expf.h
@@ -13,7 +13,6 @@
 #include "src/__support/FPUtil/FPBits.h"
 #include "src/__support/common.h"
 #include "src/__support/mathvec/expf_utils.h"
-#include "src/__support/mathvec/vector_utils.h"
 
 namespace LIBC_NAMESPACE_DECL {
 
@@ -50,22 +49,16 @@ LIBC_INLINE cpp::simd<double, N> inline_exp(cpp::simd<double, N> x) {
 template <size_t N>
 LIBC_INLINE cpp::simd<float, N> expf(cpp::simd<float, N> x) {
   using FPBits = typename fputil::FPBits<float>;
-  cpp::simd<float, N> ret;
 
-  auto is_inf = cpp::simd_cast<bool>(x >= 0x1.62e38p+9);
-  auto is_zero = cpp::simd_cast<bool>(x <= -0x1.628c2ap+9);
-  auto is_special = is_inf | is_zero;
+  cpp::simd<bool, N> is_inf = x >= 0x1.62e38p+9;
+  cpp::simd<bool, N> is_zero = x <= -0x1.628c2ap+9;
+  cpp::simd<bool, N> is_special = is_inf | is_zero;
 
-  auto special_res =
-      cpp::select(is_inf, cpp::simd<float, N>(FPBits::inf().get_val()),
-                  cpp::simd<float>(0.0f));
+  cpp::simd<float, N> special_res = is_inf ? FPBits::inf().get_val() : 0.0f;
 
-  auto [lo, hi] = vector_float_to_double(x);
-
-  auto lo_res = inline_exp(lo);
-  auto hi_res = inline_exp(hi);
-
-  ret = vector_double_to_float(lo_res, hi_res);
+  cpp::simd<double, N> x_d = cpp::simd_cast<double, float, N>(x);
+  cpp::simd<double, N> y = inline_exp(x_d);
+  cpp::simd<float, N> ret = cpp::simd_cast<float, double, N>(y);
   return cpp::select(is_special, special_res, ret);
 }
 
diff --git a/libc/src/__support/mathvec/expf_utils.h b/libc/src/__support/mathvec/expf_utils.h
index 2650bcba9bca7..8c1fbb239dbd0 100644
--- a/libc/src/__support/mathvec/expf_utils.h
+++ b/libc/src/__support/mathvec/expf_utils.h
@@ -21,7 +21,7 @@ LIBC_INLINE cpp::simd<double, N> exp_lookup(cpp::simd<uint64_t, N> u) {
       true, index, common_constants_internal::EXP_MANTISSA);
   auto exponent = (u >> 6) << 52;
   auto result = mantissa | exponent;
-  return reinterpret_cast<cpp::simd<double, N>>(result);
+  return cpp::bit_cast<cpp::simd<double, N>>(result);
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/__support/mathvec/vector_utils.h b/libc/src/__support/mathvec/vector_utils.h
deleted file mode 100644
index d2138f148a0ee..0000000000000
--- a/libc/src/__support/mathvec/vector_utils.h
+++ /dev/null
@@ -1,46 +0,0 @@
-//===-- Common utils for SIMD functions -------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIBC_SRC___SUPPORT_MATHVEC_VECTOR_UTILS_H
-#define LLVM_LIBC_SRC___SUPPORT_MATHVEC_VECTOR_UTILS_H
-
-#include "src/__support/CPP/simd.h"
-#include <tuple>
-
-namespace LIBC_NAMESPACE_DECL {
-
-// Casts a simd<float, N> into two simd<double, N/2>
-template <size_t N>
-LIBC_INLINE constexpr auto vector_float_to_double(cpp::simd<float, N> v) {
-  static_assert(N % 2 == 0, "vector size must be even");
-  constexpr size_t H = N / 2;
-
-  auto parts = cpp::split<H, H>(v);
-  auto lo_f = cpp::get<0>(parts);
-  auto hi_f = cpp::get<1>(parts);
-
-  auto lo_d = cpp::simd_cast<double, float, H>(lo_f);
-  auto hi_d = cpp::simd_cast<double, float, H>(hi_f);
-
-  return cpp::make_tuple(lo_d, hi_d);
-}
-
-// Casts two simd<double, N> into a simd<float, 2N>
-template <size_t N>
-LIBC_INLINE constexpr auto vector_double_to_float(cpp::simd<double, N> lo_d,
-                                                  cpp::simd<double, N> hi_d) {
-
-  auto lo_f = cpp::simd_cast<float, double, N>(lo_d);
-  auto hi_f = cpp::simd_cast<float, double, N>(hi_d);
-
-  return cpp::concat(lo_f, hi_f);
-}
-
-} // namespace LIBC_NAMESPACE_DECL
-
-#endif // LLVM_LIBC_SRC___SUPPORT_MATHVEC_VECTOR_UTILS_H

From 33d73471e27b77a21735bf9f656f18ce5e987bd0 Mon Sep 17 00:00:00 2001
From: Dylan Fleming <Dylan.Fleming@arm.com>
Date: Thu, 8 Jan 2026 21:26:44 +0000
Subject: [PATCH 5/6] Updated coefficients and addressed comments

Updated coefficients used in expf, and added
sollya comments to reproduce them.

Made inline_exp and exp_lookup static to fix
ABI warning.

Reduced number of auto types used.
---
 libc/src/__support/CPP/simd.h           | 10 -----
 libc/src/__support/mathvec/expf.h       | 57 ++++++++++++++++---------
 libc/src/__support/mathvec/expf_utils.h | 10 ++---
 3 files changed, 41 insertions(+), 36 deletions(-)

diff --git a/libc/src/__support/CPP/simd.h b/libc/src/__support/CPP/simd.h
index fd34a13c323b3..422d2f4c8433d 100644
--- a/libc/src/__support/CPP/simd.h
+++ b/libc/src/__support/CPP/simd.h
@@ -325,18 +325,8 @@ LIBC_INLINE constexpr static T gather(simd<bool, simd_size_v<T>> mask, Idx idx,
                                       const void *base, bool aligned = false) {
   if (aligned)
     base = __builtin_assume_aligned(base, alignof(T));
-#if defined(LIBC_TARGET_CPU_HAS_GATHER)
   return __builtin_masked_gather(
       mask, idx, reinterpret_cast<const simd_element_type_t<T> *>(base));
-#else
-  T result;
-  for (size_t i = 0; i < simd_size_v<T>; ++i) {
-    if (mask[i])
-      result[i] =
-          *(reinterpret_cast<const simd_element_type_t<T> *>(base) + idx[i]);
-  }
-  return result;
-#endif
 }
 template <typename T, typename Idx, internal::enable_if_simd_t<T> = 0>
 LIBC_INLINE constexpr static void scatter(simd<bool, simd_size_v<T>> mask,
diff --git a/libc/src/__support/mathvec/expf.h b/libc/src/__support/mathvec/expf.h
index 3b92cba609c9b..0df7cbb7d0c2f 100644
--- a/libc/src/__support/mathvec/expf.h
+++ b/libc/src/__support/mathvec/expf.h
@@ -19,30 +19,45 @@ namespace LIBC_NAMESPACE_DECL {
 namespace mathvec {
 
 template <size_t N>
-LIBC_INLINE cpp::simd<double, N> inline_exp(cpp::simd<double, N> x) {
+LIBC_INLINE static cpp::simd<double, N> inline_exp(cpp::simd<double, N> x) {
   static constexpr cpp::simd<double, N> shift = 0x1.800000000ffc0p+46;
 
-  auto z = shift + x * 0x1.71547652b82fep+0;
-  auto n = z - shift;
-
-  auto r = x;
-  r = r - n * 0x1.62e42fefa3800p-1;
-  r = r - n * 0x1.ef35793c76730p-45;
+  // inv_ln2 = round(1/log(2), D, RN);
+  static constexpr cpp::simd<double, N> inv_ln2 = 0x1.71547652b82fep+0;
+  cpp::simd<double, N> z = shift + x * inv_ln2;
+  cpp::simd<double, N> n = z - shift;
+
+  // ln2_hi = round(log(2), D, RN);
+  // ln2_lo = round(log(2) - ln2_hi, D, RN);
+  static constexpr cpp::simd<double, N> ln2_hi = 0x1.62e42fefa39efp-1;
+  static constexpr cpp::simd<double, N> ln2_lo = 0x1.abc9e3b39803fp-56;
+
+  cpp::simd<double, N> r = x;
+  r = r - n * ln2_hi;
+  r = r - n * ln2_lo;
+
+  // Coefficients of exp approximation, generated by Sollya with:
+  // poly = 1 + x;
+  // for i from 2 to 5 do {
+  //   r = remez(exp(x)-poly(x), 5-i, [-log(2)/128;log(2)/128], x^i, 1e-10);
+  //   c = coeff(roundcoefficients(r, [|D ...|]), 0);
+  //   poly = poly + x^i*c;
+  //   c;
+  // };
+  static constexpr cpp::simd<double, N> c0 = 0x1.fffffffffdbcep-2;
+  static constexpr cpp::simd<double, N> c1 = 0x1.55555555543c2p-3;
+  static constexpr cpp::simd<double, N> c2 = 0x1.555573c64f2e3p-5;
+  static constexpr cpp::simd<double, N> c3 = 0x1.111126b4eff73p-7;
 
   /* y = exp(r) - 1 ~= r + C0 r^2 + C1 r^3 + C2 r^4 + C3 r^5.  */
-  static constexpr cpp::simd<double, N> c0 = 0x1.fffffffffdbcdp-2;
-  static constexpr cpp::simd<double, N> c1 = 0x1.555555555444cp-3;
-  static constexpr cpp::simd<double, N> c2 = 0x1.555573c6a9f7dp-5;
-  static constexpr cpp::simd<double, N> c3 = 0x1.1111266d28935p-7;
-
-  auto r2 = r * r;
-  auto p01 = c0 + r * c1;
-  auto p23 = c2 + r * c3;
-  auto p04 = p01 + r2 * p23;
-  auto y = r + p04 * r2;
-
-  auto u = reinterpret_cast<cpp::simd<uint64_t, N>>(z);
-  auto s = exp_lookup(u);
+  cpp::simd<double, N> r2 = r * r;
+  cpp::simd<double, N> p01 = c0 + r * c1;
+  cpp::simd<double, N> p23 = c2 + r * c3;
+  cpp::simd<double, N> p04 = p01 + r2 * p23;
+  cpp::simd<double, N> y = r + p04 * r2;
+
+  cpp::simd<uint64_t, N> u = cpp::bit_cast<cpp::simd<uint64_t, N>>(z);
+  cpp::simd<double, N> s = exp_lookup(u);
   return s + s * y;
 }
 
@@ -59,7 +74,7 @@ LIBC_INLINE cpp::simd<float, N> expf(cpp::simd<float, N> x) {
   cpp::simd<double, N> x_d = cpp::simd_cast<double, float, N>(x);
   cpp::simd<double, N> y = inline_exp(x_d);
   cpp::simd<float, N> ret = cpp::simd_cast<float, double, N>(y);
-  return cpp::select(is_special, special_res, ret);
+  return is_special ? special_res : ret;
 }
 
 } // namespace mathvec
diff --git a/libc/src/__support/mathvec/expf_utils.h b/libc/src/__support/mathvec/expf_utils.h
index 8c1fbb239dbd0..25f520ecca1d7 100644
--- a/libc/src/__support/mathvec/expf_utils.h
+++ b/libc/src/__support/mathvec/expf_utils.h
@@ -15,12 +15,12 @@
 namespace LIBC_NAMESPACE_DECL {
 
 template <size_t N>
-LIBC_INLINE cpp::simd<double, N> exp_lookup(cpp::simd<uint64_t, N> u) {
-  auto index = u & cpp::simd<uint64_t, N>(0x3f);
-  auto mantissa = cpp::gather<cpp::simd<uint64_t, N>>(
+LIBC_INLINE static cpp::simd<double, N> exp_lookup(cpp::simd<uint64_t, N> u) {
+  cpp::simd<uint64_t, N> index = u & cpp::simd<uint64_t, N>(0x3f);
+  cpp::simd<uint64_t, N> mantissa = cpp::gather<cpp::simd<uint64_t, N>>(
       true, index, common_constants_internal::EXP_MANTISSA);
-  auto exponent = (u >> 6) << 52;
-  auto result = mantissa | exponent;
+  cpp::simd<uint64_t, N> exponent = (u >> 6) << 52;
+  cpp::simd<uint64_t, N> result = mantissa | exponent;
   return cpp::bit_cast<cpp::simd<double, N>>(result);
 }
 

From 4a482b695b3c4d1b99a89ed67ccf773a907c3f3f Mon Sep 17 00:00:00 2001
From: Dylan Fleming <Dylan.Fleming@arm.com>
Date: Thu, 8 Jan 2026 22:52:59 +0000
Subject: [PATCH 6/6] Made cpp::bit_cast static

---
 libc/src/__support/CPP/bit.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libc/src/__support/CPP/bit.h b/libc/src/__support/CPP/bit.h
index 88d4362e6758c..ccf623a0298d9 100644
--- a/libc/src/__support/CPP/bit.h
+++ b/libc/src/__support/CPP/bit.h
@@ -39,7 +39,7 @@ LIBC_INLINE static void inline_copy(const char *from, char *to) {
 // This implementation of bit_cast requires trivially-constructible To, to avoid
 // UB in the implementation.
 template <typename To, typename From>
-LIBC_INLINE constexpr cpp::enable_if_t<
+LIBC_INLINE static constexpr cpp::enable_if_t<
     (sizeof(To) == sizeof(From)) &&
         cpp::is_trivially_constructible<To>::value &&
         cpp::is_trivially_copyable<To>::value &&