Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 124 additions & 0 deletions include/RAJA/util/reduce.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,14 @@ struct LeftFoldReduce
m_accumulated_value = m_op(std::move(m_accumulated_value), std::move(val));
}

/*!
    \brief Operator shorthand for combine(): folds val into the reducer.
*/
RAJA_HOST_DEVICE RAJA_INLINE void operator+=(T val)
{
  // Forward the by-value argument on to combine() without another copy.
  this->combine(std::move(val));
}

private:
BinaryOp m_op;
T m_accumulated_value;
Expand Down Expand Up @@ -214,6 +222,14 @@ struct BinaryTreeReduce
++m_count;
}

/*!
    \brief Operator shorthand for combine(): folds val into the reducer.
*/
RAJA_HOST_DEVICE RAJA_INLINE void operator+=(T val)
{
  // Forward the by-value argument on to combine() without another copy.
  this->combine(std::move(val));
}

private:
BinaryOp m_op;

Expand Down Expand Up @@ -241,6 +257,80 @@ struct BinaryTreeReduce
}
};

/*!
    \brief Reduce class that sums floating point values using Kahan
           (compensated) summation.

    Next to the running sum a carry term is stored. The carry holds the
    rounding error observed in the previous addition and is subtracted from
    the next incoming value before that value is added to the sum.

    \tparam T floating point type of the values being summed
*/
template<typename T>
struct KahanSum
{
  static_assert(std::is_floating_point_v<T>, "T must be a floating point type");

  /*!
      \brief construct the reducer with the given initial sum and a zero carry
  */
  RAJA_HOST_DEVICE RAJA_INLINE constexpr explicit KahanSum(
      T init = T()) noexcept
      : m_accumulated_value(init),
        m_accumulated_carry(T())
  {}

  // The reducer is neither copyable nor movable.
  KahanSum(KahanSum const&)            = delete;
  KahanSum& operator=(KahanSum const&) = delete;
  KahanSum(KahanSum&&)                 = delete;
  KahanSum& operator=(KahanSum&&)      = delete;

  ~KahanSum() = default;

  /*!
      \brief reset the sum and the carry of the reducer to zero
  */
  RAJA_HOST_DEVICE RAJA_INLINE void clear() noexcept
  {
    m_accumulated_value = T();
    m_accumulated_carry = T();
  }

  /*!
      \brief return the current sum and clear the reducer
  */
  RAJA_HOST_DEVICE RAJA_INLINE T get_and_clear()
  {
    T accumulated_value = m_accumulated_value;

    clear();

    return accumulated_value;
  }

  /*!
      \brief return the current sum without modifying the reducer
  */
  RAJA_HOST_DEVICE RAJA_INLINE T get() const { return m_accumulated_value; }

  /*!
      \brief add a value into the sum, updating the carry
  */
  RAJA_HOST_DEVICE RAJA_INLINE void combine(T val)
  {
    // Correct the incoming value by the error carried over from the
    // previous addition.
    T y = val - m_accumulated_carry;

    // volatile is used to prevent compiler optimizations that assume
    // floating-point operations are associative; such optimizations could
    // simplify (t - sum) - y to zero and discard the compensation.
    volatile T t = m_accumulated_value + y;
    volatile T z = t - m_accumulated_value;

    // z is what was actually added to the sum; its difference from y is the
    // rounding error of this addition.
    m_accumulated_carry = z - y;
    m_accumulated_value = t;
  }

  /*!
      \brief add a value into the sum; shorthand for combine(val)
  */
  RAJA_HOST_DEVICE RAJA_INLINE void operator+=(T val)
  {
    combine(val);
  }

private:
  T m_accumulated_value;  // running sum
  T m_accumulated_carry;  // rounding error left over from the last addition
};

template<typename T, typename BinaryOp>
using HighAccuracyReduce =
std::conditional_t<RAJA::operators::is_fp_associative<T>::value,
Expand Down Expand Up @@ -291,6 +381,24 @@ binary_tree_reduce(Iter begin, Iter end, T init, BinaryOp op)
return reducer.get_and_clear();
}

/*!
\brief Combine into a single value using a kahan sum using O(N) operations
and O(1) memory
*/
template<typename Iter, typename T>
RAJA_HOST_DEVICE RAJA_INLINE T kahan_sum(Iter begin, Iter end, T init)
{
KahanSum<T> reducer(std::move(init));

for (; begin != end; ++begin)
{

reducer.combine(*begin);
}

return reducer.get_and_clear();
}

/*!
\brief reducer that uses a high accuracy implementation when round-off error
is a concern, or a faster algorithm with it is not a concern
Expand Down Expand Up @@ -358,6 +466,22 @@ RAJA_HOST_DEVICE RAJA_INLINE
std::move(op));
}

/*!
    \brief Sum the elements of the given range onto init using a Kahan sum.
           The work is forwarded to detail::kahan_sum over the iterators
           obtained from begin(c) and end(c).
           Constrained through concepts::enable_if_t on
           type_traits::is_range<Container> and std::is_floating_point<T>.
*/
template<typename Container, typename T = detail::ContainerVal<Container>>
RAJA_HOST_DEVICE RAJA_INLINE concepts::
    enable_if_t<T, type_traits::is_range<Container>, std::is_floating_point<T>>
    kahan_sum(Container&& c, T init = T())
{
  // Bring the std versions into scope and call unqualified so that
  // argument-dependent lookup can also find container-specific overloads.
  using std::begin;
  using std::end;

  auto first = begin(c);
  auto last  = end(c);

  return detail::kahan_sum(std::move(first), std::move(last), std::move(init));
}

/*!
\brief Reduce given range to a single value
using an algorithm with high accuracy when floating point round off is a
Expand Down
2 changes: 1 addition & 1 deletion test/unit/algorithm/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ if(RAJA_ENABLE_HIP)
endif()


set( UTIL_REDUCES BinaryTree Accumulate )
set( UTIL_REDUCES BinaryTree Accumulate Kahan )

RAJA_GENERATE_ALGORITHM_UTIL_TESTS( reduce Sequential Default "${UTIL_REDUCES}" )

Expand Down
Loading