Skip to content

Commit

Permalink
enable simd split (#1956)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: #1956

using new implementation

Reviewed By: dmm-fb

Differential Revision: D44088386

fbshipit-source-id: 2f67ea6ef2fde0bf5ae84c35c918f23ad58f55f5
  • Loading branch information
DenisYaroshevskiy authored and facebook-github-bot committed Mar 17, 2023
1 parent e288b43 commit 304ee74
Show file tree
Hide file tree
Showing 5 changed files with 155 additions and 64 deletions.
23 changes: 7 additions & 16 deletions folly/String-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -389,25 +389,16 @@ bool splitFixed(
//////////////////////////////////////////////////////////////////////

template <class Delim, class String, class OutputType>
void split(
std::enable_if_t<
(!detail::IsSimdSupportedDelim<Delim>::value ||
!detail::HasSimdSplitCompatibleValueType<OutputType>::value) &&
detail::IsSplitSupportedContainer<OutputType>::value>
split(
const Delim& delimiter,
const String& input,
std::vector<OutputType>& out,
OutputType& out,
bool ignoreEmpty) {
detail::internalSplit<OutputType>(
detail::prepareDelim(delimiter),
StringPiece(input),
std::back_inserter(out),
ignoreEmpty);
}

template <class Delim, class String, class OutputType>
void split(
const Delim& delimiter,
const String& input,
fbvector<OutputType, std::allocator<OutputType>>& out,
bool ignoreEmpty) {
detail::internalSplit<OutputType>(
detail::internalSplit<typename OutputType::value_type>(
detail::prepareDelim(delimiter),
StringPiece(input),
std::back_inserter(out),
Expand Down
59 changes: 51 additions & 8 deletions folly/String.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
#include <folly/ScopeGuard.h>
#include <folly/Traits.h>
#include <folly/Unit.h>
#include <folly/detail/SplitStringSimd.h>

namespace folly {

Expand Down Expand Up @@ -424,6 +425,41 @@ std::string hexDump(const void* ptr, size_t size);
*/
std::string errnoStr(int err);

template <typename T, std::size_t M, typename P>
class small_vector;

template <typename T, typename Allocator>
class fbvector;

namespace detail {

// Delimiter trait used to select between the split() overloads:
// true only when the delimiter type is exactly `char`.
template <class Delim>
struct IsSimdSupportedDelim : std::false_type {};

template <>
struct IsSimdSupportedDelim<char> : std::true_type {};

// Container trait used to constrain the split() overloads.
//
// SimdSplitByCharIsDefinedFor is intentionally not used here: we want the
// user to get an error in cases where they could have used the SIMD
// implementation but did not pass quite the right parameters.
template <class Container>
struct IsSplitSupportedContainer : std::false_type {};

// std::vector with any value type and allocator.
template <class Value, class Alloc>
struct IsSplitSupportedContainer<std::vector<Value, Alloc>> : std::true_type {
};

// folly::fbvector with any value type and allocator.
template <class Value, class Alloc>
struct IsSplitSupportedContainer<fbvector<Value, Alloc>> : std::true_type {};

// folly::small_vector with any value type, inline size and policy.
template <class Value, std::size_t InlineSize, class Policy>
struct IsSplitSupportedContainer<small_vector<Value, InlineSize, Policy>>
    : std::true_type {};

// True when the container's value_type is convertible to folly::StringPiece.
template <class Container>
using HasSimdSplitCompatibleValueType = std::
    is_convertible<typename Container::value_type, folly::StringPiece>;

} // namespace detail

/**
* Split a string into a list of tokens by delimiter.
*
Expand Down Expand Up @@ -454,20 +490,27 @@ std::string errnoStr(int err);
*/

template <class Delim, class String, class OutputType>
void split(
FOLLY_ALWAYS_INLINE std::enable_if_t<
detail::IsSimdSupportedDelim<Delim>::value &&
detail::HasSimdSplitCompatibleValueType<OutputType>::value &&
detail::IsSplitSupportedContainer<OutputType>::value>
split(
const Delim& delimiter,
const String& input,
std::vector<OutputType>& out,
const bool ignoreEmpty = false);

template <class T, class Allocator>
class fbvector;
OutputType& out,
const bool ignoreEmpty = false) {
return detail::simdSplitByChar(delimiter, input, out, ignoreEmpty);
}

template <class Delim, class String, class OutputType>
void split(
std::enable_if_t<
(!detail::IsSimdSupportedDelim<Delim>::value ||
!detail::HasSimdSplitCompatibleValueType<OutputType>::value) &&
detail::IsSplitSupportedContainer<OutputType>::value>
split(
const Delim& delimiter,
const String& input,
folly::fbvector<OutputType, std::allocator<OutputType>>& out,
OutputType& out,
const bool ignoreEmpty = false);

/**
Expand Down
28 changes: 19 additions & 9 deletions folly/detail/SplitStringSimd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <folly/detail/SplitStringSimd.h>
#include <folly/detail/SplitStringSimdImpl.h>

#include <folly/FBString.h>
#include <folly/FBVector.h>
#include <folly/small_vector.h>

Expand All @@ -37,6 +38,20 @@ void SimdSplitByCharImpl<Container>::dropEmpty(
sep, what, res);
}

// Forwards to the current platform's split functor, instantiated with
// ignoreEmpty = false.
template <typename Container>
void SimdSplitByCharImplToStrings<Container>::keepEmpty(
    char sep, folly::StringPiece what, Container& res) {
  using KeepEmptySplitter = PlatformSimdSplitByChar<
      StringSplitCurrentPlatform,
      /*ignoreEmpty*/ false>;
  KeepEmptySplitter{}(sep, what, res);
}

// Forwards to the current platform's split functor, instantiated with
// ignoreEmpty = true.
template <typename Container>
void SimdSplitByCharImplToStrings<Container>::dropEmpty(
    char sep, folly::StringPiece what, Container& res) {
  using DropEmptySplitter = PlatformSimdSplitByChar<
      StringSplitCurrentPlatform,
      /*ignoreEmpty*/ true>;
  DropEmptySplitter{}(sep, what, res);
}

// clang-format off
#define FOLLY_DETAIL_DEFINE_ALL_SIMD_SPLIT_OVERLOADS(...) \
template struct SimdSplitByCharImpl<std::vector<__VA_ARGS__>>; \
Expand All @@ -59,14 +74,9 @@ FOLLY_DETAIL_DEFINE_ALL_SIMD_SPLIT_OVERLOADS(std::string_view)

#undef FOLLY_DETAIL_DEFINE_ALL_SIMD_SPLIT_OVERLOADS

void simdSplitByCharVecOfStringsKeepEmpty(
char sep, folly::StringPiece what, std::vector<std::string>& res) {
PlatformSimdSplitByChar<StringSplitCurrentPlatform, false>{}(sep, what, res);
}
void simdSplitByCharVecOfStringsDropEmpty(
char sep, folly::StringPiece what, std::vector<std::string>& res) {
PlatformSimdSplitByChar<StringSplitCurrentPlatform, true>{}(sep, what, res);
}

template struct SimdSplitByCharImplToStrings<std::vector<std::string>>;
template struct SimdSplitByCharImplToStrings<std::vector<fbstring>>;
template struct SimdSplitByCharImplToStrings<fbvector<std::string>>;
template struct SimdSplitByCharImplToStrings<fbvector<fbstring>>;
} // namespace detail
} // namespace folly
66 changes: 49 additions & 17 deletions folly/detail/SplitStringSimd.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,33 @@ class small_vector;
template <typename T, typename Allocator>
class fbvector;

template <class Char>
class fbstring_core;

template <typename E, typename T, typename A, typename Storage>
class basic_fbstring;

namespace detail {

using PredeclareFbString = basic_fbstring<
char,
std::char_traits<char>,
std::allocator<char>,
fbstring_core<char>>;

// Entry points that simdSplitByChar dispatches to, one per value of
// ignoreEmpty. Only declared here; the definitions are out of line.
template <typename Container>
struct SimdSplitByCharImpl {
  // Used when ignoreEmpty is false.
  static void keepEmpty(
      char sep, folly::StringPiece what, Container& res);

  // Used when ignoreEmpty is true.
  static void dropEmpty(
      char sep, folly::StringPiece what, Container& res);
};

// Same interface as SimdSplitByCharImpl, but under a different name so that
// potential performance issues are easier to identify in a stack trace.
template <typename Container>
struct SimdSplitByCharImplToStrings {
  // Used when ignoreEmpty is false.
  static void keepEmpty(
      char sep, folly::StringPiece what, Container& res);

  // Used when ignoreEmpty is true.
  static void dropEmpty(
      char sep, folly::StringPiece what, Container& res);
};

template <typename T>
constexpr bool isSimdSplitSupportedStringViewType =
std::is_same<T, folly::StringPiece>::value
Expand All @@ -44,14 +63,20 @@ constexpr bool isSimdSplitSupportedStringViewType =
#endif
;

// True when T is exactly std::string or the fbstring type named by
// PredeclareFbString.
template <typename T>
constexpr bool isSimdSplitSupportedStringType =
    std::is_same<T, std::string>::value ||
    std::is_same<T, PredeclareFbString>::value;

template <typename>
struct SimdSplitByCharIsDefinedFor {
static constexpr bool value = false;
};

template <typename T>
struct SimdSplitByCharIsDefinedFor<std::vector<T>> {
static constexpr bool value = isSimdSplitSupportedStringViewType<T>;
static constexpr bool value = isSimdSplitSupportedStringViewType<T> ||
isSimdSplitSupportedStringType<T>;
};

template <typename T, typename A>
Expand All @@ -65,36 +90,35 @@ struct SimdSplitByCharIsDefinedFor<folly::small_vector<T, M, void>> {
};

template <typename Container>
void simdSplitByChar(
std::enable_if_t<
isSimdSplitSupportedStringViewType<typename Container::value_type>>
simdSplitByChar(
char sep, folly::StringPiece what, Container& res, bool ignoreEmpty) {
static_assert(
SimdSplitByCharIsDefinedFor<Container>::value,
"simd split by char is supported only for vector/fbvector/small_vector, with small size <= 8."
" The resulting string type has to string_view or StringPiece."
" There is also a special case of std::vector<std::string> for ease of migration");
" There is also a special case of (fb)vector<(fb)string> for legacy compatibility");
if (ignoreEmpty) {
SimdSplitByCharImpl<Container>::dropEmpty(sep, what, res);
} else {
SimdSplitByCharImpl<Container>::keepEmpty(sep, what, res);
}
}

// Using vector of strings instead of string views is a bad idea in general.
// We use this to have a separate name in the stack.
void simdSplitByCharVecOfStringsKeepEmpty(
char sep, folly::StringPiece what, std::vector<std::string>& res);
void simdSplitByCharVecOfStringsDropEmpty(
char sep, folly::StringPiece what, std::vector<std::string>& res);

inline void simdSplitByChar(
char sep,
folly::StringPiece what,
std::vector<std::string>& res,
bool ignoreEmpty) {
template <typename Container>
std::enable_if_t<isSimdSplitSupportedStringType<typename Container::value_type>>
simdSplitByChar(
char sep, folly::StringPiece what, Container& res, bool ignoreEmpty) {
static_assert(
SimdSplitByCharIsDefinedFor<Container>::value,
"simd split by char is supported only for vector/fbvector/small_vector, with small size <= 8."
" The resulting string type has to string_view or StringPiece."
" There is also a special case of (fb)vector<(fb)string> for legacy compatibility");
if (ignoreEmpty) {
simdSplitByCharVecOfStringsDropEmpty(sep, what, res);
SimdSplitByCharImplToStrings<Container>::dropEmpty(sep, what, res);
} else {
simdSplitByCharVecOfStringsKeepEmpty(sep, what, res);
SimdSplitByCharImplToStrings<Container>::keepEmpty(sep, what, res);
}
}

Expand All @@ -118,6 +142,14 @@ FOLLY_DETAIL_DECLARE_ALL_SIMD_SPLIT_OVERLOADS(folly::StringPiece)
FOLLY_DETAIL_DECLARE_ALL_SIMD_SPLIT_OVERLOADS(std::string_view)
#endif

extern template struct SimdSplitByCharImplToStrings<std::vector<std::string>>;
extern template struct SimdSplitByCharImplToStrings<
std::vector<PredeclareFbString>>;
extern template struct SimdSplitByCharImplToStrings<
fbvector<std::string, std::allocator<std::string>>>;
extern template struct SimdSplitByCharImplToStrings<
fbvector<PredeclareFbString, std::allocator<PredeclareFbString>>>;

#undef FOLLY_DETAIL_DECLARE_ALL_SIMD_SPLIT_OVERLOADS

} // namespace detail
Expand Down
43 changes: 29 additions & 14 deletions folly/detail/test/SplitStringSimdTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <folly/detail/SplitStringSimd.h>
#include <folly/detail/SplitStringSimdImpl.h>

#include <folly/FBString.h>
#include <folly/FBVector.h>
#include <folly/Range.h>
#include <folly/portability/GTest.h>
Expand Down Expand Up @@ -110,6 +111,29 @@ void testAllContainersOfSVs(
"");
}

// Splits `s` on ',' into a default-constructed `Container` using
// simdSplitByChar (passing the `ignoreEmpty` template argument through) and
// asserts that the result matches `expected` in size and element by element.
template <bool ignoreEmpty, typename Container>
void testContainersOfStrings(
folly::StringPiece s, const std::vector<folly::StringPiece>& expected) {
Container actual;
simdSplitByChar(',', s, actual, ignoreEmpty);

// Check the size first so the indexing below stays within `actual`;
// the input is streamed into the failure message.
ASSERT_EQ(expected.size(), actual.size()) << s;

for (std::size_t i = 0; i != expected.size(); ++i) {
// The message carries the input and the index of the mismatching piece.
ASSERT_EQ(expected[i], actual[i]) << s << " : " << i;
}
}

// Runs testContainersOfStrings for each combination of
// {std::vector, folly::fbvector} x {std::string, folly::fbstring}.
template <bool ie>
void testAllContainersOfStrings(
    folly::StringPiece s, const std::vector<folly::StringPiece>& expected) {
  using StdVecOfStdStrings = std::vector<std::string>;
  using FbVecOfStdStrings = folly::fbvector<std::string>;
  using StdVecOfFbStrings = std::vector<folly::fbstring>;
  using FbVecOfFbStrings = folly::fbvector<folly::fbstring>;

  // std::string elements.
  testContainersOfStrings<ie, StdVecOfStdStrings>(s, expected);
  testContainersOfStrings<ie, FbVecOfStdStrings>(s, expected);

  // folly::fbstring elements.
  testContainersOfStrings<ie, StdVecOfFbStrings>(s, expected);
  testContainersOfStrings<ie, FbVecOfFbStrings>(s, expected);
}

template <bool ignoreEmpty>
void runTestStringSplitOneType(folly::StringPiece s) {
std::vector<folly::StringPiece> expected;
Expand Down Expand Up @@ -147,17 +171,7 @@ void runTestStringSplitOneType(folly::StringPiece s) {
}

testAllContainersOfSVs<ignoreEmpty>(s, expected);

{
std::vector<std::string> actual;
simdSplitByChar(',', s, actual, ignoreEmpty);

ASSERT_EQ(expected.size(), actual.size()) << s;

for (std::size_t i = 0; i != expected.size(); ++i) {
ASSERT_EQ(expected[i], actual[i]) << s << " : " << i;
}
}
testAllContainersOfStrings<ignoreEmpty>(s, expected);
}

void runTestStringSplit(folly::StringPiece s) {
Expand Down Expand Up @@ -196,10 +210,11 @@ TEST(SplitStringSimd, ByChar) {

// special case: triggered shift right by 32 on uint32
{
constexpr std::string_view kTestData = "ong_history_by_pagetype_convr:0,";
static_assert(kTestData.size() == 32);

alignas(32) std::array<char, 32> buf;
buf.fill(0);
std::ranges::copy(
std::string_view("ong_history_by_pagetype_convr:0,"), buf.data());
std::copy(kTestData.begin(), kTestData.end(), buf.begin());
runTestStringSplit({buf.data(), buf.size()});
}
}
Expand Down

0 comments on commit 304ee74

Please sign in to comment.