Skip to content

Commit 2691103

Browse files
Anakin100100 and pitrou authored
GH-48591: [C++] Remove some bit utils from bit_util.h and replace them with C++20 built-in functions (#49298)
### Rationale for this change

Before C++20, the standard library had no built-in implementation of many common bit-manipulation utilities, so they were implemented in bit_util.h. Now that the standard library provides them, they should be removed from bit_util.h to reduce the amount of code that needs to be maintained, as described in #48591.

### What changes are included in this PR?

IsPowerOf2, PopCount, CountLeadingZeros, CountTrailingZeros and NumRequiredBits are removed from bit_util.h and replaced with their equivalents from the standard `<bit>` header, i.e. std::has_single_bit, std::popcount, std::countl_zero, std::countr_zero and std::bit_width.

### Are these changes tested?

No new code is introduced, and the standard library implementations maintain parity with the replaced functions, so no new unit tests are necessary.

### Are there any user-facing changes?

No.

* GitHub Issue: #48591

Lead-authored-by: Paweł Biegun <biegunpawel900@gmail.com>
Co-authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
1 parent 59e0ba6 commit 2691103

25 files changed

Lines changed: 101 additions & 290 deletions

cpp/src/arrow/acero/aggregate_benchmark.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
#include "benchmark/benchmark.h"
1919

20+
#include <bit>
2021
#include <cassert>
2122
#include <cmath>
2223
#include <iostream>
@@ -269,7 +270,7 @@ struct SumBitmapVectorizeUnroll : public Summer<T> {
269270
local.total += SUM_SHIFT(5);
270271
local.total += SUM_SHIFT(6);
271272
local.total += SUM_SHIFT(7);
272-
local.valid_count += bit_util::kBytePopcount[valid_byte];
273+
local.valid_count += std::popcount(valid_byte);
273274
} else {
274275
// No nulls
275276
local.total += values[i + 0] + values[i + 1] + values[i + 2] + values[i + 3] +

cpp/src/arrow/acero/bloom_filter_test.cc

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include <gmock/gmock-matchers.h>
1919

2020
#include <algorithm>
21+
#include <bit>
2122
#include <chrono>
2223
#include <condition_variable>
2324
#include <thread>
@@ -407,14 +408,14 @@ void TestBloomLarge(BloomFilterBuildStrategy strategy, int64_t num_build,
407408
uint64_t num_negatives = 0ULL;
408409
for (int iword = 0; iword < next_batch_size / 64; ++iword) {
409410
uint64_t word = reinterpret_cast<const uint64_t*>(result_bit_vector.data())[iword];
410-
num_negatives += ARROW_POPCOUNT64(~word);
411+
num_negatives += std::popcount(~word);
411412
}
412413
if (next_batch_size % 64 > 0) {
413414
uint64_t word = reinterpret_cast<const uint64_t*>(
414415
result_bit_vector.data())[next_batch_size / 64];
415416
uint64_t mask = (1ULL << (next_batch_size % 64)) - 1;
416417
word |= ~mask;
417-
num_negatives += ARROW_POPCOUNT64(~word);
418+
num_negatives += std::popcount(~word);
418419
}
419420
if (i < num_build) {
420421
num_negatives_build += num_negatives;

cpp/src/arrow/acero/swiss_join.cc

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
#include <sys/stat.h>
1919
#include <algorithm> // std::upper_bound
20+
#include <bit>
2021
#include <cstdio>
2122
#include <cstdlib>
2223
#include <mutex>
@@ -666,7 +667,7 @@ void SwissTableMerge::MergePartition(SwissTable* target, const SwissTable* sourc
666667
// For each non-empty source slot...
667668
constexpr uint64_t kHighBitOfEachByte = 0x8080808080808080ULL;
668669
int num_full_slots = SwissTable::kSlotsPerBlock -
669-
static_cast<int>(ARROW_POPCOUNT64(block & kHighBitOfEachByte));
670+
static_cast<int>(std::popcount(block & kHighBitOfEachByte));
670671
for (int local_slot_id = 0; local_slot_id < num_full_slots; ++local_slot_id) {
671672
// Read group id and hash for this slot.
672673
//
@@ -722,7 +723,7 @@ inline bool SwissTableMerge::InsertNewGroup(SwissTable* target, uint32_t group_i
722723
return false;
723724
}
724725
int local_slot_id = SwissTable::kSlotsPerBlock -
725-
static_cast<int>(ARROW_POPCOUNT64(block & kHighBitOfEachByte));
726+
static_cast<int>(std::popcount(block & kHighBitOfEachByte));
726727
uint32_t global_slot_id = SwissTable::global_slot_id(block_id, local_slot_id);
727728
target->insert_into_empty_slot(global_slot_id, hash, group_id);
728729
return true;

cpp/src/arrow/compute/kernels/base_arithmetic_internal.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
#pragma once
1919

20+
#include <bit>
2021
#include <limits>
2122
#include "arrow/compute/api_scalar.h"
2223
#include "arrow/compute/kernels/common_internal.h"
@@ -594,8 +595,7 @@ struct PowerChecked {
594595
}
595596
// left to right O(logn) power with overflow checks
596597
bool overflow = false;
597-
uint64_t bitmask =
598-
1ULL << (63 - bit_util::CountLeadingZeros(static_cast<uint64_t>(exp)));
598+
uint64_t bitmask = 1ULL << (63 - std::countl_zero(static_cast<uint64_t>(exp)));
599599
T pow = 1;
600600
while (bitmask) {
601601
overflow |= MultiplyWithOverflow(pow, pow, &pow);

cpp/src/arrow/compute/key_hash_internal.cc

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include <memory.h>
2121

2222
#include <algorithm>
23+
#include <bit>
2324
#include <cstdint>
2425

2526
#include "arrow/compute/light_array_internal.h"
@@ -357,7 +358,7 @@ void Hashing32::HashInt(bool combine_hashes, uint32_t num_keys, uint64_t key_len
357358
void Hashing32::HashFixed(int64_t hardware_flags, bool combine_hashes, uint32_t num_keys,
358359
uint64_t key_length, const uint8_t* keys, uint32_t* hashes,
359360
uint32_t* temp_hashes_for_combine) {
360-
if (ARROW_POPCOUNT64(key_length) == 1 && key_length <= sizeof(uint64_t)) {
361+
if (std::popcount(key_length) == 1 && key_length <= sizeof(uint64_t)) {
361362
HashInt(combine_hashes, num_keys, key_length, keys, hashes);
362363
return;
363364
}
@@ -809,7 +810,7 @@ void Hashing64::HashInt(bool combine_hashes, uint32_t num_keys, uint64_t key_len
809810

810811
void Hashing64::HashFixed(bool combine_hashes, uint32_t num_keys, uint64_t key_length,
811812
const uint8_t* keys, uint64_t* hashes) {
812-
if (ARROW_POPCOUNT64(key_length) == 1 && key_length <= sizeof(uint64_t)) {
813+
if (std::popcount(key_length) == 1 && key_length <= sizeof(uint64_t)) {
813814
HashInt(combine_hashes, num_keys, key_length, keys, hashes);
814815
return;
815816
}

cpp/src/arrow/compute/key_map_internal.cc

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "arrow/compute/key_map_internal.h"
1919

2020
#include <algorithm>
21+
#include <bit>
2122
#include <cstdint>
2223

2324
#include "arrow/util/bit_util.h"
@@ -27,7 +28,6 @@
2728

2829
namespace arrow {
2930

30-
using bit_util::CountLeadingZeros;
3131
using internal::CpuInfo;
3232

3333
namespace compute {
@@ -91,7 +91,7 @@ inline void SwissTable::search_block(uint64_t block, int stamp, int start_slot,
9191
// Now if we or with the highest bits of the block and scan zero bits in reverse, we get
9292
// 8x slot index that we were looking for. This formula works in all three cases a), b)
9393
// and c).
94-
*out_slot = static_cast<int>(CountLeadingZeros(matches | block_high_bits) >> 3);
94+
*out_slot = static_cast<int>(std::countl_zero(matches | block_high_bits) >> 3);
9595
}
9696

9797
template <typename T, bool use_selection>
@@ -204,8 +204,8 @@ void SwissTable::init_slot_ids_for_new_keys(uint32_t num_ids, const uint16_t* id
204204
int num_block_bytes = num_block_bytes_from_num_groupid_bits(num_groupid_bits);
205205
if (log_blocks_ == 0) {
206206
uint64_t block = *reinterpret_cast<const uint64_t*>(blocks_->mutable_data());
207-
uint32_t empty_slot = static_cast<uint32_t>(
208-
kSlotsPerBlock - ARROW_POPCOUNT64(block & kHighBitOfEachByte));
207+
uint32_t empty_slot =
208+
static_cast<uint32_t>(kSlotsPerBlock - std::popcount(block & kHighBitOfEachByte));
209209
for (uint32_t i = 0; i < num_ids; ++i) {
210210
int id = ids[i];
211211
slot_ids[id] = empty_slot;
@@ -224,7 +224,7 @@ void SwissTable::init_slot_ids_for_new_keys(uint32_t num_ids, const uint16_t* id
224224
}
225225
iblock = (iblock + 1) & ((1 << log_blocks_) - 1);
226226
}
227-
uint32_t empty_slot = static_cast<int>(kSlotsPerBlock - ARROW_POPCOUNT64(block));
227+
uint32_t empty_slot = static_cast<int>(kSlotsPerBlock - std::popcount(block));
228228
slot_ids[id] = global_slot_id(iblock, empty_slot);
229229
}
230230
}
@@ -684,7 +684,7 @@ Status SwissTable::grow_double() {
684684
mutable_block_data(blocks_new->mutable_data(), 2 * i, block_size_after);
685685
uint64_t block = *reinterpret_cast<const uint64_t*>(block_base);
686686

687-
uint32_t full_slots = CountLeadingZeros(block & kHighBitOfEachByte) >> 3;
687+
uint32_t full_slots = std::countl_zero(block & kHighBitOfEachByte) >> 3;
688688
uint32_t full_slots_new[2];
689689
full_slots_new[0] = full_slots_new[1] = 0;
690690
util::SafeStore(double_block_base_new, kHighBitOfEachByte);
@@ -722,7 +722,7 @@ Status SwissTable::grow_double() {
722722
// How many full slots in this block
723723
const uint8_t* block_base = block_data(i, block_size_before);
724724
uint64_t block = util::SafeLoadAs<uint64_t>(block_base);
725-
uint32_t full_slots = CountLeadingZeros(block & kHighBitOfEachByte) >> 3;
725+
uint32_t full_slots = std::countl_zero(block & kHighBitOfEachByte) >> 3;
726726

727727
for (uint32_t j = 0; j < full_slots; ++j) {
728728
uint32_t slot_id = global_slot_id(i, j);
@@ -741,13 +741,13 @@ Status SwissTable::grow_double() {
741741
mutable_block_data(blocks_new->mutable_data(), block_id_new, block_size_after);
742742
uint64_t block_new = util::SafeLoadAs<uint64_t>(block_base_new);
743743
int full_slots_new =
744-
static_cast<int>(CountLeadingZeros(block_new & kHighBitOfEachByte) >> 3);
744+
static_cast<int>(std::countl_zero(block_new & kHighBitOfEachByte) >> 3);
745745
while (full_slots_new == kSlotsPerBlock) {
746746
block_id_new = (block_id_new + 1) & ((1 << log_blocks_after) - 1);
747747
block_base_new = blocks_new->mutable_data() + block_id_new * block_size_after;
748748
block_new = util::SafeLoadAs<uint64_t>(block_base_new);
749749
full_slots_new =
750-
static_cast<int>(CountLeadingZeros(block_new & kHighBitOfEachByte) >> 3);
750+
static_cast<int>(std::countl_zero(block_new & kHighBitOfEachByte) >> 3);
751751
}
752752

753753
hashes_new[block_id_new * kSlotsPerBlock + full_slots_new] = hash;

cpp/src/arrow/compute/row/row_internal.cc

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717

1818
#include "arrow/compute/row/row_internal.h"
1919

20+
#include <bit>
21+
2022
#include "arrow/compute/util.h"
2123
#include "arrow/util/logging_internal.h"
2224

@@ -89,9 +91,9 @@ void RowTableMetadata::FromColumnMetadataVector(
8991
std::sort(
9092
column_order.begin(), column_order.end(), [&cols](uint32_t left, uint32_t right) {
9193
bool is_left_pow2 =
92-
!cols[left].is_fixed_length || ARROW_POPCOUNT64(cols[left].fixed_length) <= 1;
93-
bool is_right_pow2 = !cols[right].is_fixed_length ||
94-
ARROW_POPCOUNT64(cols[right].fixed_length) <= 1;
94+
!cols[left].is_fixed_length || std::popcount(cols[left].fixed_length) <= 1;
95+
bool is_right_pow2 =
96+
!cols[right].is_fixed_length || std::popcount(cols[right].fixed_length) <= 1;
9597
bool is_left_fixedlen = cols[left].is_fixed_length;
9698
bool is_right_fixedlen = cols[right].is_fixed_length;
9799
uint32_t width_left =
@@ -127,7 +129,7 @@ void RowTableMetadata::FromColumnMetadataVector(
127129
for (uint32_t i = 0; i < num_cols; ++i) {
128130
const KeyColumnMetadata& col = cols[column_order[i]];
129131
if (col.is_fixed_length && col.fixed_length != 0 &&
130-
ARROW_POPCOUNT64(col.fixed_length) != 1) {
132+
std::popcount(col.fixed_length) != 1) {
131133
offset_within_row += RowTableMetadata::padding_for_alignment_within_row(
132134
offset_within_row, string_alignment, col);
133135
}

cpp/src/arrow/compute/row/row_internal.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
// under the License.
1717
#pragma once
1818

19+
#include <bit>
1920
#include <cstdint>
2021
#include <vector>
2122

@@ -85,7 +86,7 @@ struct ARROW_COMPUTE_EXPORT RowTableMetadata {
8586
/// Alignment must be a power of 2.
8687
static inline uint32_t padding_for_alignment_within_row(uint32_t offset,
8788
int required_alignment) {
88-
ARROW_DCHECK(ARROW_POPCOUNT64(required_alignment) == 1);
89+
ARROW_DCHECK(std::popcount(static_cast<uint64_t>(required_alignment)) == 1);
8990
return static_cast<uint32_t>((-static_cast<int32_t>(offset)) &
9091
(required_alignment - 1));
9192
}
@@ -94,8 +95,7 @@ struct ARROW_COMPUTE_EXPORT RowTableMetadata {
9495
/// choosing required alignment based on the data type of that column.
9596
static inline uint32_t padding_for_alignment_within_row(
9697
uint32_t offset, int string_alignment, const KeyColumnMetadata& col_metadata) {
97-
if (!col_metadata.is_fixed_length ||
98-
ARROW_POPCOUNT64(col_metadata.fixed_length) <= 1) {
98+
if (!col_metadata.is_fixed_length || std::popcount(col_metadata.fixed_length) <= 1) {
9999
return 0;
100100
} else {
101101
return padding_for_alignment_within_row(offset, string_alignment);
@@ -106,7 +106,7 @@ struct ARROW_COMPUTE_EXPORT RowTableMetadata {
106106
/// Alignment must be a power of 2.
107107
static inline offset_type padding_for_alignment_row(offset_type row_offset,
108108
int required_alignment) {
109-
ARROW_DCHECK(ARROW_POPCOUNT64(required_alignment) == 1);
109+
ARROW_DCHECK(std::popcount(static_cast<uint64_t>(required_alignment)) == 1);
110110
return (-row_offset) & (required_alignment - 1);
111111
}
112112

cpp/src/arrow/compute/util.cc

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,13 @@
1717

1818
#include "arrow/compute/util.h"
1919

20+
#include <bit>
21+
2022
#include "arrow/util/logging.h"
2123
#include "arrow/util/ubsan.h"
2224

2325
namespace arrow {
2426

25-
using bit_util::CountTrailingZeros;
2627
using internal::CpuInfo;
2728

2829
namespace util {
@@ -65,7 +66,7 @@ inline void bits_to_indexes_helper(uint64_t word, uint16_t base_index, int* num_
6566
uint16_t* indexes) {
6667
int n = *num_indexes;
6768
while (word) {
68-
indexes[n++] = base_index + static_cast<uint16_t>(CountTrailingZeros(word));
69+
indexes[n++] = base_index + static_cast<uint16_t>(std::countr_zero(word));
6970
word &= word - 1;
7071
}
7172
*num_indexes = n;
@@ -75,7 +76,7 @@ inline void bits_filter_indexes_helper(uint64_t word, const uint16_t* input_inde
7576
int* num_indexes, uint16_t* indexes) {
7677
int n = *num_indexes;
7778
while (word) {
78-
indexes[n++] = input_indexes[CountTrailingZeros(word)];
79+
indexes[n++] = input_indexes[std::countr_zero(word)];
7980
word &= word - 1;
8081
}
8182
*num_indexes = n;

cpp/src/arrow/compute/util_avx2.cc

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
#include <bit>
1819
#include <cstring>
1920

2021
#include "arrow/compute/util.h"
@@ -54,7 +55,7 @@ void bits_to_indexes_imp_avx2(const int num_bits, const uint8_t* bits, int* num_
5455
_pext_u64(mask, _pdep_u64(word, kEachByteIs1) * 0xff) + base;
5556
*reinterpret_cast<uint64_t*>(byte_indexes + num_indexes_loop) = byte_indexes_next;
5657
base += incr;
57-
num_indexes_loop += static_cast<int>(arrow::bit_util::PopCount(word & 0xff));
58+
num_indexes_loop += static_cast<int>(std::popcount(word & 0xff));
5859
word >>= 8;
5960
}
6061
// Unpack indexes to 16-bits and either add the base of i * 64 or shuffle input
@@ -144,7 +145,7 @@ void bits_filter_indexes_imp_avx2(const int num_bits, const uint8_t* bits,
144145
kByteSequence_0_8_1_9_2_10_3_11,
145146
kByteSequence_4_12_5_13_6_14_7_15));
146147
_mm256_storeu_si256((__m256i*)(indexes + num_indexes), output);
147-
num_indexes += static_cast<int>(arrow::bit_util::PopCount(word & 0xffff));
148+
num_indexes += static_cast<int>(std::popcount(word & 0xffff));
148149
word >>= 16;
149150
++loop_id;
150151
}

0 commit comments

Comments
 (0)