Skip to content

Commit d1bf856

Browse files
zanmato1984 and pitrou authored
GH-45344: [C++][Testing] Generic StepGenerator (#45345)
### Rationale for this change

#45344

### What changes are included in this PR?

Make the `StepGenerator` generic.

### Are these changes tested?

Unit tests included.

### Are there any user-facing changes?

None.

* GitHub Issue: #45344

Lead-authored-by: Rossi Sun <zanmato1984@gmail.com>
Co-authored-by: Antoine Pitrou <pitrou@free.fr>
Signed-off-by: Rossi Sun <zanmato1984@gmail.com>
1 parent d3c4676 commit d1bf856

File tree

6 files changed

+119
-49
lines changed

6 files changed

+119
-49
lines changed

cpp/src/arrow/acero/order_by_node_test.cc

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,7 @@ static constexpr int kRowsPerBatch = 4;
4242
static constexpr int kNumBatches = 32;
4343

4444
std::shared_ptr<Table> TestTable() {
45-
return gen::Gen({{"up", gen::Step()},
46-
{"down", gen::Step(/*start=*/0, /*step=*/-1, /*signed_int=*/true)}})
45+
return gen::Gen({{"up", gen::Step()}, {"down", gen::Step(/*start=*/0, /*step=*/-1)}})
4746
->FailOnError()
4847
->Table(kRowsPerBatch, kNumBatches);
4948
}

cpp/src/arrow/acero/sorted_merge_node_test.cc

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,7 @@ namespace arrow::acero {
3636

3737
std::shared_ptr<Table> TestTable(int start, int step, int rows_per_batch,
3838
int num_batches) {
39-
return gen::Gen({{"timestamp", gen::Step(start, step, /*signed_int=*/true)},
40-
{"str", gen::Random(utf8())}})
39+
return gen::Gen({{"timestamp", gen::Step(start, step)}, {"str", gen::Random(utf8())}})
4140
->FailOnError()
4241
->Table(rows_per_batch, num_batches);
4342
}

cpp/src/arrow/testing/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,9 @@
1818
arrow_install_all_headers("arrow/testing")
1919

2020
if(ARROW_BUILD_TESTS)
21-
add_arrow_test(random_test)
21+
add_arrow_test(generator_test)
2222
add_arrow_test(gtest_util_test)
23+
add_arrow_test(random_test)
2324

2425
if(ARROW_FILESYSTEM)
2526
add_library(arrow_filesystem_example MODULE examplefs.cc)

cpp/src/arrow/testing/generator.cc

Lines changed: 0 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626

2727
#include "arrow/array.h"
2828
#include "arrow/buffer.h"
29-
#include "arrow/builder.h"
3029
#include "arrow/compute/exec.h"
3130
#include "arrow/datum.h"
3231
#include "arrow/record_batch.h"
@@ -220,42 +219,6 @@ class ConstantGenerator : public ArrayGenerator {
220219
std::shared_ptr<Scalar> value_;
221220
};
222221

223-
class StepGenerator : public ArrayGenerator {
224-
public:
225-
StepGenerator(uint32_t start, uint32_t step, bool signed_int)
226-
: start_(start), step_(step), signed_int_(signed_int) {}
227-
228-
template <typename BuilderType, typename CType>
229-
Result<std::shared_ptr<Array>> DoGenerate(int64_t num_rows) {
230-
BuilderType builder;
231-
ARROW_RETURN_NOT_OK(builder.Reserve(num_rows));
232-
CType val = start_;
233-
for (int64_t i = 0; i < num_rows; i++) {
234-
builder.UnsafeAppend(val);
235-
val += step_;
236-
}
237-
start_ = val;
238-
return builder.Finish();
239-
}
240-
241-
Result<std::shared_ptr<Array>> Generate(int64_t num_rows) override {
242-
if (signed_int_) {
243-
return DoGenerate<Int32Builder, int32_t>(num_rows);
244-
} else {
245-
return DoGenerate<UInt32Builder, uint32_t>(num_rows);
246-
}
247-
}
248-
249-
std::shared_ptr<DataType> type() const override {
250-
return signed_int_ ? int32() : uint32();
251-
}
252-
253-
private:
254-
uint32_t start_;
255-
uint32_t step_;
256-
bool signed_int_;
257-
};
258-
259222
static constexpr random::SeedType kTestSeed = 42;
260223

261224
class RandomGenerator : public ArrayGenerator {
@@ -405,10 +368,6 @@ std::shared_ptr<ArrayGenerator> Constant(std::shared_ptr<Scalar> value) {
405368
return std::make_shared<ConstantGenerator>(std::move(value));
406369
}
407370

408-
std::shared_ptr<ArrayGenerator> Step(uint32_t start, uint32_t step, bool signed_int) {
409-
return std::make_shared<StepGenerator>(start, step, signed_int);
410-
}
411-
412371
std::shared_ptr<ArrayGenerator> Random(std::shared_ptr<DataType> type) {
413372
return std::make_shared<RandomGenerator>(std::move(type));
414373
}

cpp/src/arrow/testing/generator.h

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
#include <vector>
2424

2525
#include "arrow/array/array_base.h"
26+
#include "arrow/array/util.h"
27+
#include "arrow/buffer_builder.h"
2628
#include "arrow/compute/type_fwd.h"
2729
#include "arrow/testing/gtest_util.h"
2830
#include "arrow/testing/visibility.h"
@@ -301,12 +303,48 @@ ARROW_TESTING_EXPORT std::shared_ptr<DataGenerator> Gen(
301303
/// make a generator that returns a constant value
302304
ARROW_TESTING_EXPORT std::shared_ptr<ArrayGenerator> Constant(
303305
std::shared_ptr<Scalar> value);
306+
304307
/// make a generator that returns an incrementing value
305308
///
306309
/// Note: overflow is not prevented; standard unsigned integer overflow applies
307-
ARROW_TESTING_EXPORT std::shared_ptr<ArrayGenerator> Step(uint32_t start = 0,
308-
uint32_t step = 1,
309-
bool signed_int = false);
310+
template <typename T = uint32_t>
311+
std::shared_ptr<ArrayGenerator> Step(T start = 0, T step = 1) {
312+
class StepGenerator : public ArrayGenerator {
313+
public:
314+
// Use [[maybe_unused]] to avoid a compiler warning in Clang versions before 15 that
315+
// incorrectly reports 'unused type alias'.
316+
using ArrowType [[maybe_unused]] = typename CTypeTraits<T>::ArrowType;
317+
static_assert(is_number_type<ArrowType>::value,
318+
"Step generator only supports numeric types");
319+
320+
StepGenerator(T start, T step) : start_(start), step_(step) {}
321+
322+
Result<std::shared_ptr<Array>> Generate(int64_t num_rows) override {
323+
TypedBufferBuilder<T> builder;
324+
ARROW_RETURN_NOT_OK(builder.Reserve(num_rows));
325+
T val = start_;
326+
for (int64_t i = 0; i < num_rows; i++) {
327+
builder.UnsafeAppend(val);
328+
val += step_;
329+
}
330+
start_ = val;
331+
ARROW_ASSIGN_OR_RAISE(auto buf, builder.Finish());
332+
return MakeArray(ArrayData::Make(TypeTraits<ArrowType>::type_singleton(), num_rows,
333+
{NULLPTR, std::move(buf)}, /*null_count=*/0));
334+
}
335+
336+
std::shared_ptr<DataType> type() const override {
337+
return TypeTraits<ArrowType>::type_singleton();
338+
}
339+
340+
private:
341+
T start_;
342+
T step_;
343+
};
344+
345+
return std::make_shared<StepGenerator>(start, step);
346+
}
347+
310348
/// make a generator that returns a random value
311349
ARROW_TESTING_EXPORT std::shared_ptr<ArrayGenerator> Random(
312350
std::shared_ptr<DataType> type);
cpp/src/arrow/testing/generator_test.cc

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
#include <gtest/gtest.h>
19+
20+
#include "arrow/testing/generator.h"
21+
22+
namespace arrow::gen {
23+
24+
template <typename CType>
25+
void CheckStep(const Array& result, CType start, CType step, int64_t length) {
26+
using ArrowType = typename CTypeTraits<CType>::ArrowType;
27+
28+
ASSERT_OK(result.ValidateFull());
29+
ASSERT_EQ(result.type_id(), TypeTraits<ArrowType>::type_singleton()->id());
30+
ASSERT_EQ(result.length(), length);
31+
ASSERT_EQ(result.null_bitmap(), nullptr);
32+
auto data = result.data()->GetValues<CType>(1);
33+
CType current = start;
34+
for (int64_t i = 0; i < length; ++i) {
35+
ASSERT_EQ(data[i], current);
36+
current += step;
37+
}
38+
}
39+
40+
TEST(StepTest, Default) {
41+
for (auto length : {0, 1, 1024}) {
42+
ARROW_SCOPED_TRACE("length=" + std::to_string(length));
43+
ASSERT_OK_AND_ASSIGN(auto array, Step()->Generate(length));
44+
CheckStep<uint32_t>(*array, 0, 1, length);
45+
}
46+
}
47+
48+
using NumericCTypes = ::testing::Types<int8_t, uint8_t, int16_t, uint16_t, int32_t,
49+
uint32_t, int64_t, uint64_t, float, double>;
50+
51+
template <typename CType>
52+
class TypedStepTest : public ::testing::Test {};
53+
54+
TYPED_TEST_SUITE(TypedStepTest, NumericCTypes);
55+
56+
TYPED_TEST(TypedStepTest, Basic) {
57+
for (auto length : {0, 1, 1024}) {
58+
ARROW_SCOPED_TRACE("length=" + std::to_string(length));
59+
for (TypeParam start :
60+
{std::numeric_limits<TypeParam>::min(), static_cast<TypeParam>(0)}) {
61+
ARROW_SCOPED_TRACE("start=" + std::to_string(start));
62+
for (TypeParam step :
63+
{static_cast<TypeParam>(0), std::numeric_limits<TypeParam>::epsilon(),
64+
static_cast<TypeParam>(std::numeric_limits<TypeParam>::max() /
65+
(length + 1))}) {
66+
ARROW_SCOPED_TRACE("step=" + std::to_string(step));
67+
ASSERT_OK_AND_ASSIGN(auto array, Step(start, step)->Generate(length));
68+
CheckStep(*array, start, step, length);
69+
}
70+
}
71+
}
72+
}
73+
74+
} // namespace arrow::gen

0 commit comments

Comments
 (0)