Skip to content

Commit aa1cabe

Browse files
GH-49441: [C++][Gandiva] Add rand_integer function (#49442)
### Rationale for this change

Add a `rand_integer` function to Gandiva to generate random integers, complementing the existing `rand`/`random` functions that generate random doubles. This provides native integer random number generation and offers a more efficient alternative to `CAST(rand() * range AS INT)`.

### What changes are included in this PR?

- Add a `RandomIntegerGeneratorHolder` class following the existing `RandomGeneratorHolder` pattern
- Implement three function signatures:
  - `rand_integer()` → int32 in range [INT32_MIN, INT32_MAX]
  - `rand_integer(int32 range)` → int32 in range [0, range-1]
  - `rand_integer(int32 min, int32 max)` → int32 in range [min, max] inclusive
- Add parameter validation (range > 0, min <= max) at expression compilation time
- Add 8 unit tests covering all signatures and edge cases
- Use `std::uniform_int_distribution<int32_t>` with a Mersenne Twister engine

### Are these changes tested?

Yes, added 8 unit tests in `random_generator_holder_test.cc`:

- `NoParams` - verifies full int32 range
- `WithRange` - verifies [0, range-1] bounds
- `WithMinMax` - verifies [min, max] inclusive bounds
- `WithNegativeMinMax` - verifies negative range handling
- `InvalidRangeZero` - verifies range=0 is rejected
- `InvalidRangeNegative` - verifies negative range is rejected
- `InvalidMinGreaterThanMax` - verifies min > max is rejected
- `NullRangeDefaultsToMaxInt` - verifies null parameter handling (a NULL range defaults to INT32_MAX)

### Are there any user-facing changes?

Yes, this adds a new `rand_integer` function to Gandiva with three signatures as described above.

* GitHub Issue: #49441
1 parent 2ce4e66 commit aa1cabe

File tree

7 files changed

+457
-1
lines changed

7 files changed

+457
-1
lines changed

cpp/src/gandiva/function_holder_maker_registry.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ FunctionHolderMakerRegistry::MakerMap FunctionHolderMakerRegistry::DefaultHolder
6262
{"to_date", HolderMaker<ToDateHolder>},
6363
{"random", HolderMaker<RandomGeneratorHolder>},
6464
{"rand", HolderMaker<RandomGeneratorHolder>},
65+
{"rand_integer", HolderMaker<RandomIntegerGeneratorHolder>},
6566
{"regexp_replace", HolderMaker<ReplaceHolder>},
6667
{"regexp_extract", HolderMaker<ExtractHolder>},
6768
{"castintervalday", HolderMaker<IntervalDaysHolder>},

cpp/src/gandiva/function_registry_math_ops.cc

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,14 @@ std::vector<NativeFunction> GetMathOpsFunctionRegistry() {
103103
"gdv_fn_random", NativeFunction::kNeedsFunctionHolder),
104104
NativeFunction("random", {"rand"}, DataTypeVector{int32()}, float64(),
105105
kResultNullNever, "gdv_fn_random_with_seed",
106+
NativeFunction::kNeedsFunctionHolder),
107+
NativeFunction("rand_integer", {}, DataTypeVector{}, int32(), kResultNullNever,
108+
"gdv_fn_rand_integer", NativeFunction::kNeedsFunctionHolder),
109+
NativeFunction("rand_integer", {}, DataTypeVector{int32()}, int32(),
110+
kResultNullNever, "gdv_fn_rand_integer_with_range",
111+
NativeFunction::kNeedsFunctionHolder),
112+
NativeFunction("rand_integer", {}, DataTypeVector{int32(), int32()}, int32(),
113+
kResultNullNever, "gdv_fn_rand_integer_with_min_max",
106114
NativeFunction::kNeedsFunctionHolder)};
107115

108116
return math_fn_registry_;

cpp/src/gandiva/gdv_function_stubs.cc

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,12 +67,33 @@ double gdv_fn_random(int64_t ptr) {
6767
return (*holder)();
6868
}
6969

70-
double gdv_fn_random_with_seed(int64_t ptr, int32_t seed, bool seed_validity) {
70+
double gdv_fn_random_with_seed(int64_t ptr, int32_t /*seed*/, bool /*seed_validity*/) {
7171
gandiva::RandomGeneratorHolder* holder =
7272
reinterpret_cast<gandiva::RandomGeneratorHolder*>(ptr);
7373
return (*holder)();
7474
}
7575

76+
// Stub for 'rand_integer()' with no arguments.
// `ptr` carries the address of a RandomIntegerGeneratorHolder; the result is
// the next value drawn from that holder.
int32_t gdv_fn_rand_integer(int64_t ptr) {
  auto* generator = reinterpret_cast<gandiva::RandomIntegerGeneratorHolder*>(ptr);
  return (*generator)();
}
81+
82+
// Stub for 'rand_integer(range)'.
// The range argument and its validity flag are not read here (their names are
// commented out); the value comes solely from the holder addressed by `ptr`.
int32_t gdv_fn_rand_integer_with_range(int64_t ptr, int32_t /*range*/,
                                       bool /*range_validity*/) {
  auto* generator = reinterpret_cast<gandiva::RandomIntegerGeneratorHolder*>(ptr);
  return (*generator)();
}
88+
89+
// Stub for 'rand_integer(min, max)'.
// The min/max arguments and their validity flags are not read here (their
// names are commented out); the value comes solely from the holder addressed
// by `ptr`.
int32_t gdv_fn_rand_integer_with_min_max(int64_t ptr, int32_t /*min*/,
                                         bool /*min_validity*/, int32_t /*max*/,
                                         bool /*max_validity*/) {
  auto* generator = reinterpret_cast<gandiva::RandomIntegerGeneratorHolder*>(ptr);
  return (*generator)();
}
96+
7697
bool gdv_fn_in_expr_lookup_int32(int64_t ptr, int32_t value, bool in_validity) {
7798
if (!in_validity) {
7899
return false;
@@ -864,6 +885,22 @@ arrow::Status ExportedStubFunctions::AddMappings(Engine* engine) const {
864885
engine->AddGlobalMappingForFunc("gdv_fn_random_with_seed", types->double_type(), args,
865886
reinterpret_cast<void*>(gdv_fn_random_with_seed));
866887

888+
// gdv_fn_rand_integer
889+
args = {types->i64_type()};
890+
engine->AddGlobalMappingForFunc("gdv_fn_rand_integer", types->i32_type(), args,
891+
reinterpret_cast<void*>(gdv_fn_rand_integer));
892+
893+
args = {types->i64_type(), types->i32_type(), types->i1_type()};
894+
engine->AddGlobalMappingForFunc(
895+
"gdv_fn_rand_integer_with_range", types->i32_type(), args,
896+
reinterpret_cast<void*>(gdv_fn_rand_integer_with_range));
897+
898+
args = {types->i64_type(), types->i32_type(), types->i1_type(), types->i32_type(),
899+
types->i1_type()};
900+
engine->AddGlobalMappingForFunc(
901+
"gdv_fn_rand_integer_with_min_max", types->i32_type(), args,
902+
reinterpret_cast<void*>(gdv_fn_rand_integer_with_min_max));
903+
867904
// gdv_fn_dec_from_string
868905
args = {
869906
types->i64_type(), // context

cpp/src/gandiva/random_generator_holder.cc

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
1616
// under the License.
1717

1818
#include "gandiva/random_generator_holder.h"
19+
20+
#include <limits>
21+
1922
#include "gandiva/node.h"
2023

2124
namespace gandiva {
@@ -40,4 +43,62 @@ Result<std::shared_ptr<RandomGeneratorHolder>> RandomGeneratorHolder::Make(
4043
return std::shared_ptr<RandomGeneratorHolder>(new RandomGeneratorHolder(
4144
literal->is_null() ? 0 : std::get<int32_t>(literal->holder())));
4245
}
46+
47+
/// Build the holder for a 'rand_integer' call from the node's arguments.
///
/// Accepted argument shapes:
///  - none: values span [INT32_MIN, INT32_MAX];
///  - one int32 literal `range` (> 0): values span [0, range - 1];
///  - two int32 literals `min`, `max` (min <= max): values span [min, max].
/// NULL literals fall back to defaults: range -> INT32_MAX, min -> 0,
/// max -> INT32_MAX.
///
/// \param[in] node function node whose children are the call arguments
/// \return the holder, or Status::Invalid when there are more than two
///         arguments, an argument is not an int32 literal, range <= 0, or
///         min > max
Result<std::shared_ptr<RandomIntegerGeneratorHolder>> RandomIntegerGeneratorHolder::Make(
    const FunctionNode& node) {
  using HolderPtr = std::shared_ptr<RandomIntegerGeneratorHolder>;
  constexpr int32_t kInt32Max = std::numeric_limits<int32_t>::max();

  const auto& args = node.children();
  const auto arg_count = args.size();

  ARROW_RETURN_IF(
      arg_count > 2,
      Status::Invalid("'rand_integer' function requires at most two parameters"));

  // rand_integer(): full int32 range.
  if (arg_count == 0) {
    return HolderPtr(new RandomIntegerGeneratorHolder());
  }

  // rand_integer(range): [0, range - 1].
  if (arg_count == 1) {
    auto* range_literal = dynamic_cast<LiteralNode*>(args[0].get());
    ARROW_RETURN_IF(
        range_literal == nullptr,
        Status::Invalid("'rand_integer' function requires a literal as parameter"));
    ARROW_RETURN_IF(
        range_literal->return_type()->id() != arrow::Type::INT32,
        Status::Invalid(
            "'rand_integer' function requires an int32 literal as parameter"));

    // A NULL range falls back to INT32_MAX.
    int32_t range = kInt32Max;
    if (!range_literal->is_null()) {
      range = std::get<int32_t>(range_literal->holder());
    }
    ARROW_RETURN_IF(range <= 0,
                    Status::Invalid("'rand_integer' function range must be positive"));

    return HolderPtr(new RandomIntegerGeneratorHolder(range));
  }

  // rand_integer(min, max): [min, max], both ends inclusive.
  auto* lower_literal = dynamic_cast<LiteralNode*>(args[0].get());
  auto* upper_literal = dynamic_cast<LiteralNode*>(args[1].get());

  ARROW_RETURN_IF(
      lower_literal == nullptr || upper_literal == nullptr,
      Status::Invalid("'rand_integer' function requires literals as parameters"));
  ARROW_RETURN_IF(
      lower_literal->return_type()->id() != arrow::Type::INT32 ||
          upper_literal->return_type()->id() != arrow::Type::INT32,
      Status::Invalid("'rand_integer' function requires int32 literals as parameters"));

  // A NULL min falls back to 0 and a NULL max to INT32_MAX.
  int32_t lower = 0;
  if (!lower_literal->is_null()) {
    lower = std::get<int32_t>(lower_literal->holder());
  }
  int32_t upper = kInt32Max;
  if (!upper_literal->is_null()) {
    upper = std::get<int32_t>(upper_literal->holder());
  }

  ARROW_RETURN_IF(lower > upper,
                  Status::Invalid("'rand_integer' function min must be <= max"));

  return HolderPtr(new RandomIntegerGeneratorHolder(lower, upper));
}
103+
43104
} // namespace gandiva

cpp/src/gandiva/random_generator_holder.h

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
#pragma once
1919

20+
#include <limits>
2021
#include <memory>
2122
#include <random>
2223

@@ -53,4 +54,36 @@ class GANDIVA_EXPORT RandomGeneratorHolder : public FunctionHolder {
5354
std::uniform_real_distribution<> distribution_;
5455
};
5556

57+
/// Function Holder for 'rand_integer'
58+
class GANDIVA_EXPORT RandomIntegerGeneratorHolder : public FunctionHolder {
59+
public:
60+
~RandomIntegerGeneratorHolder() override = default;
61+
62+
static Result<std::shared_ptr<RandomIntegerGeneratorHolder>> Make(
63+
const FunctionNode& node);
64+
65+
int32_t operator()() { return distribution_(generator_); }
66+
67+
private:
68+
// Full range: [INT32_MIN, INT32_MAX]
69+
RandomIntegerGeneratorHolder()
70+
: distribution_(std::numeric_limits<int32_t>::min(),
71+
std::numeric_limits<int32_t>::max()) {
72+
generator_.seed(::arrow::internal::GetRandomSeed());
73+
}
74+
75+
// Range: [0, range - 1]
76+
explicit RandomIntegerGeneratorHolder(int32_t range) : distribution_(0, range - 1) {
77+
generator_.seed(::arrow::internal::GetRandomSeed());
78+
}
79+
80+
// Min/Max: [min, max] inclusive
81+
RandomIntegerGeneratorHolder(int32_t min, int32_t max) : distribution_(min, max) {
82+
generator_.seed(::arrow::internal::GetRandomSeed());
83+
}
84+
85+
std::mt19937_64 generator_;
86+
std::uniform_int_distribution<int32_t> distribution_;
87+
};
88+
5689
} // namespace gandiva

cpp/src/gandiva/random_generator_holder_test.cc

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,10 @@
1717

1818
#include "gandiva/random_generator_holder.h"
1919

20+
#include <limits>
2021
#include <memory>
2122

23+
#include <gmock/gmock.h>
2224
#include <gtest/gtest.h>
2325

2426
#include "arrow/testing/gtest_util.h"
@@ -87,4 +89,161 @@ TEST_F(TestRandGenHolder, WithInValidSeed) {
8789
EXPECT_EQ(random_1(), random_2());
8890
}
8991

92+
// Test that non-literal seed argument is rejected
93+
TEST_F(TestRandGenHolder, NonLiteralSeedRejected) {
94+
auto field_node = std::make_shared<FieldNode>(arrow::field("seed", arrow::int32()));
95+
FunctionNode rand_func = {"rand", {field_node}, arrow::float64()};
96+
97+
EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
98+
::testing::HasSubstr("requires a literal as parameter"),
99+
RandomGeneratorHolder::Make(rand_func).status());
100+
}
101+
102+
class TestRandIntGenHolder : public ::testing::Test {
103+
public:
104+
FunctionNode BuildRandIntFunc() { return {"rand_integer", {}, arrow::int32()}; }
105+
106+
FunctionNode BuildRandIntWithRangeFunc(int32_t range, bool range_is_null) {
107+
auto range_node = std::make_shared<LiteralNode>(arrow::int32(), LiteralHolder(range),
108+
range_is_null);
109+
return {"rand_integer", {range_node}, arrow::int32()};
110+
}
111+
112+
FunctionNode BuildRandIntWithMinMaxFunc(int32_t min, bool min_is_null, int32_t max,
113+
bool max_is_null) {
114+
auto min_node =
115+
std::make_shared<LiteralNode>(arrow::int32(), LiteralHolder(min), min_is_null);
116+
auto max_node =
117+
std::make_shared<LiteralNode>(arrow::int32(), LiteralHolder(max), max_is_null);
118+
return {"rand_integer", {min_node, max_node}, arrow::int32()};
119+
}
120+
};
121+
122+
// rand_integer() without arguments must draw from the whole int32 range.
//
// Comparing an int32_t against INT32_MIN/INT32_MAX is always true, so this
// test checks something observable instead: across the draws, both a negative
// and a non-negative value must appear. Each sign has probability 1/2 per
// draw, so 128 draws all landing on one side has probability 2^-127 per side,
// which makes a spurious failure a non-issue in practice.
TEST_F(TestRandIntGenHolder, NoParams) {
  FunctionNode rand_func = BuildRandIntFunc();
  EXPECT_OK_AND_ASSIGN(auto rand_gen_holder,
                       RandomIntegerGeneratorHolder::Make(rand_func));

  auto& random = *rand_gen_holder;
  bool saw_negative = false;
  bool saw_non_negative = false;
  for (int i = 0; i < 128; i++) {
    const int32_t val = random();
    if (val < 0) {
      saw_negative = true;
    } else {
      saw_non_negative = true;
    }
  }
  EXPECT_TRUE(saw_negative) << "no negative value drawn from the full int32 range";
  EXPECT_TRUE(saw_non_negative)
      << "no non-negative value drawn from the full int32 range";
}
135+
136+
// rand_integer(100) must only yield values in [0, 99].
TEST_F(TestRandIntGenHolder, WithRange) {
  FunctionNode rand_func = BuildRandIntWithRangeFunc(100, false);
  EXPECT_OK_AND_ASSIGN(auto rand_gen_holder,
                       RandomIntegerGeneratorHolder::Make(rand_func));

  constexpr int kDraws = 100;
  for (int draw = 0; draw < kDraws; ++draw) {
    const int32_t sample = (*rand_gen_holder)();
    EXPECT_GE(sample, 0);
    EXPECT_LT(sample, 100);
  }
}
149+
150+
// rand_integer(10, 20) must only yield values in [10, 20], both ends allowed.
TEST_F(TestRandIntGenHolder, WithMinMax) {
  FunctionNode rand_func = BuildRandIntWithMinMaxFunc(10, false, 20, false);
  EXPECT_OK_AND_ASSIGN(auto rand_gen_holder,
                       RandomIntegerGeneratorHolder::Make(rand_func));

  constexpr int kDraws = 100;
  for (int draw = 0; draw < kDraws; ++draw) {
    const int32_t sample = (*rand_gen_holder)();
    EXPECT_GE(sample, 10);
    EXPECT_LE(sample, 20);
  }
}
163+
164+
// Bounds that are both negative: rand_integer(-50, -10) stays within [-50, -10].
TEST_F(TestRandIntGenHolder, WithNegativeMinMax) {
  FunctionNode rand_func = BuildRandIntWithMinMaxFunc(-50, false, -10, false);
  EXPECT_OK_AND_ASSIGN(auto rand_gen_holder,
                       RandomIntegerGeneratorHolder::Make(rand_func));

  constexpr int kDraws = 100;
  for (int draw = 0; draw < kDraws; ++draw) {
    const int32_t sample = (*rand_gen_holder)();
    EXPECT_GE(sample, -50);
    EXPECT_LE(sample, -10);
  }
}
177+
178+
// A range of zero would describe an empty interval and must be rejected.
TEST_F(TestRandIntGenHolder, InvalidRangeZero) {
  FunctionNode rand_func = BuildRandIntWithRangeFunc(/*range=*/0, /*range_is_null=*/false);
  EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
                                  ::testing::HasSubstr("range must be positive"),
                                  RandomIntegerGeneratorHolder::Make(rand_func).status());
}
183+
184+
// A negative range must be rejected with the same message as a zero range.
TEST_F(TestRandIntGenHolder, InvalidRangeNegative) {
  FunctionNode rand_func =
      BuildRandIntWithRangeFunc(/*range=*/-5, /*range_is_null=*/false);
  EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
                                  ::testing::HasSubstr("range must be positive"),
                                  RandomIntegerGeneratorHolder::Make(rand_func).status());
}
189+
190+
// Bounds given in the wrong order (min > max) must be rejected.
TEST_F(TestRandIntGenHolder, InvalidMinGreaterThanMax) {
  FunctionNode rand_func = BuildRandIntWithMinMaxFunc(/*min=*/20, /*min_is_null=*/false,
                                                      /*max=*/10, /*max_is_null=*/false);
  EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, ::testing::HasSubstr("min must be <= max"),
                                  RandomIntegerGeneratorHolder::Make(rand_func).status());
}
195+
196+
// A NULL range literal is treated as INT32_MAX, so draws land in
// [0, INT32_MAX - 1].
TEST_F(TestRandIntGenHolder, NullRangeDefaultsToMaxInt) {
  // The value 0 is ignored because the literal is flagged as NULL.
  FunctionNode rand_func = BuildRandIntWithRangeFunc(0, true);
  EXPECT_OK_AND_ASSIGN(auto rand_gen_holder,
                       RandomIntegerGeneratorHolder::Make(rand_func));

  constexpr int kDraws = 100;
  for (int draw = 0; draw < kDraws; ++draw) {
    const int32_t sample = (*rand_gen_holder)();
    EXPECT_GE(sample, 0);
    EXPECT_LT(sample, std::numeric_limits<int32_t>::max());
  }
}
209+
210+
// Test that non-literal arguments are rejected
211+
TEST_F(TestRandIntGenHolder, NonLiteralRangeRejected) {
212+
// Create a FieldNode instead of LiteralNode for the range parameter
213+
auto field_node = std::make_shared<FieldNode>(arrow::field("range", arrow::int32()));
214+
FunctionNode rand_func = {"rand_integer", {field_node}, arrow::int32()};
215+
216+
EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
217+
::testing::HasSubstr("requires a literal as parameter"),
218+
RandomIntegerGeneratorHolder::Make(rand_func).status());
219+
}
220+
221+
// In the two-argument form, one non-literal argument is enough for rejection:
// here min is a field reference while max is a valid int32 literal.
TEST_F(TestRandIntGenHolder, NonLiteralMinMaxRejected) {
  auto lower_arg = std::make_shared<FieldNode>(arrow::field("min", arrow::int32()));
  auto upper_arg =
      std::make_shared<LiteralNode>(arrow::int32(), LiteralHolder(100), false);
  FunctionNode rand_func = {"rand_integer", {lower_arg, upper_arg}, arrow::int32()};

  EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
                                  ::testing::HasSubstr("requires literals as parameters"),
                                  RandomIntegerGeneratorHolder::Make(rand_func).status());
}
232+
233+
// Two-argument form with both literals NULL: min falls back to 0 and max to
// INT32_MAX, so every draw must land in [0, INT32_MAX].
TEST_F(TestRandIntGenHolder, NullMinMaxDefaults) {
  // The literal values are ignored because both are flagged as NULL.
  FunctionNode rand_func = BuildRandIntWithMinMaxFunc(0, true, 0, true);
  EXPECT_OK_AND_ASSIGN(auto rand_gen_holder,
                       RandomIntegerGeneratorHolder::Make(rand_func));

  constexpr int kDraws = 100;
  for (int draw = 0; draw < kDraws; ++draw) {
    const int32_t sample = (*rand_gen_holder)();
    EXPECT_GE(sample, 0);
    EXPECT_LE(sample, std::numeric_limits<int32_t>::max());
  }
}
248+
90249
} // namespace gandiva

0 commit comments

Comments
 (0)