diff --git a/cpp/src/gandiva/function_holder_maker_registry.cc b/cpp/src/gandiva/function_holder_maker_registry.cc index 2d9657489670..f45cf2b820f8 100644 --- a/cpp/src/gandiva/function_holder_maker_registry.cc +++ b/cpp/src/gandiva/function_holder_maker_registry.cc @@ -62,6 +62,7 @@ FunctionHolderMakerRegistry::MakerMap FunctionHolderMakerRegistry::DefaultHolder {"to_date", HolderMaker}, {"random", HolderMaker}, {"rand", HolderMaker}, + {"rand_integer", HolderMaker}, {"regexp_replace", HolderMaker}, {"regexp_extract", HolderMaker}, {"castintervalday", HolderMaker}, diff --git a/cpp/src/gandiva/function_registry_math_ops.cc b/cpp/src/gandiva/function_registry_math_ops.cc index 232c7c532600..3bfcfc180e7e 100644 --- a/cpp/src/gandiva/function_registry_math_ops.cc +++ b/cpp/src/gandiva/function_registry_math_ops.cc @@ -103,6 +103,14 @@ std::vector GetMathOpsFunctionRegistry() { "gdv_fn_random", NativeFunction::kNeedsFunctionHolder), NativeFunction("random", {"rand"}, DataTypeVector{int32()}, float64(), kResultNullNever, "gdv_fn_random_with_seed", + NativeFunction::kNeedsFunctionHolder), + NativeFunction("rand_integer", {}, DataTypeVector{}, int32(), kResultNullNever, + "gdv_fn_rand_integer", NativeFunction::kNeedsFunctionHolder), + NativeFunction("rand_integer", {}, DataTypeVector{int32()}, int32(), + kResultNullNever, "gdv_fn_rand_integer_with_range", + NativeFunction::kNeedsFunctionHolder), + NativeFunction("rand_integer", {}, DataTypeVector{int32(), int32()}, int32(), + kResultNullNever, "gdv_fn_rand_integer_with_min_max", NativeFunction::kNeedsFunctionHolder)}; return math_fn_registry_; diff --git a/cpp/src/gandiva/gdv_function_stubs.cc b/cpp/src/gandiva/gdv_function_stubs.cc index dff15e6fd29f..3eda4afadb03 100644 --- a/cpp/src/gandiva/gdv_function_stubs.cc +++ b/cpp/src/gandiva/gdv_function_stubs.cc @@ -67,12 +67,33 @@ double gdv_fn_random(int64_t ptr) { return (*holder)(); } -double gdv_fn_random_with_seed(int64_t ptr, int32_t seed, bool seed_validity) { +double gdv_fn_random_with_seed(int64_t ptr, int32_t /*seed*/, bool /*seed_validity*/) { gandiva::RandomGeneratorHolder* holder = reinterpret_cast(ptr); return (*holder)(); } +int32_t gdv_fn_rand_integer(int64_t ptr) { + gandiva::RandomIntegerGeneratorHolder* holder = + reinterpret_cast(ptr); + return (*holder)(); +} + +int32_t gdv_fn_rand_integer_with_range(int64_t ptr, int32_t /*range*/, + bool /*range_validity*/) { + gandiva::RandomIntegerGeneratorHolder* holder = + reinterpret_cast(ptr); + return (*holder)(); +} + +int32_t gdv_fn_rand_integer_with_min_max(int64_t ptr, int32_t /*min*/, + bool /*min_validity*/, int32_t /*max*/, + bool /*max_validity*/) { + gandiva::RandomIntegerGeneratorHolder* holder = + reinterpret_cast(ptr); + return (*holder)(); +} + bool gdv_fn_in_expr_lookup_int32(int64_t ptr, int32_t value, bool in_validity) { if (!in_validity) { return false; @@ -864,6 +885,22 @@ arrow::Status ExportedStubFunctions::AddMappings(Engine* engine) const { engine->AddGlobalMappingForFunc("gdv_fn_random_with_seed", types->double_type(), args, reinterpret_cast(gdv_fn_random_with_seed)); + // gdv_fn_rand_integer + args = {types->i64_type()}; + engine->AddGlobalMappingForFunc("gdv_fn_rand_integer", types->i32_type(), args, + reinterpret_cast(gdv_fn_rand_integer)); + + args = {types->i64_type(), types->i32_type(), types->i1_type()}; + engine->AddGlobalMappingForFunc( + "gdv_fn_rand_integer_with_range", types->i32_type(), args, + reinterpret_cast(gdv_fn_rand_integer_with_range)); + + args = {types->i64_type(), types->i32_type(), types->i1_type(), types->i32_type(), + types->i1_type()}; + engine->AddGlobalMappingForFunc( + "gdv_fn_rand_integer_with_min_max", types->i32_type(), args, + reinterpret_cast(gdv_fn_rand_integer_with_min_max)); + // gdv_fn_dec_from_string args = { types->i64_type(), // context diff --git a/cpp/src/gandiva/random_generator_holder.cc b/cpp/src/gandiva/random_generator_holder.cc index 8f80c5826d93..2729c2875ad7 100644 --- a/cpp/src/gandiva/random_generator_holder.cc +++ b/cpp/src/gandiva/random_generator_holder.cc @@ -16,6 +16,9 @@ // under the License. #include "gandiva/random_generator_holder.h" + +#include + #include "gandiva/node.h" namespace gandiva { @@ -40,4 +43,62 @@ Result> RandomGeneratorHolder::Make( return std::shared_ptr(new RandomGeneratorHolder( literal->is_null() ? 0 : std::get(literal->holder()))); } + +Result> RandomIntegerGeneratorHolder::Make( + const FunctionNode& node) { + ARROW_RETURN_IF( + node.children().size() > 2, + Status::Invalid("'rand_integer' function requires at most two parameters")); + + // No params: full int32 range [INT32_MIN, INT32_MAX] + if (node.children().empty()) { + return std::shared_ptr( + new RandomIntegerGeneratorHolder()); + } + + // One param: range [0, range - 1] + if (node.children().size() == 1) { + auto literal = dynamic_cast(node.children().at(0).get()); + ARROW_RETURN_IF( + literal == nullptr, + Status::Invalid("'rand_integer' function requires a literal as parameter")); + ARROW_RETURN_IF( + literal->return_type()->id() != arrow::Type::INT32, + Status::Invalid( + "'rand_integer' function requires an int32 literal as parameter")); + + // NULL range defaults to INT32_MAX (full positive range) + int32_t range = literal->is_null() ? std::numeric_limits::max() + : std::get(literal->holder()); + ARROW_RETURN_IF(range <= 0, + Status::Invalid("'rand_integer' function range must be positive")); + + return std::shared_ptr( + new RandomIntegerGeneratorHolder(range)); + } + + // Two params: min, max [min, max] inclusive + auto min_literal = dynamic_cast(node.children().at(0).get()); + auto max_literal = dynamic_cast(node.children().at(1).get()); + + ARROW_RETURN_IF( + min_literal == nullptr || max_literal == nullptr, + Status::Invalid("'rand_integer' function requires literals as parameters")); + ARROW_RETURN_IF( + min_literal->return_type()->id() != arrow::Type::INT32 || + max_literal->return_type()->id() != arrow::Type::INT32, + Status::Invalid("'rand_integer' function requires int32 literals as parameters")); + + // NULL min defaults to 0, NULL max defaults to INT32_MAX + int32_t min_val = min_literal->is_null() ? 0 : std::get(min_literal->holder()); + int32_t max_val = max_literal->is_null() ? std::numeric_limits::max() + : std::get(max_literal->holder()); + + ARROW_RETURN_IF(min_val > max_val, + Status::Invalid("'rand_integer' function min must be <= max")); + + return std::shared_ptr( + new RandomIntegerGeneratorHolder(min_val, max_val)); +} + } // namespace gandiva diff --git a/cpp/src/gandiva/random_generator_holder.h b/cpp/src/gandiva/random_generator_holder.h index ffab725aa7fc..752e8d242015 100644 --- a/cpp/src/gandiva/random_generator_holder.h +++ b/cpp/src/gandiva/random_generator_holder.h @@ -17,6 +17,7 @@ #pragma once +#include #include #include @@ -53,4 +54,36 @@ class GANDIVA_EXPORT RandomGeneratorHolder : public FunctionHolder { std::uniform_real_distribution<> distribution_; }; +/// Function Holder for 'rand_integer' +class GANDIVA_EXPORT RandomIntegerGeneratorHolder : public FunctionHolder { + public: + ~RandomIntegerGeneratorHolder() override = default; + + static Result> Make( + const FunctionNode& node); + + int32_t operator()() { return distribution_(generator_); } + + private: + // Full range: [INT32_MIN, INT32_MAX] + RandomIntegerGeneratorHolder() + : distribution_(std::numeric_limits::min(), + std::numeric_limits::max()) { + generator_.seed(::arrow::internal::GetRandomSeed()); + } + + // Range: [0, range - 1] + explicit RandomIntegerGeneratorHolder(int32_t range) : distribution_(0, range - 1) { + generator_.seed(::arrow::internal::GetRandomSeed()); + } + + // Min/Max: [min, max] inclusive + RandomIntegerGeneratorHolder(int32_t min, int32_t max) : distribution_(min, max) { + generator_.seed(::arrow::internal::GetRandomSeed()); + } + + std::mt19937_64 generator_; + std::uniform_int_distribution distribution_; +}; + } // namespace gandiva diff --git a/cpp/src/gandiva/random_generator_holder_test.cc b/cpp/src/gandiva/random_generator_holder_test.cc index 77b2750f2e95..26677515c275 100644 --- a/cpp/src/gandiva/random_generator_holder_test.cc +++ b/cpp/src/gandiva/random_generator_holder_test.cc @@ -17,8 +17,10 @@ #include "gandiva/random_generator_holder.h" +#include #include +#include #include #include "arrow/testing/gtest_util.h" @@ -87,4 +89,161 @@ TEST_F(TestRandGenHolder, WithInValidSeed) { EXPECT_EQ(random_1(), random_2()); } +// Test that non-literal seed argument is rejected +TEST_F(TestRandGenHolder, NonLiteralSeedRejected) { + auto field_node = std::make_shared(arrow::field("seed", arrow::int32())); + FunctionNode rand_func = {"rand", {field_node}, arrow::float64()}; + + EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, + ::testing::HasSubstr("requires a literal as parameter"), + RandomGeneratorHolder::Make(rand_func).status()); +} + +class TestRandIntGenHolder : public ::testing::Test { + public: + FunctionNode BuildRandIntFunc() { return {"rand_integer", {}, arrow::int32()}; } + + FunctionNode BuildRandIntWithRangeFunc(int32_t range, bool range_is_null) { + auto range_node = std::make_shared(arrow::int32(), LiteralHolder(range), + range_is_null); + return {"rand_integer", {range_node}, arrow::int32()}; + } + + FunctionNode BuildRandIntWithMinMaxFunc(int32_t min, bool min_is_null, int32_t max, + bool max_is_null) { + auto min_node = + std::make_shared(arrow::int32(), LiteralHolder(min), min_is_null); + auto max_node = + std::make_shared(arrow::int32(), LiteralHolder(max), max_is_null); + return {"rand_integer", {min_node, max_node}, arrow::int32()}; + } +}; + +TEST_F(TestRandIntGenHolder, NoParams) { + FunctionNode rand_func = BuildRandIntFunc(); + EXPECT_OK_AND_ASSIGN(auto rand_gen_holder, + RandomIntegerGeneratorHolder::Make(rand_func)); + + auto& random = *rand_gen_holder; + // Generate multiple values and verify they are integers + for (int i = 0; i < 10; i++) { + int32_t val = random(); + EXPECT_GE(val, std::numeric_limits::min()); + EXPECT_LE(val, std::numeric_limits::max()); + } +} + +TEST_F(TestRandIntGenHolder, WithRange) { + FunctionNode rand_func = BuildRandIntWithRangeFunc(100, false); + EXPECT_OK_AND_ASSIGN(auto rand_gen_holder, + RandomIntegerGeneratorHolder::Make(rand_func)); + + auto& random = *rand_gen_holder; + // Generate multiple values and verify they are in range [0, 99] + for (int i = 0; i < 100; i++) { + int32_t val = random(); + EXPECT_GE(val, 0); + EXPECT_LT(val, 100); + } +} + +TEST_F(TestRandIntGenHolder, WithMinMax) { + FunctionNode rand_func = BuildRandIntWithMinMaxFunc(10, false, 20, false); + EXPECT_OK_AND_ASSIGN(auto rand_gen_holder, + RandomIntegerGeneratorHolder::Make(rand_func)); + + auto& random = *rand_gen_holder; + // Generate multiple values and verify they are in range [10, 20] + for (int i = 0; i < 100; i++) { + int32_t val = random(); + EXPECT_GE(val, 10); + EXPECT_LE(val, 20); + } +} + +TEST_F(TestRandIntGenHolder, WithNegativeMinMax) { + FunctionNode rand_func = BuildRandIntWithMinMaxFunc(-50, false, -10, false); + EXPECT_OK_AND_ASSIGN(auto rand_gen_holder, + RandomIntegerGeneratorHolder::Make(rand_func)); + + auto& random = *rand_gen_holder; + // Generate multiple values and verify they are in range [-50, -10] + for (int i = 0; i < 100; i++) { + int32_t val = random(); + EXPECT_GE(val, -50); + EXPECT_LE(val, -10); + } +} + +TEST_F(TestRandIntGenHolder, InvalidRangeZero) { + FunctionNode rand_func = BuildRandIntWithRangeFunc(0, false); + EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, ::testing::HasSubstr("range must be positive"), + RandomIntegerGeneratorHolder::Make(rand_func).status()); +} + +TEST_F(TestRandIntGenHolder, InvalidRangeNegative) { + FunctionNode rand_func = BuildRandIntWithRangeFunc(-5, false); + EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, ::testing::HasSubstr("range must be positive"), + RandomIntegerGeneratorHolder::Make(rand_func).status()); +} + +TEST_F(TestRandIntGenHolder, InvalidMinGreaterThanMax) { + FunctionNode rand_func = BuildRandIntWithMinMaxFunc(20, false, 10, false); + EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, ::testing::HasSubstr("min must be <= max"), + RandomIntegerGeneratorHolder::Make(rand_func).status()); +} + +TEST_F(TestRandIntGenHolder, NullRangeDefaultsToMaxInt) { + FunctionNode rand_func = BuildRandIntWithRangeFunc(0, true); // null range + EXPECT_OK_AND_ASSIGN(auto rand_gen_holder, + RandomIntegerGeneratorHolder::Make(rand_func)); + + auto& random = *rand_gen_holder; + // With NULL range defaulting to INT32_MAX, values should be in [0, INT32_MAX-1] + for (int i = 0; i < 100; i++) { + int32_t val = random(); + EXPECT_GE(val, 0); + EXPECT_LT(val, std::numeric_limits::max()); + } +} + +// Test that non-literal arguments are rejected +TEST_F(TestRandIntGenHolder, NonLiteralRangeRejected) { + // Create a FieldNode instead of LiteralNode for the range parameter + auto field_node = std::make_shared(arrow::field("range", arrow::int32())); + FunctionNode rand_func = {"rand_integer", {field_node}, arrow::int32()}; + + EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, + ::testing::HasSubstr("requires a literal as parameter"), + RandomIntegerGeneratorHolder::Make(rand_func).status()); +} + +TEST_F(TestRandIntGenHolder, NonLiteralMinMaxRejected) { + // Create FieldNodes instead of LiteralNodes for min/max parameters + auto min_field = std::make_shared(arrow::field("min", arrow::int32())); + auto max_literal = + std::make_shared(arrow::int32(), LiteralHolder(100), false); + FunctionNode rand_func = {"rand_integer", {min_field, max_literal}, arrow::int32()}; + + EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, + ::testing::HasSubstr("requires literals as parameters"), + RandomIntegerGeneratorHolder::Make(rand_func).status()); +} + +TEST_F(TestRandIntGenHolder, NullMinMaxDefaults) { + // Test null handling for 2-arg form: NULL min defaults to 0, NULL max defaults to + // INT32_MAX + FunctionNode rand_func = BuildRandIntWithMinMaxFunc(0, true, 0, true); // both null + EXPECT_OK_AND_ASSIGN(auto rand_gen_holder, + RandomIntegerGeneratorHolder::Make(rand_func)); + + auto& random = *rand_gen_holder; + // With NULL min=0, NULL max=INT32_MAX, values should be in [0, INT32_MAX] + for (int i = 0; i < 100; i++) { + int32_t val = random(); + EXPECT_GE(val, 0); + EXPECT_LE(val, std::numeric_limits::max()); + } +} + } // namespace gandiva diff --git a/cpp/src/gandiva/tests/projector_test.cc b/cpp/src/gandiva/tests/projector_test.cc index dc1ac9dfd266..268cb55a6422 100644 --- a/cpp/src/gandiva/tests/projector_test.cc +++ b/cpp/src/gandiva/tests/projector_test.cc @@ -3678,4 +3678,161 @@ TEST_F(TestProjector, TestExtendedCFunctionThatNeedsContext) { EXPECT_ARROW_ARRAY_EQUALS(out, outs.at(0)); } +TEST_F(TestProjector, TestRandomNoArgs) { + // Test random() with no arguments - returns double in [0, 1) + auto dummy_field = field("dummy", arrow::int32()); + auto schema = arrow::schema({dummy_field}); + auto out_field = field("out", arrow::float64()); + + auto rand_node = TreeExprBuilder::MakeFunction("random", {}, arrow::float64()); + auto expr = TreeExprBuilder::MakeExpression(rand_node, out_field); + + std::shared_ptr projector; + ARROW_EXPECT_OK(Projector::Make(schema, {expr}, TestConfiguration(), &projector)); + + int num_records = 100; + auto dummy_array = MakeArrowArrayInt32(std::vector(num_records, 0), + std::vector(num_records, true)); + auto in_batch = arrow::RecordBatch::Make(schema, num_records, {dummy_array}); + + arrow::ArrayVector outs; + ARROW_EXPECT_OK(projector->Evaluate(*in_batch, pool_, &outs)); + + // Verify all values are in range [0, 1) + auto result = std::dynamic_pointer_cast(outs.at(0)); + EXPECT_EQ(result->length(), num_records); + EXPECT_EQ(result->null_count(), 0); + for (int i = 0; i < num_records; i++) { + double value = result->Value(i); + EXPECT_GE(value, 0.0); + EXPECT_LT(value, 1.0); + } +} + +TEST_F(TestProjector, TestRandomWithSeed) { + // Test rand(seed) - with seed literal, returns double in [0, 1) + auto dummy_field = field("dummy", arrow::int32()); + auto schema = arrow::schema({dummy_field}); + auto out_field = field("out", arrow::float64()); + + auto seed_literal = TreeExprBuilder::MakeLiteral(static_cast(12345)); + auto rand_node = + TreeExprBuilder::MakeFunction("rand", {seed_literal}, arrow::float64()); + auto expr = TreeExprBuilder::MakeExpression(rand_node, out_field); + + std::shared_ptr projector; + ARROW_EXPECT_OK(Projector::Make(schema, {expr}, TestConfiguration(), &projector)); + + int num_records = 100; + auto dummy_array = MakeArrowArrayInt32(std::vector(num_records, 0), + std::vector(num_records, true)); + auto in_batch = arrow::RecordBatch::Make(schema, num_records, {dummy_array}); + + arrow::ArrayVector outs; + ARROW_EXPECT_OK(projector->Evaluate(*in_batch, pool_, &outs)); + + // Verify all values are in range [0, 1) + auto result = std::dynamic_pointer_cast(outs.at(0)); + EXPECT_EQ(result->length(), num_records); + EXPECT_EQ(result->null_count(), 0); + for (int i = 0; i < num_records; i++) { + double value = result->Value(i); + EXPECT_GE(value, 0.0); + EXPECT_LT(value, 1.0); + } +} + +TEST_F(TestProjector, TestRandIntegerNoArgs) { + // Test rand_integer() with no arguments - full int32 range + auto dummy_field = field("dummy", arrow::int32()); + auto schema = arrow::schema({dummy_field}); + auto out_field = field("out", arrow::int32()); + + auto rand_int_node = TreeExprBuilder::MakeFunction("rand_integer", {}, arrow::int32()); + auto expr = TreeExprBuilder::MakeExpression(rand_int_node, out_field); + + std::shared_ptr projector; + ARROW_EXPECT_OK(Projector::Make(schema, {expr}, TestConfiguration(), &projector)); + + int num_records = 100; + auto dummy_array = MakeArrowArrayInt32(std::vector(num_records, 0), + std::vector(num_records, true)); + auto in_batch = arrow::RecordBatch::Make(schema, num_records, {dummy_array}); + + arrow::ArrayVector outs; + ARROW_EXPECT_OK(projector->Evaluate(*in_batch, pool_, &outs)); + + // Verify all values are valid int32 (no specific range check for full range) + auto result = std::dynamic_pointer_cast(outs.at(0)); + EXPECT_EQ(result->length(), num_records); + EXPECT_EQ(result->null_count(), 0); +} + +TEST_F(TestProjector, TestRandIntegerWithRange) { + // Test rand_integer(10) - range [0, 9] + auto dummy_field = field("dummy", arrow::int32()); + auto schema = arrow::schema({dummy_field}); + auto out_field = field("out", arrow::int32()); + + auto range_literal = TreeExprBuilder::MakeLiteral(static_cast(10)); + auto rand_int_node = + TreeExprBuilder::MakeFunction("rand_integer", {range_literal}, arrow::int32()); + auto expr = TreeExprBuilder::MakeExpression(rand_int_node, out_field); + + std::shared_ptr projector; + ARROW_EXPECT_OK(Projector::Make(schema, {expr}, TestConfiguration(), &projector)); + + int num_records = 100; + auto dummy_array = MakeArrowArrayInt32(std::vector(num_records, 0), + std::vector(num_records, true)); + auto in_batch = arrow::RecordBatch::Make(schema, num_records, {dummy_array}); + + arrow::ArrayVector outs; + ARROW_EXPECT_OK(projector->Evaluate(*in_batch, pool_, &outs)); + + // Verify all values are in range [0, 9] + auto result = std::dynamic_pointer_cast(outs.at(0)); + EXPECT_EQ(result->length(), num_records); + EXPECT_EQ(result->null_count(), 0); + for (int i = 0; i < num_records; i++) { + int32_t value = result->Value(i); + EXPECT_GE(value, 0); + EXPECT_LT(value, 10); + } +} + +TEST_F(TestProjector, TestRandIntegerWithMinMax) { + // Test rand_integer(5, 15) - range [5, 15] inclusive + auto dummy_field = field("dummy", arrow::int32()); + auto schema = arrow::schema({dummy_field}); + auto out_field = field("out", arrow::int32()); + + auto min_literal = TreeExprBuilder::MakeLiteral(static_cast(5)); + auto max_literal = TreeExprBuilder::MakeLiteral(static_cast(15)); + auto rand_int_node = TreeExprBuilder::MakeFunction( + "rand_integer", {min_literal, max_literal}, arrow::int32()); + auto expr = TreeExprBuilder::MakeExpression(rand_int_node, out_field); + + std::shared_ptr projector; + ARROW_EXPECT_OK(Projector::Make(schema, {expr}, TestConfiguration(), &projector)); + + int num_records = 100; + auto dummy_array = MakeArrowArrayInt32(std::vector(num_records, 0), + std::vector(num_records, true)); + auto in_batch = arrow::RecordBatch::Make(schema, num_records, {dummy_array}); + + arrow::ArrayVector outs; + ARROW_EXPECT_OK(projector->Evaluate(*in_batch, pool_, &outs)); + + // Verify all values are in range [5, 15] inclusive + auto result = std::dynamic_pointer_cast(outs.at(0)); + EXPECT_EQ(result->length(), num_records); + EXPECT_EQ(result->null_count(), 0); + for (int i = 0; i < num_records; i++) { + int32_t value = result->Value(i); + EXPECT_GE(value, 5); + EXPECT_LE(value, 15); + } +} + } // namespace gandiva