Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added Multi-input & Scalar Support for Transform UDFs #17881

Merged
merged 28 commits into from
Feb 13, 2025
Merged
Changes from 1 commit
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
fee37d9
added support for custom types in PTX parser
lamarrr Jan 29, 2025
96d17a3
Merge branch 'branch-25.02' into ptx-custom-types
lamarrr Jan 30, 2025
1238297
Merge branch 'branch-25.02' into ptx-custom-types
lamarrr Jan 30, 2025
2e914e4
added multi-input & scalar support for UDF transforms
lamarrr Jan 31, 2025
b24ebc9
added ternary transform + scalar test
lamarrr Jan 31, 2025
90cb21d
fixed binary_op ptx parsing
lamarrr Jan 31, 2025
4d992ed
Merge branch 'ptx-custom-types' of https://github.com/lamarrr/cudf in…
lamarrr Jan 31, 2025
a1474af
Merge branch 'ptx-custom-types' into multi-transform-udfs
lamarrr Jan 31, 2025
f9db165
Merge remote-tracking branch 'upstream/branch-25.04' into multi-trans…
lamarrr Feb 4, 2025
ef7ce8f
refactored changes
lamarrr Feb 4, 2025
d9912f9
nit: namespaces
lamarrr Feb 4, 2025
a622731
adopted code review changes
lamarrr Feb 4, 2025
e74392d
Merge branch 'ptx-custom-types' into multi-transform-udfs
lamarrr Feb 4, 2025
7859c8f
Merge remote-tracking branch 'upstream/branch-25.04' into multi-trans…
lamarrr Feb 5, 2025
0cfacb1
refactoring & changed back to using cudf::size_type in kernels
lamarrr Feb 5, 2025
00b4148
fixed copyright
lamarrr Feb 5, 2025
a4f96ed
implemented code review suggestions
lamarrr Feb 5, 2025
6e48f0e
added codegen doc for transform unit tests
lamarrr Feb 5, 2025
ed2e673
added codegen doc for transform unit tests
lamarrr Feb 5, 2025
0d33c43
added python tests for UDF transforms
lamarrr Feb 6, 2025
d38c211
Merge branch 'branch-25.04' into multi-transform-udfs
lamarrr Feb 6, 2025
93c4949
refactored tests
lamarrr Feb 10, 2025
cc5b4d2
Merge branch 'branch-25.04' into multi-transform-udfs
lamarrr Feb 10, 2025
c2e86e4
Merge branch 'branch-25.04' into multi-transform-udfs
lamarrr Feb 11, 2025
f91f47b
Merge branch 'branch-25.04' into multi-transform-udfs
lamarrr Feb 11, 2025
7e1eac9
Merge branch 'branch-25.04' into multi-transform-udfs
lamarrr Feb 12, 2025
b4d75dd
Merge branch 'branch-25.04' into multi-transform-udfs
lamarrr Feb 12, 2025
033051d
Merge branch 'branch-25.04' into multi-transform-udfs
lamarrr Feb 13, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
added multi-input & scalar support for UDF transforms
lamarrr committed Jan 31, 2025
commit 2e914e4265b716899bec032de99bb724d7716574
23 changes: 13 additions & 10 deletions cpp/include/cudf/transform.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
* Copyright (c) 2019-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -32,27 +32,30 @@ namespace CUDF_EXPORT cudf {
*/

/**
* @brief Creates a new column by applying a unary function against every
* element of an input column.
* @brief Creates a new column by applying a transform function against every
* element of the input columns.
*
* Computes:
* `out[i] = F(in[i])`
* `out[i] = F(inputs[i]...)`.
*
 * Note that for every scalar in `inputs`, `input[i] == input[0]` for all `i`
*
 * The output null mask is the same as the input null mask so if input[i] is
* null then output[i] is also null
* null then output[i] is also null. The size of the resulting column is the size of the largest
 * column. Scalar inputs are also supported and they should have a size of 1.
*
* @param input An immutable view of the input column to transform
* @param unary_udf The PTX/CUDA string of the unary function to apply
* @param inputs Immutable views of the input columns to transform
* @param transform_udf The PTX/CUDA string of the transform function to apply
* @param output_type The output type that is compatible with the output type in the UDF
* @param is_ptx true: the UDF is treated as PTX code; false: the UDF is treated as CUDA code
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return The column resulting from applying the unary function to
* @return The column resulting from applying the transform function to
* every element of the input
*/
std::unique_ptr<column> transform(
column_view const& input,
std::string const& unary_udf,
std::vector<column_view> const& inputs,
std::string const& transform_udf,
data_type output_type,
bool is_ptx,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
20 changes: 16 additions & 4 deletions cpp/src/transform/jit/kernel.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
* Copyright (c) 2019-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -33,8 +33,20 @@ namespace cudf {
namespace transformation {
namespace jit {

template <typename TypeOut, typename TypeIn>
CUDF_KERNEL void kernel(cudf::size_type size, TypeOut* out_data, TypeIn* in_data)
/// @brief This class supports striding into columns of data as either scalars or actual
/// columns at no runtime cost. Note, however, that the kernel will be recompiled if scalar and
/// column inputs are interchanged.
template <typename T, int multiplier>
struct strided {
T data;

__device__ T const& get(int64_t index) const { return (&data)[index * multiplier]; }

__device__ T& get(int64_t index) { return (&data)[index * multiplier]; }
};

template <typename Out, typename... In>
CUDF_KERNEL void kernel(int64_t size, Out* __restrict__ out, In const* __restrict__... ins)
{
// cannot use global_thread_id utility due to a JIT build issue by including
// the `cudf/detail/utilities/cuda.cuh` header
@@ -43,7 +55,7 @@ CUDF_KERNEL void kernel(cudf::size_type size, TypeOut* out_data, TypeIn* in_data
thread_index_type const stride = block_size * gridDim.x;

for (auto i = start; i < static_cast<thread_index_type>(size); i += stride) {
GENERIC_UNARY_OP(&out_data[i], in_data[i]);
GENERIC_TRANSFORM_OP(&out->get(i), ins->get(i)...);
}
}

134 changes: 98 additions & 36 deletions cpp/src/transform/transform.cpp
Original file line number Diff line number Diff line change
@@ -34,75 +34,137 @@ namespace cudf {
namespace transformation {
namespace jit {
namespace {
void unary_operation(mutable_column_view output,
column_view input,
std::string const& udf,
data_type output_type,
bool is_ptx,
rmm::cuda_stream_view stream)

using device_data_t = void*;

std::map<unsigned int, std::string> ptx_params(mutable_column_view output,
std::vector<column_view> const& inputs

)
{
std::map<unsigned int, std::string> types;

unsigned int i = 0;
types.emplace(i, cudf::type_to_name(output.type()) + " *");
i++;

for (auto& input : inputs) {
types.emplace(i, cudf::type_to_name(input.type()));
i++;
}

return types;
}

void transform_operation(mutable_column_view output,
std::vector<column_view> const& inputs,
std::string const& udf,
data_type output_type,
bool is_ptx,
rmm::cuda_stream_view stream,
cudf::size_type base_column_size)
{
std::vector<std::string> typenames;
typenames.push_back(jitify2::reflection::Template("cudf::transformation::jit::strided")
.instantiate(cudf::type_to_name(output.type()), 1));

for (auto& input : inputs) {
bool const is_scalar = input.size() != base_column_size;
typenames.push_back(jitify2::reflection::Template("cudf::transformation::jit::strided")
.instantiate(cudf::type_to_name(input.type()), is_scalar ? 0 : 1));
}

std::string const kernel_name =
jitify2::reflection::Template("cudf::transformation::jit::kernel") //
.instantiate(cudf::type_to_name(output.type()), // list of template arguments
cudf::type_to_name(input.type()));

std::string cuda_source = is_ptx ? cudf::jit::parse_single_function_ptx(
udf, //
"GENERIC_UNARY_OP",
{
{0, "void *"}, // output argument
{1, cudf::type_to_name(input.type())} // input argument
})
: cudf::jit::parse_single_function_cuda(udf, //
"GENERIC_UNARY_OP");

cudf::jit::get_program_cache(*transform_jit_kernel_cu_jit)
.get_kernel(
kernel_name, {}, {{"transform/jit/operation-udf.hpp", cuda_source}}, {"-arch=sm_."}) //
->configure_1d_max_occupancy(0, 0, nullptr, stream.value()) //
->launch(output.size(), //
cudf::jit::get_data_ptr(output),
cudf::jit::get_data_ptr(input));
.instantiate(typenames);

std::string cuda_source = is_ptx
? cudf::jit::parse_single_function_ptx(udf, //
"GENERIC_TRANSFORM_OP",
ptx_params(output, inputs))
: cudf::jit::parse_single_function_cuda(udf, //
"GENERIC_TRANSFORM_OP");

{
std::vector<device_data_t> device_data;

device_data.push_back(const_cast<device_data_t>(cudf::jit::get_data_ptr(output)));
std::transform(
inputs.begin(), inputs.end(), std::back_inserter(device_data), [](column_view view) {
return const_cast<device_data_t>(cudf::jit::get_data_ptr(view));
});

int64_t size = output.size();

std::vector<void*> args;
args.push_back(&size);
std::transform(device_data.begin(),
device_data.end(),
std::back_inserter(args),
[](device_data_t& data) -> void* { return &data; });

cudf::jit::get_program_cache(*transform_jit_kernel_cu_jit)
.get_kernel(
kernel_name, {}, {{"transform/jit/operation-udf.hpp", cuda_source}}, {"-arch=sm_."}) //
->configure_1d_max_occupancy(0, 0, nullptr, stream.value()) //
->launch(args.data());
}
}
} // namespace

} // namespace jit
} // namespace transformation

namespace detail {
std::unique_ptr<column> transform(column_view const& input,
std::string const& unary_udf,
std::unique_ptr<column> transform(std::vector<column_view> const& inputs,
std::string const& transform_udf,
data_type output_type,
bool is_ptx,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
CUDF_EXPECTS(is_fixed_width(input.type()), "Unexpected non-fixed-width type.");
CUDF_EXPECTS(is_fixed_width(output_type), "Unexpected non-fixed-width type.");
std::for_each(inputs.begin(), inputs.end(), [](auto& col) {
CUDF_EXPECTS(is_fixed_width(col.type()), "Unexpected non-fixed-width type.");
});

auto base_column = std::max_element(
inputs.begin(), inputs.end(), [](auto& a, auto& b) { return a.size() < b.size(); });

std::for_each(inputs.begin(), inputs.end(), [&](column_view const& col) {
CUDF_EXPECTS((col.size() == 1) || (col.size() == base_column->size()), "");
CUDF_EXPECTS((col.null_count() == 0) || (col.null_count() == base_column->null_count()), "");
});

std::unique_ptr<column> output = make_fixed_width_column(
output_type, input.size(), copy_bitmask(input, stream, mr), input.null_count(), stream, mr);
std::unique_ptr<column> output = make_fixed_width_column(output_type,
base_column->size(),
copy_bitmask(*base_column, stream, mr),
base_column->null_count(),
stream,
mr);

if (input.is_empty()) { return output; }
if (base_column->is_empty()) { return output; }

mutable_column_view const output_view = *output;

// transform
transformation::jit::unary_operation(output_view, input, unary_udf, output_type, is_ptx, stream);
transformation::jit::transform_operation(
output_view, inputs, transform_udf, output_type, is_ptx, stream, base_column->size());

return output;
}

} // namespace detail

std::unique_ptr<column> transform(column_view const& input,
std::string const& unary_udf,
std::unique_ptr<column> transform(std::vector<column_view> const& inputs,
std::string const& transform_udf,
data_type output_type,
bool is_ptx,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
CUDF_FUNC_RANGE();
return detail::transform(input, unary_udf, output_type, is_ptx, stream, mr);
return detail::transform(inputs, transform_udf, output_type, is_ptx, stream, mr);
}

} // namespace cudf
9 changes: 6 additions & 3 deletions cpp/tests/streams/transform_test.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
* Copyright (c) 2024-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -32,8 +32,11 @@ void test_udf(char const* udf, Data data_init, cudf::size_type size, bool is_ptx
auto data_iter = cudf::detail::make_counting_transform_iterator(0, data_init);
cudf::test::fixed_width_column_wrapper<dtype, typename decltype(data_iter)::value_type> in(
data_iter, data_iter + size, all_valid);
cudf::transform(
in, udf, cudf::data_type(cudf::type_to_id<dtype>()), is_ptx, cudf::test::get_default_stream());
cudf::transform({in},
udf,
cudf::data_type(cudf::type_to_id<dtype>()),
is_ptx,
cudf::test::get_default_stream());
}

TEST_F(TransformTest, Transform)
4 changes: 2 additions & 2 deletions cpp/tests/transform/integration/unary_transform_test.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
* Copyright (c) 2019-2025, NVIDIA CORPORATION.
*
* Copyright 2018-2019 BlazingDB, Inc.
* Copyright 2018 Christian Noboa Mardini <[email protected]>
@@ -39,7 +39,7 @@ void test_udf(char const* udf, Op op, Data data_init, cudf::size_type size, bool
data_iter, data_iter + size, all_valid);

std::unique_ptr<cudf::column> out =
cudf::transform(in, udf, cudf::data_type(cudf::type_to_id<dtype>()), is_ptx);
cudf::transform({in}, udf, cudf::data_type(cudf::type_to_id<dtype>()), is_ptx);

ASSERT_UNARY<dtype, dtype>(out->view(), in, op);
}
4 changes: 2 additions & 2 deletions java/src/main/native/src/ColumnViewJni.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
* Copyright (c) 2019-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -1503,7 +1503,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_transform(
cudf::jni::native_jstring n_j_udf(env, j_udf);
std::string n_udf(n_j_udf.get());
return release_as_jlong(
cudf::transform(*column, n_udf, cudf::data_type(cudf::type_id::INT32), j_is_ptx));
cudf::transform({*column}, n_udf, cudf::data_type(cudf::type_id::INT32), j_is_ptx));
}
CATCH_STD(env, 0);
}
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/column/numerical.py
Original file line number Diff line number Diff line change
@@ -185,7 +185,7 @@ def __setitem__(self, key: Any, value: Any):
@acquire_spill_lock()
def transform(self, compiled_op, np_dtype: np.dtype) -> ColumnBase:
plc_column = plc.transform.transform(
self.to_pylibcudf(mode="read"),
[self.to_pylibcudf(mode="read")],
compiled_op[0],
plc.column._datatype_from_dtype_desc(np_dtype.str[1:]),
True,
7 changes: 4 additions & 3 deletions python/pylibcudf/pylibcudf/libcudf/transform.pxd
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
# Copyright (c) 2020-2025, NVIDIA CORPORATION.
from libcpp cimport bool
from libcpp.memory cimport unique_ptr
from libcpp.pair cimport pair
from libcpp.string cimport string
from libcpp.vector cimport vector
from pylibcudf.exception_handler cimport libcudf_exception_handler
from pylibcudf.libcudf.column.column cimport column
from pylibcudf.libcudf.column.column_view cimport column_view
@@ -33,8 +34,8 @@ cdef extern from "cudf/transform.hpp" namespace "cudf" nogil:
) except +libcudf_exception_handler

cdef unique_ptr[column] transform(
column_view input,
string unary_udf,
const vector[column_view] & inputs,
const string & transform_udf,
data_type output_type,
bool is_ptx
) except +libcudf_exception_handler
7 changes: 5 additions & 2 deletions python/pylibcudf/pylibcudf/transform.pxd
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Copyright (c) 2024-2025, NVIDIA CORPORATION.
from libcpp cimport bool
from pylibcudf.libcudf.types cimport bitmask_type, data_type

@@ -17,7 +17,10 @@ cpdef tuple[gpumemoryview, int] bools_to_mask(Column input)

cpdef Column mask_to_bools(Py_ssize_t bitmask, int begin_bit, int end_bit)

cpdef Column transform(Column input, str unary_udf, DataType output_type, bool is_ptx)
cpdef Column transform(list[Column] inputs,
str transform_udf,
DataType output_type,
bool is_ptx)

cpdef tuple[Table, Column] encode(Table input)

5 changes: 4 additions & 1 deletion python/pylibcudf/pylibcudf/transform.pyi
Original file line number Diff line number Diff line change
@@ -10,7 +10,10 @@ def compute_column(input: Table, expr: Expression) -> Column: ...
def bools_to_mask(input: Column) -> tuple[gpumemoryview, int]: ...
def mask_to_bools(bitmask: int, begin_bit: int, end_bit: int) -> Column: ...
def transform(
input: Column, unary_udf: str, output_type: DataType, is_ptx: bool
inputs: list[Column],
transform_udf: str,
output_type: DataType,
is_ptx: bool,
) -> Column: ...
def encode(input: Table) -> tuple[Table, Column]: ...
def one_hot_encode(input: Column, categories: Column) -> Table: ...
29 changes: 19 additions & 10 deletions python/pylibcudf/pylibcudf/transform.pyx
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Copyright (c) 2024-2025, NVIDIA CORPORATION.

from cython.operator cimport dereference
from libcpp.memory cimport unique_ptr
from libcpp.string cimport string
from libcpp.vector cimport vector
from libcpp.utility cimport move, pair
from pylibcudf.libcudf cimport transform as cpp_transform
from pylibcudf.libcudf.column.column cimport column
from pylibcudf.libcudf.column.column_view cimport column_view
from pylibcudf.libcudf.table.table cimport table
from pylibcudf.libcudf.table.table_view cimport table_view
from pylibcudf.libcudf.types cimport bitmask_type, size_type
@@ -129,16 +131,19 @@ cpdef Column mask_to_bools(Py_ssize_t bitmask, int begin_bit, int end_bit):
return Column.from_libcudf(move(c_result))


cpdef Column transform(Column input, str unary_udf, DataType output_type, bool is_ptx):
"""Create a new column by applying a unary function against every
element of an input column.
cpdef Column transform(list[Column] inputs,
str transform_udf,
DataType output_type,
bool is_ptx):
"""Create a new column by applying a transform function against
multiple input columns.
Parameters
----------
input : Column
Column to transform.
unary_udf : str
The PTX/CUDA string of the unary function to apply.
inputs : list[Column]
Columns to transform.
transform_udf : str
The PTX/CUDA string of the transform function to apply.
output_type : DataType
    The output type that is compatible with the output type in the transform_udf.
is_ptx : bool
@@ -150,13 +155,17 @@ cpdef Column transform(Column input, str unary_udf, DataType output_type, bool i
Column
The transformed column having the UDF applied to each element.
"""
cdef vector[column_view] c_inputs
cdef unique_ptr[column] c_result
cdef string c_unary_udf = unary_udf.encode()
cdef string c_transform_udf = transform_udf.encode()
cdef bool c_is_ptx = is_ptx

for input in inputs:
c_inputs.push_back((<Column?>input).view())

with nogil:
c_result = cpp_transform.transform(
input.view(), c_unary_udf, output_type.c_obj, c_is_ptx
c_inputs, c_transform_udf, output_type.c_obj, c_is_ptx
)

return Column.from_libcudf(move(c_result))