diff --git a/cudax/include/cuda/experimental/__multi_gpu/concepts.h b/cudax/include/cuda/experimental/__multi_gpu/concepts.h new file mode 100644 index 00000000000..7bb09934431 --- /dev/null +++ b/cudax/include/cuda/experimental/__multi_gpu/concepts.h @@ -0,0 +1,222 @@ +//===----------------------------------------------------------------------===// +// +// Part of CUDA Experimental in CUDA C++ Core Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef _CUDA_EXPERIMENTAL___MULTI_GPU_CONCEPTS_H +#define _CUDA_EXPERIMENTAL___MULTI_GPU_CONCEPTS_H + +#include // IWYU pragma: export + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include +#include +#include +#include +#include +#include + +#include + +// NOLINTBEGIN(bugprone-reserved-identifier) +namespace cuda::experimental +{ +// Needed because the C++17 concept emulation can't handle the implicit first template +// parameter of real concepts. 
+template +_CCCL_CONCEPT __convertible_to_int32 = ::cuda::std::convertible_to<_Tp, ::cuda::std::int32_t>; + +// Requires communicator<_Comm> to be checked first, but that invokes a circular dependency +template +_CCCL_CONCEPT __has_send = _CCCL_REQUIRES_EXPR( + (_Comm, _Ptr), + _Comm& __comm, + _Ptr __buf, + ::cuda::std::size_t __count, + ::cuda::std::int32_t __peer, + ::cuda::stream_ref __stream)(_Same_as(void) __comm.send( + ::cuda::std::declval(), __buf, __count, __peer, __stream)); + +// Requires communicator<_Comm> to be checked first, but that invokes a circular dependency +template +_CCCL_CONCEPT __has_recv = _CCCL_REQUIRES_EXPR( + (_Comm, _Ptr), + _Comm& __comm, + _Ptr __buf, + ::cuda::std::size_t __count, + ::cuda::std::int32_t __peer, + ::cuda::stream_ref __stream)(_Same_as(void) __comm.recv( + ::cuda::std::declval(), __buf, __count, __peer, __stream)); + +template +_CCCL_CONCEPT __communicator = _CCCL_REQUIRES_EXPR((_Comm), _Comm& __comm)( + typename(typename _Comm::native_handle_type), + _Same_as(typename _Comm::native_handle_type) __comm.native_handle(), + noexcept(__comm.native_handle()), + _Satisfies(__convertible_to_int32) __comm.rank(), + _Satisfies(__convertible_to_int32) __comm.size(), + typename(typename _Comm::group_guard_type), + _Same_as(typename _Comm::group_guard_type) __comm.group_guard(), + requires(__has_send<_Comm>), + requires(__has_recv<_Comm>) // +); + +// ========================================================================================== + +// Use a typed pointer as default here, since the op may need to be instantiated with a +// dereferenceable pointer type for reductions +template +_CCCL_CONCEPT __has_reduce = _CCCL_REQUIRES_EXPR( + (_Comm, _Ptr), + _Comm& __comm, + _Ptr __sendbuff, + _Ptr __recvbuff, + ::cuda::std::size_t __count, + ::cuda::std::plus<> __op, + ::cuda::std::int32_t __root, + ::cuda::stream_ref __stream)( + requires(__communicator<_Comm>), + _Same_as(void) __comm.reduce( + ::cuda::std::declval(), __sendbuff, 
+ __recvbuff, __count, __op, __root, __stream)); + +// ========================================================================================== + +// Use a typed pointer as default here, since the op may need to be instantiated with a +// dereferenceable pointer type for reductions +template +_CCCL_CONCEPT __has_all_reduce = _CCCL_REQUIRES_EXPR( + (_Comm, _Ptr), + _Comm& __comm, + _Ptr __sendbuff, + _Ptr __recvbuff, + ::cuda::std::size_t __count, + ::cuda::std::plus<> __op, + ::cuda::stream_ref __stream)( + requires(__communicator<_Comm>), + _Same_as(void) __comm.all_reduce( + ::cuda::std::declval(), __sendbuff, __recvbuff, __count, __op, __stream)); + +// ========================================================================================== + +template +_CCCL_CONCEPT __has_gather = _CCCL_REQUIRES_EXPR( + (_Comm, _Ptr), + _Comm& __comm, + _Ptr __sendbuff, + _Ptr __recvbuff, + ::cuda::std::size_t __count, + ::cuda::std::int32_t __root, + ::cuda::stream_ref __stream)( + requires(__communicator<_Comm>), + _Same_as(void) __comm.gather( + ::cuda::std::declval(), __sendbuff, __recvbuff, __count, __root, __stream)); + +// ========================================================================================== + +template +_CCCL_CONCEPT __has_gather_v = _CCCL_REQUIRES_EXPR( + (_Comm, _Ptr), + _Comm& __comm, + _Ptr __sendbuff, + ::cuda::std::size_t __send_count, + _Ptr __recvbuff, + const ::cuda::std::size_t* __recv_counts, + const ::cuda::std::size_t* __displs, + ::cuda::std::int32_t __root, + ::cuda::stream_ref __stream)( + requires(__communicator<_Comm>), + _Same_as(void) __comm.gather_v( + ::cuda::std::declval(), + __sendbuff, + __send_count, + __recvbuff, + __recv_counts, + __displs, + __root, + __stream)); + +// ========================================================================================== + +template +_CCCL_CONCEPT __has_all_gather = _CCCL_REQUIRES_EXPR( + (_Comm, _Ptr), + _Comm& __comm, + _Ptr __sendbuff, + _Ptr __recvbuff, + ::cuda::std::size_t 
__count, + ::cuda::stream_ref __stream)( + requires(__communicator<_Comm>), + _Same_as(void) __comm.all_gather( + ::cuda::std::declval(), __sendbuff, __recvbuff, __count, __stream)); + +// ========================================================================================== + +template +_CCCL_CONCEPT __has_broadcast = _CCCL_REQUIRES_EXPR( + (_Comm, _Ptr), + _Comm& __comm, + _Ptr __sendbuff, + _Ptr __recvbuff, + ::cuda::std::size_t __count, + ::cuda::std::int32_t __root, + ::cuda::stream_ref __stream)( + requires(__communicator<_Comm>), + _Same_as(void) __comm.broadcast( + ::cuda::std::declval(), __sendbuff, __recvbuff, __count, __root, __stream)); + +// ========================================================================================== + +template +_CCCL_CONCEPT __has_all_to_all = _CCCL_REQUIRES_EXPR( + (_Comm, _Ptr), + _Comm& __comm, + _Ptr __sendbuff, + _Ptr __recvbuff, + ::cuda::std::size_t __count, + ::cuda::stream_ref __stream)( + requires(__communicator<_Comm>), + _Same_as(void) __comm.all_to_all( + ::cuda::std::declval(), __sendbuff, __recvbuff, __count, __stream)); + +// ========================================================================================== + +template +_CCCL_CONCEPT __has_all_to_all_v = _CCCL_REQUIRES_EXPR( + (_Comm, _Ptr), + _Comm& __comm, + _Ptr __sendbuff, + const ::cuda::std::size_t* __send_counts, + const ::cuda::std::size_t* __send_displs, + _Ptr __recvbuff, + const ::cuda::std::size_t* __recv_counts, + const ::cuda::std::size_t* __recv_displs, + ::cuda::stream_ref __stream)( + requires(__communicator<_Comm>), + _Same_as(void) __comm.all_to_all_v( + ::cuda::std::declval(), + __sendbuff, + __send_counts, + __send_displs, + __recvbuff, + __recv_counts, + __recv_displs, + __stream)); +} // namespace cuda::experimental +// NOLINTEND(bugprone-reserved-identifier) + +#include + +#endif // _CUDA_EXPERIMENTAL___MULTI_GPU_CONCEPTS_H diff --git a/cudax/test/CMakeLists.txt b/cudax/test/CMakeLists.txt index 
01bf161ac24..f4c8268a6e6 100644 --- a/cudax/test/CMakeLists.txt +++ b/cudax/test/CMakeLists.txt @@ -141,6 +141,8 @@ cudax_add_catch2_test(test_target algorithm algorithm/copy.cu ) +add_subdirectory(multi_gpu) + cudax_add_catch2_test(test_target group.mapping.binary_partition group/mapping/binary_partition.cu ) diff --git a/cudax/test/multi_gpu/CMakeLists.txt b/cudax/test/multi_gpu/CMakeLists.txt new file mode 100644 index 00000000000..8fff0626dff --- /dev/null +++ b/cudax/test/multi_gpu/CMakeLists.txt @@ -0,0 +1,11 @@ +#===----------------------------------------------------------------------===## +# +# Part of CUDA Experimental in CUDA C++ Core Libraries, +# under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. +# +#===----------------------------------------------------------------------===## + +add_subdirectory(concepts) diff --git a/cudax/test/multi_gpu/concepts/CMakeLists.txt b/cudax/test/multi_gpu/concepts/CMakeLists.txt new file mode 100644 index 00000000000..dd2aa696722 --- /dev/null +++ b/cudax/test/multi_gpu/concepts/CMakeLists.txt @@ -0,0 +1,38 @@ +#===----------------------------------------------------------------------===## +# +# Part of CUDA Experimental in CUDA C++ Core Libraries, +# under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. 
+# +#===----------------------------------------------------------------------===## + +set( + multi_gpu_concepts_test_sources + communicator.cu + has_all_gather.cu + has_all_reduce.cu + has_all_to_all.cu + has_all_to_all_v.cu + has_broadcast.cu + has_gather.cu + has_gather_v.cu + has_recv.cu + has_reduce.cu + has_send.cu +) + +function(cudax_add_multi_gpu_concepts_test target_name_var source) + cmake_path(GET source STEM filename) + set(test_target "cudax.test.multi_gpu.concepts.${filename}") + + cccl_add_executable(${test_target} SOURCES "${source}" ADD_CTEST) + target_link_libraries(${test_target} PRIVATE cudax.compiler_interface) + + set(${target_name_var} ${test_target} PARENT_SCOPE) +endfunction() + +foreach (source IN LISTS multi_gpu_concepts_test_sources) + cudax_add_multi_gpu_concepts_test(test_target "${source}") +endforeach() diff --git a/cudax/test/multi_gpu/concepts/collective_concepts_common.cuh b/cudax/test/multi_gpu/concepts/collective_concepts_common.cuh new file mode 100644 index 00000000000..bded0d82c7d --- /dev/null +++ b/cudax/test/multi_gpu/concepts/collective_concepts_common.cuh @@ -0,0 +1,66 @@ +//===----------------------------------------------------------------------===// +// +// Part of CUDA Experimental in CUDA C++ Core Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. 
+// +//===----------------------------------------------------------------------===// + +#ifndef _CUDAX_TEST_MULTI_GPU_COLLECTIVE_CONCEPTS_COMMON_CUH +#define _CUDAX_TEST_MULTI_GPU_COLLECTIVE_CONCEPTS_COMMON_CUH + +#include +#include +#include + +#include "concepts_common.cuh" + +namespace cudax_multi_gpu_concepts +{ +struct collective_communicator_model : communicator_model +{ + template + void reduce(group_guard_type&, Tp*, Tp*, ::cuda::std::size_t, Op, ::cuda::std::int32_t, ::cuda::stream_ref); + + template + void all_reduce(group_guard_type&, Tp*, Tp*, ::cuda::std::size_t, Op, ::cuda::stream_ref); + + template + void gather(group_guard_type&, Tp*, Tp*, ::cuda::std::size_t, ::cuda::std::int32_t, ::cuda::stream_ref); + + template + void gather_v( + group_guard_type&, + Tp*, + ::cuda::std::size_t, + Tp*, + const ::cuda::std::size_t*, + const ::cuda::std::size_t*, + ::cuda::std::int32_t, + ::cuda::stream_ref); + + template + void all_gather(group_guard_type&, Tp*, Tp*, ::cuda::std::size_t, ::cuda::stream_ref); + + template + void broadcast(group_guard_type&, Tp*, Tp*, ::cuda::std::size_t, ::cuda::std::int32_t, ::cuda::stream_ref); + + template + void all_to_all(group_guard_type&, Tp*, Tp*, ::cuda::std::size_t, ::cuda::stream_ref); + + template + void all_to_all_v( + group_guard_type&, + Tp*, + const ::cuda::std::size_t*, + const ::cuda::std::size_t*, + Tp*, + const ::cuda::std::size_t*, + const ::cuda::std::size_t*, + ::cuda::stream_ref); +}; +} // namespace cudax_multi_gpu_concepts + +#endif // _CUDAX_TEST_MULTI_GPU_COLLECTIVE_CONCEPTS_COMMON_CUH diff --git a/cudax/test/multi_gpu/concepts/communicator.cu b/cudax/test/multi_gpu/concepts/communicator.cu new file mode 100644 index 00000000000..632c2bb99da --- /dev/null +++ b/cudax/test/multi_gpu/concepts/communicator.cu @@ -0,0 +1,51 @@ +//===----------------------------------------------------------------------===// +// +// Part of CUDA Experimental in CUDA C++ Core Libraries, +// under the Apache License v2.0 with 
LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#include + +#include "concepts_common.cuh" + +namespace +{ +namespace cudax = ::cuda::experimental; +namespace types = cudax_multi_gpu_concepts; + +// nvcc ignores [[maybe_unused]] entirely +_CCCL_BEGIN_NV_DIAG_SUPPRESS(177) + +struct no_send : types::basic_communicator_model +{ + template + void recv(group_guard_type&, Tp*, ::cuda::std::size_t, ::cuda::std::int32_t, ::cuda::stream_ref); +}; + +struct no_recv : types::basic_communicator_model +{ + template + void send(group_guard_type&, Tp*, ::cuda::std::size_t, ::cuda::std::int32_t, ::cuda::stream_ref); +}; + +_CCCL_END_NV_DIAG_SUPPRESS() + +_CCCL_HOST_DEVICE_API constexpr bool test() +{ + static_assert(cudax::__communicator); + static_assert(!cudax::__communicator); + static_assert(!cudax::__communicator); + return true; +} +} // namespace + +int main(int, char**) +{ + test(); + static_assert(test()); + return 0; +} diff --git a/cudax/test/multi_gpu/concepts/concepts_common.cuh b/cudax/test/multi_gpu/concepts/concepts_common.cuh new file mode 100644 index 00000000000..5fbac3b794d --- /dev/null +++ b/cudax/test/multi_gpu/concepts/concepts_common.cuh @@ -0,0 +1,43 @@ +//===----------------------------------------------------------------------===// +// +// Part of CUDA Experimental in CUDA C++ Core Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. 
+// +//===----------------------------------------------------------------------===// + +#ifndef _CUDAX_TEST_MULTI_GPU_CONCEPTS_COMMON_CUH +#define _CUDAX_TEST_MULTI_GPU_CONCEPTS_COMMON_CUH + +#include +#include +#include + +namespace cudax_multi_gpu_concepts +{ +struct group_guard +{}; + +struct basic_communicator_model +{ + using native_handle_type = int; + using group_guard_type = group_guard; + + native_handle_type native_handle() noexcept; + ::cuda::std::int32_t rank() noexcept; + ::cuda::std::int32_t size() noexcept; + group_guard_type group_guard(); +}; + +struct communicator_model : basic_communicator_model +{ + template + void send(group_guard_type&, Tp*, ::cuda::std::size_t, ::cuda::std::int32_t, ::cuda::stream_ref); + template + void recv(group_guard_type&, Tp*, ::cuda::std::size_t, ::cuda::std::int32_t, ::cuda::stream_ref); +}; +} // namespace cudax_multi_gpu_concepts + +#endif // _CUDAX_TEST_MULTI_GPU_CONCEPTS_COMMON_CUH diff --git a/cudax/test/multi_gpu/concepts/has_all_gather.cu b/cudax/test/multi_gpu/concepts/has_all_gather.cu new file mode 100644 index 00000000000..3773d3dfb0c --- /dev/null +++ b/cudax/test/multi_gpu/concepts/has_all_gather.cu @@ -0,0 +1,50 @@ +//===----------------------------------------------------------------------===// +// +// Part of CUDA Experimental in CUDA C++ Core Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. 
+// +//===----------------------------------------------------------------------===// + +#include +#include + +#include + +#include "collective_concepts_common.cuh" + +namespace +{ +namespace cudax = ::cuda::experimental; +namespace types = cudax_multi_gpu_concepts; + +// nvcc ignores [[maybe_unused]] entirely +_CCCL_BEGIN_NV_DIAG_SUPPRESS(177) + +struct all_gather_returns_int : types::communicator_model +{ + template + int all_gather(group_guard_type&, Tp*, Tp*, ::cuda::std::size_t, ::cuda::stream_ref); +}; + +_CCCL_END_NV_DIAG_SUPPRESS() + +_CCCL_HOST_DEVICE_API constexpr bool test() +{ + static_assert(cudax::__has_all_gather); + static_assert(cudax::__has_all_gather); + static_assert(!cudax::__has_all_gather); + + static_assert(!cudax::__has_all_gather); + return true; +} +} // namespace + +int main(int, char**) +{ + test(); + static_assert(test()); + return 0; +} diff --git a/cudax/test/multi_gpu/concepts/has_all_reduce.cu b/cudax/test/multi_gpu/concepts/has_all_reduce.cu new file mode 100644 index 00000000000..e007ef5d0c7 --- /dev/null +++ b/cudax/test/multi_gpu/concepts/has_all_reduce.cu @@ -0,0 +1,60 @@ +//===----------------------------------------------------------------------===// +// +// Part of CUDA Experimental in CUDA C++ Core Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. 
+// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include + +#include + +#include "collective_concepts_common.cuh" + +namespace +{ +namespace cudax = ::cuda::experimental; +namespace types = cudax_multi_gpu_concepts; + +// nvcc ignores [[maybe_unused]] entirely +_CCCL_BEGIN_NV_DIAG_SUPPRESS(177) + +struct all_reduce_rejects_void : types::communicator_model +{ + template , int> = 0> + void all_reduce(group_guard_type&, Tp*, Tp*, ::cuda::std::size_t, Op, ::cuda::stream_ref); +}; + +struct all_reduce_returns_int : types::communicator_model +{ + template + int all_reduce(group_guard_type&, Tp*, Tp*, ::cuda::std::size_t, Op, ::cuda::stream_ref); +}; + +_CCCL_END_NV_DIAG_SUPPRESS() + +_CCCL_HOST_DEVICE_API constexpr bool test() +{ + static_assert(cudax::__has_all_reduce); + static_assert(cudax::__has_all_reduce); + static_assert(!cudax::__has_all_reduce); + + static_assert(!cudax::__has_all_reduce); + static_assert(cudax::__has_all_reduce); + static_assert(!cudax::__has_all_reduce); + return true; +} +} // namespace + +int main(int, char**) +{ + test(); + static_assert(test()); + return 0; +} diff --git a/cudax/test/multi_gpu/concepts/has_all_to_all.cu b/cudax/test/multi_gpu/concepts/has_all_to_all.cu new file mode 100644 index 00000000000..78a3f4f29e7 --- /dev/null +++ b/cudax/test/multi_gpu/concepts/has_all_to_all.cu @@ -0,0 +1,50 @@ +//===----------------------------------------------------------------------===// +// +// Part of CUDA Experimental in CUDA C++ Core Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. 
+// +//===----------------------------------------------------------------------===// + +#include +#include + +#include + +#include "collective_concepts_common.cuh" + +namespace +{ +namespace cudax = ::cuda::experimental; +namespace types = cudax_multi_gpu_concepts; + +// nvcc ignores [[maybe_unused]] entirely +_CCCL_BEGIN_NV_DIAG_SUPPRESS(177) + +struct all_to_all_returns_int : types::communicator_model +{ + template + int all_to_all(group_guard_type&, Tp*, Tp*, ::cuda::std::size_t, ::cuda::stream_ref); +}; + +_CCCL_END_NV_DIAG_SUPPRESS() + +_CCCL_HOST_DEVICE_API constexpr bool test() +{ + static_assert(cudax::__has_all_to_all); + static_assert(cudax::__has_all_to_all); + static_assert(!cudax::__has_all_to_all); + + static_assert(!cudax::__has_all_to_all); + return true; +} +} // namespace + +int main(int, char**) +{ + test(); + static_assert(test()); + return 0; +} diff --git a/cudax/test/multi_gpu/concepts/has_all_to_all_v.cu b/cudax/test/multi_gpu/concepts/has_all_to_all_v.cu new file mode 100644 index 00000000000..366dcfba042 --- /dev/null +++ b/cudax/test/multi_gpu/concepts/has_all_to_all_v.cu @@ -0,0 +1,58 @@ +//===----------------------------------------------------------------------===// +// +// Part of CUDA Experimental in CUDA C++ Core Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. 
+// +//===----------------------------------------------------------------------===// + +#include +#include + +#include + +#include "collective_concepts_common.cuh" + +namespace +{ +namespace cudax = ::cuda::experimental; +namespace types = cudax_multi_gpu_concepts; + +// nvcc ignores [[maybe_unused]] entirely +_CCCL_BEGIN_NV_DIAG_SUPPRESS(177) + +struct all_to_all_v_returns_int : types::communicator_model +{ + template + int all_to_all_v( + group_guard_type&, + Tp*, + const ::cuda::std::size_t*, + const ::cuda::std::size_t*, + Tp*, + const ::cuda::std::size_t*, + const ::cuda::std::size_t*, + ::cuda::stream_ref); +}; + +_CCCL_END_NV_DIAG_SUPPRESS() + +_CCCL_HOST_DEVICE_API constexpr bool test() +{ + static_assert(cudax::__has_all_to_all_v); + static_assert(cudax::__has_all_to_all_v); + static_assert(!cudax::__has_all_to_all_v); + + static_assert(!cudax::__has_all_to_all_v); + return true; +} +} // namespace + +int main(int, char**) +{ + test(); + static_assert(test()); + return 0; +} diff --git a/cudax/test/multi_gpu/concepts/has_broadcast.cu b/cudax/test/multi_gpu/concepts/has_broadcast.cu new file mode 100644 index 00000000000..9cfa38efe2b --- /dev/null +++ b/cudax/test/multi_gpu/concepts/has_broadcast.cu @@ -0,0 +1,51 @@ +//===----------------------------------------------------------------------===// +// +// Part of CUDA Experimental in CUDA C++ Core Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. 
+// +//===----------------------------------------------------------------------===// + +#include +#include +#include + +#include + +#include "collective_concepts_common.cuh" + +namespace +{ +namespace cudax = ::cuda::experimental; +namespace types = cudax_multi_gpu_concepts; + +// nvcc ignores [[maybe_unused]] entirely +_CCCL_BEGIN_NV_DIAG_SUPPRESS(177) + +struct broadcast_returns_int : types::communicator_model +{ + template + int broadcast(group_guard_type&, Tp*, Tp*, ::cuda::std::size_t, ::cuda::std::int32_t, ::cuda::stream_ref); +}; + +_CCCL_END_NV_DIAG_SUPPRESS() + +_CCCL_HOST_DEVICE_API constexpr bool test() +{ + static_assert(cudax::__has_broadcast); + static_assert(cudax::__has_broadcast); + static_assert(!cudax::__has_broadcast); + + static_assert(!cudax::__has_broadcast); + return true; +} +} // namespace + +int main(int, char**) +{ + test(); + static_assert(test()); + return 0; +} diff --git a/cudax/test/multi_gpu/concepts/has_gather.cu b/cudax/test/multi_gpu/concepts/has_gather.cu new file mode 100644 index 00000000000..fe7fab4e2b5 --- /dev/null +++ b/cudax/test/multi_gpu/concepts/has_gather.cu @@ -0,0 +1,51 @@ +//===----------------------------------------------------------------------===// +// +// Part of CUDA Experimental in CUDA C++ Core Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. 
+// +//===----------------------------------------------------------------------===// + +#include +#include +#include + +#include + +#include "collective_concepts_common.cuh" + +namespace +{ +namespace cudax = ::cuda::experimental; +namespace types = cudax_multi_gpu_concepts; + +// nvcc ignores [[maybe_unused]] entirely +_CCCL_BEGIN_NV_DIAG_SUPPRESS(177) + +struct gather_returns_int : types::communicator_model +{ + template + int gather(group_guard_type&, Tp*, Tp*, ::cuda::std::size_t, ::cuda::std::int32_t, ::cuda::stream_ref); +}; + +_CCCL_END_NV_DIAG_SUPPRESS() + +_CCCL_HOST_DEVICE_API constexpr bool test() +{ + static_assert(cudax::__has_gather); + static_assert(cudax::__has_gather); + static_assert(!cudax::__has_gather); + + static_assert(!cudax::__has_gather); + return true; +} +} // namespace + +int main(int, char**) +{ + test(); + static_assert(test()); + return 0; +} diff --git a/cudax/test/multi_gpu/concepts/has_gather_v.cu b/cudax/test/multi_gpu/concepts/has_gather_v.cu new file mode 100644 index 00000000000..aac88772771 --- /dev/null +++ b/cudax/test/multi_gpu/concepts/has_gather_v.cu @@ -0,0 +1,58 @@ +//===----------------------------------------------------------------------===// +// +// Part of CUDA Experimental in CUDA C++ Core Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. 
+// +//===----------------------------------------------------------------------===// + +#include +#include +#include + +#include + +#include "collective_concepts_common.cuh" + +namespace +{ +namespace cudax = ::cuda::experimental; +namespace types = cudax_multi_gpu_concepts; + +// nvcc ignores [[maybe_unused]] entirely +_CCCL_BEGIN_NV_DIAG_SUPPRESS(177) + +struct gather_v_returns_int : types::communicator_model +{ + template + int gather_v(group_guard_type&, + Tp*, + ::cuda::std::size_t, + Tp*, + const ::cuda::std::size_t*, + const ::cuda::std::size_t*, + ::cuda::std::int32_t, + ::cuda::stream_ref); +}; + +_CCCL_END_NV_DIAG_SUPPRESS() + +_CCCL_HOST_DEVICE_API constexpr bool test() +{ + static_assert(cudax::__has_gather_v); + static_assert(cudax::__has_gather_v); + static_assert(!cudax::__has_gather_v); + + static_assert(!cudax::__has_gather_v); + return true; +} +} // namespace + +int main(int, char**) +{ + test(); + static_assert(test()); + return 0; +} diff --git a/cudax/test/multi_gpu/concepts/has_recv.cu b/cudax/test/multi_gpu/concepts/has_recv.cu new file mode 100644 index 00000000000..c9dc33455eb --- /dev/null +++ b/cudax/test/multi_gpu/concepts/has_recv.cu @@ -0,0 +1,57 @@ +//===----------------------------------------------------------------------===// +// +// Part of CUDA Experimental in CUDA C++ Core Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. 
+// +//===----------------------------------------------------------------------===// + +#include +#include +#include + +#include + +#include "concepts_common.cuh" + +namespace +{ +namespace cudax = ::cuda::experimental; +namespace types = cudax_multi_gpu_concepts; + +// nvcc ignores [[maybe_unused]] entirely +_CCCL_BEGIN_NV_DIAG_SUPPRESS(177) + +struct recv_returns_int : types::basic_communicator_model +{ + template + int recv(group_guard_type&, Tp*, ::cuda::std::size_t, ::cuda::std::int32_t, ::cuda::stream_ref); +}; + +struct no_recv : types::basic_communicator_model +{ + template + void send(group_guard_type&, Tp*, ::cuda::std::size_t, ::cuda::std::int32_t, ::cuda::stream_ref); +}; + +_CCCL_END_NV_DIAG_SUPPRESS() + +_CCCL_HOST_DEVICE_API constexpr bool test() +{ + static_assert(cudax::__has_recv); + static_assert(cudax::__has_recv); + static_assert(!cudax::__has_recv); + + static_assert(!cudax::__has_recv); + return true; +} +} // namespace + +int main(int, char**) +{ + test(); + static_assert(test()); + return 0; +} diff --git a/cudax/test/multi_gpu/concepts/has_reduce.cu b/cudax/test/multi_gpu/concepts/has_reduce.cu new file mode 100644 index 00000000000..da2447f119d --- /dev/null +++ b/cudax/test/multi_gpu/concepts/has_reduce.cu @@ -0,0 +1,51 @@ +//===----------------------------------------------------------------------===// +// +// Part of CUDA Experimental in CUDA C++ Core Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. 
+// +//===----------------------------------------------------------------------===// + +#include +#include +#include + +#include + +#include "collective_concepts_common.cuh" + +namespace +{ +namespace cudax = ::cuda::experimental; +namespace types = cudax_multi_gpu_concepts; + +// nvcc ignores [[maybe_unused]] entirely +_CCCL_BEGIN_NV_DIAG_SUPPRESS(177) + +struct reduce_returns_int : types::communicator_model +{ + template + int reduce(group_guard_type&, Tp*, Tp*, ::cuda::std::size_t, Op, ::cuda::std::int32_t, ::cuda::stream_ref); +}; + +_CCCL_END_NV_DIAG_SUPPRESS() + +_CCCL_HOST_DEVICE_API constexpr bool test() +{ + static_assert(cudax::__has_reduce); + static_assert(cudax::__has_reduce); + static_assert(!cudax::__has_reduce); + + static_assert(!cudax::__has_reduce); + return true; +} +} // namespace + +int main(int, char**) +{ + test(); + static_assert(test()); + return 0; +} diff --git a/cudax/test/multi_gpu/concepts/has_send.cu b/cudax/test/multi_gpu/concepts/has_send.cu new file mode 100644 index 00000000000..6fdb8ef7cac --- /dev/null +++ b/cudax/test/multi_gpu/concepts/has_send.cu @@ -0,0 +1,57 @@ +//===----------------------------------------------------------------------===// +// +// Part of CUDA Experimental in CUDA C++ Core Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. 
+// +//===----------------------------------------------------------------------===// + +#include +#include +#include + +#include + +#include "concepts_common.cuh" + +namespace +{ +namespace cudax = ::cuda::experimental; +namespace types = cudax_multi_gpu_concepts; + +// nvcc ignores [[maybe_unused]] entirely +_CCCL_BEGIN_NV_DIAG_SUPPRESS(177) + +struct no_send : types::basic_communicator_model +{ + template + void send_sync(group_guard_type&, Tp*, ::cuda::std::size_t, ::cuda::std::int32_t); +}; + +struct send_returns_int : no_send +{ + template + int send(group_guard_type&, Tp*, ::cuda::std::size_t, ::cuda::std::int32_t, ::cuda::stream_ref); +}; + +_CCCL_END_NV_DIAG_SUPPRESS() + +_CCCL_HOST_DEVICE_API constexpr bool test() +{ + static_assert(cudax::__has_send); + static_assert(cudax::__has_send); + + static_assert(!cudax::__has_send); + static_assert(!cudax::__has_send); + return true; +} +} // namespace + +int main(int, char**) +{ + test(); + static_assert(test()); + return 0; +}