diff --git a/cudax/include/cuda/experimental/__multi_gpu/concepts.h b/cudax/include/cuda/experimental/__multi_gpu/concepts.h
new file mode 100644
index 00000000000..7bb09934431
--- /dev/null
+++ b/cudax/include/cuda/experimental/__multi_gpu/concepts.h
@@ -0,0 +1,222 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of CUDA Experimental in CUDA C++ Core Libraries,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _CUDA_EXPERIMENTAL___MULTI_GPU_CONCEPTS_H
+#define _CUDA_EXPERIMENTAL___MULTI_GPU_CONCEPTS_H
+
+#include <cuda/std/detail/__config> // IWYU pragma: export
+
+#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
+#  pragma GCC system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
+#  pragma clang system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
+#  pragma system_header
+#endif // no system header
+
+#include <cuda/__stream/stream_ref.h>
+#include <cuda/std/__concepts/concept_macros.h>
+#include <cuda/std/__concepts/convertible_to.h>
+#include <cuda/std/__concepts/same_as.h>
+#include <cuda/std/__functional/operations.h>
+#include <cuda/std/__utility/declval.h>
+#include <cuda/std/cstdint>
+
+#include <cuda/std/__cccl/prologue.h>
+
+// NOLINTBEGIN(bugprone-reserved-identifier)
+namespace cuda::experimental
+{
+// Unary form of convertible_to<_Tp, int32_t>, usable with _Satisfies(). Needed because the C++17 concept
+// emulation can't handle the implicit first template parameter of real concepts.
+template <class _Tp>
+_CCCL_CONCEPT __convertible_to_int32 = ::cuda::std::convertible_to<_Tp, ::cuda::std::int32_t>;
+
+// Satisfied when __comm.send(group_guard_type&, _Ptr, size_t, int32_t, stream_ref) is well-formed and returns void.
+template <class _Comm, class _Ptr = void*> // not gated on __communicator<_Comm>: that would be a circular dependency
+_CCCL_CONCEPT __has_send = _CCCL_REQUIRES_EXPR(
+  (_Comm, _Ptr),
+  _Comm& __comm, // the communicator is taken as a non-const lvalue
+  _Ptr __buf,
+  ::cuda::std::size_t __count,
+  ::cuda::std::int32_t __peer,
+  ::cuda::stream_ref __stream)(_Same_as(void) __comm.send(
+  ::cuda::std::declval<typename _Comm::group_guard_type&>(), __buf, __count, __peer, __stream));
+
+// Satisfied when __comm.recv(group_guard_type&, _Ptr, size_t, int32_t, stream_ref) is well-formed and returns void.
+template <class _Comm, class _Ptr = void*> // not gated on __communicator<_Comm>: that would be a circular dependency
+_CCCL_CONCEPT __has_recv = _CCCL_REQUIRES_EXPR(
+  (_Comm, _Ptr),
+  _Comm& __comm, // the communicator is taken as a non-const lvalue
+  _Ptr __buf,
+  ::cuda::std::size_t __count,
+  ::cuda::std::int32_t __peer,
+  ::cuda::stream_ref __stream)(_Same_as(void) __comm.recv(
+  ::cuda::std::declval<typename _Comm::group_guard_type&>(), __buf, __count, __peer, __stream));
+
+template <class _Comm> // base requirements shared by every collective concept below
+_CCCL_CONCEPT __communicator = _CCCL_REQUIRES_EXPR((_Comm), _Comm& __comm)(
+  typename(typename _Comm::native_handle_type), // nested handle type must exist
+  _Same_as(typename _Comm::native_handle_type) __comm.native_handle(), // returns exactly that type
+  noexcept(__comm.native_handle()), // and must not throw
+  _Satisfies(__convertible_to_int32) __comm.rank(), // result convertible to int32_t
+  _Satisfies(__convertible_to_int32) __comm.size(), // result convertible to int32_t
+  typename(typename _Comm::group_guard_type), // nested guard type must exist
+  _Same_as(typename _Comm::group_guard_type) __comm.group_guard(), // returns the guard by value
+  requires(__has_send<_Comm>), // checked with the default _Ptr = void*
+  requires(__has_recv<_Comm>) //
+);
+
+// ==========================================================================================
+
+// Use a typed pointer as default here, since the op may need to be instantiated with a
+// dereferenceable pointer type for reductions
+template <class _Comm, class _Ptr = int*>
+_CCCL_CONCEPT __has_reduce = _CCCL_REQUIRES_EXPR(
+  (_Comm, _Ptr),
+  _Comm& __comm,
+  _Ptr __sendbuff, // send and receive buffers share the same pointer type
+  _Ptr __recvbuff,
+  ::cuda::std::size_t __count,
+  ::cuda::std::plus<> __op, // the check is performed with cuda::std::plus<> as the operation
+  ::cuda::std::int32_t __root,
+  ::cuda::stream_ref __stream)(
+  requires(__communicator<_Comm>), // _Comm must first model __communicator
+  _Same_as(void) __comm.reduce( // the call must be well-formed and return void
+    ::cuda::std::declval<typename _Comm::group_guard_type&>(), __sendbuff, __recvbuff, __count, __op, __root, __stream));
+
+// ==========================================================================================
+
+// Use a typed pointer as default here, since the op may need to be instantiated with a
+// dereferenceable pointer type for reductions
+template <class _Comm, class _Ptr = int*>
+_CCCL_CONCEPT __has_all_reduce = _CCCL_REQUIRES_EXPR(
+  (_Comm, _Ptr),
+  _Comm& __comm,
+  _Ptr __sendbuff, // send and receive buffers share the same pointer type
+  _Ptr __recvbuff,
+  ::cuda::std::size_t __count,
+  ::cuda::std::plus<> __op, // the check is performed with cuda::std::plus<> as the operation
+  ::cuda::stream_ref __stream)(
+  requires(__communicator<_Comm>), // _Comm must first model __communicator
+  _Same_as(void) __comm.all_reduce( // the call must be well-formed and return void; no root argument
+    ::cuda::std::declval<typename _Comm::group_guard_type&>(), __sendbuff, __recvbuff, __count, __op, __stream));
+
+// ==========================================================================================
+
+template <class _Comm, class _Ptr = void*> // _Ptr: buffer pointer type used for the check
+_CCCL_CONCEPT __has_gather = _CCCL_REQUIRES_EXPR(
+  (_Comm, _Ptr),
+  _Comm& __comm,
+  _Ptr __sendbuff, // send and receive buffers share the same pointer type
+  _Ptr __recvbuff,
+  ::cuda::std::size_t __count,
+  ::cuda::std::int32_t __root,
+  ::cuda::stream_ref __stream)(
+  requires(__communicator<_Comm>), // _Comm must first model __communicator
+  _Same_as(void) __comm.gather( // the call must be well-formed and return void
+    ::cuda::std::declval<typename _Comm::group_guard_type&>(), __sendbuff, __recvbuff, __count, __root, __stream));
+
+// ==========================================================================================
+
+template <class _Comm, class _Ptr = void*> // _Ptr: buffer pointer type used for the check
+_CCCL_CONCEPT __has_gather_v = _CCCL_REQUIRES_EXPR(
+  (_Comm, _Ptr),
+  _Comm& __comm,
+  _Ptr __sendbuff,
+  ::cuda::std::size_t __send_count, // a single count for the send side
+  _Ptr __recvbuff,
+  const ::cuda::std::size_t* __recv_counts, // counts and displacements are passed as pointers to const size_t
+  const ::cuda::std::size_t* __displs,
+  ::cuda::std::int32_t __root,
+  ::cuda::stream_ref __stream)(
+  requires(__communicator<_Comm>), // _Comm must first model __communicator
+  _Same_as(void) __comm.gather_v( // the call must be well-formed and return void
+    ::cuda::std::declval<typename _Comm::group_guard_type&>(),
+    __sendbuff,
+    __send_count,
+    __recvbuff,
+    __recv_counts,
+    __displs,
+    __root,
+    __stream));
+
+// ==========================================================================================
+
+template <class _Comm, class _Ptr = void*> // _Ptr: buffer pointer type used for the check
+_CCCL_CONCEPT __has_all_gather = _CCCL_REQUIRES_EXPR(
+  (_Comm, _Ptr),
+  _Comm& __comm,
+  _Ptr __sendbuff, // send and receive buffers share the same pointer type
+  _Ptr __recvbuff,
+  ::cuda::std::size_t __count,
+  ::cuda::stream_ref __stream)(
+  requires(__communicator<_Comm>), // _Comm must first model __communicator
+  _Same_as(void) __comm.all_gather( // the call must be well-formed and return void; no root argument
+    ::cuda::std::declval<typename _Comm::group_guard_type&>(), __sendbuff, __recvbuff, __count, __stream));
+
+// ==========================================================================================
+
+template <class _Comm, class _Ptr = void*> // _Ptr: buffer pointer type used for the check
+_CCCL_CONCEPT __has_broadcast = _CCCL_REQUIRES_EXPR(
+  (_Comm, _Ptr),
+  _Comm& __comm,
+  _Ptr __sendbuff, // send and receive buffers share the same pointer type
+  _Ptr __recvbuff,
+  ::cuda::std::size_t __count,
+  ::cuda::std::int32_t __root,
+  ::cuda::stream_ref __stream)(
+  requires(__communicator<_Comm>), // _Comm must first model __communicator
+  _Same_as(void) __comm.broadcast( // the call must be well-formed and return void
+    ::cuda::std::declval<typename _Comm::group_guard_type&>(), __sendbuff, __recvbuff, __count, __root, __stream));
+
+// ==========================================================================================
+
+template <class _Comm, class _Ptr = void*> // _Ptr: buffer pointer type used for the check
+_CCCL_CONCEPT __has_all_to_all = _CCCL_REQUIRES_EXPR(
+  (_Comm, _Ptr),
+  _Comm& __comm,
+  _Ptr __sendbuff, // send and receive buffers share the same pointer type
+  _Ptr __recvbuff,
+  ::cuda::std::size_t __count,
+  ::cuda::stream_ref __stream)(
+  requires(__communicator<_Comm>), // _Comm must first model __communicator
+  _Same_as(void) __comm.all_to_all( // the call must be well-formed and return void; no root argument
+    ::cuda::std::declval<typename _Comm::group_guard_type&>(), __sendbuff, __recvbuff, __count, __stream));
+
+// ==========================================================================================
+
+template <class _Comm, class _Ptr = void*> // _Ptr: buffer pointer type used for the check
+_CCCL_CONCEPT __has_all_to_all_v = _CCCL_REQUIRES_EXPR(
+  (_Comm, _Ptr),
+  _Comm& __comm,
+  _Ptr __sendbuff,
+  const ::cuda::std::size_t* __send_counts, // send-side counts and displacements, pointers to const size_t
+  const ::cuda::std::size_t* __send_displs,
+  _Ptr __recvbuff,
+  const ::cuda::std::size_t* __recv_counts, // receive-side counts and displacements, pointers to const size_t
+  const ::cuda::std::size_t* __recv_displs,
+  ::cuda::stream_ref __stream)(
+  requires(__communicator<_Comm>), // _Comm must first model __communicator
+  _Same_as(void) __comm.all_to_all_v( // the call must be well-formed and return void
+    ::cuda::std::declval<typename _Comm::group_guard_type&>(),
+    __sendbuff,
+    __send_counts,
+    __send_displs,
+    __recvbuff,
+    __recv_counts,
+    __recv_displs,
+    __stream));
+} // namespace cuda::experimental
+// NOLINTEND(bugprone-reserved-identifier)
+
+#include <cuda/std/__cccl/epilogue.h>
+
+#endif // _CUDA_EXPERIMENTAL___MULTI_GPU_CONCEPTS_H
diff --git a/cudax/test/CMakeLists.txt b/cudax/test/CMakeLists.txt
index 01bf161ac24..f4c8268a6e6 100644
--- a/cudax/test/CMakeLists.txt
+++ b/cudax/test/CMakeLists.txt
@@ -141,6 +141,8 @@ cudax_add_catch2_test(test_target algorithm
     algorithm/copy.cu
 )
 
+add_subdirectory(multi_gpu)
+
 cudax_add_catch2_test(test_target group.mapping.binary_partition
     group/mapping/binary_partition.cu
 )
diff --git a/cudax/test/multi_gpu/CMakeLists.txt b/cudax/test/multi_gpu/CMakeLists.txt
new file mode 100644
index 00000000000..8fff0626dff
--- /dev/null
+++ b/cudax/test/multi_gpu/CMakeLists.txt
@@ -0,0 +1,11 @@
+#===----------------------------------------------------------------------===##
+#
+# Part of CUDA Experimental in CUDA C++ Core Libraries,
+# under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+#
+#===----------------------------------------------------------------------===##
+
+add_subdirectory(concepts)
diff --git a/cudax/test/multi_gpu/concepts/CMakeLists.txt b/cudax/test/multi_gpu/concepts/CMakeLists.txt
new file mode 100644
index 00000000000..dd2aa696722
--- /dev/null
+++ b/cudax/test/multi_gpu/concepts/CMakeLists.txt
@@ -0,0 +1,38 @@
+#===----------------------------------------------------------------------===##
+#
+# Part of CUDA Experimental in CUDA C++ Core Libraries,
+# under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+#
+#===----------------------------------------------------------------------===##
+
+set( # one test executable is created per entry by the foreach() loop at the end of this file
+  multi_gpu_concepts_test_sources
+  communicator.cu
+  has_all_gather.cu
+  has_all_reduce.cu
+  has_all_to_all.cu
+  has_all_to_all_v.cu
+  has_broadcast.cu
+  has_gather.cu
+  has_gather_v.cu
+  has_recv.cu
+  has_reduce.cu
+  has_send.cu
+)
+
+function(cudax_add_multi_gpu_concepts_test target_name_var source) # target_name_var: name of the output variable
+  cmake_path(GET source STEM filename) # file name of the source without its extension
+  set(test_target "cudax.test.multi_gpu.concepts.${filename}") # e.g. cudax.test.multi_gpu.concepts.has_send
+
+  cccl_add_executable(${test_target} SOURCES "${source}" ADD_CTEST) # project helper defined outside this file
+  target_link_libraries(${test_target} PRIVATE cudax.compiler_interface)
+
+  set(${target_name_var} ${test_target} PARENT_SCOPE) # hand the generated target name back to the caller
+endfunction()
+
+foreach (source IN LISTS multi_gpu_concepts_test_sources) # register every listed source as its own test
+  cudax_add_multi_gpu_concepts_test(test_target "${source}") # test_target receives the created target name
+endforeach()
diff --git a/cudax/test/multi_gpu/concepts/collective_concepts_common.cuh b/cudax/test/multi_gpu/concepts/collective_concepts_common.cuh
new file mode 100644
index 00000000000..bded0d82c7d
--- /dev/null
+++ b/cudax/test/multi_gpu/concepts/collective_concepts_common.cuh
@@ -0,0 +1,66 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of CUDA Experimental in CUDA C++ Core Libraries,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _CUDAX_TEST_MULTI_GPU_COLLECTIVE_CONCEPTS_COMMON_CUH
+#define _CUDAX_TEST_MULTI_GPU_COLLECTIVE_CONCEPTS_COMMON_CUH
+
+#include <cuda/__stream/stream_ref.h>
+#include <cuda/std/__cstddef/types.h>
+#include <cuda/std/cstdint>
+
+#include "concepts_common.cuh"
+
+namespace cudax_multi_gpu_concepts
+{
+struct collective_communicator_model : communicator_model // adds every collective on top of send/recv
+{
+  template <class Tp, class Op> // declaration only: the tests never call these members at run time
+  void reduce(group_guard_type&, Tp*, Tp*, ::cuda::std::size_t, Op, ::cuda::std::int32_t, ::cuda::stream_ref);
+
+  template <class Tp, class Op>
+  void all_reduce(group_guard_type&, Tp*, Tp*, ::cuda::std::size_t, Op, ::cuda::stream_ref);
+
+  template <class Tp>
+  void gather(group_guard_type&, Tp*, Tp*, ::cuda::std::size_t, ::cuda::std::int32_t, ::cuda::stream_ref);
+
+  template <class Tp>
+  void gather_v( // parameter order mirrors the call checked by cuda::experimental::__has_gather_v
+    group_guard_type&,
+    Tp*,
+    ::cuda::std::size_t,
+    Tp*,
+    const ::cuda::std::size_t*,
+    const ::cuda::std::size_t*,
+    ::cuda::std::int32_t,
+    ::cuda::stream_ref);
+
+  template <class Tp>
+  void all_gather(group_guard_type&, Tp*, Tp*, ::cuda::std::size_t, ::cuda::stream_ref);
+
+  template <class Tp>
+  void broadcast(group_guard_type&, Tp*, Tp*, ::cuda::std::size_t, ::cuda::std::int32_t, ::cuda::stream_ref);
+
+  template <class Tp>
+  void all_to_all(group_guard_type&, Tp*, Tp*, ::cuda::std::size_t, ::cuda::stream_ref);
+
+  template <class Tp>
+  void all_to_all_v( // parameter order mirrors the call checked by cuda::experimental::__has_all_to_all_v
+    group_guard_type&,
+    Tp*,
+    const ::cuda::std::size_t*,
+    const ::cuda::std::size_t*,
+    Tp*,
+    const ::cuda::std::size_t*,
+    const ::cuda::std::size_t*,
+    ::cuda::stream_ref);
+};
+} // namespace cudax_multi_gpu_concepts
+
+#endif // _CUDAX_TEST_MULTI_GPU_COLLECTIVE_CONCEPTS_COMMON_CUH
diff --git a/cudax/test/multi_gpu/concepts/communicator.cu b/cudax/test/multi_gpu/concepts/communicator.cu
new file mode 100644
index 00000000000..632c2bb99da
--- /dev/null
+++ b/cudax/test/multi_gpu/concepts/communicator.cu
@@ -0,0 +1,51 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of CUDA Experimental in CUDA C++ Core Libraries,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#include <cuda/experimental/__multi_gpu/concepts.h>
+
+#include "concepts_common.cuh"
+
+namespace
+{
+namespace cudax = ::cuda::experimental;
+namespace types = cudax_multi_gpu_concepts;
+
+// nvcc ignores [[maybe_unused]] entirely, so diagnostic 177 is suppressed around the test models instead
+_CCCL_BEGIN_NV_DIAG_SUPPRESS(177)
+
+struct no_send : types::basic_communicator_model // has recv() but no send()
+{
+  template <class Tp>
+  void recv(group_guard_type&, Tp*, ::cuda::std::size_t, ::cuda::std::int32_t, ::cuda::stream_ref);
+};
+
+struct no_recv : types::basic_communicator_model // has send() but no recv()
+{
+  template <class Tp>
+  void send(group_guard_type&, Tp*, ::cuda::std::size_t, ::cuda::std::int32_t, ::cuda::stream_ref);
+};
+
+_CCCL_END_NV_DIAG_SUPPRESS()
+
+_CCCL_HOST_DEVICE_API constexpr bool test() // all checks are compile-time; always returns true
+{
+  static_assert(cudax::__communicator<types::communicator_model>); // full model: handle, rank, size, guard, send, recv
+  static_assert(!cudax::__communicator<no_send>); // missing send() must be rejected
+  static_assert(!cudax::__communicator<no_recv>); // missing recv() must be rejected
+  return true;
+}
+} // namespace
+
+int main(int, char**) // arguments are unused
+{
+  test(); // ordinary call; the checks inside test() are static_asserts
+  static_assert(test()); // test() must also evaluate to true in a constant expression
+  return 0;
+}
diff --git a/cudax/test/multi_gpu/concepts/concepts_common.cuh b/cudax/test/multi_gpu/concepts/concepts_common.cuh
new file mode 100644
index 00000000000..5fbac3b794d
--- /dev/null
+++ b/cudax/test/multi_gpu/concepts/concepts_common.cuh
@@ -0,0 +1,43 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of CUDA Experimental in CUDA C++ Core Libraries,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _CUDAX_TEST_MULTI_GPU_CONCEPTS_COMMON_CUH
+#define _CUDAX_TEST_MULTI_GPU_CONCEPTS_COMMON_CUH
+
+#include <cuda/__stream/stream_ref.h>
+#include <cuda/std/__cstddef/types.h>
+#include <cuda/std/cstdint>
+
+namespace cudax_multi_gpu_concepts
+{
+struct group_guard // empty type used as basic_communicator_model::group_guard_type
+{};
+
+struct basic_communicator_model // minimal model: handle, rank, size and guard accessors, but no send()/recv()
+{
+  using native_handle_type = int;
+  using group_guard_type   = ::cudax_multi_gpu_concepts::group_guard; // qualified: member group_guard() below reuses the name
+
+  native_handle_type native_handle() noexcept; // declaration only: the tests never call these at run time
+  ::cuda::std::int32_t rank() noexcept;
+  ::cuda::std::int32_t size() noexcept;
+  group_guard_type group_guard(); // returns the guard by value; hides the namespace-scope struct inside this class
+};
+
+struct communicator_model : basic_communicator_model // adds the point-to-point send()/recv() members
+{
+  template <class Tp> // declaration only: the tests never call these members at run time
+  void send(group_guard_type&, Tp*, ::cuda::std::size_t, ::cuda::std::int32_t, ::cuda::stream_ref);
+  template <class Tp>
+  void recv(group_guard_type&, Tp*, ::cuda::std::size_t, ::cuda::std::int32_t, ::cuda::stream_ref);
+};
+} // namespace cudax_multi_gpu_concepts
+
+#endif // _CUDAX_TEST_MULTI_GPU_CONCEPTS_COMMON_CUH
diff --git a/cudax/test/multi_gpu/concepts/has_all_gather.cu b/cudax/test/multi_gpu/concepts/has_all_gather.cu
new file mode 100644
index 00000000000..3773d3dfb0c
--- /dev/null
+++ b/cudax/test/multi_gpu/concepts/has_all_gather.cu
@@ -0,0 +1,50 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of CUDA Experimental in CUDA C++ Core Libraries,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#include <cuda/__stream/stream_ref.h>
+#include <cuda/std/__cstddef/types.h>
+
+#include <cuda/experimental/__multi_gpu/concepts.h>
+
+#include "collective_concepts_common.cuh"
+
+namespace
+{
+namespace cudax = ::cuda::experimental;
+namespace types = cudax_multi_gpu_concepts;
+
+// nvcc ignores [[maybe_unused]] entirely, so diagnostic 177 is suppressed around the test model instead
+_CCCL_BEGIN_NV_DIAG_SUPPRESS(177)
+
+struct all_gather_returns_int : types::communicator_model // all_gather() with the right parameters, wrong return type
+{
+  template <class Tp>
+  int all_gather(group_guard_type&, Tp*, Tp*, ::cuda::std::size_t, ::cuda::stream_ref);
+};
+
+_CCCL_END_NV_DIAG_SUPPRESS()
+
+_CCCL_HOST_DEVICE_API constexpr bool test() // all checks are compile-time; always returns true
+{
+  static_assert(cudax::__has_all_gather<types::collective_communicator_model>); // default _Ptr = void*
+  static_assert(cudax::__has_all_gather<types::collective_communicator_model, long*>); // typed pointer
+  static_assert(!cudax::__has_all_gather<types::communicator_model>); // no all_gather() member at all
+
+  static_assert(!cudax::__has_all_gather<all_gather_returns_int>); // return type must be exactly void
+  return true;
+}
+} // namespace
+
+int main(int, char**) // arguments are unused
+{
+  test(); // ordinary call; the checks inside test() are static_asserts
+  static_assert(test()); // test() must also evaluate to true in a constant expression
+  return 0;
+}
diff --git a/cudax/test/multi_gpu/concepts/has_all_reduce.cu b/cudax/test/multi_gpu/concepts/has_all_reduce.cu
new file mode 100644
index 00000000000..e007ef5d0c7
--- /dev/null
+++ b/cudax/test/multi_gpu/concepts/has_all_reduce.cu
@@ -0,0 +1,60 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of CUDA Experimental in CUDA C++ Core Libraries,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#include <cuda/__stream/stream_ref.h>
+#include <cuda/std/__cstddef/types.h>
+#include <cuda/std/__type_traits/enable_if.h>
+#include <cuda/std/__type_traits/is_void.h>
+
+#include <cuda/experimental/__multi_gpu/concepts.h>
+
+#include "collective_concepts_common.cuh"
+
+namespace
+{
+namespace cudax = ::cuda::experimental;
+namespace types = cudax_multi_gpu_concepts;
+
+// nvcc ignores [[maybe_unused]] entirely, so diagnostic 177 is suppressed around the test models instead
+_CCCL_BEGIN_NV_DIAG_SUPPRESS(177)
+
+struct all_reduce_rejects_void : types::communicator_model // all_reduce() is SFINAE-disabled when Tp is void
+{
+  template <class Tp, class Op, ::cuda::std::enable_if_t<!::cuda::std::is_void_v<Tp>, int> = 0>
+  void all_reduce(group_guard_type&, Tp*, Tp*, ::cuda::std::size_t, Op, ::cuda::stream_ref);
+};
+
+struct all_reduce_returns_int : types::communicator_model // all_reduce() with the right parameters, wrong return type
+{
+  template <class Tp, class Op>
+  int all_reduce(group_guard_type&, Tp*, Tp*, ::cuda::std::size_t, Op, ::cuda::stream_ref);
+};
+
+_CCCL_END_NV_DIAG_SUPPRESS()
+
+_CCCL_HOST_DEVICE_API constexpr bool test() // all checks are compile-time; always returns true
+{
+  static_assert(cudax::__has_all_reduce<types::collective_communicator_model>); // default _Ptr = int*
+  static_assert(cudax::__has_all_reduce<types::collective_communicator_model, long*>); // another typed pointer
+  static_assert(!cudax::__has_all_reduce<types::communicator_model>); // no all_reduce() member at all
+
+  static_assert(!cudax::__has_all_reduce<all_reduce_returns_int>); // return type must be exactly void
+  static_assert(cudax::__has_all_reduce<all_reduce_rejects_void, int*>); // constrained overload accepts int*
+  static_assert(!cudax::__has_all_reduce<all_reduce_rejects_void, void*>); // result depends on the _Ptr argument
+  return true;
+}
+} // namespace
+
+int main(int, char**) // arguments are unused
+{
+  test(); // ordinary call; the checks inside test() are static_asserts
+  static_assert(test()); // test() must also evaluate to true in a constant expression
+  return 0;
+}
diff --git a/cudax/test/multi_gpu/concepts/has_all_to_all.cu b/cudax/test/multi_gpu/concepts/has_all_to_all.cu
new file mode 100644
index 00000000000..78a3f4f29e7
--- /dev/null
+++ b/cudax/test/multi_gpu/concepts/has_all_to_all.cu
@@ -0,0 +1,50 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of CUDA Experimental in CUDA C++ Core Libraries,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#include <cuda/__stream/stream_ref.h>
+#include <cuda/std/__cstddef/types.h>
+
+#include <cuda/experimental/__multi_gpu/concepts.h>
+
+#include "collective_concepts_common.cuh"
+
+namespace
+{
+namespace cudax = ::cuda::experimental;
+namespace types = cudax_multi_gpu_concepts;
+
+// nvcc ignores [[maybe_unused]] entirely, so diagnostic 177 is suppressed around the test model instead
+_CCCL_BEGIN_NV_DIAG_SUPPRESS(177)
+
+struct all_to_all_returns_int : types::communicator_model // all_to_all() with the right parameters, wrong return type
+{
+  template <class Tp>
+  int all_to_all(group_guard_type&, Tp*, Tp*, ::cuda::std::size_t, ::cuda::stream_ref);
+};
+
+_CCCL_END_NV_DIAG_SUPPRESS()
+
+_CCCL_HOST_DEVICE_API constexpr bool test() // all checks are compile-time; always returns true
+{
+  static_assert(cudax::__has_all_to_all<types::collective_communicator_model>); // default _Ptr = void*
+  static_assert(cudax::__has_all_to_all<types::collective_communicator_model, long*>); // typed pointer
+  static_assert(!cudax::__has_all_to_all<types::communicator_model>); // no all_to_all() member at all
+
+  static_assert(!cudax::__has_all_to_all<all_to_all_returns_int>); // return type must be exactly void
+  return true;
+}
+} // namespace
+
+int main(int, char**) // arguments are unused
+{
+  test(); // ordinary call; the checks inside test() are static_asserts
+  static_assert(test()); // test() must also evaluate to true in a constant expression
+  return 0;
+}
diff --git a/cudax/test/multi_gpu/concepts/has_all_to_all_v.cu b/cudax/test/multi_gpu/concepts/has_all_to_all_v.cu
new file mode 100644
index 00000000000..366dcfba042
--- /dev/null
+++ b/cudax/test/multi_gpu/concepts/has_all_to_all_v.cu
@@ -0,0 +1,58 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of CUDA Experimental in CUDA C++ Core Libraries,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#include <cuda/__stream/stream_ref.h>
+#include <cuda/std/__cstddef/types.h>
+
+#include <cuda/experimental/__multi_gpu/concepts.h>
+
+#include "collective_concepts_common.cuh"
+
+namespace
+{
+namespace cudax = ::cuda::experimental;
+namespace types = cudax_multi_gpu_concepts;
+
+// nvcc ignores [[maybe_unused]] entirely, so diagnostic 177 is suppressed around the test model instead
+_CCCL_BEGIN_NV_DIAG_SUPPRESS(177)
+
+struct all_to_all_v_returns_int : types::communicator_model // right parameters, wrong return type
+{
+  template <class Tp>
+  int all_to_all_v(
+    group_guard_type&,
+    Tp*,
+    const ::cuda::std::size_t*,
+    const ::cuda::std::size_t*,
+    Tp*,
+    const ::cuda::std::size_t*,
+    const ::cuda::std::size_t*,
+    ::cuda::stream_ref);
+};
+
+_CCCL_END_NV_DIAG_SUPPRESS()
+
+_CCCL_HOST_DEVICE_API constexpr bool test() // all checks are compile-time; always returns true
+{
+  static_assert(cudax::__has_all_to_all_v<types::collective_communicator_model>); // default _Ptr = void*
+  static_assert(cudax::__has_all_to_all_v<types::collective_communicator_model, long*>); // typed pointer
+  static_assert(!cudax::__has_all_to_all_v<types::communicator_model>); // no all_to_all_v() member at all
+
+  static_assert(!cudax::__has_all_to_all_v<all_to_all_v_returns_int>); // return type must be exactly void
+  return true;
+}
+} // namespace
+
+int main(int, char**) // arguments are unused
+{
+  test(); // ordinary call; the checks inside test() are static_asserts
+  static_assert(test()); // test() must also evaluate to true in a constant expression
+  return 0;
+}
diff --git a/cudax/test/multi_gpu/concepts/has_broadcast.cu b/cudax/test/multi_gpu/concepts/has_broadcast.cu
new file mode 100644
index 00000000000..9cfa38efe2b
--- /dev/null
+++ b/cudax/test/multi_gpu/concepts/has_broadcast.cu
@@ -0,0 +1,51 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of CUDA Experimental in CUDA C++ Core Libraries,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#include <cuda/__stream/stream_ref.h>
+#include <cuda/std/__cstddef/types.h>
+#include <cuda/std/cstdint>
+
+#include <cuda/experimental/__multi_gpu/concepts.h>
+
+#include "collective_concepts_common.cuh"
+
+namespace
+{
+namespace cudax = ::cuda::experimental;
+namespace types = cudax_multi_gpu_concepts;
+
+// nvcc ignores [[maybe_unused]] entirely, so diagnostic 177 is suppressed around the test model instead
+_CCCL_BEGIN_NV_DIAG_SUPPRESS(177)
+
+struct broadcast_returns_int : types::communicator_model // broadcast() with the right parameters, wrong return type
+{
+  template <class Tp>
+  int broadcast(group_guard_type&, Tp*, Tp*, ::cuda::std::size_t, ::cuda::std::int32_t, ::cuda::stream_ref);
+};
+
+_CCCL_END_NV_DIAG_SUPPRESS()
+
+_CCCL_HOST_DEVICE_API constexpr bool test() // all checks are compile-time; always returns true
+{
+  static_assert(cudax::__has_broadcast<types::collective_communicator_model>); // default _Ptr = void*
+  static_assert(cudax::__has_broadcast<types::collective_communicator_model, long*>); // typed pointer
+  static_assert(!cudax::__has_broadcast<types::communicator_model>); // no broadcast() member at all
+
+  static_assert(!cudax::__has_broadcast<broadcast_returns_int>); // return type must be exactly void
+  return true;
+}
+} // namespace
+
+int main(int, char**) // arguments are unused
+{
+  test(); // ordinary call; the checks inside test() are static_asserts
+  static_assert(test()); // test() must also evaluate to true in a constant expression
+  return 0;
+}
diff --git a/cudax/test/multi_gpu/concepts/has_gather.cu b/cudax/test/multi_gpu/concepts/has_gather.cu
new file mode 100644
index 00000000000..fe7fab4e2b5
--- /dev/null
+++ b/cudax/test/multi_gpu/concepts/has_gather.cu
@@ -0,0 +1,51 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of CUDA Experimental in CUDA C++ Core Libraries,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#include <cuda/__stream/stream_ref.h>
+#include <cuda/std/__cstddef/types.h>
+#include <cuda/std/cstdint>
+
+#include <cuda/experimental/__multi_gpu/concepts.h>
+
+#include "collective_concepts_common.cuh"
+
+namespace
+{
+namespace cudax = ::cuda::experimental;
+namespace types = cudax_multi_gpu_concepts;
+
+// nvcc ignores [[maybe_unused]] entirely, so diagnostic 177 is suppressed around the test model instead
+_CCCL_BEGIN_NV_DIAG_SUPPRESS(177)
+
+struct gather_returns_int : types::communicator_model // gather() with the right parameters, wrong return type
+{
+  template <class Tp>
+  int gather(group_guard_type&, Tp*, Tp*, ::cuda::std::size_t, ::cuda::std::int32_t, ::cuda::stream_ref);
+};
+
+_CCCL_END_NV_DIAG_SUPPRESS()
+
+_CCCL_HOST_DEVICE_API constexpr bool test() // all checks are compile-time; always returns true
+{
+  static_assert(cudax::__has_gather<types::collective_communicator_model>); // default _Ptr = void*
+  static_assert(cudax::__has_gather<types::collective_communicator_model, long*>); // typed pointer
+  static_assert(!cudax::__has_gather<types::communicator_model>); // no gather() member at all
+
+  static_assert(!cudax::__has_gather<gather_returns_int>); // return type must be exactly void
+  return true;
+}
+} // namespace
+
+int main(int, char**) // arguments are unused
+{
+  test(); // ordinary call; the checks inside test() are static_asserts
+  static_assert(test()); // test() must also evaluate to true in a constant expression
+  return 0;
+}
diff --git a/cudax/test/multi_gpu/concepts/has_gather_v.cu b/cudax/test/multi_gpu/concepts/has_gather_v.cu
new file mode 100644
index 00000000000..aac88772771
--- /dev/null
+++ b/cudax/test/multi_gpu/concepts/has_gather_v.cu
@@ -0,0 +1,58 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of CUDA Experimental in CUDA C++ Core Libraries,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#include <cuda/__stream/stream_ref.h>
+#include <cuda/std/__cstddef/types.h>
+#include <cuda/std/cstdint>
+
+#include <cuda/experimental/__multi_gpu/concepts.h>
+
+#include "collective_concepts_common.cuh"
+
+namespace
+{
+namespace cudax = ::cuda::experimental;
+namespace types = cudax_multi_gpu_concepts;
+
+// nvcc ignores [[maybe_unused]] entirely, so diagnostic 177 is suppressed around the test model instead
+_CCCL_BEGIN_NV_DIAG_SUPPRESS(177)
+
+struct gather_v_returns_int : types::communicator_model // gather_v() with the right parameters, wrong return type
+{
+  template <class Tp>
+  int gather_v(group_guard_type&,
+               Tp*,
+               ::cuda::std::size_t,
+               Tp*,
+               const ::cuda::std::size_t*,
+               const ::cuda::std::size_t*,
+               ::cuda::std::int32_t,
+               ::cuda::stream_ref);
+};
+
+_CCCL_END_NV_DIAG_SUPPRESS()
+
+_CCCL_HOST_DEVICE_API constexpr bool test() // all checks are compile-time; always returns true
+{
+  static_assert(cudax::__has_gather_v<types::collective_communicator_model>); // default _Ptr = void*
+  static_assert(cudax::__has_gather_v<types::collective_communicator_model, long*>); // typed pointer
+  static_assert(!cudax::__has_gather_v<types::communicator_model>); // no gather_v() member at all
+
+  static_assert(!cudax::__has_gather_v<gather_v_returns_int>); // return type must be exactly void
+  return true;
+}
+} // namespace
+
+int main(int, char**) // arguments are unused
+{
+  test(); // ordinary call; the checks inside test() are static_asserts
+  static_assert(test()); // test() must also evaluate to true in a constant expression
+  return 0;
+}
diff --git a/cudax/test/multi_gpu/concepts/has_recv.cu b/cudax/test/multi_gpu/concepts/has_recv.cu
new file mode 100644
index 00000000000..c9dc33455eb
--- /dev/null
+++ b/cudax/test/multi_gpu/concepts/has_recv.cu
@@ -0,0 +1,57 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of CUDA Experimental in CUDA C++ Core Libraries,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#include <cuda/__stream/stream_ref.h>
+#include <cuda/std/__cstddef/types.h>
+#include <cuda/std/cstdint>
+
+#include <cuda/experimental/__multi_gpu/concepts.h>
+
+#include "concepts_common.cuh"
+
+namespace
+{
+namespace cudax = ::cuda::experimental;
+namespace types = cudax_multi_gpu_concepts;
+
+// nvcc ignores [[maybe_unused]] entirely, so diagnostic 177 is suppressed around the test models instead
+_CCCL_BEGIN_NV_DIAG_SUPPRESS(177)
+
+struct recv_returns_int : types::basic_communicator_model // recv() with the right parameters, wrong return type
+{
+  template <class Tp>
+  int recv(group_guard_type&, Tp*, ::cuda::std::size_t, ::cuda::std::int32_t, ::cuda::stream_ref);
+};
+
+struct no_recv : types::basic_communicator_model // has send() but no recv()
+{
+  template <class Tp>
+  void send(group_guard_type&, Tp*, ::cuda::std::size_t, ::cuda::std::int32_t, ::cuda::stream_ref);
+};
+
+_CCCL_END_NV_DIAG_SUPPRESS()
+
+_CCCL_HOST_DEVICE_API constexpr bool test() // all checks are compile-time; always returns true
+{
+  static_assert(cudax::__has_recv<types::communicator_model>); // default _Ptr = void*
+  static_assert(cudax::__has_recv<types::communicator_model, long*>); // typed pointer
+  static_assert(!cudax::__has_recv<no_recv>); // no recv() member at all
+
+  static_assert(!cudax::__has_recv<recv_returns_int>); // return type must be exactly void
+  return true;
+}
+} // namespace
+
+int main(int, char**) // arguments are unused
+{
+  test(); // ordinary call; the checks inside test() are static_asserts
+  static_assert(test()); // test() must also evaluate to true in a constant expression
+  return 0;
+}
diff --git a/cudax/test/multi_gpu/concepts/has_reduce.cu b/cudax/test/multi_gpu/concepts/has_reduce.cu
new file mode 100644
index 00000000000..da2447f119d
--- /dev/null
+++ b/cudax/test/multi_gpu/concepts/has_reduce.cu
@@ -0,0 +1,51 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of CUDA Experimental in CUDA C++ Core Libraries,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#include <cuda/__stream/stream_ref.h>
+#include <cuda/std/__cstddef/types.h>
+#include <cuda/std/cstdint>
+
+#include <cuda/experimental/__multi_gpu/concepts.h>
+
+#include "collective_concepts_common.cuh"
+
+namespace
+{
+namespace types = cudax_multi_gpu_concepts;
+namespace cudax = ::cuda::experimental;
+
+// nvcc ignores [[maybe_unused]] entirely, so diagnostic 177 is suppressed for the declarations below
+_CCCL_BEGIN_NV_DIAG_SUPPRESS(177)
+
+struct reduce_returns_int : public types::communicator_model // reduce() is declared with an int return type
+{
+  template <class T, class Fn>
+  int reduce(group_guard_type&, T*, T*, ::cuda::std::size_t, Fn, ::cuda::std::int32_t, ::cuda::stream_ref);
+};
+
+_CCCL_END_NV_DIAG_SUPPRESS()
+
+_CCCL_HOST_DEVICE_API constexpr bool test()
+{
+  static_assert(!cudax::__has_reduce<reduce_returns_int>); // reduce() returning int is rejected
+  static_assert(!cudax::__has_reduce<types::communicator_model>);
+  // types::collective_communicator_model is accepted with the defaulted second argument and with long*
+  static_assert(cudax::__has_reduce<types::collective_communicator_model, long*>);
+  static_assert(cudax::__has_reduce<types::collective_communicator_model>);
+  return true;
+}
+} // namespace
+
+int main(int, char**)
+{
+  static_assert(test()); // evaluate test() inside a constant expression
+  test(); // and once more as an ordinary call
+  return 0;
+}
diff --git a/cudax/test/multi_gpu/concepts/has_send.cu b/cudax/test/multi_gpu/concepts/has_send.cu
new file mode 100644
index 00000000000..6fdb8ef7cac
--- /dev/null
+++ b/cudax/test/multi_gpu/concepts/has_send.cu
@@ -0,0 +1,57 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of CUDA Experimental in CUDA C++ Core Libraries,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#include <cuda/__stream/stream_ref.h>
+#include <cuda/std/__cstddef/types.h>
+#include <cuda/std/cstdint>
+
+#include <cuda/experimental/__multi_gpu/concepts.h>
+
+#include "concepts_common.cuh"
+
+namespace
+{
+namespace types = cudax_multi_gpu_concepts;
+namespace cudax = ::cuda::experimental;
+
+// nvcc ignores [[maybe_unused]] entirely, so diagnostic 177 is suppressed for the declarations below
+_CCCL_BEGIN_NV_DIAG_SUPPRESS(177)
+
+struct no_send : public types::basic_communicator_model // adds only send_sync(); declares no send() itself
+{
+  template <class T>
+  void send_sync(group_guard_type&, T*, ::cuda::std::size_t, ::cuda::std::int32_t);
+};
+
+struct send_returns_int : public no_send // send() is declared with an int return type
+{
+  template <class T>
+  int send(group_guard_type&, T*, ::cuda::std::size_t, ::cuda::std::int32_t, ::cuda::stream_ref);
+};
+
+_CCCL_END_NV_DIAG_SUPPRESS()
+
+_CCCL_HOST_DEVICE_API constexpr bool test()
+{
+  static_assert(!cudax::__has_send<send_returns_int>); // send() returning int is rejected
+  static_assert(!cudax::__has_send<no_send>);
+  // types::communicator_model is accepted with the defaulted pointer argument (void*) and with long*
+  static_assert(cudax::__has_send<types::communicator_model, long*>);
+  static_assert(cudax::__has_send<types::communicator_model>);
+  return true;
+}
+} // namespace
+
+int main(int, char**)
+{
+  static_assert(test()); // evaluate test() inside a constant expression
+  test(); // and once more as an ordinary call
+  return 0;
+}