diff --git a/.clang-tidy b/.clang-tidy index 45d7f1ec0..32d9dd2d0 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -3,6 +3,7 @@ WarningsAsErrors: '*' HeaderFilterRegex: '.' FormatStyle: file +TransformLambdas: none CheckOptions: - { key: readability-identifier-naming.ClassCase , value: lower_case } diff --git a/.gitlab-ci-correaa.yml b/.gitlab-ci-correaa.yml index 53c16b5b0..fa29359c2 100644 --- a/.gitlab-ci-correaa.yml +++ b/.gitlab-ci-correaa.yml @@ -283,16 +283,19 @@ clang++-18 mull-18: # mull-18 crashes - large-memory-space - non-shared interruptible: true - allow_failure: false + allow_failure: true script: - apt-get -qq update - - apt-get -qq install --no-install-recommends -y --quiet gnupg curl wget ca-certificates clang-18 cmake libclang-rt-18-dev make libboost-timer-dev libboost-serialization-dev > /dev/null + - apt-get -qq install --no-install-recommends -y --quiet gnupg curl wget ca-certificates clang-18 cmake file libclang-rt-18-dev make libboost-timer-dev libboost-serialization-dev > /dev/null - curl -1sLf 'https://dl.cloudsmith.io/public/mull-project/mull-stable/setup.deb.sh' | bash - apt-get -qq update - apt search mull - apt-get -qq install --no-install-recommends -y --quiet mull-18 libclang-cpp18 - mkdir build && cd build - clang++-18 --version + - which mull-runner-18 + - ls -all `which mull-runner-18` + - file `which mull-runner-18` - mull-runner-18 --version - CXX=clang++-18 cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS="-O3 -DNDEBUG -fpass-plugin=/usr/lib/mull-ir-frontend-18 -g -grecord-command-line" - cmake --build . --parallel 8 || cmake --build . --parallel 1 --verbose diff --git a/include/boost/multi/adaptors/cufft.hpp b/include/boost/multi/adaptors/cufft.hpp index b3ac00327..f696878fc 100644 --- a/include/boost/multi/adaptors/cufft.hpp +++ b/include/boost/multi/adaptors/cufft.hpp @@ -58,7 +58,7 @@ static auto cuda_get_error_enum(cufftResult error) -> char const* { } #define cufftSafeCall(err) implcufftSafeCall(err, __FILE__, __LINE__) -inline void implcufftSafeCall(cufftResult err, const char* file, const int line) { +inline void implcufftSafeCall(cufftResult err, char const* file, int const line) { if(CUFFT_SUCCESS != err) { std::cerr << "CUFFT error in file " << file << ", line " << line << "\nerror " << err << ": " << cuda_get_error_enum(err) << "\n"; // fprintf(stderr, "CUFFT error in file '%s', line %d\n %s\nerror %d: %s\nterminating!\n", __FILE__, __LINE__, err, @@ -97,7 +97,7 @@ class plan { Alloc alloc_; ::size_t workSize_ = 0; void* workArea_{}; - cufftHandle h_{}; // TODO(correaa) put this in a unique_ptr + std::array hs_{}; // TODO(correaa) put this in a unique_ptr std::array, DD + 1> which_iodims_{}; int first_howmany_{}; @@ -120,7 +120,7 @@ class plan { : alloc_{std::move(other.alloc_)}, workSize_{std::exchange(other.workSize_, {})}, workArea_{std::exchange(other.workArea_, {})}, - h_{std::exchange(other.h_, {})}, + hs_{std::exchange(other.hs_, {})}, which_iodims_{std::exchange(other.which_iodims_, {})}, first_howmany_{std::exchange(other.first_howmany_, {})} { // other.used_ = true; // moved-from object cannot be used @@ -141,26 +141,30 @@ class plan { auto const istride_tuple = in.strides(); auto const ostride_tuple = out.strides(); - using boost::multi::detail::get; - auto which_iodims = std::apply([](auto... elems) { - return std::array, sizeof...(elems) + 1>{ - // TODO(correaa) added one element to avoid problem with gcc 13 static analysis (out-of-bounds) - std::pair{ - get<0>(elems), - cufft_iodim64{get<1>(elems), get<2>(elems), get<3>(elems)} - } - ..., - std::pair{} - }; - }, - boost::multi::detail::tuple_zip(which, sizes_tuple, istride_tuple, ostride_tuple)); + using std::get; // boost::multi::detail::get; + auto which_iodims = std::apply( + [](auto... elems) { + return std::array /*, sizeof...(elems) + 1>*/ { + // TODO(correaa) added one element to avoid problem with gcc 13 static analysis (out-of-bounds) + std::pair /**/ { + get<0>(elems), + cufft_iodim64{get<1>(elems), get<2>(elems), get<3>(elems)} + } + ..., + std::pair{} + }; + }, + boost::multi::detail::tuple_zip(which, sizes_tuple, istride_tuple, ostride_tuple) + ); - std::stable_sort(which_iodims.begin(), which_iodims.end() - 1, [](auto const& alpha, auto const& omega) { return get<1>(alpha).is > get<1>(omega).is; }); + auto const part = std::stable_partition(which_iodims.begin(), which_iodims.end() - 1, [](auto elem) { return get<0>(elem); }); - auto const part = std::stable_partition(which_iodims.begin(), which_iodims.end() - 1, [](auto elem) { return std::get<0>(elem); }); + std::stable_sort(which_iodims.begin(), part, [](auto const& alpha, auto const& omega) { return get<1>(alpha).os > get<1>(omega).os; }); + std::stable_sort(part, which_iodims.end() - 1, [](auto const& alpha, auto const& omega) { return get<1>(alpha).os > get<1>(omega).os; }); std::array dims{}; - auto const dims_end = std::transform(which_iodims.begin(), part, dims.begin(), [](auto elem) { return elem.second; }); + + auto const dims_end = std::transform(which_iodims.begin(), part, dims.begin(), [](auto elem) { return elem.second; }); // std::array howmany_dims{}; // auto const howmany_dims_end = std::transform(part, which_iodims.end() -1, howmany_dims.begin(), [](auto elem) {return elem.second;}); @@ -189,6 +193,8 @@ class plan { assert(ostrides[idx - 1] >= ostrides[idx]); assert(ostrides[idx - 1] % ostrides[idx] == 0); onembed[idx] = ostrides[idx - 1] / ostrides[idx]; + + assert(istrides[idx - 1] >= istrides[idx]); assert(istrides[idx - 1] % istrides[idx] == 0); inembed[idx] = istrides[idx - 1] / istrides[idx]; } @@ -217,7 +223,7 @@ class plan { if constexpr(std::is_same_v) { assert(dims_end - dims.begin() < 4); // cufft cannot do 4D FFT cufftSafeCall(::cufftPlanMany( - /*cufftHandle *plan*/ &h_, + /*cufftHandle *plan*/ &hs_[0], /*int rank*/ dims_end - dims.begin(), /*int *n*/ ion.data(), /*int *inembed*/ inembed.data(), @@ -230,10 +236,10 @@ class plan { /*int batch*/ 1 // BATCH )); } else { - cufftSafeCall(cufftCreate(&h_)); - cufftSafeCall(cufftSetAutoAllocation(h_, false)); + cufftSafeCall(cufftCreate(&hs_[0])); + cufftSafeCall(cufftSetAutoAllocation(hs_[0], false)); cufftSafeCall(cufftMakePlanMany( - /*cufftHandle *plan*/ h_, + /*cufftHandle *plan*/ hs_[0], /*int rank*/ dims_end - dims.begin(), /*int *n*/ ion.data(), /*int *inembed*/ inembed.data(), @@ -246,14 +252,14 @@ class plan { /*int batch*/ 1, // BATCH /*size_t **/ &workSize_ )); - cufftSafeCall(cufftGetSize(h_, &workSize_)); + cufftSafeCall(cufftGetSize(hs_[0], &workSize_)); workArea_ = ::thrust::raw_pointer_cast(alloc_.allocate(workSize_)); static_assert(sizeof(Alloc) == 1000); // auto s = cudaMalloc(&workArea_, workSize_); // if(s != cudaSuccess) {throw std::runtime_error{"L212"};} - cufftSafeCall(cufftSetWorkArea(h_, workArea_)); + cufftSafeCall(cufftSetWorkArea(hs_[0], workArea_)); } - if(!h_) { + if(!hs_[0]) { throw std::runtime_error{"cufftPlanMany null"}; } return; @@ -264,7 +270,7 @@ class plan { if(first_howmany_ <= D - 1) { if constexpr(std::is_same_v) { // NOLINT(bugprone-branch-clone) workaround bug in DeepSource cufftSafeCall(::cufftPlanMany( - /*cufftHandle *plan*/ &h_, + /*cufftHandle *plan*/ &hs_[0], /*int rank*/ dims_end - dims.begin(), /*int *n*/ ion.data(), /*int *inembed*/ inembed.data(), @@ -277,10 +283,10 @@ class plan { /*int batch*/ which_iodims_[first_howmany_].second.n )); } else { - cufftSafeCall(cufftCreate(&h_)); - cufftSafeCall(cufftSetAutoAllocation(h_, false)); + cufftSafeCall(cufftCreate(&hs_[0])); + cufftSafeCall(cufftSetAutoAllocation(hs_[0], false)); cufftSafeCall(cufftMakePlanMany( - /*cufftHandle *plan*/ h_, + /*cufftHandle *plan*/ hs_[0], /*int rank*/ dims_end - dims.begin(), /*int *n*/ ion.data(), /*int *inembed*/ inembed.data(), @@ -293,11 +299,11 @@ class plan { /*int batch*/ which_iodims_[first_howmany_].second.n, /*size_t **/ &workSize_ )); - cufftSafeCall(cufftGetSize(h_, &workSize_)); + cufftSafeCall(cufftGetSize(hs_[0], &workSize_)); workArea_ = ::thrust::raw_pointer_cast(alloc_.allocate(workSize_)); - cufftSafeCall(cufftSetWorkArea(h_, workArea_)); + cufftSafeCall(cufftSetWorkArea(hs_[0], workArea_)); } - if(!h_) { + if(!hs_[0]) { throw std::runtime_error{"cufftPlanMany null"}; } ++first_howmany_; @@ -310,7 +316,7 @@ class plan { template void ExecZ2Z_(complex_type const* idata, complex_type* odata, int direction) const { // used_ = true; - cufftSafeCall(cufftExecZ2Z(h_, const_cast(idata), odata, direction)); // NOLINT(cppcoreguidelines-pro-type-const-cast) wrap legacy interface + cufftSafeCall(cufftExecZ2Z(hs_[0], const_cast(idata), odata, direction)); // NOLINT(cppcoreguidelines-pro-type-const-cast) wrap legacy interface // cudaDeviceSynchronize(); } @@ -333,7 +339,7 @@ class plan { for(int idx = 0; idx != which_iodims_[first_howmany_].second.n; ++idx) { // NOLINT(altera-unroll-loops,altera-id-dependent-backward-branch) cufftExecZ2Z( - h_, + hs_[0], const_cast(reinterpret_cast(::thrust::raw_pointer_cast(idata + idx * which_iodims_[first_howmany_].second.is))), // NOLINT(cppcoreguidelines-pro-type-const-cast,cppcoreguidelines-pro-type-reinterpret-cast) legacy interface reinterpret_cast(::thrust::raw_pointer_cast(odata + idx * which_iodims_[first_howmany_].second.os)), // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) legacy interface direction @@ -354,7 +360,7 @@ class plan { for(int idx = 0; idx != which_iodims_[first_howmany_].second.n; ++idx) { // NOLINT(altera-unroll-loops,altera-unroll-loops,altera-id-dependent-backward-branch) TODO(correaa) use an algorithm for(int jdx = 0; jdx != which_iodims_[first_howmany_ + 1].second.n; ++jdx) { // NOLINT(altera-unroll-loops,altera-unroll-loops,altera-id-dependent-backward-branch) TODO(correaa) use an algorithm cufftExecZ2Z( - h_, + hs_[0], const_cast(reinterpret_cast(::thrust::raw_pointer_cast(idata + idx * which_iodims_[first_howmany_].second.is + jdx * which_iodims_[first_howmany_ + 1].second.is))), // NOLINT(cppcoreguidelines-pro-type-const-cast,cppcoreguidelines-pro-type-reinterpret-cast) legacy interface reinterpret_cast(::thrust::raw_pointer_cast(odata + idx * which_iodims_[first_howmany_].second.os + jdx * which_iodims_[first_howmany_ + 1].second.os)), // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) legacy interface direction @@ -397,13 +403,9 @@ class plan { alloc_.deallocate(typename std::allocator_traits::pointer(reinterpret_cast(workArea_)), workSize_); } // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) legacy interface } - if(h_ != 0) { - cufftSafeCall(cufftDestroy(h_)); + if(hs_[0] != 0) { + cufftSafeCall(cufftDestroy(hs_[0])); } - // if(!used_) { - // std::cerr <<"Warning: cufft plan was never used\n"; - // std::terminate(); - // } } using size_type = int; diff --git a/include/boost/multi/adaptors/cufft/test/CMakeLists.txt b/include/boost/multi/adaptors/cufft/test/CMakeLists.txt index 4d1f8b2ad..4bd8f3ef1 100644 --- a/include/boost/multi/adaptors/cufft/test/CMakeLists.txt +++ b/include/boost/multi/adaptors/cufft/test/CMakeLists.txt @@ -66,7 +66,7 @@ include(CTest) include_directories(${CMAKE_BINARY_DIR}) # file(GLOB TEST_SRCS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cpp) -set(TEST_SRCS cufft.cpp) +set(TEST_SRCS cases.cpp cufft.cpp) foreach(TEST_FILE ${TEST_SRCS}) set(TEST_EXE "${TEST_FILE}.x") diff --git a/include/boost/multi/adaptors/cufft/test/cases.cpp b/include/boost/multi/adaptors/cufft/test/cases.cpp new file mode 100644 index 000000000..5281505ef --- /dev/null +++ b/include/boost/multi/adaptors/cufft/test/cases.cpp @@ -0,0 +1,65 @@ +// Copyright 2020-2025 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#include + +#include +#include + +#include + +#if(!(defined(__HIP_PLATFORM_AMD__) || defined(__HIP_PLATFORM_NVIDIA__))) && (!defined(__HIPCC__)) +#include +#else +#include +#endif + +#include +#include + +#include +#include + +#include + +namespace multi = boost::multi; + +using complex = thrust::complex; + +template<> +constexpr bool multi::force_element_trivial_default_construction> = true; + +auto main() -> int try { + complex const I{0.0, 1.0}; // NOLINT(readability-identifier-length) + + auto in_cpu = multi::array({20, 30, 50, 70}); + + std::generate( + in_cpu.elements().begin(), in_cpu.elements().end(), + [dist = std::normal_distribution<>{}, gen = std::mt19937(std::random_device{}())] () mutable { return dist(gen); } + ); + + auto const in_gpu = multi::thrust::cuda::array(in_cpu); + + auto const& in_cpu_view = in_cpu.transposed().rotated().transposed().unrotated().transposed(); + auto const& in_gpu_view = in_gpu.transposed().rotated().transposed().unrotated().transposed(); + + auto out_gpu = multi::thrust::cuda::array(in_gpu_view.extensions()); + auto out_cpu = multi::array(out_gpu.extensions()); + + assert(in_gpu_view.extensions() == out_gpu.extensions()); + + multi::fftw::dft_forward({true, true, true, false}, in_cpu_view, out_cpu); + + // multi::cufft::plan<4>({true, true, true, false}, in_gpu_view.layout(), out_gpu.layout()) + // .execute(in_gpu_view.base(), out_gpu.base(), multi::cufft::forward); + + // std::cout << out_cpu[2][3][4][5] << ' ' << out_gpu[2][3][4][5] << std::endl; + // BOOST_TEST( thrust::abs(out_cpu[2][3][4][5] - static_cast(out_gpu[2][3][4][5])) < 1e-6 ); + + return boost::report_errors(); +} catch(...) { + throw; + return 1; +} diff --git a/include/boost/multi/detail/layout.hpp b/include/boost/multi/detail/layout.hpp index 89f0769dc..b1e7cd972 100644 --- a/include/boost/multi/detail/layout.hpp +++ b/include/boost/multi/detail/layout.hpp @@ -104,6 +104,7 @@ class f_extensions_t { } public: + static constexpr dimensionality_type dimensionality = D; constexpr static dimensionality_type rank_v = D; using difference_type = typename extensions_t::difference_type; @@ -219,6 +220,7 @@ class f_extensions_t { }; constexpr auto elements() const { return elements_t{xs_.elements(), proj_}; } + constexpr auto num_elements() const { return xs_.num_elements(); } }; template diff --git a/test/extensions.cpp b/test/extensions.cpp index e64910f16..0f4bf1968 100644 --- a/test/extensions.cpp +++ b/test/extensions.cpp @@ -77,6 +77,8 @@ auto main() -> int { // NOLINT(bugprone-exception-escape,readability-function-c { auto x1d = multi::extensions_t<1>(3); + BOOST_TEST( multi::extensions_t<1>(3) == multi::extensions_t(3) ); + auto it = x1d.elements().begin(); BOOST_TEST( get<0>(*it) == 0 ); @@ -128,6 +130,8 @@ auto main() -> int { // NOLINT(bugprone-exception-escape,readability-function-c { multi::extensions_t<2> const x2d({4, 3}); + BOOST_TEST( multi::extensions_t<2>(4, 3) == multi::extensions_t(4, 3) ); + auto ll = [](auto xx, auto yy) { return xx + yy; }; @@ -375,6 +379,8 @@ auto main() -> int { // NOLINT(bugprone-exception-escape,readability-function-c { multi::extensions_t<3> const xs{3, 4, 5}; + BOOST_TEST(( multi::extensions_t<3>{3, 4, 5} == multi::extensions_t(3, 4, 5) )); + BOOST_TEST( xs.sub() == multi::extensions_t<2>(4, 5) ); static_assert(std::is_same_v::element>); } @@ -402,6 +408,7 @@ auto main() -> int { // NOLINT(bugprone-exception-escape,readability-function-c auto const& values = [](auto ii, auto jj) { return ii + jj; } ^ arr.extensions(); + BOOST_TEST( values.dimensionality == 2 ); BOOST_TEST( values.extensions() == arr.extensions() ); BOOST_TEST( *values.elements().begin() == 0 ); BOOST_TEST( values.elements().begin() < values.elements().end() );