diff --git a/CMakeLists.txt b/CMakeLists.txt index cfe1b2a5ef..33846d8a53 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -112,6 +112,11 @@ set(SEQUANT_INSTALL_CMAKEDIR "lib/cmake/sequant" ############################ option(SEQUANT_BENCHMARKS "Enable SeQuant benchmarks" ${PROJECT_IS_TOP_LEVEL}) option(SEQUANT_UTILITIES "Enable SeQuant utility programs" ${PROJECT_IS_TOP_LEVEL}) +option(SEQUANT_EVAL_TESTS "OBSOLETE: use SEQUANT_TILEDARRAY and SEQUANT_BTAS" OFF) +option(SEQUANT_BTAS "Enable BTAS eval backend" ${SEQUANT_EVAL_TESTS}) +add_feature_info(SEQUANT_EVAL_BTAS SEQUANT_BTAS "Enable BTAS eval backend") +option(SEQUANT_TILEDARRAY "Enable TiledArray eval backend" ${SEQUANT_EVAL_TESTS}) +add_feature_info(SEQUANT_EVAL_TILEDARRAY SEQUANT_TILEDARRAY "Enable TiledArray eval backend") option(SEQUANT_IWYU "Whether to use the include-what-you-use tool (if found)" OFF) option(SEQUANT_WARNINGS_AS_ERRORS "Whether to treat compiler warnings as errors" ${PROJECT_IS_TOP_LEVEL}) @@ -260,9 +265,9 @@ set(SeQuant_src SeQuant/core/container.hpp SeQuant/core/context.cpp SeQuant/core/context.hpp - SeQuant/core/eval_expr.cpp - SeQuant/core/eval_expr.hpp - SeQuant/core/eval_node.hpp + SeQuant/core/eval/eval_expr.cpp + SeQuant/core/eval/eval_expr.hpp + SeQuant/core/eval/eval_node.hpp SeQuant/core/export/compute_selection.cpp SeQuant/core/export/compute_selection.hpp SeQuant/core/export/context.cpp @@ -405,20 +410,44 @@ set_source_files_properties( ) ### optional prereqs -if (SEQUANT_EVAL_TESTS) - include(FindOrFetchTiledArray) -endif (SEQUANT_EVAL_TESTS) - -if (TARGET tiledarray) +set(SEQUANT_HAS_EVAL OFF) # do not build SQ/eval unless there is a backend +if (SEQUANT_TILEDARRAY) + if (NOT TARGET tiledarray) + include(FindOrFetchTiledArray) + endif() + set(SEQUANT_HAS_EVAL ON) set(SEQUANT_HAS_TILEDARRAY ON) +endif () +if (SEQUANT_BTAS) + if (NOT TARGET BTAS::BTAS) + include(FindOrFetchBTAS) + endif() + set(SEQUANT_HAS_EVAL ON) + set(SEQUANT_HAS_BTAS ON) +endif () + +if 
(SEQUANT_HAS_EVAL) list(APPEND SeQuant_src SeQuant/core/eval/cache_manager.cpp SeQuant/core/eval/cache_manager.hpp SeQuant/core/eval/eval.hpp SeQuant/core/eval/result.cpp SeQuant/core/eval/result.hpp - SeQuant/core/eval/eval_fwd.hpp + SeQuant/core/eval/fwd.hpp ) + if (SEQUANT_HAS_TILEDARRAY) + list(APPEND SeQuant_src + SeQuant/core/eval/backends/tiledarray/eval_expr.hpp + SeQuant/core/eval/backends/tiledarray/result.hpp + SeQuant/core/eval/backends/tiledarray/result.cpp + ) + endif () + if (SEQUANT_HAS_BTAS) + list(APPEND SeQuant_src + SeQuant/core/eval/backends/btas/eval_expr.hpp + SeQuant/core/eval/backends/btas/result.hpp + ) + endif () endif () add_library(SeQuant @@ -451,9 +480,17 @@ if (Boost_IS_MODULARIZED) Boost::spirit ) endif() -if (TARGET tiledarray) +if (SEQUANT_HAS_EVAL) + target_compile_definitions(SeQuant PUBLIC SEQUANT_HAS_EVAL=1) + if (SEQUANT_HAS_TILEDARRAY) target_link_libraries(SeQuant PUBLIC tiledarray) -endif () + target_compile_definitions(SeQuant PUBLIC SEQUANT_HAS_TILEDARRAY=1) + endif () + if (SEQUANT_HAS_BTAS) + target_link_libraries(SeQuant PUBLIC BTAS::BTAS) + target_compile_definitions(SeQuant PUBLIC SEQUANT_HAS_BTAS=1) + endif () +endif() if (SEQUANT_HAS_EXECUTION_HEADER_STANDALONE OR SEQUANT_HAS_EXECUTION_HEADER_WITH_TBB) target_compile_definitions(SeQuant PUBLIC SEQUANT_HAS_EXECUTION_HEADER) if (SEQUANT_HAS_EXECUTION_HEADER_WITH_TBB) diff --git a/SeQuant/core/eval/backends/btas/eval_expr.hpp b/SeQuant/core/eval/backends/btas/eval_expr.hpp new file mode 100644 index 0000000000..ef16b38ec7 --- /dev/null +++ b/SeQuant/core/eval/backends/btas/eval_expr.hpp @@ -0,0 +1,70 @@ +#ifndef SEQUANT_EVAL_BACKENDS_BTAS_EVAL_EXPR_HPP +#define SEQUANT_EVAL_BACKENDS_BTAS_EVAL_EXPR_HPP + +#ifdef SEQUANT_HAS_BTAS + +#include + +#include +#include +#include + +#include + +namespace sequant { + +/// +/// \brief This class extends the EvalExpr class by adding an annot() method so +/// that it can be used to evaluate using BTAS. 
+/// +class EvalExprBTAS final : public EvalExpr { + public: + using annot_t = container::svector; + + /// + /// \param bk iterable of Index objects. + /// \return vector of long-type hash values + /// of the labels of indices in \c bk + /// + template + static auto index_hash(Iterable&& bk) { + return ranges::views::transform( + std::forward(bk), [](auto const& idx) { + // + // WARNING! + // The BTAS uses long for scalar indexing by default. + // Hence, here we explicitly cast the size_t values to long + // Which is a potentially narrowing conversion leading to + // integral overflow. Hence, the values in the returned + // container are mixed negative and positive integers (long type) + // + return static_cast(sequant::hash::value(Index{idx}.label())); + }); + } + + template >> + EvalExprBTAS(Args&&... args) : EvalExpr{std::forward(args)...} { + annot_ = index_hash(canon_indices()) | ranges::to; + } + + /// + /// \return Annotation (container::svector) for BTAS::Tensor. + /// + [[nodiscard]] inline annot_t const& annot() const noexcept { return annot_; } + + private: + annot_t annot_; +}; + +/// Type alias for BTAS evaluation nodes +using EvalNodeBTAS = EvalNode; + +static_assert(meta::eval_node); +static_assert(meta::can_evaluate); + +} // namespace sequant + +#endif // SEQUANT_HAS_BTAS + +#endif // SEQUANT_EVAL_BACKENDS_BTAS_EVAL_EXPR_HPP diff --git a/SeQuant/core/eval/backends/btas/result.hpp b/SeQuant/core/eval/backends/btas/result.hpp new file mode 100644 index 0000000000..9eec126c40 --- /dev/null +++ b/SeQuant/core/eval/backends/btas/result.hpp @@ -0,0 +1,217 @@ +#ifndef SEQUANT_EVAL_BACKENDS_BTAS_RESULT_HPP +#define SEQUANT_EVAL_BACKENDS_BTAS_RESULT_HPP + +#ifdef SEQUANT_HAS_BTAS + +#include + +#include + +namespace sequant { + +namespace { + +/// +/// \brief This function implements the symmetrization of btas::Tensor. +/// +/// \param arr The tensor to be symmetrized. +/// +/// \pre The rank of the tensor must be even. 
+/// +/// \return The symmetrized btas::Tensor. +/// +template +auto column_symmetrize_btas(btas::Tensor const& arr) { + using ranges::views::iota; + + size_t const rank = arr.rank(); + + if (rank % 2 != 0) + throw std::domain_error("This function only supports even-ranked tensors"); + + perm_t perm = iota(size_t{0}, rank) | ranges::to; + + auto const lannot = perm; + + auto result = btas::Tensor{arr.range()}; + result.fill(0); + + auto call_back = [&result, &lannot, &arr, &perm = std::as_const(perm)]() { + btas::Tensor temp; + btas::permute(arr, lannot, temp, perm); + result += temp; + }; + + auto const nparticles = rank / 2; + symmetric_permutation(SymmetricParticleRange{perm.begin(), // + perm.begin() + nparticles, // + nparticles}, + call_back); + + return result; +} + +/// +/// \brief This function implements the antisymmetrization of btas::Tensor. +/// +/// \param arr The tensor to be antisymmetrized +/// +/// \param bra_rank The rank of the bra indices +/// +/// \return The antisymmetrized btas::Tensor. +/// +template +auto particle_antisymmetrize_btas(btas::Tensor const& arr, + size_t bra_rank) { + using ranges::views::concat; + using ranges::views::iota; + size_t const rank = arr.rank(); + SEQUANT_ASSERT(bra_rank <= rank); + size_t const ket_rank = rank - bra_rank; + + perm_t bra_perm = iota(size_t{0}, bra_rank) | ranges::to; + perm_t ket_perm = iota(bra_rank, rank) | ranges::to; + const auto lannot = iota(size_t{0}, rank) | ranges::to; + + auto process_permutations = [&lannot](const btas::Tensor& input_arr, + size_t range_rank, perm_t range_perm, + const perm_t& other_perm, bool is_bra) { + if (range_rank <= 1) return input_arr; + btas::Tensor result{input_arr.range()}; + + auto callback = [&](int parity) { + const auto annot = + is_bra ? concat(range_perm, other_perm) | ranges::to() + : concat(other_perm, range_perm) | ranges::to(); + + typename decltype(result)::numeric_type p_ = parity == 0 ? 
1 : -1; + btas::Tensor temp; + btas::permute(input_arr, lannot, temp, annot); + btas::scal(p_, temp); + result += temp; + }; + + antisymmetric_permutation(ParticleRange{range_perm.begin(), range_rank}, + callback); + return result; + }; + // Process bra permutations first + const auto ket_annot = ket_rank == 0 ? perm_t{} : ket_perm; + auto result = process_permutations(arr, bra_rank, bra_perm, ket_annot, true); + + // Process ket permutations if needed + const auto bra_annot = bra_rank == 0 ? perm_t{} : bra_perm; + result = process_permutations(result, ket_rank, ket_perm, bra_annot, false); + + return result; +} + +} // namespace + +/// +/// \brief Result for a tensor value of btas::Tensor type. +/// \tparam T btas::Tensor type. Must be a specialization of btas::Tensor. +/// +template +class ResultTensorBTAS final : public Result { + public: + using Result::id_t; + using numeric_type = typename T::numeric_type; + + explicit ResultTensorBTAS(T arr) : Result{std::move(arr)} {} + + private: + // TODO make it same as that used by EvalExprBTAS class from eval.hpp file + using annot_t = container::svector; + using annot_wrap = Annot; + + [[nodiscard]] id_t type_id() const noexcept override { + return id_for_type>(); + } + + [[nodiscard]] ResultPtr sum( + Result const& other, + std::array const& annot) const override { + SEQUANT_ASSERT(other.is>()); + auto const a = annot_wrap{annot}; + + T lres, rres; + btas::permute(get(), a.lannot, lres, a.this_annot); + btas::permute(other.get(), a.rannot, rres, a.this_annot); + return eval_result>(lres + rres); + } + + [[nodiscard]] ResultPtr prod(Result const& other, + std::array const& annot, + DeNest /*DeNestFlag*/) const override { + auto const a = annot_wrap{annot}; + + if (other.is>()) { + T result; + btas::permute(get(), a.lannot, result, a.this_annot); + btas::scal(other.as>().value(), result); + return eval_result>(std::move(result)); + } + + SEQUANT_ASSERT(other.is>()); + + if (a.this_annot.empty()) { + T rres; + 
btas::permute(other.get(), a.rannot, rres, a.lannot); + return eval_result>(btas::dot(get(), rres)); + } + + T result; + btas::contract(numeric_type{1}, // + get(), a.lannot, // + other.get(), a.rannot, // + numeric_type{0}, // + result, a.this_annot); + return eval_result>(std::move(result)); + } + + [[nodiscard]] ResultPtr mult_by_phase(std::int8_t factor) const override { + auto pre = get(); + btas::scal(numeric_type(factor), pre); + return eval_result>(std::move(pre)); + } + + [[nodiscard]] ResultPtr permute( + std::array const& ann) const override { + auto const pre_annot = std::any_cast(ann[0]); + auto const post_annot = std::any_cast(ann[1]); + T result; + btas::permute(get(), pre_annot, result, post_annot); + return eval_result>(std::move(result)); + } + + void add_inplace(Result const& other) override { + auto& t = get(); + auto const& o = other.get(); + SEQUANT_ASSERT(t.range() == o.range()); + t += o; + } + + [[nodiscard]] ResultPtr symmetrize() const override { + return eval_result>(column_symmetrize_btas(get())); + } + + [[nodiscard]] ResultPtr antisymmetrize(size_t bra_rank) const override { + return eval_result>( + particle_antisymmetrize_btas(get(), bra_rank)); + } + + private: + [[nodiscard]] std::size_t size_in_bytes() const final { + static_assert(std::is_arithmetic_v); + const auto& tensor = get(); + // only count data + return tensor.range().volume() * sizeof(T); + } +}; + +} // namespace sequant + +#endif // SEQUANT_HAS_BTAS + +#endif // SEQUANT_EVAL_BACKENDS_BTAS_RESULT_HPP diff --git a/SeQuant/core/eval/backends/tiledarray/eval_expr.hpp b/SeQuant/core/eval/backends/tiledarray/eval_expr.hpp new file mode 100644 index 0000000000..30efcb5722 --- /dev/null +++ b/SeQuant/core/eval/backends/tiledarray/eval_expr.hpp @@ -0,0 +1,40 @@ +#ifndef SEQUANT_EVAL_BACKENDS_TILEDARRAY_EVAL_EXPR_HPP +#define SEQUANT_EVAL_BACKENDS_TILEDARRAY_EVAL_EXPR_HPP + +#ifdef SEQUANT_HAS_TILEDARRAY + +#include + +#include + +namespace sequant { + +/// +/// \brief This 
class extends the EvalExpr class by adding an annot() method so +/// that it can be used to evaluate using TiledArray. +/// +class EvalExprTA final : public EvalExpr { + public: + template >> + EvalExprTA(Args&&... args) : EvalExpr{std::forward(args)...} { + annot_ = indices_annot(); + } + + [[nodiscard]] inline auto const& annot() const noexcept { return annot_; } + + private: + std::string annot_; +}; + +/// Type alias for TiledArray evaluation nodes +using EvalNodeTA = EvalNode; + +static_assert(meta::eval_node); +static_assert(meta::can_evaluate); + +} // namespace sequant + +#endif // SEQUANT_HAS_TILEDARRAY + +#endif // SEQUANT_EVAL_BACKENDS_TILEDARRAY_EVAL_EXPR_HPP diff --git a/SeQuant/core/eval/backends/tiledarray/result.cpp b/SeQuant/core/eval/backends/tiledarray/result.cpp new file mode 100644 index 0000000000..14169d4d67 --- /dev/null +++ b/SeQuant/core/eval/backends/tiledarray/result.cpp @@ -0,0 +1,62 @@ +#include + +#ifdef SEQUANT_HAS_TILEDARRAY + +#include + +#include + +#include +#include +#include + +namespace sequant { + +void log_ta_tensor_host_memory_use([[maybe_unused]] madness::World& world, + [[maybe_unused]] std::string_view label) { +#if defined(TA_TENSOR_MEM_PROFILE) + auto logger = Logger::instance(); + if (logger.eval.level < 3) return; + std::vector hwsize(world.size(), 0); + std::vector currsize(world.size(), 0); + std::vector actsize(world.size(), 0); + hwsize[world.rank()] = + TA::hostEnv::instance()->host_allocator_getActualHighWatermark(); + currsize[world.rank()] = + TA::hostEnv::instance()->host_allocator().getCurrentSize(); + actsize[world.rank()] = + TA::hostEnv::instance()->host_allocator().getActualSize(); + world.gop.sum(hwsize.data(), hwsize.size()); + world.gop.sum(currsize.data(), currsize.size()); + world.gop.sum(actsize.data(), actsize.size()); + + std::ostringstream oss; + oss << label << ": TA_TENSOR_MEM_PROFILE allocation statistics (MiB):\n"; + oss << std::setw(5) << "rank" // + << std::setw(12) << "hw" // + << 
std::setw(12) << "cur" // + << std::setw(12) << "act" // + << '\n'; // + oss << "--------------------------------------------\n"; + std::uint64_t total = 0; + for (auto rank = 0; rank != world.size(); ++rank) { + oss << std::setw(5) << rank // + << std::setw(12) << hwsize[rank] / (1 << 20) // + << std::setw(12) << currsize[rank] / (1 << 20) // + << std::setw(12) << actsize[rank] / (1 << 20) // + << '\n'; + total += currsize[rank] / (1 << 20); + } + oss << std::setw(5) << "total" // + << std::setw(12) << "" // + << std::setw(12) << total // + << std::setw(12) << "" // + << '\n'; + oss << "--------------------------------------------" << std::endl; + write_log(logger, oss.str()); +#endif +} + +} // namespace sequant + +#endif // SEQUANT_HAS_TILEDARRAY diff --git a/SeQuant/core/eval/backends/tiledarray/result.hpp b/SeQuant/core/eval/backends/tiledarray/result.hpp new file mode 100644 index 0000000000..47cd27e775 --- /dev/null +++ b/SeQuant/core/eval/backends/tiledarray/result.hpp @@ -0,0 +1,435 @@ +#ifndef SEQUANT_EVAL_BACKENDS_TILEDARRAY_RESULT_HPP +#define SEQUANT_EVAL_BACKENDS_TILEDARRAY_RESULT_HPP + +#ifdef SEQUANT_HAS_TILEDARRAY + +#include + +#include +#include + +namespace sequant { + +namespace { + +/// +/// \brief This function implements the symmetrization of TA::DistArray. +/// +/// \param arr The array to be symmetrized +/// +/// \pre The rank of the array must be even +/// +/// \return The symmetrized TA::DistArray. 
+/// +template +auto column_symmetrize_ta(TA::DistArray const& arr) { + using ranges::views::iota; + + size_t const rank = arr.trange().rank(); + if (rank % 2 != 0) + throw std::domain_error("This function only supports even-ranked tensors"); + + TA::DistArray result; + + perm_t perm = iota(size_t{0}, rank) | ranges::to; + + auto const lannot = ords_to_annot(perm); + + auto call_back = [&result, &lannot, &arr, &perm = std::as_const(perm)]() { + auto const rannot = ords_to_annot(perm); + if (result.is_initialized()) { + result(lannot) += arr(rannot); + } else { + result(lannot) = arr(rannot); + } + }; + + auto const nparticles = rank / 2; + symmetric_permutation(SymmetricParticleRange{perm.begin(), // + perm.begin() + nparticles, // + nparticles}, + call_back); + + TA::DistArray::wait_for_lazy_cleanup(result.world()); + + return result; +} + +/// +/// \brief This function implements the antisymmetrization of TA::DistArray. +/// +/// \param arr The array to be antisymmetrized. +/// +/// \param bra_rank The rank of the bra indices +/// +/// \return The antisymmetrized TA::DistArray. 
+/// +template +auto particle_antisymmetrize_ta(TA::DistArray const& arr, + size_t bra_rank) { + using ranges::views::iota; + size_t const rank = arr.trange().rank(); + SEQUANT_ASSERT(bra_rank <= rank); + size_t const ket_rank = rank - bra_rank; + + if (bra_rank <= 1 && ket_rank <= 1) { + // nothing to do + return arr; + } + + perm_t perm = iota(size_t{0}, rank) | ranges::to; + perm_t bra_perm = iota(size_t{0}, bra_rank) | ranges::to; + perm_t ket_perm = iota(bra_rank, rank) | ranges::to; + + const auto lannot = ords_to_annot(perm); + + auto process_permutations = [&lannot](const TA::DistArray& input_arr, + size_t range_rank, perm_t range_perm, + const std::string& other_annot, + bool is_bra) -> TA::DistArray { + if (range_rank <= 1) return input_arr; + TA::DistArray result; + + auto callback = [&](int parity) { + const auto range_annot = ords_to_annot(range_perm); + const auto annot = other_annot.empty() + ? range_annot + : (is_bra ? range_annot + "," + other_annot + : other_annot + "," + range_annot); + + typename decltype(result)::numeric_type p_ = parity == 0 ? 1 : -1; + if (result.is_initialized()) { + result(lannot) += p_ * input_arr(annot); + } else { + result(lannot) = p_ * input_arr(annot); + } + }; + antisymmetric_permutation(ParticleRange{range_perm.begin(), range_rank}, + callback); + return result; + }; + + // Process bra permutations first + const auto ket_annot = ket_rank == 0 ? "" : ords_to_annot(ket_perm); + auto result = process_permutations(arr, bra_rank, bra_perm, ket_annot, true); + + // Process ket permutations + const auto bra_annot = bra_rank == 0 ? "" : ords_to_annot(bra_perm); + result = process_permutations(result, ket_rank, ket_perm, bra_annot, false); + + TA::DistArray::wait_for_lazy_cleanup(result.world()); + return result; +} + +template +inline void log_ta(Args const&... 
args) noexcept { + log_result("[TA] ", args...); +} + +/// Convert sequant::DeNest to TA::DeNest +inline constexpr TA::DeNest to_ta_denest(DeNest d) noexcept { + return d == DeNest::True ? TA::DeNest::True : TA::DeNest::False; +} + +} // namespace + +/// TA::Tensor memory use logger +/// If TiledArray was configured with TA_TENSOR_MEM_PROFILE set this +/// prints the current use of memory by TA::Tensor objects in host memory space +/// to \p os . +/// \param world the world object to use for logging +/// \param label string to prepend to the profile +void log_ta_tensor_host_memory_use(madness::World& world, + std::string_view label = ""); + +/// +/// \brief Result for a tensor value of TA::DistArray type. +/// \tparam ArrayT TA::DistArray type. Tile type of ArrayT is regular tensor of +/// scalars (not a tensor of tensors) +/// +template >> +class ResultTensorTA final : public Result { + public: + using Result::id_t; + using numeric_type = typename ArrayT::numeric_type; + + explicit ResultTensorTA(ArrayT arr) : Result{std::move(arr)} {} + + private: + using this_type = ResultTensorTA; + using annot_wrap = Annot; + + [[nodiscard]] id_t type_id() const noexcept override { + return id_for_type(); + } + + [[nodiscard]] ResultPtr sum( + Result const& other, + std::array const& annot) const override { + SEQUANT_ASSERT(other.is()); + auto const a = annot_wrap{annot}; + + log_ta(a.lannot, " + ", a.rannot, " = ", a.this_annot, "\n"); + + ArrayT result; + result(a.this_annot) = + get()(a.lannot) + other.get()(a.rannot); + decltype(result)::wait_for_lazy_cleanup(result.world()); + return eval_result(std::move(result)); + } + + [[nodiscard]] ResultPtr prod(Result const& other, + std::array const& annot, + DeNest DeNestFlag) const override { + auto const a = annot_wrap{annot}; + + if (other.is>()) { + auto result = get(); + auto scalar = other.get(); + + log_ta(a.lannot, " * ", scalar, " = ", a.this_annot, "\n"); + + result(a.this_annot) = scalar * result(a.lannot); + + 
decltype(result)::wait_for_lazy_cleanup(result.world()); + return eval_result(std::move(result)); + } + + if (a.this_annot.empty()) { + // DOT product + SEQUANT_ASSERT(other.is()); + numeric_type d = + TA::dot(get()(a.lannot), other.get()(a.rannot)); + ArrayT::wait_for_lazy_cleanup(get().world()); + ArrayT::wait_for_lazy_cleanup(other.get().world()); + + log_ta(a.lannot, " * ", a.rannot, " = ", d, "\n"); + + return eval_result>(d); + } + + if (!other.is()) { + // potential T * ToT + auto annot_swap = annot; + std::swap(annot_swap[0], annot_swap[1]); + return other.prod(*this, annot_swap, DeNestFlag); + } + + // confirmed: other.is() is true + + log_ta(a.lannot, " * ", a.rannot, " = ", a.this_annot, "\n"); + + ArrayT result; + + result = TA::einsum(get()(a.lannot), other.get()(a.rannot), + a.this_annot); + decltype(result)::wait_for_lazy_cleanup(result.world()); + return eval_result(std::move(result)); + } + + [[nodiscard]] ResultPtr mult_by_phase(std::int8_t factor) const override { + auto pre = get(); + TA::scale(pre, numeric_type(factor)); + return eval_result(std::move(pre)); + } + + [[nodiscard]] ResultPtr permute( + std::array const& ann) const override { + auto const pre_annot = std::any_cast(ann[0]); + auto const post_annot = std::any_cast(ann[1]); + + log_ta(pre_annot, " = ", post_annot, "\n"); + + ArrayT result; + result(post_annot) = get()(pre_annot); + ArrayT::wait_for_lazy_cleanup(result.world()); + return eval_result(std::move(result)); + } + + void add_inplace(Result const& other) override { + SEQUANT_ASSERT(other.is()); + + auto& t = get(); + auto const& o = other.get(); + + SEQUANT_ASSERT(t.trange() == o.trange()); + auto ann = TA::detail::dummy_annotation(t.trange().rank()); + + log_ta(ann, " += ", ann, "\n"); + + t(ann) += o(ann); + ArrayT::wait_for_lazy_cleanup(t.world()); + } + + [[nodiscard]] ResultPtr symmetrize() const override { + return eval_result(column_symmetrize_ta(get())); + } + + [[nodiscard]] ResultPtr antisymmetrize(size_t bra_rank) 
const override { + return eval_result( + particle_antisymmetrize_ta(get(), bra_rank)); + } + + private: + [[nodiscard]] std::size_t size_in_bytes() const final { + auto& v = get(); + auto local_size = TA::size_of(v); + v.world().gop.sum(local_size); + return local_size; + } +}; + +template >> +class ResultTensorOfTensorTA final : public Result { + public: + using Result::id_t; + using numeric_type = typename ArrayT::numeric_type; + + explicit ResultTensorOfTensorTA(ArrayT arr) : Result{std::move(arr)} {} + + private: + using this_type = ResultTensorOfTensorTA; + using annot_wrap = Annot; + + using _inner_tensor_type = typename ArrayT::value_type::value_type; + + using compatible_regular_distarray_type = + TA::DistArray<_inner_tensor_type, typename ArrayT::policy_type>; + + // Only @c that_type type is allowed for ToT * T computation + using that_type = ResultTensorTA; + + [[nodiscard]] id_t type_id() const noexcept override { + return id_for_type(); + } + + [[nodiscard]] ResultPtr sum( + Result const& other, + std::array const& annot) const override { + SEQUANT_ASSERT(other.is()); + auto const a = annot_wrap{annot}; + + log_ta(a.lannot, " + ", a.rannot, " = ", a.this_annot, "\n"); + + ArrayT result; + result(a.this_annot) = + get()(a.lannot) + other.get()(a.rannot); + decltype(result)::wait_for_lazy_cleanup(result.world()); + return eval_result(std::move(result)); + } + + [[nodiscard]] ResultPtr prod(Result const& other, + std::array const& annot, + DeNest DeNestFlag) const override { + auto const a = annot_wrap{annot}; + + if (other.is>()) { + auto result = get(); + auto scalar = other.get(); + + log_ta(a.lannot, " * ", scalar, " = ", a.this_annot, "\n"); + + result(a.this_annot) = scalar * result(a.lannot); + + decltype(result)::wait_for_lazy_cleanup(result.world()); + return eval_result(std::move(result)); + } else if (a.this_annot.empty()) { + // DOT product + SEQUANT_ASSERT(other.is()); + numeric_type d = + TA::dot(get()(a.lannot), other.get()(a.rannot)); + 
ArrayT::wait_for_lazy_cleanup(get().world()); + ArrayT::wait_for_lazy_cleanup(other.get().world()); + + log_ta(a.lannot, " * ", a.rannot, " = ", d, "\n"); + + return eval_result>(d); + } + + log_ta(a.lannot, " * ", a.rannot, " = ", a.this_annot, "\n"); + + if (other.is()) { + // ToT * T -> ToT + auto result = + TA::einsum(get()(a.lannot), + other.get()(a.rannot), + a.this_annot); + return eval_result(std::move(result)); + + } else if (other.is() && DeNestFlag == DeNest::True) { + // ToT * ToT -> T + auto result = TA::einsum( + get()(a.lannot), other.get()(a.rannot), a.this_annot); + return eval_result(std::move(result)); + + } else if (other.is() && DeNestFlag == DeNest::False) { + // ToT * ToT -> ToT + auto result = TA::einsum(get()(a.lannot), + other.get()(a.rannot), a.this_annot); + return eval_result(std::move(result)); + } else { + throw invalid_operand(); + } + } + + [[nodiscard]] ResultPtr mult_by_phase(std::int8_t factor) const override { + auto pre = get(); + TA::scale(pre, numeric_type(factor)); + return eval_result(std::move(pre)); + } + + [[nodiscard]] ResultPtr permute( + std::array const& ann) const override { + auto const pre_annot = std::any_cast(ann[0]); + auto const post_annot = std::any_cast(ann[1]); + + log_ta(pre_annot, " = ", post_annot, "\n"); + + ArrayT result; + result(post_annot) = get()(pre_annot); + ArrayT::wait_for_lazy_cleanup(result.world()); + return eval_result(std::move(result)); + } + + void add_inplace(Result const& other) override { + SEQUANT_ASSERT(other.is()); + + auto& t = get(); + auto const& o = other.get(); + + SEQUANT_ASSERT(t.trange() == o.trange()); + auto ann = TA::detail::dummy_annotation(t.trange().rank()); + + log_ta(ann, " += ", ann, "\n"); + + t(ann) += o(ann); + ArrayT::wait_for_lazy_cleanup(t.world()); + } + + [[nodiscard]] ResultPtr symmetrize() const override { + // not implemented yet + return nullptr; + } + + [[nodiscard]] ResultPtr antisymmetrize(size_t /*bra_rank*/) const override { + // not implemented 
yet + return nullptr; + } + + private: + [[nodiscard]] std::size_t size_in_bytes() const final { + auto& v = get(); + auto local_size = TA::size_of(v); + v.world().gop.sum(local_size); + return local_size; + } +}; + +} // namespace sequant + +#endif // SEQUANT_HAS_TILEDARRAY + +#endif // SEQUANT_EVAL_BACKENDS_TILEDARRAY_RESULT_HPP diff --git a/SeQuant/core/eval/cache_manager.cpp b/SeQuant/core/eval/cache_manager.cpp index 72e90d65b2..8895824fe2 100644 --- a/SeQuant/core/eval/cache_manager.cpp +++ b/SeQuant/core/eval/cache_manager.cpp @@ -5,7 +5,7 @@ #include #include -#include +#include namespace sequant { diff --git a/SeQuant/core/eval/cache_manager.hpp b/SeQuant/core/eval/cache_manager.hpp index cc7c68ffe9..27ee25a534 100644 --- a/SeQuant/core/eval/cache_manager.hpp +++ b/SeQuant/core/eval/cache_manager.hpp @@ -3,8 +3,8 @@ #include #include -#include -#include +#include +#include #include #include diff --git a/SeQuant/core/eval/eval.hpp b/SeQuant/core/eval/eval.hpp index 825fd792a9..5da4fb0c0c 100644 --- a/SeQuant/core/eval/eval.hpp +++ b/SeQuant/core/eval/eval.hpp @@ -1,20 +1,18 @@ #ifndef SEQUANT_EVAL_EVAL_HPP #define SEQUANT_EVAL_EVAL_HPP +#include + #include #include -#include +#include #include -#include #include #include #include #include #include -#include -#include - #include #include #include @@ -60,9 +58,6 @@ enum struct EvalMode { SumInplace, Symmetrize, Antisymmetrize, - /// NNS projection of Wang-Knizia biorthogonalization - /// @sa ResultPtr::biorthogonal_nns_project - BiorthogonalNNSProject, Unknown }; @@ -80,18 +75,17 @@ enum struct EvalMode { } [[nodiscard]] constexpr auto to_string(EvalMode mode) noexcept { - return (mode == EvalMode::Constant) ? "Constant" - : (mode == EvalMode::Variable) ? "Variable" - : (mode == EvalMode::Tensor) ? "Tensor" - : (mode == EvalMode::Permute) ? "Permute" - : (mode == EvalMode::Product) ? "Product" - : (mode == EvalMode::MultByPhase) ? "MultByPhase" - : (mode == EvalMode::Sum) ? 
"Sum" - : (mode == EvalMode::SumInplace) ? "SumInplace" - : (mode == EvalMode::Symmetrize) ? "Symmetrize" - : (mode == EvalMode::Antisymmetrize) ? "Antisymmetrize" - : (mode == EvalMode::BiorthogonalNNSProject) ? "BiorthogonalNNSProject" - : "??"; + return (mode == EvalMode::Constant) ? "Constant" + : (mode == EvalMode::Variable) ? "Variable" + : (mode == EvalMode::Tensor) ? "Tensor" + : (mode == EvalMode::Permute) ? "Permute" + : (mode == EvalMode::Product) ? "Product" + : (mode == EvalMode::MultByPhase) ? "MultByPhase" + : (mode == EvalMode::Sum) ? "Sum" + : (mode == EvalMode::SumInplace) ? "SumInplace" + : (mode == EvalMode::Symmetrize) ? "Symmetrize" + : (mode == EvalMode::Antisymmetrize) ? "Antisymmetrize" + : "??"; } enum struct CacheMode { Store, Access, Release }; @@ -315,8 +309,8 @@ ResultPtr evaluate(Node const& node, // auto const de_nest = node.left()->tot() && node.right()->tot() && !node->tot(); time = timed_eval_inplace([&]() { - result = left->prod(*right, ann, - de_nest ? TA::DeNest::True : TA::DeNest::False); + result = + left->prod(*right, ann, de_nest ? DeNest::True : DeNest::False); }); } } @@ -543,30 +537,6 @@ ResultPtr evaluate_antisymm(Args&&... args) { return result; } -/// \brief Calls sequant::evaluate followed by -/// ResultPtr::biorthogonal_nns_project \return Evaluated result as ResultPtr. -/// \sa ResultPtr::biorthogonal_nns_project -template -ResultPtr evaluate_biorthogonal_nns_project(Args&&... 
args) { - ResultPtr pre = evaluate(std::forward(args)...); - SEQUANT_ASSERT(pre); - - auto const& n0 = node0(arg0(std::forward(args)...)); - - ResultPtr result; - auto time = timed_eval_inplace([&]() { - result = pre->biorthogonal_nns_project(n0->as_tensor().bra_rank()); - }); - - // logging - if constexpr (trace(EvalTrace)) { - auto stat = log::EvalStat{.mode = log::EvalMode::BiorthogonalNNSProject, - .time = time, - .memory = log::bytes(pre, result)}; - log::eval(stat, n0->label()); - } - return result; -} } // namespace sequant #endif // SEQUANT_EVAL_EVAL_HPP diff --git a/SeQuant/core/eval_expr.cpp b/SeQuant/core/eval/eval_expr.cpp similarity index 99% rename from SeQuant/core/eval_expr.cpp rename to SeQuant/core/eval/eval_expr.cpp index 563daaadac..d8483765f1 100644 --- a/SeQuant/core/eval_expr.cpp +++ b/SeQuant/core/eval/eval_expr.cpp @@ -2,8 +2,8 @@ #include #include #include -#include -#include +#include +#include #include #include #include diff --git a/SeQuant/core/eval_expr.hpp b/SeQuant/core/eval/eval_expr.hpp similarity index 84% rename from SeQuant/core/eval_expr.hpp rename to SeQuant/core/eval/eval_expr.hpp index 5069031b15..cafb8c2149 100644 --- a/SeQuant/core/eval_expr.hpp +++ b/SeQuant/core/eval/eval_expr.hpp @@ -1,8 +1,9 @@ -#ifndef SEQUANT_EVAL_EXPR_HPP -#define SEQUANT_EVAL_EXPR_HPP +#ifndef SEQUANT_EVAL_EVAL_EXPR_HPP +#define SEQUANT_EVAL_EVAL_EXPR_HPP #include #include +#include #include #include #include @@ -248,68 +249,6 @@ struct EvalOpSetter { void set(EvalExpr& expr, EvalOp op) { expr.op_type_ = op; } }; -/// -/// \brief This class extends the EvalExpr class by adding an annot() method so -/// that it can be used to evaluate using TiledArray. -/// -class EvalExprTA final : public EvalExpr { - public: - template >> - EvalExprTA(Args&&... 
args) : EvalExpr{std::forward(args)...} { - annot_ = indices_annot(); - } - - [[nodiscard]] inline auto const& annot() const noexcept { return annot_; } - - private: - std::string annot_; -}; - -/// -/// \brief This class extends the EvalExpr class by adding an annot() method so -/// that it can be used to evaluate using BTAS. -/// -class EvalExprBTAS final : public EvalExpr { - public: - using annot_t = container::svector; - - /// - /// \param bk iterable of Index objects. - /// \return vector of long-type hash values - /// of the labels of indices in \c bk - /// - template - static auto index_hash(Iterable&& bk) { - return ranges::views::transform( - std::forward(bk), [](auto const& idx) { - // - // WARNING! - // The BTAS uses long for scalar indexing by default. - // Hence, here we explicitly cast the size_t values to long - // Which is a potentially narrowing conversion leading to - // integral overflow. Hence, the values in the returned - // container are mixed negative and positive integers (long type) - // - return static_cast(sequant::hash::value(Index{idx}.label())); - }); - } - - template >> - EvalExprBTAS(Args&&... args) : EvalExpr{std::forward(args)...} { - annot_ = index_hash(canon_indices()) | ranges::to; - } - - /// - /// \return Annotation (container::svector) for BTAS::Tensor. - /// - [[nodiscard]] inline annot_t const& annot() const noexcept { return annot_; } - - private: - annot_t annot_; -}; - namespace meta { namespace detail { @@ -350,6 +289,48 @@ template concept eval_node_range = std::ranges::range && eval_node>; +/// +/// \brief Satisfied by a type with a method named `annot` that returns +/// a non-void type. +/// +template +concept has_annot = requires(T t) { + t.annot(); + requires !std::is_void_v; +}; + +/// +/// \brief Satisfied by an eval_node whose dereferenced type satisfies the +/// has_annot method. 
+/// \example +/// * `static_assert(!meta::can_evaluate>)` +/// * `static_assert(meta::can_evaluate)` (where EvalNodeTA +/// is defined in backends/tiledarray/eval_expr.hpp) +/// +template +concept can_evaluate = eval_node && requires(T n) { + { *n } -> has_annot; +}; + +/// +/// \brief Satisfied by a range type of objects satisfying can_evaluate. +/// +template +concept can_evaluate_range = + std::ranges::range && can_evaluate>; + +/// +/// \brief Satisfied when \c F is a leaf node evaluator for \c Node, i.e. +/// \c Node satisfies can_evaluate and an object (a function object) of +/// type \c F returns ResultPtr when called with a single argument of +/// type `Node const&`. +/// +template +concept leaf_node_evaluator = + can_evaluate && requires(F f, Node const& n) { + { f(n) } -> std::same_as; + }; + +} // namespace meta namespace impl { @@ -362,6 +343,9 @@ FullBinaryNode binarize(ExprPtr const&); template using EvalNode = FullBinaryNode; +static_assert(meta::eval_node>); +static_assert(!meta::can_evaluate>); + /// /// Creates a binary tree for evaluation. /// @@ -460,4 +444,4 @@ ExprPtr to_expr(meta::eval_node auto const& node) { } // namespace sequant -#endif // SEQUANT_EVAL_EXPR_HPP +#endif // SEQUANT_EVAL_EVAL_EXPR_HPP diff --git a/SeQuant/core/eval/eval_fwd.hpp b/SeQuant/core/eval/eval_fwd.hpp deleted file mode 100644 index 0c2113e854..0000000000 --- a/SeQuant/core/eval/eval_fwd.hpp +++ /dev/null @@ -1,73 +0,0 @@ -// -// Created by Bimal Gaudel on 3/27/25. -// - -#ifndef SEQUANT_EVAL_FWD_HPP -#define SEQUANT_EVAL_FWD_HPP - -#include - -namespace sequant { - -class CacheManager; -class Result; - -/// -/// \brief Managed pointer to the result of an evaluation. -/// -using ResultPtr = std::shared_ptr; - -namespace meta { - -/// -/// \brief Satisfied by a type with a method named `annot` that returns -/// a non-void type.
-/// -template -concept has_annot = requires(T t) { - t.annot(); - requires !std::is_void_v; -}; - -/// -/// \brief Satisfied by an eval_node whose dereferenced type satisfies the -/// has_annot method. -/// \example -/// * `static_assert(!meta::can_evaluate>)` -/// * `static_assert(meta::can_evaluate>)` -/// -template -concept can_evaluate = eval_node && requires(T n) { - { *n } -> has_annot; -}; - -/// -/// \brief Satisfied by a range type of objects satisfying can_evaluate. -/// -template -concept can_evaluate_range = - std::ranges::range && can_evaluate>; - -/// -/// \brief \tparam F is a leaf node evaluator of type \tparam Node if -/// an object (a function object) of type \tparam F returns ResultPtr -/// when called with the single argument of const ref type to -/// \tparam Node and the \tparam Node satisfies can_evaluate. -/// -template -concept leaf_node_evaluator = - can_evaluate && requires(F f, Node const& n) { - { f(n) } -> std::same_as; - }; -} // namespace meta - -static_assert(meta::eval_node>); -static_assert(meta::eval_node>); -static_assert(meta::eval_node>); - -static_assert(!meta::can_evaluate>); -static_assert(meta::can_evaluate>); -static_assert(meta::can_evaluate>); - -} // namespace sequant -#endif // SEQUANT_EVAL_FWD_HPP diff --git a/SeQuant/core/eval_node.hpp b/SeQuant/core/eval/eval_node.hpp similarity index 98% rename from SeQuant/core/eval_node.hpp rename to SeQuant/core/eval/eval_node.hpp index 8be58232ca..2be333062a 100644 --- a/SeQuant/core/eval_node.hpp +++ b/SeQuant/core/eval/eval_node.hpp @@ -2,12 +2,12 @@ // Created by Bimal Gaudel on 5/24/21. 
// -#ifndef SEQUANT_EVAL_NODE_HPP -#define SEQUANT_EVAL_NODE_HPP +#ifndef SEQUANT_EVAL_EVAL_NODE_HPP +#define SEQUANT_EVAL_EVAL_NODE_HPP #include #include -#include +#include #include #include #include @@ -252,4 +252,4 @@ AsyCost min_storage(meta::eval_node auto const& node) { } // namespace sequant -#endif // SEQUANT_EVAL_NODE_HPP +#endif // SEQUANT_EVAL_EVAL_NODE_HPP diff --git a/SeQuant/core/eval/fwd.hpp b/SeQuant/core/eval/fwd.hpp new file mode 100644 index 0000000000..f378d63f3f --- /dev/null +++ b/SeQuant/core/eval/fwd.hpp @@ -0,0 +1,27 @@ +// +// Created by Bimal Gaudel on 3/27/25. +// + +#ifndef SEQUANT_EVAL_FWD_HPP +#define SEQUANT_EVAL_FWD_HPP + +#include + +namespace sequant { + +/// Backend-agnostic flag to control tensor de-nesting behavior during products. +/// When multiplying tensor-of-tensor types, this controls whether the result +/// should be "de-nested" (flattened) to a regular tensor or kept as nested. +enum class DeNest { True, False }; + +class CacheManager; +class Result; + +/// +/// \brief Managed pointer to the result of an evaluation. 
+/// +using ResultPtr = std::shared_ptr; + +} // namespace sequant + +#endif // SEQUANT_EVAL_FWD_HPP diff --git a/SeQuant/core/eval/result.cpp b/SeQuant/core/eval/result.cpp index c0998b6ac4..151b495bf1 100644 --- a/SeQuant/core/eval/result.cpp +++ b/SeQuant/core/eval/result.cpp @@ -9,48 +9,4 @@ Result::id_t Result::next_id() noexcept { bool Result::has_value() const noexcept { return value_.has_value(); } -void log_ta_tensor_host_memory_use([[maybe_unused]] madness::World& world, - [[maybe_unused]] std::string_view label) { -#if defined(TA_TENSOR_MEM_PROFILE) - auto logger = Logger::instance(); - if (logger.eval.level < 3) return; - std::vector hwsize(world.size(), 0); - std::vector currsize(world.size(), 0); - std::vector actsize(world.size(), 0); - hwsize[world.rank()] = - TA::hostEnv::instance()->host_allocator_getActualHighWatermark(); - currsize[world.rank()] = - TA::hostEnv::instance()->host_allocator().getCurrentSize(); - actsize[world.rank()] = - TA::hostEnv::instance()->host_allocator().getActualSize(); - world.gop.sum(hwsize.data(), hwsize.size()); - world.gop.sum(currsize.data(), currsize.size()); - world.gop.sum(actsize.data(), actsize.size()); - - std::ostringstream oss; - oss << label << ": TA_TENSOR_MEM_PROFILE allocation statistics (MiB):\n"; - oss << std::setw(5) << "rank" // - << std::setw(12) << "hw" // - << std::setw(12) << "cur" // - << std::setw(12) << "act" // - << '\n'; // - oss << "--------------------------------------------\n"; - std::uint64_t total = 0; - for (auto rank = 0; rank != world.size(); ++rank) { - oss << std::setw(5) << rank // - << std::setw(12) << hwsize[rank] / (1 << 20) // - << std::setw(12) << currsize[rank] / (1 << 20) // - << std::setw(12) << actsize[rank] / (1 << 20) // - << '\n'; - total += currsize[rank] / (1 << 20); - } - oss << std::setw(5) << "total" // - << std::setw(12) << "" // - << std::setw(12) << total // - << std::setw(12) << "" // - << '\n'; - oss << "--------------------------------------------" << 
std::endl; - write_log(logger, oss.str()); -#endif -} } // namespace sequant diff --git a/SeQuant/core/eval/result.hpp b/SeQuant/core/eval/result.hpp index 6f3cc48d5e..cb1c1e55f6 100644 --- a/SeQuant/core/eval/result.hpp +++ b/SeQuant/core/eval/result.hpp @@ -1,17 +1,15 @@ #ifndef SEQUANT_EVAL_RESULT_HPP #define SEQUANT_EVAL_RESULT_HPP +#include + #include #include -#include #include #include #include #include -#include -#include -#include #include #include @@ -136,372 +134,12 @@ std::string ords_to_annot(RngOfOrdinals const& ords) { ranges::to; } -/// -/// \brief This function implements the symmetrization of TA::DistArray. -/// -/// \param arr The array to be symmetrized -/// -/// \pre The rank of the array must be even -/// -/// \return The symmetrized TA::DistArray. -/// -template -auto column_symmetrize_ta(TA::DistArray const& arr) { - using ranges::views::iota; - - size_t const rank = arr.trange().rank(); - if (rank % 2 != 0) - throw std::domain_error("This function only supports even-ranked tensors"); - - TA::DistArray result; - - perm_t perm = iota(size_t{0}, rank) | ranges::to; - - auto const lannot = ords_to_annot(perm); - - auto call_back = [&result, &lannot, &arr, &perm = std::as_const(perm)]() { - auto const rannot = ords_to_annot(perm); - if (result.is_initialized()) { - result(lannot) += arr(rannot); - } else { - result(lannot) = arr(rannot); - } - }; - - auto const nparticles = rank / 2; - symmetric_permutation(SymmetricParticleRange{perm.begin(), // - perm.begin() + nparticles, // - nparticles}, - call_back); - - TA::DistArray::wait_for_lazy_cleanup(result.world()); - - return result; -} - -/// -/// \brief This function implements the antisymmetrization of TA::DistArray. -/// -/// \param arr The array to be antisymmetrized. -/// -/// \param bra_rank The rank of the bra indices -/// -/// \return The antisymmetrized TA::DistArray. 
-/// -template -auto particle_antisymmetrize_ta(TA::DistArray const& arr, - size_t bra_rank) { - using ranges::views::iota; - size_t const rank = arr.trange().rank(); - SEQUANT_ASSERT(bra_rank <= rank); - size_t const ket_rank = rank - bra_rank; - - if (bra_rank <= 1 && ket_rank <= 1) { - // nothing to do - return arr; - } - - perm_t perm = iota(size_t{0}, rank) | ranges::to; - perm_t bra_perm = iota(size_t{0}, bra_rank) | ranges::to; - perm_t ket_perm = iota(bra_rank, rank) | ranges::to; - - const auto lannot = ords_to_annot(perm); - - auto process_permutations = [&lannot](const TA::DistArray& input_arr, - size_t range_rank, perm_t range_perm, - const std::string& other_annot, - bool is_bra) -> TA::DistArray { - if (range_rank <= 1) return input_arr; - TA::DistArray result; - - auto callback = [&](int parity) { - const auto range_annot = ords_to_annot(range_perm); - const auto annot = other_annot.empty() - ? range_annot - : (is_bra ? range_annot + "," + other_annot - : other_annot + "," + range_annot); - - typename decltype(result)::numeric_type p_ = parity == 0 ? 1 : -1; - if (result.is_initialized()) { - result(lannot) += p_ * input_arr(annot); - } else { - result(lannot) = p_ * input_arr(annot); - } - }; - antisymmetric_permutation(ParticleRange{range_perm.begin(), range_rank}, - callback); - return result; - }; - - // Process bra permutations first - const auto ket_annot = ket_rank == 0 ? "" : ords_to_annot(ket_perm); - auto result = process_permutations(arr, bra_rank, bra_perm, ket_annot, true); - - // Process ket permutations - const auto bra_annot = bra_rank == 0 ? "" : ords_to_annot(bra_perm); - result = process_permutations(result, ket_rank, ket_perm, bra_annot, false); - - TA::DistArray::wait_for_lazy_cleanup(result.world()); - return result; -} - -/// -/// \brief This function implements the symmetrization of btas::Tensor. -/// -/// \param arr The tensor to be symmetrized. -/// -/// \pre The rank of the tensor must be even. 
-/// -/// \return The symmetrized btas::Tensor. -/// -template -auto column_symmetrize_btas(btas::Tensor const& arr) { - using ranges::views::iota; - - size_t const rank = arr.rank(); - - if (rank % 2 != 0) - throw std::domain_error("This function only supports even-ranked tensors"); - - perm_t perm = iota(size_t{0}, rank) | ranges::to; - - auto const lannot = perm; - - auto result = btas::Tensor{arr.range()}; - result.fill(0); - - auto call_back = [&result, &lannot, &arr, &perm = std::as_const(perm)]() { - btas::Tensor temp; - btas::permute(arr, lannot, temp, perm); - result += temp; - }; - - auto const nparticles = rank / 2; - symmetric_permutation(SymmetricParticleRange{perm.begin(), // - perm.begin() + nparticles, // - nparticles}, - call_back); - - return result; -} - -/// -/// \brief This function implements the antisymmetrization of btas::Tensor. -/// -/// \param arr The tensor to be antisymmetrized -/// -/// \param bra_rank The rank of the bra indices -/// -/// \return The antisymmetrized btas::Tensor. -/// -template -auto particle_antisymmetrize_btas(btas::Tensor const& arr, - size_t bra_rank) { - using ranges::views::concat; - using ranges::views::iota; - size_t const rank = arr.rank(); - SEQUANT_ASSERT(bra_rank <= rank); - size_t const ket_rank = rank - bra_rank; - - perm_t bra_perm = iota(size_t{0}, bra_rank) | ranges::to; - perm_t ket_perm = iota(bra_rank, rank) | ranges::to; - const auto lannot = iota(size_t{0}, rank) | ranges::to; - - auto process_permutations = [&lannot](const btas::Tensor& input_arr, - size_t range_rank, perm_t range_perm, - const perm_t& other_perm, bool is_bra) { - if (range_rank <= 1) return input_arr; - btas::Tensor result{input_arr.range()}; - - auto callback = [&](int parity) { - const auto annot = - is_bra ? concat(range_perm, other_perm) | ranges::to() - : concat(other_perm, range_perm) | ranges::to(); - - typename decltype(result)::numeric_type p_ = parity == 0 ? 
1 : -1; - btas::Tensor temp; - btas::permute(input_arr, lannot, temp, annot); - btas::scal(p_, temp); - result += temp; - }; - - antisymmetric_permutation(ParticleRange{range_perm.begin(), range_rank}, - callback); - return result; - }; - // Process bra permutations first - const auto ket_annot = ket_rank == 0 ? perm_t{} : ket_perm; - auto result = process_permutations(arr, bra_rank, bra_perm, ket_annot, true); - - // Process ket permutations if needed - const auto bra_annot = bra_rank == 0 ? perm_t{} : bra_perm; - result = process_permutations(result, ket_rank, ket_perm, bra_annot, false); - - return result; -} - -/// \brief This function is used to implement -/// ResultPtr::biorthogonal_nns_project for TA::DistArray -/// -/// \param arr The array to be "cleaned up" -/// \param bra_rank The rank of the bra indices -/// -/// \return The cleaned TA::DistArray. -template -auto biorthogonal_nns_project_ta(TA::DistArray const& arr, - size_t bra_rank) { - using ranges::views::iota; - size_t const rank = arr.trange().rank(); - SEQUANT_ASSERT(bra_rank <= rank); - size_t const ket_rank = rank - bra_rank; - - if (rank <= 4) { - return arr; - } - - using numeric_type = typename TA::DistArray::numeric_type; - - size_t factorial_ket = 1; - for (size_t i = 2; i <= ket_rank; ++i) { - factorial_ket *= i; - } - numeric_type norm_factor = numeric_type(1) / numeric_type(factorial_ket); - - TA::DistArray result; - - perm_t perm = iota(size_t{0}, rank) | ranges::to; - perm_t bra_perm = iota(size_t{0}, bra_rank) | ranges::to; - perm_t ket_perm = iota(bra_rank, rank) | ranges::to; - - const auto lannot = ords_to_annot(perm); - - auto process_permutations = [&lannot](const TA::DistArray& input_arr, - size_t range_rank, perm_t range_perm, - const std::string& other_annot, - bool is_bra) -> TA::DistArray { - if (range_rank <= 1) return input_arr; - TA::DistArray result; - - auto callback = [&]([[maybe_unused]] int parity) { - const auto range_annot = ords_to_annot(range_perm); - const auto 
annot = other_annot.empty() - ? range_annot - : (is_bra ? range_annot + "," + other_annot - : other_annot + "," + range_annot); - - // ignore parity, all permutations get same coefficient - numeric_type p_ = 1; - if (result.is_initialized()) { - result(lannot) += p_ * input_arr(annot); - } else { - result(lannot) = p_ * input_arr(annot); - } - }; - antisymmetric_permutation(ParticleRange{range_perm.begin(), range_rank}, - callback); - return result; - }; - - // identity term with coefficient +1 - result(lannot) = arr(lannot); - - // process only ket permutations with coefficient norm_factor - if (ket_rank > 1) { - const auto bra_annot = bra_rank == 0 ? "" : ords_to_annot(bra_perm); - auto ket_result = - process_permutations(arr, ket_rank, ket_perm, bra_annot, false); - - result(lannot) -= norm_factor * ket_result(lannot); - } - - TA::DistArray::wait_for_lazy_cleanup(result.world()); - return result; -} - -/// \brief This function is used to implement -/// ResultPtr::biorthogonal_nns_project for btas::Tensor -/// -/// \param arr The array to be "cleaned up" -/// \param bra_rank The rank of the bra indices -/// -/// \return The cleaned btas::Tensor. 
-template -auto biorthogonal_nns_project_btas(btas::Tensor const& arr, - size_t bra_rank) { - using ranges::views::concat; - using ranges::views::iota; - size_t const rank = arr.rank(); - SEQUANT_ASSERT(bra_rank <= rank); - size_t const ket_rank = rank - bra_rank; - - if (rank <= 4) { - return arr; - } - - using numeric_type = typename btas::Tensor::numeric_type; - - size_t factorial_ket = 1; - for (size_t i = 2; i <= ket_rank; ++i) { - factorial_ket *= i; - } - numeric_type norm_factor = numeric_type(1) / numeric_type(factorial_ket); - - perm_t bra_perm = iota(size_t{0}, bra_rank) | ranges::to; - perm_t ket_perm = iota(bra_rank, rank) | ranges::to; - const auto lannot = iota(size_t{0}, rank) | ranges::to; - - auto process_permutations = [&lannot](const btas::Tensor& input_arr, - size_t range_rank, perm_t range_perm, - const perm_t& other_perm, bool is_bra) { - if (range_rank <= 1) return input_arr; - btas::Tensor result{input_arr.range()}; - result.fill(0); - - auto callback = [&]([[maybe_unused]] int parity) { - const auto annot = - is_bra ? concat(range_perm, other_perm) | ranges::to() - : concat(other_perm, range_perm) | ranges::to(); - - // ignore parity, all permutations get same coefficient - numeric_type p_ = 1; - btas::Tensor temp; - btas::permute(input_arr, lannot, temp, annot); - btas::scal(p_, temp); - result += temp; - }; - - antisymmetric_permutation(ParticleRange{range_perm.begin(), range_rank}, - callback); - return result; - }; - - // identity term with coefficient +1 - auto result = arr; - - // process only ket permutations with coefficient norm_factor - if (ket_rank > 1) { - const auto bra_annot = bra_rank == 0 ? perm_t{} : bra_perm; - auto ket_result = - process_permutations(arr, ket_rank, ket_perm, bra_annot, false); - - btas::scal(norm_factor, ket_result); - result -= ket_result; - } - - return result; -} - template inline void log_result(Args const&... 
args) noexcept { auto& l = Logger::instance(); if (l.eval.level > 1) write_log(l, args...); } -template -inline void log_ta(Args const&... args) noexcept { - log_result("[TA] ", args...); -} - template inline void log_constant(Args const&... args) noexcept { log_result("[CONST] ", args...); @@ -509,15 +147,6 @@ inline void log_constant(Args const&... args) noexcept { } // namespace -/// TA::Tensor memory use logger -/// If TiledArray was configured with TA_TENSOR_MEM_PROFILE set this -/// prints the current use of memory by TA::Tensor objects in host memory space -/// to \p os . -/// \param world the world object to use for logging -/// \param label string to prepend to the profile -void log_ta_tensor_host_memory_use(madness::World& world, - std::string_view label = ""); - /******************************************************************************/ /// @@ -604,7 +233,7 @@ class Result { /// [[nodiscard]] virtual ResultPtr prod(Result const&, std::array const&, - TA::DeNest DeNestFlag) const = 0; + DeNest DeNestFlag) const = 0; /// /// \brief Permute this object according to the annotations in the argument. @@ -631,19 +260,6 @@ class Result { /// [[nodiscard]] virtual ResultPtr antisymmetrize(size_t bra_rank) const = 0; - /// \brief Implements "biorthogonal cleanup" of closed-shell - /// more compact spintraced equations produced via method of - /// Wang and Knizia. - /// - /// For 3-body residual (`bra_rank=3`) this implements Eq. (41) of the - /// Wang/Knizia paper, same as the first line of Figure 1. - /// For 4-body residual this implements the first line of Figure 2. - /// The implementation is for arbitrary ranks. - /// @param bra_rank the particle rank of the residual tensor (i.e. 
- /// its order halved) - [[nodiscard]] virtual ResultPtr biorthogonal_nns_project( - size_t bra_rank) const = 0; - [[nodiscard]] bool has_value() const noexcept; [[nodiscard]] virtual ResultPtr mult_by_phase(std::int8_t) const = 0; @@ -720,7 +336,7 @@ class ResultScalar final : public Result { [[nodiscard]] ResultPtr prod(Result const& other, std::array const& maybe_empty, - TA::DeNest DeNestFlag) const override { + DeNest DeNestFlag) const override { if (other.is>()) { auto const& o = other.as>(); auto p = value() * o.value(); @@ -755,11 +371,6 @@ class ResultScalar final : public Result { throw unimplemented_method("antisymmetrize"); } - [[nodiscard]] ResultPtr biorthogonal_nns_project( - [[maybe_unused]] size_t bra_rank) const override { - throw unimplemented_method("biorthogonal_nns_project"); - } - [[nodiscard]] ResultPtr mult_by_phase(std::int8_t factor) const override { return eval_result>(value() * T(factor)); } @@ -772,413 +383,6 @@ class ResultScalar final : public Result { [[nodiscard]] std::size_t size_in_bytes() const final { return sizeof(T); } }; -/// -/// \brief Result for a tensor value of TA::DistArray type. -/// \tparam ArrayT TA::DistArray type. 
Tile type of ArrayT is regular tensor of -/// scalars (not a tensor of tensors) -/// -template >> -class ResultTensorTA final : public Result { - public: - using Result::id_t; - using numeric_type = typename ArrayT::numeric_type; - - explicit ResultTensorTA(ArrayT arr) : Result{std::move(arr)} {} - - private: - using this_type = ResultTensorTA; - using annot_wrap = Annot; - - [[nodiscard]] id_t type_id() const noexcept override { - return id_for_type(); - } - - [[nodiscard]] ResultPtr sum( - Result const& other, - std::array const& annot) const override { - SEQUANT_ASSERT(other.is()); - auto const a = annot_wrap{annot}; - - log_ta(a.lannot, " + ", a.rannot, " = ", a.this_annot, "\n"); - - ArrayT result; - result(a.this_annot) = - get()(a.lannot) + other.get()(a.rannot); - decltype(result)::wait_for_lazy_cleanup(result.world()); - return eval_result(std::move(result)); - } - - [[nodiscard]] ResultPtr prod(Result const& other, - std::array const& annot, - TA::DeNest DeNestFlag) const override { - auto const a = annot_wrap{annot}; - - if (other.is>()) { - auto result = get(); - auto scalar = other.get(); - - log_ta(a.lannot, " * ", scalar, " = ", a.this_annot, "\n"); - - result(a.this_annot) = scalar * result(a.lannot); - - decltype(result)::wait_for_lazy_cleanup(result.world()); - return eval_result(std::move(result)); - } - - if (a.this_annot.empty()) { - // DOT product - SEQUANT_ASSERT(other.is()); - numeric_type d = - TA::dot(get()(a.lannot), other.get()(a.rannot)); - ArrayT::wait_for_lazy_cleanup(get().world()); - ArrayT::wait_for_lazy_cleanup(other.get().world()); - - log_ta(a.lannot, " * ", a.rannot, " = ", d, "\n"); - - return eval_result>(d); - } - - if (!other.is()) { - // potential T * ToT - auto annot_swap = annot; - std::swap(annot_swap[0], annot_swap[1]); - return other.prod(*this, annot_swap, DeNestFlag); - } - - // confirmed: other.is() is true - - log_ta(a.lannot, " * ", a.rannot, " = ", a.this_annot, "\n"); - - ArrayT result; - - result = 
TA::einsum(get()(a.lannot), other.get()(a.rannot), - a.this_annot); - decltype(result)::wait_for_lazy_cleanup(result.world()); - return eval_result(std::move(result)); - } - - [[nodiscard]] ResultPtr mult_by_phase(std::int8_t factor) const override { - auto pre = get(); - TA::scale(pre, numeric_type(factor)); - return eval_result(std::move(pre)); - } - - [[nodiscard]] ResultPtr permute( - std::array const& ann) const override { - auto const pre_annot = std::any_cast(ann[0]); - auto const post_annot = std::any_cast(ann[1]); - - log_ta(pre_annot, " = ", post_annot, "\n"); - - ArrayT result; - result(post_annot) = get()(pre_annot); - ArrayT::wait_for_lazy_cleanup(result.world()); - return eval_result(std::move(result)); - } - - void add_inplace(Result const& other) override { - SEQUANT_ASSERT(other.is()); - - auto& t = get(); - auto const& o = other.get(); - - SEQUANT_ASSERT(t.trange() == o.trange()); - auto ann = TA::detail::dummy_annotation(t.trange().rank()); - - log_ta(ann, " += ", ann, "\n"); - - t(ann) += o(ann); - ArrayT::wait_for_lazy_cleanup(t.world()); - } - - [[nodiscard]] ResultPtr symmetrize() const override { - return eval_result(column_symmetrize_ta(get())); - } - - [[nodiscard]] ResultPtr antisymmetrize(size_t bra_rank) const override { - return eval_result( - particle_antisymmetrize_ta(get(), bra_rank)); - } - - [[nodiscard]] ResultPtr biorthogonal_nns_project( - size_t bra_rank) const override { - return eval_result( - biorthogonal_nns_project_ta(get(), bra_rank)); - } - - private: - [[nodiscard]] std::size_t size_in_bytes() const final { - auto& v = get(); - auto local_size = TA::size_of(v); - v.world().gop.sum(local_size); - return local_size; - } -}; - -template >> -class ResultTensorOfTensorTA final : public Result { - public: - using Result::id_t; - using numeric_type = typename ArrayT::numeric_type; - - explicit ResultTensorOfTensorTA(ArrayT arr) : Result{std::move(arr)} {} - - private: - using this_type = ResultTensorOfTensorTA; - using 
annot_wrap = Annot; - - using _inner_tensor_type = typename ArrayT::value_type::value_type; - - using compatible_regular_distarray_type = - TA::DistArray<_inner_tensor_type, typename ArrayT::policy_type>; - - // Only @c that_type type is allowed for ToT * T computation - using that_type = ResultTensorTA; - - [[nodiscard]] id_t type_id() const noexcept override { - return id_for_type(); - } - - [[nodiscard]] ResultPtr sum( - Result const& other, - std::array const& annot) const override { - SEQUANT_ASSERT(other.is()); - auto const a = annot_wrap{annot}; - - log_ta(a.lannot, " + ", a.rannot, " = ", a.this_annot, "\n"); - - ArrayT result; - result(a.this_annot) = - get()(a.lannot) + other.get()(a.rannot); - decltype(result)::wait_for_lazy_cleanup(result.world()); - return eval_result(std::move(result)); - } - - [[nodiscard]] ResultPtr prod(Result const& other, - std::array const& annot, - TA::DeNest DeNestFlag) const override { - auto const a = annot_wrap{annot}; - - if (other.is>()) { - auto result = get(); - auto scalar = other.get(); - - log_ta(a.lannot, " * ", scalar, " = ", a.this_annot, "\n"); - - result(a.this_annot) = scalar * result(a.lannot); - - decltype(result)::wait_for_lazy_cleanup(result.world()); - return eval_result(std::move(result)); - } else if (a.this_annot.empty()) { - // DOT product - SEQUANT_ASSERT(other.is()); - numeric_type d = - TA::dot(get()(a.lannot), other.get()(a.rannot)); - ArrayT::wait_for_lazy_cleanup(get().world()); - ArrayT::wait_for_lazy_cleanup(other.get().world()); - - log_ta(a.lannot, " * ", a.rannot, " = ", d, "\n"); - - return eval_result>(d); - } - - log_ta(a.lannot, " * ", a.rannot, " = ", a.this_annot, "\n"); - - if (other.is()) { - // ToT * T -> ToT - auto result = - TA::einsum(get()(a.lannot), - other.get()(a.rannot), - a.this_annot); - return eval_result(std::move(result)); - - } else if (other.is() && DeNestFlag == TA::DeNest::True) { - // ToT * ToT -> T - auto result = TA::einsum( - get()(a.lannot), 
other.get()(a.rannot), a.this_annot); - return eval_result(std::move(result)); - - } else if (other.is() && DeNestFlag == TA::DeNest::False) { - // ToT * ToT -> ToT - auto result = TA::einsum(get()(a.lannot), - other.get()(a.rannot), a.this_annot); - return eval_result(std::move(result)); - } else { - throw invalid_operand(); - } - } - - [[nodiscard]] ResultPtr mult_by_phase(std::int8_t factor) const override { - auto pre = get(); - TA::scale(pre, numeric_type(factor)); - return eval_result(std::move(pre)); - } - - [[nodiscard]] ResultPtr permute( - std::array const& ann) const override { - auto const pre_annot = std::any_cast(ann[0]); - auto const post_annot = std::any_cast(ann[1]); - - log_ta(pre_annot, " = ", post_annot, "\n"); - - ArrayT result; - result(post_annot) = get()(pre_annot); - ArrayT::wait_for_lazy_cleanup(result.world()); - return eval_result(std::move(result)); - } - - void add_inplace(Result const& other) override { - SEQUANT_ASSERT(other.is()); - - auto& t = get(); - auto const& o = other.get(); - - SEQUANT_ASSERT(t.trange() == o.trange()); - auto ann = TA::detail::dummy_annotation(t.trange().rank()); - - log_ta(ann, " += ", ann, "\n"); - - t(ann) += o(ann); - ArrayT::wait_for_lazy_cleanup(t.world()); - } - - [[nodiscard]] ResultPtr symmetrize() const override { - // not implemented yet - return nullptr; - } - - [[nodiscard]] ResultPtr antisymmetrize(size_t /*bra_rank*/) const override { - // not implemented yet - return nullptr; - } - - [[nodiscard]] ResultPtr biorthogonal_nns_project( - [[maybe_unused]] size_t bra_rank) const override { - // or? throw unimplemented_method("biorthogonal_nns_project"); - // not implemented yet, I think I need it for CSV - return nullptr; - } - - private: - [[nodiscard]] std::size_t size_in_bytes() const final { - auto& v = get(); - auto local_size = TA::size_of(v); - v.world().gop.sum(local_size); - return local_size; - } -}; - -/// -/// \brief Result for a tensor value of btas::Tensor type. 
-/// \tparam T btas::Tensor type. Must be a specialization of btas::Tensor. -/// -template -class ResultTensorBTAS final : public Result { - public: - using Result::id_t; - using numeric_type = typename T::numeric_type; - - explicit ResultTensorBTAS(T arr) : Result{std::move(arr)} {} - - private: - // TODO make it same as that used by EvalExprBTAS class from eval.hpp file - using annot_t = container::svector; - using annot_wrap = Annot; - - [[nodiscard]] id_t type_id() const noexcept override { - return id_for_type>(); - } - - [[nodiscard]] ResultPtr sum( - Result const& other, - std::array const& annot) const override { - SEQUANT_ASSERT(other.is>()); - auto const a = annot_wrap{annot}; - - T lres, rres; - btas::permute(get(), a.lannot, lres, a.this_annot); - btas::permute(other.get(), a.rannot, rres, a.this_annot); - return eval_result>(lres + rres); - } - - [[nodiscard]] ResultPtr prod(Result const& other, - std::array const& annot, - TA::DeNest /*DeNestFlag*/) const override { - auto const a = annot_wrap{annot}; - - if (other.is>()) { - T result; - btas::permute(get(), a.lannot, result, a.this_annot); - btas::scal(other.as>().value(), result); - return eval_result>(std::move(result)); - } - - SEQUANT_ASSERT(other.is>()); - - if (a.this_annot.empty()) { - T rres; - btas::permute(other.get(), a.rannot, rres, a.lannot); - return eval_result>(btas::dot(get(), rres)); - } - - T result; - btas::contract(numeric_type{1}, // - get(), a.lannot, // - other.get(), a.rannot, // - numeric_type{0}, // - result, a.this_annot); - return eval_result>(std::move(result)); - } - - [[nodiscard]] ResultPtr mult_by_phase(std::int8_t factor) const override { - auto pre = get(); - btas::scal(numeric_type(factor), pre); - return eval_result>(std::move(pre)); - } - - [[nodiscard]] ResultPtr permute( - std::array const& ann) const override { - auto const pre_annot = std::any_cast(ann[0]); - auto const post_annot = std::any_cast(ann[1]); - T result; - btas::permute(get(), pre_annot, result, 
post_annot); - return eval_result>(std::move(result)); - } - - void add_inplace(Result const& other) override { - auto& t = get(); - auto const& o = other.get(); - SEQUANT_ASSERT(t.range() == o.range()); - t += o; - } - - [[nodiscard]] ResultPtr symmetrize() const override { - return eval_result>(column_symmetrize_btas(get())); - } - - [[nodiscard]] ResultPtr antisymmetrize(size_t bra_rank) const override { - return eval_result>( - particle_antisymmetrize_btas(get(), bra_rank)); - } - - [[nodiscard]] ResultPtr biorthogonal_nns_project( - [[maybe_unused]] size_t bra_rank) const override { - return eval_result>( - biorthogonal_nns_project_btas(get(), bra_rank)); - } - - private: - [[nodiscard]] std::size_t size_in_bytes() const final { - static_assert(std::is_arithmetic_v); - const auto& tensor = get(); - // only count data - return tensor.range().volume() * sizeof(T); - } -}; - } // namespace sequant #endif // SEQUANT_EVAL_RESULT_HPP diff --git a/SeQuant/core/export/export.hpp b/SeQuant/core/export/export.hpp index a3e6479095..a15fbd327f 100644 --- a/SeQuant/core/export/export.hpp +++ b/SeQuant/core/export/export.hpp @@ -2,7 +2,7 @@ #define SEQUANT_CORE_EXPORT_EXPORT_HPP #include -#include +#include #include #include #include diff --git a/SeQuant/core/export/export_expr.cpp b/SeQuant/core/export/export_expr.cpp index c990ac6f15..ab2a56ad71 100644 --- a/SeQuant/core/export/export_expr.cpp +++ b/SeQuant/core/export/export_expr.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include diff --git a/SeQuant/core/export/export_expr.hpp b/SeQuant/core/export/export_expr.hpp index 431cbd8069..b4f939dea0 100644 --- a/SeQuant/core/export/export_expr.hpp +++ b/SeQuant/core/export/export_expr.hpp @@ -1,7 +1,7 @@ #ifndef SEQUANT_CORE_EXPORT_EXPORT_EXPR_HPP #define SEQUANT_CORE_EXPORT_EXPORT_EXPR_HPP -#include +#include #include #include diff --git a/SeQuant/core/optimize/common_subexpression_elimination.hpp b/SeQuant/core/optimize/common_subexpression_elimination.hpp 
index 6ef7f138c6..6db312e1cf 100644 --- a/SeQuant/core/optimize/common_subexpression_elimination.hpp +++ b/SeQuant/core/optimize/common_subexpression_elimination.hpp @@ -3,8 +3,8 @@ #include #include -#include -#include +#include +#include #include #include #include diff --git a/SeQuant/core/optimize/optimize.cpp b/SeQuant/core/optimize/optimize.cpp index f470c411d7..099e002367 100644 --- a/SeQuant/core/optimize/optimize.cpp +++ b/SeQuant/core/optimize/optimize.cpp @@ -1,8 +1,8 @@ #include #include #include -#include -#include +#include +#include #include #include #include diff --git a/SeQuant/core/utility/exception.hpp b/SeQuant/core/utility/exception.hpp index 10ef041a3f..535e31c9f8 100644 --- a/SeQuant/core/utility/exception.hpp +++ b/SeQuant/core/utility/exception.hpp @@ -14,7 +14,7 @@ namespace sequant { class Exception { public: Exception(const std::string& str) : msg_(str) {} - const auto& what() const { return msg_; } + virtual std::string_view what() const { return msg_; } private: std::string msg_; diff --git a/SeQuant/domain/mbpt/biorthogonalization.cpp b/SeQuant/domain/mbpt/biorthogonalization.cpp index 9c372b6f2c..a8cd857e6a 100644 --- a/SeQuant/domain/mbpt/biorthogonalization.cpp +++ b/SeQuant/domain/mbpt/biorthogonalization.cpp @@ -9,6 +9,7 @@ #include #include +#include #include #include @@ -29,8 +30,134 @@ struct compare_first_less { using IndexPair = std::pair; using ParticlePairings = container::svector; -ResultExpr biorthogonal_transform_copy(const ResultExpr& expr, - double threshold) { +// clang-format off +/// \brief Provides the first row of the biorthogonal coefficients matrix, +/// hardcoded from Mathematica to avoid numerical precision loss. +/// +/// The Myrvold-Ruskey unrank1 algorithm (doi.org/10.1016/S0020-0190(01)00141-7) +/// is used to order permutations, then the permutational overlap matrix M is +/// constructed with elements (-2)^{c} × (-1)^{n_particles}, where c is the +/// number of cycles in the relative permutation. 
+/// +/// The biorthogonal coefficients are obtained from the normalized pseudoinverse +/// of M: first compute M_pinv (the pseudoinverse), then normalize it by the +/// factor ((n_particles)!/rank(M)). +/// Finally, biorthogonal coefficients = normalized_M_pinv · e_1, +/// where e_1 is the first unit vector. +/// See [DOI 10.48550/ARXIV.1805.00565](https://doi.org/10.48550/ARXIV.1805.00565) +/// for more details. +/// +/// \param n_particles The rank of external index pairs +/// +/// \return Vector of rational coefficients representing the first row +/// +/// \throw std::runtime_error if n_particles is not in the range [1,5] +// clang-format on +std::vector hardcoded_biorthogonalizer_row( + std::size_t n_particles) { + switch (n_particles) { + case 1: + return std::vector{ratio(1, 2)}; + + case 2: + return std::vector{ratio(1, 3), ratio(1, 6)}; + + case 3: + return std::vector{ratio(17, 120), ratio(-7, 120), + ratio(-1, 120), ratio(-1, 120), + ratio(-1, 120), ratio(-7, 120)}; + + case 4: + return std::vector{ + ratio(43, 840), ratio(-19, 1680), ratio(-19, 1680), + ratio(-1, 105), ratio(-19, 1680), ratio(-19, 1680), + ratio(13, 840), ratio(1, 120), ratio(-1, 105), + ratio(1, 120), ratio(-1, 105), ratio(-19, 1680), + ratio(-1, 105), ratio(1, 120), ratio(1, 120), + ratio(13, 840), ratio(-1, 105), ratio(-1, 105), + ratio(1, 120), ratio(-19, 1680), ratio(-19, 1680), + ratio(13, 840), ratio(-19, 1680), ratio(1, 120)}; + + case 5: + return std::vector{ + ratio(59, 3780), ratio(-5, 3024), ratio(-5, 3024), + ratio(-5, 3024), ratio(-31, 7560), ratio(-5, 3024), + ratio(-5, 3024), ratio(-23, 30240), ratio(19, 7560), + ratio(37, 15120), ratio(-5, 3024), ratio(-23, 30240), + ratio(-5, 3024), ratio(19, 7560), ratio(37, 15120), + ratio(-31, 7560), ratio(37, 15120), ratio(37, 15120), + ratio(-31, 7560), ratio(-5, 3024), ratio(-5, 3024), + ratio(-23, 30240), ratio(-23, 30240), ratio(-23, 30240), + ratio(-13, 7560), ratio(-5, 3024), ratio(-5, 3024), + ratio(19, 7560), ratio(-23, 
30240), ratio(37, 15120), + ratio(19, 7560), ratio(-23, 30240), ratio(19, 7560), + ratio(-23, 30240), ratio(-13, 7560), ratio(37, 15120), + ratio(-13, 7560), ratio(-13, 7560), ratio(37, 15120), + ratio(-23, 30240), ratio(-31, 7560), ratio(-13, 7560), + ratio(37, 15120), ratio(37, 15120), ratio(19, 7560), + ratio(37, 15120), ratio(37, 15120), ratio(-13, 7560), + ratio(-13, 7560), ratio(-23, 30240), ratio(-31, 7560), + ratio(37, 15120), ratio(-31, 7560), ratio(37, 15120), + ratio(-5, 3024), ratio(-5, 3024), ratio(-23, 30240), + ratio(19, 7560), ratio(-5, 3024), ratio(37, 15120), + ratio(-31, 7560), ratio(37, 15120), ratio(37, 15120), + ratio(-13, 7560), ratio(19, 7560), ratio(37, 15120), + ratio(37, 15120), ratio(-13, 7560), ratio(-13, 7560), + ratio(-23, 30240), ratio(37, 15120), ratio(-13, 7560), + ratio(37, 15120), ratio(-13, 7560), ratio(-23, 30240), + ratio(19, 7560), ratio(-23, 30240), ratio(-23, 30240), + ratio(19, 7560), ratio(-13, 7560), ratio(-31, 7560), + ratio(37, 15120), ratio(-13, 7560), ratio(37, 15120), + ratio(19, 7560), ratio(-31, 7560), ratio(-31, 7560), + ratio(37, 15120), ratio(37, 15120), ratio(-5, 3024), + ratio(37, 15120), ratio(-13, 7560), ratio(37, 15120), + ratio(-13, 7560), ratio(-23, 30240), ratio(-5, 3024), + ratio(19, 7560), ratio(-23, 30240), ratio(-5, 3024), + ratio(37, 15120), ratio(-5, 3024), ratio(-23, 30240), + ratio(-23, 30240), ratio(-23, 30240), ratio(-13, 7560), + ratio(19, 7560), ratio(19, 7560), ratio(-23, 30240), + ratio(-23, 30240), ratio(-13, 7560), ratio(-5, 3024), + ratio(19, 7560), ratio(-5, 3024), ratio(-23, 30240), + ratio(37, 15120), ratio(37, 15120), ratio(-13, 7560), + ratio(-13, 7560), ratio(37, 15120), ratio(-23, 30240)}; + + default: + throw std::runtime_error( + "hardcoded biorthogonal coefficients only available for ranks 1-5, " + "requested rank is : " + + std::to_string(n_particles)); + } +} + +Eigen::Matrix +make_hardcoded_biorthogonalizer_matrix( + const std::vector& first_row, std::size_t n_particles) { 
+ const auto n = first_row.size(); + Eigen::Matrix M(n, n); + + for (std::size_t row = 0; row < n; ++row) { + for (std::size_t col = 0; col < n; ++col) { + perm::Permutation row_perm = perm::unrank(n - 1 - row, n_particles); + perm::Permutation col_perm = perm::unrank(col, n_particles); + + col_perm->preMultiply(row_perm); + + std::size_t source_idx = perm::rank(col_perm, n_particles); + M(row, col) = first_row[source_idx]; + } + } + return M; +} + +Eigen::Matrix +hardcoded_biorthogonalizer_matrix(std::size_t n_particles) { + auto first_row = hardcoded_biorthogonalizer_row(n_particles); + return make_hardcoded_biorthogonalizer_matrix(first_row, n_particles); +} + +ResultExpr biorthogonal_transform_copy( + const ResultExpr& expr, + double threshold = default_biorthogonalizer_pseudoinverse_threshold) { container::svector wrapper = {expr.clone()}; biorthogonal_transform(wrapper, threshold); @@ -39,7 +166,8 @@ ResultExpr biorthogonal_transform_copy(const ResultExpr& expr, } container::svector biorthogonal_transform_copy( - const container::svector& exprs, double threshold) { + const container::svector& exprs, + double threshold = default_biorthogonalizer_pseudoinverse_threshold) { container::svector copy; copy.reserve(exprs.size()); @@ -101,8 +229,8 @@ Eigen::MatrixXd permutational_overlap_matrix(std::size_t n_particles) { return M; } -Eigen::MatrixXd compute_biorth_coeffs(std::size_t n_particles, - double threshold) { +Eigen::MatrixXd compute_biorthogonalizer_matrix(std::size_t n_particles, + double threshold) { auto perm_ovlp_mat = permutational_overlap_matrix(n_particles); SEQUANT_ASSERT(perm_ovlp_mat.rows() == perm_ovlp_mat.cols()); SEQUANT_ASSERT(perm_ovlp_mat.isApprox(perm_ovlp_mat.transpose())); @@ -309,7 +437,7 @@ void biorthogonal_transform(container::svector& result_exprs, // like R^{IJ}_{AB} and the index pairing of the result is what determines // the required symmetrization. 
Hence, the symmetrization operator must not // be changed when transforming from one representation into the other. - assert(std::all_of( + SEQUANT_ASSERT(std::all_of( result_exprs.begin(), result_exprs.end(), [](const ResultExpr& res) { bool found = false; res.expression()->visit( @@ -336,12 +464,7 @@ void biorthogonal_transform(container::svector& result_exprs, ranges::to>(); const std::size_t n_particles = externals.front().size(); - - Eigen::MatrixXd coefficients = compute_biorth_coeffs(n_particles, threshold); - auto num_perms = factorial(n_particles); - SEQUANT_ASSERT(num_perms == coefficients.rows()); - SEQUANT_ASSERT(num_perms == coefficients.cols()); auto original_exprs = result_exprs | ranges::views::transform([](const ResultExpr& res) { @@ -349,6 +472,60 @@ void biorthogonal_transform(container::svector& result_exprs, }) | ranges::to>(); + auto memoize = [](container::map, + std::optional>& cache, + std::mutex& mutex, std::condition_variable& cv, + std::pair key, + auto compute_fn) -> const T& { + { + std::unique_lock lock(mutex); + auto [it, inserted] = cache.try_emplace(key, std::nullopt); + if (!inserted) { + cv.wait(lock, [&] { return it->second.has_value(); }); + return it->second.value(); + } + } + + T result = compute_fn(); + + { + std::lock_guard lock(mutex); + cache[key] = std::move(result); + cv.notify_all(); + return cache[key].value(); + } + }; + + using HardcodedMatrix = + Eigen::Matrix; + using ComputedMatrix = Eigen::MatrixXd; + using CacheKey = std::pair; + + static std::mutex cache_mutex; + static std::condition_variable cache_cv; + static container::map> + hardcoded_cache; + static container::map> computed_cache; + + constexpr std::size_t max_rank_hardcoded_biorthogonalizer_matrix = 5; + CacheKey key{n_particles, threshold}; + + const HardcodedMatrix* hardcoded_coefficients = nullptr; + const ComputedMatrix* computed_coefficients = nullptr; + + if (n_particles <= max_rank_hardcoded_biorthogonalizer_matrix) { + hardcoded_coefficients = 
&memoize( + hardcoded_cache, cache_mutex, cache_cv, key, + [&] { return hardcoded_biorthogonalizer_matrix(n_particles); }); + } else { + computed_coefficients = + &memoize(computed_cache, cache_mutex, cache_cv, key, [&] { + return compute_biorthogonalizer_matrix(n_particles, threshold); + }); + SEQUANT_ASSERT(num_perms == computed_coefficients->rows()); + SEQUANT_ASSERT(num_perms == computed_coefficients->cols()); + } + for (std::size_t i = 0; i < result_exprs.size(); ++i) { result_exprs.at(i).expression() = ex(0); perm::Permutation reference = perm::unrank(ranks.at(i), n_particles); @@ -358,9 +535,14 @@ void biorthogonal_transform(container::svector& result_exprs, perm::Permutation perm = perm::unrank(rank, n_particles); perm->postMultiply(reference); + sequant::rational coeff = + (n_particles <= max_rank_hardcoded_biorthogonalizer_matrix) + ? (*hardcoded_coefficients)(ranks.at(i), rank) + : to_rational((*computed_coefficients)(ranks.at(i), rank), + threshold); + result_exprs.at(i).expression() += - ex( - to_rational(coefficients(ranks.at(i), rank), threshold)) * + ex(coeff) * create_expr_for(externals.at(i), perm, externals, original_exprs); } @@ -390,4 +572,36 @@ ExprPtr biorthogonal_transform( return res.expression(); } +namespace detail { + +std::vector compute_nns_p_coeffs(std::size_t n_particles, + double threshold) { + auto perm_ovlp_mat = permutational_overlap_matrix(n_particles); + auto normalized_pinv = + compute_biorthogonalizer_matrix(n_particles, threshold); + Eigen::MatrixXd nns_matrix = perm_ovlp_mat * normalized_pinv; + + auto num_perms = nns_matrix.rows(); + std::vector coeffs; + coeffs.reserve(num_perms); + for (std::size_t i = 0; i < num_perms; ++i) { + coeffs.push_back(nns_matrix(num_perms - 1, i)); + } + return coeffs; +} + +container::svector compute_permuted_indices( + const container::svector& indices, size_t perm_rank, + size_t n_particles) { + perm::Permutation perm_obj = perm::unrank(perm_rank, n_particles); + + container::svector 
permuted_indices(n_particles); + for (size_t i = 0; i < n_particles; ++i) { + permuted_indices[i] = indices[perm_obj[i]]; + } + return permuted_indices; +} + +} // namespace detail + } // namespace sequant diff --git a/SeQuant/domain/mbpt/biorthogonalization.hpp b/SeQuant/domain/mbpt/biorthogonalization.hpp index 62c415dd86..2e7eea28ba 100644 --- a/SeQuant/domain/mbpt/biorthogonalization.hpp +++ b/SeQuant/domain/mbpt/biorthogonalization.hpp @@ -5,33 +5,354 @@ #include #include -namespace sequant { +#if defined(SEQUANT_HAS_TILEDARRAY) +#include +#include +#endif +#if defined(SEQUANT_HAS_BTAS) +#include +#include +#endif -namespace { -static constexpr double default_biorth_threshold = 1e-12; -} +#include +#include +#include +#include +#include +#include -[[nodiscard]] ResultExpr biorthogonal_transform_copy( - const ResultExpr& expr, double threshold = default_biorth_threshold); +namespace sequant { -[[nodiscard]] container::svector biorthogonal_transform_copy( - const container::svector& exprs, - double threshold = default_biorth_threshold); +static constexpr double default_biorthogonalizer_pseudoinverse_threshold = + 1e-12; -void biorthogonal_transform(ResultExpr& expr, - double threshold = default_biorth_threshold); +void biorthogonal_transform( + ResultExpr& expr, double pseudoinverse_threshold = + default_biorthogonalizer_pseudoinverse_threshold); -void biorthogonal_transform(container::svector& exprs, - double threshold = default_biorth_threshold); +void biorthogonal_transform( + container::svector& exprs, + double pseudoinverse_threshold = + default_biorthogonalizer_pseudoinverse_threshold); /// performs symbolic biorthogonal transform of CC-like equation using ///(for rank-3 and higher -/// Wang-Knizia biorthogonalization (https://arxiv.org/abs/1805.00565) is used +/// [Wang-Knizia biorthogonalization](https://arxiv.org/abs/1805.00565). 
+/// +/// @note uses hardcoded coefficients for ranks 1-5, +/// for higher ranks computes coefficients (if Eigen3 is available, else throws +/// an exception) [[nodiscard]] ExprPtr biorthogonal_transform( const ExprPtr& expr, const container::svector>& ext_index_groups = {}, - double threshold = default_biorth_threshold); + double pseudoinverse_threshold = + default_biorthogonalizer_pseudoinverse_threshold); + +namespace detail { + +/// \brief Computes the non-null space (NNS) projection coefficients +/// +/// \param n_particles The rank of external index pairs +/// \param pseudoinverse_threshold The threshold for the pseudoinverse +/// (defaults to default_biorthogonalizer_pseudoinverse_threshold) +/// +/// \return Vector of computed NNS projection coefficients +[[nodiscard]] std::vector compute_nns_p_coeffs( + std::size_t n_particles, + double pseudoinverse_threshold = + default_biorthogonalizer_pseudoinverse_threshold); + +/// \brief Provides permuted indices using libperm unrank function +/// +/// \param indices The indices to permute +/// \param perm_rank The rank of the permutation +/// \param n_particles The rank of external index pairs +/// +/// \return The permuted indices +container::svector compute_permuted_indices( + const container::svector& indices, size_t perm_rank, + size_t n_particles); + +/// \brief Provides one row of the NNS projector matrix, +/// hardcoded from Mathematica to avoid numerical precision loss. +/// +/// The NNS projector weights are obtained from the normalized pseudoinverse +/// of M: first compute M_pinv (the pseudoinverse), then normalize it by the +/// factor ((n_particles)!/rank(M)). +/// Finally, NNS projector = normalized_M_pinv · M. +/// +/// \param n_particles The rank of external index pairs +/// +/// \return Optional vector of NNS projector weights representing the last row, +/// std::nullopt if n_particles is outside the range [1,5].
+template + requires(std::floating_point || meta::is_complex_v) +std::optional> hardcoded_nns_projector(std::size_t n_particles) { + switch (n_particles) { + case 1: + return std::vector{T(1) / T(1)}; + + case 2: + return std::vector{T(0) / T(1), T(1) / T(1)}; + + case 3: + return std::vector{T(-1) / T(5), T(-1) / T(5), T(-1) / T(5), + T(-1) / T(5), T(-1) / T(5), T(1) / T(1)}; + + case 4: + return std::vector{ + T(1) / T(7), T(1) / T(7), T(1) / T(7), T(-1) / T(14), + T(1) / T(7), T(1) / T(7), T(1) / T(7), T(-1) / T(14), + T(-1) / T(14), T(-1) / T(14), T(1) / T(7), T(-2) / T(7), + T(-1) / T(14), T(1) / T(7), T(-1) / T(14), T(-2) / T(7), + T(1) / T(7), T(-1) / T(14), T(-1) / T(14), T(-2) / T(7), + T(-2) / T(7), T(-2) / T(7), T(-2) / T(7), T(1) / T(1)}; + + case 5: + return std::vector{ + T(-1) / T(14), T(-1) / T(14), T(-1) / T(14), T(-1) / T(14), + T(2) / T(21), T(-1) / T(14), T(-1) / T(14), T(-1) / T(14), + T(-1) / T(14), T(2) / T(21), T(-1) / T(14), T(-1) / T(14), + T(-1) / T(14), T(-1) / T(14), T(2) / T(21), T(2) / T(21), + T(2) / T(21), T(2) / T(21), T(-1) / T(21), T(0) / T(1), + T(-1) / T(14), T(-1) / T(14), T(-1) / T(14), T(-1) / T(14), + T(2) / T(21), T(-1) / T(14), T(-1) / T(14), T(-1) / T(14), + T(-1) / T(14), T(2) / T(21), T(-1) / T(14), T(-1) / T(14), + T(-1) / T(14), T(-1) / T(14), T(2) / T(21), T(2) / T(21), + T(2) / T(21), T(2) / T(21), T(-1) / T(21), T(0) / T(1), + T(2) / T(21), T(2) / T(21), T(-1) / T(21), T(2) / T(21), + T(0) / T(1), T(2) / T(21), T(2) / T(21), T(-1) / T(21), + T(2) / T(21), T(0) / T(1), T(-1) / T(21), T(-1) / T(21), + T(-1) / T(21), T(-1) / T(21), T(1) / T(7), T(0) / T(1), + T(0) / T(1), T(1) / T(7), T(1) / T(7), T(-1) / T(3), + T(2) / T(21), T(-1) / T(21), T(2) / T(21), T(2) / T(21), + T(0) / T(1), T(-1) / T(21), T(-1) / T(21), T(-1) / T(21), + T(-1) / T(21), T(1) / T(7), T(2) / T(21), T(-1) / T(21), + T(2) / T(21), T(2) / T(21), T(0) / T(1), T(0) / T(1), + T(1) / T(7), T(0) / T(1), T(1) / T(7), T(-1) / T(3), + T(-1) / T(21), T(-1) 
/ T(21), T(-1) / T(21), T(-1) / T(21), + T(1) / T(7), T(-1) / T(21), T(2) / T(21), T(2) / T(21), + T(2) / T(21), T(0) / T(1), T(-1) / T(21), T(2) / T(21), + T(2) / T(21), T(2) / T(21), T(0) / T(1), T(1) / T(7), + T(0) / T(1), T(0) / T(1), T(1) / T(7), T(-1) / T(3), + T(0) / T(1), T(1) / T(7), T(1) / T(7), T(0) / T(1), + T(-1) / T(3), T(1) / T(7), T(0) / T(1), T(1) / T(7), + T(0) / T(1), T(-1) / T(3), T(1) / T(7), T(1) / T(7), + T(0) / T(1), T(0) / T(1), T(-1) / T(3), T(-1) / T(3), + T(-1) / T(3), T(-1) / T(3), T(-1) / T(3), T(1) / T(1)}; + + default: + return std::nullopt; + } +} + +/// \brief Provides NNS projection weights for a given rank +/// +/// \tparam T The numeric type (must be floating point or complex) +/// \param n_particles The rank of external index pairs +/// \param pseudoinverse_threshold The threshold for the pseudoinverse +/// (defaults to default_biorthogonalizer_pseudoinverse_threshold) +/// +/// \return (memoized) Vector of hardcoded/computed NNS projection weights +template + requires(std::floating_point || meta::is_complex_v) +[[nodiscard]] const std::vector& nns_projection_weights( + std::size_t n_particles, + double pseudoinverse_threshold = + default_biorthogonalizer_pseudoinverse_threshold) { + static const std::vector empty_vec{}; + + if (n_particles < 3) { + return empty_vec; + } + + using CacheKey = std::pair; + using CacheValue = std::optional>; + + static std::mutex cache_mutex; + static std::condition_variable cache_cv; + static container::map cache; + + CacheKey key{n_particles, pseudoinverse_threshold}; + + { + std::unique_lock lock(cache_mutex); + auto [it, inserted] = cache.try_emplace(key, std::nullopt); + if (!inserted) { + cache_cv.wait(lock, [&] { return it->second.has_value(); }); + return it->second.value(); + } + } + + std::vector nns_p_coeffs; + + constexpr std::size_t max_rank_hardcoded_nns_projector = 5; + if (n_particles <= max_rank_hardcoded_nns_projector) { + auto hardcoded_coeffs = hardcoded_nns_projector(n_particles); +
if (hardcoded_coeffs) { + nns_p_coeffs = std::move(hardcoded_coeffs.value()); + } + } else { + auto coeffs = + detail::compute_nns_p_coeffs(n_particles, pseudoinverse_threshold); + nns_p_coeffs.reserve(coeffs.size()); + for (const auto& c : coeffs) { + nns_p_coeffs.push_back(static_cast(c)); + } + } + + { + std::lock_guard lock(cache_mutex); + cache[key] = std::move(nns_p_coeffs); + cache_cv.notify_all(); + return cache[key].value(); + } +} + +} // namespace detail + +#if defined(SEQUANT_HAS_TILEDARRAY) + +/// \brief This function is used to implement +/// ResultPtr::biorthogonal_nns_project for TA::DistArray +/// +/// \param arr The array to be "cleaned up" +/// \param bra_rank The rank of the bra indices +/// +/// \return The cleaned TA::DistArray. +template +auto biorthogonal_nns_project_ta(TA::DistArray const& arr, + size_t bra_rank) { + using ranges::views::iota; + size_t const rank = arr.trange().rank(); + SEQUANT_ASSERT(bra_rank <= rank); + size_t const ket_rank = rank - bra_rank; + + // Residuals of rank 4 or less have no redundancy and don't require NNS + // projection + if (rank <= 4) return arr; + + using numeric_type = typename TA::DistArray::numeric_type; + + const auto& nns_p_coeffs = + detail::nns_projection_weights(ket_rank); + + TA::DistArray result; + + perm_t perm = iota(size_t{0}, rank) | ranges::to; + perm_t bra_perm = iota(size_t{0}, bra_rank) | ranges::to; + perm_t ket_perm = iota(bra_rank, rank) | ranges::to; + + const auto lannot = ords_to_annot(perm); + + if (ket_rank > 2 && !nns_p_coeffs.empty()) { + const auto bra_annot = bra_rank == 0 ? "" : ords_to_annot(bra_perm); + + size_t num_perms = nns_p_coeffs.size(); + for (size_t perm_rank = 0; perm_rank < num_perms; ++perm_rank) { + perm_t permuted_ket = + detail::compute_permuted_indices(ket_perm, perm_rank, ket_rank); + + numeric_type coeff = nns_p_coeffs[perm_rank]; + + const auto ket_annot = ords_to_annot(permuted_ket); + const auto annot = + bra_annot.empty() ? 
ket_annot : bra_annot + "," + ket_annot; + + if (result.is_initialized()) { + result(lannot) += coeff * arr(annot); + } else { + result(lannot) = coeff * arr(annot); + } + } + } else { + result(lannot) = arr(lannot); + } + + TA::DistArray::wait_for_lazy_cleanup(result.world()); + return result; +} + +template +auto biorthogonal_nns_project(TA::DistArray const& arr, + size_t bra_rank) { + return biorthogonal_nns_project_ta(arr, bra_rank); +} + +#endif // defined(SEQUANT_HAS_TILEDARRAY) + +#if defined(SEQUANT_HAS_BTAS) + +/// \brief This function is used to implement +/// ResultPtr::biorthogonal_nns_project for btas::Tensor +/// +/// \param arr The array to be "cleaned up" +/// \param bra_rank The rank of the bra indices +/// +/// \return The cleaned btas::Tensor. +template +auto biorthogonal_nns_project_btas(btas::Tensor const& arr, + size_t bra_rank) { + using ranges::views::iota; + size_t const rank = arr.rank(); + SEQUANT_ASSERT(bra_rank <= rank); + size_t const ket_rank = rank - bra_rank; + + // Residuals of rank 4 or less have no redundancy and don't require NNS + // projection + if (rank <= 4) return arr; + + using numeric_type = typename btas::Tensor::numeric_type; + + const auto& nns_p_coeffs = + detail::nns_projection_weights(ket_rank); + + btas::Tensor result; + + perm_t perm = iota(size_t{0}, rank) | ranges::to; + perm_t bra_perm = iota(size_t{0}, bra_rank) | ranges::to; + perm_t ket_perm = iota(bra_rank, rank) | ranges::to; + + if (ket_rank > 2 && !nns_p_coeffs.empty()) { + bool result_initialized = false; + + size_t num_perms = nns_p_coeffs.size(); + for (size_t perm_rank = 0; perm_rank < num_perms; ++perm_rank) { + perm_t permuted_ket = + detail::compute_permuted_indices(ket_perm, perm_rank, ket_rank); + + numeric_type coeff = nns_p_coeffs[perm_rank]; + + perm_t annot = bra_perm; + annot.insert(annot.end(), permuted_ket.begin(), permuted_ket.end()); + + btas::Tensor temp; + btas::permute(arr, annot, temp, perm); + btas::scal(coeff, temp); + + if 
(result_initialized) { + result += temp; + } else { + result = temp; + result_initialized = true; + } + } + + } else { + result = arr; + } + + return result; +} + +template +auto biorthogonal_nns_project(btas::Tensor const& arr, + size_t bra_rank) { + return biorthogonal_nns_project_btas(arr, bra_rank); +} + +#endif // defined(SEQUANT_HAS_BTAS) } // namespace sequant diff --git a/SeQuant/domain/mbpt/spin.cpp b/SeQuant/domain/mbpt/spin.cpp index ae5b3f0acf..90c5ec8a82 100644 --- a/SeQuant/domain/mbpt/spin.cpp +++ b/SeQuant/domain/mbpt/spin.cpp @@ -1169,28 +1169,20 @@ ExprPtr closed_shell_CC_spintrace_v2(ExprPtr const& expr, st_expr; } simplify(st_expr); - // expanding S after spintracing and biorthogonalization, to avoid dealing - // with large number of terms + st_expr = S_maps(st_expr); // canonicalizer must be called before hash-filter to combine terms canonicalize(st_expr); - // apply hash filter method to get unique set of terms st_expr = WK_biorthogonalization_filter(st_expr, ext_idxs); - // add S tensor again + st_expr = ex(Tensor{L"S", bra(std::move(kixs)), ket(std::move(bixs))}) * st_expr; - rational combined_factor; - if (ext_idxs.size() <= 2) { - combined_factor = rational(1, factorial(ext_idxs.size())); - } else { - auto fact_n = factorial(ext_idxs.size()); - combined_factor = - rational(1, fact_n - 1); // this is (1/fact_n) * (fact_n/(fact_n-1)) - } - st_expr = ex(combined_factor) * st_expr; + const auto nf = ex( + rational{1, factorial(ext_idxs.size())}); // normalization factor for S + st_expr = nf * st_expr; } simplify(st_expr); diff --git a/SeQuant/domain/mbpt/spin.hpp b/SeQuant/domain/mbpt/spin.hpp index 5061e0a5fe..a537213833 100644 --- a/SeQuant/domain/mbpt/spin.hpp +++ b/SeQuant/domain/mbpt/spin.hpp @@ -214,7 +214,7 @@ ExprPtr S_maps(const ExprPtr& expr); /// WK biorthogonalization rewrites biorthogonal expressions as a projector /// onto non-null-space (NNS) -/// applied to the biorothogonal expressions where out of each +/// applied to the 
biorthogonal expressions where out of each /// group of terms related by permutation of external indices /// those with the largest coefficients are selected. /// This function performs the selection by forming groups of terms that diff --git a/cmake/modules/FindOrFetchBTAS.cmake b/cmake/modules/FindOrFetchBTAS.cmake new file mode 100644 index 0000000000..22253cf3b4 --- /dev/null +++ b/cmake/modules/FindOrFetchBTAS.cmake @@ -0,0 +1,68 @@ +# try find_package +if (NOT TARGET BTAS::BTAS) + include (FindPackageRegimport) + find_package_regimport(BTAS 1.0.0 QUIET CONFIG) + if (TARGET BTAS::BTAS) + message(STATUS "Found BTAS CONFIG at ${BTAS_CONFIG}") + endif (TARGET BTAS::BTAS) +endif (NOT TARGET BTAS::BTAS) + +# if not found, build via FetchContent +if (NOT TARGET BTAS::BTAS) + + # BTAS will load BLAS++/LAPACK++ ... if those use CMake's FindBLAS/FindLAPACK (as indicated by defined BLA_VENDOR) + # will need to specify Fortran linkage convention ... manually for now, switching to NWX's linear algebra discovery + # is necessary to handle all the corner cases for automatic discovery + if (DEFINED BLA_VENDOR) + set(_linalgpp_use_standard_linalg_kits TRUE) + endif(DEFINED BLA_VENDOR) + + include(FetchContent) + FetchContent_Declare( + BTAS + GIT_REPOSITORY https://github.com/BTAS/btas.git + GIT_TAG ${TA_TRACKED_BTAS_TAG} + EXCLUDE_FROM_ALL + SYSTEM + ) + FetchContent_MakeAvailable(BTAS) + FetchContent_GetProperties(BTAS + SOURCE_DIR BTAS_SOURCE_DIR + BINARY_DIR BTAS_BINARY_DIR + ) + + # use subproject targets as if they were in exported namespace ... 
+ if (TARGET BTAS AND NOT TARGET BTAS::BTAS) + add_library(BTAS::BTAS ALIAS BTAS) + endif(TARGET BTAS AND NOT TARGET BTAS::BTAS) + + # set BTAS_CONFIG to the install location so that we know where to find it + set(BTAS_CONFIG ${CMAKE_INSTALL_PREFIX}/${BTAS_INSTALL_CMAKEDIR}/btas-config.cmake) + + # define macros specifying Fortran mangling convention, if necessary + if (_linalgpp_use_standard_linalg_kits) + if (NOT TARGET blaspp AND NOT TARGET lapackpp) + message(FATAL_ERROR "blaspp or lapackpp targets missing") + endif(NOT TARGET blaspp AND NOT TARGET lapackpp) + if (LINALG_MANGLING STREQUAL lower) + target_compile_definitions(blaspp PUBLIC -DBLAS_FORTRAN_LOWER=1) + target_compile_definitions(lapackpp PUBLIC -DLAPACK_FORTRAN_LOWER=1) + elseif(LINALG_MANGLING STREQUAL UPPER OR LINALG_MANGLING STREQUAL upper) + target_compile_definitions(blaspp PUBLIC -DBLAS_FORTRAN_UPPER=1) + target_compile_definitions(lapackpp PUBLIC -DLAPACK_FORTRAN_UPPER=1) + else() + if (NOT LINALG_MANGLING STREQUAL lower_) + message(WARNING "Linear algebra libraries' mangling convention not specified; specify -DLINALG_MANGLING={lower,lower_,UPPER}, if needed; BLASPP will try to autodetect") + endif(NOT LINALG_MANGLING STREQUAL lower_) + # these were needed for some configs at some point in the past? 
But in most cases they just produce compile noise +# target_compile_definitions(blaspp PUBLIC -DBLAS_FORTRAN_ADD_=1) +# target_compile_definitions(lapackpp PUBLIC -DLAPACK_FORTRAN_ADD_=1) + endif() + endif (_linalgpp_use_standard_linalg_kits) + +endif(NOT TARGET BTAS::BTAS) + +# postcond check +if (NOT TARGET BTAS::BTAS) + message(FATAL_ERROR "FindOrFetchBTAS could not make BTAS::BTAS target available") +endif(NOT TARGET BTAS::BTAS) diff --git a/cmake/sequant-config.cmake.in b/cmake/sequant-config.cmake.in index 49169e5f7e..a2c639f7ad 100644 --- a/cmake/sequant-config.cmake.in +++ b/cmake/sequant-config.cmake.in @@ -28,6 +28,16 @@ if(SEQUANT_HAS_TILEDARRAY AND NOT TARGET tiledarray) find_dependency(TiledArray CONFIG QUIET REQUIRED COMPONENTS tiledarray PATHS ${TiledArray_DIR} NO_DEFAULT_PATH) endif() +set(SEQUANT_HAS_BTAS @SEQUANT_HAS_BTAS@) +if(SEQUANT_HAS_BTAS AND NOT TARGET BTAS::BTAS) + set(BTAS_CONFIG @BTAS_CONFIG@) + if (NOT BTAS_CONFIG OR NOT EXISTS ${BTAS_CONFIG}) + message(FATAL_ERROR "Expected BTAS config file at ${BTAS_CONFIG}; directory moved since SeQuant configuration?") + endif() + get_filename_component(BTAS_DIR ${BTAS_CONFIG} DIRECTORY) + find_dependency(BTAS CONFIG QUIET REQUIRED PATHS ${BTAS_DIR} NO_DEFAULT_PATH) +endif() + set(SEQUANT_HAS_EIGEN @SEQUANT_HAS_EIGEN@) if (NOT TARGET Eigen3::Eigen AND SEQUANT_HAS_EIGEN) if (TARGET TiledArray_Eigen) diff --git a/doc/user/getting_started/installing.rst b/doc/user/getting_started/installing.rst index b578d4fba2..da83a6412e 100644 --- a/doc/user/getting_started/installing.rst +++ b/doc/user/getting_started/installing.rst @@ -67,9 +67,12 @@ Useful CMake Variables * - SEQUANT_TESTS - `BUILD_TESTING `_ - Enables test targets, e.g. ``check-sequant``. - * - SEQUANT_EVAL_TESTS + * - SEQUANT_BTAS - OFF - - Enables SeQuant evaluation tests using ``TiledArray`` and ``BTAS``. + - SeQuant will look for (or build) `BTAS tensor library ` and enable its use as an evaluation backend. 
+ * - SEQUANT_TILEDARRAY + - OFF + - SeQuant will look for (or build) `TiledArray tensor framework ` and enable its use as an evaluation backend. * - SEQUANT_MIMALLOC - OFF - Use `mimalloc `_ for fast memory allocation. diff --git a/tests/integration/eval/CMakeLists.txt b/tests/integration/eval/CMakeLists.txt index 93702f4c8a..8b389424f2 100644 --- a/tests/integration/eval/CMakeLists.txt +++ b/tests/integration/eval/CMakeLists.txt @@ -1,6 +1,3 @@ -option(SEQUANT_EVAL_TESTS "Enable building of evaluation tests (if true, will look for and/or build TiledArray)" OFF) -add_feature_info(EVAL_TESTS SEQUANT_EVAL_TESTS "Build evaluation tests (if true, will look for and/or build TiledArray)") - # uccf12 example moved to MPQC add_library(eval_shared STATIC EXCLUDE_FROM_ALL @@ -17,15 +14,13 @@ add_library(eval_shared STATIC EXCLUDE_FROM_ALL target_link_libraries(eval_shared PUBLIC SeQuant) target_include_directories(eval_shared PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}") -if (TARGET tiledarray) - set(example5 eval_ta) +if (SEQUANT_HAS_TILEDARRAY) add_executable(eval_ta ${BUILD_BY_DEFAULT} "ta/data_world_ta.hpp" "ta/scf_ta.hpp" "ta/main.cpp" ) target_link_libraries(eval_ta PRIVATE eval_shared tiledarray) - target_compile_definitions(eval_ta PRIVATE SEQUANT_HAS_TILEDARRAY) set(test_name "sequant/integration/eval_ta") add_test( @@ -35,14 +30,15 @@ if (TARGET tiledarray) ) build_test_as_needed(eval_ta "${test_name}" test_name) +endif (SEQUANT_HAS_TILEDARRAY) +if (SEQUANT_HAS_BTAS) add_executable(eval_btas ${BUILD_BY_DEFAULT} "btas/data_world_btas.hpp" "btas/scf_btas.hpp" "btas/main.cpp" ) - target_include_directories(eval_btas PRIVATE ${BTAS_SOURCE_DIR}) - target_link_libraries(eval_btas PRIVATE eval_shared) + target_link_libraries(eval_btas PRIVATE eval_shared BTAS::BTAS) set(test_name "sequant/integration/eval_btas") add_test( @@ -52,4 +48,4 @@ if (TARGET tiledarray) ) build_test_as_needed(eval_btas "${test_name}" test_name) -endif (TARGET tiledarray) +endif (SEQUANT_HAS_BTAS) diff 
--git a/tests/integration/eval/btas/data_world_btas.hpp b/tests/integration/eval/btas/data_world_btas.hpp index 2ced086456..c9e8498323 100644 --- a/tests/integration/eval/btas/data_world_btas.hpp +++ b/tests/integration/eval/btas/data_world_btas.hpp @@ -10,6 +10,8 @@ #include #include +#include +#include #include #include #include diff --git a/tests/integration/eval/btas/scf_btas.hpp b/tests/integration/eval/btas/scf_btas.hpp index a07dea280f..3528bacc7b 100644 --- a/tests/integration/eval/btas/scf_btas.hpp +++ b/tests/integration/eval/btas/scf_btas.hpp @@ -10,11 +10,12 @@ #include #include +#include #include #include #include -#include +#include #include #include diff --git a/tests/integration/eval/calc_info.hpp b/tests/integration/eval/calc_info.hpp index cc05f3ac7b..835059d6bf 100644 --- a/tests/integration/eval/calc_info.hpp +++ b/tests/integration/eval/calc_info.hpp @@ -10,7 +10,7 @@ #include #include -#include +#include #include #include #include diff --git a/tests/integration/eval/eval_utils.hpp b/tests/integration/eval/eval_utils.hpp index 944be4c149..3849c9cb3a 100644 --- a/tests/integration/eval/eval_utils.hpp +++ b/tests/integration/eval/eval_utils.hpp @@ -6,7 +6,7 @@ #define SEQUANT_EVAL_EVAL_UTILS_HPP #include -#include +#include #include #include #include diff --git a/tests/integration/eval/ta/data_world_ta.hpp b/tests/integration/eval/ta/data_world_ta.hpp index 6a6986e61e..3be6f2f070 100644 --- a/tests/integration/eval/ta/data_world_ta.hpp +++ b/tests/integration/eval/ta/data_world_ta.hpp @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include diff --git a/tests/integration/eval/ta/scf_ta.hpp b/tests/integration/eval/ta/scf_ta.hpp index 0a43b9efb4..e2d554afff 100644 --- a/tests/integration/eval/ta/scf_ta.hpp +++ b/tests/integration/eval/ta/scf_ta.hpp @@ -6,6 +6,7 @@ #define SEQUANT_EVAL_SCF_TA_HPP #include +#include #include #include #include diff --git a/tests/integration/srcc.cpp b/tests/integration/srcc.cpp index 
5175a2135d..8a57eb0692 100644 --- a/tests/integration/srcc.cpp +++ b/tests/integration/srcc.cpp @@ -197,23 +197,16 @@ class compute_cceqvec { eqvec[R] = ex(Tensor{L"S", bra(kixs), ket(bixs)}) * eqvec[R]; eqvec[R] = expand(eqvec[R]); - // apply normalization and rescaling factors - rational combined_factor; - if (ext_idxs.size() <= 2) { - combined_factor = rational(1, factorial(ext_idxs.size())); - } else { - auto fact_n = factorial(ext_idxs.size()); - combined_factor = rational( - 1, fact_n - 1); // this is (1/fact_n) * (fact_n/(fact_n-1)) - } - eqvec[R] = ex(combined_factor) * eqvec[R]; + // apply normalization factor + auto const nf = rational(1, factorial(ext_idxs.size())); + eqvec[R] = ex(nf) * eqvec[R]; simplify(eqvec[R]); // WK_biorthogonalization_filter method removes the redundancy caused // by biorthogonal transformation and gives the most compact set of // equations. However, we need to restore the effects of those deleted // terms. So, after evaluate_symm call in sequant evaluation scope, we - // need to call evaluate_biorthogonal_nns_project. + // need to call biorthogonal_nns_project_. 
std::wcout << "biorthogonal spin-free R" << R << "(expS" << N << ") has " << eqvec[R]->size() << " terms:" << std::endl; diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index ff65ed4037..0c341696e1 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -91,22 +91,24 @@ if (SEQUANT_SKIP_LONG_TESTS) target_compile_definitions(unit_tests-sequant PRIVATE SEQUANT_SKIP_LONG_TESTS=1) endif() -if (TARGET tiledarray) +if (SEQUANT_HAS_EVAL) + set(sq_ut_eval_src "test_cache_manager.cpp") + if (SEQUANT_HAS_TILEDARRAY) + list(APPEND sq_ut_eval_src "test_eval_ta.cpp") + endif() + if (SEQUANT_HAS_BTAS) + list(APPEND sq_ut_eval_src "test_eval_btas.cpp") + endif() target_sources(unit_tests-sequant PRIVATE - "test_cache_manager.cpp" - "test_eval_btas.cpp" - "test_eval_ta.cpp" + ${sq_ut_eval_src} ) set_source_files_properties( - "test_eval_btas.cpp" - "test_eval_ta.cpp" + ${sq_ut_eval_src} "test_main.cpp" PROPERTIES SKIP_UNITY_BUILD_INCLUSION ON ) - target_link_libraries(unit_tests-sequant PRIVATE tiledarray) - target_compile_definitions(unit_tests-sequant PRIVATE SEQUANT_HAS_TILEDARRAY) -endif (TARGET tiledarray) +endif (SEQUANT_HAS_EVAL) target_link_libraries(unit_tests-sequant PRIVATE SeQuant::SeQuant Catch2::Catch2 dtl::dtl) diff --git a/tests/unit/test_eval_btas.cpp b/tests/unit/test_eval_btas.cpp index 1d0db339ad..3be911859c 100644 --- a/tests/unit/test_eval_btas.cpp +++ b/tests/unit/test_eval_btas.cpp @@ -3,13 +3,16 @@ #include "catch2_sequant.hpp" +#include +#include #include -#include #include #include +#include #include #include + #include #include @@ -178,7 +181,9 @@ TEST_CASE("eval_with_btas", "[eval_btas]") { using BTensorD = btas::Tensor; - auto norm = [](BTensorD const& tnsr) { return btas::norm(tnsr); }; + auto norm = [](BTensorD const& tnsr) { + return std::sqrt(btas::dotc(tnsr, tnsr)); + }; std::srand(2023); const size_t nocc = 2, nvirt = 20; @@ -209,9 +214,9 @@ TEST_CASE("eval_with_btas", "[eval_btas]") { auto 
eval_biorthogonal_nns_project = [&yield_](sequant::ExprPtr const& expr, container::svector const& target_labels) { - return evaluate_biorthogonal_nns_project(eval_node(expr), target_labels, - yield_) - ->get(); + auto result = evaluate(eval_node(expr), target_labels, yield_); + return biorthogonal_nns_project( + result->get(), eval_node(expr)->as_tensor().bra_rank()); }; auto parse_antisymm = [](auto const& xpr) { @@ -401,14 +406,13 @@ TEST_CASE("eval_with_btas", "[eval_btas]") { BTensorD perm_sum{r2.range()}; perm_sum.fill(0); - perm_sum += r2; perm_sum += BTensorD{permute(r2, {0, 1, 2, 3, 5, 4})}; perm_sum += BTensorD{permute(r2, {0, 1, 2, 4, 3, 5})}; perm_sum += BTensorD{permute(r2, {0, 1, 2, 4, 5, 3})}; perm_sum += BTensorD{permute(r2, {0, 1, 2, 5, 3, 4})}; perm_sum += BTensorD{permute(r2, {0, 1, 2, 5, 4, 3})}; - btas::scal(1.0 / 6.0, perm_sum); + btas::scal(1.0 / 5.0, perm_sum); man2 -= perm_sum; REQUIRE(norm(eval2) == Catch::Approx(norm(man2))); diff --git a/tests/unit/test_eval_expr.cpp b/tests/unit/test_eval_expr.cpp index 02116fa704..48fef17f35 100644 --- a/tests/unit/test_eval_expr.cpp +++ b/tests/unit/test_eval_expr.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/tests/unit/test_eval_node.cpp b/tests/unit/test_eval_node.cpp index fcd1137134..84b8290a93 100644 --- a/tests/unit/test_eval_node.cpp +++ b/tests/unit/test_eval_node.cpp @@ -6,8 +6,8 @@ #include #include #include -#include -#include +#include +#include #include #include #include diff --git a/tests/unit/test_eval_ta.cpp b/tests/unit/test_eval_ta.cpp index 721034a6a3..87d4432744 100644 --- a/tests/unit/test_eval_ta.cpp +++ b/tests/unit/test_eval_ta.cpp @@ -4,10 +4,13 @@ #include "catch2_sequant.hpp" #include +#include +#include #include #include #include #include +#include #include #include @@ -324,9 +327,9 @@ TEST_CASE("eval_with_tiledarray", "[eval]") { auto eval_biorthogonal_nns_project = [&yield_]( sequant::ExprPtr const& expr, 
std::string const& target_labels) { - return evaluate_biorthogonal_nns_project(eval_node(expr), target_labels, - yield_) - ->get(); + auto result = evaluate(eval_node(expr), target_labels, yield_); + return sequant::biorthogonal_nns_project( + result->get(), eval_node(expr)->as_tensor().bra_rank()); }; SECTION("summation") { @@ -510,7 +513,7 @@ TEST_CASE("eval_with_tiledarray", "[eval]") { } SECTION("Biorthogonal Cleanup") { - // low-rank residuals: skip cleanup + // low-rank residuals: skip nns auto expr1 = parse_antisymm(L"R_{a1, a2}^{i1, i2}"); auto eval1 = eval_biorthogonal_nns_project(expr1, "a_1,a_2,i_1,i_2"); auto const& arr1 = yield(L"R{a1,a2;i1,i2}"); @@ -524,8 +527,8 @@ TEST_CASE("eval_with_tiledarray", "[eval]") { REQUIRE(norm(zero1) == Catch::Approx(0).margin( 100 * std::numeric_limits::epsilon())); - // high-rank residuals: cleanup applies: - // result = identity - (1/ket_rank!) * sum_of_ket_permutations + // for rank 3 residual, nns applies: + // result = NNS_P * sum_of_ket_permutations auto expr2 = parse_antisymm(L"R_{a1, a2, a3}^{i1, i2, i3}"); auto eval2 = eval_biorthogonal_nns_project(expr2, "a_1,a_2,a_3,i_1,i_2,i_3"); @@ -534,15 +537,56 @@ TEST_CASE("eval_with_tiledarray", "[eval]") { auto man2 = TArrayD{}; man2("0,1,2,3,4,5") = arr2("0,1,2,3,4,5") - - (1.0 / 6.0) * - (arr2("0,1,2,3,4,5") + arr2("0,1,2,3,5,4") + arr2("0,1,2,4,3,5") + - arr2("0,1,2,4,5,3") + arr2("0,1,2,5,3,4") + arr2("0,1,2,5,4,3")); + (1.0 / 5.0) * + (arr2("0,1,2,3,5,4") + arr2("0,1,2,4,3,5") + arr2("0,1,2,4,5,3") + + arr2("0,1,2,5,3,4") + arr2("0,1,2,5,4,3")); REQUIRE(norm(man2) == Catch::Approx(norm(eval2))); TArrayD zero2; zero2("0,1,2,3,4,5") = man2("0,1,2,3,4,5") - eval2("0,1,2,3,4,5"); - REQUIRE(norm(zero1) == Catch::Approx(0).margin( + REQUIRE(norm(zero2) == Catch::Approx(0).margin( 100 * std::numeric_limits::epsilon())); + + // for rank 4 residual, nns applies: + // result = NNS_P * sum_of_ket_permutations + auto expr3 = parse_antisymm(L"R_{a1, a2, a3, a4}^{i1, i2, i3, 
i4}"); + auto eval3 = eval_biorthogonal_nns_project( + expr3, "a_1,a_2,a_3,a_4,i_1,i_2,i_3,i_4"); + auto const& arr3 = yield(L"R{a1,a2,a3,a4;i1,i2,i3,i4}"); + + auto man3 = TArrayD{}; + man3("0,1,2,3,4,5,6,7") = 1.0 * arr3("0,1,2,3,4,5,6,7") + + -4.0 / 14.0 * arr3("0,1,2,3,4,5,7,6") + + -4.0 / 14.0 * arr3("0,1,2,3,4,6,5,7") + + -1.0 / 14.0 * arr3("0,1,2,3,4,6,7,5") + + -1.0 / 14.0 * arr3("0,1,2,3,4,7,5,6") + + -4.0 / 14.0 * arr3("0,1,2,3,4,7,6,5") + + -4.0 / 14.0 * arr3("0,1,2,3,5,4,6,7") + + 2.0 / 14.0 * arr3("0,1,2,3,5,4,7,6") + + -1.0 / 14.0 * arr3("0,1,2,3,5,6,4,7") + + 2.0 / 14.0 * arr3("0,1,2,3,5,6,7,4") + + 2.0 / 14.0 * arr3("0,1,2,3,5,7,4,6") + + -1.0 / 14.0 * arr3("0,1,2,3,5,7,6,4") + + -1.0 / 14.0 * arr3("0,1,2,3,6,4,5,7") + + 2.0 / 14.0 * arr3("0,1,2,3,6,4,7,5") + + -4.0 / 14.0 * arr3("0,1,2,3,6,5,4,7") + + -1.0 / 14.0 * arr3("0,1,2,3,6,5,7,4") + + 2.0 / 14.0 * arr3("0,1,2,3,6,7,4,5") + + 2.0 / 14.0 * arr3("0,1,2,3,6,7,5,4") + + 2.0 / 14.0 * arr3("0,1,2,3,7,4,5,6") + + -1.0 / 14.0 * arr3("0,1,2,3,7,4,6,5") + + -1.0 / 14.0 * arr3("0,1,2,3,7,5,4,6") + + -4.0 / 14.0 * arr3("0,1,2,3,7,5,6,4") + + 2.0 / 14.0 * arr3("0,1,2,3,7,6,4,5") + + 2.0 / 14.0 * arr3("0,1,2,3,7,6,5,4"); + + REQUIRE(norm(man3) == Catch::Approx(norm(eval3))); + TArrayD zero3; + zero3("0,1,2,3,4,5,6,7") = + man3("0,1,2,3,4,5,6,7") - eval3("0,1,2,3,4,5,6,7"); + REQUIRE(norm(zero3) == + Catch::Approx(0).margin(1000 * + std::numeric_limits::epsilon())); } SECTION("Others") { diff --git a/tests/unit/test_optimize.cpp b/tests/unit/test_optimize.cpp index df122873e8..951e286aaf 100644 --- a/tests/unit/test_optimize.cpp +++ b/tests/unit/test_optimize.cpp @@ -4,8 +4,8 @@ #include #include -#include -#include +#include +#include #include #include #include diff --git a/tests/unit/test_spin.cpp b/tests/unit/test_spin.cpp index e3aeb180c3..da85c53262 100644 --- a/tests/unit/test_spin.cpp +++ b/tests/unit/test_spin.cpp @@ -1090,8 +1090,8 @@ SECTION("Closed-shell spintrace CCSDT terms") { 
"g{a_1,a_2;a_4,a_5}:N-C-S * t{a_3,a_4,a_5;i_1,i_2,i_3}:N-C-S + 2 " "g{a_1,a_3;a_4,a_5}:N-C-S * t{a_2,a_4,a_5;i_1,i_3,i_2}:N-C-S")); - // the new efficient method, spintracing with partial expansion, then - // expanding by S_map ( this method is used in + // the new efficient method, does spintracing with partial expansion, then + // expanding by S_map (this method is used in // closed_shell_CC_spintrace_v2) auto result_2 = closed_shell_spintrace( input, {{L"i_1", L"a_1"}, {L"i_2", L"a_2"}, {L"i_3", L"a_3"}}); @@ -1124,19 +1124,13 @@ SECTION("Closed-shell spintrace CCSDT terms") { "g{a_1,a_3;a_4,a_5}:N-C-S * t{a_2,a_4,a_5;i_1,i_3,i_2}:N-C-S")); } - SECTION("ppl term in optimal") { // results in 1 term + SECTION("most expensive terms in CCSDT") { // results in 1 term const auto input = ex(ExprPtrList{ parse_expr(L"1/24 A{i_1,i_2,i_3;a_1,a_2,a_3} * " L"g{a_1,a_2;a_4,a_5} * t{a_3,a_4,a_5;i_1,i_2,i_3}", Symmetry::Antisymm)}); auto result = closed_shell_CC_spintrace_v2(input); - // multiply the result by 6/5 to revert the rescaling factor - result *= ex(rational{5, 6}); - - // There is a problem with casting a single term to Sum - // REQUIRE(result->size()== 1); // it needs to be checked - REQUIRE_THAT( result, EquivalentTo(