diff --git a/CMakeLists.txt b/CMakeLists.txt
index cfe1b2a5ef..33846d8a53 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -112,6 +112,11 @@ set(SEQUANT_INSTALL_CMAKEDIR "lib/cmake/sequant"
 ############################
 option(SEQUANT_BENCHMARKS "Enable SeQuant benchmarks" ${PROJECT_IS_TOP_LEVEL})
 option(SEQUANT_UTILITIES "Enable SeQuant utility programs" ${PROJECT_IS_TOP_LEVEL})
+option(SEQUANT_EVAL_TESTS "OBSOLETE: use SEQUANT_TILEDARRAY and SEQUANT_BTAS" OFF)
+option(SEQUANT_BTAS "Enable BTAS eval backend" ${SEQUANT_EVAL_TESTS})
+add_feature_info(SEQUANT_EVAL_BTAS SEQUANT_BTAS "Enable BTAS eval backend")
+option(SEQUANT_TILEDARRAY "Enable TiledArray eval backend" ${SEQUANT_EVAL_TESTS})
+add_feature_info(SEQUANT_EVAL_TILEDARRAY SEQUANT_TILEDARRAY "Enable TiledArray eval backend")
 
 option(SEQUANT_IWYU "Whether to use the include-what-you-use tool (if found)" OFF)
 option(SEQUANT_WARNINGS_AS_ERRORS "Whether to treat compiler warnings as errors" ${PROJECT_IS_TOP_LEVEL})
@@ -260,9 +265,9 @@ set(SeQuant_src
         SeQuant/core/container.hpp
         SeQuant/core/context.cpp
         SeQuant/core/context.hpp
-        SeQuant/core/eval_expr.cpp
-        SeQuant/core/eval_expr.hpp
-        SeQuant/core/eval_node.hpp
+        SeQuant/core/eval/eval_expr.cpp
+        SeQuant/core/eval/eval_expr.hpp
+        SeQuant/core/eval/eval_node.hpp
         SeQuant/core/export/compute_selection.cpp
         SeQuant/core/export/compute_selection.hpp
         SeQuant/core/export/context.cpp
@@ -405,20 +410,44 @@ set_source_files_properties(
 )
 
 ### optional prereqs
-if (SEQUANT_EVAL_TESTS)
-    include(FindOrFetchTiledArray)
-endif (SEQUANT_EVAL_TESTS)
-
-if (TARGET tiledarray)
+set(SEQUANT_HAS_EVAL OFF)  # do not build SQ/eval unless there is a backend
+if (SEQUANT_TILEDARRAY)
+    if (NOT TARGET tiledarray)
+        include(FindOrFetchTiledArray)
+    endif()
+    set(SEQUANT_HAS_EVAL ON)
     set(SEQUANT_HAS_TILEDARRAY ON)
+endif ()
+if (SEQUANT_BTAS)
+    if (NOT TARGET BTAS::BTAS)
+        include(FindOrFetchBTAS)
+    endif()
+    set(SEQUANT_HAS_EVAL ON)
+    set(SEQUANT_HAS_BTAS ON)
+endif ()
+
+if (SEQUANT_HAS_EVAL)
     list(APPEND SeQuant_src
             SeQuant/core/eval/cache_manager.cpp
             SeQuant/core/eval/cache_manager.hpp
             SeQuant/core/eval/eval.hpp
             SeQuant/core/eval/result.cpp
             SeQuant/core/eval/result.hpp
-            SeQuant/core/eval/eval_fwd.hpp
+            SeQuant/core/eval/fwd.hpp
             )
+    if (SEQUANT_HAS_TILEDARRAY)
+        list(APPEND SeQuant_src
+                SeQuant/core/eval/backends/tiledarray/eval_expr.hpp
+                SeQuant/core/eval/backends/tiledarray/result.hpp
+                SeQuant/core/eval/backends/tiledarray/result.cpp
+                )
+    endif ()
+    if (SEQUANT_HAS_BTAS)
+        list(APPEND SeQuant_src
+                SeQuant/core/eval/backends/btas/eval_expr.hpp
+                SeQuant/core/eval/backends/btas/result.hpp
+                )
+    endif ()
 endif ()
 
 add_library(SeQuant
@@ -451,9 +480,17 @@ if (Boost_IS_MODULARIZED)
             Boost::spirit
     )
 endif()
-if (TARGET tiledarray)
+if (SEQUANT_HAS_EVAL)
+  target_compile_definitions(SeQuant PUBLIC SEQUANT_HAS_EVAL=1)
+  if (SEQUANT_HAS_TILEDARRAY)
     target_link_libraries(SeQuant PUBLIC tiledarray)
-endif ()
+    target_compile_definitions(SeQuant PUBLIC SEQUANT_HAS_TILEDARRAY=1)
+  endif ()
+  if (SEQUANT_HAS_BTAS)
+    target_link_libraries(SeQuant PUBLIC BTAS::BTAS)
+    target_compile_definitions(SeQuant PUBLIC SEQUANT_HAS_BTAS=1)
+  endif ()
+endif()
 if (SEQUANT_HAS_EXECUTION_HEADER_STANDALONE OR SEQUANT_HAS_EXECUTION_HEADER_WITH_TBB)
     target_compile_definitions(SeQuant PUBLIC SEQUANT_HAS_EXECUTION_HEADER)
     if (SEQUANT_HAS_EXECUTION_HEADER_WITH_TBB)
diff --git a/SeQuant/core/eval/backends/btas/eval_expr.hpp b/SeQuant/core/eval/backends/btas/eval_expr.hpp
new file mode 100644
index 0000000000..ef16b38ec7
--- /dev/null
+++ b/SeQuant/core/eval/backends/btas/eval_expr.hpp
@@ -0,0 +1,70 @@
+#ifndef SEQUANT_EVAL_BACKENDS_BTAS_EVAL_EXPR_HPP
+#define SEQUANT_EVAL_BACKENDS_BTAS_EVAL_EXPR_HPP
+
+#ifdef SEQUANT_HAS_BTAS
+
+#include <SeQuant/core/eval/eval_expr.hpp>
+
+#include <SeQuant/core/container.hpp>
+#include <SeQuant/core/hash.hpp>
+#include <SeQuant/core/index.hpp>
+
+#include <range/v3/view.hpp>
+
+namespace sequant {
+
+///
+/// \brief Extends EvalExpr with a cached annot() (the hashed labels of the
+///        canonical indices) so that it can be used to evaluate using BTAS.
+///
+class EvalExprBTAS final : public EvalExpr {
+ public:
+  using annot_t = container::svector<long>;
+
+  ///
+  /// \param bk iterable of Index objects (each element is passed to Index{}).
+  /// \return lazy range (a ranges::views::transform view, not a container)
+  ///         of long-type hash values of the labels of indices in \c bk
+  ///
+  template <typename Iterable>
+  static auto index_hash(Iterable&& bk) {
+    return ranges::views::transform(
+        std::forward<Iterable>(bk), [](auto const& idx) {
+          //
+          // WARNING!
+          // BTAS uses long for scalar indexing by default, so the size_t
+          // hash value is explicitly cast to long here. The cast can
+          // narrow: hashes that do not fit in a long come out negative.
+          // Hence, the values in the returned range are mixed negative
+          // and positive integers (long type).
+          //
+          return static_cast<long>(sequant::hash::value(Index{idx}.label()));
+        });
+  }
+
+  template <typename... Args, typename = std::enable_if_t<
+                                  std::is_constructible_v<EvalExpr, Args...>>>
+  EvalExprBTAS(Args&&... args) : EvalExpr{std::forward<Args>(args)...} {
+    annot_ = index_hash(canon_indices()) | ranges::to<annot_t>;
+  }
+
+  ///
+  /// \return Annotation (container::svector<long>) for btas::Tensor.
+  ///
+  [[nodiscard]] inline annot_t const& annot() const noexcept { return annot_; }
+
+ private:
+  annot_t annot_;
+};
+
+/// Type alias for BTAS evaluation nodes
+using EvalNodeBTAS = EvalNode<EvalExprBTAS>;
+
+static_assert(meta::eval_node<EvalNodeBTAS>);
+static_assert(meta::can_evaluate<EvalNodeBTAS>);
+
+}  // namespace sequant
+
+#endif  // SEQUANT_HAS_BTAS
+
+#endif  // SEQUANT_EVAL_BACKENDS_BTAS_EVAL_EXPR_HPP
diff --git a/SeQuant/core/eval/backends/btas/result.hpp b/SeQuant/core/eval/backends/btas/result.hpp
new file mode 100644
index 0000000000..9eec126c40
--- /dev/null
+++ b/SeQuant/core/eval/backends/btas/result.hpp
@@ -0,0 +1,217 @@
+#ifndef SEQUANT_EVAL_BACKENDS_BTAS_RESULT_HPP
+#define SEQUANT_EVAL_BACKENDS_BTAS_RESULT_HPP
+
+#ifdef SEQUANT_HAS_BTAS
+
+#include <SeQuant/core/eval/result.hpp>
+
+#include <btas/btas.h>
+
+namespace sequant {
+
+namespace {
+
+///
+/// \brief Symmetrizes a btas::Tensor by summing the permuted copies of
+///        \p arr that symmetric_permutation generates (no scaling applied).
+/// \param arr The tensor to be symmetrized.
+///
+/// \pre The rank of the tensor must be even.
+/// \throws std::domain_error if the rank of \p arr is odd.
+/// \return The symmetrized btas::Tensor.
+///
+template <typename... Args>
+auto column_symmetrize_btas(btas::Tensor<Args...> const& arr) {
+  using ranges::views::iota;
+
+  size_t const rank = arr.rank();
+
+  if (rank % 2 != 0)
+    throw std::domain_error("This function only supports even-ranked tensors");
+
+  perm_t perm = iota(size_t{0}, rank) | ranges::to<perm_t>;
+  // copy of the identity ordering: used as the annotation of the input
+  auto const lannot = perm;
+
+  auto result = btas::Tensor<Args...>{arr.range()};
+  result.fill(0);  // the accumulator must start from zero
+  // adds the copy of arr permuted according to the current state of perm
+  auto call_back = [&result, &lannot, &arr, &perm = std::as_const(perm)]() {
+    btas::Tensor<Args...> temp;
+    btas::permute(arr, lannot, temp, perm);
+    result += temp;
+  };
+
+  auto const nparticles = rank / 2;
+  symmetric_permutation(SymmetricParticleRange{perm.begin(),               //
+                                               perm.begin() + nparticles,  //
+                                               nparticles},
+                        call_back);
+
+  return result;
+}
+
+///
+/// \brief This function implements the antisymmetrization of btas::Tensor:
+///        the bra positions are processed first, then the ket positions.
+/// \param arr The tensor to be antisymmetrized
+/// \param bra_rank The rank of the bra indices; must not exceed arr.rank()
+///
+/// \return The antisymmetrized btas::Tensor.
+///
+template <typename... Args>
+auto particle_antisymmetrize_btas(btas::Tensor<Args...> const& arr,
+                                  size_t bra_rank) {
+  using ranges::views::concat;
+  using ranges::views::iota;
+  size_t const rank = arr.rank();
+  SEQUANT_ASSERT(bra_rank <= rank);
+  size_t const ket_rank = rank - bra_rank;
+
+  perm_t bra_perm = iota(size_t{0}, bra_rank) | ranges::to<perm_t>;
+  perm_t ket_perm = iota(bra_rank, rank) | ranges::to<perm_t>;
+  const auto lannot = iota(size_t{0}, rank) | ranges::to<perm_t>;
+
+  auto process_permutations = [&lannot](const btas::Tensor<Args...>& input_arr,
+                                        size_t range_rank, perm_t range_perm,
+                                        const perm_t& other_perm, bool is_bra) {
+    if (range_rank <= 1) return input_arr;
+    btas::Tensor<Args...> result{input_arr.range()};
+    result.fill(0);  // the range ctor does not initialize the elements
+
+    auto callback = [&](int parity) {
+      const auto annot =
+          is_bra ? concat(range_perm, other_perm) | ranges::to<perm_t>()
+                 : concat(other_perm, range_perm) | ranges::to<perm_t>();
+
+      typename decltype(result)::numeric_type p_ = parity == 0 ? 1 : -1;
+      btas::Tensor<Args...> temp;
+      btas::permute(input_arr, lannot, temp, annot);
+      btas::scal(p_, temp);
+      result += temp;
+    };
+
+    antisymmetric_permutation(ParticleRange{range_perm.begin(), range_rank},
+                              callback);
+    return result;
+  };
+  // Process bra permutations first
+  const auto ket_annot = ket_rank == 0 ? perm_t{} : ket_perm;
+  auto result = process_permutations(arr, bra_rank, bra_perm, ket_annot, true);
+
+  // Process ket permutations if needed
+  const auto bra_annot = bra_rank == 0 ? perm_t{} : bra_perm;
+  result = process_permutations(result, ket_rank, ket_perm, bra_annot, false);
+
+  return result;
+}
+
+}  // namespace
+
+///
+/// \brief Result for a tensor value of btas::Tensor type.
+/// \tparam T btas::Tensor type. Must be a specialization of btas::Tensor.
+///
+template <typename T>
+class ResultTensorBTAS final : public Result {
+ public:
+  using Result::id_t;
+  using numeric_type = typename T::numeric_type;
+
+  explicit ResultTensorBTAS(T arr) : Result{std::move(arr)} {}
+
+ private:
+  // TODO reuse EvalExprBTAS::annot_t instead of repeating the type here
+  using annot_t = container::svector<long>;
+  using annot_wrap = Annot<annot_t>;
+
+  [[nodiscard]] id_t type_id() const noexcept override {
+    return id_for_type<ResultTensorBTAS<T>>();
+  }
+
+  [[nodiscard]] ResultPtr sum(
+      Result const& other,
+      std::array<std::any, 3> const& annot) const override {
+    SEQUANT_ASSERT(other.is<ResultTensorBTAS<T>>());
+    auto const a = annot_wrap{annot};
+    T lres, rres;  // both operands permuted to the layout of the result
+    btas::permute(get<T>(), a.lannot, lres, a.this_annot);
+    btas::permute(other.get<T>(), a.rannot, rres, a.this_annot);
+    return eval_result<ResultTensorBTAS<T>>(lres + rres);
+  }
+
+  [[nodiscard]] ResultPtr prod(Result const& other,
+                               std::array<std::any, 3> const& annot,
+                               DeNest /*DeNestFlag*/) const override {
+    auto const a = annot_wrap{annot};
+    // tensor * scalar: permute to the target layout, then scale
+    if (other.is<ResultScalar<numeric_type>>()) {
+      T result;
+      btas::permute(get<T>(), a.lannot, result, a.this_annot);
+      btas::scal(other.as<ResultScalar<numeric_type>>().value(), result);
+      return eval_result<ResultTensorBTAS<T>>(std::move(result));
+    }
+
+    SEQUANT_ASSERT(other.is<ResultTensorBTAS<T>>());
+    // empty target annotation: the product is a dot product (scalar result)
+    if (a.this_annot.empty()) {
+      T rres;
+      btas::permute(other.get<T>(), a.rannot, rres, a.lannot);
+      return eval_result<ResultScalar<numeric_type>>(btas::dot(get<T>(), rres));
+    }
+    // general case: contract both tensors into the target annotation
+    T result;
+    btas::contract(numeric_type{1},           //
+                   get<T>(), a.lannot,        //
+                   other.get<T>(), a.rannot,  //
+                   numeric_type{0},           //
+                   result, a.this_annot);
+    return eval_result<ResultTensorBTAS<T>>(std::move(result));
+  }
+
+  [[nodiscard]] ResultPtr mult_by_phase(std::int8_t factor) const override {
+    auto pre = get<T>();
+    btas::scal(numeric_type(factor), pre);
+    return eval_result<ResultTensorBTAS<T>>(std::move(pre));
+  }
+
+  [[nodiscard]] ResultPtr permute(
+      std::array<std::any, 2> const& ann) const override {
+    auto const pre_annot = std::any_cast<annot_t>(ann[0]);
+    auto const post_annot = std::any_cast<annot_t>(ann[1]);
+    T result;
+    btas::permute(get<T>(), pre_annot, result, post_annot);
+    return eval_result<ResultTensorBTAS<T>>(std::move(result));
+  }
+
+  void add_inplace(Result const& other) override {
+    SEQUANT_ASSERT(other.is<ResultTensorBTAS<T>>());
+    auto& t = get<T>();
+    auto const& o = other.get<T>();
+    SEQUANT_ASSERT(t.range() == o.range());
+    t += o;
+  }
+
+  [[nodiscard]] ResultPtr symmetrize() const override {
+    return eval_result<ResultTensorBTAS<T>>(column_symmetrize_btas(get<T>()));
+  }
+
+  [[nodiscard]] ResultPtr antisymmetrize(size_t bra_rank) const override {
+    return eval_result<ResultTensorBTAS<T>>(
+        particle_antisymmetrize_btas(get<T>(), bra_rank));
+  }
+
+ private:
+  [[nodiscard]] std::size_t size_in_bytes() const final {
+    static_assert(std::is_arithmetic_v<typename T::value_type>);
+    const auto& tensor = get<T>();
+    // only count data: number of elements times the size of one element
+    return tensor.range().volume() * sizeof(typename T::value_type);
+  }
+};
+
+}  // namespace sequant
+
+#endif  // SEQUANT_HAS_BTAS
+
+#endif  // SEQUANT_EVAL_BACKENDS_BTAS_RESULT_HPP
diff --git a/SeQuant/core/eval/backends/tiledarray/eval_expr.hpp b/SeQuant/core/eval/backends/tiledarray/eval_expr.hpp
new file mode 100644
index 0000000000..30efcb5722
--- /dev/null
+++ b/SeQuant/core/eval/backends/tiledarray/eval_expr.hpp
@@ -0,0 +1,40 @@
+#ifndef SEQUANT_EVAL_BACKENDS_TILEDARRAY_EVAL_EXPR_HPP
+#define SEQUANT_EVAL_BACKENDS_TILEDARRAY_EVAL_EXPR_HPP
+
+#ifdef SEQUANT_HAS_TILEDARRAY
+
+#include <SeQuant/core/eval/eval_expr.hpp>
+
+#include <string>
+
+namespace sequant {
+
+///
+/// \brief Extends EvalExpr with annot(): the string annotation obtained from
+///        indices_annot() at construction, used to evaluate with TiledArray.
+///
+class EvalExprTA final : public EvalExpr {
+ public:
+  template <typename... Args, typename = std::enable_if_t<
+                                  std::is_constructible_v<EvalExpr, Args...>>>
+  EvalExprTA(Args&&... args) : EvalExpr{std::forward<Args>(args)...} {
+    annot_ = indices_annot();  // computed once, after the base is built
+  }
+
+  [[nodiscard]] inline auto const& annot() const noexcept { return annot_; }
+
+ private:
+  std::string annot_;  // cached annotation returned by annot()
+};
+
+/// Type alias for TiledArray evaluation nodes
+using EvalNodeTA = EvalNode<EvalExprTA>;
+
+static_assert(meta::eval_node<EvalNodeTA>);
+static_assert(meta::can_evaluate<EvalNodeTA>);
+
+}  // namespace sequant
+
+#endif  // SEQUANT_HAS_TILEDARRAY
+
+#endif  // SEQUANT_EVAL_BACKENDS_TILEDARRAY_EVAL_EXPR_HPP
diff --git a/SeQuant/core/eval/backends/tiledarray/result.cpp b/SeQuant/core/eval/backends/tiledarray/result.cpp
new file mode 100644
index 0000000000..14169d4d67
--- /dev/null
+++ b/SeQuant/core/eval/backends/tiledarray/result.cpp
@@ -0,0 +1,62 @@
+#include <SeQuant/core/eval/backends/tiledarray/result.hpp>
+
+#ifdef SEQUANT_HAS_TILEDARRAY
+
+#include <SeQuant/core/logger.hpp>
+
+#include <tiledarray.h>
+
+#include <iomanip>
+#include <sstream>
+#include <vector>
+
+namespace sequant {
+
+void log_ta_tensor_host_memory_use([[maybe_unused]] madness::World& world,
+                                   [[maybe_unused]] std::string_view label) {
+#if defined(TA_TENSOR_MEM_PROFILE)
+  auto&& logger = Logger::instance();  // bind to the logger, don't copy it
+  if (logger.eval.level < 3) return;
+  std::vector<std::uint64_t> hwsize(world.size(), 0);
+  std::vector<std::uint64_t> currsize(world.size(), 0);
+  std::vector<std::uint64_t> actsize(world.size(), 0);
+  hwsize[world.rank()] =
+      TA::hostEnv::instance()->host_allocator_getActualHighWatermark();
+  currsize[world.rank()] =
+      TA::hostEnv::instance()->host_allocator().getCurrentSize();
+  actsize[world.rank()] =
+      TA::hostEnv::instance()->host_allocator().getActualSize();
+  world.gop.sum(hwsize.data(), hwsize.size());
+  world.gop.sum(currsize.data(), currsize.size());
+  world.gop.sum(actsize.data(), actsize.size());
+  // each rank filled only its own slot; after the sums all slots are set
+  std::ostringstream oss;
+  oss << label << ": TA_TENSOR_MEM_PROFILE allocation statistics (MiB):\n";
+  oss << std::setw(5) << "rank"  //
+      << std::setw(12) << "hw"   //
+      << std::setw(12) << "cur"  //
+      << std::setw(12) << "act"  //
+      << '\n';                   //
+  oss << "--------------------------------------------\n";
+  std::uint64_t total = 0;
+  for (auto rank = 0; rank != world.size(); ++rank) {
+    oss << std::setw(5) << rank                         //
+        << std::setw(12) << hwsize[rank] / (1 << 20)    //
+        << std::setw(12) << currsize[rank] / (1 << 20)  //
+        << std::setw(12) << actsize[rank] / (1 << 20)   //
+        << '\n';
+    total += currsize[rank] / (1 << 20);
+  }
+  oss << std::setw(5) << "total"  //
+      << std::setw(12) << ""      //
+      << std::setw(12) << total   //
+      << std::setw(12) << ""      //
+      << '\n';
+  oss << "--------------------------------------------" << '\n';
+  write_log(logger, oss.str());
+#endif
+}
+
+}  // namespace sequant
+
+#endif  // SEQUANT_HAS_TILEDARRAY
diff --git a/SeQuant/core/eval/backends/tiledarray/result.hpp b/SeQuant/core/eval/backends/tiledarray/result.hpp
new file mode 100644
index 0000000000..47cd27e775
--- /dev/null
+++ b/SeQuant/core/eval/backends/tiledarray/result.hpp
@@ -0,0 +1,435 @@
+#ifndef SEQUANT_EVAL_BACKENDS_TILEDARRAY_RESULT_HPP
+#define SEQUANT_EVAL_BACKENDS_TILEDARRAY_RESULT_HPP
+
+#ifdef SEQUANT_HAS_TILEDARRAY
+
+#include <SeQuant/core/eval/result.hpp>
+
+#include <TiledArray/einsum/tiledarray.h>
+#include <tiledarray.h>
+
+namespace sequant {
+
+namespace {
+
+///
+/// \brief Symmetrizes a TA::DistArray by summing the permuted copies of
+///        \p arr that symmetric_permutation generates (no scaling applied).
+/// \param arr The array to be symmetrized
+///
+/// \pre The rank of the array must be even
+/// \throws std::domain_error if the rank of \p arr is odd.
+/// \return The symmetrized TA::DistArray.
+///
+template <typename... Args>
+auto column_symmetrize_ta(TA::DistArray<Args...> const& arr) {
+  using ranges::views::iota;
+
+  size_t const rank = arr.trange().rank();
+  if (rank % 2 != 0)
+    throw std::domain_error("This function only supports even-ranked tensors");
+
+  TA::DistArray<Args...> result;
+
+  perm_t perm = iota(size_t{0}, rank) | ranges::to<perm_t>;
+  // annotation of the result, built from the identity ordering
+  auto const lannot = ords_to_annot(perm);
+  // first call assigns (result is uninitialized), later calls accumulate
+  auto call_back = [&result, &lannot, &arr, &perm = std::as_const(perm)]() {
+    auto const rannot = ords_to_annot(perm);
+    if (result.is_initialized()) {
+      result(lannot) += arr(rannot);
+    } else {
+      result(lannot) = arr(rannot);
+    }
+  };
+
+  auto const nparticles = rank / 2;
+  symmetric_permutation(SymmetricParticleRange{perm.begin(),               //
+                                               perm.begin() + nparticles,  //
+                                               nparticles},
+                        call_back);
+
+  TA::DistArray<Args...>::wait_for_lazy_cleanup(result.world());
+
+  return result;
+}
+
+///
+/// \brief This function implements the antisymmetrization of TA::DistArray:
+///        the bra positions are processed first, then the ket positions.
+/// \param arr The array to be antisymmetrized.
+///
+/// \param bra_rank The rank of the bra indices; must not exceed the array rank
+///
+/// \return The antisymmetrized TA::DistArray (\p arr itself if bra and ket
+///         each have at most one index).
+template <typename... Args>
+auto particle_antisymmetrize_ta(TA::DistArray<Args...> const& arr,
+                                size_t bra_rank) {
+  using ranges::views::iota;
+  size_t const rank = arr.trange().rank();
+  SEQUANT_ASSERT(bra_rank <= rank);
+  size_t const ket_rank = rank - bra_rank;
+
+  if (bra_rank <= 1 && ket_rank <= 1) {
+    // nothing to do
+    return arr;
+  }
+
+  perm_t perm = iota(size_t{0}, rank) | ranges::to<perm_t>;
+  perm_t bra_perm = iota(size_t{0}, bra_rank) | ranges::to<perm_t>;
+  perm_t ket_perm = iota(bra_rank, rank) | ranges::to<perm_t>;
+
+  const auto lannot = ords_to_annot(perm);
+  // antisymmetrizes one group of positions (bra or ket) of input_arr
+  auto process_permutations = [&lannot](const TA::DistArray<Args...>& input_arr,
+                                        size_t range_rank, perm_t range_perm,
+                                        const std::string& other_annot,
+                                        bool is_bra) -> TA::DistArray<Args...> {
+    if (range_rank <= 1) return input_arr;
+    TA::DistArray<Args...> result;
+
+    auto callback = [&](int parity) {
+      const auto range_annot = ords_to_annot(range_perm);
+      const auto annot = other_annot.empty()
+                             ? range_annot
+                             : (is_bra ? range_annot + "," + other_annot
+                                       : other_annot + "," + range_annot);
+      // sign of this permutation: +1 when parity == 0, otherwise -1
+      typename decltype(result)::numeric_type p_ = parity == 0 ? 1 : -1;
+      if (result.is_initialized()) {
+        result(lannot) += p_ * input_arr(annot);
+      } else {
+        result(lannot) = p_ * input_arr(annot);
+      }
+    };
+    antisymmetric_permutation(ParticleRange{range_perm.begin(), range_rank},
+                              callback);
+    return result;
+  };
+
+  // Process bra permutations first
+  const auto ket_annot = ket_rank == 0 ? "" : ords_to_annot(ket_perm);
+  auto result = process_permutations(arr, bra_rank, bra_perm, ket_annot, true);
+
+  // Process ket permutations
+  const auto bra_annot = bra_rank == 0 ? "" : ords_to_annot(bra_perm);
+  result = process_permutations(result, ket_rank, ket_perm, bra_annot, false);
+
+  TA::DistArray<Args...>::wait_for_lazy_cleanup(result.world());
+  return result;
+}
+
+template <typename... Args>
+inline void log_ta(Args const&... args) noexcept {
+  log_result("[TA] ", args...);  // forwards to log_result with a "[TA] " tag
+}
+
+/// Maps sequant::DeNest::True to TA::DeNest::True, anything else to False
+inline constexpr TA::DeNest to_ta_denest(DeNest d) noexcept {
+  return d == DeNest::True ? TA::DeNest::True : TA::DeNest::False;
+}
+
+}  // namespace
+
+/// TA::Tensor memory use logger
+/// If TiledArray was configured with TA_TENSOR_MEM_PROFILE set this
+/// writes the per-rank use of host memory by TA::Tensor objects to the
+/// SeQuant logger (only at eval log level 3 or higher); otherwise a no-op.
+/// \param world the world object to use for logging
+/// \param label string to prepend to the profile
+void log_ta_tensor_host_memory_use(madness::World& world,
+                                   std::string_view label = "");
+
+///
+/// \brief Result for a tensor value of TA::DistArray type.
+/// \tparam ArrayT TA::DistArray type. Tile type of ArrayT is regular tensor of
+///                scalars (not a tensor of tensors)
+///
+template <typename ArrayT, typename = std::enable_if_t<TA::detail::is_tensor_v<
+                               typename ArrayT::value_type>>>
+class ResultTensorTA final : public Result {
+ public:
+  using Result::id_t;
+  using numeric_type = typename ArrayT::numeric_type;
+
+  explicit ResultTensorTA(ArrayT arr) : Result{std::move(arr)} {}
+
+ private:
+  using this_type = ResultTensorTA<ArrayT>;
+  using annot_wrap = Annot<std::string>;
+
+  [[nodiscard]] id_t type_id() const noexcept override {
+    return id_for_type<this_type>();
+  }
+
+  [[nodiscard]] ResultPtr sum(
+      Result const& other,
+      std::array<std::any, 3> const& annot) const override {
+    SEQUANT_ASSERT(other.is<this_type>());
+    auto const a = annot_wrap{annot};
+
+    log_ta(a.lannot, " + ", a.rannot, " = ", a.this_annot, "\n");
+    // new array annotated by this_annot = this(lannot) + other(rannot)
+    ArrayT result;
+    result(a.this_annot) =
+        get<ArrayT>()(a.lannot) + other.get<ArrayT>()(a.rannot);
+    decltype(result)::wait_for_lazy_cleanup(result.world());
+    return eval_result<this_type>(std::move(result));
+  }
+
+  [[nodiscard]] ResultPtr prod(Result const& other,
+                               std::array<std::any, 3> const& annot,
+                               DeNest DeNestFlag) const override {
+    auto const a = annot_wrap{annot};
+    // tensor * scalar: scale while re-annotating from lannot to this_annot
+    if (other.is<ResultScalar<numeric_type>>()) {
+      auto result = get<ArrayT>();
+      auto scalar = other.get<numeric_type>();
+
+      log_ta(a.lannot, " * ", scalar, " = ", a.this_annot, "\n");
+
+      result(a.this_annot) = scalar * result(a.lannot);
+
+      decltype(result)::wait_for_lazy_cleanup(result.world());
+      return eval_result<this_type>(std::move(result));
+    }
+
+    if (a.this_annot.empty()) {
+      // DOT product
+      SEQUANT_ASSERT(other.is<this_type>());
+      numeric_type d =
+          TA::dot(get<ArrayT>()(a.lannot), other.get<ArrayT>()(a.rannot));
+      ArrayT::wait_for_lazy_cleanup(get<ArrayT>().world());
+      ArrayT::wait_for_lazy_cleanup(other.get<ArrayT>().world());
+
+      log_ta(a.lannot, " * ", a.rannot, " = ", d, "\n");
+
+      return eval_result<ResultScalar<numeric_type>>(d);
+    }
+
+    if (!other.is<this_type>()) {
+      // potential T * ToT: delegate to other with the operand annots swapped
+      auto annot_swap = annot;
+      std::swap(annot_swap[0], annot_swap[1]);
+      return other.prod(*this, annot_swap, DeNestFlag);
+    }
+
+    // confirmed: other.is<this_type>() is true
+
+    log_ta(a.lannot, " * ", a.rannot, " = ", a.this_annot, "\n");
+
+    ArrayT result;
+
+    result = TA::einsum(get<ArrayT>()(a.lannot), other.get<ArrayT>()(a.rannot),
+                        a.this_annot);
+    decltype(result)::wait_for_lazy_cleanup(result.world());
+    return eval_result<this_type>(std::move(result));
+  }
+
+  [[nodiscard]] ResultPtr mult_by_phase(std::int8_t factor) const override {
+    auto pre = get<ArrayT>();
+    TA::scale(pre, numeric_type(factor));
+    return eval_result<this_type>(std::move(pre));
+  }
+
+  [[nodiscard]] ResultPtr permute(
+      std::array<std::any, 2> const& ann) const override {
+    auto const pre_annot = std::any_cast<std::string>(ann[0]);
+    auto const post_annot = std::any_cast<std::string>(ann[1]);
+
+    log_ta(pre_annot, " = ", post_annot, "\n");
+
+    ArrayT result;
+    result(post_annot) = get<ArrayT>()(pre_annot);
+    ArrayT::wait_for_lazy_cleanup(result.world());
+    return eval_result<this_type>(std::move(result));
+  }
+
+  void add_inplace(Result const& other) override {
+    SEQUANT_ASSERT(other.is<this_type>());
+
+    auto& t = get<ArrayT>();
+    auto const& o = other.get<ArrayT>();
+
+    SEQUANT_ASSERT(t.trange() == o.trange());
+    auto ann = TA::detail::dummy_annotation(t.trange().rank());
+
+    log_ta(ann, " += ", ann, "\n");
+
+    t(ann) += o(ann);
+    ArrayT::wait_for_lazy_cleanup(t.world());
+  }
+
+  [[nodiscard]] ResultPtr symmetrize() const override {
+    return eval_result<this_type>(column_symmetrize_ta(get<ArrayT>()));
+  }
+
+  [[nodiscard]] ResultPtr antisymmetrize(size_t bra_rank) const override {
+    return eval_result<this_type>(
+        particle_antisymmetrize_ta(get<ArrayT>(), bra_rank));
+  }
+
+ private:
+  [[nodiscard]] std::size_t size_in_bytes() const final {
+    auto& v = get<ArrayT>();
+    auto local_size = TA::size_of<TA::MemorySpace::Host>(v);
+    v.world().gop.sum(local_size);  // combine the sizes from all ranks
+    return local_size;
+  }
+};
+/// \brief Result for a TA::DistArray whose tiles are tensors of tensors.
+template <typename ArrayT,
+          typename = std::enable_if_t<
+              TA::detail::is_tensor_of_tensor_v<typename ArrayT::value_type>>>
+class ResultTensorOfTensorTA final : public Result {
+ public:
+  using Result::id_t;
+  using numeric_type = typename ArrayT::numeric_type;
+
+  explicit ResultTensorOfTensorTA(ArrayT arr) : Result{std::move(arr)} {}
+
+ private:
+  using this_type = ResultTensorOfTensorTA<ArrayT>;
+  using annot_wrap = Annot<std::string>;
+
+  using _inner_tensor_type = typename ArrayT::value_type::value_type;
+
+  using compatible_regular_distarray_type =
+      TA::DistArray<_inner_tensor_type, typename ArrayT::policy_type>;
+
+  // Only @c that_type type is allowed for ToT * T computation
+  using that_type = ResultTensorTA<compatible_regular_distarray_type>;
+
+  [[nodiscard]] id_t type_id() const noexcept override {
+    return id_for_type<this_type>();
+  }
+
+  [[nodiscard]] ResultPtr sum(
+      Result const& other,
+      std::array<std::any, 3> const& annot) const override {
+    SEQUANT_ASSERT(other.is<this_type>());
+    auto const a = annot_wrap{annot};
+
+    log_ta(a.lannot, " + ", a.rannot, " = ", a.this_annot, "\n");
+    // new array annotated by this_annot = this(lannot) + other(rannot)
+    ArrayT result;
+    result(a.this_annot) =
+        get<ArrayT>()(a.lannot) + other.get<ArrayT>()(a.rannot);
+    decltype(result)::wait_for_lazy_cleanup(result.world());
+    return eval_result<this_type>(std::move(result));
+  }
+
+  [[nodiscard]] ResultPtr prod(Result const& other,
+                               std::array<std::any, 3> const& annot,
+                               DeNest DeNestFlag) const override {
+    auto const a = annot_wrap{annot};
+    // ToT * scalar: scale while re-annotating from lannot to this_annot
+    if (other.is<ResultScalar<numeric_type>>()) {
+      auto result = get<ArrayT>();
+      auto scalar = other.get<numeric_type>();
+
+      log_ta(a.lannot, " * ", scalar, " = ", a.this_annot, "\n");
+
+      result(a.this_annot) = scalar * result(a.lannot);
+
+      decltype(result)::wait_for_lazy_cleanup(result.world());
+      return eval_result<this_type>(std::move(result));
+    } else if (a.this_annot.empty()) {
+      // DOT product
+      SEQUANT_ASSERT(other.is<this_type>());
+      numeric_type d =
+          TA::dot(get<ArrayT>()(a.lannot), other.get<ArrayT>()(a.rannot));
+      ArrayT::wait_for_lazy_cleanup(get<ArrayT>().world());
+      ArrayT::wait_for_lazy_cleanup(other.get<ArrayT>().world());
+
+      log_ta(a.lannot, " * ", a.rannot, " = ", d, "\n");
+
+      return eval_result<ResultScalar<numeric_type>>(d);
+    }
+
+    log_ta(a.lannot, " * ", a.rannot, " = ", a.this_annot, "\n");
+    // the remaining cases dispatch on the type of other and on DeNestFlag
+    if (other.is<that_type>()) {
+      // ToT * T -> ToT
+      auto result =
+          TA::einsum(get<ArrayT>()(a.lannot),
+                     other.get<compatible_regular_distarray_type>()(a.rannot),
+                     a.this_annot);
+      return eval_result<this_type>(std::move(result));
+
+    } else if (other.is<this_type>() && DeNestFlag == DeNest::True) {
+      // ToT * ToT -> T
+      auto result = TA::einsum<TA::DeNest::True>(
+          get<ArrayT>()(a.lannot), other.get<ArrayT>()(a.rannot), a.this_annot);
+      return eval_result<that_type>(std::move(result));
+
+    } else if (other.is<this_type>() && DeNestFlag == DeNest::False) {
+      // ToT * ToT -> ToT
+      auto result = TA::einsum(get<ArrayT>()(a.lannot),
+                               other.get<ArrayT>()(a.rannot), a.this_annot);
+      return eval_result<this_type>(std::move(result));
+    } else {
+      throw invalid_operand();
+    }
+  }
+
+  [[nodiscard]] ResultPtr mult_by_phase(std::int8_t factor) const override {
+    auto pre = get<ArrayT>();
+    TA::scale(pre, numeric_type(factor));
+    return eval_result<this_type>(std::move(pre));
+  }
+
+  [[nodiscard]] ResultPtr permute(
+      std::array<std::any, 2> const& ann) const override {
+    auto const pre_annot = std::any_cast<std::string>(ann[0]);
+    auto const post_annot = std::any_cast<std::string>(ann[1]);
+
+    log_ta(pre_annot, " = ", post_annot, "\n");
+
+    ArrayT result;
+    result(post_annot) = get<ArrayT>()(pre_annot);
+    ArrayT::wait_for_lazy_cleanup(result.world());
+    return eval_result<this_type>(std::move(result));
+  }
+
+  void add_inplace(Result const& other) override {
+    SEQUANT_ASSERT(other.is<this_type>());
+
+    auto& t = get<ArrayT>();
+    auto const& o = other.get<ArrayT>();
+
+    SEQUANT_ASSERT(t.trange() == o.trange());
+    auto ann = TA::detail::dummy_annotation(t.trange().rank());
+
+    log_ta(ann, " += ", ann, "\n");
+
+    t(ann) += o(ann);
+    ArrayT::wait_for_lazy_cleanup(t.world());
+  }
+
+  [[nodiscard]] ResultPtr symmetrize() const override {
+    // not implemented yet: callers receive a null ResultPtr
+    return nullptr;
+  }
+
+  [[nodiscard]] ResultPtr antisymmetrize(size_t /*bra_rank*/) const override {
+    // not implemented yet: callers receive a null ResultPtr
+    return nullptr;
+  }
+
+ private:
+  [[nodiscard]] std::size_t size_in_bytes() const final {
+    auto& v = get<ArrayT>();
+    auto local_size = TA::size_of<TA::MemorySpace::Host>(v);
+    v.world().gop.sum(local_size);  // combine the sizes from all ranks
+    return local_size;
+  }
+};
+
+}  // namespace sequant
+
+#endif  // SEQUANT_HAS_TILEDARRAY
+
+#endif  // SEQUANT_EVAL_BACKENDS_TILEDARRAY_RESULT_HPP
diff --git a/SeQuant/core/eval/cache_manager.cpp b/SeQuant/core/eval/cache_manager.cpp
index 72e90d65b2..8895824fe2 100644
--- a/SeQuant/core/eval/cache_manager.cpp
+++ b/SeQuant/core/eval/cache_manager.cpp
@@ -5,7 +5,7 @@
 #include <SeQuant/core/eval/cache_manager.hpp>
 #include <SeQuant/core/eval/result.hpp>
 
-#include <SeQuant/core/eval_node.hpp>
+#include <SeQuant/core/eval/eval_node.hpp>
 
 namespace sequant {
 
diff --git a/SeQuant/core/eval/cache_manager.hpp b/SeQuant/core/eval/cache_manager.hpp
index cc7c68ffe9..27ee25a534 100644
--- a/SeQuant/core/eval/cache_manager.hpp
+++ b/SeQuant/core/eval/cache_manager.hpp
@@ -3,8 +3,8 @@
 
 #include <SeQuant/core/asy_cost.hpp>
 #include <SeQuant/core/container.hpp>
-#include <SeQuant/core/eval/eval_fwd.hpp>
-#include <SeQuant/core/eval_node.hpp>
+#include <SeQuant/core/eval/eval_node.hpp>
+#include <SeQuant/core/eval/fwd.hpp>
 #include <SeQuant/core/expr.hpp>
 
 #include <memory>
diff --git a/SeQuant/core/eval/eval.hpp b/SeQuant/core/eval/eval.hpp
index 825fd792a9..5da4fb0c0c 100644
--- a/SeQuant/core/eval/eval.hpp
+++ b/SeQuant/core/eval/eval.hpp
@@ -1,20 +1,18 @@
 #ifndef SEQUANT_EVAL_EVAL_HPP
 #define SEQUANT_EVAL_EVAL_HPP
 
+#include <SeQuant/core/eval/fwd.hpp>
+
 #include <SeQuant/core/container.hpp>
 #include <SeQuant/core/eval/cache_manager.hpp>
-#include <SeQuant/core/eval/eval_fwd.hpp>
+#include <SeQuant/core/eval/eval_node.hpp>
 #include <SeQuant/core/eval/result.hpp>
-#include <SeQuant/core/eval_node.hpp>
 #include <SeQuant/core/expr.hpp>
 #include <SeQuant/core/logger.hpp>
 #include <SeQuant/core/meta.hpp>
 #include <SeQuant/core/parse.hpp>
 #include <SeQuant/core/utility/macros.hpp>
 
-#include <btas/btas.h>
-#include <tiledarray.h>
-
 #include <chrono>
 #include <range/v3/numeric.hpp>
 #include <range/v3/view.hpp>
@@ -60,9 +58,6 @@ enum struct EvalMode {
   SumInplace,
   Symmetrize,
   Antisymmetrize,
-  /// NNS projection of Wang-Knizia biorthogonalization
-  /// @sa ResultPtr::biorthogonal_nns_project
-  BiorthogonalNNSProject,
   Unknown
 };
 
@@ -80,18 +75,17 @@ enum struct EvalMode {
 }
 
 [[nodiscard]] constexpr auto to_string(EvalMode mode) noexcept {
-  return (mode == EvalMode::Constant)                 ? "Constant"
-         : (mode == EvalMode::Variable)               ? "Variable"
-         : (mode == EvalMode::Tensor)                 ? "Tensor"
-         : (mode == EvalMode::Permute)                ? "Permute"
-         : (mode == EvalMode::Product)                ? "Product"
-         : (mode == EvalMode::MultByPhase)            ? "MultByPhase"
-         : (mode == EvalMode::Sum)                    ? "Sum"
-         : (mode == EvalMode::SumInplace)             ? "SumInplace"
-         : (mode == EvalMode::Symmetrize)             ? "Symmetrize"
-         : (mode == EvalMode::Antisymmetrize)         ? "Antisymmetrize"
-         : (mode == EvalMode::BiorthogonalNNSProject) ? "BiorthogonalNNSProject"
-                                                      : "??";
+  return (mode == EvalMode::Constant)         ? "Constant"
+         : (mode == EvalMode::Variable)       ? "Variable"
+         : (mode == EvalMode::Tensor)         ? "Tensor"
+         : (mode == EvalMode::Permute)        ? "Permute"
+         : (mode == EvalMode::Product)        ? "Product"
+         : (mode == EvalMode::MultByPhase)    ? "MultByPhase"
+         : (mode == EvalMode::Sum)            ? "Sum"
+         : (mode == EvalMode::SumInplace)     ? "SumInplace"
+         : (mode == EvalMode::Symmetrize)     ? "Symmetrize"
+         : (mode == EvalMode::Antisymmetrize) ? "Antisymmetrize"
+                                              : "??";
 }
 
 enum struct CacheMode { Store, Access, Release };
@@ -315,8 +309,8 @@ ResultPtr evaluate(Node const& node,  //
       auto const de_nest =
           node.left()->tot() && node.right()->tot() && !node->tot();
       time = timed_eval_inplace([&]() {
-        result = left->prod(*right, ann,
-                            de_nest ? TA::DeNest::True : TA::DeNest::False);
+        result =
+            left->prod(*right, ann, de_nest ? DeNest::True : DeNest::False);
       });
     }
   }
@@ -543,30 +537,6 @@ ResultPtr evaluate_antisymm(Args&&... args) {
   return result;
 }
 
-/// \brief Calls sequant::evaluate followed by
-/// ResultPtr::biorthogonal_nns_project \return Evaluated result as ResultPtr.
-/// \sa ResultPtr::biorthogonal_nns_project
-template <Trace EvalTrace = Trace::Default, typename... Args>
-ResultPtr evaluate_biorthogonal_nns_project(Args&&... args) {
-  ResultPtr pre = evaluate<EvalTrace>(std::forward<Args>(args)...);
-  SEQUANT_ASSERT(pre);
-
-  auto const& n0 = node0(arg0(std::forward<Args>(args)...));
-
-  ResultPtr result;
-  auto time = timed_eval_inplace([&]() {
-    result = pre->biorthogonal_nns_project(n0->as_tensor().bra_rank());
-  });
-
-  // logging
-  if constexpr (trace(EvalTrace)) {
-    auto stat = log::EvalStat{.mode = log::EvalMode::BiorthogonalNNSProject,
-                              .time = time,
-                              .memory = log::bytes(pre, result)};
-    log::eval(stat, n0->label());
-  }
-  return result;
-}
 }  // namespace sequant
 
 #endif  // SEQUANT_EVAL_EVAL_HPP
diff --git a/SeQuant/core/eval_expr.cpp b/SeQuant/core/eval/eval_expr.cpp
similarity index 99%
rename from SeQuant/core/eval_expr.cpp
rename to SeQuant/core/eval/eval_expr.cpp
index 563daaadac..d8483765f1 100644
--- a/SeQuant/core/eval_expr.cpp
+++ b/SeQuant/core/eval/eval_expr.cpp
@@ -2,8 +2,8 @@
 #include <SeQuant/core/complex.hpp>
 #include <SeQuant/core/container.hpp>
 #include <SeQuant/core/context.hpp>
-#include <SeQuant/core/eval_expr.hpp>
-#include <SeQuant/core/eval_node.hpp>
+#include <SeQuant/core/eval/eval_expr.hpp>
+#include <SeQuant/core/eval/eval_node.hpp>
 #include <SeQuant/core/expr.hpp>
 #include <SeQuant/core/hash.hpp>
 #include <SeQuant/core/index.hpp>
diff --git a/SeQuant/core/eval_expr.hpp b/SeQuant/core/eval/eval_expr.hpp
similarity index 84%
rename from SeQuant/core/eval_expr.hpp
rename to SeQuant/core/eval/eval_expr.hpp
index 5069031b15..cafb8c2149 100644
--- a/SeQuant/core/eval_expr.hpp
+++ b/SeQuant/core/eval/eval_expr.hpp
@@ -1,8 +1,9 @@
-#ifndef SEQUANT_EVAL_EXPR_HPP
-#define SEQUANT_EVAL_EXPR_HPP
+#ifndef SEQUANT_EVAL_EVAL_EXPR_HPP
+#define SEQUANT_EVAL_EVAL_EXPR_HPP
 
 #include <SeQuant/core/binary_node.hpp>
 #include <SeQuant/core/container.hpp>
+#include <SeQuant/core/eval/fwd.hpp>
 #include <SeQuant/core/expr.hpp>
 #include <SeQuant/core/index.hpp>
 #include <SeQuant/core/utility/macros.hpp>
@@ -248,68 +249,6 @@ struct EvalOpSetter {
   void set(EvalExpr& expr, EvalOp op) { expr.op_type_ = op; }
 };
 
-///
-/// \brief This class extends the EvalExpr class by adding an annot() method so
-///        that it can be used to evaluate using TiledArray.
-///
-class EvalExprTA final : public EvalExpr {
- public:
-  template <typename... Args, typename = std::enable_if_t<
-                                  std::is_constructible_v<EvalExpr, Args...>>>
-  EvalExprTA(Args&&... args) : EvalExpr{std::forward<Args>(args)...} {
-    annot_ = indices_annot();
-  }
-
-  [[nodiscard]] inline auto const& annot() const noexcept { return annot_; }
-
- private:
-  std::string annot_;
-};
-
-///
-/// \brief This class extends the EvalExpr class by adding an annot() method so
-///        that it can be used to evaluate using BTAS.
-///
-class EvalExprBTAS final : public EvalExpr {
- public:
-  using annot_t = container::svector<long>;
-
-  ///
-  /// \param bk iterable of Index objects.
-  /// \return vector of long-type hash values
-  ///         of the labels of indices in \c bk
-  ///
-  template <typename Iterable>
-  static auto index_hash(Iterable&& bk) {
-    return ranges::views::transform(
-        std::forward<Iterable>(bk), [](auto const& idx) {
-          //
-          // WARNING!
-          // The BTAS uses long for scalar indexing by default.
-          // Hence, here we explicitly cast the size_t values to long
-          // Which is a potentially narrowing conversion leading to
-          // integral overflow. Hence, the values in the returned
-          // container are mixed negative and positive integers (long type)
-          //
-          return static_cast<long>(sequant::hash::value(Index{idx}.label()));
-        });
-  }
-
-  template <typename... Args, typename = std::enable_if_t<
-                                  std::is_constructible_v<EvalExpr, Args...>>>
-  EvalExprBTAS(Args&&... args) : EvalExpr{std::forward<Args>(args)...} {
-    annot_ = index_hash(canon_indices()) | ranges::to<annot_t>;
-  }
-
-  ///
-  /// \return Annotation (container::svector<long>) for BTAS::Tensor.
-  ///
-  [[nodiscard]] inline annot_t const& annot() const noexcept { return annot_; }
-
- private:
-  annot_t annot_;
-};
-
 namespace meta {
 
 namespace detail {
@@ -350,6 +289,48 @@ template <typename Rng>
 concept eval_node_range =
     std::ranges::range<Rng> && eval_node<std::ranges::range_value_t<Rng>>;
 
+///
+/// \brief Satisfied by a type \c T for which `t.annot()` is a valid call that
+///        yields a non-void type.
+///
+template <typename T>
+concept has_annot = requires(T t) {
+  t.annot();  // must be callable with no arguments
+  requires !std::is_void_v<decltype(t.annot())>;
+};
+
+///
+/// \brief Satisfied by an eval_node whose dereferenced type satisfies the
+///        has_annot concept.
+/// \par Examples
+///          * `static_assert(!meta::can_evaluate<EvalNode<EvalExpr>>)`
+///          * `static_assert(meta::can_evaluate<EvalNodeTA>)` (where EvalNodeTA
+///            is defined in backends/tiledarray/eval_expr.hpp)
+///
+template <typename T>
+concept can_evaluate = eval_node<T> && requires(T n) {
+  { *n } -> has_annot;
+};
+
+///
+/// \brief Satisfied by a range whose value type satisfies can_evaluate.
+///
+template <typename Rng>
+concept can_evaluate_range =
+    std::ranges::range<Rng> && can_evaluate<std::ranges::range_value_t<Rng>>;
+
+///
+/// \brief Satisfied when \c Node satisfies can_evaluate and calling an object
+///        of type \c F with a single `Node const&` argument yields exactly
+///        ResultPtr (checked via std::same_as, so convertible types do not
+///        qualify).
+///
+template <typename Node, typename F>
+concept leaf_node_evaluator =
+    can_evaluate<Node> && requires(F f, Node const& n) {
+      { f(n) } -> std::same_as<ResultPtr>;
+    };
+
 }  // namespace meta
 
 namespace impl {
@@ -362,6 +343,9 @@ FullBinaryNode<EvalExpr> binarize(ExprPtr const&);
 template <meta::eval_expr T>
 using EvalNode = FullBinaryNode<T>;
 
+static_assert(meta::eval_node<EvalNode<EvalExpr>>);
+static_assert(!meta::can_evaluate<EvalNode<EvalExpr>>);
+
 ///
 /// Creates a binary tree for evaluation.
 ///
@@ -460,4 +444,4 @@ ExprPtr to_expr(meta::eval_node auto const& node) {
 
 }  // namespace sequant
 
-#endif  // SEQUANT_EVAL_EXPR_HPP
+#endif  // SEQUANT_EVAL_EVAL_EXPR_HPP
diff --git a/SeQuant/core/eval/eval_fwd.hpp b/SeQuant/core/eval/eval_fwd.hpp
deleted file mode 100644
index 0c2113e854..0000000000
--- a/SeQuant/core/eval/eval_fwd.hpp
+++ /dev/null
@@ -1,73 +0,0 @@
-//
-// Created by Bimal Gaudel on 3/27/25.
-//
-
-#ifndef SEQUANT_EVAL_FWD_HPP
-#define SEQUANT_EVAL_FWD_HPP
-
-#include <SeQuant/core/eval_expr.hpp>
-
-namespace sequant {
-
-class CacheManager;
-class Result;
-
-///
-/// \brief Managed pointer to the result of an evaluation.
-///
-using ResultPtr = std::shared_ptr<Result>;
-
-namespace meta {
-
-///
-/// \brief Satisfied by a type with a method named `annot` that returns
-///        a non-void type.
-///
-template <typename T>
-concept has_annot = requires(T t) {
-  t.annot();
-  requires !std::is_void_v<decltype(t.annot())>;
-};
-
-///
-/// \brief Satisfied by an eval_node whose dereferenced type satisfies the
-///        has_annot method.
-/// \example
-///          * `static_assert(!meta::can_evaluate<EvalNode<EvalExpr>>)`
-///          * `static_assert(meta::can_evaluate<EvalNode<EvalExprTA>>)`
-///
-template <typename T>
-concept can_evaluate = eval_node<T> && requires(T n) {
-  { *n } -> has_annot;
-};
-
-///
-/// \brief Satisfied by a range type of objects satisfying can_evaluate.
-///
-template <typename Rng>
-concept can_evaluate_range =
-    std::ranges::range<Rng> && can_evaluate<std::ranges::range_value_t<Rng>>;
-
-///
-/// \brief \tparam F is a leaf node evaluator of type \tparam Node if
-///        an object (a function object) of type \tparam F returns ResultPtr
-///        when called with the single argument of const ref type to
-///        \tparam Node and the \tparam Node satisfies can_evaluate.
-///
-template <typename Node, typename F>
-concept leaf_node_evaluator =
-    can_evaluate<Node> && requires(F f, Node const& n) {
-      { f(n) } -> std::same_as<ResultPtr>;
-    };
-}  // namespace meta
-
-static_assert(meta::eval_node<EvalNode<EvalExpr>>);
-static_assert(meta::eval_node<EvalNode<EvalExprTA>>);
-static_assert(meta::eval_node<EvalNode<EvalExprBTAS>>);
-
-static_assert(!meta::can_evaluate<EvalNode<EvalExpr>>);
-static_assert(meta::can_evaluate<EvalNode<EvalExprTA>>);
-static_assert(meta::can_evaluate<EvalNode<EvalExprBTAS>>);
-
-}  // namespace sequant
-#endif  // SEQUANT_EVAL_FWD_HPP
diff --git a/SeQuant/core/eval_node.hpp b/SeQuant/core/eval/eval_node.hpp
similarity index 98%
rename from SeQuant/core/eval_node.hpp
rename to SeQuant/core/eval/eval_node.hpp
index 8be58232ca..2be333062a 100644
--- a/SeQuant/core/eval_node.hpp
+++ b/SeQuant/core/eval/eval_node.hpp
@@ -2,12 +2,12 @@
 // Created by Bimal Gaudel on 5/24/21.
 //
 
-#ifndef SEQUANT_EVAL_NODE_HPP
-#define SEQUANT_EVAL_NODE_HPP
+#ifndef SEQUANT_EVAL_EVAL_NODE_HPP
+#define SEQUANT_EVAL_EVAL_NODE_HPP
 
 #include <SeQuant/core/asy_cost.hpp>
 #include <SeQuant/core/binary_node.hpp>
-#include <SeQuant/core/eval_expr.hpp>
+#include <SeQuant/core/eval/eval_expr.hpp>
 #include <SeQuant/core/expr.hpp>
 #include <SeQuant/core/math.hpp>
 #include <SeQuant/core/utility/macros.hpp>
@@ -252,4 +252,4 @@ AsyCost min_storage(meta::eval_node auto const& node) {
 
 }  // namespace sequant
 
-#endif  // SEQUANT_EVAL_NODE_HPP
+#endif  // SEQUANT_EVAL_EVAL_NODE_HPP
diff --git a/SeQuant/core/eval/fwd.hpp b/SeQuant/core/eval/fwd.hpp
new file mode 100644
index 0000000000..f378d63f3f
--- /dev/null
+++ b/SeQuant/core/eval/fwd.hpp
@@ -0,0 +1,27 @@
+//
+// Created by Bimal Gaudel on 3/27/25.
+//
+
+#ifndef SEQUANT_EVAL_FWD_HPP
+#define SEQUANT_EVAL_FWD_HPP
+
+#include <memory>
+
+namespace sequant {
+
+/// Backend-agnostic flag passed to Result::prod for tensor-of-tensor products:
+/// True requests a de-nested (flattened, regular) tensor result, False keeps it
+/// nested. NB: True has underlying value 0, so never convert this to bool.
+enum class DeNest { True, False };
+
+class CacheManager;
+class Result;
+
+///
+/// \brief Shared-ownership handle (std::shared_ptr) to an evaluation Result.
+///
+using ResultPtr = std::shared_ptr<Result>;
+
+}  // namespace sequant
+
+#endif  // SEQUANT_EVAL_FWD_HPP
diff --git a/SeQuant/core/eval/result.cpp b/SeQuant/core/eval/result.cpp
index c0998b6ac4..151b495bf1 100644
--- a/SeQuant/core/eval/result.cpp
+++ b/SeQuant/core/eval/result.cpp
@@ -9,48 +9,4 @@ Result::id_t Result::next_id() noexcept {
 
 bool Result::has_value() const noexcept { return value_.has_value(); }
 
-void log_ta_tensor_host_memory_use([[maybe_unused]] madness::World& world,
-                                   [[maybe_unused]] std::string_view label) {
-#if defined(TA_TENSOR_MEM_PROFILE)
-  auto logger = Logger::instance();
-  if (logger.eval.level < 3) return;
-  std::vector<std::uint64_t> hwsize(world.size(), 0);
-  std::vector<std::uint64_t> currsize(world.size(), 0);
-  std::vector<std::uint64_t> actsize(world.size(), 0);
-  hwsize[world.rank()] =
-      TA::hostEnv::instance()->host_allocator_getActualHighWatermark();
-  currsize[world.rank()] =
-      TA::hostEnv::instance()->host_allocator().getCurrentSize();
-  actsize[world.rank()] =
-      TA::hostEnv::instance()->host_allocator().getActualSize();
-  world.gop.sum(hwsize.data(), hwsize.size());
-  world.gop.sum(currsize.data(), currsize.size());
-  world.gop.sum(actsize.data(), actsize.size());
-
-  std::ostringstream oss;
-  oss << label << ": TA_TENSOR_MEM_PROFILE allocation statistics (MiB):\n";
-  oss << std::setw(5) << "rank"  //
-      << std::setw(12) << "hw"   //
-      << std::setw(12) << "cur"  //
-      << std::setw(12) << "act"  //
-      << '\n';                   //
-  oss << "--------------------------------------------\n";
-  std::uint64_t total = 0;
-  for (auto rank = 0; rank != world.size(); ++rank) {
-    oss << std::setw(5) << rank                         //
-        << std::setw(12) << hwsize[rank] / (1 << 20)    //
-        << std::setw(12) << currsize[rank] / (1 << 20)  //
-        << std::setw(12) << actsize[rank] / (1 << 20)   //
-        << '\n';
-    total += currsize[rank] / (1 << 20);
-  }
-  oss << std::setw(5) << "total"  //
-      << std::setw(12) << ""      //
-      << std::setw(12) << total   //
-      << std::setw(12) << ""      //
-      << '\n';
-  oss << "--------------------------------------------" << std::endl;
-  write_log(logger, oss.str());
-#endif
-}
 }  // namespace sequant
diff --git a/SeQuant/core/eval/result.hpp b/SeQuant/core/eval/result.hpp
index 6f3cc48d5e..cb1c1e55f6 100644
--- a/SeQuant/core/eval/result.hpp
+++ b/SeQuant/core/eval/result.hpp
@@ -1,17 +1,15 @@
 #ifndef SEQUANT_EVAL_RESULT_HPP
 #define SEQUANT_EVAL_RESULT_HPP
 
+#include <SeQuant/core/eval/fwd.hpp>
+
 #include <SeQuant/core/algorithm.hpp>
 #include <SeQuant/core/container.hpp>
-#include <SeQuant/core/eval/eval_fwd.hpp>
 #include <SeQuant/core/hash.hpp>
 #include <SeQuant/core/index.hpp>
 #include <SeQuant/core/logger.hpp>
 #include <SeQuant/core/utility/macros.hpp>
 
-#include <TiledArray/einsum/tiledarray.h>
-#include <btas/btas.h>
-#include <tiledarray.h>
 #include <range/v3/numeric.hpp>
 #include <range/v3/view.hpp>
 
@@ -136,372 +134,12 @@ std::string ords_to_annot(RngOfOrdinals const& ords) {
          ranges::to<std::string>;
 }
 
-///
-/// \brief This function implements the symmetrization of TA::DistArray.
-///
-/// \param arr The array to be symmetrized
-///
-/// \pre The rank of the array must be even
-///
-/// \return The symmetrized TA::DistArray.
-///
-template <typename... Args>
-auto column_symmetrize_ta(TA::DistArray<Args...> const& arr) {
-  using ranges::views::iota;
-
-  size_t const rank = arr.trange().rank();
-  if (rank % 2 != 0)
-    throw std::domain_error("This function only supports even-ranked tensors");
-
-  TA::DistArray<Args...> result;
-
-  perm_t perm = iota(size_t{0}, rank) | ranges::to<perm_t>;
-
-  auto const lannot = ords_to_annot(perm);
-
-  auto call_back = [&result, &lannot, &arr, &perm = std::as_const(perm)]() {
-    auto const rannot = ords_to_annot(perm);
-    if (result.is_initialized()) {
-      result(lannot) += arr(rannot);
-    } else {
-      result(lannot) = arr(rannot);
-    }
-  };
-
-  auto const nparticles = rank / 2;
-  symmetric_permutation(SymmetricParticleRange{perm.begin(),               //
-                                               perm.begin() + nparticles,  //
-                                               nparticles},
-                        call_back);
-
-  TA::DistArray<Args...>::wait_for_lazy_cleanup(result.world());
-
-  return result;
-}
-
-///
-/// \brief This function implements the antisymmetrization of TA::DistArray.
-///
-/// \param arr The array to be antisymmetrized.
-///
-/// \param bra_rank The rank of the bra indices
-///
-/// \return The antisymmetrized TA::DistArray.
-///
-template <typename... Args>
-auto particle_antisymmetrize_ta(TA::DistArray<Args...> const& arr,
-                                size_t bra_rank) {
-  using ranges::views::iota;
-  size_t const rank = arr.trange().rank();
-  SEQUANT_ASSERT(bra_rank <= rank);
-  size_t const ket_rank = rank - bra_rank;
-
-  if (bra_rank <= 1 && ket_rank <= 1) {
-    // nothing to do
-    return arr;
-  }
-
-  perm_t perm = iota(size_t{0}, rank) | ranges::to<perm_t>;
-  perm_t bra_perm = iota(size_t{0}, bra_rank) | ranges::to<perm_t>;
-  perm_t ket_perm = iota(bra_rank, rank) | ranges::to<perm_t>;
-
-  const auto lannot = ords_to_annot(perm);
-
-  auto process_permutations = [&lannot](const TA::DistArray<Args...>& input_arr,
-                                        size_t range_rank, perm_t range_perm,
-                                        const std::string& other_annot,
-                                        bool is_bra) -> TA::DistArray<Args...> {
-    if (range_rank <= 1) return input_arr;
-    TA::DistArray<Args...> result;
-
-    auto callback = [&](int parity) {
-      const auto range_annot = ords_to_annot(range_perm);
-      const auto annot = other_annot.empty()
-                             ? range_annot
-                             : (is_bra ? range_annot + "," + other_annot
-                                       : other_annot + "," + range_annot);
-
-      typename decltype(result)::numeric_type p_ = parity == 0 ? 1 : -1;
-      if (result.is_initialized()) {
-        result(lannot) += p_ * input_arr(annot);
-      } else {
-        result(lannot) = p_ * input_arr(annot);
-      }
-    };
-    antisymmetric_permutation(ParticleRange{range_perm.begin(), range_rank},
-                              callback);
-    return result;
-  };
-
-  // Process bra permutations first
-  const auto ket_annot = ket_rank == 0 ? "" : ords_to_annot(ket_perm);
-  auto result = process_permutations(arr, bra_rank, bra_perm, ket_annot, true);
-
-  // Process ket permutations
-  const auto bra_annot = bra_rank == 0 ? "" : ords_to_annot(bra_perm);
-  result = process_permutations(result, ket_rank, ket_perm, bra_annot, false);
-
-  TA::DistArray<Args...>::wait_for_lazy_cleanup(result.world());
-  return result;
-}
-
-///
-/// \brief This function implements the symmetrization of btas::Tensor.
-///
-/// \param arr The tensor to be symmetrized.
-///
-/// \pre The rank of the tensor must be even.
-///
-/// \return The symmetrized btas::Tensor.
-///
-template <typename... Args>
-auto column_symmetrize_btas(btas::Tensor<Args...> const& arr) {
-  using ranges::views::iota;
-
-  size_t const rank = arr.rank();
-
-  if (rank % 2 != 0)
-    throw std::domain_error("This function only supports even-ranked tensors");
-
-  perm_t perm = iota(size_t{0}, rank) | ranges::to<perm_t>;
-
-  auto const lannot = perm;
-
-  auto result = btas::Tensor<Args...>{arr.range()};
-  result.fill(0);
-
-  auto call_back = [&result, &lannot, &arr, &perm = std::as_const(perm)]() {
-    btas::Tensor<Args...> temp;
-    btas::permute(arr, lannot, temp, perm);
-    result += temp;
-  };
-
-  auto const nparticles = rank / 2;
-  symmetric_permutation(SymmetricParticleRange{perm.begin(),               //
-                                               perm.begin() + nparticles,  //
-                                               nparticles},
-                        call_back);
-
-  return result;
-}
-
-///
-/// \brief This function implements the antisymmetrization of btas::Tensor.
-///
-/// \param arr The tensor to be antisymmetrized
-///
-/// \param bra_rank The rank of the bra indices
-///
-/// \return The antisymmetrized btas::Tensor.
-///
-template <typename... Args>
-auto particle_antisymmetrize_btas(btas::Tensor<Args...> const& arr,
-                                  size_t bra_rank) {
-  using ranges::views::concat;
-  using ranges::views::iota;
-  size_t const rank = arr.rank();
-  SEQUANT_ASSERT(bra_rank <= rank);
-  size_t const ket_rank = rank - bra_rank;
-
-  perm_t bra_perm = iota(size_t{0}, bra_rank) | ranges::to<perm_t>;
-  perm_t ket_perm = iota(bra_rank, rank) | ranges::to<perm_t>;
-  const auto lannot = iota(size_t{0}, rank) | ranges::to<perm_t>;
-
-  auto process_permutations = [&lannot](const btas::Tensor<Args...>& input_arr,
-                                        size_t range_rank, perm_t range_perm,
-                                        const perm_t& other_perm, bool is_bra) {
-    if (range_rank <= 1) return input_arr;
-    btas::Tensor<Args...> result{input_arr.range()};
-
-    auto callback = [&](int parity) {
-      const auto annot =
-          is_bra ? concat(range_perm, other_perm) | ranges::to<perm_t>()
-                 : concat(other_perm, range_perm) | ranges::to<perm_t>();
-
-      typename decltype(result)::numeric_type p_ = parity == 0 ? 1 : -1;
-      btas::Tensor<Args...> temp;
-      btas::permute(input_arr, lannot, temp, annot);
-      btas::scal(p_, temp);
-      result += temp;
-    };
-
-    antisymmetric_permutation(ParticleRange{range_perm.begin(), range_rank},
-                              callback);
-    return result;
-  };
-  // Process bra permutations first
-  const auto ket_annot = ket_rank == 0 ? perm_t{} : ket_perm;
-  auto result = process_permutations(arr, bra_rank, bra_perm, ket_annot, true);
-
-  // Process ket permutations if needed
-  const auto bra_annot = bra_rank == 0 ? perm_t{} : bra_perm;
-  result = process_permutations(result, ket_rank, ket_perm, bra_annot, false);
-
-  return result;
-}
-
-/// \brief This function is used to implement
-/// ResultPtr::biorthogonal_nns_project for TA::DistArray
-///
-/// \param arr The array to be "cleaned up"
-/// \param bra_rank The rank of the bra indices
-///
-/// \return The cleaned TA::DistArray.
-template <typename... Args>
-auto biorthogonal_nns_project_ta(TA::DistArray<Args...> const& arr,
-                                 size_t bra_rank) {
-  using ranges::views::iota;
-  size_t const rank = arr.trange().rank();
-  SEQUANT_ASSERT(bra_rank <= rank);
-  size_t const ket_rank = rank - bra_rank;
-
-  if (rank <= 4) {
-    return arr;
-  }
-
-  using numeric_type = typename TA::DistArray<Args...>::numeric_type;
-
-  size_t factorial_ket = 1;
-  for (size_t i = 2; i <= ket_rank; ++i) {
-    factorial_ket *= i;
-  }
-  numeric_type norm_factor = numeric_type(1) / numeric_type(factorial_ket);
-
-  TA::DistArray<Args...> result;
-
-  perm_t perm = iota(size_t{0}, rank) | ranges::to<perm_t>;
-  perm_t bra_perm = iota(size_t{0}, bra_rank) | ranges::to<perm_t>;
-  perm_t ket_perm = iota(bra_rank, rank) | ranges::to<perm_t>;
-
-  const auto lannot = ords_to_annot(perm);
-
-  auto process_permutations = [&lannot](const TA::DistArray<Args...>& input_arr,
-                                        size_t range_rank, perm_t range_perm,
-                                        const std::string& other_annot,
-                                        bool is_bra) -> TA::DistArray<Args...> {
-    if (range_rank <= 1) return input_arr;
-    TA::DistArray<Args...> result;
-
-    auto callback = [&]([[maybe_unused]] int parity) {
-      const auto range_annot = ords_to_annot(range_perm);
-      const auto annot = other_annot.empty()
-                             ? range_annot
-                             : (is_bra ? range_annot + "," + other_annot
-                                       : other_annot + "," + range_annot);
-
-      // ignore parity, all permutations get same coefficient
-      numeric_type p_ = 1;
-      if (result.is_initialized()) {
-        result(lannot) += p_ * input_arr(annot);
-      } else {
-        result(lannot) = p_ * input_arr(annot);
-      }
-    };
-    antisymmetric_permutation(ParticleRange{range_perm.begin(), range_rank},
-                              callback);
-    return result;
-  };
-
-  // identity term with coefficient +1
-  result(lannot) = arr(lannot);
-
-  // process only ket permutations with coefficient norm_factor
-  if (ket_rank > 1) {
-    const auto bra_annot = bra_rank == 0 ? "" : ords_to_annot(bra_perm);
-    auto ket_result =
-        process_permutations(arr, ket_rank, ket_perm, bra_annot, false);
-
-    result(lannot) -= norm_factor * ket_result(lannot);
-  }
-
-  TA::DistArray<Args...>::wait_for_lazy_cleanup(result.world());
-  return result;
-}
-
-/// \brief This function is used to implement
-/// ResultPtr::biorthogonal_nns_project for btas::Tensor
-///
-/// \param arr The array to be "cleaned up"
-/// \param bra_rank The rank of the bra indices
-///
-/// \return The cleaned btas::Tensor.
-template <typename... Args>
-auto biorthogonal_nns_project_btas(btas::Tensor<Args...> const& arr,
-                                   size_t bra_rank) {
-  using ranges::views::concat;
-  using ranges::views::iota;
-  size_t const rank = arr.rank();
-  SEQUANT_ASSERT(bra_rank <= rank);
-  size_t const ket_rank = rank - bra_rank;
-
-  if (rank <= 4) {
-    return arr;
-  }
-
-  using numeric_type = typename btas::Tensor<Args...>::numeric_type;
-
-  size_t factorial_ket = 1;
-  for (size_t i = 2; i <= ket_rank; ++i) {
-    factorial_ket *= i;
-  }
-  numeric_type norm_factor = numeric_type(1) / numeric_type(factorial_ket);
-
-  perm_t bra_perm = iota(size_t{0}, bra_rank) | ranges::to<perm_t>;
-  perm_t ket_perm = iota(bra_rank, rank) | ranges::to<perm_t>;
-  const auto lannot = iota(size_t{0}, rank) | ranges::to<perm_t>;
-
-  auto process_permutations = [&lannot](const btas::Tensor<Args...>& input_arr,
-                                        size_t range_rank, perm_t range_perm,
-                                        const perm_t& other_perm, bool is_bra) {
-    if (range_rank <= 1) return input_arr;
-    btas::Tensor<Args...> result{input_arr.range()};
-    result.fill(0);
-
-    auto callback = [&]([[maybe_unused]] int parity) {
-      const auto annot =
-          is_bra ? concat(range_perm, other_perm) | ranges::to<perm_t>()
-                 : concat(other_perm, range_perm) | ranges::to<perm_t>();
-
-      // ignore parity, all permutations get same coefficient
-      numeric_type p_ = 1;
-      btas::Tensor<Args...> temp;
-      btas::permute(input_arr, lannot, temp, annot);
-      btas::scal(p_, temp);
-      result += temp;
-    };
-
-    antisymmetric_permutation(ParticleRange{range_perm.begin(), range_rank},
-                              callback);
-    return result;
-  };
-
-  // identity term with coefficient +1
-  auto result = arr;
-
-  // process only ket permutations with coefficient norm_factor
-  if (ket_rank > 1) {
-    const auto bra_annot = bra_rank == 0 ? perm_t{} : bra_perm;
-    auto ket_result =
-        process_permutations(arr, ket_rank, ket_perm, bra_annot, false);
-
-    btas::scal(norm_factor, ket_result);
-    result -= ket_result;
-  }
-
-  return result;
-}
-
 template <typename... Args>
 inline void log_result(Args const&... args) noexcept {
   auto& l = Logger::instance();
   if (l.eval.level > 1) write_log(l, args...);
 }
 
-template <typename... Args>
-inline void log_ta(Args const&... args) noexcept {
-  log_result("[TA] ", args...);
-}
-
 template <typename... Args>
 inline void log_constant(Args const&... args) noexcept {
   log_result("[CONST] ", args...);
@@ -509,15 +147,6 @@ inline void log_constant(Args const&... args) noexcept {
 
 }  // namespace
 
-/// TA::Tensor memory use logger
-/// If TiledArray was configured with TA_TENSOR_MEM_PROFILE set this
-/// prints the current use of memory by TA::Tensor objects in host memory space
-/// to \p os .
-/// \param world the world object to use for logging
-/// \param label string to prepend to the profile
-void log_ta_tensor_host_memory_use(madness::World& world,
-                                   std::string_view label = "");
-
 /******************************************************************************/
 
 ///
@@ -604,7 +233,7 @@ class Result {
   ///
   [[nodiscard]] virtual ResultPtr prod(Result const&,
                                        std::array<std::any, 3> const&,
-                                       TA::DeNest DeNestFlag) const = 0;
+                                       DeNest DeNestFlag) const = 0;
 
   ///
   /// \brief Permute this object according to the annotations in the argument.
@@ -631,19 +260,6 @@ class Result {
   ///
   [[nodiscard]] virtual ResultPtr antisymmetrize(size_t bra_rank) const = 0;
 
-  /// \brief Implements "biorthogonal cleanup" of closed-shell
-  /// more compact spintraced equations produced via method of
-  /// <a href="https://arxiv.org/abs/1805.00565">Wang and Knizia</a>.
-  ///
-  /// For 3-body residual (`bra_rank=3`) this implements Eq. (41) of the
-  /// Wang/Knizia paper, same as the first line of Figure 1.
-  /// For 4-body residual this implements the first line of Figure 2.
-  /// The implementation is for arbitrary ranks.
-  /// @param bra_rank the particle rank of the residual tensor (i.e.
-  ///                 its order halved)
-  [[nodiscard]] virtual ResultPtr biorthogonal_nns_project(
-      size_t bra_rank) const = 0;
-
   [[nodiscard]] bool has_value() const noexcept;
 
   [[nodiscard]] virtual ResultPtr mult_by_phase(std::int8_t) const = 0;
@@ -720,7 +336,7 @@ class ResultScalar final : public Result {
 
   [[nodiscard]] ResultPtr prod(Result const& other,
                                std::array<std::any, 3> const& maybe_empty,
-                               TA::DeNest DeNestFlag) const override {
+                               DeNest DeNestFlag) const override {
     if (other.is<ResultScalar<T>>()) {
       auto const& o = other.as<ResultScalar<T>>();
       auto p = value() * o.value();
@@ -755,11 +371,6 @@ class ResultScalar final : public Result {
     throw unimplemented_method("antisymmetrize");
   }
 
-  [[nodiscard]] ResultPtr biorthogonal_nns_project(
-      [[maybe_unused]] size_t bra_rank) const override {
-    throw unimplemented_method("biorthogonal_nns_project");
-  }
-
   [[nodiscard]] ResultPtr mult_by_phase(std::int8_t factor) const override {
     return eval_result<ResultScalar<T>>(value() * T(factor));
   }
@@ -772,413 +383,6 @@ class ResultScalar final : public Result {
   [[nodiscard]] std::size_t size_in_bytes() const final { return sizeof(T); }
 };
 
-///
-/// \brief Result for a tensor value of TA::DistArray type.
-/// \tparam ArrayT TA::DistArray type. Tile type of ArrayT is regular tensor of
-///                scalars (not a tensor of tensors)
-///
-template <typename ArrayT, typename = std::enable_if_t<TA::detail::is_tensor_v<
-                               typename ArrayT::value_type>>>
-class ResultTensorTA final : public Result {
- public:
-  using Result::id_t;
-  using numeric_type = typename ArrayT::numeric_type;
-
-  explicit ResultTensorTA(ArrayT arr) : Result{std::move(arr)} {}
-
- private:
-  using this_type = ResultTensorTA<ArrayT>;
-  using annot_wrap = Annot<std::string>;
-
-  [[nodiscard]] id_t type_id() const noexcept override {
-    return id_for_type<this_type>();
-  }
-
-  [[nodiscard]] ResultPtr sum(
-      Result const& other,
-      std::array<std::any, 3> const& annot) const override {
-    SEQUANT_ASSERT(other.is<this_type>());
-    auto const a = annot_wrap{annot};
-
-    log_ta(a.lannot, " + ", a.rannot, " = ", a.this_annot, "\n");
-
-    ArrayT result;
-    result(a.this_annot) =
-        get<ArrayT>()(a.lannot) + other.get<ArrayT>()(a.rannot);
-    decltype(result)::wait_for_lazy_cleanup(result.world());
-    return eval_result<this_type>(std::move(result));
-  }
-
-  [[nodiscard]] ResultPtr prod(Result const& other,
-                               std::array<std::any, 3> const& annot,
-                               TA::DeNest DeNestFlag) const override {
-    auto const a = annot_wrap{annot};
-
-    if (other.is<ResultScalar<numeric_type>>()) {
-      auto result = get<ArrayT>();
-      auto scalar = other.get<numeric_type>();
-
-      log_ta(a.lannot, " * ", scalar, " = ", a.this_annot, "\n");
-
-      result(a.this_annot) = scalar * result(a.lannot);
-
-      decltype(result)::wait_for_lazy_cleanup(result.world());
-      return eval_result<this_type>(std::move(result));
-    }
-
-    if (a.this_annot.empty()) {
-      // DOT product
-      SEQUANT_ASSERT(other.is<this_type>());
-      numeric_type d =
-          TA::dot(get<ArrayT>()(a.lannot), other.get<ArrayT>()(a.rannot));
-      ArrayT::wait_for_lazy_cleanup(get<ArrayT>().world());
-      ArrayT::wait_for_lazy_cleanup(other.get<ArrayT>().world());
-
-      log_ta(a.lannot, " * ", a.rannot, " = ", d, "\n");
-
-      return eval_result<ResultScalar<numeric_type>>(d);
-    }
-
-    if (!other.is<this_type>()) {
-      // potential T * ToT
-      auto annot_swap = annot;
-      std::swap(annot_swap[0], annot_swap[1]);
-      return other.prod(*this, annot_swap, DeNestFlag);
-    }
-
-    // confirmed: other.is<this_type>() is true
-
-    log_ta(a.lannot, " * ", a.rannot, " = ", a.this_annot, "\n");
-
-    ArrayT result;
-
-    result = TA::einsum(get<ArrayT>()(a.lannot), other.get<ArrayT>()(a.rannot),
-                        a.this_annot);
-    decltype(result)::wait_for_lazy_cleanup(result.world());
-    return eval_result<this_type>(std::move(result));
-  }
-
-  [[nodiscard]] ResultPtr mult_by_phase(std::int8_t factor) const override {
-    auto pre = get<ArrayT>();
-    TA::scale(pre, numeric_type(factor));
-    return eval_result<this_type>(std::move(pre));
-  }
-
-  [[nodiscard]] ResultPtr permute(
-      std::array<std::any, 2> const& ann) const override {
-    auto const pre_annot = std::any_cast<std::string>(ann[0]);
-    auto const post_annot = std::any_cast<std::string>(ann[1]);
-
-    log_ta(pre_annot, " = ", post_annot, "\n");
-
-    ArrayT result;
-    result(post_annot) = get<ArrayT>()(pre_annot);
-    ArrayT::wait_for_lazy_cleanup(result.world());
-    return eval_result<this_type>(std::move(result));
-  }
-
-  void add_inplace(Result const& other) override {
-    SEQUANT_ASSERT(other.is<this_type>());
-
-    auto& t = get<ArrayT>();
-    auto const& o = other.get<ArrayT>();
-
-    SEQUANT_ASSERT(t.trange() == o.trange());
-    auto ann = TA::detail::dummy_annotation(t.trange().rank());
-
-    log_ta(ann, " += ", ann, "\n");
-
-    t(ann) += o(ann);
-    ArrayT::wait_for_lazy_cleanup(t.world());
-  }
-
-  [[nodiscard]] ResultPtr symmetrize() const override {
-    return eval_result<this_type>(column_symmetrize_ta(get<ArrayT>()));
-  }
-
-  [[nodiscard]] ResultPtr antisymmetrize(size_t bra_rank) const override {
-    return eval_result<this_type>(
-        particle_antisymmetrize_ta(get<ArrayT>(), bra_rank));
-  }
-
-  [[nodiscard]] ResultPtr biorthogonal_nns_project(
-      size_t bra_rank) const override {
-    return eval_result<this_type>(
-        biorthogonal_nns_project_ta(get<ArrayT>(), bra_rank));
-  }
-
- private:
-  [[nodiscard]] std::size_t size_in_bytes() const final {
-    auto& v = get<ArrayT>();
-    auto local_size = TA::size_of<TA::MemorySpace::Host>(v);
-    v.world().gop.sum(local_size);
-    return local_size;
-  }
-};
-
-template <typename ArrayT,
-          typename = std::enable_if_t<
-              TA::detail::is_tensor_of_tensor_v<typename ArrayT::value_type>>>
-class ResultTensorOfTensorTA final : public Result {
- public:
-  using Result::id_t;
-  using numeric_type = typename ArrayT::numeric_type;
-
-  explicit ResultTensorOfTensorTA(ArrayT arr) : Result{std::move(arr)} {}
-
- private:
-  using this_type = ResultTensorOfTensorTA<ArrayT>;
-  using annot_wrap = Annot<std::string>;
-
-  using _inner_tensor_type = typename ArrayT::value_type::value_type;
-
-  using compatible_regular_distarray_type =
-      TA::DistArray<_inner_tensor_type, typename ArrayT::policy_type>;
-
-  // Only @c that_type type is allowed for ToT * T computation
-  using that_type = ResultTensorTA<compatible_regular_distarray_type>;
-
-  [[nodiscard]] id_t type_id() const noexcept override {
-    return id_for_type<this_type>();
-  }
-
-  [[nodiscard]] ResultPtr sum(
-      Result const& other,
-      std::array<std::any, 3> const& annot) const override {
-    SEQUANT_ASSERT(other.is<this_type>());
-    auto const a = annot_wrap{annot};
-
-    log_ta(a.lannot, " + ", a.rannot, " = ", a.this_annot, "\n");
-
-    ArrayT result;
-    result(a.this_annot) =
-        get<ArrayT>()(a.lannot) + other.get<ArrayT>()(a.rannot);
-    decltype(result)::wait_for_lazy_cleanup(result.world());
-    return eval_result<this_type>(std::move(result));
-  }
-
-  [[nodiscard]] ResultPtr prod(Result const& other,
-                               std::array<std::any, 3> const& annot,
-                               TA::DeNest DeNestFlag) const override {
-    auto const a = annot_wrap{annot};
-
-    if (other.is<ResultScalar<numeric_type>>()) {
-      auto result = get<ArrayT>();
-      auto scalar = other.get<numeric_type>();
-
-      log_ta(a.lannot, " * ", scalar, " = ", a.this_annot, "\n");
-
-      result(a.this_annot) = scalar * result(a.lannot);
-
-      decltype(result)::wait_for_lazy_cleanup(result.world());
-      return eval_result<this_type>(std::move(result));
-    } else if (a.this_annot.empty()) {
-      // DOT product
-      SEQUANT_ASSERT(other.is<this_type>());
-      numeric_type d =
-          TA::dot(get<ArrayT>()(a.lannot), other.get<ArrayT>()(a.rannot));
-      ArrayT::wait_for_lazy_cleanup(get<ArrayT>().world());
-      ArrayT::wait_for_lazy_cleanup(other.get<ArrayT>().world());
-
-      log_ta(a.lannot, " * ", a.rannot, " = ", d, "\n");
-
-      return eval_result<ResultScalar<numeric_type>>(d);
-    }
-
-    log_ta(a.lannot, " * ", a.rannot, " = ", a.this_annot, "\n");
-
-    if (other.is<that_type>()) {
-      // ToT * T -> ToT
-      auto result =
-          TA::einsum(get<ArrayT>()(a.lannot),
-                     other.get<compatible_regular_distarray_type>()(a.rannot),
-                     a.this_annot);
-      return eval_result<this_type>(std::move(result));
-
-    } else if (other.is<this_type>() && DeNestFlag == TA::DeNest::True) {
-      // ToT * ToT -> T
-      auto result = TA::einsum<TA::DeNest::True>(
-          get<ArrayT>()(a.lannot), other.get<ArrayT>()(a.rannot), a.this_annot);
-      return eval_result<that_type>(std::move(result));
-
-    } else if (other.is<this_type>() && DeNestFlag == TA::DeNest::False) {
-      // ToT * ToT -> ToT
-      auto result = TA::einsum(get<ArrayT>()(a.lannot),
-                               other.get<ArrayT>()(a.rannot), a.this_annot);
-      return eval_result<this_type>(std::move(result));
-    } else {
-      throw invalid_operand();
-    }
-  }
-
-  [[nodiscard]] ResultPtr mult_by_phase(std::int8_t factor) const override {
-    auto pre = get<ArrayT>();
-    TA::scale(pre, numeric_type(factor));
-    return eval_result<this_type>(std::move(pre));
-  }
-
-  [[nodiscard]] ResultPtr permute(
-      std::array<std::any, 2> const& ann) const override {
-    auto const pre_annot = std::any_cast<std::string>(ann[0]);
-    auto const post_annot = std::any_cast<std::string>(ann[1]);
-
-    log_ta(pre_annot, " = ", post_annot, "\n");
-
-    ArrayT result;
-    result(post_annot) = get<ArrayT>()(pre_annot);
-    ArrayT::wait_for_lazy_cleanup(result.world());
-    return eval_result<this_type>(std::move(result));
-  }
-
-  void add_inplace(Result const& other) override {
-    SEQUANT_ASSERT(other.is<this_type>());
-
-    auto& t = get<ArrayT>();
-    auto const& o = other.get<ArrayT>();
-
-    SEQUANT_ASSERT(t.trange() == o.trange());
-    auto ann = TA::detail::dummy_annotation(t.trange().rank());
-
-    log_ta(ann, " += ", ann, "\n");
-
-    t(ann) += o(ann);
-    ArrayT::wait_for_lazy_cleanup(t.world());
-  }
-
-  [[nodiscard]] ResultPtr symmetrize() const override {
-    // not implemented yet
-    return nullptr;
-  }
-
-  [[nodiscard]] ResultPtr antisymmetrize(size_t /*bra_rank*/) const override {
-    // not implemented yet
-    return nullptr;
-  }
-
-  [[nodiscard]] ResultPtr biorthogonal_nns_project(
-      [[maybe_unused]] size_t bra_rank) const override {
-    // or? throw unimplemented_method("biorthogonal_nns_project");
-    // not implemented yet, I think I need it for CSV
-    return nullptr;
-  }
-
- private:
-  [[nodiscard]] std::size_t size_in_bytes() const final {
-    auto& v = get<ArrayT>();
-    auto local_size = TA::size_of<TA::MemorySpace::Host>(v);
-    v.world().gop.sum(local_size);
-    return local_size;
-  }
-};
-
-///
-/// \brief Result for a tensor value of btas::Tensor type.
-/// \tparam T btas::Tensor type. Must be a specialization of btas::Tensor.
-///
-template <typename T>
-class ResultTensorBTAS final : public Result {
- public:
-  using Result::id_t;
-  using numeric_type = typename T::numeric_type;
-
-  explicit ResultTensorBTAS(T arr) : Result{std::move(arr)} {}
-
- private:
-  // TODO make it same as that used by EvalExprBTAS class from eval.hpp file
-  using annot_t = container::svector<long>;
-  using annot_wrap = Annot<annot_t>;
-
-  [[nodiscard]] id_t type_id() const noexcept override {
-    return id_for_type<ResultTensorBTAS<T>>();
-  }
-
-  [[nodiscard]] ResultPtr sum(
-      Result const& other,
-      std::array<std::any, 3> const& annot) const override {
-    SEQUANT_ASSERT(other.is<ResultTensorBTAS<T>>());
-    auto const a = annot_wrap{annot};
-
-    T lres, rres;
-    btas::permute(get<T>(), a.lannot, lres, a.this_annot);
-    btas::permute(other.get<T>(), a.rannot, rres, a.this_annot);
-    return eval_result<ResultTensorBTAS<T>>(lres + rres);
-  }
-
-  [[nodiscard]] ResultPtr prod(Result const& other,
-                               std::array<std::any, 3> const& annot,
-                               TA::DeNest /*DeNestFlag*/) const override {
-    auto const a = annot_wrap{annot};
-
-    if (other.is<ResultScalar<numeric_type>>()) {
-      T result;
-      btas::permute(get<T>(), a.lannot, result, a.this_annot);
-      btas::scal(other.as<ResultScalar<numeric_type>>().value(), result);
-      return eval_result<ResultTensorBTAS<T>>(std::move(result));
-    }
-
-    SEQUANT_ASSERT(other.is<ResultTensorBTAS<T>>());
-
-    if (a.this_annot.empty()) {
-      T rres;
-      btas::permute(other.get<T>(), a.rannot, rres, a.lannot);
-      return eval_result<ResultScalar<numeric_type>>(btas::dot(get<T>(), rres));
-    }
-
-    T result;
-    btas::contract(numeric_type{1},           //
-                   get<T>(), a.lannot,        //
-                   other.get<T>(), a.rannot,  //
-                   numeric_type{0},           //
-                   result, a.this_annot);
-    return eval_result<ResultTensorBTAS<T>>(std::move(result));
-  }
-
-  [[nodiscard]] ResultPtr mult_by_phase(std::int8_t factor) const override {
-    auto pre = get<T>();
-    btas::scal(numeric_type(factor), pre);
-    return eval_result<ResultTensorBTAS<T>>(std::move(pre));
-  }
-
-  [[nodiscard]] ResultPtr permute(
-      std::array<std::any, 2> const& ann) const override {
-    auto const pre_annot = std::any_cast<annot_t>(ann[0]);
-    auto const post_annot = std::any_cast<annot_t>(ann[1]);
-    T result;
-    btas::permute(get<T>(), pre_annot, result, post_annot);
-    return eval_result<ResultTensorBTAS<T>>(std::move(result));
-  }
-
-  void add_inplace(Result const& other) override {
-    auto& t = get<T>();
-    auto const& o = other.get<T>();
-    SEQUANT_ASSERT(t.range() == o.range());
-    t += o;
-  }
-
-  [[nodiscard]] ResultPtr symmetrize() const override {
-    return eval_result<ResultTensorBTAS<T>>(column_symmetrize_btas(get<T>()));
-  }
-
-  [[nodiscard]] ResultPtr antisymmetrize(size_t bra_rank) const override {
-    return eval_result<ResultTensorBTAS<T>>(
-        particle_antisymmetrize_btas(get<T>(), bra_rank));
-  }
-
-  [[nodiscard]] ResultPtr biorthogonal_nns_project(
-      [[maybe_unused]] size_t bra_rank) const override {
-    return eval_result<ResultTensorBTAS<T>>(
-        biorthogonal_nns_project_btas(get<T>(), bra_rank));
-  }
-
- private:
-  [[nodiscard]] std::size_t size_in_bytes() const final {
-    static_assert(std::is_arithmetic_v<typename T::value_type>);
-    const auto& tensor = get<T>();
-    // only count data
-    return tensor.range().volume() * sizeof(T);
-  }
-};
-
 }  // namespace sequant
 
 #endif  // SEQUANT_EVAL_RESULT_HPP
diff --git a/SeQuant/core/export/export.hpp b/SeQuant/core/export/export.hpp
index a3e6479095..a15fbd327f 100644
--- a/SeQuant/core/export/export.hpp
+++ b/SeQuant/core/export/export.hpp
@@ -2,7 +2,7 @@
 #define SEQUANT_CORE_EXPORT_EXPORT_HPP
 
 #include <SeQuant/core/container.hpp>
-#include <SeQuant/core/eval_expr.hpp>
+#include <SeQuant/core/eval/eval_expr.hpp>
 #include <SeQuant/core/export/compute_selection.hpp>
 #include <SeQuant/core/export/context.hpp>
 #include <SeQuant/core/export/export_expr.hpp>
diff --git a/SeQuant/core/export/export_expr.cpp b/SeQuant/core/export/export_expr.cpp
index c990ac6f15..ab2a56ad71 100644
--- a/SeQuant/core/export/export_expr.cpp
+++ b/SeQuant/core/export/export_expr.cpp
@@ -1,4 +1,4 @@
-#include <SeQuant/core/eval_expr.hpp>
+#include <SeQuant/core/eval/eval_expr.hpp>
 #include <SeQuant/core/export/compute_selection.hpp>
 #include <SeQuant/core/export/export_expr.hpp>
 #include <SeQuant/core/utility/atomic.hpp>
diff --git a/SeQuant/core/export/export_expr.hpp b/SeQuant/core/export/export_expr.hpp
index 431cbd8069..b4f939dea0 100644
--- a/SeQuant/core/export/export_expr.hpp
+++ b/SeQuant/core/export/export_expr.hpp
@@ -1,7 +1,7 @@
 #ifndef SEQUANT_CORE_EXPORT_EXPORT_EXPR_HPP
 #define SEQUANT_CORE_EXPORT_EXPORT_EXPR_HPP
 
-#include <SeQuant/core/eval_expr.hpp>
+#include <SeQuant/core/eval/eval_expr.hpp>
 #include <SeQuant/core/export/compute_selection.hpp>
 
 #include <cstddef>
diff --git a/SeQuant/core/optimize/common_subexpression_elimination.hpp b/SeQuant/core/optimize/common_subexpression_elimination.hpp
index 6ef7f138c6..6db312e1cf 100644
--- a/SeQuant/core/optimize/common_subexpression_elimination.hpp
+++ b/SeQuant/core/optimize/common_subexpression_elimination.hpp
@@ -3,8 +3,8 @@
 
 #include <SeQuant/core/binary_node.hpp>
 #include <SeQuant/core/container.hpp>
-#include <SeQuant/core/eval_expr.hpp>
-#include <SeQuant/core/eval_node.hpp>
+#include <SeQuant/core/eval/eval_expr.hpp>
+#include <SeQuant/core/eval/eval_node.hpp>
 #include <SeQuant/core/expr.hpp>
 #include <SeQuant/core/hash.hpp>
 #include <SeQuant/core/utility/macros.hpp>
diff --git a/SeQuant/core/optimize/optimize.cpp b/SeQuant/core/optimize/optimize.cpp
index f470c411d7..099e002367 100644
--- a/SeQuant/core/optimize/optimize.cpp
+++ b/SeQuant/core/optimize/optimize.cpp
@@ -1,8 +1,8 @@
 #include <SeQuant/core/binary_node.hpp>
 #include <SeQuant/core/complex.hpp>
 #include <SeQuant/core/container.hpp>
-#include <SeQuant/core/eval_expr.hpp>
-#include <SeQuant/core/eval_node.hpp>
+#include <SeQuant/core/eval/eval_expr.hpp>
+#include <SeQuant/core/eval/eval_node.hpp>
 #include <SeQuant/core/expr.hpp>
 #include <SeQuant/core/hash.hpp>
 #include <SeQuant/core/optimize.hpp>
diff --git a/SeQuant/core/utility/exception.hpp b/SeQuant/core/utility/exception.hpp
index 10ef041a3f..535e31c9f8 100644
--- a/SeQuant/core/utility/exception.hpp
+++ b/SeQuant/core/utility/exception.hpp
@@ -14,7 +14,7 @@ namespace sequant {
 class Exception {
  public:
   Exception(const std::string& str) : msg_(str) {}
-  const auto& what() const { return msg_; }
+  virtual std::string_view what() const { return msg_; }
 
  private:
   std::string msg_;
diff --git a/SeQuant/domain/mbpt/biorthogonalization.cpp b/SeQuant/domain/mbpt/biorthogonalization.cpp
index 9c372b6f2c..a8cd857e6a 100644
--- a/SeQuant/domain/mbpt/biorthogonalization.cpp
+++ b/SeQuant/domain/mbpt/biorthogonalization.cpp
@@ -9,6 +9,7 @@
 #include <SeQuant/core/utility/macros.hpp>
 #include <SeQuant/core/utility/permutation.hpp>
 
+#include <Eigen/Core>
 #include <Eigen/Eigenvalues>
 
 #include <libperm/Permutation.hpp>
@@ -29,8 +30,134 @@ struct compare_first_less {
 using IndexPair = std::pair<Index, Index>;
 using ParticlePairings = container::svector<IndexPair>;
 
-ResultExpr biorthogonal_transform_copy(const ResultExpr& expr,
-                                       double threshold) {
+// clang-format off
+/// \brief Provides the first row of the biorthogonal coefficients matrix,
+/// hardcoded from Mathematica to avoid numerical precision loss.
+///
+/// The Myrvold-Ruskey unrank1 algorithm (doi.org/10.1016/S0020-0190(01)00141-7)
+/// is used to order permutations, then the permutational overlap matrix M is
+/// constructed with elements (-2)^{c} × (-1)^{n_particles}, where c is the
+/// number of cycles in the relative permutation.
+///
+/// The biorthogonal coefficients are obtained from the normalized pseudoinverse
+/// of M: first compute M_pinv (the pseudoinverse), then normalize it by the
+/// factor ((n_particles)!/rank(M)).
+/// Finally, biorthogonal coefficients = normalized_M_pinv · e_1,
+/// where e_1 is the first unit vector.
+/// See [DOI 10.48550/ARXIV.1805.00565](https://doi.org/10.48550/ARXIV.1805.00565)
+/// for more details.
+///
+/// \param n_particles Number of external index pairs (particle rank), 1 to 5
+///
+/// \return First row as n_particles! rationals, indexed by permutation rank
+///
+/// \throw std::runtime_error if n_particles is not in the range [1,5]
+// clang-format on
+std::vector<sequant::rational> hardcoded_biorthogonalizer_row(
+    std::size_t n_particles) {
+  switch (n_particles) {
+    case 1:
+      return std::vector<sequant::rational>{ratio(1, 2)};
+
+    case 2:
+      return std::vector<sequant::rational>{ratio(1, 3), ratio(1, 6)};
+
+    case 3:
+      return std::vector<sequant::rational>{ratio(17, 120), ratio(-7, 120),
+                                            ratio(-1, 120), ratio(-1, 120),
+                                            ratio(-1, 120), ratio(-7, 120)};
+
+    case 4:
+      return std::vector<sequant::rational>{
+          ratio(43, 840), ratio(-19, 1680), ratio(-19, 1680),
+          ratio(-1, 105), ratio(-19, 1680), ratio(-19, 1680),
+          ratio(13, 840), ratio(1, 120),    ratio(-1, 105),
+          ratio(1, 120),  ratio(-1, 105),   ratio(-19, 1680),
+          ratio(-1, 105), ratio(1, 120),    ratio(1, 120),
+          ratio(13, 840), ratio(-1, 105),   ratio(-1, 105),
+          ratio(1, 120),  ratio(-19, 1680), ratio(-19, 1680),
+          ratio(13, 840), ratio(-19, 1680), ratio(1, 120)};
+
+    case 5:
+      return std::vector<sequant::rational>{
+          ratio(59, 3780),   ratio(-5, 3024),   ratio(-5, 3024),
+          ratio(-5, 3024),   ratio(-31, 7560),  ratio(-5, 3024),
+          ratio(-5, 3024),   ratio(-23, 30240), ratio(19, 7560),
+          ratio(37, 15120),  ratio(-5, 3024),   ratio(-23, 30240),
+          ratio(-5, 3024),   ratio(19, 7560),   ratio(37, 15120),
+          ratio(-31, 7560),  ratio(37, 15120),  ratio(37, 15120),
+          ratio(-31, 7560),  ratio(-5, 3024),   ratio(-5, 3024),
+          ratio(-23, 30240), ratio(-23, 30240), ratio(-23, 30240),
+          ratio(-13, 7560),  ratio(-5, 3024),   ratio(-5, 3024),
+          ratio(19, 7560),   ratio(-23, 30240), ratio(37, 15120),
+          ratio(19, 7560),   ratio(-23, 30240), ratio(19, 7560),
+          ratio(-23, 30240), ratio(-13, 7560),  ratio(37, 15120),
+          ratio(-13, 7560),  ratio(-13, 7560),  ratio(37, 15120),
+          ratio(-23, 30240), ratio(-31, 7560),  ratio(-13, 7560),
+          ratio(37, 15120),  ratio(37, 15120),  ratio(19, 7560),
+          ratio(37, 15120),  ratio(37, 15120),  ratio(-13, 7560),
+          ratio(-13, 7560),  ratio(-23, 30240), ratio(-31, 7560),
+          ratio(37, 15120),  ratio(-31, 7560),  ratio(37, 15120),
+          ratio(-5, 3024),   ratio(-5, 3024),   ratio(-23, 30240),
+          ratio(19, 7560),   ratio(-5, 3024),   ratio(37, 15120),
+          ratio(-31, 7560),  ratio(37, 15120),  ratio(37, 15120),
+          ratio(-13, 7560),  ratio(19, 7560),   ratio(37, 15120),
+          ratio(37, 15120),  ratio(-13, 7560),  ratio(-13, 7560),
+          ratio(-23, 30240), ratio(37, 15120),  ratio(-13, 7560),
+          ratio(37, 15120),  ratio(-13, 7560),  ratio(-23, 30240),
+          ratio(19, 7560),   ratio(-23, 30240), ratio(-23, 30240),
+          ratio(19, 7560),   ratio(-13, 7560),  ratio(-31, 7560),
+          ratio(37, 15120),  ratio(-13, 7560),  ratio(37, 15120),
+          ratio(19, 7560),   ratio(-31, 7560),  ratio(-31, 7560),
+          ratio(37, 15120),  ratio(37, 15120),  ratio(-5, 3024),
+          ratio(37, 15120),  ratio(-13, 7560),  ratio(37, 15120),
+          ratio(-13, 7560),  ratio(-23, 30240), ratio(-5, 3024),
+          ratio(19, 7560),   ratio(-23, 30240), ratio(-5, 3024),
+          ratio(37, 15120),  ratio(-5, 3024),   ratio(-23, 30240),
+          ratio(-23, 30240), ratio(-23, 30240), ratio(-13, 7560),
+          ratio(19, 7560),   ratio(19, 7560),   ratio(-23, 30240),
+          ratio(-23, 30240), ratio(-13, 7560),  ratio(-5, 3024),
+          ratio(19, 7560),   ratio(-5, 3024),   ratio(-23, 30240),
+          ratio(37, 15120),  ratio(37, 15120),  ratio(-13, 7560),
+          ratio(-13, 7560),  ratio(37, 15120),  ratio(-23, 30240)};
+
+    default:
+      throw std::runtime_error(
+          "hardcoded biorthogonal coefficients only available for ranks 1-5, "
+          "requested rank is : " +
+          std::to_string(n_particles));
+  }
+}
+
+Eigen::Matrix<sequant::rational, Eigen::Dynamic, Eigen::Dynamic>
+make_hardcoded_biorthogonalizer_matrix(
+    const std::vector<sequant::rational>& first_row, std::size_t n_particles) {
+  const auto n = first_row.size();
+  Eigen::Matrix<sequant::rational, Eigen::Dynamic, Eigen::Dynamic> M(n, n);
+  // Fill M(row, col) from first_row at the rank of the composed permutation.
+  for (std::size_t row = 0; row < n; ++row) {
+    // depends on `row` only, so unrank once per row rather than per element
+    perm::Permutation row_perm = perm::unrank(n - 1 - row, n_particles);
+    for (std::size_t col = 0; col < n; ++col) {
+      perm::Permutation col_perm = perm::unrank(col, n_particles);
+      col_perm->preMultiply(row_perm);
+      // rank of the composed permutation selects the first_row entry
+      std::size_t source_idx = perm::rank(col_perm, n_particles);
+      M(row, col) = first_row[source_idx];
+    }
+  }
+  return M;
+}
+
+Eigen::Matrix<sequant::rational, Eigen::Dynamic, Eigen::Dynamic>
+hardcoded_biorthogonalizer_matrix(std::size_t n_particles) {
+  return make_hardcoded_biorthogonalizer_matrix(  // tabulated row -> matrix
+      hardcoded_biorthogonalizer_row(n_particles), n_particles);
+}
+
+ResultExpr biorthogonal_transform_copy(
+    const ResultExpr& expr,
+    double threshold = default_biorthogonalizer_pseudoinverse_threshold) {
   container::svector<ResultExpr> wrapper = {expr.clone()};
 
   biorthogonal_transform(wrapper, threshold);
@@ -39,7 +166,8 @@ ResultExpr biorthogonal_transform_copy(const ResultExpr& expr,
 }
 
 container::svector<ResultExpr> biorthogonal_transform_copy(
-    const container::svector<ResultExpr>& exprs, double threshold) {
+    const container::svector<ResultExpr>& exprs,
+    double threshold = default_biorthogonalizer_pseudoinverse_threshold) {
   container::svector<ResultExpr> copy;
   copy.reserve(exprs.size());
 
@@ -101,8 +229,8 @@ Eigen::MatrixXd permutational_overlap_matrix(std::size_t n_particles) {
   return M;
 }
 
-Eigen::MatrixXd compute_biorth_coeffs(std::size_t n_particles,
-                                      double threshold) {
+Eigen::MatrixXd compute_biorthogonalizer_matrix(std::size_t n_particles,
+                                                double threshold) {
   auto perm_ovlp_mat = permutational_overlap_matrix(n_particles);
   SEQUANT_ASSERT(perm_ovlp_mat.rows() == perm_ovlp_mat.cols());
   SEQUANT_ASSERT(perm_ovlp_mat.isApprox(perm_ovlp_mat.transpose()));
@@ -309,7 +437,7 @@ void biorthogonal_transform(container::svector<ResultExpr>& result_exprs,
   // like R^{IJ}_{AB} and the index pairing of the result is what determines
   // the required symmetrization. Hence, the symmetrization operator must not
   // be changed when transforming from one representation into the other.
-  assert(std::all_of(
+  SEQUANT_ASSERT(std::all_of(
       result_exprs.begin(), result_exprs.end(), [](const ResultExpr& res) {
         bool found = false;
         res.expression()->visit(
@@ -336,12 +464,7 @@ void biorthogonal_transform(container::svector<ResultExpr>& result_exprs,
                ranges::to<container::svector<std::size_t>>();
 
   const std::size_t n_particles = externals.front().size();
-
-  Eigen::MatrixXd coefficients = compute_biorth_coeffs(n_particles, threshold);
-
   auto num_perms = factorial(n_particles);
-  SEQUANT_ASSERT(num_perms == coefficients.rows());
-  SEQUANT_ASSERT(num_perms == coefficients.cols());
 
   auto original_exprs = result_exprs |
                         ranges::views::transform([](const ResultExpr& res) {
@@ -349,6 +472,60 @@ void biorthogonal_transform(container::svector<ResultExpr>& result_exprs,
                         }) |
                         ranges::to<container::svector<ExprPtr>>();
 
+  auto memoize = []<typename T>(container::map<std::pair<std::size_t, double>,
+                                               std::optional<T>>& cache,
+                                std::mutex& mutex, std::condition_variable& cv,
+                                std::pair<std::size_t, double> key,
+                                auto compute_fn) -> const T& {
+    std::unique_lock<std::mutex> lock(mutex);
+    for (auto it = cache.find(key); it != cache.end(); it = cache.find(key)) {
+      if (it->second.has_value()) return it->second.value();
+      cv.wait(lock);  // in progress elsewhere; re-find, a failed run erases it
+    }
+    cache.try_emplace(key, std::nullopt);  // claim the slot for this caller
+    lock.unlock();                         // compute without holding the lock
+    try {
+      T result = compute_fn();
+      lock.lock();
+      cv.notify_all();  // waiters proceed once `lock` is released on return
+      return (cache[key] = std::move(result)).value();
+    } catch (...) {
+      if (!lock.owns_lock()) lock.lock();
+      cache.erase(key);  // else later callers of this key block forever
+      cv.notify_all();
+      throw;
+    }
+  };
+  using HardcodedMatrix =
+      Eigen::Matrix<sequant::rational, Eigen::Dynamic, Eigen::Dynamic>;
+  using ComputedMatrix = Eigen::MatrixXd;
+  using CacheKey = std::pair<std::size_t, double>;
+
+  static std::mutex cache_mutex;
+  static std::condition_variable cache_cv;
+  static container::map<CacheKey, std::optional<HardcodedMatrix>>
+      hardcoded_cache;
+  static container::map<CacheKey, std::optional<ComputedMatrix>> computed_cache;
+
+  constexpr std::size_t max_rank_hardcoded_biorthogonalizer_matrix = 5;
+  CacheKey key{n_particles, threshold};
+
+  const HardcodedMatrix* hardcoded_coefficients = nullptr;
+  const ComputedMatrix* computed_coefficients = nullptr;
+
+  if (n_particles <= max_rank_hardcoded_biorthogonalizer_matrix) {
+    hardcoded_coefficients = &memoize(
+        hardcoded_cache, cache_mutex, cache_cv, key,
+        [&] { return hardcoded_biorthogonalizer_matrix(n_particles); });
+  } else {
+    computed_coefficients =
+        &memoize(computed_cache, cache_mutex, cache_cv, key, [&] {
+          return compute_biorthogonalizer_matrix(n_particles, threshold);
+        });
+    SEQUANT_ASSERT(num_perms == computed_coefficients->rows());
+    SEQUANT_ASSERT(num_perms == computed_coefficients->cols());
+  }
+
   for (std::size_t i = 0; i < result_exprs.size(); ++i) {
     result_exprs.at(i).expression() = ex<Constant>(0);
     perm::Permutation reference = perm::unrank(ranks.at(i), n_particles);
@@ -358,9 +535,14 @@ void biorthogonal_transform(container::svector<ResultExpr>& result_exprs,
       perm::Permutation perm = perm::unrank(rank, n_particles);
       perm->postMultiply(reference);
 
+      sequant::rational coeff =
+          (n_particles <= max_rank_hardcoded_biorthogonalizer_matrix)
+              ? (*hardcoded_coefficients)(ranks.at(i), rank)
+              : to_rational((*computed_coefficients)(ranks.at(i), rank),
+                            threshold);
+
       result_exprs.at(i).expression() +=
-          ex<Constant>(
-              to_rational(coefficients(ranks.at(i), rank), threshold)) *
+          ex<Constant>(coeff) *
           create_expr_for(externals.at(i), perm, externals, original_exprs);
     }
 
@@ -390,4 +572,36 @@ ExprPtr biorthogonal_transform(
   return res.expression();
 }
 
+namespace detail {
+
+std::vector<double> compute_nns_p_coeffs(std::size_t n_particles,
+                                         double threshold) {
+  auto perm_ovlp_mat = permutational_overlap_matrix(n_particles);
+  auto normalized_pinv =
+      compute_biorthogonalizer_matrix(n_particles, threshold);
+  Eigen::MatrixXd nns_matrix = perm_ovlp_mat * normalized_pinv;
+  // only the last row is returned; convert the signed Eigen row count once
+  const auto num_perms = static_cast<std::size_t>(nns_matrix.rows());
+  std::vector<double> coeffs;
+  coeffs.reserve(num_perms);
+  for (std::size_t i = 0; i < num_perms; ++i) {
+    coeffs.push_back(nns_matrix(num_perms - 1, i));
+  }
+  return coeffs;
+}
+
+container::svector<size_t> compute_permuted_indices(
+    const container::svector<size_t>& indices, size_t perm_rank,
+    size_t n_particles) {
+  perm::Permutation perm_obj = perm::unrank(perm_rank, n_particles);
+
+  container::svector<size_t> permuted_indices(n_particles);
+  for (size_t i = 0; i < n_particles; ++i) {
+    permuted_indices[i] = indices[perm_obj[i]];
+  }
+  return permuted_indices;
+}
+
+}  // namespace detail
+
 }  // namespace sequant
diff --git a/SeQuant/domain/mbpt/biorthogonalization.hpp b/SeQuant/domain/mbpt/biorthogonalization.hpp
index 62c415dd86..2e7eea28ba 100644
--- a/SeQuant/domain/mbpt/biorthogonalization.hpp
+++ b/SeQuant/domain/mbpt/biorthogonalization.hpp
@@ -5,33 +5,354 @@
 #include <SeQuant/core/expr.hpp>
 #include <SeQuant/core/index.hpp>
 
-namespace sequant {
+#if defined(SEQUANT_HAS_TILEDARRAY)
+#include <SeQuant/core/eval/backends/tiledarray/eval_expr.hpp>
+#include <SeQuant/core/eval/backends/tiledarray/result.hpp>
+#endif
+#if defined(SEQUANT_HAS_BTAS)
+#include <SeQuant/core/eval/backends/btas/eval_expr.hpp>
+#include <SeQuant/core/eval/backends/btas/result.hpp>
+#endif
 
-namespace {
-static constexpr double default_biorth_threshold = 1e-12;
-}
+#include <concepts>
+#include <condition_variable>
+#include <cstddef>
+#include <mutex>
+#include <optional>
+#include <vector>
 
-[[nodiscard]] ResultExpr biorthogonal_transform_copy(
-    const ResultExpr& expr, double threshold = default_biorth_threshold);
+namespace sequant {
 
-[[nodiscard]] container::svector<ResultExpr> biorthogonal_transform_copy(
-    const container::svector<ResultExpr>& exprs,
-    double threshold = default_biorth_threshold);
+static constexpr double default_biorthogonalizer_pseudoinverse_threshold =
+    1e-12;
 
-void biorthogonal_transform(ResultExpr& expr,
-                            double threshold = default_biorth_threshold);
+void biorthogonal_transform(
+    ResultExpr& expr, double pseudoinverse_threshold =
+                          default_biorthogonalizer_pseudoinverse_threshold);
 
-void biorthogonal_transform(container::svector<ResultExpr>& exprs,
-                            double threshold = default_biorth_threshold);
+void biorthogonal_transform(
+    container::svector<ResultExpr>& exprs,
+    double pseudoinverse_threshold =
+        default_biorthogonalizer_pseudoinverse_threshold);
 
 /// performs symbolic biorthogonal transform of CC-like equation using
 ///(for rank-3 and higher
-/// Wang-Knizia biorthogonalization (https://arxiv.org/abs/1805.00565) is used
+/// [Wang-Knizia biorthogonalization](https://arxiv.org/abs/1805.00565).
+///
+/// @note uses hardcoded coefficients for ranks 1-5,
+///  for higher ranks computes coefficients (if Eigen3 is available, else throws
+///  an exception)
 [[nodiscard]] ExprPtr biorthogonal_transform(
     const ExprPtr& expr,
     const container::svector<container::svector<sequant::Index>>&
         ext_index_groups = {},
-    double threshold = default_biorth_threshold);
+    double pseudoinverse_threshold =
+        default_biorthogonalizer_pseudoinverse_threshold);
+
+namespace detail {
+
+/// \brief Computes the non-null space (NNS) projection coefficients
+///
+/// \param n_particles The rank of external index pairs
+/// \param pseudoinverse_threshold Threshold for computing the pseudoinverse
+///        (defaults to default_biorthogonalizer_pseudoinverse_threshold)
+///
+/// \return Vector of computed NNS projection coefficients
+[[nodiscard]] std::vector<double> compute_nns_p_coeffs(
+    std::size_t n_particles,
+    double pseudoinverse_threshold =
+        default_biorthogonalizer_pseudoinverse_threshold);
+
+/// \brief Provides permuted indices using libperm unrank function
+///
+/// \param indices The indices to permute
+/// \param perm_rank The rank of the permutation
+/// \param n_particles The rank of external index pairs
+///
+/// \return The permuted indices
+container::svector<size_t> compute_permuted_indices(
+    const container::svector<size_t>& indices, size_t perm_rank,
+    size_t n_particles);
+
+/// \brief Provides one row of the NNS projector matrix,
+/// hardcoded from Mathematica to avoid numerical precision loss.
+///
+/// The NNS projector weights are obtained from the normalized pseudoinverse
+/// of M: first compute M_pinv (the pseudoinverse), then normalize it by the
+/// factor ((n_particles)!/rank(M)).
+/// Finally, NNS projector = normalized_M_pinv · M.
+///
+/// \param n_particles The rank of external index pairs
+///
+/// \return Optional vector of NNS projector weights representing the last row,
+///         std::nullopt if n_particles is outside the range [1,5].
+template <typename T>
+  requires(std::floating_point<T> || meta::is_complex_v<T>)
+std::optional<std::vector<T>> hardcoded_nns_projector(std::size_t n_particles) {
+  switch (n_particles) {
+    case 1:
+      return std::vector<T>{T(1) / T(1)};
+
+    case 2:
+      return std::vector<T>{T(0) / T(1), T(1) / T(1)};
+
+    case 3:
+      return std::vector<T>{T(-1) / T(5), T(-1) / T(5), T(-1) / T(5),
+                            T(-1) / T(5), T(-1) / T(5), T(1) / T(1)};
+
+    case 4:
+      return std::vector<T>{
+          T(1) / T(7),   T(1) / T(7),   T(1) / T(7),   T(-1) / T(14),
+          T(1) / T(7),   T(1) / T(7),   T(1) / T(7),   T(-1) / T(14),
+          T(-1) / T(14), T(-1) / T(14), T(1) / T(7),   T(-2) / T(7),
+          T(-1) / T(14), T(1) / T(7),   T(-1) / T(14), T(-2) / T(7),
+          T(1) / T(7),   T(-1) / T(14), T(-1) / T(14), T(-2) / T(7),
+          T(-2) / T(7),  T(-2) / T(7),  T(-2) / T(7),  T(1) / T(1)};
+
+    case 5:
+      return std::vector<T>{
+          T(-1) / T(14), T(-1) / T(14), T(-1) / T(14), T(-1) / T(14),
+          T(2) / T(21),  T(-1) / T(14), T(-1) / T(14), T(-1) / T(14),
+          T(-1) / T(14), T(2) / T(21),  T(-1) / T(14), T(-1) / T(14),
+          T(-1) / T(14), T(-1) / T(14), T(2) / T(21),  T(2) / T(21),
+          T(2) / T(21),  T(2) / T(21),  T(-1) / T(21), T(0) / T(1),
+          T(-1) / T(14), T(-1) / T(14), T(-1) / T(14), T(-1) / T(14),
+          T(2) / T(21),  T(-1) / T(14), T(-1) / T(14), T(-1) / T(14),
+          T(-1) / T(14), T(2) / T(21),  T(-1) / T(14), T(-1) / T(14),
+          T(-1) / T(14), T(-1) / T(14), T(2) / T(21),  T(2) / T(21),
+          T(2) / T(21),  T(2) / T(21),  T(-1) / T(21), T(0) / T(1),
+          T(2) / T(21),  T(2) / T(21),  T(-1) / T(21), T(2) / T(21),
+          T(0) / T(1),   T(2) / T(21),  T(2) / T(21),  T(-1) / T(21),
+          T(2) / T(21),  T(0) / T(1),   T(-1) / T(21), T(-1) / T(21),
+          T(-1) / T(21), T(-1) / T(21), T(1) / T(7),   T(0) / T(1),
+          T(0) / T(1),   T(1) / T(7),   T(1) / T(7),   T(-1) / T(3),
+          T(2) / T(21),  T(-1) / T(21), T(2) / T(21),  T(2) / T(21),
+          T(0) / T(1),   T(-1) / T(21), T(-1) / T(21), T(-1) / T(21),
+          T(-1) / T(21), T(1) / T(7),   T(2) / T(21),  T(-1) / T(21),
+          T(2) / T(21),  T(2) / T(21),  T(0) / T(1),   T(0) / T(1),
+          T(1) / T(7),   T(0) / T(1),   T(1) / T(7),   T(-1) / T(3),
+          T(-1) / T(21), T(-1) / T(21), T(-1) / T(21), T(-1) / T(21),
+          T(1) / T(7),   T(-1) / T(21), T(2) / T(21),  T(2) / T(21),
+          T(2) / T(21),  T(0) / T(1),   T(-1) / T(21), T(2) / T(21),
+          T(2) / T(21),  T(2) / T(21),  T(0) / T(1),   T(1) / T(7),
+          T(0) / T(1),   T(0) / T(1),   T(1) / T(7),   T(-1) / T(3),
+          T(0) / T(1),   T(1) / T(7),   T(1) / T(7),   T(0) / T(1),
+          T(-1) / T(3),  T(1) / T(7),   T(0) / T(1),   T(1) / T(7),
+          T(0) / T(1),   T(-1) / T(3),  T(1) / T(7),   T(1) / T(7),
+          T(0) / T(1),   T(0) / T(1),   T(-1) / T(3),  T(-1) / T(3),
+          T(-1) / T(3),  T(-1) / T(3),  T(-1) / T(3),  T(1) / T(1)};
+
+    default:
+      return std::nullopt;
+  }
+}
+
+/// \brief Provides NNS projection weights for a given rank
+///
+/// \tparam T The numeric type (must be floating point or complex)
+/// \param n_particles The rank of external index pairs
+/// \param pseudoinverse_threshold Threshold for computing the pseudoinverse
+///        (defaults to default_biorthogonalizer_pseudoinverse_threshold)
+/// \return (memoized) hardcoded/computed NNS projection weights; empty if
+///         n_particles < 3
+template <typename T>
+  requires(std::floating_point<T> || meta::is_complex_v<T>)
+[[nodiscard]] const std::vector<T>& nns_projection_weights(
+    std::size_t n_particles,
+    double pseudoinverse_threshold =
+        default_biorthogonalizer_pseudoinverse_threshold) {
+  static const std::vector<T> empty_vec{};
+
+  if (n_particles < 3) {
+    return empty_vec;
+  }
+
+  using CacheKey = std::pair<std::size_t, double>;
+  using CacheValue = std::optional<std::vector<T>>;
+
+  static std::mutex cache_mutex;
+  static std::condition_variable cache_cv;
+  static container::map<CacheKey, CacheValue> cache;
+
+  CacheKey key{n_particles, pseudoinverse_threshold};
+  // first caller for a key inserts an empty placeholder; later callers wait
+  {
+    std::unique_lock<std::mutex> lock(cache_mutex);
+    auto [it, inserted] = cache.try_emplace(key, std::nullopt);
+    if (!inserted) {
+      cache_cv.wait(lock, [&] { return it->second.has_value(); });
+      return it->second.value();
+    }
+  }
+  // NOTE(review): a throw below leaves the placeholder empty; waiters hang
+  std::vector<T> nns_p_coeffs;
+
+  constexpr std::size_t max_rank_hardcoded_nns_projector = 5;
+  if (n_particles <= max_rank_hardcoded_nns_projector) {
+    auto hardcoded_coeffs = hardcoded_nns_projector<T>(n_particles);
+    if (hardcoded_coeffs) {
+      nns_p_coeffs = std::move(hardcoded_coeffs.value());
+    }
+  } else {
+    auto coeffs =
+        detail::compute_nns_p_coeffs(n_particles, pseudoinverse_threshold);
+    nns_p_coeffs.reserve(coeffs.size());
+    for (const auto& c : coeffs) {
+      nns_p_coeffs.push_back(static_cast<T>(c));
+    }
+  }
+  // publish the weights under the lock and wake all waiting callers
+  {
+    std::lock_guard<std::mutex> lock(cache_mutex);
+    cache[key] = std::move(nns_p_coeffs);
+    cache_cv.notify_all();
+    return cache[key].value();
+  }
+}
+
+}  // namespace detail
+
+#if defined(SEQUANT_HAS_TILEDARRAY)
+
+/// \brief This function is used to implement
+/// ResultPtr::biorthogonal_nns_project for TA::DistArray
+///
+/// \param arr The array to be "cleaned up"
+/// \param bra_rank The rank of the bra indices
+///
+/// \return The cleaned TA::DistArray.
+template <typename... Args>
+auto biorthogonal_nns_project_ta(TA::DistArray<Args...> const& arr,
+                                 size_t bra_rank) {
+  using ranges::views::iota;
+  size_t const rank = arr.trange().rank();
+  SEQUANT_ASSERT(bra_rank <= rank);
+  size_t const ket_rank = rank - bra_rank;
+
+  // Arrays with at most 4 modes (bra + ket combined) have no redundancy and
+  // don't require NNS projection
+  if (rank <= 4) return arr;
+
+  using numeric_type = typename TA::DistArray<Args...>::numeric_type;
+
+  const auto& nns_p_coeffs =
+      detail::nns_projection_weights<numeric_type>(ket_rank);
+
+  TA::DistArray<Args...> result;
+
+  perm_t perm = iota(size_t{0}, rank) | ranges::to<perm_t>;
+  perm_t bra_perm = iota(size_t{0}, bra_rank) | ranges::to<perm_t>;
+  perm_t ket_perm = iota(bra_rank, rank) | ranges::to<perm_t>;
+
+  const auto lannot = ords_to_annot(perm);
+
+  if (ket_rank > 2 && !nns_p_coeffs.empty()) {
+    const auto bra_annot = bra_rank == 0 ? "" : ords_to_annot(bra_perm);
+
+    size_t num_perms = nns_p_coeffs.size();
+    for (size_t perm_rank = 0; perm_rank < num_perms; ++perm_rank) {
+      perm_t permuted_ket =
+          detail::compute_permuted_indices(ket_perm, perm_rank, ket_rank);
+
+      numeric_type coeff = nns_p_coeffs[perm_rank];
+
+      const auto ket_annot = ords_to_annot(permuted_ket);
+      const auto annot =
+          bra_annot.empty() ? ket_annot : bra_annot + "," + ket_annot;
+
+      if (result.is_initialized()) {
+        result(lannot) += coeff * arr(annot);
+      } else {
+        result(lannot) = coeff * arr(annot);
+      }
+    }
+  } else {
+    result(lannot) = arr(lannot);
+  }
+
+  TA::DistArray<Args...>::wait_for_lazy_cleanup(result.world());
+  return result;
+}
+
+template <typename... Args>
+auto biorthogonal_nns_project(TA::DistArray<Args...> const& arr,
+                              size_t bra_rank) {
+  return biorthogonal_nns_project_ta(arr, bra_rank);
+}
+
+#endif  // defined(SEQUANT_HAS_TILEDARRAY)
+
+#if defined(SEQUANT_HAS_BTAS)
+
+/// \brief This function is used to implement
+/// ResultPtr::biorthogonal_nns_project for btas::Tensor
+///
+/// \param arr The array to be "cleaned up"
+/// \param bra_rank The rank of the bra indices
+///
+/// \return The cleaned btas::Tensor.
+template <typename... Args>
+auto biorthogonal_nns_project_btas(btas::Tensor<Args...> const& arr,
+                                   size_t bra_rank) {
+  using ranges::views::iota;
+  size_t const rank = arr.rank();
+  SEQUANT_ASSERT(bra_rank <= rank);
+  size_t const ket_rank = rank - bra_rank;
+
+  // Tensors with at most 4 modes (bra + ket combined) have no redundancy and
+  // don't require NNS projection
+  if (rank <= 4) return arr;
+
+  using numeric_type = typename btas::Tensor<Args...>::numeric_type;
+
+  const auto& nns_p_coeffs =
+      detail::nns_projection_weights<numeric_type>(ket_rank);
+
+  btas::Tensor<Args...> result;
+
+  perm_t perm = iota(size_t{0}, rank) | ranges::to<perm_t>;
+  perm_t bra_perm = iota(size_t{0}, bra_rank) | ranges::to<perm_t>;
+  perm_t ket_perm = iota(bra_rank, rank) | ranges::to<perm_t>;
+
+  if (ket_rank > 2 && !nns_p_coeffs.empty()) {
+    bool result_initialized = false;
+
+    size_t num_perms = nns_p_coeffs.size();
+    for (size_t perm_rank = 0; perm_rank < num_perms; ++perm_rank) {
+      perm_t permuted_ket =
+          detail::compute_permuted_indices(ket_perm, perm_rank, ket_rank);
+
+      numeric_type coeff = nns_p_coeffs[perm_rank];
+
+      perm_t annot = bra_perm;
+      annot.insert(annot.end(), permuted_ket.begin(), permuted_ket.end());
+
+      btas::Tensor<Args...> temp;
+      btas::permute(arr, annot, temp, perm);
+      btas::scal(coeff, temp);
+
+      if (result_initialized) {
+        result += temp;
+      } else {
+        result = temp;
+        result_initialized = true;
+      }
+    }
+
+  } else {
+    result = arr;
+  }
+
+  return result;
+}
+
+template <typename... Args>
+auto biorthogonal_nns_project(btas::Tensor<Args...> const& arr,
+                              size_t bra_rank) {
+  return biorthogonal_nns_project_btas(arr, bra_rank);
+}
+
+#endif  // defined(SEQUANT_HAS_BTAS)
 
 }  // namespace sequant
 
diff --git a/SeQuant/domain/mbpt/spin.cpp b/SeQuant/domain/mbpt/spin.cpp
index ae5b3f0acf..90c5ec8a82 100644
--- a/SeQuant/domain/mbpt/spin.cpp
+++ b/SeQuant/domain/mbpt/spin.cpp
@@ -1169,28 +1169,20 @@ ExprPtr closed_shell_CC_spintrace_v2(ExprPtr const& expr,
           st_expr;
     }
     simplify(st_expr);
-    // expanding S after spintracing and biorthogonalization, to avoid dealing
-    // with large number of terms
+
     st_expr = S_maps(st_expr);
     // canonicalizer must be called before hash-filter to combine terms
     canonicalize(st_expr);
 
-    // apply hash filter method to get unique set of terms
     st_expr = WK_biorthogonalization_filter(st_expr, ext_idxs);
-    // add S tensor again
+
     st_expr =
         ex<Tensor>(Tensor{L"S", bra(std::move(kixs)), ket(std::move(bixs))}) *
         st_expr;
 
-    rational combined_factor;
-    if (ext_idxs.size() <= 2) {
-      combined_factor = rational(1, factorial(ext_idxs.size()));
-    } else {
-      auto fact_n = factorial(ext_idxs.size());
-      combined_factor =
-          rational(1, fact_n - 1);  // this is (1/fact_n) * (fact_n/(fact_n-1))
-    }
-    st_expr = ex<Constant>(combined_factor) * st_expr;
+    const auto nf = ex<Constant>(
+        rational{1, factorial(ext_idxs.size())});  // normalization factor for S
+    st_expr = nf * st_expr;
   }
 
   simplify(st_expr);
diff --git a/SeQuant/domain/mbpt/spin.hpp b/SeQuant/domain/mbpt/spin.hpp
index 5061e0a5fe..a537213833 100644
--- a/SeQuant/domain/mbpt/spin.hpp
+++ b/SeQuant/domain/mbpt/spin.hpp
@@ -214,7 +214,7 @@ ExprPtr S_maps(const ExprPtr& expr);
 
 /// WK biorthogonalization rewrites biorthogonal expressions as a projector
 /// onto non-null-space (NNS)
-/// applied to the biorothogonal expressions where out of each
+/// applied to the biorthogonal expressions where out of each
 /// group of terms related by permutation of external indices
 /// those with the largest coefficients are selected.
 /// This function performs the selection by forming groups of terms that
diff --git a/cmake/modules/FindOrFetchBTAS.cmake b/cmake/modules/FindOrFetchBTAS.cmake
new file mode 100644
index 0000000000..22253cf3b4
--- /dev/null
+++ b/cmake/modules/FindOrFetchBTAS.cmake
@@ -0,0 +1,68 @@
+# try find_package
+if (NOT TARGET BTAS::BTAS)
+  include (FindPackageRegimport)
+  find_package_regimport(BTAS 1.0.0 QUIET CONFIG)
+  if (TARGET BTAS::BTAS)
+    message(STATUS "Found BTAS CONFIG at ${BTAS_CONFIG}")
+  endif (TARGET BTAS::BTAS)
+endif (NOT TARGET BTAS::BTAS)
+
+# if not found, build via FetchContent
+if (NOT TARGET BTAS::BTAS)
+
+  # BTAS will load BLAS++/LAPACK++ ... if those use CMake's FindBLAS/FindLAPACK (as indicated by defined BLA_VENDOR)
+  # will need to specify Fortran linkage convention ... manually for now, switching to NWX's linear algebra discovery
+  # is necessary to handle all the corner cases for automatic discovery
+  if (DEFINED BLA_VENDOR)
+    set(_linalgpp_use_standard_linalg_kits TRUE)
+  endif(DEFINED BLA_VENDOR)
+
+  include(FetchContent)
+  FetchContent_Declare(
+      BTAS
+      GIT_REPOSITORY      https://github.com/BTAS/btas.git
+      GIT_TAG             ${TA_TRACKED_BTAS_TAG}
+      EXCLUDE_FROM_ALL
+      SYSTEM
+  )
+  FetchContent_MakeAvailable(BTAS)
+  FetchContent_GetProperties(BTAS
+      SOURCE_DIR BTAS_SOURCE_DIR
+      BINARY_DIR BTAS_BINARY_DIR
+      )
+
+  # use subproject targets as if they were in exported namespace ...
+  if (TARGET BTAS AND NOT TARGET BTAS::BTAS)
+    add_library(BTAS::BTAS ALIAS BTAS)
+  endif(TARGET BTAS AND NOT TARGET BTAS::BTAS)
+
+  # set BTAS_CONFIG to the install location so that we know where to find it
+  set(BTAS_CONFIG ${CMAKE_INSTALL_PREFIX}/${BTAS_INSTALL_CMAKEDIR}/btas-config.cmake)
+
+  # define macros specifying Fortran mangling convention, if necessary
+  if (_linalgpp_use_standard_linalg_kits)
+    if (NOT TARGET blaspp OR NOT TARGET lapackpp)  # both targets are modified below
+      message(FATAL_ERROR "blaspp or lapackpp targets missing")
+    endif(NOT TARGET blaspp OR NOT TARGET lapackpp)
+    if (LINALG_MANGLING STREQUAL lower)
+      target_compile_definitions(blaspp PUBLIC -DBLAS_FORTRAN_LOWER=1)
+      target_compile_definitions(lapackpp PUBLIC -DLAPACK_FORTRAN_LOWER=1)
+    elseif(LINALG_MANGLING STREQUAL UPPER OR LINALG_MANGLING STREQUAL upper)
+      target_compile_definitions(blaspp PUBLIC -DBLAS_FORTRAN_UPPER=1)
+      target_compile_definitions(lapackpp PUBLIC -DLAPACK_FORTRAN_UPPER=1)
+    else()
+      if (NOT LINALG_MANGLING STREQUAL lower_)
+        message(WARNING "Linear algebra libraries' mangling convention not specified; specify -DLINALG_MANGLING={lower,lower_,UPPER}, if needed; BLASPP will try to autodetect")
+      endif(NOT LINALG_MANGLING STREQUAL lower_)
+      # these were needed for some configs at some point in the past? But in most cases they just produce compile noise
+#      target_compile_definitions(blaspp PUBLIC -DBLAS_FORTRAN_ADD_=1)
+#      target_compile_definitions(lapackpp PUBLIC -DLAPACK_FORTRAN_ADD_=1)
+    endif()
+  endif (_linalgpp_use_standard_linalg_kits)
+
+endif(NOT TARGET BTAS::BTAS)
+
+# postcond check
+if (NOT TARGET BTAS::BTAS)
+  message(FATAL_ERROR "FindOrFetchBTAS could not make BTAS::BTAS target available")
+endif(NOT TARGET BTAS::BTAS)
diff --git a/cmake/sequant-config.cmake.in b/cmake/sequant-config.cmake.in
index 49169e5f7e..a2c639f7ad 100644
--- a/cmake/sequant-config.cmake.in
+++ b/cmake/sequant-config.cmake.in
@@ -28,6 +28,16 @@ if(SEQUANT_HAS_TILEDARRAY AND NOT TARGET tiledarray)
   find_dependency(TiledArray CONFIG QUIET REQUIRED COMPONENTS tiledarray PATHS ${TiledArray_DIR} NO_DEFAULT_PATH)
 endif()
 
+set(SEQUANT_HAS_BTAS @SEQUANT_HAS_BTAS@)
+if(SEQUANT_HAS_BTAS AND NOT TARGET BTAS::BTAS)
+  set(BTAS_CONFIG "@BTAS_CONFIG@")
+  if (NOT BTAS_CONFIG OR NOT EXISTS "${BTAS_CONFIG}")  # quoted: an empty value must stay one argument
+    message(FATAL_ERROR "Expected BTAS config file at ${BTAS_CONFIG}; directory moved since SeQuant configuration?")
+  endif()
+  get_filename_component(BTAS_DIR "${BTAS_CONFIG}" DIRECTORY)
+  find_dependency(BTAS CONFIG QUIET REQUIRED PATHS "${BTAS_DIR}" NO_DEFAULT_PATH)
+endif()
+
 set(SEQUANT_HAS_EIGEN @SEQUANT_HAS_EIGEN@)
 if (NOT TARGET Eigen3::Eigen AND SEQUANT_HAS_EIGEN)
   if (TARGET TiledArray_Eigen)
diff --git a/doc/user/getting_started/installing.rst b/doc/user/getting_started/installing.rst
index b578d4fba2..da83a6412e 100644
--- a/doc/user/getting_started/installing.rst
+++ b/doc/user/getting_started/installing.rst
@@ -67,9 +67,12 @@ Useful CMake Variables
    * - SEQUANT_TESTS
      - `BUILD_TESTING <https://cmake.org/cmake/help/latest/variable/BUILD_TESTING.html>`_
      - Enables test targets, e.g. ``check-sequant``.
-   * - SEQUANT_EVAL_TESTS
+   * - SEQUANT_BTAS
      - OFF
-     - Enables SeQuant evaluation tests using ``TiledArray`` and ``BTAS``.
+     - SeQuant will look for (or build) the `BTAS tensor library <https://github.com/ValeevGroup/BTAS>`_ and enable its use as an evaluation backend.
+   * - SEQUANT_TILEDARRAY
+     - OFF
+     - SeQuant will look for (or build) the `TiledArray tensor framework <https://github.com/ValeevGroup/TiledArray>`_ and enable its use as an evaluation backend.
    * - SEQUANT_MIMALLOC
      - OFF
      - Use `mimalloc <https://github.com/microsoft/mimalloc>`_ for fast memory allocation.
diff --git a/tests/integration/eval/CMakeLists.txt b/tests/integration/eval/CMakeLists.txt
index 93702f4c8a..8b389424f2 100644
--- a/tests/integration/eval/CMakeLists.txt
+++ b/tests/integration/eval/CMakeLists.txt
@@ -1,6 +1,3 @@
-option(SEQUANT_EVAL_TESTS "Enable building of evaluation tests (if true, will look for and/or build TiledArray)" OFF)
-add_feature_info(EVAL_TESTS SEQUANT_EVAL_TESTS "Build evaluation tests (if true, will look for and/or build TiledArray)")
-
 # uccf12 example moved to MPQC
 
 add_library(eval_shared STATIC EXCLUDE_FROM_ALL
@@ -17,15 +14,13 @@ add_library(eval_shared STATIC EXCLUDE_FROM_ALL
 target_link_libraries(eval_shared PUBLIC SeQuant)
 target_include_directories(eval_shared PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}")
 
-if (TARGET tiledarray)
-    set(example5 eval_ta)
+if (SEQUANT_HAS_TILEDARRAY)
     add_executable(eval_ta ${BUILD_BY_DEFAULT}
             "ta/data_world_ta.hpp"
             "ta/scf_ta.hpp"
             "ta/main.cpp"
     )
     target_link_libraries(eval_ta PRIVATE eval_shared tiledarray)
-    target_compile_definitions(eval_ta PRIVATE SEQUANT_HAS_TILEDARRAY)
 
     set(test_name "sequant/integration/eval_ta")
     add_test(
@@ -35,14 +30,15 @@ if (TARGET tiledarray)
     )
 
     build_test_as_needed(eval_ta "${test_name}" test_name)
+endif (SEQUANT_HAS_TILEDARRAY)
 
+if (SEQUANT_HAS_BTAS)
     add_executable(eval_btas ${BUILD_BY_DEFAULT}
             "btas/data_world_btas.hpp"
             "btas/scf_btas.hpp"
             "btas/main.cpp"
     )
-    target_include_directories(eval_btas PRIVATE ${BTAS_SOURCE_DIR})
-    target_link_libraries(eval_btas PRIVATE eval_shared)
+    target_link_libraries(eval_btas PRIVATE eval_shared BTAS::BTAS)
 
     set(test_name "sequant/integration/eval_btas")
     add_test(
@@ -52,4 +48,4 @@ if (TARGET tiledarray)
     )
 
     build_test_as_needed(eval_btas "${test_name}" test_name)
-endif (TARGET tiledarray)
+endif (SEQUANT_HAS_BTAS)
diff --git a/tests/integration/eval/btas/data_world_btas.hpp b/tests/integration/eval/btas/data_world_btas.hpp
index 2ced086456..c9e8498323 100644
--- a/tests/integration/eval/btas/data_world_btas.hpp
+++ b/tests/integration/eval/btas/data_world_btas.hpp
@@ -10,6 +10,8 @@
 
 #include <btas/btas.h>
 #include <SeQuant/core/container.hpp>
+#include <SeQuant/core/eval/backends/btas/eval_expr.hpp>
+#include <SeQuant/core/eval/backends/btas/result.hpp>
 #include <SeQuant/core/eval/eval.hpp>
 #include <SeQuant/core/expr.hpp>
 #include <SeQuant/core/utility/macros.hpp>
diff --git a/tests/integration/eval/btas/scf_btas.hpp b/tests/integration/eval/btas/scf_btas.hpp
index a07dea280f..3528bacc7b 100644
--- a/tests/integration/eval/btas/scf_btas.hpp
+++ b/tests/integration/eval/btas/scf_btas.hpp
@@ -10,11 +10,12 @@
 #include <data_info.hpp>
 #include <scf.hpp>
 
+#include <SeQuant/core/eval/backends/btas/eval_expr.hpp>
 #include <SeQuant/core/eval/cache_manager.hpp>
 #include <SeQuant/core/eval/eval.hpp>
 
 #include <SeQuant/core/container.hpp>
-#include <SeQuant/core/eval_node.hpp>
+#include <SeQuant/core/eval/eval_node.hpp>
 #include <SeQuant/core/parse.hpp>
 #include <SeQuant/core/utility/macros.hpp>
 
diff --git a/tests/integration/eval/calc_info.hpp b/tests/integration/eval/calc_info.hpp
index cc05f3ac7b..835059d6bf 100644
--- a/tests/integration/eval/calc_info.hpp
+++ b/tests/integration/eval/calc_info.hpp
@@ -10,7 +10,7 @@
 
 #include <SeQuant/core/container.hpp>
 #include <SeQuant/core/eval/cache_manager.hpp>
-#include <SeQuant/core/eval_expr.hpp>
+#include <SeQuant/core/eval/eval_expr.hpp>
 #include <SeQuant/core/optimize.hpp>
 #include <SeQuant/core/utility/macros.hpp>
 #include <SeQuant/domain/mbpt/spin.hpp>
diff --git a/tests/integration/eval/eval_utils.hpp b/tests/integration/eval/eval_utils.hpp
index 944be4c149..3849c9cb3a 100644
--- a/tests/integration/eval/eval_utils.hpp
+++ b/tests/integration/eval/eval_utils.hpp
@@ -6,7 +6,7 @@
 #define SEQUANT_EVAL_EVAL_UTILS_HPP
 
 #include <SeQuant/core/container.hpp>
-#include <SeQuant/core/eval_node.hpp>
+#include <SeQuant/core/eval/eval_node.hpp>
 #include <SeQuant/core/utility/macros.hpp>
 #include <chrono>
 #include <fstream>
diff --git a/tests/integration/eval/ta/data_world_ta.hpp b/tests/integration/eval/ta/data_world_ta.hpp
index 6a6986e61e..3be6f2f070 100644
--- a/tests/integration/eval/ta/data_world_ta.hpp
+++ b/tests/integration/eval/ta/data_world_ta.hpp
@@ -9,7 +9,7 @@
 #include <eval_utils.hpp>
 
 #include <SeQuant/core/container.hpp>
-#include <SeQuant/core/eval/result.hpp>
+#include <SeQuant/core/eval/backends/tiledarray/result.hpp>
 #include <SeQuant/core/expr.hpp>
 #include <SeQuant/core/utility/macros.hpp>
 
diff --git a/tests/integration/eval/ta/scf_ta.hpp b/tests/integration/eval/ta/scf_ta.hpp
index 0a43b9efb4..e2d554afff 100644
--- a/tests/integration/eval/ta/scf_ta.hpp
+++ b/tests/integration/eval/ta/scf_ta.hpp
@@ -6,6 +6,7 @@
 #define SEQUANT_EVAL_SCF_TA_HPP
 
 #include <SeQuant/core/container.hpp>
+#include <SeQuant/core/eval/backends/tiledarray/eval_expr.hpp>
 #include <SeQuant/core/eval/cache_manager.hpp>
 #include <SeQuant/core/eval/eval.hpp>
 #include <SeQuant/core/parse.hpp>
diff --git a/tests/integration/srcc.cpp b/tests/integration/srcc.cpp
index 5175a2135d..8a57eb0692 100644
--- a/tests/integration/srcc.cpp
+++ b/tests/integration/srcc.cpp
@@ -197,23 +197,16 @@ class compute_cceqvec {
           eqvec[R] = ex<Tensor>(Tensor{L"S", bra(kixs), ket(bixs)}) * eqvec[R];
           eqvec[R] = expand(eqvec[R]);
 
-          // apply normalization and rescaling factors
-          rational combined_factor;
-          if (ext_idxs.size() <= 2) {
-            combined_factor = rational(1, factorial(ext_idxs.size()));
-          } else {
-            auto fact_n = factorial(ext_idxs.size());
-            combined_factor = rational(
-                1, fact_n - 1);  // this is (1/fact_n) * (fact_n/(fact_n-1))
-          }
-          eqvec[R] = ex<Constant>(combined_factor) * eqvec[R];
+          // apply normalization factor
+          auto const nf = rational(1, factorial(ext_idxs.size()));
+          eqvec[R] = ex<Constant>(nf) * eqvec[R];
           simplify(eqvec[R]);
 
           // WK_biorthogonalization_filter method removes the redundancy caused
           // by biorthogonal transformation and gives the most compact set of
           // equations. However, we need to restore the effects of those deleted
           // terms. So, after evaluate_symm call in sequant evaluation scope, we
-          // need to call evaluate_biorthogonal_nns_project.
+          // need to call biorthogonal_nns_project_<backend>.
 
           std::wcout << "biorthogonal spin-free R" << R << "(expS" << N
                      << ") has " << eqvec[R]->size() << " terms:" << std::endl;
diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt
index ff65ed4037..0c341696e1 100644
--- a/tests/unit/CMakeLists.txt
+++ b/tests/unit/CMakeLists.txt
@@ -91,22 +91,24 @@ if (SEQUANT_SKIP_LONG_TESTS)
     target_compile_definitions(unit_tests-sequant PRIVATE SEQUANT_SKIP_LONG_TESTS=1)
 endif()
 
-if (TARGET tiledarray)
+if (SEQUANT_HAS_EVAL)
+    set(sq_ut_eval_src "test_cache_manager.cpp")
+    if (SEQUANT_HAS_TILEDARRAY)
+        list(APPEND sq_ut_eval_src "test_eval_ta.cpp")
+    endif()
+    if (SEQUANT_HAS_BTAS)
+        list(APPEND sq_ut_eval_src "test_eval_btas.cpp")
+    endif()
     target_sources(unit_tests-sequant
         PRIVATE
-            "test_cache_manager.cpp"
-            "test_eval_btas.cpp"
-            "test_eval_ta.cpp"
+            ${sq_ut_eval_src}
     )
     set_source_files_properties(
-        "test_eval_btas.cpp"
-        "test_eval_ta.cpp"
+        ${sq_ut_eval_src}
         "test_main.cpp"
         PROPERTIES SKIP_UNITY_BUILD_INCLUSION ON
     )
-    target_link_libraries(unit_tests-sequant PRIVATE tiledarray)
-    target_compile_definitions(unit_tests-sequant PRIVATE SEQUANT_HAS_TILEDARRAY)
-endif (TARGET tiledarray)
+endif (SEQUANT_HAS_EVAL)
 
 target_link_libraries(unit_tests-sequant PRIVATE SeQuant::SeQuant Catch2::Catch2 dtl::dtl)
 
diff --git a/tests/unit/test_eval_btas.cpp b/tests/unit/test_eval_btas.cpp
index 1d0db339ad..3be911859c 100644
--- a/tests/unit/test_eval_btas.cpp
+++ b/tests/unit/test_eval_btas.cpp
@@ -3,13 +3,16 @@
 
 #include "catch2_sequant.hpp"
 
+#include <SeQuant/core/eval/backends/btas/eval_expr.hpp>
+#include <SeQuant/core/eval/backends/btas/result.hpp>
 #include <SeQuant/core/eval/eval.hpp>
-#include <SeQuant/core/eval/result.hpp>
 #include <SeQuant/core/optimize.hpp>
 #include <SeQuant/core/parse.hpp>
+#include <SeQuant/domain/mbpt/biorthogonalization.hpp>
 
 #include <btas/btas.h>
 #include <btas/tensor_func.h>
+
 #include <boost/regex.hpp>
 
 #include <string>
@@ -178,7 +181,9 @@ TEST_CASE("eval_with_btas", "[eval_btas]") {
 
   using BTensorD = btas::Tensor<double>;
 
-  auto norm = [](BTensorD const& tnsr) { return btas::norm(tnsr); };
+  auto norm = [](BTensorD const& tnsr) {
+    return std::sqrt(btas::dotc(tnsr, tnsr));
+  };
 
   std::srand(2023);
   const size_t nocc = 2, nvirt = 20;
@@ -209,9 +214,9 @@ TEST_CASE("eval_with_btas", "[eval_btas]") {
   auto eval_biorthogonal_nns_project =
       [&yield_](sequant::ExprPtr const& expr,
                 container::svector<long> const& target_labels) {
-        return evaluate_biorthogonal_nns_project(eval_node(expr), target_labels,
-                                                 yield_)
-            ->get<BTensorD>();
+        auto result = evaluate(eval_node(expr), target_labels, yield_);
+        return biorthogonal_nns_project(
+            result->get<BTensorD>(), eval_node(expr)->as_tensor().bra_rank());
       };
 
   auto parse_antisymm = [](auto const& xpr) {
@@ -401,14 +406,13 @@ TEST_CASE("eval_with_btas", "[eval_btas]") {
     BTensorD perm_sum{r2.range()};
     perm_sum.fill(0);
 
-    perm_sum += r2;
     perm_sum += BTensorD{permute(r2, {0, 1, 2, 3, 5, 4})};
     perm_sum += BTensorD{permute(r2, {0, 1, 2, 4, 3, 5})};
     perm_sum += BTensorD{permute(r2, {0, 1, 2, 4, 5, 3})};
     perm_sum += BTensorD{permute(r2, {0, 1, 2, 5, 3, 4})};
     perm_sum += BTensorD{permute(r2, {0, 1, 2, 5, 4, 3})};
 
-    btas::scal(1.0 / 6.0, perm_sum);
+    btas::scal(1.0 / 5.0, perm_sum);
     man2 -= perm_sum;
     REQUIRE(norm(eval2) == Catch::Approx(norm(man2)));
 
diff --git a/tests/unit/test_eval_expr.cpp b/tests/unit/test_eval_expr.cpp
index 02116fa704..48fef17f35 100644
--- a/tests/unit/test_eval_expr.cpp
+++ b/tests/unit/test_eval_expr.cpp
@@ -5,7 +5,7 @@
 #include <SeQuant/core/attr.hpp>
 #include <SeQuant/core/container.hpp>
 #include <SeQuant/core/context.hpp>
-#include <SeQuant/core/eval_expr.hpp>
+#include <SeQuant/core/eval/eval_expr.hpp>
 #include <SeQuant/core/expr.hpp>
 #include <SeQuant/core/index.hpp>
 #include <SeQuant/core/parse.hpp>
diff --git a/tests/unit/test_eval_node.cpp b/tests/unit/test_eval_node.cpp
index fcd1137134..84b8290a93 100644
--- a/tests/unit/test_eval_node.cpp
+++ b/tests/unit/test_eval_node.cpp
@@ -6,8 +6,8 @@
 #include <SeQuant/core/attr.hpp>
 #include <SeQuant/core/binary_node.hpp>
 #include <SeQuant/core/container.hpp>
-#include <SeQuant/core/eval_expr.hpp>
-#include <SeQuant/core/eval_node.hpp>
+#include <SeQuant/core/eval/eval_expr.hpp>
+#include <SeQuant/core/eval/eval_node.hpp>
 #include <SeQuant/core/expr.hpp>
 #include <SeQuant/core/parse.hpp>
 #include <SeQuant/core/rational.hpp>
diff --git a/tests/unit/test_eval_ta.cpp b/tests/unit/test_eval_ta.cpp
index 721034a6a3..87d4432744 100644
--- a/tests/unit/test_eval_ta.cpp
+++ b/tests/unit/test_eval_ta.cpp
@@ -4,10 +4,13 @@
 #include "catch2_sequant.hpp"
 
 #include <SeQuant/core/context.hpp>
+#include <SeQuant/core/eval/backends/tiledarray/eval_expr.hpp>
+#include <SeQuant/core/eval/backends/tiledarray/result.hpp>
 #include <SeQuant/core/eval/eval.hpp>
 #include <SeQuant/core/expr.hpp>
 #include <SeQuant/core/parse.hpp>
 #include <SeQuant/core/utility/macros.hpp>
+#include <SeQuant/domain/mbpt/biorthogonalization.hpp>
 #include <SeQuant/domain/mbpt/convention.hpp>
 
 #include <tiledarray.h>
@@ -324,9 +327,9 @@ TEST_CASE("eval_with_tiledarray", "[eval]") {
     auto eval_biorthogonal_nns_project = [&yield_](
                                              sequant::ExprPtr const& expr,
                                              std::string const& target_labels) {
-      return evaluate_biorthogonal_nns_project(eval_node(expr), target_labels,
-                                               yield_)
-          ->get<TA::TArrayD>();
+      auto result = evaluate(eval_node(expr), target_labels, yield_);
+      return sequant::biorthogonal_nns_project(
+          result->get<TA::TArrayD>(), eval_node(expr)->as_tensor().bra_rank());
     };
 
     SECTION("summation") {
@@ -510,7 +513,7 @@ TEST_CASE("eval_with_tiledarray", "[eval]") {
     }
 
     SECTION("Biorthogonal Cleanup") {
-      // low-rank residuals: skip cleanup
+      // low-rank residuals: skip nns
       auto expr1 = parse_antisymm(L"R_{a1, a2}^{i1, i2}");
       auto eval1 = eval_biorthogonal_nns_project(expr1, "a_1,a_2,i_1,i_2");
       auto const& arr1 = yield(L"R{a1,a2;i1,i2}");
@@ -524,8 +527,8 @@ TEST_CASE("eval_with_tiledarray", "[eval]") {
       REQUIRE(norm(zero1) == Catch::Approx(0).margin(
                                  100 * std::numeric_limits<double>::epsilon()));
 
-      // high-rank residuals: cleanup applies:
-      // result = identity - (1/ket_rank!) * sum_of_ket_permutations
+      // for rank 3 residual, nns applies:
+      // result = identity - (1/5) * sum_of_non_identity_ket_permutations
       auto expr2 = parse_antisymm(L"R_{a1, a2, a3}^{i1, i2, i3}");
       auto eval2 =
           eval_biorthogonal_nns_project(expr2, "a_1,a_2,a_3,i_1,i_2,i_3");
@@ -534,15 +537,56 @@ TEST_CASE("eval_with_tiledarray", "[eval]") {
       auto man2 = TArrayD{};
       man2("0,1,2,3,4,5") =
           arr2("0,1,2,3,4,5") -
-          (1.0 / 6.0) *
-              (arr2("0,1,2,3,4,5") + arr2("0,1,2,3,5,4") + arr2("0,1,2,4,3,5") +
-               arr2("0,1,2,4,5,3") + arr2("0,1,2,5,3,4") + arr2("0,1,2,5,4,3"));
+          (1.0 / 5.0) *
+              (arr2("0,1,2,3,5,4") + arr2("0,1,2,4,3,5") + arr2("0,1,2,4,5,3") +
+               arr2("0,1,2,5,3,4") + arr2("0,1,2,5,4,3"));
 
       REQUIRE(norm(man2) == Catch::Approx(norm(eval2)));
       TArrayD zero2;
       zero2("0,1,2,3,4,5") = man2("0,1,2,3,4,5") - eval2("0,1,2,3,4,5");
-      REQUIRE(norm(zero1) == Catch::Approx(0).margin(
+      REQUIRE(norm(zero2) == Catch::Approx(0).margin(
                                  100 * std::numeric_limits<double>::epsilon()));
+
+      // for rank 4 residual, nns applies:
+      // result = NNS_P * sum_of_ket_permutations
+      auto expr3 = parse_antisymm(L"R_{a1, a2, a3, a4}^{i1, i2, i3, i4}");
+      auto eval3 = eval_biorthogonal_nns_project(
+          expr3, "a_1,a_2,a_3,a_4,i_1,i_2,i_3,i_4");
+      auto const& arr3 = yield(L"R{a1,a2,a3,a4;i1,i2,i3,i4}");
+
+      auto man3 = TArrayD{};
+      man3("0,1,2,3,4,5,6,7") = 1.0 * arr3("0,1,2,3,4,5,6,7") +
+                                -4.0 / 14.0 * arr3("0,1,2,3,4,5,7,6") +
+                                -4.0 / 14.0 * arr3("0,1,2,3,4,6,5,7") +
+                                -1.0 / 14.0 * arr3("0,1,2,3,4,6,7,5") +
+                                -1.0 / 14.0 * arr3("0,1,2,3,4,7,5,6") +
+                                -4.0 / 14.0 * arr3("0,1,2,3,4,7,6,5") +
+                                -4.0 / 14.0 * arr3("0,1,2,3,5,4,6,7") +
+                                2.0 / 14.0 * arr3("0,1,2,3,5,4,7,6") +
+                                -1.0 / 14.0 * arr3("0,1,2,3,5,6,4,7") +
+                                2.0 / 14.0 * arr3("0,1,2,3,5,6,7,4") +
+                                2.0 / 14.0 * arr3("0,1,2,3,5,7,4,6") +
+                                -1.0 / 14.0 * arr3("0,1,2,3,5,7,6,4") +
+                                -1.0 / 14.0 * arr3("0,1,2,3,6,4,5,7") +
+                                2.0 / 14.0 * arr3("0,1,2,3,6,4,7,5") +
+                                -4.0 / 14.0 * arr3("0,1,2,3,6,5,4,7") +
+                                -1.0 / 14.0 * arr3("0,1,2,3,6,5,7,4") +
+                                2.0 / 14.0 * arr3("0,1,2,3,6,7,4,5") +
+                                2.0 / 14.0 * arr3("0,1,2,3,6,7,5,4") +
+                                2.0 / 14.0 * arr3("0,1,2,3,7,4,5,6") +
+                                -1.0 / 14.0 * arr3("0,1,2,3,7,4,6,5") +
+                                -1.0 / 14.0 * arr3("0,1,2,3,7,5,4,6") +
+                                -4.0 / 14.0 * arr3("0,1,2,3,7,5,6,4") +
+                                2.0 / 14.0 * arr3("0,1,2,3,7,6,4,5") +
+                                2.0 / 14.0 * arr3("0,1,2,3,7,6,5,4");
+
+      REQUIRE(norm(man3) == Catch::Approx(norm(eval3)));
+      TArrayD zero3;
+      zero3("0,1,2,3,4,5,6,7") =
+          man3("0,1,2,3,4,5,6,7") - eval3("0,1,2,3,4,5,6,7");
+      REQUIRE(norm(zero3) ==
+              Catch::Approx(0).margin(1000 *
+                                      std::numeric_limits<double>::epsilon()));
     }
 
     SECTION("Others") {
diff --git a/tests/unit/test_optimize.cpp b/tests/unit/test_optimize.cpp
index df122873e8..951e286aaf 100644
--- a/tests/unit/test_optimize.cpp
+++ b/tests/unit/test_optimize.cpp
@@ -4,8 +4,8 @@
 
 #include <SeQuant/core/algorithm.hpp>
 #include <SeQuant/core/attr.hpp>
-#include <SeQuant/core/eval_expr.hpp>
-#include <SeQuant/core/eval_node.hpp>
+#include <SeQuant/core/eval/eval_expr.hpp>
+#include <SeQuant/core/eval/eval_node.hpp>
 #include <SeQuant/core/expr.hpp>
 #include <SeQuant/core/index.hpp>
 #include <SeQuant/core/optimize.hpp>
diff --git a/tests/unit/test_spin.cpp b/tests/unit/test_spin.cpp
index e3aeb180c3..da85c53262 100644
--- a/tests/unit/test_spin.cpp
+++ b/tests/unit/test_spin.cpp
@@ -1090,8 +1090,8 @@ SECTION("Closed-shell spintrace CCSDT terms") {
             "g{a_1,a_2;a_4,a_5}:N-C-S * t{a_3,a_4,a_5;i_1,i_2,i_3}:N-C-S + 2 "
             "g{a_1,a_3;a_4,a_5}:N-C-S * t{a_2,a_4,a_5;i_1,i_3,i_2}:N-C-S"));
 
-    // the new efficient method, spintracing with partial expansion, then
-    // expanding by S_map ( this method is used in
+    // the new efficient method does spintracing with partial expansion, then
+    // expands by S_map (this method is used in
     // closed_shell_CC_spintrace_v2)
     auto result_2 = closed_shell_spintrace(
         input, {{L"i_1", L"a_1"}, {L"i_2", L"a_2"}, {L"i_3", L"a_3"}});
@@ -1124,19 +1124,13 @@ SECTION("Closed-shell spintrace CCSDT terms") {
             "g{a_1,a_3;a_4,a_5}:N-C-S * t{a_2,a_4,a_5;i_1,i_3,i_2}:N-C-S"));
   }
 
-  SECTION("ppl term in optimal") {  // results in 1 term
+  SECTION("most expensive terms in CCSDT") {  // results in 1 term
     const auto input = ex<Sum>(ExprPtrList{
         parse_expr(L"1/24 A{i_1,i_2,i_3;a_1,a_2,a_3} * "
                    L"g{a_1,a_2;a_4,a_5} * t{a_3,a_4,a_5;i_1,i_2,i_3}",
                    Symmetry::Antisymm)});
 
     auto result = closed_shell_CC_spintrace_v2(input);
-    // multiply the result by 6/5 to revert the rescaling factor
-    result *= ex<Constant>(rational{5, 6});
-
-    // There is a problem with casting a single term to Sum
-    // REQUIRE(result->size()== 1); // it needs to be checked
-
     REQUIRE_THAT(
         result,
         EquivalentTo(