Skip to content

Commit

Permalink
Replace scalar enumerable_thread_specific with parallel_reduce (#121)
Browse files Browse the repository at this point in the history
* Replace scalar enumerable_thread_specific with parallel_reduce

* Remove std::plus

* Remove comments
  • Loading branch information
zfergus committed Sep 10, 2024
1 parent a4e59a7 commit a34c034
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 54 deletions.
21 changes: 9 additions & 12 deletions src/ipc/collisions/collisions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <ipc/utils/local_to_global.hpp>

#include <tbb/parallel_for.h>
#include <tbb/parallel_reduce.h>
#include <tbb/parallel_sort.h>
#include <tbb/blocked_range.h>
#include <tbb/enumerable_thread_specific.h>
Expand Down Expand Up @@ -328,25 +329,21 @@ double Collisions::compute_minimum_distance(
const Eigen::MatrixXi& edges = mesh.edges();
const Eigen::MatrixXi& faces = mesh.faces();

tbb::enumerable_thread_specific<double> storage(
std::numeric_limits<double>::infinity());

tbb::parallel_for(
return tbb::parallel_reduce(
tbb::blocked_range<size_t>(0, size()),
[&](tbb::blocked_range<size_t> r) {
double& local_min_dist = storage.local();

std::numeric_limits<double>::infinity(),
[&](tbb::blocked_range<size_t> r, double partial_min_dist) -> double {
for (size_t i = r.begin(); i < r.end(); i++) {
const double dist = (*this)[i].compute_distance(
(*this)[i].dof(vertices, edges, faces));

if (dist < local_min_dist) {
local_min_dist = dist;
if (dist < partial_min_dist) {
partial_min_dist = dist;
}
}
});

return storage.combine([](double a, double b) { return std::min(a, b); });
return partial_min_dist;
},
[](double a, double b) { return std::min(a, b); });
}

// ============================================================================
Expand Down
18 changes: 7 additions & 11 deletions src/ipc/implicits/plane.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,8 @@
#include <ipc/distance/point_plane.hpp>
#include <ipc/ccd/point_static_plane.hpp>

#include <tbb/parallel_for.h>
#include <tbb/parallel_reduce.h>
#include <tbb/blocked_range.h>
#include <tbb/enumerable_thread_specific.h>

namespace ipc {

Expand Down Expand Up @@ -98,13 +97,10 @@ double compute_point_plane_collision_free_stepsize(
assert(plane_normals.rows() == n_planes);
assert(points_t0.rows() == points_t1.rows());

tbb::enumerable_thread_specific<double> storage(1);

tbb::parallel_for(
const double earliest_toi = tbb::parallel_reduce(
tbb::blocked_range<size_t>(0, points_t0.rows()),
[&](tbb::blocked_range<size_t> r) {
double& earliest_toi = storage.local();

/*inital_step_size=*/1.0,
[&](tbb::blocked_range<size_t> r, double earliest_toi) {
for (size_t vi = r.begin(); vi < r.end(); vi++) {
for (size_t pi = 0; pi < n_planes; pi++) {
if (!can_collide(vi, pi)) {
Expand All @@ -126,10 +122,10 @@ double compute_point_plane_collision_free_stepsize(
}
}
}
});
return earliest_toi;
},
[&](double a, double b) { return std::min(a, b); });

const double earliest_toi =
storage.combine([](double a, double b) { return std::min(a, b); });
assert(earliest_toi >= 0 && earliest_toi <= 1.0);
return earliest_toi;
}
Expand Down
2 changes: 1 addition & 1 deletion src/ipc/potentials/friction_potential.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ Eigen::VectorXd FrictionPotential::force(
const Eigen::MatrixXi& edges = mesh.edges();
const Eigen::MatrixXi& faces = mesh.faces();

tbb::enumerable_thread_specific<Eigen::VectorXd> storage(
tbb::combinable<Eigen::VectorXd> storage(
Eigen::VectorXd::Zero(velocities.size()));

tbb::parallel_for(
Expand Down
63 changes: 33 additions & 30 deletions src/ipc/potentials/potential.tpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@

#include <ipc/utils/local_to_global.hpp>

#include <tbb/parallel_for.h>
#include <tbb/blocked_range.h>
#include <tbb/combinable.h>
#include <tbb/enumerable_thread_specific.h>
#include <tbb/parallel_for.h>
#include <tbb/parallel_reduce.h>

namespace ipc {

Expand All @@ -18,25 +20,18 @@ double Potential<TCollisions>::operator()(
{
assert(X.rows() == mesh.num_vertices());

if (collisions.empty()) {
return 0;
}

tbb::enumerable_thread_specific<double> storage(0);

tbb::parallel_for(
tbb::blocked_range<size_t>(size_t(0), collisions.size()),
[&](const tbb::blocked_range<size_t>& r) {
auto& local_potential = storage.local();
return tbb::parallel_reduce(
tbb::blocked_range<size_t>(size_t(0), collisions.size()), 0.0,
[&](const tbb::blocked_range<size_t>& r, double partial_sum) {
for (size_t i = r.begin(); i < r.end(); i++) {
// Quadrature weight is premultiplied by local potential
local_potential += (*this)(
partial_sum += (*this)(
collisions[i],
collisions[i].dof(X, mesh.edges(), mesh.faces()));
}
});

return storage.combine([](double a, double b) { return a + b; });
return partial_sum;
},
[](double a, double b) { return a + b; });
}

template <class TCollisions>
Expand All @@ -53,14 +48,11 @@ Eigen::VectorXd Potential<TCollisions>::gradient(

const int dim = X.cols();

tbb::enumerable_thread_specific<Eigen::VectorXd> storage(
Eigen::VectorXd::Zero(X.size()));
tbb::combinable<Eigen::VectorXd> grad(Eigen::VectorXd::Zero(X.size()));

tbb::parallel_for(
tbb::blocked_range<size_t>(size_t(0), collisions.size()),
[&](const tbb::blocked_range<size_t>& r) {
auto& global_grad = storage.local();

for (size_t i = r.begin(); i < r.end(); i++) {
const TCollision& collision = collisions[i];

Expand All @@ -71,12 +63,13 @@ Eigen::VectorXd Potential<TCollisions>::gradient(
collision.vertex_ids(mesh.edges(), mesh.faces());

local_gradient_to_global_gradient(
local_grad, vids, dim, global_grad);
local_grad, vids, dim, grad.local());
}
});

return storage.combine([](const Eigen::VectorXd& a,
const Eigen::VectorXd& b) { return a + b; });
return grad.combine([](const Eigen::VectorXd& a, const Eigen::VectorXd& b) {
return a + b;
});
}

template <class TCollisions>
Expand Down Expand Up @@ -121,14 +114,24 @@ Eigen::SparseMatrix<double> Potential<TCollisions>::hessian(
}
});

Eigen::SparseMatrix<double> hess(ndof, ndof);
for (const auto& local_hess_triplets : storage) {
Eigen::SparseMatrix<double> local_hess(ndof, ndof);
local_hess.setFromTriplets(
local_hess_triplets.begin(), local_hess_triplets.end());
hess += local_hess;
}
return hess;
// Combine the local hessians
tbb::combinable<Eigen::SparseMatrix<double>> hess(
Eigen::SparseMatrix<double>(ndof, ndof));

tbb::parallel_for(
tbb::blocked_range<decltype(storage)::iterator>(
storage.begin(), storage.end()),
[&](const tbb::blocked_range<decltype(storage)::iterator>& r) {
for (auto it = r.begin(); it != r.end(); ++it) {
Eigen::SparseMatrix<double> local_hess(ndof, ndof);
local_hess.setFromTriplets(it->begin(), it->end());
hess.local() += local_hess;
}
});

return hess.combine(
[](const Eigen::SparseMatrix<double>& a,
const Eigen::SparseMatrix<double>& b) { return a + b; });
}

} // namespace ipc

0 comments on commit a34c034

Please sign in to comment.