diff --git a/demos/FiniteVolume/advection_2d.cpp b/demos/FiniteVolume/advection_2d.cpp index c07d2296b..8ed0d9301 100644 --- a/demos/FiniteVolume/advection_2d.cpp +++ b/demos/FiniteVolume/advection_2d.cpp @@ -16,6 +16,9 @@ #include #include +#include +#include + #include namespace fs = std::filesystem; @@ -149,18 +152,24 @@ void save(const fs::path& path, const std::string& filename, const Field& u, con auto mesh = u.mesh(); auto level_ = samurai::make_scalar_field("level", mesh); + auto domain_ = samurai::make_scalar_field("domain", mesh); + if (!fs::exists(path)) { fs::create_directory(path); } + int mrank = 0; + samurai::for_each_cell(mesh, [&](const auto& cell) { - level_[cell] = cell.level; + level_[cell] = cell.level; + domain_[cell] = mrank; }); #ifdef SAMURAI_WITH_MPI mpi::communicator world; + mrank = world.rank(); samurai::save(path, fmt::format("{}_size_{}{}", filename, world.size(), suffix), mesh, u, level_); #else samurai::save(path, fmt::format("{}{}", filename, suffix), mesh, u, level_); @@ -194,9 +203,10 @@ int main(int argc, char* argv[]) bool correction = false; // Output parameters - fs::path path = fs::current_path(); - std::string filename = "FV_advection_2d"; - std::size_t nfiles = 1; + fs::path path = fs::current_path(); + std::string filename = "FV_advection_2d"; + std::size_t nfiles = 1; + std::size_t nt_loadbalance = 10; // nombre d'iteration entre les equilibrages app.add_option("--min-corner", min_corner, "The min corner of the box")->capture_default_str()->group("Simulation parameters"); app.add_option("--max-corner", max_corner, "The max corner of the box")->capture_default_str()->group("Simulation parameters"); @@ -207,6 +217,9 @@ int main(int argc, char* argv[]) app.add_option("--restart-file", restart_file, "Restart file")->capture_default_str()->group("Simulation parameters"); app.add_option("--min-level", min_level, "Minimum level of the multiresolution")->capture_default_str()->group("Multiresolution"); app.add_option("--max-level", max_level, "Maximum level of the multiresolution")->capture_default_str()->group("Multiresolution"); +#ifdef SAMURAI_WITH_MPI + app.add_option("--nt-loadbalance", nt_loadbalance, "Maximum level of the multiresolution")->capture_default_str()->group("Multiresolution"); +#endif app.add_option("--mr-eps", mr_epsilon, "The epsilon used by the multiresolution to adapt the mesh") ->capture_default_str() ->group("Multiresolution"); @@ -251,8 +264,19 @@ int main(int argc, char* argv[]) std::size_t nsave = 1; std::size_t nt = 0; +#ifdef SAMURAI_WITH_MPI + Load_balancing::Diffusion balancer; +#endif + while (t != Tf) { +#ifdef SAMURAI_WITH_MPI + if (((nt % nt_loadbalance == 0) && nt > 1) || nt == 1) + { + balancer.load_balance(mesh, u); + } +#endif + MRadaptation(mr_epsilon, mr_regularity); t += dt; diff --git a/demos/FiniteVolume/burgers.cpp b/demos/FiniteVolume/burgers.cpp index 8ea7c097d..10ed12e92 100644 --- a/demos/FiniteVolume/burgers.cpp +++ b/demos/FiniteVolume/burgers.cpp @@ -8,6 +8,9 @@ #include #include +#include +#include + #include namespace fs = std::filesystem; @@ -85,6 +88,10 @@ int main_dim(int argc, char* argv[]) std::string filename = "burgers_" + std::to_string(dim) + "D"; std::size_t nfiles = 50; +#if SAMURAI_WITH_MPI + std::size_t nt_loadbalance = 10; // nombre d'iteration entre les equilibrages +#endif + app.add_option("--left", left_box, "The left border of the box")->capture_default_str()->group("Simulation parameters"); app.add_option("--right", right_box, "The right border of the box")->capture_default_str()->group("Simulation parameters"); app.add_option("--init-sol", init_sol, "Initial solution: hat/linear/bands")->capture_default_str()->group("Simulation parameters"); @@ -120,6 +127,10 @@ int main_dim(int argc, char* argv[]) Box box(box_corner1, box_corner2); samurai::MRMesh mesh; +#ifdef SAMURAI_WITH_MPI + Load_balancing::Diffusion balancer; +#endif + auto u = samurai::make_vector_field("u", mesh); auto u1 = samurai::make_vector_field("u1", mesh); auto u2 = samurai::make_vector_field("u2", mesh); @@ -247,6 +258,13 @@ int main_dim(int argc, char* argv[]) while (t != Tf) { +#ifdef SAMURAI_WITH_MPI + if ((nt % nt_loadbalance == 0) && nt > 1) + { + balancer.load_balance(mesh, u); + } +#endif + // Move to next timestep t += dt; if (t > Tf) diff --git a/demos/FiniteVolume/linear_convection.cpp b/demos/FiniteVolume/linear_convection.cpp index aca48e50a..aa9fcd77c 100644 --- a/demos/FiniteVolume/linear_convection.cpp +++ b/demos/FiniteVolume/linear_convection.cpp @@ -8,6 +8,9 @@ #include #include +#include +#include + #include namespace fs = std::filesystem; @@ -46,7 +49,6 @@ int main(int argc, char* argv[]) //--------------------// // Program parameters // //--------------------// - // Simulation parameters double left_box = -1; double right_box = 1; @@ -68,7 +70,9 @@ int main(int argc, char* argv[]) fs::path path = fs::current_path(); std::string filename = "linear_convection_" + std::to_string(dim) + "D"; std::size_t nfiles = 0; - +#ifdef SAMURAI_WITH_MPI + std::size_t nt_loadbalance = 10; +#endif app.add_option("--left", left_box, "The left border of the box")->capture_default_str()->group("Simulation parameters"); app.add_option("--right", right_box, "The right border of the box")->capture_default_str()->group("Simulation parameters"); app.add_option("--Ti", t, "Initial time")->capture_default_str()->group("Simulation parameters"); @@ -78,6 +82,9 @@ int main(int argc, char* argv[]) app.add_option("--cfl", cfl, "The CFL")->capture_default_str()->group("Simulation parameters"); app.add_option("--min-level", min_level, "Minimum level of the multiresolution")->capture_default_str()->group("Multiresolution"); app.add_option("--max-level", max_level, "Maximum level of the multiresolution")->capture_default_str()->group("Multiresolution"); +#ifdef SAMURAI_WITH_MPI + app.add_option("--nt-loadbalance", nt_loadbalance, "Maximum level of the multiresolution")->capture_default_str()->group("Multiresolution"); +#endif app.add_option("--mr-eps", mr_epsilon, "The epsilon used by the multiresolution to adapt the mesh") ->capture_default_str() ->group("Multiresolution"); @@ -150,6 +157,10 @@ int main(int argc, char* argv[]) } auto conv = samurai::make_convection_weno5(velocity); +#ifdef SAMURAI_WITH_MPI + Load_balancing::Diffusion balancer; +#endif + //--------------------// // Time iteration // //--------------------// @@ -174,6 +185,12 @@ int main(int argc, char* argv[]) } while (t != Tf) { +#ifdef SAMURAI_WITH_MPI + if (nt % nt_loadbalance == 0 && nt > 1) + { + balancer.load_balance(mesh, u); + } +#endif // Move to next timestep t += dt; if (t > Tf) @@ -182,7 +199,6 @@ int main(int argc, char* argv[]) t = Tf; } std::cout << fmt::format("iteration {}: t = {:.2f}, dt = {}", nt++, t, dt) << std::flush; - // Mesh adaptation MRadaptation(mr_epsilon, mr_regularity); samurai::update_ghost_mr(u); diff --git a/demos/LBM/D2Q4444_Euler_Lax_Liu.cpp b/demos/LBM/D2Q4444_Euler_Lax_Liu.cpp index d45c07f86..24183c067 100644 --- a/demos/LBM/D2Q4444_Euler_Lax_Liu.cpp +++ b/demos/LBM/D2Q4444_Euler_Lax_Liu.cpp @@ -4,44 +4,32 @@ #include #include -#include +#include +#include #include #include #include -#include +#include +#include #include -#include -#include "boundary_conditions.hpp" -#include "prediction_map_2d.hpp" - -#include "utils_lbm_mr_2d.hpp" - -/// Timer used in tic & toc -auto tic_timer = std::chrono::high_resolution_clock::now(); - -/// Launching the timer -void tic() -{ - tic_timer = std::chrono::high_resolution_clock::now(); -} - -/// Stopping the timer and returning the duration in seconds -double toc() -{ - const auto toc_timer = std::chrono::high_resolution_clock::now(); - const std::chrono::duration time_span = toc_timer - tic_timer; - return time_span.count(); -} +#include +#include +#include +#include +#include +#include +#include +#include double gm = 1.4; // Gas constant template -auto init_f(samurai::MROMesh& mesh, int config, double lambda) +auto init_f(samurai::MRMesh& mesh, int config, double lambda) { constexpr std::size_t nvel = 16; - using mesh_id_t = typename samurai::MROMesh::mesh_id_t; + using mesh_id_t = typename samurai::MRMesh::mesh_id_t; auto f = samurai::make_vector_field("f", mesh); f.fill(0); @@ -192,79 +180,8 @@ auto init_f(samurai::MROMesh& mesh, int config, double lambda) return f; } -template -auto compute_prediction(std::size_t min_level, std::size_t max_level) -{ - coord_index_t i = 0, j = 0; - std::vector>> data(max_level - min_level + 1); - - auto rotation_of_pi_over_two = [](int alpha, int k, int h) - { - // Returns the rotation of (k, h) of an angle alpha * pi / 2. - // All the operations are performed on integer, to be exact - int cosinus = static_cast(std::round(std::cos(alpha * M_PI / 2.))); - int sinus = static_cast(std::round(std::sin(alpha * M_PI / 2.))); - - return std::pair(cosinus * k - sinus * h, sinus * k + cosinus * h); - }; - - // Transforms the coordinates to apply the rotation - auto tau = [](int delta, int k) - { - // The case in which delta = 0 is rather exceptional - if (delta == 0) - { - return k; - } - else - { - auto tmp = (1 << (delta - 1)); - return static_cast((k < tmp) ? (k - tmp) : (k - tmp + 1)); - } - }; - - auto tau_inverse = [](int delta, int k) - { - if (delta == 0) - { - return k; - } - else - { - auto tmp = (1 << (delta - 1)); - return static_cast((k < 0) ? (k + tmp) : (k + tmp - 1)); - } - }; - - for (std::size_t k = 0; k < max_level - min_level + 1; ++k) - { - int size = (1 << k); - data[k].resize(4); - - for (int alpha = 0; alpha < 4; ++alpha) - { - for (int l = 0; l < size; ++l) - { - // The reference direction from which the other ones are - // computed is that of (1, 0) - auto rotated_in = rotation_of_pi_over_two(alpha, tau(k, i * size - 1), tau(k, j * size + l)); - auto rotated_out = rotation_of_pi_over_two(alpha, tau(k, (i + 1) * size - 1), tau(k, j * size + l)); - - // For the cells inside the domain, we can already combine - // entering and exiting fluxes and we have a compensation of - // many cells. - data[k][alpha] += (prediction(k, tau_inverse(k, rotated_in.first), tau_inverse(k, rotated_in.second)) - - prediction(k, tau_inverse(k, rotated_out.first), tau_inverse(k, rotated_out.second))); - } - } - } - return data; -} - -template +template void one_time_step(Field& f, - Func&& update_bc_for_level, - const pred& pred_coeff, const double lambda, const double sq_rho, const double sxy_rho, @@ -276,189 +193,158 @@ void one_time_step(Field& f, constexpr std::size_t nvel = Field::n_comp; using coord_index_t = typename Field::interval_t::coord_index_t; - auto mesh = f.mesh(); - using mesh_id_t = typename decltype(mesh)::mesh_id_t; + // std::size_t max_level = mesh.max_level(); - auto min_level = mesh.min_level(); - auto max_level = mesh.max_level(); - - samurai::update_ghost_mr(f, std::forward(update_bc_for_level)); - samurai::update_overleaves_mr(f, std::forward(update_bc_for_level)); + samurai::times::timers.start("ugm-step"); + samurai::update_ghost_mr(f); + samurai::times::timers.stop("ugm-step"); + samurai::times::timers.start("field-step"); Field new_f{"new_f", mesh}; new_f.array().fill(0.); - Field fluxes{"fluxes", mesh}; // This stored the fluxes computed at the level of the overleaves - fluxes.array().fill(0.); Field advected{"advected", mesh}; advected.array().fill(0.); + samurai::times::timers.stop("field-step"); - for (std::size_t level = min_level; level <= max_level; ++level) - { - auto leaves = samurai::intersection(mesh[mesh_id_t::cells][level], mesh[mesh_id_t::cells][level]); - - if (level == max_level) - { // Advection at the finest level - - leaves( - [&](auto& interval, auto& index) - { - auto k = interval; // Logical index in x - auto h = index[0]; // Logical index in y - - // We enforce a bounce-back - for (int scheme_n = 0; scheme_n < 4; ++scheme_n) - { // We have 4 schemes - advected(0 + 4 * scheme_n, level, k, h) = f(0 + 4 * scheme_n, level, k - 1, h); - advected(1 + 4 * scheme_n, level, k, h) = f(1 + 4 * scheme_n, level, k, h - 1); - advected(2 + 4 * scheme_n, level, k, h) = f(2 + 4 * scheme_n, level, k + 1, h); - advected(3 + 4 * scheme_n, level, k, h) = f(3 + 4 * scheme_n, level, k, h + 1); - } - }); - } - else // Advection at the coarse levels using the overleaves + samurai::times::timers.start("lbm-step"); + samurai::for_each_interval( + mesh[mesh_id_t::cells], + [&](std::size_t level, auto& i, auto& index) { - auto lev_p_1 = level + 1; - std::size_t j = max_level - (lev_p_1); - double coeff = 1. / (1 << (2 * j)); // ATTENTION A LA DIMENSION 2 !!!! - - leaves.on(level + 1)([&](auto& interval, auto& index) { // This are overleaves - auto k = interval; // Logical index in x - auto h = index[0]; // Logical index in y + auto j = index[0]; + // auto jump = max_level - level; + // double coef = 1. / (1 << (dim * jump)); + for (std::size_t scheme_n = 0; scheme_n < 4; ++scheme_n) + { // We have 4 schemes + advected(0 + 4 * scheme_n, level, i, j) = f(0 + 4 * scheme_n, level, i - 1, j); + advected(1 + 4 * scheme_n, level, i, j) = f(1 + 4 * scheme_n, level, i, j - 1); + advected(2 + 4 * scheme_n, level, i, j) = f(2 + 4 * scheme_n, level, i + 1, j); + advected(3 + 4 * scheme_n, level, i, j) = f(3 + 4 * scheme_n, level, i, j + 1); + + // advected( + // 0 + 4 * scheme_n, + // level, + // i, + // j) = f(0 + 4 * scheme_n, level, i, j) + // + coef * samurai::portion(f, 0 + 4 * scheme_n, level, i - 1, j, jump, {(1 << jump) - 1, (1 << jump)}, {0, (1 + // << jump)}) + // - coef * samurai::portion(f, 0 + 4 * scheme_n, level, i, j, jump, {(1 << jump) - 1, (1 << jump)}, {0, (1 << + // jump)}); + + // advected( + // 1 + 4 * scheme_n, + // level, + // i, + // j) = f(1 + 4 * scheme_n, level, i, j) + // + coef * samurai::portion(f, 1 + 4 * scheme_n, level, i, j - 1, jump, {0, (1 << jump)}, {(1 << jump) - 1, (1 + // << jump)}) + // - coef * samurai::portion(f, 1 + 4 * scheme_n, level, i, j, jump, {0, (1 << jump)}, {(1 << jump) - 1, (1 << + // jump)}); + + // advected(2 + 4 * scheme_n, level, i, j) = f( + // 2 + 4 * scheme_n, + // level, + // i, + // j) = f(2 + 4 * scheme_n, level, i, j) + // + coef * samurai::portion(f, 2 + 4 * scheme_n, level, i + 1, j, jump, {0, 1}, {0, (1 << jump)}) + // - coef * samurai::portion(f, 2 + 4 * scheme_n, level, i, j, jump, {0, 1}, {0, (1 << jump)}); + // advected(3 + 4 * scheme_n, + // level, + // i, + // j) = f(3 + 4 * scheme_n, level, i, j) + // + coef * samurai::portion(f, 3 + 4 * scheme_n, level, i, j + 1, jump, {0, (1 << jump)}, {0, 1}) + // - coef * samurai::portion(f, 3 + 4 * scheme_n, level, i, j, jump, {0, (1 << jump)}, {0, 1}); + } - for (int scheme_n = 0; scheme_n < 4; ++scheme_n) - { - auto shift = 4 * scheme_n; - - for (std::size_t alpha = 0; alpha < 4; ++alpha) - { - for (auto& c : pred_coeff[j][alpha].coeff) - { - coord_index_t stencil_x, stencil_y; - std::tie(stencil_x, stencil_y) = c.first; - - fluxes(alpha + shift, lev_p_1, k, h) += c.second * f(alpha + shift, lev_p_1, k + stencil_x, h + stencil_y); - } - } - } - }); + // We compute the advected momenti + auto m0_0 = xt::eval(advected(0, level, i, j) + advected(1, level, i, j) + advected(2, level, i, j) + advected(3, level, i, j)); + auto m0_1 = xt::eval(lambda * (advected(0, level, i, j) - advected(2, level, i, j))); + auto m0_2 = xt::eval(lambda * (advected(1, level, i, j) - advected(3, level, i, j))); + auto m0_3 = xt::eval( + lambda * lambda * (advected(0, level, i, j) - advected(1, level, i, j) + advected(2, level, i, j) - advected(3, level, i, j))); + + auto m1_0 = xt::eval(advected(4, level, i, j) + advected(5, level, i, j) + advected(6, level, i, j) + advected(7, level, i, j)); + auto m1_1 = xt::eval(lambda * (advected(4, level, i, j) - advected(6, level, i, j))); + auto m1_2 = xt::eval(lambda * (advected(5, level, i, j) - advected(7, level, i, j))); + auto m1_3 = xt::eval( + lambda * lambda * (advected(4, level, i, j) - advected(5, level, i, j) + advected(6, level, i, j) - advected(7, level, i, j))); + + auto m2_0 = xt::eval(advected(8, level, i, j) + advected(9, level, i, j) + advected(10, level, i, j) + advected(11, level, i, j)); + auto m2_1 = xt::eval(lambda * (advected(8, level, i, j) - advected(10, level, i, j))); + auto m2_2 = xt::eval(lambda * (advected(9, level, i, j) - advected(11, level, i, j))); + auto m2_3 = xt::eval( + lambda * lambda + * (advected(8, level, i, j) - advected(9, level, i, j) + advected(10, level, i, j) - advected(11, level, i, j))); + + auto m3_0 = xt::eval(advected(12, level, i, j) + advected(13, level, i, j) + advected(14, level, i, j) + + advected(15, level, i, j)); + auto m3_1 = xt::eval(lambda * (advected(12, level, i, j) - advected(14, level, i, j))); + auto m3_2 = xt::eval(lambda * (advected(13, level, i, j) - advected(15, level, i, j))); + auto m3_3 = xt::eval( + lambda * lambda + * (advected(12, level, i, j) - advected(13, level, i, j) + advected(14, level, i, j) - advected(15, level, i, j))); + + m0_1 = (1 - sq_rho) * m0_1 + sq_rho * (m1_0); + m0_2 = (1 - sq_rho) * m0_2 + sq_rho * (m2_0); + m0_3 = (1 - sxy_rho) * m0_3; + + m1_1 = (1 - sq_q) * m1_1 + + sq_q * ((3. / 2. - gm / 2.) * (m1_0 * m1_0) / (m0_0) + (1. / 2. - gm / 2.) * (m2_0 * m2_0) / (m0_0) + (gm - 1.) * m3_0); + m1_2 = (1 - sq_q) * m1_2 + sq_q * (m1_0 * m2_0 / m0_0); + m1_3 = (1 - sxy_q) * m1_3; + + m2_1 = (1 - sq_q) * m2_1 + sq_q * (m1_0 * m2_0 / m0_0); + m2_2 = (1 - sq_q) * m2_2 + + sq_q * ((3. / 2. - gm / 2.) * (m2_0 * m2_0) / (m0_0) + (1. / 2. - gm / 2.) * (m1_0 * m1_0) / (m0_0) + (gm - 1.) * m3_0); + m2_3 = (1 - sxy_q) * m2_3; + + m3_1 = (1 - sq_e) * m3_1 + + sq_e + * (gm * (m1_0 * m3_0) / (m0_0) - (gm / 2. - 1. / 2.) * (m1_0 * m1_0 * m1_0) / (m0_0 * m0_0) + - (gm / 2. - 1. / 2.) * (m1_0 * m2_0 * m2_0) / (m0_0 * m0_0)); + m3_2 = (1 - sq_e) * m3_2 + + sq_e + * (gm * (m2_0 * m3_0) / (m0_0) - (gm / 2. - 1. / 2.) * (m2_0 * m2_0 * m2_0) / (m0_0 * m0_0) + - (gm / 2. - 1. / 2.) * (m2_0 * m1_0 * m1_0) / (m0_0 * m0_0)); + m3_3 = (1 - sxy_e) * m3_3; + + new_f(0, level, i, j) = .25 * m0_0 + .5 / lambda * (m0_1) + .25 / (lambda * lambda) * m0_3; + new_f(1, level, i, j) = .25 * m0_0 + .5 / lambda * (m0_2)-.25 / (lambda * lambda) * m0_3; + new_f(2, level, i, j) = .25 * m0_0 - .5 / lambda * (m0_1) + .25 / (lambda * lambda) * m0_3; + new_f(3, level, i, j) = .25 * m0_0 - .5 / lambda * (m0_2)-.25 / (lambda * lambda) * m0_3; + + new_f(4, level, i, j) = .25 * m1_0 + .5 / lambda * (m1_1) + .25 / (lambda * lambda) * m1_3; + new_f(5, level, i, j) = .25 * m1_0 + .5 / lambda * (m1_2)-.25 / (lambda * lambda) * m1_3; + new_f(6, level, i, j) = .25 * m1_0 - .5 / lambda * (m1_1) + .25 / (lambda * lambda) * m1_3; + new_f(7, level, i, j) = .25 * m1_0 - .5 / lambda * (m1_2)-.25 / (lambda * lambda) * m1_3; + + new_f(8, level, i, j) = .25 * m2_0 + .5 / lambda * (m2_1) + .25 / (lambda * lambda) * m2_3; + new_f(9, level, i, j) = .25 * m2_0 + .5 / lambda * (m2_2)-.25 / (lambda * lambda) * m2_3; + new_f(10, level, i, j) = .25 * m2_0 - .5 / lambda * (m2_1) + .25 / (lambda * lambda) * m2_3; + new_f(11, level, i, j) = .25 * m2_0 - .5 / lambda * (m2_2)-.25 / (lambda * lambda) * m2_3; + + new_f(12, level, i, j) = .25 * m3_0 + .5 / lambda * (m3_1) + .25 / (lambda * lambda) * m3_3; + new_f(13, level, i, j) = .25 * m3_0 + .5 / lambda * (m3_2)-.25 / (lambda * lambda) * m3_3; + new_f(14, level, i, j) = .25 * m3_0 - .5 / lambda * (m3_1) + .25 / (lambda * lambda) * m3_3; + new_f(15, level, i, j) = .25 * m3_0 - .5 / lambda * (m3_2)-.25 / (lambda * lambda) * m3_3; + }); - leaves( - [&](auto& interval, auto& index) - { - auto k = interval; // Logical index in x - auto h = index[0]; // Logical index in y - - for (int alpha = 0; alpha < 16; ++alpha) - { - advected(alpha, level, k, h) = f(alpha, level, k, h) - + coeff * 0.25 - * (fluxes(alpha, lev_p_1, 2 * k, 2 * h) + fluxes(alpha, lev_p_1, 2 * k + 1, 2 * h) - + fluxes(alpha, lev_p_1, 2 * k, 2 * h + 1) - + fluxes(alpha, lev_p_1, 2 * k + 1, 2 * h + 1)); - } - }); - } + samurai::times::timers.stop("lbm-step"); - leaves( - [&](auto& interval, auto& index) - { - auto k = interval; // Logical index in x - auto h = index[0]; // Logical index in y - - // We compute the advected momenti - auto m0_0 = xt::eval(advected(0, level, k, h) + advected(1, level, k, h) + advected(2, level, k, h) - + advected(3, level, k, h)); - auto m0_1 = xt::eval(lambda * (advected(0, level, k, h) - advected(2, level, k, h))); - auto m0_2 = xt::eval(lambda * (advected(1, level, k, h) - advected(3, level, k, h))); - auto m0_3 = xt::eval( - lambda * lambda - * (advected(0, level, k, h) - advected(1, level, k, h) + advected(2, level, k, h) - advected(3, level, k, h))); - - auto m1_0 = xt::eval(advected(4, level, k, h) + advected(5, level, k, h) + advected(6, level, k, h) - + advected(7, level, k, h)); - auto m1_1 = xt::eval(lambda * (advected(4, level, k, h) - advected(6, level, k, h))); - auto m1_2 = xt::eval(lambda * (advected(5, level, k, h) - advected(7, level, k, h))); - auto m1_3 = xt::eval( - lambda * lambda - * (advected(4, level, k, h) - advected(5, level, k, h) + advected(6, level, k, h) - advected(7, level, k, h))); - - auto m2_0 = xt::eval(advected(8, level, k, h) + advected(9, level, k, h) + advected(10, level, k, h) - + advected(11, level, k, h)); - auto m2_1 = xt::eval(lambda * (advected(8, level, k, h) - advected(10, level, k, h))); - auto m2_2 = xt::eval(lambda * (advected(9, level, k, h) - advected(11, level, k, h))); - auto m2_3 = xt::eval( - lambda * lambda - * (advected(8, level, k, h) - advected(9, level, k, h) + advected(10, level, k, h) - advected(11, level, k, h))); - - auto m3_0 = xt::eval(advected(12, level, k, h) + advected(13, level, k, h) + advected(14, level, k, h) - + advected(15, level, k, h)); - auto m3_1 = xt::eval(lambda * (advected(12, level, k, h) - advected(14, level, k, h))); - auto m3_2 = xt::eval(lambda * (advected(13, level, k, h) - advected(15, level, k, h))); - auto m3_3 = xt::eval( - lambda * lambda - * (advected(12, level, k, h) - advected(13, level, k, h) + advected(14, level, k, h) - advected(15, level, k, h))); - - m0_1 = (1 - sq_rho) * m0_1 + sq_rho * (m1_0); - m0_2 = (1 - sq_rho) * m0_2 + sq_rho * (m2_0); - m0_3 = (1 - sxy_rho) * m0_3; - - m1_1 = (1 - sq_q) * m1_1 - + sq_q * ((3. / 2. - gm / 2.) * (m1_0 * m1_0) / (m0_0) + (1. / 2. - gm / 2.) * (m2_0 * m2_0) / (m0_0) + (gm - 1.) * m3_0); - m1_2 = (1 - sq_q) * m1_2 + sq_q * (m1_0 * m2_0 / m0_0); - m1_3 = (1 - sxy_q) * m1_3; - - m2_1 = (1 - sq_q) * m2_1 + sq_q * (m1_0 * m2_0 / m0_0); - m2_2 = (1 - sq_q) * m2_2 - + sq_q * ((3. / 2. - gm / 2.) * (m2_0 * m2_0) / (m0_0) + (1. / 2. - gm / 2.) * (m1_0 * m1_0) / (m0_0) + (gm - 1.) * m3_0); - m2_3 = (1 - sxy_q) * m2_3; - - m3_1 = (1 - sq_e) * m3_1 - + sq_e - * (gm * (m1_0 * m3_0) / (m0_0) - (gm / 2. - 1. / 2.) * (m1_0 * m1_0 * m1_0) / (m0_0 * m0_0) - - (gm / 2. - 1. / 2.) * (m1_0 * m2_0 * m2_0) / (m0_0 * m0_0)); - m3_2 = (1 - sq_e) * m3_2 - + sq_e - * (gm * (m2_0 * m3_0) / (m0_0) - (gm / 2. - 1. / 2.) * (m2_0 * m2_0 * m2_0) / (m0_0 * m0_0) - - (gm / 2. - 1. / 2.) * (m2_0 * m1_0 * m1_0) / (m0_0 * m0_0)); - m3_3 = (1 - sxy_e) * m3_3; - - new_f(0, level, k, h) = .25 * m0_0 + .5 / lambda * (m0_1) + .25 / (lambda * lambda) * m0_3; - new_f(1, level, k, h) = .25 * m0_0 + .5 / lambda * (m0_2)-.25 / (lambda * lambda) * m0_3; - new_f(2, level, k, h) = .25 * m0_0 - .5 / lambda * (m0_1) + .25 / (lambda * lambda) * m0_3; - new_f(3, level, k, h) = .25 * m0_0 - .5 / lambda * (m0_2)-.25 / (lambda * lambda) * m0_3; - - new_f(4, level, k, h) = .25 * m1_0 + .5 / lambda * (m1_1) + .25 / (lambda * lambda) * m1_3; - new_f(5, level, k, h) = .25 * m1_0 + .5 / lambda * (m1_2)-.25 / (lambda * lambda) * m1_3; - new_f(6, level, k, h) = .25 * m1_0 - .5 / lambda * (m1_1) + .25 / (lambda * lambda) * m1_3; - new_f(7, level, k, h) = .25 * m1_0 - .5 / lambda * (m1_2)-.25 / (lambda * lambda) * m1_3; - - new_f(8, level, k, h) = .25 * m2_0 + .5 / lambda * (m2_1) + .25 / (lambda * lambda) * m2_3; - new_f(9, level, k, h) = .25 * m2_0 + .5 / lambda * (m2_2)-.25 / (lambda * lambda) * m2_3; - new_f(10, level, k, h) = .25 * m2_0 - .5 / lambda * (m2_1) + .25 / (lambda * lambda) * m2_3; - new_f(11, level, k, h) = .25 * m2_0 - .5 / lambda * (m2_2)-.25 / (lambda * lambda) * m2_3; - - new_f(12, level, k, h) = .25 * m3_0 + .5 / lambda * (m3_1) + .25 / (lambda * lambda) * m3_3; - new_f(13, level, k, h) = .25 * m3_0 + .5 / lambda * (m3_2)-.25 / (lambda * lambda) * m3_3; - new_f(14, level, k, h) = .25 * m3_0 - .5 / lambda * (m3_1) + .25 / (lambda * lambda) * m3_3; - new_f(15, level, k, h) = .25 * m3_0 - .5 / lambda * (m3_2)-.25 / (lambda * lambda) * m3_3; - }); - } std::swap(f.array(), new_f.array()); } template -void save_solution(Field& f, double eps, std::size_t ite, std::string ext = "") +void save_solution(Field& f, double eps, std::size_t ite, std::size_t freq_out, std::string ext = "") { using value_t = typename Field::value_type; - auto mesh = f.mesh(); - using mesh_id_t = typename decltype(mesh)::mesh_id_t; - - std::size_t min_level = mesh.min_level(); - std::size_t max_level = mesh.max_level(); + if (ite % freq_out != 0) + { + return; + } - std::stringstream str; - str << "LBM_D2Q4_3_Euler_" << ext << "_lmin_" << min_level << "_lmax-" << max_level << "_eps-" << eps << "_ite-" << ite; + auto& mesh = f.mesh(); auto level = samurai::make_scalar_field("level", mesh); auto rho = samurai::make_scalar_field("rho", mesh); @@ -467,7 +353,7 @@ void save_solution(Field& f, double eps, std::size_t ite, std::string ext = "") auto e = samurai::make_scalar_field("e", mesh); auto s = samurai::make_scalar_field("entropy", mesh); - samurai::for_each_cell(mesh[mesh_id_t::cells], + samurai::for_each_cell(mesh, [&](auto& cell) { level[cell] = cell.level; @@ -758,173 +644,109 @@ int main(int argc, char* argv[]) { samurai::initialize(argc, argv); - cxxopts::Options options("lbm_d2q4_3_Euler", - "Multi resolution for a D2Q4 LBM scheme for the " - "scalar advection equation"); - - options.add_options()("min_level", "minimum level", cxxopts::value()->default_value("2"))( - "max_level", - "maximum level", - cxxopts::value()->default_value("7"))("epsilon", "maximum level", cxxopts::value()->default_value("0.0001"))( - "ite", - "number of iteration", - cxxopts::value()->default_value("100"))("reg", "regularity", cxxopts::value()->default_value("0."))( - "config", - "Lax-Liu configuration", - cxxopts::value()->default_value("12"))("h, help", "Help"); - - try - { - auto result = options.parse(argc, argv); - - if (result.count("help")) - { - std::cout << options.help() << "\n"; - } - else - { - constexpr size_t dim = 2; - using Config = samurai::MROConfig; - - std::size_t min_level = result["min_level"].as(); - std::size_t max_level = result["max_level"].as(); - std::size_t total_nb_ite = result["ite"].as(); - double eps = result["epsilon"].as(); - double regularity = result["reg"].as(); - int configuration = result["config"].as(); + CLI::App app{"Multi resolution for a D2Q4 LBM scheme for the scalar advection equation"}; - // double lambda = 1./0.3; //4.0; - // double lambda = 1./0.2499; //4.0; - double lambda = 1. / 0.2; // This seems to work - double T = 0.25; // 0.3;//1.2; + std::size_t freq_out = 1; // frequency for output in iteration + std::size_t total_nb_ite = 100; + int configuration = 12; - double sq_rho = 1.9; - double sxy_rho = 1.; + // Multiresolution parameters + std::size_t min_level = 2; + std::size_t max_level = 9; + double mr_epsilon = 1.e-3; // Threshold used by multiresolution + double mr_regularity = 2.; // Regularity guess for multiresolution - double sq_q = 1.75; - double sxy_q = 1.; + std::size_t nt_loadbalance = 10; - double sq_e = 1.75; - double sxy_e = 1.; - - if (configuration == 12) - { - T = .25; - } - else - { - T = .3; - // T = 0.1; - } + app.add_option("--nb-ite", total_nb_ite, "number of iteration")->capture_default_str(); + app.add_option("--config", configuration, "Lax-Liu configuration")->capture_default_str(); + app.add_option("--min-level", min_level, "Minimum level of the multiresolution")->capture_default_str()->group("Multiresolution"); + app.add_option("--max-level", max_level, "Maximum level of the multiresolution")->capture_default_str()->group("Multiresolution"); + app.add_option("--nt-loadbalance", nt_loadbalance, "Maximum level of the multiresolution")->capture_default_str()->group("Multiresolution"); + app.add_option("--mr-eps", mr_epsilon, "The epsilon used by the multiresolution to adapt the mesh") + ->capture_default_str() + ->group("Multiresolution"); + app.add_option("--mr-reg", mr_regularity, "The regularity criteria used by the multiresolution to adapt the mesh") + ->capture_default_str() + ->group("Multiresolution"); + CLI11_PARSE(app, argc, argv); - // // This were the old test case (version 3) - // double sq = 1.75; - // double sxy = 2.; - // if (configuration == 12) { - // sxy = 1.5; - // T = 0.25; - // } - // else { - // sxy = 0.5; - // T = 0.3; - // } + constexpr size_t dim = 2; + using Config = samurai::MRConfig; - samurai::Box box({0, 0}, {1, 1}); - samurai::MROMesh mesh(box, min_level, max_level); - using mesh_id_t = typename samurai::MROMesh::mesh_id_t; - samurai::MROMesh mesh_ref{box, max_level, max_level}; + double lambda = 1. / 0.2; // This seems to work + double T = 0.25; // 0.3;//1.2; - using coord_index_t = typename samurai::MROMesh::coord_index_t; - auto pred_coeff = compute_prediction(min_level, max_level); + double sq_rho = 1.9; + double sxy_rho = 1.; - // Initialization - auto f = init_f(mesh, configuration, lambda); // Adaptive scheme - auto f_ref = init_f(mesh_ref, configuration, lambda); // Reference scheme + double sq_q = 1.75; + double sxy_q = 1.; - double dx = 1.0 / (1 << max_level); - double dt = dx / lambda; + double sq_e = 1.75; + double sxy_e = 1.; - std::size_t N = static_cast(T / dt); - - std::string dirname("./LaxLiu/"); - std::string suffix("_Config_" + std::to_string(configuration) + "_min_" + std::to_string(min_level) + "_max_" - + std::to_string(max_level) + "_eps_" + std::to_string(eps)); - - // std::ofstream stream_number_leaves; - // stream_number_leaves.open - // (dirname+"number_leaves"+suffix+".dat"); - - // std::ofstream stream_number_cells; - // stream_number_cells.open (dirname+"number_cells"+suffix+".dat"); - - // std::ofstream stream_time_scheme_ref; - // stream_time_scheme_ref.open - // (dirname+"time_scheme_ref"+suffix+".dat"); + if (configuration == 12) + { + T = .25; + } + else + { + T = .3; + // T = 0.1; + } - // std::ofstream stream_number_leaves_ref; - // stream_number_leaves_ref.open - // (dirname+"number_leaves_ref"+suffix+".dat"); + samurai::Box box({0, 0}, {1, 1}); + samurai::MRMesh mesh(box, min_level, max_level); - // std::ofstream stream_number_cells_ref; - // stream_number_cells_ref.open - // (dirname+"number_cells_ref"+suffix+".dat"); + // Initialization + auto f = init_f(mesh, configuration, lambda); // Adaptive scheme + samurai::make_bc>(f, 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.); - int howoften = 1; // How often is the solution saved ? + double dx = 1.0 / (1 << max_level); + double dt = dx / lambda; - auto update_bc_for_level = [](auto& field, std::size_t level) - { - update_bc_D2Q4_3_Euler_constant_extension(field, level); - }; + std::size_t N = static_cast(T / dt); - auto MRadaptation = samurai::make_MRAdapt(f, update_bc_for_level); + // SFC_LoadBalancer_interval balancer; + // SFC_LoadBalancer_interval balancer; + // Load_balancing::Life balancer; + // Void_LoadBalancer balancer; + Diffusion_LoadBalancer_cell balancer; + // Diffusion_LoadBalancer_interval balancer; + // Load_balancing::Diffusion balancer; - for (std::size_t nb_ite = 0; nb_ite <= N; ++nb_ite) - { - std::cout << std::endl << " Iteration number = " << nb_ite << std::endl; + auto MRadaptation = samurai::make_MRAdapt(f); - if (max_level > min_level) - { - MRadaptation(eps, regularity); - } - - if (nb_ite == N) - { - auto error_density = compute_error(f, f_ref, update_bc_for_level); - std::cout << std::endl << "#### Epsilon = " << eps << " error = " << error_density << std::endl; - save_solution(f, eps, nb_ite, std::string("final_")); - save_reconstructed(f, f_ref, update_bc_for_level, eps, nb_ite); - } - - // if (nb_ite % howoften == 0) { - // save_solution(f , eps, nb_ite/howoften, - // std::string("Config_")+std::to_string(configuration)); // - // Before applying the scheme - // } + double t = 0.; + samurai::times::timers.start("tloop"); + for (std::size_t nt = 0; nt <= N && nt < total_nb_ite; ++nt) + { + std::cout << fmt::format("\n\t> Iteration {}, t: {}, dt: {} ", nt, t, dt) << std::endl; - one_time_step(f, update_bc_for_level, pred_coeff, lambda, sq_rho, sxy_rho, sq_q, sxy_q, sq_e, sxy_e); - one_time_step(f_ref, update_bc_for_level, pred_coeff, lambda, sq_rho, sxy_rho, sq_q, sxy_q, sq_e, sxy_e); + if (nt % nt_loadbalance == 0 && nt > 1) + { + samurai::times::timers.start("tloop.lb"); + balancer.load_balance(mesh, f); + samurai::times::timers.stop("tloop.lb"); + } - auto number_leaves = mesh.nb_cells(mesh_id_t::cells); - auto number_cells = mesh.nb_cells(); + samurai::times::timers.start("tloop.MRAdaptation"); + MRadaptation(mr_epsilon, mr_regularity); + samurai::times::timers.stop("tloop.MRAdaptation"); - samurai::statistics("D2Q4444_Euler_Lax_Liu", mesh); - // stream_number_leaves< nb_cells_finest_level; @@ -565,8 +565,12 @@ namespace samurai const auto& mesh = tag.mesh(); - const size_t start_level = old_ca.min_level(); - const size_t end_level = old_ca.max_level() + 1; + size_t start_level = old_ca.min_level(); + if (start_level == max_size + 1) + { + return old_ca; + } + size_t end_level = old_ca.max_level() + 1; // create the ensemble of cells to coarsen ca_type ca_add_m; diff --git a/include/samurai/boundary.hpp b/include/samurai/boundary.hpp index 26ec0fe9c..bf671ef63 100644 --- a/include/samurai/boundary.hpp +++ b/include/samurai/boundary.hpp @@ -10,6 +10,9 @@ namespace samurai using mesh_id_t = typename Mesh::mesh_id_t; auto& cells = mesh[mesh_id_t::cells][level]; + // auto& domain = mesh.domain(); + // REBASE FIXME : I don't know if we need to override domain + // auto& domain = mesh.subdomain(); return difference(cells, translate(self(domain).on(level), -layer_width * direction)); } diff --git a/include/samurai/cell_array.hpp b/include/samurai/cell_array.hpp index 9d68412f0..16092815a 100644 --- a/include/samurai/cell_array.hpp +++ b/include/samurai/cell_array.hpp @@ -99,7 +99,6 @@ namespace samurai template const interval_t& get_interval(std::size_t level, const interval_t& interval, const xt::xexpression& index) const; - template const interval_t& get_interval(std::size_t level, const xt::xexpression& coord) const; @@ -420,7 +419,8 @@ namespace samurai return level; } } - return 0; + // return 0; + return max_size; } /** diff --git a/include/samurai/load_balancing.hpp b/include/samurai/load_balancing.hpp new file mode 100644 index 000000000..7d63b153b --- /dev/null +++ b/include/samurai/load_balancing.hpp @@ -0,0 +1,372 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include "algorithm.hpp" +#include "algorithm/utils.hpp" +#include "mesh.hpp" +#include "mr/mesh.hpp" + +#ifdef SAMURAI_WITH_MPI +#include +#endif + +#ifdef SAMURAI_WITH_MPI +namespace samurai +{ + enum BalanceElement_t + { + CELL, + INTERVAL + }; + + /** + * Compute the load of the current process based on intervals or cells. It uses the + * mesh_id_t::cells to only consider leaves. + */ + template + static std::size_t cmptLoad(const Mesh_t& mesh) + { + using mesh_id_t = typename Mesh_t::mesh_id_t; + const auto& current_mesh = mesh[mesh_id_t::cells]; + std::size_t current_process_load = 0; + // cell-based load without weight. + samurai::for_each_interval(current_mesh, + [&]([[maybe_unused]] std::size_t level, const auto& interval, [[maybe_unused]] const auto& index) + { + current_process_load += interval.size(); // * load_balancing_cell_weight[ level ]; + }); + return current_process_load; + } + + /** + * Compute fluxes based on load computing stategy based on graph with label + * propagation algorithm. Return, for the current process, the flux in term of + * load, i.e. the quantity of "load" to transfer to its neighbours. If the load + * is negative, it means that the process (current) must send load to neighbour, + * if positive it means that it must receive load. + * + * This function use 2 MPI all_gather calls. + * + */ + template + std::vector cmptFluxes(Mesh_t& mesh, int niterations) + { + using mpi_subdomain_t = typename Mesh_t::mpi_subdomain_t; + boost::mpi::communicator world; + // give access to geometricaly neighbour process rank and mesh + std::vector& neighbourhood = mesh.mpi_neighbourhood(); + size_t n_neighbours = neighbourhood.size(); + + // load of current process + int my_load = static_cast(cmptLoad(mesh)); + // fluxes between processes + std::vector fluxes(n_neighbours, 0); + // load of each process (all processes not only neighbours) + std::vector loads; + int nt = 0; + while (nt < niterations) + { + boost::mpi::all_gather(world, my_load, loads); + + // compute updated my_load for current process based on its neighbourhood + int my_load_new = my_load; + for (std::size_t n_i = 0; n_i < n_neighbours; ++n_i) + // get "my_load" from other processes + { + std::size_t neighbour_rank = static_cast(neighbourhood[n_i].rank); + int neighbour_load = loads[neighbour_rank]; + double diff_load = static_cast(neighbour_load - my_load_new); + + // if transferLoad < 0 -> need to send data, if transferLoad > 0 need to receive data + int transfertLoad = static_cast(std::trunc(0.5 * diff_load)); + std::cout << "transfert load : " << transfertLoad << std::endl; + fluxes[n_i] += transfertLoad; + // my_load_new += transfertLoad; + my_load += transfertLoad; + } + nt++; + } + return fluxes; + } + + template + class LoadBalancer + { + private: + + public: + + int nloadbalancing; + + template + void update_field(Mesh_t& new_mesh, Field_t& field) + { + using mesh_id_t = typename Mesh_t::mesh_id_t; + using value_t = typename Field_t::value_type; + boost::mpi::communicator world; + + Field_t new_field("new_f", new_mesh); + new_field.fill(0); + + auto& old_mesh = field.mesh(); + // auto min_level = boost::mpi::all_reduce(world, mesh[mesh_id_t::cells].min_level(), boost::mpi::minimum()); + // auto max_level = boost::mpi::all_reduce(world, mesh[mesh_id_t::cells].max_level(), boost::mpi::maximum()); + auto min_level = old_mesh.min_level(); + auto max_level = old_mesh.max_level(); + + // copy data of intervals that are didn't move + for (std::size_t level = min_level; level <= max_level; ++level) + { + auto intersect_old_new = intersection(old_mesh[mesh_id_t::cells][level], new_mesh[mesh_id_t::cells][level]); + intersect_old_new.apply_op(samurai::copy(new_field, field)); + } + + std::vector req, reqs; + std::vector> to_send(static_cast(world.size())); + + // here we have to define all_* at size n_neighbours... + std::vector all_new_meshes, all_old_meshes; + Mesh_t recv_old_mesh, recv_new_mesh; + for (auto& neighbour : new_mesh.mpi_neighbourhood()) + { + reqs.push_back(world.isend(neighbour.rank, 0, new_mesh)); + reqs.push_back(world.isend(neighbour.rank, 1, old_mesh)); + + world.recv(neighbour.rank, 0, recv_new_mesh); + world.recv(neighbour.rank, 1, recv_old_mesh); + + all_new_meshes.push_back(recv_new_mesh); + all_old_meshes.push_back(recv_old_mesh); + } + boost::mpi::wait_all(reqs.begin(), reqs.end()); + + // build payload of field that has been sent to neighbour, so compare old mesh with new neighbour mesh + // for (auto& neighbour : new_mesh.mpi_neighbourhood()) + // for (auto& neighbour : new_mesh.mpi_neighbourhood()){ + for (size_t ni = 0; ni < all_new_meshes.size(); ++ni) + { + // auto & neighbour_new_mesh = neighbour.mesh; + auto& neighbour_new_mesh = all_new_meshes[ni]; + + for (std::size_t level = min_level; level <= max_level; ++level) + { + if (!old_mesh[mesh_id_t::cells][level].empty() && !neighbour_new_mesh[mesh_id_t::cells][level].empty()) + { + auto intersect_old_mesh_new_neigh = intersection(old_mesh[mesh_id_t::cells][level], + neighbour_new_mesh[mesh_id_t::cells][level]); + intersect_old_mesh_new_neigh( + [&](const auto& interval, const auto& index) + { + std::copy(field(level, interval, index).begin(), + field(level, interval, index).end(), + std::back_inserter(to_send[ni])); + }); + } + } + + if (to_send[ni].size() != 0) + { + // neighbour_rank = neighbour.rank; + auto neighbour_rank = new_mesh.mpi_neighbourhood()[ni].rank; + req.push_back(world.isend(neighbour_rank, neighbour_rank, to_send[ni])); + } + } + + // build payload of field that I need to receive from neighbour, so compare NEW mesh with OLD neighbour mesh + for (size_t ni = 0; ni < all_old_meshes.size(); ++ni) + { + bool isintersect = false; + for (std::size_t level = min_level; level <= max_level; ++level) + { + if (!new_mesh[mesh_id_t::cells][level].empty() && !all_old_meshes[ni][mesh_id_t::cells][level].empty()) + { + std::vector to_recv; + + auto in_interface = intersection(all_old_meshes[ni][mesh_id_t::cells][level], new_mesh[mesh_id_t::cells][level]); + + in_interface( + [&]([[maybe_unused]] const auto& i, [[maybe_unused]] const auto& index) + { + isintersect = true; + }); + + if (isintersect) + { + break; + } + } + } + + if (isintersect) + { + std::ptrdiff_t count = 0; + std::vector to_recv; + world.recv(new_mesh.mpi_neighbourhood()[ni].rank, world.rank(), to_recv); + + for (std::size_t level = min_level; level <= max_level; ++level) + { + if (!new_mesh[mesh_id_t::cells][level].empty() && !all_old_meshes[ni][mesh_id_t::cells][level].empty()) + { + auto in_interface = intersection(all_old_meshes[ni][mesh_id_t::cells][level], new_mesh[mesh_id_t::cells][level]); + + in_interface( + [&](const auto& i, const auto& index) + { + std::copy(to_recv.begin() + count, + to_recv.begin() + count + static_cast(i.size() * field.n_comp), + new_field(level, i, index).begin()); + count += static_cast(i.size() * field.n_comp); + + // logs << fmt::format("Process {}, recv interval {}", world.rank(), i) << std::endl; + }); + } + } + } + } + + if (!req.empty()) + { + mpi::wait_all(req.begin(), req.end()); + } + + std::swap(field.array(), new_field.array()); + } + + template + void update_fields(Mesh_t& new_mesh, Field_t& field, Fields_t&... kw) + + { + update_field(new_mesh, field); + update_fields(new_mesh, kw...); + } + + template + void update_fields([[maybe_unused]] Mesh_t& new_mesh) + { + } + + public: + + LoadBalancer() + { + boost::mpi::communicator world; + nloadbalancing = 0; + } + + template + Mesh_t update_mesh(Mesh_t& mesh, const Field_t& flags) + { + using CellList_t = typename Mesh_t::cl_type; + using CellArray_t = typename Mesh_t::ca_type; + + boost::mpi::communicator world; + + CellList_t new_cl; + // TODO why wolrd size ? scaliility ??? + std::vector payload(static_cast(world.size())); + std::vector payload_size(static_cast(world.size()), 0); + + std::map comm; + + // build cell list for the current process && cells lists of cells for other processes + samurai::for_each_cell( + mesh[Mesh_t::mesh_id_t::cells], + [&](const auto& cell) + { + if (flags[cell] == world.rank()) + { + if constexpr (Mesh_t::dim == 1) + { + new_cl[cell.level][{}].add_point(cell.indices[0]); + } + if constexpr (Mesh_t::dim == 2) + { + new_cl[cell.level][{cell.indices[1]}].add_point(cell.indices[0]); + } + if constexpr (Mesh_t::dim == 3) + { + // TODO : it works ?? + new_cl[cell.level][{cell.indices[1], cell.indices[2]}].add_point(cell.indices[0]); + } + } + else + { + assert(static_cast(flags[cell]) < payload.size()); + + if (comm.find(flags[cell]) == comm.end()) + { + comm[flags[cell]] = true; + } + + if constexpr (Mesh_t::dim == 1) + { + payload[static_cast(flags[cell])][cell.level][{}].add_point(cell.indices[0]); + } + if constexpr (Mesh_t::dim == 2) + { + payload[static_cast(flags[cell])][cell.level][{cell.indices[1]}].add_point(cell.indices[0]); + } + if constexpr (Mesh_t::dim == 3) + { + payload[static_cast(flags[cell])][cell.level][{cell.indices[1], cell.indices[2]}].add_point( + cell.indices[0]); + } + + payload_size[static_cast(flags[cell])]++; + } + }); + + std::vector req; + + // actual data echange between processes that need to exchange data + for (int iproc = 0; iproc < world.size(); ++iproc) + { + if (iproc == world.rank()) + { + continue; + } + CellArray_t to_send = {payload[static_cast(iproc)], false}; + req.push_back(world.isend(iproc, 17, to_send)); + } + + for (int iproc = 0; iproc < world.size(); ++iproc) + { + if (iproc == world.rank()) + { + continue; + } + CellArray_t to_rcv; + world.recv(iproc, 17, to_rcv); + + samurai::for_each_interval(to_rcv, + [&](std::size_t level, const auto& interval, const auto& index) + { + new_cl[level][index].add_interval(interval); + }); + } + boost::mpi::wait_all(req.begin(), req.end()); + Mesh_t new_mesh(new_cl, mesh); + return new_mesh; + } + + template + void load_balance(Mesh_t& mesh, Field_t& field, Fields&... kw) + { + auto flags = static_cast(this)->load_balance_impl(field.mesh()); + auto new_mesh = update_mesh(mesh, flags); + + // update each physical field on the new load balanced mesh + update_fields(new_mesh, field, kw...); + // swap mesh reference to new load balanced mesh. FIX: this is not clean + field.mesh().swap(new_mesh); + nloadbalancing += 1; + } + }; + +} // namespace samurai +#endif diff --git a/include/samurai/load_balancing_diffusion.hpp b/include/samurai/load_balancing_diffusion.hpp new file mode 100644 index 000000000..54561fee8 --- /dev/null +++ b/include/samurai/load_balancing_diffusion.hpp @@ -0,0 +1,154 @@ + +#include "field.hpp" +#include "load_balancing.hpp" +#include + +// for std::sort +#include + +#ifdef SAMURAI_WITH_MPI +namespace Load_balancing +{ + + class Diffusion : public samurai::LoadBalancer + { + private: + + int _ndomains; + int _rank; + + public: + + Diffusion() + { +#ifdef SAMURAI_WITH_MPI + boost::mpi::communicator world; + _ndomains = world.size(); + _rank = world.rank(); +#else + _ndomains = 1; + _rank = 0; +#endif + } + + template + auto load_balance_impl(Mesh_t& mesh) + { + using mpi_subdomain_t = typename Mesh_t::mpi_subdomain_t; + using CellList_t = typename Mesh_t::cl_type; + using mesh_id_t = typename Mesh_t::mesh_id_t; + + using Coord_t = xt::xtensor_fixed>; + using Stencil = xt::xtensor_fixed>; + + boost::mpi::communicator world; + std::vector& neighbourhood = mesh.mpi_neighbourhood(); + size_t n_neighbours = neighbourhood.size(); + + // compute fluxes in terms of number of intervals to transfer/receive + // by default, perform 5 iterations + // std::vector fluxes = samurai::cmptFluxes( mesh, forceNeighbour, 5 ); + std::vector fluxes = samurai::cmptFluxes(mesh, 5); + std::vector cl_to_send(n_neighbours); + + // set field "flags" for each rank. Initialized to current for all cells (leaves only) + auto flags = samurai::make_scalar_field("diffusion_flag", mesh); + flags.fill(world.rank()); + // load balancing order + + std::vector order(n_neighbours); + for (size_t i = 0; i < order.size(); ++i) + { + order[i] = i; + } + // order neighbour to echange data with, based on load + std::sort(order.begin(), + order.end(), + [&fluxes](size_t i, size_t j) + { + return fluxes[i] < fluxes[j]; + }); + + using cell_t = typename Mesh_t::cell_t; + std::vector cells; + cells.reserve(mesh.nb_cells(mesh_id_t::cells)); + + samurai::for_each_cell(mesh[mesh_id_t::cells], + [&](auto cell) + { + cells.push_back(cell); + }); + + std::sort(cells.begin(), + cells.end(), + [&](const cell_t& a, const cell_t& b) + { + auto ca = a.center(); + auto cb = b.center(); + if (ca(1) != cb(1)) + { + return ca(1) > cb(1); + } + else + { + return ca(0) > cb(0); + } + }); + + std::size_t n = 0; + + // at this point : works only for a horizontal partitioning + if (world.size() > 1) + { + n = static_cast(std::abs(fluxes[0])); + } + + if (world.size() > 1) + { + if (world.rank() == 0) + { + for (std::size_t i = 0; i < n && i < cells.size(); ++i) + { + if (fluxes[0] < 0) + { + flags[cells[i]] = 1; + } + } + } + else if (world.rank() == world.size() - 1) + { + for (std::size_t i = 0; i < n && i < cells.size(); ++i) + { + if (fluxes[0] < 0) + { + flags[cells[cells.size() - 1 - i]] = world.rank() - 1; + } + } + } + else + { + std::size_t n1 = static_cast(std::abs(fluxes[0])); + std::size_t n2 = static_cast(std::abs(fluxes[1])); + for (std::size_t i = 0; i < n1 && i < cells.size(); ++i) + { + if (fluxes[0] < 0) + { + flags[cells[cells.size() - 1 - i]] = world.rank() - 1; + } + } + + for (std::size_t i = 0; i < n2 && i < cells.size(); ++i) + { + if (fluxes[1] < 0) + { + flags[cells[i]] = world.rank() + 1; + } + } + } + } + + return flags; + } + }; +} +#endif diff --git a/include/samurai/mesh.hpp b/include/samurai/mesh.hpp index 6c3329d8d..b7066f2d6 100644 --- a/include/samurai/mesh.hpp +++ b/include/samurai/mesh.hpp @@ -108,6 +108,8 @@ namespace samurai bool is_periodic(std::size_t d) const; const std::array& periodicity() const; // std::vector& neighbouring_ranks(); + + const std::vector& mpi_neighbourhood() const; std::vector& mpi_neighbourhood(); void swap(Mesh_base& mesh) noexcept; @@ -162,6 +164,9 @@ namespace samurai double approx_box_tol = lca_type::default_approx_box_tol, double scaling_factor = 0); + // Used for load balancing + Mesh_base(const cl_type& cl, std::size_t min_level, std::size_t max_level, std::vector& neighbourhood); + derived_type& derived_cast() & noexcept; const derived_type& derived_cast() const& noexcept; derived_type derived_cast() && noexcept; @@ -178,8 +183,6 @@ namespace samurai void find_neighbourhood_naive(); void partition_mesh(std::size_t start_level, const Box& global_box); - void load_balancing(); - void load_transfer(const std::vector& load_fluxes); std::size_t max_nb_cells(std::size_t level) const; lca_type m_domain; @@ -245,7 +248,6 @@ namespace samurai #ifdef SAMURAI_WITH_MPI partition_mesh(start_level, b); - // load_balancing(); #else this->m_cells[mesh_id_t::cells][start_level] = {start_level, b, approx_box_tol, scaling_factor_}; #endif @@ -276,7 +278,6 @@ namespace samurai #ifdef SAMURAI_WITH_MPI partition_mesh(start_level, b); - // load_balancing(); #else this->m_cells[mesh_id_t::cells][start_level] = {start_level, b, approx_box_tol, scaling_factor_}; #endif @@ -289,6 +290,7 @@ namespace samurai set_origin_point(origin_point()); set_scaling_factor(scaling_factor()); + // update_mesh_neighbour(); } template @@ -304,7 +306,7 @@ namespace samurai construct_subdomain(); m_domain = m_subdomain; construct_union(); - update_sub_mesh(); + update_sub_mesh(); // MPI AllReduce inside renumbering(); update_mesh_neighbour(); @@ -312,6 +314,7 @@ namespace samurai set_scaling_factor(cl.scaling_factor()); } + // /** template inline Mesh_base::Mesh_base(const ca_type& ca, std::size_t min_level, std::size_t max_level) : m_min_level{min_level} @@ -333,6 +336,8 @@ namespace samurai set_scaling_factor(ca.scaling_factor()); } + // **/ + template inline Mesh_base::Mesh_base(const ca_type& ca, const self_type& ref_mesh) : m_domain(ref_mesh.m_domain) @@ -375,6 +380,32 @@ namespace samurai set_scaling_factor(ref_mesh.scaling_factor()); } + /** + template + inline Mesh_base::Mesh_base(const cl_type& cl, + std::size_t min_level, + std::size_t max_level, + std::vector& neighbourhood) + : m_min_level(min_level) + , m_max_level(max_level) + , m_mpi_neighbourhood(neighbourhood) + { + m_periodic.fill(false); + assert(min_level <= max_level); + + // what to do with m_domain ? + m_domain = m_subdomain; + + m_cells[mesh_id_t::cells] = {cl, false}; + + construct_subdomain(); // required ? + construct_union(); // required ? + update_sub_mesh(); // perform MPI allReduce calls + renumbering(); // required ? + update_mesh_neighbour(); // required to do that here ?? + } + **/ + template inline auto Mesh_base::cells() -> mesh_t& { @@ -583,6 +614,12 @@ namespace samurai return m_periodic; } + template + inline auto Mesh_base::mpi_neighbourhood() const -> const std::vector& + { + return m_mpi_neighbourhood; + } + template inline auto Mesh_base::mpi_neighbourhood() -> std::vector& { @@ -836,7 +873,8 @@ namespace samurai int product_of_sizes = 1; for (std::size_t d = 0; d < dim - 1; ++d) { - sizes[d] = static_cast(floor(pow(size, 1. / dim) * global_box.length()[d] / length_harmonic_avg)); + sizes[d] = std::max(static_cast(floor(pow(size, 1. / dim) * global_box.length()[d] / length_harmonic_avg)); + product_of_sizes *= sizes[d]; } sizes[dim - 1] = size / product_of_sizes; @@ -973,85 +1011,7 @@ namespace samurai // } // m_mpi_neighbourhood.push_back(neighbour(shift)); // } - // }); - -#endif - } - - template - void Mesh_base::load_balancing() - { -#ifdef SAMURAI_WITH_MPI - mpi::communicator world; - auto rank = world.rank(); - - std::size_t load = nb_cells(mesh_id_t::cells); - std::vector loads; - - std::vector load_fluxes(m_mpi_neighbourhood.size(), 0); - - const std::size_t n_iterations = 1; - - for (std::size_t k = 0; k < n_iterations; ++k) - { - world.barrier(); - if (rank == 0) - { - std::cout << "---------------- k = " << k << " ----------------" << std::endl; - } - mpi::all_gather(world, load, loads); - - std::vector nb_neighbours; - mpi::all_gather(world, m_mpi_neighbourhood.size(), nb_neighbours); - - double load_np1 = static_cast(load); - for (std::size_t i_rank = 0; i_rank < m_mpi_neighbourhood.size(); ++i_rank) - { - auto neighbour = m_mpi_neighbourhood[i_rank]; - - auto neighbour_load = loads[static_cast(neighbour.rank)]; - int neighbour_load_minus_my_load; - if (load < neighbour_load) - { - neighbour_load_minus_my_load = static_cast(neighbour_load - load); - } - else - { - neighbour_load_minus_my_load = -static_cast(load - neighbour_load); - } - double weight = 1. / std::max(m_mpi_neighbourhood.size(), nb_neighbours[neighbour.rank]); - load_fluxes[i_rank] = weight * neighbour_load_minus_my_load; - load_np1 += load_fluxes[i_rank]; - } - load_np1 = floor(load_np1); - - load_transfer(load_fluxes); - - std::cout << rank << ": load = " << load << ", load_np1 = " << load_np1 << std::endl; - - load = static_cast(load_np1); - } -#endif - } - - template - void Mesh_base::load_transfer([[maybe_unused]] const std::vector& load_fluxes) - { -#ifdef SAMURAI_WITH_MPI - mpi::communicator world; - std::cout << world.rank() << ": "; - for (std::size_t i_rank = 0; i_rank < m_mpi_neighbourhood.size(); ++i_rank) - { - auto neighbour = m_mpi_neighbourhood[i_rank]; - if (load_fluxes[i_rank] < 0) // must tranfer load to the neighbour - { - } - else if (load_fluxes[i_rank] > 0) // must receive load from the neighbour - { - } - std::cout << "--> " << neighbour.rank << ": " << load_fluxes[i_rank] << ", "; - } - std::cout << std::endl; + // }); #endif }