diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index cbcf2828..27a2aaaf 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -3,5 +3,6 @@ # SPDX-License-Identifier: MIT add_subdirectory(poisson) +add_subdirectory(poisson_parallel) add_subdirectory(linelast) add_subdirectory(stokes) diff --git a/examples/poisson_parallel/CMakeLists.txt b/examples/poisson_parallel/CMakeLists.txt new file mode 100644 index 00000000..9b24e124 --- /dev/null +++ b/examples/poisson_parallel/CMakeLists.txt @@ -0,0 +1,15 @@ +# Copyright 2024 Lawrence Livermore National Security, LLC. See the top-level LICENSE file for details. +# +# SPDX-License-Identifier: MIT + +file(COPY poisson.sampling.yml DESTINATION ${CMAKE_BINARY_DIR}/examples/poisson_parallel) +file(COPY array.8.yml DESTINATION ${CMAKE_BINARY_DIR}/examples/poisson_parallel) + +file(COPY generate_configs.py DESTINATION ${CMAKE_BINARY_DIR}/examples/poisson_parallel) +file(COPY setup_poisson.sh DESTINATION ${CMAKE_BINARY_DIR}/examples/poisson_parallel) + +file(COPY ../stokes/meshes/square.o3.mesh DESTINATION ${CMAKE_BINARY_DIR}/examples/poisson_parallel/meshes) +file(COPY ../stokes/meshes/square-circle.msh.mfem DESTINATION ${CMAKE_BINARY_DIR}/examples/poisson_parallel/meshes) +file(COPY ../stokes/meshes/square-triangle.msh.mfem DESTINATION ${CMAKE_BINARY_DIR}/examples/poisson_parallel/meshes) +file(COPY ../stokes/meshes/square-star.msh.mfem DESTINATION ${CMAKE_BINARY_DIR}/examples/poisson_parallel/meshes) +file(COPY ../stokes/meshes/square-square.msh.mfem DESTINATION ${CMAKE_BINARY_DIR}/examples/poisson_parallel/meshes) diff --git a/examples/poisson_parallel/array.8.yml b/examples/poisson_parallel/array.8.yml new file mode 100644 index 00000000..5f37a45c --- /dev/null +++ b/examples/poisson_parallel/array.8.yml @@ -0,0 +1,92 @@ +main: +#mode: run_example/sample_generation/build_rom/single_run + mode: single_run + use_rom: true + solver: poisson + +mesh: + type: component-wise + uniform_refinement: 0 + component-wise: +# you can try other config files you generated by specifying it here. + global_config: "configs/test.box-channel.8x8.h5" + components: + - name: "empty" + file: "meshes/square.o3.mesh" + - name: "square-circle" + file: "meshes/square-circle.msh.mfem" + - name: "square-square" + file: "meshes/square-square.msh.mfem" + - name: "square-triangle" + file: "meshes/square-triangle.msh.mfem" + - name: "square-star" + file: "meshes/square-star.msh.mfem" + +domain-decomposition: + type: interior_penalty + +discretization: + order: 2 + full-discrete-galerkin: false + +solver: + direct_solve: true + use_amg: true + max_iter: 1000000 + print_level: 0 + absolute_tolerance: 1.0e-10 + relative_tolerance: 1.0e-10 + +visualization: + enabled: true + visualize_error: true + unified_paraview: false + file_path: + prefix: paraview/8x8/poisson_array_output + +parameterized_problem: + name: poisson_component + +single_run: + choose_from_random_sample: false + poisson_component: + k0: 0.6 + k1: -0.6 + bdr_k0: 0.05 + bdr_k1: 0.04 + offset: 0.1 + bdr_offset: 0. + +basis: + prefix: "basis/poisson_basis" + number_of_basis: 10 + tags: + - name: "empty" + - name: "square-circle" + - name: "square-square" + - name: "square-triangle" + - name: "square-star" + svd: + save_spectrum: true + update_right_sv: false + visualization: + enabled: false + prefix: poisson_comp + +model_reduction: + rom_handler_type: mfem + linear_solver_type: direct + linear_system_type: spd + hypre_matrix: yes +# This is the input for saving ROM matrix/rhs system. 
+ save_linear_system: + enabled: true + prefix: rom-system/poisson.8x8 + preconditioner: gs + # individual/universal + subdomain_training: universal + save_operator: + level: component + prefix: "poisson_comp" + compare_solution: + enabled: true diff --git a/examples/poisson_parallel/generate_configs.py b/examples/poisson_parallel/generate_configs.py new file mode 100644 index 00000000..63384997 --- /dev/null +++ b/examples/poisson_parallel/generate_configs.py @@ -0,0 +1,24 @@ +import sys +# Add path to root utils/python/ directory to find config scripts +sys.path.insert(0, "../../../utils/python/") + +from config import Empty, ObjectInSpace +from box_channel_config import BoxChannelConfig + +if __name__ == "__main__": + comp_list = {'empty': Empty(), + 'circle': ObjectInSpace('square-circle'), + 'square': ObjectInSpace('square-square'), + 'triangle': ObjectInSpace('square-triangle'), + 'star': ObjectInSpace('square-star'),} + + example = BoxChannelConfig(2,2) + for name, comp in comp_list.items(): + example.addComponent(comp) + + example.GenerateAllConfigs(0) + + test = BoxChannelConfig(8,8) + for name, comp in comp_list.items(): + test.addComponent(comp) + test.CreateRandomConfig('test.box-channel.8x8.h5') diff --git a/examples/poisson_parallel/poisson.sampling.yml b/examples/poisson_parallel/poisson.sampling.yml new file mode 100644 index 00000000..c376924a --- /dev/null +++ b/examples/poisson_parallel/poisson.sampling.yml @@ -0,0 +1,115 @@ +main: + mode: sample_generation + use_rom: false + solver: poisson + +mesh: + uniform_refinement: 0 + type: component-wise + component-wise: + global_config: "configs/box-channel.comp.h5" + components: + - name: "empty" + file: "meshes/square.o3.mesh" + - name: "square-circle" + file: "meshes/square-circle.msh.mfem" + - name: "square-square" + file: "meshes/square-square.msh.mfem" + - name: "square-triangle" + file: "meshes/square-triangle.msh.mfem" + - name: "square-star" + file: "meshes/square-star.msh.mfem" + +domain-decomposition: + type: interior_penalty + +discretization: + order: 2 + full-discrete-galerkin: false + +solver: + direct_solve: true + use_amg: true # used only for iterative solver + +visualization: + enabled: false + unified_paraview: false + file_path: + prefix: poisson_output + +parameterized_problem: + name: poisson_component + +single_run: + poisson_component: + k0: 0.6 + k1: -0.6 + bdr_k0: 0.05 + bdr_k1: 0.04 + offset: 0.1 + bdr_offset: 0. 
+ +sample_generation: + maximum_number_of_snapshots: 4000 + type: "random" + random_sample_generator: + number_of_samples: 1400 + report_frequency: 10 + file_path: + prefix: "./snapshots/poisson_array" + parameters: + - key: mesh/component-wise/global_config + type: filename + minimum: 0 + maximum: 624 + format: "./configs/samples/box-channel.config-%05d.h5" + - key: single_run/poisson_component/k0 + type: double + minimum: -0.5 + maximum: 0.5 + - key: single_run/poisson_component/k1 + type: double + minimum: -0.5 + maximum: 0.5 + - key: single_run/poisson_component/offset + type: double + minimum: 0.0 + maximum: 1.0 + - key: single_run/poisson_component/bdr_offset + type: double + minimum: 0.0 + maximum: 1.0 + +basis: + prefix: "basis/poisson_basis" + number_of_basis: 10 + tags: + - name: empty + snapshot_files: ["./snapshots/poisson_array_sample_empty_snapshot"] + - name: square-circle + snapshot_files: ["./snapshots/poisson_array_sample_square-circle_snapshot"] + - name: square-square + snapshot_files: ["./snapshots/poisson_array_sample_square-square_snapshot"] + - name: square-star + snapshot_files: ["./snapshots/poisson_array_sample_square-star_snapshot"] + - name: square-triangle + snapshot_files: ["./snapshots/poisson_array_sample_square-triangle_snapshot"] + svd: + save_spectrum: true + update_right_sv: false + visualization: + enabled: false + prefix: poisson_comp + +model_reduction: + separate_variable_basis: false # true for vel/pres separate basis + rom_handler_type: mfem + linear_solver_type: direct + linear_system_type: spd + # individual/universal + subdomain_training: universal + save_operator: + level: component + prefix: "poisson_comp" + compare_solution: + enabled: false diff --git a/examples/poisson_parallel/setup_poisson.sh b/examples/poisson_parallel/setup_poisson.sh new file mode 100755 index 00000000..13c03e28 --- /dev/null +++ b/examples/poisson_parallel/setup_poisson.sh @@ -0,0 +1,21 @@ +#!/bin/bash +# Generates input meshes for the Poisson example +# Note: assumes this is run from the build/examples/poisson_parallel directory +comp_script="../../../utils/python/box_comp_config.py" +if [[ ! -f ${comp_script} ]]; then + echo "Could not find box_comp_config.py script" + exit 1 +fi + +# Generate box-channel.comp.h5 +python3 ${comp_script} + +# Generate all sample config meshes +python3 generate_configs.py + +mkdir configs/ +mv box-channel.comp.h5 configs/ +mv test.box-channel.8x8.h5 configs/ + +mkdir configs/samples/ +mv *.h5 configs/samples/ diff --git a/examples/stokes/array.8.yml b/examples/stokes/array.8.yml index cc4c9694..3ac55cda 100644 --- a/examples/stokes/array.8.yml +++ b/examples/stokes/array.8.yml @@ -101,6 +101,7 @@ model_reduction: rom_handler_type: mfem linear_solver_type: direct linear_system_type: spd + hypre_matrix: yes # This is the input for saving ROM matrix/rhs system. 
save_linear_system: enabled: true diff --git a/include/component_topology_handler.hpp b/include/component_topology_handler.hpp index c90222ec..c575eb93 100644 --- a/include/component_topology_handler.hpp +++ b/include/component_topology_handler.hpp @@ -146,6 +146,10 @@ class ComponentTopologyHandler : public TopologyHandler virtual Array* const GetRefInterfaceInfos(const int &k) { return ref_interfaces[k]; } virtual Array* GetBdrAttrComponentToGlobalMap(const int &m) { return bdr_c2g[m]; } + int LocalSubdomainIndex(int global_subdomain) override; + int GlobalSubdomainIndex(int local_subdomain) override; + int GlobalSubdomainRank(int global_subdomain) override; + // return component indexes for a reference port (ComponentTopologyHandler only) virtual void GetComponentPair(const int &ref_port_idx, int &comp1, int &comp2); virtual void GetRefPortInfo(const int &ref_port_idx, int &comp1, int &comp2, int &attr1, int &attr2); @@ -182,6 +186,11 @@ class ComponentTopologyHandler : public TopologyHandler void SetupPorts(); bool ComponentBdrAttrCheck(Mesh *comp); + +private: + Mesh* CreateMesh(int global_subdomain) const; + void SetupPortNeighborMeshes(const std::string &global_config); + void SetupBoundaryAttributes(); }; #endif diff --git a/include/interface_form.hpp b/include/interface_form.hpp index 24ffdf6a..a2428c8d 100644 --- a/include/interface_form.hpp +++ b/include/interface_form.hpp @@ -19,6 +19,7 @@ class InterfaceForm mutable TimeProfiler timer; int numSub = -1; + int numSubStored = -1; int skip_zeros = 1; Array meshes; // not owned @@ -95,6 +96,7 @@ class MixedInterfaceForm { protected: int numSub = -1; + int numSubStored = -1; int skip_zeros = 1; Array meshes; // not owned diff --git a/include/multiblock_solver.hpp b/include/multiblock_solver.hpp index ea327e0d..4c22ae95 100644 --- a/include/multiblock_solver.hpp +++ b/include/multiblock_solver.hpp @@ -44,11 +44,18 @@ friend class ParameterizedProblem; Mesh *pmesh = NULL; // parent mesh. only available from SubMeshTopologyHandler. Array meshes; - // Informations received from Topology Handler. + // Information received from Topology Handler. int numSub; // number of subdomains. + int numSubLoc; // number of local subdomains. + int numSubStored; // number of subdomains stored on this MPI rank. int dim; // Spatial dimension. Array global_bdr_attributes; // boundary attributes of global system. + // MPI partitioning data + Array localNumBlocks; + std::array localBlocks; + Array neighbors; + // Solution dimension, by default -1 (scalar). 
int udim = -1; // vector dimension of the entire solution variable int num_var = -1; // number of variables diff --git a/include/poisson_solver.hpp b/include/poisson_solver.hpp index 9aa72f86..4cf72fe5 100644 --- a/include/poisson_solver.hpp +++ b/include/poisson_solver.hpp @@ -33,6 +33,7 @@ friend class ParameterizedProblem; HYPRE_BigInt sys_glob_size; HYPRE_BigInt sys_row_starts[2]; HypreParMatrix *globalMat_hypre = NULL; + HypreParMatrix *systemOp_hypre = NULL; MUMPSSolver *mumps = NULL; // operators @@ -102,6 +103,19 @@ friend class ParameterizedProblem; protected: virtual void SetMUMPSSolver(); + void SetupMUMPSSolverSerial(); + void SetupMUMPSSolverParallel(); + + void CreateHypreParMatrix(); + void SetGlobalOffsets(); + + // Data for HypreParMatrix construction + Array global_offsets; + Array global_var_offsets; + SparseMatrix *hdiag = NULL; + SparseMatrix *hoffd = NULL; + HYPRE_BigInt *cmap = NULL; + }; #endif diff --git a/include/rom_handler.hpp b/include/rom_handler.hpp index 490ce95e..770e59be 100644 --- a/include/rom_handler.hpp +++ b/include/rom_handler.hpp @@ -45,8 +45,10 @@ class ROMHandlerBase protected: // public: int numSub = -1; // number of subdomains. + int numSubLoc = -1; // number of local subdomains. int num_var = -1; // number of variables for which POD is performed. int num_rom_blocks = -1; // number of ROM blocks for the global domain. + int num_rom_blocks_local = -1; // number of ROM blocks for the local domain. int num_rom_ref_blocks = -1; // number of ROM reference component blocks. int num_rom_comp = -1; // number of ROM reference components. std::vector fom_var_names; // dimension of each variable. @@ -92,6 +94,13 @@ class ROMHandlerBase bool basis_loaded; bool operator_loaded; + bool hypre_assemble = false; + + // MPI partitioning data + Array localSizes; + Array localNumBlocks; + std::array localBlocks; + // domain rom variables. /* offset for the global domain ROM blocks. 
@@ -148,6 +157,11 @@ class ROMHandlerBase const int GetBlockIndex(const int m, const int v=-1); void GetDomainAndVariableIndex(const int &rom_block_index, int &m, int &v); + bool HypreAssemble() const { return hypre_assemble; } + + std::array GetLocalBlocks() { return localBlocks; } + void GetLocalNumBlocks(Array &lnb) { lnb = localNumBlocks; } + mfem::BlockVector* GetReducedSolution() { return reduced_sol; } mfem::BlockVector* GetReducedRHS() { return reduced_rhs; } @@ -184,7 +198,7 @@ class ROMHandlerBase virtual void LiftUpFromDomainBasis(const int &i, const Vector &rom_vec, Vector &vec) = 0; virtual void LiftUpGlobal(const BlockVector &rom_vec, BlockVector &vec) = 0; - virtual void Solve(BlockVector &rhs, BlockVector &sol) = 0; + virtual void Solve(Vector &rhs, Vector &sol) = 0; virtual void Solve(BlockVector* U) = 0; virtual void NonlinearSolve(Operator &oper, BlockVector* U, Solver *prec=NULL) = 0; @@ -199,6 +213,9 @@ class ROMHandlerBase virtual void SaveReducedRHS(const std::string &filename) = 0; virtual void AppendReferenceBasis(const int &idx, const DenseMatrix &mat) = 0; + + virtual void CreateHypreParMatrix(BlockMatrix *input_mat, int rank, int nproc) = 0; + virtual void LoadBalanceROMBlocks(int rank, int nproc) = 0; }; class MFEMROMHandler : public ROMHandlerBase @@ -224,12 +241,20 @@ class MFEMROMHandler : public ROMHandlerBase BlockMatrix *romMat = NULL; SparseMatrix *romMat_mono = NULL; + // hypre matrix data + SparseMatrix *hdiag = NULL; + SparseMatrix *hoffd = NULL; + HYPRE_BigInt *cmap = NULL; + // variables needed for direct solve HYPRE_BigInt sys_glob_size; HYPRE_BigInt sys_row_starts[2]; HypreParMatrix *romMat_hypre = NULL; MUMPSSolver *mumps = NULL; + Vector reduced_rhs_hypre; + HYPRE_BigInt hypre_start; + public: MFEMROMHandler(TopologyHandler *input_topol, const Array &input_var_offsets, const std::vector &var_names, const bool separate_variable_basis); @@ -267,7 +292,7 @@ class MFEMROMHandler : public ROMHandlerBase virtual void LiftUpFromDomainBasis(const int &i, const Vector &rom_vec, Vector &vec); virtual void LiftUpGlobal(const BlockVector &rom_vec, BlockVector &vec); - void Solve(BlockVector &rhs, BlockVector &sol) override; + void Solve(Vector &rhs, Vector &sol) override; void Solve(BlockVector* U) override; void NonlinearSolve(Operator &oper, BlockVector* U, Solver *prec=NULL) override; @@ -285,6 +310,9 @@ class MFEMROMHandler : public ROMHandlerBase virtual void AppendReferenceBasis(const int &idx, const DenseMatrix &mat); + void CreateHypreParMatrix(BlockMatrix *input_mat, int rank, int nproc) override; + void LoadBalanceROMBlocks(int rank, int nproc) override; + private: IterativeSolver* SetIterativeSolver(const MFEMROMHandler::SolverType &linsol_type_, const std::string &prec_type); // void GetBlockSparsity(const SparseMatrix *mat, const Array &block_offsets, Array2D &mat_zero_blocks); diff --git a/include/stokes_solver.hpp b/include/stokes_solver.hpp index e1171956..7aefdc40 100644 --- a/include/stokes_solver.hpp +++ b/include/stokes_solver.hpp @@ -96,6 +96,7 @@ friend class ParameterizedProblem; // System matrix for Bilinear case. 
Array u_offsets, p_offsets, vblock_offsets; + Array global_u_offsets, global_p_offsets, global_os_u; Array2D m_mats, b_mats; BlockMatrix *mMat = NULL, *bMat = NULL; SparseMatrix *M = NULL, *B = NULL; @@ -109,6 +110,12 @@ friend class ParameterizedProblem; HypreParMatrix *systemOp_hypre = NULL; MUMPSSolver *mumps = NULL; + // Data for HypreParMatrix construction + Array global_offsets; + SparseMatrix *hdiag = NULL; + SparseMatrix *hoffd = NULL; + HYPRE_BigInt *cmap = NULL; + // operators Array fs, gs; Array ms; @@ -183,6 +190,9 @@ friend class ParameterizedProblem; const MUMPSSolver::MatType mat_type=MUMPSSolver::MatType::SYMMETRIC_INDEFINITE); virtual void SetupPressureMassMatrix(); + void SetupMUMPSSolverSerial(); + void SetupMUMPSSolverParallel(); + // Component-wise assembly void BuildCompROMLinElems() override; void BuildBdrROMLinElems() override; @@ -216,6 +226,10 @@ friend class ParameterizedProblem; double ComputeBEIntegral(const FiniteElement &el, ElementTransformation &Tr, Coefficient &Q); void ComputeBEIntegral(const FiniteElement &el, ElementTransformation &Tr, VectorCoefficient &Q, Vector &result); + + void CreateHypreParMatrix(); + + void SetGlobalOffsets(); }; #endif diff --git a/include/topology_handler.hpp b/include/topology_handler.hpp index 71ee1238..62d2f21a 100644 --- a/include/topology_handler.hpp +++ b/include/topology_handler.hpp @@ -57,10 +57,20 @@ class TopologyHandler { protected: int numSub = -1; // number of subdomains. - int num_comp = -1; // number of components. Submesh - only one component / Component - multiple compoenents allowed, not yet implemented. + int numSubLoc = -1; // number of local subdomains on this rank + Array local_subs; // local subdomains + std::map g2l_sub; // map from global to local subdomains + int num_comp = -1; // number of components. Submesh - only one component / Component - multiple components allowed, not yet implemented. Array sub_composition; // number of subdomains per each component index. std::vector comp_names; + // Global data about all subdomains or all ranks. TODO: eliminate this for better scalability. + Array subdomain_rank, allNumSub; + + // MPI data + MPI_Comm comm; + int rank, nprocs; + // Spatial dimension. 
int dim = -1; @@ -77,6 +87,10 @@ class TopologyHandler Array port_infos; Array*> interface_infos; + std::set subNeighbors; + + std::map nghb2loc; // Global subdomain index to locally stored (for non-local neighboring subdomains) + public: TopologyHandler(const TopologyHandlerMode &input_type); @@ -87,6 +101,7 @@ class TopologyHandler // access const TopologyHandlerMode GetType() { return type; } const int GetNumSubdomains() { return numSub; } + const int GetNumLocalSubdomains() { return numSubLoc; } const int GetNumSubdomains(const int &c) { return sub_composition[c]; } const int GetNumComponents() { return num_comp; } const int GetNumPorts() { return num_ports; } @@ -138,9 +153,21 @@ class TopologyHandler virtual void PrintPortInfo(const int k = -1); virtual void PrintInterfaceInfo(const int k = -1); + MPI_Comm GetComm() { return comm; } + int GetRank() const { return rank; } + + virtual int LocalSubdomainIndex(int global_subdomain) { return global_subdomain; } + virtual int GlobalSubdomainIndex(int local_subdomain) { return local_subdomain; } + virtual int GlobalSubdomainRank(int global_subdomain) { return 0; } + + void FindPortNeighborSubdomains(); + void GetAllNumSub(Array &ns); + void GetNeighbors(Array &neighbors); + protected: virtual void UpdateAttributes(Mesh& m); virtual void UpdateBdrAttributes(Mesh& m); + void LoadBalance(); }; class SubMeshTopologyHandler : public TopologyHandler diff --git a/src/component_topology_handler.cpp b/src/component_topology_handler.cpp index 25a2a863..2722ebf5 100644 --- a/src/component_topology_handler.cpp +++ b/src/component_topology_handler.cpp @@ -23,6 +23,7 @@ ComponentTopologyHandler::ComponentTopologyHandler() std::string global_config = config.GetRequiredOption("mesh/component-wise/global_config"); ReadComponentsFromFile(global_config); + // TODO: limit this to the components on this MPI process. SetupComponents(); // Assume all components have the same spatial dimension. @@ -47,9 +48,15 @@ ComponentTopologyHandler::ComponentTopologyHandler() } } - // Do we really need to copy all meshes? + LoadBalance(); + + // TODO: instead of copying all meshes, just construct one of each type and + // translate/rotate during assembly. SetupMeshes(); + ReadPortsFromFile(global_config); + SetupPortNeighborMeshes(global_config); + bool success = ReadBoundariesFromFile(global_config); if (!success) { @@ -58,7 +65,7 @@ ComponentTopologyHandler::ComponentTopologyHandler() if (!success) mfem_error("ComponentTopologyHandler: failed to read boundary! specify it either in global or boundary config file.\n"); } - ReadPortsFromFile(global_config); + SetupBoundaryAttributes(); if (num_ref_ports > 0) SetupReferencePorts(); @@ -314,7 +321,10 @@ void ComponentTopologyHandler::ReadPortsFromFile(const std::string filename) errf = H5Fclose(file_id); assert(errf >= 0); +} +void ComponentTopologyHandler::SetupBoundaryAttributes() +{ // set up global bdr attributes. // Port attribute will be setup with a value that does not overlap with any component boundary attribute. 
int attr_offset = 0; @@ -339,8 +349,12 @@ void ComponentTopologyHandler::ReadPortsFromFile(const std::string filename) idx1 = components[c1]->bdr_attributes.Find(port_infos[p].Attr1); idx2 = components[c2]->bdr_attributes.Find(port_infos[p].Attr2); assert((idx1 >= 0) && (idx2 >= 0)); - (*bdr_c2g[port_infos[p].Mesh1])[idx1] = attr_offset; - (*bdr_c2g[port_infos[p].Mesh2])[idx2] = attr_offset; + + const int lm1 = LocalSubdomainIndex(port_infos[p].Mesh1); + const int lm2 = LocalSubdomainIndex(port_infos[p].Mesh2); + if (lm1 >= 0) (*bdr_c2g[lm1])[idx1] = attr_offset; + if (lm2 >= 0) (*bdr_c2g[lm2])[idx2] = attr_offset; + // interface attributes are not included in the global boundary attributes. attr_offset += 1; @@ -413,8 +427,8 @@ bool ComponentTopologyHandler::ReadBoundariesFromFile(const std::string filename int c_idx = components[c]->bdr_attributes.Find(b_data[2]); assert(c_idx >= 0); - (*bdr_c2g[m])[c_idx] = b_data[0]; - + const int lm = LocalSubdomainIndex(m); + if (lm >= 0) (*bdr_c2g[lm])[c_idx] = b_data[0]; int idx = bdr_attributes.Find(b_data[0]); if (idx < 0) bdr_attributes.Append(b_data[0]); } @@ -575,47 +589,70 @@ void ComponentTopologyHandler::WritePortDataToFile(const PortData &port, return; } +Mesh* ComponentTopologyHandler::CreateMesh(int global_subdomain) const +{ + Mesh *mesh = new Mesh(*components[mesh_types[global_subdomain]]); + + for (int d = 0; d < 3; d++) + { + mesh_config::trans[d] = mesh_configs[global_subdomain].trans[d]; + mesh_config::rotate[d] = mesh_configs[global_subdomain].rotate[d]; + } + mesh->Transform(*tf_ptr); + + return mesh; +} + void ComponentTopologyHandler::SetupMeshes() { - assert(numSub > 0); + assert(numSub > 0 && numSubLoc > 0); assert(mesh_types.Size() == numSub); assert(mesh_configs.Size() == numSub); - meshes.SetSize(numSub); + meshes.SetSize(numSubLoc); meshes = NULL; - for (int m = 0; m < numSub; m++) + for (int i = 0; i < numSubLoc; i++) { - meshes[m] = new Mesh(*components[mesh_types[m]]); - - for (int d = 0; d < 3; d++) - { - mesh_config::trans[d] = mesh_configs[m].trans[d]; - mesh_config::rotate[d] = mesh_configs[m].rotate[d]; - } - meshes[m]->Transform(*tf_ptr); + const int m = local_subs[i]; + meshes[i] = CreateMesh(m); } - for (int m = 0; m < numSub; m++) assert(meshes[m] != NULL); + for (int m = 0; m < numSubLoc; m++) assert(meshes[m] != NULL); // Set up boundary attribute map from component to global. // Only the initialization. 
- bdr_c2g.SetSize(numSub); + bdr_c2g.SetSize(numSubLoc); bdr_attributes.SetSize(0); - for (int m = 0; m < numSub; m++) + for (int m = 0; m < numSubLoc; m++) { bdr_c2g[m] = new Array(meshes[m]->bdr_attributes.Size()); *bdr_c2g[m] = -1; } } +void ComponentTopologyHandler::SetupPortNeighborMeshes(const std::string &global_config) +{ + FindPortNeighborSubdomains(); + + for (auto neighbor : subNeighbors) + { + const int m = meshes.Size(); + nghb2loc[neighbor] = m; + local_subs.Append(neighbor); + meshes.Append(CreateMesh(neighbor)); + bdr_c2g.Append(new Array(meshes[m]->bdr_attributes.Size())); + *bdr_c2g[m] = -1; + } +} + void ComponentTopologyHandler::SetupBdrAttributes() { - assert(meshes.Size() == numSub); + assert(meshes.Size() >= numSubLoc); - for (int m = 0; m < numSub; m++) + for (int m = 0; m < meshes.Size(); m++) { - int c = mesh_types[m]; + int c = mesh_types[local_subs[m]]; Mesh *comp = components[c]; // std::unordered_map *c2g_map = bdr_c2g[m]; @@ -1008,4 +1045,24 @@ bool ComponentTopologyHandler::ComponentBdrAttrCheck(Mesh *comp) } return success; -} \ No newline at end of file +} + +int ComponentTopologyHandler::GlobalSubdomainRank(int global_subdomain) +{ + return subdomain_rank[global_subdomain]; +} + +int ComponentTopologyHandler::LocalSubdomainIndex(int global_subdomain) +{ + if (g2l_sub.count(global_subdomain)) + return g2l_sub.at(global_subdomain); + if (nghb2loc.count(global_subdomain)) + return nghb2loc.at(global_subdomain); + + return -1; +} + +int ComponentTopologyHandler::GlobalSubdomainIndex(int local_subdomain) +{ + return local_subs[local_subdomain]; +} diff --git a/src/dg_mixed_bilin.cpp b/src/dg_mixed_bilin.cpp index 31e8a4de..1e5acac7 100644 --- a/src/dg_mixed_bilin.cpp +++ b/src/dg_mixed_bilin.cpp @@ -118,7 +118,7 @@ void MixedBilinearFormDGExtension::Assemble(int skip_zeros) break; } Array &bdr_marker = *boundary_face_integs_marker[k]; - MFEM_ASSERT(bdr_marker.Size() == bdr_attr_marker.Size(), + MFEM_ASSERT(bdr_marker.Size() >= bdr_attr_marker.Size(), "invalid boundary marker for boundary trace face" "integrator #" << k << ", counting from zero"); for (int i = 0; i < bdr_attr_marker.Size(); i++) diff --git a/src/interface_form.cpp b/src/interface_form.cpp index 5eae5853..8c7e0fef 100644 --- a/src/interface_form.cpp +++ b/src/interface_form.cpp @@ -13,9 +13,10 @@ namespace mfem InterfaceForm::InterfaceForm( Array &meshes_, Array &fes_, TopologyHandler *topol_) : meshes(meshes_), fes(fes_), topol_handler(topol_), - numSub(meshes_.Size()), timer("InterfaceForm") + numSub(topol_->GetNumLocalSubdomains()), + numSubStored(meshes_.Size()), timer("InterfaceForm") { - assert(fes_.Size() == numSub); + assert(fes_.Size() == numSubStored); block_offsets.SetSize(numSub + 1); block_offsets = 0; @@ -29,12 +30,13 @@ InterfaceForm::~InterfaceForm() DeletePointers(fnfi); } +// TODO: use a sparse array of SparseMatrix pointers, rather than a dense Array2D. 
void InterfaceForm::AssembleInterfaceMatrices(Array2D &mats) const { - assert(mats.NumRows() == numSub); - assert(mats.NumCols() == numSub); - for (int i = 0; i < numSub; i++) - for (int j = 0; j < numSub; j++) assert(mats(i, j)); + assert(mats.NumRows() == numSubStored); + assert(mats.NumCols() == numSubStored); + for (int i = 0; i < numSubStored; i++) + for (int j = 0; j < numSubStored; j++) assert(mats(i, j)); const PortInfo *pInfo; Array midx(2); @@ -49,7 +51,13 @@ void InterfaceForm::AssembleInterfaceMatrices(Array2D &mats) con midx[0] = pInfo->Mesh1; midx[1] = pInfo->Mesh2; - + + for (int i = 0; i < 2; i++) + midx[i] = topol_handler->LocalSubdomainIndex(midx[i]); + + if (midx[0] < 0 || midx[1] < 0) + continue; + for (int i = 0; i < 2; i++) for (int j = 0; j < 2; j++) mats_p(i, j) = mats(midx[i], midx[j]); @@ -372,10 +380,12 @@ void InterfaceForm::AssembleInterfaceGrad(Mesh *mesh1, Mesh *mesh2, MixedInterfaceForm::MixedInterfaceForm( Array &meshes_, Array &trial_fes_, Array &test_fes_, TopologyHandler *topol_) - : meshes(meshes_), trial_fes(trial_fes_), test_fes(test_fes_), topol_handler(topol_), numSub(meshes_.Size()) + : meshes(meshes_), trial_fes(trial_fes_), test_fes(test_fes_), topol_handler(topol_), + numSub(topol_->GetNumLocalSubdomains()), + numSubStored(meshes_.Size()) { - assert(trial_fes_.Size() == numSub); - assert(test_fes_.Size() == numSub); + assert(trial_fes_.Size() == numSubStored); + assert(test_fes_.Size() == numSubStored); trial_block_offsets.SetSize(numSub + 1); test_block_offsets.SetSize(numSub + 1); @@ -410,7 +420,13 @@ void MixedInterfaceForm::AssembleInterfaceMatrices(Array2D &mats midx[0] = pInfo->Mesh1; midx[1] = pInfo->Mesh2; - + + for (int i = 0; i < 2; i++) + midx[i] = topol_handler->LocalSubdomainIndex(midx[i]); + + if (midx[0] < 0 || midx[1] < 0) + continue; + for (int i = 0; i < 2; i++) for (int j = 0; j < 2; j++) b_mats_p(i, j) = mats(midx[i], midx[j]); diff --git a/src/main_workflow.cpp b/src/main_workflow.cpp index cec992a6..2be1ad8c 100644 --- a/src/main_workflow.cpp +++ b/src/main_workflow.cpp @@ -154,7 +154,10 @@ void GenerateSamples(MPI_Comm comm) int s = 0; while (s < sample_generator->GetTotalSampleSize()) { - if (!sample_generator->IsMyJob(s)) continue; + if (!sample_generator->IsMyJob(s)) { + s++; + continue; + } // NOTE: this will change config.dict_ sample_generator->SetSampleParams(s); diff --git a/src/multiblock_solver.cpp b/src/multiblock_solver.cpp index d946c710..f89b2d3f 100644 --- a/src/multiblock_solver.cpp +++ b/src/multiblock_solver.cpp @@ -44,6 +44,8 @@ MultiBlockSolver::MultiBlockSolver() // Receive topology info numSub = topol_data.numSub; + numSubLoc = topol_handler->GetNumLocalSubdomains(); + numSubStored = meshes.Size(); dim = topol_data.dim; global_bdr_attributes = *(topol_data.global_bdr_attributes); numBdr = global_bdr_attributes.Size(); @@ -168,13 +170,13 @@ void MultiBlockSolver::SetVariableVector(const int &var_idx, BlockVector &var, B void MultiBlockSolver::SortBySubdomains(BlockVector &by_var, BlockVector &by_sub) { - assert(by_var.NumBlocks() == (num_var * numSub)); - assert(by_sub.NumBlocks() == (num_var * numSub)); + assert(by_var.NumBlocks() == (num_var * numSubLoc)); + assert(by_sub.NumBlocks() == (num_var * numSubLoc)); - for (int m = 0; m < numSub; m++) + for (int m = 0; m < numSubLoc; m++) for (int v = 0; v < num_var; v++) { - int by_var_idx = numSub * v + m; + int by_var_idx = numSubLoc * v + m; int by_sub_idx = num_var * m + v; assert(by_var.BlockSize(by_var_idx) == by_sub.BlockSize(by_sub_idx)); @@ 
-186,13 +188,13 @@ void MultiBlockSolver::SortBySubdomains(BlockVector &by_var, BlockVector &by_sub void MultiBlockSolver::SortByVariables(BlockVector &by_sub, BlockVector &by_var) { - assert(by_var.NumBlocks() == (num_var * numSub)); - assert(by_sub.NumBlocks() == (num_var * numSub)); + assert(by_var.NumBlocks() == (num_var * numSubLoc)); + assert(by_sub.NumBlocks() == (num_var * numSubLoc)); - for (int m = 0; m < numSub; m++) + for (int m = 0; m < numSubLoc; m++) for (int v = 0; v < num_var; v++) { - int by_var_idx = numSub * v + m; + int by_var_idx = numSubLoc * v + m; int by_sub_idx = num_var * m + v; assert(by_var.BlockSize(by_var_idx) == by_sub.BlockSize(by_sub_idx)); @@ -206,10 +208,14 @@ void MultiBlockSolver::SetupBCVariables() { // Set up boundary markers. int max_bdr_attr = -1; - for (int m = 0; m < numSub; m++) + for (int m = 0; m < numSubLoc; m++) { max_bdr_attr = max(max_bdr_attr, meshes[m]->bdr_attributes.Max()); } + { + const int local_max = max_bdr_attr; + MPI_Allreduce(&local_max, &max_bdr_attr, 1, MPI_INT, MPI_MAX, topol_handler->GetComm()); + } // TODO: technically this should be Array*> for each meshes. // Running with MFEM debug version will lead to error when assembling boundary integrators. @@ -276,10 +282,21 @@ void MultiBlockSolver::AssembleROMMat() BlockMatrix *romMat = new BlockMatrix(*rom_block_offsets); romMat->owns_blocks = true; - AssembleROMMat(*romMat); + AssembleROMMat(*romMat); romMat->Finalize(); - rom_handler->SetRomMat(romMat); + const bool hypreAssemble = rom_handler->HypreAssemble(); + rom_handler->SetRomMat(romMat, !hypreAssemble); + + if (hypreAssemble) + { + rom_handler->CreateHypreParMatrix(romMat, rank, nproc); + + localBlocks = rom_handler->GetLocalBlocks(); // TODO: get this for FOM without depending on ROM? + rom_handler->GetLocalNumBlocks(localNumBlocks); // TODO: get this for FOM without depending on ROM? + + topol_handler->GetNeighbors(neighbors); + } } void MultiBlockSolver::AssembleROMMat(BlockMatrix &romMat) @@ -287,11 +304,15 @@ void MultiBlockSolver::AssembleROMMat(BlockMatrix &romMat) assert(topol_mode == TopologyHandlerMode::COMPONENT); assert(rom_elems); + const int ossub = topol_handler->GlobalSubdomainIndex(0); // Offset for first local subdomain + // component domain matrix. - for (int m = 0; m < numSub; m++) - { - int c_type = topol_handler->GetMeshType(m); - int num_block = (separate_variable_basis) ? num_var : 1; + for (int mm = 0; mm < numSubLoc; mm++) + { + const int m = mm + ossub; + const int m_global = topol_handler->GlobalSubdomainIndex(mm); + const int c_type = topol_handler->GetMeshType(m_global); + const int num_block = (separate_variable_basis) ? num_var : 1; Array midx(num_block); for (int v = 0; v < num_block; v++) @@ -301,7 +322,7 @@ void MultiBlockSolver::AssembleROMMat(BlockMatrix &romMat) AddToBlockMatrix(midx, midx, *comp_mat, romMat); // boundary matrices of each component. 
- Array *bdr_c2g = topol_handler->GetBdrAttrComponentToGlobalMap(m); + Array *bdr_c2g = topol_handler->GetBdrAttrComponentToGlobalMap(mm); for (int b = 0; b < bdr_c2g->Size(); b++) { @@ -365,15 +386,15 @@ void MultiBlockSolver::InitVisualization(const std::string& output_path) void MultiBlockSolver::InitIndividualParaview(const std::string& file_prefix) { assert(var_names.size() == num_var); - assert((visual.domain_offset >= 0) && (visual.domain_offset < numSub)); - assert((visual.domain_interval > 0) && (visual.domain_interval <= numSub)); - paraviewColls.SetSize(numSub); + assert((visual.domain_offset >= 0) && (visual.domain_offset < numSubLoc)); + assert((visual.domain_interval > 0) && (visual.domain_interval <= numSubLoc)); + paraviewColls.SetSize(numSubLoc); paraviewColls = NULL; std::string error_type, tmp; if (visual.save_error) { - error_visual.SetSize(num_var * numSub); + error_visual.SetSize(num_var * numSubLoc); error_visual = NULL; for (int k = 0; k < error_visual.Size(); k++) error_visual[k] = new GridFunction(fes[k]); @@ -381,7 +402,7 @@ void MultiBlockSolver::InitIndividualParaview(const std::string& file_prefix) error_type = "_abs_error"; } - for (int m = 0; m < numSub; m++) { + for (int m = 0; m < numSubLoc; m++) { if ((m < visual.domain_offset) || (m % visual.domain_interval != 0)) continue; ostringstream oss; @@ -608,6 +629,7 @@ void MultiBlockSolver::CopySolution(BlockVector *input_sol) void MultiBlockSolver::InitROMHandler() { rom_handler = new MFEMROMHandler(topol_handler, var_offsets, var_names, separate_variable_basis); + rom_handler->LoadBalanceROMBlocks(rank, nproc); if (!(topol_mode == TopologyHandlerMode::COMPONENT)) return; @@ -719,13 +741,13 @@ void MultiBlockSolver::ComputeSubdomainErrorAndNorm(GridFunction *fom_sol, GridF void MultiBlockSolver::ComputeRelativeError(Array fom_sols, Array rom_sols, Vector &error) { - assert(fom_sols.Size() == (num_var * numSub)); - assert(rom_sols.Size() == (num_var * numSub)); + assert(fom_sols.Size() == (num_var * numSubLoc)); + assert(rom_sols.Size() == (num_var * numSubLoc)); Vector norm(num_var); error.SetSize(num_var); norm = 0.0; error = 0.0; - for (int m = 0, idx = 0; m < numSub; m++) + for (int m = 0, idx = 0; m < numSubLoc; m++) { for (int v = 0; v < num_var; v++, idx++) { @@ -739,10 +761,17 @@ void MultiBlockSolver::ComputeRelativeError(Array fom_sols, Arra for (int v = 0; v < num_var; v++) { - norm[v] = sqrt(norm[v]); - error[v] = sqrt(error[v]); + double sum = 0.0; + MPI_Allreduce(&norm[v], &sum, 1, MPI_DOUBLE, MPI_SUM, topol_handler->GetComm()); + norm[v] = sqrt(sum); + + sum = 0.0; + MPI_Allreduce(&error[v], &sum, 1, MPI_DOUBLE, MPI_SUM, topol_handler->GetComm()); + error[v] = sqrt(sum); + + if (rank == 0) printf("Variable %d global absolute error: %.5E\n", v, error[v]); error[v] /= norm[v]; - printf("Variable %d relative error: %.5E\n", v, error[v]); + if (rank == 0) printf("Variable %d global relative error: %.5E\n", v, error[v]); } } @@ -753,7 +782,7 @@ void MultiBlockSolver::CompareSolution(BlockVector &test_U, Vector &error) assert(test_U.BlockSize(b) == U->BlockSize(b)); Array test_us; - test_us.SetSize(num_var * numSub); + test_us.SetSize(num_var * numSubLoc); for (int k = 0; k < test_us.Size(); k++) { test_us[k] = new GridFunction(fes[k], test_U.GetBlock(k), 0); diff --git a/src/poisson_solver.cpp b/src/poisson_solver.cpp index 948ed837..18dafa6b 100644 --- a/src/poisson_solver.cpp +++ b/src/poisson_solver.cpp @@ -35,8 +35,8 @@ PoissonSolver::PoissonSolver() fec = new H1_FECollection(order, dim); } - 
fes.SetSize(numSub); - for (int m = 0; m < numSub; m++) { + fes.SetSize(numSubStored); + for (int m = 0; m < numSubStored; m++) { fes[m] = new FiniteElementSpace(meshes[m], fec[0], udim); } } @@ -107,12 +107,12 @@ void PoissonSolver::AddBCFunction(const double &F, const int battr) void PoissonSolver::InitVariables() { // number of blocks = solution dimension * number of subdomain; - block_offsets.SetSize(udim * numSub + 1); - var_offsets.SetSize(numSub + 1); - num_vdofs.SetSize(numSub); + block_offsets.SetSize(udim * numSubLoc + 1); + var_offsets.SetSize(numSubLoc + 1); + num_vdofs.SetSize(numSubLoc); block_offsets[0] = 0; var_offsets[0] = 0; - for (int i = 0; i < numSub; i++) + for (int i = 0; i < numSubLoc; i++) { var_offsets[i + 1] = fes[i]->GetTrueVSize(); num_vdofs[i] = fes[i]->GetTrueVSize(); @@ -140,8 +140,8 @@ void PoissonSolver::InitVariables() These are system-specific, therefore not defining it now. */ - us.SetSize(numSub); - for (int m = 0; m < numSub; m++) + us.SetSize(numSubLoc); + for (int m = 0; m < numSubLoc; m++) { us[m] = new GridFunction(fes[m], U->GetBlock(m), 0); (*us[m]) = 0.0; @@ -152,6 +152,14 @@ void PoissonSolver::InitVariables() } rhs_coeffs.SetSize(0); + + global_offsets.SetSize(nproc + 1); + global_offsets[0] = 0; + int numLocal = var_offsets.Last(); + MPI_Allgather(&numLocal, 1, MPI_INT, + &global_offsets[1], 1, MPI_INT, MPI_COMM_WORLD); + + global_offsets.PartialSum(); } void PoissonSolver::BuildOperators() @@ -165,11 +173,11 @@ void PoissonSolver::BuildRHSOperators() { SanityCheckOnCoeffs(); - bs.SetSize(numSub); + bs.SetSize(numSubLoc); // These are heavily system-dependent. // Based on scalar/vector system, different integrators/coefficients will be used. - for (int m = 0; m < numSub; m++) + for (int m = 0; m < numSubLoc; m++) { bs[m] = new LinearForm(fes[m], RHS->GetBlock(m).GetData()); for (int r = 0; r < rhs_coeffs.Size(); r++) @@ -181,9 +189,9 @@ void PoissonSolver::BuildDomainOperators() { SanityCheckOnCoeffs(); - as.SetSize(numSub); + as.SetSize(numSubStored); - for (int m = 0; m < numSub; m++) + for (int m = 0; m < numSubStored; m++) { as[m] = new BilinearForm(fes[m]); as[m]->AddDomainIntegrator(new DiffusionIntegrator); @@ -213,9 +221,9 @@ void PoissonSolver::SetupRHSBCOperators() { SanityCheckOnCoeffs(); - assert(bs.Size() == numSub); + assert(bs.Size() == numSubLoc); - for (int m = 0; m < numSub; m++) + for (int m = 0; m < numSubLoc; m++) { assert(bs[m]); for (int b = 0; b < global_bdr_attributes.Size(); b++) @@ -235,9 +243,9 @@ void PoissonSolver::SetupDomainBCOperators() { SanityCheckOnCoeffs(); - assert(as.Size() == numSub); + assert(as.Size() == numSubStored); - for (int m = 0; m < numSub; m++) + for (int m = 0; m < numSubStored; m++) { assert(as[m]); for (int b = 0; b < global_bdr_attributes.Size(); b++) @@ -263,15 +271,15 @@ void PoissonSolver::AssembleRHS() { SanityCheckOnCoeffs(); - MFEM_ASSERT(bs.Size() == numSub, "LinearForm bs != numSub.\n"); + MFEM_ASSERT(bs.Size() == numSubLoc, "LinearForm bs != numSub.\n"); - for (int m = 0; m < numSub; m++) + for (int m = 0; m < numSubLoc; m++) { MFEM_ASSERT(bs[m], "LinearForm or BilinearForm pointer of a subdomain is not associated!\n"); bs[m]->Assemble(); } - for (int m = 0; m < numSub; m++) + for (int m = 0; m < numSubLoc; m++) // Do we really need SyncAliasMemory? bs[m]->SyncAliasMemory(*RHS); // Synchronize with block vector RHS. What is different from SyncMemory? 
} @@ -280,18 +288,18 @@ void PoissonSolver::AssembleOperator() { SanityCheckOnCoeffs(); - MFEM_ASSERT(as.Size() == numSub, "BilinearForm bs != numSub.\n"); + MFEM_ASSERT(as.Size() == numSubStored, "BilinearForm bs != numSub.\n"); - for (int m = 0; m < numSub; m++) + for (int m = 0; m < numSubStored; m++) { MFEM_ASSERT(as[m], "LinearForm or BilinearForm pointer of a subdomain is not associated!\n"); as[m]->Assemble(); } - mats.SetSize(numSub, numSub); - for (int i = 0; i < numSub; i++) + mats.SetSize(numSubStored, numSubStored); + for (int i = 0; i < numSubStored; i++) { - for (int j = 0; j < numSub; j++) + for (int j = 0; j < numSubStored; j++) { if (i == j) { mats(i, i) = &(as[i]->SpMat()); @@ -302,35 +310,47 @@ void PoissonSolver::AssembleOperator() } AssembleInterfaceMatrices(); - for (int m = 0; m < numSub; m++) + for (int m = 0; m < numSubStored; m++) as[m]->Finalize(); - // globalMat = new BlockOperator(block_offsets); - // NOTE: currently, domain-decomposed system will have a significantly different sparsity pattern. - // This is especially true for vector solution, where ordering of component is changed. - // This is quite inevitable, but is it desirable? - globalMat = new BlockMatrix(var_offsets); - for (int i = 0; i < numSub; i++) - { - for (int j = 0; j < numSub; j++) - { - if (i != j) mats(i, j)->Finalize(); - - globalMat->SetBlock(i, j, mats(i, j)); - } - } + if (nproc == 1) + { + // globalMat = new BlockOperator(block_offsets); + // NOTE: currently, domain-decomposed system will have a significantly different sparsity pattern. + // This is especially true for vector solution, where ordering of component is changed. + // This is quite inevitable, but is it desirable? + globalMat = new BlockMatrix(var_offsets); + for (int i = 0; i < numSubStored; i++) + { + for (int j = 0; j < numSubStored; j++) + { + if (i != j) mats(i, j)->Finalize(); + + globalMat->SetBlock(i, j, mats(i, j)); + } + } + } if (use_amg || direct_solve) { - globalMat_mono = globalMat->CreateMonolithic(); - - // TODO: need to change when the actual parallelization is implemented. - sys_glob_size = globalMat_mono->NumRows(); - sys_row_starts[0] = 0; - sys_row_starts[1] = globalMat_mono->NumRows(); - globalMat_hypre = new HypreParMatrix(MPI_COMM_SELF, sys_glob_size, sys_row_starts, globalMat_mono); - - if (direct_solve) SetMUMPSSolver(); + if (nproc == 1) + { + globalMat_mono = globalMat->CreateMonolithic(); + + sys_glob_size = globalMat_mono->NumRows(); + sys_row_starts[0] = 0; + sys_row_starts[1] = globalMat_mono->NumRows(); + globalMat_hypre = new HypreParMatrix(MPI_COMM_SELF, sys_glob_size, sys_row_starts, globalMat_mono); + } + else + { + CreateHypreParMatrix(); + } + + if (direct_solve) + { + SetMUMPSSolver(); + } } } @@ -427,6 +447,147 @@ void PoissonSolver::BuildItfaceROMLinElems() } // for (int p = 0; p < num_ref_ports; p++) } +void PoissonSolver::SetGlobalOffsets() +{ + // TODO: use point-to-point communication instead of global communication, for better scalability. 
+ + Array allNumSub, disp; + topol_handler->GetAllNumSub(allNumSub); + + disp.SetSize(nproc); + disp[0] = 0; + for (int i=1; i starts(2); + starts[0] = global_offsets[rank]; + starts[1] = global_offsets[rank + 1]; + + SetGlobalOffsets(); + + const int localSize = starts[1] - starts[0]; + hdiag = new SparseMatrix(localSize, localSize); // Owned by systemOp_hypre + hoffd = new SparseMatrix(localSize, gsize); // Owned by systemOp_hypre + + if (rank == 0) + { + cout << "Global size " << gsize << endl; + cout << "Local size " << localSize << endl << std::flush; + } + + std::set offd_cols; + + for (int i = 0; i < numSubLoc; i++) + { + for (int j = 0; j < numSubLoc; j++) + { + if (mats(i, j)) + { + const int oscol = var_offsets[j]; + for (int r=0; rNumRows(); ++r) + { + Array cols; + Vector srow; + mats(i, j)->GetRow(r, cols, srow); + + MFEM_VERIFY(cols.Size() == srow.Size(), ""); + + const int row = var_offsets[i] + r; + for (int l=0; lSet(row, cols[l] + oscol, srow[l]); + } + } + } + } + + for (int j = numSubLoc; j < numSubStored; j++) + { + const int globalSub = neighbors[j - numSubLoc]; + const int rank_j = topol_handler->GlobalSubdomainRank(globalSub); + const int gos_j = global_offsets[rank_j]; + + if (mats(i, j)) + { + const int oscol = gos_j + global_var_offsets[globalSub]; + for (int r=0; rNumRows(); ++r) + { + Array cols; + Vector srow; + mats(i, j)->GetRow(r, cols, srow); + + MFEM_VERIFY(cols.Size() == srow.Size(), ""); + + const int row = var_offsets[i] + r; + for (int l=0; lSet(row, cols[l] + oscol, srow[l]); + offd_cols.insert(cols[l] + oscol); + } + } + } + } + } + + // TODO: avoid making so many copies of the same matrices. + + const int num_offd_cols = offd_cols.size(); + + hdiag->Finalize(); + hoffd->Finalize(); + + cmap = new HYPRE_BigInt[num_offd_cols]; + std::map cmap_inv; + + int cnt = 0; + for (auto col : offd_cols) + { + cmap[cnt] = col; + cmap_inv[col] = cnt; + + cnt++; + } + + MFEM_VERIFY(cnt == num_offd_cols, ""); + + if (num_offd_cols > 0) + { + // Map column indices in hoffd + int *offI = hoffd->GetI(); + int *offJ = hoffd->GetJ(); + + const int ne = offI[localSize]; // Total number of entries in hoffd + + for (int i=0; iSortColumnIndices(); + } + + hoffd->SetWidth(num_offd_cols); + + // See rom_handler.cpp for another case of using this constructor with 8+1 arguments. + // constructor with 8+1 arguments + systemOp_hypre = new HypreParMatrix(MPI_COMM_WORLD, gsize, gsize, + starts.GetData(), starts.GetData(), + hdiag, hoffd, cmap, true); + + systemOp_hypre->SetOwnerFlags(systemOp_hypre->OwnsDiag(), systemOp_hypre->OwnsOffd(), 1); +} + bool PoissonSolver::Solve(SampleGenerator *sample_generator) { // If using direct solver, returns always true. 
@@ -585,9 +746,27 @@ void PoissonSolver::SetParameterizedProblem(ParameterizedProblem *problem) } void PoissonSolver::SetMUMPSSolver() +{ + if (nproc == 1) + SetupMUMPSSolverSerial(); + else + SetupMUMPSSolverParallel(); + + mumps->SetMatrixSymType(MUMPSSolver::MatType::SYMMETRIC_POSITIVE_DEFINITE); +} + +void PoissonSolver::SetupMUMPSSolverSerial() { assert(globalMat_hypre); mumps = new MUMPSSolver(MPI_COMM_SELF); - mumps->SetMatrixSymType(MUMPSSolver::MatType::SYMMETRIC_POSITIVE_DEFINITE); mumps->SetOperator(*globalMat_hypre); -} \ No newline at end of file + //globalMat_hypre->Print("PoissonHypreSerial"); +} + +void PoissonSolver::SetupMUMPSSolverParallel() +{ + assert(systemOp_hypre); + mumps = new MUMPSSolver(MPI_COMM_WORLD); + mumps->SetOperator(*systemOp_hypre); + //systemOp_hypre->Print("PoissonHypre"); +} diff --git a/src/rom_handler.cpp b/src/rom_handler.cpp index 3edc51f3..10f125aa 100644 --- a/src/rom_handler.cpp +++ b/src/rom_handler.cpp @@ -38,6 +38,7 @@ ROMHandlerBase::ROMHandlerBase( const std::vector &var_names, const bool separate_variable_basis) : topol_handler(input_topol), numSub(input_topol->GetNumSubdomains()), + numSubLoc(input_topol->GetNumLocalSubdomains()), fom_var_names(var_names), fom_var_offsets(input_var_offsets), basis_tags(0), @@ -47,8 +48,8 @@ ROMHandlerBase::ROMHandlerBase( { num_var = fom_var_names.size(); - fom_num_vdofs.SetSize(numSub); - for (int m = 0, idx = 0; m < numSub; m++) + fom_num_vdofs.SetSize(numSubLoc); + for (int m = 0, idx = 0; m < numSubLoc; m++) { fom_num_vdofs[m] = 0; for (int v = 0; v < num_var; v++, idx++) @@ -85,6 +86,7 @@ void ROMHandlerBase::ParseInputs() operator_prefix = config.GetRequiredOption("model_reduction/save_operator/prefix"); num_rom_blocks = numSub; + num_rom_blocks_local = num_rom_blocks; num_rom_ref_blocks = topol_handler->GetNumComponents(); num_rom_comp = num_rom_ref_blocks; @@ -126,7 +128,7 @@ void ROMHandlerBase::ParseInputs() /* parse the dimension of basis */ int midx = -1; int vidx = (separate_variable) ? b % num_var : 0; - for (int m = 0; m < numSub; m++) + for (int m = 0; m < numSubLoc; m++) if (topol_handler->GetMeshType(m) == (separate_variable ? b / num_var : b)) { midx = m; @@ -134,7 +136,7 @@ void ROMHandlerBase::ParseInputs() } assert(midx >= 0); - int idx = (separate_variable) ? midx * num_var + vidx : midx; + const int idx = (separate_variable) ? midx * num_var + vidx : midx; if (separate_variable) dim_ref_basis[b] = fom_var_offsets[idx + 1] - fom_var_offsets[idx]; else @@ -257,11 +259,10 @@ void ROMHandlerBase::LoadReducedBasis() Once fully parallelized, each process will load only local part of the basis. 
*/ { - int local_dim = CAROM::split_dimension(dim_ref_basis[k], MPI_COMM_WORLD); - basis_reader = new CAROM::BasisReader(basis_name + basis_tags[k].print(), CAROM::Database::formats::HDF5_MPIO, local_dim); + int local_dim = dim_ref_basis[k]; + basis_reader = new CAROM::BasisReader(basis_name + basis_tags[k].print() + ".000000", CAROM::Database::formats::HDF5, local_dim, MPI_COMM_NULL); carom_ref_basis[k] = new CAROM::Matrix(*basis_reader->getSpatialBasis(num_ref_basis[k])); - carom_ref_basis[k]->gather(); } numRowRB = carom_ref_basis[k]->numRows(); numColumnRB = carom_ref_basis[k]->numColumns(); @@ -344,6 +345,9 @@ MFEMROMHandler::MFEMROMHandler( { mfem_error("Unknown ROM linear solver type!\n"); } + + std::string hypre_matrix_str = config.GetOption("model_reduction/hypre_matrix", "no"); + if (hypre_matrix_str == "yes") hypre_assemble = true; if (linsol_type == MFEMROMHandler::SolverType::DIRECT) { @@ -475,17 +479,23 @@ void MFEMROMHandler::ProjectToDomainBasis(const int &i, const Vector &vec, Vecto void MFEMROMHandler::ProjectGlobalToDomainBasis(const BlockVector* vec, BlockVector*& rom_vec) { - assert(vec->NumBlocks() == num_rom_blocks); + if (separate_variable) + assert(vec->NumBlocks() == num_rom_blocks); + else + assert(vec->NumBlocks() == num_rom_blocks_local); + // reset rom_vec if initiated a priori. if (rom_vec) delete rom_vec; + // TODO: distribute this! rom_vec = new BlockVector(rom_block_offsets); int m, v, fom_idx; - for (int i = 0; i < num_rom_blocks; i++) + for (int i = localBlocks[0]; i < localBlocks[1]; ++i) { GetDomainAndVariableIndex(i, m, v); - fom_idx = (separate_variable)? v + m * num_var : m; + const int mloc = topol_handler->LocalSubdomainIndex(m); + fom_idx = (separate_variable)? v + mloc * num_var : mloc; ProjectToDomainBasis(i, vec->GetBlock(fom_idx), rom_vec->GetBlock(i)); } @@ -520,19 +530,23 @@ void MFEMROMHandler::LiftUpFromDomainBasis(const int &i, const Vector &rom_vec, void MFEMROMHandler::LiftUpGlobal(const BlockVector &rom_vec, BlockVector &vec) { assert(rom_vec.NumBlocks() == num_rom_blocks); - assert(vec.NumBlocks() == num_rom_blocks); + if (separate_variable) + assert(vec.NumBlocks() == num_rom_blocks); + else + assert(vec.NumBlocks() == num_rom_blocks_local); int m, v, fom_idx; - for (int i = 0; i < num_rom_blocks; i++) + for (int i = localBlocks[0]; i < localBlocks[1]; ++i) { GetDomainAndVariableIndex(i, m, v); - fom_idx = (separate_variable)? v + m * num_var : m; + const int mloc = topol_handler->LocalSubdomainIndex(m); + fom_idx = (separate_variable)? v + mloc * num_var : mloc; LiftUpFromDomainBasis(i, rom_vec.GetBlock(i), vec.GetBlock(fom_idx)); } } -void MFEMROMHandler::Solve(BlockVector &rhs, BlockVector &sol) +void MFEMROMHandler::Solve(Vector &rhs, Vector &sol) { assert(operator_loaded); @@ -544,6 +558,8 @@ void MFEMROMHandler::Solve(BlockVector &rhs, BlockVector &sol) if (linsol_type == SolverType::DIRECT) { + if (topol_handler->GetRank() == 0) + cout << "MFEMROMHandler::Solve direct, size " << rhs.Size() << endl; assert(mumps); mumps->SetPrintLevel(print_level); mumps->Mult(rhs, sol); @@ -562,7 +578,7 @@ void MFEMROMHandler::Solve(BlockVector &rhs, BlockVector &sol) // TODO: need to change when the actual parallelization is implemented. 
HYPRE_BigInt glob_size = rom_block_offsets.Last(); HYPRE_BigInt row_starts[2] = {0, rom_block_offsets.Last()}; - parRomMat = new HypreParMatrix(MPI_COMM_SELF, glob_size, row_starts, romMat_mono); + parRomMat = new HypreParMatrix(MPI_COMM_WORLD, glob_size, row_starts, romMat_mono); K = parRomMat; } else if ((prec_str == "gs") || (prec_str == "none")) @@ -595,8 +611,12 @@ void MFEMROMHandler::Solve(BlockVector &rhs, BlockVector &sol) if (prec_str != "none") solver->SetPreconditioner(*M); - solver->SetOperator(*K); - + + if (romMat_hypre) + solver->SetOperator(*romMat_hypre); + else + solver->SetOperator(*K); + solver->SetAbsTol(atol); solver->SetRelTol(rtol); solver->SetMaxIter(maxIter); @@ -618,16 +638,44 @@ void MFEMROMHandler::Solve(BlockVector &rhs, BlockVector &sol) void MFEMROMHandler::Solve(BlockVector* U) { - assert(U->NumBlocks() == num_rom_blocks); + // TODO: reduced_rhs and reduced_sol are still global size! + if (separate_variable) + assert(U->NumBlocks() == num_rom_blocks); + else + assert(U->NumBlocks() == num_rom_blocks_local); assert(reduced_rhs); printf("Solve ROM.\n"); - reduced_sol = new BlockVector(rom_block_offsets); + reduced_sol = new BlockVector(rom_block_offsets); // TODO: distribute this in parallel. (*reduced_sol) = 0.0; - Solve(*reduced_rhs, *reduced_sol); + if (reduced_rhs_hypre.Size() > 0) + { + Vector reduced_sol_hypre(reduced_rhs_hypre.Size()); + + // TODO: eliminate global RHS vector in parallel. + for (int i=0; iSize(), ""); + + for (int i=0; iSize(); ++i) + (*reduced_sol)[i] = globalSol(i); + } + else + Solve(*reduced_rhs, *reduced_sol); // 23. reconstruct FOM state + // TODO: distribute this in the parallel case. LiftUpGlobal(*reduced_sol, *U); } @@ -662,7 +710,7 @@ void MFEMROMHandler::NonlinearSolve(Operator &oper, BlockVector* U, Solver *prec Solver *J_solver = NULL; if (linsol_type == SolverType::DIRECT) { - mumps = new MUMPSSolver(MPI_COMM_SELF); + mumps = new MUMPSSolver(MPI_COMM_WORLD); mumps->SetMatrixSymType(mat_type); mumps->SetPrintLevel(jac_print_level); J_solver = mumps; @@ -916,6 +964,241 @@ void MFEMROMHandler::LoadOperatorFromFile(const std::string filename) assert(errf >= 0); } +// TODO: should this be in the base class ROMHandlerBase? 
+void MFEMROMHandler::LoadBalanceROMBlocks(int rank, int nproc) +{ + MFEM_VERIFY(num_rom_blocks + 1 == rom_block_offsets.Size(), ""); + MFEM_VERIFY(num_rom_blocks >= nproc, ""); + + const int gsize = rom_block_offsets[num_rom_blocks]; + + const int bp = num_rom_blocks / nproc; // Number of blocks per rank + const int ne = num_rom_blocks - (bp * nproc); // Number of ranks needing an extra block + + localSizes.SetSize(nproc); + localSizes = 0; + + localNumBlocks.SetSize(nproc); + localNumBlocks = 0; + + localBlocks[0] = 0; + + int sum = 0; + int bsum = 0; + for (int j=0, i=0; j boffset(nproc+1); + + Array offset(nproc+1); + boffset[0] = 0; + offset[0] = 0; + for (int i=1; i<=nproc; ++i) + { + offset[i] = offset[i - 1] + localSizes[i - 1]; + boffset[i] = boffset[i - 1] + localNumBlocks[i - 1]; + } + + const int loc0 = offset[rank]; + const int loc1 = offset[rank] + localSizes[rank]; + + std::set offd_blocks; + std::set offd_cols; + + const int nblocks = localNumBlocks[rank]; + num_rom_blocks_local = nblocks; + + for (int b=0; bIsZeroBlock(gb, j)) + { + offd_blocks.insert(j); + } + } + } + + int offd_size = 0; + for (auto b : offd_blocks) + { + const int bsize_j = rom_block_offsets[b + 1] - rom_block_offsets[b]; + offd_size += bsize_j; + } + + // The following construction of a HypreParMatrix, by using a SparseMatrix + // for diag and offd, follows the example of + // ParFiniteElementSpace::ParallelDerefinementMatrix + hdiag = new SparseMatrix(localSizes[rank], localSizes[rank]); + SparseMatrix &diag = *hdiag; + hoffd = new SparseMatrix(localSizes[rank], gsize); + SparseMatrix &offd = *hoffd; + + // Set diag and offd + + int localOffset = 0; + for (int b=0; b rows(bsize); + for (int i=0; iIsZeroBlock(gb, j)) + { + const SparseMatrix &block = input_mat->GetBlock(gb, j); + // TODO: this conversion to DenseMatrix is inefficient. If ROM blocks are always + // dense, can we just store them as DenseMatrix instances in the first place? 
+ + DenseMatrix *db = block.ToDenseMatrix(); + + const bool diagBlock = boffset[rank] <= j && j < boffset[rank] + nblocks; + + const int bsize_j = rom_block_offsets[j + 1] - rom_block_offsets[j]; + Array cols(bsize_j); + for (int i=0; i= loc1) == !diagBlock, ""); + + if (cols[i] < loc0 || cols[i] >= loc1) // Off-diagonal + { + offd_cols.insert(cols[i]); + } + + if (diagBlock) + cols[i] -= loc0; + } + + if (diagBlock) + { + // Diagonal block + MFEM_VERIFY(bsize_j == bsize, ""); + diag.AddSubMatrix(rows, cols, *db); + } + else + { + // Off-diagonal block + offd.AddSubMatrix(rows, cols, *db); + } + + delete db; + } + } + } + + const int num_offd_cols = offd_cols.size(); + MFEM_VERIFY(num_offd_cols == offd_size, ""); + cmap = new HYPRE_BigInt[num_offd_cols]; + std::map cmap_inv; + + int cnt = 0; + for (auto col : offd_cols) + { + cmap[cnt] = col; + cmap_inv[col] = cnt; + + cnt++; + } + + MFEM_VERIFY(cnt == num_offd_cols, ""); + + diag.Finalize(); + offd.Finalize(); + + if (num_offd_cols > 0) + { + // Map column indices in offd + int *offI = offd.GetI(); + int *offJ = offd.GetJ(); + + const int ne = offI[localSizes[rank]]; // Total number of entries in offd + + for (int i=0; i starts(2); + starts[0] = offset[rank]; + starts[1] = offset[rank + 1]; + + MFEM_VERIFY(HYPRE_AssumedPartitionCheck(), ""); + + if (nproc == 1) // Serial case + { + // constructor with 4 arguments, v1 + romMat_hypre = new HypreParMatrix(MPI_COMM_WORLD, gsize, + starts.GetData(), &diag); + } + else + { + // constructor with 8+1 arguments + romMat_hypre = new HypreParMatrix(MPI_COMM_WORLD, gsize, gsize, + starts.GetData(), starts.GetData(), + hdiag, hoffd, cmap, true); + romMat_hypre->SetOwnerFlags(romMat_hypre->OwnsDiag(), romMat_hypre->OwnsOffd(), 1); + } + + hypre_start = starts[0]; + + reduced_rhs_hypre.SetSize(starts[1] - starts[0]); + + delete mumps; + mumps = new MUMPSSolver(MPI_COMM_WORLD); + mumps->SetMatrixSymType(mat_type); + mumps->SetOperator(*romMat_hypre); +} + void MFEMROMHandler::SetRomMat(BlockMatrix *input_mat, const bool init_direct_solver) { if (romMat != input_mat) @@ -934,6 +1217,8 @@ void MFEMROMHandler::SetRomMat(BlockMatrix *input_mat, const bool init_direct_so void MFEMROMHandler::SaveRomSystem(const std::string &input_prefix, const std::string type) { + if (topol_handler->GetRank() != 0) return; // Only the root process writes files. + if (!romMat_mono) { assert(romMat); @@ -1017,19 +1302,19 @@ IterativeSolver* MFEMROMHandler::SetIterativeSolver(const MFEMROMHandler::Solver { case (SolverType::CG): { - if (prec_type == "amg") solver = new CGSolver(MPI_COMM_SELF); - else solver = new CGSolver(); + if (prec_type == "amg") solver = new CGSolver(MPI_COMM_WORLD); + else solver = new CGSolver(MPI_COMM_WORLD); break; } case (SolverType::MINRES): { - if (prec_type == "amg") solver = new MINRESSolver(MPI_COMM_SELF); + if (prec_type == "amg") solver = new MINRESSolver(MPI_COMM_WORLD); else solver = new MINRESSolver(); break; } case (SolverType::GMRES): { - if (prec_type == "amg") solver = new GMRESSolver(MPI_COMM_SELF); + if (prec_type == "amg") solver = new GMRESSolver(MPI_COMM_WORLD); else solver = new GMRESSolver(); break; } @@ -1049,9 +1334,6 @@ void MFEMROMHandler::SetupDirectSolver() if ((linsol_type != MFEMROMHandler::SolverType::DIRECT)) return; - assert(romMat_mono); - delete romMat_hypre, mumps; - // TODO: need to change when the actual parallelization is implemented. 
sys_glob_size = romMat_mono->NumRows(); sys_row_starts[0] = 0; diff --git a/src/rom_nonlinearform.cpp b/src/rom_nonlinearform.cpp index 9b7bf00d..2805e66f 100644 --- a/src/rom_nonlinearform.cpp +++ b/src/rom_nonlinearform.cpp @@ -240,9 +240,11 @@ void ROMNonlinearForm::Mult(const Vector &x, Vector &y) const break; } Array &bdr_marker = *bfnfi_marker[k]; + /* MFEM_ASSERT(bdr_marker.Size() == bdr_attr_marker.Size(), "invalid boundary marker for boundary face integrator #" << k << ", counting from zero"); + */ for (int i = 0; i < bdr_attr_marker.Size(); i++) { bdr_attr_marker[i] |= bdr_marker[i]; @@ -520,9 +522,11 @@ Operator& ROMNonlinearForm::GetGradient(const Vector &x) const break; } Array &bdr_marker = *bfnfi_marker[k]; + /* MFEM_ASSERT(bdr_marker.Size() == bdr_attr_marker.Size(), "invalid boundary marker for boundary face integrator #" << k << ", counting from zero"); + */ for (int i = 0; i < bdr_attr_marker.Size(); i++) { bdr_attr_marker[i] |= bdr_marker[i]; @@ -672,9 +676,11 @@ void ROMNonlinearForm::PrecomputeCoefficients() break; } Array &bdr_marker = *bfnfi_marker[k]; + /* MFEM_ASSERT(bdr_marker.Size() == bdr_attr_marker.Size(), "invalid boundary marker for boundary face integrator #" << k << ", counting from zero"); + */ for (int i = 0; i < bdr_attr_marker.Size(); i++) { bdr_attr_marker[i] |= bdr_marker[i]; @@ -774,9 +780,11 @@ void ROMNonlinearForm::TrainEQP(const CAROM::Matrix &snapshots, const double eqp else { Array &bdr_marker = *bfnfi_marker[k]; + /* MFEM_ASSERT(bdr_marker.Size() == bdr_attr_marker.Size(), "invalid boundary marker for boundary face integrator #" << k << ", counting from zero"); + */ for (int i = 0; i < bdr_attr_marker.Size(); i++) { bdr_attr_marker[i] |= bdr_marker[i]; diff --git a/src/sample_generator.cpp b/src/sample_generator.cpp index d58c0e68..8c76fdbc 100644 --- a/src/sample_generator.cpp +++ b/src/sample_generator.cpp @@ -405,7 +405,7 @@ void SampleGenerator::CollectSnapshotsByBasis(const std::string &basis_prefix, CAROM::BasisGenerator *basis_generator = snapshot_generators[index]; for (int s = 0; s < file_list.size(); s++) - basis_generator->loadSamples(file_list[s], "snapshot", 1e9, CAROM::Database::formats::HDF5_MPIO); + basis_generator->loadSamples(file_list[s], "snapshot", 1e9, CAROM::Database::formats::HDF5_MPIO); } void SampleGenerator::CollectSnapshotsByPort( diff --git a/src/stokes_solver.cpp b/src/stokes_solver.cpp index 2ff664a1..e17fb466 100644 --- a/src/stokes_solver.cpp +++ b/src/stokes_solver.cpp @@ -47,10 +47,13 @@ StokesSolver::StokesSolver() fec[1] = new H1_FECollection(porder, dim); } - fes.SetSize(num_var * numSub); - ufes.SetSize(numSub); - pfes.SetSize(numSub); - for (int m = 0; m < numSub; m++) { + fes.SetSize(num_var * numSubStored); + ufes.SetSize(numSubStored); + pfes.SetSize(numSubStored); + // TODO: just construct one fes per mesh type, not all subdomains? + // There are pros and cons to both approaches. + + for (int m = 0; m < numSubStored; m++) { ufes[m] = new FiniteElementSpace(meshes[m], fec[0], dim); pfes[m] = new FiniteElementSpace(meshes[m], fec[1]); // NOTE: ownership is in fes, not ufes and pfes! 
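The three subdomain counters used from here on are not spelled out in the patch itself; the summary below is an inferred convention based on how they are used (an assumption, not a documented API), illustrated with a toy snippet:

#include <cstdio>

// Inferred convention (assumption from usage in this patch):
//   numSub       : total subdomains in the global configuration
//   numSubLoc    : subdomains owned by this MPI rank (solution blocks, offsets)
//   numSubStored : owned subdomains plus neighbor subdomains whose meshes and
//                  FE spaces are kept locally to assemble interface terms,
//                  which is why fes, ms, bs, m_mats, b_mats, pms are sized
//                  with numSubStored while solution data uses numSubLoc.
int main()
{
   const int numSub = 64, numSubLoc = 16, numNeighbors = 6; // example values only
   const int numSubStored = numSubLoc + numNeighbors;
   std::printf("global %d, owned %d, stored %d\n", numSub, numSubLoc, numSubStored);
   return 0;
}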
@@ -143,22 +146,22 @@ void StokesSolver::AddBCFunction(const Vector &F, const int battr) void StokesSolver::InitVariables() { - // number of blocks = solution dimension * number of subdomain; - block_offsets.SetSize(udim * numSub + 1); - var_offsets.SetSize(num_var * numSub + 1); - num_vdofs.SetSize(numSub); - u_offsets.SetSize(numSub + 1); - p_offsets.SetSize(numSub + 1); + // number of blocks = solution dimension * number of subdomains; + block_offsets.SetSize(udim * numSubLoc + 1); + var_offsets.SetSize(num_var * numSubLoc + 1); + num_vdofs.SetSize(numSubLoc); + u_offsets.SetSize(numSubLoc + 1); + p_offsets.SetSize(numSubLoc + 1); block_offsets[0] = 0; var_offsets[0] = 0; u_offsets[0] = 0; p_offsets[0] = 0; - domain_offsets.SetSize(numSub + 1); + domain_offsets.SetSize(numSubLoc + 1); domain_offsets = 0; - for (int m = 0, block_idx = 1, var_idx=1; m < numSub; m++) + for (int m = 0, block_idx = 1, var_idx=1; m < numSubLoc; m++) { for (int v = 0; v < num_var; v++, var_idx++) { @@ -174,7 +177,7 @@ void StokesSolver::InitVariables() p_offsets[m + 1] = pfes[m]->GetVSize(); } block_offsets.PartialSum(); - domain_offsets.GetSubArray(1, numSub, num_vdofs); + domain_offsets.GetSubArray(1, numSubLoc, num_vdofs); var_offsets.PartialSum(); domain_offsets.PartialSum(); u_offsets.PartialSum(); @@ -201,10 +204,10 @@ void StokesSolver::InitVariables() These are system-specific, therefore not defining it now. */ - us.SetSize(num_var * numSub); - vels.SetSize(numSub); - ps.SetSize(numSub); - for (int m = 0; m < numSub; m++) + us.SetSize(num_var * numSubLoc); + vels.SetSize(numSubLoc); + ps.SetSize(numSubLoc); + for (int m = 0; m < numSubLoc; m++) { vels[m] = new GridFunction(ufes[m], U->GetBlock(num_var * m), 0); ps[m] = new GridFunction(pfes[m], U->GetBlock(num_var * m + 1), 0); @@ -217,6 +220,13 @@ void StokesSolver::InitVariables() } f_coeffs.SetSize(0); + + global_offsets.SetSize(nproc + 1); + global_offsets[0] = 0; + MPI_Allgather(&vblock_offsets[2], 1, MPI_INT, + &global_offsets[1], 1, MPI_INT, MPI_COMM_WORLD); + + global_offsets.PartialSum(); } void StokesSolver::DeterminePressureDirichlet() @@ -240,12 +250,12 @@ void StokesSolver::BuildRHSOperators() { SanityCheckOnCoeffs(); - fs.SetSize(numSub); - gs.SetSize(numSub); + fs.SetSize(numSubLoc); + gs.SetSize(numSubLoc); // These are heavily system-dependent. // Based on scalar/vector system, different integrators/coefficients will be used. - for (int m = 0; m < numSub; m++) + for (int m = 0; m < numSubLoc; m++) { fs[m] = new LinearForm; fs[m]->Update(ufes[m], RHS->GetBlock(num_var * m), 0); @@ -263,10 +273,10 @@ void StokesSolver::BuildDomainOperators() { SanityCheckOnCoeffs(); - ms.SetSize(numSub); - bs.SetSize(numSub); + ms.SetSize(numSubStored); + bs.SetSize(numSubStored); - for (int m = 0; m < numSub; m++) + for (int m = 0; m < numSubStored; m++) { ms[m] = new BilinearForm(ufes[m]); bs[m] = new MixedBilinearFormDGExtension(ufes[m], pfes[m]); @@ -286,8 +296,8 @@ void StokesSolver::BuildDomainOperators() b_itf->AddInterfaceIntegrator(new InterfaceDGNormalFluxIntegrator); // pressure mass matrix for preconditioner. 
- pms.SetSize(numSub); - for (int m = 0; m < numSub; m++) + pms.SetSize(numSubStored); + for (int m = 0; m < numSubStored; m++) { pms[m] = new BilinearForm(pfes[m]); pms[m]->AddDomainIntegrator(new MassIntegrator); @@ -312,10 +322,10 @@ void StokesSolver::SetupRHSBCOperators() { SanityCheckOnCoeffs(); - assert(fs.Size() == numSub); - assert(gs.Size() == numSub); + assert(fs.Size() == numSubLoc); + assert(gs.Size() == numSubLoc); - for (int m = 0; m < numSub; m++) + for (int m = 0; m < numSubLoc; m++) { assert(fs[m] && gs[m]); for (int b = 0; b < global_bdr_attributes.Size(); b++) @@ -345,10 +355,10 @@ void StokesSolver::SetupDomainBCOperators() { SanityCheckOnCoeffs(); - assert(ms.Size() == numSub); - assert(bs.Size() == numSub); + assert(ms.Size() == numSubStored); + assert(bs.Size() == numSubStored); - for (int m = 0; m < numSub; m++) + for (int m = 0; m < numSubStored; m++) { assert(ms[m] && bs[m]); for (int b = 0; b < global_bdr_attributes.Size(); b++) @@ -378,10 +388,10 @@ void StokesSolver::AssembleRHS() { SanityCheckOnCoeffs(); - assert(fs.Size() == numSub); - assert(gs.Size() == numSub); + assert(fs.Size() == numSubLoc); + assert(gs.Size() == numSubLoc); - for (int m = 0; m < numSub; m++) + for (int m = 0; m < numSubLoc; m++) { assert(fs[m] && gs[m]); fs[m]->Assemble(); @@ -407,20 +417,21 @@ void StokesSolver::AssembleOperatorBase() { SanityCheckOnCoeffs(); - assert(ms.Size() == numSub); - assert(bs.Size() == numSub); + assert(ms.Size() == numSubStored); + assert(bs.Size() == numSubStored); - for (int m = 0; m < numSub; m++) + for (int m = 0; m < numSubStored; m++) { assert(ms[m] && bs[m]); ms[m]->Assemble(); bs[m]->Assemble(); } - m_mats.SetSize(numSub, numSub); - b_mats.SetSize(numSub, numSub); - for (int i = 0; i < numSub; i++) - for (int j = 0; j < numSub; j++) + // TODO: do we only need numSubLoc diagonal blocks? + m_mats.SetSize(numSubStored, numSubStored); + b_mats.SetSize(numSubStored, numSubStored); + for (int i = 0; i < numSubStored; i++) + for (int j = 0; j < numSubStored; j++) { if (i == j) { @@ -436,7 +447,7 @@ void StokesSolver::AssembleOperatorBase() AssembleInterfaceMatrices(); - for (int m = 0; m < numSub; m++) + for (int m = 0; m < numSubStored; m++) { ms[m]->Finalize(); bs[m]->Finalize(); @@ -447,11 +458,13 @@ void StokesSolver::AssembleOperatorBase() // This is especially true for vector solution, where ordering of component is changed. // This is quite inevitable, but is it desirable? // globalMat = new BlockMatrix(domain_offsets); + mMat = new BlockMatrix(u_offsets); bMat = new BlockMatrix(p_offsets, u_offsets); - for (int i = 0; i < numSub; i++) + + for (int i = 0; i < numSubStored; i++) { - for (int j = 0; j < numSub; j++) + for (int j = 0; j < numSubStored; j++) { if (i != j) { @@ -459,23 +472,256 @@ void StokesSolver::AssembleOperatorBase() b_mats(i, j)->Finalize(); } - mMat->SetBlock(i, j, m_mats(i, j)); - bMat->SetBlock(i, j, b_mats(i, j)); + if (nproc == 1) + { + mMat->SetBlock(i, j, m_mats(i, j)); + bMat->SetBlock(i, j, b_mats(i, j)); + } } } - // global block matrix. - M = mMat->CreateMonolithic(); - B = bMat->CreateMonolithic(); - Bt = Transpose(*B); + // TODO: use HypreParMatrix even if nproc == 1? + if (nproc == 1) + { + // global block matrix. 
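The monolithic operator assembled in the nproc == 1 branch (continued just below) has the classic Stokes saddle-point layout, with velocity rows first and pressure rows second, i.e. block offsets {0, nU, nU + nP}. A small standalone illustration of that layout using dense arrays (purely illustrative, not the assembly path used by the solver):

#include <cstdio>
#include <vector>

int main()
{
   const int nU = 3, nP = 2;                      // stand-in block sizes
   std::vector<std::vector<double>> M(nU, std::vector<double>(nU, 0.0));
   std::vector<std::vector<double>> B(nP, std::vector<double>(nU, 0.0));
   for (int i = 0; i < nU; ++i) M[i][i] = 2.0;    // toy velocity block
   for (int i = 0; i < nP; ++i) B[i][i] = 1.0;    // toy divergence block

   // Global system S = [ M  B^T ; B  0 ] with offsets {0, nU, nU + nP}.
   const int n = nU + nP;
   std::vector<std::vector<double>> S(n, std::vector<double>(n, 0.0));
   for (int i = 0; i < nU; ++i)
      for (int j = 0; j < nU; ++j) S[i][j] = M[i][j];   // block (0,0) = M
   for (int i = 0; i < nP; ++i)
      for (int j = 0; j < nU; ++j)
      {
         S[nU + i][j] = B[i][j];                        // block (1,0) = B
         S[j][nU + i] = B[i][j];                        // block (0,1) = B^T
      }                                                 // block (1,1) = 0

   for (int i = 0; i < n; ++i, std::printf("\n"))
      for (int j = 0; j < n; ++j) std::printf("%5.1f ", S[i][j]);
   return 0;
}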
+ M = mMat->CreateMonolithic(); + B = bMat->CreateMonolithic(); + Bt = Transpose(*B); + + systemOp = new BlockMatrix(vblock_offsets); + systemOp->SetBlock(0,0, M); + systemOp->SetBlock(0,1, Bt); + systemOp->SetBlock(1,0, B); + } + else + { + CreateHypreParMatrix(); + } +} + +void StokesSolver::SetGlobalOffsets() +{ + // TODO: use point-to-point communication instead of global communication, for better scalability. + + Array allNumSub, disp; + topol_handler->GetAllNumSub(allNumSub); + + disp.SetSize(nproc); + disp[0] = 0; + for (int i=1; iSetBlock(0,0, M); - systemOp->SetBlock(0,1, Bt); - systemOp->SetBlock(1,0, B); + global_u_offsets.SetSize(numSub); + global_p_offsets.SetSize(numSub); + + MPI_Allgatherv(u_offsets.GetData(), numSubLoc, MPI_INT, global_u_offsets.GetData(), allNumSub.GetData(), disp.GetData(), MPI_INT, MPI_COMM_WORLD); + MPI_Allgatherv(p_offsets.GetData(), numSubLoc, MPI_INT, global_p_offsets.GetData(), allNumSub.GetData(), disp.GetData(), MPI_INT, MPI_COMM_WORLD); + + const int os_u = u_offsets[numSubLoc]; + global_os_u.SetSize(nproc); + MPI_Allgather(&os_u, 1, MPI_INT, global_os_u.GetData(), 1, MPI_INT, MPI_COMM_WORLD); +} + +void StokesSolver::CreateHypreParMatrix() +{ + const HYPRE_BigInt gsize = global_offsets[nproc]; + Array starts(2); + starts[0] = global_offsets[rank]; + starts[1] = global_offsets[rank + 1]; + + SetGlobalOffsets(); + + const int localSize = starts[1] - starts[0]; + hdiag = new SparseMatrix(localSize, localSize); // Owned by systemOp_hypre + hoffd = new SparseMatrix(localSize, gsize); // Owned by systemOp_hypre + + std::set offd_cols; + + // Blocks are ordered with all u-blocks followed by all p-blocks, for each rank. + const int os_u = u_offsets[numSubLoc]; + + for (int i = 0; i < numSubLoc; i++) + { + for (int j = 0; j < numSubLoc; j++) + { + if (m_mats(i, j)) + { + const int oscol = u_offsets[j]; + for (int r=0; rNumRows(); ++r) + { + Array cols; + Vector srow; + m_mats(i, j)->GetRow(r, cols, srow); + + MFEM_VERIFY(cols.Size() == srow.Size(), ""); + + const int row = u_offsets[i] + r; + for (int l=0; lSet(row, cols[l] + oscol, srow[l]); + } + } + } + + if (b_mats(i, j)) + { + const int oscol_p = os_u + p_offsets[i]; + for (int r=0; rNumRows(); ++r) + { + Array cols; + Vector srow; + b_mats(i, j)->GetRow(r, cols, srow); + + MFEM_VERIFY(cols.Size() == srow.Size(), ""); + + for (int l=0; lSet(u_offsets[j] + cols[l], r + oscol_p, srow[l]); + + // Block (1, 0, B) + hdiag->Set(os_u + p_offsets[i] + r, u_offsets[j] + cols[l], srow[l]); + } + } + } + } + + for (int j = numSubLoc; j < numSubStored; j++) + { + const int globalSub = neighbors[j - numSubLoc]; + const int rank_j = topol_handler->GlobalSubdomainRank(globalSub); + const int gos_j = global_offsets[rank_j]; + + if (m_mats(i, j)) + { + const int oscol = gos_j + global_u_offsets[globalSub]; + for (int r=0; rNumRows(); ++r) + { + Array cols; + Vector srow; + m_mats(i, j)->GetRow(r, cols, srow); + + MFEM_VERIFY(cols.Size() == srow.Size(), ""); + + const int row = u_offsets[i] + r; + for (int l=0; lSet(row, cols[l] + oscol, srow[l]); + offd_cols.insert(cols[l] + oscol); + } + } + } + + if (b_mats(i, j)) + { + MFEM_VERIFY(b_mats(j, i), "Assuming symmetry w.r.t. 
components"); + + for (int r=0; rNumRows(); ++r) // B_{i,j} + { + Array cols; + Vector srow; + b_mats(i, j)->GetRow(r, cols, srow); + MFEM_VERIFY(cols.Size() == srow.Size(), ""); + + for (int l=0; lSet(r + os_u + p_offsets[i], + cols[l] + gos_j + global_u_offsets[globalSub], + srow[l]); + + offd_cols.insert(cols[l] + gos_j + global_u_offsets[globalSub]); + } + } + + // TODO: can this be optimized by using symmetry, so that B_{i,j} can be used instead of assembling B_{j,i}? + for (int r=0; rNumRows(); ++r) // B_{j,i} + { + Array cols; + Vector srow; + b_mats(j, i)->GetRow(r, cols, srow); + MFEM_VERIFY(cols.Size() == srow.Size(), ""); + + for (int l=0; lSet(u_offsets[i] + cols[l], + r + gos_j + global_os_u[rank_j] + global_p_offsets[globalSub], + srow[l]); + + offd_cols.insert(r + gos_j + global_os_u[rank_j] + global_p_offsets[globalSub]); + } + } + } + } + } + + // TODO: avoid making so many copies of the same matrices. + + const int num_offd_cols = offd_cols.size(); + + hdiag->Finalize(); + hoffd->Finalize(); + + cmap = new HYPRE_BigInt[num_offd_cols]; + std::map cmap_inv; + + int cnt = 0; + for (auto col : offd_cols) + { + cmap[cnt] = col; + cmap_inv[col] = cnt; + + cnt++; + } + + MFEM_VERIFY(cnt == num_offd_cols, ""); + + if (num_offd_cols > 0) + { + // Map column indices in hoffd + int *offI = hoffd->GetI(); + int *offJ = hoffd->GetJ(); + + const int ne = offI[localSize]; // Total number of entries in hoffd + + for (int i=0; iSortColumnIndices(); + } + + hoffd->SetWidth(num_offd_cols); + + // See rom_handler.cpp for another case of using this constructor with 8+1 arguments. + // constructor with 8+1 arguments + systemOp_hypre = new HypreParMatrix(MPI_COMM_WORLD, gsize, gsize, + starts.GetData(), starts.GetData(), + hdiag, hoffd, cmap, true); + + systemOp_hypre->SetOwnerFlags(systemOp_hypre->OwnsDiag(), systemOp_hypre->OwnsOffd(), 1); } void StokesSolver::SetupMUMPSSolver(bool set_oper, const MUMPSSolver::MatType mat_type) +{ + if (nproc == 1) + SetupMUMPSSolverSerial(); + else + SetupMUMPSSolverParallel(); + + mumps->SetMatrixSymType(mat_type); + if (set_oper) mumps->SetOperator(*systemOp_hypre); +} + +void StokesSolver::SetupMUMPSSolverParallel() +{ + mumps = new MUMPSSolver(MPI_COMM_WORLD); +} + +void StokesSolver::SetupMUMPSSolverSerial() { assert(systemOp); delete systemOp_mono; @@ -491,18 +737,16 @@ void StokesSolver::SetupMUMPSSolver(bool set_oper, const MUMPSSolver::MatType ma systemOp_hypre = new HypreParMatrix(MPI_COMM_SELF, sys_glob_size, sys_row_starts, systemOp_mono); mumps = new MUMPSSolver(MPI_COMM_SELF); - mumps->SetMatrixSymType(mat_type); - if (set_oper) mumps->SetOperator(*systemOp_hypre); } void StokesSolver::SetupPressureMassMatrix() { - assert(pms.Size() == numSub); + assert(pms.Size() == numSubStored); delete pmMat; delete pM; pmMat = new BlockMatrix(p_offsets); - for (int m = 0; m < numSub; m++) + for (int m = 0; m < numSubStored; m++) { assert(pms[m]); pms[m]->Assemble(); @@ -715,12 +959,12 @@ bool StokesSolver::Solve(SampleGenerator *sample_generator) int print_level = config.GetOption("solver/print_level", 0); // same size as var_offsets, but sorted by variables first (then by subdomain). 
- Array offsets_byvar(num_var * numSub + 1); + Array offsets_byvar(num_var * numSubLoc + 1); offsets_byvar = 0; - for (int k = 0; k < numSub; k++) + for (int k = 0; k < numSubLoc; k++) { offsets_byvar[k+1] = u_offsets[k+1]; - offsets_byvar[k+1 + numSub] = p_offsets[k+1] + u_offsets.Last(); + offsets_byvar[k+1 + numSubLoc] = p_offsets[k+1] + u_offsets.Last(); } // sort out solution/rhs by variables. @@ -1445,4 +1689,4 @@ void StokesSolver::ComputeBEIntegral( Qvec *= Tr.Weight() * ip.weight; result += Qvec; } -} \ No newline at end of file +} diff --git a/src/topology_handler.cpp b/src/topology_handler.cpp index 3f5670a4..ee4cce47 100644 --- a/src/topology_handler.cpp +++ b/src/topology_handler.cpp @@ -31,6 +31,10 @@ const TopologyHandlerMode SetTopologyHandlerMode() TopologyHandler::TopologyHandler(const TopologyHandlerMode &input_type) : type(input_type) { + comm = MPI_COMM_WORLD; + MPI_Comm_size(comm, &nprocs); + MPI_Comm_rank(comm, &rank); + std::string dd_mode_str = config.GetOption("domain-decomposition/type", "interior_penalty"); if (dd_mode_str == "interior_penalty") { @@ -163,6 +167,125 @@ void TopologyHandler::PrintInterfaceInfo(const int k) } } +//#define PARTITION2D + +void TopologyHandler::LoadBalance() +{ + // TODO: balance the number of DOFs per rank, rather than the number of subdomains. + const int nloc = numSub / nprocs; // Number of subdomains per rank (may be low due to integer division) + const int ne = numSub - (nloc * nprocs); // Number of ranks assigned an extra subdomain + + subdomain_rank.SetSize(numSub); + allNumSub.SetSize(nprocs); + +#ifdef PARTITION2D + const int ns1 = sqrt(numSub); + MFEM_VERIFY(ns1 * ns1 == numSub, ""); + MFEM_VERIFY(nloc * nprocs == numSub, ""); + + const int np1 = sqrt(nprocs); + MFEM_VERIFY(np1 * np1 == nprocs, ""); + + const int nloc1 = ns1 / np1; + MFEM_VERIFY(np1 * nloc1 == ns1, ""); + MFEM_VERIFY(nloc1 * nloc1 == nloc, ""); +#endif + + int os = 0; +#ifdef PARTITION2D + for (int l=0; l &ns) +{ + ns = allNumSub; +} + +void TopologyHandler::FindPortNeighborSubdomains() +{ + MFEM_VERIFY(port_infos.Size() == num_ports, ""); + + subNeighbors.clear(); + for (auto pi : port_infos) + { + std::array meshIndex = {pi.Mesh1, pi.Mesh2}; + bool localPort = false; + for (auto m : meshIndex) + { + if (LocalSubdomainIndex(m) >= 0) // if subdomain is local + localPort = true; + } + + if (!localPort) continue; + + for (auto m : meshIndex) + { + if (LocalSubdomainIndex(m) == -1) + subNeighbors.insert(m); + } + } +} + +void TopologyHandler::GetNeighbors(Array &neighbors) +{ + neighbors.SetSize(subNeighbors.size()); + + int cnt = 0; + for (auto n : subNeighbors) + { + neighbors[cnt++] = n; + } + + MFEM_VERIFY(cnt == neighbors.Size(), ""); +} + /* SubMeshTopologyHandler */ @@ -212,6 +335,7 @@ SubMeshTopologyHandler::SubMeshTopologyHandler(Mesh* pmesh_) /* for SubMeshTopologyHandler, each subdomain corresponds to a unique component. */ num_comp = numSub; + numSubLoc = numSub; /* inidividual subdomains are unique */ mesh_types.SetSize(numSub); diff --git a/test/test_topol.cpp b/test/test_topol.cpp index ca0924ce..4119ea98 100644 --- a/test/test_topol.cpp +++ b/test/test_topol.cpp @@ -109,8 +109,11 @@ TEST(PortReadWrite_test, Test_topol) int main(int argc, char* argv[]) { + MPI_Init(&argc, &argv); ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); + int result = RUN_ALL_TESTS(); + MPI_Finalize(); + return result; } void CompareWithSubMesh() @@ -155,4 +158,4 @@ void CompareWithSubMesh() } } return; -} \ No newline at end of file +}
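FindPortNeighborSubdomains above walks the port list and, for every port touching at least one locally owned subdomain, records the other side as a neighbor when that side is not owned; these neighbors are the subdomains that must be stored as ghosts for interface assembly. A standalone sketch of that detection, where the Port struct and FindNeighbors are hypothetical stand-ins for the library's types:

#include <cstdio>
#include <set>
#include <vector>

// A port couples two subdomains (Mesh1, Mesh2). If either side is owned by
// this rank the port is local, and any non-owned side becomes a neighbor.
struct Port { int Mesh1, Mesh2; };

std::set<int> FindNeighbors(const std::vector<Port> &ports,
                            const std::set<int> &owned)
{
   std::set<int> neighbors;
   for (const Port &p : ports)
   {
      const int m[2] = {p.Mesh1, p.Mesh2};
      const bool local = owned.count(m[0]) || owned.count(m[1]);
      if (!local) continue;
      for (int k = 0; k < 2; ++k)
         if (!owned.count(m[k])) neighbors.insert(m[k]);
   }
   return neighbors;
}

int main()
{
   std::set<int> owned = {0, 1};
   std::vector<Port> ports = {{0, 2}, {1, 3}, {2, 3}, {0, 1}};
   for (int n : FindNeighbors(ports, owned)) std::printf("neighbor %d\n", n);
   return 0; // prints: neighbor 2, neighbor 3
}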