diff --git a/bin/main.cpp b/bin/main.cpp index 4522a05b..e84e19a3 100644 --- a/bin/main.cpp +++ b/bin/main.cpp @@ -28,11 +28,13 @@ int main(int argc, char *argv[]) { if (rank == 0) RunExample(); } else { - if (mode == "sample_generation") GenerateSamples(MPI_COMM_WORLD); - else if (mode == "build_rom") BuildROM(MPI_COMM_WORLD); - else if (mode == "train_rom") TrainROM(MPI_COMM_WORLD); - else if (mode == "train_eqp") TrainEQP(MPI_COMM_WORLD); - else if (mode == "single_run") double dump = SingleRun(MPI_COMM_WORLD, output_file); + if (mode == "sample_generation") GenerateSamples(MPI_COMM_WORLD); + else if (mode == "build_rom") BuildROM(MPI_COMM_WORLD); + else if (mode == "train_rom") TrainROM(MPI_COMM_WORLD); + else if (mode == "aux_train_rom") AuxiliaryTrainROM(MPI_COMM_WORLD); + else if (mode == "train_eqp") TrainEQP(MPI_COMM_WORLD); + else if (mode == "single_run") SingleRun(MPI_COMM_WORLD, output_file); + else if (mode == "print_eqp") PrintEQPCoords(MPI_COMM_WORLD); else { if (rank == 0) printf("Unknown mode %s!\n", mode.c_str()); diff --git a/include/etc.hpp b/include/etc.hpp index f144f62a..9aad33e9 100644 --- a/include/etc.hpp +++ b/include/etc.hpp @@ -6,6 +6,7 @@ #define ETC_HPP #include "mfem.hpp" +#include "input_parser.hpp" using namespace mfem; using namespace std; @@ -38,4 +39,113 @@ std::string string_format( const std::string& format, Args ... args ) return std::string( buf.get(), buf.get() + size - 1 ); // We don't want the '\0' inside } +class TimeProfiler +{ +private: + std::string title = ""; + + Array timers; + + Array calls; + Array starts; + std::vector names; + std::unordered_map indices; + + const MPI_Comm comm; + int rank; + +public: + TimeProfiler(const std::string &title_, MPI_Comm comm_=MPI_COMM_WORLD) + : title(title_), comm(comm_) + { + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + calls.SetSize(0); + names.clear(); + indices.clear(); + } + + virtual ~TimeProfiler() + { + std::string option = "time_profile/" + title; + if (config.GetOption(option, false)) + Print(title); + DeletePointers(timers); + } + + void SetTitle(const std::string &title_) + { title = title_; } + + void Start(const std::string &name) + { + if (!indices.count(name)) + { + indices[name] = timers.Size(); + timers.Append(new StopWatch); + names.push_back(name); + calls.Append(0); + starts.Append(false); + } + + assert(indices.count(name)); + + int idx = indices[name]; + if (starts[idx]) + { + printf("TimeProfiler: %s is already started!\n", name.c_str()); + assert(!starts[idx]); + } + + timers[idx]->Start(); + starts[idx] = true; + } + + void Stop(const std::string &name) + { + assert(indices.count(name)); + + int idx = indices[name]; + if (!starts[idx]) + { + printf("TimeProfiler: %s is not started!\n", name.c_str()); + assert(starts[idx]); + } + + timers[idx]->Stop(); + calls[idx] += 1; + starts[idx] = false; + } + + void Print(const std::string &title_, const bool compute_sum=false) + { + int nfunc = timers.Size(); + for (int k = 0; k < nfunc; k++) + assert(!starts[k]); + + Array times(nfunc); + for (int k = 0; k < nfunc; k++) + times[k] = timers[k]->RealTime(); + double total = times.Sum(); + + MPI_Reduce(MPI_IN_PLACE, times.GetData(), nfunc, MPI_DOUBLE, MPI_SUM, 0, comm); + MPI_Reduce(MPI_IN_PLACE, calls.GetData(), nfunc, MPI_INT, MPI_SUM, 0, comm); + + if (rank == 0) + { + printf("%s", (title_ + "\n").c_str()); + + std::string line = std::string(100, '='); + line += "\n"; + printf("%s", line.c_str()); + printf("%20s\t%20s\t%20s\t%20s\n", "Function", "Total time", "Calls", "Time per call"); + for (int k = 0; k < nfunc; k++) + { + printf("%20s\t%20.5e\t%20d\t%20.5e\n", names[k].c_str(), times[k], calls[k], times[k] / calls[k]); + } + if (compute_sum) + printf("%20s\t%20.5e\t%20d\t%20.5e\n", "Total time", total, 1, total); + printf("%s", line.c_str()); + } + } +}; + #endif diff --git a/include/interface_form.hpp b/include/interface_form.hpp index 515960b5..24ffdf6a 100644 --- a/include/interface_form.hpp +++ b/include/interface_form.hpp @@ -16,6 +16,8 @@ namespace mfem class InterfaceForm { protected: + mutable TimeProfiler timer; + int numSub = -1; int skip_zeros = 1; diff --git a/include/interfaceinteg.hpp b/include/interfaceinteg.hpp index a48f69c7..4f251372 100644 --- a/include/interfaceinteg.hpp +++ b/include/interfaceinteg.hpp @@ -7,6 +7,7 @@ #include "mfem.hpp" #include "hyperreduction_integ.hpp" +#include "etc.hpp" namespace mfem { @@ -298,9 +299,13 @@ class DGLaxFriedrichsFluxIntegrator : public InterfaceNonlinearFormIntegrator DenseMatrix udof1, udof2, elv1, elv2; DenseMatrix elmat_comp11, elmat_comp12, elmat_comp21, elmat_comp22; + mutable TimeProfiler timer; + public: DGLaxFriedrichsFluxIntegrator(Coefficient &q, VectorCoefficient *ud = NULL, const IntegrationRule *ir = NULL) - : InterfaceNonlinearFormIntegrator(ir), Q(&q), UD(ud) {} + : InterfaceNonlinearFormIntegrator(ir), Q(&q), UD(ud), timer("DGLaxFriedrichsFluxIntegrator") {} + + virtual ~DGLaxFriedrichsFluxIntegrator() {}; void AssembleFaceVector(const FiniteElement &el1, const FiniteElement &el2, diff --git a/include/main_workflow.hpp b/include/main_workflow.hpp index 946701a2..c08684b7 100644 --- a/include/main_workflow.hpp +++ b/include/main_workflow.hpp @@ -24,7 +24,7 @@ void CollectSamplesByBasis(SampleGenerator *sample_generator, const std::string void BuildROM(MPI_Comm comm); void TrainROM(MPI_Comm comm); // supremizer-enrichment etc.. -void AuxiliaryTrainROM(MPI_Comm comm, SampleGenerator *sample_generator); +void AuxiliaryTrainROM(MPI_Comm comm); // EQP training, could include hypre-reduction optimization. void TrainEQP(MPI_Comm comm); // Input parsing routine to list out all snapshot files for training a basis. @@ -32,4 +32,7 @@ void FindSnapshotFilesForBasis(const BasisTag &basis_tag, const std::string &def // return relative error if comparing solution. double SingleRun(MPI_Comm comm, const std::string output_file = ""); +// Auxiliary function to print out EQP point coordinates +void PrintEQPCoords(MPI_Comm comm); + #endif diff --git a/include/multiblock_solver.hpp b/include/multiblock_solver.hpp index c215ef49..ea327e0d 100644 --- a/include/multiblock_solver.hpp +++ b/include/multiblock_solver.hpp @@ -260,6 +260,8 @@ friend class ParameterizedProblem; void ComputeRelativeError(Array fom_sols, Array rom_sols, Vector &error); void CompareSolution(BlockVector &test_U, Vector &error); + virtual void SaveEQPCoords(const std::string &filename) {} + protected: virtual void AssembleROMMat(BlockMatrix &romMat); }; diff --git a/include/rom_interfaceform.hpp b/include/rom_interfaceform.hpp index e091fbd7..4193c71c 100644 --- a/include/rom_interfaceform.hpp +++ b/include/rom_interfaceform.hpp @@ -10,6 +10,7 @@ #include "hdf5_utils.hpp" #include "hyperreduction_integ.hpp" #include "linalg/NNLS.h" +#include "etc.hpp" namespace mfem { diff --git a/include/rom_nonlinearform.hpp b/include/rom_nonlinearform.hpp index 0ec1c360..2bbe8850 100644 --- a/include/rom_nonlinearform.hpp +++ b/include/rom_nonlinearform.hpp @@ -16,9 +16,11 @@ namespace mfem class ROMNonlinearForm : public NonlinearForm { +// private: +// static const int Nt = 6; +// mutable StopWatch *jac_timers[Nt]; private: - static const int Nt = 6; - mutable StopWatch *jac_timers[Nt]; + mutable TimeProfiler timer; protected: /// ROM basis for projection. @@ -231,6 +233,8 @@ class ROMNonlinearForm : public NonlinearForm The state @a x must be a true-dof vector. */ virtual Operator &GetGradient(const Vector &x) const; + void SaveDomainEQPCoords(const int k, hid_t file_id, const std::string &dsetname); + private: void PrecomputeDomainEQPSample(const IntegrationRule &ir, const DenseMatrix &basis, EQPSample &eqp_sample); void PrecomputeFaceEQPSample(const IntegrationRule &ir, const DenseMatrix &basis, diff --git a/include/steady_ns_solver.hpp b/include/steady_ns_solver.hpp index 13ee520d..c5bb02c8 100644 --- a/include/steady_ns_solver.hpp +++ b/include/steady_ns_solver.hpp @@ -8,6 +8,7 @@ #include "stokes_solver.hpp" #include "rom_nonlinearform.hpp" #include "rom_interfaceform.hpp" +#include "etc.hpp" // By convention we only use mfem namespace as default, not CAROM. using namespace mfem; @@ -17,6 +18,9 @@ using namespace mfem; // Ultimately, we should implement InterfaceForm to pass, not the MultiBlockSolver itself. class SteadyNSOperator : public Operator { +private: + mutable TimeProfiler timer; + protected: bool direct_solve; @@ -99,6 +103,9 @@ class SteadyNSTensorROM : public SteadyNSROM class SteadyNSEQPROM : public SteadyNSROM { +private: + mutable TimeProfiler timer; + protected: Array hs; // not owned by SteadyNSEQPROM. ROMInterfaceForm *itf = NULL; // not owned by SteadyNSEQPROM. @@ -189,6 +196,8 @@ friend class SteadyNSOperator; virtual void LoadROMNlinElems(const std::string &input_prefix) override; virtual void AssembleROMNlinOper() override; + void SaveEQPCoords(const std::string &filename) override; + private: DenseTensor* GetReducedTensor(DenseMatrix *basis, FiniteElementSpace *fespace); diff --git a/include/unsteady_ns_solver.hpp b/include/unsteady_ns_solver.hpp index fd3e367d..c579b57a 100644 --- a/include/unsteady_ns_solver.hpp +++ b/include/unsteady_ns_solver.hpp @@ -16,6 +16,9 @@ class UnsteadyNSSolver : public SteadyNSSolver friend class ParameterizedProblem; friend class SteadyNSOperator; +private: + mutable TimeProfiler timer; + protected: // number of timesteps @@ -67,6 +70,9 @@ friend class SteadyNSOperator; Array rom_u_offsets; BlockMatrix *rom_mass = NULL; +private: + double times[10]; + public: UnsteadyNSSolver(); diff --git a/src/interface_form.cpp b/src/interface_form.cpp index 4839a40d..5eae5853 100644 --- a/src/interface_form.cpp +++ b/src/interface_form.cpp @@ -12,7 +12,8 @@ namespace mfem InterfaceForm::InterfaceForm( Array &meshes_, Array &fes_, TopologyHandler *topol_) - : meshes(meshes_), fes(fes_), topol_handler(topol_), numSub(meshes_.Size()) + : meshes(meshes_), fes(fes_), topol_handler(topol_), + numSub(meshes_.Size()), timer("InterfaceForm") { assert(fes_.Size() == numSub); @@ -65,6 +66,9 @@ void InterfaceForm::AssembleInterfaceMatrices(Array2D &mats) con void InterfaceForm::InterfaceAddMult(const Vector &x, Vector &y) const { + timer.Start("Mult"); + timer.Start("Mult/init"); + x_tmp.Update(const_cast(x), block_offsets); y_tmp.Update(y, block_offsets); @@ -72,8 +76,12 @@ void InterfaceForm::InterfaceAddMult(const Vector &x, Vector &y) const Mesh *mesh1, *mesh2; FiniteElementSpace *fes1, *fes2; + timer.Stop("Mult/init"); + for (int p = 0; p < topol_handler->GetNumPorts(); p++) { + timer.Start("Mult/port-init"); + const PortInfo *pInfo = topol_handler->GetPortInfo(p); midx[0] = pInfo->Mesh1; @@ -86,17 +94,29 @@ void InterfaceForm::InterfaceAddMult(const Vector &x, Vector &y) const fes2 = fes[midx[1]]; Array* const interface_infos = topol_handler->GetInterfaceInfos(p); + + timer.Stop("Mult/port-init"); + AssembleInterfaceVector(mesh1, mesh2, fes1, fes2, interface_infos, x_tmp.GetBlock(midx[0]), x_tmp.GetBlock(midx[1]), y_tmp.GetBlock(midx[0]), y_tmp.GetBlock(midx[1])); } // for (int p = 0; p < topol_handler->GetNumPorts(); p++) + timer.Start("Mult/final"); + for (int i=0; i < y_tmp.NumBlocks(); ++i) y_tmp.GetBlock(i).SyncAliasMemory(y); + + timer.Stop("Mult/final"); + timer.Stop("Mult"); } void InterfaceForm::InterfaceGetGradient(const Vector &x, Array2D &mats) const { + timer.Start("GetGradient"); + + timer.Start("Grad/init"); + assert(mats.NumRows() == numSub); assert(mats.NumCols() == numSub); for (int i = 0; i < numSub; i++) @@ -110,8 +130,12 @@ void InterfaceForm::InterfaceGetGradient(const Vector &x, Array2DGetNumPorts(); p++) { + timer.Start("Grad/port-init"); + const PortInfo *pInfo = topol_handler->GetPortInfo(p); midx[0] = pInfo->Mesh1; @@ -128,9 +152,12 @@ void InterfaceForm::InterfaceGetGradient(const Vector &x, Array2D* const interface_infos = topol_handler->GetInterfaceInfos(p); + timer.Stop("Grad/port-init"); AssembleInterfaceGrad(mesh1, mesh2, fes1, fes2, interface_infos, x_tmp.GetBlock(midx[0]), x_tmp.GetBlock(midx[1]), mats_p); } // for (int p = 0; p < topol_handler->GetNumPorts(); p++) + + timer.Stop("GetGradient"); } void InterfaceForm::AssembleInterfaceMatrix( @@ -237,10 +264,16 @@ void InterfaceForm::AssembleInterfaceVector(Mesh *mesh1, Mesh *mesh2, for (int bn = 0; bn < interface_infos->Size(); bn++) { + timer.Start("topol"); + InterfaceInfo *if_info = &((*interface_infos)[bn]); topol_handler->GetInterfaceTransformations(mesh1, mesh2, if_info, tr1, tr2); + timer.Stop("topol"); + + timer.Start("assemble"); + if ((tr1 != NULL) && (tr2 != NULL)) { fes1->GetElementVDofs(tr1->Elem1No, vdofs1); @@ -257,12 +290,16 @@ void InterfaceForm::AssembleInterfaceVector(Mesh *mesh1, Mesh *mesh2, { assert(fnfi[itg]); + timer.Start("assemble-itf-vec"); fnfi[itg]->AssembleInterfaceVector(*fe1, *fe2, *tr1, *tr2, el_x1, el_x2, el_y1, el_y2); + timer.Stop("assemble-itf-vec"); y1.AddElementVector(vdofs1, el_y1); y2.AddElementVector(vdofs2, el_y2); } } // if ((tr1 != NULL) && (tr2 != NULL)) + + timer.Stop("assemble"); } // for (int bn = 0; bn < interface_infos.Size(); bn++) } @@ -270,6 +307,8 @@ void InterfaceForm::AssembleInterfaceGrad(Mesh *mesh1, Mesh *mesh2, FiniteElementSpace *fes1, FiniteElementSpace *fes2, Array *interface_infos, const Vector &x1, const Vector &x2, Array2D &mats) const { + timer.Start("Grad/assemble-init"); + assert(x1.Size() == fes1->GetTrueVSize()); assert(x2.Size() == fes2->GetTrueVSize()); @@ -281,14 +320,22 @@ void InterfaceForm::AssembleInterfaceGrad(Mesh *mesh1, Mesh *mesh2, Array vdofs1, vdofs2; Vector el_x1, el_x2, el_y1, el_y2; + timer.Stop("Grad/assemble-init"); + for (int bn = 0; bn < interface_infos->Size(); bn++) { + timer.Start("Grad/topol"); + InterfaceInfo *if_info = &((*interface_infos)[bn]); topol_handler->GetInterfaceTransformations(mesh1, mesh2, if_info, tr1, tr2); + timer.Stop("Grad/topol"); + if ((tr1 != NULL) && (tr2 != NULL)) { + timer.Start("Grad/assemble"); + fes1->GetElementVDofs(tr1->Elem1No, vdofs1); fes2->GetElementVDofs(tr2->Elem1No, vdofs2); @@ -310,6 +357,8 @@ void InterfaceForm::AssembleInterfaceGrad(Mesh *mesh1, Mesh *mesh2, mats(1, 0)->AddSubMatrix(vdofs2, vdofs1, *elemmats(1, 0), skip_zeros); mats(1, 1)->AddSubMatrix(vdofs2, vdofs2, *elemmats(1, 1), skip_zeros); } + + timer.Stop("Grad/assemble"); } // if ((tr1 != NULL) && (tr2 != NULL)) } // for (int bn = 0; bn < interface_infos.Size(); bn++) diff --git a/src/interfaceinteg.cpp b/src/interfaceinteg.cpp index ae9d7033..e791a07c 100644 --- a/src/interfaceinteg.cpp +++ b/src/interfaceinteg.cpp @@ -5,6 +5,7 @@ #include "interfaceinteg.hpp" #include "etc.hpp" #include "linalg_utils.hpp" +#include "input_parser.hpp" // #include // #include @@ -1397,6 +1398,8 @@ void DGLaxFriedrichsFluxIntegrator::AssembleQuadVectorBase( const DenseMatrix &elfun1, const DenseMatrix &elfun2, DenseMatrix &elvect1, DenseMatrix &elvect2) { + timer.Start("fomvec_interface/eval"); + assert(Tr1); assert(elfun1.NumRows() == shape1.Size()); assert(elfun1.NumCols() == u1.Size()); @@ -1448,12 +1451,17 @@ void DGLaxFriedrichsFluxIntegrator::AssembleQuadVectorBase( ComputeFluxDotN(u1, u2, nor, eval2, flux); + timer.Stop("fomvec_interface/eval"); + timer.Start("fomvec_interface/assemble"); + w = iw; if (Q) { w *= Q->Eval(*Tr1, ip); } AddMult_a_VWt(-w, shape1, flux, elvect1); if (ndofs2) AddMult_a_VWt(w, shape2, flux, elvect2); + + timer.Stop("fomvec_interface/assemble"); } void DGLaxFriedrichsFluxIntegrator::AssembleQuadGradBase( @@ -1527,6 +1535,8 @@ void DGLaxFriedrichsFluxIntegrator::AssembleFaceVector( const FiniteElement &el1, const FiniteElement &el2, FaceElementTransformations &Tr, const Vector &elfun, Vector &elvect) { + timer.Start("AssembleFaceVector_interior"); + dim = el1.GetDim(); ndofs1 = el1.GetDof(); ndofs2 = (Tr.Elem2No >= 0) ? el2.GetDof() : 0; @@ -1564,12 +1574,16 @@ void DGLaxFriedrichsFluxIntegrator::AssembleFaceVector( AssembleQuadVectorBase(el1, el2, &Tr, NULL, ip, ip.weight, ndofs2, udof1, udof2, elv1, elv2); } + + timer.Stop("AssembleFaceVector_interior"); } void DGLaxFriedrichsFluxIntegrator::AssembleFaceGrad( const FiniteElement &el1, const FiniteElement &el2, FaceElementTransformations &Tr, const Vector &elfun, DenseMatrix &elmat) { + timer.Start("AssembleFaceGrad_interior"); + dim = el1.GetDim(); ndofs1 = el1.GetDof(); ndofs2 = (Tr.Elem2No >= 0) ? el2.GetDof() : 0; @@ -1627,6 +1641,8 @@ void DGLaxFriedrichsFluxIntegrator::AssembleFaceGrad( } } } // for (int pind = 0; pind < ir->GetNPoints(); ++pind) + + timer.Stop("AssembleFaceGrad_interior"); } void DGLaxFriedrichsFluxIntegrator::AssembleQuadratureVector( @@ -1712,6 +1728,9 @@ void DGLaxFriedrichsFluxIntegrator::AssembleQuadratureGrad( void DGLaxFriedrichsFluxIntegrator::AddAssembleVector_Fast( const EQPSample &eqp_sample, FaceElementTransformations &T, const Vector &x, Vector &y) { + timer.Start("eqpvec_Fast_interior"); + timer.Start("eqpvec_Fast_interior/eval"); + const IntegrationPoint &ip = GetIntegrationRule()->IntPoint(eqp_sample.info.qp); const double qw = eqp_sample.info.qw; DenseMatrix *shapes1 = eqp_sample.shape1; @@ -1750,17 +1769,25 @@ void DGLaxFriedrichsFluxIntegrator::AddAssembleVector_Fast( ComputeFluxDotN(u1, u2, nor, eval2, flux); + timer.Stop("eqpvec_Fast_interior/eval"); + timer.Start("eqpvec_Fast_interior/assemble"); + w = qw; if (Q) { w *= Q->Eval(T, ip); } assert(y.Size() == x.Size()); shapes1->AddMultTranspose(flux, y, -w); if (el2) shapes2->AddMultTranspose(flux, y, w); + + timer.Stop("eqpvec_Fast_interior/assemble"); + timer.Stop("eqpvec_Fast_interior"); } void DGLaxFriedrichsFluxIntegrator::AddAssembleGrad_Fast( const EQPSample &eqp_sample, FaceElementTransformations &T, const Vector &x, DenseMatrix &jac) { + timer.Start("eqpgrad_Fast_interior"); + const IntegrationPoint &ip = GetIntegrationRule()->IntPoint(eqp_sample.info.qp); const double qw = eqp_sample.info.qw; DenseMatrix *shapes1 = eqp_sample.shape1; @@ -1812,6 +1839,8 @@ void DGLaxFriedrichsFluxIntegrator::AddAssembleGrad_Fast( AddwRtAP(*shapes1, gradu2, *shapes2, jac, -w); AddwRtAP(*shapes2, gradu2, *shapes2, jac, w); } + + timer.Stop("eqpgrad_Fast_interior"); } void DGLaxFriedrichsFluxIntegrator::AssembleInterfaceVector( @@ -1820,6 +1849,10 @@ void DGLaxFriedrichsFluxIntegrator::AssembleInterfaceVector( const Vector &elfun1, const Vector &elfun2, Vector &elvect1, Vector &elvect2) { + timer.Start("fomvec_interface"); + + timer.Start("fomvec_interface/init"); + dim = el1.GetDim(); ndofs1 = el1.GetDof(); ndofs2 = el2.GetDof(); @@ -1848,6 +1881,8 @@ void DGLaxFriedrichsFluxIntegrator::AssembleInterfaceVector( ir = &IntRules.Get(Tr1.GetGeometryType(), order); } + timer.Stop("fomvec_interface/init"); + elvect1 = 0.0; elvect2 = 0.0; for (int pind = 0; pind < ir->GetNPoints(); ++pind) { @@ -1855,6 +1890,8 @@ void DGLaxFriedrichsFluxIntegrator::AssembleInterfaceVector( AssembleQuadVectorBase(el1, el2, &Tr1, &Tr2, ip, ip.weight, ndofs2, udof1, udof2, elv1, elv2); } + + timer.Stop("fomvec_interface"); } void DGLaxFriedrichsFluxIntegrator::AssembleInterfaceGrad( @@ -1862,6 +1899,10 @@ void DGLaxFriedrichsFluxIntegrator::AssembleInterfaceGrad( FaceElementTransformations &Tr1, FaceElementTransformations &Tr2, const Vector &elfun1, const Vector &elfun2, Array2D &elmats) { + timer.Start("fomgrad_interface"); + + timer.Start("fomgrad_interface/init"); + assert(elmats.NumRows() == 2); assert(elmats.NumCols() == 2); for (int i = 0; i < 2; i++) @@ -1905,13 +1946,20 @@ void DGLaxFriedrichsFluxIntegrator::AssembleInterfaceGrad( for (int i = 0; i < 2; i++) for (int j = 0; j < 2; j++) *(elmats(i, j)) = 0.0; + timer.Stop("fomgrad_interface/init"); + for (int pind = 0; pind < ir->GetNPoints(); ++pind) { + timer.Start("fomgrad_interface/eval"); + const IntegrationPoint &ip = ir->IntPoint(pind); AssembleQuadGradBase(el1, el2, &Tr1, &Tr2, ip, ip.weight, ndofs2, udof1, udof2, w, gradu1, gradu2, elmat_comp11, elmat_comp12, elmat_comp21, elmat_comp22); + timer.Stop("fomgrad_interface/eval"); + timer.Start("fomgrad_interface/assemble"); + for (int di = 0; di < dim; di++) { for (int dj = 0; dj < dim; dj++) @@ -1925,7 +1973,11 @@ void DGLaxFriedrichsFluxIntegrator::AssembleInterfaceGrad( } } } + + timer.Stop("fomgrad_interface/assemble"); } // for (int pind = 0; pind < ir->GetNPoints(); ++pind) + + timer.Stop("fomgrad_interface"); } void DGLaxFriedrichsFluxIntegrator::AssembleQuadratureVector( @@ -2023,6 +2075,9 @@ void DGLaxFriedrichsFluxIntegrator::AddAssembleVector_Fast( FaceElementTransformations &Tr2, const Vector &x1, const Vector &x2, Vector &y1, Vector &y2) { + timer.Start("eqpvec_Fast_interface"); + timer.Start("eqpvec_Fast_interior/init"); + const IntegrationRule *ir = GetIntegrationRule(); const IntegrationPoint &ip = ir->IntPoint(eqp_sample.info.qp); const double qw = eqp_sample.info.qw; @@ -2041,6 +2096,9 @@ void DGLaxFriedrichsFluxIntegrator::AddAssembleVector_Fast( const IntegrationPoint &eip1 = Tr1.GetElement1IntPoint(); // const IntegrationPoint &eip2 = T.GetElement2IntPoint(); + timer.Stop("eqpvec_Fast_interior/init"); + timer.Start("eqpvec_Fast_interior/eval"); + shapes1->Mult(x1, u1); shapes2->Mult(x2, u2); @@ -2055,6 +2113,9 @@ void DGLaxFriedrichsFluxIntegrator::AddAssembleVector_Fast( ComputeFluxDotN(u1, u2, nor, true, flux); + timer.Stop("eqpvec_Fast_interior/eval"); + timer.Start("eqpvec_Fast_interior/assemble"); + w = qw; if (Q) { w *= Q->Eval(Tr1, ip); } @@ -2062,6 +2123,9 @@ void DGLaxFriedrichsFluxIntegrator::AddAssembleVector_Fast( assert(y2.Size() == x2.Size()); shapes1->AddMultTranspose(flux, y1, -w); shapes2->AddMultTranspose(flux, y2, w); + + timer.Stop("eqpvec_Fast_interior/assemble"); + timer.Stop("eqpvec_Fast_interface"); } void DGLaxFriedrichsFluxIntegrator::AddAssembleGrad_Fast( @@ -2069,6 +2133,9 @@ void DGLaxFriedrichsFluxIntegrator::AddAssembleGrad_Fast( FaceElementTransformations &Tr2, const Vector &x1, const Vector &x2, Array2D &jac) { + timer.Start("eqpgrad_Fast_interface"); + timer.Start("eqpgrad_Fast_interface/init"); + const IntegrationRule *ir = GetIntegrationRule(); const IntegrationPoint &ip = ir->IntPoint(eqp_sample.info.qp); const double qw = eqp_sample.info.qw; @@ -2089,6 +2156,9 @@ void DGLaxFriedrichsFluxIntegrator::AddAssembleGrad_Fast( const IntegrationPoint &eip1 = Tr1.GetElement1IntPoint(); // const IntegrationPoint &eip2 = T.GetElement2IntPoint(); + timer.Stop("eqpgrad_Fast_interface/init"); + timer.Start("eqpgrad_Fast_interface/eval"); + shapes1->Mult(x1, u1); shapes2->Mult(x2, u2); @@ -2108,10 +2178,32 @@ void DGLaxFriedrichsFluxIntegrator::AddAssembleGrad_Fast( if (Q) w *= Q->Eval(Tr1, ip); + timer.Stop("eqpgrad_Fast_interface/eval"); +// { +// Array2D test_jac(2, 2); +// for (int i = 0; i < 2; i++) +// for (int j = 0; j < 2; j++) +// test_jac(i, j) = new DenseMatrix(jac(i, j)->Height(), jac(i, j)->Width()); + +// timer.Start("eqpgrad_Fast_interface/assemble_test"); +// AddwRtAP(*shapes1, gradu1, *shapes1, *test_jac(0, 0), -w); +// AddwRtAP(*shapes2, gradu1, *shapes1, *test_jac(1, 0), w); +// AddwRtAP(*shapes1, gradu2, *shapes2, *test_jac(0, 1), -w); +// AddwRtAP(*shapes2, gradu2, *shapes2, *test_jac(1, 1), w); +// timer.Stop("eqpgrad_Fast_interface/assemble_test"); + +// DeletePointers(test_jac); +// } + timer.Start("eqpgrad_Fast_interface/assemble"); + AddwRtAP(*shapes1, gradu1, *shapes1, *jac(0, 0), -w); AddwRtAP(*shapes2, gradu1, *shapes1, *jac(1, 0), w); AddwRtAP(*shapes1, gradu2, *shapes2, *jac(0, 1), -w); AddwRtAP(*shapes2, gradu2, *shapes2, *jac(1, 1), w); + + timer.Stop("eqpgrad_Fast_interface/assemble"); + + timer.Stop("eqpgrad_Fast_interface"); } } diff --git a/src/main_workflow.cpp b/src/main_workflow.cpp index 6a1202c5..cec992a6 100644 --- a/src/main_workflow.cpp +++ b/src/main_workflow.cpp @@ -121,6 +121,7 @@ std::vector GetGlobalBasisTagList(const TopologyHandlerMode &topol_mod else if (solver_type == "stokes") var_list = StokesSolver::GetVariableNames(); else if (solver_type == "steady-ns") var_list = SteadyNSSolver::GetVariableNames(); else if (solver_type == "linelast") var_list = LinElastSolver::GetVariableNames(); + else if (solver_type == "unsteady-ns") var_list = UnsteadyNSSolver::GetVariableNames(); else { printf("Unknown MultiBlockSolver %s!\n", solver_type.c_str()); @@ -277,12 +278,12 @@ void TrainROM(MPI_Comm comm) sample_generator->FormReducedBasis(basis_prefix); - AuxiliaryTrainROM(comm, sample_generator); + AuxiliaryTrainROM(comm); delete sample_generator; } -void AuxiliaryTrainROM(MPI_Comm comm, SampleGenerator *sample_generator) +void AuxiliaryTrainROM(MPI_Comm comm) { std::string solver_type = config.GetRequiredOption("main/solver"); bool separate_variable_basis = config.GetOption("model_reduction/separate_variable_basis", false); @@ -701,3 +702,52 @@ double SingleRun(MPI_Comm comm, const std::string output_file) // return the maximum error over all variables. return error.Max(); } + +void PrintEQPCoords(MPI_Comm comm) +{ + ParameterizedProblem *problem = InitParameterizedProblem(); + MultiBlockSolver *test = InitSolver(); + test->InitVariables(); + + assert(test->UseRom()); + if (!test->IsNonlinear()) + { + printf("PrintEQPPoints: given solver is not nonlinear. Exiting\n"); + return; + } + + test->InitROMHandler(); + + StopWatch solveTimer; + + problem->SetSingleRun(); + test->SetParameterizedProblem(problem); + + // TODO: there are skippable operations depending on rom/fom mode. + test->BuildRHSOperators(); + test->SetupRHSBCOperators(); + test->AssembleRHS(); + + const int num_var = test->GetNumVar(); + + ROMHandlerBase *rom = test->GetROMHandler(); + test->LoadReducedBasis(); + test->AllocateROMNlinElems(); + + ROMBuildingLevel save_operator = rom->GetBuildingLevel(); + TopologyHandlerMode topol_mode = test->GetTopologyMode(); + assert(save_operator == ROMBuildingLevel::COMPONENT); + assert(topol_mode != TopologyHandlerMode::SUBMESH); + assert(rom->GetNonlinearHandling() == NonlinearHandling::EQP); + + test->LoadROMNlinElems(rom->GetOperatorPrefix()); + + const std::string filename = config.GetRequiredOption("model_reduction/eqp/save_coords"); + test->SaveEQPCoords(filename); + + delete test; + delete problem; + + // return the maximum error over all variables. + return; +} diff --git a/src/rom_handler.cpp b/src/rom_handler.cpp index bfeed644..3edc51f3 100644 --- a/src/rom_handler.cpp +++ b/src/rom_handler.cpp @@ -175,13 +175,17 @@ void ROMHandlerBase::ParseSupremizerInput(Array &num_ref_supreme, Array("basis/number_of_supremizer", -1); BasisTag basis_tag; for (int b = 0; b < num_rom_comp; b++) { basis_tag = GetBasisTagForComponent(b, topol_handler, fom_var_names[1]); - num_ref_supreme[b] = config.LookUpFromDict("name", basis_tag.print(), "number_of_supremizer", num_ref_basis[b * num_var + 1], basis_list); + // By default the same number of pressure basis. + const int nsup0 = (nsup_default > 0) ? nsup_default : num_ref_basis[b * num_var + 1]; + + num_ref_supreme[b] = config.LookUpFromDict("name", basis_tag.print(), "number_of_supremizer", nsup0, basis_list); } for (int m = 0; m < numSub; m++) @@ -634,12 +638,13 @@ void MFEMROMHandler::NonlinearSolve(Operator &oper, BlockVector* U, Solver *prec printf("Solve ROM.\n"); reduced_sol = new BlockVector(rom_block_offsets); bool use_restart = config.GetOption("solver/use_restart", false); + double amp = config.GetOption("solver/initial_random_perturbation", 1.0e-5); if (use_restart) ProjectGlobalToDomainBasis(U, reduced_sol); else { for (int k = 0; k < reduced_sol->Size(); k++) - (*reduced_sol)(k) = 1.0e-1 * UniformRandom(); + (*reduced_sol)(k) = amp * UniformRandom(); } int maxIter = config.GetOption("solver/max_iter", 100); diff --git a/src/rom_interfaceform.cpp b/src/rom_interfaceform.cpp index 425bbfd8..13e5dca7 100644 --- a/src/rom_interfaceform.cpp +++ b/src/rom_interfaceform.cpp @@ -32,6 +32,8 @@ ROMInterfaceForm::ROMInterfaceForm( else if (nnls_str == "linf") nnls_criterion = CAROM::NNLS_termination::LINF; else mfem_error("ROMNonlinearForm: unknown NNLS criterion!\n"); + + timer.SetTitle("ROMInterfaceForm"); } ROMInterfaceForm::~ROMInterfaceForm() @@ -73,6 +75,10 @@ void ROMInterfaceForm::UpdateBlockOffsets() void ROMInterfaceForm::InterfaceAddMult(const Vector &x, Vector &y) const { + timer.Start("Mult"); + + timer.Start("Mult/init"); + assert(block_offsets.Min() >= 0); assert(x.Size() == block_offsets.Last()); assert(y.Size() == block_offsets.Last()); @@ -96,15 +102,23 @@ void ROMInterfaceForm::InterfaceAddMult(const Vector &x, Vector &y) const Array vdofs1, vdofs2; Vector el_x1, el_x2, el_y1, el_y2; + timer.Stop("Mult/init"); + for (int k = 0; k < fnfi.Size(); k++) { + timer.Start("Mult/itg-init"); + assert(fnfi[k]); const IntegrationRule *ir = fnfi[k]->GetIntegrationRule(); assert(ir); // we enforce all integrators to set the IntegrationRule a priori. + timer.Stop("Mult/itg-init"); + for (int p = 0; p < numPorts; p++) { + timer.Start("Mult/port-init"); + const PortInfo *pInfo = topol_handler->GetPortInfo(p); midx[0] = pInfo->Mesh1; @@ -131,65 +145,91 @@ void ROMInterfaceForm::InterfaceAddMult(const Vector &x, Vector &y) const assert(interface_infos); eqp_elem = fnfi_sample[p + k * numPorts]; + + timer.Stop("Mult/port-init"); + if (!eqp_elem) continue; int prev_itf = -1; for (int i = 0; i < eqp_elem->Size(); i++) { + timer.Start("Mult/elem-init"); + EQPSample *sample = eqp_elem->GetSample(i); int itf = sample->info.el; - InterfaceInfo *if_info = &((*interface_infos)[itf]); - topol_handler->GetInterfaceTransformations(mesh1, mesh2, if_info, tr1, tr2); - const IntegrationPoint &ip = ir->IntPoint(sample->info.qp); + if (itf != prev_itf) + { + InterfaceInfo *if_info = &((*interface_infos)[itf]); + timer.Start("Mult/topol"); + topol_handler->GetInterfaceTransformations(mesh1, mesh2, if_info, tr1, tr2); + timer.Stop("Mult/topol"); + + prev_itf = itf; + } // if (itf != prev_itf) + + timer.Stop("Mult/elem-init"); if (precompute) { + timer.Start("Mult/fast-eqp"); fnfi[k]->AddAssembleVector_Fast(*sample, *tr1, *tr2, x1, x2, y1, y2); + timer.Stop("Mult/fast-eqp"); continue; } - if (itf != prev_itf) - { - if ((tr1 == NULL) || (tr2 == NULL)) - mfem_error("InterfaceTransformation of the sampled face is NULL,\n" - " indicating that an empty quadrature point is sampled.\n"); + timer.Start("Mult/slow-eqp"); - fes1->GetElementVDofs(tr1->Elem1No, vdofs1); - fes2->GetElementVDofs(tr2->Elem1No, vdofs2); + if ((tr1 == NULL) || (tr2 == NULL)) + mfem_error("InterfaceTransformation of the sampled face is NULL,\n" + " indicating that an empty quadrature point is sampled.\n"); - if (basis1_offset > 0) - for (int v = 0; v < vdofs1.Size(); v++) - vdofs1[v] += basis1_offset; - if (basis2_offset > 0) - for (int v = 0; v < vdofs2.Size(); v++) - vdofs2[v] += basis2_offset; + fes1->GetElementVDofs(tr1->Elem1No, vdofs1); + fes2->GetElementVDofs(tr2->Elem1No, vdofs2); - // Both domains will have the adjacent element as Elem1. - fe1 = fes1->GetFE(tr1->Elem1No); - fe2 = fes2->GetFE(tr2->Elem1No); + if (basis1_offset > 0) + for (int v = 0; v < vdofs1.Size(); v++) + vdofs1[v] += basis1_offset; + if (basis2_offset > 0) + for (int v = 0; v < vdofs2.Size(); v++) + vdofs2[v] += basis2_offset; - MultSubMatrix(*basis1, vdofs1, x1, el_x1); - MultSubMatrix(*basis2, vdofs2, x2, el_x2); + // Both domains will have the adjacent element as Elem1. + fe1 = fes1->GetFE(tr1->Elem1No); + fe2 = fes2->GetFE(tr2->Elem1No); - prev_itf = itf; - } // if (itf != prev_itf) + MultSubMatrix(*basis1, vdofs1, x1, el_x1); + MultSubMatrix(*basis2, vdofs2, x2, el_x2); + + const IntegrationPoint &ip = ir->IntPoint(sample->info.qp); fnfi[k]->AssembleQuadratureVector( *fe1, *fe2, *tr1, *tr2, ip, sample->info.qw, el_x1, el_x2, el_y1, el_y2); AddMultTransposeSubMatrix(*basis1, vdofs1, el_y1, y1); AddMultTransposeSubMatrix(*basis2, vdofs2, el_y2, y2); + + timer.Stop("Mult/slow-eqp"); } // for (int i = 0; i < sample_info->Size(); i++, sample++) } // for (int p = 0; p < numPorts; p++) } // for (int k = 0; k < fnfi.Size(); k++) + timer.Start("Mult/final"); + for (int i=0; i < y_tmp.NumBlocks(); ++i) y_tmp.GetBlock(i).SyncAliasMemory(y); + + timer.Stop("Mult/final"); + + timer.Stop("Mult"); } void ROMInterfaceForm::InterfaceGetGradient(const Vector &x, Array2D &mats) const { + timer.Start("GetGradient"); + + timer.Start("Grad/init"); + assert(mats.NumRows() == numSub); assert(mats.NumCols() == numSub); for (int i = 0; i < numSub; i++) @@ -219,15 +259,23 @@ void ROMInterfaceForm::InterfaceGetGradient(const Vector &x, Array2DGetIntegrationRule(); assert(ir); // we enforce all integrators to set the IntegrationRule a priori. + timer.Stop("Grad/fnfi-init"); + for (int p = 0; p < numPorts; p++) { + timer.Start("Grad/port-init"); + const PortInfo *pInfo = topol_handler->GetPortInfo(p); midx[0] = pInfo->Mesh1; @@ -256,24 +304,37 @@ void ROMInterfaceForm::InterfaceGetGradient(const Vector &x, Array2DSize(), eqp_elem->Size()); + int prev_itf = -1; for (int i = 0; i < eqp_elem->Size(); i++) { + timer.Start("Grad/elem-init"); + EQPSample *sample = eqp_elem->GetSample(i); int itf = sample->info.el; InterfaceInfo *if_info = &((*interface_infos)[itf]); + timer.Start("Grad/topol"); topol_handler->GetInterfaceTransformations(mesh1, mesh2, if_info, tr1, tr2); + timer.Stop("Grad/topol"); const IntegrationPoint &ip = ir->IntPoint(sample->info.qp); + timer.Stop("Grad/elem-init"); + if (precompute) { + timer.Start("Grad/fast-eqp"); fnfi[k]->AddAssembleGrad_Fast(*sample, *tr1, *tr2, x1, x2, mats_p); + timer.Stop("Grad/fast-eqp"); continue; } + timer.Start("Grad/slow-eqp"); + if (itf != prev_itf) { if ((tr1 == NULL) || (tr2 == NULL)) @@ -307,11 +368,15 @@ void ROMInterfaceForm::InterfaceGetGradient(const Vector &x, Array2DSize(); i++, sample++) } // for (int p = 0; p < numPorts; p++) } // for (int k = 0; k < fnfi.Size(); k++) DeletePointers(quadmats); + + timer.Stop("GetGradient"); } void ROMInterfaceForm::TrainEQPForRefPort(const int p, @@ -630,7 +695,7 @@ void ROMInterfaceForm::TrainEQPForIntegrator( samples.Append({.el = i / nqe, .qp = i % nqe, .qw = eqpSol_global(i)}); } } - printf("Size of sampled qp: %d\n", samples.Size()); + printf("Size of sampled qp: %d/%d\n", samples.Size(), eqpSol_global.dim()); if (nnz != samples.Size()) printf("Sample quadrature points with weight < 1.0e-12 are neglected.\n"); } diff --git a/src/rom_nonlinearform.cpp b/src/rom_nonlinearform.cpp index 61651773..9b7bf00d 100644 --- a/src/rom_nonlinearform.cpp +++ b/src/rom_nonlinearform.cpp @@ -12,7 +12,7 @@ namespace mfem { ROMNonlinearForm::ROMNonlinearForm(const int num_basis, FiniteElementSpace *f, const bool reference_) - : NonlinearForm(f), reference(reference_) + : NonlinearForm(f), reference(reference_), timer("ROMNonlinearForm") { height = width = num_basis; @@ -21,8 +21,6 @@ ROMNonlinearForm::ROMNonlinearForm(const int num_basis, FiniteElementSpace *f, c else if (nnls_str == "linf") nnls_criterion = CAROM::NNLS_termination::LINF; else mfem_error("ROMNonlinearForm: unknown NNLS criterion!\n"); - - for (int k = 0; k < Nt; k++) jac_timers[k] = new StopWatch; } ROMNonlinearForm::~ROMNonlinearForm() @@ -43,23 +41,14 @@ ROMNonlinearForm::~ROMNonlinearForm() DeletePointers(fnfi_sample); DeletePointers(bfnfi_sample); } - - printf("%10s\t%10s\t%10s\t%10s\t%10s\t%10s\t%10s\n", - "init", "sample-load", "elem-load", "assem-grad", "others", "sum", "total"); - double sum = 0.0; - for (int k = 0; k < Nt-1; k++) - { - printf("%.3E\t", jac_timers[k]->RealTime()); - sum += jac_timers[k]->RealTime(); - } - printf("%.3E\t", sum); - printf("%.3E\n", jac_timers[Nt-1]->RealTime()); - - for (int k = 0; k < Nt; k++) delete jac_timers[k]; } void ROMNonlinearForm::Mult(const Vector &x, Vector &y) const { + timer.Start("Mult"); + + timer.Start("Mult/init"); + assert(x.Size() == width); assert(y.Size() == height); @@ -103,29 +92,43 @@ void ROMNonlinearForm::Mult(const Vector &x, Vector &y) const // py = 0.0; y = 0.0; + timer.Stop("Mult/init"); + if (dnfi.Size()) { for (int k = 0; k < dnfi.Size(); k++) { + timer.Start("Mult/dnfi-init"); + const IntegrationRule *ir = dnfi[k]->GetIntegrationRule(); assert(ir); // we enforce all integrators to set the IntegrationRule a priori. EQPElement *eqp_elem = dnfi_sample[k]; assert(eqp_elem); + timer.Stop("Mult/dnfi-init"); + int prev_el = -1; for (int i = 0; i < eqp_elem->Size(); i++) { + timer.Start("Mult/dnfi-elem-init"); + EQPSample *sample = eqp_elem->GetSample(i); int el = sample->info.el; T = fes->GetElementTransformation(el); + timer.Stop("Mult/dnfi-elem-init"); + if (precompute) { + timer.Start("Mult/dnfi-fast-eqp"); dnfi[k]->AddAssembleVector_Fast(*sample, *T, x, y); + timer.Stop("Mult/dnfi-fast-eqp"); continue; } + timer.Start("Mult/dnfi-slow-eqp"); + const IntegrationPoint &ip = ir->IntPoint(sample->info.qp); if (el != prev_el) { @@ -141,6 +144,8 @@ void ROMNonlinearForm::Mult(const Vector &x, Vector &y) const if (doftrans) { doftrans->TransformDual(el_y); } AddMultTransposeSubMatrix(*basis, vdofs, el_y, y); + + timer.Stop("Mult/dnfi-slow-eqp"); } // for (int i = 0; i < el_samples->Size(); i++) } // for (int k = 0; k < dnfi.Size(); k++) } // if (dnfi.Size()) @@ -153,26 +158,38 @@ void ROMNonlinearForm::Mult(const Vector &x, Vector &y) const for (int k = 0; k < fnfi.Size(); k++) { + timer.Start("Mult/fnfi-init"); + const IntegrationRule *ir = fnfi[k]->GetIntegrationRule(); assert(ir); // we enforce all integrators to set the IntegrationRule a priori. EQPElement *eqp_elem = fnfi_sample[k]; assert(eqp_elem); + timer.Stop("Mult/fnfi-init"); + int prev_face = -1; for (int i = 0; i < eqp_elem->Size(); i++) { + timer.Start("Mult/fnfi-elem-init"); + EQPSample *sample = eqp_elem->GetSample(i); int face = sample->info.el; tr = mesh->GetInteriorFaceTransformations(face); + timer.Stop("Mult/fnfi-elem-init"); + if (precompute) { + timer.Start("Mult/fnfi-fast-eqp"); fnfi[k]->AddAssembleVector_Fast(*sample, *tr, x, y); + timer.Stop("Mult/fnfi-fast-eqp"); continue; } + timer.Start("Mult/fnfi-slow-eqp"); + const IntegrationPoint &ip = ir->IntPoint(sample->info.qp); if (face != prev_face) @@ -200,6 +217,8 @@ void ROMNonlinearForm::Mult(const Vector &x, Vector &y) const fnfi[k]->AssembleQuadratureVector(*fe1, *fe2, *tr, ip, sample->info.qw, el_x, el_y); AddMultTransposeSubMatrix(*basis, vdofs, el_y, y); + + timer.Stop("Mult/fnfi-slow-eqp"); } // for (int i = 0; i < sample_info->Size(); i++, sample++) } // for (int k = 0; k < fnfi.Size(); k++) } // if (fnfi.Size()) @@ -232,15 +251,21 @@ void ROMNonlinearForm::Mult(const Vector &x, Vector &y) const for (int k = 0; k < bfnfi.Size(); k++) { + timer.Start("Mult/bfnfi-init"); + const IntegrationRule *ir = bfnfi[k]->GetIntegrationRule(); assert(ir); // we enforce all integrators to set the IntegrationRule a priori. EQPElement *eqp_elem = bfnfi_sample[k]; assert(eqp_elem); + timer.Stop("Mult/bfnfi-init"); + int prev_be = -1; for (int i = 0; i < eqp_elem->Size(); i++) { + timer.Start("Mult/bfnfi-elem-init"); + EQPSample *sample = eqp_elem->GetSample(i); int be = sample->info.el; const int bdr_attr = mesh->GetBdrAttribute(be); @@ -256,12 +281,18 @@ void ROMNonlinearForm::Mult(const Vector &x, Vector &y) const tr = mesh->GetBdrFaceTransformations (be); + timer.Stop("Mult/bfnfi-elem-init"); + if (precompute) { + timer.Start("Mult/bfnfi-fast-eqp"); bfnfi[k]->AddAssembleVector_Fast(*sample, *tr, x, y); + timer.Stop("Mult/bfnfi-fast-eqp"); continue; } + timer.Start("Mult/bfnfi-slow-eqp"); + const IntegrationPoint &ip = ir->IntPoint(sample->info.qp); if (be != prev_be) @@ -290,6 +321,8 @@ void ROMNonlinearForm::Mult(const Vector &x, Vector &y) const bfnfi[k]->AssembleQuadratureVector(*fe1, *fe2, *tr, ip, sample->info.qw, el_x, el_y); AddMultTransposeSubMatrix(*basis, vdofs, el_y, y); + + timer.Stop("Mult/bfnfi-slow-eqp"); } // for (int i = 0; i < sample_info->Size(); i++, sample++) } // for (int k = 0; k < bfnfi.Size(); k++) } // if (bfnfi.Size()) @@ -308,13 +341,14 @@ void ROMNonlinearForm::Mult(const Vector &x, Vector &y) const // // y(ess_tdof_list[i]) = x(ess_tdof_list[i]); // } // // In parallel, the result is in 'py' which is an alias for 'aux2'. + timer.Stop("Mult"); } Operator& ROMNonlinearForm::GetGradient(const Vector &x) const { - jac_timers[5]->Start(); + timer.Start("GetGradient"); - jac_timers[0]->Start(); + timer.Start("Grad/init"); assert(x.Size() == width); // if (ext) // { @@ -356,13 +390,13 @@ Operator& ROMNonlinearForm::GetGradient(const Vector &x) const { *Grad = 0.0; } - jac_timers[0]->Stop(); + timer.Stop("Grad/init"); if (dnfi.Size()) { for (int k = 0; k < dnfi.Size(); k++) { - jac_timers[1]->Start(); + timer.Start("Grad/sample-load"); const IntegrationRule *ir = dnfi[k]->GetIntegrationRule(); assert(ir); // we enforce all integrators to set the IntegrationRule a priori. @@ -370,19 +404,20 @@ Operator& ROMNonlinearForm::GetGradient(const Vector &x) const assert(eqp_elem); int prev_el = -1; - jac_timers[1]->Stop(); + timer.Stop("Grad/sample-load"); for (int i = 0; i < eqp_elem->Size(); i++) { - jac_timers[2]->Start(); + timer.Start("Grad/elem-load"); EQPSample *sample = eqp_elem->GetSample(i); int el = sample->info.el; T = fes->GetElementTransformation(el); - jac_timers[2]->Stop(); + timer.Stop("Grad/elem-load"); - jac_timers[3]->Start(); + timer.Start("Grad/assem-grad"); if (precompute) { dnfi[k]->AddAssembleGrad_Fast(*sample, *T, x, *Grad); + timer.Stop("Grad/assem-grad"); continue; } @@ -403,12 +438,12 @@ Operator& ROMNonlinearForm::GetGradient(const Vector &x) const AddSubMatrixRtAP(*basis, vdofs, elmat, *basis, vdofs, *Grad); // Grad->AddSubMatrix(rom_vdofs, rom_vdofs, quadmat, skip_zeros); - jac_timers[3]->Stop(); + timer.Stop("Grad/assem-grad"); } // for (int i = 0; i < el_samples->Size(); i++) } // for (int k = 0; k < dnfi.Size(); k++) } // if (dnfi.Size()) - jac_timers[4]->Start(); + timer.Start("Grad/other-integ"); if (fnfi.Size()) { FaceElementTransformations *tr = NULL; @@ -566,8 +601,8 @@ Operator& ROMNonlinearForm::GetGradient(const Vector &x) const DenseMatrix *mGrad = Grad; - jac_timers[4]->Stop(); - jac_timers[5]->Stop(); + timer.Stop("Grad/other-integ"); + timer.Stop("GetGradient"); // TODO(kevin): will need to consider how we should address this case. // do we really need prolongation when we lift up from ROM? // we might need a similar operation on the ROM basis DenseMatrix. @@ -853,7 +888,7 @@ void ROMNonlinearForm::TrainEQPForIntegrator( samples.Append({.el = i / nqe, .qp = i % nqe, .qw = eqpSol_global(i)}); } } - printf("Size of sampled qp: %d\n", samples.Size()); + printf("Size of sampled qp: %d/%d\n", samples.Size(), eqpSol_global.dim()); if (nnz != samples.Size()) printf("Sample quadrature points with weight < 1.0e-12 are neglected.\n"); } @@ -1483,4 +1518,47 @@ void ROMNonlinearForm::PrecomputeBdrFaceEQPSample( PrecomputeFaceEQPSample(ir, basis, T, eqp_sample); } +void ROMNonlinearForm::SaveDomainEQPCoords(const int k, hid_t file_id, const std::string &dsetname) +{ + if (!dnfi.Size()) + return; + + assert((k >= 0) && (k < dnfi.Size())); + + const IntegrationRule *ir = dnfi[k]->GetIntegrationRule(); + EQPElement *eqp_elem = dnfi_sample[k]; + assert(ir); + assert(eqp_elem); + + DenseMatrix eqp_coords(eqp_elem->Size(), 3); + + EQPSample *eqp_sample; + for (int i = 0; i < eqp_elem->Size(); i++) + { + eqp_sample = eqp_elem->GetSample(i); + const int el = eqp_sample->info.el; + const FiniteElement *fe = fes->GetFE(el); + ElementTransformation *T = fes->GetElementTransformation(el); + const IntegrationPoint &ip = ir->IntPoint(eqp_sample->info.qp); + + double x[3]; + Vector transip(x, 3); + T->Transform(ip, transip); + + eqp_coords.SetRow(i, transip); + } // for (int i = 0; i < eqp_elem->Size(); i++) + + assert(file_id >= 0); + hid_t grp_id; + herr_t errf; + + grp_id = H5Gcreate(file_id, dsetname.c_str(), H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + assert(grp_id >= 0); + + hdf5_utils::WriteDataset(grp_id, "coords", eqp_coords); + + errf = H5Gclose(grp_id); + assert(errf >= 0); +} + } diff --git a/src/steady_ns_solver.cpp b/src/steady_ns_solver.cpp index 395a3358..7a3ae61a 100644 --- a/src/steady_ns_solver.cpp +++ b/src/steady_ns_solver.cpp @@ -24,7 +24,8 @@ SteadyNSOperator::SteadyNSOperator( Array &u_offsets_, const bool direct_solve_) : Operator(linearOp_->Height(), linearOp_->Width()), linearOp(linearOp_), hs(hs_), nl_itf(nl_itf_), u_offsets(u_offsets_), direct_solve(direct_solve_), - M(&(linearOp_->GetBlock(0, 0))), Bt(&(linearOp_->GetBlock(0, 1))), B(&(linearOp_->GetBlock(1, 0))) + M(&(linearOp_->GetBlock(0, 0))), Bt(&(linearOp_->GetBlock(0, 1))), B(&(linearOp_->GetBlock(1, 0))), + timer("SteadyNSOperator") { vblock_offsets.SetSize(3); vblock_offsets[0] = 0; @@ -61,19 +62,34 @@ SteadyNSOperator::~SteadyNSOperator() void SteadyNSOperator::Mult(const Vector &x, Vector &y) const { + timer.Start("Mult"); + assert(linearOp); x_u.MakeRef(const_cast(x), 0, u_offsets.Last()); y_u.MakeRef(y, 0, u_offsets.Last()); y = 0.0; + timer.Start("Mult/nlin_domain"); Hop->Mult(x_u, y_u); - if (nl_itf) nl_itf->InterfaceAddMult(x_u, y_u); + timer.Stop("Mult/nlin_domain"); + if (nl_itf) + { + timer.Start("Mult/itf"); + nl_itf->InterfaceAddMult(x_u, y_u); + timer.Stop("Mult/itf"); + } + timer.Start("Mult/lin"); linearOp->AddMult(x, y); + timer.Stop("Mult/lin"); + + timer.Stop("Mult"); } Operator& SteadyNSOperator::GetGradient(const Vector &x) const { + timer.Start("GetGradient"); + // NonlinearForm owns the gradient operator. // DeletePointers(hs_mats); delete hs_jac; @@ -82,6 +98,7 @@ Operator& SteadyNSOperator::GetGradient(const Vector &x) const delete mono_jac; delete jac_hypre; + timer.Start("GetGradient/nlin-jac"); hs_jac = new BlockMatrix(u_offsets); for (int i = 0; i < hs.Size(); i++) for (int j = 0; j < hs.Size(); j++) @@ -97,9 +114,12 @@ Operator& SteadyNSOperator::GetGradient(const Vector &x) const hs_mats(i, j) = new SparseMatrix(u_offsets[i+1] - u_offsets[i], u_offsets[j+1] - u_offsets[j]); } } + timer.Stop("GetGradient/nlin-jac"); + timer.Start("GetGradient/itf-jac"); x_u.MakeRef(const_cast(x), 0, u_offsets.Last()); if (nl_itf) nl_itf->InterfaceGetGradient(x_u, hs_mats); + timer.Stop("GetGradient/itf-jac"); for (int i = 0; i < hs.Size(); i++) for (int j = 0; j < hs.Size(); j++) @@ -108,9 +128,11 @@ Operator& SteadyNSOperator::GetGradient(const Vector &x) const hs_jac->SetBlock(i, j, hs_mats(i, j)); } + timer.Start("GetGradient/lin-jac"); SparseMatrix *hs_jac_mono = hs_jac->CreateMonolithic(); uu_mono = Add(*M, *hs_jac_mono); delete hs_jac_mono; + timer.Stop("GetGradient/lin-jac"); assert(B && Bt); @@ -123,10 +145,14 @@ Operator& SteadyNSOperator::GetGradient(const Vector &x) const if (direct_solve) { jac_hypre = new HypreParMatrix(MPI_COMM_SELF, sys_glob_size, sys_row_starts, mono_jac); + timer.Stop("GetGradient"); return *jac_hypre; } else + { + timer.Stop("GetGradient"); return *mono_jac; + } } /* @@ -222,7 +248,8 @@ Operator& SteadyNSTensorROM::GetGradient(const Vector &x) const SteadyNSEQPROM::SteadyNSEQPROM( ROMHandlerBase *rom_handler, Array &hs_, ROMInterfaceForm *itf_, const bool direct_solve_) - : SteadyNSROM(hs_.Size(), rom_handler, direct_solve_), hs(hs_), itf(itf_) + : SteadyNSROM(hs_.Size(), rom_handler, direct_solve_), hs(hs_), itf(itf_), + timer("SteadyNSEQPROM") { if (!separate_variable) { @@ -246,9 +273,14 @@ SteadyNSEQPROM::SteadyNSEQPROM( void SteadyNSEQPROM::Mult(const Vector &x, Vector &y) const { + timer.Start("Mult"); + y = 0.0; + timer.Start("Mult/lin"); linearOp->Mult(x, y); + timer.Stop("Mult/lin"); + timer.Start("Mult/nlin"); for (int m = 0; m < numSub; m++) { int midx = midxs[m]; @@ -257,23 +289,39 @@ void SteadyNSEQPROM::Mult(const Vector &x, Vector &y) const hs[m]->AddMult(x_comp, y_comp); } + timer.Stop("Mult/nlin"); - if (!itf) return; + if (!itf) + { + timer.Stop("Mult"); + return; + } GetVel(x, x_u); y_u = 0.0; + + timer.Start("Mult/itf"); itf->InterfaceAddMult(x_u, y_u); + timer.Stop("Mult/itf"); + AddVel(y_u, y); + + timer.Stop("Mult"); } Operator& SteadyNSEQPROM::GetGradient(const Vector &x) const { + timer.Start("GetGradient"); + delete jac_mono; delete jac_hypre; + timer.Start("GetGradient/lin"); jac_mono = new SparseMatrix(*linearOp); + timer.Stop("GetGradient/lin"); if (itf) { + timer.Start("GetGradient/itf"); BlockMatrix jac(block_offsets); jac.owns_blocks = true; @@ -288,6 +336,7 @@ Operator& SteadyNSEQPROM::GetGradient(const Vector &x) const } GetVel(x, x_u); + itf->InterfaceGetGradient(x_u, mats); MatrixBlocks u_mats(mats); @@ -298,8 +347,10 @@ Operator& SteadyNSEQPROM::GetGradient(const Vector &x) const (*jac_mono) += *tmp; delete tmp; + timer.Stop("GetGradient/itf"); } + timer.Start("GetGradient/nlin"); DenseMatrix *jac_comp; for (int m = 0; m < numSub; m++) { @@ -310,7 +361,10 @@ Operator& SteadyNSEQPROM::GetGradient(const Vector &x) const jac_comp = dynamic_cast(&hs[m]->GetGradient(x_comp)); jac_mono->AddSubMatrix(*block_idxs[m], *block_idxs[m], *jac_comp); } + timer.Stop("GetGradient/nlin"); jac_mono->Finalize(); + + timer.Stop("GetGradient"); if (direct_solve) { @@ -424,6 +478,7 @@ SteadyNSSolver::~SteadyNSSolver() else if (rom_handler->GetNonlinearHandling() == NonlinearHandling::EQP) { DeletePointers(comp_eqps); + DeletePointers(subdomain_eqps); delete itf_eqp; } } @@ -625,7 +680,7 @@ bool SteadyNSSolver::Solve(SampleGenerator *sample_generator) else { for (int k = 0; k < sol_byvar.Size(); k++) - sol_byvar(k) = UniformRandom(); + sol_byvar(k) = 1.0e-5 * UniformRandom(); } SteadyNSOperator oper(systemOp, hs, nl_itf, u_offsets, direct_solve); @@ -1032,6 +1087,8 @@ void SteadyNSSolver::AllocateROMEQPElems() itf_eqp->UpdateBlockOffsets(); itf_eqp->SetPrecomputeMode(precompute); +printf("precompute: %d\n", precompute); +printf("itf_eqp PrecomputeMode: %d\n", itf_eqp->PrecomputeMode()); } void SteadyNSSolver::TrainROMEQPElems(SampleGenerator *sample_generator) @@ -1060,6 +1117,8 @@ void SteadyNSSolver::TrainROMEQPElems(SampleGenerator *sample_generator) if (oper_type != OperType::LF) return; + eqp_tol = config.GetOption("model_reduction/eqp/interface/relative_tolerance", eqp_tol); + /* EQP NNLS for interface ROM, for each reference port */ for (int p = 0; p < topol_handler->GetNumRefPorts(); p++) { @@ -1182,7 +1241,7 @@ void SteadyNSSolver::LoadEQPElems(const std::string &filename) int num_comp; hdf5_utils::ReadAttribute(grp_id, "number_of_components", num_comp); assert(num_comp >= topol_handler->GetNumComponents()); - assert(comp_eqps.Size() == num_comp); + assert(comp_eqps.Size() == topol_handler->GetNumComponents()); std::string dset_name; for (int c = 0; c < topol_handler->GetNumComponents(); c++) @@ -1220,7 +1279,10 @@ void SteadyNSSolver::LoadEQPElems(const std::string &filename) itf_eqp->LoadEQPForIntegrator(0, file_id, "interface_integ0"); if (itf_eqp->PrecomputeMode()) +{ +printf("\n\nI AM PRECOMPUTING!!\n\n"); itf_eqp->PrecomputeCoefficients(); +} } errf = H5Fclose(file_id); @@ -1361,3 +1423,52 @@ DenseTensor* SteadyNSSolver::GetReducedTensor(DenseMatrix *basis, FiniteElementS return tensor; } + +void SteadyNSSolver::SaveEQPCoords(const std::string &filename) +{ + assert(topol_mode == TopologyHandlerMode::COMPONENT); + + /* + TODO(kevin): this is a boilerplate for parallel POD/EQP training. + Full parallelization will save EQ points/weights in a parallel way. + */ + if (rank == 0) + { + hid_t file_id; + herr_t errf = 0; + file_id = H5Fcreate(filename.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); + assert(file_id >= 0); + + hid_t grp_id; + + grp_id = H5Gcreate(file_id, "components", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + assert(grp_id >= 0); + + const int num_comp = topol_handler->GetNumComponents(); + assert(comp_eqps.Size() == num_comp); + + hdf5_utils::WriteAttribute(grp_id, "number_of_components", num_comp); + + std::string dset_name; + for (int c = 0; c < num_comp; c++) + { + assert(comp_eqps[c]); + dset_name = topol_handler->GetComponentName(c); + + comp_eqps[c]->SaveDomainEQPCoords(0, grp_id, dset_name + "_integ0"); + if (oper_type == OperType::LF) + { + assert(!full_dg); + // comp_eqps[c]->SaveEQPForIntegrator(IntegratorType::INTERIORFACE, 0, grp_id, dset_name + "_integ1"); + } // if (oper_type == OperType::LF) + } // for (int c = 0; c < num_comp; c++) + + errf = H5Gclose(grp_id); + assert(errf >= 0); + + errf = H5Fclose(file_id); + assert(errf >= 0); + } + MPI_Barrier(MPI_COMM_WORLD); + return; +} diff --git a/src/unsteady_ns_solver.cpp b/src/unsteady_ns_solver.cpp index 6cdd7ec6..bda721fc 100644 --- a/src/unsteady_ns_solver.cpp +++ b/src/unsteady_ns_solver.cpp @@ -13,7 +13,7 @@ using namespace mfem; */ UnsteadyNSSolver::UnsteadyNSSolver() - : SteadyNSSolver() + : SteadyNSSolver(), timer("UnsteadyNSSolver") { nonlinear_mode = true; @@ -88,6 +88,8 @@ bool UnsteadyNSSolver::Solve(SampleGenerator *sample_generator) std::string restart_file, file_fmt; file_fmt = "%s/%s_%08d.h5"; + timer.Start("Solve/init"); + SetupInitialCondition(initial_step, time); int sample_interval = config.GetOption("sample_generation/time-integration/sample_interval", 0); @@ -99,11 +101,15 @@ bool UnsteadyNSSolver::Solve(SampleGenerator *sample_generator) SaveVisualization(0, time); + timer.Stop("Solve/init"); + double cfl = 0.0; for (int step = initial_step; step < nt; step++) { Step(time, step); + timer.Start("Solve/save_sol"); + cfl = ComputeCFL(dt); SanityCheck(step); if (report_interval && @@ -125,10 +131,18 @@ bool UnsteadyNSSolver::Solve(SampleGenerator *sample_generator) if (sample_generator && sample_interval && ((step+1) > bootstrap) && (((step+1) % sample_interval) == 0)) SaveSnapshots(sample_generator); + + timer.Stop("Solve/save_sol"); } + timer.Start("Solve/final"); + SortBySubdomains(*U_step, *U); + timer.Stop("Solve/final"); + + timer.Print("UnsteadyNSSolver::Solve", true); + return converged; } @@ -208,29 +222,53 @@ void UnsteadyNSSolver::SetupInitialCondition(int &initial_step, double &time) void UnsteadyNSSolver::Step(double &time, int step) { + timer.Start("Step/copy_u"); + /* set time for forcing/boundary. At this point, time remains at the previous timestep. */ SetTime(time); /* copy velocity */ u1 = U_stepview->GetBlock(0); + timer.Stop("Step/copy_u"); + timer.Start("Step/advect_dom"); + /* evaluate nonlinear advection at previous time step */ Hop->Mult(u1, Cu1); + + timer.Stop("Step/advect_dom"); + timer.Start("Step/advect_itf"); + nl_itf->InterfaceAddMult(u1, Cu1); + timer.Stop("Step/advect_itf"); + timer.Start("Step/rhs_sort"); + /* Base right-hand side for boundary conditions and forcing */ SortByVariables(*RHS, *RHS_step); + timer.Stop("Step/rhs_sort"); + timer.Start("Step/add_advect"); + /* Add nonlinear convection */ RHS_stepview->GetBlock(0).Add(-ab1, Cu1); + timer.Stop("Step/add_advect"); + timer.Start("Step/add_udot"); + /* Add time derivative term */ // TODO: extend for high order bdf schemes massMat->AddMult(u1, RHS_stepview->GetBlock(0), -bd1 / dt); + timer.Stop("Step/add_udot"); + timer.Start("Step/solve_system"); + /* Solve for the next step */ mumps->Mult(*RHS_step, *U_step); + timer.Stop("Step/solve_system"); + timer.Start("Step/pres_scale"); + /* remove pressure scalar if all dirichlet bc */ if (!pres_dbc) { @@ -239,6 +277,8 @@ void UnsteadyNSSolver::Step(double &time, int step) U_stepview->GetBlock(1) -= p_const; } + timer.Stop("Step/pres_scale"); + time += dt; } @@ -431,6 +471,8 @@ void UnsteadyNSSolver::SolveROM() { assert(rom_handler->GetOrdering() == ROMOrderBy::VARIABLE); + timer.Start("SolveROM/init"); + int initial_step = 0; double time = 0.0; @@ -482,31 +524,60 @@ void UnsteadyNSSolver::SolveROM() } } + timer.Stop("SolveROM/init"); + for (int step = initial_step; step < nt; step++) { + timer.Start("SolveROM/set_time"); + /* set time for forcing/boundary. At this point, time remains at the previous timestep. */ SetTime(time); + timer.Stop("SolveROM/set_time"); + timer.Start("SolveROM/copy_u"); + /* copy velocity */ u1 = rsol_view->GetBlock(0); + timer.Stop("SolveROM/copy_u"); + timer.Start("SolveROM/advect_dom"); + /* evaluate nonlinear advection at previous time step */ Hop->Mult(u1, Cu1); + + timer.Stop("SolveROM/advect_dom"); + timer.Start("SolveROM/advect_itf"); + itf_eqp->InterfaceAddMult(u1, Cu1); + timer.Stop("SolveROM/advect_itf"); + timer.Start("SolveROM/get_rhs"); + /* Base right-hand side for boundary conditions and forcing */ reduced_rhs = *(rom_handler->GetReducedRHS()); + timer.Stop("SolveROM/get_rhs"); + timer.Start("SolveROM/add_advect"); + /* Add nonlinear convection */ rrhs_view->GetBlock(0).Add(-ab1, Cu1); + timer.Stop("SolveROM/add_advect"); + timer.Start("SolveROM/add_udot"); + /* Add time derivative term */ // TODO: extend for high order bdf schemes rom_mass->AddMult(u1, rrhs_view->GetBlock(0), -bd1 / dt); + timer.Stop("SolveROM/add_udot"); + timer.Start("SolveROM/solve_system"); + /* Solve for the next step */ rom_handler->Solve(reduced_rhs, *reduced_sol); + timer.Stop("SolveROM/solve_system"); + timer.Start("SolveROM/pres_scale"); + /* remove pressure scalar if all dirichlet bc */ if (!pres_dbc) { @@ -515,11 +586,19 @@ void UnsteadyNSSolver::SolveROM() rsol_view->GetBlock(1).Add(-p_const, rom_ones); } + timer.Stop("SolveROM/pres_scale"); + time += dt; } + timer.Start("SolveROM/liftup"); + rom_handler->LiftUpGlobal(*reduced_sol, *U); + timer.Stop("SolveROM/liftup"); + + timer.Print("UnsteadyNSSolver::SolveROM", true); + delete rsol_view; delete reduced_sol; return;