From cb95ccb3272f0be0695359b121314832fcaf917d Mon Sep 17 00:00:00 2001
From: nychiang
Date: Tue, 7 Mar 2023 19:45:31 -0800
Subject: [PATCH 1/5] process bounds and relax fixed var

---
 src/LinAlg/hiopVector.hpp               |  42 ++++++++++
 src/LinAlg/hiopVectorPar.cpp            |  90 ++++++++++++++++++++
 src/LinAlg/hiopVectorPar.hpp            |  13 +++
 src/LinAlg/hiopVectorRaja.hpp           |  13 +++
 src/LinAlg/hiopVectorRajaImpl.hpp       |  92 ++++++++++++++++++++
 src/Optimization/hiopNlpFormulation.cpp | 107 ++++--------------------
 src/Optimization/hiopNlpFormulation.hpp |   2 +-
 src/Optimization/hiopNlpTransforms.cpp  |  68 +++++----------
 src/Optimization/hiopNlpTransforms.hpp  |  24 +++---
 9 files changed, 302 insertions(+), 149 deletions(-)

diff --git a/src/LinAlg/hiopVector.hpp b/src/LinAlg/hiopVector.hpp
index 289583481..c2f1beda1 100644
--- a/src/LinAlg/hiopVector.hpp
+++ b/src/LinAlg/hiopVector.hpp
@@ -1002,6 +1002,48 @@ class hiopVector
    */
   virtual bool is_equal(const hiopVector& vec) const = 0;
 
+  /**
+   * @brief preprocess bounds in a form supported by the NLP formulation. Returns counts of
+   * the variables with lower, upper, and lower and upper bounds, as well as of the fixed
+   * variables.
+   *
+   * @param[in] this - lower bound of primal variable `x`
+   * @param[in] xu - upper bound of primal variable `x`
+   * @param[out] ixl - indicators of the variables with lower bounds
+   * @param[out] ixu - indicators of the variables with upper bounds
+   * @param[out] n_bnds_low - number of variables with lower bounds only
+   * @param[out] n_bnds_upp - number of variables with upper bounds only
+   * @param[out] n_bnds_lu - number of variables with both lower and upper bounds
+   * @param[out] nfixed_vars - number of fixed variables
+   * @param[in] fixed_var_tol - tolerance used to define fixed variables
+   *
+   * @pre this is a local method
+   * @todo: add unit test, or should we remove this function?
+   */
+  virtual bool process_bounds_local(const hiopVector& xu,
+                                    hiopVector& ixl,
+                                    hiopVector& ixu,
+                                    size_type& n_bnds_low,
+                                    size_type& n_bnds_upp,
+                                    size_type& n_bnds_lu,
+                                    size_type& nfixed_vars,
+                                    const double& fixed_var_tol) = 0;
+
+  /**
+   * @brief relax variable bounds
+   *
+   * @param[in] this - lower bound of primal variable `x`
+   * @param[in] xu - upper bound of primal variable `x`
+   * @param[in] fixed_var_tol - tolerance used to define fixed variables
+   * @param[in] fixed_var_perturb - perturbation added to bounds
+   *
+   * @pre this is a local method
+   * @todo: add unit test, or should we remove this function?
+ */ + virtual void relax_bounds_vec(hiopVector& xu, + const double& fixed_var_tol, + const double& fixed_var_perturb) = 0; + protected: size_type n_; //we assume sequential data protected: diff --git a/src/LinAlg/hiopVectorPar.cpp b/src/LinAlg/hiopVectorPar.cpp index 0fb38bd89..725fe3967 100644 --- a/src/LinAlg/hiopVectorPar.cpp +++ b/src/LinAlg/hiopVectorPar.cpp @@ -1282,6 +1282,96 @@ bool hiopVectorPar::is_equal(const hiopVector& vec) const return all_equal; } +bool hiopVectorPar::process_bounds_local(const hiopVector& xu, + hiopVector& ixl, + hiopVector& ixu, + size_type& n_bnds_low, + size_type& n_bnds_upp, + size_type& n_bnds_lu, + size_type& nfixed_vars, + const double& fixed_var_tol) +{ + n_bnds_low = 0; + n_bnds_upp = 0; + n_bnds_lu = 0; + nfixed_vars = 0; + + const double* xl_vec = this->local_data_const(); + const double* xu_vec = xu.local_data_const(); + double* ixl_vec = ixl.local_data(); + double* ixu_vec = ixu.local_data(); +#ifdef HIOP_DEEPCHECKS + const int maxBndsCloseMsgs=3; + int nBndsClose=0; +#endif + int nlocal = this->get_local_size(); + for(int i=0;i -1e20) { + ixl_vec[i] = 1.; + n_bnds_low++; + if(xu_vec[i] < 1e20) { + n_bnds_lu++; + } + } else { + ixl_vec[i] = 0.; + } + + if(xu_vec[i] < 1e20) { + ixu_vec[i] = 1.; + n_bnds_upp++; + } else { + ixu_vec[i] = 0.; + } + +#ifdef HIOP_DEEPCHECKS + assert(xl_vec[i] <= xu_vec[i] && "please fix the inconsistent bounds, otherwise the problem is infeasible"); +#endif + + if( xu_vec[i] < 1e20 && + fabs(xl_vec[i]-xu_vec[i]) <= fixed_var_tol*std::fmax(1.,std::fabs(xu_vec[i]))) { + nfixed_vars++; + } else { +#ifdef HIOP_DEEPCHECKS +#define min_dist 1e-8 + if(fixed_var_tolprintf(hovWarning, + "Lower (%g) and upper bound (%g) for variable %d are very close. " + "Consider fixing this variable or increase 'fixed_var_tolerance'.\n", + i, xl_vec[i], xu_vec[i]); + nBndsClose++; + } + } + if(nBndsClose==maxBndsCloseMsgs) { + log->printf(hovWarning, "[further messages were surpressed]\n"); + nBndsClose++; + } + } +#endif + } + } // end of for(int i=0;i local_data(); + double *xua = xu.local_data(); + size_type n = this->get_local_size(); + + double xuabs; + for(index_type i=0; i& exec_space() const { return exec_space_; diff --git a/src/LinAlg/hiopVectorRajaImpl.hpp b/src/LinAlg/hiopVectorRajaImpl.hpp index 6dcb111a6..eac76e6c0 100644 --- a/src/LinAlg/hiopVectorRajaImpl.hpp +++ b/src/LinAlg/hiopVectorRajaImpl.hpp @@ -2232,4 +2232,96 @@ bool hiopVectorRaja::is_equal(const hiopVector& vec) const return all_equal; } +template +bool hiopVectorRaja::process_bounds_local(const hiopVector& xu, + hiopVector& ixl, + hiopVector& ixu, + size_type& n_bnds_low, + size_type& n_bnds_upp, + size_type& n_bnds_lu, + size_type& nfixed_vars, + const double& fixed_var_tol) +{ +#ifdef HIOP_DEEPCHECKS + const hiopVectorRaja& vxu = dynamic_cast&>(xu); + const hiopVectorRaja& vixl = dynamic_cast&>(ixl); + const hiopVectorRaja& vixu = dynamic_cast&>(ixu); + assert(vxu.n_local_ == this->n_local_); + assert(vixl.n_local_ == this->n_local_); + assert(vixu.n_local_ == this->n_local_); +#endif + + const double* xl_vec = this->local_data_const(); + const double* xu_vec = xu.local_data_const(); + double* ixl_vec = ixl.local_data(); + double* ixu_vec = ixu.local_data(); + + size_type nlocal = this->get_local_size(); + + RAJA::ReduceSum< hiop_raja_reduce, int > sum_n_bnds_low(0); + RAJA::ReduceSum< hiop_raja_reduce, int > sum_n_bnds_upp(0); + RAJA::ReduceSum< hiop_raja_reduce, int > sum_n_bnds_lu(0); + RAJA::ReduceSum< hiop_raja_reduce, int > sum_nfixed_vars(0); + + 
RAJA::forall< hiop_raja_exec >( RAJA::RangeSegment(0, nlocal), + RAJA_LAMBDA(RAJA::Index_type i) + { + if(xl_vec[i] > -1e20) { + ixl_vec[i] = 1.; + sum_n_bnds_low += 1; + if(xu_vec[i] < 1e20) { + sum_n_bnds_lu += 1; + } + } else { + ixl_vec[i] = 0.; + } + + if(xu_vec[i] < 1e20) { + ixu_vec[i] = 1.; + sum_n_bnds_upp += 1; + } else { + ixu_vec[i] = 0.; + } + + if(xu_vec[i] < 1e20 && + fabs(xl_vec[i]-xu_vec[i]) <= fixed_var_tol*std::fmax(1.,std::fabs(xu_vec[i]))) { + sum_nfixed_vars += 1; + } + }); + + n_bnds_low = sum_n_bnds_low.get(); + n_bnds_upp = sum_n_bnds_upp.get(); + n_bnds_lu = sum_n_bnds_lu.get(); + nfixed_vars = sum_nfixed_vars.get(); + + return true; +} + +template +bool hiopVectorRaja::relax_bounds_vec(hiopVector& xu, + const double& fixed_var_tol, + const double& fixed_var_perturb) +{ +#ifdef HIOP_DEEPCHECKS + const hiopVectorRaja& vxu = dynamic_cast&>(xu); + assert(vxu.n_local_ == this->n_local_); +#endif + + double *xla = this->local_data(); + double *xua = xu.local_data(); + size_type n = this->get_local_size(); + + RAJA::forall< hiop_raja_exec >( RAJA::RangeSegment(0, nlocal), + RAJA_LAMBDA(RAJA::Index_type i) + { + double xuabs = std::fabs(xua[i]); + if(std::fabs(xua[i]-xla[i]) <= fixed_var_tol*std::fmax(1.,xuabs)) { + xua[i] += fixed_var_perturb*std::fmax(1.,xuabs); + xla[i] -= fixed_var_perturb*std::fmax(1.,xuabs); + } + }); + + return true; +} + } // namespace hiop diff --git a/src/Optimization/hiopNlpFormulation.cpp b/src/Optimization/hiopNlpFormulation.cpp index b8fb9d6d3..e2679932f 100644 --- a/src/Optimization/hiopNlpFormulation.cpp +++ b/src/Optimization/hiopNlpFormulation.cpp @@ -193,8 +193,8 @@ bool hiopNlpFormulation::finalizeInitialization() if(strFixedVars_ != options->GetString("fixed_var")) { doinit=true; } - const double fixedVarTol = options->GetNumeric("fixed_var_tolerance"); - if(dFixedVarsTol_ != fixedVarTol) { + const double fixed_var_tol = options->GetNumeric("fixed_var_tolerance"); + if(dFixedVarsTol_ != fixed_var_tol) { doinit=true; } @@ -257,12 +257,12 @@ bool hiopNlpFormulation::finalizeInitialization() //preprocess variables bounds - this is curently done on the CPU // size_type nfixed_vars_local; - process_bounds(n_bnds_low_local_,n_bnds_upp_local_, n_bnds_lu_, nfixed_vars_local); + process_bounds(n_bnds_low_local_, n_bnds_upp_local_, n_bnds_lu_, nfixed_vars_local); /////////////////////////////////////////////////////////////////////////// // Handling of fixed variables ////////////////////////////////////////////////////////////////////////// - dFixedVarsTol_ = fixedVarTol; + dFixedVarsTol_ = fixed_var_tol; size_type nfixed_vars=nfixed_vars_local; #ifdef HIOP_USE_MPI int ierr = MPI_Allreduce(&nfixed_vars_local, &nfixed_vars, 1, MPI_HIOP_SIZE_TYPE, MPI_SUM, comm_); @@ -277,11 +277,15 @@ bool hiopNlpFormulation::finalizeInitialization() // remove free variables // log->printf(hovWarning, "Fixed variables will be removed internally.\n"); + if(options->GetString("compute_mode")=="gpu") { + assert(false && "HiOp hasn't support removing fixed variables under GPU mode yet."); + return false; + } fixedVarsRemover = new hiopFixedVarsRemover(this, *xl_, *xu_, - fixedVarTol, + fixed_var_tol, nfixed_vars, nfixed_vars_local); @@ -417,7 +421,7 @@ bool hiopNlpFormulation::finalizeInitialization() cons_body_ = nullptr; delete cons_Jac_; - cons_Jac_ = NULL; + cons_Jac_ = nullptr; delete cons_lambdas_; cons_lambdas_ = nullptr; @@ -439,89 +443,14 @@ bool hiopNlpFormulation::process_bounds(size_type& n_bnds_low, size_type& n_bnds_lu, size_type& nfixed_vars) { - - 
n_bnds_low = 0; - n_bnds_upp = 0; - n_bnds_lu = 0; - nfixed_vars = 0; - -#if !defined(HIOP_USE_MPI) - int* vec_distrib_ = nullptr; - MPI_Comm comm_ = MPI_COMM_SELF; -#endif - hiopVectorPar xl_tmp(n_vars_, vec_distrib_, comm_); - hiopVectorPar xu_tmp(n_vars_, vec_distrib_, comm_); - hiopVectorPar ixl_tmp(n_vars_, vec_distrib_, comm_); - hiopVectorPar ixu_tmp(n_vars_, vec_distrib_, comm_); - - this->xl_->copy_to_vectorpar(xl_tmp); - this->xu_->copy_to_vectorpar(xu_tmp); - this->ixl_->copy_to_vectorpar(ixl_tmp); - this->ixu_->copy_to_vectorpar(ixu_tmp); - - double *ixl_vec = ixl_tmp.local_data_host(); - double *ixu_vec = ixu_tmp.local_data_host(); - - double* xl_vec = xl_tmp.local_data_host(); - double* xu_vec = xu_tmp.local_data_host(); -#ifdef HIOP_DEEPCHECKS - const int maxBndsCloseMsgs=3; int nBndsClose=0; -#endif - const double fixedVarTol = options->GetNumeric("fixed_var_tolerance"); - int nlocal=xl_->get_local_size(); - for(int i=0;i -1e20) { - ixl_vec[i] = 1.; - n_bnds_low++; - if(xu_vec[i] < 1e20) { - n_bnds_lu++; - } - } else { - ixl_vec[i] = 0.; - } - - if(xu_vec[i] < 1e20) { - ixu_vec[i] = 1.; - n_bnds_upp++; - } else { - ixu_vec[i] = 0.; - } - -#ifdef HIOP_DEEPCHECKS - assert(xl_vec[i] <= xu_vec[i] && "please fix the inconsistent bounds, otherwise the problem is infeasible"); -#endif - - //if(xl_vec[i]==xu_vec[i]) { - if( xu_vec[i]<1e20 && - fabs(xl_vec[i]-xu_vec[i]) <= fixedVarTol*fmax(1.,fabs(xu_vec[i]))) { - nfixed_vars++; - } else { -#ifdef HIOP_DEEPCHECKS -#define min_dist 1e-8 - if(fixedVarTolprintf(hovWarning, - "Lower (%g) and upper bound (%g) for variable %d are very close. " - "Consider fixing this variable or increase 'fixed_var_tolerance'.\n", - i, xl_vec[i], xu_vec[i]); - nBndsClose++; - } - } - if(nBndsClose==maxBndsCloseMsgs) { - log->printf(hovWarning, "[further messages were surpressed]\n"); - nBndsClose++; - } - } -#endif - } - } - - this->xl_->copy_from_vectorpar(xl_tmp); - this->xu_->copy_from_vectorpar(xu_tmp); - this->ixl_->copy_from_vectorpar(ixl_tmp); - this->ixu_->copy_from_vectorpar(ixu_tmp); - + this->xl_->process_bounds_local(*this->xu_, + *this->ixl_, + *this->ixu_, + n_bnds_low, + n_bnds_upp, + n_bnds_lu, + nfixed_vars, + options->GetNumeric("fixed_var_tolerance")); return true; } diff --git a/src/Optimization/hiopNlpFormulation.hpp b/src/Optimization/hiopNlpFormulation.hpp index b8b107c0b..5a656c765 100644 --- a/src/Optimization/hiopNlpFormulation.hpp +++ b/src/Optimization/hiopNlpFormulation.hpp @@ -290,7 +290,7 @@ class hiopNlpFormulation #endif protected: /* Preprocess bounds in a form supported by the NLP formulation. Returns counts of - * the variables with lower, upper, and lower and lower bounds, as well of the fixed + * the variables with lower, upper, and lower and upper bounds, as well of the fixed * variables. 
*/ virtual bool process_bounds(size_type& n_bnds_low, diff --git a/src/Optimization/hiopNlpTransforms.cpp b/src/Optimization/hiopNlpTransforms.cpp index 456cf89b4..4696a9cb0 100644 --- a/src/Optimization/hiopNlpTransforms.cpp +++ b/src/Optimization/hiopNlpTransforms.cpp @@ -65,11 +65,11 @@ hiopFixedVarsRemover:: hiopFixedVarsRemover(hiopNlpFormulation* nlp, const hiopVector& xl, const hiopVector& xu, - const double& fixedVarTol_, + const double& fixed_var_tol, const size_type& numFixedVars, const size_type& numFixedVars_local) : hiopNlpTransformation(nlp), - n_fixed_vars_local(numFixedVars_local), fixedVarTol(fixedVarTol_), + n_fixed_vars_local(numFixedVars_local), fixed_var_tol_(fixed_var_tol), Jacc_fs(NULL), Jacd_fs(NULL), fs2rs_idx_map(xl.get_local_size()), x_rs_ref_(nullptr), Jacc_rs_ref(NULL), Jacd_rs_ref(NULL) @@ -158,7 +158,7 @@ bool hiopFixedVarsRemover::setupDecisionVectorPart() int it_rs=0; for(int i=0;i=0) { - M_rs[i*nrs+rs_idx] = M_fs[i*nfs+j]; + M_rs[i*nrs+rs_idx] = M_fs[i*nfs+j]; } } } @@ -291,38 +291,19 @@ hiopFixedVarsRelaxer(hiopNlpFormulation* nlp, const size_type& numFixedVars, const size_type& numFixedVars_local) : hiopNlpTransformation(nlp), - xl_copy(NULL), xu_copy(NULL), n_vars(xl.get_size()), n_vars_local(xl.get_local_size()) + n_vars(xl.get_size()), + n_vars_local(xl.get_local_size()) { - //xl_copy = xl.new_copy(); // no need to copy at this point - //xu_copy = xu.new_copy(); // no need to copy at this point } hiopFixedVarsRelaxer::~hiopFixedVarsRelaxer() { - if(xl_copy) delete xl_copy; - if(xu_copy) delete xu_copy; } void hiopFixedVarsRelaxer:: relax(const double& fixed_var_tol, const double& fixed_var_perturb, hiopVector& xl, hiopVector& xu) { - double *xla=xl.local_data(), *xua=xu.local_data(), *v; - size_type n=xl.get_local_size(); - double xuabs; - for(index_type i=0; i1.) 
{ @@ -464,9 +443,4 @@ hiopNLPObjGradScaling::~hiopNLPObjGradScaling() if(scale_factor_cd) delete scale_factor_cd; } - - - - - } //end of namespace diff --git a/src/Optimization/hiopNlpTransforms.hpp b/src/Optimization/hiopNlpTransforms.hpp index 9c7f37d61..d0634bf7f 100644 --- a/src/Optimization/hiopNlpTransforms.hpp +++ b/src/Optimization/hiopNlpTransforms.hpp @@ -153,7 +153,7 @@ class hiopFixedVarsRemover : public hiopNlpTransformation hiopFixedVarsRemover(hiopNlpFormulation* nlp, const hiopVector& xl, const hiopVector& xu, - const double& fixedVarTol, + const double& fixed_var_tol, const size_type& numFixedVars, const size_type& numFixedVars_local); ~hiopFixedVarsRemover(); @@ -287,7 +287,7 @@ class hiopFixedVarsRemover : public hiopNlpTransformation size_type n_fixed_vars_local; size_type n_fixed_vars; - double fixedVarTol; + double fixed_var_tol_; size_type n_fs; //full-space n size_type n_rs; //reduced-space n @@ -326,20 +326,20 @@ class hiopFixedVarsRelaxer : public hiopNlpTransformation virtual ~hiopFixedVarsRelaxer(); /* number of vars in the NLP after the tranformation */ - inline size_type n_post() { /*assert(xl_copy);*/ return n_vars; } //xl_copy->get_size(); } + inline size_type n_post() {return n_vars;} /* number of vars in the NLP to which the tranformation is to be applied */ - virtual size_type n_pre () { /*assert(xl_copy);*/ return n_vars; } //xl_copy->get_size(); } + virtual size_type n_pre() {return n_vars;} - inline size_type n_post_local() { return n_vars_local; } //xl_copy->get_local_size(); } - inline size_type n_pre_local() { return n_vars_local; } //xl_copy->get_local_size(); } + inline size_type n_post_local() {return n_vars_local;} + inline size_type n_pre_local() {return n_vars_local;} inline bool setup() { return true; } void relax(const double& fixed_var_tol, const double& fixed_var_perturb, hiopVector& xl, hiopVector& xu); private: - hiopVector*xl_copy, *xu_copy; - size_type n_vars; int n_vars_local; + size_type n_vars; + size_type n_vars_local; }; /** @@ -514,10 +514,10 @@ class hiopBoundsRelaxer : public hiopNlpTransformation const hiopVector& du); virtual ~hiopBoundsRelaxer(); - inline size_type n_post() { /*assert(xl_copy);*/ return n_vars; } - virtual size_type n_pre () { /*assert(xl_copy);*/ return n_vars; } - inline size_type n_post_local() { return n_vars_local; } - inline size_type n_pre_local() { return n_vars_local; } + inline size_type n_post() {return n_vars;} + virtual size_type n_pre() {return n_vars;} + inline size_type n_post_local() {return n_vars_local;} + inline size_type n_pre_local() {return n_vars_local;} inline bool setup() { return true; } inline hiopVector* apply_to_x(hiopVector& x) From 1d7e80032b4c839a309fd3e45a64aeee6ab582f6 Mon Sep 17 00:00:00 2001 From: nychiang Date: Tue, 7 Mar 2023 20:01:42 -0800 Subject: [PATCH 2/5] fix print issue --- src/LinAlg/hiopVectorPar.cpp | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/src/LinAlg/hiopVectorPar.cpp b/src/LinAlg/hiopVectorPar.cpp index 725fe3967..236822ce9 100644 --- a/src/LinAlg/hiopVectorPar.cpp +++ b/src/LinAlg/hiopVectorPar.cpp @@ -1301,10 +1301,6 @@ bool hiopVectorPar::process_bounds_local(const hiopVector& xu, double* ixl_vec = ixl.local_data(); double* ixu_vec = ixu.local_data(); -#ifdef HIOP_DEEPCHECKS - const int maxBndsCloseMsgs=3; - int nBndsClose=0; -#endif int nlocal = this->get_local_size(); for(int i=0;i -1e20) { @@ -1334,18 +1330,32 @@ bool hiopVectorPar::process_bounds_local(const hiopVector& xu, } else { #ifdef 
HIOP_DEEPCHECKS #define min_dist 1e-8 + const int maxBndsCloseMsgs=3; + int nBndsClose=0; + int myrank_ = 0; + int numranks = 1; +#ifdef HIOP_USE_MPI + int err = MPI_Comm_rank(comm_, &myrank_); assert(err==MPI_SUCCESS); + err = MPI_Comm_size(comm_, &numranks); assert(err==MPI_SUCCESS); +#endif + if(fixed_var_tolprintf(hovWarning, - "Lower (%g) and upper bound (%g) for variable %d are very close. " - "Consider fixing this variable or increase 'fixed_var_tolerance'.\n", - i, xl_vec[i], xu_vec[i]); + if(myrank_==0) { + fprintf(stdout, + "Lower (%g) and upper bound (%g) for variable %d are very close. " + "Consider fixing this variable or increase 'fixed_var_tolerance'.\n", + i, xl_vec[i], xu_vec[i]); + } nBndsClose++; } } if(nBndsClose==maxBndsCloseMsgs) { - log->printf(hovWarning, "[further messages were surpressed]\n"); + if(myrank_==0) { + fprintf(stdout, + "[further messages were surpressed]\n"); + } nBndsClose++; } } From d1d1c4d6ced8096ae89388d5db5025ca1136d2b8 Mon Sep 17 00:00:00 2001 From: nychiang Date: Tue, 7 Mar 2023 23:40:59 -0800 Subject: [PATCH 3/5] cuda/hip implementation & unit test --- src/LinAlg/VectorCudaKernels.cu | 128 ++++++++++++++++++++++++++++++ src/LinAlg/VectorCudaKernels.hpp | 19 +++++ src/LinAlg/VectorHipKernels.cpp | 128 ++++++++++++++++++++++++++++++ src/LinAlg/VectorHipKernels.hpp | 19 +++++ src/LinAlg/hiopVector.hpp | 6 +- src/LinAlg/hiopVectorCuda.cpp | 47 +++++++++++ src/LinAlg/hiopVectorCuda.hpp | 13 +++ src/LinAlg/hiopVectorHip.cpp | 53 ++++++++++++- src/LinAlg/hiopVectorHip.hpp | 13 +++ src/LinAlg/hiopVectorPar.cpp | 6 +- src/LinAlg/hiopVectorPar.hpp | 2 +- src/LinAlg/hiopVectorRaja.hpp | 2 +- src/LinAlg/hiopVectorRajaImpl.hpp | 18 ++--- tests/LinAlg/vectorTests.hpp | 102 ++++++++++++++++++++++++ tests/testVector.cpp | 2 + 15 files changed, 536 insertions(+), 22 deletions(-) diff --git a/src/LinAlg/VectorCudaKernels.cu b/src/LinAlg/VectorCudaKernels.cu index b1c9a5911..c1b7cd77c 100644 --- a/src/LinAlg/VectorCudaKernels.cu +++ b/src/LinAlg/VectorCudaKernels.cu @@ -640,6 +640,71 @@ __global__ void copyToStartingAt_w_pattern_cu(int n_src, } } +/** @brief process variable bounds */ +__global__ void process_bounds_cu(int n, + const double* xl, + const double* xu, + double* ixl, + double* ixu, + int* bnds_low, + int* bnds_upp, + int* bnds_lu, + int* fixed_vars, + double fixed_var_tol) +{ + const int num_threads = blockDim.x * gridDim.x; + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + + for (int i = tid; i < n; i += num_threads) { + // preemptive loop to reduce number of iterations? 
+ bnds_low[i] = 0; + bnds_upp[i] = 0; + bnds_lu[i] = 0; + fixed_vars[i] = 0; + + if(xl[i] > -1e20) { + ixl[i] = 1.; + bnds_low[i] = 1; + if(xu[i] < 1e20) { + bnds_lu[i] = 1; + } + } else { + ixl[i] = 0.; + } + + if(xu[i] < 1e20) { + ixu[i] = 1.; + bnds_upp[i] = 1; + } else { + ixu[i] = 0.; + } + + if(xu[i] < 1e20 && + fabs(xl[i]-xu[i]) <= fixed_var_tol*fmax(1.,std::fabs(xu[i]))) { + fixed_vars[i] = 1; + } + } +} + +/** @brief relax variable bounds */ +__global__ void relax_bounds_cu(int n, + double* xla, + double* xua, + double fixed_var_tol, + double fixed_var_perturb) +{ + const int num_threads = blockDim.x * gridDim.x; + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + + for (int i = tid; i < n; i += num_threads) { + double xuabs = fabs(xua[i]); + if(fabs(xua[i]-xla[i]) <= fixed_var_tol*fmax(1.,xuabs)) { + xua[i] += fixed_var_perturb * std::fmax(1.,xuabs); + xla[i] -= fixed_var_perturb * std::fmax(1.,xuabs); + } + } +} + namespace hiop { namespace cuda @@ -1276,7 +1341,70 @@ void copyToStartingAt_w_pattern_kernel(int n_src, dd); } +/** @brief process variable bounds */ +void process_bounds_local_kernel(int n_local, + const double* xl, + const double* xu, + double* ixl, + double* ixu, + int& n_bnds_low, + int& n_bnds_upp, + int& n_bnds_lu, + int& n_fixed_vars, + double fixed_var_tol) +{ + int num_blocks = (n_local+block_size-1)/block_size; + + thrust::device_ptr bnds_low_d_ptr = thrust::device_malloc(n_local*sizeof(int)); + int* bnds_low_r_ptr = thrust::raw_pointer_cast(bnds_low_d_ptr); + + thrust::device_ptr bnds_upp_d_ptr = thrust::device_malloc(n_local*sizeof(int)); + int* bnds_upp_r_ptr = thrust::raw_pointer_cast(bnds_upp_d_ptr); + + thrust::device_ptr bnds_lu_d_ptr = thrust::device_malloc(n_local*sizeof(int)); + int* bnds_lu_r_ptr = thrust::raw_pointer_cast(bnds_lu_d_ptr); + thrust::device_ptr n_fixed_vars_d_ptr = thrust::device_malloc(n_local*sizeof(int)); + int* n_fixed_vars_r_ptr = thrust::raw_pointer_cast(n_fixed_vars_d_ptr); + + // set values + process_bounds_cu<<>>(n_local, + xl, + xu, + ixl, + ixu, + bnds_low_r_ptr, + bnds_upp_r_ptr, + bnds_lu_r_ptr, + n_fixed_vars_r_ptr, + fixed_var_tol); + + // compute sum + n_bnds_low = thrust::reduce(thrust::device, bnds_low_d_ptr, bnds_low_d_ptr+n_local, 0.0, thrust::plus()); + n_bnds_upp = thrust::reduce(thrust::device, bnds_upp_d_ptr, bnds_upp_d_ptr+n_local, 0.0, thrust::plus()); + n_bnds_lu = thrust::reduce(thrust::device, bnds_lu_d_ptr, bnds_lu_d_ptr+n_local, 0.0, thrust::plus()); + n_fixed_vars = thrust::reduce(thrust::device, n_fixed_vars_d_ptr, n_fixed_vars_d_ptr+n_local, 0.0, thrust::plus()); + + thrust::device_free(bnds_low_d_ptr); + thrust::device_free(bnds_upp_d_ptr); + thrust::device_free(bnds_lu_d_ptr); + thrust::device_free(n_fixed_vars_d_ptr); +} + +/** @brief relax variable bounds */ +void relax_bounds_kernel(int n_local, + double* xl, + double* xu, + double fixed_var_tol, + double fixed_var_perturb) +{ + int num_blocks = (n_local+block_size-1)/block_size; + relax_bounds_cu<<>>(n_local, + xl, + xu, + fixed_var_tol, + fixed_var_perturb); +} /// for hiopVectorIntCuda /** diff --git a/src/LinAlg/VectorCudaKernels.hpp b/src/LinAlg/VectorCudaKernels.hpp index 90fe1f355..a3ec501dd 100644 --- a/src/LinAlg/VectorCudaKernels.hpp +++ b/src/LinAlg/VectorCudaKernels.hpp @@ -296,6 +296,25 @@ void copyToStartingAt_w_pattern_kernel(int n_src, double *vd, const double* dd); +/** @brief process variable bounds */ +void process_bounds_local_kernel(int n_local, + const double* xl, + const double* xu, + double* ixl, + double* ixu, + 
int& n_bnds_low, + int& n_bnds_upp, + int& n_bnds_lu, + int& n_fixed_vars, + double fixed_var_tol); + +/** @brief relax variable bounds */ +void relax_bounds_kernel(int n_local, + double* xl, + double* xu, + double fixed_var_tol, + double fixed_var_perturb); + /// for hiopVectorIntCuda /** * @brief Set the vector entries to be a linear space of starting at i0 containing evenly diff --git a/src/LinAlg/VectorHipKernels.cpp b/src/LinAlg/VectorHipKernels.cpp index 413ee183f..21f791abd 100644 --- a/src/LinAlg/VectorHipKernels.cpp +++ b/src/LinAlg/VectorHipKernels.cpp @@ -635,6 +635,71 @@ __global__ void copyToStartingAt_w_pattern_hip(int n_src, } } +/** @brief process variable bounds */ +__global__ void process_bounds_hip(int n, + const double* xl, + const double* xu, + double* ixl, + double* ixu, + double* bnds_low, + double* bnds_upp, + double* bnds_lu, + double* fixed_vars, + double fixed_var_tol) +{ + const int num_threads = blockDim.x * gridDim.x; + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + + for (int i = tid; i < n; i += num_threads) { + // preemptive loop to reduce number of iterations? + bnds_low[i] = 0; + bnds_upp[i] = 0; + bnds_lu[i] = 0; + fixed_vars[i] = 0; + + if(xl[i] > -1e20) { + ixl[i] = 1.; + bnds_low[i] = 1; + if(xu[i] < 1e20) { + bnds_lu[i] = 1; + } + } else { + ixl[i] = 0.; + } + + if(xu[i] < 1e20) { + ixu[i] = 1.; + bnds_upp[i] = 1; + } else { + ixu[i] = 0.; + } + + if(xu[i] < 1e20 && + fabs(xl[i]-xu[i]) <= fixed_var_tol*fmax(1.,std::fabs(xu[i]))) { + fixed_vars[i] = 1; + } + } +} + +/** @brief relax variable bounds */ +__global__ void relax_bounds_hip(int n, + double* xla, + double* xua, + double fixed_var_tol, + double fixed_var_perturb) +{ + const int num_threads = blockDim.x * gridDim.x; + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + + for (int i = tid; i < n; i += num_threads) { + double xuabs = fabs(xua[i]); + if(fabs(xua[i]-xla[i]) <= fixed_var_tol*fmax(1.,xuabs)) { + xua[i] += fixed_var_perturb * std::fmax(1.,xuabs); + xla[i] -= fixed_var_perturb * std::fmax(1.,xuabs); + } + } +} + namespace hiop { namespace hip @@ -1271,7 +1336,70 @@ void copyToStartingAt_w_pattern_kernel(int n_src, dd); } +/** @brief process variable bounds */ +void process_bounds_local_kernel(int n_local, + const double* xl, + const double* xu, + double* ixl, + double* ixu, + int& n_bnds_low, + int& n_bnds_upp, + int& n_bnds_lu, + int& n_fixed_vars, + double fixed_var_tol) +{ + int num_blocks = (n_local+block_size-1)/block_size; + + thrust::device_ptr bnds_low_d_ptr = thrust::device_malloc(n_local*sizeof(int)); + int* bnds_low_r_ptr = thrust::raw_pointer_cast(bnds_low_d_ptr); + + thrust::device_ptr bnds_upp_d_ptr = thrust::device_malloc(n_local*sizeof(int)); + int* bnds_upp_r_ptr = thrust::raw_pointer_cast(bnds_upp_d_ptr); + + thrust::device_ptr bnds_lu_d_ptr = thrust::device_malloc(n_local*sizeof(int)); + int* bnds_lu_r_ptr = thrust::raw_pointer_cast(bnds_lu_d_ptr); + thrust::device_ptr n_fixed_vars_d_ptr = thrust::device_malloc(n_local*sizeof(int)); + int* n_fixed_vars_r_ptr = thrust::raw_pointer_cast(n_fixed_vars_d_ptr); + + // set values + process_bounds_hip<<>>(n, + xl, + xu, + ixl, + ixu, + bnds_low_r_ptr, + bnds_upp_r_ptr, + bnds_lu_r_ptr, + n_fixed_vars_r_ptr, + fixed_var_tol); + + // compute sum + n_bnds_low = thrust::reduce(thrust::device, bnds_low_d_ptr, bnds_low_d_ptr+n_local, 0.0, thrust::plus()); + n_bnds_upp = thrust::reduce(thrust::device, bnds_upp_d_ptr, bnds_upp_d_ptr+n_local, 0.0, thrust::plus()); + n_bnds_lu = thrust::reduce(thrust::device, 
bnds_lu_d_ptr, bnds_lu_d_ptr+n_local, 0.0, thrust::plus()); + n_fixed_vars = thrust::reduce(thrust::device, n_fixed_vars_d_ptr, n_fixed_vars_d_ptr+n_local, 0.0, thrust::plus()); + + thrust::device_free(bnds_low_d_ptr); + thrust::device_free(bnds_upp_d_ptr); + thrust::device_free(bnds_lu_d_ptr); + thrust::device_free(n_fixed_vars_d_ptr); +} + +/** @brief relax variable bounds */ +void relax_bounds_kernel(int n_local, + double* xl, + double* xu, + double fixed_var_tol, + double fixed_var_perturb) +{ + int num_blocks = (n_local+block_size-1)/block_size; + relax_bounds_hip<<>>(n_local, + xl, + xu, + fixed_var_tol, + fixed_var_perturb); +} /// for hiopVectorIntHip /** diff --git a/src/LinAlg/VectorHipKernels.hpp b/src/LinAlg/VectorHipKernels.hpp index b1c0b8022..adc851b17 100644 --- a/src/LinAlg/VectorHipKernels.hpp +++ b/src/LinAlg/VectorHipKernels.hpp @@ -296,6 +296,25 @@ void copyToStartingAt_w_pattern_kernel(int n_src, double *vd, const double* dd); +/** @brief process variable bounds */ +void process_bounds_local_kernel(int n_local, + const double* xl, + const double* xu, + double* ixl, + double* ixu, + int& n_bnds_low, + int& n_bnds_upp, + int& n_bnds_lu, + int& n_fixed_vars, + double fixed_var_tol); + +/** @brief relax variable bounds */ +void relax_bounds_kernel(int n_local, + double* xl, + double* xu, + double fixed_var_tol, + double fixed_var_perturb); + /// for hiopVectorIntHip /** * @brief Set the vector entries to be a linear space of starting at i0 containing evenly diff --git a/src/LinAlg/hiopVector.hpp b/src/LinAlg/hiopVector.hpp index c2f1beda1..2ed99890f 100644 --- a/src/LinAlg/hiopVector.hpp +++ b/src/LinAlg/hiopVector.hpp @@ -1014,11 +1014,10 @@ class hiopVector * @param[out] n_bnds_low - number of variables with lower bounds only * @param[out] n_bnds_upp - number of variables with upper bounds only * @param[out] n_bnds_lu - number of variables with both lower and upper bounds - * @param[out] nfixed_vars - number of fixed variables + * @param[out] n_fixed_vars - number of fixed variables * @param[in] fixed_var_tol - tolerance used to define fixed variables * * @pre this is a local method - * @todo: add unit test, or should we remove this function? */ virtual bool process_bounds_local(const hiopVector& xu, hiopVector& ixl, @@ -1026,7 +1025,7 @@ class hiopVector size_type& n_bnds_low, size_type& n_bnds_upp, size_type& n_bnds_lu, - size_type& nfixed_vars, + size_type& n_fixed_vars, const double& fixed_var_tol) = 0; /** @@ -1038,7 +1037,6 @@ class hiopVector * @param[in] fixed_var_perturb - perturbation added to bounds * * @pre this is a local method - * @todo: add unit test, or should we remove this function? */ virtual void relax_bounds_vec(hiopVector& xu, const double& fixed_var_tol, diff --git a/src/LinAlg/hiopVectorCuda.cpp b/src/LinAlg/hiopVectorCuda.cpp index 0be1e2eaf..e0123cfae 100644 --- a/src/LinAlg/hiopVectorCuda.cpp +++ b/src/LinAlg/hiopVectorCuda.cpp @@ -1087,6 +1087,53 @@ bool hiopVectorCuda::is_equal(const hiopVector& vec) const assert(false&&"NOT needed. Remove this func. 
TODO"); } +bool hiopVectorCuda::process_bounds_local(const hiopVector& xu, + hiopVector& ixl, + hiopVector& ixu, + size_type& n_bnds_low, + size_type& n_bnds_upp, + size_type& n_bnds_lu, + size_type& n_fixed_vars, + const double& fixed_var_tol) +{ +#ifdef HIOP_DEEPCHECKS + assert(xu.get_local_size()==n_local_); + assert(ixl.get_local_size()==n_local_); + assert(ixu.get_local_size()==n_local_); +#endif + const double* xl_arr = data_; + const double* xu_arr = xu.local_data_const(); + double* ixl_arr = ixl.local_data(); + double* ixu_arr = ixu.local_data(); + + hiop::cuda::process_bounds_local_kernel(n_local_, + xl_arr, + xu_arr, + ixl_arr, + ixu_arr, + n_bnds_low, + n_bnds_upp, + n_bnds_lu, + n_fixed_vars, + fixed_var_tol); +} + +void hiopVectorCuda::relax_bounds_vec(hiopVector& xu, + const double& fixed_var_tol, + const double& fixed_var_perturb) +{ +#ifdef HIOP_DEEPCHECKS + assert(xu.get_local_size()==n_local_); +#endif + double* xl_arr = data_; + double* xu_arr = xu.local_data(); + + hiop::cuda::relax_bounds_kernel(n_local_, + xl_arr, + xu_arr, + fixed_var_tol, + fixed_var_perturb); +} } // namespace hiop diff --git a/src/LinAlg/hiopVectorCuda.hpp b/src/LinAlg/hiopVectorCuda.hpp index 5d504521d..0b16273f8 100644 --- a/src/LinAlg/hiopVectorCuda.hpp +++ b/src/LinAlg/hiopVectorCuda.hpp @@ -347,6 +347,19 @@ class hiopVectorCuda : public hiopVector /// @brief check if `this` vector is identical to `vec` virtual bool is_equal(const hiopVector& vec) const; + virtual bool process_bounds_local(const hiopVector& xu, + hiopVector& ixl, + hiopVector& ixu, + size_type& n_bnds_low, + size_type& n_bnds_upp, + size_type& n_bnds_lu, + size_type& n_fixed_vars, + const double& fixed_var_tol); + + virtual void relax_bounds_vec(hiopVector& xu, + const double& fixed_var_tol, + const double& fixed_var_perturb); + /* functions for this class */ inline MPI_Comm get_mpi_comm() const { return comm_; } diff --git a/src/LinAlg/hiopVectorHip.cpp b/src/LinAlg/hiopVectorHip.cpp index ab5b88f15..7a324b2ff 100644 --- a/src/LinAlg/hiopVectorHip.cpp +++ b/src/LinAlg/hiopVectorHip.cpp @@ -940,9 +940,9 @@ bool hiopVectorHip::matchesPattern(const hiopVector& pattern) /** @brief Adjusts duals. */ void hiopVectorHip::adjustDuals_plh(const hiopVector& xvec, - const hiopVector& ixvec, - const double& mu, - const double& kappa) + const hiopVector& ixvec, + const double& mu, + const double& kappa) { #ifdef HIOP_DEEPCHECKS assert(xvec.get_local_size()==n_local_); @@ -1091,6 +1091,53 @@ bool hiopVectorHip::is_equal(const hiopVector& vec) const assert(false&&"NOT needed. Remove this func. 
TODO"); } +bool hiopVectorHip::process_bounds_local(const hiopVector& xu, + hiopVector& ixl, + hiopVector& ixu, + size_type& n_bnds_low, + size_type& n_bnds_upp, + size_type& n_bnds_lu, + size_type& n_fixed_vars, + const double& fixed_var_tol) +{ +#ifdef HIOP_DEEPCHECKS + assert(xu.get_local_size()==n_local_); + assert(ixl.get_local_size()==n_local_); + assert(ixu.get_local_size()==n_local_); +#endif + const double* xl_arr = data_; + const double* xu_arr = xu.local_data_const(); + double* ixl_arr = ixl.local_data(); + double* ixu_arr = ixu.local_data(); + + hiop::hip::process_bounds_local_kernel(n_local_, + xl_arr, + xu_arr, + ixl_arr, + ixu_arr, + n_bnds_low, + n_bnds_upp, + n_bnds_lu, + n_fixed_vars, + fixed_var_tol); +} + +void hiopVectorHip::relax_bounds_vec(hiopVector& xu, + const double& fixed_var_tol, + const double& fixed_var_perturb) +{ +#ifdef HIOP_DEEPCHECKS + assert(xu.get_local_size()==n_local_); +#endif + const double* xl_arr = data_; + const double* xu_arr = xu.local_data_const(); + + hiop::hip::process_bounds_local_kernel(n_local_, + xl_arr, + xu_arr, + fixed_var_tol, + fixed_var_perturb); +} } // namespace hiop diff --git a/src/LinAlg/hiopVectorHip.hpp b/src/LinAlg/hiopVectorHip.hpp index addaf1d03..cf1edb117 100644 --- a/src/LinAlg/hiopVectorHip.hpp +++ b/src/LinAlg/hiopVectorHip.hpp @@ -349,6 +349,19 @@ class hiopVectorHip : public hiopVector /// @brief check if `this` vector is identical to `vec` virtual bool is_equal(const hiopVector& vec) const; + virtual bool process_bounds_local(const hiopVector& xu, + hiopVector& ixl, + hiopVector& ixu, + size_type& n_bnds_low, + size_type& n_bnds_upp, + size_type& n_bnds_lu, + size_type& n_fixed_vars, + const double& fixed_var_tol); + + virtual void relax_bounds_vec(hiopVector& xu, + const double& fixed_var_tol, + const double& fixed_var_perturb); + /* functions for this class */ inline MPI_Comm get_mpi_comm() const { return comm_; } diff --git a/src/LinAlg/hiopVectorPar.cpp b/src/LinAlg/hiopVectorPar.cpp index 236822ce9..b70c7b693 100644 --- a/src/LinAlg/hiopVectorPar.cpp +++ b/src/LinAlg/hiopVectorPar.cpp @@ -1288,13 +1288,13 @@ bool hiopVectorPar::process_bounds_local(const hiopVector& xu, size_type& n_bnds_low, size_type& n_bnds_upp, size_type& n_bnds_lu, - size_type& nfixed_vars, + size_type& n_fixed_vars, const double& fixed_var_tol) { n_bnds_low = 0; n_bnds_upp = 0; n_bnds_lu = 0; - nfixed_vars = 0; + n_fixed_vars = 0; const double* xl_vec = this->local_data_const(); const double* xu_vec = xu.local_data_const(); @@ -1326,7 +1326,7 @@ bool hiopVectorPar::process_bounds_local(const hiopVector& xu, if( xu_vec[i] < 1e20 && fabs(xl_vec[i]-xu_vec[i]) <= fixed_var_tol*std::fmax(1.,std::fabs(xu_vec[i]))) { - nfixed_vars++; + n_fixed_vars++; } else { #ifdef HIOP_DEEPCHECKS #define min_dist 1e-8 diff --git a/src/LinAlg/hiopVectorPar.hpp b/src/LinAlg/hiopVectorPar.hpp index b1d967549..f6f36995a 100644 --- a/src/LinAlg/hiopVectorPar.hpp +++ b/src/LinAlg/hiopVectorPar.hpp @@ -315,7 +315,7 @@ class hiopVectorPar : public hiopVector size_type& n_bnds_low, size_type& n_bnds_upp, size_type& n_bnds_lu, - size_type& nfixed_vars, + size_type& n_fixed_vars, const double& fixed_var_tol); virtual void relax_bounds_vec(hiopVector& xu, diff --git a/src/LinAlg/hiopVectorRaja.hpp b/src/LinAlg/hiopVectorRaja.hpp index 1a375be7c..17ba7dd82 100644 --- a/src/LinAlg/hiopVectorRaja.hpp +++ b/src/LinAlg/hiopVectorRaja.hpp @@ -318,7 +318,7 @@ class hiopVectorRaja : public hiopVector size_type& n_bnds_low, size_type& n_bnds_upp, size_type& n_bnds_lu, - 
size_type& nfixed_vars, + size_type& n_fixed_vars, const double& fixed_var_tol); virtual void relax_bounds_vec(hiopVector& xu, diff --git a/src/LinAlg/hiopVectorRajaImpl.hpp b/src/LinAlg/hiopVectorRajaImpl.hpp index eac76e6c0..f61ee8d83 100644 --- a/src/LinAlg/hiopVectorRajaImpl.hpp +++ b/src/LinAlg/hiopVectorRajaImpl.hpp @@ -2239,7 +2239,7 @@ bool hiopVectorRaja::process_bounds_local(const hiopVector& xu, size_type& n_bnds_low, size_type& n_bnds_upp, size_type& n_bnds_lu, - size_type& nfixed_vars, + size_type& n_fixed_vars, const double& fixed_var_tol) { #ifdef HIOP_DEEPCHECKS @@ -2261,7 +2261,7 @@ bool hiopVectorRaja::process_bounds_local(const hiopVector& xu, RAJA::ReduceSum< hiop_raja_reduce, int > sum_n_bnds_low(0); RAJA::ReduceSum< hiop_raja_reduce, int > sum_n_bnds_upp(0); RAJA::ReduceSum< hiop_raja_reduce, int > sum_n_bnds_lu(0); - RAJA::ReduceSum< hiop_raja_reduce, int > sum_nfixed_vars(0); + RAJA::ReduceSum< hiop_raja_reduce, int > sum_n_fixed_vars(0); RAJA::forall< hiop_raja_exec >( RAJA::RangeSegment(0, nlocal), RAJA_LAMBDA(RAJA::Index_type i) @@ -2285,20 +2285,20 @@ bool hiopVectorRaja::process_bounds_local(const hiopVector& xu, if(xu_vec[i] < 1e20 && fabs(xl_vec[i]-xu_vec[i]) <= fixed_var_tol*std::fmax(1.,std::fabs(xu_vec[i]))) { - sum_nfixed_vars += 1; + sum_n_fixed_vars += 1; } }); n_bnds_low = sum_n_bnds_low.get(); n_bnds_upp = sum_n_bnds_upp.get(); n_bnds_lu = sum_n_bnds_lu.get(); - nfixed_vars = sum_nfixed_vars.get(); + n_fixed_vars = sum_n_fixed_vars.get(); return true; } template -bool hiopVectorRaja::relax_bounds_vec(hiopVector& xu, +void hiopVectorRaja::relax_bounds_vec(hiopVector& xu, const double& fixed_var_tol, const double& fixed_var_perturb) { @@ -2311,17 +2311,15 @@ bool hiopVectorRaja::relax_bounds_vec(hiopVector& xu, double *xua = xu.local_data(); size_type n = this->get_local_size(); - RAJA::forall< hiop_raja_exec >( RAJA::RangeSegment(0, nlocal), + RAJA::forall< hiop_raja_exec >( RAJA::RangeSegment(0, n), RAJA_LAMBDA(RAJA::Index_type i) { double xuabs = std::fabs(xua[i]); if(std::fabs(xua[i]-xla[i]) <= fixed_var_tol*std::fmax(1.,xuabs)) { - xua[i] += fixed_var_perturb*std::fmax(1.,xuabs); - xla[i] -= fixed_var_perturb*std::fmax(1.,xuabs); + xua[i] += fixed_var_perturb * std::fmax(1.,xuabs); + xla[i] -= fixed_var_perturb * std::fmax(1.,xuabs); } }); - - return true; } } // namespace hiop diff --git a/tests/LinAlg/vectorTests.hpp b/tests/LinAlg/vectorTests.hpp index 225be242a..2a7ea1746 100644 --- a/tests/LinAlg/vectorTests.hpp +++ b/tests/LinAlg/vectorTests.hpp @@ -2050,6 +2050,108 @@ class VectorTests : public TestBase return reduceReturn(fail, &x); } + /** + * @brief Test that hiop correctly processes variable bounds + * + * @note This is local method only + */ + bool vector_process_bounds(hiop::hiopVector& xl, + hiop::hiopVector& xu, + hiop::hiopVector& ixl, + hiop::hiopVector& ixu, + const int rank = 0) + { + const local_ordinal_type N = getLocalSize(&xl); + assert(N == getLocalSize(&xu)); + assert(N == getLocalSize(&ixl)); + assert(N == getLocalSize(&ixu)); + assert(N >= 3); // only test N>=3 + int fail = 0; + + int n_low = 0; + int n_upp = 0; + int n_lu = 0; + int n_fixed = 0; + double fixed_var_tol = 1e-8; + + // xl = [1, .., 1, -inf] + xl.setToConstant(one); + setLocalElement(&xl, N-1, -one/zero); + + // xl = [inf, 1, 2, .., 2] + xu.setToConstant(two); + setLocalElement(&xu, 0, one/zero); + setLocalElement(&xu, 1, one); + + xl.process_bounds_local(xu, ixl, ixu, n_low, n_upp, n_lu, n_fixed, fixed_var_tol); + + // Check that the last element of rank 
zero's ixl is zero, that the first element of ixu is
+    // zero, and that the bound counts are as expected
+    fail += verifyAnswer(&ixl,
+      [=] (local_ordinal_type i) -> real_type
+      {
+        return (i == N-1) ? 0. : 1.;
+      });
+
+    fail += verifyAnswer(&ixu,
+      [=] (local_ordinal_type i) -> real_type
+      {
+        return (i == 0) ? 0. : 1.;
+      });
+
+    fail += (n_low != N-1);
+    fail += (n_upp != N-1);
+    fail += (n_lu != N-2);
+    fail += (n_fixed != 1);
+
+    printMessage(fail, __func__, rank);
+    return reduceReturn(fail, &xl);
+  }
+
+  /**
+   * @brief Test that hiop correctly relaxes variable bounds
+   *
+   * @note This is local method only
+   */
+  bool vector_relax_bounds(hiop::hiopVector& xl,
+                           hiop::hiopVector& xu,
+                           const int rank = 0)
+  {
+    const local_ordinal_type N = getLocalSize(&xl);
+    assert(N == getLocalSize(&xu));
+    int fail = 0;
+
+    double fixed_var_tol = 1e-8;
+    double fixed_var_perturb = 1e-1;
+
+    // xl = [1, .., 1, 2]
+    xl.setToConstant(one);
+    setLocalElement(&xl, N-1, two);
+
+    // xu = [2, .., 2]
+    xu.setToConstant(two);
+
+    xl.relax_bounds_vec(xu, fixed_var_tol, fixed_var_perturb);
+
+    // Check that only the fixed variable (the last element) had its
+    // bounds pushed apart by fixed_var_perturb*fmax(1., |xu|)
+    fail += verifyAnswer(&xl,
+      [=] (local_ordinal_type i) -> real_type
+      {
+        return (i == N-1) ? 1.8 : one;
+      });
+
+    fail += verifyAnswer(&xu,
+      [=] (local_ordinal_type i) -> real_type
+      {
+        return (i == N-1) ? 2.2 : two;
+      });
+
+    printMessage(fail, __func__, rank);
+    return reduceReturn(fail, &xl);
+  }
+
+
   /// Returns element _i_ of vector _x_.
   real_type getLocalElement(hiop::hiopVector* x, local_ordinal_type i)
   {
diff --git a/tests/testVector.cpp b/tests/testVector.cpp
index 65834293a..01a79ba5e 100644
--- a/tests/testVector.cpp
+++ b/tests/testVector.cpp
@@ -326,6 +326,8 @@ int runTests(const char* mem_space, MPI_Comm comm)
   fail += test.vectorMatchesPattern(*x, *y, rank);
   fail += test.vectorAdjustDuals_plh(*x, *y, *z, *a, rank);
 
+  fail += test.vector_process_bounds(*x, *y, *z, *a);
+  fail += test.vector_relax_bounds(*x, *y);
 
   if (rank == 0)
   {

From 641fa109e0621d362a27040ea0590a1b4c98d756 Mon Sep 17 00:00:00 2001
From: nychiang
Date: Wed, 8 Mar 2023 22:22:31 -0800
Subject: [PATCH 4/5] add some functions for follow-up development of process_constraints

---
 src/LinAlg/VectorCudaKernels.cu         |  37 ++++++++++++++++++++++++-
 src/LinAlg/VectorCudaKernels.hpp        |   3 ++
 src/LinAlg/VectorHipKernels.cpp         |  37 ++++++++++++++++++++++++-
 src/LinAlg/VectorHipKernels.hpp         |   3 ++
 src/LinAlg/hiopVector.hpp               |  10 ++++++-
 src/LinAlg/hiopVectorCuda.cpp           |  14 ++++++++++
 src/LinAlg/hiopVectorCuda.hpp           |   2 ++
 src/LinAlg/hiopVectorHip.cpp            |  14 ++++++++++
 src/LinAlg/hiopVectorHip.hpp            |   2 ++
 src/LinAlg/hiopVectorPar.cpp            |  22 +++++++++++++++
 src/LinAlg/hiopVectorPar.hpp            |   2 ++
 src/LinAlg/hiopVectorRaja.hpp           |   2 ++
 src/LinAlg/hiopVectorRajaImpl.hpp       |  29 +++++++++++++++++++
 src/Optimization/hiopNlpFormulation.cpp |  20 +++++--------
 src/Optimization/hiopNlpFormulation.hpp |   2 +-
 tests/LinAlg/vectorTests.hpp            |  28 +++++++++++++++++++
 tests/testVector.cpp                    |   7 +++--
 17 files changed, 214 insertions(+), 20 deletions(-)

diff --git a/src/LinAlg/VectorCudaKernels.cu b/src/LinAlg/VectorCudaKernels.cu
index c1b7cd77c..09948dff4 100644
--- a/src/LinAlg/VectorCudaKernels.cu
+++ b/src/LinAlg/VectorCudaKernels.cu
@@ -705,6 +705,24 @@ __global__ void relax_bounds_cu(int n,
   }
 }
 
+/** @brief set d_ptr[i] = 1 if d1[i] == d2[i], otherwirse 0 */
+__global__ void set_if_match_cu(int n,
+                                double* d_ptr,
+                                double* d1,
+                                double* d2)
+{
+  const int num_threads = blockDim.x * gridDim.x;
+  const int tid
= blockIdx.x * blockDim.x + threadIdx.x; + + for (int i = tid; i < n; i += num_threads) { + if(d1[i]==d2[i]) { + d_ptr[i] = 1; + } else { + d_ptr[i] = 0; + } + } +} + namespace hiop { namespace cuda @@ -1177,7 +1195,6 @@ double min_w_pattern_kernel(int n, const double* d1, const double* id, double ma thrust::device_ptr ret_dev_ptr = thrust::min_element(thrust::device, dv_ptr, dv_ptr+n); - // TODO: how to return double from device to host? double *ret_host = new double[1]; double *ret_ptr = thrust::raw_pointer_cast(ret_dev_ptr); cudaError_t cuerr = cudaMemcpy(ret_host, ret_ptr, (1)*sizeof(double), cudaMemcpyDeviceToHost); @@ -1341,6 +1358,24 @@ void copyToStartingAt_w_pattern_kernel(int n_src, dd); } +int num_match_local_kernel(int n, double* d1, const double* d2) +{ + int num_blocks = (n+block_size-1)/block_size; + + // TODO: how to avoid this temp vec? + thrust::device_ptr dv_ptr = thrust::device_malloc(n*sizeof(int)); + int* d_ptr = thrust::raw_pointer_cast(dv_ptr); + + // set d_ptr[i] = 1 if d1[i] == d2[i], otherwirse 0 + set_if_match_cu<<>>(n, d_ptr, d1, d2); + + int rval = thrust::reduce(thrust::device, d_ptr, d_ptr+n, 0, thrust::plus()); + + thrust::device_free(dv_ptr); + + return rval; +} + /** @brief process variable bounds */ void process_bounds_local_kernel(int n_local, const double* xl, diff --git a/src/LinAlg/VectorCudaKernels.hpp b/src/LinAlg/VectorCudaKernels.hpp index a3ec501dd..1afe55b82 100644 --- a/src/LinAlg/VectorCudaKernels.hpp +++ b/src/LinAlg/VectorCudaKernels.hpp @@ -296,6 +296,9 @@ void copyToStartingAt_w_pattern_kernel(int n_src, double *vd, const double* dd); +/// @brief return the numbers of identical elements between two vectors +int num_match_local_kernel(int n, double* d1, const double* d2); + /** @brief process variable bounds */ void process_bounds_local_kernel(int n_local, const double* xl, diff --git a/src/LinAlg/VectorHipKernels.cpp b/src/LinAlg/VectorHipKernels.cpp index 21f791abd..85acb17b0 100644 --- a/src/LinAlg/VectorHipKernels.cpp +++ b/src/LinAlg/VectorHipKernels.cpp @@ -700,6 +700,24 @@ __global__ void relax_bounds_hip(int n, } } +/** @brief set d_ptr[i] = 1 if d1[i] == d2[i], otherwirse 0 */ +__global__ void set_if_match_hip(int n, + double* d_ptr, + double* d1, + double* d2) +{ + const int num_threads = blockDim.x * gridDim.x; + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + + for (int i = tid; i < n; i += num_threads) { + if(d1[i]==d2[i]) { + d_ptr[i] = 1; + } else { + d_ptr[i] = 0; + } + } +} + namespace hiop { namespace hip @@ -1172,7 +1190,6 @@ double min_w_pattern_kernel(int n, const double* d1, const double* id, double ma thrust::device_ptr ret_dev_ptr = thrust::min_element(thrust::device, dv_ptr, dv_ptr+n); - // TODO: how to return double from device to host? double *ret_host = new double[1]; double *ret_ptr = thrust::raw_pointer_cast(ret_dev_ptr); hipError_t cuerr = hipMemcpy(ret_host, ret_ptr, (1)*sizeof(double), hipMemcpyDeviceToHost); @@ -1336,6 +1353,24 @@ void copyToStartingAt_w_pattern_kernel(int n_src, dd); } +int num_match_local_kernel(int n, double* d1, const double* d2) +{ + int num_blocks = (n+block_size-1)/block_size; + + // TODO: how to avoid this temp vec? 
+ thrust::device_ptr dv_ptr = thrust::device_malloc(n*sizeof(int)); + int* d_ptr = thrust::raw_pointer_cast(dv_ptr); + + // set d_ptr[i] = 1 if d1[i] == d2[i], otherwirse 0 + set_if_match_cu<<>>(n, d_ptr, d1, d2); + + int rval = thrust::reduce(thrust::device, d_ptr, d_ptr+n, 0, thrust::plus()); + + thrust::device_free(dv_ptr); + + return rval; +} + /** @brief process variable bounds */ void process_bounds_local_kernel(int n_local, const double* xl, diff --git a/src/LinAlg/VectorHipKernels.hpp b/src/LinAlg/VectorHipKernels.hpp index adc851b17..5d851e2de 100644 --- a/src/LinAlg/VectorHipKernels.hpp +++ b/src/LinAlg/VectorHipKernels.hpp @@ -296,6 +296,9 @@ void copyToStartingAt_w_pattern_kernel(int n_src, double *vd, const double* dd); +/// @brief return the numbers of identical elements between two vectors +int num_match_local_kernel(int n, double* d1, const double* d2); + /** @brief process variable bounds */ void process_bounds_local_kernel(int n_local, const double* xl, diff --git a/src/LinAlg/hiopVector.hpp b/src/LinAlg/hiopVector.hpp index 2ed99890f..80c720004 100644 --- a/src/LinAlg/hiopVector.hpp +++ b/src/LinAlg/hiopVector.hpp @@ -998,10 +998,18 @@ class hiopVector * @brief check if `this` vector is identical to `vec` * * @param[in] vec - vector used to be compared with `this` - * @todo: add unit test, or should we remove this function? + * @todo: should we remove this function? */ virtual bool is_equal(const hiopVector& vec) const = 0; + /** + * @brief return the numbers of identical elements between two vectors + * + * @param[in] vec - vector used to be compared with `this` + * @post `vec` is not modified + */ + virtual size_type num_match(const hiopVector& vec) const = 0; + /** * @brief preprocess bounds in a form supported by the NLP formulation. Returns counts of * the variables with lower, upper, and lower and upper bounds, as well of the fixed diff --git a/src/LinAlg/hiopVectorCuda.cpp b/src/LinAlg/hiopVectorCuda.cpp index e0123cfae..ea0d14b0c 100644 --- a/src/LinAlg/hiopVectorCuda.cpp +++ b/src/LinAlg/hiopVectorCuda.cpp @@ -1087,6 +1087,20 @@ bool hiopVectorCuda::is_equal(const hiopVector& vec) const assert(false&&"NOT needed. Remove this func. TODO"); } +size_type hiopVectorCuda::num_match(const hiopVector& vec) const +{ + double* dd = data_; + double* vd = vec.local_data(); + int sum_match = sum_match = hiop::cuda::num_match_local_kernel(n_local_, dd, vd); + +#ifdef HIOP_USE_MPI + int sumG; + int ierr=MPI_Allreduce(&sum_match, &sumG, 1, MPI_INT, MPI_SUM, comm_); assert(MPI_SUCCESS==ierr); + return sumG; +#endif + return sum_match; +} + bool hiopVectorCuda::process_bounds_local(const hiopVector& xu, hiopVector& ixl, hiopVector& ixu, diff --git a/src/LinAlg/hiopVectorCuda.hpp b/src/LinAlg/hiopVectorCuda.hpp index 0b16273f8..7ec1b3faa 100644 --- a/src/LinAlg/hiopVectorCuda.hpp +++ b/src/LinAlg/hiopVectorCuda.hpp @@ -347,6 +347,8 @@ class hiopVectorCuda : public hiopVector /// @brief check if `this` vector is identical to `vec` virtual bool is_equal(const hiopVector& vec) const; + virtual size_type num_match(const hiopVector& vec) const; + virtual bool process_bounds_local(const hiopVector& xu, hiopVector& ixl, hiopVector& ixu, diff --git a/src/LinAlg/hiopVectorHip.cpp b/src/LinAlg/hiopVectorHip.cpp index 7a324b2ff..4f6a144de 100644 --- a/src/LinAlg/hiopVectorHip.cpp +++ b/src/LinAlg/hiopVectorHip.cpp @@ -1091,6 +1091,20 @@ bool hiopVectorHip::is_equal(const hiopVector& vec) const assert(false&&"NOT needed. Remove this func. 
TODO"); } +size_type hiopVectorHip::num_match(const hiopVector& vec) const +{ + double* dd = data_; + double* vd = vec.local_data(); + int sum_match = sum_match = hiop::cuda::num_match_local_kernel(n_local_, dd, vd); + +#ifdef HIOP_USE_MPI + int sumG; + int ierr=MPI_Allreduce(&sum_match, &sumG, 1, MPI_INT, MPI_SUM, comm_); assert(MPI_SUCCESS==ierr); + return sumG; +#endif + return sum_match; +} + bool hiopVectorHip::process_bounds_local(const hiopVector& xu, hiopVector& ixl, hiopVector& ixu, diff --git a/src/LinAlg/hiopVectorHip.hpp b/src/LinAlg/hiopVectorHip.hpp index cf1edb117..de594201c 100644 --- a/src/LinAlg/hiopVectorHip.hpp +++ b/src/LinAlg/hiopVectorHip.hpp @@ -349,6 +349,8 @@ class hiopVectorHip : public hiopVector /// @brief check if `this` vector is identical to `vec` virtual bool is_equal(const hiopVector& vec) const; + virtual size_type num_match(const hiopVector& vec) const; + virtual bool process_bounds_local(const hiopVector& xu, hiopVector& ixl, hiopVector& ixu, diff --git a/src/LinAlg/hiopVectorPar.cpp b/src/LinAlg/hiopVectorPar.cpp index b70c7b693..b50927729 100644 --- a/src/LinAlg/hiopVectorPar.cpp +++ b/src/LinAlg/hiopVectorPar.cpp @@ -1282,6 +1282,28 @@ bool hiopVectorPar::is_equal(const hiopVector& vec) const return all_equal; } + +size_type hiopVectorPar::num_match(const hiopVector& vec) const +{ + if(n_local_ != vec.get_local_size()) { + return 0; + } + int sum_match = 0; + const double* data_v = vec.local_data_const(); + for(auto i=0; i::is_equal(const hiopVector& vec) const return all_equal; } +template +size_type hiopVectorRaja::num_match(const hiopVector& vec) const +{ +#ifdef HIOP_DEEPCHECKS + const hiopVectorRaja& v = dynamic_cast&>(vec); + assert(v.n_local_ == n_local_); +#endif + + const double* data_v = vec.local_data_const(); + const double* data = data_dev_; + RAJA::ReduceSum< hiop_raja_reduce, int > sum(0); + RAJA::forall< hiop_raja_exec >( RAJA::RangeSegment(0, n_local_), + RAJA_LAMBDA(RAJA::Index_type i) + { + if(data[i]==data_v[i]) { + sum += 1; + } + }); + int all_equal = (sum.get() == 0); + +#ifdef HIOP_USE_MPI + int all_equalG; + int ierr = MPI_Allreduce(&all_equal, &all_equalG, 1, MPI_INT, MPI_SUM, comm_); + assert(MPI_SUCCESS==ierr); + return all_equalG; +#endif + return all_equal; +} + template bool hiopVectorRaja::process_bounds_local(const hiopVector& xu, hiopVector& ixl, diff --git a/src/Optimization/hiopNlpFormulation.cpp b/src/Optimization/hiopNlpFormulation.cpp index e2679932f..ac50c04fa 100644 --- a/src/Optimization/hiopNlpFormulation.cpp +++ b/src/Optimization/hiopNlpFormulation.cpp @@ -441,7 +441,7 @@ bool hiopNlpFormulation::finalizeInitialization() bool hiopNlpFormulation::process_bounds(size_type& n_bnds_low, size_type& n_bnds_upp, size_type& n_bnds_lu, - size_type& nfixed_vars) + size_type& n_fixed_vars) { this->xl_->process_bounds_local(*this->xu_, *this->ixl_, @@ -449,7 +449,7 @@ bool hiopNlpFormulation::process_bounds(size_type& n_bnds_low, n_bnds_low, n_bnds_upp, n_bnds_lu, - nfixed_vars, + n_fixed_vars, options->GetNumeric("fixed_var_tolerance")); return true; } @@ -485,24 +485,18 @@ bool hiopNlpFormulation::process_constraints() assert(gl->get_local_size()==n_cons_); assert(gu->get_local_size()==n_cons_); + n_cons_eq_ = gl->num_match(*gu); + n_cons_ineq_ = n_cons_ - n_cons_eq_; + // transfer to host hiopVectorPar gl_host(n_cons_); hiopVectorPar gu_host(n_cons_); gl->copy_to_vectorpar(gl_host); gu->copy_to_vectorpar(gu_host); - + double* gl_vec = gl_host.local_data(); double* gu_vec = gu_host.local_data(); - n_cons_eq_ = 0; - 
n_cons_ineq_ = 0; - for(int i=0;i Date: Wed, 8 Mar 2023 22:44:02 -0800 Subject: [PATCH 5/5] pass LLNL ci --- src/LinAlg/VectorCudaKernels.cu | 8 ++++---- src/LinAlg/VectorCudaKernels.hpp | 2 +- src/LinAlg/VectorHipKernels.cpp | 10 +++++----- src/LinAlg/VectorHipKernels.hpp | 2 +- src/LinAlg/hiopVectorCuda.cpp | 6 ++++-- src/LinAlg/hiopVectorHip.cpp | 6 ++++-- src/LinAlg/hiopVectorRajaImpl.hpp | 2 +- tests/LinAlg/vectorTests.hpp | 1 - 8 files changed, 20 insertions(+), 17 deletions(-) diff --git a/src/LinAlg/VectorCudaKernels.cu b/src/LinAlg/VectorCudaKernels.cu index 09948dff4..405dfff78 100644 --- a/src/LinAlg/VectorCudaKernels.cu +++ b/src/LinAlg/VectorCudaKernels.cu @@ -707,9 +707,9 @@ __global__ void relax_bounds_cu(int n, /** @brief set d_ptr[i] = 1 if d1[i] == d2[i], otherwirse 0 */ __global__ void set_if_match_cu(int n, - double* d_ptr, - double* d1, - double* d2) + int* d_ptr, + const double* d1, + const double* d2) { const int num_threads = blockDim.x * gridDim.x; const int tid = blockIdx.x * blockDim.x + threadIdx.x; @@ -1358,7 +1358,7 @@ void copyToStartingAt_w_pattern_kernel(int n_src, dd); } -int num_match_local_kernel(int n, double* d1, const double* d2) +int num_match_local_kernel(int n, const double* d1, const double* d2) { int num_blocks = (n+block_size-1)/block_size; diff --git a/src/LinAlg/VectorCudaKernels.hpp b/src/LinAlg/VectorCudaKernels.hpp index 1afe55b82..3d8cf6cba 100644 --- a/src/LinAlg/VectorCudaKernels.hpp +++ b/src/LinAlg/VectorCudaKernels.hpp @@ -297,7 +297,7 @@ void copyToStartingAt_w_pattern_kernel(int n_src, const double* dd); /// @brief return the numbers of identical elements between two vectors -int num_match_local_kernel(int n, double* d1, const double* d2); +int num_match_local_kernel(int n, const double* d1, const double* d2); /** @brief process variable bounds */ void process_bounds_local_kernel(int n_local, diff --git a/src/LinAlg/VectorHipKernels.cpp b/src/LinAlg/VectorHipKernels.cpp index 85acb17b0..bcea53a66 100644 --- a/src/LinAlg/VectorHipKernels.cpp +++ b/src/LinAlg/VectorHipKernels.cpp @@ -702,9 +702,9 @@ __global__ void relax_bounds_hip(int n, /** @brief set d_ptr[i] = 1 if d1[i] == d2[i], otherwirse 0 */ __global__ void set_if_match_hip(int n, - double* d_ptr, - double* d1, - double* d2) + int* d_ptr, + const double* d1, + const double* d2) { const int num_threads = blockDim.x * gridDim.x; const int tid = blockIdx.x * blockDim.x + threadIdx.x; @@ -1353,7 +1353,7 @@ void copyToStartingAt_w_pattern_kernel(int n_src, dd); } -int num_match_local_kernel(int n, double* d1, const double* d2) +int num_match_local_kernel(int n, const double* d1, const double* d2) { int num_blocks = (n+block_size-1)/block_size; @@ -1362,7 +1362,7 @@ int num_match_local_kernel(int n, double* d1, const double* d2) int* d_ptr = thrust::raw_pointer_cast(dv_ptr); // set d_ptr[i] = 1 if d1[i] == d2[i], otherwirse 0 - set_if_match_cu<<>>(n, d_ptr, d1, d2); + set_if_match_hip<<>>(n, d_ptr, d1, d2); int rval = thrust::reduce(thrust::device, d_ptr, d_ptr+n, 0, thrust::plus()); diff --git a/src/LinAlg/VectorHipKernels.hpp b/src/LinAlg/VectorHipKernels.hpp index 5d851e2de..ece799f18 100644 --- a/src/LinAlg/VectorHipKernels.hpp +++ b/src/LinAlg/VectorHipKernels.hpp @@ -297,7 +297,7 @@ void copyToStartingAt_w_pattern_kernel(int n_src, const double* dd); /// @brief return the numbers of identical elements between two vectors -int num_match_local_kernel(int n, double* d1, const double* d2); +int num_match_local_kernel(int n, const double* d1, const double* d2); /** @brief process 
variable bounds */ void process_bounds_local_kernel(int n_local, diff --git a/src/LinAlg/hiopVectorCuda.cpp b/src/LinAlg/hiopVectorCuda.cpp index ea0d14b0c..1bf877992 100644 --- a/src/LinAlg/hiopVectorCuda.cpp +++ b/src/LinAlg/hiopVectorCuda.cpp @@ -1089,8 +1089,8 @@ bool hiopVectorCuda::is_equal(const hiopVector& vec) const size_type hiopVectorCuda::num_match(const hiopVector& vec) const { - double* dd = data_; - double* vd = vec.local_data(); + const double* dd = data_; + const double* vd = vec.local_data_const(); int sum_match = sum_match = hiop::cuda::num_match_local_kernel(n_local_, dd, vd); #ifdef HIOP_USE_MPI @@ -1130,6 +1130,8 @@ bool hiopVectorCuda::process_bounds_local(const hiopVector& xu, n_bnds_lu, n_fixed_vars, fixed_var_tol); + + return true; } void hiopVectorCuda::relax_bounds_vec(hiopVector& xu, diff --git a/src/LinAlg/hiopVectorHip.cpp b/src/LinAlg/hiopVectorHip.cpp index 4f6a144de..e2a173468 100644 --- a/src/LinAlg/hiopVectorHip.cpp +++ b/src/LinAlg/hiopVectorHip.cpp @@ -1093,8 +1093,8 @@ bool hiopVectorHip::is_equal(const hiopVector& vec) const size_type hiopVectorHip::num_match(const hiopVector& vec) const { - double* dd = data_; - double* vd = vec.local_data(); + const double* dd = data_; + const double* vd = vec.local_data_const(); int sum_match = sum_match = hiop::cuda::num_match_local_kernel(n_local_, dd, vd); #ifdef HIOP_USE_MPI @@ -1134,6 +1134,8 @@ bool hiopVectorHip::process_bounds_local(const hiopVector& xu, n_bnds_lu, n_fixed_vars, fixed_var_tol); + + return true; } void hiopVectorHip::relax_bounds_vec(hiopVector& xu, diff --git a/src/LinAlg/hiopVectorRajaImpl.hpp b/src/LinAlg/hiopVectorRajaImpl.hpp index ff3091162..6e8b552cc 100644 --- a/src/LinAlg/hiopVectorRajaImpl.hpp +++ b/src/LinAlg/hiopVectorRajaImpl.hpp @@ -2250,7 +2250,7 @@ size_type hiopVectorRaja::num_match(const hiopVector& vec) const sum += 1; } }); - int all_equal = (sum.get() == 0); + int all_equal = sum.get(); #ifdef HIOP_USE_MPI int all_equalG; diff --git a/tests/LinAlg/vectorTests.hpp b/tests/LinAlg/vectorTests.hpp index 1d81711b5..78d2f820e 100644 --- a/tests/LinAlg/vectorTests.hpp +++ b/tests/LinAlg/vectorTests.hpp @@ -2058,7 +2058,6 @@ class VectorTests : public TestBase { const local_ordinal_type Nx = x.get_size(); int fail = 0; - int n_match = 0; x.setToConstant(one); y.setToConstant(one);
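
For orientation, the sketch below shows how the vector-level API introduced by this series is meant to be driven from the formulation layer: process_bounds_local() produces the bound counts and indicator vectors, and relax_bounds_vec() perturbs the bounds of (nearly) fixed variables, mirroring the fixed_var="relax" path in hiopNlpFormulation. The free function, its name, and the tolerance values are illustrative assumptions and are not part of the patches.

    // minimal usage sketch (illustrative; not code from this patch series)
    #include "hiopVector.hpp"
    using namespace hiop;

    void preprocess_bounds(hiopVector& xl, hiopVector& xu,
                           hiopVector& ixl, hiopVector& ixu,
                           double fixed_var_tol = 1e-8,
                           double fixed_var_perturb = 1e-8)
    {
      size_type n_low, n_upp, n_lu, n_fixed;
      // count variables with lower/upper/two-sided bounds and detect fixed ones (local data only)
      xl.process_bounds_local(xu, ixl, ixu, n_low, n_upp, n_lu, n_fixed, fixed_var_tol);
      if(n_fixed > 0) {
        // push xl and xu apart by fixed_var_perturb*fmax(1,|xu|) for the fixed entries
        xl.relax_bounds_vec(xu, fixed_var_tol, fixed_var_perturb);
      }
    }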