From cb95ccb3272f0be0695359b121314832fcaf917d Mon Sep 17 00:00:00 2001
From: nychiang
Date: Tue, 7 Mar 2023 19:45:31 -0800
Subject: [PATCH 1/5] process bounds and relax fixed var

---
 src/LinAlg/hiopVector.hpp               |  42 ++++++++++
 src/LinAlg/hiopVectorPar.cpp            |  90 ++++++++++++++++++++
 src/LinAlg/hiopVectorPar.hpp            |  13 +++
 src/LinAlg/hiopVectorRaja.hpp           |  13 +++
 src/LinAlg/hiopVectorRajaImpl.hpp       |  92 ++++++++++++++++++++
 src/Optimization/hiopNlpFormulation.cpp | 107 ++++--------------------
 src/Optimization/hiopNlpFormulation.hpp |   2 +-
 src/Optimization/hiopNlpTransforms.cpp  |  68 +++++----------
 src/Optimization/hiopNlpTransforms.hpp  |  24 +++---
 9 files changed, 302 insertions(+), 149 deletions(-)

diff --git a/src/LinAlg/hiopVector.hpp b/src/LinAlg/hiopVector.hpp
index 289583481..c2f1beda1 100644
--- a/src/LinAlg/hiopVector.hpp
+++ b/src/LinAlg/hiopVector.hpp
@@ -1002,6 +1002,48 @@ class hiopVector
    */
   virtual bool is_equal(const hiopVector& vec) const = 0;
 
+  /**
+   * @brief preprocess bounds in a form supported by the NLP formulation. Returns counts of
+   * the variables with lower, upper, and lower and upper bounds, as well as of the fixed
+   * variables.
+   *
+   * @param[in] this - lower bound of primal variable `x`
+   * @param[in] xu - upper bound of primal variable `x`
+   * @param[out] ixl - indicators of the variables with lower bounds
+   * @param[out] ixu - indicators of the variables with upper bounds
+   * @param[out] n_bnds_low - number of variables with lower bounds only
+   * @param[out] n_bnds_upp - number of variables with upper bounds only
+   * @param[out] n_bnds_lu - number of variables with both lower and upper bounds
+   * @param[out] nfixed_vars - number of fixed variables
+   * @param[in] fixed_var_tol - tolerance used to define fixed variables
+   *
+   * @pre this is a local method
+   * @todo: add unit test, or should we remove this function?
+   */
+  virtual bool process_bounds_local(const hiopVector& xu,
+                                    hiopVector& ixl,
+                                    hiopVector& ixu,
+                                    size_type& n_bnds_low,
+                                    size_type& n_bnds_upp,
+                                    size_type& n_bnds_lu,
+                                    size_type& nfixed_vars,
+                                    const double& fixed_var_tol) = 0;
+
+  /**
+   * @brief relax variable bounds
+   *
+   * @param[in] this - lower bound of primal variable `x`
+   * @param[in] xu - upper bound of primal variable `x`
+   * @param[in] fixed_var_tol - tolerance used to define fixed variables
+   * @param[in] fixed_var_perturb - perturbation added to bounds
+   *
+   * @pre this is a local method
+   * @todo: add unit test, or should we remove this function?
+ */ + virtual void relax_bounds_vec(hiopVector& xu, + const double& fixed_var_tol, + const double& fixed_var_perturb) = 0; + protected: size_type n_; //we assume sequential data protected: diff --git a/src/LinAlg/hiopVectorPar.cpp b/src/LinAlg/hiopVectorPar.cpp index 0fb38bd89..725fe3967 100644 --- a/src/LinAlg/hiopVectorPar.cpp +++ b/src/LinAlg/hiopVectorPar.cpp @@ -1282,6 +1282,96 @@ bool hiopVectorPar::is_equal(const hiopVector& vec) const return all_equal; } +bool hiopVectorPar::process_bounds_local(const hiopVector& xu, + hiopVector& ixl, + hiopVector& ixu, + size_type& n_bnds_low, + size_type& n_bnds_upp, + size_type& n_bnds_lu, + size_type& nfixed_vars, + const double& fixed_var_tol) +{ + n_bnds_low = 0; + n_bnds_upp = 0; + n_bnds_lu = 0; + nfixed_vars = 0; + + const double* xl_vec = this->local_data_const(); + const double* xu_vec = xu.local_data_const(); + double* ixl_vec = ixl.local_data(); + double* ixu_vec = ixu.local_data(); +#ifdef HIOP_DEEPCHECKS + const int maxBndsCloseMsgs=3; + int nBndsClose=0; +#endif + int nlocal = this->get_local_size(); + for(int i=0;i -1e20) { + ixl_vec[i] = 1.; + n_bnds_low++; + if(xu_vec[i] < 1e20) { + n_bnds_lu++; + } + } else { + ixl_vec[i] = 0.; + } + + if(xu_vec[i] < 1e20) { + ixu_vec[i] = 1.; + n_bnds_upp++; + } else { + ixu_vec[i] = 0.; + } + +#ifdef HIOP_DEEPCHECKS + assert(xl_vec[i] <= xu_vec[i] && "please fix the inconsistent bounds, otherwise the problem is infeasible"); +#endif + + if( xu_vec[i] < 1e20 && + fabs(xl_vec[i]-xu_vec[i]) <= fixed_var_tol*std::fmax(1.,std::fabs(xu_vec[i]))) { + nfixed_vars++; + } else { +#ifdef HIOP_DEEPCHECKS +#define min_dist 1e-8 + if(fixed_var_tolprintf(hovWarning, + "Lower (%g) and upper bound (%g) for variable %d are very close. " + "Consider fixing this variable or increase 'fixed_var_tolerance'.\n", + i, xl_vec[i], xu_vec[i]); + nBndsClose++; + } + } + if(nBndsClose==maxBndsCloseMsgs) { + log->printf(hovWarning, "[further messages were surpressed]\n"); + nBndsClose++; + } + } +#endif + } + } // end of for(int i=0;i local_data(); + double *xua = xu.local_data(); + size_type n = this->get_local_size(); + + double xuabs; + for(index_type i=0; i& exec_space() const { return exec_space_; diff --git a/src/LinAlg/hiopVectorRajaImpl.hpp b/src/LinAlg/hiopVectorRajaImpl.hpp index 6dcb111a6..eac76e6c0 100644 --- a/src/LinAlg/hiopVectorRajaImpl.hpp +++ b/src/LinAlg/hiopVectorRajaImpl.hpp @@ -2232,4 +2232,96 @@ bool hiopVectorRaja::is_equal(const hiopVector& vec) const return all_equal; } +template +bool hiopVectorRaja::process_bounds_local(const hiopVector& xu, + hiopVector& ixl, + hiopVector& ixu, + size_type& n_bnds_low, + size_type& n_bnds_upp, + size_type& n_bnds_lu, + size_type& nfixed_vars, + const double& fixed_var_tol) +{ +#ifdef HIOP_DEEPCHECKS + const hiopVectorRaja& vxu = dynamic_cast&>(xu); + const hiopVectorRaja& vixl = dynamic_cast&>(ixl); + const hiopVectorRaja& vixu = dynamic_cast&>(ixu); + assert(vxu.n_local_ == this->n_local_); + assert(vixl.n_local_ == this->n_local_); + assert(vixu.n_local_ == this->n_local_); +#endif + + const double* xl_vec = this->local_data_const(); + const double* xu_vec = xu.local_data_const(); + double* ixl_vec = ixl.local_data(); + double* ixu_vec = ixu.local_data(); + + size_type nlocal = this->get_local_size(); + + RAJA::ReduceSum< hiop_raja_reduce, int > sum_n_bnds_low(0); + RAJA::ReduceSum< hiop_raja_reduce, int > sum_n_bnds_upp(0); + RAJA::ReduceSum< hiop_raja_reduce, int > sum_n_bnds_lu(0); + RAJA::ReduceSum< hiop_raja_reduce, int > sum_nfixed_vars(0); + + 
RAJA::forall< hiop_raja_exec >( RAJA::RangeSegment(0, nlocal), + RAJA_LAMBDA(RAJA::Index_type i) + { + if(xl_vec[i] > -1e20) { + ixl_vec[i] = 1.; + sum_n_bnds_low += 1; + if(xu_vec[i] < 1e20) { + sum_n_bnds_lu += 1; + } + } else { + ixl_vec[i] = 0.; + } + + if(xu_vec[i] < 1e20) { + ixu_vec[i] = 1.; + sum_n_bnds_upp += 1; + } else { + ixu_vec[i] = 0.; + } + + if(xu_vec[i] < 1e20 && + fabs(xl_vec[i]-xu_vec[i]) <= fixed_var_tol*std::fmax(1.,std::fabs(xu_vec[i]))) { + sum_nfixed_vars += 1; + } + }); + + n_bnds_low = sum_n_bnds_low.get(); + n_bnds_upp = sum_n_bnds_upp.get(); + n_bnds_lu = sum_n_bnds_lu.get(); + nfixed_vars = sum_nfixed_vars.get(); + + return true; +} + +template +bool hiopVectorRaja::relax_bounds_vec(hiopVector& xu, + const double& fixed_var_tol, + const double& fixed_var_perturb) +{ +#ifdef HIOP_DEEPCHECKS + const hiopVectorRaja& vxu = dynamic_cast&>(xu); + assert(vxu.n_local_ == this->n_local_); +#endif + + double *xla = this->local_data(); + double *xua = xu.local_data(); + size_type n = this->get_local_size(); + + RAJA::forall< hiop_raja_exec >( RAJA::RangeSegment(0, nlocal), + RAJA_LAMBDA(RAJA::Index_type i) + { + double xuabs = std::fabs(xua[i]); + if(std::fabs(xua[i]-xla[i]) <= fixed_var_tol*std::fmax(1.,xuabs)) { + xua[i] += fixed_var_perturb*std::fmax(1.,xuabs); + xla[i] -= fixed_var_perturb*std::fmax(1.,xuabs); + } + }); + + return true; +} + } // namespace hiop diff --git a/src/Optimization/hiopNlpFormulation.cpp b/src/Optimization/hiopNlpFormulation.cpp index b8fb9d6d3..e2679932f 100644 --- a/src/Optimization/hiopNlpFormulation.cpp +++ b/src/Optimization/hiopNlpFormulation.cpp @@ -193,8 +193,8 @@ bool hiopNlpFormulation::finalizeInitialization() if(strFixedVars_ != options->GetString("fixed_var")) { doinit=true; } - const double fixedVarTol = options->GetNumeric("fixed_var_tolerance"); - if(dFixedVarsTol_ != fixedVarTol) { + const double fixed_var_tol = options->GetNumeric("fixed_var_tolerance"); + if(dFixedVarsTol_ != fixed_var_tol) { doinit=true; } @@ -257,12 +257,12 @@ bool hiopNlpFormulation::finalizeInitialization() //preprocess variables bounds - this is curently done on the CPU // size_type nfixed_vars_local; - process_bounds(n_bnds_low_local_,n_bnds_upp_local_, n_bnds_lu_, nfixed_vars_local); + process_bounds(n_bnds_low_local_, n_bnds_upp_local_, n_bnds_lu_, nfixed_vars_local); /////////////////////////////////////////////////////////////////////////// // Handling of fixed variables ////////////////////////////////////////////////////////////////////////// - dFixedVarsTol_ = fixedVarTol; + dFixedVarsTol_ = fixed_var_tol; size_type nfixed_vars=nfixed_vars_local; #ifdef HIOP_USE_MPI int ierr = MPI_Allreduce(&nfixed_vars_local, &nfixed_vars, 1, MPI_HIOP_SIZE_TYPE, MPI_SUM, comm_); @@ -277,11 +277,15 @@ bool hiopNlpFormulation::finalizeInitialization() // remove free variables // log->printf(hovWarning, "Fixed variables will be removed internally.\n"); + if(options->GetString("compute_mode")=="gpu") { + assert(false && "HiOp hasn't support removing fixed variables under GPU mode yet."); + return false; + } fixedVarsRemover = new hiopFixedVarsRemover(this, *xl_, *xu_, - fixedVarTol, + fixed_var_tol, nfixed_vars, nfixed_vars_local); @@ -417,7 +421,7 @@ bool hiopNlpFormulation::finalizeInitialization() cons_body_ = nullptr; delete cons_Jac_; - cons_Jac_ = NULL; + cons_Jac_ = nullptr; delete cons_lambdas_; cons_lambdas_ = nullptr; @@ -439,89 +443,14 @@ bool hiopNlpFormulation::process_bounds(size_type& n_bnds_low, size_type& n_bnds_lu, size_type& nfixed_vars) { - - 
n_bnds_low = 0; - n_bnds_upp = 0; - n_bnds_lu = 0; - nfixed_vars = 0; - -#if !defined(HIOP_USE_MPI) - int* vec_distrib_ = nullptr; - MPI_Comm comm_ = MPI_COMM_SELF; -#endif - hiopVectorPar xl_tmp(n_vars_, vec_distrib_, comm_); - hiopVectorPar xu_tmp(n_vars_, vec_distrib_, comm_); - hiopVectorPar ixl_tmp(n_vars_, vec_distrib_, comm_); - hiopVectorPar ixu_tmp(n_vars_, vec_distrib_, comm_); - - this->xl_->copy_to_vectorpar(xl_tmp); - this->xu_->copy_to_vectorpar(xu_tmp); - this->ixl_->copy_to_vectorpar(ixl_tmp); - this->ixu_->copy_to_vectorpar(ixu_tmp); - - double *ixl_vec = ixl_tmp.local_data_host(); - double *ixu_vec = ixu_tmp.local_data_host(); - - double* xl_vec = xl_tmp.local_data_host(); - double* xu_vec = xu_tmp.local_data_host(); -#ifdef HIOP_DEEPCHECKS - const int maxBndsCloseMsgs=3; int nBndsClose=0; -#endif - const double fixedVarTol = options->GetNumeric("fixed_var_tolerance"); - int nlocal=xl_->get_local_size(); - for(int i=0;i -1e20) { - ixl_vec[i] = 1.; - n_bnds_low++; - if(xu_vec[i] < 1e20) { - n_bnds_lu++; - } - } else { - ixl_vec[i] = 0.; - } - - if(xu_vec[i] < 1e20) { - ixu_vec[i] = 1.; - n_bnds_upp++; - } else { - ixu_vec[i] = 0.; - } - -#ifdef HIOP_DEEPCHECKS - assert(xl_vec[i] <= xu_vec[i] && "please fix the inconsistent bounds, otherwise the problem is infeasible"); -#endif - - //if(xl_vec[i]==xu_vec[i]) { - if( xu_vec[i]<1e20 && - fabs(xl_vec[i]-xu_vec[i]) <= fixedVarTol*fmax(1.,fabs(xu_vec[i]))) { - nfixed_vars++; - } else { -#ifdef HIOP_DEEPCHECKS -#define min_dist 1e-8 - if(fixedVarTolprintf(hovWarning, - "Lower (%g) and upper bound (%g) for variable %d are very close. " - "Consider fixing this variable or increase 'fixed_var_tolerance'.\n", - i, xl_vec[i], xu_vec[i]); - nBndsClose++; - } - } - if(nBndsClose==maxBndsCloseMsgs) { - log->printf(hovWarning, "[further messages were surpressed]\n"); - nBndsClose++; - } - } -#endif - } - } - - this->xl_->copy_from_vectorpar(xl_tmp); - this->xu_->copy_from_vectorpar(xu_tmp); - this->ixl_->copy_from_vectorpar(ixl_tmp); - this->ixu_->copy_from_vectorpar(ixu_tmp); - + this->xl_->process_bounds_local(*this->xu_, + *this->ixl_, + *this->ixu_, + n_bnds_low, + n_bnds_upp, + n_bnds_lu, + nfixed_vars, + options->GetNumeric("fixed_var_tolerance")); return true; } diff --git a/src/Optimization/hiopNlpFormulation.hpp b/src/Optimization/hiopNlpFormulation.hpp index b8b107c0b..5a656c765 100644 --- a/src/Optimization/hiopNlpFormulation.hpp +++ b/src/Optimization/hiopNlpFormulation.hpp @@ -290,7 +290,7 @@ class hiopNlpFormulation #endif protected: /* Preprocess bounds in a form supported by the NLP formulation. Returns counts of - * the variables with lower, upper, and lower and lower bounds, as well of the fixed + * the variables with lower, upper, and lower and upper bounds, as well of the fixed * variables. 
*/ virtual bool process_bounds(size_type& n_bnds_low, diff --git a/src/Optimization/hiopNlpTransforms.cpp b/src/Optimization/hiopNlpTransforms.cpp index 456cf89b4..4696a9cb0 100644 --- a/src/Optimization/hiopNlpTransforms.cpp +++ b/src/Optimization/hiopNlpTransforms.cpp @@ -65,11 +65,11 @@ hiopFixedVarsRemover:: hiopFixedVarsRemover(hiopNlpFormulation* nlp, const hiopVector& xl, const hiopVector& xu, - const double& fixedVarTol_, + const double& fixed_var_tol, const size_type& numFixedVars, const size_type& numFixedVars_local) : hiopNlpTransformation(nlp), - n_fixed_vars_local(numFixedVars_local), fixedVarTol(fixedVarTol_), + n_fixed_vars_local(numFixedVars_local), fixed_var_tol_(fixed_var_tol), Jacc_fs(NULL), Jacd_fs(NULL), fs2rs_idx_map(xl.get_local_size()), x_rs_ref_(nullptr), Jacc_rs_ref(NULL), Jacd_rs_ref(NULL) @@ -158,7 +158,7 @@ bool hiopFixedVarsRemover::setupDecisionVectorPart() int it_rs=0; for(int i=0;i=0) { - M_rs[i*nrs+rs_idx] = M_fs[i*nfs+j]; + M_rs[i*nrs+rs_idx] = M_fs[i*nfs+j]; } } } @@ -291,38 +291,19 @@ hiopFixedVarsRelaxer(hiopNlpFormulation* nlp, const size_type& numFixedVars, const size_type& numFixedVars_local) : hiopNlpTransformation(nlp), - xl_copy(NULL), xu_copy(NULL), n_vars(xl.get_size()), n_vars_local(xl.get_local_size()) + n_vars(xl.get_size()), + n_vars_local(xl.get_local_size()) { - //xl_copy = xl.new_copy(); // no need to copy at this point - //xu_copy = xu.new_copy(); // no need to copy at this point } hiopFixedVarsRelaxer::~hiopFixedVarsRelaxer() { - if(xl_copy) delete xl_copy; - if(xu_copy) delete xu_copy; } void hiopFixedVarsRelaxer:: relax(const double& fixed_var_tol, const double& fixed_var_perturb, hiopVector& xl, hiopVector& xu) { - double *xla=xl.local_data(), *xua=xu.local_data(), *v; - size_type n=xl.get_local_size(); - double xuabs; - for(index_type i=0; i1.) 
{ @@ -464,9 +443,4 @@ hiopNLPObjGradScaling::~hiopNLPObjGradScaling() if(scale_factor_cd) delete scale_factor_cd; } - - - - - } //end of namespace diff --git a/src/Optimization/hiopNlpTransforms.hpp b/src/Optimization/hiopNlpTransforms.hpp index 9c7f37d61..d0634bf7f 100644 --- a/src/Optimization/hiopNlpTransforms.hpp +++ b/src/Optimization/hiopNlpTransforms.hpp @@ -153,7 +153,7 @@ class hiopFixedVarsRemover : public hiopNlpTransformation hiopFixedVarsRemover(hiopNlpFormulation* nlp, const hiopVector& xl, const hiopVector& xu, - const double& fixedVarTol, + const double& fixed_var_tol, const size_type& numFixedVars, const size_type& numFixedVars_local); ~hiopFixedVarsRemover(); @@ -287,7 +287,7 @@ class hiopFixedVarsRemover : public hiopNlpTransformation size_type n_fixed_vars_local; size_type n_fixed_vars; - double fixedVarTol; + double fixed_var_tol_; size_type n_fs; //full-space n size_type n_rs; //reduced-space n @@ -326,20 +326,20 @@ class hiopFixedVarsRelaxer : public hiopNlpTransformation virtual ~hiopFixedVarsRelaxer(); /* number of vars in the NLP after the tranformation */ - inline size_type n_post() { /*assert(xl_copy);*/ return n_vars; } //xl_copy->get_size(); } + inline size_type n_post() {return n_vars;} /* number of vars in the NLP to which the tranformation is to be applied */ - virtual size_type n_pre () { /*assert(xl_copy);*/ return n_vars; } //xl_copy->get_size(); } + virtual size_type n_pre() {return n_vars;} - inline size_type n_post_local() { return n_vars_local; } //xl_copy->get_local_size(); } - inline size_type n_pre_local() { return n_vars_local; } //xl_copy->get_local_size(); } + inline size_type n_post_local() {return n_vars_local;} + inline size_type n_pre_local() {return n_vars_local;} inline bool setup() { return true; } void relax(const double& fixed_var_tol, const double& fixed_var_perturb, hiopVector& xl, hiopVector& xu); private: - hiopVector*xl_copy, *xu_copy; - size_type n_vars; int n_vars_local; + size_type n_vars; + size_type n_vars_local; }; /** @@ -514,10 +514,10 @@ class hiopBoundsRelaxer : public hiopNlpTransformation const hiopVector& du); virtual ~hiopBoundsRelaxer(); - inline size_type n_post() { /*assert(xl_copy);*/ return n_vars; } - virtual size_type n_pre () { /*assert(xl_copy);*/ return n_vars; } - inline size_type n_post_local() { return n_vars_local; } - inline size_type n_pre_local() { return n_vars_local; } + inline size_type n_post() {return n_vars;} + virtual size_type n_pre() {return n_vars;} + inline size_type n_post_local() {return n_vars_local;} + inline size_type n_pre_local() {return n_vars_local;} inline bool setup() { return true; } inline hiopVector* apply_to_x(hiopVector& x) From 1d7e80032b4c839a309fd3e45a64aeee6ab582f6 Mon Sep 17 00:00:00 2001 From: nychiang Date: Tue, 7 Mar 2023 20:01:42 -0800 Subject: [PATCH 2/5] fix print issue --- src/LinAlg/hiopVectorPar.cpp | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/src/LinAlg/hiopVectorPar.cpp b/src/LinAlg/hiopVectorPar.cpp index 725fe3967..236822ce9 100644 --- a/src/LinAlg/hiopVectorPar.cpp +++ b/src/LinAlg/hiopVectorPar.cpp @@ -1301,10 +1301,6 @@ bool hiopVectorPar::process_bounds_local(const hiopVector& xu, double* ixl_vec = ixl.local_data(); double* ixu_vec = ixu.local_data(); -#ifdef HIOP_DEEPCHECKS - const int maxBndsCloseMsgs=3; - int nBndsClose=0; -#endif int nlocal = this->get_local_size(); for(int i=0;i -1e20) { @@ -1334,18 +1330,32 @@ bool hiopVectorPar::process_bounds_local(const hiopVector& xu, } else { #ifdef 
HIOP_DEEPCHECKS #define min_dist 1e-8 + const int maxBndsCloseMsgs=3; + int nBndsClose=0; + int myrank_ = 0; + int numranks = 1; +#ifdef HIOP_USE_MPI + int err = MPI_Comm_rank(comm_, &myrank_); assert(err==MPI_SUCCESS); + err = MPI_Comm_size(comm_, &numranks); assert(err==MPI_SUCCESS); +#endif + if(fixed_var_tolprintf(hovWarning, - "Lower (%g) and upper bound (%g) for variable %d are very close. " - "Consider fixing this variable or increase 'fixed_var_tolerance'.\n", - i, xl_vec[i], xu_vec[i]); + if(myrank_==0) { + fprintf(stdout, + "Lower (%g) and upper bound (%g) for variable %d are very close. " + "Consider fixing this variable or increase 'fixed_var_tolerance'.\n", + i, xl_vec[i], xu_vec[i]); + } nBndsClose++; } } if(nBndsClose==maxBndsCloseMsgs) { - log->printf(hovWarning, "[further messages were surpressed]\n"); + if(myrank_==0) { + fprintf(stdout, + "[further messages were surpressed]\n"); + } nBndsClose++; } } From d1d1c4d6ced8096ae89388d5db5025ca1136d2b8 Mon Sep 17 00:00:00 2001 From: nychiang Date: Tue, 7 Mar 2023 23:40:59 -0800 Subject: [PATCH 3/5] cuda/hip implementation & unit test --- src/LinAlg/VectorCudaKernels.cu | 128 ++++++++++++++++++++++++++++++ src/LinAlg/VectorCudaKernels.hpp | 19 +++++ src/LinAlg/VectorHipKernels.cpp | 128 ++++++++++++++++++++++++++++++ src/LinAlg/VectorHipKernels.hpp | 19 +++++ src/LinAlg/hiopVector.hpp | 6 +- src/LinAlg/hiopVectorCuda.cpp | 47 +++++++++++ src/LinAlg/hiopVectorCuda.hpp | 13 +++ src/LinAlg/hiopVectorHip.cpp | 53 ++++++++++++- src/LinAlg/hiopVectorHip.hpp | 13 +++ src/LinAlg/hiopVectorPar.cpp | 6 +- src/LinAlg/hiopVectorPar.hpp | 2 +- src/LinAlg/hiopVectorRaja.hpp | 2 +- src/LinAlg/hiopVectorRajaImpl.hpp | 18 ++--- tests/LinAlg/vectorTests.hpp | 102 ++++++++++++++++++++++++ tests/testVector.cpp | 2 + 15 files changed, 536 insertions(+), 22 deletions(-) diff --git a/src/LinAlg/VectorCudaKernels.cu b/src/LinAlg/VectorCudaKernels.cu index b1c9a5911..c1b7cd77c 100644 --- a/src/LinAlg/VectorCudaKernels.cu +++ b/src/LinAlg/VectorCudaKernels.cu @@ -640,6 +640,71 @@ __global__ void copyToStartingAt_w_pattern_cu(int n_src, } } +/** @brief process variable bounds */ +__global__ void process_bounds_cu(int n, + const double* xl, + const double* xu, + double* ixl, + double* ixu, + int* bnds_low, + int* bnds_upp, + int* bnds_lu, + int* fixed_vars, + double fixed_var_tol) +{ + const int num_threads = blockDim.x * gridDim.x; + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + + for (int i = tid; i < n; i += num_threads) { + // preemptive loop to reduce number of iterations? 
+ bnds_low[i] = 0; + bnds_upp[i] = 0; + bnds_lu[i] = 0; + fixed_vars[i] = 0; + + if(xl[i] > -1e20) { + ixl[i] = 1.; + bnds_low[i] = 1; + if(xu[i] < 1e20) { + bnds_lu[i] = 1; + } + } else { + ixl[i] = 0.; + } + + if(xu[i] < 1e20) { + ixu[i] = 1.; + bnds_upp[i] = 1; + } else { + ixu[i] = 0.; + } + + if(xu[i] < 1e20 && + fabs(xl[i]-xu[i]) <= fixed_var_tol*fmax(1.,std::fabs(xu[i]))) { + fixed_vars[i] = 1; + } + } +} + +/** @brief relax variable bounds */ +__global__ void relax_bounds_cu(int n, + double* xla, + double* xua, + double fixed_var_tol, + double fixed_var_perturb) +{ + const int num_threads = blockDim.x * gridDim.x; + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + + for (int i = tid; i < n; i += num_threads) { + double xuabs = fabs(xua[i]); + if(fabs(xua[i]-xla[i]) <= fixed_var_tol*fmax(1.,xuabs)) { + xua[i] += fixed_var_perturb * std::fmax(1.,xuabs); + xla[i] -= fixed_var_perturb * std::fmax(1.,xuabs); + } + } +} + namespace hiop { namespace cuda @@ -1276,7 +1341,70 @@ void copyToStartingAt_w_pattern_kernel(int n_src, dd); } +/** @brief process variable bounds */ +void process_bounds_local_kernel(int n_local, + const double* xl, + const double* xu, + double* ixl, + double* ixu, + int& n_bnds_low, + int& n_bnds_upp, + int& n_bnds_lu, + int& n_fixed_vars, + double fixed_var_tol) +{ + int num_blocks = (n_local+block_size-1)/block_size; + + thrust::device_ptr bnds_low_d_ptr = thrust::device_malloc(n_local*sizeof(int)); + int* bnds_low_r_ptr = thrust::raw_pointer_cast(bnds_low_d_ptr); + + thrust::device_ptr bnds_upp_d_ptr = thrust::device_malloc(n_local*sizeof(int)); + int* bnds_upp_r_ptr = thrust::raw_pointer_cast(bnds_upp_d_ptr); + + thrust::device_ptr bnds_lu_d_ptr = thrust::device_malloc(n_local*sizeof(int)); + int* bnds_lu_r_ptr = thrust::raw_pointer_cast(bnds_lu_d_ptr); + thrust::device_ptr n_fixed_vars_d_ptr = thrust::device_malloc(n_local*sizeof(int)); + int* n_fixed_vars_r_ptr = thrust::raw_pointer_cast(n_fixed_vars_d_ptr); + + // set values + process_bounds_cu<<>>(n_local, + xl, + xu, + ixl, + ixu, + bnds_low_r_ptr, + bnds_upp_r_ptr, + bnds_lu_r_ptr, + n_fixed_vars_r_ptr, + fixed_var_tol); + + // compute sum + n_bnds_low = thrust::reduce(thrust::device, bnds_low_d_ptr, bnds_low_d_ptr+n_local, 0.0, thrust::plus()); + n_bnds_upp = thrust::reduce(thrust::device, bnds_upp_d_ptr, bnds_upp_d_ptr+n_local, 0.0, thrust::plus()); + n_bnds_lu = thrust::reduce(thrust::device, bnds_lu_d_ptr, bnds_lu_d_ptr+n_local, 0.0, thrust::plus()); + n_fixed_vars = thrust::reduce(thrust::device, n_fixed_vars_d_ptr, n_fixed_vars_d_ptr+n_local, 0.0, thrust::plus()); + + thrust::device_free(bnds_low_d_ptr); + thrust::device_free(bnds_upp_d_ptr); + thrust::device_free(bnds_lu_d_ptr); + thrust::device_free(n_fixed_vars_d_ptr); +} + +/** @brief relax variable bounds */ +void relax_bounds_kernel(int n_local, + double* xl, + double* xu, + double fixed_var_tol, + double fixed_var_perturb) +{ + int num_blocks = (n_local+block_size-1)/block_size; + relax_bounds_cu<<>>(n_local, + xl, + xu, + fixed_var_tol, + fixed_var_perturb); +} /// for hiopVectorIntCuda /** diff --git a/src/LinAlg/VectorCudaKernels.hpp b/src/LinAlg/VectorCudaKernels.hpp index 90fe1f355..a3ec501dd 100644 --- a/src/LinAlg/VectorCudaKernels.hpp +++ b/src/LinAlg/VectorCudaKernels.hpp @@ -296,6 +296,25 @@ void copyToStartingAt_w_pattern_kernel(int n_src, double *vd, const double* dd); +/** @brief process variable bounds */ +void process_bounds_local_kernel(int n_local, + const double* xl, + const double* xu, + double* ixl, + double* ixu, + 
int& n_bnds_low, + int& n_bnds_upp, + int& n_bnds_lu, + int& n_fixed_vars, + double fixed_var_tol); + +/** @brief relax variable bounds */ +void relax_bounds_kernel(int n_local, + double* xl, + double* xu, + double fixed_var_tol, + double fixed_var_perturb); + /// for hiopVectorIntCuda /** * @brief Set the vector entries to be a linear space of starting at i0 containing evenly diff --git a/src/LinAlg/VectorHipKernels.cpp b/src/LinAlg/VectorHipKernels.cpp index 413ee183f..21f791abd 100644 --- a/src/LinAlg/VectorHipKernels.cpp +++ b/src/LinAlg/VectorHipKernels.cpp @@ -635,6 +635,71 @@ __global__ void copyToStartingAt_w_pattern_hip(int n_src, } } +/** @brief process variable bounds */ +__global__ void process_bounds_hip(int n, + const double* xl, + const double* xu, + double* ixl, + double* ixu, + double* bnds_low, + double* bnds_upp, + double* bnds_lu, + double* fixed_vars, + double fixed_var_tol) +{ + const int num_threads = blockDim.x * gridDim.x; + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + + for (int i = tid; i < n; i += num_threads) { + // preemptive loop to reduce number of iterations? + bnds_low[i] = 0; + bnds_upp[i] = 0; + bnds_lu[i] = 0; + fixed_vars[i] = 0; + + if(xl[i] > -1e20) { + ixl[i] = 1.; + bnds_low[i] = 1; + if(xu[i] < 1e20) { + bnds_lu[i] = 1; + } + } else { + ixl[i] = 0.; + } + + if(xu[i] < 1e20) { + ixu[i] = 1.; + bnds_upp[i] = 1; + } else { + ixu[i] = 0.; + } + + if(xu[i] < 1e20 && + fabs(xl[i]-xu[i]) <= fixed_var_tol*fmax(1.,std::fabs(xu[i]))) { + fixed_vars[i] = 1; + } + } +} + +/** @brief relax variable bounds */ +__global__ void relax_bounds_hip(int n, + double* xla, + double* xua, + double fixed_var_tol, + double fixed_var_perturb) +{ + const int num_threads = blockDim.x * gridDim.x; + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + + for (int i = tid; i < n; i += num_threads) { + double xuabs = fabs(xua[i]); + if(fabs(xua[i]-xla[i]) <= fixed_var_tol*fmax(1.,xuabs)) { + xua[i] += fixed_var_perturb * std::fmax(1.,xuabs); + xla[i] -= fixed_var_perturb * std::fmax(1.,xuabs); + } + } +} + namespace hiop { namespace hip @@ -1271,7 +1336,70 @@ void copyToStartingAt_w_pattern_kernel(int n_src, dd); } +/** @brief process variable bounds */ +void process_bounds_local_kernel(int n_local, + const double* xl, + const double* xu, + double* ixl, + double* ixu, + int& n_bnds_low, + int& n_bnds_upp, + int& n_bnds_lu, + int& n_fixed_vars, + double fixed_var_tol) +{ + int num_blocks = (n_local+block_size-1)/block_size; + + thrust::device_ptr bnds_low_d_ptr = thrust::device_malloc(n_local*sizeof(int)); + int* bnds_low_r_ptr = thrust::raw_pointer_cast(bnds_low_d_ptr); + + thrust::device_ptr bnds_upp_d_ptr = thrust::device_malloc(n_local*sizeof(int)); + int* bnds_upp_r_ptr = thrust::raw_pointer_cast(bnds_upp_d_ptr); + + thrust::device_ptr bnds_lu_d_ptr = thrust::device_malloc(n_local*sizeof(int)); + int* bnds_lu_r_ptr = thrust::raw_pointer_cast(bnds_lu_d_ptr); + thrust::device_ptr n_fixed_vars_d_ptr = thrust::device_malloc(n_local*sizeof(int)); + int* n_fixed_vars_r_ptr = thrust::raw_pointer_cast(n_fixed_vars_d_ptr); + + // set values + process_bounds_hip<<>>(n, + xl, + xu, + ixl, + ixu, + bnds_low_r_ptr, + bnds_upp_r_ptr, + bnds_lu_r_ptr, + n_fixed_vars_r_ptr, + fixed_var_tol); + + // compute sum + n_bnds_low = thrust::reduce(thrust::device, bnds_low_d_ptr, bnds_low_d_ptr+n_local, 0.0, thrust::plus()); + n_bnds_upp = thrust::reduce(thrust::device, bnds_upp_d_ptr, bnds_upp_d_ptr+n_local, 0.0, thrust::plus()); + n_bnds_lu = thrust::reduce(thrust::device, 
bnds_lu_d_ptr, bnds_lu_d_ptr+n_local, 0.0, thrust::plus()); + n_fixed_vars = thrust::reduce(thrust::device, n_fixed_vars_d_ptr, n_fixed_vars_d_ptr+n_local, 0.0, thrust::plus()); + + thrust::device_free(bnds_low_d_ptr); + thrust::device_free(bnds_upp_d_ptr); + thrust::device_free(bnds_lu_d_ptr); + thrust::device_free(n_fixed_vars_d_ptr); +} + +/** @brief relax variable bounds */ +void relax_bounds_kernel(int n_local, + double* xl, + double* xu, + double fixed_var_tol, + double fixed_var_perturb) +{ + int num_blocks = (n_local+block_size-1)/block_size; + relax_bounds_hip<<>>(n_local, + xl, + xu, + fixed_var_tol, + fixed_var_perturb); +} /// for hiopVectorIntHip /** diff --git a/src/LinAlg/VectorHipKernels.hpp b/src/LinAlg/VectorHipKernels.hpp index b1c0b8022..adc851b17 100644 --- a/src/LinAlg/VectorHipKernels.hpp +++ b/src/LinAlg/VectorHipKernels.hpp @@ -296,6 +296,25 @@ void copyToStartingAt_w_pattern_kernel(int n_src, double *vd, const double* dd); +/** @brief process variable bounds */ +void process_bounds_local_kernel(int n_local, + const double* xl, + const double* xu, + double* ixl, + double* ixu, + int& n_bnds_low, + int& n_bnds_upp, + int& n_bnds_lu, + int& n_fixed_vars, + double fixed_var_tol); + +/** @brief relax variable bounds */ +void relax_bounds_kernel(int n_local, + double* xl, + double* xu, + double fixed_var_tol, + double fixed_var_perturb); + /// for hiopVectorIntHip /** * @brief Set the vector entries to be a linear space of starting at i0 containing evenly diff --git a/src/LinAlg/hiopVector.hpp b/src/LinAlg/hiopVector.hpp index c2f1beda1..2ed99890f 100644 --- a/src/LinAlg/hiopVector.hpp +++ b/src/LinAlg/hiopVector.hpp @@ -1014,11 +1014,10 @@ class hiopVector * @param[out] n_bnds_low - number of variables with lower bounds only * @param[out] n_bnds_upp - number of variables with upper bounds only * @param[out] n_bnds_lu - number of variables with both lower and upper bounds - * @param[out] nfixed_vars - number of fixed variables + * @param[out] n_fixed_vars - number of fixed variables * @param[in] fixed_var_tol - tolerance used to define fixed variables * * @pre this is a local method - * @todo: add unit test, or should we remove this function? */ virtual bool process_bounds_local(const hiopVector& xu, hiopVector& ixl, @@ -1026,7 +1025,7 @@ class hiopVector size_type& n_bnds_low, size_type& n_bnds_upp, size_type& n_bnds_lu, - size_type& nfixed_vars, + size_type& n_fixed_vars, const double& fixed_var_tol) = 0; /** @@ -1038,7 +1037,6 @@ class hiopVector * @param[in] fixed_var_perturb - perturbation added to bounds * * @pre this is a local method - * @todo: add unit test, or should we remove this function? */ virtual void relax_bounds_vec(hiopVector& xu, const double& fixed_var_tol, diff --git a/src/LinAlg/hiopVectorCuda.cpp b/src/LinAlg/hiopVectorCuda.cpp index 0be1e2eaf..e0123cfae 100644 --- a/src/LinAlg/hiopVectorCuda.cpp +++ b/src/LinAlg/hiopVectorCuda.cpp @@ -1087,6 +1087,53 @@ bool hiopVectorCuda::is_equal(const hiopVector& vec) const assert(false&&"NOT needed. Remove this func. 
TODO"); } +bool hiopVectorCuda::process_bounds_local(const hiopVector& xu, + hiopVector& ixl, + hiopVector& ixu, + size_type& n_bnds_low, + size_type& n_bnds_upp, + size_type& n_bnds_lu, + size_type& n_fixed_vars, + const double& fixed_var_tol) +{ +#ifdef HIOP_DEEPCHECKS + assert(xu.get_local_size()==n_local_); + assert(ixl.get_local_size()==n_local_); + assert(ixu.get_local_size()==n_local_); +#endif + const double* xl_arr = data_; + const double* xu_arr = xu.local_data_const(); + double* ixl_arr = ixl.local_data(); + double* ixu_arr = ixu.local_data(); + + hiop::cuda::process_bounds_local_kernel(n_local_, + xl_arr, + xu_arr, + ixl_arr, + ixu_arr, + n_bnds_low, + n_bnds_upp, + n_bnds_lu, + n_fixed_vars, + fixed_var_tol); +} + +void hiopVectorCuda::relax_bounds_vec(hiopVector& xu, + const double& fixed_var_tol, + const double& fixed_var_perturb) +{ +#ifdef HIOP_DEEPCHECKS + assert(xu.get_local_size()==n_local_); +#endif + double* xl_arr = data_; + double* xu_arr = xu.local_data(); + + hiop::cuda::relax_bounds_kernel(n_local_, + xl_arr, + xu_arr, + fixed_var_tol, + fixed_var_perturb); +} } // namespace hiop diff --git a/src/LinAlg/hiopVectorCuda.hpp b/src/LinAlg/hiopVectorCuda.hpp index 5d504521d..0b16273f8 100644 --- a/src/LinAlg/hiopVectorCuda.hpp +++ b/src/LinAlg/hiopVectorCuda.hpp @@ -347,6 +347,19 @@ class hiopVectorCuda : public hiopVector /// @brief check if `this` vector is identical to `vec` virtual bool is_equal(const hiopVector& vec) const; + virtual bool process_bounds_local(const hiopVector& xu, + hiopVector& ixl, + hiopVector& ixu, + size_type& n_bnds_low, + size_type& n_bnds_upp, + size_type& n_bnds_lu, + size_type& n_fixed_vars, + const double& fixed_var_tol); + + virtual void relax_bounds_vec(hiopVector& xu, + const double& fixed_var_tol, + const double& fixed_var_perturb); + /* functions for this class */ inline MPI_Comm get_mpi_comm() const { return comm_; } diff --git a/src/LinAlg/hiopVectorHip.cpp b/src/LinAlg/hiopVectorHip.cpp index ab5b88f15..7a324b2ff 100644 --- a/src/LinAlg/hiopVectorHip.cpp +++ b/src/LinAlg/hiopVectorHip.cpp @@ -940,9 +940,9 @@ bool hiopVectorHip::matchesPattern(const hiopVector& pattern) /** @brief Adjusts duals. */ void hiopVectorHip::adjustDuals_plh(const hiopVector& xvec, - const hiopVector& ixvec, - const double& mu, - const double& kappa) + const hiopVector& ixvec, + const double& mu, + const double& kappa) { #ifdef HIOP_DEEPCHECKS assert(xvec.get_local_size()==n_local_); @@ -1091,6 +1091,53 @@ bool hiopVectorHip::is_equal(const hiopVector& vec) const assert(false&&"NOT needed. Remove this func. 
TODO"); } +bool hiopVectorHip::process_bounds_local(const hiopVector& xu, + hiopVector& ixl, + hiopVector& ixu, + size_type& n_bnds_low, + size_type& n_bnds_upp, + size_type& n_bnds_lu, + size_type& n_fixed_vars, + const double& fixed_var_tol) +{ +#ifdef HIOP_DEEPCHECKS + assert(xu.get_local_size()==n_local_); + assert(ixl.get_local_size()==n_local_); + assert(ixu.get_local_size()==n_local_); +#endif + const double* xl_arr = data_; + const double* xu_arr = xu.local_data_const(); + double* ixl_arr = ixl.local_data(); + double* ixu_arr = ixu.local_data(); + + hiop::hip::process_bounds_local_kernel(n_local_, + xl_arr, + xu_arr, + ixl_arr, + ixu_arr, + n_bnds_low, + n_bnds_upp, + n_bnds_lu, + n_fixed_vars, + fixed_var_tol); +} + +void hiopVectorHip::relax_bounds_vec(hiopVector& xu, + const double& fixed_var_tol, + const double& fixed_var_perturb) +{ +#ifdef HIOP_DEEPCHECKS + assert(xu.get_local_size()==n_local_); +#endif + const double* xl_arr = data_; + const double* xu_arr = xu.local_data_const(); + + hiop::hip::process_bounds_local_kernel(n_local_, + xl_arr, + xu_arr, + fixed_var_tol, + fixed_var_perturb); +} } // namespace hiop diff --git a/src/LinAlg/hiopVectorHip.hpp b/src/LinAlg/hiopVectorHip.hpp index addaf1d03..cf1edb117 100644 --- a/src/LinAlg/hiopVectorHip.hpp +++ b/src/LinAlg/hiopVectorHip.hpp @@ -349,6 +349,19 @@ class hiopVectorHip : public hiopVector /// @brief check if `this` vector is identical to `vec` virtual bool is_equal(const hiopVector& vec) const; + virtual bool process_bounds_local(const hiopVector& xu, + hiopVector& ixl, + hiopVector& ixu, + size_type& n_bnds_low, + size_type& n_bnds_upp, + size_type& n_bnds_lu, + size_type& n_fixed_vars, + const double& fixed_var_tol); + + virtual void relax_bounds_vec(hiopVector& xu, + const double& fixed_var_tol, + const double& fixed_var_perturb); + /* functions for this class */ inline MPI_Comm get_mpi_comm() const { return comm_; } diff --git a/src/LinAlg/hiopVectorPar.cpp b/src/LinAlg/hiopVectorPar.cpp index 236822ce9..b70c7b693 100644 --- a/src/LinAlg/hiopVectorPar.cpp +++ b/src/LinAlg/hiopVectorPar.cpp @@ -1288,13 +1288,13 @@ bool hiopVectorPar::process_bounds_local(const hiopVector& xu, size_type& n_bnds_low, size_type& n_bnds_upp, size_type& n_bnds_lu, - size_type& nfixed_vars, + size_type& n_fixed_vars, const double& fixed_var_tol) { n_bnds_low = 0; n_bnds_upp = 0; n_bnds_lu = 0; - nfixed_vars = 0; + n_fixed_vars = 0; const double* xl_vec = this->local_data_const(); const double* xu_vec = xu.local_data_const(); @@ -1326,7 +1326,7 @@ bool hiopVectorPar::process_bounds_local(const hiopVector& xu, if( xu_vec[i] < 1e20 && fabs(xl_vec[i]-xu_vec[i]) <= fixed_var_tol*std::fmax(1.,std::fabs(xu_vec[i]))) { - nfixed_vars++; + n_fixed_vars++; } else { #ifdef HIOP_DEEPCHECKS #define min_dist 1e-8 diff --git a/src/LinAlg/hiopVectorPar.hpp b/src/LinAlg/hiopVectorPar.hpp index b1d967549..f6f36995a 100644 --- a/src/LinAlg/hiopVectorPar.hpp +++ b/src/LinAlg/hiopVectorPar.hpp @@ -315,7 +315,7 @@ class hiopVectorPar : public hiopVector size_type& n_bnds_low, size_type& n_bnds_upp, size_type& n_bnds_lu, - size_type& nfixed_vars, + size_type& n_fixed_vars, const double& fixed_var_tol); virtual void relax_bounds_vec(hiopVector& xu, diff --git a/src/LinAlg/hiopVectorRaja.hpp b/src/LinAlg/hiopVectorRaja.hpp index 1a375be7c..17ba7dd82 100644 --- a/src/LinAlg/hiopVectorRaja.hpp +++ b/src/LinAlg/hiopVectorRaja.hpp @@ -318,7 +318,7 @@ class hiopVectorRaja : public hiopVector size_type& n_bnds_low, size_type& n_bnds_upp, size_type& n_bnds_lu, - 
size_type& nfixed_vars, + size_type& n_fixed_vars, const double& fixed_var_tol); virtual void relax_bounds_vec(hiopVector& xu, diff --git a/src/LinAlg/hiopVectorRajaImpl.hpp b/src/LinAlg/hiopVectorRajaImpl.hpp index eac76e6c0..f61ee8d83 100644 --- a/src/LinAlg/hiopVectorRajaImpl.hpp +++ b/src/LinAlg/hiopVectorRajaImpl.hpp @@ -2239,7 +2239,7 @@ bool hiopVectorRaja::process_bounds_local(const hiopVector& xu, size_type& n_bnds_low, size_type& n_bnds_upp, size_type& n_bnds_lu, - size_type& nfixed_vars, + size_type& n_fixed_vars, const double& fixed_var_tol) { #ifdef HIOP_DEEPCHECKS @@ -2261,7 +2261,7 @@ bool hiopVectorRaja::process_bounds_local(const hiopVector& xu, RAJA::ReduceSum< hiop_raja_reduce, int > sum_n_bnds_low(0); RAJA::ReduceSum< hiop_raja_reduce, int > sum_n_bnds_upp(0); RAJA::ReduceSum< hiop_raja_reduce, int > sum_n_bnds_lu(0); - RAJA::ReduceSum< hiop_raja_reduce, int > sum_nfixed_vars(0); + RAJA::ReduceSum< hiop_raja_reduce, int > sum_n_fixed_vars(0); RAJA::forall< hiop_raja_exec >( RAJA::RangeSegment(0, nlocal), RAJA_LAMBDA(RAJA::Index_type i) @@ -2285,20 +2285,20 @@ bool hiopVectorRaja::process_bounds_local(const hiopVector& xu, if(xu_vec[i] < 1e20 && fabs(xl_vec[i]-xu_vec[i]) <= fixed_var_tol*std::fmax(1.,std::fabs(xu_vec[i]))) { - sum_nfixed_vars += 1; + sum_n_fixed_vars += 1; } }); n_bnds_low = sum_n_bnds_low.get(); n_bnds_upp = sum_n_bnds_upp.get(); n_bnds_lu = sum_n_bnds_lu.get(); - nfixed_vars = sum_nfixed_vars.get(); + n_fixed_vars = sum_n_fixed_vars.get(); return true; } template -bool hiopVectorRaja::relax_bounds_vec(hiopVector& xu, +void hiopVectorRaja::relax_bounds_vec(hiopVector& xu, const double& fixed_var_tol, const double& fixed_var_perturb) { @@ -2311,17 +2311,15 @@ bool hiopVectorRaja::relax_bounds_vec(hiopVector& xu, double *xua = xu.local_data(); size_type n = this->get_local_size(); - RAJA::forall< hiop_raja_exec >( RAJA::RangeSegment(0, nlocal), + RAJA::forall< hiop_raja_exec >( RAJA::RangeSegment(0, n), RAJA_LAMBDA(RAJA::Index_type i) { double xuabs = std::fabs(xua[i]); if(std::fabs(xua[i]-xla[i]) <= fixed_var_tol*std::fmax(1.,xuabs)) { - xua[i] += fixed_var_perturb*std::fmax(1.,xuabs); - xla[i] -= fixed_var_perturb*std::fmax(1.,xuabs); + xua[i] += fixed_var_perturb * std::fmax(1.,xuabs); + xla[i] -= fixed_var_perturb * std::fmax(1.,xuabs); } }); - - return true; } } // namespace hiop diff --git a/tests/LinAlg/vectorTests.hpp b/tests/LinAlg/vectorTests.hpp index 225be242a..2a7ea1746 100644 --- a/tests/LinAlg/vectorTests.hpp +++ b/tests/LinAlg/vectorTests.hpp @@ -2050,6 +2050,108 @@ class VectorTests : public TestBase return reduceReturn(fail, &x); } + /** + * @brief Test that hiop correctly processes variable bounds + * + * @note This is local method only + */ + bool vector_process_bounds(hiop::hiopVector& xl, + hiop::hiopVector& xu, + hiop::hiopVector& ixl, + hiop::hiopVector& ixu, + const int rank = 0) + { + const local_ordinal_type N = getLocalSize(&xl); + assert(N == getLocalSize(&xu)); + assert(N == getLocalSize(&ixl)); + assert(N == getLocalSize(&ixu)); + assert(N >= 3); // only test N>=3 + int fail = 0; + + int n_low = 0; + int n_upp = 0; + int n_lu = 0; + int n_fixed = 0; + double fixed_var_tol = 1e-8; + + // xl = [1, .., 1, -inf] + xl.setToConstant(one); + setLocalElement(&xl, N-1, -one/zero); + + // xl = [inf, 1, 2, .., 2] + xu.setToConstant(two); + setLocalElement(&xu, 0, one/zero); + setLocalElement(&xu, 1, one); + + xl.process_bounds_local(xu, ixl, ixu, n_low, n_upp, n_lu, n_fixed, fixed_var_tol); + + // Check that the last element of rank 
zero's ixl is zero, that the first element of ixu is
+    // zero, and that the bound counts are as expected
+    fail += verifyAnswer(&ixl,
+      [=] (local_ordinal_type i) -> real_type
+      {
+        return (i == N-1) ? 0. : 1.;
+      });
+
+    fail += verifyAnswer(&ixu,
+      [=] (local_ordinal_type i) -> real_type
+      {
+        return (i == 0) ? 0. : 1.;
+      });
+
+    fail += (n_low != N-1);
+    fail += (n_upp != N-1);
+    fail += (n_lu != N-2);
+    fail += (n_fixed != 1);
+
+    printMessage(fail, __func__, rank);
+    return reduceReturn(fail, &xl);
+  }
+
+  /**
+   * @brief Test that hiop correctly relaxes variable bounds
+   *
+   * @note This is local method only
+   */
+  bool vector_relax_bounds(hiop::hiopVector& xl,
+                           hiop::hiopVector& xu,
+                           const int rank = 0)
+  {
+    const local_ordinal_type N = getLocalSize(&xl);
+    assert(N == getLocalSize(&xu));
+    int fail = 0;
+
+    double fixed_var_tol = 1e-8;
+    double fixed_var_perturb = 1e-1;
+
+    // xl = [1, .., 1, 2]
+    xl.setToConstant(one);
+    setLocalElement(&xl, N-1, two);
+
+    // xu = [2, .., 2]
+    xu.setToConstant(two);
+
+    xl.relax_bounds_vec(xu, fixed_var_tol, fixed_var_perturb);
+
+    // Check that only the fixed variable (the last element) had its
+    // bounds pushed apart by fixed_var_perturb*fmax(1., |xu|)
+    fail += verifyAnswer(&xl,
+      [=] (local_ordinal_type i) -> real_type
+      {
+        return (i == N-1) ? 1.8 : one;
+      });
+
+    fail += verifyAnswer(&xu,
+      [=] (local_ordinal_type i) -> real_type
+      {
+        return (i == N-1) ? 2.2 : two;
+      });
+
+    printMessage(fail, __func__, rank);
+    return reduceReturn(fail, &xl);
+  }
+
+
   /// Returns element _i_ of vector _x_.
   real_type getLocalElement(hiop::hiopVector* x, local_ordinal_type i)
   {
diff --git a/tests/testVector.cpp b/tests/testVector.cpp
index 65834293a..01a79ba5e 100644
--- a/tests/testVector.cpp
+++ b/tests/testVector.cpp
@@ -326,6 +326,8 @@ int runTests(const char* mem_space, MPI_Comm comm)
   fail += test.vectorMatchesPattern(*x, *y, rank);
   fail += test.vectorAdjustDuals_plh(*x, *y, *z, *a, rank);
 
+  fail += test.vector_process_bounds(*x, *y, *z, *a);
+  fail += test.vector_relax_bounds(*x, *y);
 
   if (rank == 0)
   {

From 641fa109e0621d362a27040ea0590a1b4c98d756 Mon Sep 17 00:00:00 2001
From: nychiang
Date: Wed, 8 Mar 2023 22:22:31 -0800
Subject: [PATCH 4/5] add some functions for follow-up development of process_constraints

---
 src/LinAlg/VectorCudaKernels.cu         |  37 ++++++++++++++++++++++++-
 src/LinAlg/VectorCudaKernels.hpp        |   3 ++
 src/LinAlg/VectorHipKernels.cpp         |  37 ++++++++++++++++++++++++-
 src/LinAlg/VectorHipKernels.hpp         |   3 ++
 src/LinAlg/hiopVector.hpp               |  10 ++++++-
 src/LinAlg/hiopVectorCuda.cpp           |  14 ++++++++++
 src/LinAlg/hiopVectorCuda.hpp           |   2 ++
 src/LinAlg/hiopVectorHip.cpp            |  14 ++++++++++
 src/LinAlg/hiopVectorHip.hpp            |   2 ++
 src/LinAlg/hiopVectorPar.cpp            |  22 +++++++++++++++
 src/LinAlg/hiopVectorPar.hpp            |   2 ++
 src/LinAlg/hiopVectorRaja.hpp           |   2 ++
 src/LinAlg/hiopVectorRajaImpl.hpp       |  29 +++++++++++++++++++
 src/Optimization/hiopNlpFormulation.cpp |  20 +++++--------
 src/Optimization/hiopNlpFormulation.hpp |   2 +-
 tests/LinAlg/vectorTests.hpp            |  28 +++++++++++++++++++
 tests/testVector.cpp                    |   7 +++--
 17 files changed, 214 insertions(+), 20 deletions(-)

diff --git a/src/LinAlg/VectorCudaKernels.cu b/src/LinAlg/VectorCudaKernels.cu
index c1b7cd77c..09948dff4 100644
--- a/src/LinAlg/VectorCudaKernels.cu
+++ b/src/LinAlg/VectorCudaKernels.cu
@@ -705,6 +705,24 @@ __global__ void relax_bounds_cu(int n,
   }
 }
 
+/** @brief set d_ptr[i] = 1 if d1[i] == d2[i], otherwirse 0 */
+__global__ void set_if_match_cu(int n,
+                                double* d_ptr,
+                                double* d1,
+                                double* d2)
+{
+  const int num_threads = blockDim.x * gridDim.x;
+  const int tid
= blockIdx.x * blockDim.x + threadIdx.x; + + for (int i = tid; i < n; i += num_threads) { + if(d1[i]==d2[i]) { + d_ptr[i] = 1; + } else { + d_ptr[i] = 0; + } + } +} + namespace hiop { namespace cuda @@ -1177,7 +1195,6 @@ double min_w_pattern_kernel(int n, const double* d1, const double* id, double ma thrust::device_ptr ret_dev_ptr = thrust::min_element(thrust::device, dv_ptr, dv_ptr+n); - // TODO: how to return double from device to host? double *ret_host = new double[1]; double *ret_ptr = thrust::raw_pointer_cast(ret_dev_ptr); cudaError_t cuerr = cudaMemcpy(ret_host, ret_ptr, (1)*sizeof(double), cudaMemcpyDeviceToHost); @@ -1341,6 +1358,24 @@ void copyToStartingAt_w_pattern_kernel(int n_src, dd); } +int num_match_local_kernel(int n, double* d1, const double* d2) +{ + int num_blocks = (n+block_size-1)/block_size; + + // TODO: how to avoid this temp vec? + thrust::device_ptr dv_ptr = thrust::device_malloc(n*sizeof(int)); + int* d_ptr = thrust::raw_pointer_cast(dv_ptr); + + // set d_ptr[i] = 1 if d1[i] == d2[i], otherwirse 0 + set_if_match_cu<<>>(n, d_ptr, d1, d2); + + int rval = thrust::reduce(thrust::device, d_ptr, d_ptr+n, 0, thrust::plus()); + + thrust::device_free(dv_ptr); + + return rval; +} + /** @brief process variable bounds */ void process_bounds_local_kernel(int n_local, const double* xl, diff --git a/src/LinAlg/VectorCudaKernels.hpp b/src/LinAlg/VectorCudaKernels.hpp index a3ec501dd..1afe55b82 100644 --- a/src/LinAlg/VectorCudaKernels.hpp +++ b/src/LinAlg/VectorCudaKernels.hpp @@ -296,6 +296,9 @@ void copyToStartingAt_w_pattern_kernel(int n_src, double *vd, const double* dd); +/// @brief return the numbers of identical elements between two vectors +int num_match_local_kernel(int n, double* d1, const double* d2); + /** @brief process variable bounds */ void process_bounds_local_kernel(int n_local, const double* xl, diff --git a/src/LinAlg/VectorHipKernels.cpp b/src/LinAlg/VectorHipKernels.cpp index 21f791abd..85acb17b0 100644 --- a/src/LinAlg/VectorHipKernels.cpp +++ b/src/LinAlg/VectorHipKernels.cpp @@ -700,6 +700,24 @@ __global__ void relax_bounds_hip(int n, } } +/** @brief set d_ptr[i] = 1 if d1[i] == d2[i], otherwirse 0 */ +__global__ void set_if_match_hip(int n, + double* d_ptr, + double* d1, + double* d2) +{ + const int num_threads = blockDim.x * gridDim.x; + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + + for (int i = tid; i < n; i += num_threads) { + if(d1[i]==d2[i]) { + d_ptr[i] = 1; + } else { + d_ptr[i] = 0; + } + } +} + namespace hiop { namespace hip @@ -1172,7 +1190,6 @@ double min_w_pattern_kernel(int n, const double* d1, const double* id, double ma thrust::device_ptr ret_dev_ptr = thrust::min_element(thrust::device, dv_ptr, dv_ptr+n); - // TODO: how to return double from device to host? double *ret_host = new double[1]; double *ret_ptr = thrust::raw_pointer_cast(ret_dev_ptr); hipError_t cuerr = hipMemcpy(ret_host, ret_ptr, (1)*sizeof(double), hipMemcpyDeviceToHost); @@ -1336,6 +1353,24 @@ void copyToStartingAt_w_pattern_kernel(int n_src, dd); } +int num_match_local_kernel(int n, double* d1, const double* d2) +{ + int num_blocks = (n+block_size-1)/block_size; + + // TODO: how to avoid this temp vec? 
+ thrust::device_ptr dv_ptr = thrust::device_malloc(n*sizeof(int)); + int* d_ptr = thrust::raw_pointer_cast(dv_ptr); + + // set d_ptr[i] = 1 if d1[i] == d2[i], otherwirse 0 + set_if_match_cu<<>>(n, d_ptr, d1, d2); + + int rval = thrust::reduce(thrust::device, d_ptr, d_ptr+n, 0, thrust::plus()); + + thrust::device_free(dv_ptr); + + return rval; +} + /** @brief process variable bounds */ void process_bounds_local_kernel(int n_local, const double* xl, diff --git a/src/LinAlg/VectorHipKernels.hpp b/src/LinAlg/VectorHipKernels.hpp index adc851b17..5d851e2de 100644 --- a/src/LinAlg/VectorHipKernels.hpp +++ b/src/LinAlg/VectorHipKernels.hpp @@ -296,6 +296,9 @@ void copyToStartingAt_w_pattern_kernel(int n_src, double *vd, const double* dd); +/// @brief return the numbers of identical elements between two vectors +int num_match_local_kernel(int n, double* d1, const double* d2); + /** @brief process variable bounds */ void process_bounds_local_kernel(int n_local, const double* xl, diff --git a/src/LinAlg/hiopVector.hpp b/src/LinAlg/hiopVector.hpp index 2ed99890f..80c720004 100644 --- a/src/LinAlg/hiopVector.hpp +++ b/src/LinAlg/hiopVector.hpp @@ -998,10 +998,18 @@ class hiopVector * @brief check if `this` vector is identical to `vec` * * @param[in] vec - vector used to be compared with `this` - * @todo: add unit test, or should we remove this function? + * @todo: should we remove this function? */ virtual bool is_equal(const hiopVector& vec) const = 0; + /** + * @brief return the numbers of identical elements between two vectors + * + * @param[in] vec - vector used to be compared with `this` + * @post `vec` is not modified + */ + virtual size_type num_match(const hiopVector& vec) const = 0; + /** * @brief preprocess bounds in a form supported by the NLP formulation. Returns counts of * the variables with lower, upper, and lower and upper bounds, as well of the fixed diff --git a/src/LinAlg/hiopVectorCuda.cpp b/src/LinAlg/hiopVectorCuda.cpp index e0123cfae..ea0d14b0c 100644 --- a/src/LinAlg/hiopVectorCuda.cpp +++ b/src/LinAlg/hiopVectorCuda.cpp @@ -1087,6 +1087,20 @@ bool hiopVectorCuda::is_equal(const hiopVector& vec) const assert(false&&"NOT needed. Remove this func. TODO"); } +size_type hiopVectorCuda::num_match(const hiopVector& vec) const +{ + double* dd = data_; + double* vd = vec.local_data(); + int sum_match = sum_match = hiop::cuda::num_match_local_kernel(n_local_, dd, vd); + +#ifdef HIOP_USE_MPI + int sumG; + int ierr=MPI_Allreduce(&sum_match, &sumG, 1, MPI_INT, MPI_SUM, comm_); assert(MPI_SUCCESS==ierr); + return sumG; +#endif + return sum_match; +} + bool hiopVectorCuda::process_bounds_local(const hiopVector& xu, hiopVector& ixl, hiopVector& ixu, diff --git a/src/LinAlg/hiopVectorCuda.hpp b/src/LinAlg/hiopVectorCuda.hpp index 0b16273f8..7ec1b3faa 100644 --- a/src/LinAlg/hiopVectorCuda.hpp +++ b/src/LinAlg/hiopVectorCuda.hpp @@ -347,6 +347,8 @@ class hiopVectorCuda : public hiopVector /// @brief check if `this` vector is identical to `vec` virtual bool is_equal(const hiopVector& vec) const; + virtual size_type num_match(const hiopVector& vec) const; + virtual bool process_bounds_local(const hiopVector& xu, hiopVector& ixl, hiopVector& ixu, diff --git a/src/LinAlg/hiopVectorHip.cpp b/src/LinAlg/hiopVectorHip.cpp index 7a324b2ff..4f6a144de 100644 --- a/src/LinAlg/hiopVectorHip.cpp +++ b/src/LinAlg/hiopVectorHip.cpp @@ -1091,6 +1091,20 @@ bool hiopVectorHip::is_equal(const hiopVector& vec) const assert(false&&"NOT needed. Remove this func. 
TODO"); } +size_type hiopVectorHip::num_match(const hiopVector& vec) const +{ + double* dd = data_; + double* vd = vec.local_data(); + int sum_match = sum_match = hiop::cuda::num_match_local_kernel(n_local_, dd, vd); + +#ifdef HIOP_USE_MPI + int sumG; + int ierr=MPI_Allreduce(&sum_match, &sumG, 1, MPI_INT, MPI_SUM, comm_); assert(MPI_SUCCESS==ierr); + return sumG; +#endif + return sum_match; +} + bool hiopVectorHip::process_bounds_local(const hiopVector& xu, hiopVector& ixl, hiopVector& ixu, diff --git a/src/LinAlg/hiopVectorHip.hpp b/src/LinAlg/hiopVectorHip.hpp index cf1edb117..de594201c 100644 --- a/src/LinAlg/hiopVectorHip.hpp +++ b/src/LinAlg/hiopVectorHip.hpp @@ -349,6 +349,8 @@ class hiopVectorHip : public hiopVector /// @brief check if `this` vector is identical to `vec` virtual bool is_equal(const hiopVector& vec) const; + virtual size_type num_match(const hiopVector& vec) const; + virtual bool process_bounds_local(const hiopVector& xu, hiopVector& ixl, hiopVector& ixu, diff --git a/src/LinAlg/hiopVectorPar.cpp b/src/LinAlg/hiopVectorPar.cpp index b70c7b693..b50927729 100644 --- a/src/LinAlg/hiopVectorPar.cpp +++ b/src/LinAlg/hiopVectorPar.cpp @@ -1282,6 +1282,28 @@ bool hiopVectorPar::is_equal(const hiopVector& vec) const return all_equal; } + +size_type hiopVectorPar::num_match(const hiopVector& vec) const +{ + if(n_local_ != vec.get_local_size()) { + return 0; + } + int sum_match = 0; + const double* data_v = vec.local_data_const(); + for(auto i=0; i::is_equal(const hiopVector& vec) const return all_equal; } +template +size_type hiopVectorRaja::num_match(const hiopVector& vec) const +{ +#ifdef HIOP_DEEPCHECKS + const hiopVectorRaja& v = dynamic_cast&>(vec); + assert(v.n_local_ == n_local_); +#endif + + const double* data_v = vec.local_data_const(); + const double* data = data_dev_; + RAJA::ReduceSum< hiop_raja_reduce, int > sum(0); + RAJA::forall< hiop_raja_exec >( RAJA::RangeSegment(0, n_local_), + RAJA_LAMBDA(RAJA::Index_type i) + { + if(data[i]==data_v[i]) { + sum += 1; + } + }); + int all_equal = (sum.get() == 0); + +#ifdef HIOP_USE_MPI + int all_equalG; + int ierr = MPI_Allreduce(&all_equal, &all_equalG, 1, MPI_INT, MPI_SUM, comm_); + assert(MPI_SUCCESS==ierr); + return all_equalG; +#endif + return all_equal; +} + template bool hiopVectorRaja::process_bounds_local(const hiopVector& xu, hiopVector& ixl, diff --git a/src/Optimization/hiopNlpFormulation.cpp b/src/Optimization/hiopNlpFormulation.cpp index e2679932f..ac50c04fa 100644 --- a/src/Optimization/hiopNlpFormulation.cpp +++ b/src/Optimization/hiopNlpFormulation.cpp @@ -441,7 +441,7 @@ bool hiopNlpFormulation::finalizeInitialization() bool hiopNlpFormulation::process_bounds(size_type& n_bnds_low, size_type& n_bnds_upp, size_type& n_bnds_lu, - size_type& nfixed_vars) + size_type& n_fixed_vars) { this->xl_->process_bounds_local(*this->xu_, *this->ixl_, @@ -449,7 +449,7 @@ bool hiopNlpFormulation::process_bounds(size_type& n_bnds_low, n_bnds_low, n_bnds_upp, n_bnds_lu, - nfixed_vars, + n_fixed_vars, options->GetNumeric("fixed_var_tolerance")); return true; } @@ -485,24 +485,18 @@ bool hiopNlpFormulation::process_constraints() assert(gl->get_local_size()==n_cons_); assert(gu->get_local_size()==n_cons_); + n_cons_eq_ = gl->num_match(*gu); + n_cons_ineq_ = n_cons_ - n_cons_eq_; + // transfer to host hiopVectorPar gl_host(n_cons_); hiopVectorPar gu_host(n_cons_); gl->copy_to_vectorpar(gl_host); gu->copy_to_vectorpar(gu_host); - + double* gl_vec = gl_host.local_data(); double* gu_vec = gu_host.local_data(); - n_cons_eq_ = 0; - 
n_cons_ineq_ = 0; - for(int i=0;i Date: Wed, 8 Mar 2023 22:44:02 -0800 Subject: [PATCH 5/5] pass LLNL ci --- src/LinAlg/VectorCudaKernels.cu | 8 ++++---- src/LinAlg/VectorCudaKernels.hpp | 2 +- src/LinAlg/VectorHipKernels.cpp | 10 +++++----- src/LinAlg/VectorHipKernels.hpp | 2 +- src/LinAlg/hiopVectorCuda.cpp | 6 ++++-- src/LinAlg/hiopVectorHip.cpp | 6 ++++-- src/LinAlg/hiopVectorRajaImpl.hpp | 2 +- tests/LinAlg/vectorTests.hpp | 1 - 8 files changed, 20 insertions(+), 17 deletions(-) diff --git a/src/LinAlg/VectorCudaKernels.cu b/src/LinAlg/VectorCudaKernels.cu index 09948dff4..405dfff78 100644 --- a/src/LinAlg/VectorCudaKernels.cu +++ b/src/LinAlg/VectorCudaKernels.cu @@ -707,9 +707,9 @@ __global__ void relax_bounds_cu(int n, /** @brief set d_ptr[i] = 1 if d1[i] == d2[i], otherwirse 0 */ __global__ void set_if_match_cu(int n, - double* d_ptr, - double* d1, - double* d2) + int* d_ptr, + const double* d1, + const double* d2) { const int num_threads = blockDim.x * gridDim.x; const int tid = blockIdx.x * blockDim.x + threadIdx.x; @@ -1358,7 +1358,7 @@ void copyToStartingAt_w_pattern_kernel(int n_src, dd); } -int num_match_local_kernel(int n, double* d1, const double* d2) +int num_match_local_kernel(int n, const double* d1, const double* d2) { int num_blocks = (n+block_size-1)/block_size; diff --git a/src/LinAlg/VectorCudaKernels.hpp b/src/LinAlg/VectorCudaKernels.hpp index 1afe55b82..3d8cf6cba 100644 --- a/src/LinAlg/VectorCudaKernels.hpp +++ b/src/LinAlg/VectorCudaKernels.hpp @@ -297,7 +297,7 @@ void copyToStartingAt_w_pattern_kernel(int n_src, const double* dd); /// @brief return the numbers of identical elements between two vectors -int num_match_local_kernel(int n, double* d1, const double* d2); +int num_match_local_kernel(int n, const double* d1, const double* d2); /** @brief process variable bounds */ void process_bounds_local_kernel(int n_local, diff --git a/src/LinAlg/VectorHipKernels.cpp b/src/LinAlg/VectorHipKernels.cpp index 85acb17b0..bcea53a66 100644 --- a/src/LinAlg/VectorHipKernels.cpp +++ b/src/LinAlg/VectorHipKernels.cpp @@ -702,9 +702,9 @@ __global__ void relax_bounds_hip(int n, /** @brief set d_ptr[i] = 1 if d1[i] == d2[i], otherwirse 0 */ __global__ void set_if_match_hip(int n, - double* d_ptr, - double* d1, - double* d2) + int* d_ptr, + const double* d1, + const double* d2) { const int num_threads = blockDim.x * gridDim.x; const int tid = blockIdx.x * blockDim.x + threadIdx.x; @@ -1353,7 +1353,7 @@ void copyToStartingAt_w_pattern_kernel(int n_src, dd); } -int num_match_local_kernel(int n, double* d1, const double* d2) +int num_match_local_kernel(int n, const double* d1, const double* d2) { int num_blocks = (n+block_size-1)/block_size; @@ -1362,7 +1362,7 @@ int num_match_local_kernel(int n, double* d1, const double* d2) int* d_ptr = thrust::raw_pointer_cast(dv_ptr); // set d_ptr[i] = 1 if d1[i] == d2[i], otherwirse 0 - set_if_match_cu<<>>(n, d_ptr, d1, d2); + set_if_match_hip<<>>(n, d_ptr, d1, d2); int rval = thrust::reduce(thrust::device, d_ptr, d_ptr+n, 0, thrust::plus()); diff --git a/src/LinAlg/VectorHipKernels.hpp b/src/LinAlg/VectorHipKernels.hpp index 5d851e2de..ece799f18 100644 --- a/src/LinAlg/VectorHipKernels.hpp +++ b/src/LinAlg/VectorHipKernels.hpp @@ -297,7 +297,7 @@ void copyToStartingAt_w_pattern_kernel(int n_src, const double* dd); /// @brief return the numbers of identical elements between two vectors -int num_match_local_kernel(int n, double* d1, const double* d2); +int num_match_local_kernel(int n, const double* d1, const double* d2); /** @brief process 
variable bounds */ void process_bounds_local_kernel(int n_local, diff --git a/src/LinAlg/hiopVectorCuda.cpp b/src/LinAlg/hiopVectorCuda.cpp index ea0d14b0c..1bf877992 100644 --- a/src/LinAlg/hiopVectorCuda.cpp +++ b/src/LinAlg/hiopVectorCuda.cpp @@ -1089,8 +1089,8 @@ bool hiopVectorCuda::is_equal(const hiopVector& vec) const size_type hiopVectorCuda::num_match(const hiopVector& vec) const { - double* dd = data_; - double* vd = vec.local_data(); + const double* dd = data_; + const double* vd = vec.local_data_const(); int sum_match = sum_match = hiop::cuda::num_match_local_kernel(n_local_, dd, vd); #ifdef HIOP_USE_MPI @@ -1130,6 +1130,8 @@ bool hiopVectorCuda::process_bounds_local(const hiopVector& xu, n_bnds_lu, n_fixed_vars, fixed_var_tol); + + return true; } void hiopVectorCuda::relax_bounds_vec(hiopVector& xu, diff --git a/src/LinAlg/hiopVectorHip.cpp b/src/LinAlg/hiopVectorHip.cpp index 4f6a144de..e2a173468 100644 --- a/src/LinAlg/hiopVectorHip.cpp +++ b/src/LinAlg/hiopVectorHip.cpp @@ -1093,8 +1093,8 @@ bool hiopVectorHip::is_equal(const hiopVector& vec) const size_type hiopVectorHip::num_match(const hiopVector& vec) const { - double* dd = data_; - double* vd = vec.local_data(); + const double* dd = data_; + const double* vd = vec.local_data_const(); int sum_match = sum_match = hiop::cuda::num_match_local_kernel(n_local_, dd, vd); #ifdef HIOP_USE_MPI @@ -1134,6 +1134,8 @@ bool hiopVectorHip::process_bounds_local(const hiopVector& xu, n_bnds_lu, n_fixed_vars, fixed_var_tol); + + return true; } void hiopVectorHip::relax_bounds_vec(hiopVector& xu, diff --git a/src/LinAlg/hiopVectorRajaImpl.hpp b/src/LinAlg/hiopVectorRajaImpl.hpp index ff3091162..6e8b552cc 100644 --- a/src/LinAlg/hiopVectorRajaImpl.hpp +++ b/src/LinAlg/hiopVectorRajaImpl.hpp @@ -2250,7 +2250,7 @@ size_type hiopVectorRaja::num_match(const hiopVector& vec) const sum += 1; } }); - int all_equal = (sum.get() == 0); + int all_equal = sum.get(); #ifdef HIOP_USE_MPI int all_equalG; diff --git a/tests/LinAlg/vectorTests.hpp b/tests/LinAlg/vectorTests.hpp index 1d81711b5..78d2f820e 100644 --- a/tests/LinAlg/vectorTests.hpp +++ b/tests/LinAlg/vectorTests.hpp @@ -2058,7 +2058,6 @@ class VectorTests : public TestBase { const local_ordinal_type Nx = x.get_size(); int fail = 0; - int n_match = 0; x.setToConstant(one); y.setToConstant(one);
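
For orientation, the sketch below shows how the vector-level API introduced by this series is meant to be driven from the formulation layer: process_bounds_local() produces the bound counts and indicator vectors, and relax_bounds_vec() perturbs the bounds of (nearly) fixed variables, mirroring the fixed_var="relax" path in hiopNlpFormulation. The free function, its name, and the tolerance values are illustrative assumptions and are not part of the patches.

    // minimal usage sketch (illustrative; not code from this patch series)
    #include "hiopVector.hpp"
    using namespace hiop;

    void preprocess_bounds(hiopVector& xl, hiopVector& xu,
                           hiopVector& ixl, hiopVector& ixu,
                           double fixed_var_tol = 1e-8,
                           double fixed_var_perturb = 1e-8)
    {
      size_type n_low, n_upp, n_lu, n_fixed;
      // count variables with lower/upper/two-sided bounds and detect fixed ones (local data only)
      xl.process_bounds_local(xu, ixl, ixu, n_low, n_upp, n_lu, n_fixed, fixed_var_tol);
      if(n_fixed > 0) {
        // push xl and xu apart by fixed_var_perturb*fmax(1,|xu|) for the fixed entries
        xl.relax_bounds_vec(xu, fixed_var_tol, fixed_var_perturb);
      }
    }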