diff --git a/src/Drivers/Sparse/CMakeLists.txt b/src/Drivers/Sparse/CMakeLists.txt
index 91efb88a7..1c7ed6d8c 100644
--- a/src/Drivers/Sparse/CMakeLists.txt
+++ b/src/Drivers/Sparse/CMakeLists.txt
@@ -18,11 +18,10 @@
 add_executable(NlpSparseEx4.exe NlpSparseEx4.cpp NlpSparseEx4Driver.cpp)
 target_link_libraries(NlpSparseEx4.exe HiOp::HiOp)
 
 if(HIOP_USE_RAJA)
-  if(HIOP_USE_GPU AND HIOP_USE_CUDA)
+  if(HIOP_USE_GPU)
     set_source_files_properties(
       NlpSparseRajaEx2.cpp
       NlpSparseRajaEx2Driver.cpp
-      PROPERTIES LANGUAGE CUDA
     )
     add_executable(NlpSparseRajaEx2.exe NlpSparseRajaEx2Driver.cpp NlpSparseRajaEx2.cpp)
diff --git a/src/Drivers/Sparse/NlpSparseEx1Driver.cpp b/src/Drivers/Sparse/NlpSparseEx1Driver.cpp
index 3131fc024..3bc259bae 100644
--- a/src/Drivers/Sparse/NlpSparseEx1Driver.cpp
+++ b/src/Drivers/Sparse/NlpSparseEx1Driver.cpp
@@ -239,8 +239,10 @@ int main(int argc, char **argv)
     nlp.options->SetStringValue("fact_acceptor", "inertia_free");
     nlp.options->SetIntegerValue("ir_outer_maxit", 0);
     if (use_ginkgo_cuda) {
+      nlp.options->SetStringValue("compute_mode", "gpu");
       nlp.options->SetStringValue("ginkgo_exec", "cuda");
     } else if (use_ginkgo_hip) {
+      nlp.options->SetStringValue("compute_mode", "gpu");
       nlp.options->SetStringValue("ginkgo_exec", "hip");
     } else {
       nlp.options->SetStringValue("ginkgo_exec", "reference");
diff --git a/src/Drivers/Sparse/NlpSparseEx2Driver.cpp b/src/Drivers/Sparse/NlpSparseEx2Driver.cpp
index e61b866a0..27e3048d0 100644
--- a/src/Drivers/Sparse/NlpSparseEx2Driver.cpp
+++ b/src/Drivers/Sparse/NlpSparseEx2Driver.cpp
@@ -248,8 +248,10 @@ int main(int argc, char **argv)
     nlp.options->SetStringValue("linsol_mode", "speculative");
     nlp.options->SetStringValue("linear_solver_sparse", "ginkgo");
     if (use_ginkgo_cuda) {
+      nlp.options->SetStringValue("compute_mode", "gpu");
       nlp.options->SetStringValue("ginkgo_exec", "cuda");
     } else if (use_ginkgo_hip) {
+      nlp.options->SetStringValue("compute_mode", "gpu");
       nlp.options->SetStringValue("ginkgo_exec", "hip");
     } else {
       nlp.options->SetStringValue("ginkgo_exec", "reference");
diff --git a/src/Drivers/Sparse/NlpSparseEx4Driver.cpp b/src/Drivers/Sparse/NlpSparseEx4Driver.cpp
index 0200284f6..878a605e9 100644
--- a/src/Drivers/Sparse/NlpSparseEx4Driver.cpp
+++ b/src/Drivers/Sparse/NlpSparseEx4Driver.cpp
@@ -238,8 +238,10 @@ int main(int argc, char **argv)
     nlp.options->SetStringValue("fact_acceptor", "inertia_free");
     nlp.options->SetIntegerValue("ir_outer_maxit", 0);
     if (use_ginkgo_cuda) {
+      nlp.options->SetStringValue("compute_mode", "gpu");
       nlp.options->SetStringValue("ginkgo_exec", "cuda");
     } else if (use_ginkgo_hip) {
+      nlp.options->SetStringValue("compute_mode", "gpu");
       nlp.options->SetStringValue("ginkgo_exec", "hip");
     } else {
       nlp.options->SetStringValue("ginkgo_exec", "reference");
diff --git a/src/Drivers/Sparse/NlpSparseRajaEx2Driver.cpp b/src/Drivers/Sparse/NlpSparseRajaEx2Driver.cpp
index a2455ebe1..d079ed46e 100644
--- a/src/Drivers/Sparse/NlpSparseRajaEx2Driver.cpp
+++ b/src/Drivers/Sparse/NlpSparseRajaEx2Driver.cpp
@@ -256,12 +256,24 @@ int main(int argc, char **argv)
     // only support cusolverLU right now, 2023.02.28
     //lsq initialization of the duals fails for this example since the Jacobian is rank deficient
     //use zero initialization
-    nlp.options->SetStringValue("linear_solver_sparse", "resolve");
     if(use_resolve_cuda_rf) {
+      nlp.options->SetStringValue("linear_solver_sparse", "resolve");
       nlp.options->SetStringValue("resolve_refactorization", "rf");
       nlp.options->SetIntegerValue("ir_inner_maxit", 20);
      nlp.options->SetIntegerValue("ir_outer_maxit", 0);
    }
+    if (use_ginkgo) {
+      nlp.options->SetStringValue("linear_solver_sparse", "ginkgo");
+      nlp.options->SetIntegerValue("ir_outer_maxit", 0);
+      if (use_ginkgo_cuda) {
+        nlp.options->SetStringValue("ginkgo_exec", "cuda");
+      } else if (use_ginkgo_hip) {
+        nlp.options->SetStringValue("ginkgo_exec", "hip");
+      } else {
+        nlp.options->SetStringValue("ginkgo_exec", "reference");
+        nlp.options->SetStringValue("compute_mode", "cpu");
+      }
+    }
    nlp.options->SetStringValue("duals_init", "zero");
    nlp.options->SetStringValue("mem_space", "device");
    nlp.options->SetStringValue("fact_acceptor", "inertia_free");
diff --git a/src/LinAlg/hiopLinSolverSparseGinkgo.cpp b/src/LinAlg/hiopLinSolverSparseGinkgo.cpp
index 971ef699a..e463500b2 100644
--- a/src/LinAlg/hiopLinSolverSparseGinkgo.cpp
+++ b/src/LinAlg/hiopLinSolverSparseGinkgo.cpp
@@ -212,13 +212,11 @@ std::shared_ptr<gko::Executor> create_exec(std::string executor_string)
         {"omp", [] { return gko::OmpExecutor::create(); }},
         {"cuda",
          [] {
-           return gko::CudaExecutor::create(0, gko::ReferenceExecutor::create(),
-                                            true);
+           return gko::CudaExecutor::create(0, gko::ReferenceExecutor::create());
          }},
         {"hip",
          [] {
-           return gko::HipExecutor::create(0, gko::ReferenceExecutor::create(),
-                                           true);
+           return gko::HipExecutor::create(0, gko::ReferenceExecutor::create());
          }},
         {"dpcpp",
          [] {
@@ -283,10 +281,19 @@ std::shared_ptr<gko::LinOpFactory> setup_solver_factory(std::shared_ptr<const gko::Executor> exec,
+  if(nlp_->options->GetString("mem_space") == "device") {
+    M_host_ = LinearAlgebraFactory::create_matrix_sparse("default", n, n, nnz);
+  }
+}
 
 hiopLinSolverSymSparseGinkgo::~hiopLinSolverSymSparseGinkgo()
 {
+  // If memory space is device, delete allocated host mirrors
+  if(nlp_->options->GetString("mem_space") == "device") {
+    delete M_host_;
+  }
+
   delete [] index_covert_CSR2Triplet_;
   delete [] index_covert_extra_Diag2CSR_;
 }
@@ -304,11 +311,34 @@ std::shared_ptr<gko::LinOpFactory> setup_solver_factory(std::shared_ptr<const gko::Executor> exec,
   auto gmres_restart = nlp_->options->GetInteger("ir_inner_restart");
   iterative_refinement_ = gmres_iter > 0;
 
-  host_mtx_ = transferTripletToCSR(exec_->get_master(), n_, M_, &index_covert_CSR2Triplet_, &index_covert_extra_Diag2CSR_);
+  // If the matrix is on device, copy it to the host mirror
+  std::string mem_space = nlp_->options->GetString("mem_space");
+  auto M = M_;
+  if(mem_space == "device") {
+    auto host = exec_->get_master();
+    auto nnz = M_->numberOfNonzeros();
+    //host->copy_from(exec_.get(), nnz, M_->M(), M_host_->M());
+    auto dv = gko::make_const_array_view(exec_, nnz, M_->M());
+    auto hv = gko::make_array_view(host, nnz, M_host_->M());
+    host->copy_from(exec_.get(), nnz, dv.get_const_data(), hv.get_data());
+    auto di = gko::make_const_array_view(exec_, nnz, M_->i_row());
+    auto hi = gko::make_array_view(host, nnz, M_host_->i_row());
+    host->copy_from(exec_.get(), nnz, di.get_const_data(), hi.get_data());
+    auto dj = gko::make_const_array_view(exec_, nnz, M_->j_col());
+    auto hj = gko::make_array_view(host, nnz, M_host_->j_col());
+    host->copy_from(exec_.get(), nnz, dj.get_const_data(), hj.get_data());
+    //host->copy_from(exec_.get(), nnz, M_->i_row(), M_host_->i_row());
+    //host->copy_from(exec_.get(), nnz, M_->j_col(), M_host_->j_col());
+    M = M_host_;
+  }
+
+  host_mtx_ = transferTripletToCSR(exec_->get_master(), n_, M, &index_covert_CSR2Triplet_, &index_covert_extra_Diag2CSR_);
   mtx_ = exec_ == (exec_->get_master()) ?
          host_mtx_ : gko::clone(exec_, host_mtx_);
   nnz_ = mtx_->get_num_stored_elements();
   reusable_factory_ = setup_solver_factory(exec_, mtx_, alg, gmres_iter, gmres_tol, gmres_restart);
+
+  dense_b_ = gko::matrix::Dense<double>::create(exec_, gko::dim<2>{n_, 1});
 }
 
 int hiopLinSolverSymSparseGinkgo::matrixChanged()
@@ -321,7 +351,15 @@ std::shared_ptr<gko::LinOpFactory> setup_solver_factory(std::shared_ptr<const gko::Executor> exec,
   if( !mtx_ ) {
     this->firstCall();
   } else {
-    update_matrix(M_, mtx_, host_mtx_, index_covert_CSR2Triplet_, index_covert_extra_Diag2CSR_);
+    std::string mem_space = nlp_->options->GetString("mem_space");
+    auto M = M_;
+    if(mem_space == "device") {
+      auto host = exec_->get_master();
+      auto nnz = M_->numberOfNonzeros();
+      host->copy_from(exec_.get(), nnz, M_->M(), M_host_->M());
+      M = M_host_;
+    }
+    update_matrix(M, mtx_, host_mtx_, index_covert_CSR2Triplet_, index_covert_extra_Diag2CSR_);
   }
 
   gko_solver_ = gko::share(reusable_factory_->generate(mtx_));
@@ -350,24 +388,20 @@ std::shared_ptr<gko::LinOpFactory> setup_solver_factory(std::shared_ptr<const gko::Executor> exec,
   nlp_->runStats.linsolv.tmTriuSolves.start();
 
-  hiopVectorPar* x = dynamic_cast<hiopVectorPar*>(&x_);
-  assert(x != NULL);
-  hiopVectorPar* rhs = dynamic_cast<hiopVectorPar*>(x->new_copy());
-  double* dx = x->local_data();
-  double* drhs = rhs->local_data();
+  std::string mem_space = nlp_->options->GetString("mem_space");
+  auto exec = host;
+  if(mem_space == "device") {
+    exec = exec_;
+  }
+
+  double* dx = x_.local_data();
   const auto size = gko::dim<2>{(long unsigned int)n_, 1};
-  auto dense_x_host = vec::create(host, size, arr::view(host, n_, dx), 1);
-  auto dense_x = vec::create(exec_, size);
-  dense_x->copy_from(dense_x_host.get());
-  auto dense_b_host = vec::create(host, size, arr::view(host, n_, drhs), 1);
-  auto dense_b = vec::create(exec_, size);
-  dense_b->copy_from(dense_b_host.get());
-
-  gko_solver_->apply(dense_b.get(), dense_x.get());
+  auto dense_x = vec::create(exec, size, arr::view(exec, n_, dx), 1);
+  dense_b_->copy_from(dense_x.get());
+
+  gko_solver_->apply(dense_b_.get(), dense_x.get());
 
   nlp_->runStats.linsolv.tmTriuSolves.stop();
 
-  dense_x_host->copy_from(dense_x.get());
-  delete rhs; rhs=nullptr;
   return 1;
 }
diff --git a/src/LinAlg/hiopLinSolverSparseGinkgo.hpp b/src/LinAlg/hiopLinSolverSparseGinkgo.hpp
index 058c606a5..d1c7a0419 100644
--- a/src/LinAlg/hiopLinSolverSparseGinkgo.hpp
+++ b/src/LinAlg/hiopLinSolverSparseGinkgo.hpp
@@ -90,12 +90,15 @@ class hiopLinSolverSymSparseGinkgo: public hiopLinSolverSymSparse
   std::shared_ptr<gko::Executor> exec_;
   std::shared_ptr<gko::matrix::Csr<double, int>> mtx_;
   std::shared_ptr<gko::matrix::Csr<double, int>> host_mtx_;
+  std::shared_ptr<gko::matrix::Dense<double>> dense_b_;
   std::shared_ptr<gko::LinOpFactory> reusable_factory_;
   std::shared_ptr<gko::LinOp> gko_solver_;
   bool iterative_refinement_;
 
   static const std::map alg_map_;
 
+  hiopMatrixSparse* M_host_{ nullptr }; ///< Host mirror for the KKT matrix
+
 public:
 
  /** called the very first time a matrix is factored. Allocates space
diff --git a/src/Optimization/hiopKKTLinSysSparse.cpp b/src/Optimization/hiopKKTLinSysSparse.cpp
index c41a8579a..3cee4380f 100644
--- a/src/Optimization/hiopKKTLinSysSparse.cpp
+++ b/src/Optimization/hiopKKTLinSysSparse.cpp
@@ -317,9 +317,7 @@ namespace hiop
      if( (nullptr == linSys_ && linear_solver == "auto") || linear_solver == "ginkgo") {
        //ma57, pardiso and strumpack are not available or user requested ginkgo
 #ifdef HIOP_USE_GINKGO
-        nlp_->log->printf(hovScalars,
-                          "KKT_SPARSE_XYcYd linsys: alloc GINKGO with matrix size %d (%d cons)\n",
-                          n, neq+nineq);
+        linsol_actual = "GINKGO";
        linSys_ = new hiopLinSolverSymSparseGinkgo(n, nnz, nlp_);
 #endif // HIOP_USE_GINKGO
      }
@@ -376,6 +374,14 @@
        linSys_ = new hiopLinSolverSymSparsePARDISO(n, nnz, nlp_);
 #endif // HIOP_USE_PARDISO
      }
+
+      if( (nullptr == linSys_ && linear_solver == "auto") || linear_solver == "ginkgo") {
+        //ma57, pardiso and strumpack are not available or user requested ginkgo
+#ifdef HIOP_USE_GINKGO
+        linsol_actual = "GINKGO";
+        linSys_ = new hiopLinSolverSymSparseGinkgo(n, nnz, nlp_);
+#endif // HIOP_USE_GINKGO
+      }
 
      if(linSys_) {
        nlp_->log->printf(hovScalars,
@@ -747,6 +753,14 @@
 #endif // HIOP_USE_PARDISO
      }
 
+      if(nullptr == linSys_ && linear_solver == "ginkgo") {
+        //ma57, pardiso and strumpack are not available or user requested ginkgo
+#ifdef HIOP_USE_GINKGO
+        linSys_ = new hiopLinSolverSymSparseGinkgo(n, nnz, nlp_);
+        actual_lin_solver = "GINKGO";
+#endif // HIOP_USE_GINKGO
+      }
+
      if(linSys_) {
        nlp_->log->printf(hovScalars,
                          "KKT_SPARSE_XDYcYd linsys: alloc [%s] size %d (%d cons) (hybrid)\n",
@@ -781,6 +795,14 @@
          }
 #endif
        } //end resolve
+
+        if(nullptr == linSys_ && linear_solver == "ginkgo") {
+          //ma57, pardiso and strumpack are not available or user requested ginkgo
+#ifdef HIOP_USE_GINKGO
+          linSys_ = new hiopLinSolverSymSparseGinkgo(n, nnz, nlp_);
+          actual_lin_solver = "GINKGO";
+#endif // HIOP_USE_GINKGO
+        }
      } // end of compute mode gpu
    }
    assert(linSys_&& "KKT_SPARSE_XDYcYd linsys: cannot instantiate backend linear solver");
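
For anyone exercising the new Ginkgo path outside these example drivers, the sketch below collects the option calls the updated drivers make. It is a minimal illustration under stated assumptions: the helper name configure_ginkgo and the template parameter Nlp are hypothetical, while every option key and value comes from the hunks above (the drivers operate on an object whose options member exposes SetStringValue/SetIntegerValue, such as HiOp's sparse NLP formulation).

#include <string>

// Hypothetical helper (name and template parameter are illustrative only) that
// mirrors the Ginkgo-related options set by the updated drivers.
// `Nlp` stands for any HiOp NLP object exposing the `options` member used in
// the drivers, e.g. the sparse NLP formulation the drivers call `nlp`.
template <typename Nlp>
void configure_ginkgo(Nlp& nlp, const std::string& ginkgo_exec)
{
  nlp.options->SetStringValue("linear_solver_sparse", "ginkgo");
  nlp.options->SetStringValue("fact_acceptor", "inertia_free");
  nlp.options->SetIntegerValue("ir_outer_maxit", 0);
  nlp.options->SetStringValue("ginkgo_exec", ginkgo_exec);  // "cuda", "hip", or "reference"
  if(ginkgo_exec == "cuda" || ginkgo_exec == "hip") {
    // Device executors are paired with HiOp's GPU compute mode, as in
    // NlpSparseEx1/Ex2/Ex4Driver.cpp above.
    nlp.options->SetStringValue("compute_mode", "gpu");
  } else {
    // The reference executor runs on the host, matching the RAJA driver's else branch.
    nlp.options->SetStringValue("compute_mode", "cpu");
  }
}

The RAJA example additionally keeps mem_space set to "device" and duals_init set to "zero"; a device mem_space is what routes hiopLinSolverSymSparseGinkgo through the new M_host_ mirror and the pre-allocated dense_b_ right-hand side during the solve.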