diff --git a/src/opflow/interface/opflow.cpp b/src/opflow/interface/opflow.cpp index 933d7bb4d..095cede02 100644 --- a/src/opflow/interface/opflow.cpp +++ b/src/opflow/interface/opflow.cpp @@ -1792,7 +1792,19 @@ PetscErrorCode OPFLOWSetUp(OPFLOW opflow) { } /* Create Hessian */ - ierr = PSCreateMatrix(opflow->ps, &opflow->Hes); + // ierr = PSCreateMatrix(opflow->ps, &opflow->Hes); + // CHKERRQ(ierr); + + ierr = MatCreate(opflow->comm->type, &opflow->Hes); + CHKERRQ(ierr); + ierr = + MatSetSizes(opflow->Hes, opflow->nx, opflow->nx, opflow->Nx, opflow->Nx); + CHKERRQ(ierr); + ierr = MatSetFromOptions(opflow->Hes); + CHKERRQ(ierr); + ierr = MatSeqAIJSetPreallocation(opflow->Hes, 6, NULL); + CHKERRQ(ierr); + ierr = MatSetOption(opflow->Hes, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE); CHKERRQ(ierr); /* Create natural to sparse dense ordering mapping (needed for some models) diff --git a/src/opflow/model/power_bal_hiop/paramsrajahiop.cpp b/src/opflow/model/power_bal_hiop/paramsrajahiop.cpp index 27e2cfa00..8911f7b11 100644 --- a/src/opflow/model/power_bal_hiop/paramsrajahiop.cpp +++ b/src/opflow/model/power_bal_hiop/paramsrajahiop.cpp @@ -21,11 +21,12 @@ int BUSParamsRajaHiop::destroy(OPFLOW opflow) { h_allocator_.deallocate(bl); h_allocator_.deallocate(xidx); h_allocator_.deallocate(gidx); + h_allocator_.deallocate(jacsp_idx); + h_allocator_.deallocate(jacsq_idx); + h_allocator_.deallocate(hesssp_idx); if (opflow->include_powerimbalance_variables) { h_allocator_.deallocate(xidxpimb); h_allocator_.deallocate(powerimbalance_penalty); - h_allocator_.deallocate(jacsp_idx); - h_allocator_.deallocate(jacsq_idx); } #ifdef EXAGO_ENABLE_GPU @@ -40,11 +41,12 @@ int BUSParamsRajaHiop::destroy(OPFLOW opflow) { d_allocator_.deallocate(bl_dev_); d_allocator_.deallocate(xidx_dev_); d_allocator_.deallocate(gidx_dev_); + d_allocator_.deallocate(jacsp_idx_dev_); + d_allocator_.deallocate(jacsq_idx_dev_); + d_allocator_.deallocate(hesssp_idx_dev_); if (opflow->include_powerimbalance_variables) { d_allocator_.deallocate(xidxpimb_dev_); d_allocator_.deallocate(powerimbalance_penalty_dev_); - d_allocator_.deallocate(jacsp_idx_dev_); - d_allocator_.deallocate(jacsq_idx_dev_); } #endif @@ -74,10 +76,11 @@ int BUSParamsRajaHiop::copy(OPFLOW opflow) { resmgr.copy(xidx_dev_, xidx); resmgr.copy(gidx_dev_, gidx); + resmgr.copy(jacsp_idx_dev_, jacsp_idx); + resmgr.copy(jacsq_idx_dev_, jacsq_idx); + resmgr.copy(hesssp_idx_dev_, hesssp_idx); if (opflow->include_powerimbalance_variables) { resmgr.copy(xidxpimb_dev_, xidxpimb); - resmgr.copy(jacsp_idx_dev_, jacsp_idx); - resmgr.copy(jacsq_idx_dev_, jacsq_idx); resmgr.copy(powerimbalance_penalty_dev_, powerimbalance_penalty); } #else @@ -95,6 +98,7 @@ int BUSParamsRajaHiop::copy(OPFLOW opflow) { gidx_dev_ = gidx; jacsp_idx_dev_ = jacsp_idx; jacsq_idx_dev_ = jacsq_idx; + hesssp_idx_dev_ = hesssp_idx; powerimbalance_penalty_dev_ = powerimbalance_penalty; #endif return 0; @@ -126,11 +130,12 @@ int BUSParamsRajaHiop::allocate(OPFLOW opflow) { xidx = paramAlloc(h_allocator_, nbus); gidx = paramAlloc(h_allocator_, nbus); + jacsp_idx = paramAlloc(h_allocator_, nbus); + jacsq_idx = paramAlloc(h_allocator_, nbus); + hesssp_idx = paramAlloc(h_allocator_, nbus); if (opflow->include_powerimbalance_variables) { xidxpimb = paramAlloc(h_allocator_, nbus); powerimbalance_penalty = paramAlloc(h_allocator_, nbus); - jacsp_idx = paramAlloc(h_allocator_, nbus); - jacsq_idx = paramAlloc(h_allocator_, nbus); } /* Memzero arrays */ @@ -194,11 +199,12 @@ int BUSParamsRajaHiop::allocate(OPFLOW opflow) { xidx_dev_ = paramAlloc(d_allocator_, nbus); gidx_dev_ = paramAlloc(d_allocator_, nbus); + jacsp_idx_dev_ = paramAlloc(d_allocator_, nbus); + jacsq_idx_dev_ = paramAlloc(d_allocator_, nbus); + hesssp_idx_dev_ = paramAlloc(d_allocator_, nbus); if (opflow->include_powerimbalance_variables) { xidxpimb_dev_ = paramAlloc(d_allocator_, nbus); powerimbalance_penalty_dev_ = paramAlloc(d_allocator_, nbus); - jacsp_idx_dev_ = paramAlloc(d_allocator_, nbus); - jacsq_idx_dev_ = paramAlloc(d_allocator_, nbus); } #endif return 0; @@ -226,11 +232,17 @@ int LINEParamsRajaHiop::copy(OPFLOW opflow) { resmgr.copy(geqidxf_dev_, geqidxf); resmgr.copy(geqidxt_dev_, geqidxt); + resmgr.copy(busf_idx_dev_, busf_idx); + resmgr.copy(bust_idx_dev_, bust_idx); + resmgr.copy(jacf_idx_dev_, jacf_idx); + resmgr.copy(jact_idx_dev_, jact_idx); + resmgr.copy(hesssp_idx_dev_, hesssp_idx); if (opflow->nlinesmon) { resmgr.copy(gineqidx_dev_, gineqidx); resmgr.copy(gbineqidx_dev_, gbineqidx); resmgr.copy(linelimidx_dev_, linelimidx); + resmgr.copy(jac_ieq_idx_dev_, jac_ieq_idx); } #else Gff_dev_ = Gff; @@ -246,10 +258,16 @@ int LINEParamsRajaHiop::copy(OPFLOW opflow) { xidxt_dev_ = xidxt; geqidxf_dev_ = geqidxf; geqidxt_dev_ = geqidxt; + busf_idx_dev_ = busf_idx; + bust_idx_dev_ = bust_idx; + jacf_idx_dev_ = jacf_idx; + jact_idx_dev_ = jact_idx; + hesssp_idx_dev_ = hesssp_idx; if (opflow->nlinesmon) { gineqidx_dev_ = gineqidx; gbineqidx_dev_ = gbineqidx; linelimidx_dev_ = linelimidx; + jac_ieq_idx_dev_ = jac_ieq_idx; } #endif return 0; @@ -272,11 +290,17 @@ int LINEParamsRajaHiop::destroy(OPFLOW opflow) { h_allocator_.deallocate(geqidxf); h_allocator_.deallocate(geqidxt); + h_allocator_.deallocate(busf_idx); + h_allocator_.deallocate(bust_idx); + h_allocator_.deallocate(jacf_idx); + h_allocator_.deallocate(jact_idx); + h_allocator_.deallocate(hesssp_idx); if (opflow->nlinesmon) { h_allocator_.deallocate(gineqidx); h_allocator_.deallocate(gbineqidx); h_allocator_.deallocate(linelimidx); + h_allocator_.deallocate(jac_ieq_idx); } #ifdef EXAGO_ENABLE_GPU @@ -296,11 +320,17 @@ int LINEParamsRajaHiop::destroy(OPFLOW opflow) { d_allocator_.deallocate(geqidxf_dev_); d_allocator_.deallocate(geqidxt_dev_); + d_allocator_.deallocate(busf_idx_dev_); + d_allocator_.deallocate(bust_idx_dev_); + d_allocator_.deallocate(jacf_idx_dev_); + d_allocator_.deallocate(jact_idx_dev_); + d_allocator_.deallocate(hesssp_idx_dev_); if (opflow->nlinesmon) { d_allocator_.deallocate(gineqidx_dev_); d_allocator_.deallocate(gbineqidx_dev_); d_allocator_.deallocate(linelimidx_dev_); + d_allocator_.deallocate(jac_ieq_idx_dev_); } #endif @@ -344,10 +374,17 @@ int LINEParamsRajaHiop::allocate(OPFLOW opflow) { geqidxf = paramAlloc(h_allocator_, nlineON); geqidxt = paramAlloc(h_allocator_, nlineON); + busf_idx = paramAlloc(h_allocator_, nlineON); + bust_idx = paramAlloc(h_allocator_, nlineON); + jacf_idx = paramAlloc(h_allocator_, nlineON); + jact_idx = paramAlloc(h_allocator_, nlineON); + hesssp_idx = paramAlloc(h_allocator_, nlineON); + if (opflow->nlinesmon) { linelimidx = paramAlloc(h_allocator_, nlinelim); gineqidx = paramAlloc(h_allocator_, nlinelim); gbineqidx = paramAlloc(h_allocator_, nlinelim); + jac_ieq_idx = paramAlloc(h_allocator_, nlinelim); } PetscInt j = 0; @@ -386,11 +423,17 @@ int LINEParamsRajaHiop::allocate(OPFLOW opflow) { */ geqidxf[linei] = busf->starteqloc; geqidxt[linei] = bust->starteqloc; + busf_idx[linei] = ps->busext2intmap[line->fbus]; + bust_idx[linei] = ps->busext2intmap[line->tbus]; + jacf_idx[linei] = 0; + jact_idx[linei] = 0; + hesssp_idx[linei] = 0; if (j < opflow->nlinesmon && opflow->linesmon[j] == i) { gbineqidx[j] = opflow->nconeq + line->startineqloc; gineqidx[j] = line->startineqloc; linelimidx[j] = linei; + jac_ieq_idx[j] = 0; j++; } @@ -416,10 +459,17 @@ int LINEParamsRajaHiop::allocate(OPFLOW opflow) { geqidxf_dev_ = paramAlloc(d_allocator_, nlineON); geqidxt_dev_ = paramAlloc(d_allocator_, nlineON); + busf_idx_dev_ = paramAlloc(d_allocator_, nlineON); + bust_idx_dev_ = paramAlloc(d_allocator_, nlineON); + jacf_idx_dev_ = paramAlloc(d_allocator_, nlineON); + jact_idx_dev_ = paramAlloc(d_allocator_, nlineON); + hesssp_idx_dev_ = paramAlloc(d_allocator_, nlineON); + if (opflow->nconineq) { gineqidx_dev_ = paramAlloc(d_allocator_, nlinelim); gbineqidx_dev_ = paramAlloc(d_allocator_, nlinelim); linelimidx_dev_ = paramAlloc(d_allocator_, nlinelim); + jac_ieq_idx_dev_ = paramAlloc(d_allocator_, nlinelim); } #endif return 0; @@ -781,10 +831,12 @@ void PbpolModelRajaHiop::destroy(OPFLOW opflow) { auto &resmgr = umpire::ResourceManager::getInstance(); umpire::Allocator h_allocator_ = resmgr.getAllocator("HOST"); + umpire::Allocator d_allocator_ = resmgr.getAllocator("DEVICE"); h_allocator_.deallocate(i_jaceq); h_allocator_.deallocate(j_jaceq); h_allocator_.deallocate(val_jaceq); + d_allocator_.deallocate(idx_jaceq_dev_); h_allocator_.deallocate(i_hess); h_allocator_.deallocate(j_hess); @@ -794,6 +846,7 @@ void PbpolModelRajaHiop::destroy(OPFLOW opflow) { h_allocator_.deallocate(i_jacineq); h_allocator_.deallocate(j_jacineq); h_allocator_.deallocate(val_jacineq); + d_allocator_.deallocate(idx_jacineq_dev_); } } #endif diff --git a/src/opflow/model/power_bal_hiop/paramsrajahiop.h b/src/opflow/model/power_bal_hiop/paramsrajahiop.h index 91f1fb9fc..ea85a38f5 100644 --- a/src/opflow/model/power_bal_hiop/paramsrajahiop.h +++ b/src/opflow/model/power_bal_hiop/paramsrajahiop.h @@ -196,6 +196,13 @@ struct LINEParamsRajaHiop { constraint bound */ int *linelimidx; /* Indices for subset of lines that have finite limits */ + int *busf_idx; /* From bus index */ + int *bust_idx; /* To bus index */ + int *jacf_idx; /* Location number in the sparse Jacobian (from) */ + int *jact_idx; /* Location number in the sparse Jacobian (to) */ + int *jac_ieq_idx; /* Location number in sparse inequality Jacobian */ + int *hesssp_idx; /* Location number in sparse Hessian */ + // Device data double *Gff_dev_; /* From side self conductance */ double *Bff_dev_; /* From side self susceptance */ @@ -219,6 +226,13 @@ struct LINEParamsRajaHiop { int * linelimidx_dev_; /* Indices for subset of lines that have finite limits */ + int *busf_idx_dev_; /* From bus index */ + int *bust_idx_dev_; /* To bus index */ + int *jacf_idx_dev_; /* Location number in the sparse Jacobian (from) */ + int *jact_idx_dev_; /* Location number in the sparse Jacobian (to) */ + int *jac_ieq_idx_dev_; /* Location number in sparse inequality Jacobian */ + int *hesssp_idx_dev_; /* Location number in sparse Hessian */ + int allocate(OPFLOW); int destroy(OPFLOW); int copy(OPFLOW); @@ -237,6 +251,7 @@ struct PbpolModelRajaHiop : public _p_FormPBPOLRAJAHIOP { i_jaceq = j_jaceq = i_jacineq = j_jacineq = NULL; i_hess = j_hess = NULL; val_jaceq = val_jacineq = val_hess = NULL; + idx_jaceq_dev_ = idx_jacineq_dev_ = idx_hess_dev_ = NULL; } void destroy(OPFLOW opflow); @@ -252,9 +267,14 @@ struct PbpolModelRajaHiop : public _p_FormPBPOLRAJAHIOP { // GPU sparse model) int *i_jaceq, *j_jaceq; // Row and column indices for equality constrained Jacobian + int *idx_jaceq_dev_; // Permuted triplet indexes for equality constrained + // Jacobian (on-device) int *i_jacineq, *j_jacineq; // Row and column indices for inequality constrained Jacobain - int *i_hess, *j_hess; // Row and column indices for hessian + int *idx_jacineq_dev_; // Permuted triplet indexes for inequality constrained + // Jacobian (on-device) + int *i_hess, *j_hess; // Row and column indices for hessian double *val_jaceq, *val_jacineq, - *val_hess; // values for equality, inequality jacobians and hessian + *val_hess; // values for equality, inequality jacobians and hessian + int *idx_hess_dev_; // Permuted triplet indexes for Hessian (on-device) }; diff --git a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp index fa609378c..e0bdefa91 100644 --- a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp +++ b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp @@ -222,11 +222,206 @@ PetscErrorCode OPFLOWModelSetUp_PBPOLRAJAHIOPSPARSE(OPFLOW opflow) { LINEParamsRajaHiop *lineparams = &pbpolrajahiopsparse->lineparams; /* Need to compute the number of nonzeros in equality, inequality constraint - * Jacobians and Hessian */ - int nnz_eqjacsp = 0, nnz_ineqjacsp = 0, nnz_hesssp = 0; - opflow->nnz_eqjacsp = nnz_eqjacsp; - opflow->nnz_ineqjacsp = nnz_ineqjacsp; - opflow->nnz_hesssp = nnz_hesssp; + * Jacobians */ + int nnz_eqjac = 0, nnz_ineqjac = 0; + + // Find nonzero entries in equality constraint Jacobian by row. Using + // OPFLOWComputeEqualityConstraintJacobian_PBPOL() as a guide. + + PS ps = (PS)opflow->ps; + + for (int ibus = 0, igen1 = 0, igen2 = 0, iload = 0; ibus < ps->nbus; ++ibus) { + + PSBUS bus = &(ps->bus[ibus]); + + // Nonzero entries used by each *bus* starts here + + // no matter what, each bus uses 2 rows and 2 columns + // row 1 = real, row2 = reactive + + busparams->jacsp_idx[ibus] = nnz_eqjac; + nnz_eqjac += 2; + busparams->jacsq_idx[ibus] = nnz_eqjac; + nnz_eqjac += 2; + + if (bus->ide == ISOLATED_BUS) { + continue; + } + + if (opflow->include_powerimbalance_variables) { + // 2 more entries on both real and reactive + nnz_eqjac += 4; + } + + for (int bgen = 0; bgen < bus->ngen; ++bgen) { + PSGEN gen; + ierr = PSBUSGetGen(bus, bgen, &gen); + CHKERRQ(ierr); + if (!gen->status) + continue; + // each active generator uses 1 real and reactive entry on each bus + genparams->eqjacspbus_idx[igen1] = nnz_eqjac++; + genparams->eqjacsqbus_idx[igen1] = nnz_eqjac++; + igen1++; + } + + if (opflow->include_loadloss_variables) { + // each load adds one real and reactive entry on each bus row + // NOTE: iload is a system load counter + for (int bload = 0; bload < bus->nload; bload++, iload++) { + loadparams->jacsp_idx[iload] = nnz_eqjac; + nnz_eqjac += 2; + iload++; + } + } + + if (opflow->has_gensetpoint) { + for (int bgen = 0; bgen < bus->ngen; ++bgen) { + PSGEN gen; + ierr = PSBUSGetGen(bus, bgen, &gen); + CHKERRQ(ierr); + + if (!gen->status || gen->isrenewable) + continue; + + // each generator uses 2 rows, 3 columns real, 1 column reactive + genparams->eqjacspbus_idx[igen2] = nnz_eqjac; + nnz_eqjac += 4; + igen2++; + } + } + } + + // Go through the lines + + for (int iline = 0; iline <= ps->nline; ++iline) { + PSLINE line = &(ps->line[iline]); + + if (!line->status) + continue; + + // each line adds 4 (off-diagonal) entries for the to bus and 4 + // entries for the from bus. Each line also modifies 4 existing + // to and from bus entries. + lineparams->jacf_idx[iline] = nnz_eqjac; + nnz_eqjac += 4; + lineparams->jact_idx[iline] = nnz_eqjac; + nnz_eqjac += 4; + } + + // if there are lines, non-zeros were over counted + if (ps->nline > 0) { + nnz_eqjac -= 8; + } + + std::cout << "Equality Jacobian nonzero count: " << nnz_eqjac << std::endl; + + if (opflow->has_gensetpoint) { + for (int ibus = 0, igen = 0; ibus < ps->nbus; ++ibus) { + PSBUS bus = &(ps->bus[ibus]); + for (int bgen = 0; bgen < bus->ngen; ++bgen) { + PSGEN gen; + ierr = PSBUSGetGen(bus, bgen, &gen); + CHKERRQ(ierr); + if (!gen->status) + continue; + genparams->ineqjacspgen_idx[igen] = nnz_eqjac + nnz_ineqjac; + nnz_ineqjac += 6; + igen++; + } + } + } + + if (opflow->genbusvoltagetype == FIXED_WITHIN_QBOUNDS) { + for (int ibus = 0; ibus < ps->nbus; ++ibus) { + PSBUS bus = &(ps->bus[ibus]); + if (bus->ide == PV_BUS || bus->ide == REF_BUS) { + for (int bgen = 0; bgen < bus->ngen; ++bgen) { + PSGEN gen; + ierr = PSBUSGetGen(bus, bgen, &gen); + CHKERRQ(ierr); + if (!gen->status) + continue; + } + nnz_ineqjac += 2; + } + } + } + + if (!opflow->ignore_lineflow_constraints) { + for (int iline = 0; iline < opflow->nlinesmon; ++iline) { + // PSLINE line = &ps->line[opflow->linesmon[iline]]; + lineparams->jac_ieq_idx[iline] = nnz_eqjac + nnz_ineqjac; + nnz_ineqjac += 8; + } + } + + std::cout << "Inequality Jacobian nonzero count: " << nnz_ineqjac + << std::endl; + + // Count non-zeros in *upper triangular* Hessian + + int nnz_hesssp = 0; + + for (int ibus = 0; ibus < ps->nbus; ++ibus) { + + // reserve 2 real and 2 reactive entries for each bus + // 3 upper triangular + + busparams->hesssp_idx[ibus] = nnz_hesssp; + nnz_hesssp += 3; + + if (opflow->include_powerimbalance_variables) { + nnz_hesssp += 2; + } + } + + for (int i = 0, igen = 0; i < ps->ngen; ++i) { + PSGEN gen = &(ps->gen[i]); + + if (!gen->status) + continue; + + genparams->hesssp_idx[igen] = nnz_hesssp; + nnz_hesssp += 2; + + if (opflow->has_gensetpoint) { + if (gen->isrenewable) + continue; + + // later ... + // if (opflow->use_agc) { + // nnz_hesssp += 5; + // } + } + if (opflow->genbusvoltagetype == FIXED_WITHIN_QBOUNDS) { + nnz_hesssp += 2; + } + igen++; + } + + for (int iline = 0; iline < ps->nline; ++iline) { + PSLINE line = &(ps->line[iline]); + + if (!line->status) + continue; + + lineparams->hesssp_idx[iline] = nnz_hesssp; + + // 3 diagonal entries for on the from-bus rows (already defined) + // 3 diagonal entries for on the to-bus rows (already defined) + // 4 off-diagonal entries in upper part + nnz_hesssp += 4; + } + + if (opflow->include_loadloss_variables) { + for (int iload = 0; iload < ps->nload; ++iload) { + loadparams->hesssp_idx[iload] = nnz_hesssp; + nnz_hesssp += 2; + } + } + + std::cout << "Hessian nonzero count: " << nnz_hesssp << std::endl; ierr = busparams->copy(opflow); ierr = genparams->copy(opflow); diff --git a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp index adc10c8e9..27ed8a696 100644 --- a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp +++ b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp @@ -1,4 +1,10 @@ +#include +#include +#include +#include +#include + #include #if defined(EXAGO_ENABLE_RAJA) @@ -15,6 +21,9 @@ #include "pbpolrajahiopsparsekernels.hpp" #include "pbpolrajahiopsparse.hpp" +static const bool debugmsg(true); +static const bool oldhostway(false); + PetscErrorCode OPFLOWSetInitialGuessArray_PBPOLRAJAHIOPSPARSE(OPFLOW opflow, double *x0_dev) { PetscErrorCode ierr; @@ -468,6 +477,120 @@ PetscErrorCode OPFLOWComputeGradientArray_PBPOLRAJAHIOPSPARSE( PetscFunctionReturn(0); } +// A routine to sort triplet matrix indexes. The index arrays are on +// the device. This sorts them on the host. +static void SortIndexes(const int &n, int *i_dev, int *j_dev, + int *idx_perm_dev) { + auto &resmgr = umpire::ResourceManager::getInstance(); + umpire::Allocator h_allocator_ = resmgr.getAllocator("HOST"); + + std::vector> idxvect; + idxvect.reserve(n); + + int *itemp(NULL); + itemp = (int *)(h_allocator_.allocate(n * sizeof(int))); + resmgr.copy(itemp, i_dev, n * sizeof(int)); + + int *jtemp(NULL); + jtemp = (int *)(h_allocator_.allocate(n * sizeof(int))); + resmgr.copy(jtemp, j_dev, n * sizeof(int)); + + for (int idx = 0; idx < n; idx++) { + idxvect.push_back(std::make_tuple(itemp[idx], jtemp[idx], idx)); + } + + std::sort(idxvect.begin(), idxvect.end(), + [](std::tuple const &t1, + std::tuple const &t2) { + if (std::get<0>(t1) == std::get<0>(t2)) { + return (std::get<1>(t1) < std::get<1>(t2)); + } + return (std::get<0>(t1) < std::get<0>(t2)); + }); + + int *idx_perm; + idx_perm = (int *)(h_allocator_.allocate(n * sizeof(int))); + + for (int idx = 0; idx < n; idx++) { + itemp[idx] = std::get<0>(idxvect[idx]); + jtemp[idx] = std::get<1>(idxvect[idx]); + int i(std::get<2>(idxvect[idx])); + idx_perm[i] = idx; + } + + // std::cout << "Permuted Indexes: " << std::endl; + // for (int idx = 0; idx < n; idx++) { + // std::cout << std::setw(5) << std::right << itemp[idx] << " " + // << std::setw(5) << std::right << jtemp[idx] << " " + // << std::setw(5) << std::right << idx_perm[idx] << " " + // << std::endl; + // } + + resmgr.copy(i_dev, itemp, n * sizeof(int)); + resmgr.copy(j_dev, jtemp, n * sizeof(int)); + resmgr.copy(idx_perm_dev, idx_perm, n * sizeof(int)); + + h_allocator_.deallocate(itemp); + h_allocator_.deallocate(jtemp); + h_allocator_.deallocate(idx_perm); +} + +// A routine to get the triplet arrays from the device and print them out +static void PrintTriplets(const std::string &title, const int &n, int *iperm, + int *i, int *j, double *v) { + auto &resmgr = umpire::ResourceManager::getInstance(); + umpire::Allocator h_allocator_ = resmgr.getAllocator("HOST"); + + int *ipermtemp(NULL); + + if (iperm != NULL) { + ipermtemp = (int *)(h_allocator_.allocate(n * sizeof(int))); + resmgr.copy(ipermtemp, iperm, n * sizeof(int)); + } + + int *itemp(NULL); + + if (i != NULL) { + itemp = (int *)(h_allocator_.allocate(n * sizeof(int))); + resmgr.copy(itemp, i, n * sizeof(int)); + } + + int *jtemp(NULL); + if (j != 0) { + jtemp = (int *)(h_allocator_.allocate(n * sizeof(int))); + resmgr.copy(jtemp, j, n * sizeof(int)); + } + + double *vtemp(NULL); + if (v != NULL) { + vtemp = (double *)(h_allocator_.allocate(n * sizeof(double))); + resmgr.copy(vtemp, v, n * sizeof(double)); + } + + std::cout << title << std::endl; + for (int idx = 0; idx < n; ++idx) { + std::cout << std::setw(5) << idx << " "; + if (ipermtemp != NULL) { + std::cout << std::setw(5) << std::right << ipermtemp[idx] << " "; + } + if (itemp != NULL) { + std::cout << std::setw(5) << std::right << itemp[idx] << " "; + } + if (jtemp != NULL) { + std::cout << std::setw(5) << std::right << jtemp[idx]; + } + if (vtemp != NULL) { + std::cout << std::setw(12) << std::right << std::scientific + << std::setprecision(3) << vtemp[idx]; + } + std::cout << std::endl; + } + h_allocator_.deallocate(itemp); + h_allocator_.deallocate(jtemp); + if (vtemp != NULL) + h_allocator_.deallocate(vtemp); +} + PetscErrorCode OPFLOWComputeSparseInequalityConstraintJacobian_PBPOLRAJAHIOPSPARSE( OPFLOW opflow, const double *x_dev, int *iJacS_dev, int *jJacS_dev, @@ -477,7 +600,7 @@ OPFLOWComputeSparseInequalityConstraintJacobian_PBPOLRAJAHIOPSPARSE( PetscErrorCode ierr; double *x, *values; PetscInt *iRowstart, *jColstart; - PetscInt roffset, coffset; + PetscInt roffset, coffset, idxoffset; PetscInt nrow, ncol; PetscInt nvals; const PetscInt *cols; @@ -487,101 +610,342 @@ OPFLOWComputeSparseInequalityConstraintJacobian_PBPOLRAJAHIOPSPARSE( PetscFunctionBegin; + // Create arrays on host to store i,j, and val arrays + umpire::Allocator h_allocator_ = resmgr.getAllocator("HOST"); + umpire::Allocator d_allocator_ = resmgr.getAllocator("DEVICE"); + if (MJacS_dev == NULL) { - /* Set locations only */ - if (opflow->Nconineq) { - // Create arrays on host to store i,j, and val arrays - umpire::Allocator h_allocator_ = resmgr.getAllocator("HOST"); + if (debugmsg) + std::cout << "Official Inequality Jacobian nonzero count: " + << opflow->nnz_ineqjacsp << std::endl; - pbpolrajahiopsparse->i_jacineq = - (int *)(h_allocator_.allocate(opflow->nnz_ineqjacsp * sizeof(int))); - pbpolrajahiopsparse->j_jacineq = - (int *)(h_allocator_.allocate(opflow->nnz_ineqjacsp * sizeof(int))); - pbpolrajahiopsparse->val_jacineq = (double *)(h_allocator_.allocate( - opflow->nnz_ineqjacsp * sizeof(double))); + /* Set locations only */ - iRowstart = pbpolrajahiopsparse->i_jacineq; - jColstart = pbpolrajahiopsparse->j_jacineq; + if (opflow->Nconineq) { + ierr = PetscLogEventBegin(opflow->ineqconsjaclogger, 0, 0, 0, 0); /* Inequality constraints start after equality constraints Hence the offset */ roffset = opflow->nconeq; coffset = 0; + idxoffset = opflow->nnz_eqjacsp; + + resmgr.memset(iJacS_dev + idxoffset, 0, + opflow->nnz_ineqjacsp * sizeof(int)); + resmgr.memset(jJacS_dev + idxoffset, 0, + opflow->nnz_ineqjacsp * sizeof(int)); + + if (!opflow->ignore_lineflow_constraints) { + LINEParamsRajaHiop *lineparams = &pbpolrajahiopsparse->lineparams; + int *jac_ieq_idx = lineparams->jac_ieq_idx_dev_; + int *linelimidx = lineparams->linelimidx_dev_; + int *xidxf = lineparams->xidxf_dev_; + int *xidxt = lineparams->xidxt_dev_; + int *gbineqidx = lineparams->gbineqidx_dev_; + + RAJA::forall( + RAJA::RangeSegment(0, lineparams->nlinelim), + RAJA_LAMBDA(RAJA::Index_type i) { + int iline(linelimidx[i]); + int offset(0); + + iJacS_dev[jac_ieq_idx[i] + offset] = gbineqidx[i]; + jJacS_dev[jac_ieq_idx[i] + offset] = xidxf[iline]; + offset++; + + iJacS_dev[jac_ieq_idx[i] + offset] = gbineqidx[i]; + jJacS_dev[jac_ieq_idx[i] + offset] = xidxf[iline] + 1; + offset++; + + iJacS_dev[jac_ieq_idx[i] + offset] = gbineqidx[i]; + jJacS_dev[jac_ieq_idx[i] + offset] = xidxt[iline]; + offset++; + + iJacS_dev[jac_ieq_idx[i] + offset] = gbineqidx[i]; + jJacS_dev[jac_ieq_idx[i] + offset] = xidxt[iline] + 1; + offset++; + + iJacS_dev[jac_ieq_idx[i] + offset] = gbineqidx[i] + 1; + jJacS_dev[jac_ieq_idx[i] + offset] = xidxf[iline]; + offset++; + + iJacS_dev[jac_ieq_idx[i] + offset] = gbineqidx[i] + 1; + jJacS_dev[jac_ieq_idx[i] + offset] = xidxf[iline] + 1; + offset++; + + iJacS_dev[jac_ieq_idx[i] + offset] = gbineqidx[i] + 1; + jJacS_dev[jac_ieq_idx[i] + offset] = xidxt[iline]; + offset++; + + iJacS_dev[jac_ieq_idx[i] + offset] = gbineqidx[i] + 1; + jJacS_dev[jac_ieq_idx[i] + offset] = xidxt[iline] + 1; + }); + } - ierr = (*opflow->modelops.computeinequalityconstraintjacobian)( - opflow, opflow->X, opflow->Jac_Gi); - CHKERRQ(ierr); + if (pbpolrajahiopsparse->idx_jacineq_dev_ == NULL) { + pbpolrajahiopsparse->idx_jacineq_dev_ = + (int *)d_allocator_.allocate(opflow->nnz_ineqjacsp * sizeof(int)); + } - ierr = MatGetSize(opflow->Jac_Gi, &nrow, &ncol); - CHKERRQ(ierr); - /* Copy over locations to triplet format */ - for (i = 0; i < nrow; i++) { - ierr = MatGetRow(opflow->Jac_Gi, i, &nvals, &cols, &vals); - CHKERRQ(ierr); - for (j = 0; j < nvals; j++) { - iRowstart[j] = roffset + i; - jColstart[j] = coffset + cols[j]; + SortIndexes(opflow->nnz_ineqjacsp, iJacS_dev + opflow->nnz_eqjacsp, + jJacS_dev + opflow->nnz_eqjacsp, + pbpolrajahiopsparse->idx_jacineq_dev_); + + if (debugmsg) + PrintTriplets( + "Nonzero indexes for Inequality Constraint Jacobian (GPU):", + opflow->nnz_ineqjacsp, pbpolrajahiopsparse->idx_jacineq_dev_, + iJacS_dev + opflow->nnz_eqjacsp, jJacS_dev + opflow->nnz_eqjacsp, + NULL); + + if (oldhostway) { + + /* Inequality constraints start after equality constraints + Hence the offset + */ + roffset = opflow->nconeq; + coffset = 0; + + if (pbpolrajahiopsparse->i_jacineq == NULL) { + pbpolrajahiopsparse->i_jacineq = (int *)(h_allocator_.allocate( + opflow->nnz_ineqjacsp * sizeof(int))); + pbpolrajahiopsparse->j_jacineq = (int *)(h_allocator_.allocate( + opflow->nnz_ineqjacsp * sizeof(int))); + pbpolrajahiopsparse->val_jacineq = (double *)(h_allocator_.allocate( + opflow->nnz_ineqjacsp * sizeof(double))); } - /* Increment iRow,jCol pointers */ - iRowstart += nvals; - jColstart += nvals; - ierr = MatRestoreRow(opflow->Jac_Gi, i, &nvals, &cols, &vals); + + iRowstart = pbpolrajahiopsparse->i_jacineq; + jColstart = pbpolrajahiopsparse->j_jacineq; + + ierr = (*opflow->modelops.computeinequalityconstraintjacobian)( + opflow, opflow->X, opflow->Jac_Gi); CHKERRQ(ierr); - } - // Copy over i_jacineq and j_jacineq arrays to device - resmgr.copy(iJacS_dev + opflow->nnz_eqjacsp, - pbpolrajahiopsparse->i_jacineq); - resmgr.copy(jJacS_dev + opflow->nnz_eqjacsp, - pbpolrajahiopsparse->j_jacineq); + ierr = MatGetSize(opflow->Jac_Gi, &nrow, &ncol); + CHKERRQ(ierr); + /* Copy over locations to triplet format */ + for (i = 0; i < nrow; i++) { + ierr = MatGetRow(opflow->Jac_Gi, i, &nvals, &cols, &vals); + CHKERRQ(ierr); + for (j = 0; j < nvals; j++) { + iRowstart[j] = roffset + i; + jColstart[j] = coffset + cols[j]; + } + /* Increment iRow,jCol pointers */ + iRowstart += nvals; + jColstart += nvals; + ierr = MatRestoreRow(opflow->Jac_Gi, i, &nvals, &cols, &vals); + CHKERRQ(ierr); + } - ierr = PetscLogEventEnd(opflow->ineqconsjaclogger, 0, 0, 0, 0); - CHKERRQ(ierr); + // Copy over i_jacineq and j_jacineq arrays to device + resmgr.copy(iJacS_dev + opflow->nnz_eqjacsp, + pbpolrajahiopsparse->i_jacineq); + resmgr.copy(jJacS_dev + opflow->nnz_eqjacsp, + pbpolrajahiopsparse->j_jacineq); + + if (debugmsg) + PrintTriplets("Nonzero indexes for Inequality Constraint Jacobian:", + opflow->nnz_ineqjacsp, NULL, + iJacS_dev + opflow->nnz_eqjacsp, + jJacS_dev + opflow->nnz_eqjacsp, NULL); + + ierr = PetscLogEventEnd(opflow->ineqconsjaclogger, 0, 0, 0, 0); + CHKERRQ(ierr); + } } } else { if (opflow->Nconineq) { ierr = PetscLogEventBegin(opflow->ineqconsjaclogger, 0, 0, 0, 0); CHKERRQ(ierr); - ierr = VecGetArray(opflow->X, &x); - CHKERRQ(ierr); + int *iperm = pbpolrajahiopsparse->idx_jacineq_dev_; + + if (!opflow->ignore_lineflow_constraints) { + LINEParamsRajaHiop *lineparams = &pbpolrajahiopsparse->lineparams; + double *Gff_arr = lineparams->Gff_dev_; + double *Gtt_arr = lineparams->Gtt_dev_; + double *Gft_arr = lineparams->Gft_dev_; + double *Gtf_arr = lineparams->Gtf_dev_; + + double *Bff_arr = lineparams->Bff_dev_; + double *Btt_arr = lineparams->Btt_dev_; + double *Bft_arr = lineparams->Bft_dev_; + double *Btf_arr = lineparams->Btf_dev_; + + int *linelimidx = lineparams->linelimidx_dev_; + int *xidxf = lineparams->xidxf_dev_; + int *xidxt = lineparams->xidxt_dev_; + int *jac_ieq_idx = lineparams->jac_ieq_idx_dev_; + RAJA::forall( + RAJA::RangeSegment(0, lineparams->nlinelim), + RAJA_LAMBDA(RAJA::Index_type i) { + int j = linelimidx[i]; + double val[4]; + double Pf, Qf, Pt, Qt; + double thetaf = x_dev[xidxf[j]], Vmf = x_dev[xidxf[j] + 1]; + double thetat = x_dev[xidxt[j]], Vmt = x_dev[xidxt[j] + 1]; + double thetaft = thetaf - thetat; + double thetatf = thetat - thetaf; + double dSf2_dPf, dSf2_dQf, dSt2_dPt, dSt2_dQt; + double dPf_dthetaf, dPf_dVmf, dPf_dthetat, dPf_dVmt; + double dQf_dthetaf, dQf_dVmf, dQf_dthetat, dQf_dVmt; + double dPt_dthetaf, dPt_dVmf, dPt_dthetat, dPt_dVmt; + double dQt_dthetaf, dQt_dVmf, dQt_dthetat, dQt_dVmt; + double dSf2_dthetaf, dSf2_dVmf, dSf2_dthetat, dSf2_dVmt; + double dSt2_dthetaf, dSt2_dVmf, dSt2_dthetat, dSt2_dVmt; + double Gff = Gff_arr[j], Bff = Bff_arr[j]; + double Gft = Gft_arr[j], Bft = Bft_arr[j]; + double Gtf = Gtf_arr[j], Btf = Btf_arr[j]; + double Gtt = Gtt_arr[j], Btt = Btt_arr[j]; + + Pf = Gff * Vmf * Vmf + + Vmf * Vmt * (Gft * cos(thetaft) + Bft * sin(thetaft)); + Qf = -Bff * Vmf * Vmf + + Vmf * Vmt * (-Bft * cos(thetaft) + Gft * sin(thetaft)); + Pt = Gtt * Vmt * Vmt + + Vmt * Vmf * (Gtf * cos(thetatf) + Btf * sin(thetatf)); + Qt = -Btt * Vmt * Vmt + + Vmt * Vmf * (-Btf * cos(thetatf) + Gtf * sin(thetatf)); + + dSf2_dPf = 2 * Pf; + dSf2_dQf = 2 * Qf; + dSt2_dPt = 2 * Pt; + dSt2_dQt = 2 * Qt; + + dPf_dthetaf = + Vmf * Vmt * (-Gft * sin(thetaft) + Bft * cos(thetaft)); + dPf_dVmf = 2 * Gff * Vmf + + Vmt * (Gft * cos(thetaft) + Bft * sin(thetaft)); + dPf_dthetat = + Vmf * Vmt * (Gft * sin(thetaft) - Bft * cos(thetaft)); + dPf_dVmt = Vmf * (Gft * cos(thetaft) + Bft * sin(thetaft)); + + dQf_dthetaf = + Vmf * Vmt * (Bft * sin(thetaft) + Gft * cos(thetaft)); + dQf_dVmf = -2 * Bff * Vmf + + Vmt * (-Bft * cos(thetaft) + Gft * sin(thetaft)); + dQf_dthetat = + Vmf * Vmt * (-Bft * sin(thetaft) - Gft * cos(thetaft)); + dQf_dVmt = Vmf * (-Bft * cos(thetaft) + Gft * sin(thetaft)); + + dPt_dthetat = + Vmt * Vmf * (-Gtf * sin(thetatf) + Btf * cos(thetatf)); + dPt_dVmt = 2 * Gtt * Vmt + + Vmf * (Gtf * cos(thetatf) + Btf * sin(thetatf)); + dPt_dthetaf = + Vmt * Vmf * (Gtf * sin(thetatf) - Btf * cos(thetatf)); + dPt_dVmf = Vmt * (Gtf * cos(thetatf) + Btf * sin(thetatf)); + + dQt_dthetat = + Vmt * Vmf * (Btf * sin(thetatf) + Gtf * cos(thetatf)); + dQt_dVmt = -2 * Btt * Vmt + + Vmf * (-Btf * cos(thetatf) + Gtf * sin(thetatf)); + dQt_dthetaf = + Vmt * Vmf * (-Btf * sin(thetatf) - Gtf * cos(thetatf)); + dQt_dVmf = Vmt * (-Btf * cos(thetatf) + Gtf * sin(thetatf)); + + dSf2_dthetaf = dSf2_dPf * dPf_dthetaf + dSf2_dQf * dQf_dthetaf; + dSf2_dthetat = dSf2_dPf * dPf_dthetat + dSf2_dQf * dQf_dthetat; + dSf2_dVmf = dSf2_dPf * dPf_dVmf + dSf2_dQf * dQf_dVmf; + dSf2_dVmt = dSf2_dPf * dPf_dVmt + dSf2_dQf * dQf_dVmt; + + val[0] = dSf2_dthetaf; + val[1] = dSf2_dVmf; + val[2] = dSf2_dthetat; + val[3] = dSf2_dVmt; + + MJacS_dev[jac_ieq_idx[i] + 0] = val[0]; + MJacS_dev[jac_ieq_idx[i] + 1] = val[1]; + MJacS_dev[jac_ieq_idx[i] + 2] = val[2]; + MJacS_dev[jac_ieq_idx[i] + 3] = val[3]; + + dSt2_dthetaf = dSt2_dPt * dPt_dthetaf + dSt2_dQt * dQt_dthetaf; + dSt2_dthetat = dSt2_dPt * dPt_dthetat + dSt2_dQt * dQt_dthetat; + dSt2_dVmf = dSt2_dPt * dPt_dVmf + dSt2_dQt * dQt_dVmf; + dSt2_dVmt = dSt2_dPt * dPt_dVmt + dSt2_dQt * dQt_dVmt; + + val[2] = dSt2_dthetat; + val[3] = dSt2_dVmt; + val[0] = dSt2_dthetaf; + val[1] = dSt2_dVmf; + + MJacS_dev[jac_ieq_idx[i] + 4] = val[0]; + MJacS_dev[jac_ieq_idx[i] + 5] = val[1]; + MJacS_dev[jac_ieq_idx[i] + 6] = val[2]; + MJacS_dev[jac_ieq_idx[i] + 7] = val[3]; + }); + } - // Copy from device to host - umpire::Allocator h_allocator_ = resmgr.getAllocator("HOST"); - registerWith(x, opflow->nx, resmgr, h_allocator_); - resmgr.copy((double *)x, (double *)x_dev); + int *ipermout = + (int *)d_allocator_.allocate(opflow->nnz_ineqjacsp * sizeof(int)); + resmgr.copy(ipermout, iperm); - ierr = VecRestoreArray(opflow->X, &x); - CHKERRQ(ierr); + RAJA::stable_sort_pairs( + RAJA::make_span(ipermout, opflow->nnz_ineqjacsp), + RAJA::make_span(MJacS_dev + opflow->nnz_eqjacsp, + opflow->nnz_ineqjacsp), + RAJA::operators::less{}); - /* Compute inequality constraint jacobian */ - ierr = (*opflow->modelops.computeinequalityconstraintjacobian)( - opflow, opflow->X, opflow->Jac_Gi); - CHKERRQ(ierr); + if (debugmsg) + PrintTriplets( + "Inequality Constraint Jacobian (GPU):", opflow->nnz_ineqjacsp, + iperm, (iJacS_dev == NULL ? NULL : iJacS_dev + opflow->nnz_eqjacsp), + (jJacS_dev == NULL ? NULL : jJacS_dev + opflow->nnz_eqjacsp), + MJacS_dev); - ierr = MatGetSize(opflow->Jac_Gi, &nrow, &ncol); - CHKERRQ(ierr); + d_allocator_.deallocate(ipermout); - values = pbpolrajahiopsparse->val_jacineq; - /* Copy over values */ - for (i = 0; i < nrow; i++) { - ierr = MatGetRow(opflow->Jac_Gi, i, &nvals, &cols, &vals); + if (oldhostway) { + + ierr = VecGetArray(opflow->X, &x); CHKERRQ(ierr); - for (j = 0; j < nvals; j++) { - values[j] = vals[j]; + + // Copy from device to host + umpire::Allocator h_allocator_ = resmgr.getAllocator("HOST"); + registerWith(x, opflow->nx, resmgr, h_allocator_); + resmgr.copy((double *)x, (double *)x_dev); + + ierr = VecRestoreArray(opflow->X, &x); + CHKERRQ(ierr); + + /* Compute inequality constraint jacobian */ + ierr = (*opflow->modelops.computeinequalityconstraintjacobian)( + opflow, opflow->X, opflow->Jac_Gi); + CHKERRQ(ierr); + + ierr = MatGetSize(opflow->Jac_Gi, &nrow, &ncol); + CHKERRQ(ierr); + + values = pbpolrajahiopsparse->val_jacineq; + /* Copy over values */ + for (i = 0; i < nrow; i++) { + ierr = MatGetRow(opflow->Jac_Gi, i, &nvals, &cols, &vals); + CHKERRQ(ierr); + for (j = 0; j < nvals; j++) { + values[j] = vals[j]; + } + values += nvals; + ierr = MatRestoreRow(opflow->Jac_Gi, i, &nvals, &cols, &vals); + CHKERRQ(ierr); } - values += nvals; - ierr = MatRestoreRow(opflow->Jac_Gi, i, &nvals, &cols, &vals); + // Copy over val_jacineq to device + resmgr.copy(MJacS_dev + opflow->nnz_eqjacsp, + pbpolrajahiopsparse->val_jacineq); + + if (debugmsg) + PrintTriplets( + "Inequality Constraint Jacobian:", opflow->nnz_ineqjacsp, NULL, + (iJacS_dev == NULL ? NULL : iJacS_dev + opflow->nnz_eqjacsp), + (jJacS_dev == NULL ? NULL : jJacS_dev + opflow->nnz_eqjacsp), + MJacS_dev + opflow->nnz_eqjacsp); + + ierr = PetscLogEventEnd(opflow->ineqconsjaclogger, 0, 0, 0, 0); CHKERRQ(ierr); } - // Copy over val_jacineq to device - resmgr.copy(MJacS_dev + opflow->nnz_eqjacsp, - pbpolrajahiopsparse->val_jacineq); - - ierr = PetscLogEventEnd(opflow->ineqconsjaclogger, 0, 0, 0, 0); - CHKERRQ(ierr); } } @@ -594,6 +958,11 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( double *MJacS_dev) { PbpolModelRajaHiop *pbpolrajahiopsparse = reinterpret_cast(opflow->model); + GENParamsRajaHiop *genparams = &pbpolrajahiopsparse->genparams; + LOADParamsRajaHiop *loadparams = &pbpolrajahiopsparse->loadparams; + BUSParamsRajaHiop *busparams = &pbpolrajahiopsparse->busparams; + LINEParamsRajaHiop *lineparams = &pbpolrajahiopsparse->lineparams; + PetscErrorCode ierr; PetscInt *iRowstart, *jColstart; PetscScalar *x, *values; @@ -607,94 +976,496 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( PetscFunctionBegin; + ierr = PetscLogEventBegin(opflow->eqconsjaclogger, 0, 0, 0, 0); + CHKERRQ(ierr); + + umpire::Allocator h_allocator_ = resmgr.getAllocator("HOST"); + umpire::Allocator d_allocator_ = resmgr.getAllocator("DEVICE"); + + /* Using OPFLOWComputeEqualityConstraintJacobian_PBPOL() as a guide */ + if (MJacS_dev == NULL) { + + if (debugmsg) + std::cout << "Official Equality Jacobian nonzero count: " + << opflow->nnz_eqjacsp << std::endl; + /* Set locations only */ - roffset = 0; - coffset = 0; + resmgr.memset(iJacS_dev, 0, opflow->nnz_eqjacsp * sizeof(int)); + resmgr.memset(jJacS_dev, 0, opflow->nnz_eqjacsp * sizeof(int)); - // Create arrays on host to store i,j, and val arrays - umpire::Allocator h_allocator_ = resmgr.getAllocator("HOST"); + /* Bus power imbalance contribution */ + int *b_xidxpimb = busparams->xidxpimb_dev_; + int *b_gidx = busparams->gidx_dev_; + int *b_xidx = busparams->xidx_dev_; + int *b_jacsp_idx = busparams->jacsp_idx_dev_; + int *b_jacsq_idx = busparams->jacsq_idx_dev_; - pbpolrajahiopsparse->i_jaceq = - (int *)(h_allocator_.allocate(opflow->nnz_eqjacsp * sizeof(int))); - pbpolrajahiopsparse->j_jaceq = - (int *)(h_allocator_.allocate(opflow->nnz_eqjacsp * sizeof(int))); - pbpolrajahiopsparse->val_jaceq = - (double *)(h_allocator_.allocate(opflow->nnz_eqjacsp * sizeof(double))); + /* Bus */ + if (debugmsg) + std::cout << "Begin with buses" << std::endl; + RAJA::forall( + RAJA::RangeSegment(0, busparams->nbus), + RAJA_LAMBDA(RAJA::Index_type i) { + iJacS_dev[b_jacsp_idx[i]] = b_gidx[i]; + jJacS_dev[b_jacsp_idx[i]] = b_xidx[i]; + iJacS_dev[b_jacsp_idx[i] + 1] = b_gidx[i]; + jJacS_dev[b_jacsp_idx[i] + 1] = b_xidx[i] + 1; + + iJacS_dev[b_jacsq_idx[i]] = b_gidx[i] + 1; + jJacS_dev[b_jacsq_idx[i]] = b_xidx[i]; + iJacS_dev[b_jacsq_idx[i] + 1] = b_gidx[i] + 1; + jJacS_dev[b_jacsq_idx[i] + 1] = b_xidx[i] + 1; + }); - iRowstart = pbpolrajahiopsparse->i_jaceq; - jColstart = pbpolrajahiopsparse->j_jaceq; + if (opflow->include_powerimbalance_variables) { + if (debugmsg) + std::cout << "Bus power imbalance variables" << std::endl; + RAJA::forall( + RAJA::RangeSegment(0, busparams->nbus), + RAJA_LAMBDA(RAJA::Index_type i) { + iJacS_dev[b_jacsp_idx[i]] = b_gidx[i]; + jJacS_dev[b_jacsp_idx[i]] = b_xidxpimb[i]; + iJacS_dev[b_jacsp_idx[i] + 1] = b_gidx[i]; + jJacS_dev[b_jacsp_idx[i] + 1] = b_xidxpimb[i] + 1; + + iJacS_dev[b_jacsq_idx[i]] = b_gidx[i] + 1; + jJacS_dev[b_jacsq_idx[i]] = b_xidxpimb[i] + 2; + iJacS_dev[b_jacsq_idx[i] + 1] = b_gidx[i] + 1; + jJacS_dev[b_jacsq_idx[i] + 1] = b_xidxpimb[i] + 3; + }); + } - ierr = (*opflow->modelops.computeequalityconstraintjacobian)( - opflow, opflow->X, opflow->Jac_Ge); - CHKERRQ(ierr); + /* generation contributions */ - ierr = MatGetSize(opflow->Jac_Ge, &nrow, &ncol); - CHKERRQ(ierr); + if (debugmsg) + std::cout << "Generators " << std::endl; - /* Copy over locations to triplet format */ - for (i = 0; i < nrow; i++) { - ierr = MatGetRow(opflow->Jac_Ge, i, &nvals, &cols, &vals); - CHKERRQ(ierr); - for (j = 0; j < nvals; j++) { - iRowstart[j] = roffset + i; - jColstart[j] = coffset + cols[j]; + int *g_gidxbus = genparams->gidxbus_dev_; + int *g_xidx = genparams->xidx_dev_; + int *eqjacspbus_idx = genparams->eqjacspbus_idx_dev_; + int *eqjacsqbus_idx = genparams->eqjacsqbus_idx_dev_; + RAJA::forall( + RAJA::RangeSegment(0, genparams->ngenON), + RAJA_LAMBDA(RAJA::Index_type i) { + iJacS_dev[eqjacspbus_idx[i]] = g_gidxbus[i]; + jJacS_dev[eqjacspbus_idx[i]] = g_xidx[i]; + + iJacS_dev[eqjacsqbus_idx[i]] = g_gidxbus[i] + 1; + jJacS_dev[eqjacsqbus_idx[i]] = g_xidx[i] + 1; + }); + + /* Loadloss contributions */ + + if (opflow->include_loadloss_variables) { + + if (debugmsg) + std::cout << "Load Loss" << std::endl; + int *l_gidx = loadparams->gidx_dev_; + int *l_xidx = loadparams->xidx_dev_; + int *l_jacsp_idx = loadparams->jacsp_idx_dev_; + + RAJA::forall( + RAJA::RangeSegment(0, loadparams->nload), + RAJA_LAMBDA(RAJA::Index_type i) { + iJacS_dev[l_jacsp_idx[i]] = l_gidx[i]; + jJacS_dev[l_jacsp_idx[i]] = l_xidx[i]; + iJacS_dev[l_jacsp_idx[i] + 1] = l_gidx[i] + 1; + jJacS_dev[l_jacsp_idx[i] + 1] = l_xidx[i] + 1; + }); + } + + /* Connected lines */ + + if (debugmsg) + std::cout << "Connected Lines" << std::endl; + + int *xidxf = lineparams->xidxf_dev_; + int *xidxt = lineparams->xidxt_dev_; + int *geqidxf = lineparams->geqidxf_dev_; + int *geqidxt = lineparams->geqidxt_dev_; + int *jacf_idx = lineparams->jacf_idx_dev_; + int *jact_idx = lineparams->jact_idx_dev_; + + RAJA::forall( + RAJA::RangeSegment(0, lineparams->nlineON), + RAJA_LAMBDA(RAJA::Index_type i) { + int offset; + + offset = 0; + + iJacS_dev[jacf_idx[i] + offset] = geqidxf[i]; + jJacS_dev[jacf_idx[i] + offset] = xidxt[i]; + offset++; + + iJacS_dev[jacf_idx[i] + offset] = geqidxf[i]; + jJacS_dev[jacf_idx[i] + offset] = xidxt[i] + 1; + offset++; + + iJacS_dev[jacf_idx[i] + offset] = geqidxf[i] + 1; + jJacS_dev[jacf_idx[i] + offset] = xidxt[i]; + offset++; + + iJacS_dev[jacf_idx[i] + offset] = geqidxf[i] + 1; + jJacS_dev[jacf_idx[i] + offset] = xidxt[i] + 1; + offset++; + + // to bus indexes + + offset = 0; + + iJacS_dev[jact_idx[i] + offset] = geqidxt[i]; + jJacS_dev[jact_idx[i] + offset] = xidxf[i]; + offset++; + + iJacS_dev[jact_idx[i] + offset] = geqidxt[i]; + jJacS_dev[jact_idx[i] + offset] = xidxf[i] + 1; + offset++; + + iJacS_dev[jact_idx[i] + offset] = geqidxt[i] + 1; + jJacS_dev[jact_idx[i] + offset] = xidxf[i]; + offset++; + + iJacS_dev[jact_idx[i] + offset] = geqidxt[i] + 1; + jJacS_dev[jact_idx[i] + offset] = xidxf[i] + 1; + offset++; + }); + + if (opflow->has_gensetpoint) { + + if (debugmsg) + std::cout << "Generator set point" << std::endl; + int *eqjacspgen_idx = genparams->eqjacspgen_idx_dev_; + int *g_geqidxgen = genparams->geqidxgen_dev_; + int *g_xidx = genparams->xidx_dev_; + int *g_isrenewable = genparams->isrenewable_dev_; + + RAJA::forall( + RAJA::RangeSegment(0, genparams->ngenON), + RAJA_LAMBDA(RAJA::Index_type i) { + if (!g_isrenewable[i]) { + iJacS_dev[eqjacspgen_idx[i]] = g_geqidxgen[i]; + jJacS_dev[eqjacspgen_idx[i]] = g_xidx[i]; + + iJacS_dev[eqjacspgen_idx[i] + 1] = g_geqidxgen[i]; + jJacS_dev[eqjacspgen_idx[i] + 1] = g_xidx[i] + 2; + + iJacS_dev[eqjacspgen_idx[i] + 2] = g_geqidxgen[i]; + jJacS_dev[eqjacspgen_idx[i] + 2] = g_xidx[i] + 3; + + iJacS_dev[eqjacspgen_idx[i] + 3] = g_geqidxgen[i] + 1; + jJacS_dev[eqjacspgen_idx[i] + 3] = g_xidx[i] + 3; + } + }); + } + + if (pbpolrajahiopsparse->idx_jaceq_dev_ == NULL) { + pbpolrajahiopsparse->idx_jaceq_dev_ = + (int *)d_allocator_.allocate(opflow->nnz_eqjacsp * sizeof(int)); + } + + SortIndexes(opflow->nnz_eqjacsp, iJacS_dev, jJacS_dev, + pbpolrajahiopsparse->idx_jaceq_dev_); + + if (debugmsg) + PrintTriplets("Non-zero indexes for Equality Constraint Jacobian (GPU):", + opflow->nnz_eqjacsp, pbpolrajahiopsparse->idx_jaceq_dev_, + iJacS_dev, jJacS_dev, NULL); + + if (oldhostway) { + roffset = 0; + coffset = 0; + + if (pbpolrajahiopsparse->i_jaceq == NULL) { + pbpolrajahiopsparse->i_jaceq = + (int *)(h_allocator_.allocate(opflow->nnz_eqjacsp * sizeof(int))); + pbpolrajahiopsparse->j_jaceq = + (int *)(h_allocator_.allocate(opflow->nnz_eqjacsp * sizeof(int))); + pbpolrajahiopsparse->val_jaceq = (double *)(h_allocator_.allocate( + opflow->nnz_eqjacsp * sizeof(double))); } - /* Increment iRow,jCol pointers */ - iRowstart += nvals; - jColstart += nvals; - ierr = MatRestoreRow(opflow->Jac_Ge, i, &nvals, &cols, &vals); + + iRowstart = pbpolrajahiopsparse->i_jaceq; + jColstart = pbpolrajahiopsparse->j_jaceq; + + ierr = (*opflow->modelops.computeequalityconstraintjacobian)( + opflow, opflow->X, opflow->Jac_Ge); CHKERRQ(ierr); + + ierr = MatGetSize(opflow->Jac_Ge, &nrow, &ncol); + CHKERRQ(ierr); + + /* Copy over locations to triplet format */ + for (i = 0; i < nrow; i++) { + ierr = MatGetRow(opflow->Jac_Ge, i, &nvals, &cols, &vals); + CHKERRQ(ierr); + for (j = 0; j < nvals; j++) { + iRowstart[j] = roffset + i; + jColstart[j] = coffset + cols[j]; + } + /* Increment iRow,jCol pointers */ + iRowstart += nvals; + jColstart += nvals; + ierr = MatRestoreRow(opflow->Jac_Ge, i, &nvals, &cols, &vals); + CHKERRQ(ierr); + } + + // Copy over i_jaceq and j_jaceq arrays to device + resmgr.copy(iJacS_dev, pbpolrajahiopsparse->i_jaceq); + resmgr.copy(jJacS_dev, pbpolrajahiopsparse->j_jaceq); + + if (debugmsg) + PrintTriplets("Non-zero indexes for Equality Constraint Jacobian:", + opflow->nnz_eqjacsp, NULL, iJacS_dev, jJacS_dev, NULL); } - // Copy over i_jaceq and j_jaceq arrays to device - resmgr.copy(iJacS_dev, pbpolrajahiopsparse->i_jaceq); - resmgr.copy(jJacS_dev, pbpolrajahiopsparse->j_jaceq); } else { - ierr = PetscLogEventBegin(opflow->eqconsjaclogger, 0, 0, 0, 0); - CHKERRQ(ierr); - ierr = VecGetArray(opflow->X, &x); - CHKERRQ(ierr); + // Bus Contribution + int *b_jacsp_idx = busparams->jacsp_idx_dev_; + int *b_jacsq_idx = busparams->jacsq_idx_dev_; + int *isisolated = busparams->isisolated_dev_; + int *ispvpq = busparams->ispvpq_dev_; + double *gl = busparams->gl_dev_; + double *bl = busparams->bl_dev_; + int *b_xidx = busparams->xidx_dev_; - // Copy from device to host - umpire::Allocator h_allocator_ = resmgr.getAllocator("HOST"); - registerWith(x, opflow->nx, resmgr, h_allocator_); - resmgr.copy((double *)x, (double *)x_dev); + int *iperm = pbpolrajahiopsparse->idx_jaceq_dev_; - ierr = VecRestoreArray(opflow->X, &x); - CHKERRQ(ierr); + // Basic bus contribution + RAJA::forall( + RAJA::RangeSegment(0, busparams->nbus), + RAJA_LAMBDA(RAJA::Index_type i) { + double Vm = x_dev[b_xidx[i] + 1]; + MJacS_dev[b_jacsp_idx[i]] = isisolated[i] * 1.0 + ispvpq[i] * 0.0; + MJacS_dev[b_jacsp_idx[i] + 1] = + isisolated[i] * 0.0 + ispvpq[i] * 2 * Vm * gl[i]; + MJacS_dev[b_jacsq_idx[i]] = 0.0; + MJacS_dev[b_jacsq_idx[i] + 1] = + isisolated[i] * 1.0 + ispvpq[i] * -2 * Vm * bl[i]; + }); - /* Compute equality constraint jacobian */ - ierr = (*opflow->modelops.computeequalityconstraintjacobian)( - opflow, opflow->X, opflow->Jac_Ge); - CHKERRQ(ierr); + // Power imbalance + if (opflow->include_powerimbalance_variables) { + RAJA::forall( + RAJA::RangeSegment(0, busparams->nbus), + RAJA_LAMBDA(RAJA::Index_type i) { + MJacS_dev[b_jacsp_idx[i]] = 1.0; + MJacS_dev[b_jacsp_idx[i] + 1] = -1.0; + MJacS_dev[b_jacsq_idx[i]] = 1.0; + MJacS_dev[b_jacsq_idx[i] + 1] = -1.0; + }); + } - ierr = MatGetSize(opflow->Jac_Ge, &nrow, &ncol); - CHKERRQ(ierr); + // Generator contributions + int *eqjacspbus_idx = genparams->eqjacspbus_idx_dev_; + int *eqjacsqbus_idx = genparams->eqjacsqbus_idx_dev_; + RAJA::forall( + RAJA::RangeSegment(0, genparams->ngenON), + RAJA_LAMBDA(RAJA::Index_type i) { + MJacS_dev[eqjacspbus_idx[i]] = -1.0; + MJacS_dev[eqjacsqbus_idx[i]] = -1.0; + }); - values = pbpolrajahiopsparse->val_jaceq; + if (opflow->has_gensetpoint) { + int *eqjacspgen_idx = genparams->eqjacspgen_idx_dev_; + int *g_isrenewable = genparams->isrenewable_dev_; + + RAJA::forall( + RAJA::RangeSegment(0, genparams->ngenON), + RAJA_LAMBDA(RAJA::Index_type i) { + if (!g_isrenewable[i]) { + MJacS_dev[eqjacspgen_idx[i]] = -1.0; + MJacS_dev[eqjacspgen_idx[i] + 1] = 1.0; + MJacS_dev[eqjacspgen_idx[i] + 2] = 1.0; + MJacS_dev[eqjacspgen_idx[i] + 3] = 1.0; + } + }); + } - /* Copy over values */ - for (i = 0; i < nrow; i++) { - ierr = MatGetRow(opflow->Jac_Ge, i, &nvals, &cols, &vals); + /* Loadloss contributions - 2 contributions expected */ + if (opflow->include_loadloss_variables) { + int *l_jacsp_idx = loadparams->jacsp_idx_dev_; + RAJA::forall( + RAJA::RangeSegment(0, loadparams->nload), + RAJA_LAMBDA(RAJA::Index_type i) { + MJacS_dev[l_jacsp_idx[i]] = -1; + MJacS_dev[l_jacsp_idx[i] + 1] = -1; + }); + } + + // Line contributions + + double *Gff = lineparams->Gff_dev_; + double *Gtt = lineparams->Gtt_dev_; + double *Gft = lineparams->Gft_dev_; + double *Gtf = lineparams->Gtf_dev_; + + double *Bff = lineparams->Bff_dev_; + double *Btt = lineparams->Btt_dev_; + double *Bft = lineparams->Bft_dev_; + double *Btf = lineparams->Btf_dev_; + + int *xidxf = lineparams->xidxf_dev_; + int *xidxt = lineparams->xidxt_dev_; + int *busf_idx = lineparams->busf_idx_dev_; + int *bust_idx = lineparams->bust_idx_dev_; + int *jacf_idx = lineparams->jacf_idx_dev_; + int *jact_idx = lineparams->jact_idx_dev_; + + RAJA::forall( + RAJA::RangeSegment(0, lineparams->nlineON), + RAJA_LAMBDA(RAJA::Index_type i) { + double thetaf = x_dev[xidxf[i]], Vmf = x_dev[xidxf[i] + 1]; + double thetat = x_dev[xidxt[i]], Vmt = x_dev[xidxt[i] + 1]; + double thetaft = thetaf - thetat; + double thetatf = thetat - thetaf; + int ifrom(busf_idx[i]), ito(bust_idx[i]); + + // This confusing and could probably be done in a clearer + // way. Two indexing schemes are needed. Some line + // contributions (from/from, to/to) need to be added to the + // original bus contribution entries -- get those + // from the bus index list. A separate indexing system is for + // the extra (to/from, from/to) entries. + + // for reference, these are computed in the same order as + // OPFLOWComputeDenseEqualityConstraintJacobian_PBPOLRAJAHIOP() + + // from bus real entries + + /* dPf_dthetaf */ + RAJA::atomicAdd( + &(MJacS_dev[b_jacsp_idx[ifrom]]), + Vmf * Vmt * (-Gft[i] * sin(thetaft) + Bft[i] * cos(thetaft))); + /*dPf_dVmf */ + RAJA::atomicAdd( + &(MJacS_dev[b_jacsp_idx[ifrom] + 1]), + 2 * Gff[i] * Vmf + + Vmt * (Gft[i] * cos(thetaft) + Bft[i] * sin(thetaft))); + /*dPf_dthetat */ + MJacS_dev[jacf_idx[i] + 0] = + Vmf * Vmt * (Gft[i] * sin(thetaft) - Bft[i] * cos(thetaft)); + /* dPf_dVmt */ + MJacS_dev[jacf_idx[i] + 1] = + Vmf * (Gft[i] * cos(thetaft) + Bft[i] * sin(thetaft)); + + // from bus reactive entries + + /* dQf_dthetaf */ + RAJA::atomicAdd( + &(MJacS_dev[b_jacsq_idx[ifrom]]), + Vmf * Vmt * (Bft[i] * sin(thetaft) + Gft[i] * cos(thetaft))); + /* dQf_dVmf */ + RAJA::atomicAdd( + &(MJacS_dev[b_jacsq_idx[ifrom] + 1]), + -2 * Bff[i] * Vmf + + Vmt * (-Bft[i] * cos(thetaft) + Gft[i] * sin(thetaft))); + /* dQf_dthetat */ + MJacS_dev[jacf_idx[i] + 2] = + Vmf * Vmt * (-Bft[i] * sin(thetaft) - Gft[i] * cos(thetaft)); + /* dQf_dVmt */ + MJacS_dev[jacf_idx[i] + 3] = + Vmf * (-Bft[i] * cos(thetaft) + Gft[i] * sin(thetaft)); + + // to bus real entries + + /* dPt_dthetat */ + RAJA::atomicAdd( + &(MJacS_dev[b_jacsp_idx[ito]]), + Vmt * Vmf * (-Gtf[i] * sin(thetatf) + Btf[i] * cos(thetatf))); + /* dPt_dVmt */ + RAJA::atomicAdd( + &(MJacS_dev[b_jacsp_idx[ito] + 1]), + 2 * Gtt[i] * Vmt + + Vmf * (Gtf[i] * cos(thetatf) + Btf[i] * sin(thetatf))); + /* dPt_dthetaf */ + MJacS_dev[jact_idx[i] + 0] = + Vmt * Vmf * (Gtf[i] * sin(thetatf) - Btf[i] * cos(thetatf)); + /* dPt_dVmf */ + MJacS_dev[jact_idx[i] + 1] = + Vmt * (Gtf[i] * cos(thetatf) + Btf[i] * sin(thetatf)); + + // to bus reactive entries + + /* dQt_dthetat */ + RAJA::atomicAdd( + &(MJacS_dev[b_jacsq_idx[ito]]), + Vmt * Vmf * (Btf[i] * sin(thetatf) + Gtf[i] * cos(thetatf))); + /* dQt_dVmt */ + RAJA::atomicAdd( + &(MJacS_dev[b_jacsq_idx[ito] + 1]), + -2 * Btt[i] * Vmt + + Vmf * (-Btf[i] * cos(thetatf) + Gtf[i] * sin(thetatf))); + /* dQt_dthetaf */ + MJacS_dev[jact_idx[i] + 2] = + Vmt * Vmf * (-Btf[i] * sin(thetatf) - Gtf[i] * cos(thetatf)); + /* dQt_dVmf */ + MJacS_dev[jact_idx[i] + 3] = + Vmt * (-Btf[i] * cos(thetatf) + Gtf[i] * sin(thetatf)); + }); + + int *ipermout = + (int *)d_allocator_.allocate(opflow->nnz_eqjacsp * sizeof(int)); + resmgr.copy(ipermout, iperm); + + RAJA::stable_sort_pairs( + RAJA::make_span(ipermout, opflow->nnz_eqjacsp), + RAJA::make_span(MJacS_dev, opflow->nnz_eqjacsp), + RAJA::operators::less{}); + + d_allocator_.deallocate(ipermout); + + if (debugmsg) + PrintTriplets("Equality Constraint Jacobian (GPU):", opflow->nnz_eqjacsp, + iperm, iJacS_dev, jJacS_dev, MJacS_dev); + + if (oldhostway) { + ierr = VecGetArray(opflow->X, &x); CHKERRQ(ierr); - for (j = 0; j < nvals; j++) { - values[j] = vals[j]; - } - values += nvals; - ierr = MatRestoreRow(opflow->Jac_Ge, i, &nvals, &cols, &vals); + + // Copy from device to host + registerWith(x, opflow->nx, resmgr, h_allocator_); + resmgr.copy((double *)x, (double *)x_dev); + + ierr = VecRestoreArray(opflow->X, &x); CHKERRQ(ierr); - } - // Copy over val_ineq to device - resmgr.copy(MJacS_dev, pbpolrajahiopsparse->val_jaceq); + /* Compute equality constraint jacobian */ + ierr = (*opflow->modelops.computeequalityconstraintjacobian)( + opflow, opflow->X, opflow->Jac_Ge); + CHKERRQ(ierr); - ierr = PetscLogEventEnd(opflow->eqconsjaclogger, 0, 0, 0, 0); - CHKERRQ(ierr); + ierr = MatGetSize(opflow->Jac_Ge, &nrow, &ncol); + CHKERRQ(ierr); + + values = pbpolrajahiopsparse->val_jaceq; + + /* Copy over values */ + for (i = 0; i < nrow; i++) { + ierr = MatGetRow(opflow->Jac_Ge, i, &nvals, &cols, &vals); + CHKERRQ(ierr); + for (j = 0; j < nvals; j++) { + values[j] = vals[j]; + } + values += nvals; + ierr = MatRestoreRow(opflow->Jac_Ge, i, &nvals, &cols, &vals); + CHKERRQ(ierr); + } + + // Copy over val_ineq to device + resmgr.copy(MJacS_dev, pbpolrajahiopsparse->val_jaceq); + + if (debugmsg) + PrintTriplets("Equality Constraint Jacobian:", opflow->nnz_eqjacsp, + NULL, iJacS_dev, jJacS_dev, MJacS_dev); + } } + ierr = PetscLogEventEnd(opflow->eqconsjaclogger, 0, 0, 0, 0); + CHKERRQ(ierr); + PetscFunctionReturn(0); } @@ -703,10 +1474,15 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( int *jHSS_dev, double *MHSS_dev) { PbpolModelRajaHiop *pbpolrajahiopsparse = reinterpret_cast(opflow->model); + GENParamsRajaHiop *genparams = &pbpolrajahiopsparse->genparams; + LOADParamsRajaHiop *loadparams = &pbpolrajahiopsparse->loadparams; + BUSParamsRajaHiop *busparams = &pbpolrajahiopsparse->busparams; + LINEParamsRajaHiop *lineparams = &pbpolrajahiopsparse->lineparams; + PetscErrorCode ierr; PetscInt *iRow, *jCol; PetscScalar *x, *values, *lambda; - PetscInt nrow; + PetscInt nrow, ncol; PetscInt nvals; const PetscInt *cols; const PetscScalar *vals; @@ -714,19 +1490,199 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( PetscInt ctr = 0; auto &resmgr = umpire::ResourceManager::getInstance(); + umpire::Allocator h_allocator_ = resmgr.getAllocator("HOST"); + umpire::Allocator d_allocator_ = resmgr.getAllocator("DEVICE"); + PetscFunctionBegin; if (iHSS_dev != NULL && jHSS_dev != NULL) { + if (debugmsg) + std::cout << "Official Hessian nonzero count: " << opflow->nnz_hesssp + << std::endl; + + resmgr.memset(iHSS_dev, 0, opflow->nnz_hesssp * sizeof(int)); + resmgr.memset(jHSS_dev, 0, opflow->nnz_hesssp * sizeof(int)); + + // Bus contributions + + int *b_xidx = busparams->xidx_dev_; + int *b_hesssp_idx = busparams->hesssp_idx_dev_; + + RAJA::forall( + RAJA::RangeSegment(0, busparams->nbus), + RAJA_LAMBDA(RAJA::Index_type i) { + int off(0); + iHSS_dev[b_hesssp_idx[i] + off] = b_xidx[i]; + jHSS_dev[b_hesssp_idx[i] + off] = b_xidx[i]; + off++; + + iHSS_dev[b_hesssp_idx[i] + off] = b_xidx[i]; + jHSS_dev[b_hesssp_idx[i] + off] = b_xidx[i] + 1; + off++; + + // upper triangular only + // iHSS_dev[b_hesssp_idx[i] + off] = b_xidx[i] + 1; + // jHSS_dev[b_hesssp_idx[i] + off] = b_xidx[i]; + // off++; + + iHSS_dev[b_hesssp_idx[i] + off] = b_xidx[i] + 1; + jHSS_dev[b_hesssp_idx[i] + off] = b_xidx[i] + 1; + off++; + }); + + if (opflow->include_powerimbalance_variables) { + int *b_xidxpimb = busparams->xidxpimb_dev_; + RAJA::forall( + RAJA::RangeSegment(0, busparams->nbus), + RAJA_LAMBDA(RAJA::Index_type i) { + int off(2); + + iHSS_dev[b_hesssp_idx[i] + off] = b_xidxpimb[i]; + jHSS_dev[b_hesssp_idx[i] + off] = b_xidxpimb[i]; + off++; + + iHSS_dev[b_hesssp_idx[i] + off] = b_xidxpimb[i] + 1; + jHSS_dev[b_hesssp_idx[i] + off] = b_xidxpimb[i] + 1; + }); + } + + /* Generator contributions for row,col numbers */ + int *g_xidx = genparams->xidx_dev_; + int *g_hesssp_idx = genparams->hesssp_idx_dev_; + RAJA::forall( + RAJA::RangeSegment(0, genparams->ngenON), + RAJA_LAMBDA(RAJA::Index_type i) { + iHSS_dev[g_hesssp_idx[i]] = g_xidx[i]; + jHSS_dev[g_hesssp_idx[i]] = g_xidx[i]; + iHSS_dev[g_hesssp_idx[i] + 1] = g_xidx[i] + 1; + jHSS_dev[g_hesssp_idx[i] + 1] = g_xidx[i] + 1; + }); + + int *xidxf = lineparams->xidxf_dev_; + int *xidxt = lineparams->xidxt_dev_; + int *ln_hessp_idx = lineparams->hesssp_idx_dev_; + int *linelimidx = lineparams->linelimidx_dev_; + + RAJA::forall( + RAJA::RangeSegment(0, lineparams->nlinelim), + RAJA_LAMBDA(RAJA::Index_type i) { + int off(0); + + // from-bus diagonal entries already defined + + // iHSS_dev[ln_hessp_idx[i] + off] = xidxf[i]; + // jHSS_dev[ln_hessp_idx[i] + off] = xidxf[i]; + // off++; + + // iHSS_dev[ln_hessp_idx[i] + off] = xidxf[i]; + // jHSS_dev[ln_hessp_idx[i] + off] = xidxf[i] + 1; + // off++; + + // iHSS_dev[ln_hessp_idx[i] + off] = xidxf[i] + 1; + // jHSS_dev[ln_hessp_idx[i] + off] = xidxf[i]; + // off++; + + // iHSS_dev[ln_hessp_idx[i] + off] = xidxf[i] + 1; + // jHSS_dev[ln_hessp_idx[i] + off] = xidxf[i] + 1; + // off++; + + // from-bus off diagonal entries only there if in upper part + if (xidxt[i] > xidxf[i]) { + + iHSS_dev[ln_hessp_idx[i] + off] = xidxf[i]; + jHSS_dev[ln_hessp_idx[i] + off] = xidxt[i]; + off++; + + iHSS_dev[ln_hessp_idx[i] + off] = xidxf[i]; + jHSS_dev[ln_hessp_idx[i] + off] = xidxt[i] + 1; + off++; + + iHSS_dev[ln_hessp_idx[i] + off] = xidxf[i] + 1; + jHSS_dev[ln_hessp_idx[i] + off] = xidxt[i]; + off++; + + iHSS_dev[ln_hessp_idx[i] + off] = xidxf[i] + 1; + jHSS_dev[ln_hessp_idx[i] + off] = xidxt[i] + 1; + off++; + } + + // to-bus diagonal entries already defined + + // iHSS_dev[ln_hessp_idx[i] + off] = xidxt[i]; + // jHSS_dev[ln_hessp_idx[i] + off] = xidxt[i]; + // off++; + + // iHSS_dev[ln_hessp_idx[i] + off] = xidxt[i]; + // jHSS_dev[ln_hessp_idx[i] + off] = xidxt[i] + 1; + // off++; + + // iHSS_dev[ln_hessp_idx[i] + off] = xidxt[i] + 1; + // jHSS_dev[ln_hessp_idx[i] + off] = xidxt[i]; + // off++; + + // iHSS_dev[ln_hessp_idx[i] + off] = xidxt[i] + 1; + // jHSS_dev[ln_hessp_idx[i] + off] = xidxt[i] + 1; + // off++; + + // to-bus off diagonal entries only there if in upper part + if (xidxf[i] > xidxt[i]) { + + iHSS_dev[ln_hessp_idx[i] + off] = xidxt[i]; + jHSS_dev[ln_hessp_idx[i] + off] = xidxf[i]; + off++; + + iHSS_dev[ln_hessp_idx[i] + off] = xidxt[i]; + jHSS_dev[ln_hessp_idx[i] + off] = xidxf[i] + 1; + off++; + + iHSS_dev[ln_hessp_idx[i] + off] = xidxt[i] + 1; + jHSS_dev[ln_hessp_idx[i] + off] = xidxf[i]; + off++; + + iHSS_dev[ln_hessp_idx[i] + off] = xidxt[i] + 1; + jHSS_dev[ln_hessp_idx[i] + off] = xidxf[i] + 1; + off++; + } + }); + + /* Loadloss contributions - two contributions*/ + if (opflow->include_loadloss_variables) { + int *l_xidx = loadparams->xidx_dev_; + int *l_hesssp_idx = loadparams->hesssp_idx_dev_; + RAJA::forall( + RAJA::RangeSegment(0, loadparams->nload), + RAJA_LAMBDA(RAJA::Index_type i) { + iHSS_dev[l_hesssp_idx[i]] = l_xidx[i]; + jHSS_dev[l_hesssp_idx[i]] = l_xidx[i]; + iHSS_dev[l_hesssp_idx[i] + 1] = l_xidx[i] + 1; + jHSS_dev[l_hesssp_idx[i] + 1] = l_xidx[i] + 1; + }); + } + + if (pbpolrajahiopsparse->idx_hess_dev_ == NULL) { + pbpolrajahiopsparse->idx_hess_dev_ = + (int *)d_allocator_.allocate(opflow->nnz_hesssp * sizeof(int)); + } + + SortIndexes(opflow->nnz_hesssp, iHSS_dev, jHSS_dev, + pbpolrajahiopsparse->idx_hess_dev_); + + if (debugmsg) + PrintTriplets("Hessian Indexes (GPU):", opflow->nnz_hesssp, + pbpolrajahiopsparse->idx_hess_dev_, iHSS_dev, jHSS_dev, + NULL); + // Create arrays on host to store i,j, and val arrays - umpire::Allocator h_allocator_ = resmgr.getAllocator("HOST"); - pbpolrajahiopsparse->i_hess = - (int *)(h_allocator_.allocate(opflow->nnz_hesssp * sizeof(int))); - pbpolrajahiopsparse->j_hess = - (int *)(h_allocator_.allocate(opflow->nnz_hesssp * sizeof(int))); - pbpolrajahiopsparse->val_hess = - (double *)(h_allocator_.allocate(opflow->nnz_hesssp * sizeof(double))); + if (pbpolrajahiopsparse->i_hess == NULL) { + pbpolrajahiopsparse->i_hess = + (int *)(h_allocator_.allocate(opflow->nnz_hesssp * sizeof(int))); + pbpolrajahiopsparse->j_hess = + (int *)(h_allocator_.allocate(opflow->nnz_hesssp * sizeof(int))); + pbpolrajahiopsparse->val_hess = (double *)(h_allocator_.allocate( + opflow->nnz_hesssp * sizeof(double))); + } iRow = pbpolrajahiopsparse->i_hess; jCol = pbpolrajahiopsparse->j_hess; @@ -734,9 +1690,15 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( ierr = (*opflow->modelops.computehessian)( opflow, opflow->X, opflow->Lambdae, opflow->Lambdai, opflow->Hes); CHKERRQ(ierr); - ierr = MatGetSize(opflow->Hes, &nrow, &nrow); + ierr = MatGetSize(opflow->Hes, &nrow, &ncol); CHKERRQ(ierr); + if (debugmsg) + std::cout << "Official Hessian Size: " << nrow << " rows x " << ncol + << " cols" + << "(should be" << opflow->Nx << " x " << opflow->Nx << ")" + << std::endl; + /* Copy over locations to triplet format */ /* Note that HIOP requires a upper triangular Hessian as oppposed to IPOPT which requires a lower triangular Hessian @@ -762,8 +1724,1000 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( // Copy over i_hess and j_hess arrays to device resmgr.copy(iHSS_dev, pbpolrajahiopsparse->i_hess); resmgr.copy(jHSS_dev, pbpolrajahiopsparse->j_hess); + + if (debugmsg) { + PrintTriplets("Hessian Indexes:", opflow->nnz_hesssp, NULL, iHSS_dev, + jHSS_dev, NULL); + } + } else { + resmgr.memset(MHSS_dev, 0, opflow->nnz_hesssp * sizeof(double)); + + // Bus contributions + + int *b_hesssp_idx = busparams->hesssp_idx_dev_; + int *b_gidx = busparams->gidx_dev_; + int *ispvpq = busparams->ispvpq_dev_; + double *gl = busparams->gl_dev_; + double *bl = busparams->bl_dev_; + + RAJA::forall( + RAJA::RangeSegment(0, busparams->nbus), + RAJA_LAMBDA(RAJA::Index_type i) { + // int row, col; + double val; + // row = b_xidx[i] + 1 - nxsparse; + // col = row; + val = ispvpq[i] * (lambda_dev[b_gidx[i]] * 2 * gl[i] + + lambda_dev[b_gidx[i] + 1] * (-2 * bl[i])); + RAJA::atomicAdd(&MHSS_dev[b_hesssp_idx[i] + 2], + val); + }); + + if (opflow->objectivetype == MIN_GEN_COST) { + int *hesssp_idx = genparams->hesssp_idx_dev_; + double *cost_alpha = genparams->cost_alpha_dev_; + double obj_factor = opflow->obj_factor; + int isobj_gencost = opflow->obj_gencost; + double MVAbase = opflow->ps->MVAbase; + double weight = opflow->weight; + + RAJA::forall( + RAJA::RangeSegment(0, genparams->ngenON), + RAJA_LAMBDA(RAJA::Index_type i) { + MHSS_dev[hesssp_idx[i]] = weight * isobj_gencost * obj_factor * + 2.0 * cost_alpha[i] * MVAbase * MVAbase; + MHSS_dev[hesssp_idx[i] + 1] = 0.0; + }); + } else if (opflow->objectivetype == NO_OBJ) { + int *hesssp_idx = genparams->hesssp_idx_dev_; + RAJA::forall( + RAJA::RangeSegment(0, genparams->ngenON), + RAJA_LAMBDA(RAJA::Index_type i) { + MHSS_dev[hesssp_idx[i]] = 0.0; + MHSS_dev[hesssp_idx[i] + 1] = 0.0; + }); + } + + // Line contributions + + double *Gff_arr = lineparams->Gff_dev_; + double *Gtt_arr = lineparams->Gtt_dev_; + double *Gft_arr = lineparams->Gft_dev_; + double *Gtf_arr = lineparams->Gtf_dev_; + + double *Bff_arr = lineparams->Bff_dev_; + double *Btt_arr = lineparams->Btt_dev_; + double *Bft_arr = lineparams->Bft_dev_; + double *Btf_arr = lineparams->Btf_dev_; + + int *busf_idx = lineparams->busf_idx_dev_; + int *bust_idx = lineparams->bust_idx_dev_; + int *xidxf = lineparams->xidxf_dev_; + int *xidxt = lineparams->xidxt_dev_; + int *geqidxf = lineparams->geqidxf_dev_; + int *geqidxt = lineparams->geqidxt_dev_; + int *ln_hessp_idx = lineparams->hesssp_idx_dev_; + + RAJA::forall( + RAJA::RangeSegment(0, lineparams->nlineON), + RAJA_LAMBDA(RAJA::Index_type i) { + int gloc; + // int row[2], col[4]; + double val[8]; + double Gff, Bff, Gft, Bft, Gtf, Btf, Gtt, Btt; + int ibusf(busf_idx[i]), ibust(bust_idx[i]); + int fbusidx(ibusf), tbusidx(ibust); + Gff = Gff_arr[i]; + Bff = Bff_arr[i]; + Gft = Gft_arr[i]; + Bft = Bft_arr[i]; + Gtf = Gtf_arr[i]; + Btf = Btf_arr[i]; + Gtt = Gtt_arr[i]; + Btt = Btt_arr[i]; + + double thetaf = x_dev[xidxf[i]], Vmf = x_dev[xidxf[i] + 1]; + double thetat = x_dev[xidxt[i]], Vmt = x_dev[xidxt[i] + 1]; + double thetaft = thetaf - thetat; + double thetatf = thetat - thetaf; + + double dPf_dthetaf_dthetaf, dPf_dthetaf_dVmf, dPf_dthetaf_dthetat, + dPf_dthetaf_dVmt; + double dPf_dVmf_dthetaf, dPf_dVmf_dVmf, dPf_dVmf_dthetat, + dPf_dVmf_dVmt; + double dPf_dthetat_dthetaf, dPf_dthetat_dVmf, dPf_dthetat_dthetat, + dPf_dthetat_dVmt; + double dPf_dVmt_dthetaf, dPf_dVmt_dVmf, dPf_dVmt_dthetat, + dPf_dVmt_dVmt; + + /* dPf_dthetaf = Vmf*Vmt*(-Gft*sin(thetaft) + Bft*cos(thetaft)); */ + dPf_dthetaf_dthetaf = + -Vmf * Vmt * (Gft * cos(thetaft) + Bft * sin(thetaft)); + dPf_dthetaf_dVmf = Vmt * (-Gft * sin(thetaft) + Bft * cos(thetaft)); + dPf_dthetaf_dthetat = + Vmf * Vmt * (Gft * cos(thetaft) + Bft * sin(thetaft)); + dPf_dthetaf_dVmt = Vmf * (-Gft * sin(thetaft) + Bft * cos(thetaft)); + + /* dPf_Vmf = 2*Gff*Vmf + Vmt*(Gft*cos(thetaft) + Bft*sin(thetaft)); + */ + dPf_dVmf_dthetaf = Vmt * (-Gft * sin(thetaft) + Bft * cos(thetaft)); + dPf_dVmf_dVmf = 2 * Gff; + dPf_dVmf_dthetat = Vmt * (Gft * sin(thetaft) - Bft * cos(thetaft)); + dPf_dVmf_dVmt = (Gft * cos(thetaft) + Bft * sin(thetaft)); + + /* dPf_dthetat = Vmf*Vmt*(Gft*sin(thetaft) - Bft*cos(thetaft)); */ + dPf_dthetat_dthetaf = + Vmf * Vmt * (Gft * cos(thetaft) + Bft * sin(thetaft)); + dPf_dthetat_dVmf = Vmt * (Gft * sin(thetaft) - Bft * cos(thetaft)); + dPf_dthetat_dthetat = + Vmf * Vmt * (-Gft * cos(thetaft) - Bft * sin(thetaft)); + dPf_dthetat_dVmt = Vmf * (Gft * sin(thetaft) - Bft * cos(thetaft)); + + /* dPf_dVmt = Vmf*(Gft*cos(thetaft) + Bft*sin(thetaft)); */ + dPf_dVmt_dthetaf = Vmf * (-Gft * sin(thetaft) + Bft * cos(thetaft)); + dPf_dVmt_dVmf = (Gft * cos(thetaft) + Bft * sin(thetaft)); + dPf_dVmt_dthetat = Vmf * (Gft * sin(thetaft) - Bft * cos(thetaft)); + dPf_dVmt_dVmt = 0.0; + + double dQf_dthetaf_dthetaf, dQf_dthetaf_dVmf, dQf_dthetaf_dthetat, + dQf_dthetaf_dVmt; + double dQf_dVmf_dthetaf, dQf_dVmf_dVmf, dQf_dVmf_dthetat, + dQf_dVmf_dVmt; + double dQf_dthetat_dthetaf, dQf_dthetat_dVmf, dQf_dthetat_dthetat, + dQf_dthetat_dVmt; + double dQf_dVmt_dthetaf, dQf_dVmt_dVmf, dQf_dVmt_dthetat, + dQf_dVmt_dVmt; + + /* dQf_dthetaf = Vmf*Vmt*(Bft*sin(thetaft) + Gft*cos(thetaft)); */ + dQf_dthetaf_dthetaf = + Vmf * Vmt * (Bft * cos(thetaft) - Gft * sin(thetaft)); + dQf_dthetaf_dVmf = Vmt * (Bft * sin(thetaft) + Gft * cos(thetaft)); + dQf_dthetaf_dthetat = + Vmf * Vmt * (-Bft * cos(thetaft) + Gft * sin(thetaft)); + dQf_dthetaf_dVmt = Vmf * (Bft * sin(thetaft) + Gft * cos(thetaft)); + + /* dQf_dVmf = -2*Bff*Vmf + Vmt*(-Bft*cos(thetaft) + Gft*sin(thetaft)); + */ + dQf_dVmf_dthetaf = Vmt * (Bft * sin(thetaft) + Gft * cos(thetaft)); + dQf_dVmf_dVmf = -2 * Bff; + dQf_dVmf_dthetat = Vmt * (-Bft * sin(thetaft) - Gft * cos(thetaft)); + dQf_dVmf_dVmt = (-Bft * cos(thetaft) + Gft * sin(thetaft)); + + /* dQf_dthetat = Vmf*Vmt*(-Bft*sin(thetaft) - Gft*cos(thetaft)); */ + dQf_dthetat_dthetaf = + Vmf * Vmt * (-Bft * cos(thetaft) + Gft * sin(thetaft)); + dQf_dthetat_dVmf = Vmt * (-Bft * sin(thetaft) - Gft * cos(thetaft)); + dQf_dthetat_dthetat = + Vmf * Vmt * (Bft * cos(thetaft) - Gft * sin(thetaft)); + dQf_dthetat_dVmt = Vmf * (-Bft * sin(thetaft) - Gft * cos(thetaft)); + + /* dQf_dVmt = Vmf*(-Bft*cos(thetaft) + Gft*sin(thetaft)); */ + dQf_dVmt_dthetaf = Vmf * (Bft * sin(thetaft) + Gft * cos(thetaft)); + dQf_dVmt_dVmf = (-Bft * cos(thetaft) + Gft * sin(thetaft)); + dQf_dVmt_dthetat = Vmf * (-Bft * sin(thetaft) - Gft * cos(thetaft)); + dQf_dVmt_dVmt = 0.0; + + // row[0] = xidxf[i] - nxsparse; + // row[1] = xidxf[i] + 1 - nxsparse; + // col[0] = xidxf[i] - nxsparse; + // col[1] = xidxf[i] + 1 - nxsparse; + // col[2] = xidxt[i] - nxsparse; + // col[3] = xidxt[i] + 1 - nxsparse; + + gloc = geqidxf[i]; + + val[0] = val[1] = val[2] = val[3] = val[4] = val[5] = val[6] = + val[7] = 0.0; + + val[0] = lambda_dev[gloc] * dPf_dthetaf_dthetaf + + lambda_dev[gloc + 1] * dQf_dthetaf_dthetaf; + val[1] = lambda_dev[gloc] * dPf_dthetaf_dVmf + + lambda_dev[gloc + 1] * dQf_dthetaf_dVmf; + val[2] = lambda_dev[gloc] * dPf_dthetaf_dthetat + + lambda_dev[gloc + 1] * dQf_dthetaf_dthetat; + val[3] = lambda_dev[gloc] * dPf_dthetaf_dVmt + + lambda_dev[gloc + 1] * dQf_dthetaf_dVmt; + + val[4] = lambda_dev[gloc] * dPf_dVmf_dthetaf + + lambda_dev[gloc + 1] * dQf_dVmf_dthetaf; + val[5] = lambda_dev[gloc] * dPf_dVmf_dVmf + + lambda_dev[gloc + 1] * dQf_dVmf_dVmf; + val[6] = lambda_dev[gloc] * dPf_dVmf_dthetat + + lambda_dev[gloc + 1] * dQf_dVmf_dthetat; + val[7] = lambda_dev[gloc] * dPf_dVmf_dVmt + + lambda_dev[gloc + 1] * dQf_dVmf_dVmt; + + // Remember central bus locations were reserved and indexed + // by bus (from-from) + + RAJA::atomicAdd( + &MHSS_dev[b_hesssp_idx[fbusidx] + 0], val[0]); + RAJA::atomicAdd( + &MHSS_dev[b_hesssp_idx[fbusidx] + 1], val[1]); + // not in upper triangle + // RAJA::atomicAdd + // (&MHSS_dev[b_hesssp_idx[fbusidx] + 2], val[4]); + RAJA::atomicAdd( + &MHSS_dev[b_hesssp_idx[fbusidx] + 2], val[5]); + + // Off-center entries (from-to bus) were reserved and + // indexed by line only if in upper triangle + + if (xidxt[i] > xidxf[i]) { + RAJA::atomicAdd(&MHSS_dev[ln_hessp_idx[i] + 0], + val[2]); + RAJA::atomicAdd(&MHSS_dev[ln_hessp_idx[i] + 1], + val[3]); + RAJA::atomicAdd(&MHSS_dev[ln_hessp_idx[i] + 2], + val[6]); + RAJA::atomicAdd(&MHSS_dev[ln_hessp_idx[i] + 3], + val[7]); + } + + // row[0] = xidxt[i] - nxsparse; + // row[1] = xidxt[i] + 1 - nxsparse; + + // col[0] = xidxf[i] - nxsparse; + // col[1] = xidxf[i] + 1 - nxsparse; + // col[2] = xidxt[i] - nxsparse; + // col[3] = xidxt[i] + 1 - nxsparse; + + val[0] = val[1] = val[2] = val[3] = val[4] = val[5] = val[6] = + val[7] = 0.0; + + val[0] = lambda_dev[gloc] * dPf_dthetat_dthetaf + + lambda_dev[gloc + 1] * dQf_dthetat_dthetaf; + val[1] = lambda_dev[gloc] * dPf_dthetat_dVmf + + lambda_dev[gloc + 1] * dQf_dthetat_dVmf; + val[2] = lambda_dev[gloc] * dPf_dthetat_dthetat + + lambda_dev[gloc + 1] * dQf_dthetat_dthetat; + val[3] = lambda_dev[gloc] * dPf_dthetat_dVmt + + lambda_dev[gloc + 1] * dQf_dthetat_dVmt; + + val[4] = lambda_dev[gloc] * dPf_dVmt_dthetaf + + lambda_dev[gloc + 1] * dQf_dVmt_dthetaf; + val[5] = lambda_dev[gloc] * dPf_dVmt_dVmf + + lambda_dev[gloc + 1] * dQf_dVmt_dVmf; + val[6] = lambda_dev[gloc] * dPf_dVmt_dthetat + + lambda_dev[gloc + 1] * dQf_dVmt_dthetat; + val[7] = lambda_dev[gloc] * dPf_dVmt_dVmt + + lambda_dev[gloc + 1] * dQf_dVmt_dVmt; + + // Remember central bus locations were reserved and indexed + // by bus (to-to) + + RAJA::atomicAdd( + &MHSS_dev[b_hesssp_idx[tbusidx] + 0], val[2]); + RAJA::atomicAdd( + &MHSS_dev[b_hesssp_idx[tbusidx] + 1], val[3]); + // not in upper triangle + // RAJA::atomicAdd + // (&MHSS_dev[b_hesssp_idx[tbusidx] + 2], val[6]); + RAJA::atomicAdd( + &MHSS_dev[b_hesssp_idx[tbusidx] + 2], val[7]); + + // Off-center entries (to-from bus) were reserved and + // indexed by line only if in upper triangle + + if (xidxf[i] > xidxt[i]) { + RAJA::atomicAdd(&MHSS_dev[ln_hessp_idx[i] + 0], + val[0]); + RAJA::atomicAdd(&MHSS_dev[ln_hessp_idx[i] + 1], + val[1]); + RAJA::atomicAdd(&MHSS_dev[ln_hessp_idx[i] + 2], + val[4]); + RAJA::atomicAdd(&MHSS_dev[ln_hessp_idx[i] + 3], + val[5]); + } + + // ierr = MatSetValues(H,2,row,4,col,val,ADD_VALUES);CHKERRQ(ierr); + + double dPt_dthetat_dthetat, dPt_dthetat_dVmt, dPt_dthetat_dthetaf, + dPt_dthetat_dVmf; + double dPt_dVmt_dthetat, dPt_dVmt_dVmt, dPt_dVmt_dthetaf, + dPt_dVmt_dVmf; + double dPt_dthetaf_dthetat, dPt_dthetaf_dVmt, dPt_dthetaf_dthetaf, + dPt_dthetaf_dVmf; + double dPt_dVmf_dthetat, dPt_dVmf_dVmt, dPt_dVmf_dthetaf, + dPt_dVmf_dVmf; + + /* dPt_dthetat = Vmf*Vmt*(-Gtf*sin(thetatf) + Btf*cos(thetatf)); */ + dPt_dthetat_dthetat = + Vmf * Vmt * (-Gtf * cos(thetatf) - Btf * sin(thetatf)); + dPt_dthetat_dVmt = Vmf * (-Gtf * sin(thetatf) + Btf * cos(thetatf)); + dPt_dthetat_dthetaf = + Vmf * Vmt * (Gtf * cos(thetatf) + Btf * sin(thetatf)); + dPt_dthetat_dVmf = Vmt * (-Gtf * sin(thetatf) + Btf * cos(thetatf)); + + /* dPt_Vmt = 2*Gtt*Vmt + Vmf*(Gtf*cos(thetatf) + Btf*sin(thetatf)); + */ + dPt_dVmt_dthetat = Vmf * (-Gtf * sin(thetatf) + Bft * cos(thetatf)); + dPt_dVmt_dVmt = 2 * Gtt; + dPt_dVmt_dthetaf = Vmf * (Gtf * sin(thetatf) - Btf * cos(thetatf)); + dPt_dVmt_dVmf = (Gtf * cos(thetatf) + Btf * sin(thetatf)); + + /* dPt_dthetaf = Vmf*Vmt*(Gtf*sin(thetatf) - Btf*cos(thetatf)); */ + dPt_dthetaf_dthetat = + Vmf * Vmt * (Gtf * cos(thetatf) + Btf * sin(thetatf)); + dPt_dthetaf_dVmt = Vmf * (Gtf * sin(thetatf) - Btf * cos(thetatf)); + dPt_dthetaf_dthetaf = + Vmf * Vmt * (-Gtf * cos(thetatf) - Btf * sin(thetatf)); + dPt_dthetaf_dVmf = Vmt * (Gtf * sin(thetatf) - Btf * cos(thetatf)); + + /* dPt_dVmf = Vmt*(Gtf*cos(thetatf) + Btf*sin(thetatf)); */ + dPt_dVmf_dthetat = Vmt * (-Gtf * sin(thetatf) + Btf * cos(thetatf)); + dPt_dVmf_dVmt = (Gtf * cos(thetatf) + Btf * sin(thetatf)); + dPt_dVmf_dthetaf = Vmt * (Gtf * sin(thetatf) - Btf * cos(thetatf)); + dPt_dVmf_dVmf = 0.0; + + double dQt_dthetaf_dthetaf, dQt_dthetaf_dVmf, dQt_dthetaf_dthetat, + dQt_dthetaf_dVmt; + double dQt_dVmf_dthetaf, dQt_dVmf_dVmf, dQt_dVmf_dthetat, + dQt_dVmf_dVmt; + double dQt_dthetat_dthetaf, dQt_dthetat_dVmf, dQt_dthetat_dthetat, + dQt_dthetat_dVmt; + double dQt_dVmt_dthetaf, dQt_dVmt_dVmf, dQt_dVmt_dthetat, + dQt_dVmt_dVmt; + + /* dQt_dthetat = Vmf*Vmt*(Btf*sin(thetatf) + Gtf*cos(thetatf)); */ + dQt_dthetat_dthetat = + Vmf * Vmt * (Btf * cos(thetatf) - Gtf * sin(thetatf)); + dQt_dthetat_dVmt = Vmf * (Btf * sin(thetatf) + Gtf * cos(thetatf)); + dQt_dthetat_dthetaf = + Vmf * Vmt * (-Btf * cos(thetatf) + Gtf * sin(thetatf)); + dQt_dthetat_dVmf = Vmf * (Btf * sin(thetatf) + Gtf * cos(thetatf)); + + /* dQt_dVmt = -2*Btt*Vmt + Vmf*(-Btf*cos(thetatf) + Gtf*sin(thetatf)); + */ + dQt_dVmt_dthetat = Vmf * (Btf * sin(thetatf) + Gtf * cos(thetatf)); + dQt_dVmt_dVmt = -2 * Btt; + dQt_dVmt_dthetaf = Vmf * (-Btf * sin(thetatf) + Gtf * cos(thetatf)); + dQt_dVmt_dVmf = (-Btf * cos(thetatf) + Gtf * sin(thetatf)); + + /* dQt_dthetaf = Vmf*Vmt*(-Btf*sin(thetatf) - Gtf*cos(thetatf)); */ + dQt_dthetaf_dthetat = + Vmf * Vmt * (-Btf * cos(thetatf) + Gtf * sin(thetatf)); + dQt_dthetaf_dVmt = Vmf * (-Btf * sin(thetatf) - Gtf * cos(thetatf)); + dQt_dthetaf_dthetaf = + Vmf * Vmt * (Btf * cos(thetatf) - Gtf * sin(thetatf)); + dQt_dthetaf_dVmf = Vmt * (-Btf * sin(thetatf) - Gtf * cos(thetatf)); + + /* dQt_dVmf = Vmt*(-Btf*cos(thetatf) + Gtf*sin(thetatf)); */ + dQt_dVmf_dthetat = Vmt * (Btf * sin(thetatf) + Gtf * cos(thetatf)); + dQt_dVmf_dVmt = (-Btf * cos(thetatf) + Gtf * sin(thetatf)); + dQt_dVmf_dthetaf = Vmt * (-Btf * sin(thetatf) - Gtf * cos(thetatf)); + dQt_dVmf_dVmf = 0.0; + + // row[0] = xidxt[i] - nxsparse; + // row[1] = xidxt[i] + 1 - nxsparse; + // col[0] = xidxt[i] - nxsparse; + // col[1] = xidxt[i] + 1 - nxsparse; + // col[2] = xidxf[i] - nxsparse; + // col[3] = xidxf[i] + 1 - nxsparse; + + gloc = geqidxt[i]; + + val[0] = val[1] = val[2] = val[3] = val[4] = val[5] = val[6] = + val[7] = 0.0; + + val[0] = lambda_dev[gloc] * dPt_dthetat_dthetat + + lambda_dev[gloc + 1] * dQt_dthetat_dthetat; + val[1] = lambda_dev[gloc] * dPt_dthetat_dVmt + + lambda_dev[gloc + 1] * dQt_dthetat_dVmt; + val[2] = lambda_dev[gloc] * dPt_dthetat_dthetaf + + lambda_dev[gloc + 1] * dQt_dthetat_dthetaf; + val[3] = lambda_dev[gloc] * dPt_dthetat_dVmf + + lambda_dev[gloc + 1] * dQt_dthetat_dVmf; + + val[4] = lambda_dev[gloc] * dPt_dVmt_dthetat + + lambda_dev[gloc + 1] * dQt_dVmt_dthetat; + val[5] = lambda_dev[gloc] * dPt_dVmt_dVmt + + lambda_dev[gloc + 1] * dQt_dVmt_dVmt; + val[6] = lambda_dev[gloc] * dPt_dVmt_dthetaf + + lambda_dev[gloc + 1] * dQt_dVmt_dthetaf; + val[7] = lambda_dev[gloc] * dPt_dVmt_dVmf + + lambda_dev[gloc + 1] * dQt_dVmt_dVmf; + + // to-to diagonal bus entries + RAJA::atomicAdd( + &MHSS_dev[b_hesssp_idx[tbusidx] + 0], val[0]); + RAJA::atomicAdd( + &MHSS_dev[b_hesssp_idx[tbusidx] + 1], val[1]); + // not in upper triangle + // RAJA::atomicAdd + // (&MHSS_dev[b_hesssp_idx[tbusidx] + 2], val[4]); + RAJA::atomicAdd( + &MHSS_dev[b_hesssp_idx[tbusidx] + 2], val[5]); + + // off-center to-from entries + if (xidxf[i] > xidxt[i]) { + RAJA::atomicAdd(&MHSS_dev[ln_hessp_idx[i] + 0], + val[2]); + RAJA::atomicAdd(&MHSS_dev[ln_hessp_idx[i] + 1], + val[3]); + RAJA::atomicAdd(&MHSS_dev[ln_hessp_idx[i] + 2], + val[6]); + RAJA::atomicAdd(&MHSS_dev[ln_hessp_idx[i] + 3], + val[7]); + } + + // row[0] = xidxf[i] - nxsparse; + // row[1] = xidxf[i] + 1 - nxsparse; + // col[0] = xidxt[i] - nxsparse; + // col[1] = xidxt[i] + 1 - nxsparse; + // col[2] = xidxf[i] - nxsparse; + // col[3] = xidxf[i] + 1 - nxsparse; + + val[0] = val[1] = val[2] = val[3] = val[4] = val[5] = val[6] = + val[7] = 0.0; + + val[0] = lambda_dev[gloc] * dPt_dthetaf_dthetat + + lambda_dev[gloc + 1] * dQt_dthetaf_dthetat; + val[1] = lambda_dev[gloc] * dPt_dthetaf_dVmt + + lambda_dev[gloc + 1] * dQt_dthetaf_dVmt; + val[2] = lambda_dev[gloc] * dPt_dthetaf_dthetaf + + lambda_dev[gloc + 1] * dQt_dthetaf_dthetaf; + val[3] = lambda_dev[gloc] * dPt_dthetaf_dVmf + + lambda_dev[gloc + 1] * dQt_dthetaf_dVmf; + + val[4] = lambda_dev[gloc] * dPt_dVmf_dthetat + + lambda_dev[gloc + 1] * dQt_dVmf_dthetat; + val[5] = lambda_dev[gloc] * dPt_dVmf_dVmt + + lambda_dev[gloc + 1] * dQt_dVmf_dVmt; + val[6] = lambda_dev[gloc] * dPt_dVmf_dthetaf + + lambda_dev[gloc + 1] * dQt_dVmf_dthetaf; + val[7] = lambda_dev[gloc] * dPt_dVmf_dVmf + + lambda_dev[gloc + 1] * dQt_dVmf_dVmf; + + // from-from bus entries + RAJA::atomicAdd( + &MHSS_dev[b_hesssp_idx[fbusidx] + 0], val[0]); + RAJA::atomicAdd( + &MHSS_dev[b_hesssp_idx[fbusidx] + 1], val[1]); + // RAJA::atomicAdd + // (&MHSS_dev[b_hesssp_idx[fbusidx] + 2], val[4]); + RAJA::atomicAdd( + &MHSS_dev[b_hesssp_idx[fbusidx] + 2], val[7]); + + // off-center from-to entries + if (xidxt[i] > xidxf[i]) { + RAJA::atomicAdd(&MHSS_dev[ln_hessp_idx[i] + 0], + val[0]); + RAJA::atomicAdd(&MHSS_dev[ln_hessp_idx[i] + 1], + val[1]); + RAJA::atomicAdd(&MHSS_dev[ln_hessp_idx[i] + 2], + val[4]); + RAJA::atomicAdd(&MHSS_dev[ln_hessp_idx[i] + 3], + val[5]); + } + }); + + /* Loadloss contributions - 2 contributions expected */ + if (opflow->include_loadloss_variables) { + int *l_hesssp_idx = loadparams->hesssp_idx_dev_; + RAJA::forall( + RAJA::RangeSegment(0, loadparams->nload), + RAJA_LAMBDA(RAJA::Index_type i) { + MHSS_dev[l_hesssp_idx[i]] = 0.0; + MHSS_dev[l_hesssp_idx[i] + 1] = 0.0; + }); + } + + if (!opflow->ignore_lineflow_constraints) { + int *linelimidx = lineparams->linelimidx_dev_; + int *gineqidx = lineparams->gineqidx_dev_; + RAJA::forall( + RAJA::RangeSegment(0, lineparams->nlinelim), + RAJA_LAMBDA(RAJA::Index_type i) { + int j = linelimidx[i]; + int gloc; + // int row[2], col[4]; + double val[8]; + + double Pf, Qf, Pt, Qt; + double thetaf = x_dev[xidxf[j]], Vmf = x_dev[xidxf[j] + 1]; + double thetat = x_dev[xidxt[j]], Vmt = x_dev[xidxt[j] + 1]; + double thetaft = thetaf - thetat; + double thetatf = thetat - thetaf; + double Gff = Gff_arr[j], Bff = Bff_arr[j]; + double Gft = Gft_arr[j], Bft = Bft_arr[j]; + double Gtf = Gtf_arr[j], Btf = Btf_arr[j]; + double Gtt = Gtt_arr[j], Btt = Btt_arr[j]; + int fbusidx(busf_idx[j]), tbusidx(bust_idx[j]); + + Pf = Gff * Vmf * Vmf + + Vmf * Vmt * (Gft * cos(thetaft) + Bft * sin(thetaft)); + Qf = -Bff * Vmf * Vmf + + Vmf * Vmt * (-Bft * cos(thetaft) + Gft * sin(thetaft)); + + Pt = Gtt * Vmt * Vmt + + Vmt * Vmf * (Gtf * cos(thetatf) + Btf * sin(thetatf)); + Qt = -Btt * Vmt * Vmt + + Vmt * Vmf * (-Btf * cos(thetatf) + Gtf * sin(thetatf)); + + double dSf2_dPf, dSf2_dQf, dSt2_dPt, dSt2_dQt; + + dSf2_dPf = 2. * Pf; + dSf2_dQf = 2. * Qf; + dSt2_dPt = 2. * Pt; + dSt2_dQt = 2. * Qt; + + double dPf_dthetaf, dPf_dVmf, dPf_dthetat, dPf_dVmt; + double dQf_dthetaf, dQf_dVmf, dQf_dthetat, dQf_dVmt; + double dPt_dthetaf, dPt_dVmf, dPt_dthetat, dPt_dVmt; + double dQt_dthetaf, dQt_dVmf, dQt_dthetat, dQt_dVmt; + + dPf_dthetaf = + Vmf * Vmt * (-Gft * sin(thetaft) + Bft * cos(thetaft)); + dPf_dVmf = 2. * Gff * Vmf + + Vmt * (Gft * cos(thetaft) + Bft * sin(thetaft)); + dPf_dthetat = Vmf * Vmt * (Gft * sin(thetaft) - Bft * cos(thetaft)); + dPf_dVmt = Vmf * (Gft * cos(thetaft) + Bft * sin(thetaft)); + + dQf_dthetaf = Vmf * Vmt * (Bft * sin(thetaft) + Gft * cos(thetaft)); + dQf_dVmf = -2. * Bff * Vmf + + Vmt * (-Bft * cos(thetaft) + Gft * sin(thetaft)); + dQf_dthetat = + Vmf * Vmt * (-Bft * sin(thetaft) - Gft * cos(thetaft)); + dQf_dVmt = Vmf * (-Bft * cos(thetaft) + Gft * sin(thetaft)); + + dPt_dthetat = + Vmt * Vmf * (-Gtf * sin(thetatf) + Btf * cos(thetatf)); + dPt_dVmt = 2. * Gtt * Vmt + + Vmf * (Gtf * cos(thetatf) + Btf * sin(thetatf)); + dPt_dthetaf = Vmt * Vmf * (Gtf * sin(thetatf) - Btf * cos(thetatf)); + dPt_dVmf = Vmt * (Gtf * cos(thetatf) + Btf * sin(thetatf)); + + dQt_dthetat = Vmt * Vmf * (Btf * sin(thetatf) + Gtf * cos(thetatf)); + dQt_dVmt = -2. * Btt * Vmt + + Vmf * (-Btf * cos(thetatf) + Gtf * sin(thetatf)); + dQt_dthetaf = + Vmt * Vmf * (-Btf * sin(thetatf) - Gtf * cos(thetatf)); + dQt_dVmf = Vmt * (-Btf * cos(thetatf) + Gtf * sin(thetatf)); + + double d2Pf_dthetaf_dthetaf, d2Pf_dthetaf_dVmf, + d2Pf_dthetaf_dthetat, d2Pf_dthetaf_dVmt; + double d2Pf_dVmf_dthetaf, d2Pf_dVmf_dVmf, d2Pf_dVmf_dthetat, + d2Pf_dVmf_dVmt; + double d2Pf_dthetat_dthetaf, d2Pf_dthetat_dVmf, + d2Pf_dthetat_dthetat, d2Pf_dthetat_dVmt; + double d2Pf_dVmt_dthetaf, d2Pf_dVmt_dVmf, d2Pf_dVmt_dthetat, + d2Pf_dVmt_dVmt; + + /* dPf_dthetaf = Vmf*Vmt*(-Gft*sin(thetaft) + Bft*cos(thetaft)); */ + d2Pf_dthetaf_dthetaf = + -Vmf * Vmt * (Gft * cos(thetaft) + Bft * sin(thetaft)); + d2Pf_dthetaf_dVmf = + Vmt * (-Gft * sin(thetaft) + Bft * cos(thetaft)); + d2Pf_dthetaf_dthetat = + Vmf * Vmt * (Gft * cos(thetaft) + Bft * sin(thetaft)); + d2Pf_dthetaf_dVmt = + Vmf * (-Gft * sin(thetaft) + Bft * cos(thetaft)); + + /* dPf_Vmf = 2*Gff*Vmf + Vmt*(Gft*cos(thetaft) + Bft*sin(thetaft)); + */ + d2Pf_dVmf_dthetaf = + Vmt * (-Gft * sin(thetaft) + Bft * cos(thetaft)); + d2Pf_dVmf_dVmf = 2 * Gff; + d2Pf_dVmf_dthetat = Vmt * (Gft * sin(thetaft) - Bft * cos(thetaft)); + d2Pf_dVmf_dVmt = (Gft * cos(thetaft) + Bft * sin(thetaft)); + + /* dPf_dthetat = Vmf*Vmt*(Gft*sin(thetaft) - Bft*cos(thetaft)); */ + d2Pf_dthetat_dthetaf = + Vmf * Vmt * (Gft * cos(thetaft) + Bft * sin(thetaft)); + d2Pf_dthetat_dVmf = Vmt * (Gft * sin(thetaft) - Bft * cos(thetaft)); + d2Pf_dthetat_dthetat = + Vmf * Vmt * (-Gft * cos(thetaft) - Bft * sin(thetaft)); + d2Pf_dthetat_dVmt = Vmf * (Gft * sin(thetaft) - Bft * cos(thetaft)); + + /* dPf_dVmt = Vmf*(Gft*cos(thetaft) + Bft*sin(thetaft)); */ + d2Pf_dVmt_dthetaf = + Vmf * (-Gft * sin(thetaft) + Bft * cos(thetaft)); + d2Pf_dVmt_dVmf = (Gft * cos(thetaft) + Bft * sin(thetaft)); + d2Pf_dVmt_dthetat = Vmf * (Gft * sin(thetaft) - Bft * cos(thetaft)); + d2Pf_dVmt_dVmt = 0.0; + + double d2Qf_dthetaf_dthetaf, d2Qf_dthetaf_dVmf, + d2Qf_dthetaf_dthetat, d2Qf_dthetaf_dVmt; + double d2Qf_dVmf_dthetaf, d2Qf_dVmf_dVmf, d2Qf_dVmf_dthetat, + d2Qf_dVmf_dVmt; + double d2Qf_dthetat_dthetaf, d2Qf_dthetat_dVmf, + d2Qf_dthetat_dthetat, d2Qf_dthetat_dVmt; + double d2Qf_dVmt_dthetaf, d2Qf_dVmt_dVmf, d2Qf_dVmt_dthetat, + d2Qf_dVmt_dVmt; + + /* dQf_dthetaf = Vmf*Vmt*(Bft*sin(thetaft) + Gft*cos(thetaft)); */ + d2Qf_dthetaf_dthetaf = + Vmf * Vmt * (Bft * cos(thetaft) - Gft * sin(thetaft)); + d2Qf_dthetaf_dVmf = Vmt * (Bft * sin(thetaft) + Gft * cos(thetaft)); + d2Qf_dthetaf_dthetat = + Vmf * Vmt * (-Bft * cos(thetaft) + Gft * sin(thetaft)); + d2Qf_dthetaf_dVmt = Vmf * (Bft * sin(thetaft) + Gft * cos(thetaft)); + + /* dQf_dVmf = -2*Bff*Vmf + Vmt*(-Bft*cos(thetaft) + + * Gft*sin(thetaft)); + */ + d2Qf_dVmf_dthetaf = Vmt * (Bft * sin(thetaft) + Gft * cos(thetaft)); + d2Qf_dVmf_dVmf = -2 * Bff; + d2Qf_dVmf_dthetat = + Vmt * (-Bft * sin(thetaft) - Gft * cos(thetaft)); + d2Qf_dVmf_dVmt = (-Bft * cos(thetaft) + Gft * sin(thetaft)); + + /* dQf_dthetat = Vmf*Vmt*(-Bft*sin(thetaft) - Gft*cos(thetaft)); */ + d2Qf_dthetat_dthetaf = + Vmf * Vmt * (-Bft * cos(thetaft) + Gft * sin(thetaft)); + d2Qf_dthetat_dVmf = + Vmt * (-Bft * sin(thetaft) - Gft * cos(thetaft)); + d2Qf_dthetat_dthetat = + Vmf * Vmt * (Bft * cos(thetaft) - Gft * sin(thetaft)); + d2Qf_dthetat_dVmt = + Vmf * (-Bft * sin(thetaft) - Gft * cos(thetaft)); + + /* dQf_dVmt = Vmf*(-Bft*cos(thetaft) + Gft*sin(thetaft)); */ + d2Qf_dVmt_dthetaf = Vmf * (Bft * sin(thetaft) + Gft * cos(thetaft)); + d2Qf_dVmt_dVmf = (-Bft * cos(thetaft) + Gft * sin(thetaft)); + d2Qf_dVmt_dthetat = + Vmf * (-Bft * sin(thetaft) - Gft * cos(thetaft)); + d2Qf_dVmt_dVmt = 0.0; + + double d2Pt_dthetat_dthetat, d2Pt_dthetat_dVmt, + d2Pt_dthetat_dthetaf, d2Pt_dthetat_dVmf; + double d2Pt_dVmt_dthetat, d2Pt_dVmt_dVmt, d2Pt_dVmt_dthetaf, + d2Pt_dVmt_dVmf; + double d2Pt_dthetaf_dthetat, d2Pt_dthetaf_dVmt, + d2Pt_dthetaf_dthetaf, d2Pt_dthetaf_dVmf; + double d2Pt_dVmf_dthetat, d2Pt_dVmf_dVmt, d2Pt_dVmf_dthetaf, + d2Pt_dVmf_dVmf; + + /* dPt_dthetat = Vmf*Vmt*(-Gtf*sin(thetatf) + Btf*cos(thetatf)); */ + d2Pt_dthetat_dthetat = + Vmf * Vmt * (-Gtf * cos(thetatf) - Btf * sin(thetatf)); + d2Pt_dthetat_dVmt = + Vmf * (-Gtf * sin(thetatf) + Btf * cos(thetatf)); + d2Pt_dthetat_dthetaf = + Vmf * Vmt * (Gtf * cos(thetatf) + Btf * sin(thetatf)); + d2Pt_dthetat_dVmf = + Vmt * (-Gtf * sin(thetatf) + Btf * cos(thetatf)); + + /* dPt_Vmt = 2*Gtt*Vmt + Vmf*(Gtf*cos(thetatf) + Btf*sin(thetatf)); + */ + d2Pt_dVmt_dthetat = + Vmf * (-Gtf * sin(thetatf) + Bft * cos(thetatf)); + d2Pt_dVmt_dVmt = 2 * Gtt; + d2Pt_dVmt_dthetaf = Vmf * (Gtf * sin(thetatf) - Btf * cos(thetatf)); + d2Pt_dVmt_dVmf = (Gtf * cos(thetatf) + Btf * sin(thetatf)); + + /* dPt_dthetaf = Vmf*Vmt*(Gtf*sin(thetatf) - Btf*cos(thetatf)); */ + d2Pt_dthetaf_dthetat = + Vmf * Vmt * (Gtf * cos(thetatf) + Btf * sin(thetatf)); + d2Pt_dthetaf_dVmt = Vmf * (Gtf * sin(thetatf) - Btf * cos(thetatf)); + d2Pt_dthetaf_dthetaf = + Vmf * Vmt * (-Gtf * cos(thetatf) - Btf * sin(thetatf)); + d2Pt_dthetaf_dVmf = Vmt * (Gtf * sin(thetatf) - Btf * cos(thetatf)); + + /* dPt_dVmf = Vmt*(Gtf*cos(thetatf) + Btf*sin(thetatf)); */ + d2Pt_dVmf_dthetat = + Vmt * (-Gtf * sin(thetatf) + Btf * cos(thetatf)); + d2Pt_dVmf_dVmt = (Gtf * cos(thetatf) + Btf * sin(thetatf)); + d2Pt_dVmf_dthetaf = Vmt * (Gtf * sin(thetatf) - Btf * cos(thetatf)); + d2Pt_dVmf_dVmf = 0.0; + + double d2Qt_dthetaf_dthetaf, d2Qt_dthetaf_dVmf, + d2Qt_dthetaf_dthetat, d2Qt_dthetaf_dVmt; + double d2Qt_dVmf_dthetaf, d2Qt_dVmf_dVmf, d2Qt_dVmf_dthetat, + d2Qt_dVmf_dVmt; + double d2Qt_dthetat_dthetaf, d2Qt_dthetat_dVmf, + d2Qt_dthetat_dthetat, d2Qt_dthetat_dVmt; + double d2Qt_dVmt_dthetaf, d2Qt_dVmt_dVmf, d2Qt_dVmt_dthetat, + d2Qt_dVmt_dVmt; + + /* dQt_dthetat = Vmf*Vmt*(Btf*sin(thetatf) + Gtf*cos(thetatf)); */ + d2Qt_dthetat_dthetat = + Vmf * Vmt * (Btf * cos(thetatf) - Gtf * sin(thetatf)); + d2Qt_dthetat_dVmt = Vmf * (Btf * sin(thetatf) + Gtf * cos(thetatf)); + d2Qt_dthetat_dthetaf = + Vmf * Vmt * (-Btf * cos(thetatf) + Gtf * sin(thetatf)); + d2Qt_dthetat_dVmf = Vmf * (Btf * sin(thetatf) + Gtf * cos(thetatf)); + + /* dQt_dVmt = -2*Btt*Vmt + Vmf*(-Btf*cos(thetatf) + + * Gtf*sin(thetatf)); + */ + d2Qt_dVmt_dthetat = Vmf * (Btf * sin(thetatf) + Gtf * cos(thetatf)); + d2Qt_dVmt_dVmt = -2 * Btt; + d2Qt_dVmt_dthetaf = + Vmf * (-Btf * sin(thetatf) + Gtf * cos(thetatf)); + d2Qt_dVmt_dVmf = (-Btf * cos(thetatf) + Gtf * sin(thetatf)); + + /* dQt_dthetaf = Vmf*Vmt*(-Btf*sin(thetatf) - Gtf*cos(thetatf)); */ + d2Qt_dthetaf_dthetat = + Vmf * Vmt * (-Btf * cos(thetatf) + Gtf * sin(thetatf)); + d2Qt_dthetaf_dVmt = + Vmf * (-Btf * sin(thetatf) - Gtf * cos(thetatf)); + d2Qt_dthetaf_dthetaf = + Vmf * Vmt * (Btf * cos(thetatf) - Gtf * sin(thetatf)); + d2Qt_dthetaf_dVmf = + Vmt * (-Btf * sin(thetatf) - Gtf * cos(thetatf)); + + /* dQt_dVmf = Vmt*(-Btf*cos(thetatf) + Gtf*sin(thetatf)); */ + d2Qt_dVmf_dthetat = Vmt * (Btf * sin(thetatf) + Gtf * cos(thetatf)); + d2Qt_dVmf_dVmt = (-Btf * cos(thetatf) + Gtf * sin(thetatf)); + d2Qt_dVmf_dthetaf = + Vmt * (-Btf * sin(thetatf) - Gtf * cos(thetatf)); + d2Qt_dVmf_dVmf = 0.0; + + double d2Sf2_dthetaf_dthetaf = 0.0, d2Sf2_dthetaf_dVmf = 0.0, + d2Sf2_dthetaf_dthetat = 0.0, d2Sf2_dthetaf_dVmt = 0.0; + double d2St2_dthetaf_dthetaf = 0.0, d2St2_dthetaf_dVmf = 0.0, + d2St2_dthetaf_dthetat = 0.0, d2St2_dthetaf_dVmt = 0.0; + + d2Sf2_dthetaf_dthetaf = 2 * dPf_dthetaf * dPf_dthetaf + + dSf2_dPf * d2Pf_dthetaf_dthetaf + + 2 * dQf_dthetaf * dQf_dthetaf + + dSf2_dQf * d2Qf_dthetaf_dthetaf; + d2Sf2_dthetaf_dVmf = + 2 * dPf_dVmf * dPf_dthetaf + dSf2_dPf * d2Pf_dthetaf_dVmf + + 2 * dQf_dVmf * dQf_dthetaf + dSf2_dQf * d2Qf_dthetaf_dVmf; + d2Sf2_dthetaf_dthetat = 2 * dPf_dthetat * dPf_dthetaf + + dSf2_dPf * d2Pf_dthetaf_dthetat + + 2 * dQf_dthetat * dQf_dthetaf + + dSf2_dQf * d2Qf_dthetaf_dthetat; + d2Sf2_dthetaf_dVmt = + 2 * dPf_dVmt * dPf_dthetaf + dSf2_dPf * d2Pf_dthetaf_dVmt + + 2 * dQf_dVmt * dQf_dthetaf + dSf2_dQf * d2Qf_dthetaf_dVmt; + + d2St2_dthetaf_dthetaf = 2 * dPt_dthetaf * dPt_dthetaf + + dSt2_dPt * d2Pt_dthetaf_dthetaf + + 2 * dQt_dthetaf * dQt_dthetaf + + dSt2_dQt * d2Qt_dthetaf_dthetaf; + d2St2_dthetaf_dVmf = + 2 * dPt_dVmf * dPt_dthetaf + dSt2_dPt * d2Pt_dthetaf_dVmf + + 2 * dQt_dVmf * dQt_dthetaf + dSt2_dQt * d2Qt_dthetaf_dVmf; + d2St2_dthetaf_dthetat = 2 * dPt_dthetat * dPt_dthetaf + + dSt2_dPt * d2Pt_dthetaf_dthetat + + 2 * dQt_dthetat * dQt_dthetaf + + dSt2_dQt * d2Qt_dthetaf_dthetat; + d2St2_dthetaf_dVmt = + 2 * dPt_dVmt * dPt_dthetaf + dSt2_dPt * d2Pt_dthetaf_dVmt + + 2 * dQt_dVmt * dQt_dthetaf + dSt2_dQt * d2Qt_dthetaf_dVmt; + + val[0] = val[1] = val[2] = val[3] = 0.0; + + // row[0] = xidxf[j] - nxsparse; + // col[0] = xidxf[j] - nxsparse; + // col[1] = xidxf[j] + 1 - nxsparse; + // col[2] = xidxt[j] - nxsparse; + // col[3] = xidxt[j] + 1 - nxsparse; + + gloc = gineqidx[i]; + + val[0] = lambda_dev[gloc] * d2Sf2_dthetaf_dthetaf + + lambda_dev[gloc + 1] * d2St2_dthetaf_dthetaf; + val[1] = lambda_dev[gloc] * d2Sf2_dthetaf_dVmf + + lambda_dev[gloc + 1] * d2St2_dthetaf_dVmf; + val[2] = lambda_dev[gloc] * d2Sf2_dthetaf_dthetat + + lambda_dev[gloc + 1] * d2St2_dthetaf_dthetat; + val[3] = lambda_dev[gloc] * d2Sf2_dthetaf_dVmt + + lambda_dev[gloc + 1] * d2St2_dthetaf_dVmt; + + RAJA::atomicAdd(&MHSS_dev[fbusidx + 0], val[0]); + RAJA::atomicAdd(&MHSS_dev[fbusidx + 1], val[1]); + + if (xidxt[j] > xidxf[j]) { + RAJA::atomicAdd(&MHSS_dev[ln_hessp_idx[j] + 0], + val[2]); + RAJA::atomicAdd(&MHSS_dev[ln_hessp_idx[j] + 1], + val[3]); + } + + double d2Sf2_dVmf_dthetaf, d2Sf2_dVmf_dVmf, d2Sf2_dVmf_dthetat, + d2Sf2_dVmf_dVmt; + double d2St2_dVmf_dthetaf, d2St2_dVmf_dVmf, d2St2_dVmf_dthetat, + d2St2_dVmf_dVmt; + + d2Sf2_dVmf_dthetaf = + 2 * dPf_dthetaf * dPf_dVmf + dSf2_dPf * d2Pf_dVmf_dthetaf + + 2 * dQf_dthetaf * dQf_dVmf + dSf2_dQf * d2Qf_dVmf_dthetaf; + d2Sf2_dVmf_dVmf = + 2 * dPf_dVmf * dPf_dVmf + dSf2_dPf * d2Pf_dVmf_dVmf + + 2 * dQf_dVmf * dQf_dVmf + dSf2_dQf * d2Qf_dVmf_dVmf; + d2Sf2_dVmf_dthetat = + 2 * dPf_dthetat * dPf_dVmf + dSf2_dPf * d2Pf_dVmf_dthetat + + 2 * dQf_dthetat * dQf_dVmf + dSf2_dQf * d2Qf_dVmf_dthetat; + d2Sf2_dVmf_dVmt = + 2 * dPf_dVmt * dPf_dVmf + dSf2_dPf * d2Pf_dVmf_dVmt + + 2 * dQf_dVmt * dQf_dVmf + dSf2_dQf * d2Qf_dVmf_dVmt; + + d2St2_dVmf_dthetaf = + 2 * dPt_dthetaf * dPt_dVmf + dSt2_dPt * d2Pt_dVmf_dthetaf + + 2 * dQt_dthetaf * dQt_dVmf + dSt2_dQt * d2Qt_dVmf_dthetaf; + d2St2_dVmf_dVmf = + 2 * dPt_dVmf * dPt_dVmf + dSt2_dPt * d2Pt_dVmf_dVmf + + 2 * dQt_dVmf * dQt_dVmf + dSt2_dQt * d2Qt_dVmf_dVmf; + d2St2_dVmf_dthetat = + 2 * dPt_dthetat * dPt_dVmf + dSt2_dPt * d2Pt_dVmf_dthetat + + 2 * dQt_dthetat * dQt_dVmf + dSt2_dQt * d2Qt_dVmf_dthetat; + d2St2_dVmf_dVmt = + 2 * dPt_dVmt * dPt_dVmf + dSt2_dPt * d2Pt_dVmf_dVmt + + 2 * dQt_dVmt * dQt_dVmf + dSt2_dQt * d2Qt_dVmf_dVmt; + + val[0] = val[1] = val[2] = val[3] = 0.0; + + // row[0] = xidxf[j] + 1 - nxsparse; + // col[0] = xidxf[j] - nxsparse; + // col[1] = xidxf[j] + 1 - nxsparse; + // col[2] = xidxt[j] - nxsparse; + // col[3] = xidxt[j] + 1 - nxsparse; + + val[0] = lambda_dev[gloc] * d2Sf2_dVmf_dthetaf + + lambda_dev[gloc + 1] * d2St2_dVmf_dthetaf; + val[1] = lambda_dev[gloc] * d2Sf2_dVmf_dVmf + + lambda_dev[gloc + 1] * d2St2_dVmf_dVmf; + val[2] = lambda_dev[gloc] * d2Sf2_dVmf_dthetat + + lambda_dev[gloc + 1] * d2St2_dVmf_dthetat; + val[3] = lambda_dev[gloc] * d2Sf2_dVmf_dVmt + + lambda_dev[gloc + 1] * d2St2_dVmf_dVmt; + + // not in upper triangle + // RAJA::atomicAdd + // (&MHSS_dev[fbusidx + 2], val[0]); + RAJA::atomicAdd(&MHSS_dev[fbusidx + 2], val[1]); + + if (xidxt[j] > xidxf[j]) { + RAJA::atomicAdd(&MHSS_dev[ln_hessp_idx[j] + 2], + val[2]); + RAJA::atomicAdd(&MHSS_dev[ln_hessp_idx[j] + 3], + val[3]); + } + + // ierr = + // MatSetValues(H,1,row,4,col,val,ADD_VALUES);CHKERRQ(ierr); + + double d2Sf2_dthetat_dthetaf, d2Sf2_dthetat_dVmf, + d2Sf2_dthetat_dthetat, d2Sf2_dthetat_dVmt; + double d2St2_dthetat_dthetaf, d2St2_dthetat_dVmf, + d2St2_dthetat_dthetat, d2St2_dthetat_dVmt; + + d2Sf2_dthetat_dthetaf = 2 * dPf_dthetaf * dPf_dthetat + + dSf2_dPf * d2Pf_dthetat_dthetaf + + 2 * dQf_dthetat * dQf_dthetaf + + dSf2_dQf * d2Qf_dthetat_dthetaf; + d2Sf2_dthetat_dVmf = + 2 * dPf_dVmf * dPf_dthetat + dSf2_dPf * d2Pf_dthetat_dVmf + + 2 * dQf_dthetat * dQf_dVmf + dSf2_dQf * d2Qf_dthetat_dVmf; + d2Sf2_dthetat_dthetat = 2 * dPf_dthetat * dPf_dthetat + + dSf2_dPf * d2Pf_dthetat_dthetat + + 2 * dQf_dthetat * dQf_dthetat + + dSf2_dQf * d2Qf_dthetat_dthetat; + d2Sf2_dthetat_dVmt = + 2 * dPf_dVmt * dPf_dthetat + dSf2_dPf * d2Pf_dthetat_dVmt + + 2 * dQf_dthetat * dQf_dVmt + dSf2_dQf * d2Qf_dthetat_dVmt; + + d2St2_dthetat_dthetaf = 2 * dPt_dthetaf * dPt_dthetat + + dSt2_dPt * d2Pt_dthetat_dthetaf + + 2 * dQt_dthetaf * dQt_dthetat + + dSt2_dQt * d2Qt_dthetat_dthetaf; + d2St2_dthetat_dVmf = + 2 * dPt_dVmf * dPt_dthetat + dSt2_dPt * d2Pt_dthetat_dVmf + + 2 * dQt_dVmf * dQt_dthetat + dSt2_dQt * d2Qt_dthetat_dVmf; + d2St2_dthetat_dthetat = 2 * dPt_dthetat * dPt_dthetat + + dSt2_dPt * d2Pt_dthetat_dthetat + + 2 * dQt_dthetat * dQt_dthetat + + dSt2_dQt * d2Qt_dthetat_dthetat; + d2St2_dthetat_dVmt = + 2 * dPt_dVmt * dPt_dthetat + dSt2_dPt * d2Pt_dthetat_dVmt + + 2 * dQt_dVmt * dQt_dthetat + dSt2_dQt * d2Qt_dthetat_dVmt; + + val[0] = val[1] = val[2] = val[3] = 0.0; + + // row[0] = xidxt[j] - nxsparse; + // col[0] = xidxf[j] - nxsparse; + // col[1] = xidxf[j] + 1 - nxsparse; + // col[2] = xidxt[j] - nxsparse; + // col[3] = xidxt[j] + 1 - nxsparse; + + val[0] = lambda_dev[gloc] * d2Sf2_dthetat_dthetaf + + lambda_dev[gloc + 1] * d2St2_dthetat_dthetaf; + val[1] = lambda_dev[gloc] * d2Sf2_dthetat_dVmf + + lambda_dev[gloc + 1] * d2St2_dthetat_dVmf; + val[2] = lambda_dev[gloc] * d2Sf2_dthetat_dthetat + + lambda_dev[gloc + 1] * d2St2_dthetat_dthetat; + val[3] = lambda_dev[gloc] * d2Sf2_dthetat_dVmt + + lambda_dev[gloc + 1] * d2St2_dthetat_dVmt; + + RAJA::atomicAdd(&MHSS_dev[tbusidx + 0], val[2]); + RAJA::atomicAdd(&MHSS_dev[tbusidx + 1], val[3]); + + if (xidxf[j] > xidxt[j]) { + RAJA::atomicAdd(&MHSS_dev[ln_hessp_idx[j] + 0], + val[0]); + RAJA::atomicAdd(&MHSS_dev[ln_hessp_idx[j] + 1], + val[1]); + } + // ierr = + // MatSetValues(H,1,row,4,col,val,ADD_VALUES);CHKERRQ(ierr); + + double d2Sf2_dVmt_dthetaf, d2Sf2_dVmt_dVmf, d2Sf2_dVmt_dthetat, + d2Sf2_dVmt_dVmt; + double d2St2_dVmt_dthetaf, d2St2_dVmt_dVmf, d2St2_dVmt_dthetat, + d2St2_dVmt_dVmt; + + d2Sf2_dVmt_dthetaf = + 2 * dPf_dthetaf * dPf_dVmt + dSf2_dPf * d2Pf_dVmt_dthetaf + + 2 * dQf_dthetaf * dQf_dVmt + dSf2_dQf * d2Qf_dVmt_dthetaf; + d2Sf2_dVmt_dVmf = + 2 * dPf_dVmf * dPf_dVmt + dSf2_dPf * d2Pf_dVmt_dVmf + + 2 * dQf_dVmf * dQf_dVmt + dSf2_dQf * d2Qf_dVmt_dVmf; + d2Sf2_dVmt_dthetat = + 2 * dPf_dthetat * dPf_dVmt + dSf2_dPf * d2Pf_dVmt_dthetat + + 2 * dQf_dthetat * dQf_dVmt + dSf2_dQf * d2Qf_dVmt_dthetat; + d2Sf2_dVmt_dVmt = + 2 * dPf_dVmt * dPf_dVmt + dSf2_dPf * d2Pf_dVmt_dVmt + + 2 * dQf_dVmt * dQf_dVmt + dSf2_dQf * d2Qf_dVmt_dVmt; + + d2St2_dVmt_dthetaf = + 2 * dPt_dthetaf * dPt_dVmt + dSt2_dPt * d2Pt_dVmt_dthetaf + + 2 * dQt_dthetaf * dQt_dVmt + dSt2_dQt * d2Qt_dVmt_dthetaf; + d2St2_dVmt_dVmf = + 2 * dPt_dVmf * dPt_dVmt + dSt2_dPt * d2Pt_dVmt_dVmf + + 2 * dQt_dVmf * dQt_dVmt + dSt2_dQt * d2Qt_dVmt_dVmf; + d2St2_dVmt_dthetat = + 2 * dPt_dthetat * dPt_dVmt + dSt2_dPt * d2Pt_dVmt_dthetat + + 2 * dQt_dthetat * dQt_dVmt + dSt2_dQt * d2Qt_dVmt_dthetat; + d2St2_dVmt_dVmt = + 2 * dPt_dVmt * dPt_dVmt + dSt2_dPt * d2Pt_dVmt_dVmt + + 2 * dQt_dVmt * dQt_dVmt + dSt2_dQt * d2Qt_dVmt_dVmt; + + val[0] = val[1] = val[2] = val[3] = 0.0; + + // row[0] = xidxt[j] + 1 - nxsparse; + // col[0] = xidxf[j] - nxsparse; + // col[1] = xidxf[j] + 1 - nxsparse; + // col[2] = xidxt[j] - nxsparse; + // col[3] = xidxt[j] + 1 - nxsparse; + + val[0] = lambda_dev[gloc] * d2Sf2_dVmt_dthetaf + + lambda_dev[gloc + 1] * d2St2_dVmt_dthetaf; + val[1] = lambda_dev[gloc] * d2Sf2_dVmt_dVmf + + lambda_dev[gloc + 1] * d2St2_dVmt_dVmf; + val[2] = lambda_dev[gloc] * d2Sf2_dVmt_dthetat + + lambda_dev[gloc + 1] * d2St2_dVmt_dthetat; + val[3] = lambda_dev[gloc] * d2Sf2_dVmt_dVmt + + lambda_dev[gloc + 1] * d2St2_dVmt_dVmt; + + // not in upper triangle + // RAJA::atomicAdd + // (&MHSS_dev[tbusidx + 2], val[2]); + RAJA::atomicAdd(&MHSS_dev[tbusidx + 2], val[3]); + + RAJA::atomicAdd(&MHSS_dev[ln_hessp_idx[j] + 2], + val[0]); + RAJA::atomicAdd(&MHSS_dev[ln_hessp_idx[j] + 3], + val[1]); + }); + } + + int *iperm = pbpolrajahiopsparse->idx_hess_dev_; + + int *ipermout = + (int *)d_allocator_.allocate(opflow->nnz_hesssp * sizeof(int)); + resmgr.copy(ipermout, iperm); + + RAJA::stable_sort_pairs( + RAJA::make_span(ipermout, opflow->nnz_hesssp), + RAJA::make_span(MHSS_dev, opflow->nnz_hesssp), + RAJA::operators::less{}); + + if (debugmsg) { + PrintTriplets("Hessian Values (GPU):", opflow->nnz_hesssp, NULL, iHSS_dev, + jHSS_dev, MHSS_dev); + } + + d_allocator_.deallocate(ipermout); + + resmgr.memset(MHSS_dev, 0, opflow->nnz_hesssp * sizeof(double)); + ierr = VecGetArray(opflow->X, &x); CHKERRQ(ierr); @@ -841,6 +2795,11 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( // Copy over val_ineq to device resmgr.copy(MHSS_dev, pbpolrajahiopsparse->val_hess); + + if (debugmsg) { + PrintTriplets("Hessian Values:", opflow->nnz_hesssp, NULL, iHSS_dev, + jHSS_dev, MHSS_dev); + } } PetscFunctionReturn(0); diff --git a/src/opflow/model/power_bal_polar/pbpol.cpp b/src/opflow/model/power_bal_polar/pbpol.cpp index fbf41c3d0..1c6c05c5f 100644 --- a/src/opflow/model/power_bal_polar/pbpol.cpp +++ b/src/opflow/model/power_bal_polar/pbpol.cpp @@ -1,3 +1,6 @@ +#include +#include + #include "pbpol.h" #include "exago_config.h" #include @@ -1611,6 +1614,24 @@ PetscErrorCode OPFLOWModelSetNumConstraints_PBPOL(OPFLOW opflow, PetscFunctionReturn(0); } +static PetscErrorCode MatSetValues_and_Print(char code, Mat M, int nrow, + int row[], int ncol, int col[], + PetscScalar val[], + InsertMode mode) { + for (int r = 0, i = 0; r < nrow; ++r) { + for (int c = 0; c < ncol; ++c) { + if (col[c] >= row[r]) { + std::cout << "M" << code << ": " << std::setw(5) << std::right << row[r] + << " " << std::setw(5) << std::right << col[c] + << std::setw(12) << std::right << std::scientific + << std::setprecision(3) << val[i] << std::endl; + } + i++; + } + } + return MatSetValues(M, nrow, row, ncol, col, val, mode); +} + /* OPFLOWComputeEqualityConstraintsHessian - Computes the Hessian for the equality constraints function part @@ -1662,7 +1683,8 @@ PetscErrorCode OPFLOWComputeEqualityConstraintsHessian_PBPOL(OPFLOW opflow, col[0] = xloc + 1; val[0] = lambda[gloc] * 2 * bus->gl + lambda[gloc + 1] * (-2 * bus->bl); - ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); + // ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print('B', H, 1, row, 1, col, val, ADD_VALUES); CHKERRQ(ierr); ierr = PSBUSGetSupportingLines(bus, &nconnlines, &connlines); @@ -1806,7 +1828,8 @@ PetscErrorCode OPFLOWComputeEqualityConstraintsHessian_PBPOL(OPFLOW opflow, val[7] = lambda[gloc] * dPf_dVmf_dVmt + lambda[gloc + 1] * dQf_dVmf_dVmt; - ierr = MatSetValues(H, 2, row, 4, col, val, ADD_VALUES); + // ierr = MatSetValues(H, 2, row, 4, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print('N', H, 2, row, 4, col, val, ADD_VALUES); CHKERRQ(ierr); row[0] = xloct; @@ -1837,7 +1860,8 @@ PetscErrorCode OPFLOWComputeEqualityConstraintsHessian_PBPOL(OPFLOW opflow, val[7] = lambda[gloc] * dPf_dVmt_dVmt + lambda[gloc + 1] * dQf_dVmt_dVmt; - ierr = MatSetValues(H, 2, row, 4, col, val, ADD_VALUES); + // ierr = MatSetValues(H, 2, row, 4, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print('N', H, 2, row, 4, col, val, ADD_VALUES); CHKERRQ(ierr); } else { @@ -1945,7 +1969,8 @@ PetscErrorCode OPFLOWComputeEqualityConstraintsHessian_PBPOL(OPFLOW opflow, val[7] = lambda[gloc] * dPt_dVmt_dVmf + lambda[gloc + 1] * dQt_dVmt_dVmf; - ierr = MatSetValues(H, 2, row, 4, col, val, ADD_VALUES); + // ierr = MatSetValues(H, 2, row, 4, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print('N', H, 2, row, 4, col, val, ADD_VALUES); CHKERRQ(ierr); row[0] = xlocf; @@ -1976,7 +2001,8 @@ PetscErrorCode OPFLOWComputeEqualityConstraintsHessian_PBPOL(OPFLOW opflow, val[7] = lambda[gloc] * dPt_dVmf_dVmf + lambda[gloc + 1] * dQt_dVmf_dVmf; - ierr = MatSetValues(H, 2, row, 4, col, val, ADD_VALUES); + // ierr = MatSetValues(H, 2, row, 4, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print('N', H, 2, row, 4, col, val, ADD_VALUES); CHKERRQ(ierr); } } @@ -2064,20 +2090,26 @@ PetscErrorCode OPFLOWComputeInequalityConstraintsHessian_PBPOL(OPFLOW opflow, val[1] = -lambda[gloc] - lambda[gloc + 1]; val[2] = gen->apf * (lambda[gloc] + lambda[gloc + 1]); - ierr = MatSetValues(H, 1, row, 3, col, val, ADD_VALUES); + // ierr = MatSetValues(H, 1, row, 3, col, val, ADD_VALUES); + ierr = + MatSetValues_and_Print('G', H, 1, row, 3, col, val, ADD_VALUES); // df1_ddelPg = -(Pg - gen->pt); // df2_ddelPg = gen->pb - Pg; row[0] = gen->startxpdevloc; val[0] = -lambda[gloc] - lambda[gloc + 1]; - ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); + // ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); + ierr = + MatSetValues_and_Print('G', H, 1, row, 1, col, val, ADD_VALUES); CHKERRQ(ierr); // df1_ddelP = gen->apf*(Pg - gen->pt); // df2_ddelP = -gen->apf*(gen->pb - Pg); row[0] = ps->startxloc; val[0] = gen->apf * (lambda[gloc] + lambda[gloc + 1]); - ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); + // ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); + ierr = + MatSetValues_and_Print('G', H, 1, row, 1, col, val, ADD_VALUES); CHKERRQ(ierr); } } @@ -2113,14 +2145,18 @@ PetscErrorCode OPFLOWComputeInequalityConstraintsHessian_PBPOL(OPFLOW opflow, val[0] = -( lambda[gloc] + lambda[gloc + 1]); // lam_eq1*d2eq1_dQg_dV + lam_eq2*d2eq2_dQg_dV - ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); + // ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); + ierr = + MatSetValues_and_Print('G', H, 1, row, 1, col, val, ADD_VALUES); CHKERRQ(ierr); row[0] = xloc + 1; col[0] = loc + 1; val[0] = -( lambda[gloc] + lambda[gloc + 1]); // lam_eq1* d2eq1_dQg_dV + lam_eq2*d2eq2_dV_dQg - ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); + // ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); + ierr = + MatSetValues_and_Print('G', H, 1, row, 1, col, val, ADD_VALUES); CHKERRQ(ierr); } } @@ -2406,7 +2442,8 @@ PetscErrorCode OPFLOWComputeInequalityConstraintsHessian_PBPOL(OPFLOW opflow, val[3] = lambda[gloc] * d2Sf2_dthetaf_dVmt + lambda[gloc + 1] * d2St2_dthetaf_dVmt; - ierr = MatSetValues(H, 1, row, 4, col, val, ADD_VALUES); + // ierr = MatSetValues(H, 1, row, 4, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print('L', H, 1, row, 4, col, val, ADD_VALUES); CHKERRQ(ierr); PetscScalar d2Sf2_dVmf_dthetaf, d2Sf2_dVmf_dVmf, d2Sf2_dVmf_dthetat, @@ -2450,7 +2487,8 @@ PetscErrorCode OPFLOWComputeInequalityConstraintsHessian_PBPOL(OPFLOW opflow, lambda[gloc + 1] * d2St2_dVmf_dthetat; val[3] = lambda[gloc] * d2Sf2_dVmf_dVmt + lambda[gloc + 1] * d2St2_dVmf_dVmt; - ierr = MatSetValues(H, 1, row, 4, col, val, ADD_VALUES); + // ierr = MatSetValues(H, 1, row, 4, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print('L', H, 1, row, 4, col, val, ADD_VALUES); CHKERRQ(ierr); PetscScalar d2Sf2_dthetat_dthetaf, d2Sf2_dthetat_dVmf, @@ -2500,7 +2538,8 @@ PetscErrorCode OPFLOWComputeInequalityConstraintsHessian_PBPOL(OPFLOW opflow, val[3] = lambda[gloc] * d2Sf2_dthetat_dVmt + lambda[gloc + 1] * d2St2_dthetat_dVmt; - ierr = MatSetValues(H, 1, row, 4, col, val, ADD_VALUES); + // ierr = MatSetValues(H, 1, row, 4, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print('L', H, 1, row, 4, col, val, ADD_VALUES); CHKERRQ(ierr); PetscScalar d2Sf2_dVmt_dthetaf, d2Sf2_dVmt_dVmf, d2Sf2_dVmt_dthetat, @@ -2546,7 +2585,8 @@ PetscErrorCode OPFLOWComputeInequalityConstraintsHessian_PBPOL(OPFLOW opflow, val[3] = lambda[gloc] * d2Sf2_dVmt_dVmt + lambda[gloc + 1] * d2St2_dVmt_dVmt; - ierr = MatSetValues(H, 1, row, 4, col, val, ADD_VALUES); + // ierr = MatSetValues(H, 1, row, 4, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print('L', H, 1, row, 4, col, val, ADD_VALUES); CHKERRQ(ierr); // Must be inside for loop since there's a continue condition flps += (185 + (16 * EXAGO_FLOPS_SINOP) + (16 * EXAGO_FLOPS_COSOP)); @@ -2608,14 +2648,16 @@ PetscErrorCode OPFLOWComputeObjectiveHessian_PBPOL(OPFLOW opflow, Vec X, col[0] = xlocglob; val[0] = 0.0; - ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); + // ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print('I', H, 1, row, 1, col, val, ADD_VALUES); CHKERRQ(ierr); row[0] = xlocglob + 1; col[0] = xlocglob + 1; val[0] = 0.0; - ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); + // ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print('I', H, 1, row, 1, col, val, ADD_VALUES); CHKERRQ(ierr); } @@ -2633,7 +2675,17 @@ PetscErrorCode OPFLOWComputeObjectiveHessian_PBPOL(OPFLOW opflow, Vec X, val[0] = weight * obj_factor * 2.0 * gen->cost_alpha * ps->MVAbase * ps->MVAbase; - ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); + // ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print('G', H, 1, row, 1, col, val, ADD_VALUES); + CHKERRQ(ierr); + + // Reactive power is usually not included in the objective, + // but let's make sure there's an entry for it + row[0] = xlocglob + 1; + col[0] = xlocglob + 1; + val[0] = 0.0; + // ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print('G', H, 1, row, 1, col, val, ADD_VALUES); CHKERRQ(ierr); flps += 4; } else if (opflow->objectivetype == MIN_GENSETPOINT_DEVIATION) { @@ -2641,9 +2693,18 @@ PetscErrorCode OPFLOWComputeObjectiveHessian_PBPOL(OPFLOW opflow, Vec X, row[0] = xlocglob; col[0] = xlocglob; val[0] = weight * obj_factor * 2.0; - ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); + // ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print('G', H, 1, row, 1, col, val, ADD_VALUES); CHKERRQ(ierr); + // Reactive power is usually not included in the objective, + // but let's make sure there's an entry for it + row[0] = xlocglob + 1; + col[0] = xlocglob + 1; + val[0] = 0.0; + // ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print('G', H, 1, row, 1, col, val, ADD_VALUES); + CHKERRQ(ierr); flps += 1; } } @@ -2659,13 +2720,15 @@ PetscErrorCode OPFLOWComputeObjectiveHessian_PBPOL(OPFLOW opflow, Vec X, row[0] = xlocglob; col[0] = xlocglob; val[0] = 0.0; - ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); + // ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print('l', H, 1, row, 1, col, val, ADD_VALUES); CHKERRQ(ierr); row[0] = xlocglob + 1; col[0] = xlocglob + 1; val[0] = 0.0; - ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); + // ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print('l', H, 1, row, 1, col, val, ADD_VALUES); CHKERRQ(ierr); } } diff --git a/tpl/pybind11 b/tpl/pybind11 index 6c7720856..59aa99860 160000 --- a/tpl/pybind11 +++ b/tpl/pybind11 @@ -1 +1 @@ -Subproject commit 6c77208561f23c255bf73e3d76674a6a9496179f +Subproject commit 59aa99860c60bd171b9565e9920f125fdb749267 diff --git a/tpl/spack b/tpl/spack index 16bc58ea4..b85c31f94 160000 --- a/tpl/spack +++ b/tpl/spack @@ -1 +1 @@ -Subproject commit 16bc58ea49256b061c7308565fe41c446e748881 +Subproject commit b85c31f946e4845d3d6efcc161bc50032f197174