diff --git a/src/ModelPredictiveControl.jl b/src/ModelPredictiveControl.jl index adefdfef..70c0e360 100644 --- a/src/ModelPredictiveControl.jl +++ b/src/ModelPredictiveControl.jl @@ -8,8 +8,10 @@ using RecipesBase using ProgressLogging using DifferentiationInterface: ADTypes.AbstractADType, AutoForwardDiff, AutoSparse -using DifferentiationInterface: gradient!, jacobian!, prepare_gradient, prepare_jacobian +using DifferentiationInterface: prepare_gradient, prepare_jacobian, prepare_hessian +using DifferentiationInterface: gradient!, jacobian!, hessian! using DifferentiationInterface: value_and_gradient!, value_and_jacobian! +using DifferentiationInterface: value_gradient_and_hessian! using DifferentiationInterface: Constant, Cache using SparseConnectivityTracer: TracerSparsityDetector using SparseMatrixColorings: GreedyColoringAlgorithm, sparsity_pattern diff --git a/src/controller/execute.jl b/src/controller/execute.jl index a3925111..a72f7d74 100644 --- a/src/controller/execute.jl +++ b/src/controller/execute.jl @@ -405,7 +405,7 @@ function optim_objective!(mpc::PredictiveController{NT}) where {NT<:Real} MOIU.reset_optimizer(optim) JuMP.optimize!(optim) else - rethrow(err) + rethrow() end end if !issolved(optim) @@ -488,7 +488,7 @@ Call `periodsleep(mpc.estim.model)`. periodsleep(mpc::PredictiveController, busywait=false) = periodsleep(mpc.estim.model, busywait) """ - setstate!(mpc::PredictiveController, x̂, P̂=nothing) -> mpc + setstate!(mpc::PredictiveController, x̂[, P̂]) -> mpc Call [`setstate!`](@ref) on `mpc.estim` [`StateEstimator`](@ref). """ diff --git a/src/controller/nonlinmpc.jl b/src/controller/nonlinmpc.jl index 0fc28102..fb3d3619 100644 --- a/src/controller/nonlinmpc.jl +++ b/src/controller/nonlinmpc.jl @@ -13,8 +13,9 @@ struct NonLinMPC{ SE<:StateEstimator, TM<:TranscriptionMethod, JM<:JuMP.GenericModel, - GB<:AbstractADType, - JB<:AbstractADType, + GB<:Union{Nothing, AbstractADType}, + HB<:Union{Nothing, AbstractADType}, + JB<:AbstractADType, PT<:Any, JEfunc<:Function, GCfunc<:Function @@ -26,6 +27,7 @@ struct NonLinMPC{ optim::JM con::ControllerConstraint{NT, GCfunc} gradient::GB + hessian ::HB jacobian::JB Z̃::Vector{NT} ŷ::Vector{NT} @@ -63,13 +65,14 @@ struct NonLinMPC{ function NonLinMPC{NT}( estim::SE, Hp, Hc, M_Hp, N_Hc, L_Hp, Cwt, Ewt, JE::JEfunc, gc!::GCfunc, nc, p::PT, - transcription::TM, optim::JM, gradient::GB, jacobian::JB + transcription::TM, optim::JM, gradient::GB, hessian::HB, jacobian::JB, ) where { NT<:Real, SE<:StateEstimator, TM<:TranscriptionMethod, JM<:JuMP.GenericModel, - GB<:AbstractADType, + GB<:Union{Nothing, AbstractADType}, + HB<:Union{Nothing, AbstractADType}, JB<:AbstractADType, PT<:Any, JEfunc<:Function, @@ -107,9 +110,9 @@ struct NonLinMPC{ nZ̃ = get_nZ(estim, transcription, Hp, Hc) + nϵ Z̃ = zeros(NT, nZ̃) buffer = PredictiveControllerBuffer(estim, transcription, Hp, Hc, nϵ) - mpc = new{NT, SE, TM, JM, GB, JB, PT, JEfunc, GCfunc}( + mpc = new{NT, SE, TM, JM, GB, HB, JB, PT, JEfunc, GCfunc}( estim, transcription, optim, con, - gradient, jacobian, + gradient, hessian, jacobian, Z̃, ŷ, Hp, Hc, nϵ, weights, @@ -202,10 +205,14 @@ This controller allocates memory at each time step for the optimization. - `transcription=SingleShooting()` : a [`TranscriptionMethod`](@ref) for the optimization. - `optim=JuMP.Model(Ipopt.Optimizer)` : nonlinear optimizer used in the predictive controller, provided as a [`JuMP.Model`](@extref) object (default to [`Ipopt`](https://github.com/jump-dev/Ipopt.jl) optimizer). 
-- `gradient=AutoForwardDiff()` : an `AbstractADType` backend for the gradient of the objective - function, see [`DifferentiationInterface` doc](@extref DifferentiationInterface List). +- `hessian=nothing` : an `AbstractADType` backend for the Hessian of the objective function + (see [`DifferentiationInterface` doc](@extref DifferentiationInterface List)), or + `nothing` for the LBFGS approximation provided by `optim` (details in Extended Help). +- `gradient=isnothing(hessian) ? AutoForwardDiff() : nothing` : an `AbstractADType` backend + for the gradient of the objective function (see `hessian` for the options), or `nothing` + to retrieve the lower-order derivatives from the `hessian` backend. - `jacobian=default_jacobian(transcription)` : an `AbstractADType` backend for the Jacobian - of the nonlinear constraints, see `gradient` above for the options (default in Extended Help). + of the nonlinear constraints (see `hessian` for the options, defaults in Extended Help). - additional keyword arguments are passed to [`UnscentedKalmanFilter`](@ref) constructor (or [`SteadyKalmanFilter`](@ref), for [`LinModel`](@ref)). @@ -259,14 +266,17 @@ NonLinMPC controller with a sample time Ts = 10.0 s, Ipopt optimizer, UnscentedK exception: if `transcription` is not a [`SingleShooting`](@ref), the `jacobian` argument defaults to this [sparse backend](@extref DifferentiationInterface AutoSparse-object): ```julia - AutoSparse( + sparseAD = AutoSparse( AutoForwardDiff(); sparsity_detector = TracerSparsityDetector(), coloring_algorithm = GreedyColoringAlgorithm() ) ``` - Optimizers generally benefit from exact derivatives like AD. However, the [`NonLinModel`](@ref) - state-space functions must be compatible with this feature. See [`JuMP` documentation](@extref JuMP Common-mistakes-when-writing-a-user-defined-operator) + Also, the `hessian` argument defaults to `nothing`, meaning the LBFGS approximation of + `optim`. To provide an exact Hessian instead, a sparse backend like the one above is + recommended. In general, optimizers benefit from exact derivatives like AD. + However, the [`NonLinModel`](@ref) state-space functions must be compatible with this + feature. See [`JuMP` documentation](@extref JuMP Common-mistakes-when-writing-a-user-defined-operator) for common mistakes when writing these functions. Note that if `Cwt≠Inf`, the attribute `nlp_scaling_max_gradient` of `Ipopt` is set to @@ -291,7 +301,8 @@ function NonLinMPC( p = model.p, transcription::TranscriptionMethod = DEFAULT_NONLINMPC_TRANSCRIPTION, optim::JuMP.GenericModel = JuMP.Model(DEFAULT_NONLINMPC_OPTIMIZER, add_bridges=false), - gradient::AbstractADType = DEFAULT_NONLINMPC_GRADIENT, + hessian ::Union{Nothing, AbstractADType} = nothing, + gradient::Union{Nothing, AbstractADType} = isnothing(hessian) ? DEFAULT_NONLINMPC_GRADIENT : nothing, jacobian::AbstractADType = default_jacobian(transcription), kwargs... ) @@ -299,7 +310,7 @@ function NonLinMPC( return NonLinMPC( estim; Hp, Hc, Mwt, Nwt, Lwt, Cwt, Ewt, JE, gc, nc, p, M_Hp, N_Hc, L_Hp, - transcription, optim, gradient, jacobian + transcription, optim, gradient, hessian, jacobian ) end @@ -322,7 +333,8 @@ function NonLinMPC( p = model.p, transcription::TranscriptionMethod = DEFAULT_NONLINMPC_TRANSCRIPTION, optim::JuMP.GenericModel = JuMP.Model(DEFAULT_NONLINMPC_OPTIMIZER, add_bridges=false), - gradient::AbstractADType = DEFAULT_NONLINMPC_GRADIENT, + hessian ::Union{Nothing, AbstractADType} = nothing, + gradient::Union{Nothing, AbstractADType} = isnothing(hessian) ?
DEFAULT_NONLINMPC_GRADIENT : nothing, jacobian::AbstractADType = default_jacobian(transcription), kwargs... ) @@ -330,7 +342,7 @@ function NonLinMPC( return NonLinMPC( estim; Hp, Hc, Mwt, Nwt, Lwt, Cwt, Ewt, JE, gc, nc, p, M_Hp, N_Hc, L_Hp, - transcription, optim, gradient, jacobian + transcription, optim, gradient, hessian, jacobian ) end @@ -377,7 +389,8 @@ function NonLinMPC( p = estim.model.p, transcription::TranscriptionMethod = DEFAULT_NONLINMPC_TRANSCRIPTION, optim::JuMP.GenericModel = JuMP.Model(DEFAULT_NONLINMPC_OPTIMIZER, add_bridges=false), - gradient::AbstractADType = DEFAULT_NONLINMPC_GRADIENT, + hessian ::Union{Nothing, AbstractADType} = nothing, + gradient::Union{Nothing, AbstractADType} = isnothing(hessian) ? DEFAULT_NONLINMPC_GRADIENT : nothing, jacobian::AbstractADType = default_jacobian(transcription), ) where { NT<:Real, @@ -392,7 +405,7 @@ function NonLinMPC( gc! = get_mutating_gc(NT, gc) return NonLinMPC{NT}( estim, Hp, Hc, M_Hp, N_Hc, L_Hp, Cwt, Ewt, JE, gc!, nc, p, - transcription, optim, gradient, jacobian + transcription, optim, gradient, hessian, jacobian ) end @@ -540,12 +553,11 @@ function init_optimization!(mpc::NonLinMPC, model::SimModel, optim::JuMP.Generic JuMP.set_attribute(optim, "nlp_scaling_max_gradient", 10.0/C) end end - Jfunc, ∇Jfunc!, gfuncs, ∇gfuncs!, geqfuncs, ∇geqfuncs! = get_optim_functions( - mpc, optim - ) - @operator(optim, J, nZ̃, Jfunc, ∇Jfunc!) + validate_backends(mpc.gradient, mpc.hessian) + J_args, g_vec_args, geq_vec_args = get_optim_functions(mpc, optim) + @operator(optim, J, nZ̃, J_args...) @objective(optim, Min, J(Z̃var...)) - init_nonlincon!(mpc, model, transcription, gfuncs, ∇gfuncs!, geqfuncs, ∇geqfuncs!) + init_nonlincon!(mpc, model, transcription, g_vec_args, geq_vec_args) set_nonlincon!(mpc, model, transcription, optim) return nothing end @@ -553,14 +566,15 @@ end """ get_optim_functions( mpc::NonLinMPC, optim::JuMP.GenericModel - ) -> Jfunc, ∇Jfunc!, gfuncs, ∇gfuncs!, geqfuncs, ∇geqfuncs! + ) -> J_args, g_vec_args, geq_vec_args Return the functions for the nonlinear optimization of `mpc` [`NonLinMPC`](@ref) controller. - -Return the nonlinear objective `Jfunc` function, and `∇Jfunc!`, to compute its gradient. -Also return vectors with the nonlinear inequality constraint functions `gfuncs`, and -`∇gfuncs!`, for the associated gradients. Lastly, also return vectors with the nonlinear -equality constraint functions `geqfuncs` and gradients `∇geqfuncs!`. + +Return the tuple `J_args` containing the functions to compute the objective function +value and its derivatives. Also return the tuple `g_vec_args` containing two vectors of +functions to compute the nonlinear inequality constraint values and their gradients. +Lastly, return the tuple `geq_vec_args` containing two vectors of functions to compute +the nonlinear equality constraint values and their gradients. This method is really intricate and I'm not proud of it.
That's because of 3 elements: @@ -579,7 +593,7 @@ Inspired from: [User-defined operators with vector outputs](@extref JuMP User-de function get_optim_functions(mpc::NonLinMPC, ::JuMP.GenericModel{JNT}) where JNT<:Real # ----------- common cache for Jfunc, gfuncs and geqfuncs ---------------------------- model = mpc.estim.model - grad, jac = mpc.gradient, mpc.jacobian + grad, hess, jac = mpc.gradient, mpc.hessian, mpc.jacobian nu, ny, nx̂, nϵ, nk = model.nu, model.ny, mpc.estim.nx̂, mpc.nϵ, model.nk Hp, Hc = mpc.Hp, mpc.Hc ng, nc, neq = length(mpc.con.i_g), mpc.con.nc, mpc.con.neq @@ -601,61 +615,85 @@ function get_optim_functions(mpc::NonLinMPC, ::JuMP.GenericModel{JNT}) where JNT update_predictions!(ΔŨ, x̂0end, Ue, Ŷe, U0, Ŷ0, Û0, K0, X̂0, gc, g, geq, mpc, Z̃) return obj_nonlinprog!(Ŷ0, U0, mpc, model, Ue, Ŷe, ΔŨ) end - Z̃_∇J = fill(myNaN, nZ̃) # NaN to force update_predictions! at first call - ∇J_context = ( + Z̃_J = fill(myNaN, nZ̃) # NaN to force update_predictions! at first call + context_J = ( Cache(ΔŨ), Cache(x̂0end), Cache(Ue), Cache(Ŷe), Cache(U0), Cache(Ŷ0), Cache(Û0), Cache(K0), Cache(X̂0), Cache(gc), Cache(g), Cache(geq), ) - ∇J_prep = prepare_gradient(Jfunc!, grad, Z̃_∇J, ∇J_context...; strict) - ∇J = Vector{JNT}(undef, nZ̃) - function update_objective!(J, ∇J, Z̃, Z̃arg) - if isdifferent(Z̃arg, Z̃) - Z̃ .= Z̃arg - J[], _ = value_and_gradient!(Jfunc!, ∇J, ∇J_prep, grad, Z̃_∇J, ∇J_context...) - end - end + if !isnothing(grad) + prep_∇J = prepare_gradient(Jfunc!, grad, Z̃_J, context_J...; strict) + else + prep_∇J = nothing + end + if !isnothing(hess) + prep_∇²J = prepare_hessian(Jfunc!, hess, Z̃_J, context_J...; strict) + else + prep_∇²J = nothing + end + ∇J = Vector{JNT}(undef, nZ̃) + ∇²J = init_diffmat(JNT, hess, prep_∇²J, nZ̃, nZ̃) function Jfunc(Z̃arg::Vararg{T, N}) where {N, T<:Real} - update_objective!(J, ∇J, Z̃_∇J, Z̃arg) + update_memoized_diff!( + Z̃_J, J, ∇J, ∇²J, prep_∇J, prep_∇²J, context_J, grad, hess, Jfunc!, Z̃arg + ) return J[]::T end - ∇Jfunc! = if nZ̃ == 1 # univariate syntax (see JuMP.@operator doc): + ∇Jfunc! = if nZ̃ == 1 # univariate syntax (see JuMP.@operator doc): function (Z̃arg) - update_objective!(J, ∇J, Z̃_∇J, Z̃arg) + update_memoized_diff!( + Z̃_J, J, ∇J, ∇²J, prep_∇J, prep_∇²J, context_J, grad, hess, Jfunc!, Z̃arg + ) return ∇J[begin] end - else # multivariate syntax (see JuMP.@operator doc): + else # multivariate syntax (see JuMP.@operator doc): function (∇Jarg::AbstractVector{T}, Z̃arg::Vararg{T, N}) where {N, T<:Real} - update_objective!(J, ∇J, Z̃_∇J, Z̃arg) + update_memoized_diff!( + Z̃_J, J, ∇J, ∇²J, prep_∇J, prep_∇²J, context_J, grad, hess, Jfunc!, Z̃arg + ) return ∇Jarg .= ∇J end end + ∇²Jfunc! = if nZ̃ == 1 # univariate syntax (see JuMP.@operator doc): + function (Z̃arg) + update_memoized_diff!( + Z̃_J, J, ∇J, ∇²J, prep_∇J, prep_∇²J, context_J, grad, hess, Jfunc!, Z̃arg + ) + return ∇²J[begin, begin] + end + else # multivariate syntax (see JuMP.@operator doc): + function (∇²Jarg::AbstractMatrix{T}, Z̃arg::Vararg{T, N}) where {N, T<:Real} + update_memoized_diff!( + Z̃_J, J, ∇J, ∇²J, prep_∇J, prep_∇²J, context_J, grad, hess, Jfunc!, Z̃arg + ) + # JuMP.@operator only reads the lower-left triangle of the Hessian: + for i in 1:N, j in 1:i + ∇²Jarg[i, j] = ∇²J[i, j] + end + return ∇²Jarg + end + end + J_args = isnothing(hess) ? (Jfunc, ∇Jfunc!) : (Jfunc, ∇Jfunc!, ∇²Jfunc!)
# --------------------- inequality constraint functions ------------------------------- function gfunc!(g, Z̃, ΔŨ, x̂0end, Ue, Ŷe, U0, Ŷ0, Û0, K0, X̂0, gc, geq) update_predictions!(ΔŨ, x̂0end, Ue, Ŷe, U0, Ŷ0, Û0, K0, X̂0, gc, g, geq, mpc, Z̃) return g end - Z̃_∇g = fill(myNaN, nZ̃) # NaN to force update_predictions! at first call - ∇g_context = ( + Z̃_g = fill(myNaN, nZ̃) # NaN to force update_predictions! at first call + context_g = ( Cache(ΔŨ), Cache(x̂0end), Cache(Ue), Cache(Ŷe), Cache(U0), Cache(Ŷ0), Cache(Û0), Cache(K0), Cache(X̂0), Cache(gc), Cache(geq), ) # temporarily enable all the inequality constraints for sparsity detection: mpc.con.i_g[1:end-nc] .= true - ∇g_prep = prepare_jacobian(gfunc!, g, jac, Z̃_∇g, ∇g_context...; strict) + prep_∇g = prepare_jacobian(gfunc!, g, jac, Z̃_g, context_g...; strict) mpc.con.i_g[1:end-nc] .= false - ∇g = init_diffmat(JNT, jac, ∇g_prep, nZ̃, ng) - function update_con!(g, ∇g, Z̃, Z̃arg) - if isdifferent(Z̃arg, Z̃) - Z̃ .= Z̃arg - value_and_jacobian!(gfunc!, g, ∇g, ∇g_prep, jac, Z̃, ∇g_context...) - end - end + ∇g = init_diffmat(JNT, jac, prep_∇g, nZ̃, ng) gfuncs = Vector{Function}(undef, ng) for i in eachindex(gfuncs) gfunc_i = function (Z̃arg::Vararg{T, N}) where {N, T<:Real} - update_con!(g, ∇g, Z̃_∇g, Z̃arg) + update_memoized_diff!(Z̃_g, g, ∇g, prep_∇g, context_g, jac, gfunc!, Z̃arg) return g[i]::T end gfuncs[i] = gfunc_i @@ -664,40 +703,37 @@ function get_optim_functions(mpc::NonLinMPC, ::JuMP.GenericModel{JNT}) where JNT for i in eachindex(∇gfuncs!) ∇gfuncs_i! = if nZ̃ == 1 # univariate syntax (see JuMP.@operator doc): function (Z̃arg::T) where T<:Real - update_con!(g, ∇g, Z̃_∇g, Z̃arg) + update_memoized_diff!(Z̃_g, g, ∇g, prep_∇g, context_g, jac, gfunc!, Z̃arg) return ∇g[i, begin] end - else # multivariate syntax (see JuMP.@operator doc): + else # multivariate syntax (see JuMP.@operator doc): function (∇g_i, Z̃arg::Vararg{T, N}) where {N, T<:Real} - update_con!(g, ∇g, Z̃_∇g, Z̃arg) + update_memoized_diff!(Z̃_g, g, ∇g, prep_∇g, context_g, jac, gfunc!, Z̃arg) return ∇g_i .= @views ∇g[i, :] end end ∇gfuncs![i] = ∇gfuncs_i! end + g_vec_args = (gfuncs, ∇gfuncs!) # --------------------- equality constraint functions --------------------------------- function geqfunc!(geq, Z̃, ΔŨ, x̂0end, Ue, Ŷe, U0, Ŷ0, Û0, K0, X̂0, gc, g) update_predictions!(ΔŨ, x̂0end, Ue, Ŷe, U0, Ŷ0, Û0, K0, X̂0, gc, g, geq, mpc, Z̃) return geq end - Z̃_∇geq = fill(myNaN, nZ̃) # NaN to force update_predictions! at first call - ∇geq_context = ( + Z̃_geq = fill(myNaN, nZ̃) # NaN to force update_predictions! at first call + context_geq = ( Cache(ΔŨ), Cache(x̂0end), Cache(Ue), Cache(Ŷe), Cache(U0), Cache(Ŷ0), Cache(Û0), Cache(K0), Cache(X̂0), Cache(gc), Cache(g) ) - ∇geq_prep = prepare_jacobian(geqfunc!, geq, jac, Z̃_∇geq, ∇geq_context...; strict) - ∇geq = init_diffmat(JNT, jac, ∇geq_prep, nZ̃, neq) - function update_con_eq!(geq, ∇geq, Z̃, Z̃arg) - if isdifferent(Z̃arg, Z̃) - Z̃ .= Z̃arg - value_and_jacobian!(geqfunc!, geq, ∇geq, ∇geq_prep, jac, Z̃, ∇geq_context...) 
- end - end + prep_∇geq = prepare_jacobian(geqfunc!, geq, jac, Z̃_geq, context_geq...; strict) + ∇geq = init_diffmat(JNT, jac, prep_∇geq, nZ̃, neq) geqfuncs = Vector{Function}(undef, neq) for i in eachindex(geqfuncs) geqfunc_i = function (Z̃arg::Vararg{T, N}) where {N, T<:Real} - update_con_eq!(geq, ∇geq, Z̃_∇geq, Z̃arg) + update_memoized_diff!( + Z̃_geq, geq, ∇geq, prep_∇geq, context_geq, jac, geqfunc!, Z̃arg + ) return geq[i]::T end geqfuncs[i] = geqfunc_i @@ -708,12 +744,15 @@ function get_optim_functions(mpc::NonLinMPC, ::JuMP.GenericModel{JNT}) where JNT # constraints imply MultipleShooting, thus input increment ΔU and state X̂0 in Z̃: ∇geqfuncs_i! = function (∇geq_i, Z̃arg::Vararg{T, N}) where {N, T<:Real} - update_con_eq!(geq, ∇geq, Z̃_∇geq, Z̃arg) + update_memoized_diff!( + Z̃_geq, geq, ∇geq, prep_∇geq, context_geq, jac, geqfunc!, Z̃arg + ) return ∇geq_i .= @views ∇geq[i, :] end ∇geqfuncs![i] = ∇geqfuncs_i! end - return Jfunc, ∇Jfunc!, gfuncs, ∇gfuncs!, geqfuncs, ∇geqfuncs! + geq_vec_args = (geqfuncs, ∇geqfuncs!) + return J_args, g_vec_args, geq_vec_args end """ diff --git a/src/controller/transcription.jl b/src/controller/transcription.jl index b737092f..6c0b69d2 100644 --- a/src/controller/transcription.jl +++ b/src/controller/transcription.jl @@ -604,21 +604,18 @@ end """ init_nonlincon!( - mpc::PredictiveController, model::LinModel, transcription::TranscriptionMethod, - gfuncs , ∇gfuncs!, - geqfuncs, ∇geqfuncs! - ) + mpc::PredictiveController, ::LinModel, ::TranscriptionMethod, g_vec_args, geq_vec_args + ) -> nothing Init nonlinear constraints for [`LinModel`](@ref) for all [`TranscriptionMethod`](@ref). The only nonlinear constraints are the custom inequality constraints `gc`. """ function init_nonlincon!( - mpc::PredictiveController, ::LinModel, ::TranscriptionMethod, - gfuncs, ∇gfuncs!, - _ , _ + mpc::PredictiveController, ::LinModel, ::TranscriptionMethod, g_vec_args, _ ) optim, con = mpc.optim, mpc.con + gfuncs, ∇gfuncs! = g_vec_args nZ̃ = length(mpc.Z̃) if length(con.i_g) ≠ 0 i_base = 0 @@ -634,10 +631,8 @@ end """ init_nonlincon!( - mpc::PredictiveController, model::NonLinModel, transcription::MultipleShooting, - gfuncs, ∇gfuncs!, - geqfuncs, ∇geqfuncs! - ) + mpc::PredictiveController, ::NonLinModel, ::MultipleShooting, g_vec_args, geq_vec_args + ) -> nothing Init nonlinear constraints for [`NonLinModel`](@ref) and [`MultipleShooting`](@ref). @@ -645,11 +640,11 @@ The nonlinear constraints are the output prediction `Ŷ` bounds, the custom ine constraints `gc` and all the nonlinear equality constraints `geq`. """ function init_nonlincon!( - mpc::PredictiveController, ::NonLinModel, ::MultipleShooting, - gfuncs, ∇gfuncs!, - geqfuncs, ∇geqfuncs! + mpc::PredictiveController, ::NonLinModel, ::MultipleShooting, g_vec_args, geq_vec_args ) optim, con = mpc.optim, mpc.con + gfuncs , ∇gfuncs! = g_vec_args + geqfuncs, ∇geqfuncs! = geq_vec_args ny, nx̂, Hp, nZ̃ = mpc.estim.model.ny, mpc.estim.nx̂, mpc.Hp, length(mpc.Z̃) # --- nonlinear inequality constraints --- if length(con.i_g) ≠ 0 @@ -691,10 +686,8 @@ end """ init_nonlincon!( - mpc::PredictiveController, model::NonLinModel, ::SingleShooting, - gfuncs, ∇gfuncs!, - geqfuncs, ∇geqfuncs! - ) + mpc::PredictiveController, ::NonLinModel, ::SingleShooting, g_vec_args, geq_vec_args + ) -> nothing Init nonlinear constraints for [`NonLinModel`](@ref) and [`SingleShooting`](@ref). @@ -702,9 +695,10 @@ The nonlinear constraints are the custom inequality constraints `gc`, the output prediction `Ŷ` bounds and the terminal state `x̂end` bounds. 
""" function init_nonlincon!( - mpc::PredictiveController, ::NonLinModel, ::SingleShooting, gfuncs, ∇gfuncs!, _ , _ + mpc::PredictiveController, ::NonLinModel, ::SingleShooting, g_vec_args, _ ) optim, con = mpc.optim, mpc.con + gfuncs, ∇gfuncs! = g_vec_args ny, nx̂, Hp, nZ̃ = mpc.estim.model.ny, mpc.estim.nx̂, mpc.Hp, length(mpc.Z̃) if length(con.i_g) ≠ 0 i_base = 0 diff --git a/src/estimator/execute.jl b/src/estimator/execute.jl index 23408995..98b63c9e 100644 --- a/src/estimator/execute.jl +++ b/src/estimator/execute.jl @@ -330,7 +330,7 @@ function validate_args(estim::StateEstimator, ym, d, u=nothing) end """ - setstate!(estim::StateEstimator, x̂, P̂=nothing) -> estim + setstate!(estim::StateEstimator, x̂[, P̂]) -> estim Set `estim.x̂0` to `x̂ - estim.x̂op` from the argument `x̂`, and `estim.P̂` to `P̂` if applicable. diff --git a/src/estimator/mhe/construct.jl b/src/estimator/mhe/construct.jl index 4c6ebb71..beba35cb 100644 --- a/src/estimator/mhe/construct.jl +++ b/src/estimator/mhe/construct.jl @@ -250,33 +250,37 @@ transcription for now. - `model::SimModel` : (deterministic) model for the estimations. - `He=nothing` : estimation horizon ``H_e``, must be specified. - `i_ym=1:model.ny` : `model` output indices that are measured ``\mathbf{y^m}``, the rest - are unmeasured ``\mathbf{y^u}``. + are unmeasured ``\mathbf{y^u}``. - `σP_0=fill(1/model.nx,model.nx)` or *`sigmaP_0`* : main diagonal of the initial estimate - covariance ``\mathbf{P}(0)``, specified as a standard deviation vector. + covariance ``\mathbf{P}(0)``, specified as a standard deviation vector. - `σQ=fill(1/model.nx,model.nx)` or *`sigmaQ`* : main diagonal of the process noise - covariance ``\mathbf{Q}`` of `model`, specified as a standard deviation vector. + covariance ``\mathbf{Q}`` of `model`, specified as a standard deviation vector. - `σR=fill(1,length(i_ym))` or *`sigmaR`* : main diagonal of the sensor noise covariance - ``\mathbf{R}`` of `model` measured outputs, specified as a standard deviation vector. + ``\mathbf{R}`` of `model` measured outputs, specified as a standard deviation vector. - `nint_u=0`: integrator quantity for the stochastic model of the unmeasured disturbances at - the manipulated inputs (vector), use `nint_u=0` for no integrator (see Extended Help). + the manipulated inputs (vector), use `nint_u=0` for no integrator (see Extended Help). - `nint_ym=default_nint(model,i_ym,nint_u)` : same than `nint_u` but for the unmeasured - disturbances at the measured outputs, use `nint_ym=0` for no integrator (see Extended Help). + disturbances at the measured outputs, use `nint_ym=0` for no integrator (see Extended Help). - `σQint_u=fill(1,sum(nint_u))` or *`sigmaQint_u`* : same than `σQ` but for the unmeasured - disturbances at manipulated inputs ``\mathbf{Q_{int_u}}`` (composed of integrators). + disturbances at manipulated inputs ``\mathbf{Q_{int_u}}`` (composed of integrators). - `σPint_u_0=fill(1,sum(nint_u))` or *`sigmaPint_u_0`* : same than `σP_0` but for the unmeasured - disturbances at manipulated inputs ``\mathbf{P_{int_u}}(0)`` (composed of integrators). + disturbances at manipulated inputs ``\mathbf{P_{int_u}}(0)`` (composed of integrators). - `σQint_ym=fill(1,sum(nint_ym))` or *`sigmaQint_u`* : same than `σQ` for the unmeasured - disturbances at measured outputs ``\mathbf{Q_{int_{ym}}}`` (composed of integrators). + disturbances at measured outputs ``\mathbf{Q_{int_{ym}}}`` (composed of integrators). 
- `σPint_ym_0=fill(1,sum(nint_ym))` or *`sigmaPint_ym_0`* : same than `σP_0` but for the unmeasured - disturbances at measured outputs ``\mathbf{P_{int_{ym}}}(0)`` (composed of integrators). + disturbances at measured outputs ``\mathbf{P_{int_{ym}}}(0)`` (composed of integrators). - `Cwt=Inf` : slack variable weight ``C``, default to `Inf` meaning hard constraints only. - `optim=default_optim_mhe(model)` : a [`JuMP.Model`](@extref) object with a quadratic or nonlinear optimizer for solving (default to [`Ipopt`](https://github.com/jump-dev/Ipopt.jl), or [`OSQP`](https://osqp.org/docs/parsers/jump.html) if `model` is a [`LinModel`](@ref)). -- `gradient=AutoForwardDiff()` : an `AbstractADType` backend for the gradient of the objective - function when `model` is not a [`LinModel`](@ref), see [`DifferentiationInterface` doc](@extref DifferentiationInterface List). +- `hessian=nothing` : an `AbstractADType` backend for the Hessian of the objective function + when `model` is not a [`LinModel`](@ref) (see [`DifferentiationInterface` doc](@extref DifferentiationInterface List)), + or `nothing` for the LBFGS approximation provided by `optim` (details in Extended Help). +- `gradient=isnothing(hessian) ? AutoForwardDiff() : nothing` : an `AbstractADType` backend + for the gradient of the objective function when `model` is not a [`LinModel`](@ref) (see + `hessian` for the options), or `nothing` to retrieve the lower-order derivatives from `hessian`. - `jacobian=AutoForwardDiff()` : an `AbstractADType` backend for the Jacobian of the - constraints when `model` is not a [`LinModel`](@ref), see `gradient` above for the options. + constraints when `model` is not a [`LinModel`](@ref) (see `hessian` for the options). - `direct=true`: construct with a direct transmission from ``\mathbf{y^m}`` (a.k.a. current estimator, in opposition to the delayed/predictor form). @@ -359,7 +363,9 @@ MovingHorizonEstimator estimator with a sample time Ts = 10.0 s, Ipopt optimizer default, a [`KalmanFilter`](@ref) estimates the arrival covariance (customizable). - Else, a nonlinear program with dense [`ForwardDiff`](@extref ForwardDiff) automatic differentiation (AD) compute the objective and constraint derivatives by default - (customizable). Optimizers generally benefit from exact derivatives like AD. However, + (customizable). The `hessian` argument defaults to `nothing`, i.e. the LBFGS approximation + of `optim`; see the [`NonLinMPC`](@ref) Extended Help for the sparse backend recommended + for an exact Hessian. Optimizers generally benefit from exact derivatives like AD. However, the `f` and `h` functions must be compatible with this feature. See the [`JuMP` documentation](@extref JuMP Common-mistakes-when-writing-a-user-defined-operator) for common mistakes when writing these functions. Also, an [`UnscentedKalmanFilter`](@ref) diff --git a/src/general.jl b/src/general.jl index 90a411db..0bb2fc28 100644 --- a/src/general.jl +++ b/src/general.jl @@ -51,14 +51,97 @@ function limit_solve_time(optim::GenericModel, Ts) if isa(err, MOI.UnsupportedAttribute{MOI.TimeLimitSec}) @warn "Solving time limit is not supported by the optimizer." else - rethrow(err) + rethrow() end end end +"Verify that the provided 1st and 2nd order differentiation backends are a valid and efficient combination."
+validate_backends(firstOrder::AbstractADType, secondOrder::Nothing) = nothing +validate_backends(firstOrder::Nothing, secondOrder::AbstractADType) = nothing +function validate_backends(firstOrder::AbstractADType, secondOrder::AbstractADType) + @warn( + """ + Two AbstractADType backends were provided for the 1st and 2nd order differentiations, + meaning that the 1st order derivatives will be computed twice. Use nothing for the 1st + order backend to retrieve the results from the hessian backend, which is more efficient. + """ + ) + return nothing +end +function validate_backends(firstOrder::Nothing, secondOrder::Nothing) + throw(ArgumentError("The 1st and 2nd order differentiation backends cannot both be nothing.")) +end + "Init a differentiation result matrix as dense or sparse matrix, as required by `backend`." init_diffmat(T, backend::AbstractADType, _ , nx , ny) = Matrix{T}(undef, ny, nx) init_diffmat(T, backend::AutoSparse ,prep , _ , _ ) = similar(sparsity_pattern(prep), T) +init_diffmat(T, backend::Nothing , _ , nx , ny) = Matrix{T}(undef, ny, nx) + +""" + update_memoized_diff!( + x, y, ∇f, ∇²f, prep_∇f, prep_∇²f, context, + gradient::AbstractADType, hessian::Nothing, f!, xarg + ) -> nothing + +Update the `f!` value `y` and its gradient `∇f` in-place if `x ≠ xarg`. + +This method mutates all the arguments before `gradient`. This function is used for the +memoization of the `f!` function derivatives, to avoid redundant computations with the +splatting syntax of `JuMP.@operator`. +""" +function update_memoized_diff!( + x, y, ∇f, _ , prep_∇f, _ , context, + gradient::AbstractADType, hessian::Nothing, f!::F, xarg +) where F <: Function + if isdifferent(xarg, x) + x .= xarg # more efficient than individual f! and gradient! calls: + y[], _ = value_and_gradient!(f!, ∇f, prep_∇f, gradient, x, context...) + end + return nothing +end + +"Also update the Hessian `∇²f` if `hessian isa AbstractADType` and `isnothing(gradient)`." +function update_memoized_diff!( + x, y, ∇f, ∇²f, _ , prep_∇²f, context, + gradient::Nothing, hessian::AbstractADType, f!::F, xarg +) where F <: Function + if isdifferent(xarg, x) + x .= xarg # more efficient than individual f!, gradient! and hessian! calls: + y[], _ = value_gradient_and_hessian!(f!, ∇f, ∇²f, prep_∇²f, hessian, x, context...) + end + return nothing +end + +"Update `∇f` and `∇²f` individually if both backends are `AbstractADType`." +function update_memoized_diff!( + x, y, ∇f, ∇²f, prep_∇f, prep_∇²f, context, + gradient::AbstractADType, hessian::AbstractADType, f!::F, xarg +) where F <: Function + if isdifferent(xarg, x) + x .= xarg # inefficient, as warned by validate_backends(), but still possible: + hessian!(f!, ∇²f, prep_∇²f, hessian, x, context...) + y[], _ = value_and_gradient!(f!, ∇f, prep_∇f, gradient, x, context...) + end + return nothing +end + +""" + update_memoized_diff!(x, y, ∇f, prep_∇f, context, jacobian::AbstractADType, f!, xarg) + +Update the `f!` value `y` (vector) and its Jacobian `∇f` in-place if `x ≠ xarg`. + +This method mutates all the arguments before `jacobian`. +""" +function update_memoized_diff!( + x, y, ∇f, prep_∇f, context, jacobian::AbstractADType, f!::F, xarg +) where F <: Function + if isdifferent(xarg, x) + x .= xarg # more efficient than individual f! and jacobian! calls: + value_and_jacobian!(f!, y, ∇f, prep_∇f, jacobian, x, context...) + end + return nothing +end "Verify that x and y elements are different using `!==`."
isdifferent(x, y) = any(xi !== yi for (xi, yi) in zip(x, y)) diff --git a/test/2_test_state_estim.jl b/test/2_test_state_estim.jl index c1cb8e76..082e0c45 100644 --- a/test/2_test_state_estim.jl +++ b/test/2_test_state_estim.jl @@ -1364,7 +1364,7 @@ end X̂_mhe = zeros(4, 6) X̂_kf = zeros(4, 6) for i in 1:6 - y = [50,31] #+ randn(2) + y = [50,31] + randn(2) x̂_mhe = preparestate!(mhe, y, [25]) x̂_kf = preparestate!(kf, y, [25]) X̂_mhe[:,i] = x̂_mhe
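Below is a minimal usage sketch of the new `hessian` keyword introduced by this patch, for reviewers only and not part of the diff. The `model` variable and the horizons `Hp=10`, `Hc=2` are placeholder assumptions; the keyword names and backend constructors come from the patch and the existing `NonLinMPC` docstring.

```julia
using ModelPredictiveControl
using DifferentiationInterface: AutoForwardDiff, AutoSparse
using SparseConnectivityTracer: TracerSparsityDetector
using SparseMatrixColorings: GreedyColoringAlgorithm

# `model` is assumed to be an existing NonLinModel instance.
# Sparse exact Hessian backend, mirroring the sparse Jacobian backend of the docstring:
sparseAD = AutoSparse(
    AutoForwardDiff();
    sparsity_detector  = TracerSparsityDetector(),
    coloring_algorithm = GreedyColoringAlgorithm()
)
# With `hessian` provided, `gradient` defaults to `nothing`, so the gradient is retrieved
# from the same 2nd-order backend call (see update_memoized_diff!):
mpc_exact = NonLinMPC(model; Hp=10, Hc=2, hessian=sparseAD)

# Default behaviour: LBFGS Hessian approximation in the optimizer, exact AD gradient:
mpc_lbfgs = NonLinMPC(model; Hp=10, Hc=2)
```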
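The memoization pattern behind `update_memoized_diff!` can also be illustrated in isolation. The sketch below is a simplified, self-contained analogue (names are illustrative, not from the package): it caches the last evaluation point so that the value and derivative callbacks generated for `JuMP.@operator` do not recompute the predictions when called back-to-back at the same point.

```julia
# Simplified analogue of the memoization used for the JuMP.@operator callbacks:
# the value is recomputed only when the splatted argument differs from the last point.
function make_memoized(f, n)
    x_last = fill(NaN, n)          # NaN forces a computation on the first call
    y_last = Ref(NaN)
    function f_memo(xarg::Vararg{Float64})
        if any(xarg[i] !== x_last[i] for i in eachindex(x_last))
            x_last .= xarg
            y_last[] = f(collect(xarg))
        end
        return y_last[]
    end
    return f_memo
end

f_memo = make_memoized(x -> sum(abs2, x), 3)
f_memo(1.0, 2.0, 3.0)   # computes and caches the value
f_memo(1.0, 2.0, 3.0)   # same point: returns the cached value without recomputing
```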