diff --git a/examples/nestedduals.jl b/examples/nestedduals.jl new file mode 100644 index 0000000..51af36d --- /dev/null +++ b/examples/nestedduals.jl @@ -0,0 +1,78 @@ +### +# +# IDEA: Can we have a DualVector inside a DualVector in order to +# achieve 'nested dual numbers' thereby achieving higher order derivatives? +# +# BACKGROUND: +# +# Consider a number with two dual parts, ϵ and ϵ' such that ϵϵ' ≠ 0. +# We can represent such a number as follows: +# +# a + bϵ + cϵ' + dϵϵ' +# +# Consider an arbitrary function f: ℝ → ℝ given by f(x) = x^n. +# We can evaluate f at the above dual number as follows: +# +# f(a + bϵ + cϵ' + dϵϵ') = +# a^n + n a^(n-1) b ϵ + n a^(n-1) c ϵ' + (n a^(n-1) d + n(n-1) a^(n-2) b c) ϵϵ' +# +# Fix a ∈ ℝ. We observe that if we fix b = 1, c = 1, d = 0, we have that the +# coefficient of ϵϵ' is equivalent to f''(a). Thus using dual numbers like this +# gives us a way to compute second derivatives. +# +# We now generalise to the vector case. Consider vectors of dual parts ϵ and ϵ' +# such that ϵᵢϵ'ⱼ ≠ 0 for all i, j. A dual vector in these can be written as +# +# (a + Bϵ) + (C + Dϵ)ϵ' +# +# where a is a vector of reals, B is the matrix of coefficients of ϵ, +# C is the coefficients of ϵ' and D is the 3-tensor of coefficients of ϵϵ', +# (one dimension for each of a, ϵ and ϵ'. This is expanded by applying D to ϵ +# to first obtain a matrix via tensor contraction, and then applying this to ϵ') +# +# Analogously, if we fix B = I, C = I, D = 0 and apply a function f: ℝⁿ → ℝᵐ +# the coefficient of ϵϵ' will be an n x n x m tensor T such that T[:, :, i] +# is the Hessian of the i-th result of f. More simply, if we consider a +# function f: ℝⁿ → ℝ, the coefficient of ϵϵ' will be the Hessian of f. +# +# In DualArrays.jl, this can be achieved by setting up a DualVector with a DualVector +# value and DualMatrix jacobian. Applying f: ℝⁿ → ℝ will give us a Dual with a Dual +# value and a DualVector vector partials. 
The jacobian of the DualVector partials +# is our Hessian. +# +# We now implement an example using f(x) = x[1] * x[2] below. + +using DualArrays +f(x) = x[1] * x[2] + +# a + Bϵ +x = DualVector([2, 3], [1 0; 0 1]) + +# C + Dϵ +y = DualMatrix([1 0;0 1], zeros(2,2,2)) + +# Setup the nested dual vector (a + Bϵ) + (C + Dϵ)ϵ' +z = DualVector(x, y) + +# This will return a Dual number: +# a + b^Tϵ + (c + Dϵ)ϵ' +result = f(z) + +# equivalent to a +value = result.value.value +# equivalent to b +gradient = result.value.partials +# equivalent to c +gradient2 = result.partials.value +# equivalent to d +hessian = result.partials.jacobian.data + +# We expect this to be 6 +print("Value: ", value, "\n") +# We expect this to be [3, 2] +print("Gradient: ", gradient, "\n") +# We expect this to be the same +print("Gradient: ", gradient2, "\n") +# We expect this to be [0 1; 1 0] +print("Hessian: ", hessian, "\n") + diff --git a/src/DualArrays.jl b/src/DualArrays.jl index b639121..a5d2230 100644 --- a/src/DualArrays.jl +++ b/src/DualArrays.jl @@ -11,7 +11,7 @@ Differentiation rules are mostly provided by ChainRules.jl. """ module DualArrays -export DualVector, DualMatrix, Dual, jacobian +export DualVector, DualMatrix, Dual, jacobian, ArrayOperator import Base: +, -, ==, getindex, size, axes, broadcasted, show, sum, vcat, convert, *, isapprox, \, eltype, transpose, permutedims diff --git a/src/arithmetic.jl b/src/arithmetic.jl index fee37b4..dc6d16e 100644 --- a/src/arithmetic.jl +++ b/src/arithmetic.jl @@ -1,5 +1,8 @@ # Arithmetic operations for DualArrays.jl +# Tensor transpose +transpose(t::ArrayOperator{N, M}) where {N, M} = ArrayOperator{M}(transpose(t.data)) + """ Addition/Subtraction of DualVectors. 
""" @@ -59,6 +62,16 @@ for (_, f, n) in DiffRules.diffrules(filter_modules=(:Base,)) jac = $p2.(x, y.value) .* y.jacobian return DualVector(val, jac) end + @eval function broadcasted(::typeof($f), x::DualVector, y::AbstractVector) + val = $f.(x.value, y) + jac = $p1.(x.value, y) .* x.jacobian + return DualVector(val, jac) + end + @eval function broadcasted(::typeof($f), x::AbstractVector, y::DualVector) + val = $f.(x, y.value) + jac = $p2.(x, y.value) .* y.jacobian + return DualVector(val, jac) + end @eval function broadcasted(::typeof($f), x::DualVector, y::Dual) val = $f.(x.value, y.value) # Product rule @@ -77,6 +90,16 @@ for (_, f, n) in DiffRules.diffrules(filter_modules=(:Base,)) jac = $p1.(x.value, y.value) .* x.jacobian .+ $p2.(x.value, y.value) .* y.jacobian return DualVector(val, jac) end + @eval function broadcasted(::typeof($f), x::Dual, y::AbstractVector) + val = $f.(x.value, y) + jac = $p1.(x.value, y) .* transpose(x.partials) + return DualVector(val, jac) + end + @eval function broadcasted(::typeof($f), x::AbstractVector, y::Dual) + val = $f.(x, y.value) + jac = $p2.(x, y.value) .* transpose(y.partials) + return DualVector(val, jac) + end # Must have Base.$f in order not to import everything @eval Base.$f(x::Dual, y::Dual) = Dual($f(x.value, y.value), $p1(x.value, y.value) * x.partials + $p2(x.value, y.value) * y.partials) @eval Base.$f(x::Dual, y::Real) = Dual($f(x.value, y), $p1(x.value, y) * x.partials) diff --git a/src/indexing.jl b/src/indexing.jl index 16ed280..0ceb7fc 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -2,9 +2,13 @@ using ArrayLayouts, FillArrays, LinearAlgebra, SparseArrays + sparse_getindex(a...) = layout_getindex(a...) + +# TODO: should we move these? sparse_getindex(D::Diagonal, k::Integer, ::Colon) = OneElement(D.diag[k], k, size(D, 2)) sparse_getindex(D::Diagonal, ::Colon, j::Integer) = OneElement(D.diag[j], j, size(D, 1)) +sparse_getindex(d::DualArray, args...) = d[args...] 
# We need a system of indexing that takes two tuples @@ -61,6 +65,24 @@ function getindex(x::DualVector, y::UnitRange) DualVector(newval, newjac) end +function getindex(x::DualMatrix, y::Int, ::Colon) + newval = x.value[y, :] + newjac = x.jacobian[(y,:), :] + DualVector(newval, newjac) +end + +function getindex(x::DualMatrix, ::Colon, y::Int) + newval = x.value[:, y] + newjac = x.jacobian[(:,y), :] + DualVector(newval, newjac) +end + +function getindex(x::DualMatrix, y::Vararg{Union{Colon, UnitRange}}) + newval = x.value[y...] + newjac = x.jacobian[y, :] + DualMatrix(newval, newjac) +end + # DualArray array interface for op in (:size, :axes) @eval $op(a::DualArray) = $op(a.value) diff --git a/src/types.jl b/src/types.jl index d9c32fe..fb516a0 100644 --- a/src/types.jl +++ b/src/types.jl @@ -48,7 +48,6 @@ eltype(::Type{<:ArrayOperator{N, M, T}}) where {N,M,T} = T Base.Broadcast.broadcastable(t::ArrayOperator) = t -transpose(t::ArrayOperator) = transpose(t.data) sum(t::ArrayOperator; kwargs...) = sum(t.data; kwargs...) # Equality is only defined for two ArrayOperators of the same (N, M). @@ -106,19 +105,25 @@ _unwrap(bc::Broadcast.Broadcasted) = Broadcast.Broadcasted(bc.f, _unwrap_args(bc _unwrap(x) = x _unwrap_args(args::Tuple) = map(_unwrap, args) -# copy ensures that arithmetic involving a ArrayOperator returns a ArrayOperator +# copy ensures that arithmetic involving a Tensor returns a Tensor function Base.copy(bc::Broadcast.Broadcasted{ArrayOperatorBroadcastStyle{L, N}}) where {L, N} - # We create a Broadcasted of the underlying arrays and create a ArrayOperator containing - # the evaluated broadcast - databroadcast = Broadcast.Broadcasted(bc.f, _unwrap_args(bc.args), bc.axes) - ArrayOperator{N}(copy(Broadcast.flatten(databroadcast))) + # We create a Broadcasted of the underlying arrays and create a Tensor containing + # the evaluated broadcast. 
We check if Base.broadcasted is a Broadcasted + # or is overriden such as with DualArrays + databroadcast = Base.broadcasted(bc.f, _unwrap_args(bc.args)...) + result = databroadcast isa Broadcast.Broadcasted ? copy(Broadcast.flatten(databroadcast)) : databroadcast + ArrayOperator{N}(result) end # copyto adds support for .= function Base.copyto!(dest::ArrayOperator, bc::Broadcast.Broadcasted{ArrayOperatorBroadcastStyle{L, N}}) where {L, N} # As above - databroadcast = Broadcast.Broadcasted(bc.f, _unwrap_args(bc.args), bc.axes) - copyto!(dest.data, Broadcast.flatten(databroadcast)) + databroadcast = Base.broadcasted(bc.f, _unwrap_args(bc.args)...) + if databroadcast isa Broadcast.Broadcasted + copyto!(dest.data, Broadcast.flatten(databroadcast)) + else + copyto!(dest.data, databroadcast) + end dest end @@ -149,6 +154,11 @@ function Dual(value::T, partials::ArrayOperator{0, M, S, L}) where {L, S, M, T} Dual(convert(T2, value), elconvert(T2, partials).data) end +# Lets us declare duals with a column vector as well as a row vector. +Dual(value::T, partials::ArrayOperator{N, 0, S, L}) where {L, S, N, T} = Dual(value, ArrayOperator{0}(partials.data)) + + + """ DualVector{T, M <: AbstractMatrix{T}} <: AbstractVector{Dual{T}} @@ -167,11 +177,11 @@ For now the entries just return the values when indexed. Constructs a DualVector, ensuring that the vector length matches the number of rows in the Jacobian. 
""" -struct DualArray{T, N, A <: AbstractArray{T,N}, J <: (ArrayOperator{N, M, T, L} where {L, M})} <: AbstractVector{Dual{T}} +struct DualArray{T, N, A <: AbstractArray{T,N}, J <: (ArrayOperator{N, M, T, L} where {L, M})} <: AbstractArray{Dual{T}, N} value::A jacobian::J - function DualArray(value::A, jacobian::J) where {T, N, A <: AbstractArray{T,N}, J <: (ArrayOperator{N, M, T, L} where {L, M})} + function DualArray{T,N,A,J}(value::A, jacobian::J) where {T, N, A <: AbstractArray{T,N}, J <: (ArrayOperator{N, M, T, L} where {L, M})} if size(value) != ntuple(i -> size(jacobian, i), N) throw(ArgumentError("Length of value vector must match number of rows in Jacobian.")) end @@ -179,12 +189,17 @@ struct DualArray{T, N, A <: AbstractArray{T,N}, J <: (ArrayOperator{N, M, T, L} end end +DualArray{T,N}(value::A, jacobian::J) where {T, N, A <: AbstractArray{T,N}, J <: (ArrayOperator{N, M, T, L} where {L, M})} = + DualArray{T,N,A,J}(value, jacobian) + +DualArray{T,N}(value, jacobian) where {T, N} = DualArray{T,N}(elconvert(T, value), elconvert(T, jacobian)) + """ Constructor that forces type compatibility """ function DualArray(value::AbstractArray, jacobian::ArrayOperator) T = promote_type(eltype(value), eltype(jacobian)) - DualArray(elconvert(T, value), elconvert(T, jacobian)) + DualArray{T}(value, jacobian) end # Helper function to define DualArrays with AbstractArray jacobians @@ -195,8 +210,33 @@ end const DualVector = DualArray{T, 1} where {T} const DualMatrix = DualArray{T, 2} where {T} -DualVector(value::AbstractVector, jacobian) = DualArray(value, jacobian) -DualMatrix(value::AbstractMatrix, jacobian) = DualArray(value, jacobian) +function DualVector(value::AbstractArray, jacobian::ArrayOperator) + T = promote_type(eltype(value), eltype(jacobian)) + DualVector{T}(value, jacobian) +end + +function DualMatrix(value::AbstractArray, jacobian::ArrayOperator) + T = promote_type(eltype(value), eltype(jacobian)) + DualMatrix{T}(value, jacobian) +end + + + +function 
DualVector(value::AbstractVector{S}, jacobian::AbstractArray{T, N}) where {S, T, N} + DualVector(value, ArrayOperator{1}(jacobian)) +end + +function DualMatrix(value::AbstractMatrix{S}, jacobian::AbstractArray{T, N}) where {S, T, N} + DualMatrix(value, ArrayOperator{2}(jacobian)) +end + + +elconvert(::Type{Dual{T}}, a::DualVector) where {T} = DualVector(elconvert(T, a.value), elconvert(T, a.jacobian)) +elconvert(::Type{Dual{T}}, a::DualMatrix) where {T} = DualMatrix(elconvert(T, a.value), elconvert(T, a.jacobian)) + # Basic equality for Dual numbers ==(a::Dual, b::Dual) = a.value == b.value && a.partials == b.partials -isapprox(a::Dual, b::Dual) = isapprox(a.value, b.value) && isapprox(a.partials, b.partials) \ No newline at end of file +isapprox(a::Dual, b::Dual) = isapprox(a.value, b.value) && isapprox(a.partials, b.partials) + +# Type promotion on Dual +Base.promote_rule(::Type{Dual{T1}}, ::Type{Dual{T2}}) where {T1, T2} = Dual{promote_type(T1, T2)} \ No newline at end of file diff --git a/src/utilities.jl b/src/utilities.jl index 726af37..bea6ee4 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -14,13 +14,11 @@ _jacobian(d::DualVector) = d.jacobian.data _jacobian(d::DualVector, ::Int) = d.jacobian.data _jacobian(x::Number, N::Int) = Zeros(typeof(x), 1, N) -_value(v::AbstractVector) = v _value(d::DualVector) = d.value -_value(d::Dual) = d.value _value(x::Number) = x _size(x::Real) = 1 -_size(x::DualVector) = length(x.value) +_size(x::DualVector) = size(x.jacobian.data, 2) """ Vertically concatenate Dual numbers and DualVectors. @@ -50,7 +48,9 @@ end Custom display method for DualVectors. """ show(io::IO, ::MIME"text/plain", x::DualArray) = - (print(io, x.value); print(io, " + "); print(io, x.jacobian); print(io, "𝛜")) + (show(io, x.value); print(io, " + "); show(io, x.jacobian); print(io, "𝛜")) + +show(io::IO, x::DualArray) = show(io, MIME"text/plain"(), x) """ Utility function to compute the jacobian of a function `f` at point `x`. 
diff --git a/test/array_operator_test.jl b/test/array_operator_test.jl index 8af28cd..42b0af2 100644 --- a/test/array_operator_test.jl +++ b/test/array_operator_test.jl @@ -1,5 +1,6 @@ using Test, LinearAlgebra using DualArrays: ArrayOperator +using FillArrays @testset "ArrayOperator" begin t = ArrayOperator{1}([1 2 3;4 5 6;7 8 9]) @@ -50,7 +51,6 @@ using DualArrays: ArrayOperator @test result isa ArrayOperator @test result.data == s.data @test s == ArrayOperator{1}([2 3 4; 5 6 7; 8 9 10]) - @test t * t == ArrayOperator{1}([30 36 42;66 81 96; 102 126 150]) @test t * [1, 0, 0] == ArrayOperator{1}([1,4,7]) @test [1 0 0] * t == ArrayOperator{1}([1 2 3]) @@ -62,4 +62,17 @@ using DualArrays: ArrayOperator @test_throws ArgumentError ArrayOperator{1}(ones(3,3)) .* ArrayOperator{0}(ones(3,3)) end + + @testset "Transpose" begin + @test transpose(t) == ArrayOperator{1}([1 4 7;2 5 8;3 6 9]) + @test transpose(ArrayOperator{0}([1, 2, 3])) == ArrayOperator{1}([1 2 3]) + end + + @testset "Arithmetic with nonstandard array" begin + o = OneElement(1, 1, 3) + a = ArrayOperator{1}(o) + b = ArrayOperator{1}(zeros(3)) + b .= a .* 3 + @test b == ArrayOperator{1}(OneElement(3, 1, 3)) + end end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 2f4c8ed..ab24156 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,4 +1,4 @@ -using DualArrays, Test, LinearAlgebra, ForwardDiff, BandedMatrices +using DualArrays, Test, LinearAlgebra, ForwardDiff, BandedMatrices, FillArrays using DualArrays: ArrayOperator @testset "DualArrays" begin @@ -27,6 +27,9 @@ using DualArrays: ArrayOperator @test_throws BoundsError v[4] @test v == DualVector([1,2, 3], [1 2 3; 4 5 6;7 8 9]) + v2 = DualVector([1., 2, 3], Diagonal(ones(3))) + @test v2[1] == Dual(1, OneElement(1.0, 1, 3)) + x,y = v[1:2],v[2:3] @test x == DualVector([1,2],[1 2 3;4 5 6]) @test y == DualVector([2,3],[4 5 6;7 8 9]) @@ -38,6 +41,24 @@ using DualArrays: ArrayOperator @test sum(v[1:end-1] .* v[2:end]).partials 
== ForwardDiff.gradient(v -> sum(v[1:end-1] .* v[2:end]), 1:n) end + @testset "Sparse Indexing" begin + d = DualVector([1,2,3], I(3)) + @test d[1].partials isa OneElement + @test d[1].partials == OneElement(1.0, 1, 3) + end + @testset "Indexing (Matrix)" begin + m = DualMatrix([1 2 3;4 5 6;7 8 9], ones(3,3,3)) + + @test m[1,1] isa Dual + @test m[1,1] == Dual(1, [1, 1, 1]) + + @test m[1, :] isa DualVector + @test m[1, :] == DualVector([1, 2, 3], ones(3, 3)) + @test m[:, 1] == DualVector([1, 4, 7], ones(3, 3)) + + @test m[1:2, 1:2] == DualMatrix([1 2;4 5], ones(2, 2, 3)) + end + @testset "Arithmetic (DualVector)" begin v = DualVector([1, 2, 3], [1 2 3; 4 5 6;7 8 9]) w = v + v @@ -51,6 +72,9 @@ using DualArrays: ArrayOperator @test sum(x .* y) isa Dual @test sum(x .* y) == Dual(5,[16,23,30]) + + @test y .* [1,2] == DualVector([2, 6], [4 5 6;14 16 18]) + @test [1,2] .* y == DualVector([2, 6], [4 5 6;14 16 18]) end @testset "Arithmetic (Dual)" begin @@ -69,6 +93,9 @@ using DualArrays: ArrayOperator @test sin(a) == Dual(sin(2), cos(2) * [1, 2, 3]) @test cos(b) == Dual(cos(3), -sin(3) * [4, 5, 6]) + + @test a .* [1, 2] == DualVector([2, 4], [1 2 3; 2 4 6]) + @test [1, 2] .* a == DualVector([2, 4], [1 2 3; 2 4 6]) end @testset "Dot product" begin @@ -106,10 +133,26 @@ using DualArrays: ArrayOperator @testset "show" begin d = DualVector([1.0, 2.0], [1 0; 0 1]) s = repr(MIME"text/plain"(), d) + @test repr(d) == s @test occursin(" + ", s) @test endswith(s, "𝛜") end + + @testset "Nested Duals" begin + d = DualVector([2, 3], [1 0;0 1]) + d1 = DualMatrix([1.0 0; 0 1], zeros(2, 2, 2)) + d2 = DualVector(d, d1) + + @test d2 isa DualVector + + @test d2[1].value == Dual(2, [1, 0]) + @test d2[1].partials == DualVector([1.0, 0], zeros(2, 2)) + end include("broadcast_test.jl") include("array_operator_test.jl") -end \ No newline at end of file +end + + +### test examples +include("../examples/nestedduals.jl") \ No newline at end of file