Skip to content

Commit 71cde77

Browse files
committed
cov.jl
1 parent 96aba7f commit 71cde77

File tree

7 files changed

+130
-222
lines changed

7 files changed

+130
-222
lines changed

docs/src/scalarstats.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ The package implements functions for computing various statistics over an array
77
```@docs
88
mean
99
mean!
10-
middle
1110
geomean
1211
harmmean
1312
genmean
@@ -58,6 +57,7 @@ quantile
5857
quantile!
5958
median
6059
median!
60+
middle
6161
```
6262

6363
## Mode and Modes

src/Statistics.jl

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ include("moments.jl")
3939
include("scalarstats.jl")
4040
include("cov.jl")
4141
include("partialcor.jl")
42+
include("toeplitzsolvers.jl")
4243
include("signalcorr.jl")
4344

4445
##### mean #####
@@ -727,8 +728,10 @@ function _getnobs(x::AbstractVecOrMat, y::AbstractVecOrMat, vardim::Int)
727728
return n
728729
end
729730

730-
_vmean(x::AbstractVector, vardim::Int) = mean(x)
731-
_vmean(x::AbstractMatrix, vardim::Int) = mean(x, dims=vardim)
731+
_vmean(x::AbstractVector, vardim::Int, w::Union{AbstractWeights, Nothing}=nothing) =
732+
mean(x, weights=w)
733+
_vmean(x::AbstractMatrix, vardim::Int, w::Union{AbstractWeights, Nothing}=nothing) =
734+
mean(x, dims=vardim, weights=w)
732735

733736
# core functions
734737

@@ -771,7 +774,7 @@ end
771774
## which can't be handled by broadcast
772775
covm(x::AbstractVector, xmean; corrected::Bool=true) =
773776
covzm(map(t -> t - xmean, x); corrected=corrected)
774-
covm(x::AbstractMatrix, xmean, vardim::Int=1; corrected::Bool=true) =
777+
covm(x::AbstractMatrix, xmean, weights::Nothing=nothing, vardim::Int=1; corrected::Bool=true) =
775778
covzm(x .- xmean, vardim; corrected=corrected)
776779
covm(x::AbstractVector, xmean, y::AbstractVector, ymean; corrected::Bool=true) =
777780
covzm(map(t -> t - xmean, x), map(t -> t - ymean, y); corrected=corrected)
@@ -788,14 +791,24 @@ is scaled with `n-1`, whereas the sum is scaled with `n` if `corrected` is `fals
788791
cov(x::AbstractVector; corrected::Bool=true) = covm(x, mean(x); corrected=corrected)
789792

790793
"""
791-
cov(X::AbstractMatrix; dims::Int=1, corrected::Bool=true)
794+
cov(X::AbstractMatrix; dims::Int=1, corrected::Bool=true[, weights::AbstractWeights])
792795
793796
Compute the covariance matrix of the matrix `X` along the dimension `dims`. If `corrected`
794797
is `true` (the default) then the sum is scaled with `n-1`, whereas the sum is scaled with `n`
795798
if `corrected` is `false` where `n = size(X, dims)`.
799+
800+
If `weights` is provided, the biased covariance matrix (`corrected=false`)
801+
is computed by multiplying `scattermat(X, weights=weights)` by
802+
``\\frac{1}{\\sum{w}}`` to normalize. However, the unbiased covariance matrix
803+
(`corrected=true`) is dependent on the type of weights used:
804+
* `AnalyticWeights`: ``\\frac{1}{\\sum w - \\sum {w^2} / \\sum w}``
805+
* `FrequencyWeights`: ``\\frac{1}{\\sum{w} - 1}``
806+
* `ProbabilityWeights`: ``\\frac{n}{(n - 1) \\sum w}`` where ``n`` equals `count(!iszero, w)`
807+
* `Weights`: `ArgumentError` (bias correction not supported)
796808
"""
797-
cov(X::AbstractMatrix; dims::Int=1, corrected::Bool=true) =
798-
covm(X, _vmean(X, dims), dims; corrected=corrected)
809+
cov(X::AbstractMatrix; dims::Int=1, corrected::Bool=true,
810+
weights::Union{AbstractWeights, Nothing}=nothing) =
811+
covm(X, _vmean(X, dims, weights), weights, dims; corrected=corrected)
799812

800813
"""
801814
cov(x::AbstractVector, y::AbstractVector; corrected::Bool=true)
@@ -899,7 +912,8 @@ corzm(x::AbstractMatrix, y::AbstractMatrix, vardim::Int=1) =
899912

900913
corm(x::AbstractVector{T}, xmean) where {T} =
901914
T === Missing ? missing : one(float(nonmissingtype(T)))
902-
corm(x::AbstractMatrix, xmean, vardim::Int=1) = corzm(x .- xmean, vardim)
915+
corm(x::AbstractMatrix, xmean, weights::Nothing=nothing, vardim::Int=1) =
916+
corzm(x .- xmean, vardim)
903917
function corm(x::AbstractVector, mx, y::AbstractVector, my)
904918
require_one_based_indexing(x, y)
905919
n = length(x)
@@ -936,11 +950,13 @@ cor(x::AbstractVector{T}) where {T} =
936950
T === Missing ? missing : one(float(nonmissingtype(T)))
937951

938952
"""
939-
cor(X::AbstractMatrix; dims::Int=1)
953+
cor(X::AbstractMatrix; dims::Int=1[, weights::AbstractWeights])
940954
941955
Compute the Pearson correlation matrix of the matrix `X` along the dimension `dims`.
956+
The weighted correlation is computed if `weights` is provided.
942957
"""
943-
cor(X::AbstractMatrix; dims::Int=1) = corm(X, _vmean(X, dims), dims)
958+
cor(X::AbstractMatrix; dims::Int=1, weights::Union{AbstractWeights, Nothing}=nothing) =
959+
corm(X, _vmean(X, dims, weights), weights, dims)
944960

945961
"""
946962
cor(x::AbstractVector, y::AbstractVector)

src/cov.jl

Lines changed: 31 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,9 @@ _unscaled_covzm(x::DenseMatrix, wv::AbstractWeights, dims::Integer) =
3232
_symmetrize!(unscaled_covzm(x, _scalevars(x, values(wv), dims), dims))
3333

3434
"""
35-
scattermat(X, [wv::AbstractWeights]; mean=nothing, dims=1)
35+
scattermat(X; mean=nothing, dims=1[, weights::AbstractWeights])
3636
3737
Compute the scatter matrix, which is an unnormalized covariance matrix.
38-
A weighting vector `wv` can be specified to weight
39-
the estimate.
4038
4139
# Arguments
4240
* `mean=nothing`: a known mean value. `nothing` indicates that the mean is
@@ -45,62 +43,33 @@ the estimate.
4543
* `dims=1`: the dimension along which the variables are organized.
4644
When `dims = 1`, the variables are considered columns with observations in rows;
4745
when `dims = 2`, variables are in rows with observations in columns.
48-
"""
49-
function scattermat end
50-
51-
52-
"""
53-
cov(X, w::AbstractWeights, vardim=1; mean=nothing, corrected=false)
54-
55-
Compute the weighted covariance matrix. Similar to `var` and `std` the biased covariance
56-
matrix (`corrected=false`) is computed by multiplying `scattermat(X, w)` by
57-
``\\frac{1}{\\sum{w}}`` to normalize. However, the unbiased covariance matrix
58-
(`corrected=true`) is dependent on the type of weights used:
59-
* `AnalyticWeights`: ``\\frac{1}{\\sum w - \\sum {w^2} / \\sum w}``
60-
* `FrequencyWeights`: ``\\frac{1}{\\sum{w} - 1}``
61-
* `ProbabilityWeights`: ``\\frac{n}{(n - 1) \\sum w}`` where ``n`` equals `count(!iszero, w)`
62-
* `Weights`: `ArgumentError` (bias correction not supported)
63-
"""
64-
cov
65-
66-
scattermat(x::DenseMatrix; mean=nothing, dims::Int=1) =
67-
_scattermatm(x, mean, dims)
68-
_scattermatm(x::DenseMatrix, ::Nothing, dims::Int) =
69-
_unscaled_covzm(x .- mean(x, dims=dims), dims)
70-
_scattermatm(x::DenseMatrix, mean, dims::Int=1) =
46+
* `weights`: optional weights for observations.
47+
"""
48+
scattermat(x::DenseMatrix; mean=nothing, dims::Int=1,
49+
weights::Union{AbstractWeights, Nothing}=nothing) =
50+
_scattermatm(x, weights, mean, dims)
51+
_scattermatm(x::DenseMatrix, weights::Nothing, mean::Nothing, dims::Int) =
52+
_unscaled_covzm(x .- Statistics.mean(x, dims=dims), dims)
53+
_scattermatm(x::DenseMatrix, weights::Nothing, mean, dims::Int=1) =
7154
_unscaled_covzm(x .- mean, dims)
7255

73-
scattermat(x::DenseMatrix, wv::AbstractWeights; mean=nothing, dims::Int=1) =
74-
_scattermatm(x, wv, mean, dims)
75-
_scattermatm(x::DenseMatrix, wv::AbstractWeights, ::Nothing, dims::Int) =
76-
_unscaled_covzm(x .- mean(x, wv, dims=dims), wv, dims)
77-
_scattermatm(x::DenseMatrix, wv::AbstractWeights, mean, dims::Int) =
78-
_unscaled_covzm(x .- mean, wv, dims)
56+
_scattermatm(x::DenseMatrix, weights::AbstractWeights, mean::Nothing, dims::Int) =
57+
_unscaled_covzm(x .- Statistics.mean(x, weights=weights, dims=dims), weights, dims)
58+
_scattermatm(x::DenseMatrix, weights::AbstractWeights, mean, dims::Int) =
59+
_unscaled_covzm(x .- mean, weights, dims)
7960

8061
## weighted cov
81-
covm(x::DenseMatrix, mean, w::AbstractWeights, dims::Int=1;
62+
covm(x::DenseMatrix, mean, weights::AbstractWeights, dims::Int=1;
8263
corrected::Bool=true) =
83-
rmul!(scattermat(x, w, mean=mean, dims=dims), varcorrection(w, depcheck(:covm, corrected)))
84-
64+
rmul!(scattermat(x, weights=weights, mean=mean, dims=dims),
65+
varcorrection(weights, corrected))
8566

86-
cov(x::DenseMatrix, w::AbstractWeights, dims::Int=1; corrected::Bool=true) =
87-
covm(x, mean(x, w, dims=dims), w, dims; corrected=depcheck(:cov, corrected))
88-
89-
function corm(x::DenseMatrix, mean, w::AbstractWeights, vardim::Int=1)
90-
c = covm(x, mean, w, vardim; corrected=false)
91-
s = stdm(x, w, mean, vardim; corrected=false)
67+
function corm(x::DenseMatrix, mean, weights::AbstractWeights, vardim::Int=1)
68+
c = covm(x, mean, weights, vardim; corrected=false)
69+
s = std(x, mean=mean, weights=weights, dims=vardim, corrected=false)
9270
cov2cor!(c, s)
9371
end
9472

95-
"""
96-
cor(X, w::AbstractWeights, dims=1)
97-
98-
Compute the Pearson correlation matrix of `X` along the dimension
99-
`dims` with a weighting `w` .
100-
"""
101-
cor(x::DenseMatrix, w::AbstractWeights, dims::Int=1) =
102-
corm(x, mean(x, w, dims=dims), w, dims)
103-
10473
"""
10574
cov2cor(C, s)
10675
@@ -156,7 +125,8 @@ cov(ce::CovarianceEstimator, x::AbstractVector, y::AbstractVector) =
156125
error("cov is not defined for $(typeof(ce)), $(typeof(x)) and $(typeof(y))")
157126

158127
"""
159-
cov(ce::CovarianceEstimator, X::AbstractMatrix, [w::AbstractWeights]; mean=nothing, dims::Int=1)
128+
cov(ce::CovarianceEstimator, X::AbstractMatrix; mean=nothing, dims::Int=1,
129+
[weights::AbstractWeights])
160130
161131
Compute the covariance matrix of the matrix `X` along dimension `dims`
162132
using estimator `ce`. A weighting vector can be specified via the `weights` keyword argument.
@@ -170,18 +140,16 @@ The keyword argument `mean` can be:
170140
* when `dims=2`, an `AbstractVector` of length `N` or an `AbstractMatrix`
171141
of size `(N,1)`.
172142
"""
173-
cov(ce::CovarianceEstimator, X::AbstractMatrix; mean=nothing, dims::Int=1) =
143+
cov(ce::CovarianceEstimator, X::AbstractMatrix; mean=nothing, dims::Int=1,
144+
weights::Union{AbstractWeights, Nothing}=nothing) =
174145
error("cov is not defined for $(typeof(ce)) and $(typeof(X))")
175146

176-
cov(ce::CovarianceEstimator, X::AbstractMatrix, w::AbstractWeights; mean=nothing, dims::Int=1) =
177-
error("cov is not defined for $(typeof(ce)), $(typeof(X)) and $(typeof(w))")
178-
179147
"""
180148
SimpleCovariance(;corrected::Bool=false)
181149
182150
Simple covariance estimator. Estimation calls `cov(x; corrected=corrected)`,
183-
`cov(x, y; corrected=corrected)` or `cov(X, w, dims; corrected=corrected)`
184-
where `x`, `y` are vectors, `X` is a matrix and `w` is a weighting vector.
151+
`cov(x, y; corrected=corrected)` or `cov(X, dims=dims, weights=weights, corrected=corrected)`
152+
where `x`, `y` are vectors, `X` is a matrix and `weights` is a weighting vector.
185153
"""
186154
struct SimpleCovariance <: CovarianceEstimator
187155
corrected::Bool
@@ -194,20 +162,13 @@ cov(sc::SimpleCovariance, x::AbstractVector) =
194162
cov(sc::SimpleCovariance, x::AbstractVector, y::AbstractVector) =
195163
cov(x, y; corrected=sc.corrected)
196164

197-
function cov(sc::SimpleCovariance, X::AbstractMatrix; dims::Int=1, mean=nothing)
198-
dims ∈ (1, 2) || throw(ArgumentError("Argument dims can only be 1 or 2 (given: $dims)"))
199-
if mean === nothing
200-
return cov(X; dims=dims, corrected=sc.corrected)
201-
else
202-
return covm(X, mean, dims, corrected=sc.corrected)
203-
end
204-
end
205-
206-
function cov(sc::SimpleCovariance, X::AbstractMatrix, w::AbstractWeights; dims::Int=1, mean=nothing)
165+
function cov(sc::SimpleCovariance, X::AbstractMatrix;
166+
dims::Int=1,
167+
weights::Union{AbstractWeights, Nothing}=nothing,
168+
mean=nothing)
207169
dims ∈ (1, 2) || throw(ArgumentError("Argument dims can only be 1 or 2 (given: $dims)"))
208170
if mean === nothing
209-
return cov(X, w, dims, corrected=sc.corrected)
210-
else
211-
return covm(X, mean, w, dims, corrected=sc.corrected)
171+
mean = Statistics.mean(X, dims=dims, weights=weights)
212172
end
173+
return covm(X, mean, weights, dims, corrected=sc.corrected)
213174
end

0 commit comments

Comments
 (0)