deprecated certain syntax for cor method. Re-arranged package (#53)

SchisslerGroup · Mar 24, 2024 · 4752c09 · 4752c09
1 parent 411a04a
commit 4752c09
Show file tree

Hide file tree

Showing 15 changed files with 325 additions and 217 deletions.
diff --git a/README.md b/README.md
@@ -33,7 +33,7 @@ margins = [Binomial(20, 0.2), Beta(2, 3), LogNormal(3, 1)]
 adjusted_corr = pearson_match(target_corr, margins)
 
 x = rvec(100_000, adjusted_corr, margins)
-cor(x, Pearson)
+cor(Pearson, x)
 ```
 
 Spearman/Kendall matching
@@ -43,7 +43,7 @@ spearman_corr = cor_randPD(3)
 adjusted_corr = cor_convert(spearman_corr, Spearman, Pearson)
 
 x = rvec(100_000, adjusted_corr, margins)
-cor(x, Spearman)
+cor(Spearman, x)
 ```
 
 Nearest correlation matrix

diff --git a/docs/src/getting_started.md b/docs/src/getting_started.md
@@ -49,7 +49,7 @@ To simulate observations from this joint distribution, we need to estimate the c
 To estimate the correlation, we use `cor` with an argument specifying the type of correlation to estimate. The options are `Pearson`, `Spearman`, or `Kendall`.
 
 ```@repl started
-ρ = cor(Matrix(df), Pearson)
+ρ = cor(Pearson, Matrix(df))
 ```
 
 ## Defining Marginal Distributions

diff --git a/docs/src/nearest_correlation_matrix.md b/docs/src/nearest_correlation_matrix.md
@@ -23,7 +23,7 @@ We provide the function `cor_nearPD` to handle this problem. It is based off of
 
 ```@repl ncm
 m = Matrix(brca)
-spearman_corr = cor(m, Spearman);
+spearman_corr = cor(Spearman, m);
 pearson_corr = cor_convert(spearman_corr, Spearman, Pearson);
 isposdef(spearman_corr), isposdef(pearson_corr)
 ```

diff --git a/docs/src/pearson_matching.md b/docs/src/pearson_matching.md
@@ -18,7 +18,7 @@ margins = [Normal(μ_Temp, σ_Temp), LogNormal(μ_Ozone, σ_Ozone)]
 Let's say we really wanted to estimate the Spearman correlation between the temperature and ozone.
 
 ```@repl 1
-spearman_corr = cor(Matrix(df), Spearman)
+spearman_corr = cor(Spearman, Matrix(df))
 cor_bounds(margins[1], margins[2], Spearman)
 ```
 
@@ -31,15 +31,15 @@ Here is what we get when we use the Spearman correlation directly with no transf
 
 ```@repl 1
 x2 = rvec(1_000_000, spearman_corr, margins);
-cor(x2, Spearman)
+cor(Spearman, x2)
 ```
 
 Let's try to address **problem 1** and convert the Spearman correlation to a Pearson correlation.
 
 ```@repl 1
 adjusted_spearman_corr = cor_convert(spearman_corr, Spearman, Pearson);
 x3 = rvec(1_000_000, adjusted_spearman_corr, margins);
-cor(x3, Spearman)
+cor(Spearman, x3)
 ```
 
 Notice that the estimated Spearman correlation is essentially the same as the target Spearman correlation. This is because the transformation in the NORTA step is monotonic, which means that rank-based correlations are preserved. As a consequence, we can match the Spearman correlation exactly (up to stochastic error) with an explicit transformation.
@@ -49,22 +49,22 @@ Notice that the estimated Spearman correlation is essentially the same as the ta
 We can employ a Pearson matching algorithm that determines the necessary input correlation in order to achieve the target Pearson correlation. Let's now address **problem 2**.
 
 ```@repl 1
-pearson_corr = cor(Matrix(df), Pearson)
+pearson_corr = cor(Pearson, Matrix(df))
 ```
 
 If we use the measured correlation directly, then the estimated correlation from the simulated data is far off:
 
 ```@repl 1
 x4 = rvec(1_000_000, pearson_corr, margins);
-cor(x4, Pearson)
+cor(Pearson, x4)
 ```
 
 The estimated correlation is much too low. Let's do some Pearson matching and observe the results.
 
 ```@repl 1
 adjusted_pearson_corr = pearson_match(pearson_corr, margins)
 x5 = rvec(1_000_000, adjusted_pearson_corr, margins);
-cor(x5)
+cor(Pearson, x5)
 ```
 
 Now the simulated data results in a correlation structure that exactly matches the target Pearson correlation!
diff --git a/src/Bigsimr.jl b/src/Bigsimr.jl
@@ -1,32 +1,44 @@
 module Bigsimr
+include("submodules/BigsimrBase.jl")
 
 
 using Distributions: UnivariateDistribution as UD
 using Distributions: ContinuousUnivariateDistribution as CUD
 using Distributions: DiscreteUnivariateDistribution as DUD
 
-using LinearAlgebra: issymmetric, isposdef, cholesky, Diagonal, diagm, diag
+using LinearAlgebra: issymmetric, isposdef, cholesky, diagm, diag
+using LinearAlgebra: Diagonal, Symmetric
 using NearestCorrelationMatrix
 using NearestCorrelationMatrix.Internals: isprecorrelation
+using PearsonCorrelationMatch
+using Reexport
 using SharedArrays
 using Statistics: cor
 using StatsBase: corspearman, corkendall
 
+
+using .BigsimrBase:
+    clampcor,
+    make_symmetric!,
+    set_diag1!,
+    norm2margin,
+    randn_shared,
+    rmvn_shared,
+    idx_subsets2
+
+
 import Statistics
+import .BigsimrBase: _cor
 
 
-using Reexport
+
+@reexport using .BigsimrBase
 @reexport using PearsonCorrelationMatch: pearson_bounds, pearson_match
 @reexport using NearestCorrelationMatrix: nearest_cor, nearest_cor!
 @reexport using NearestCorrelationMatrix: Newton, AlternatingProjections, DirectProjection
 
 
 export
-    # Correlation Types
-    CorType,
-    Pearson,
-    Spearman,
-    Kendall,
     # Correlation Methods
     cor,
     cor_fast,
@@ -52,16 +64,14 @@ export
     rvec
 
 
-include("internals/Internals.jl")
-using .Internals
-
-include("cortype.jl")
 include("cor.jl")
 include("cor_utils.jl")
 include("cor_gen.jl")
 include("nearest_cor.jl")
 include("rand.jl")
 
+include("deprecated.jl")
+
 
 """
     is_correlation(X)

diff --git a/src/cor.jl b/src/cor.jl
@@ -1,5 +1,5 @@
 """
-    cor(x[, y], ::CorType)
+    cor(cortype, x [,y])
 
 Compute the correlation matrix of a given type.
 
@@ -23,60 +23,55 @@ julia> x = [-1.62169     0.0158613   0.500375  -0.794381
             -3.38438    -1.93058     1.77413   -1.23657
              1.57527     0.836351   -1.13275   -0.277048];
 
-julia> cor(x, Pearson)
+julia> cor(Pearson, x)
 4×4 Matrix{Float64}:
   1.0        0.86985   -0.891312   0.767433
   0.86985    1.0       -0.767115   0.817407
  -0.891312  -0.767115   1.0       -0.596762
   0.767433   0.817407  -0.596762   1.0
 
-julia> cor(x, Spearman)
+julia> cor(Spearman, x)
 4×4 Matrix{Float64}:
   1.0        0.866667  -0.854545   0.709091
   0.866667   1.0       -0.781818   0.684848
  -0.854545  -0.781818   1.0       -0.612121
   0.709091   0.684848  -0.612121   1.0
 
-julia> cor(x, Kendall)
+julia> cor(Kendall, x)
 4×4 Matrix{Float64}:
   1.0        0.733333  -0.688889   0.555556
   0.733333   1.0       -0.688889   0.555556
  -0.688889  -0.688889   1.0       -0.422222
   0.555556   0.555556  -0.422222   1.0
 ```
 """
-function Statistics.cor(::Any, ::Any, ::CorType{T}) where T
-    error("Method `cor(x, y, cortype)` is not implemented for cortype $T")
+function Statistics.cor(cortype::CorType, args...; kwargs...)
+    _cor(cortype, args...; kwargs...)
 end
 
-function Statistics.cor(::Any, ::CorType{T}) where T
-    error("Method `cor(x, cortype)` is not implemented for cortype $T")
-end
-
-Statistics.cor(x,    ::CorType{:Pearson})  = cor(x)
-Statistics.cor(x, y, ::CorType{:Pearson})  = cor(x, y)
-Statistics.cor(x,    ::CorType{:Spearman}) = corspearman(x)
-Statistics.cor(x, y, ::CorType{:Spearman}) = corspearman(x, y)
-Statistics.cor(x,    ::CorType{:Kendall})  = corkendall(x)
-Statistics.cor(x, y, ::CorType{:Kendall})  = corkendall(x, y)
+_cor(::CorType{:Pearson} , args...; kwargs...) = cor(args...; kwargs...)
+_cor(::CorType{:Spearman}, args...; kwargs...) = corspearman(args...; kwargs...)
+_cor(::CorType{:Kendall} , args...; kwargs...) = corkendall(args...; kwargs...)
 
 
 """
-    cor_fast(X, cortype=Pearson)
+    cor_fast([cortype,] X)
 
 Calculate the correlation matrix in parallel using available threads.
 """
-function cor_fast(X::AbstractMatrix{T}, cortype::CorType=Pearson) where T
+function cor_fast(cortype::CorType, X::AbstractMatrix{T}) where T
     d = size(X, 2)
 
     Y = SharedMatrix{T}(d, d)
 
-    Base.Threads.@threads for (i, j) in _idx_subsets2(d)
-        @inbounds Y[i,j] = cor(@view(X[:,i]), @view(X[:,j]), cortype)
+    Base.Threads.@threads for (i, j) in idx_subsets2(d)
+        @inbounds Y[i,j] = cor(cortype, @view(X[:,i]), @view(X[:,j]))
     end
 
     C = sdata(Y)
     cor_constrain!(C)
 
     return C
 end
+
+cor_fast(X) = cor_fast(Pearson, X)
diff --git a/src/cor_utils.jl b/src/cor_utils.jl
@@ -66,12 +66,12 @@ function cor_convert(X::AbstractMatrix{<:Real}, from::CorType, to::CorType)
 end
 
 cor_convert(x::Real, ::CorType{T},         ::CorType{T}) where T = x
-cor_convert(x::Real, ::CorType{:Pearson},  ::CorType{:Spearman}) = _clampcor(asin(x / 2) * 6 / π)
-cor_convert(x::Real, ::CorType{:Pearson},  ::CorType{:Kendall})  = _clampcor(asin(x) * 2 / π)
-cor_convert(x::Real, ::CorType{:Spearman}, ::CorType{:Pearson})  = _clampcor(sinpi(x / 6) * 2)
-cor_convert(x::Real, ::CorType{:Spearman}, ::CorType{:Kendall})  = _clampcor(asin(sinpi(x / 6) * 2) * 2 / π)
-cor_convert(x::Real, ::CorType{:Kendall},  ::CorType{:Pearson})  = _clampcor(sinpi(x / 2))
-cor_convert(x::Real, ::CorType{:Kendall},  ::CorType{:Spearman}) = _clampcor(asin(sinpi(x / 2) / 2) * 6 / π)
+cor_convert(x::Real, ::CorType{:Pearson},  ::CorType{:Spearman}) = clampcor(asin(x / 2) * 6 / π)
+cor_convert(x::Real, ::CorType{:Pearson},  ::CorType{:Kendall})  = clampcor(asin(x) * 2 / π)
+cor_convert(x::Real, ::CorType{:Spearman}, ::CorType{:Pearson})  = clampcor(sinpi(x / 6) * 2)
+cor_convert(x::Real, ::CorType{:Spearman}, ::CorType{:Kendall})  = clampcor(asin(sinpi(x / 6) * 2) * 2 / π)
+cor_convert(x::Real, ::CorType{:Kendall},  ::CorType{:Pearson})  = clampcor(sinpi(x / 2))
+cor_convert(x::Real, ::CorType{:Kendall},  ::CorType{:Spearman}) = clampcor(asin(sinpi(x / 2) / 2) * 6 / π)
 
 
 """
@@ -80,12 +80,14 @@ cor_convert(x::Real, ::CorType{:Kendall},  ::CorType{:Spearman}) = _clampcor(asi
 Same as [`cor_constrain`](@ref), except that the matrix is updated in place to save memory.
 """
 function cor_constrain!(X::AbstractMatrix{<:Real})
-    X .= _clampcor.(X)
-    _symmetric!(X)
-    _set_diag1!(X)
+    X .= clampcor.(X)
+    make_symmetric!(X)
+    set_diag1!(X)
     return X
 end
 
+cor_constrain!(X::Symmetric) = cor_constrain!(X.data)
+
 
 """
     cor_constrain(X::AbstractMatrix{<:Real})
@@ -146,6 +148,12 @@ function cov2cor!(X::AbstractMatrix{<:Real})
     return cor_constrain!(X)
 end
 
+function cov2cor!(X::Symmetric)
+    D = sqrt(inv(Diagonal(X)))
+    X.data .= D * X * D
+    return cor_constrain!(X)
+end
+
 
 """
     cor_bounds(d1, d2, cortype, samples)
@@ -194,10 +202,10 @@ function cor_bounds(d1::UD, d2::UD, cortype::CorType, samples::Real)
     sort!(a)
     sort!(b)
 
-    upper = cor(a, b, cortype)
+    upper = cor(cortype, a, b)
 
     reverse!(b)
-    lower = cor(a, b, cortype)
+    lower = cor(cortype, a, b)
 
     return (lower = lower, upper = upper)
 end
@@ -250,7 +258,7 @@ function cor_bounds(margins, cortype::CorType, samples::Real)
     lower = zeros(Float64, d, d)
     upper = zeros(Float64, d, d)
 
-    Base.Threads.@threads for (i,j) in _idx_subsets2(d)
+    Base.Threads.@threads for (i,j) in idx_subsets2(d)
         l, u = cor_bounds(margins[i], margins[j], cortype, n)
         lower[i,j] = l
         upper[i,j] = u

diff --git a/src/deprecated.jl b/src/deprecated.jl
@@ -0,0 +1,14 @@
+function Statistics.cor(x::AbstractVecOrMat, y::AbstractVecOrMat, cortype::CorType)
+    @warn "The syntax 'cor(x, y, cortype)' is now deprecated. Please use 'cor(cortype, x, y)' instead." maxlog=1
+    return cor(cortype, x, y)
+end
+
+function Statistics.cor(x::AbstractVecOrMat, cortype::CorType)
+    @warn "The syntax 'cor(x, cortype)' is now deprecated. Please use 'cor(cortype, x)' instead." maxlog=1
+    return cor(cortype, x)
+end
+
+function cor_fast(X::AbstractMatrix, cortype::CorType=Pearson)
+    @warn "The syntax 'cor_fast(X, cortype)' is now deprecated. Please use 'cor_fast(cortype, X)' instead." maxlog=1
+    return cor_fast(cortype, X)
+end
diff --git a/src/rand.jl b/src/rand.jl
@@ -37,7 +37,13 @@ function rmvn(n::Real, μ::AbstractVector{<:Real}, Σ::AbstractMatrix{<:Real})
 
     n = convert(Int, n)
 
-    return μ' .+ _rmvn_shared(n, Σ)
+    X = rmvn_shared(n, Σ)
+
+    Base.Threads.@threads for i in 1:n
+        X[i,:] .+= μ
+    end
+
+    return sdata(X)
 end
 
 function rmvn(n, Σ::AbstractMatrix{T}) where {T<:Real}
@@ -88,10 +94,10 @@ function rvec(n, rho::AbstractMatrix{T}, margins) where {T<:Real}
 
     n = convert(Int, n)
 
-    Z = _rmvn_shared(n, rho)
+    Z = rmvn_shared(n, rho)
 
     Base.Threads.@threads for i in 1:d
-        @inbounds @view(Z[:,i]) .= _norm2margin(margins[i], @view(Z[:,i]))
+        @inbounds @view(Z[:,i]) .= norm2margin(margins[i], @view(Z[:,i]))
     end
 
     return sdata(Z)