Skip to content

Commit

Permalink
Merge pull request #4 from MichielStock/main
Browse files Browse the repository at this point in the history
Major refactor PR
  • Loading branch information
stefftaelman authored Feb 22, 2023
2 parents 439913f + a56503f commit ec5990f
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 12 deletions.
32 changes: 31 additions & 1 deletion src/Xicor.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,39 @@ module Xicor

using Distributions, Random, StatsBase

"""
    xicor(X, Y; rank=denserank, noties=false)

Compute the asymmetric ξ correlation coefficient between the vectors `X` and `Y`.

The indices of `X` are put in sorted order with `sortperm`, the `Y` values are ranked
in that order, and the absolute differences between consecutive ranks are summed,
rescaled and subtracted from one.

# Keywords
- `rank`: ranking function. It is called as `rank(perm; by)` and, unless `noties` is
  `true`, a second time as `rank(perm; by, rev=true)`. Defaults to `denserank`.
- `noties`: set to `true` when `Y` contains no ties. The normalisation then uses the
  closed form `3 / (n^2 - 1)` and the reversed ranking pass is skipped.

`ξ` is an alias for this function.
"""
function xicor(X::AbstractVector, Y::AbstractVector; rank=denserank, noties=false)
    @assert length(X) == length(Y) "length `X` and `Y` missmatch"
    n = length(Y)
    order = sortperm(X)
    yvalue = i -> Y[i]
    # todo: carefully check the rank function
    fwd = rank(order, by=yvalue)
    # The reversed ranks are only needed for the tie-aware normalisation.
    bwd = noties ? nothing : rank(order, by=yvalue, rev=true)
    # Sum of absolute jumps between consecutive ranks, accumulated left to right.
    jumps = foldl((acc, i) -> acc + abs(fwd[i + 1] - fwd[i]), 1:(n - 1); init=0.0)
    scale = if noties
        3 / (n^2 - 1)
    else
        n / (2 * sum(v -> v * (n - v), bwd))
    end
    return 1 - jumps * scale
end

# Unicode alias for `xicor`. Declared `const` so the global binding has a fixed type
# and calls through `ξ` are not dynamically dispatched.
const ξ = xicor


include("correlation.jl")
include("rank.jl")

export xicor
export xicor, ξ

end
20 changes: 10 additions & 10 deletions src/rank.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,29 +2,33 @@ using Random, StatsBase


"""
    maxrank(x)

Return the sample ranks of `x`, assigning the maximum rank to equal values.

# Arguments
- `x`: a vector of sortable values (e.g. floats, integers, or bools).

# Returns
A `Vector{Int}` with one entry per element of `x`. Each entry is the position of the
last occurrence of that element in the sorted copy of `x`.
"""
function maxrank(x::AbstractVector)
    sorted_x = sort(x)
    # Binary search on the sorted copy finds the last matching position without
    # rescanning the whole vector for every element.
    return Int[searchsortedlast(sorted_x, value) for value in x]
end


"""
minrank(x)
A function that returns the sample ranks using minimum ranks
for equal values.
- x : A Vector of Floats, Integers, or Bools
"""
function minrank(x)
sorted_x = sort(x)
ranks = zeros(Int, length(x))
for (idx,i) in enumerate(x)
for (idx, i) in enumerate(x)
ranks[idx] = findfirst(isequal(i), sorted_x)
end
return ranks
Expand All @@ -38,11 +42,8 @@ of equal values.
- seed : either an Integer to seed the random number
generator or `missing` to use the default seed.
"""
function randrank(x; seed=missing)
@assert typeof(seed) <: Integer || typeof(seed) <: Missing
if !ismissing(seed)
Random.seed!(seed)
end
function randrank(x; seed::Union{Nothing,Integer}=nothing)
@assert seed isa Integer || isnothing(seed)
sorted_x = sort(x)
ranks = zeros(Int, length(x))
for (idx,i) in enumerate(x)
Expand All @@ -62,9 +63,8 @@ A function that returns the sample ranks of the values in a vector.
TO DO: the original R `rank` function has an argument that lets you choose the
placement of the missing values.
"""
function rank(x; ties_method::String="average", seed=missing)
function rank(x; ties_method::String="average", seed::Union{Nothing,Integer}=missing)
@assert ties_method ["average", "random", "max", "min"]
@assert typeof(seed) <: Integer || typeof(seed) <: Missing

# get NA/missing
missing_values = ismissing.(x)
Expand Down
6 changes: 5 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,9 @@ using Xicor
using Test

@testset "Xicor.jl" begin
    X = [1, 2, 3, 5]
    Y = [1.0, 3.0, 8.0, 10.0]

    # Y increases with X here, so the coefficient is expected to be positive.
    @test ξ(X, Y) > 0
    # Y has no ties, so the tie-aware and no-ties normalisations must agree
    # (compared with ≈ because both sides are floating-point results).
    # NOTE(review): `denserank` comes from StatsBase — confirm it is in scope in this
    # test file.
    @test ξ(X, Y, rank=denserank) ≈ ξ(X, Y, rank=denserank, noties=true)
end

0 comments on commit ec5990f

Please sign in to comment.