From 74512dcf279fece95938afd4aca809bbb3019ad6 Mon Sep 17 00:00:00 2001 From: michielstock Date: Mon, 13 Feb 2023 14:38:00 +0000 Subject: [PATCH 1/3] :wrench: rewrite code --- src/Xicor.jl | 31 ++++++++++++++++++++++++++++++- src/rank.jl | 20 ++++++++++---------- test/runtests.jl | 5 ++++- 3 files changed, 44 insertions(+), 12 deletions(-) diff --git a/src/Xicor.jl b/src/Xicor.jl index 4e93b2e..3e76289 100644 --- a/src/Xicor.jl +++ b/src/Xicor.jl @@ -2,9 +2,38 @@ module Xicor using Distributions, Random, StatsBase +""" + xicor(X, Y; rank=tiedrank, noties=false) + +Computes the asymmetric ξ correlation coefficient between two vectors `X` and `Y`. +`rank` specifies how the rank is computed, default uses `tiedrank`. If there +are no ties in `Y`, `noties` can be set to `true`, which speeds up computation. + +`ξ` is an alias for this function. +""" +function xicor(X::AbstractVector, Y::AbstractVector; rank=tiedrank, noties=false) + @assert length(X) == length(Y) "length `X` and `Y` missmatch" + n = length(Y) + xind = sortperm(X) + r = rank(xind, by=i->Y[i]) + if !noties + l = rank(xind, by=i->Y[i], rev=true) + end + ξn = 0.0 + for i in 1:n-1 + ξn += (abs(r[i+1] - r[i])) + end + ξn *= noties ? 3 / (n^2 - 1) : n / 2sum(li->li*(n-li), l) + ξn = 1 - ξn + return ξn +end + +ξ = xicor + + include("correlation.jl") include("rank.jl") -export xicor +export xicor, ξ end diff --git a/src/rank.jl b/src/rank.jl index 5e7770e..27fa46e 100644 --- a/src/rank.jl +++ b/src/rank.jl @@ -2,14 +2,16 @@ using Random, StatsBase """ + maxrank(x) + A function that returns the sample ranks using maximum ranks for equal values. 
- x : A Vector of Floats, Integers, or Bools """ -function maxrank(x) +function maxrank(x::AbstractVector) sorted_x = sort(x) ranks = zeros(Int, length(x)) - for (idx,i) in enumerate(x) + for (idx, i) in enumerate(x) ranks[idx] = findlast(isequal(i), sorted_x) end return ranks @@ -17,6 +19,8 @@ end """ + minrank(x) + A function that returns the sample ranks using minimum ranks for equal values. - x : A Vector of Floats, Integers, or Bools @@ -24,7 +28,7 @@ for equal values. function minrank(x) sorted_x = sort(x) ranks = zeros(Int, length(x)) - for (idx,i) in enumerate(x) + for (idx, i) in enumerate(x) ranks[idx] = findfirst(isequal(i), sorted_x) end return ranks @@ -38,11 +42,8 @@ of equal values. - seed : either an Integer to seed the random number generator or `missing` to use the default seed. """ -function randrank(x; seed=missing) - @assert typeof(seed) <: Integer || typeof(seed) <: Missing - if !ismissing(seed) - Random.seed!(seed) - end +function randrank(x; seed::Union{Nothing,Integer}=nothing) + @assert seed isa Integer || isnothing(seed) sorted_x = sort(x) ranks = zeros(Int, length(x)) for (idx,i) in enumerate(x) @@ -62,9 +63,8 @@ A function that returns the sample ranks of the values in a vector. TO DO: the original R `rank` function has an argument that let's you choose the placement of the missing values. """ -function rank(x; ties_method::String="average", seed=missing) +function rank(x; ties_method::String="average", seed::Union{Nothing,Integer}=missing) @assert ties_method ∈ ["average", "random", "max", "min"] - @assert typeof(seed) <: Integer || typeof(seed) <: Missing # get NA/missing missing_values = ismissing.(x) diff --git a/test/runtests.jl b/test/runtests.jl index cfcde73..acdbf8b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -2,5 +2,8 @@ using Xicor using Test @testset "Xicor.jl" begin - # Write your tests here. 
+ X = [1, 2, 3, 5] + Y = [1.0, 3.0, 8.0, 10.0] + + @test ξ(X, Y) > 0 end From 688e53a7874456370eb4f6f1b64a520defda3133 Mon Sep 17 00:00:00 2001 From: michielstock Date: Wed, 15 Feb 2023 09:51:20 +0000 Subject: [PATCH 2/3] =?UTF-8?q?=E2=9C=85=20test=20whether=20the=20noties?= =?UTF-8?q?=20alternative=20is=20correct?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/runtests.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/test/runtests.jl b/test/runtests.jl index acdbf8b..add6848 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -6,4 +6,5 @@ using Test Y = [1.0, 3.0, 8.0, 10.0] @test ξ(X, Y) > 0 + @test ξ(X, Y) ≈ ξ(X, Y, noties=true) end From a56503f0d99b567ce30c779ceaaf1d6e32ebfe8c Mon Sep 17 00:00:00 2001 From: michielstock Date: Mon, 20 Feb 2023 17:49:57 +0000 Subject: [PATCH 3/3] :ambulance: fix perm --- src/Xicor.jl | 11 ++++++----- test/runtests.jl | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/Xicor.jl b/src/Xicor.jl index 3e76289..0a81d01 100644 --- a/src/Xicor.jl +++ b/src/Xicor.jl @@ -11,17 +11,18 @@ are no ties in `Y`, `noties` can be set to `true`, which speeds up computation. `ξ` is an alias for this function. """ -function xicor(X::AbstractVector, Y::AbstractVector; rank=tiedrank, noties=false) +function xicor(X::AbstractVector, Y::AbstractVector; rank=denserank, noties=false) @assert length(X) == length(Y) "length `X` and `Y` missmatch" n = length(Y) - xind = sortperm(X) - r = rank(xind, by=i->Y[i]) + xsortperm = sortperm(X) + # todo: carefully check the rank function + r = rank(xsortperm, by=i->Y[i]) if !noties - l = rank(xind, by=i->Y[i], rev=true) + l = rank(xsortperm, by=i->Y[i], rev=true) end ξn = 0.0 for i in 1:n-1 - ξn += (abs(r[i+1] - r[i])) + ξn += abs(r[i+1] - r[i]) end ξn *= noties ? 
3 / (n^2 - 1) : n / 2sum(li->li*(n-li), l) ξn = 1 - ξn diff --git a/test/runtests.jl b/test/runtests.jl index add6848..3080583 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -6,5 +6,5 @@ using Test Y = [1.0, 3.0, 8.0, 10.0] @test ξ(X, Y) > 0 - @test ξ(X, Y) ≈ ξ(X, Y, noties=true) + @test ξ(X, Y, rank=denserank) ≈ ξ(X, Y, rank=denserank, noties=true) end