Skip to content
This repository has been archived by the owner on May 4, 2019. It is now read-only.

Commit

Permalink
Add/organize reduce- and stats-related methods
Browse files Browse the repository at this point in the history
Add:
-`Base.mean(X::NullableArray)`
-`Base.mean(X::NullableArray, w::WeightVec)`
-`Base.varm(X::NullableArray, m)`
-`Base.varzm(X::NullableArray)`
-`Base.var(X::NullableArray)`
-`Base.stdm{T}(X::NullableArray, m::Union{T, Nullable{T}})`
-`Base.std(X::NullableArray)`

Organization:
-move the above stats-related methods to `src/statistics.jl`

Also includes some minor bug fixes in `src/mapreduce.jl`
  • Loading branch information
davidagold committed Jul 13, 2015
1 parent 0512336 commit 76309a0
Show file tree
Hide file tree
Showing 5 changed files with 441 additions and 28 deletions.
199 changes: 191 additions & 8 deletions perf/mapreduce.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,30 +15,213 @@ f{T<:Number}(x::Nullable{T}) = Nullable(5 * x.value, x.isnull)

#-----------------------------------------------------------------------------#

# Benchmark the reduction methods on Array, NullableArray and DataArray inputs.
#
# Builds 5_000_000 random Float64 values `A` and a random Bool vector `B`, then
# wraps them as
#     X = NullableArray(A)       D = DataArray(A)
#     Y = NullableArray(A, B)    E = DataArray(A, B)
# The printed labels describe X/D as having "0 missing entries" and Y/E as
# having "~half missing entries".
#
# Prints `@time` results for mapreduce and reduce (through `profile_mapreduce`
# and `profile_reduce`), then for sum, prod, minimum, maximum, sumabs, sumabs2,
# mean and var, both directly and with the mapping function `f` that is defined
# near the top of this file.
#
# Every timed expression is executed once, untimed, beforehand so that the
# first-call compilation cost does not end up in the reported time.
function profile_reduce_methods()
    A = rand(5_000_000)
    B = rand(Bool, 5_000_000)
    X = NullableArray(A)
    Y = NullableArray(A, B)
    D = DataArray(A)
    E = DataArray(A, B)

    profile_mapreduce(A, X, Y, D, E)
    println()
    profile_reduce(A, X, Y, D, E)
    println()

    # Inputs built without the mask: all three container types are compared.
    for method in (
        sum,
        prod,
        minimum,
        maximum,
    )
        (method)(A)
        (method)(X)
        (method)(D)
        println("Method: $method(A) (0 missing entries)")
        print(" for Array{Float64}: ")
        @time((method)(A))
        print(" for NullableArray{Float64}: ")
        @time((method)(X))
        print(" for DataArray{Float64}: ")
        @time((method)(D))

        (method)(f, A)
        (method)(f, X)
        (method)(f, D)
        println("Method: $method(f, A) (0 missing entries)")
        print(" for Array{Float64}: ")
        @time((method)(f, A))
        print(" for NullableArray{Float64}: ")
        @time((method)(f, X))
        print(" for DataArray{Float64}: ")
        @time((method)(f, D))
    end
    println()

    # Masked inputs, default (non-skipping) behaviour.
    for method in (
        sum,
        prod,
        minimum,
        maximum,
    )
        (method)(Y)
        println("Method: $method(A) (~half missing entries, skip=false)")
        print(" for NullableArray{Float64}: ")
        @time((method)(Y))
        (method)(E)
        print(" for DataArray{Float64}: ")
        @time((method)(E))

        (method)(f, Y)
        println("Method: $method(f, A) (~half missing entries, skip=false)")
        print(" for NullableArray{Float64}: ")
        @time((method)(f, Y))
        # Only sum and prod are timed with `f` on the masked DataArray; for the
        # other methods the script reports the known failure instead of calling.
        if in(method, (sum, prod))
            (method)(f, E)
            print(" for DataArray{Float64}: ")
            @time((method)(f, E))
        else
            println(" $method(f, A::DataArray) currently incurs error")
        end
    end
    println()

    # Masked inputs with skipping enabled. The keyword differs per container:
    # `skipnull` for NullableArray, `skipna` for DataArray.
    for method in (
        sum,
        prod,
        minimum,
        maximum,
    )
        (method)(Y, skipnull=true)
        println("Method: $method(A) (~half missing entries, skip=true)")
        print(" for NullableArray{Float64}: ")
        @time((method)(Y, skipnull=true))
        (method)(E, skipna=true)
        print(" for DataArray{Float64}: ")
        @time((method)(E, skipna=true))

        (method)(f, Y, skipnull=true)
        println("Method: $method(f, A) (~half missing entries, skip=true)")
        print(" for NullableArray{Float64}: ")
        @time((method)(f, Y, skipnull=true))
        (method)(f, E, skipna=true)
        print(" for DataArray{Float64}: ")
        @time((method)(f, E, skipna=true))
    end
    println()

    for method in (
        sumabs,
        sumabs2
    )
        (method)(A)
        (method)(X)
        (method)(D)
        println("Method: $method(A) (0 missing entries)")
        print(" for Array{Float64}: ")
        @time((method)(A))
        print(" for NullableArray{Float64}: ")
        @time((method)(X))
        print(" for DataArray{Float64}: ")
        @time((method)(D))

        (method)(f, A)
        (method)(f, X)
        (method)(f, D)
        println("Method: $method(f, A) (0 missing entries)")
        print(" for Array{Float64}: ")
        @time((method)(f, A))
        print(" for NullableArray{Float64}: ")
        @time((method)(f, X))
        print(" for DataArray{Float64}: ")
        @time((method)(f, D))
    end
    # Blank separator, matching the one printed after every section above.
    println()

    # NOTE(review): this loop also calls `mean(f, ...)` and `var(f, ...)`;
    # confirm that a function-first method exists for each of the three
    # container types, otherwise those calls raise a MethodError.
    for method in (
        mean,
        var,
    )
        (method)(A)
        (method)(X)
        (method)(D)
        println("Method: $method(A) (0 missing entries)")
        print(" for Array{Float64}: ")
        @time((method)(A))
        print(" for NullableArray{Float64}: ")
        @time((method)(X))
        print(" for DataArray{Float64}: ")
        @time((method)(D))

        (method)(f, A)
        (method)(f, X)
        (method)(f, D)
        println("Method: $method(f, A) (0 missing entries)")
        print(" for Array{Float64}: ")
        @time((method)(f, A))
        print(" for NullableArray{Float64}: ")
        @time((method)(f, X))
        print(" for DataArray{Float64}: ")
        @time((method)(f, D))
    end
end


# Time `mapreduce(f, Base.(:+), ...)` on each container type.
#
# Arguments, as they are used by the calls below:
#   A    - the plain Array
#   X, D - the NullableArray / DataArray timed under the "0 missing entries"
#          label
#   Y, E - the NullableArray / DataArray timed under the "~half missing
#          entries" labels, first with default behaviour and then with
#          `skipnull=true` / `skipna=true`
#
# `f` is the mapping function defined near the top of this file. Each
# expression is run once untimed before its `@time` so that first-call
# compilation is not measured. The section header is printed once and each
# input is warmed up exactly once.
function profile_mapreduce(A, X, Y, D, E)
    println("Method: mapreduce(f, op, A) (0 missing entries)")
    mapreduce(f, Base.(:+), A)
    mapreduce(f, Base.(:+), X)
    mapreduce(f, Base.(:+), D)
    print(" for Array{Float64}: ")
    @time(mapreduce(f, Base.(:+), A))
    print(" for NullableArray{Float64}: ")
    @time(mapreduce(f, Base.(:+), X))
    print(" for DataArray{Float64}: ")
    @time(mapreduce(f, Base.(:+), D))

    println("Method: mapreduce(f, op, A) (~half missing entries, skip=false)")
    mapreduce(f, Base.(:+), Y)
    print(" for NullableArray{Float64}: ")
    @time(mapreduce(f, Base.(:+), Y))
    mapreduce(f, Base.(:+), E)
    print(" for DataArray{Float64}: ")
    @time(mapreduce(f, Base.(:+), E))

    println("Method: mapreduce(f, op, A) (~half missing entries, skip=true)")
    mapreduce(f, Base.(:+), Y, skipnull=true)
    print(" for NullableArray{Float64}: ")
    @time(mapreduce(f, Base.(:+), Y, skipnull=true))
    mapreduce(f, Base.(:+), E, skipna=true)
    print(" for DataArray{Float64}: ")
    @time(mapreduce(f, Base.(:+), E, skipna=true))
end

# Time `reduce(Base.(:+), ...)` on each container type.
#
# Arguments, as they are used by the calls below:
#   A    - the plain Array
#   X, D - the NullableArray / DataArray timed under the "0 missing entries"
#          label
#   Y, E - the NullableArray / DataArray timed under the "~half missing
#          entries" labels, first with default behaviour and then with
#          `skipnull=true` / `skipna=true`
#
# Each expression is run once untimed before its `@time` so that first-call
# compilation is not measured. The labels read `reduce(op, A)` because no
# mapping function is passed to any of the calls.
function profile_reduce(A, X, Y, D, E)
    println("Method: reduce(op, A) (0 missing entries)")
    reduce(Base.(:+), A)
    reduce(Base.(:+), X)
    reduce(Base.(:+), D)
    print(" for Array{Float64}: ")
    @time(reduce(Base.(:+), A))
    print(" for NullableArray{Float64}: ")
    @time(reduce(Base.(:+), X))
    print(" for DataArray{Float64}: ")
    @time(reduce(Base.(:+), D))

    println("Method: reduce(op, A) (~half missing entries, skip=false)")
    reduce(Base.(:+), Y)
    print(" for NullableArray{Float64}: ")
    @time(reduce(Base.(:+), Y))
    reduce(Base.(:+), E)
    print(" for DataArray{Float64}: ")
    @time(reduce(Base.(:+), E))

    println("Method: reduce(op, A) (~half missing entries, skip=true)")
    reduce(Base.(:+), Y, skipnull=true)
    print(" for NullableArray{Float64}: ")
    @time(reduce(Base.(:+), Y, skipnull=true))
    reduce(Base.(:+), E, skipna=true)
    print(" for DataArray{Float64}: ")
    @time(reduce(Base.(:+), E, skipna=true))
end

function profile_sum1(A, X, D)
Expand Down
119 changes: 119 additions & 0 deletions perf/statistics.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
using NullableArrays
using DataArrays
using StatsBase

# Fix the RNG seed so that the random inputs are the same on every run.
srand(1)
# Length of every profiled array. Declared `const` because the profiling
# functions below read it as a global, and a non-const global has no inferable
# type inside a function body.
const N = 5_000_000

# Entry point of the statistics benchmarks.
#
# Draws `N` random Float64 values and `N` random Bools, wraps the values in a
# NullableArray and a DataArray both without and with the Bool vector as second
# constructor argument, and hands all five containers to `profile_mean` and
# `profile_var`. Returns `nothing`.
function profile_stats_methods()
    data = rand(N)
    mask = rand(Bool, N)

    nullable_full = NullableArray(data)
    nullable_masked = NullableArray(data, mask)
    data_full = DataArray(data)
    data_masked = DataArray(data, mask)

    profile_mean(data, nullable_full, data_full, nullable_masked, data_masked)
    profile_var(data, nullable_full, data_full, nullable_masked, data_masked)
    return nothing
end

# Time `mean` on each container type, unweighted and with a `WeightVec`.
#
# Arguments, as they are used by the calls below:
#   A    - the plain Array
#   X, D - the NullableArray / DataArray timed under the "0 missing entries"
#          labels
#   Y, E - the NullableArray / DataArray timed under the "~half missing
#          entries" labels, with `skipnull` / `skipna` set to false and then
#          to true
#
# Each expression is run once untimed before its `@time` so that first-call
# compilation is not measured.
function profile_mean(A, X, D, Y, E)
    # One random weight per element of the data being averaged. Sizing from
    # `A` (not from a global) keeps the weights in step with the inputs.
    # NOTE(review): assumes X, D, Y and E have the same length as A, as they
    # do when built by `profile_stats_methods` - confirm for other callers.
    W = WeightVec(rand(length(A)))

    mean(A)
    println("Method: mean(A) (0 missing entries)")
    print(" for Array{Float64}: ")
    @time(mean(A))
    mean(X)
    print(" for NullableArray{Float64}: ")
    @time(mean(X))
    mean(D)
    print(" for DataArray{Float64}: ")
    @time(mean(D))
    println()

    mean(Y, skipnull=false)
    println("Method: mean(A) (~half missing entries, skip=false)")
    print(" for NullableArray{Float64}: ")
    @time(mean(Y, skipnull=false))
    mean(E, skipna=false)
    print(" for DataArray{Float64}: ")
    @time(mean(E, skipna=false))
    println()

    mean(Y, skipnull=true)
    println("Method: mean(A) (~half missing entries, skip=true)")
    print(" for NullableArray{Float64}: ")
    @time(mean(Y, skipnull=true))
    mean(E, skipna=true)
    print(" for DataArray{Float64}: ")
    @time(mean(E, skipna=true))
    println()

    mean(A, W)
    println("Method: mean(A, w::WeightVec{W, V}) (0 missing entries, V<:Array)")
    print(" for Array{Float64}: ")
    @time(mean(A, W))
    mean(X, W)
    print(" for NullableArray{Float64}: ")
    @time(mean(X, W))
    mean(D, W)
    print(" for DataArray{Float64}: ")
    @time(mean(D, W))
    println()

    println("Method: mean(A, W::WeightVec) (~half missing entries, skip=false)")
    mean(Y, W, skipnull=false)
    print(" for NullableArray{Float64}: ")
    @time(mean(Y, W, skipnull=false))
    mean(E, W, skipna=false)
    print(" for DataArray{Float64}: ")
    @time(mean(E, W, skipna=false))
    println()

    println("Method: mean(A, W::WeightVec) (~half missing entries, skip=true)")
    mean(Y, W, skipnull=true)
    print(" for NullableArray{Float64}: ")
    @time(mean(Y, W, skipnull=true))
    mean(E, W, skipna=true)
    print(" for DataArray{Float64}: ")
    @time(mean(E, W, skipna=true))
    println()
end

# Time `varm(..., mu)` on each container type, with `mu = mean(A)` computed
# once up front and reused for every call.
#
# Arguments, as they are used by the calls below:
#   A    - the plain Array (also the source of `mu`)
#   X, D - the NullableArray / DataArray timed under the "0 missing entries"
#          label
#   Y, E - the NullableArray / DataArray timed under the "~half missing
#          entries" labels, with `skipnull` / `skipna` set to false and then
#          to true
#
# Each expression is run once untimed before its `@time` so that first-call
# compilation is not measured.
function profile_var(A, X, D, Y, E)
    mu = mean(A)

    varm(A, mu)
    println("Method: varm(A, mu) (0 missing entries)")
    print(" for Array{Float64}: ")
    @time(varm(A, mu))
    # NOTE(review): the computed value is echoed for the Array case only;
    # this looks like a leftover sanity check - confirm it is still wanted.
    println(" ", varm(A, mu))
    varm(X, mu)
    print(" for NullableArray{Float64}: ")
    @time(varm(X, mu))
    varm(D, mu)
    print(" for DataArray{Float64}: ")
    @time(varm(D, mu))
    println()

    varm(Y, mu; skipnull=false)
    println("Method: varm(A, mu) (~half missing entries, skip=false)")
    print(" for NullableArray{Float64}: ")
    @time(varm(Y, mu; skipnull=false))
    varm(E, mu; skipna=false)
    print(" for DataArray{Float64}: ")
    @time(varm(E, mu; skipna=false))
    println()

    varm(Y, mu; skipnull=true)
    println("Method: varm(A, mu) (~half missing entries, skip=true)")
    print(" for NullableArray{Float64}: ")
    @time(varm(Y, mu; skipnull=true))
    varm(E, mu; skipna=true)
    print(" for DataArray{Float64}: ")
    @time(varm(E, mu; skipna=true))
    println()
end
1 change: 1 addition & 0 deletions src/NullableArrays.jl
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,5 @@ module NullableArrays
include("operators.jl")
include("broadcast.jl")
include("mapreduce.jl")
include("statistics.jl")
end
Loading

0 comments on commit 76309a0

Please sign in to comment.