Skip to content
This repository has been archived by the owner on May 4, 2019. It is now read-only.

Commit

Permalink
Move StatsBase describe code from DataTables to NullableArrays (#191)
Browse files Browse the repository at this point in the history
Makes minor adjustments to make former DataTables printing more
like StatsBase.
  • Loading branch information
cjprybol authored and nalimilan committed May 18, 2017
1 parent 79c31e7 commit 212b3b8
Show file tree
Hide file tree
Showing 5 changed files with 167 additions and 0 deletions.
1 change: 1 addition & 0 deletions REQUIRE
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
julia 0.5
Compat 0.19.0
Reexport
StatsBase 0.14
2 changes: 2 additions & 0 deletions src/NullableArrays.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ module NullableArrays
using Compat
using Compat.view
using Reexport
using StatsBase
@reexport using Base.Cartesian

export NullableArray,
Expand Down Expand Up @@ -33,5 +34,6 @@ include("reduce.jl")
include("show.jl")
include("subarray.jl")
include("deprecated.jl")
include("utils.jl")

end
53 changes: 53 additions & 0 deletions src/utils.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Convenience method: summarize `X` on standard output by forwarding to the
# `describe(io, X)` methods with `STDOUT` as the stream.
function StatsBase.describe(X::NullableVector)
    return StatsBase.describe(STDOUT, X)
end

# Print summary statistics for a `NullableVector` of real numbers to `io`.
#
# When at least one entry is non-null, the non-null values (`dropnull(X)`) are
# passed to `describe`, and the missing-value count and percentage are appended.
# When there is nothing to summarize (every entry is null, or `X` is empty) only
# the header, the element type and the missing-value lines are printed.
#
# Always returns `nothing`.
function StatsBase.describe{T<:Real}(io::IO, X::NullableVector{T})
    nullcount = sum(X.isnull)
    # For an empty `X` this is 0/0 == NaN, so the percentage cannot be used to
    # decide whether any values remain; the counts are compared instead.
    pnull = 100nullcount/length(X)
    if nullcount < length(X) # describe will fail if dropnull returns an empty vector
        describe(io, dropnull(X))
    else
        println(io, "Summary Stats:")
        println(io, "Type: $(eltype(X))")
    end
    println(io, "Number Missing: $(nullcount)")
    @printf(io, "%% Missing: %.6f\n", pnull)
    return
end

# Fallback summary for a `NullableVector` of any element type: prints the
# length, element type, number of unique entries and the missing-value count
# and percentage to `io`. Returns `nothing`.
function StatsBase.describe(io::IO, X::NullableVector)
    # Gather every figure first, then emit the report in one run of prints.
    total = length(X)
    nmissing = sum(X.isnull)
    nunique = length(unique(X))
    pctmissing = 100 * nmissing / total

    println(io, "Summary Stats:")
    println(io, "Length: $(total)")
    println(io, "Type: $(eltype(X))")
    println(io, "Number Unique: $(nunique)")
    println(io, "Number Missing: $(nmissing)")
    @printf(io, "%% Missing: %.6f\n", pctmissing)
    return nothing
end

# Print summary statistics for an `AbstractVector` of `Nullable` real numbers
# to `io`.
#
# When at least one entry is non-null, the non-null values (`dropnull(X)`) are
# passed to `describe`, and the missing-value count and percentage are appended.
# When there is nothing to summarize (every entry is null, or `X` is empty) only
# the header, the element type and the missing-value lines are printed.
#
# Always returns `nothing`.
function StatsBase.describe{T<:Real}(io::IO, X::AbstractVector{Nullable{T}})
    nullcount = sum(_isnull, X)
    # For an empty `X` this is 0/0 == NaN, so the percentage cannot be used to
    # decide whether any values remain; the counts are compared instead.
    pnull = 100nullcount/length(X)
    if nullcount < length(X) # describe will fail if dropnull returns an empty vector
        describe(io, dropnull(X))
    else
        println(io, "Summary Stats:")
        println(io, "Type: $(eltype(X))")
    end
    println(io, "Number Missing: $(nullcount)")
    @printf(io, "%% Missing: %.6f\n", pnull)
    return
end

# Fallback summary for an `AbstractVector` whose element type is a `Nullable`:
# prints the length, element type, number of unique entries and the
# missing-value count and percentage to `io`. Returns `nothing`.
function StatsBase.describe{T<:Nullable}(io::IO, X::AbstractVector{T})
    # Gather every figure first, then emit the report in one run of prints.
    total = length(X)
    nmissing = sum(_isnull, X)
    nunique = length(unique(X))
    pctmissing = 100 * nmissing / total

    println(io, "Summary Stats:")
    println(io, "Length: $(total)")
    println(io, "Type: $(eltype(X))")
    println(io, "Number Unique: $(nunique)")
    println(io, "Number Missing: $(nmissing)")
    @printf(io, "%% Missing: %.6f\n", pctmissing)
    return nothing
end
1 change: 1 addition & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ my_tests = [
"operators.jl",
"subarray.jl",
"show.jl",
"utils.jl"
]

println("Running tests:")
Expand Down
110 changes: 110 additions & 0 deletions test/utils.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# Tests for the `describe` methods: each case writes the report into an
# in-memory buffer and compares it against the exact expected text.
module TestUtils
using StatsBase
using Base.Test
using NullableArrays

@testset "describe" begin
# One buffer is reused throughout; `take!` empties it after every comparison.
io = IOBuffer()
# Integer NullableArray built from 1:10; expected report shows zero missing.
# `$Int` interpolates the `Int` type into the expected text.
describe(io, NullableArray(1:10))
@test String(take!(io)) == """
Summary Stats:
Mean: 5.500000
Minimum: 1.000000
1st Quartile: 3.250000
Median: 5.500000
3rd Quartile: 7.750000
Maximum: 10.000000
Length: 10
Type: $Int
Number Missing: 0
% Missing: 0.000000
"""
# Integer NullableArray with one value and one null: the statistics cover the
# single non-null value (Length: 1) and half the entries are reported missing.
describe(io, NullableArray([1, Nullable()]))
@test String(take!(io)) == """
Summary Stats:
Mean: 1.000000
Minimum: 1.000000
1st Quartile: 1.000000
Median: 1.000000
3rd Quartile: 1.000000
Maximum: 1.000000
Length: 1
Type: $Int
Number Missing: 1
% Missing: 50.000000
"""
# String NullableArray without nulls: the report has length/type/unique counts
# and no numeric statistics.
describe(io, NullableArray(["s"]))
@test String(take!(io)) == """
Summary Stats:
Length: 1
Type: Nullable{String}
Number Unique: 1
Number Missing: 0
% Missing: 0.000000
"""
# String NullableArray with one null: the expected unique count is 2.
describe(io, NullableArray(["s", Nullable()]))
@test String(take!(io)) == """
Summary Stats:
Length: 2
Type: Nullable{String}
Number Unique: 2
Number Missing: 1
% Missing: 50.000000
"""
# Same data passed as an array literal instead of a NullableArray; the
# expected report is identical to the previous case.
describe(io, ["s", Nullable()])
@test String(take!(io)) == """
Summary Stats:
Length: 2
Type: Nullable{String}
Number Unique: 2
Number Missing: 1
% Missing: 50.000000
"""
# Integer data passed as an array literal; the expected report matches the
# NullableArray([1, Nullable()]) case above.
describe(io, [1, Nullable()])
@test String(take!(io)) == """
Summary Stats:
Mean: 1.000000
Minimum: 1.000000
1st Quartile: 1.000000
Median: 1.000000
3rd Quartile: 1.000000
Maximum: 1.000000
Length: 1
Type: $Int
Number Missing: 1
% Missing: 50.000000
"""
# NullableArray{Any} of length 5; expected report shows all five entries
# missing and a single unique value.
describe(io, NullableArray{Any}(5))
@test String(take!(io)) == """
Summary Stats:
Length: 5
Type: Nullable{Any}
Number Unique: 1
Number Missing: 5
% Missing: 100.000000
"""
# NullableArray{Float64} of length 5 with all entries missing: only the
# header, type and missing-value lines are expected (no statistics, no Length).
describe(io, NullableArray{Float64}(5))
@test String(take!(io)) == """
Summary Stats:
Type: Nullable{Float64}
Number Missing: 5
% Missing: 100.000000
"""
# Plain Vector of five null Nullable{String} values.
describe(io, fill(Nullable{String}(), 5))
@test String(take!(io)) == """
Summary Stats:
Length: 5
Type: Nullable{String}
Number Unique: 1
Number Missing: 5
% Missing: 100.000000
"""
# Plain Vector of five null Nullable{Float64} values: the expected report is
# the same reduced form as the all-missing NullableArray{Float64} case.
describe(io, fill(Nullable{Float64}(), 5))
@test String(take!(io)) == """
Summary Stats:
Type: Nullable{Float64}
Number Missing: 5
% Missing: 100.000000
"""
end
end

0 comments on commit 212b3b8

Please sign in to comment.