diff --git a/REQUIRE b/REQUIRE
index 92242af..447a224 100644
--- a/REQUIRE
+++ b/REQUIRE
@@ -1,3 +1,4 @@
 julia 0.5
 Compat 0.19.0
 Reexport
+StatsBase 0.14
diff --git a/src/NullableArrays.jl b/src/NullableArrays.jl
index b189776..c41b9da 100644
--- a/src/NullableArrays.jl
+++ b/src/NullableArrays.jl
@@ -5,6 +5,7 @@ module NullableArrays
 using Compat
 using Compat.view
 using Reexport
+using StatsBase
 @reexport using Base.Cartesian
 
 export NullableArray,
@@ -33,5 +34,6 @@ include("reduce.jl")
 include("show.jl")
 include("subarray.jl")
 include("deprecated.jl")
+include("utils.jl")
 
 end
diff --git a/src/utils.jl b/src/utils.jl
new file mode 100644
index 0000000..fc52c83
--- /dev/null
+++ b/src/utils.jl
@@ -0,0 +1,62 @@
+# `StatsBase.describe` methods for vectors whose elements are `Nullable`.
+
+# Print the two null-summary lines shared by every `describe` method below.
+# For an empty vector the percentage is 0/0, which `@printf` renders as NaN.
+function _describe_nulls(io::IO, nullcount::Integer, len::Integer)
+    println(io, "Number Missing: $(nullcount)")
+    @printf(io, "%% Missing: %.6f\n", 100nullcount/len)
+    return
+end
+
+# Summarize `Nullable` real numbers: statistics of the non-null values (delegated
+# to `describe` on `dropnull(X)`), followed by the null-summary lines.
+function _describe_real(io::IO, X::AbstractVector, nullcount::Integer)
+    # Compare integer counts instead of a floating-point percentage. For an empty
+    # vector the percentage is NaN, so the former `pnull != 100` test still called
+    # `describe` on an empty `dropnull(X)`, the failure it was meant to prevent.
+    if nullcount < length(X)
+        describe(io, dropnull(X))
+    else
+        println(io, "Summary Stats:")
+        println(io, "Type: $(eltype(X))")
+    end
+    _describe_nulls(io, nullcount, length(X))
+    return
+end
+
+# Summarize `Nullable` values of any other element type: length, element type,
+# number of unique values, then the null-summary lines.
+function _describe_generic(io::IO, X::AbstractVector, nullcount::Integer)
+    println(io, "Summary Stats:")
+    println(io, "Length: $(length(X))")
+    println(io, "Type: $(eltype(X))")
+    println(io, "Number Unique: $(length(unique(X)))")
+    _describe_nulls(io, nullcount, length(X))
+    return
+end
+
+# Describe `X` on standard output.
+StatsBase.describe(X::NullableVector) = StatsBase.describe(STDOUT, X)
+
+# `NullableVector` methods read the null count from the `isnull` mask.
+function StatsBase.describe{T<:Real}(io::IO, X::NullableVector{T})
+    return _describe_real(io, X, sum(X.isnull))
+end
+
+function StatsBase.describe(io::IO, X::NullableVector)
+    return _describe_generic(io, X, sum(X.isnull))
+end
+
+# Methods for other vectors of `Nullable`s count nulls element by element.
+# `count` is used rather than `sum(_isnull, X)` so that an empty vector yields 0;
+# NOTE(review): `sum` with a function argument is expected to throw on an empty
+# collection in Julia 0.5 -- confirm.
+# NOTE(review): these extend a StatsBase function on Base-only argument types
+# (type piracy); kept so existing callers continue to work.
+function StatsBase.describe{T<:Real}(io::IO, X::AbstractVector{Nullable{T}})
+    return _describe_real(io, X, count(_isnull, X))
+end
+
+function StatsBase.describe{T<:Nullable}(io::IO, X::AbstractVector{T})
+    return _describe_generic(io, X, count(_isnull, X))
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index 55132cc..d7301f0 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -17,6 +17,7 @@ my_tests = [
     "operators.jl",
     "subarray.jl",
     "show.jl",
+    "utils.jl"
 ]
 
 println("Running tests:")
diff --git a/test/utils.jl b/test/utils.jl
new file mode 100644
index 0000000..16b7ac4
--- /dev/null
+++ b/test/utils.jl
@@ -0,0 +1,134 @@
+module TestUtils
+    using StatsBase
+    using Base.Test
+    using NullableArrays
+
+    @testset "describe" begin
+        io = IOBuffer()
+        describe(io, NullableArray(1:10))
+        @test String(take!(io)) == """
+            Summary Stats:
+            Mean: 5.500000
+            Minimum: 1.000000
+            1st Quartile: 3.250000
+            Median: 5.500000
+            3rd Quartile: 7.750000
+            Maximum: 10.000000
+            Length: 10
+            Type: $Int
+            Number Missing: 0
+            % Missing: 0.000000
+            """
+        describe(io, NullableArray([1, Nullable()]))
+        @test String(take!(io)) == """
+            Summary Stats:
+            Mean: 1.000000
+            Minimum: 1.000000
+            1st Quartile: 1.000000
+            Median: 1.000000
+            3rd Quartile: 1.000000
+            Maximum: 1.000000
+            Length: 1
+            Type: $Int
+            Number Missing: 1
+            % Missing: 50.000000
+            """
+        describe(io, NullableArray(["s"]))
+        @test String(take!(io)) == """
+            Summary Stats:
+            Length: 1
+            Type: Nullable{String}
+            Number Unique: 1
+            Number Missing: 0
+            % Missing: 0.000000
+            """
+        describe(io, NullableArray(["s", Nullable()]))
+        @test String(take!(io)) == """
+            Summary Stats:
+            Length: 2
+            Type: Nullable{String}
+            Number Unique: 2
+            Number Missing: 1
+            % Missing: 50.000000
+            """
+        describe(io, ["s", Nullable()])
+        @test String(take!(io)) == """
+            Summary Stats:
+            Length: 2
+            Type: Nullable{String}
+            Number Unique: 2
+            Number Missing: 1
+            % Missing: 50.000000
+            """
+        describe(io, [1, Nullable()])
+        @test String(take!(io)) == """
+            Summary Stats:
+            Mean: 1.000000
+            Minimum: 1.000000
+            1st Quartile: 1.000000
+            Median: 1.000000
+            3rd Quartile: 1.000000
+            Maximum: 1.000000
+            Length: 1
+            Type: $Int
+            Number Missing: 1
+            % Missing: 50.000000
+            """
+        describe(io, NullableArray{Any}(5))
+        @test String(take!(io)) == """
+            Summary Stats:
+            Length: 5
+            Type: Nullable{Any}
+            Number Unique: 1
+            Number Missing: 5
+            % Missing: 100.000000
+            """
+        describe(io, NullableArray{Float64}(5))
+        @test String(take!(io)) == """
+            Summary Stats:
+            Type: Nullable{Float64}
+            Number Missing: 5
+            % Missing: 100.000000
+            """
+        describe(io, fill(Nullable{String}(), 5))
+        @test String(take!(io)) == """
+            Summary Stats:
+            Length: 5
+            Type: Nullable{String}
+            Number Unique: 1
+            Number Missing: 5
+            % Missing: 100.000000
+            """
+        describe(io, fill(Nullable{Float64}(), 5))
+        @test String(take!(io)) == """
+            Summary Stats:
+            Type: Nullable{Float64}
+            Number Missing: 5
+            % Missing: 100.000000
+            """
+        # Empty vectors must be described without error; their null percentage is NaN.
+        describe(io, NullableArray{Float64}(0))
+        @test String(take!(io)) == """
+            Summary Stats:
+            Type: Nullable{Float64}
+            Number Missing: 0
+            % Missing: NaN
+            """
+        describe(io, Nullable{Float64}[])
+        @test String(take!(io)) == """
+            Summary Stats:
+            Type: Nullable{Float64}
+            Number Missing: 0
+            % Missing: NaN
+            """
+        describe(io, Nullable{String}[])
+        @test String(take!(io)) == """
+            Summary Stats:
+            Length: 0
+            Type: Nullable{String}
+            Number Unique: 0
+            Number Missing: 0
+            % Missing: NaN
+            """
+    end
+end