From 40c824ccd746c03e02a055d826947ce5097fadb3 Mon Sep 17 00:00:00 2001
From: Simon Byrne
Date: Wed, 14 May 2014 15:37:53 +0100
Subject: [PATCH] deprecate histogram functionality

Move the histogram helpers (histrange, midpoints, sturges, hist, hist!,
hist2d, hist2d!) from base/statistics.jl to base/deprecated.jl; the
functionality now lives in StatsBase.jl (#6842).

midpoints, hist! and hist2d! call depwarn, passing the name of the
deprecated function as the second argument. hist and hist2d forward to
the mutating methods and therefore warn through them. histrange and
sturges are moved without a warning of their own.

The matrix method of hist! passes init=false to the per-column calls so
that init=false on the outer call is honoured instead of every column
being reset to zero.

Remove the corresponding entries from doc/stdlib/math.rst and the
hist/hist2d/midpoints tests. Tests that only used hist as a counting
helper now count matches directly. The load-balancing test in
test/parallel.jl counts per entry of workers() rather than 1:nprocs(),
so a master process that runs no loop iterations is not counted as an
idle participant. The uniform-float test in test/random.jl is commented
out until its hist call is replaced.

---
 base/deprecated.jl  | 144 +++++++++++++++++++++++++++++++++++++++++++
 base/statistics.jl  | 147 --------------------------------------------
 doc/stdlib/math.rst |  49 ---------------
 test/arrayops.jl    |   4 --
 test/parallel.jl    |   3 +-
 test/random.jl      |  25 ++++----
 test/sorting.jl     |   4 +-
 test/statistics.jl  |  26 --------
 8 files changed, 161 insertions(+), 241 deletions(-)

diff --git a/base/deprecated.jl b/base/deprecated.jl
index 4eaf73a86db31..3622de2fc657c 100644
--- a/base/deprecated.jl
+++ b/base/deprecated.jl
@@ -290,3 +290,147 @@ end
 # 8898
 @deprecate precision(x::DateTime) eps(x)
 @deprecate precision(x::Date) eps(x)
+
+# Histogram: moved to StatsBase (#6842)
+function histrange{T<:FloatingPoint,N}(v::AbstractArray{T,N}, n::Integer)
+    nv = length(v)
+    if nv == 0 && n < 0
+        throw(ArgumentError("number of bins must be ≥ 0 for an empty array, got $n"))
+    elseif nv > 0 && n < 1
+        throw(ArgumentError("number of bins must be ≥ 1 for a non-empty array, got $n"))
+    end
+    if nv == 0
+        return 0.0:1.0:0.0
+    end
+    lo, hi = extrema(v)
+    if hi == lo
+        step = 1.0
+    else
+        bw = (hi - lo) / n
+        e = 10.0^floor(log10(bw))
+        r = bw / e
+        if r <= 2
+            step = 2*e
+        elseif r <= 5
+            step = 5*e
+        else
+            step = 10*e
+        end
+    end
+    start = step*(ceil(lo/step)-1)
+    nm1 = ceil(Int,(hi - start)/step)
+    start:step:(start + nm1*step)
+end
+
+function histrange{T<:Integer,N}(v::AbstractArray{T,N}, n::Integer)
+    nv = length(v)
+    if nv == 0 && n < 0
+        throw(ArgumentError("number of bins must be ≥ 0 for an empty array, got $n"))
+    elseif nv > 0 && n < 1
+        throw(ArgumentError("number of bins must be ≥ 1 for a non-empty array, got $n"))
+    end
+    if nv == 0
+        return 0:1:0
+    end
+    lo, hi = extrema(v)
+    if hi == lo
+        step = 1
+    else
+        bw = (hi - lo) / n
+        e = 10^max(0,floor(Int,log10(bw)))
+        r = bw / e
+        if r <= 1
+            step = e
+        elseif r <= 2
+            step = 2*e
+        elseif r <= 5
+            step = 5*e
+        else
+            step = 10*e
+        end
+    end
+    start = step*(ceil(lo/step)-1)
+    nm1 = ceil(Int,(hi - start)/step)
+    start:step:(start + nm1*step)
+end
+
+## midpoints of intervals
+midpoints(r::Range) = (depwarn("midpoints(x) is deprecated. Method now in StatsBase.jl", :midpoints); r[1:length(r)-1] + 0.5*step(r))
+midpoints(v::AbstractVector) = (depwarn("midpoints(x) is deprecated. Method now in StatsBase.jl", :midpoints); [0.5*(v[i] + v[i+1]) for i in 1:length(v)-1])
+
+## hist ##
+function sturges(n)  # Sturges' formula
+    n==0 && return one(n)
+    ceil(Int,log2(n))+1
+end
+
+function hist!{HT}(h::AbstractArray{HT}, v::AbstractVector, edg::AbstractVector; init::Bool=true)
+    depwarn("hist(...) and hist!(...) are deprecated. Use fit(Histogram,...) in StatsBase.jl instead.", :hist!)
+    n = length(edg) - 1
+    length(h) == n || throw(DimensionMismatch("length(histogram) must equal length(edges) - 1"))
+    if init
+        fill!(h, zero(HT))
+    end
+    for x in v
+        i = searchsortedfirst(edg, x)-1
+        if 1 <= i <= n
+            h[i] += 1
+        end
+    end
+    edg, h
+end
+
+hist(v::AbstractVector, edg::AbstractVector) = hist!(Array(Int, length(edg)-1), v, edg)
+hist(v::AbstractVector, n::Integer) = hist(v,histrange(v,n))
+hist(v::AbstractVector) = hist(v,sturges(length(v)))
+
+function hist!{HT}(H::AbstractArray{HT,2}, A::AbstractMatrix, edg::AbstractVector; init::Bool=true)
+    depwarn("hist(...) and hist!(...) are deprecated. Use fit(Histogram,...) in StatsBase.jl instead.", :hist!)
+    m, n = size(A)
+    sH = size(H)
+    sE = (length(edg)-1,n)
+    sH == sE || throw(DimensionMismatch("incorrect size of histogram"))
+    if init
+        fill!(H, zero(HT))
+    end
+    for j = 1:n
+        hist!(sub(H, :, j), sub(A, :, j), edg, init=false)
+    end
+    edg, H
+end
+
+hist(A::AbstractMatrix, edg::AbstractVector) = hist!(Array(Int, length(edg)-1, size(A,2)), A, edg)
+hist(A::AbstractMatrix, n::Integer) = hist(A,histrange(A,n))
+hist(A::AbstractMatrix) = hist(A,sturges(size(A,1)))
+
+
+## hist2d
+function hist2d!{HT}(H::AbstractArray{HT,2}, v::AbstractMatrix,
+                     edg1::AbstractVector, edg2::AbstractVector; init::Bool=true)
+    depwarn("hist2d(...) is deprecated. Use fit(Histogram,...) in StatsBase.jl instead.", :hist2d!)
+    size(v,2) == 2 || throw(DimensionMismatch("hist2d requires an Nx2 matrix"))
+    n = length(edg1) - 1
+    m = length(edg2) - 1
+    size(H) == (n, m) || throw(DimensionMismatch("incorrect size of histogram"))
+    if init
+        fill!(H, zero(HT))
+    end
+    for i = 1:size(v,1)
+        x = searchsortedfirst(edg1, v[i,1]) - 1
+        y = searchsortedfirst(edg2, v[i,2]) - 1
+        if 1 <= x <= n && 1 <= y <= m
+            @inbounds H[x,y] += 1
+        end
+    end
+    edg1, edg2, H
+end
+
+hist2d(v::AbstractMatrix, edg1::AbstractVector, edg2::AbstractVector) =
+    hist2d!(Array(Int, length(edg1)-1, length(edg2)-1), v, edg1, edg2)
+
+hist2d(v::AbstractMatrix, edg::AbstractVector) = hist2d(v, edg, edg)
+
+hist2d(v::AbstractMatrix, n1::Integer, n2::Integer) =
+    hist2d(v, histrange(sub(v,:,1),n1), histrange(sub(v,:,2),n2))
+hist2d(v::AbstractMatrix, n::Integer) = hist2d(v, n, n)
+hist2d(v::AbstractMatrix) = hist2d(v, sturges(size(v,1)))
diff --git a/base/statistics.jl b/base/statistics.jl
index a2935afe9e27b..adeceeb584cbd 100644
--- a/base/statistics.jl
+++ b/base/statistics.jl
@@ -506,150 +506,3 @@ function bound_quantiles(qs::AbstractVector)
     end
     [min(1,max(0,q)) for q = qs]
 end
-
-
-
-##### histogram #####
-
-## nice-valued ranges for histograms
-
-function histrange{T<:FloatingPoint,N}(v::AbstractArray{T,N}, n::Integer)
-    nv = length(v)
-    if nv == 0 && n < 0
-        throw(ArgumentError("number of bins must be ≥ 0 for an empty array, got $n"))
-    elseif nv > 0 && n < 1
-        throw(ArgumentError("number of bins must be ≥ 1 for a non-empty array, got $n"))
-    end
-    if nv == 0
-        return 0.0:1.0:0.0
-    end
-    lo, hi = extrema(v)
-    if hi == lo
-        step = 1.0
-    else
-        bw = (hi - lo) / n
-        e = 10.0^floor(log10(bw))
-        r = bw / e
-        if r <= 2
-            step = 2*e
-        elseif r <= 5
-            step = 5*e
-        else
-            step = 10*e
-        end
-    end
-    start = step*(ceil(lo/step)-1)
-    nm1 = ceil(Int,(hi - start)/step)
-    start:step:(start + nm1*step)
-end
-
-function histrange{T<:Integer,N}(v::AbstractArray{T,N}, n::Integer)
-    nv = length(v)
-    if nv == 0 && n < 0
-        throw(ArgumentError("number of bins must be ≥ 0 for an empty array, got $n"))
-    elseif nv > 0 && n < 1
-        throw(ArgumentError("number of bins must be ≥ 1 for a non-empty array, got $n"))
-    end
-    if nv == 0
-        return 0:1:0
-    end
-    lo, hi = extrema(v)
-    if hi == lo
-        step = 1
-    else
-        bw = (hi - lo) / n
-        e = 10^max(0,floor(Int,log10(bw)))
-        r = bw / e
-        if r <= 1
-            step = e
-        elseif r <= 2
-            step = 2*e
-        elseif r <= 5
-            step = 5*e
-        else
-            step = 10*e
-        end
-    end
-    start = step*(ceil(lo/step)-1)
-    nm1 = ceil(Int,(hi - start)/step)
-    start:step:(start + nm1*step)
-end
-
-## midpoints of intervals
-midpoints(r::Range) = r[1:length(r)-1] + 0.5*step(r)
-midpoints(v::AbstractVector) = [0.5*(v[i] + v[i+1]) for i in 1:length(v)-1]
-
-## hist ##
-function sturges(n)  # Sturges' formula
-    n==0 && return one(n)
-    ceil(Int,log2(n))+1
-end
-
-function hist!{HT}(h::AbstractArray{HT}, v::AbstractVector, edg::AbstractVector; init::Bool=true)
-    n = length(edg) - 1
-    length(h) == n || throw(DimensionMismatch("length(histogram) must equal length(edges) - 1"))
-    if init
-        fill!(h, zero(HT))
-    end
-    for x in v
-        i = searchsortedfirst(edg, x)-1
-        if 1 <= i <= n
-            h[i] += 1
-        end
-    end
-    edg, h
-end
-
-hist(v::AbstractVector, edg::AbstractVector) = hist!(Array(Int, length(edg)-1), v, edg)
-hist(v::AbstractVector, n::Integer) = hist(v,histrange(v,n))
-hist(v::AbstractVector) = hist(v,sturges(length(v)))
-
-function hist!{HT}(H::AbstractArray{HT,2}, A::AbstractMatrix, edg::AbstractVector; init::Bool=true)
-    m, n = size(A)
-    sH = size(H)
-    sE = (length(edg)-1,n)
-    sH == sE || throw(DimensionMismatch("incorrect size of histogram"))
-    if init
-        fill!(H, zero(HT))
-    end
-    for j = 1:n
-        hist!(sub(H, :, j), sub(A, :, j), edg)
-    end
-    edg, H
-end
-
-hist(A::AbstractMatrix, edg::AbstractVector) = hist!(Array(Int, length(edg)-1, size(A,2)), A, edg)
-hist(A::AbstractMatrix, n::Integer) = hist(A,histrange(A,n))
-hist(A::AbstractMatrix) = hist(A,sturges(size(A,1)))
-
-
-## hist2d
-function hist2d!{HT}(H::AbstractArray{HT,2}, v::AbstractMatrix,
-                     edg1::AbstractVector, edg2::AbstractVector; init::Bool=true)
-    size(v,2) == 2 || throw(DimensionMismatch("hist2d requires an Nx2 matrix"))
-    n = length(edg1) - 1
-    m = length(edg2) - 1
-    size(H) == (n, m) || throw(DimensionMismatch("incorrect size of histogram"))
-    if init
-        fill!(H, zero(HT))
-    end
-    for i = 1:size(v,1)
-        x = searchsortedfirst(edg1, v[i,1]) - 1
-        y = searchsortedfirst(edg2, v[i,2]) - 1
-        if 1 <= x <= n && 1 <= y <= m
-            @inbounds H[x,y] += 1
-        end
-    end
-    edg1, edg2, H
-end
-
-hist2d(v::AbstractMatrix, edg1::AbstractVector, edg2::AbstractVector) =
-    hist2d!(Array(Int, length(edg1)-1, length(edg2)-1), v, edg1, edg2)
-
-hist2d(v::AbstractMatrix, edg::AbstractVector) = hist2d(v, edg, edg)
-
-hist2d(v::AbstractMatrix, n1::Integer, n2::Integer) =
-    hist2d(v, histrange(sub(v,:,1),n1), histrange(sub(v,:,2),n2))
-hist2d(v::AbstractMatrix, n::Integer) = hist2d(v, n, n)
-hist2d(v::AbstractMatrix) = hist2d(v, sturges(size(v,1)))
-
diff --git a/doc/stdlib/math.rst b/doc/stdlib/math.rst
index cf78075f69efe..e30eddcce48e3 100644
--- a/doc/stdlib/math.rst
+++ b/doc/stdlib/math.rst
@@ -1239,55 +1239,6 @@ Statistics

    Like ``median``, but may overwrite the input vector.

-.. function:: hist(v[, n]) -> e, counts
-
-   Compute the histogram of ``v``, optionally using approximately ``n``
-   bins. The return values are a range ``e``, which correspond to the
-   edges of the bins, and ``counts`` containing the number of elements of
-   ``v`` in each bin.
-   Note: Julia does not ignore ``NaN`` values in the computation.
-
-.. function:: hist(v, e) -> e, counts
-
-   Compute the histogram of ``v`` using a vector/range ``e`` as the edges for
-   the bins. The result will be a vector of length ``length(e) - 1``, such that the
-   element at location ``i`` satisfies ``sum(e[i] .< v .<= e[i+1])``.
-   Note: Julia does not ignore ``NaN`` values in the computation.
-
-.. function:: hist!(counts, v, e) -> e, counts
-
-   Compute the histogram of ``v``, using a vector/range ``e`` as the edges for the bins.
-   This function writes the resultant counts to a pre-allocated array ``counts``.
-
-.. function:: hist2d(M, e1, e2) -> (edge1, edge2, counts)
-
-   Compute a "2d histogram" of a set of N points specified by N-by-2 matrix ``M``.
-   Arguments ``e1`` and ``e2`` are bins for each dimension, specified either as
-   integer bin counts or vectors of bin edges. The result is a tuple of
-   ``edge1`` (the bin edges used in the first dimension), ``edge2`` (the bin edges
-   used in the second dimension), and ``counts``, a histogram matrix of size
-   ``(length(edge1)-1, length(edge2)-1)``.
-   Note: Julia does not ignore ``NaN`` values in the computation.
-
-.. function:: hist2d!(counts, M, e1, e2) -> (e1, e2, counts)
-
-   Compute a "2d histogram" with respect to the bins delimited by the edges given
-   in ``e1`` and ``e2``. This function writes the results to a pre-allocated
-   array ``counts``.
-
-.. function:: histrange(v, n)
-
-   Compute *nice* bin ranges for the edges of a histogram of ``v``, using
-   approximately ``n`` bins. The resulting step sizes will be 1, 2 or 5
-   multiplied by a power of 10.
-   Note: Julia does not ignore ``NaN`` values in the computation.
-
-.. function:: midpoints(e)
-
-   Compute the midpoints of the bins with edges ``e``. The result is a
-   vector/range of length ``length(e) - 1``.
-   Note: Julia does not ignore ``NaN`` values in the computation.
-
 .. function:: quantile(v, p)

    Compute the quantiles of a vector ``v`` at a specified set of probability values ``p``.
diff --git a/test/arrayops.jl b/test/arrayops.jl
index 485c2d2986045..b82e688db0781 100644
--- a/test/arrayops.jl
+++ b/test/arrayops.jl
@@ -613,13 +613,9 @@ B = cat(3, 1, 2, 3)
 begin
     local a,h,i
     a = rand(5,5)
-    h = mapslices(v -> hist(v,0:0.1:1)[2], a, 1)
-    H = mapslices(v -> hist(v,0:0.1:1)[2], a, 2)
     s = mapslices(sort, a, [1])
     S = mapslices(sort, a, [2])
     for i = 1:5
-        @test h[:,i] == hist(a[:,i],0:0.1:1)[2]
-        @test vec(H[i,:]) == hist(vec(a[i,:]),0:0.1:1)[2]
         @test s[:,i] == sort(a[:,i])
         @test vec(S[i,:]) == sort(vec(a[i,:]))
     end
diff --git a/test/parallel.jl b/test/parallel.jl
index 23c7873d67126..40e21eb1ce743 100644
--- a/test/parallel.jl
+++ b/test/parallel.jl
@@ -167,7 +167,8 @@ map!(x->1, d)

 # Test @parallel load balancing - all processors should get either M or M+1
 # iterations out of the loop range for some M.
-workloads = hist(@parallel((a,b)->[a;b], for i=1:7; myid(); end), nprocs())[2]
+ids = @parallel((a,b)->[a;b], for i=1:7; myid(); end)
+workloads = Int[sum(ids .== i) for i in workers()]
 @test maximum(workloads) - minimum(workloads) <= 1

 # @parallel reduction should work even with very short ranges
diff --git a/test/random.jl b/test/random.jl
index 04cec4007f55f..d8bb37c4dce13 100644
--- a/test/random.jl
+++ b/test/random.jl
@@ -300,15 +300,16 @@ for rng in ([], [MersenneTwister()], [RandomDevice()])
 end

 # test uniform distribution of floats
-let bins = [prevfloat(0.0):0.25:1.0;]
-    for rng in [srand(MersenneTwister()), RandomDevice()]
-        for T in [Float16,Float32,Float64]
-            # array version
-            _, counts = hist(rand(rng, T, 2000), bins)
-            @test minimum(counts) > 300 # should fail with proba < 1e-26
-            # scalar version
-            _, counts = hist([rand(rng, T) for i in 1:2000], bins)
-            @test minimum(counts) > 300
-        end
-    end
-end
+# TODO: replace hist call
+## let bins = [prevfloat(0.0):0.25:1.0;]
+##     for rng in [srand(MersenneTwister()), RandomDevice()]
+##         for T in [Float16,Float32,Float64]
+##             # array version
+##             _, counts = hist(rand(rng, T, 2000), bins)
+##             @test minimum(counts) > 300 # should fail with proba < 1e-26
+##             # scalar version
+##             _, counts = hist([rand(rng, T) for i in 1:2000], bins)
+##             @test minimum(counts) > 300
+##         end
+##     end
+## end
diff --git a/test/sorting.jl b/test/sorting.jl
index e57740c1de02b..875965c8ff992 100644
--- a/test/sorting.jl
+++ b/test/sorting.jl
@@ -153,7 +153,7 @@ srand(0xdeadbeef)
 for n in [0:10; 100; 101; 1000; 1001]
     r = -5:5
     v = rand(r,n)
-    h = hist(v,r)
+    h = [sum(v .== x) for x in r]

     for rev in [false,true]
         # insertion sort (stable) as reference
@@ -161,7 +161,7 @@ for n in [0:10; 100; 101; 1000; 1001]
         @test pi == sortperm(float(v), alg=InsertionSort, rev=rev)
         @test isperm(pi)
         si = v[pi]
-        @test hist(si,r) == h
+        @test [sum(si .== x) for x in r] == h
         @test issorted(si, rev=rev)
         @test all(issorted,[pi[si.==x] for x in r])
         c = copy(v)
diff --git a/test/statistics.jl b/test/statistics.jl
index 55736d3187d55..aaf13f28903b0 100644
--- a/test/statistics.jl
+++ b/test/statistics.jl
@@ -243,32 +243,6 @@ for vd in [1, 2], zm in [true, false]
     @test_approx_eq C Cxy
 end

-
-
-# test hist
-
-@test sum(hist([1,2,3])[2]) == 3
-@test hist(Union()[])[2] == []
-@test hist([1])[2] == [1]
-@test hist([1,2,3],[0,2,4]) == ([0,2,4],[2,1])
-@test hist([1,2,3],0:2:4) == (0:2:4,[2,1])
-@test all(hist([1:100;]/100,0.0:0.01:1.0)[2] .==1)
-@test hist([1,1,1,1,1])[2][1] == 5
-@test sum(hist2d(rand(100, 2))[3]) == 100
-@test hist([1 2 3 4;1 2 3 4]) == (0.0:2.0:4.0, [2 2 0 0; 0 0 2 2])
-
-@test midpoints(1.0:1.0:10.0) == 1.5:1.0:9.5
-@test midpoints(1:10) == 1.5:9.5
-@test midpoints(Float64[1.0:1.0:10.0;]) == Float64[1.5:1.0:9.5;]
-
 @test quantile([1,2,3,4],0.5) == 2.5
 @test quantile([1., 3],[.25,.5,.75])[2] == median([1., 3])
 @test quantile([0.:100.;],[.1,.2,.3,.4,.5,.6,.7,.8,.9])[1] == 10.0
-
-# test invalid hist nbins argument (#9999)
-@test_throws ArgumentError hist(Int[], -1)
-@test hist(Int[], 0)[2] == Int[]
-@test_throws ArgumentError hist([1,2,3], -1)
-@test_throws ArgumentError hist([1,2,3], 0)
-@test_throws ArgumentError hist([1.0,2.0,3.0], -1)
-@test_throws ArgumentError hist([1.0,2.0,3.0], 0)