From 39b5c3d90b15877d4f62140f583ab8c67d3af290 Mon Sep 17 00:00:00 2001 From: Daniel Jones Date: Tue, 23 Apr 2013 10:08:17 -0700 Subject: [PATCH 1/6] Add a DataStructures module with PriorityQueue and heap functions. --- base/datastructures.jl | 287 +++++++++++++++++++++++++++++++++++++++++ base/exports.jl | 1 + base/sysimg.jl | 3 + test/priorityqueue.jl | 65 ++++++++++ test/runtests.jl | 4 +- 5 files changed, 358 insertions(+), 2 deletions(-) create mode 100644 base/datastructures.jl create mode 100644 test/priorityqueue.jl diff --git a/base/datastructures.jl b/base/datastructures.jl new file mode 100644 index 0000000000000..0617c40a8090b --- /dev/null +++ b/base/datastructures.jl @@ -0,0 +1,287 @@ + +module DataStructures + +import Base: setindex!, done, get, has, isempty, length, next, getindex, start +import ..Sort: Forward, Ordering, It, lt + +export + PriorityQueue, + dequeue!, + enqueue!, + heapify!, + heappop!, + heappush!, + isheap, + peek + + + +# Heap operations on flat arrays +# ------------------------------ + + +# Binary heap indexing +heapleft(i::Integer) = 2i +heapright(i::Integer) = 2i + 1 +heapparent(i::Integer) = div(i, 2) + + +# Binary min-heap percolate down. +function percolate_down!(xs::AbstractArray, i::Integer, o::Ordering) + while (l = heapleft(i)) <= length(xs) + r = heapright(i) + j = r > length(xs) || lt(o, xs[l], xs[r]) ? l : r + if lt(o, xs[j], xs[i]) + xs[i], xs[j] = xs[j], xs[i] + i = j + else + break + end + end +end + +percolate_down!(xs::AbstractArray, i::Integer) = percolate_down!(xs, i, Forward()) + + + +# Binary min-heap percolate up. +function percolate_up!(xs::AbstractArray, i::Integer, o::Ordering) + while i > 1 + j = heapparent(i) + if lt(o, xs[i], xs[j]) + xs[i], xs[j] = xs[j], xs[i] + i = j + else + break + end + end +end + +percolate_up!(xs::AbstractArray, i::Integer) = percolate_up!(xs, i, Forward()) + + +# Binary min-heap pop. 
+function heappop!(xs::AbstractArray, o::Ordering) + x = xs[1] + y = pop!(xs) + if !isempty(xs) + xs[1] = y + percolate_down!(xs, 1, o) + end + x +end + +heappop!(xs::AbstractArray) = heappop!(xs, Forward()) + + +# Binary min-heap push. +function heappush!(xs::AbstractArray, x, o::Ordering) + push!(xs, x) + percolate_up!(xs, length(xs), o) + xs +end + +heappush!(xs::AbstractArray, x) = heappush!(xs, x, Forward()) + + +# Turn an arbitrary array into a binary min-heap in linear time. +function heapify!(xs::AbstractArray, o::Ordering) + for i in heapparent(length(xs)):-1:1 + percolate_down!(xs, i, o) + end + xs +end + +heapify!(xs::AbstractArray) = heapify!(xs, Forward()) +heapify(xs::AbstractArray, o::Ordering) = heapify!(copy(xs), o) +heapify(xs::AbstractArray) = heapify(xs, Forward()) + + +# Is an arbitrary array heap ordered? +function isheap(xs::AbstractArray, o::Ordering) + for i in 1:div(length(xs), 2) + if lt(o, xs[heapleft(i)], xs[i]) || + (heapright(i) <= length(xs) && lt(o, xs[heapright(i)], xs[i])) + return false + end + end + true +end + +isheap(xs::AbstractArray) = isheap(xs, Forward()) + + +# PriorityQueue +# ------------- + +# A PriorityQueue that acts like a Dict, mapping values to their priorities, +# with the addition of a dequeue! function to remove the lowest priority +# element. +type PriorityQueue{K,V} <: Associative{K,V} + # Binary heap of (element, priority) pairs. 
+ xs::Array{(K, V), 1} + o::Ordering + + # Map elements to their index is xs + index::Dict + + function PriorityQueue(o::Ordering) + new(Array((K, V), 0), o, Dict{K, Int}()) + end + + PriorityQueue() = PriorityQueue{K,V}(Forward()) + + function PriorityQueue(ks::AbstractArray{K}, vs::AbstractArray{V}, + o::Ordering) + if length(ks) != length(vs) + error("Key and value arrays have unequal lengths.") + end + + xs = Array((K, V), length(ks)) + index = Dict{K, Int}() + for (i, (k, v)) in enumerate(zip(ks, vs)) + xs[i] = (k, v) + if has(index, k) + error("PriorityQueue keys must be unique.") + end + index[k] = i + end + pq = new(xs, o, index) + + # heapify + for i in heapparent(length(pq.xs)):-1:1 + percolate_down!(pq, i) + end + + pq + end +end + +PriorityQueue(o::Ordering) = PriorityQueue{Any,Any}(o) +PriorityQueue() = PriorityQueue{Any,Any}(Forward()) + +function PriorityQueue{K,V}(ks::AbstractArray{K}, vs::AbstractArray{V}, + o::Ordering) + PriorityQueue{K,V}(ks, vs, o) +end + +function PriorityQueue{K,V}(ks::AbstractArray{K}, vs::AbstractArray{V}) + PriorityQueue{K,V}(ks, vs, Forward()) +end + +function PriorityQueue{K,V}(kvs::Dict{K,V}, o::Ordering) + PriorityQueue{K,V}([k for k in keys(kvs)], [v for v in values(kvs)], o) +end + +function PriorityQueue{K,V}(kvs::Dict{K,V}) + PriorityQueue(kvs, Forward()) +end + + +length(pq::PriorityQueue) = length(pq.xs) +isempty(pq::PriorityQueue) = isempty(pq.xs) +has(pq::PriorityQueue, key) = has(pq.index, key) +peek(pq::PriorityQueue) = pq.xs[1] + + +# Swap two nodes in a PriorityQueue +function swap!(pq::PriorityQueue, i::Integer, j::Integer) + pq.index[pq.xs[i][1]] = j + pq.index[pq.xs[j][1]] = i + pq.xs[i], pq.xs[j] = pq.xs[j], pq.xs[i] +end + + +function percolate_down!(pq::PriorityQueue, i::Integer) + while (l = heapleft(i)) <= length(pq) + r = heapright(i) + j = r > length(pq) || lt(pq.o, pq.xs[l][2], pq.xs[r][2]) ? 
l : r + if lt(pq.o, pq.xs[j][2], pq.xs[i][2]) + swap!(pq, i, j) + i = j + else + break + end + end +end + + +function percolate_up!(pq::PriorityQueue, i::Integer) + while i > 1 + j = heapparent(i) + if lt(pq.o, pq.xs[i][2], pq.xs[j][2]) + swap!(pq, i, j) + i = j + else + break + end + end +end + + +function getindex{K,V}(pq::PriorityQueue{K,V}, key) + pq.xs[pq.index[key]][2] +end + + +function get{K,V}(pq::PriorityQueue{K,V}, key, deflt) + i = get(pq.index, key, 0) + i == 0 ? deflt : pq.xs[i][2] +end + + +# Change the priority of an existing element, or equeue it if it isn't present. +function setindex!{K,V}(pq::PriorityQueue{K, V}, value, key) + if has(pq, key) + i = pq.index[key] + _, oldvalue = pq.xs[i] + pq.xs[i] = (key, value) + if lt(pq.o, oldvalue, value) + percolate_down!(pq, i) + else + percolate_up!(pq, i) + end + else + enqueue!(pq, key, value) + end +end + + +function enqueue!{K,V}(pq::PriorityQueue{K,V}, key, value) + if has(pq, key) + error("PriorityQueue keys must be unique.") + end + + push!(pq.xs, (key, value)) + pq.index[key] = length(pq) + percolate_up!(pq, length(pq)) + pq +end + + +function dequeue!(pq::PriorityQueue) + x = pq.xs[1] + y = pop!(pq.xs) + if !isempty(pq) + pq.xs[1] = y + pq.index[pq.xs[1][1]] = 1 + percolate_down!(pq, 1) + end + delete!(pq.index, x[1]) + x[1] +end + + +# Unordered iteration through key value pairs in a PriorityQueue +start(pq::PriorityQueue) = start(pq.index) + +done(pq::PriorityQueue, i) = done(pq.index, i) + +function next(pq::PriorityQueue, i) + (k, idx), i = next(pq.index, i) + return ((k, pq.xs[idx][2]), i) +end + + +end # module DataStructures + diff --git a/base/exports.jl b/base/exports.jl index 83e91ec6821b2..4050be7fb99d1 100644 --- a/base/exports.jl +++ b/base/exports.jl @@ -2,6 +2,7 @@ export # Modules PCRE, FFTW, + DataStructures, DSP, LinAlg, LibRandom, diff --git a/base/sysimg.jl b/base/sysimg.jl index 41f3ce599bba7..3b992da5c8147 100644 --- a/base/sysimg.jl +++ b/base/sysimg.jl @@ -137,6 +137,9 @@ 
include("sort.jl") importall .Sort include("combinatorics.jl") +# basic data structures +include("datastructures.jl") + # distributed arrays and memory-mapped arrays include("darray2.jl") include("mmap.jl") diff --git a/test/priorityqueue.jl b/test/priorityqueue.jl new file mode 100644 index 0000000000000..0f5d8c541dc6b --- /dev/null +++ b/test/priorityqueue.jl @@ -0,0 +1,65 @@ + +using Base.DataStructures + + +# Test dequeing in sorted order. +function test_issorted!(pq::PriorityQueue, priorities) + last = dequeue!(pq) + while !isempty(pq) + value = dequeue!(pq) + @test priorities[last] <= priorities[value] + last = value + end +end + +pmax = 1000 +n = 10000 +priorities = Dict(1:n, rand(1:pmax, n)) + +# building from a dict +pq = PriorityQueue(priorities) +test_issorted!(pq, priorities) + + +# enqueuing via enqueue! +pq = PriorityQueue() +for (k, v) in priorities + enqueue!(pq, k, v) +end +test_issorted!(pq, priorities) + + +# enqueuing via assign +pq = PriorityQueue() +for (k, v) in priorities + pq[k] = v +end +test_issorted!(pq, priorities) + + +# changing priorities +pq = PriorityQueue() +for (k, v) in priorities + pq[k] = v +end + +for _ in 1:n + k = rand(1:n) + v = rand(1:pmax) + pq[k] = v + priorities[k] = v +end + +test_issorted!(pq, priorities) + + +# low level heap operations +xs = heapify!([v for v in values(priorities)]) +@test issorted([heappop!(xs) for _ in 1:length(priorities)]) + +xs = Array(Int, 0) +for priority in values(priorities) + heappush!(xs, priority) +end +@test issorted([heappop!(xs) for _ in 1:length(priorities)]) + diff --git a/test/runtests.jl b/test/runtests.jl index f01542afd23c9..f276fec6f8b9e 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -2,8 +2,8 @@ testnames = ["core", "keywordargs", "numbers", "strings", "unicode", "corelib", "hashing", "remote", "iostring", "arrayops", "linalg", "blas", "fft", "dsp", "sparse", "bitarray", "random", "math", "functional", "bigint", "sorting", - "statistics", "spawn", "parallel", "arpack",
"bigfloat", - "file", "perf", "suitesparse", "version"] + "statistics", "spawn", "parallel", "priorityqueue", + "arpack", "bigfloat", "file", "perf", "suitesparse", "version"] # Disabled: "complex" From de3f407937c03e682b0708d13050eb7e71642be3 Mon Sep 17 00:00:00 2001 From: Daniel Jones Date: Tue, 23 Apr 2013 21:34:26 -0700 Subject: [PATCH 2/6] Rename DataStructures to Collections. Document. --- base/{datastructures.jl => collections.jl} | 2 +- base/sysimg.jl | 2 +- doc/stdlib/collections.rst | 80 ++++++++++++++++++++++ doc/stdlib/index.rst | 1 + test/priorityqueue.jl | 2 +- 5 files changed, 84 insertions(+), 3 deletions(-) rename base/{datastructures.jl => collections.jl} (99%) create mode 100644 doc/stdlib/collections.rst diff --git a/base/datastructures.jl b/base/collections.jl similarity index 99% rename from base/datastructures.jl rename to base/collections.jl index 0617c40a8090b..3aaf0f6124036 100644 --- a/base/datastructures.jl +++ b/base/collections.jl @@ -1,5 +1,5 @@ -module DataStructures +module Collections import Base: setindex!, done, get, has, isempty, length, next, getindex, start import ..Sort: Forward, Ordering, It, lt diff --git a/base/sysimg.jl b/base/sysimg.jl index 3b992da5c8147..b73ab529bdd28 100644 --- a/base/sysimg.jl +++ b/base/sysimg.jl @@ -138,7 +138,7 @@ importall .Sort include("combinatorics.jl") # basic data structures -include("datastructures.jl") +include("collections.jl") # distributed arrays and memory-mapped arrays include("darray2.jl") diff --git a/doc/stdlib/collections.rst b/doc/stdlib/collections.rst new file mode 100644 index 0000000000000..1e4330eb5279e --- /dev/null +++ b/doc/stdlib/collections.rst @@ -0,0 +1,80 @@ +:mod:`Base.Collections` --- Common data structures and containers +================================================================= + +.. module:: Base.Collections + :synopsis: + +The `Collections` module contains implementations of some common data +structures. 
+ + +PriorityQueue +------------- + +The ``PriorityQueue`` type is a basic priority queue implementation allowing for +arbitrary key and priority types. Multiple identical keys are not permitted, but +the priority of existing keys can be changed efficiently. + +.. function:: PriorityQueue{K,V}([ord]) + + Construct a new PriorityQueue, with keys of type K and values/priorities of + type V. If an order is not given, the priority queue is min-ordered using + the default comparison for V. + +.. function:: enqueue!(pq, k, v) + + Insert the key ``k`` into a priority queue ``pq`` with priority ``v``. + +.. function:: dequeue!(pq) + + Remove and return the lowest priority key from a priority queue. + +``PriorityQueue`` also behaves similarly to a ``Dict`` so that keys can be +inserted and priorities accessed or changed using indexing notation:: + + # Julia code + pq = PriorityQueue() + + # Insert keys with associated priorities + pq["a"] = 10 + pq["b"] = 5 + pq["c"] = 15 + + # Change the priority of an existing key + pq["a"] = 0 + + +Heap Functions +-------------- + +Along with the ``PriorityQueue`` type are lower level functions for performing +binary heap operations on arrays. Each function takes an optional ordering +argument. If not given, default ordering is used, so that elements popped from +the heap are given in ascending order. + +.. function:: heapify(v, [ord]) + + Return a new vector in binary heap order, optionally using the given + ordering. + +.. function:: heapify!(v, [ord]) + + In-place heapify. + +.. function:: isheap(v, [ord]) + + Return true iff an array is heap-ordered according to the given order. + +.. function:: heappush!(v, x, [ord]) + + Given a binary heap-ordered array, push a new element, preserving the heap + property. For efficiency, this function does not check that the array is + indeed heap-ordered. + +.. function:: heappop!(v, [ord]) + + Given a binary heap-ordered array, remove and return the lowest ordered + element.
For efficiency, this function does not check that the array is + indeed heap-ordered. + + diff --git a/doc/stdlib/index.rst b/doc/stdlib/index.rst index 897cd9280c45f..309711d09fef4 100644 --- a/doc/stdlib/index.rst +++ b/doc/stdlib/index.rst @@ -29,6 +29,7 @@ Built-in Modules .. toctree:: :maxdepth: 1 + collections sort test diff --git a/test/priorityqueue.jl b/test/priorityqueue.jl index 0f5d8c541dc6b..eae44650c1502 100644 --- a/test/priorityqueue.jl +++ b/test/priorityqueue.jl @@ -1,5 +1,5 @@ -using Base.DataStructures +using Base.Collections # Test dequeing in sorted order. From 98a1dffd82f8fc567c53fa0a60e750d771524eff Mon Sep 17 00:00:00 2001 From: Daniel Jones Date: Tue, 23 Apr 2013 23:45:03 -0700 Subject: [PATCH 3/6] Export Collections, not DataStructures. --- base/exports.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/exports.jl b/base/exports.jl index 4050be7fb99d1..ac0b5c8a00efa 100644 --- a/base/exports.jl +++ b/base/exports.jl @@ -2,7 +2,7 @@ export # Modules PCRE, FFTW, - DataStructures, + Collections, DSP, LinAlg, LibRandom, From 6a775e1ea6859440b39d69f82aed43322c615752 Mon Sep 17 00:00:00 2001 From: Daniel Jones Date: Tue, 23 Apr 2013 23:57:20 -0700 Subject: [PATCH 4/6] "DataStructures" to "Collections" in a comment. --- base/collections.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/collections.jl b/base/collections.jl index 3aaf0f6124036..1920bffb83bb3 100644 --- a/base/collections.jl +++ b/base/collections.jl @@ -283,5 +283,5 @@ function next(pq::PriorityQueue, i) end -end # module DataStructures +end # module Collections From 467d8137d6a48344b3dbfb1e0498b2e33353876b Mon Sep 17 00:00:00 2001 From: Daniel Jones Date: Mon, 29 Apr 2013 21:41:47 -0700 Subject: [PATCH 5/6] More specific type for index in PriorityQueue. 
--- base/collections.jl | 4 ++-- deps/libuv | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/base/collections.jl b/base/collections.jl index 1920bffb83bb3..63d22e39133f3 100644 --- a/base/collections.jl +++ b/base/collections.jl @@ -122,8 +122,8 @@ type PriorityQueue{K,V} <: Associative{K,V} xs::Array{(K, V), 1} o::Ordering - # Map elements to their index is xs - index::Dict + # Map elements to their index in xs + index::Dict{K, Int} function PriorityQueue(o::Ordering) new(Array((K, V), 0), o, Dict{K, Int}()) diff --git a/deps/libuv b/deps/libuv index c793029362a8b..d469f03eda6c9 160000 --- a/deps/libuv +++ b/deps/libuv @@ -1 +1 @@ -Subproject commit c793029362a8b78532fff99b7d7e7759fb873ae3 +Subproject commit d469f03eda6c95f064be3c25b7a9e9a7547c7617 From 347615f37539499ef27799abecc45b5a43462c18 Mon Sep 17 00:00:00 2001 From: Daniel Jones Date: Mon, 29 Apr 2013 23:25:14 -0700 Subject: [PATCH 6/6] Rename has to haskey in PriorityQueue. --- base/collections.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/base/collections.jl b/base/collections.jl index 63d22e39133f3..cdd411b96b6b1 100644 --- a/base/collections.jl +++ b/base/collections.jl @@ -1,8 +1,8 @@ module Collections -import Base: setindex!, done, get, has, isempty, length, next, getindex, start -import ..Sort: Forward, Ordering, It, lt +import Base: setindex!, done, get, haskey, isempty, length, next, getindex, start +import ..Sort: Forward, Ordering, lt export PriorityQueue, @@ -141,7 +141,7 @@ type PriorityQueue{K,V} <: Associative{K,V} index = Dict{K, Int}() for (i, (k, v)) in enumerate(zip(ks, vs)) xs[i] = (k, v) - if has(index, k) + if haskey(index, k) error("PriorityQueue keys must be unique.") end index[k] = i @@ -180,7 +180,7 @@ end length(pq::PriorityQueue) = length(pq.xs) isempty(pq::PriorityQueue) = isempty(pq.xs) -has(pq::PriorityQueue, key) = has(pq.index, key) +haskey(pq::PriorityQueue, key) = haskey(pq.index, key) peek(pq::PriorityQueue) = 
pq.xs[1] @@ -232,7 +232,7 @@ end # Change the priority of an existing element, or equeue it if it isn't present. function setindex!{K,V}(pq::PriorityQueue{K, V}, value, key) - if has(pq, key) + if haskey(pq, key) i = pq.index[key] _, oldvalue = pq.xs[i] pq.xs[i] = (key, value) @@ -248,7 +248,7 @@ end function enqueue!{K,V}(pq::PriorityQueue{K,V}, key, value) - if has(pq, key) + if haskey(pq, key) error("PriorityQueue keys must be unique.") end