Skip to content
This repository has been archived by the owner on Oct 8, 2021. It is now read-only.

WIP: Normalized graph cut #736

Merged
merged 33 commits into from
Jan 5, 2018
Merged
Show file tree
Hide file tree
Changes from 32 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
f01a2e1
added normalized_cut
tejus-gupta Aug 27, 2017
aba678c
requested changes
tejus-gupta Aug 31, 2017
438f01a
Merge branch 'master' into ncut
sbromberger Aug 31, 2017
b9bcc79
Update LightGraphs.jl
sbromberger Sep 1, 2017
069a31b
Merge branch 'master' into ncut
sbromberger Sep 6, 2017
b948b79
Merge branch 'master' into ncut
sbromberger Sep 7, 2017
97b98f7
Merge branch 'master' into ncut
sbromberger Sep 8, 2017
3d54116
Merge branch 'master' into ncut
sbromberger Sep 10, 2017
197ed57
Merge branch 'master' into ncut
sbromberger Sep 11, 2017
a4efc39
Merge branch 'master' into ncut
sbromberger Sep 11, 2017
1dfd49c
Merge branch 'master' into ncut
sbromberger Nov 14, 2017
3882a3e
Work around ARPACK bug in eigs for normalized_cut
timholy Nov 15, 2017
6d9c9fd
Merge pull request #1 from timholy/teh/ncut
tejus-gupta Nov 15, 2017
25089eb
Merge branch 'master' into ncut
sbromberger Nov 16, 2017
ea1de86
used eigfact for small matrices
tejus-gupta Dec 2, 2017
745b7be
added test with fully connected graph
tejus-gupta Dec 2, 2017
95a5464
Merge branch 'master' into ncut
sbromberger Dec 3, 2017
4629742
Merge branch 'master' into ncut
sbromberger Dec 3, 2017
6ca8d85
Merge branch 'master' into ncut
sbromberger Dec 3, 2017
41d65fa
Merge branch 'master' into ncut
sbromberger Dec 3, 2017
0f652d1
Merge branch 'master' into ncut
sbromberger Dec 3, 2017
20690ae
corrected and added 2 tests
tejus-gupta Dec 4, 2017
19bd375
Merge branch 'master' into ncut
sbromberger Dec 4, 2017
6d144a3
removed redundant code
tejus-gupta Dec 4, 2017
ea1122d
Merge branch 'master' into ncut
sbromberger Dec 5, 2017
8c7a738
Merge branch 'master' into ncut
sbromberger Dec 14, 2017
24e7006
Merge branch 'master' into ncut
sbromberger Dec 22, 2017
de0e592
Merge branch 'master' into ncut
sbromberger Jan 2, 2018
4ae8c56
Merge branch 'master' into ncut
sbromberger Jan 2, 2018
a11db6f
make threshold a required argument for normalized_cut
jpfairbanks Jan 2, 2018
3cc9bf8
updated tests
tejus-gupta Jan 5, 2018
f932978
update function definition in docstring
tejus-gupta Jan 5, 2018
f74a4ac
Merge branch 'master' into ncut
sbromberger Jan 5, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file added .Rhistory
Empty file.
8 changes: 7 additions & 1 deletion src/LightGraphs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,10 @@ euclidean_graph,
kruskal_mst, prim_mst,

#biconnectivity and articulation points
articulation, biconnected_components
articulation, biconnected_components,

#graphcut
normalized_cut

"""
LightGraphs
Expand Down Expand Up @@ -156,6 +159,7 @@ include("utils.jl")
include("interface.jl")
include("deprecations.jl")
include("core.jl")

include("SimpleGraphs/SimpleGraphs.jl")
using .SimpleGraphs
"""
Expand All @@ -177,6 +181,7 @@ A data structure representing an edge between two vertices in
a `Graph` or `DiGraph`.
"""
const Edge = LightGraphs.SimpleGraphs.SimpleEdge

include("degeneracy.jl")
include("digraph/transitivity.jl")
include("digraph/cycles/johnson.jl")
Expand Down Expand Up @@ -230,6 +235,7 @@ include("spanningtrees/kruskal.jl")
include("spanningtrees/prim.jl")
include("biconnectivity/articulation.jl")
include("biconnectivity/biconnect.jl")
include("graphcut/normalized_cut.jl")

using .LinAlg
end # module
190 changes: 190 additions & 0 deletions src/graphcut/normalized_cut.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
#computes normalized cut cost for partition `cut`
#
# `cut` is a Bool vector/BitVector marking one side of the partition, `W` the
# (dense) affinity matrix and `D` the diagonal degree matrix built by the
# caller as `Diagonal(vec(sum(W, 2)))`.
# ncut(A, B) = cut(A, B)/assoc(A, V) + cut(A, B)/assoc(B, V)
function _normalized_cut_cost(cut, W::AbstractMatrix, D)
    # start from the matrix eltype, not Int, so the accumulator is type stable
    cut_cost = zero(eltype(W))
    for j in 1:size(W, 2)
        for i in 1:size(W, 1)
            if cut[i] != cut[j]
                cut_cost += W[i, j]
            end
        end
    end
    cut_cost /= 2   # every crossing pair (i,j)/(j,i) was counted twice
    # assoc of each side = total degree weight on that side of the cut
    return cut_cost/sum(D*cut) + cut_cost/sum(D*(.~cut))
end

# Sparse specialization of `_normalized_cut_cost`: visits only the stored
# entries of `W` instead of all n^2 index pairs.
function _normalized_cut_cost(cut, W::SparseMatrixCSC, D)
    # type-stable accumulator (the original started from the Int 0)
    cut_cost = zero(eltype(W))
    rows = rowvals(W)
    vals = nonzeros(W)
    n = size(W, 2)
    for i = 1:n
        for j in nzrange(W, i)
            row = rows[j]
            if cut[i] != cut[row]
                # halve each contribution: a symmetric W stores every
                # crossing pair in both triangles
                cut_cost += vals[j]/2
            end
        end
    end
    return cut_cost/sum(D*cut) + cut_cost/sum(D*(.~cut))
end

# Split the dense affinity matrix `W` according to the Bool partition `cut`.
# Returns `(W1, W2, vmap1, vmap2)`: the affinity matrices of the `false` and
# `true` sides, and the maps from new vertex ids back to the original ids.
function _partition_weightmx(cut, W::AbstractMatrix)
    nv = length(cut)
    nv2 = sum(cut)     # size of the `true` side
    nv1 = nv - nv2     # size of the `false` side
    # portable zero-initialized constructors (the original used the 0.6-only
    # uninitialized `Vector{Int}(n)` form)
    newvid = zeros(Int, nv)   # id of each vertex within its own partition
    vmap1 = zeros(Int, nv1)
    vmap2 = zeros(Int, nv2)
    j1 = 1
    j2 = 1
    for i in eachindex(cut)
        if cut[i] == false
            newvid[i] = j1
            vmap1[j1] = i
            j1 += 1
        else
            newvid[i] = j2
            vmap2[j2] = i
            j2 += 1
        end
    end

    W1 = similar(W, (nv1, nv1))
    W2 = similar(W, (nv2, nv2))

    # every same-side pair (including the diagonal) gets assigned, so the
    # `similar`-allocated matrices end up fully initialized
    for j in 1:size(W, 2)
        for i in 1:size(W, 1)
            if cut[i] == cut[j] == false
                W1[newvid[i], newvid[j]] = W[i, j]
            elseif cut[i] == cut[j] == true
                W2[newvid[i], newvid[j]] = W[i, j]
            end
        end
    end

    return (W1, W2, vmap1, vmap2)
end

# Sparse specialization of `_partition_weightmx`: collects (I, J, V) triplets
# per side and rebuilds two sparse matrices.
function _partition_weightmx(cut, W::SparseMatrixCSC)
    nv = length(cut)
    nv2 = sum(cut)     # size of the `true` side
    nv1 = nv - nv2     # size of the `false` side
    newvid = zeros(Int, nv)   # id of each vertex within its own partition
    vmap1 = zeros(Int, nv1)
    vmap2 = zeros(Int, nv2)
    j1 = 1
    j2 = 1
    for i in eachindex(cut)
        if cut[i] == false
            newvid[i] = j1
            vmap1[j1] = i
            j1 += 1
        else
            newvid[i] = j2
            vmap2[j2] = i
            j2 += 1
        end
    end

    rows = rowvals(W)
    vals = nonzeros(W)
    I1 = Int[]; I2 = Int[]
    J1 = Int[]; J2 = Int[]
    # use the matrix eltype instead of hard-coded Float64 so integer or other
    # Real weight matrices round-trip exactly
    V1 = Vector{eltype(W)}(); V2 = Vector{eltype(W)}()
    for i = 1:nv
        for j in nzrange(W, i)
            row = rows[j]
            if cut[i] == cut[row] == false
                push!(I1, newvid[i])
                push!(J1, newvid[row])
                push!(V1, vals[j])
            elseif cut[i] == cut[row] == true
                push!(I2, newvid[i])
                push!(J2, newvid[row])
                push!(V2, vals[j])
            end
        end
    end
    # pass explicit dimensions: without them `sparse` infers the size from the
    # largest index, so a trailing vertex with no intra-partition edge would
    # silently produce a too-small matrix
    W1 = sparse(I1, J1, V1, nv1, nv1)
    W2 = sparse(I2, J2, V2, nv2, nv2)
    return (W1, W2, vmap1, vmap2)
end

# Recursively bipartition the graph with affinity matrix `W` until the best
# available normalized cut costs more than `thres`, trying `num_cuts` evenly
# spaced split thresholds at each level. Returns a vector of 1-based,
# contiguous partition labels, one per vertex.
#
# NOTE(review): the original signature had self-referential defaults
# (`thres=thres, num_cuts=num_cuts`) which would raise UndefVarError if ever
# used; both arguments are now plainly required, matching every call site.
function _recursive_normalized_cut(W, thres, num_cuts)
    m, n = size(W)

    m == 1 && return [1]   # single vertex: nothing left to split

    D = Diagonal(vec(sum(W, 2)))   # diagonal degree matrix

    # get eigenvector corresponding to the second smallest eigenvalue of the
    # generalized problem (D - W) x = lambda D x
    # v = eigs(D-W, D, nev=2, which=:SR)[2][:,2]
    # At least some versions of ARPACK have a bug, this is a workaround:
    # solve the equivalent symmetric problem D^(-1/2) (D-W) D^(-1/2).
    invDroot = sqrt.(inv(D)) # equal to Cholesky factorization for diagonal D
    if n > 10
        ret = eigs(invDroot'*(D-W)*invDroot, nev=2, which=:SR)[2][:,2]
    else
        # eigs is unreliable for very small matrices; use a dense solver
        ret = eigfact(full(invDroot'*(D-W)*invDroot))[:vectors][:,2]
    end
    v = invDroot*ret

    # Perform n-cuts with different partitions of v and keep the cheapest.
    # Even spacing between extrema of v follows Shi & Malik and the
    # scikit-image implementation; midpoints between sorted entries of v
    # would guarantee distinct cuts but is not what the paper describes.
    min_cost = Inf
    best_thres = -1
    for t in linspace(minimum(v), maximum(v), num_cuts)
        cut = v .> t
        cost = _normalized_cut_cost(cut, W, D)
        if cost < min_cost
            min_cost = cost
            best_thres = t
        end
    end

    if min_cost < thres
        # split graph, compute normalized_cut for each subgraph recursively,
        # then merge: the second side's labels are offset past the first's
        cut = v .> best_thres
        W1, W2, vmap1, vmap2 = _partition_weightmx(cut, W)
        labels1 = _recursive_normalized_cut(W1, thres, num_cuts)
        labels2 = _recursive_normalized_cut(W2, thres, num_cuts)

        labels = zeros(Int, m)
        offset = maximum(labels1)

        for i in eachindex(labels1)
            labels[vmap1[i]] = labels1[i]
        end
        for i in eachindex(labels2)
            labels[vmap2[i]] = labels2[i] + offset
        end

        return labels
    else
        return ones(Int, m)   # best cut too expensive: keep this graph whole
    end
end

"""
normalized_cut(g, thres, distmx=weights(g), [num_cuts=10]);

Perform [recursive two-way normalized graph-cut](https://en.wikipedia.org/wiki/Segmentation-based_object_categorization#Normalized_cuts)
on a graph, partitioning the vertices into disjoint sets.
Return a vector that contains the set index for each vertex.

It is important to identify a good threshold for your application. A bisection search over the range (0,1) will help determine a good value of thres.

### Keyword Arguments
- `thres`: Subgraphs aren't split if best normalized cut is above this threshold
- `num_cuts`: Number of cuts performed to determine optimal cut

### References
"Normalized Cuts and Image Segmentation" - Jianbo Shi and Jitendra Malik
"""
function normalized_cut(
g::AbstractGraph,
thres::Real,
W::AbstractMatrix{T}=adjacency_matrix(g),
num_cuts::Int = 10
) where T <: Real

return _recursive_normalized_cut(W, thres, num_cuts)
end

98 changes: 98 additions & 0 deletions test/graphcut/normalized_cut.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
@testset "Normalized Cut" begin

gx = Graph(6)
add_edge!(gx, 1, 2)
add_edge!(gx, 2, 3)
add_edge!(gx, 1, 3)
add_edge!(gx, 4, 5)
add_edge!(gx, 4, 6)
add_edge!(gx, 5, 6)
add_edge!(gx, 1, 6)
add_edge!(gx, 3, 4)

w = zeros(6, 6)
w[2, 1] = 1.0
w[3, 1] = 1.0
w[6, 1] = 0.1
w[1, 2] = 1.0
w[3, 2] = 1.0
w[1, 3] = 1.0
w[2, 3] = 1.0
w[4, 3] = 0.2
w[3, 4] = 0.2
w[5, 4] = 1.0
w[6, 4] = 1.0
w[4, 5] = 1.0
w[6, 5] = 1.0
w[1, 6] = 0.1
w[4, 6] = 1.0
w[5, 6] = 1.0

for g in testgraphs(gx)
labels = @inferred(normalized_cut(g, 1, w))
@test labels == [1, 1, 1, 2, 2, 2] || labels == [2, 2, 2, 1, 1, 1]
end

w = SparseMatrixCSC(w)
for g in testgraphs(gx)
labels = @inferred(normalized_cut(g, 1, w))
@test labels == [1, 1, 1, 2, 2, 2] || labels == [2, 2, 2, 1, 1, 1]
end

gx = Graph(4)
add_edge!(gx, 1, 2)
add_edge!(gx, 3, 4)

w = zeros(4, 4)
w[2, 1] = 1.0
w[1, 2] = 1.0
w[4, 3] = 1.0
w[3, 4] = 1.0
for g in testgraphs(gx)
labels = @inferred(normalized_cut(g, 0.1, w))
@test labels == [1, 1, 2, 2] || labels == [2, 2, 1, 1]
end

w = SparseMatrixCSC(w)
for g in testgraphs(gx)
labels = @inferred(normalized_cut(g, 0.1, w))
@test labels == [1, 1, 2, 2] || labels == [2, 2, 1, 1]
end

w = ones(12, 12)
for g in testgraphs(gx)
labels = @inferred(normalized_cut(g, 0.1, w))
@test labels == [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
end

w = ones(12, 12)
for g in testgraphs(gx)
labels = @inferred(normalized_cut(g, 0.1, w))
@test labels == [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
end

g = PathGraph(50)

function contiguous(labels::Vector{Int})::Bool
changes = 0
for i in 1:length(labels)-1
if labels[i] != labels[i+1]
changes += 1
end
end
return changes == length(unique(labels)) - 1
end

num_subgraphs = Vector{Int}(9)

for t in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
labels = @inferred(normalized_cut(g, t))
@test contiguous(labels) == true
num_subgraphs[convert(Int, 10*t)] = size(unique(labels), 1)
end

@test issorted(num_subgraphs) == true

labels = @inferred(normalized_cut(g, 0.1))
@test sort(unique(labels)) == [1, 2, 3, 4]
end
3 changes: 2 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,8 @@ tests = [
"spanningtrees/kruskal",
"spanningtrees/prim",
"biconnectivity/articulation",
"biconnectivity/biconnect"
"biconnectivity/biconnect",
"graphcut/normalized_cut"
]

@testset "LightGraphs" begin
Expand Down