sbromberger · sbromberger · Jan 5, 2018 · Aug 27, 2017 · Aug 31, 2017 · Aug 31, 2017
diff --git a/.Rhistory b/.Rhistory
diff --git a/src/LightGraphs.jl b/src/LightGraphs.jl
@@ -127,7 +127,10 @@ euclidean_graph,
 kruskal_mst, prim_mst,
 
 #biconnectivity and articulation points
-articulation, biconnected_components
+articulation, biconnected_components,
+
+#graphcut
+normalized_cut
 
 """
     LightGraphs
@@ -156,6 +159,7 @@ include("utils.jl")
 include("interface.jl")
 include("deprecations.jl")
 include("core.jl")
+
 include("SimpleGraphs/SimpleGraphs.jl")
 using .SimpleGraphs
 """
@@ -177,6 +181,7 @@ A datastruture representing an edge between two vertices in
 a `Graph` or `DiGraph`.
 """
 const Edge = LightGraphs.SimpleGraphs.SimpleEdge
+
 include("degeneracy.jl")
 include("digraph/transitivity.jl")
 include("digraph/cycles/johnson.jl")
@@ -230,6 +235,7 @@ include("spanningtrees/kruskal.jl")
 include("spanningtrees/prim.jl")
 include("biconnectivity/articulation.jl")
 include("biconnectivity/biconnect.jl")
+include("graphcut/normalized_cut.jl")
 
 using .LinAlg
 end # module
diff --git a/src/graphcut/normalized_cut.jl b/src/graphcut/normalized_cut.jl
@@ -0,0 +1,190 @@
+#computes normalized cut cost for partition `cut`
+function _normalized_cut_cost(cut, W::AbstractMatrix, D)
+    cut_cost = 0
+    for j in indices(W, 2)
+        for i in indices(W, 1)
+            if cut[i] != cut[j]
+                cut_cost += W[i, j]
+            end
+        end
+    end
+    cut_cost/=2
+    return cut_cost/sum(D*cut) + cut_cost/sum(D*(.~cut))
+end
+
+function _normalized_cut_cost(cut, W::SparseMatrixCSC, D)
+    cut_cost = 0
+    rows = rowvals(W)
+    vals = nonzeros(W)
+    n = size(W, 2)
+    for i = 1:n
+        for j in nzrange(W, i)
+            row = rows[j]
+            if cut[i] != cut[row]
+                cut_cost += vals[j]/2
+            end
+        end
+    end
+    return cut_cost/sum(D*cut) + cut_cost/sum(D*(.~cut))
+end
+
+function _partition_weightmx(cut, W::AbstractMatrix)
+    nv = length(cut)
+    nv2 = sum(cut)
+    nv1 = nv - nv2
+    newvid = Vector{Int}(nv)
+    vmap1 = Vector{Int}(nv1)
+    vmap2 = Vector{Int}(nv2)
+    j1 = 1
+    j2 = 1
+    for i in eachindex(cut)
+        if cut[i] == false
+            newvid[i] = j1
+            vmap1[j1] = i
+            j1+=1
+        else
+            newvid[i] = j2
+            vmap2[j2] = i
+            j2+=1
+        end
+    end
+
+    W1 = similar(W, (nv1, nv1))
+    W2 = similar(W, (nv2, nv2))
+
+    for j in indices(W, 2)
+        for i in indices(W, 1)
+            if cut[i] == cut[j] == false
+                W1[newvid[i], newvid[j]] = W[i, j]
+            elseif cut[i] == cut[j] == true
+                W2[newvid[i], newvid[j]] = W[i, j]
+            end
+        end
+    end
+
+    return (W1, W2, vmap1, vmap2)
+end
+
+function _partition_weightmx(cut, W::SparseMatrixCSC)
+    nv = length(cut)
+    nv2 = sum(cut)
+    nv1 = nv - nv2
+    newvid = Vector{Int}(nv)
+    vmap1 = Vector{Int}(nv1)
+    vmap2 = Vector{Int}(nv2)
+    j1 = 1
+    j2 = 1
+    for i in eachindex(cut)
+        if cut[i] == false
+            newvid[i] = j1
+            vmap1[j1] = i
+            j1+=1
+        else
+            newvid[i] = j2
+            vmap2[j2] = i
+            j2+=1
+        end
+    end
+
+    rows = rowvals(W)
+    vals = nonzeros(W)
+    I1 = Vector{Int}(); I2 = Vector{Int}()
+    J1 = Vector{Int}(); J2 = Vector{Int}()
+    V1 = Vector{Float64}(); V2 = Vector{Float64}()
+    for i = 1:nv
+        for j in nzrange(W, i)
+            row = rows[j]
+            if cut[i] == cut[row] == false
+                push!(I1, newvid[i])
+                push!(J1, newvid[row])
+                push!(V1, vals[j])
+            elseif cut[i] == cut[row] == true
+                push!(I2, newvid[i])
+                push!(J2, newvid[row])
+                push!(V2, vals[j])
+            end
+        end
+    end
+    W1 = sparse(I1, J1, V1)
+    W2 = sparse(I2, J2, V2)
+    return (W1, W2, vmap1, vmap2)
+end
+
+function _recursive_normalized_cut(W, thres=thres, num_cuts=num_cuts)
+    m, n = size(W)
+    D = Diagonal(vec(sum(W, 2)))
+
+    m == 1 && return [1]
+
+    #get eigenvector corresponding to second smallest eigenvalue
+    # v = eigs(D-W, D, nev=2, which=:SR)[2][:,2]
+    # At least some versions of ARPACK have a bug, this is a workaround
+    invDroot = sqrt.(inv(D)) # equal to Cholesky factorization for diagonal D
+    if n > 10
+        ret = eigs(invDroot'*(D-W)*invDroot, nev=2, which=:SR)[2][:,2]
+    else
+        ret = eigfact(full(invDroot'*(D-W)*invDroot))[:vectors][:,2]
+    end
+    v = invDroot*ret
+
+    #perform n-cuts with different partitions of v and find best one
+    min_cost = Inf
+    best_thres = -1
+    for t in linspace(minimum(v), maximum(v), num_cuts)
+        cut = v.>t
+        cost = _normalized_cut_cost(cut, W, D)
+        if cost < min_cost
+            min_cost = cost
+            best_thres = t
+        end
+    end
+
+    if min_cost < thres
+        #split graph, compute normalized_cut for each subgraph recursively and merge indices.
+        cut = v.>best_thres
+        W1, W2, vmap1, vmap2 = _partition_weightmx(cut, W)
+        labels1 = _recursive_normalized_cut(W1, thres, num_cuts)
+        labels2 = _recursive_normalized_cut(W2, thres, num_cuts)
+
+        labels = Vector{Int}(m)
+        offset = maximum(labels1)
+
+        for i in eachindex(labels1)
+            labels[vmap1[i]] = labels1[i]
+        end
+        for i in eachindex(labels2)
+            labels[vmap2[i]] = labels2[i] + offset
+        end
+
+        return labels
+    else
+        return ones(Int, m)
+    end
+end
+
+"""
+    normalized_cut(g, thres, distmx=weights(g), [num_cuts=10]);
+
+Perform [recursive two-way normalized graph-cut](https://en.wikipedia.org/wiki/Segmentation-based_object_categorization#Normalized_cuts)
+on a graph, partitioning the vertices into disjoint sets.
+Return a vector that contains the set index for each vertex.
+
+It is important to identify a good threshold for your application. A bisection search over the range (0,1) will help determine a good value of thres.
+
+### Keyword Arguments
+- `thres`: Subgraphs aren't split if best normalized cut is above this threshold
+- `num_cuts`: Number of cuts performed to determine optimal cut
+
+### References
+"Normalized Cuts and Image Segmentation" - Jianbo Shi and Jitendra Malik
+"""
+function normalized_cut(
+    g::AbstractGraph,
+    thres::Real,
+    W::AbstractMatrix{T}=adjacency_matrix(g),
+    num_cuts::Int = 10
+    ) where T <: Real
+
+    return _recursive_normalized_cut(W, thres, num_cuts)
+end
+
diff --git a/test/graphcut/normalized_cut.jl b/test/graphcut/normalized_cut.jl
@@ -0,0 +1,98 @@
+@testset "Normalized Cut" begin
+
+    gx = Graph(6)
+    add_edge!(gx, 1, 2)
+    add_edge!(gx, 2, 3)
+    add_edge!(gx, 1, 3)
+    add_edge!(gx, 4, 5)
+    add_edge!(gx, 4, 6)
+    add_edge!(gx, 5, 6)
+    add_edge!(gx, 1, 6)
+    add_edge!(gx, 3, 4)
+
+    w = zeros(6, 6)
+    w[2, 1] = 1.0
+    w[3, 1] = 1.0
+    w[6, 1] = 0.1
+    w[1, 2] = 1.0
+    w[3, 2] = 1.0
+    w[1, 3] = 1.0
+    w[2, 3] = 1.0
+    w[4, 3] = 0.2
+    w[3, 4] = 0.2
+    w[5, 4] = 1.0
+    w[6, 4] = 1.0
+    w[4, 5] = 1.0
+    w[6, 5] = 1.0
+    w[1, 6] = 0.1
+    w[4, 6] = 1.0
+    w[5, 6] = 1.0
+
+    for g in testgraphs(gx)
+      labels = @inferred(normalized_cut(g, 1, w))
+      @test labels == [1, 1, 1, 2, 2, 2] || labels == [2, 2, 2, 1, 1, 1]
+    end
+
+    w = SparseMatrixCSC(w)
+    for g in testgraphs(gx)
+      labels = @inferred(normalized_cut(g, 1, w))
+      @test labels == [1, 1, 1, 2, 2, 2] || labels == [2, 2, 2, 1, 1, 1]
+    end
+
+    gx = Graph(4)
+    add_edge!(gx, 1, 2)
+    add_edge!(gx, 3, 4)
+
+    w = zeros(4, 4)
+    w[2, 1] = 1.0
+    w[1, 2] = 1.0
+    w[4, 3] = 1.0
+    w[3, 4] = 1.0
+    for g in testgraphs(gx)
+      labels = @inferred(normalized_cut(g, 0.1, w))
+      @test labels == [1, 1, 2, 2] || labels == [2, 2, 1, 1]
+    end
+
+    w = SparseMatrixCSC(w)
+    for g in testgraphs(gx)
+      labels = @inferred(normalized_cut(g, 0.1, w))
+      @test labels == [1, 1, 2, 2] || labels == [2, 2, 1, 1]
+    end
+
+    w = ones(12, 12)
+    for g in testgraphs(gx)
+      labels = @inferred(normalized_cut(g, 0.1, w))
+      @test labels == [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
+    end
+
+    w = ones(12, 12)
+    for g in testgraphs(gx)
+      labels = @inferred(normalized_cut(g, 0.1, w))
+      @test labels == [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
+    end
+
+    g = PathGraph(50)
+
+    function contiguous(labels::Vector{Int})::Bool
+        changes = 0
+        for i in 1:length(labels)-1
+            if labels[i] != labels[i+1]
+                changes += 1
+            end
+        end
+        return changes == length(unique(labels)) - 1
+    end
+
+    num_subgraphs = Vector{Int}(9)
+
+    for t in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
+        labels = @inferred(normalized_cut(g, t))
+        @test contiguous(labels) == true
+        num_subgraphs[convert(Int, 10*t)] = size(unique(labels), 1)
+    end
+
+    @test issorted(num_subgraphs) == true
+
+    labels = @inferred(normalized_cut(g, 0.1))
+    @test sort(unique(labels)) == [1, 2, 3, 4]
+end
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -67,7 +67,8 @@ tests = [
     "spanningtrees/kruskal",
     "spanningtrees/prim",
     "biconnectivity/articulation",
-    "biconnectivity/biconnect"
+    "biconnectivity/biconnect",
+    "graphcut/normalized_cut"
 ]
 
 @testset "LightGraphs" begin