diff --git a/README.md b/README.md index 7112712..207eddb 100644 --- a/README.md +++ b/README.md @@ -65,15 +65,31 @@ with iterative constraint generation similar to section 4.3. ## Benchmarks -Baharev et al [3] use a set of 4468 benchmark graphs. These can be -loaded with the `baharev_benchmark` function. +Four benchmark datasets can be downloaded or generated with `julia +benchmarks/install_benchmark_data.jl`: -The graphs generated by the +* Baharev et al [3] use a set of 4468 benchmark graphs. These can be + loaded with the `baharev_benchmark` function. + +* The repository https://github.com/alidasdan/graph-benchmarks + contains 141 benchmark graphs, including the "ISCAS" and "IBM" + graphs. These can be loaded with the `dasdan_benchmark` function. + +* 16 graphs from the [Stanford Large Network Dataset + Collection](http://snap.stanford.edu/data/) are used as a benchmark + by Pustoslemšek et al [4]. These can be loaded with the + `snap_benchmark` function. + +* All 569, up to isomorphism, strongly connected graphs with 2 to 8 + edges. These can be loaded with the `small_graphs_benchmark` + function. + +Additionally the graphs generated by the [GoGameGraphs](https://github.com/GunnarFarneback/GoGameGraphs) -package are also useful for benchmarking feedback arc set -algorithms. These are relatively easy to solve exactly, so larger -graphs can be handled than in the Baharev benchmarks. Use the -`GoGameGraphs.go_game_graph` function to generate graphs. +package are useful for benchmarking feedback arc set algorithms. These +are relatively easy to solve exactly, so larger graphs can be handled +than in the Baharev benchmarks. Use the `GoGameGraphs.go_game_graph` +function to generate graphs. ## TODO @@ -104,5 +120,10 @@ International Symposium on Graph Drawing and Network Visualization, 2022. Springer [3] An Exact Method for the Minimum Feedback Arc Set Problem. -A. Baharev, H. Schichl, A. Neumaier, T. Achterberg +A. Baharev, H. Schichl, A. Neumaier, T. Achterberg. 
Journal of Experimental Algorithmics (JEA), 2021 • dl.acm.org + +[4] Improvement and Evaluation of a Heuristic Method for the Minimal +Feedback Arc Set Problem. +J. Pustoslemšek, E. Črne, and N. Rihter. +[PDF](https://www.scores.si/assets/papers/2024/SCORES24_paper_19.pdf) diff --git a/benchmarks/Project.toml b/benchmarks/Project.toml index 540b954..b53d904 100644 --- a/benchmarks/Project.toml +++ b/benchmarks/Project.toml @@ -1,11 +1,18 @@ [deps] +Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa" +Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6" Git = "d7ba0133-e1db-5d97-8f8c-041e4b3a1eb2" +Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6" +Inflate = "d25df0c9-e2be-5dd7-82c8-3ad0b3e990b9" NaturalSort = "c020b1a1-e9b0-503a-9c33-f039bfc54a85" +NautyGraphs = "7509a0a4-015a-4167-b44b-0799a1a2605e" +REPL = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" Scratch = "6c6a2e73-6563-6170-7368-637461726353" ZipArchives = "49080126-0e18-4c2a-b176-c102e4b3760c" [compat] Git = "1.3" +Inflate = "0.1.5" NaturalSort = "1" Scratch = "1.2" ZipArchives = "2.4" diff --git a/benchmarks/install_benchmark_data.jl b/benchmarks/install_benchmark_data.jl index 24375ea..66e652e 100644 --- a/benchmarks/install_benchmark_data.jl +++ b/benchmarks/install_benchmark_data.jl @@ -6,11 +6,19 @@ using Scratch: get_scratch! using UUIDs: UUID using NaturalSort: natural using ZipArchives: ZipReader, zip_names, zip_readentry +using Inflate: inflate_gzip +using Downloads: download import Git +using REPL.TerminalMenus: MultiSelectMenu, request +using NautyGraphs: NautyDiGraph, ghash, canonize! +using Graphs: SimpleDiGraph, adjacency_matrix, is_strongly_connected, + cycle_digraph +using Combinatorics: combinations + +# UUID for FeedbackArcSets. +const uuid = UUID("6c3ede71-d29b-41ca-966d-1d2ca331f31c") function install_baharev_benchmarks() - # UUID for FeedbackArcSets. 
-    uuid = UUID("6c3ede71-d29b-41ca-966d-1d2ca331f31c")
     data_dir = get_scratch!(uuid, "baharev")
     index_file = joinpath(data_dir, "index.txt")
     if isfile(index_file)
@@ -38,11 +46,233 @@ function install_baharev_benchmarks()
             end
         end
     end
-    open(index_file, "a") do io
+    open(index_file, "w") do io
+        foreach(sort(index, lt = natural)) do entry
+            println(io, entry)
+        end
+    end
+end
+
+function install_dasdan_benchmarks()
+    data_dir = get_scratch!(uuid, "dasdan")
+    index_file = joinpath(data_dir, "index.txt")
+    if isfile(index_file)
+        @info "The Dasdan benchmarks are already installed."
+        return
+    end
+    index = String[]
+    mktempdir() do tmpdir
+        url = "https://github.com/alidasdan/graph-benchmarks.git"
+        # Clone the repository into the temporary directory; the graph
+        # files are collected from this checkout below.
+        run(`$(Git.git()) clone $url $tmpdir`)
+        for (root, dirs, files) in walkdir(tmpdir)
+            graph_files = filter(name -> (endswith(name, ".d")
+                                          || endswith(name, ".d.gz")),
+                                 files)
+            if !isempty(graph_files)
+                for file in graph_files
+                    source_file = joinpath(root, file)
+                    target_file = joinpath(data_dir,
+                                           replace(file, ".d.gz" => ".d"))
+                    name = first(split(file, "."))
+                    dir = basename(root)
+                    if endswith(file, ".gz")
+                        write(target_file, inflate_gzip(source_file))
+                    else
+                        write(target_file, read(source_file))
+                    end
+                    push!(index, "$dir $name")
+                end
+            end
+        end
+    end
+    open(index_file, "w") do io
         foreach(sort(index, lt = natural)) do entry
             println(io, entry)
         end
     end
 end
 
-install_baharev_benchmarks()
+function install_snap_benchmarks()
+    data_dir = get_scratch!(uuid, "snap")
+    index_file = joinpath(data_dir, "index.txt")
+    if isfile(index_file)
+        @info "The SNAP benchmarks are already installed."
+        return
+    end
+    index = String[]
+    mktempdir() do tmpdir
+        urls = ["http://snap.stanford.edu/data/wiki-Vote.txt.gz",
+                "http://snap.stanford.edu/data/p2p-Gnutella08.txt.gz",
+                "http://snap.stanford.edu/data/p2p-Gnutella09.txt.gz",
+                "http://snap.stanford.edu/data/p2p-Gnutella06.txt.gz",
+                "http://snap.stanford.edu/data/p2p-Gnutella05.txt.gz",
+                "http://snap.stanford.edu/data/p2p-Gnutella04.txt.gz",
+                "http://snap.stanford.edu/data/p2p-Gnutella25.txt.gz",
+                "http://snap.stanford.edu/data/p2p-Gnutella24.txt.gz",
+                "http://snap.stanford.edu/data/p2p-Gnutella30.txt.gz",
+                "http://snap.stanford.edu/data/p2p-Gnutella31.txt.gz",
+                "http://snap.stanford.edu/data/soc-Epinions1.txt.gz",
+                "http://snap.stanford.edu/data/email-EuAll.txt.gz",
+                "http://snap.stanford.edu/data/web-NotreDame.txt.gz",
+                "http://snap.stanford.edu/data/web-Stanford.txt.gz",
+                "http://snap.stanford.edu/data/soc-Slashdot0811.txt.gz",
+                "http://snap.stanford.edu/data/soc-Slashdot0902.txt.gz"]
+
+        for url in urls
+            filename_gz = joinpath(tmpdir, basename(url))
+            filename, _ = splitext(basename(url))
+            name, _ = splitext(filename)
+            target_file = joinpath(data_dir, filename)
+            download(url, filename_gz)
+            write(target_file, inflate_gzip(filename_gz))
+            push!(index, name)
+        end
+    end
+    open(index_file, "w") do io
+        foreach(sort(index, lt = natural)) do entry
+            println(io, entry)
+        end
+    end
+end
+
+# Adjacency matrix for a single edge from v to w.
+function edge_matrix(num_vertices, v, w)
+    a = zeros(Int, num_vertices, num_vertices)
+    a[v, w] = 1
+    return a
+end
+
+# Generate small strongly connected graphs with given number of
+# vertices and edges.
+function small_graphs(num_vertices, num_edges)
+    edges = [edge_matrix(num_vertices, v, w)
+             for v in 1:num_vertices, w in 1:num_vertices
+             if v != w]
+    # We can without loss of generality assume that the edge from
+    # vertex 1 to 2 is part of the graph. This is not necessary to do
+    # but saves a lot of time for the higher numbers of vertices and
+    # edges.
+    fixed_edges = popfirst!(edges)
+    num_fixed_edges = 1
+    # Furthermore any strongly connected graph with more than two
+    # edges must contain at least one combination of edges a -> b and
+    # b -> c. Thus we can without loss of generality also assume that
+    # 2 -> 3 is part of the graph.
+    if num_vertices > 2 && num_edges > 2
+        fixed_edges += popat!(edges, num_vertices)
+        num_fixed_edges += 1
+    end
+    graphs = Dict{UInt64, NautyDiGraph}()
+    for combination in combinations(edges, num_edges - num_fixed_edges)
+        adjacency_matrix = fixed_edges + sum(combination)
+        g = NautyDiGraph(adjacency_matrix)
+        h = ghash(g)
+        if !haskey(graphs, h)
+            canonize!(g)
+            graphs[h] = g
+        end
+    end
+    strongly_connected_graphs = SimpleDiGraph[]
+    for graph in values(graphs)
+        g = SimpleDiGraph(adjacency_matrix(graph))
+        if is_strongly_connected(g)
+            push!(strongly_connected_graphs, g)
+        end
+    end
+    return strongly_connected_graphs
+end
+
+function encode_small_graph(graph)
+    a = vec(adjacency_matrix(graph))
+    return reduce(|, UInt64.(a) .<< (0:(length(a) - 1)))
+end
+
+function install_small_graphs_benchmarks()
+    data_dir = get_scratch!(uuid, "small_graphs")
+    index_file = joinpath(data_dir, "index.txt")
+    if isfile(index_file)
+        @info "The small_graphs benchmarks are already installed."
+        return
+    end
+    data_file = joinpath(data_dir, "graphs.bin")
+    graph_counts = Dict(num_edges => 0 for num_edges in 2:8)
+    open(data_file, "w") do io
+        for num_edges in 2:8
+            for num_vertices in 2:num_edges
+                if (num_vertices, num_edges) == (8, 8)
+                    # It takes unnecessarily much time to search for this
+                    # single graph.
+                    graphs = [cycle_digraph(8)]
+                else
+                    graphs = small_graphs(num_vertices, num_edges)
+                end
+                graph_counts[num_edges] += length(graphs)
+                for graph in graphs
+                    write(io, encode_small_graph(graph))
+                end
+            end
+        end
+    end
+
+    # Sanity check that we have the correct number of graphs.
+    # Cf. https://oeis.org/A350752. This is done before the index file
+    # is written, since the presence of index.txt is what marks the
+    # benchmark as installed.
+    if [graph_counts[i] for i in 2:8] != [1, 1, 3, 6, 25, 91, 442]
+        error("Internal error. An incorrect number of small graphs were generated.")
+    end
+
+    write(index_file,
+          """
+          Info: The small graphs are all stored in binary form in graphs.bin,
+          one UInt64 value each, where the bits form the adjacency
+          matrices.
+          """)
+end
+
+function install_benchmarks()
+    benchmarks = [["baharev", 79, install_baharev_benchmarks],
+                  ["dasdan", 197, install_dasdan_benchmarks],
+                  ["snap", 92, install_snap_benchmarks],
+                  ["small_graphs", 0.005, install_small_graphs_benchmarks]]
+
+    installed_benchmarks = String[]
+    uninstalled_benchmarks = []
+    for benchmark in benchmarks
+        name = benchmark[1]
+        if isfile(joinpath(get_scratch!(uuid, name), "index.txt"))
+            push!(installed_benchmarks, name)
+        else
+            push!(uninstalled_benchmarks, benchmark)
+        end
+    end
+
+    if !isempty(installed_benchmarks)
+        println("The following benchmark sets are installed:")
+        for name in installed_benchmarks
+            println(name)
+        end
+        println()
+    end
+
+    if !isempty(uninstalled_benchmarks)
+        println("Choose which benchmark sets to install.")
+        options = [string(benchmark[1], " (", benchmark[2], " MB)")
+                   for benchmark in uninstalled_benchmarks]
+        selected = request(MultiSelectMenu(options))
+        println()
+        for i in sort(collect(selected))
+            name, _, installer = uninstalled_benchmarks[i]
+            println("Installing $(name) benchmark data.")
+            installer()
+        end
+        isempty(selected) || println()
+    end
+
+    println("To remove benchmark data, manually delete the corresponding directory in\n",
+            dirname(get_scratch!(uuid, "baharev")))
+end
+
+install_benchmarks()
diff --git a/src/FeedbackArcSets.jl b/src/FeedbackArcSets.jl
index 7e646f5..c3cf80d 100644
--- a/src/FeedbackArcSets.jl
+++ b/src/FeedbackArcSets.jl
@@ -14,7 +14,9 @@ module FeedbackArcSets
 
 export FeedbackArcSet, find_feedback_arc_set, dfs_feedback_arc_set,
        greedy_feedback_arc_set, pagerank_feedback_arc_set,
-       is_feedback_arc_set, baharev_benchmark
+       is_feedback_arc_set,
+       baharev_benchmark, dasdan_benchmark, snap_benchmark,
+       small_graphs_benchmark
 
 using Graphs: Graphs, SimpleDiGraph, add_edge!, edges, has_edge,
               has_self_loops, ne, nv, outneighbors, rem_edge!, simplecycles_iter,
diff --git a/src/benchmarks.jl b/src/benchmarks.jl
index 9503edd..bcb4d44 100644
--- a/src/benchmarks.jl
+++ b/src/benchmarks.jl
@@ -67,12 +67,14 @@ function baharev_benchmark(selection::Union{AbstractVector{<:AbstractString},
                                             Function})
     data_dir = @get_scratch!("baharev")
     index_file = joinpath(data_dir, "index.txt")
+
     if !isfile(index_file)
         install_script = abspath(@__DIR__, "..", "benchmarks",
                                  "install_benchmark_data.jl")
         error("The Baharev benchmark data (79 MB) has not been installed." *
-              " Please run `julia $(install_script)` to do so.")
+              " Please run `julia $(install_script)` to install it.")
     end
+
     if selection isa Function
         names = filter(selection, readlines(index_file))
     elseif eltype(selection) <: Integer
@@ -80,7 +82,9 @@ function baharev_benchmark(selection::Union{AbstractVector{<:AbstractString},
     else
         names = selection
     end
+
     benchmarks = BaharevBenchmark[]
+
     for name in names
         edges = _read_baharev_file(data_dir, name, ".edges")
         mfes = _read_baharev_file(data_dir, name, ".mfes")
@@ -93,6 +97,7 @@ function baharev_benchmark(selection::Union{AbstractVector{<:AbstractString},
         push!(benchmarks,
               BaharevBenchmark(name, graph, Tuple.(mfes), cycles))
     end
+
     return benchmarks
 end
 
@@ -104,3 +109,273 @@ function _read_baharev_file(data_dir, name, suffix)
     return [parse.(Int, split(line)) .+ 1
             for line in eachline(joinpath(data_dir, name * suffix))]
 end
+
+struct DasdanBenchmark
+    name::String
+    graph::SimpleDiGraph
+    directory::String
+end
+
+function Base.show(io::IO, bench::DasdanBenchmark)
+    print(io, "DasdanBenchmark: $(bench.name)")
+end
+
+"""
+    dasdan_benchmark()
+
+Return all 141 benchmark graphs from the repository
+https://github.com/alidasdan/graph-benchmarks. The return value is a
+vector of structs with the fields:
+* `directory`: The name of the subdirectory containing the graph.
+* `name`: The name of the graph.
+* `graph`: The graph in the form of a `Graphs.SimpleDiGraph`.
+
+Notes:
+
+* The original sources also contain an "arc weight" and "transit time"
+  for each edge in the graphs. Those are not considered by this
+  function.
+
+
+    dasdan_benchmark(index::Integer)
+
+Return a single benchmark with the given `index` in the full set of
+benchmarks.
+
+    dasdan_benchmark(indices::AbstractVector{<:Integer})
+
+Return a selection of benchmarks with the given `indices`.
+
+    dasdan_benchmark(name::AbstractString)
+
+Return a single benchmark with the given `name`.
+
+    dasdan_benchmark(names::AbstractVector{<:AbstractString})
+
+Return a selection of the benchmarks with the given `names`.
+
+    dasdan_benchmark(predicate::Function)
+
+Return a selection of benchmarks for which `predicate(name)` returns
+true.
+
+# Examples:
+    dasdan_benchmark(13)
+    dasdan_benchmark(1:5)
+    dasdan_benchmark("dsip")
+    dasdan_benchmark(["sample", "r05"])
+    dasdan_benchmark(startswith("ibm"))
+"""
+function dasdan_benchmark(selection::Union{AbstractVector{<:AbstractString},
+                                           AbstractVector{<:Integer},
+                                           Function})
+    data_dir = @get_scratch!("dasdan")
+    index_file = joinpath(data_dir, "index.txt")
+
+    if !isfile(index_file)
+        install_script = abspath(@__DIR__, "..", "benchmarks",
+                                 "install_benchmark_data.jl")
+        error("The Dasdan benchmark data (197 MB) has not been installed." *
+              " Please run `julia $(install_script)` to install it.")
+    end
+
+    name_to_dir = Dict(last(split(line)) => first(split(line))
+                       for line in eachline(index_file))
+    all_names = [last(split(line)) for line in eachline(index_file)]
+    if selection isa Function
+        names = filter(selection, all_names)
+    elseif eltype(selection) <: Integer
+        names = all_names[selection]
+    else
+        names = selection
+    end
+
+    benchmarks = DasdanBenchmark[]
+
+    for name in names
+        local graph
+        for line in eachline(joinpath(data_dir, "$(name).d"))
+            isempty(line) && continue
+            parts = split(line)
+            if first(parts) == "p"
+                graph = SimpleDiGraph(parse(Int, parts[3]))
+            elseif first(parts) == "a"
+                add_edge!(graph, parse.(Int, parts[[2, 3]])...)
+            end
+        end
+        push!(benchmarks,
+              DasdanBenchmark(name, graph, name_to_dir[name]))
+    end
+
+    return benchmarks
+end
+
+dasdan_benchmark(n::Integer) = only(dasdan_benchmark([n]))
+dasdan_benchmark(name::AbstractString) = only(dasdan_benchmark([name]))
+dasdan_benchmark() = dasdan_benchmark(Returns(true))
+
+struct SnapBenchmark
+    name::String
+    graph::SimpleDiGraph
+end
+
+function Base.show(io::IO, bench::SnapBenchmark)
+    print(io, "SnapBenchmark: $(bench.name)")
+end
+
+"""
+    snap_benchmark()
+
+Return 16 directed graphs from the [Stanford Large Network Dataset
+Collection](http://snap.stanford.edu/data/). This selection is used as
+benchmark in "Improvement and Evaluation of a Heuristic Method for the
+Minimal Feedback Arc Set Problem" by Pustoslemšek et al.
+
+The return value is a vector of structs with the fields:
+* `name`: The name of the graph.
+* `graph`: The graph in the form of a `Graphs.SimpleDiGraph`.
+
+Notes:
+
+* The vertex numbers from the original sources are used unchanged
+  unless they start with 0, in which case all vertex numbers are
+  increased by one. In some cases this means that a few vertices are
+  isolated without any edges.
+
+    snap_benchmark(index::Integer)
+
+Return a single benchmark with the given `index` in the full set of
+benchmarks.
+
+    snap_benchmark(indices::AbstractVector{<:Integer})
+
+Return a selection of benchmarks with the given `indices`.
+
+    snap_benchmark(name::AbstractString)
+
+Return a single benchmark with the given `name`.
+
+    snap_benchmark(names::AbstractVector{<:AbstractString})
+
+Return a selection of the benchmarks with the given `names`.
+
+    snap_benchmark(predicate::Function)
+
+Return a selection of benchmarks for which `predicate(name)` returns
+true.
+
+# Examples:
+    snap_benchmark(13)
+    snap_benchmark(1:5)
+    snap_benchmark("soc-Epinions1")
+    snap_benchmark(["email-EuAll", "web-Stanford"])
+    snap_benchmark(startswith("p2p-Gnutella"))
+"""
+function snap_benchmark(selection::Union{AbstractVector{<:AbstractString},
+                                         AbstractVector{<:Integer},
+                                         Function})
+    data_dir = @get_scratch!("snap")
+    index_file = joinpath(data_dir, "index.txt")
+
+    if !isfile(index_file)
+        install_script = abspath(@__DIR__, "..", "benchmarks",
+                                 "install_benchmark_data.jl")
+        error("The Snap benchmark data (92 MB) has not been installed." *
+              " Please run `julia $(install_script)` to install it.")
+    end
+
+    all_names = readlines(index_file)
+    if selection isa Function
+        names = filter(selection, all_names)
+    elseif eltype(selection) <: Integer
+        names = all_names[selection]
+    else
+        names = selection
+    end
+
+    benchmarks = SnapBenchmark[]
+
+    for name in names
+        edges = Vector{Int}[]
+        for line in eachline(joinpath(data_dir, "$(name).txt"))
+            startswith(line, "#") && continue
+            push!(edges, parse.(Int, split(line)))
+        end
+        first_vertex, last_vertex = extrema(reduce(vcat, edges))
+        @assert first_vertex >= 0
+        offset = (first_vertex == 0) ? 1 : 0
+        graph = SimpleDiGraph(last_vertex + offset)
+        for (src, dst) in edges
+            add_edge!(graph, src + offset, dst + offset)
+        end
+        push!(benchmarks, SnapBenchmark(name, graph))
+    end
+
+    return benchmarks
+end
+
+snap_benchmark(n::Integer) = only(snap_benchmark([n]))
+snap_benchmark(name::AbstractString) = only(snap_benchmark([name]))
+snap_benchmark() = snap_benchmark(Returns(true))
+
+struct SmallGraphsBenchmark
+    graph::SimpleDiGraph
+end
+
+function Base.show(io::IO, bench::SmallGraphsBenchmark)
+    v = nv(bench.graph)
+    e = ne(bench.graph)
+    print(io, "SmallGraphsBenchmark: $v vertices, $e edges")
+end
+
+"""
+    small_graphs_benchmark()
+
+Return all 569, up to isomorphism, strongly connected graphs with 2 to
+8 edges.
+
+The return value is a vector of structs with the field:
+* `graph`: The graph in the form of a `Graphs.SimpleDiGraph`.
+
+    small_graphs_benchmark(index::Integer)
+
+Return a single benchmark with the given `index` in the full set of
+benchmarks.
+
+    small_graphs_benchmark(indices::AbstractVector{<:Integer})
+
+Return a selection of benchmarks with the given `indices`.
+
+# Examples:
+    small_graphs_benchmark(13)
+    small_graphs_benchmark(1:5)
+"""
+function small_graphs_benchmark(selection::AbstractVector{<:Integer})
+    data_dir = @get_scratch!("small_graphs")
+    index_file = joinpath(data_dir, "index.txt")
+    data_file = joinpath(data_dir, "graphs.bin")
+
+    if !isfile(index_file)
+        install_script = abspath(@__DIR__, "..", "benchmarks",
+                                 "install_benchmark_data.jl")
+        error("The small_graphs benchmark data (5 kB) has not been installed." *
+              " Please run `julia $(install_script)` to install it." *
+              " It may take some time to generate it.")
+    end
+
+    encoded_graphs = reinterpret(UInt64, read(data_file))
+    return SmallGraphsBenchmark.(decode_small_graph.(encoded_graphs[selection]))
+end
+
+small_graphs_benchmark(n::Integer) = only(small_graphs_benchmark([n]))
+small_graphs_benchmark() = small_graphs_benchmark(1:569)
+
+# Decode one UInt64 from graphs.bin into a graph. Bit k of `x`, counting
+# from the least significant bit, is element k + 1 of the adjacency
+# matrix in column-major order.
+function decode_small_graph(x)
+    num_vertices = ceil(Int, sqrt(log2(x)))
+    adj = reshape((x .>> (0:(num_vertices^2 -1))) .& 1,
+                  num_vertices, num_vertices)
+    return SimpleDiGraph(adj)
+end