Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[gpuCI] Auto-merge branch-0.17 to branch-0.18 [skip ci] #1295

Merged
merged 1 commit into from
Dec 3, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
- PR #1274 Add generic from_edgelist() and from_adjlist() APIs
- PR #1279 Add self loop check variable in graph
- PR #1277 SciPy sparse matrix input support for WCC, SCC, SSSP, and BFS
- PR #1278 Add support for shortest_path_length and fix graph vertex checks

## Improvements
- PR #1227 Pin cmake policies to cmake 3.17 version
Expand Down
1 change: 1 addition & 0 deletions python/cugraph/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@
sssp,
shortest_path,
filter_unreachable,
shortest_path_length
)

from cugraph.tree import minimum_spanning_tree, maximum_spanning_tree
Expand Down
2 changes: 1 addition & 1 deletion python/cugraph/structure/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -1322,7 +1322,7 @@ def has_node(self, n):
return (ddf == n).any().any().compute()
if self.renumbered:
tmp = self.renumber_map.to_internal_vertex_id(cudf.Series([n]))
return tmp[0] >= 0
return tmp[0] is not cudf.NA and tmp[0] >= 0
else:
df = self.edgelist.edgelist_df[["src", "dst"]]
return (df == n).any().any()
Expand Down
9 changes: 9 additions & 0 deletions python/cugraph/tests/test_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -606,6 +606,15 @@ def test_has_node(graph_file):
assert G.has_node(n)


def test_invalid_has_node():
    """Check that has_node() is falsy for ids that are not in the graph."""
    # Single-edge graph: the only vertices it contains are 1 and 2.
    edges = cudf.DataFrame([[1, 2]], columns=["src", "dst"])
    G = cugraph.Graph()
    G.from_cudf_edgelist(edges, source="src", destination="dst")

    # Ids below the smallest vertex present in the edge list.
    for absent in (-1, 0):
        assert not G.has_node(absent)

    # An id beyond the vertex count.
    beyond_count = G.number_of_nodes() + 1
    assert not G.has_node(beyond_count)


@pytest.mark.parametrize('graph_file', utils.DATASETS)
def test_bipartite_api(graph_file):
# This test only tests the functionality of adding set of nodes and
Expand Down
177 changes: 177 additions & 0 deletions python/cugraph/tests/test_paths.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
import cudf
import cugraph
from cupy.sparse import coo_matrix as cupy_coo_matrix
import cupy
import networkx as nx
import pytest
import sys
from tempfile import NamedTemporaryFile

# Weighted edge list in CSV form, one "source,destination,weight" row per
# line. Vertices 1-7 all belong to a single connected component.
CONNECTED_GRAPH = (
    "1,5,3\n"
    "1,4,1\n"
    "1,2,1\n"
    "1,6,2\n"
    "1,7,2\n"
    "4,5,1\n"
    "2,3,1\n"
    "7,6,2\n"
)

# The same edges plus an 8-9 edge that shares no vertex with the rows above.
DISCONNECTED_GRAPH = CONNECTED_GRAPH + "8,9,4"


@pytest.fixture
def graphs(request):
    """
    Build the same weighted graph in three representations.

    The fixture is parametrized indirectly: ``request.param`` is the CSV
    edge-list text ("src,dst,weight" per line) to load.

    Yields
    ------
    tuple
        ``(cugraph_G, nx_G, cupy_df)`` - a cugraph.Graph, the graph read by
        ``nx.read_weighted_edgelist`` and a CuPy sparse COO matrix, all
        built from the same edge list.
    """
    with NamedTemporaryFile(mode="w+", suffix=".csv") as graph_tf:
        # request.param is a single string, so write() is the right call;
        # writelines() would iterate over it one character at a time.
        graph_tf.write(request.param)
        # The readers below reopen the file by name, so the buffered text
        # has to be on disk before they run.
        graph_tf.flush()

        nx_G = nx.read_weighted_edgelist(graph_tf.name, delimiter=',')
        cudf_df = cudf.read_csv(graph_tf.name,
                                names=["src", "dst", "data"],
                                delimiter=",",
                                dtype=["int32", "int32", "float64"])
        cugraph_G = cugraph.Graph()
        cugraph_G.from_cudf_edgelist(
            cudf_df, source="src",
            destination="dst", edge_attr="data")

        # construct cupy coo_matrix graph
        i = []
        j = []
        weights = []
        for index in range(cudf_df.shape[0]):
            # Fetch the row once rather than once per column.
            row = cudf_df.iloc[index]
            vertex1 = row["src"]
            vertex2 = row["dst"]
            weight = row["data"]
            # Store every edge in both directions so the matrix is
            # symmetric.
            i += [vertex1, vertex2]
            j += [vertex2, vertex1]
            weights += [weight, weight]
        i = cupy.array(i)
        j = cupy.array(j)
        weights = cupy.array(weights)
        # The matrix must be large enough to index the highest vertex id.
        largest_vertex = max(cupy.amax(i), cupy.amax(j))
        cupy_df = cupy_coo_matrix(
            (weights, (i, j)),
            shape=(largest_vertex + 1, largest_vertex + 1))

        yield cugraph_G, nx_G, cupy_df


@pytest.mark.parametrize("graphs", [CONNECTED_GRAPH], indirect=True)
def test_connected_graph_shortest_path_length(graphs):
    """
    Compare source-to-target distances from vertex 1 across all three graph
    representations and against networkx's own shortest_path_length.
    """
    cugraph_G, nx_G, cupy_df = graphs

    # (target vertex, expected weighted distance from vertex 1)
    expectations = ((1, 0.0), (5, 2.0), (3, 2.0), (6, 2.0))

    for target, expected in expectations:
        # The networkx graph read from CSV is addressed with string ids.
        target_label = str(target)

        length = cugraph.shortest_path_length(cugraph_G, 1, target)
        assert length == expected
        assert length == nx.shortest_path_length(
            nx_G, "1", target=target_label, weight="weight")
        assert length == cugraph.shortest_path_length(
            nx_G, "1", target_label)
        assert length == cugraph.shortest_path_length(cupy_df, 1, target)


@pytest.mark.parametrize("graphs", [CONNECTED_GRAPH], indirect=True)
def test_shortest_path_length_invalid_source(graphs):
    """A source vertex that is not in the graph must raise ValueError."""
    cugraph_G, nx_G, cupy_df = graphs

    # (graph, source, target) - the source is invalid in every case.
    cases = (
        (cugraph_G, -1, 1),
        (nx_G, "-1", "1"),
        (cupy_df, -1, 1),
    )
    for graph, source, target in cases:
        with pytest.raises(ValueError):
            cugraph.shortest_path_length(graph, source, target)


@pytest.mark.parametrize("graphs", [DISCONNECTED_GRAPH], indirect=True)
def test_shortest_path_length_invalid_target(graphs):
    """A target vertex that is not in the graph must raise ValueError."""
    cugraph_G, nx_G, cupy_df = graphs

    # (graph, source, target) - the target is invalid in every case.
    cases = (
        (cugraph_G, 1, 10),
        (nx_G, "1", "10"),
        (cupy_df, 1, 10),
    )
    for graph, source, target in cases:
        with pytest.raises(ValueError):
            cugraph.shortest_path_length(graph, source, target)


@pytest.mark.parametrize("graphs", [CONNECTED_GRAPH], indirect=True)
def test_shortest_path_length_invalid_vertexes(graphs):
    """ValueError is expected when both source and target are invalid."""
    cugraph_G, nx_G, cupy_df = graphs

    # (graph, source, target) - neither endpoint exists in the graph.
    cases = (
        (cugraph_G, 0, 42),
        (nx_G, "0", "42"),
        (cupy_df, 0, 42),
    )
    for graph, source, target in cases:
        with pytest.raises(ValueError):
            cugraph.shortest_path_length(graph, source, target)


@pytest.mark.parametrize("graphs", [DISCONNECTED_GRAPH], indirect=True)
def test_shortest_path_length_no_path(graphs):
    """
    Vertex 8 exists but cannot be reached from vertex 1; the expected
    distance for every graph representation is sys.float_info.max.
    """
    cugraph_G, nx_G, cupy_df = graphs
    unreachable = sys.float_info.max

    distance = cugraph.shortest_path_length(cugraph_G, 1, 8)
    assert distance == unreachable

    # The networkx- and cupy-backed calls must agree with the cugraph one.
    assert distance == cugraph.shortest_path_length(nx_G, "1", "8")
    assert distance == cugraph.shortest_path_length(cupy_df, 1, 8)


@pytest.mark.parametrize("graphs", [DISCONNECTED_GRAPH], indirect=True)
def test_shortest_path_length_no_target(graphs):
    """
    Without a target, shortest_path_length returns distances to all
    vertices; compare the three representations and networkx's own result.
    """
    cugraph_G, nx_G, cupy_df = graphs

    gpu_distances = cugraph.shortest_path_length(cugraph_G, 1)
    nx_distances = nx.shortest_path_length(
        nx_G, source="1", weight="weight")
    nx_gpu_distances = cugraph.shortest_path_length(nx_G, "1")
    cupy_distances = cugraph.shortest_path_length(cupy_df, 1)

    # SSSP preserves the vertex type, so the networkx-backed result carries
    # string ids; cast them to int32 before comparing.
    nx_gpu_distances["vertex"] = \
        nx_gpu_distances["vertex"].astype("int32")

    assert gpu_distances == nx_gpu_distances
    assert gpu_distances == cupy_distances

    # networkx leaves vertices 8 and 9 out of its result, so the cugraph
    # frame is expected to hold two more rows.
    assert gpu_distances.shape[0] == len(nx_distances) + 2

    for row in range(gpu_distances.shape[0]):
        vertex = str(gpu_distances["vertex"][row].item())
        distance = gpu_distances["distance"][row].item()

        # verify cugraph against networkx
        if vertex not in {'8', '9'}:
            assert distance == nx_distances[vertex]
        else:
            # Networkx does not return distances for these vertices.
            assert distance == sys.float_info.max
9 changes: 6 additions & 3 deletions python/cugraph/traversal/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@

from cugraph.traversal.bfs import bfs
from cugraph.traversal.bfs import bfs_edges
from cugraph.traversal.sssp import sssp
from cugraph.traversal.sssp import shortest_path
from cugraph.traversal.sssp import filter_unreachable
from cugraph.traversal.sssp import (
sssp,
shortest_path,
filter_unreachable,
shortest_path_length
)
98 changes: 98 additions & 0 deletions python/cugraph/traversal/sssp.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,12 +156,26 @@ def sssp(G,

Parameters
----------
graph : cugraph.Graph, networkx.Graph, CuPy or SciPy sparse matrix
Graph or matrix object, which should contain the connectivity
information. Edge weights, if present, should be single or double
precision floating point values.

source : Dependent on graph type. Index of the source vertex.

If graph is a cuGraph.Graph or a CuPy/SciPy sparse matrix:
int

If graph is an instance of a NetworkX.Graph:
str

Returns
-------
Expand Down Expand Up @@ -214,6 +228,10 @@ def sssp(G,
if G.renumbered:
source = G.lookup_internal_vertex_id(cudf.Series([source]))[0]

if source is cudf.NA:
raise ValueError(
"Starting vertex should be between 0 to number of vertices")

df = sssp_wrapper.sssp(G, source)

if G.renumbered:
Expand Down Expand Up @@ -268,3 +286,83 @@ def shortest_path(G,
"""
return sssp(G, source, method, directed, return_predecessors,
unweighted, overwrite, indices)


def shortest_path_length(G, source, target=None):
    """
    Compute the distance from a source vertex to one or all vertices in
    the graph. Uses Single Source Shortest Path (SSSP).

    Parameters
    ----------
    G : cuGraph.Graph, NetworkX.Graph, or CuPy sparse COO matrix
        cuGraph graph descriptor with connectivity information. Edge
        weights, if present, should be single or double precision floating
        point values.

    source : Dependent on graph type. Index of the source vertex.

        If G is an instance of cuGraph.Graph or CuPy sparse COO matrix:
            int

        If G is an instance of a NetworkX.Graph:
            str

    target : Dependent on graph type, optional (default=None)
        Vertex to find the distance to. When omitted, distances to every
        vertex are returned.

        If G is an instance of cuGraph.Graph or CuPy sparse COO matrix:
            int

        If G is an instance of a NetworkX.Graph:
            str

    Returns
    -------
    Return value type is based on the input type.

    If target is None, returns:

        cudf.DataFrame
            df['vertex']
                vertex id

            df['distance']
                gives the path distance from the starting vertex

    If target is not None, returns:

        Distance from source to target vertex.

    Raises
    ------
    ValueError
        If target is given and G does not contain it.
    """

    # verify target is in graph before traversing
    if target is not None:
        if not hasattr(G, "has_node"):
            # G is a sparse matrix: valid vertex ids are bounded by its
            # largest dimension. Read the shape directly instead of
            # densifying the whole matrix with toarray().
            if target < 0 or target >= max(G.shape):
                raise ValueError("Graph does not contain target vertex")
        elif not G.has_node(target):
            # G is an instance of cugraph or networkx graph
            raise ValueError("Graph does not contain target vertex")

    df = sssp(G, source)

    if isinstance(df, tuple):
        # cupy path, df is tuple of (distance, predecessor)
        # NOTE(review): the truthiness test and the `target - 1` offset
        # both look wrong for target == 0 and seem to assume 1-based
        # vertex ids, while the frame built below labels rows from 0.
        # Left unchanged because the layout of the distance array returned
        # by sssp() is not visible here - confirm against sssp().
        if target:
            return df[0][target - 1]
        results = cudf.DataFrame()
        results["vertex"] = range(df[0].shape[0])
        results["distance"] = df[0]
        return results

    # cugraph and networkx path
    # Compare against None explicitly: 0 is a legal vertex id and must not
    # fall through to the "all vertices" result.
    if target is not None:
        target_distance = df.loc[df["vertex"] == target]
        return target_distance.iloc[0]["distance"]

    results = cudf.DataFrame()
    results["vertex"] = df["vertex"]
    results["distance"] = df["distance"]
    return results