Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[gpuCI] Auto-merge branch-0.13 to branch-0.14 [skip ci] #759

Merged
merged 9 commits into from
Mar 17, 2020
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
- PR #740 added utility to extract paths from SSSP/BFS results
- PR #742 Remove gdf column from jaccard
- PR #741 Added documentation for running and adding new benchmarks and shell script to automate
- PR #747 Updated viewing of graph, datatype casting, and two-hop neighbor unrenumbering for multi-column

## Bug Fixes
- PR #697 Updated versions in conda environments.
Expand All @@ -31,6 +32,7 @@
- PR #733 Fixed multi-column renumbering issues with indexes
- PR #746 Dask + Distributed 2.12.0+
- PR #753 ECG Error
- PR #758 Fix for graph comparison failure

# cuGraph 0.12.0 (04 Feb 2020)

Expand Down
8 changes: 4 additions & 4 deletions python/cugraph/link_analysis/pagerank_wrapper.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ def pagerank(input_graph, alpha=0.85, personalization=None, max_iter=100, tol=1.
if not input_graph.transposedadjlist:
input_graph.view_transposed_adj_list()

[offsets, indices] = graph_wrapper.datatype_cast([input_graph.transposedadjlist.offsets,
input_graph.transposedadjlist.indices], [np.int32])
[offsets, indices] = graph_wrapper.datatype_cast([input_graph.transposedadjlist.offsets, input_graph.transposedadjlist.indices], [np.int32])
[weights] = graph_wrapper.datatype_cast([input_graph.transposedadjlist.weights], [np.float32, np.float64])

num_verts = input_graph.number_of_vertices()
num_edges = len(indices)
Expand Down Expand Up @@ -75,8 +75,8 @@ def pagerank(input_graph, alpha=0.85, personalization=None, max_iter=100, tol=1.
cdef uintptr_t c_indices = indices.__cuda_array_interface__['data'][0]
cdef uintptr_t c_weights = <uintptr_t>NULL

if input_graph.transposedadjlist.weights is not None:
c_weights = input_graph.transposedadjlist.weights.__cuda_array_interface__['data'][0]
if weights is not None:
c_weights = weights.__cuda_array_interface__['data'][0]

cdef GraphCSC[int,int,float] graph_float
cdef GraphCSC[int,int,double] graph_double
Expand Down
37 changes: 27 additions & 10 deletions python/cugraph/structure/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,18 +31,17 @@ class EdgeList:
def __init__(self, source, destination, edge_attr=None,
renumber_map=None):
self.renumber_map = renumber_map
df = cudf.DataFrame()
df['src'] = source
df['dst'] = destination
self.edgelist_df = cudf.DataFrame()
self.edgelist_df['src'] = source
self.edgelist_df['dst'] = destination
self.weights = False
if edge_attr is not None:
self.weights = True
if type(edge_attr) is dict:
for k in edge_attr.keys():
df[k] = edge_attr[k]
self.edgelist_df[k] = edge_attr[k]
else:
df['weights'] = edge_attr
self.edgelist_df = df
self.edgelist_df['weights'] = edge_attr

class AdjList:
def __init__(self, offsets, indices, value=None):
Expand Down Expand Up @@ -243,6 +242,7 @@ def view_edge_list(self):
self.edge_count = len(edgelist_df)
else:
edgelist_df = self.edgelist.edgelist_df

if self.renumbered:
if isinstance(self.edgelist.renumber_map, cudf.DataFrame):
df = cudf.DataFrame()
Expand Down Expand Up @@ -408,10 +408,27 @@ def get_two_hop_neighbors(self):
"""
df = graph_wrapper.get_two_hop_neighbors(self)
if self.renumbered is True:
df['first'] = self.edgelist.renumber_map[df['first']].\
reset_index(drop=True)
df['second'] = self.edgelist.renumber_map[df['second']].\
reset_index(drop=True)
if isinstance(self.edgelist.renumber_map, cudf.DataFrame):
n_cols = len(self.edgelist.renumber_map.columns) - 1
unrenumbered_df_ = df.merge(self.edgelist.renumber_map,
left_on='first', right_on='id',
how='left').\
drop(['id', 'first'])
unrenumbered_df = unrenumbered_df_.merge(self.edgelist.
renumber_map,
left_on='second',
right_on='id',
how='left').\
drop(['id', 'second'])
unrenumbered_df.columns = ['first_' + str(i)
for i in range(n_cols)]\
+ ['second_' + str(i) for i in range(n_cols)]
df = unrenumbered_df
else:
df['first'] = self.edgelist.renumber_map[df['first']].\
reset_index(drop=True)
df['second'] = self.edgelist.renumber_map[df['second']].\
reset_index(drop=True)
return df

def number_of_vertices(self):
Expand Down
50 changes: 26 additions & 24 deletions python/cugraph/structure/graph_wrapper.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -139,14 +139,12 @@ def get_edge_list(graph_ptr):
nelem=col_size,
dtype=np_dtype_from_gdf_column(g.edgeList.edge_data))
value_col = cudf.Series(value_data)

return source_col, dest_col, value_col


def add_adj_list(graph_ptr, offset_col, index_col, value_col=None):
cdef uintptr_t graph = graph_ptr
cdef Graph * g = <Graph*> graph

cdef gdf_column c_offset_col = get_gdf_column_view(offset_col)
cdef gdf_column c_index_col = get_gdf_column_view(index_col)
cdef gdf_column c_value_col
Expand All @@ -156,7 +154,6 @@ def add_adj_list(graph_ptr, offset_col, index_col, value_col=None):
else:
c_value_col = get_gdf_column_view(value_col)
c_value_col_ptr = &c_value_col

c_graph.adj_list_view(g,
&c_offset_col,
&c_index_col,
Expand All @@ -169,7 +166,6 @@ def get_adj_list(graph_ptr):

offset_col_size = g.adjList.offsets.size
index_col_size = g.adjList.indices.size

cdef uintptr_t offset_col_data = <uintptr_t> g.adjList.offsets.data
cdef uintptr_t index_col_data = <uintptr_t> g.adjList.indices.data
cdef uintptr_t value_col_data = <uintptr_t> NULL
Expand Down Expand Up @@ -212,7 +208,9 @@ def view_edge_list(input_graph):
if input_graph.adjlist is None:
raise Exception('Graph is Empty')
else:
add_adj_list(graph, input_graph.adjlist.offsets, input_graph.adjlist.indices, input_graph.adjlist.weights)
[offsets, indices] = datatype_cast([input_graph.adjlist.offsets, input_graph.adjlist.indices], [np.int32])
[weights] = datatype_cast([input_graph.adjlist.weights], [np.float32, np.float64])
add_adj_list(graph, offsets, indices, weights)
c_graph.add_edge_list(g)
source, dest, value = get_edge_list(graph)
input_graph.edgelist = input_graph.EdgeList(source, dest, value)
Expand All @@ -224,10 +222,12 @@ def view_adj_list(input_graph):
if input_graph.edgelist is None:
raise Exception('Graph is Empty')
else:
if len(input_graph.edgelist.edgelist_df.columns)>2:
add_edge_list(graph, input_graph.edgelist.edgelist_df['src'], input_graph.edgelist.edgelist_df['dst'], input_graph.edgelist.edgelist_df['weights'])
[src, dst] = datatype_cast([input_graph.edgelist.edgelist_df['src'], input_graph.edgelist.edgelist_df['dst']], [np.int32])
if input_graph.edgelist.weights:
[weights] = datatype_cast([input_graph.edgelist.edgelist_df['weights']], [np.float32, np.float64])
add_edge_list(graph, src, dst, weights)
else:
add_edge_list(graph, input_graph.edgelist.edgelist_df['src'], input_graph.edgelist.edgelist_df['dst'])
add_edge_list(graph, src, dst)
c_graph.add_adj_list(g)
offsets, indices, values = get_adj_list(graph)
input_graph.adjlist = input_graph.AdjList(offsets, indices, values)
Expand All @@ -237,15 +237,19 @@ def view_transposed_adj_list(input_graph):
cdef Graph * g = <Graph*> graph
if input_graph.transposedadjlist is None:
if input_graph.edgelist is None:
raise Exception('Graph is Empty')
else:
if len(input_graph.edgelist.edgelist_df.columns)>2:
add_edge_list(graph, input_graph.edgelist.edgelist_df['src'], input_graph.edgelist.edgelist_df['dst'], input_graph.edgelist.edgelist_df['weights'])
if input_graph.adjlist is None:
raise Exception('Graph is Empty')
else:
add_edge_list(graph, input_graph.edgelist.edgelist_df['src'], input_graph.edgelist.edgelist_df['dst'])
c_graph.add_transposed_adj_list(g)
offsets, indices, values = get_transposed_adj_list(graph)
input_graph.transposedadjlist = input_graph.transposedAdjList(offsets, indices, values)
view_edge_list(input_graph)
[src, dst] = datatype_cast([input_graph.edgelist.edgelist_df['src'], input_graph.edgelist.edgelist_df['dst']], [np.int32])
if input_graph.edgelist.weights:
[weights] = datatype_cast([input_graph.edgelist.edgelist_df['weights']], [np.float32, np.float64])
add_edge_list(graph, src, dst, weights)
else:
add_edge_list(graph, src, dst)
c_graph.add_transposed_adj_list(g)
offsets, indices, values = get_transposed_adj_list(graph)
input_graph.transposedadjlist = input_graph.transposedAdjList(offsets, indices, values)

def add_transposed_adj_list(graph_ptr, offset_col, index_col, value_col=None):
cdef uintptr_t graph = graph_ptr
Expand Down Expand Up @@ -357,15 +361,13 @@ def get_two_hop_neighbors(input_graph):
def number_of_vertices(input_graph):
cdef uintptr_t graph = allocate_cpp_graph()
cdef Graph * g = <Graph*> graph

if input_graph.adjlist:
add_adj_list(graph, input_graph.adjlist.offsets, input_graph.adjlist.indices, input_graph.adjlist.weights)
[src, dst] = datatype_cast([input_graph.edgelist.edgelist_df['src'], input_graph.edgelist.edgelist_df['dst']], [np.int32])
if input_graph.edgelist.weights:
[weights] = datatype_cast([input_graph.edgelist.edgelist_df['weights']], [np.float32, np.float64])
add_edge_list(graph, src, dst, weights)
else:
if input_graph.edgelist.weights:
add_edge_list(graph, input_graph.edgelist.edgelist_df['src'], input_graph.edgelist.edgelist_df['dst'], input_graph.edgelist.edgelist_df['weights'])
else:
add_edge_list(graph, input_graph.edgelist.edgelist_df['src'], input_graph.edgelist.edgelist_df['dst'])
c_graph.number_of_vertices(g)
add_edge_list(graph, src, dst)
c_graph.number_of_vertices(g)
return g.numberOfVertices


Expand Down
6 changes: 2 additions & 4 deletions python/cugraph/tests/test_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,8 @@ def compare_graphs(nx_graph, cu_graph):

if len(edgelist_df.columns) > 2:
df0 = cudf.from_pandas(nx.to_pandas_edgelist(nx_graph))
df0 = df0.sort_values(by=['source', 'target'])
df1 = df.sort_values(by=['source', 'target'])
df0 = df0.sort_values(by=['source', 'target']).reset_index(drop=True)
df1 = df.sort_values(by=['source', 'target']).reset_index(drop=True)
if not df0['weight'].equals(df1['weight']):
return False

Expand Down Expand Up @@ -482,7 +482,6 @@ def test_networkx_compatibility(managed, pool, graph_file):
df['source'] = pd.Series(M['0'])
df['target'] = pd.Series(M['1'])
df['weight'] = pd.Series(M.weight)

gdf = cudf.from_pandas(df)

Gnx = nx.from_pandas_edgelist(df,
Expand All @@ -495,7 +494,6 @@ def test_networkx_compatibility(managed, pool, graph_file):
destination='target',
edge_attr='weight',
create_using=cugraph.DiGraph)

assert compare_graphs(Gnx, G)

Gnx.clear()
Expand Down