diff --git a/CHANGELOG.md b/CHANGELOG.md index 4a7e01f6b27..8ac73f27015 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ - PR #740 added utility to extract paths from SSSP/BFS results - PR #742 Rremove gdf column from jaccard - PR #741 Added documentation for running and adding new benchmarks and shell script to automate +- PR #747 Updated viewing of graph, datatype casting, and multi-column unrenumbering of two-hop neighbors ## Bug Fixes - PR #697 Updated versions in conda environments. @@ -31,6 +32,7 @@ - PR #733 Fixed multi-column renumbering issues with indexes - PR #746 Dask + Distributed 2.12.0+ - PR #753 ECG Error +- PR #758 Fix for graph comparison failure # cuGraph 0.12.0 (04 Feb 2020) diff --git a/python/cugraph/link_analysis/pagerank_wrapper.pyx b/python/cugraph/link_analysis/pagerank_wrapper.pyx index 3d8f9b8df4f..08f8a7e89e0 100644 --- a/python/cugraph/link_analysis/pagerank_wrapper.pyx +++ b/python/cugraph/link_analysis/pagerank_wrapper.pyx @@ -40,8 +40,8 @@ def pagerank(input_graph, alpha=0.85, personalization=None, max_iter=100, tol=1. if not input_graph.transposedadjlist: input_graph.view_transposed_adj_list() - [offsets, indices] = graph_wrapper.datatype_cast([input_graph.transposedadjlist.offsets, - input_graph.transposedadjlist.indices], [np.int32]) + [offsets, indices] = graph_wrapper.datatype_cast([input_graph.transposedadjlist.offsets, input_graph.transposedadjlist.indices], [np.int32]) + [weights] = graph_wrapper.datatype_cast([input_graph.transposedadjlist.weights], [np.float32, np.float64]) num_verts = input_graph.number_of_vertices() num_edges = len(indices) @@ -75,8 +75,8 @@ def pagerank(input_graph, alpha=0.85, personalization=None, max_iter=100, tol=1. 
cdef uintptr_t c_indices = indices.__cuda_array_interface__['data'][0] cdef uintptr_t c_weights = NULL - if input_graph.transposedadjlist.weights is not None: - c_weights = input_graph.transposedadjlist.weights.__cuda_array_interface__['data'][0] + if weights is not None: + c_weights = weights.__cuda_array_interface__['data'][0] cdef GraphCSC[int,int,float] graph_float cdef GraphCSC[int,int,double] graph_double diff --git a/python/cugraph/structure/graph.py b/python/cugraph/structure/graph.py index 951a51fd7d0..b3442d9d36f 100644 --- a/python/cugraph/structure/graph.py +++ b/python/cugraph/structure/graph.py @@ -31,18 +31,17 @@ class EdgeList: def __init__(self, source, destination, edge_attr=None, renumber_map=None): self.renumber_map = renumber_map - df = cudf.DataFrame() - df['src'] = source - df['dst'] = destination + self.edgelist_df = cudf.DataFrame() + self.edgelist_df['src'] = source + self.edgelist_df['dst'] = destination self.weights = False if edge_attr is not None: self.weights = True if type(edge_attr) is dict: for k in edge_attr.keys(): - df[k] = edge_attr[k] + self.edgelist_df[k] = edge_attr[k] else: - df['weights'] = edge_attr - self.edgelist_df = df + self.edgelist_df['weights'] = edge_attr class AdjList: def __init__(self, offsets, indices, value=None): @@ -243,6 +242,7 @@ def view_edge_list(self): self.edge_count = len(edgelist_df) else: edgelist_df = self.edgelist.edgelist_df + if self.renumbered: if isinstance(self.edgelist.renumber_map, cudf.DataFrame): df = cudf.DataFrame() @@ -408,10 +408,27 @@ def get_two_hop_neighbors(self): """ df = graph_wrapper.get_two_hop_neighbors(self) if self.renumbered is True: - df['first'] = self.edgelist.renumber_map[df['first']].\ - reset_index(drop=True) - df['second'] = self.edgelist.renumber_map[df['second']].\ - reset_index(drop=True) + if isinstance(self.edgelist.renumber_map, cudf.DataFrame): + n_cols = len(self.edgelist.renumber_map.columns) - 1 + unrenumbered_df_ = df.merge(self.edgelist.renumber_map, + 
left_on='first', right_on='id', + how='left').\ + drop(['id', 'first']) + unrenumbered_df = unrenumbered_df_.merge(self.edgelist. + renumber_map, + left_on='second', + right_on='id', + how='left').\ + drop(['id', 'second']) + unrenumbered_df.columns = ['first_' + str(i) + for i in range(n_cols)]\ + + ['second_' + str(i) for i in range(n_cols)] + df = unrenumbered_df + else: + df['first'] = self.edgelist.renumber_map[df['first']].\ + reset_index(drop=True) + df['second'] = self.edgelist.renumber_map[df['second']].\ + reset_index(drop=True) return df def number_of_vertices(self): diff --git a/python/cugraph/structure/graph_wrapper.pyx b/python/cugraph/structure/graph_wrapper.pyx index 4adfa0dfaae..16e0c99b84b 100644 --- a/python/cugraph/structure/graph_wrapper.pyx +++ b/python/cugraph/structure/graph_wrapper.pyx @@ -139,14 +139,12 @@ def get_edge_list(graph_ptr): nelem=col_size, dtype=np_dtype_from_gdf_column(g.edgeList.edge_data)) value_col = cudf.Series(value_data) - return source_col, dest_col, value_col def add_adj_list(graph_ptr, offset_col, index_col, value_col=None): cdef uintptr_t graph = graph_ptr cdef Graph * g = graph - cdef gdf_column c_offset_col = get_gdf_column_view(offset_col) cdef gdf_column c_index_col = get_gdf_column_view(index_col) cdef gdf_column c_value_col @@ -156,7 +154,6 @@ def add_adj_list(graph_ptr, offset_col, index_col, value_col=None): else: c_value_col = get_gdf_column_view(value_col) c_value_col_ptr = &c_value_col - c_graph.adj_list_view(g, &c_offset_col, &c_index_col, @@ -169,7 +166,6 @@ def get_adj_list(graph_ptr): offset_col_size = g.adjList.offsets.size index_col_size = g.adjList.indices.size - cdef uintptr_t offset_col_data = g.adjList.offsets.data cdef uintptr_t index_col_data = g.adjList.indices.data cdef uintptr_t value_col_data = NULL @@ -212,7 +208,9 @@ def view_edge_list(input_graph): if input_graph.adjlist is None: raise Exception('Graph is Empty') else: - add_adj_list(graph, input_graph.adjlist.offsets, 
input_graph.adjlist.indices, input_graph.adjlist.weights) + [offsets, indices] = datatype_cast([input_graph.adjlist.offsets, input_graph.adjlist.indices], [np.int32]) + [weights] = datatype_cast([input_graph.adjlist.weights], [np.float32, np.float64]) + add_adj_list(graph, offsets, indices, weights) c_graph.add_edge_list(g) source, dest, value = get_edge_list(graph) input_graph.edgelist = input_graph.EdgeList(source, dest, value) @@ -224,10 +222,12 @@ def view_adj_list(input_graph): if input_graph.edgelist is None: raise Exception('Graph is Empty') else: - if len(input_graph.edgelist.edgelist_df.columns)>2: - add_edge_list(graph, input_graph.edgelist.edgelist_df['src'], input_graph.edgelist.edgelist_df['dst'], input_graph.edgelist.edgelist_df['weights']) + [src, dst] = datatype_cast([input_graph.edgelist.edgelist_df['src'], input_graph.edgelist.edgelist_df['dst']], [np.int32]) + if input_graph.edgelist.weights: + [weights] = datatype_cast([input_graph.edgelist.edgelist_df['weights']], [np.float32, np.float64]) + add_edge_list(graph, src, dst, weights) else: - add_edge_list(graph, input_graph.edgelist.edgelist_df['src'], input_graph.edgelist.edgelist_df['dst']) + add_edge_list(graph, src, dst) c_graph.add_adj_list(g) offsets, indices, values = get_adj_list(graph) input_graph.adjlist = input_graph.AdjList(offsets, indices, values) @@ -237,15 +237,19 @@ def view_transposed_adj_list(input_graph): cdef Graph * g = graph if input_graph.transposedadjlist is None: if input_graph.edgelist is None: - raise Exception('Graph is Empty') - else: - if len(input_graph.edgelist.edgelist_df.columns)>2: - add_edge_list(graph, input_graph.edgelist.edgelist_df['src'], input_graph.edgelist.edgelist_df['dst'], input_graph.edgelist.edgelist_df['weights']) + if input_graph.adjlist is None: + raise Exception('Graph is Empty') else: - add_edge_list(graph, input_graph.edgelist.edgelist_df['src'], input_graph.edgelist.edgelist_df['dst']) - c_graph.add_transposed_adj_list(g) - offsets, indices, 
values = get_transposed_adj_list(graph) - input_graph.transposedadjlist = input_graph.transposedAdjList(offsets, indices, values) + view_edge_list(input_graph) + [src, dst] = datatype_cast([input_graph.edgelist.edgelist_df['src'], input_graph.edgelist.edgelist_df['dst']], [np.int32]) + if input_graph.edgelist.weights: + [weights] = datatype_cast([input_graph.edgelist.edgelist_df['weights']], [np.float32, np.float64]) + add_edge_list(graph, src, dst, weights) + else: + add_edge_list(graph, src, dst) + c_graph.add_transposed_adj_list(g) + offsets, indices, values = get_transposed_adj_list(graph) + input_graph.transposedadjlist = input_graph.transposedAdjList(offsets, indices, values) def add_transposed_adj_list(graph_ptr, offset_col, index_col, value_col=None): cdef uintptr_t graph = graph_ptr @@ -357,15 +361,13 @@ def get_two_hop_neighbors(input_graph): def number_of_vertices(input_graph): cdef uintptr_t graph = allocate_cpp_graph() cdef Graph * g = graph - - if input_graph.adjlist: - add_adj_list(graph, input_graph.adjlist.offsets, input_graph.adjlist.indices, input_graph.adjlist.weights) + [src, dst] = datatype_cast([input_graph.edgelist.edgelist_df['src'], input_graph.edgelist.edgelist_df['dst']], [np.int32]) + if input_graph.edgelist.weights: + [weights] = datatype_cast([input_graph.edgelist.edgelist_df['weights']], [np.float32, np.float64]) + add_edge_list(graph, src, dst, weights) else: - if input_graph.edgelist.weights: - add_edge_list(graph, input_graph.edgelist.edgelist_df['src'], input_graph.edgelist.edgelist_df['dst'], input_graph.edgelist.edgelist_df['weights']) - else: - add_edge_list(graph, input_graph.edgelist.edgelist_df['src'], input_graph.edgelist.edgelist_df['dst']) - c_graph.number_of_vertices(g) + add_edge_list(graph, src, dst) + c_graph.number_of_vertices(g) return g.numberOfVertices diff --git a/python/cugraph/tests/test_graph.py b/python/cugraph/tests/test_graph.py index 67c313dcb08..b8dd0bf9d17 100644 --- a/python/cugraph/tests/test_graph.py 
+++ b/python/cugraph/tests/test_graph.py @@ -105,8 +105,8 @@ def compare_graphs(nx_graph, cu_graph): if len(edgelist_df.columns) > 2: df0 = cudf.from_pandas(nx.to_pandas_edgelist(nx_graph)) - df0 = df0.sort_values(by=['source', 'target']) - df1 = df.sort_values(by=['source', 'target']) + df0 = df0.sort_values(by=['source', 'target']).reset_index(drop=True) + df1 = df.sort_values(by=['source', 'target']).reset_index(drop=True) if not df0['weight'].equals(df1['weight']): return False @@ -482,7 +482,6 @@ def test_networkx_compatibility(managed, pool, graph_file): df['source'] = pd.Series(M['0']) df['target'] = pd.Series(M['1']) df['weight'] = pd.Series(M.weight) - gdf = cudf.from_pandas(df) Gnx = nx.from_pandas_edgelist(df, @@ -495,7 +494,6 @@ def test_networkx_compatibility(managed, pool, graph_file): destination='target', edge_attr='weight', create_using=cugraph.DiGraph) - assert compare_graphs(Gnx, G) Gnx.clear()