From 62ecea2e0539f97742ab6f217eaca960753fc8f0 Mon Sep 17 00:00:00 2001 From: Joseph Nke <76006812+jnke2016@users.noreply.github.com> Date: Fri, 4 Aug 2023 13:22:34 +0100 Subject: [PATCH 1/2] fix inconsistent graph properties between the SG and the MG API (#3757) Several graph methods are failing, some being an effect of migrating away from cython.cu renumbering. This PR fixes couple graph methods and fixes the inconsistency in results returned by the SG and MG API closes #3740 closes #3766 Authors: - Joseph Nke (https://github.com/jnke2016) Approvers: - Brad Rees (https://github.com/BradReesWork) - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/3757 --- .../cugraph/structure/graph_classes.py | 7 +- .../simpleDistributedGraph.py | 138 +++++++++++------- .../graph_implementation/simpleGraph.py | 121 +++++++++++++-- .../cugraph/cugraph/structure/number_map.py | 55 ++++++- .../cugraph/cugraph/structure/symmetrize.py | 1 + python/cugraph/cugraph/testing/utils.py | 2 +- .../centrality/test_betweenness_centrality.py | 2 +- .../test_edge_betweenness_centrality_mg.py | 4 +- .../tests/community/test_balanced_cut.py | 4 +- .../community/test_induced_subgraph_mg.py | 4 +- .../tests/community/test_k_truss_subgraph.py | 8 +- .../tests/community/test_triangle_count_mg.py | 4 +- .../cugraph/tests/core/test_k_core_mg.py | 12 +- .../tests/link_analysis/test_pagerank.py | 4 +- .../cugraph/tests/nx/test_nx_convert.py | 6 +- .../tests/sampling/test_random_walks.py | 41 +++--- .../cugraph/tests/structure/test_graph.py | 127 ++++++++++++++-- .../cugraph/tests/structure/test_graph_mg.py | 53 ++++++- .../tests/structure/test_multigraph.py | 2 +- .../cugraph/cugraph/utilities/nx_factory.py | 8 +- python/pylibcugraph/pylibcugraph/graphs.pxd | 1 + python/pylibcugraph/pylibcugraph/graphs.pyx | 17 +-- .../pylibcugraph/uniform_random_walks.pyx | 11 +- 23 files changed, 498 insertions(+), 134 deletions(-) diff --git a/python/cugraph/cugraph/structure/graph_classes.py b/python/cugraph/cugraph/structure/graph_classes.py index b89ada9bf50..6f6c7e5a26c 100644 --- a/python/cugraph/cugraph/structure/graph_classes.py +++ b/python/cugraph/cugraph/structure/graph_classes.py @@ -68,11 +68,14 @@ def __init__(self, m_graph=None, directed=False): if isinstance(m_graph, MultiGraph): elist = m_graph.view_edge_list() if m_graph.is_weighted(): - weights = "weights" + weights = m_graph.weight_column else: weights = None self.from_cudf_edgelist( - elist, source="src", destination="dst", edge_attr=weights + elist, + source=m_graph.source_columns, + destination=m_graph.destination_columns, + edge_attr=weights, ) else: raise TypeError( diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py index ae2c57f5ef3..90db2c6b1f5 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py @@ -79,6 +79,8 @@ def __init__(self, properties): self.properties = simpleDistributedGraphImpl.Properties(properties) self.source_columns = None self.destination_columns = None + self.weight_column = None + self.vertex_columns = None def _make_plc_graph( sID, @@ -175,6 +177,7 @@ def __from_edgelist( "and destination parameters" ) ddf_columns = s_col + d_col + self.vertex_columns = ddf_columns.copy() _client = default_client() workers = _client.scheduler_info()["workers"] # Repartition to 2 partitions per GPU for memory efficient process @@ -214,10 +217,11 @@ def __from_edgelist( # The symmetrize step may add additional edges with unknown # ids and types for an undirected graph. Therefore, only # directed graphs may be used with ids and types. + # FIXME: Drop the check in symmetrize.py as it is redundant if len(edge_attr) == 3: if not self.properties.directed: raise ValueError( - "User-provided edge ids and edge " + "User-provided edge ids and/or edge " "types are not permitted for an " "undirected graph." ) @@ -285,6 +289,7 @@ def __from_edgelist( self.properties.renumber = renumber self.source_columns = source self.destination_columns = destination + self.weight_column = weight # If renumbering is not enabled, this function will only create # the edgelist_df and not do any renumbering. @@ -316,7 +321,6 @@ def __from_edgelist( ddf = ddf.map_partitions(lambda df: df.copy()) ddf = persist_dask_df_equal_parts_per_worker(ddf, _client) num_edges = len(ddf) - self._number_of_edges = num_edges ddf = get_persisted_df_worker_map(ddf, _client) delayed_tasks_d = { w: delayed(simpleDistributedGraphImpl._make_plc_graph)( @@ -356,6 +360,8 @@ def renumbered(self): def view_edge_list(self): """ + FIXME: Should this also return the edge ids and types? + Display the edge list. Compute it if needed. NOTE: If the graph is of type Graph() then the displayed undirected edges are the same as displayed by networkx Graph(), but the direction @@ -386,7 +392,59 @@ def view_edge_list(self): """ if self.edgelist is None: raise RuntimeError("Graph has no Edgelist.") - return self.edgelist.edgelist_df + + edgelist_df = self.input_df + is_string_dtype = False + is_multi_column = False + wgtCol = simpleDistributedGraphImpl.edgeWeightCol + if not self.properties.directed: + srcCol = self.source_columns + dstCol = self.destination_columns + if self.renumber_map.unrenumbered_id_type == "object": + # FIXME: Use the renumbered vertices instead and then un-renumber. + # This operation can be expensive. + is_string_dtype = True + edgelist_df = self.edgelist.edgelist_df + srcCol = self.renumber_map.renumbered_src_col_name + dstCol = self.renumber_map.renumbered_dst_col_name + + if isinstance(srcCol, list): + srcCol = self.renumber_map.renumbered_src_col_name + dstCol = self.renumber_map.renumbered_dst_col_name + edgelist_df = self.edgelist.edgelist_df + # unrenumber before extracting the upper triangular part + if len(self.source_columns) == 1: + edgelist_df = self.renumber_map.unrenumber(edgelist_df, srcCol) + edgelist_df = self.renumber_map.unrenumber(edgelist_df, dstCol) + else: + is_multi_column = True + + edgelist_df[srcCol], edgelist_df[dstCol] = edgelist_df[ + [srcCol, dstCol] + ].min(axis=1), edgelist_df[[srcCol, dstCol]].max(axis=1) + + edgelist_df = edgelist_df.groupby(by=[srcCol, dstCol]).sum().reset_index() + if wgtCol in edgelist_df.columns: + # FIXME: This breaks if there are are multi edges as those will + # be dropped during the symmetrization step and the original 'weight' + # will be halved. + edgelist_df[wgtCol] /= 2 + + if is_string_dtype or is_multi_column: + # unrenumber the vertices + edgelist_df = self.renumber_map.unrenumber(edgelist_df, srcCol) + edgelist_df = self.renumber_map.unrenumber(edgelist_df, dstCol) + + if self.properties.renumbered: + edgelist_df = edgelist_df.rename( + columns=self.renumber_map.internal_to_external_col_names + ) + + # If there is no 'wgt' column, nothing will happen + edgelist_df = edgelist_df.rename(columns={wgtCol: self.weight_column}) + + self.properties.edge_count = len(edgelist_df) + return edgelist_df def delete_edge_list(self): """ @@ -405,23 +463,7 @@ def number_of_vertices(self): Get the number of nodes in the graph. """ if self.properties.node_count is None: - if self.edgelist is not None: - if self.renumbered is True: - src_col_name = self.renumber_map.renumbered_src_col_name - dst_col_name = self.renumber_map.renumbered_dst_col_name - # FIXME: from_dask_cudf_edgelist() currently requires - # renumber=True for MG, so this else block will not be - # used. Should this else block be removed and added back when - # the restriction is removed? - else: - src_col_name = "src" - dst_col_name = "dst" - - ddf = self.edgelist.edgelist_df[[src_col_name, dst_col_name]] - # ddf = self.edgelist.edgelist_df[["src", "dst"]] - self.properties.node_count = ddf.max().max().compute() + 1 - else: - raise RuntimeError("Graph is Empty") + self.properties.node_count = len(self.nodes()) return self.properties.node_count def number_of_nodes(self): @@ -434,10 +476,16 @@ def number_of_edges(self, directed_edges=False): """ Get the number of edges in the graph. """ - if self.edgelist is not None: - return self._number_of_edges - else: - raise RuntimeError("Graph is Empty") + + if directed_edges and self.edgelist is not None: + return len(self.edgelist.edgelist_df) + + if self.properties.edge_count is None: + if self.edgelist is not None: + self.view_edge_list() + else: + raise RuntimeError("Graph is Empty") + return self.properties.edge_count def in_degree(self, vertex_subset=None): """ @@ -1021,19 +1069,8 @@ def edges(self): sources and destinations. It does not return the edge weights. For viewing edges with weights use view_edge_list() """ - if self.renumbered is True: - src_col_name = self.renumber_map.renumbered_src_col_name - dst_col_name = self.renumber_map.renumbered_dst_col_name - # FIXME: from_dask_cudf_edgelist() currently requires - # renumber=True for MG, so this else block will not be - # used. Should this else block be removed and added back when - # the restriction is removed? - else: - src_col_name = "src" - dst_col_name = "dst" - # return self.view_edge_list()[["src", "dst"]] - return self.view_edge_list()[[src_col_name, dst_col_name]] + return self.view_edge_list()[self.vertex_columns] def nodes(self): """ @@ -1045,23 +1082,26 @@ def nodes(self): a dataframe and do 'renumber_map.unrenumber' or 'G.unrenumber' """ - if self.renumbered: - # FIXME: This relies on current implementation - # of NumberMap, should not really expose - # this, perhaps add a method to NumberMap + if self.edgelist is not None: + if self.renumbered: + # FIXME: This relies on current implementation + # of NumberMap, should not really expose + # this, perhaps add a method to NumberMap - df = self.renumber_map.implementation.ddf.drop(columns="global_id") + df = self.renumber_map.implementation.ddf.drop(columns="global_id") - if len(df.columns) > 1: - return df - else: - return df[df.columns[0]] + if len(df.columns) > 1: + return df + else: + return df[df.columns[0]] + else: + df = self.input_df + return dask_cudf.concat( + [df[self.source_columns], df[self.destination_columns]] + ).drop_duplicates() else: - df = self.input_df - return dask_cudf.concat( - [df[self.source_columns], df[self.destination_columns]] - ).drop_duplicates() + raise RuntimeError("Graph is Empty") def neighbors(self, n): if self.edgelist is None: diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py index d0c0ded5eb4..2690ab88c13 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py @@ -47,8 +47,8 @@ class simpleGraphImpl: class EdgeList: def __init__( self, - source: str, - destination: str, + source: cudf.Series, + destination: cudf.Series, edge_attr: Union[cudf.DataFrame, Dict[str, cudf.DataFrame]] = None, ): self.edgelist_df = cudf.DataFrame() @@ -96,6 +96,7 @@ def __init__(self, properties): def __init__(self, properties): # Structure self.edgelist = None + self.input_df = None self.adjlist = None self.transposedadjlist = None self.renumber_map = None @@ -109,6 +110,11 @@ def __init__(self, properties): self.batch_adjlists = None self.batch_transposed_adjlists = None + self.source_columns = None + self.destination_columns = None + self.vertex_columns = None + self.weight_column = None + # Functions # FIXME: Change to public function # FIXME: Make function more modular @@ -149,6 +155,7 @@ def __from_edgelist( "destination parameters" ) df_columns = s_col + d_col + self.vertex_columns = df_columns.copy() if edge_attr is not None: if weight is not None or edge_id is not None or edge_type is not None: @@ -212,9 +219,11 @@ def __from_edgelist( elist = input_df.compute().reset_index(drop=True) else: raise TypeError("input should be a cudf.DataFrame or a dask_cudf dataFrame") - - # Original, unmodified input dataframe. + # initial, unmodified input dataframe. self.input_df = elist + self.weight_column = weight + self.source_columns = source + self.destination_columns = destination # Renumbering self.renumber_map = None @@ -233,6 +242,8 @@ def __from_edgelist( # Use renumber_map to figure out if the python renumbering occured self.properties.renumbered = renumber_map.is_renumbered self.renumber_map = renumber_map + self.renumber_map.implementation.src_col_names = simpleGraphImpl.srcCol + self.renumber_map.implementation.dst_col_names = simpleGraphImpl.dstCol else: if type(source) is list and type(destination) is list: raise ValueError("set renumber to True for multi column ids") @@ -405,24 +416,104 @@ def view_edge_list(self): src, dst, weights = graph_primtypes_wrapper.view_edge_list(self) self.edgelist = self.EdgeList(src, dst, weights) - edgelist_df = self.edgelist.edgelist_df + srcCol = self.source_columns + dstCol = self.destination_columns + """ + Only use the initial input dataframe if the graph is directed with: + 1) single vertex column names with integer vertex type + 2) list of vertex column names of size 1 with integer vertex type + """ + use_initial_input_df = True + + if self.input_df is not None: + if type(srcCol) is list and type(dstCol) is list: + if len(srcCol) == 1: + srcCol = srcCol[0] + dstCol = dstCol[0] + if self.input_df[srcCol].dtype not in [ + np.int32, + np.int64, + ] or self.input_df[dstCol].dtype not in [np.int32, np.int64]: + # hypergraph case + use_initial_input_df = False + else: + use_initial_input_df = False + + elif self.input_df[srcCol].dtype not in [ + np.int32, + np.int64, + ] or self.input_df[dstCol].dtype not in [np.int32, np.int64]: + use_initial_input_df = False + else: + use_initial_input_df = False - if self.properties.renumbered: + if use_initial_input_df and self.properties.directed: + edgelist_df = self.input_df + else: + edgelist_df = self.edgelist.edgelist_df + if srcCol is None and dstCol is None: + srcCol = simpleGraphImpl.srcCol + dstCol = simpleGraphImpl.dstCol + + if use_initial_input_df and not self.properties.directed: + # unrenumber before extracting the upper triangular part + # case when the vertex column name is of size 1 + if self.properties.renumbered: + edgelist_df = self.renumber_map.unrenumber( + edgelist_df, simpleGraphImpl.srcCol + ) + edgelist_df = self.renumber_map.unrenumber( + edgelist_df, simpleGraphImpl.dstCol + ) + edgelist_df = edgelist_df.rename( + columns=self.renumber_map.internal_to_external_col_names + ) + # extract the upper triangular part + edgelist_df = edgelist_df[edgelist_df[srcCol] <= edgelist_df[dstCol]] + else: + edgelist_df = edgelist_df[ + edgelist_df[simpleGraphImpl.srcCol] + <= edgelist_df[simpleGraphImpl.dstCol] + ] + elif not use_initial_input_df and self.properties.renumbered: + # Do not unrenumber the vertices if the initial input df was used + if not self.properties.directed: + edgelist_df = edgelist_df[ + edgelist_df[simpleGraphImpl.srcCol] + <= edgelist_df[simpleGraphImpl.dstCol] + ] edgelist_df = self.renumber_map.unrenumber( edgelist_df, simpleGraphImpl.srcCol ) edgelist_df = self.renumber_map.unrenumber( edgelist_df, simpleGraphImpl.dstCol ) + edgelist_df = edgelist_df.rename( + columns=self.renumber_map.internal_to_external_col_names + ) - # FIXME: revisit this approach - if not self.properties.directed: - edgelist_df = edgelist_df[ - edgelist_df[simpleGraphImpl.srcCol] - <= edgelist_df[simpleGraphImpl.dstCol] - ] - edgelist_df = edgelist_df.reset_index(drop=True) - self.properties.edge_count = len(edgelist_df) + if self.vertex_columns is not None and len(self.vertex_columns) == 2: + # single column vertices internally renamed to 'simpleGraphImpl.srcCol' + # and 'simpleGraphImpl.dstCol'. + if not set(self.vertex_columns).issubset(set(edgelist_df.columns)): + # Get the initial column names passed by the user. + if srcCol is not None and dstCol is not None: + edgelist_df = edgelist_df.rename( + columns={ + simpleGraphImpl.srcCol: srcCol, + simpleGraphImpl.dstCol: dstCol, + } + ) + + # FIXME: When renumbered, the MG API uses renumbered col names which + # is not consistant with the SG API. + + self.properties.edge_count = len(edgelist_df) + + wgtCol = simpleGraphImpl.edgeWeightCol + edgelist_df = edgelist_df.rename( + columns={wgtCol: self.weight_column} + ).reset_index(drop=True) return edgelist_df @@ -1175,7 +1266,7 @@ def edges(self): sources and destinations. It does not return the edge weights. For viewing edges with weights use view_edge_list() """ - return self.view_edge_list()[[simpleGraphImpl.srcCol, simpleGraphImpl.dstCol]] + return self.view_edge_list()[self.vertex_columns] def nodes(self): """ diff --git a/python/cugraph/cugraph/structure/number_map.py b/python/cugraph/cugraph/structure/number_map.py index 481f99b9060..d7da20f9d84 100644 --- a/python/cugraph/cugraph/structure/number_map.py +++ b/python/cugraph/cugraph/structure/number_map.py @@ -25,6 +25,8 @@ class NumberMap: class SingleGPU: def __init__(self, df, src_col_names, dst_col_names, id_type, store_transposed): self.col_names = NumberMap.compute_vals(src_col_names) + # FIXME: rename the next two attributes to its singular conterpart as there + # is only one 'src' and 'dst' col name self.src_col_names = src_col_names self.dst_col_names = dst_col_names self.df = df @@ -141,6 +143,8 @@ def __init__( self, ddf, src_col_names, dst_col_names, id_type, store_transposed ): self.col_names = NumberMap.compute_vals(src_col_names) + self.src_col_names = src_col_names + self.dst_col_names = dst_col_names self.val_types = NumberMap.compute_vals_types(ddf, src_col_names) self.val_types["count"] = np.int32 self.id_type = id_type @@ -258,6 +262,7 @@ def __init__( # The column name 'id' contains the renumbered vertices and the other column(s) # contain the original vertices self.df_internal_to_external = None + self.internal_to_external_col_names = {} @staticmethod def compute_vals_types(df, column_names): @@ -480,7 +485,15 @@ def renumber_and_segment( # For columns with mismatch dtypes, set the renumbered # id_type to either 'int32' or 'int64' - if df.dtypes.nunique() > 1: + if isinstance(src_col_names, list): + vertex_col_names = src_col_names.copy() + else: + vertex_col_names = [src_col_names] + if isinstance(dst_col_names, list): + vertex_col_names += dst_col_names + else: + vertex_col_names += [dst_col_names] + if df[vertex_col_names].dtypes.nunique() > 1: # can't determine the edgelist input type unrenumbered_id_type = None else: @@ -503,7 +516,9 @@ def renumber_and_segment( renumbered = True renumber_map = NumberMap(renumber_id_type, unrenumbered_id_type, renumbered) - if not isinstance(src_col_names, list): + renumber_map.input_src_col_names = src_col_names + renumber_map.input_dst_col_names = dst_col_names + if not isinstance(renumber_map.input_src_col_names, list): src_col_names = [src_col_names] dst_col_names = [dst_col_names] @@ -512,6 +527,10 @@ def renumber_and_segment( # renumbered_dst_col_name) renumber_map.set_renumbered_col_names(src_col_names, dst_col_names, df.columns) + # FIXME: Remove 'src_col_names' and 'dst_col_names' from this initialization as + # those will capture 'simpleGraph.srcCol' and 'simpleGraph.dstCol'. + # In fact the input src and dst col names are already captured in + # 'renumber_map.input_src_col_names' and 'renumber_map.input_dst_col_names'. if isinstance(df, cudf.DataFrame): renumber_map.implementation = NumberMap.SingleGPU( df, @@ -648,6 +667,35 @@ def unrenumber(self, df, column_name, preserve_order=False, get_column_names=Fal mapping[nm] = nm + "_" + column_name col_names = list(mapping.values()) + if isinstance(self.input_src_col_names, list): + input_src_col_names = self.input_src_col_names.copy() + input_dst_col_names = self.input_dst_col_names.copy() + else: + # Assuming the src and dst columns are of the same length + # if they are lists. + input_src_col_names = [self.input_src_col_names] + input_dst_col_names = [self.input_dst_col_names] + if not isinstance(col_names, list): + col_names = [col_names] + + if column_name in [ + self.renumbered_src_col_name, + self.implementation.src_col_names, + ]: + self.internal_to_external_col_names.update( + dict(zip(col_names, input_src_col_names)) + ) + elif column_name in [ + self.renumbered_dst_col_name, + self.implementation.dst_col_names, + ]: + self.internal_to_external_col_names.update( + dict(zip(col_names, input_dst_col_names)) + ) + + if len(self.implementation.col_names) == 1: + col_names = col_names[0] + if preserve_order: index_name = NumberMap.generate_unused_column_name(df) df[index_name] = df.index @@ -665,6 +713,9 @@ def unrenumber(self, df, column_name, preserve_order=False, get_column_names=Fal df = df.map_partitions(lambda df: df.rename(columns=mapping, copy=False)) else: df = df.rename(columns=mapping, copy=False) + # FIXME: This parameter is not working as expected as it oesn't return + # the unrenumbered column names: leverage 'self.internal_to_external_col_names' + # instead. if get_column_names: return df, col_names else: diff --git a/python/cugraph/cugraph/structure/symmetrize.py b/python/cugraph/cugraph/structure/symmetrize.py index 15011fa8dbc..4c00e68344d 100644 --- a/python/cugraph/cugraph/structure/symmetrize.py +++ b/python/cugraph/cugraph/structure/symmetrize.py @@ -230,6 +230,7 @@ def symmetrize( """ + # FIXME: Redundant check that should be done at the graph creation if "edge_id" in input_df.columns and symmetrize: raise ValueError("Edge IDs are not supported on undirected graphs") diff --git a/python/cugraph/cugraph/testing/utils.py b/python/cugraph/cugraph/testing/utils.py index 0dae17ed14e..6d58076e6fe 100644 --- a/python/cugraph/cugraph/testing/utils.py +++ b/python/cugraph/cugraph/testing/utils.py @@ -407,7 +407,7 @@ def compare_mst(mst_cugraph, mst_nx): pass # check total weight - cg_sum = edgelist_df["weights"].sum() + cg_sum = edgelist_df[mst_cugraph.weight_column].sum() nx_sum = mst_nx_df["weight"].sum() print(cg_sum) print(nx_sum) diff --git a/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality.py b/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality.py index 3e4dd3af4fc..db34c68a054 100644 --- a/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality.py +++ b/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality.py @@ -118,7 +118,7 @@ def calc_betweenness_centrality( ) M = G.to_pandas_edgelist().rename( - columns={"src": "0", "dst": "1", "weights": "weight"} + columns={"src": "0", "dst": "1", "wgt": edge_attr} ) Gnx = nx.from_pandas_edgelist( diff --git a/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py index aa41f8e1c82..97e503e5428 100644 --- a/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py @@ -16,7 +16,7 @@ import dask_cudf from pylibcugraph.testing.utils import gen_fixture_params_product -from cugraph.experimental.datasets import DATASETS_UNDIRECTED, email_Eu_core +from cugraph.experimental.datasets import DATASETS_UNDIRECTED import cugraph import cugraph.dask as dcg @@ -41,7 +41,7 @@ def setup_function(): # email_Eu_core is too expensive to test -datasets = DATASETS_UNDIRECTED + [email_Eu_core] +datasets = DATASETS_UNDIRECTED # ============================================================================= diff --git a/python/cugraph/cugraph/tests/community/test_balanced_cut.py b/python/cugraph/cugraph/tests/community/test_balanced_cut.py index 0a95a1846ce..f6c1a741011 100644 --- a/python/cugraph/cugraph/tests/community/test_balanced_cut.py +++ b/python/cugraph/cugraph/tests/community/test_balanced_cut.py @@ -102,7 +102,7 @@ def test_edge_cut_clustering_with_edgevals(graph_file, partitions): @pytest.mark.sg -@pytest.mark.parametrize("graph_file", [DEFAULT_DATASETS[2]]) +@pytest.mark.parametrize("graph_file", DEFAULT_DATASETS) @pytest.mark.parametrize("partitions", PARTITIONS) def test_edge_cut_clustering_with_edgevals_nx(graph_file, partitions): gc.collect() @@ -111,7 +111,7 @@ def test_edge_cut_clustering_with_edgevals_nx(graph_file, partitions): # read_weights_in_sp=True => value column dtype is float32 G = graph_file.get_graph() NM = G.to_pandas_edgelist().rename( - columns={"src": "0", "dst": "1", "weights": "weight"} + columns={"src": "0", "dst": "1", "wgt": "weight"} ) G = nx.from_pandas_edgelist( diff --git a/python/cugraph/cugraph/tests/community/test_induced_subgraph_mg.py b/python/cugraph/cugraph/tests/community/test_induced_subgraph_mg.py index 3a6a6e0d409..d93fa3b547d 100644 --- a/python/cugraph/cugraph/tests/community/test_induced_subgraph_mg.py +++ b/python/cugraph/cugraph/tests/community/test_induced_subgraph_mg.py @@ -90,8 +90,8 @@ def input_expected_output(input_combo): # Sample k vertices from the cuGraph graph # FIXME: Leverage the method 'select_random_vertices' instead - srcs = G.view_edge_list()["src"] - dsts = G.view_edge_list()["dst"] + srcs = G.view_edge_list()["0"] + dsts = G.view_edge_list()["1"] vertices = cudf.concat([srcs, dsts]).drop_duplicates() vertices = vertices.sample(num_seeds).astype("int32") diff --git a/python/cugraph/cugraph/tests/community/test_k_truss_subgraph.py b/python/cugraph/cugraph/tests/community/test_k_truss_subgraph.py index b0dcc2ede3d..c1f8f4c3546 100644 --- a/python/cugraph/cugraph/tests/community/test_k_truss_subgraph.py +++ b/python/cugraph/cugraph/tests/community/test_k_truss_subgraph.py @@ -39,7 +39,7 @@ def setup_function(): # currently in networkx master and will hopefully will make it to a release # soon. def ktruss_ground_truth(graph_file): - G = nx.read_edgelist(str(graph_file), nodetype=int, data=(("weights", float),)) + G = nx.read_edgelist(str(graph_file), nodetype=int, data=(("weight", float),)) df = nx.to_pandas_edgelist(G) return df @@ -50,18 +50,18 @@ def compare_k_truss(k_truss_cugraph, k, ground_truth_file): edgelist_df = k_truss_cugraph.view_edge_list() src = edgelist_df["src"] dst = edgelist_df["dst"] - wgt = edgelist_df["weights"] + wgt = edgelist_df["weight"] assert len(edgelist_df) == len(k_truss_nx) for i in range(len(src)): has_edge = ( (k_truss_nx["source"] == src[i]) & (k_truss_nx["target"] == dst[i]) - & np.isclose(k_truss_nx["weights"], wgt[i]) + & np.isclose(k_truss_nx["weight"], wgt[i]) ).any() has_opp_edge = ( (k_truss_nx["source"] == dst[i]) & (k_truss_nx["target"] == src[i]) - & np.isclose(k_truss_nx["weights"], wgt[i]) + & np.isclose(k_truss_nx["weight"], wgt[i]) ).any() assert has_edge or has_opp_edge return True diff --git a/python/cugraph/cugraph/tests/community/test_triangle_count_mg.py b/python/cugraph/cugraph/tests/community/test_triangle_count_mg.py index 2cf0525d2ad..0f7bb14581f 100644 --- a/python/cugraph/cugraph/tests/community/test_triangle_count_mg.py +++ b/python/cugraph/cugraph/tests/community/test_triangle_count_mg.py @@ -69,8 +69,8 @@ def input_expected_output(dask_client, input_combo): if start_list: # sample k nodes from the cuGraph graph k = random.randint(1, 10) - srcs = G.view_edge_list()["src"] - dsts = G.view_edge_list()["dst"] + srcs = G.view_edge_list()[G.source_columns] + dsts = G.view_edge_list()[G.destination_columns] nodes = cudf.concat([srcs, dsts]).drop_duplicates() start_list = nodes.sample(k) else: diff --git a/python/cugraph/cugraph/tests/core/test_k_core_mg.py b/python/cugraph/cugraph/tests/core/test_k_core_mg.py index c68108ce241..7f4eeeb69d5 100644 --- a/python/cugraph/cugraph/tests/core/test_k_core_mg.py +++ b/python/cugraph/cugraph/tests/core/test_k_core_mg.py @@ -83,9 +83,12 @@ def input_expected_output(dask_client, input_combo): ) sg_k_core_results = sg_k_core_graph.view_edge_list() # FIXME: The result will come asymetric. Symmetrize the results + srcCol = sg_k_core_graph.source_columns + dstCol = sg_k_core_graph.destination_columns + wgtCol = sg_k_core_graph.weight_column sg_k_core_results = ( - symmetrize_df(sg_k_core_results, "src", "dst", "weights") - .sort_values(["src", "dst"]) + symmetrize_df(sg_k_core_results, srcCol, dstCol, wgtCol) + .sort_values([srcCol, dstCol]) .reset_index(drop=True) ) @@ -144,7 +147,10 @@ def test_dask_k_core(dask_client, benchmark, input_expected_output): expected_k_core_results = input_expected_output["sg_k_core_results"] k_core_results = ( - k_core_results.compute().sort_values(["src", "dst"]).reset_index(drop=True) + k_core_results.compute() + .sort_values(["src", "dst"]) + .reset_index(drop=True) + .rename(columns={"weights": "weight"}) ) assert_frame_equal( diff --git a/python/cugraph/cugraph/tests/link_analysis/test_pagerank.py b/python/cugraph/cugraph/tests/link_analysis/test_pagerank.py index 8e8ab13574d..9d9572b88b2 100644 --- a/python/cugraph/cugraph/tests/link_analysis/test_pagerank.py +++ b/python/cugraph/cugraph/tests/link_analysis/test_pagerank.py @@ -187,11 +187,11 @@ def test_pagerank( G = graph_file.get_graph(create_using=cugraph.Graph(directed=True)) if has_precomputed_vertex_out_weight == 1: - df = G.view_edge_list()[["src", "weights"]] + df = G.view_edge_list()[["src", "wgt"]] pre_vtx_o_wgt = ( df.groupby(["src"], as_index=False) .sum() - .rename(columns={"src": "vertex", "weights": "sums"}) + .rename(columns={"src": "vertex", "wgt": "sums"}) ) cugraph_pr = cugraph_call( diff --git a/python/cugraph/cugraph/tests/nx/test_nx_convert.py b/python/cugraph/cugraph/tests/nx/test_nx_convert.py index 58b89a4bda9..e20897572d0 100644 --- a/python/cugraph/cugraph/tests/nx/test_nx_convert.py +++ b/python/cugraph/cugraph/tests/nx/test_nx_convert.py @@ -25,8 +25,9 @@ def _compare_graphs(nxG, cuG, has_wt=True): assert nxG.number_of_edges() == cuG.number_of_edges() cu_df = cuG.view_edge_list().to_pandas() + cu_df = cu_df.rename(columns={"0": "src", "1": "dst"}) if has_wt is True: - cu_df = cu_df.drop(columns=["weights"]) + cu_df = cu_df.drop(columns=["weight"]) out_of_order = cu_df[cu_df["src"] > cu_df["dst"]] if len(out_of_order) > 0: @@ -72,12 +73,11 @@ def test_networkx_compatibility(graph_file): # create a cuGraph Directed Graph gdf = cudf.from_pandas(M) - gdf = gdf.rename(columns={"weight": "weights"}) cuG = cugraph.from_cudf_edgelist( gdf, source="0", destination="1", - edge_attr="weights", + edge_attr="weight", create_using=cugraph.Graph(directed=True), ) diff --git a/python/cugraph/cugraph/tests/sampling/test_random_walks.py b/python/cugraph/cugraph/tests/sampling/test_random_walks.py index 48629fa03a6..9c94e036683 100644 --- a/python/cugraph/cugraph/tests/sampling/test_random_walks.py +++ b/python/cugraph/cugraph/tests/sampling/test_random_walks.py @@ -76,7 +76,7 @@ def calc_random_walks(G, max_depth=None, use_padding=False, legacy_result_type=T """ assert G is not None - G, _ = ensure_cugraph_obj_for_nx(G, nx_weight_attr="weights") + G, _ = ensure_cugraph_obj_for_nx(G, nx_weight_attr="wgt") k = random.randint(1, 6) @@ -136,8 +136,9 @@ def check_random_walks_padded(G, path_data, seeds, max_depth, legacy_result_type e_wgt_paths = path_data[1] e_wgt_idx = 0 - G, _ = ensure_cugraph_obj_for_nx(G, nx_weight_attr="weights") + G, _ = ensure_cugraph_obj_for_nx(G, nx_weight_attr="wgt") df_G = G.input_df + if "weight" in df_G.columns: df_G = df_G.rename(columns={"weight": "wgt"}) @@ -176,17 +177,18 @@ def check_random_walks_padded(G, path_data, seeds, max_depth, legacy_result_type else: # check valid edge wgt - expected_wgt = edge["wgt"].iloc[0] - result_wgt = e_wgt_paths.iloc[e_wgt_idx] - - if expected_wgt != result_wgt: - print( - "[ERR] Invalid edge wgt: " - "The edge src {} dst {} has wgt {} but got {}".format( - src, dst, expected_wgt, result_wgt + if G.is_weighted(): + expected_wgt = edge["wgt"].iloc[0] + result_wgt = e_wgt_paths.iloc[e_wgt_idx] + + if expected_wgt != result_wgt: + print( + "[ERR] Invalid edge wgt: " + "The edge src {} dst {} has wgt {} but got {}".format( + src, dst, expected_wgt, result_wgt + ) ) - ) - invalid_edge_wgt += 1 + invalid_edge_wgt += 1 e_wgt_idx += 1 if src != -1 and dst == -1: @@ -195,9 +197,10 @@ def check_random_walks_padded(G, path_data, seeds, max_depth, legacy_result_type assert invalid_seeds == 0 assert invalid_edge == 0 - assert invalid_edge_wgt == 0 assert len(v_paths) == (max_depth) * len(seeds) - assert len(e_wgt_paths) == (max_depth - 1) * len(seeds) + if G.is_weighted(): + assert invalid_edge_wgt == 0 + assert len(e_wgt_paths) == (max_depth - 1) * len(seeds) if legacy_result_type: sizes = path_data[2] @@ -298,11 +301,15 @@ def test_random_walks_nx(graph_file): M = G.to_pandas_edgelist() + source = G.source_columns + target = G.destination_columns + edge_attr = G.weight_column + Gnx = nx.from_pandas_edgelist( M, - source="src", - target="dst", - edge_attr="weights", + source=source, + target=target, + edge_attr=edge_attr, create_using=nx.DiGraph(), ) max_depth = random.randint(2, 10) diff --git a/python/cugraph/cugraph/tests/structure/test_graph.py b/python/cugraph/cugraph/tests/structure/test_graph.py index a80c47662e2..de306309ca4 100644 --- a/python/cugraph/cugraph/tests/structure/test_graph.py +++ b/python/cugraph/cugraph/tests/structure/test_graph.py @@ -62,8 +62,8 @@ def compare_graphs(nx_graph, cu_graph): edgelist_df = cu_graph.view_edge_list().reset_index(drop=True) df = cudf.DataFrame() - df["source"] = edgelist_df["src"] - df["target"] = edgelist_df["dst"] + df["source"] = edgelist_df["source"] + df["target"] = edgelist_df["target"] if len(edgelist_df.columns) > 2: df["weight"] = edgelist_df["weights"] cu_to_nx_graph = nx.from_pandas_edgelist( @@ -319,10 +319,10 @@ def test_edges_for_Graph(graph_file): edges.append([edge[1], edge[0]]) else: edges.append([edge[0], edge[1]]) - nx_edge_list = cudf.DataFrame(list(edges), columns=["src", "dst"]) + nx_edge_list = cudf.DataFrame(list(edges), columns=["0", "1"]) assert_frame_equal( - nx_edge_list.sort_values(by=["src", "dst"]).reset_index(drop=True), - cu_edge_list.sort_values(by=["src", "dst"]).reset_index(drop=True), + nx_edge_list.sort_values(by=["0", "1"]).reset_index(drop=True), + cu_edge_list.sort_values(by=["0", "1"]).reset_index(drop=True), check_dtype=False, ) @@ -344,7 +344,8 @@ def test_view_edge_list_for_Graph(graph_file): G = cugraph.from_cudf_edgelist( cu_M, source="0", destination="1", create_using=cugraph.Graph ) - cu_edge_list = G.view_edge_list().sort_values(["src", "dst"]) + + cu_edge_list = G.view_edge_list().sort_values(["0", "1"]) # Check if number of Edges is same assert len(nx_edges) == len(cu_edge_list) @@ -359,12 +360,12 @@ def test_view_edge_list_for_Graph(graph_file): edges.append([edge[0], edge[1]]) edges = list(edges) edges.sort() - nx_edge_list = cudf.DataFrame(edges, columns=["src", "dst"]) + nx_edge_list = cudf.DataFrame(edges, columns=["0", "1"]) # Compare nx and cugraph edges when viewing edgelist # assert cu_edge_list.equals(nx_edge_list) - assert (cu_edge_list["src"].to_numpy() == nx_edge_list["src"].to_numpy()).all() - assert (cu_edge_list["dst"].to_numpy() == nx_edge_list["dst"].to_numpy()).all() + assert (cu_edge_list["0"].to_numpy() == nx_edge_list["0"].to_numpy()).all() + assert (cu_edge_list["1"].to_numpy() == nx_edge_list["1"].to_numpy()).all() # Test @@ -682,8 +683,8 @@ def test_to_pandas_edgelist(graph_file): G = cugraph.Graph() G.from_cudf_edgelist(cu_M, source="0", destination="1") - assert "s" in G.to_pandas_edgelist("s", "d").columns - assert "s" in G.to_pandas_edgelist(source="s", destination="d").columns + assert "0" in G.to_pandas_edgelist("0", "1").columns + assert "0" in G.to_pandas_edgelist(source="0", destination="1").columns @pytest.mark.sg @@ -877,3 +878,107 @@ def test_graph_creation_edge_properties(graph_file, edge_props): G = cugraph.Graph(directed=True) G.from_cudf_edgelist(df, source="0", destination="1", **prop_keys) + + +@pytest.mark.sg +@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) +@pytest.mark.parametrize("directed", [True, False]) +@pytest.mark.parametrize("renumber", [True, False]) +def test_graph_creation_edges(graph_file, directed, renumber): + # Verifies that the input dataframe passed the user is the same + # retrieved from the graph when the graph is directed + srcCol = "source" + dstCol = "target" + wgtCol = "weight" + input_df = cudf.read_csv( + graph_file, + delimiter=" ", + names=[srcCol, dstCol, wgtCol], + dtype=["int32", "int32", "float32"], + header=None, + ) + + G = cugraph.Graph(directed=directed) + + if renumber: + # trigger renumbering by passing a list of vertex column + srcCol = [srcCol] + dstCol = [dstCol] + vertexCol = srcCol + dstCol + else: + vertexCol = [srcCol, dstCol] + G.from_cudf_edgelist(input_df, source=srcCol, destination=dstCol, edge_attr=wgtCol) + + columns = vertexCol.copy() + columns.append(wgtCol) + + edge_list_view = G.view_edge_list().loc[:, columns] + edges = G.edges().loc[:, vertexCol] + + assert_frame_equal( + edge_list_view.drop(columns=wgtCol) + .sort_values(by=vertexCol) + .reset_index(drop=True), + edges.sort_values(by=vertexCol).reset_index(drop=True), + check_dtype=False, + ) + + if directed: + assert_frame_equal( + edge_list_view.sort_values(by=vertexCol).reset_index(drop=True), + input_df.sort_values(by=vertexCol).reset_index(drop=True), + check_dtype=False, + ) + else: + # If the graph is undirected, ensures that only the upper triangular + # matrix of the adjacency matrix is returned + if isinstance(srcCol, list): + srcCol = srcCol[0] + dstCol = dstCol[0] + is_upper_triangular = edge_list_view[srcCol] <= edge_list_view[dstCol] + is_upper_triangular = list(set(is_upper_triangular.values_host)) + assert len(is_upper_triangular) == 1 + assert is_upper_triangular[0] + + +@pytest.mark.sg +@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) +@pytest.mark.parametrize("directed", [True, False]) +def test_graph_creation_edges_multi_col_vertices(graph_file, directed): + srcCol = ["src_0", "src_1"] + dstCol = ["dst_0", "dst_1"] + wgtCol = "weight" + vertexCol = srcCol + dstCol + columns = vertexCol.copy() + columns.append(wgtCol) + + input_df = cudf.read_csv( + graph_file, + delimiter=" ", + names=[srcCol[0], dstCol[0], wgtCol], + dtype=["int32", "int32", "float32"], + header=None, + ) + input_df["src_1"] = input_df["src_0"] + 1000 + input_df["dst_1"] = input_df["dst_0"] + 1000 + + G = cugraph.Graph(directed=directed) + G.from_cudf_edgelist(input_df, source=srcCol, destination=dstCol, edge_attr=wgtCol) + + input_df = input_df.loc[:, columns] + edge_list_view = G.view_edge_list().loc[:, columns] + edges = G.edges().loc[:, vertexCol] + + assert_frame_equal( + edge_list_view.drop(columns=wgtCol) + .sort_values(by=vertexCol) + .reset_index(drop=True), + edges.sort_values(by=vertexCol).reset_index(drop=True), + check_dtype=False, + ) + if directed: + assert_frame_equal( + edge_list_view.sort_values(by=vertexCol).reset_index(drop=True), + input_df.sort_values(by=vertexCol).reset_index(drop=True), + check_dtype=False, + ) diff --git a/python/cugraph/cugraph/tests/structure/test_graph_mg.py b/python/cugraph/cugraph/tests/structure/test_graph_mg.py index 707b195dfa8..3024e50402a 100644 --- a/python/cugraph/cugraph/tests/structure/test_graph_mg.py +++ b/python/cugraph/cugraph/tests/structure/test_graph_mg.py @@ -338,7 +338,7 @@ def test_mg_select_random_vertices( assert len(join) == len(sampled_vertices) -@pytest.mark.sg +@pytest.mark.mg @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) @pytest.mark.parametrize( "edge_props", @@ -363,3 +363,54 @@ def test_graph_creation_edge_properties(dask_client, graph_file, edge_props): G = cugraph.Graph(directed=True) G.from_dask_cudf_edgelist(df, source="0", destination="1", **prop_keys) + + +@pytest.mark.parametrize("directed", [True, False]) +@pytest.mark.parametrize("renumber", [True, False]) +@pytest.mark.parametrize("graph_file", datasets) +def test_graph_creation_properties(dask_client, graph_file, directed, renumber): + srcCol = "src" + dstCol = "dst" + wgtCol = "wgt" + df = cudf.read_csv( + graph_file, + delimiter=" ", + names=[srcCol, dstCol, wgtCol], + dtype=["int32", "int32", "float32"], + header=None, + ) + ddf = dask_cudf.from_cudf(df, npartitions=2) + + if renumber: + # trigger renumbering by passing a list of vertex column + srcCol = [srcCol] + dstCol = [dstCol] + vertexCol = srcCol + dstCol + else: + vertexCol = [srcCol, dstCol] + + sG = cugraph.Graph(directed=directed) + mG = cugraph.Graph(directed=directed) + sG.from_cudf_edgelist(df, source=srcCol, destination=dstCol, edge_attr=wgtCol) + mG.from_dask_cudf_edgelist(ddf, source=srcCol, destination=dstCol, edge_attr=wgtCol) + + columns = vertexCol.copy() + columns.append(wgtCol) + + sG_edgelist_view = ( + sG.view_edge_list() + .sort_values(by=vertexCol) + .reset_index(drop=True) + .loc[:, columns] + ) + mG_edgelist_view = ( + mG.view_edge_list() + .compute() + .sort_values(by=vertexCol) + .reset_index(drop=True) + .loc[:, columns] + ) + + assert sG.number_of_nodes() == mG.number_of_nodes() + assert sG.number_of_edges() == mG.number_of_edges() + assert_frame_equal(sG_edgelist_view, mG_edgelist_view, check_dtype=False) diff --git a/python/cugraph/cugraph/tests/structure/test_multigraph.py b/python/cugraph/cugraph/tests/structure/test_multigraph.py index af78c238d4e..a9ea617fdb8 100644 --- a/python/cugraph/cugraph/tests/structure/test_multigraph.py +++ b/python/cugraph/cugraph/tests/structure/test_multigraph.py @@ -47,7 +47,7 @@ def test_multigraph(graph_file): assert G.number_of_nodes() == Gnx.number_of_nodes() cuedges = cugraph.to_pandas_edgelist(G) cuedges.rename( - columns={"src": "source", "dst": "target", "weights": "weight"}, inplace=True + columns={"src": "source", "dst": "target", "wgt": "weight"}, inplace=True ) cuedges["weight"] = cuedges["weight"].round(decimals=3) nxedges = nx.to_pandas_edgelist(Gnx).astype( diff --git a/python/cugraph/cugraph/utilities/nx_factory.py b/python/cugraph/cugraph/utilities/nx_factory.py index 2448a511229..d07d17978d7 100644 --- a/python/cugraph/cugraph/utilities/nx_factory.py +++ b/python/cugraph/cugraph/utilities/nx_factory.py @@ -236,11 +236,15 @@ def cugraph_to_nx(G): pdf = G.view_edge_list().to_pandas() num_col = len(pdf.columns) + source = G.source_columns + target = G.destination_columns + if num_col == 2: - Gnx = nx.from_pandas_edgelist(pdf, source="src", target="dst") + Gnx = nx.from_pandas_edgelist(pdf, source=source, target=target) else: + edge_attr = G.weight_column Gnx = nx.from_pandas_edgelist( - pdf, source="src", target="dst", edge_attr="weights" + pdf, source=source, target=target, edge_attr=edge_attr ) return Gnx diff --git a/python/pylibcugraph/pylibcugraph/graphs.pxd b/python/pylibcugraph/pylibcugraph/graphs.pxd index 4e52ed557ed..a2df44ba26e 100644 --- a/python/pylibcugraph/pylibcugraph/graphs.pxd +++ b/python/pylibcugraph/pylibcugraph/graphs.pxd @@ -25,6 +25,7 @@ from pylibcugraph._cugraph_c.graph cimport ( cdef class _GPUGraph: cdef cugraph_graph_t* c_graph_ptr cdef cugraph_type_erased_device_array_view_t* edge_id_view_ptr + cdef cugraph_type_erased_device_array_view_t* weights_view_ptr cdef class SGGraph(_GPUGraph): pass diff --git a/python/pylibcugraph/pylibcugraph/graphs.pyx b/python/pylibcugraph/pylibcugraph/graphs.pyx index fb4692bf3a8..33a8a09c6f4 100644 --- a/python/pylibcugraph/pylibcugraph/graphs.pyx +++ b/python/pylibcugraph/pylibcugraph/graphs.pyx @@ -166,11 +166,10 @@ cdef class SGGraph(_GPUGraph): dst_or_index_array ) - cdef cugraph_type_erased_device_array_view_t* weights_view_ptr = \ - create_cugraph_type_erased_device_array_view_from_py_obj( + + self.weights_view_ptr = create_cugraph_type_erased_device_array_view_from_py_obj( weight_array ) - self.edge_id_view_ptr = create_cugraph_type_erased_device_array_view_from_py_obj( edge_id_array @@ -187,7 +186,7 @@ cdef class SGGraph(_GPUGraph): &(graph_properties.c_graph_properties), srcs_or_offsets_view_ptr, dsts_or_indices_view_ptr, - weights_view_ptr, + self.weights_view_ptr, self.edge_id_view_ptr, edge_type_view_ptr, store_transposed, @@ -205,7 +204,7 @@ cdef class SGGraph(_GPUGraph): &(graph_properties.c_graph_properties), srcs_or_offsets_view_ptr, dsts_or_indices_view_ptr, - weights_view_ptr, + self.weights_view_ptr, self.edge_id_view_ptr, edge_type_view_ptr, store_transposed, @@ -224,7 +223,7 @@ cdef class SGGraph(_GPUGraph): cugraph_type_erased_device_array_view_free(srcs_or_offsets_view_ptr) cugraph_type_erased_device_array_view_free(dsts_or_indices_view_ptr) - cugraph_type_erased_device_array_view_free(weights_view_ptr) + cugraph_type_erased_device_array_view_free(self.weights_view_ptr) if self.edge_id_view_ptr is not NULL: cugraph_type_erased_device_array_view_free(self.edge_id_view_ptr) if edge_type_view_ptr is not NULL: @@ -337,7 +336,7 @@ cdef class MGGraph(_GPUGraph): create_cugraph_type_erased_device_array_view_from_py_obj( dst_array ) - cdef cugraph_type_erased_device_array_view_t* weights_view_ptr = \ + self.weights_view_ptr = \ create_cugraph_type_erased_device_array_view_from_py_obj( weight_array ) @@ -355,7 +354,7 @@ cdef class MGGraph(_GPUGraph): &(graph_properties.c_graph_properties), srcs_view_ptr, dsts_view_ptr, - weights_view_ptr, + self.weights_view_ptr, self.edge_id_view_ptr, edge_type_view_ptr, store_transposed, @@ -369,7 +368,7 @@ cdef class MGGraph(_GPUGraph): cugraph_type_erased_device_array_view_free(srcs_view_ptr) cugraph_type_erased_device_array_view_free(dsts_view_ptr) - cugraph_type_erased_device_array_view_free(weights_view_ptr) + cugraph_type_erased_device_array_view_free(self.weights_view_ptr) if self.edge_id_view_ptr is not NULL: cugraph_type_erased_device_array_view_free(self.edge_id_view_ptr) if edge_type_view_ptr is not NULL: diff --git a/python/pylibcugraph/pylibcugraph/uniform_random_walks.pyx b/python/pylibcugraph/pylibcugraph/uniform_random_walks.pyx index 4a2b8a70189..1570523beb8 100644 --- a/python/pylibcugraph/pylibcugraph/uniform_random_walks.pyx +++ b/python/pylibcugraph/pylibcugraph/uniform_random_walks.pyx @@ -96,6 +96,8 @@ def uniform_random_walks(ResourceHandle resource_handle, cdef uintptr_t cai_start_ptr = \ start_vertices.__cuda_array_interface__["data"][0] + cdef cugraph_type_erased_device_array_view_t* weights_ptr + cdef cugraph_type_erased_device_array_view_t* start_ptr = \ cugraph_type_erased_device_array_view_create( cai_start_ptr, @@ -113,14 +115,17 @@ def uniform_random_walks(ResourceHandle resource_handle, cdef cugraph_type_erased_device_array_view_t* path_ptr = \ cugraph_random_walk_result_get_paths(result_ptr) - cdef cugraph_type_erased_device_array_view_t* weights_ptr = \ - cugraph_random_walk_result_get_weights(result_ptr) + + if input_graph.weights_view_ptr is NULL: + cupy_weights = None + else: + weights_ptr = cugraph_random_walk_result_get_weights(result_ptr) + cupy_weights = copy_to_cupy_array(c_resource_handle_ptr, weights_ptr) max_path_length = \ cugraph_random_walk_result_get_max_path_length(result_ptr) cupy_paths = copy_to_cupy_array(c_resource_handle_ptr, path_ptr) - cupy_weights = copy_to_cupy_array(c_resource_handle_ptr, weights_ptr) cugraph_random_walk_result_free(result_ptr) cugraph_type_erased_device_array_view_free(start_ptr) From 15f8bbaf5360b363b3c07d881e6f3a8ae125cc05 Mon Sep 17 00:00:00 2001 From: Ray Douglass Date: Wed, 9 Aug 2023 12:31:09 -0400 Subject: [PATCH 2/2] Update Changelog [skip ci] --- CHANGELOG.md | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7fb33b14546..4832bc2fb04 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,75 @@ +# cuGraph 23.08.00 (9 Aug 2023) + +## 🚨 Breaking Changes + +- Change the renumber_sampled_edgelist function behavior. ([#3762](https://github.com/rapidsai/cugraph/pull/3762)) [@seunghwak](https://github.com/seunghwak) +- PLC and Python Support for Sample-Side MFG Creation ([#3734](https://github.com/rapidsai/cugraph/pull/3734)) [@alexbarghi-nv](https://github.com/alexbarghi-nv) +- Stop using setup.py in build.sh ([#3704](https://github.com/rapidsai/cugraph/pull/3704)) [@vyasr](https://github.com/vyasr) +- Refactor edge betweenness centrality ([#3672](https://github.com/rapidsai/cugraph/pull/3672)) [@jnke2016](https://github.com/jnke2016) +- [FIX] Fix the hang in cuGraph Python Uniform Neighbor Sample, Add Logging to Bulk Sampler ([#3669](https://github.com/rapidsai/cugraph/pull/3669)) [@alexbarghi-nv](https://github.com/alexbarghi-nv) + +## 🐛 Bug Fixes + +- Change the renumber_sampled_edgelist function behavior. ([#3762](https://github.com/rapidsai/cugraph/pull/3762)) [@seunghwak](https://github.com/seunghwak) +- Fix bug discovered in Jaccard testing ([#3758](https://github.com/rapidsai/cugraph/pull/3758)) [@ChuckHastings](https://github.com/ChuckHastings) +- fix inconsistent graph properties between the SG and the MG API ([#3757](https://github.com/rapidsai/cugraph/pull/3757)) [@jnke2016](https://github.com/jnke2016) +- Fixes options for `--pydevelop` to remove unneeded CWD path ("."), restores use of `setup.py` temporarily for develop builds ([#3747](https://github.com/rapidsai/cugraph/pull/3747)) [@rlratzel](https://github.com/rlratzel) +- Fix sampling call parameters if compiled with -DNO_CUGRAPH_OPS ([#3729](https://github.com/rapidsai/cugraph/pull/3729)) [@ChuckHastings](https://github.com/ChuckHastings) +- Fix primitive bug discovered in MG edge betweenness centrality testing ([#3723](https://github.com/rapidsai/cugraph/pull/3723)) [@ChuckHastings](https://github.com/ChuckHastings) +- Reorder dependencies.yaml channels ([#3721](https://github.com/rapidsai/cugraph/pull/3721)) [@raydouglass](https://github.com/raydouglass) +- [BUG] Fix namesapce to default_hash and hash_functions ([#3711](https://github.com/rapidsai/cugraph/pull/3711)) [@naimnv](https://github.com/naimnv) +- [BUG] Fix Bulk Sampling Test Issue ([#3701](https://github.com/rapidsai/cugraph/pull/3701)) [@alexbarghi-nv](https://github.com/alexbarghi-nv) +- Make `pylibcugraphops` optional imports in `cugraph-dgl` and `-pyg` ([#3693](https://github.com/rapidsai/cugraph/pull/3693)) [@tingyu66](https://github.com/tingyu66) +- [FIX] Rename `cugraph-ops` symbols (refactoring) and update GHA workflows to call pytest via `python -m pytest` ([#3688](https://github.com/rapidsai/cugraph/pull/3688)) [@naimnv](https://github.com/naimnv) +- [FIX] Fix the hang in cuGraph Python Uniform Neighbor Sample, Add Logging to Bulk Sampler ([#3669](https://github.com/rapidsai/cugraph/pull/3669)) [@alexbarghi-nv](https://github.com/alexbarghi-nv) +- force atlas notebook changes to run in cugraph 23.08 container. ([#3656](https://github.com/rapidsai/cugraph/pull/3656)) [@acostadon](https://github.com/acostadon) + +## 📖 Documentation + +- this fixes github links in cugraph, cugraph-dgl and cugraph-pyg ([#3650](https://github.com/rapidsai/cugraph/pull/3650)) [@acostadon](https://github.com/acostadon) +- Fix minor typo in README.md ([#3636](https://github.com/rapidsai/cugraph/pull/3636)) [@akasper](https://github.com/akasper) +- Created landing spot for centrality and similarity algorithms ([#3620](https://github.com/rapidsai/cugraph/pull/3620)) [@acostadon](https://github.com/acostadon) + +## 🚀 New Features + +- Compute shortest distances between given sets of origins and destinations for large diameter graphs ([#3741](https://github.com/rapidsai/cugraph/pull/3741)) [@seunghwak](https://github.com/seunghwak) +- Update primitive to compute weighted Jaccard, Sorensen and Overlap similarity ([#3728](https://github.com/rapidsai/cugraph/pull/3728)) [@naimnv](https://github.com/naimnv) +- Add CUDA 12.0 conda environment. ([#3725](https://github.com/rapidsai/cugraph/pull/3725)) [@bdice](https://github.com/bdice) +- Renumber utility function for sampling output ([#3707](https://github.com/rapidsai/cugraph/pull/3707)) [@seunghwak](https://github.com/seunghwak) +- Integrate C++ Sampling Source Behavior Updates ([#3699](https://github.com/rapidsai/cugraph/pull/3699)) [@alexbarghi-nv](https://github.com/alexbarghi-nv) +- Adds `fail_on_nonconvergence` option to `pagerank` to provide pagerank results even on non-convergence ([#3639](https://github.com/rapidsai/cugraph/pull/3639)) [@rlratzel](https://github.com/rlratzel) +- Add Benchmark for Bulk Sampling ([#3628](https://github.com/rapidsai/cugraph/pull/3628)) [@alexbarghi-nv](https://github.com/alexbarghi-nv) +- cugraph: Build CUDA 12 packages ([#3456](https://github.com/rapidsai/cugraph/pull/3456)) [@vyasr](https://github.com/vyasr) + +## 🛠️ Improvements + +- Pin `dask` and `distributed` for `23.08` release ([#3761](https://github.com/rapidsai/cugraph/pull/3761)) [@galipremsagar](https://github.com/galipremsagar) +- Fix `build.yaml` workflow ([#3756](https://github.com/rapidsai/cugraph/pull/3756)) [@ajschmidt8](https://github.com/ajschmidt8) +- Support MFG creation on sampling gpus for cugraph dgl ([#3742](https://github.com/rapidsai/cugraph/pull/3742)) [@VibhuJawa](https://github.com/VibhuJawa) +- PLC and Python Support for Sample-Side MFG Creation ([#3734](https://github.com/rapidsai/cugraph/pull/3734)) [@alexbarghi-nv](https://github.com/alexbarghi-nv) +- Switch to new wheel building pipeline ([#3731](https://github.com/rapidsai/cugraph/pull/3731)) [@vyasr](https://github.com/vyasr) +- Remove RAFT specialization. ([#3727](https://github.com/rapidsai/cugraph/pull/3727)) [@bdice](https://github.com/bdice) +- C API for renumbering the samples ([#3724](https://github.com/rapidsai/cugraph/pull/3724)) [@ChuckHastings](https://github.com/ChuckHastings) +- Only run cugraph conda CI for CUDA 11. ([#3713](https://github.com/rapidsai/cugraph/pull/3713)) [@bdice](https://github.com/bdice) +- Promote `Datasets` to stable and clean-up unit tests ([#3712](https://github.com/rapidsai/cugraph/pull/3712)) [@nv-rliu](https://github.com/nv-rliu) +- [BUG] Unsupported graph for similiarity algos ([#3710](https://github.com/rapidsai/cugraph/pull/3710)) [@jnke2016](https://github.com/jnke2016) +- Stop using setup.py in build.sh ([#3704](https://github.com/rapidsai/cugraph/pull/3704)) [@vyasr](https://github.com/vyasr) +- [WIP] Make edge ids optional ([#3702](https://github.com/rapidsai/cugraph/pull/3702)) [@VibhuJawa](https://github.com/VibhuJawa) +- Use rapids-cmake testing to run tests in parallel ([#3697](https://github.com/rapidsai/cugraph/pull/3697)) [@robertmaynard](https://github.com/robertmaynard) +- Sampling modifications to support PyG and DGL options ([#3696](https://github.com/rapidsai/cugraph/pull/3696)) [@ChuckHastings](https://github.com/ChuckHastings) +- Include cuCollection public header for hash functions ([#3694](https://github.com/rapidsai/cugraph/pull/3694)) [@seunghwak](https://github.com/seunghwak) +- Refactor edge betweenness centrality ([#3672](https://github.com/rapidsai/cugraph/pull/3672)) [@jnke2016](https://github.com/jnke2016) +- Refactor RMAT ([#3662](https://github.com/rapidsai/cugraph/pull/3662)) [@jnke2016](https://github.com/jnke2016) +- [REVIEW] Optimize bulk sampling ([#3661](https://github.com/rapidsai/cugraph/pull/3661)) [@VibhuJawa](https://github.com/VibhuJawa) +- Update to CMake 3.26.4 ([#3648](https://github.com/rapidsai/cugraph/pull/3648)) [@vyasr](https://github.com/vyasr) +- Optimize cugraph-dgl MFG creation ([#3646](https://github.com/rapidsai/cugraph/pull/3646)) [@VibhuJawa](https://github.com/VibhuJawa) +- use rapids-upload-docs script ([#3640](https://github.com/rapidsai/cugraph/pull/3640)) [@AyodeAwe](https://github.com/AyodeAwe) +- Fix dependency versions for `23.08` ([#3638](https://github.com/rapidsai/cugraph/pull/3638)) [@ajschmidt8](https://github.com/ajschmidt8) +- Unpin `dask` and `distributed` for development ([#3634](https://github.com/rapidsai/cugraph/pull/3634)) [@galipremsagar](https://github.com/galipremsagar) +- Remove documentation build scripts for Jenkins ([#3627](https://github.com/rapidsai/cugraph/pull/3627)) [@ajschmidt8](https://github.com/ajschmidt8) +- Unpin scikit-build upper bound ([#3609](https://github.com/rapidsai/cugraph/pull/3609)) [@vyasr](https://github.com/vyasr) +- Implement C++ Edge Betweenness Centrality ([#3602](https://github.com/rapidsai/cugraph/pull/3602)) [@ChuckHastings](https://github.com/ChuckHastings) + # cuGraph 23.06.00 (7 Jun 2023) ## 🚨 Breaking Changes