Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[REVIEW] Add un-renumbering to connected components and update tests #771

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
- PR #753 ECG Error
- PR #758 Fix for graph comparison failure
- PR #761 Added flag to not treat deprecation warnings as errors, for now
- PR #771 Added unrenumbering in wcc and scc. Updated tests to compare vertices of largest component

# cuGraph 0.12.0 (04 Feb 2020)

Expand Down
15 changes: 11 additions & 4 deletions python/cugraph/components/connectivity_wrapper.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ from cudf._lib.utils cimport table_from_dataframe
from libc.stdint cimport uintptr_t
from cugraph.structure.symmetrize import symmetrize
from cugraph.structure.graph import Graph as type_Graph
from cugraph.utilities.unrenumber import unrenumber

import cudf
import cudf._lib as libcudf
Expand Down Expand Up @@ -61,8 +62,8 @@ def weakly_connected_components(input_graph):
num_verts = g.adjList.offsets.size - 1

df = cudf.DataFrame()
df['labels'] = cudf.Series(np.zeros(num_verts, dtype=np.int32))
df['vertices'] = cudf.Series(np.zeros(num_verts, dtype=np.int32))
df['label'] = cudf.Series(np.zeros(num_verts, dtype=np.int32))
df['vertex'] = cudf.Series(np.zeros(num_verts, dtype=np.int32))

cdef cudf_table* tbl = table_from_dataframe(df)

Expand All @@ -72,6 +73,9 @@ def weakly_connected_components(input_graph):

del tbl

if input_graph.renumbered:
df = unrenumber(input_graph.edgelist.renumber_map, df, 'vertex')

return df


Expand All @@ -98,8 +102,8 @@ def strongly_connected_components(input_graph):
num_verts = g.adjList.offsets.size - 1

df = cudf.DataFrame()
df['labels'] = cudf.Series(np.zeros(num_verts, dtype=np.int32))
df['vertices'] = cudf.Series(np.zeros(num_verts, dtype=np.int32))
df['label'] = cudf.Series(np.zeros(num_verts, dtype=np.int32))
df['vertex'] = cudf.Series(np.zeros(num_verts, dtype=np.int32))

cdef cudf_table* tbl = table_from_dataframe(df)

Expand All @@ -109,4 +113,7 @@ def strongly_connected_components(input_graph):

del tbl

if input_graph.renumbered:
df = unrenumber(input_graph.edgelist.renumber_map, df, 'vertex')

return df
77 changes: 35 additions & 42 deletions python/cugraph/tests/test_connectivity.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import gc
from itertools import product
import time

from collections import defaultdict
import pytest

import cugraph
Expand Down Expand Up @@ -53,7 +53,6 @@ def networkx_weak_call(M):
# same parameters as in NVGRAPH
result = nx.weakly_connected_components(Gnx)
t2 = time.time() - t1

print('Time : ' + str(t2))

labels = sorted(result)
Expand All @@ -69,10 +68,10 @@ def cugraph_weak_call(cu_M):
t2 = time.time() - t1
print('Time : '+str(t2))

result = df['labels'].to_array()

labels = sorted(result)
return labels
label_vertex_dict = defaultdict(list)
for i in range(len(df)):
label_vertex_dict[df['label'][i]].append(df['vertex'][i])
return label_vertex_dict


def networkx_strong_call(M):
Expand All @@ -97,16 +96,15 @@ def cugraph_strong_call(cu_M):
# cugraph strongly connected components call
G = cugraph.DiGraph()
G.from_cudf_edgelist(cu_M, source='0', destination='1')
print(G.number_of_vertices())
t1 = time.time()
df = cugraph.strongly_connected_components(G)
t2 = time.time() - t1
print('Time : '+str(t2))

result = df['labels'].to_array()

labels = sorted(result)
return labels
label_vertex_dict = defaultdict(list)
for i in range(len(df)):
label_vertex_dict[df['label'][i]].append(df['vertex'][i])
return label_vertex_dict


# these should come w/ cugraph/python:
Expand All @@ -119,27 +117,6 @@ def cugraph_strong_call(cu_M):
'../datasets/email-Eu-core.csv']


def counter_f(ls, val):
    """Return how many elements of the container ``ls`` equal ``val``."""
    matches = 0
    for item in ls:
        if item == val:
            matches += 1
    return matches


def get_n_uniqs(lst):
    """Return the number of distinct values in the container ``lst``."""
    distinct_values = set(lst)
    return len(distinct_values)


def get_uniq_counts(lst):
    """Return the number of occurrences of each distinct value in ``lst``.

    The result has one count per unique value, ordered the way ``set(lst)``
    iterates. That order is NOT guaranteed to be sorted, so callers that
    need a canonical order should sort the returned list themselves.

    All values are tallied in a single pass instead of rescanning ``lst``
    once per unique value.
    """
    # Function-scope import keeps this helper self-contained.
    from collections import Counter

    tallies = Counter(lst)
    return [tallies[uniq_val] for uniq_val in set(lst)]


# Test all combinations of default/managed and pooled/non-pooled allocation
@pytest.mark.parametrize('managed, pool',
list(product([False, True], [False, True])))
Expand Down Expand Up @@ -167,18 +144,26 @@ def test_weak_cc(managed, pool, graph_file):
# while cugraph returns a component label for each vertex;

nx_n_components = len(netx_labels)
cg_n_components = get_n_uniqs(cugraph_labels)
cg_n_components = len(cugraph_labels)

# Compare number of components
assert nx_n_components == cg_n_components

lst_nx_components_lens = [len(c) for c in sorted(netx_labels, key=len)]
lst_nx_components = sorted(netx_labels, key=len, reverse=True)
lst_nx_components_lens = [len(c) for c in lst_nx_components]

# get counts of uniques:
#
lst_cg_components_lens = sorted(get_uniq_counts(cugraph_labels))
cugraph_vertex_lst = cugraph_labels.values()
lst_cg_components = sorted(cugraph_vertex_lst, key=len, reverse=True)
lst_cg_components_lens = [len(c) for c in lst_cg_components]

# Compare lengths of each component
assert lst_nx_components_lens == lst_cg_components_lens

# Compare vertices of largest component
nx_vertices = sorted(lst_nx_components[0])
cg_vertices = sorted(lst_cg_components[0])
assert nx_vertices == cg_vertices


# Test all combinations of default/managed and pooled/non-pooled allocation
@pytest.mark.parametrize('managed, pool',
Expand Down Expand Up @@ -207,14 +192,22 @@ def test_strong_cc(managed, pool, graph_file):
# while cugraph returns a component label for each vertex;

nx_n_components = len(netx_labels)
cg_n_components = get_n_uniqs(cugraph_labels)
cg_n_components = len(cugraph_labels)

# Compare number of components
assert nx_n_components == cg_n_components

lst_nx_components_lens = [len(c) for c in sorted(netx_labels, key=len)]
lst_nx_components = sorted(netx_labels, key=len, reverse=True)
lst_nx_components_lens = [len(c) for c in lst_nx_components]

# get counts of uniques:
#
lst_cg_components_lens = sorted(get_uniq_counts(cugraph_labels))
cugraph_vertex_lst = cugraph_labels.values()
lst_cg_components = sorted(cugraph_vertex_lst, key=len, reverse=True)
lst_cg_components_lens = [len(c) for c in lst_cg_components]

# Compare lengths of each component
assert lst_nx_components_lens == lst_cg_components_lens

# Compare vertices of largest component
nx_vertices = sorted(lst_nx_components[0])
cg_vertices = sorted(lst_cg_components[0])
assert nx_vertices == cg_vertices