Skip to content

Commit

Permalink
Reduce cuGraph Sampling Overhead for PyG (#2653)
Browse files: browse the repository at this point in the history
  • Loading branch information
alexbarghi-nv authored Oct 3, 2022
1 parent c72c44f commit 6a7ea66
Show file tree
Hide file tree
Showing 8 changed files with 493 additions and 223 deletions.
11 changes: 9 additions & 2 deletions notebooks/gnn/pyg_hetero_mag.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@
"metadata": {},
"outputs": [],
"source": [
"from torch_geometric.loader import LinkNeighborLoader\n",
"from cugraph.gnn.pyg_extensions import CuGraphLinkNeighborLoader\n",
"loader = CuGraphLinkNeighborLoader(\n",
" data=(feature_store, graph_store),\n",
Expand Down Expand Up @@ -315,7 +316,13 @@
"\n",
" \n",
" acc += (pred == data['paper'].y).sum() / len(data['paper'])\n",
" return acc / (2*num_batches)\n"
" return acc / (2*num_batches)\n",
"\n",
"\n",
"for epoch in range(1, 101):\n",
" loss = train()\n",
" train_acc = test()\n",
" print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Train: {train_acc:.4f}')\n"
]
},
{
Expand Down Expand Up @@ -354,7 +361,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
"version": "3.9.13"
},
"orig_nbformat": 4,
"vscode": {
Expand Down
34 changes: 29 additions & 5 deletions python/cugraph/cugraph/dask/structure/mg_property_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -874,16 +874,23 @@ def edge_props_to_graph(self,
"""
# FIXME: check default_edge_weight is valid
if edge_weight_property:
if edge_weight_property not in edge_prop_df.columns:
if (
edge_weight_property not in edge_prop_df.columns
and edge_prop_df.index.name != edge_weight_property
):
raise ValueError("edge_weight_property "
f'"{edge_weight_property}" was not found in '
"edge_prop_df")

# Ensure a valid edge_weight_property can be used for applying
# weights to the subgraph, and if a default_edge_weight was
# specified, apply it to all NAs in the weight column.
prop_col = edge_prop_df[edge_weight_property]
if prop_col.count() != prop_col.size:
if edge_weight_property in edge_prop_df.columns:
prop_col = edge_prop_df[edge_weight_property]
else:
prop_col = edge_prop_df.index.to_series()
edge_prop_df[edge_weight_property] = prop_col
if prop_col.count().compute() != prop_col.size:
if default_edge_weight is None:
raise ValueError("edge_weight_property "
f'"{edge_weight_property}" '
Expand Down Expand Up @@ -976,6 +983,7 @@ def renumber_vertices_by_type(self):
Stop is *inclusive*.
"""
# Check if some vertex IDs exist only in edge data
TCN = self.type_col_name
default = self._default_type_name
if (
self.__edge_prop_dataframe is not None
Expand All @@ -988,16 +996,32 @@ def renumber_vertices_by_type(self):
)
if self.__vertex_prop_dataframe is None:
return None

# Use categorical dtype for the type column
if self.__series_type is dask_cudf.Series:
cat_class = cudf.CategoricalDtype
else:
cat_class = pd.CategoricalDtype

is_cat = isinstance(
self.__vertex_prop_dataframe[TCN].dtype,
cat_class
)
if not is_cat:
cat_dtype = cat_class([TCN], ordered=False)
self.__vertex_prop_dataframe[TCN] = (
self.__vertex_prop_dataframe[TCN].astype(cat_dtype)
)

df = self.__vertex_prop_dataframe
if self.__edge_prop_dataframe is not None:

# FIXME DASK_CUDF: https://github.com/rapidsai/cudf/issues/11795
cat_dtype = df.dtypes[self.type_col_name]
df[self.type_col_name] = df[self.type_col_name].astype(str)

df = (
df.reset_index()
.sort_values(by=self.type_col_name)
.sort_values(by=TCN)
)

# FIXME DASK_CUDF: https://github.com/rapidsai/cudf/issues/11795
Expand Down
2 changes: 0 additions & 2 deletions python/cugraph/cugraph/gnn/pyg_extensions/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,7 @@
from cugraph.utilities.api_tools import experimental_warning_wrapper

from cugraph.gnn.pyg_extensions.data.cugraph_store import EXPERIMENTAL__CuGraphStore
from cugraph.gnn.pyg_extensions.data.cugraph_store import EXPERIMENTAL__CuGraphFeatureStore
from cugraph.gnn.pyg_extensions.data.cugraph_store import EXPERIMENTAL__to_pyg

CuGraphStore = experimental_warning_wrapper(EXPERIMENTAL__CuGraphStore)
CuGraphFeatureStore = experimental_warning_wrapper(EXPERIMENTAL__CuGraphFeatureStore)
to_pyg = experimental_warning_wrapper(EXPERIMENTAL__to_pyg)
Loading

0 comments on commit 6a7ea66

Please sign in to comment.