diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 075cf231725..c66890f8ae5 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -5,6 +5,7 @@ channels: - rapidsai-nightly - dask/label/dev - pytorch +- pyg - dglteam/label/cu118 - conda-forge - nvidia diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml index eacafbfd6c4..3afb1415572 100644 --- a/conda/environments/all_cuda-120_arch-x86_64.yaml +++ b/conda/environments/all_cuda-120_arch-x86_64.yaml @@ -5,6 +5,7 @@ channels: - rapidsai-nightly - dask/label/dev - pytorch +- pyg - dglteam/label/cu118 - conda-forge - nvidia diff --git a/conda/recipes/cugraph-pyg/meta.yaml b/conda/recipes/cugraph-pyg/meta.yaml index 4d3d7c44093..2d7ed2f4cda 100644 --- a/conda/recipes/cugraph-pyg/meta.yaml +++ b/conda/recipes/cugraph-pyg/meta.yaml @@ -33,6 +33,7 @@ requirements: - pytorch >=2.0 - cupy >=12.0.0 - cugraph ={{ version }} + - pylibcugraphops ={{ version }} - pyg >=2.3,<2.4 tests: diff --git a/dependencies.yaml b/dependencies.yaml index e8692cd670f..04ec1b6e957 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -199,12 +199,24 @@ files: output: conda conda_dir: python/cugraph-dgl/conda includes: + - checks - cugraph_dgl_dev + - test_python_common + cugraph_pyg_dev: + matrix: + cuda: ["11.8"] + output: conda + conda_dir: python/cugraph-pyg/conda + includes: + - checks + - cugraph_pyg_dev + - test_python_common channels: - rapidsai - rapidsai-nightly - dask/label/dev - pytorch + - pyg - dglteam/label/cu118 - conda-forge - nvidia @@ -498,6 +510,12 @@ dependencies: - pytorch>=2.0 - pytorch-cuda==11.8 - dgl>=1.1.0.cu* - - setuptools - - pre-commit - - pytest + cugraph_pyg_dev: + common: + - output_types: [conda] + packages: + - cugraph==23.10.* + - pylibcugraphops==23.10.* + - pytorch==2.0 + - pytorch-cuda==11.8 + - pyg=2.3.1=*torch_2.0.0*cu118* diff --git a/python/cugraph-dgl/conda/cugraph_dgl_dev_cuda-118.yaml b/python/cugraph-dgl/conda/cugraph_dgl_dev_cuda-118.yaml index 2bb4b0f3cd3..138d384ebcf 100644 --- a/python/cugraph-dgl/conda/cugraph_dgl_dev_cuda-118.yaml +++ b/python/cugraph-dgl/conda/cugraph_dgl_dev_cuda-118.yaml @@ -5,16 +5,21 @@ channels: - rapidsai-nightly - dask/label/dev - pytorch +- pyg - dglteam/label/cu118 - conda-forge - nvidia dependencies: - cugraph==23.10.* - dgl>=1.1.0.cu* +- pandas - pre-commit - pylibcugraphops==23.10.* - pytest +- pytest-benchmark +- pytest-cov +- pytest-xdist - pytorch-cuda==11.8 - pytorch>=2.0 -- setuptools +- scipy name: cugraph_dgl_dev_cuda-118 diff --git a/python/cugraph-pyg/conda/cugraph_pyg_dev_cuda-118.yaml b/python/cugraph-pyg/conda/cugraph_pyg_dev_cuda-118.yaml new file mode 100644 index 00000000000..4e5159e6b45 --- /dev/null +++ b/python/cugraph-pyg/conda/cugraph_pyg_dev_cuda-118.yaml @@ -0,0 +1,25 @@ +# This file is generated by `rapids-dependency-file-generator`. +# To make changes, edit ../../../dependencies.yaml and run `rapids-dependency-file-generator`. 
+channels: +- rapidsai +- rapidsai-nightly +- dask/label/dev +- pytorch +- pyg +- dglteam/label/cu118 +- conda-forge +- nvidia +dependencies: +- cugraph==23.10.* +- pandas +- pre-commit +- pyg=2.3.1=*torch_2.0.0*cu118* +- pylibcugraphops==23.10.* +- pytest +- pytest-benchmark +- pytest-cov +- pytest-xdist +- pytorch-cuda==11.8 +- pytorch==2.0 +- scipy +name: cugraph_pyg_dev_cuda-118 diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/__init__.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/__init__.py index 0c94be5e12b..9c9dcdb43bb 100644 --- a/python/cugraph-pyg/cugraph_pyg/nn/conv/__init__.py +++ b/python/cugraph-pyg/cugraph_pyg/nn/conv/__init__.py @@ -13,10 +13,14 @@ from .gat_conv import GATConv from .gatv2_conv import GATv2Conv +from .rgcn_conv import RGCNConv +from .sage_conv import SAGEConv from .transformer_conv import TransformerConv __all__ = [ "GATConv", "GATv2Conv", + "RGCNConv", + "SAGEConv", "TransformerConv", ] diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/base.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/base.py index 2639f66f440..10431a0398d 100644 --- a/python/cugraph-pyg/cugraph_pyg/nn/conv/base.py +++ b/python/cugraph-pyg/cugraph_pyg/nn/conv/base.py @@ -15,10 +15,10 @@ from typing import Optional, Tuple, Union from cugraph.utilities.utils import import_optional +from pylibcugraphops.pytorch import CSC, HeteroCSC torch = import_optional("torch") torch_geometric = import_optional("torch_geometric") -ops_torch = import_optional("pylibcugraphops.pytorch") class BaseConv(torch.nn.Module): # pragma: no cover @@ -74,7 +74,7 @@ def get_cugraph( csc: Tuple[torch.Tensor, torch.Tensor, int], bipartite: bool = False, max_num_neighbors: Optional[int] = None, - ) -> ops_torch.CSC: + ) -> CSC: r"""Constructs a :obj:`cugraph-ops` graph object from CSC representation. Supports both bipartite and non-bipartite graphs. @@ -87,22 +87,22 @@ def get_cugraph( bipartite (bool): If set to :obj:`True`, will create the bipartite structure in cugraph-ops. (default: :obj:`False`) max_num_neighbors (int, optional): The maximum number of neighbors - of a target node. It is only effective when operating in a - bipartite graph. When not given, will be computed on-the-fly, - leading to slightly worse performance. (default: :obj:`None`) + of a destination node. When enabled, it allows models to use + the message-flow-graph primitives in cugraph-ops. + (default: :obj:`None`) """ row, colptr, num_src_nodes = csc if not row.is_cuda: raise RuntimeError( - f"'{self.__class__.__name__}' requires GPU-" - f"based processing (got CPU tensor)" + f"'{self.__class__.__name__}' requires GPU-based processing " + f"but got CPU tensor." ) if max_num_neighbors is None: max_num_neighbors = -1 - return ops_torch.CSC( + return CSC( offsets=colptr, indices=row, num_src_nodes=num_src_nodes, @@ -117,7 +117,7 @@ def get_typed_cugraph( num_edge_types: Optional[int] = None, bipartite: bool = False, max_num_neighbors: Optional[int] = None, - ) -> ops_torch.HeteroCSC: + ) -> HeteroCSC: r"""Constructs a typed :obj:`cugraph` graph object from a CSC representation where each edge corresponds to a given edge type. Supports both bipartite and non-bipartite graphs. @@ -135,9 +135,9 @@ def get_typed_cugraph( bipartite (bool): If set to :obj:`True`, will create the bipartite structure in cugraph-ops. (default: :obj:`False`) max_num_neighbors (int, optional): The maximum number of neighbors - of a target node. It is only effective when operating in a - bipartite graph. 
When not given, will be computed on-the-fly, - leading to slightly worse performance. (default: :obj:`None`) + of a destination node. When enabled, it allows models to use + the message-flow-graph primitives in cugraph-ops. + (default: :obj:`None`) """ if num_edge_types is None: num_edge_types = int(edge_type.max()) + 1 @@ -148,7 +148,7 @@ def get_typed_cugraph( row, colptr, num_src_nodes = csc edge_type = edge_type.int() - return ops_torch.HeteroCSC( + return HeteroCSC( offsets=colptr, indices=row, edge_types=edge_type, diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/gat_conv.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/gat_conv.py index f0040015b4a..309bee4e228 100644 --- a/python/cugraph-pyg/cugraph_pyg/nn/conv/gat_conv.py +++ b/python/cugraph-pyg/cugraph_pyg/nn/conv/gat_conv.py @@ -10,16 +10,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + from typing import Optional, Tuple, Union from cugraph.utilities.utils import import_optional +from pylibcugraphops.pytorch.operators import mha_gat_n2n from .base import BaseConv torch = import_optional("torch") nn = import_optional("torch.nn") torch_geometric = import_optional("torch_geometric") -ops_torch = import_optional("pylibcugraphops.pytorch") class GATConv(BaseConv): @@ -174,9 +175,9 @@ def forward( representation to the desired format. edge_attr: (torch.Tensor, optional) The edge features. max_num_neighbors (int, optional): The maximum number of neighbors - of a target node. It is only effective when operating in a - bipartite graph. When not given, will be computed on-the-fly, - leading to slightly worse performance. (default: :obj:`None`) + of a destination node. When enabled, it allows models to use + the message-flow-graph primitives in cugraph-ops. + (default: :obj:`None`) """ bipartite = not isinstance(x, torch.Tensor) graph = self.get_cugraph( @@ -210,7 +211,7 @@ def forward( ) x = self.lin(x) - out = ops_torch.operators.mha_gat_n2n( + out = mha_gat_n2n( (x_src, x_dst) if bipartite else x, self.att, graph, diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/gatv2_conv.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/gatv2_conv.py index d74ca6b00d0..32956dcb400 100644 --- a/python/cugraph-pyg/cugraph_pyg/nn/conv/gatv2_conv.py +++ b/python/cugraph-pyg/cugraph_pyg/nn/conv/gatv2_conv.py @@ -10,16 +10,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + from typing import Optional, Tuple, Union from cugraph.utilities.utils import import_optional +from pylibcugraphops.pytorch.operators import mha_gat_v2_n2n from .base import BaseConv torch = import_optional("torch") nn = import_optional("torch.nn") torch_geometric = import_optional("torch_geometric") -ops_torch = import_optional("pylibcugraphops.pytorch") class GATv2Conv(BaseConv): @@ -207,7 +208,7 @@ def forward( else: x = self.lin_src(x) - out = ops_torch.operators.mha_gat_v2_n2n( + out = mha_gat_v2_n2n( (x_src, x_dst) if bipartite else x, self.att, graph, diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/rgcn_conv.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/rgcn_conv.py new file mode 100644 index 00000000000..683780b66eb --- /dev/null +++ b/python/cugraph-pyg/cugraph_pyg/nn/conv/rgcn_conv.py @@ -0,0 +1,141 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. 
+# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Optional, Tuple + +from cugraph.utilities.utils import import_optional +from pylibcugraphops.pytorch.operators import agg_hg_basis_n2n_post + +from .base import BaseConv + +torch = import_optional("torch") +torch_geometric = import_optional("torch_geometric") + + +class RGCNConv(BaseConv): # pragma: no cover + r"""The relational graph convolutional operator from the `"Modeling + Relational Data with Graph Convolutional Networks" + <https://arxiv.org/abs/1703.06103>`_ paper. + + .. math:: + \mathbf{x}^{\prime}_i = \mathbf{\Theta}_{\textrm{root}} \cdot + \mathbf{x}_i + \sum_{r \in \mathcal{R}} \sum_{j \in \mathcal{N}_r(i)} + \frac{1}{|\mathcal{N}_r(i)|} \mathbf{\Theta}_r \cdot \mathbf{x}_j, + + where :math:`\mathcal{R}` denotes the set of relations, *i.e.* edge types. + Edge type needs to be a one-dimensional :obj:`torch.long` tensor which + stores a relation identifier + :math:`\in \{ 0, \ldots, |\mathcal{R}| - 1\}` for each edge. + + Args: + in_channels (int): Size of each input sample. + out_channels (int): Size of each output sample. + num_relations (int): Number of relations. + num_bases (int, optional): If set, this layer will use the + basis-decomposition regularization scheme where :obj:`num_bases` + denotes the number of bases to use. (default: :obj:`None`) + aggr (str, optional): The aggregation scheme to use + (:obj:`"add"`, :obj:`"mean"`, :obj:`"sum"`). + (default: :obj:`"mean"`) + root_weight (bool, optional): If set to :obj:`False`, the layer will + not add transformed root node features to the output. + (default: :obj:`True`) + bias (bool, optional): If set to :obj:`False`, the layer will not learn + an additive bias. (default: :obj:`True`) + """ + + def __init__( + self, + in_channels: int, + out_channels: int, + num_relations: int, + num_bases: Optional[int] = None, + aggr: str = "mean", + root_weight: bool = True, + bias: bool = True, + ): + super().__init__() + + if aggr not in ["mean", "sum", "add"]: + raise ValueError( + f"Aggregation function must be chosen from 'mean', 'sum' or " + f"'add', but got '{aggr}'."
+ ) + + self.in_channels = in_channels + self.out_channels = out_channels + self.num_relations = num_relations + self.num_bases = num_bases + self.aggr = aggr + self.root_weight = root_weight + + dim_root_weight = 1 if root_weight else 0 + + if num_bases is not None: + self.weight = torch.nn.Parameter( + torch.empty(num_bases + dim_root_weight, in_channels, out_channels) + ) + self.comp = torch.nn.Parameter(torch.empty(num_relations, num_bases)) + else: + self.weight = torch.nn.Parameter( + torch.empty(num_relations + dim_root_weight, in_channels, out_channels) + ) + self.register_parameter("comp", None) + + if bias: + self.bias = torch.nn.Parameter(torch.empty(out_channels)) + else: + self.register_parameter("bias", None) + + self.reset_parameters() + + def reset_parameters(self): + end = -1 if self.root_weight else None + torch_geometric.nn.inits.glorot(self.weight[:end]) + torch_geometric.nn.inits.glorot(self.comp) + if self.root_weight: + torch_geometric.nn.inits.glorot(self.weight[-1]) + torch_geometric.nn.inits.zeros(self.bias) + + def forward( + self, + x: torch.Tensor, + csc: Tuple[torch.Tensor, torch.Tensor, int], + edge_type: torch.Tensor, + max_num_neighbors: Optional[int] = None, + ) -> torch.Tensor: + + graph = self.get_typed_cugraph( + csc, edge_type, self.num_relations, max_num_neighbors=max_num_neighbors + ) + + out = agg_hg_basis_n2n_post( + x, + self.comp, + graph, + concat_own=self.root_weight, + norm_by_out_degree=bool(self.aggr == "mean"), + ) + + out = out @ self.weight.view(-1, self.out_channels) + + if self.bias is not None: + out = out + self.bias + + return out + + def __repr__(self) -> str: + return ( + f"{self.__class__.__name__}({self.in_channels}, " + f"{self.out_channels}, num_relations={self.num_relations})" + ) diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/sage_conv.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/sage_conv.py new file mode 100644 index 00000000000..8e0c1027416 --- /dev/null +++ b/python/cugraph-pyg/cugraph_pyg/nn/conv/sage_conv.py @@ -0,0 +1,149 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Optional, Tuple, Union + +from cugraph.utilities.utils import import_optional +from pylibcugraphops.pytorch.operators import agg_concat_n2n + +from .base import BaseConv + +torch = import_optional("torch") +torch_geometric = import_optional("torch_geometric") + + +class SAGEConv(BaseConv): + r"""The GraphSAGE operator from the `"Inductive Representation Learning on + Large Graphs" <https://arxiv.org/abs/1706.02216>`_ paper. + + .. math:: + \mathbf{x}^{\prime}_i = \mathbf{W}_1 \mathbf{x}_i + \mathbf{W}_2 \cdot + \mathrm{mean}_{j \in \mathcal{N}(i)} \mathbf{x}_j + + If :obj:`project = True`, then :math:`\mathbf{x}_j` will first get + projected via + + .. math:: + \mathbf{x}_j \leftarrow \sigma ( \mathbf{W}_3 \mathbf{x}_j + + \mathbf{b}) + + as described in Eq. (3) of the paper. + + Args: + in_channels (int or tuple): Size of each input sample. A tuple + corresponds to the sizes of source and target dimensionalities.
+ out_channels (int): Size of each output sample. + aggr (str or Aggregation, optional): The aggregation scheme to use. + Choose from :obj:`"mean"`, :obj:`"sum"`, :obj:`"min"` or + :obj:`"max"`. (default: :obj:`"mean"`) + normalize (bool, optional): If set to :obj:`True`, output features + will be :math:`\ell_2`-normalized, *i.e.*, + :math:`\frac{\mathbf{h}_i^{k+1}} + {\| \mathbf{h}_i^{k+1} \|_2}`. + (default: :obj:`False`) + root_weight (bool, optional): If set to :obj:`False`, the layer will + not add transformed root node features to the output. + (default: :obj:`True`) + project (bool, optional): If set to :obj:`True`, the layer will apply a + linear transformation followed by an activation function before + aggregation (as described in Eq. (3) of the paper). + (default: :obj:`False`) + bias (bool, optional): If set to :obj:`False`, the layer will not learn + an additive bias. (default: :obj:`True`) + """ + + def __init__( + self, + in_channels: Union[int, Tuple[int, int]], + out_channels: int, + aggr: str = "mean", + normalize: bool = False, + root_weight: bool = True, + project: bool = False, + bias: bool = True, + ): + super().__init__() + + if aggr not in ["mean", "sum", "min", "max"]: + raise ValueError( + f"Aggregation function must be chosen from 'mean'," + f" 'sum', 'min' or 'max', but got '{aggr}'." + ) + + self.in_channels = in_channels + self.out_channels = out_channels + self.aggr = aggr + self.normalize = normalize + self.root_weight = root_weight + self.project = project + + if isinstance(in_channels, int): + self.in_channels_src = self.in_channels_dst = in_channels + else: + self.in_channels_src, self.in_channels_dst = in_channels + + if self.project: + self.pre_lin = torch_geometric.nn.Linear( + self.in_channels_src, self.in_channels_src, bias=True + ) + + if self.root_weight: + self.lin = torch_geometric.nn.Linear( + self.in_channels_src + self.in_channels_dst, out_channels, bias=bias + ) + else: + self.lin = torch_geometric.nn.Linear( + self.in_channels_src, out_channels, bias=bias + ) + + self.reset_parameters() + + def reset_parameters(self): + if self.project: + self.pre_lin.reset_parameters() + self.lin.reset_parameters() + + def forward( + self, + x: Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]], + csc: Tuple[torch.Tensor, torch.Tensor, int], + max_num_neighbors: Optional[int] = None, + ) -> torch.Tensor: + bipartite = isinstance(x, Tuple) + graph = self.get_cugraph( + csc, bipartite=bipartite, max_num_neighbors=max_num_neighbors + ) + + if self.project: + if bipartite: + x = (self.pre_lin(x[0]).relu(), x[1]) + else: + x = self.pre_lin(x).relu() + + out = agg_concat_n2n(x, graph, self.aggr) + + if self.root_weight: + out = self.lin(out) + else: + out = self.lin(out[:, : self.in_channels_src]) + + if self.normalize: + out = torch.nn.functional.normalize(out, p=2.0, dim=-1) + + return out + + def __repr__(self) -> str: + return ( + f"{self.__class__.__name__}({self.in_channels}, " + f"{self.out_channels}, aggr={self.aggr})" + ) diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/transformer_conv.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/transformer_conv.py index 1b8b1aa0ffa..41c0b4b4090 100644 --- a/python/cugraph-pyg/cugraph_pyg/nn/conv/transformer_conv.py +++ b/python/cugraph-pyg/cugraph_pyg/nn/conv/transformer_conv.py @@ -10,16 +10,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+ from typing import Optional, Tuple, Union from cugraph.utilities.utils import import_optional +from pylibcugraphops.pytorch.operators import mha_simple_n2n from .base import BaseConv torch = import_optional("torch") nn = import_optional("torch.nn") torch_geometric = import_optional("torch_geometric") -ops_torch = import_optional("pylibcugraphops.pytorch") class TransformerConv(BaseConv): @@ -185,7 +186,7 @@ def forward( ) edge_attr = self.lin_edge(edge_attr) - out = ops_torch.operators.mha_simple_n2n( + out = mha_simple_n2n( key, query, value, diff --git a/python/cugraph-pyg/cugraph_pyg/tests/conftest.py b/python/cugraph-pyg/cugraph_pyg/tests/conftest.py index 3270dd0bf93..083c4a2b37b 100644 --- a/python/cugraph-pyg/cugraph_pyg/tests/conftest.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/conftest.py @@ -265,3 +265,22 @@ def abc_graph(): ) return F, G, N + + +@pytest.fixture +def basic_pyg_graph_1(): + edge_index = torch.tensor([[0, 1, 2, 3], [0, 0, 1, 1]]) + size = (4, 4) + return edge_index, size + + +@pytest.fixture +def basic_pyg_graph_2(): + edge_index = torch.tensor( + [ + [0, 1, 0, 2, 3, 0, 4, 0, 5, 0, 6, 7, 0, 8, 9], + [1, 9, 2, 9, 9, 4, 9, 5, 9, 6, 9, 9, 8, 9, 0], + ] + ) + size = (10, 10) + return edge_index, size diff --git a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_gat_conv.py b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_gat_conv.py index ae5fd73c438..21c43bad38c 100644 --- a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_gat_conv.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_gat_conv.py @@ -13,15 +13,9 @@ import pytest -try: - from torch_geometric.nn import GATConv -except ModuleNotFoundError: - pytest.skip("PyG not available", allow_module_level=True) - -from cugraph.utilities.utils import import_optional from cugraph_pyg.nn import GATConv as CuGraphGATConv -torch = import_optional("torch") +ATOL = 1e-6 @pytest.mark.parametrize("bias", [True, False]) @@ -30,17 +24,16 @@ @pytest.mark.parametrize("heads", [1, 2, 3, 5, 10, 16]) @pytest.mark.parametrize("max_num_neighbors", [8, None]) @pytest.mark.parametrize("use_edge_attr", [True, False]) +@pytest.mark.parametrize("graph", ["basic_pyg_graph_1", "basic_pyg_graph_2"]) def test_gat_conv_equality( - bias, bipartite, concat, heads, max_num_neighbors, use_edge_attr + bias, bipartite, concat, heads, max_num_neighbors, use_edge_attr, graph, request ): - atol = 1e-6 - edge_index = torch.tensor( - [ - [7, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 8, 9], - [0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7], - ], - ).cuda() - size = (10, 10) + pytest.importorskip("torch_geometric", reason="PyG not available") + import torch + from torch_geometric.nn import GATConv + + edge_index, size = request.getfixturevalue(graph) + edge_index = edge_index.cuda() if bipartite: in_channels = (5, 3) @@ -87,7 +80,7 @@ def test_gat_conv_equality( out1 = conv1(x, edge_index, edge_attr=edge_attr) out2 = conv2(x, csc, edge_attr=edge_attr_perm, max_num_neighbors=max_num_neighbors) - assert torch.allclose(out1, out2, atol=atol) + assert torch.allclose(out1, out2, atol=ATOL) grad_output = torch.rand_like(out1) out1.backward(grad_output) @@ -95,30 +88,30 @@ def test_gat_conv_equality( if bipartite: assert torch.allclose( - conv1.lin_src.weight.grad, conv2.lin_src.weight.grad, atol=atol + conv1.lin_src.weight.grad, conv2.lin_src.weight.grad, atol=ATOL ) assert torch.allclose( - conv1.lin_dst.weight.grad, conv2.lin_dst.weight.grad, atol=atol + conv1.lin_dst.weight.grad, conv2.lin_dst.weight.grad, atol=ATOL ) else: assert torch.allclose( - 
conv1.lin_src.weight.grad, conv2.lin.weight.grad, atol=atol + conv1.lin_src.weight.grad, conv2.lin.weight.grad, atol=ATOL ) assert torch.allclose( - conv1.att_src.grad.flatten(), conv2.att.grad[:out_dim], atol=atol + conv1.att_src.grad.flatten(), conv2.att.grad[:out_dim], atol=ATOL ) assert torch.allclose( - conv1.att_dst.grad.flatten(), conv2.att.grad[out_dim : 2 * out_dim], atol=atol + conv1.att_dst.grad.flatten(), conv2.att.grad[out_dim : 2 * out_dim], atol=ATOL ) if use_edge_attr: assert torch.allclose( - conv1.att_edge.grad.flatten(), conv2.att.grad[2 * out_dim :], atol=atol + conv1.att_edge.grad.flatten(), conv2.att.grad[2 * out_dim :], atol=ATOL ) assert torch.allclose( - conv1.lin_edge.weight.grad, conv2.lin_edge.weight.grad, atol=atol + conv1.lin_edge.weight.grad, conv2.lin_edge.weight.grad, atol=ATOL ) if bias: - assert torch.allclose(conv1.bias.grad, conv2.bias.grad, atol=atol) + assert torch.allclose(conv1.bias.grad, conv2.bias.grad, atol=ATOL) diff --git a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_gatv2_conv.py b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_gatv2_conv.py index 1c4f241304e..6b11e87154a 100644 --- a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_gatv2_conv.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_gatv2_conv.py @@ -13,30 +13,23 @@ import pytest -try: - from torch_geometric.nn import GATv2Conv -except ModuleNotFoundError: - pytest.skip("PyG not available", allow_module_level=True) - -from cugraph.utilities.utils import import_optional from cugraph_pyg.nn import GATv2Conv as CuGraphGATv2Conv -torch = import_optional("torch") +ATOL = 1e-6 @pytest.mark.parametrize("bipartite", [True, False]) @pytest.mark.parametrize("concat", [True, False]) @pytest.mark.parametrize("heads", [1, 2, 3, 5, 10, 16]) @pytest.mark.parametrize("use_edge_attr", [True, False]) -def test_gatv2_conv_equality(bipartite, concat, heads, use_edge_attr): - atol = 1e-6 - edge_index = torch.tensor( - [ - [7, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 8, 9], - [0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7], - ], - ).cuda() - size = (10, 10) +@pytest.mark.parametrize("graph", ["basic_pyg_graph_1", "basic_pyg_graph_2"]) +def test_gatv2_conv_equality(bipartite, concat, heads, use_edge_attr, graph, request): + pytest.importorskip("torch_geometric", reason="PyG not available") + import torch + from torch_geometric.nn import GATv2Conv + + edge_index, size = request.getfixturevalue(graph) + edge_index = edge_index.cuda() if bipartite: in_channels = (5, 3) @@ -70,26 +63,24 @@ def test_gatv2_conv_equality(bipartite, concat, heads, use_edge_attr): with torch.no_grad(): conv2.lin_src.weight.data = conv1.lin_l.weight.data.detach().clone() conv2.lin_dst.weight.data = conv1.lin_r.weight.data.detach().clone() - conv2.att.data = conv1.att.data.flatten().detach().clone() - if use_edge_attr: conv2.lin_edge.weight.data = conv1.lin_edge.weight.data.detach().clone() out1 = conv1(x, edge_index, edge_attr=edge_attr) out2 = conv2(x, csc, edge_attr=edge_attr_perm) - assert torch.allclose(out1, out2, atol=atol) + assert torch.allclose(out1, out2, atol=ATOL) grad_output = torch.rand_like(out1) out1.backward(grad_output) out2.backward(grad_output) - assert torch.allclose(conv1.lin_l.weight.grad, conv2.lin_src.weight.grad, atol=atol) - assert torch.allclose(conv1.lin_r.weight.grad, conv2.lin_dst.weight.grad, atol=atol) + assert torch.allclose(conv1.lin_l.weight.grad, conv2.lin_src.weight.grad, atol=ATOL) + assert torch.allclose(conv1.lin_r.weight.grad, conv2.lin_dst.weight.grad, atol=ATOL) - assert 
torch.allclose(conv1.att.grad.flatten(), conv2.att.grad, atol=atol) + assert torch.allclose(conv1.att.grad.flatten(), conv2.att.grad, atol=ATOL) if use_edge_attr: assert torch.allclose( - conv1.lin_edge.weight.grad, conv2.lin_edge.weight.grad, atol=atol + conv1.lin_edge.weight.grad, conv2.lin_edge.weight.grad, atol=ATOL ) diff --git a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_rgcn_conv.py b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_rgcn_conv.py new file mode 100644 index 00000000000..233c6aa2836 --- /dev/null +++ b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_rgcn_conv.py @@ -0,0 +1,71 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from cugraph_pyg.nn import RGCNConv as CuGraphRGCNConv + +ATOL = 1e-6 + + +@pytest.mark.parametrize("aggr", ["add", "sum", "mean"]) +@pytest.mark.parametrize("bias", [True, False]) +@pytest.mark.parametrize("max_num_neighbors", [8, None]) +@pytest.mark.parametrize("num_bases", [1, 2, None]) +@pytest.mark.parametrize("root_weight", [True, False]) +@pytest.mark.parametrize("graph", ["basic_pyg_graph_1", "basic_pyg_graph_2"]) +def test_rgcn_conv_equality( + aggr, bias, max_num_neighbors, num_bases, root_weight, graph, request +): + pytest.importorskip("torch_geometric", reason="PyG not available") + import torch + from torch_geometric.nn import FastRGCNConv as RGCNConv + + in_channels, out_channels, num_relations = (4, 2, 3) + kwargs = dict(aggr=aggr, bias=bias, num_bases=num_bases, root_weight=root_weight) + + edge_index, size = request.getfixturevalue(graph) + edge_index = edge_index.cuda() + edge_type = torch.randint(num_relations, (edge_index.size(1),)).cuda() + + x = torch.rand(size[0], in_channels, device="cuda") + csc, edge_type_perm = CuGraphRGCNConv.to_csc(edge_index, size, edge_type) + + conv1 = RGCNConv(in_channels, out_channels, num_relations, **kwargs).cuda() + conv2 = CuGraphRGCNConv(in_channels, out_channels, num_relations, **kwargs).cuda() + + with torch.no_grad(): + if root_weight: + conv2.weight.data[:-1] = conv1.weight.data + conv2.weight.data[-1] = conv1.root.data + else: + conv2.weight.data = conv1.weight.data.detach().clone() + if num_bases is not None: + conv2.comp.data = conv1.comp.data.detach().clone() + + out1 = conv1(x, edge_index, edge_type) + out2 = conv2(x, csc, edge_type_perm, max_num_neighbors=max_num_neighbors) + assert torch.allclose(out1, out2, atol=ATOL) + + grad_out = torch.rand_like(out1) + out1.backward(grad_out) + out2.backward(grad_out) + + if root_weight: + assert torch.allclose(conv1.weight.grad, conv2.weight.grad[:-1], atol=ATOL) + assert torch.allclose(conv1.root.grad, conv2.weight.grad[-1], atol=ATOL) + else: + assert torch.allclose(conv1.weight.grad, conv2.weight.grad, atol=ATOL) + + if num_bases is not None: + assert torch.allclose(conv1.comp.grad, conv2.comp.grad, atol=ATOL) diff --git a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_sage_conv.py b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_sage_conv.py new file mode 100644 index 
00000000000..7f73cddbdbb --- /dev/null +++ b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_sage_conv.py @@ -0,0 +1,89 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from cugraph_pyg.nn import SAGEConv as CuGraphSAGEConv + +ATOL = 1e-6 + + +@pytest.mark.parametrize("aggr", ["sum", "mean", "min", "max"]) +@pytest.mark.parametrize("bias", [True, False]) +@pytest.mark.parametrize("bipartite", [True, False]) +@pytest.mark.parametrize("max_num_neighbors", [8, None]) +@pytest.mark.parametrize("normalize", [True, False]) +@pytest.mark.parametrize("root_weight", [True, False]) +@pytest.mark.parametrize("graph", ["basic_pyg_graph_1", "basic_pyg_graph_2"]) +def test_sage_conv_equality( + aggr, bias, bipartite, max_num_neighbors, normalize, root_weight, graph, request +): + pytest.importorskip("torch_geometric", reason="PyG not available") + import torch + from torch_geometric.nn import SAGEConv + + edge_index, size = request.getfixturevalue(graph) + edge_index = edge_index.cuda() + csc = CuGraphSAGEConv.to_csc(edge_index, size) + + if bipartite: + in_channels = (7, 3) + x = ( + torch.rand(size[0], in_channels[0]).cuda(), + torch.rand(size[1], in_channels[1]).cuda(), + ) + else: + in_channels = 5 + x = torch.rand(size[0], in_channels).cuda() + out_channels = 4 + + kwargs = dict(aggr=aggr, bias=bias, normalize=normalize, root_weight=root_weight) + + conv1 = SAGEConv(in_channels, out_channels, **kwargs).cuda() + conv2 = CuGraphSAGEConv(in_channels, out_channels, **kwargs).cuda() + + in_channels_src = conv2.in_channels_src + with torch.no_grad(): + conv2.lin.weight.data[:, :in_channels_src] = conv1.lin_l.weight.data + if root_weight: + conv2.lin.weight.data[:, in_channels_src:] = conv1.lin_r.weight.data + if bias: + conv2.lin.bias.data[:] = conv1.lin_l.bias.data + + out1 = conv1(x, edge_index) + out2 = conv2(x, csc, max_num_neighbors=max_num_neighbors) + assert torch.allclose(out1, out2, atol=ATOL) + + grad_out = torch.rand_like(out1) + out1.backward(grad_out) + out2.backward(grad_out) + + assert torch.allclose( + conv1.lin_l.weight.grad, + conv2.lin.weight.grad[:, :in_channels_src], + atol=ATOL, + ) + + if root_weight: + assert torch.allclose( + conv1.lin_r.weight.grad, + conv2.lin.weight.grad[:, in_channels_src:], + atol=ATOL, + ) + + if bias: + assert torch.allclose( + conv1.lin_l.bias.grad, + conv2.lin.bias.grad, + atol=ATOL, + ) diff --git a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_transformer_conv.py b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_transformer_conv.py index a2153ee7891..7dba1a6d515 100644 --- a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_transformer_conv.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_transformer_conv.py @@ -13,23 +13,25 @@ import pytest -try: - from torch_geometric.nn import TransformerConv -except ModuleNotFoundError: - pytest.skip("PyG not available", allow_module_level=True) - -from cugraph.utilities.utils import import_optional from cugraph_pyg.nn import TransformerConv as 
CuGraphTransformerConv -torch = import_optional("torch") +ATOL = 1e-6 @pytest.mark.parametrize("bipartite", [True, False]) @pytest.mark.parametrize("concat", [True, False]) @pytest.mark.parametrize("heads", [1, 2, 3, 5, 10, 16]) -def test_transformer_conv_equality(bipartite, concat, heads): +@pytest.mark.parametrize("graph", ["basic_pyg_graph_1", "basic_pyg_graph_2"]) +def test_transformer_conv_equality(bipartite, concat, heads, graph, request): + pytest.importorskip("torch_geometric", reason="PyG not available") + import torch + from torch_geometric.nn import TransformerConv + + edge_index, size = request.getfixturevalue(graph) + edge_index = edge_index.cuda() + csc = CuGraphTransformerConv.to_csc(edge_index, size) + out_channels = 2 - size = (10, 10) kwargs = dict(concat=concat, bias=False, root_weight=False) if bipartite: @@ -42,14 +44,6 @@ def test_transformer_conv_equality(bipartite, concat, heads): in_channels = 5 x = torch.rand(size[0], in_channels, device="cuda") - edge_index = torch.tensor( - [ - [7, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 8, 9, 3, 4, 5], - [0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 5, 5, 6], - ], - device="cuda", - ) - conv1 = TransformerConv(in_channels, out_channels, heads, **kwargs).cuda() conv2 = CuGraphTransformerConv(in_channels, out_channels, heads, **kwargs).cuda() @@ -62,30 +56,27 @@ def test_transformer_conv_equality(bipartite, concat, heads): conv2.lin_value.bias.data = conv1.lin_value.bias.data.detach().clone() out1 = conv1(x, edge_index) - csc = CuGraphTransformerConv.to_csc(edge_index, size) out2 = conv2(x, csc) - atol = 1e-6 - - assert torch.allclose(out1, out2, atol=atol) + assert torch.allclose(out1, out2, atol=ATOL) grad_output = torch.rand_like(out1) out1.backward(grad_output) out2.backward(grad_output) assert torch.allclose( - conv1.lin_query.weight.grad, conv2.lin_query.weight.grad, atol=atol + conv1.lin_query.weight.grad, conv2.lin_query.weight.grad, atol=ATOL ) assert torch.allclose( - conv1.lin_key.weight.grad, conv2.lin_key.weight.grad, atol=atol + conv1.lin_key.weight.grad, conv2.lin_key.weight.grad, atol=ATOL ) assert torch.allclose( - conv1.lin_value.weight.grad, conv2.lin_value.weight.grad, atol=atol + conv1.lin_value.weight.grad, conv2.lin_value.weight.grad, atol=ATOL ) assert torch.allclose( - conv1.lin_query.bias.grad, conv2.lin_query.bias.grad, atol=atol + conv1.lin_query.bias.grad, conv2.lin_query.bias.grad, atol=ATOL ) - assert torch.allclose(conv1.lin_key.bias.grad, conv2.lin_key.bias.grad, atol=atol) + assert torch.allclose(conv1.lin_key.bias.grad, conv2.lin_key.bias.grad, atol=ATOL) assert torch.allclose( - conv1.lin_value.bias.grad, conv2.lin_value.bias.grad, atol=atol + conv1.lin_value.bias.grad, conv2.lin_value.bias.grad, atol=ATOL )
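
A minimal usage sketch (not part of the patch): the new layers follow the same calling convention the tests above exercise — build the CSC representation once with the static to_csc() helper on the conv class, then pass it to forward(). The graph shape, channel sizes, and variable names below are illustrative assumptions, not values taken from this changeset.

import torch
from cugraph_pyg.nn import RGCNConv as CuGraphRGCNConv
from cugraph_pyg.nn import SAGEConv as CuGraphSAGEConv

# A tiny homogeneous graph; cugraph-ops requires all tensors on the GPU.
edge_index = torch.tensor([[0, 1, 2, 3], [1, 2, 3, 0]], device="cuda")
size = (4, 4)  # (num_src_nodes, num_dst_nodes)
x = torch.rand(size[0], 8, device="cuda")  # 8 input channels (assumed)

# GraphSAGE: convert COO to the (row, colptr, num_src_nodes) CSC triple once
# and reuse it for every forward pass over the same graph.
csc = CuGraphSAGEConv.to_csc(edge_index, size)
sage = CuGraphSAGEConv(in_channels=8, out_channels=4, aggr="mean").cuda()
out = sage(x, csc)  # shape: (4, 4)

# RGCN: to_csc() permutes edge attributes together with the edges, so
# forward() must be given the permuted edge_type tensor it returns.
num_relations = 3
edge_type = torch.randint(num_relations, (edge_index.size(1),), device="cuda")
csc, edge_type_perm = CuGraphRGCNConv.to_csc(edge_index, size, edge_type)
rgcn = CuGraphRGCNConv(8, 4, num_relations, num_bases=2).cuda()
out = rgcn(x, csc, edge_type_perm)  # shape: (4, 4)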