diff --git a/CHANGELOG.md b/CHANGELOG.md
index ebf43dbd6236..2338d90eae82 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 ## [2.2.0] - 2022-MM-DD
 ### Added
+- Added `torch_geometric.sampler` package to docs ([#5563](https://github.com/pyg-team/pytorch_geometric/pull/5563))
 - Added the `DGraphFin` dynamic graph dataset ([#5504](https://github.com/pyg-team/pytorch_geometric/pull/5504))
 - Added `dropout_edge` augmentation that randomly drops edges from a graph - the usage of `dropout_adj` is now deprecated ([#5495](https://github.com/pyg-team/pytorch_geometric/pull/5495))
 - Add support for precomputed edges in `SchNet` model ([#5401](https://github.com/pyg-team/pytorch_geometric/pull/5401))
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 5a43e3b2ab26..27e3a258da08 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -38,6 +38,7 @@ In addition, it consists of easy-to-use mini-batch loaders for operating on many
    modules/nn
    modules/data
    modules/loader
+   modules/sampler
    modules/datasets
    modules/transforms
    modules/utils
diff --git a/docs/source/modules/loader.rst b/docs/source/modules/loader.rst
index 7ab0143fc77f..bf603bb32f5e 100644
--- a/docs/source/modules/loader.rst
+++ b/docs/source/modules/loader.rst
@@ -2,6 +2,7 @@ torch_geometric.loader
 ======================
 
 .. currentmodule:: torch_geometric.loader
+
 .. autosummary::
    :nosignatures:
    {% for cls in torch_geometric.loader.classes %}
diff --git a/docs/source/modules/sampler.rst b/docs/source/modules/sampler.rst
new file mode 100644
index 000000000000..388acbb7241c
--- /dev/null
+++ b/docs/source/modules/sampler.rst
@@ -0,0 +1,18 @@
+torch_geometric.sampler
+=======================
+
+.. currentmodule:: torch_geometric.sampler
+
+.. autosummary::
+   :nosignatures:
+   {% for cls in torch_geometric.sampler.classes %}
+     {{ cls }}
+   {% endfor %}
+
+.. autoclass:: torch_geometric.sampler.base.BaseSampler
+   :members:
+
+.. automodule:: torch_geometric.sampler
+   :members:
+   :undoc-members:
+   :exclude-members: BaseSampler, sample_from_nodes, sample_from_edges, edge_permutation
diff --git a/torch_geometric/sampler/__init__.py b/torch_geometric/sampler/__init__.py
index 28f08c18bf41..f177aebb0712 100644
--- a/torch_geometric/sampler/__init__.py
+++ b/torch_geometric/sampler/__init__.py
@@ -2,7 +2,7 @@
 from .neighbor_sampler import NeighborSampler
 from .hgt_sampler import HGTSampler
 
-__all__ = [
+__all__ = classes = [
     'BaseSampler',
     'NeighborSampler',
     'HGTSampler',
diff --git a/torch_geometric/sampler/base.py b/torch_geometric/sampler/base.py
index 00e3dc3cf92c..a2081423ea32 100644
--- a/torch_geometric/sampler/base.py
+++ b/torch_geometric/sampler/base.py
@@ -63,15 +63,16 @@ class HeteroSamplerOutput:
 
 
 class BaseSampler(ABC):
-    r"""A base class that initializes a graph sampler and provides a `sample`
-    routine that performs sampling on an input list or tensor of node indices.
-
-    .. warning ::
-        Any data stored in the sampler will be _replicated_ across data loading
-        workers that use the sampler. That is, each data loading worker has its
-        own instance of a sampler. As such, it is recommended to limit the
-        amount of information stored in the sampler, and to initialize all this
-        information at `__init__`.
+    r"""A base class that initializes a graph sampler and provides
+    :meth:`sample_from_nodes` and :meth:`sample_from_edges` routines.
+
+    .. note ::
+
+        Any data stored in the sampler will be *replicated* across data loading
+        workers that use the sampler since each data loading worker holds its
+        own instance of a sampler.
+        As such, it is recommended to limit the amount of information stored in
+        the sampler.
     """
     @abstractmethod
     def sample_from_nodes(
@@ -79,9 +80,13 @@ def sample_from_nodes(
         index: NodeSamplerInput,
         **kwargs,
     ) -> Union[HeteroSamplerOutput, SamplerOutput]:
-        r"""Performs sampling from the nodes specified in 'index', returning
-        a sampled subgraph in the specified output format."""
-        raise NotImplementedError
+        r"""Performs sampling from the nodes specified in :obj:`index`,
+        returning a sampled subgraph in the specified output format.
+
+        Args:
+            index (Tensor): The node indices to start sampling from.
+        """
+        pass
 
     @abstractmethod
     def sample_from_edges(
@@ -89,9 +94,16 @@ def sample_from_edges(
         index: EdgeSamplerInput,
         **kwargs,
     ) -> Union[HeteroSamplerOutput, SamplerOutput]:
-        r"""Performs sampling from the edges specified in 'index', returning
-        a sampled subgraph in the specified output format."""
-        raise NotImplementedError
+        r"""Performs sampling from the edges specified in :obj:`index`,
+        returning a sampled subgraph in the specified output format.
+
+        Args:
+            index (Tuple[Tensor, Tensor, Tensor, Optional[Tensor]]): The (1)
+                source node indices, the (2) destination node indices, the (3)
+                edge labels and the (4) optional timestamps of edges to start
+                sampling from.
+        """
+        pass
 
     @property
     def edge_permutation(self) -> Union[OptTensor, Dict[EdgeType, OptTensor]]:
@@ -99,6 +111,6 @@ def edge_permutation(self) -> Union[OptTensor, Dict[EdgeType, OptTensor]]:
         original graph, this function is expected to return the permutation
         tensor that defines the permutation from the edges in the original
         graph and the edges used in the sampler. If no such permutation was
-        applied, a default None tensor is returned. For heterogeneous graphs,
-        the expected return type is a permutation tensor for each edge type."""
+        applied, :obj:`None` is returned. For heterogeneous graphs, the
+        expected return type is a permutation tensor for each edge type."""
         return None
diff --git a/torch_geometric/sampler/hgt_sampler.py b/torch_geometric/sampler/hgt_sampler.py
index 410e5897430e..661a2d517c3d 100644
--- a/torch_geometric/sampler/hgt_sampler.py
+++ b/torch_geometric/sampler/hgt_sampler.py
@@ -14,7 +14,8 @@
 
 
 class HGTSampler(BaseSampler):
-    r"""An implementation of an in-memory HGT sampler."""
+    r"""An implementation of an in-memory heterogeneous layer-wise sampler
+    used by :class:`~torch_geometric.loader.HGTLoader`."""
     def __init__(
         self,
         data: HeteroData,
diff --git a/torch_geometric/sampler/neighbor_sampler.py b/torch_geometric/sampler/neighbor_sampler.py
index 706a0b59129f..497156036b30 100644
--- a/torch_geometric/sampler/neighbor_sampler.py
+++ b/torch_geometric/sampler/neighbor_sampler.py
@@ -29,7 +29,8 @@
 
 
 class NeighborSampler(BaseSampler):
-    r"""An implementation of an in-memory neighbor sampler."""
+    r"""An implementation of an in-memory (heterogeneous) neighbor sampler used
+    by :class:`~torch_geometric.loader.NeighborLoader`."""
     def __init__(
         self,
         data: Union[Data, HeteroData, Tuple[FeatureStore, GraphStore]],
@@ -319,8 +320,6 @@ def sample_from_nodes(
         index: NodeSamplerInput,
         **kwargs,
     ) -> Union[SamplerOutput, HeteroSamplerOutput]:
-        r"""Samples from the nodes specified in 'index', using pyg-lib or
-        torch-sparse sampling routines that store the graph in memory."""
         if isinstance(index, (list, tuple)):
             index = torch.tensor(index)
 
@@ -347,8 +346,6 @@ def sample_from_edges(
         index: EdgeSamplerInput,
         **kwargs,
     ) -> Union[SamplerOutput, HeteroSamplerOutput]:
-        r"""Samples from the edges specified in 'index', using pyg-lib or
-        torch-sparse sampling routines that store the graph in memory."""
         negative_sampling_ratio = kwargs.get('negative_sampling_ratio', 0.0)
         query = [torch.stack(s, dim=0) for s in zip(*index)]
         edge_label_index = torch.stack(query[:2], dim=0)