[WIP] add docstrings in mms classes and functions #1101

Merged — 2 commits, May 22, 2023
87 changes: 45 additions & 42 deletions merlin/models/tf/blocks/dlrm.py
@@ -34,52 +34,55 @@ def DLRMBlock(
*,
embedding_dim: int = None,
embedding_options: EmbeddingOptions = None,
embeddings: Optional[Block] = None,
Contributor:
Let's not move the order of arguments if possible - it's a breaking change in the unlikely case where people provide all of the arguments in order as opposed to using named arguments.

Member:
The * on line 34 forces the remaining arguments to be keyword arguments, which makes them robust to reordering here.

Contributor (author) @rnyak, May 22, 2023:
@nv-alaiacano thanks for your comment. Makes sense. I talked to Oliver, and I think we are safe changing the position of the arg thanks to the * there: it forces everything after it to be a keyword arg rather than a positional arg.
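The keyword-only behavior discussed above can be sketched outside the codebase (a minimal illustration; this is not the real `DLRMBlock` signature):

```python
# Minimal illustration (not the actual DLRMBlock signature): the bare `*`
# makes every parameter after it keyword-only, so reordering those
# parameters in the definition cannot break positional callers.
def dlrm_like(schema, *, embedding_dim=None, embeddings=None, bottom_block=None):
    return embedding_dim

dlrm_like("schema", embedding_dim=64)   # OK: bound by keyword
try:
    dlrm_like("schema", 64)             # rejected: 64 cannot bind positionally
except TypeError as exc:
    print("TypeError:", exc)
```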

bottom_block: Optional[Block] = None,
top_block: Optional[Block] = None,
embeddings: Optional[Block] = None,
) -> SequentialBlock:
"""Builds the DLRM architecture, as proposed in the following
`paper <https://arxiv.org/pdf/1906.00091.pdf>`_ [1]_.

References
----------
.. [1] Naumov, Maxim, et al. "Deep learning recommendation model for
personalization and recommendation systems." arXiv preprint arXiv:1906.00091 (2019).

Parameters
----------
schema : Schema
The `Schema` with the input features
bottom_block : Block
The `Block` that combines the continuous features (typically a `MLPBlock`)
top_block : Optional[Block], optional
The optional `Block` that combines the outputs of bottom layer and of
the factorization machine layer, by default None
embedding_dim : Optional[int], optional
Dimension of the embeddings, by default None
embedding_options : EmbeddingOptions
Options for the input embeddings.
- embedding_dim_default: int - Default dimension of the embedding
table, when the feature is not found in ``embedding_dims``, by default 64
- infer_embedding_sizes : bool, Automatically defines the embedding
dimension from the feature cardinality in the schema, by default False,
which needs to be kept False for the DLRM architecture.

Returns
-------
SequentialBlock
The DLRM block

Raises
------
ValueError
The schema is required by DLRM
ValueError
The bottom_block is required by DLRM
ValueError
The embedding_dim (X) needs to match the last layer of bottom MLP (Y).
ValueError
Only one-of `embeddings` or `embedding_options` can be used.
`paper <https://arxiv.org/pdf/1906.00091.pdf>`_ [1]_.

References
----------
.. [1] Naumov, Maxim, et al. "Deep learning recommendation model for
personalization and recommendation systems." arXiv preprint arXiv:1906.00091 (2019).

Parameters
----------
schema : Schema
The `Schema` with the input features
embedding_dim : Optional[int], optional
Dimension of the embeddings, by default None
embedding_options : EmbeddingOptions
Options for the input embeddings.
- embedding_dim_default: int - Default dimension of the embedding
table, when the feature is not found in ``embedding_dims``, by default 64
- infer_embedding_sizes : bool, Automatically defines the embedding
dimension from the feature cardinality in the schema, by default False,
which needs to be kept False for the DLRM architecture.
embeddings : Optional[Block]
If provided, creates a ParallelBlock with an EmbeddingTable for each
categorical feature in the schema, by default None.
bottom_block : Block
The `Block` that combines the continuous features (typically a `MLPBlock`)
top_block : Optional[Block], optional
The optional `Block` that combines the outputs of the bottom layer and of
the factorization machine layer, by default None

Returns
-------
SequentialBlock
The DLRM block

Raises
------
ValueError
The schema is required by DLRM
ValueError
The bottom_block is required by DLRM
ValueError
The embedding_dim (X) needs to match the last layer of bottom MLP (Y).
ValueError
Only one-of `embeddings` or `embedding_options` can be used.
"""
if schema is None:
raise ValueError("The schema is required by DLRM")
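The `embedding_dim` / bottom-MLP constraint from the Raises section can be sketched as a standalone check (a hypothetical helper for illustration, not code from this PR):

```python
# Hypothetical sketch of the validation the Raises section describes:
# DLRM's dot-product interaction needs the bottom MLP's last layer to
# produce vectors of the same size as the embeddings.
def check_dlrm_dims(embedding_dim, bottom_mlp_dims):
    last = bottom_mlp_dims[-1]
    if embedding_dim != last:
        raise ValueError(
            f"The embedding_dim ({embedding_dim}) needs to match "
            f"the last layer of bottom MLP ({last})."
        )

check_dlrm_dims(64, [128, 64])   # OK: last bottom-MLP layer == embedding_dim
```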
12 changes: 12 additions & 0 deletions merlin/models/tf/blocks/interaction.py
@@ -236,6 +236,18 @@ def call(self, inputs: tf.Tensor, **kwargs) -> tf.Tensor:
return 0.5 * tf.subtract(summed_square, squared_sum)

def compute_output_shape(self, input_shapes):
"""Computes the output shape based on the input shapes

Parameters
----------
input_shapes : tf.TensorShape
The input shapes

Returns
-------
tf.TensorShape
The output shape
"""
if len(input_shapes) != 3:
raise ValueError("Found shape {} without 3 dimensions".format(input_shapes))
return (input_shapes[0], input_shapes[2])
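The shape documented here follows from the interaction itself. A standalone NumPy sketch of the same `0.5 * (summed_square - squared_sum)` trick (for illustration only, not the block's actual implementation):

```python
import numpy as np

# Standalone sketch of the factorization-machine second-order trick used
# in call(): 0.5 * ((sum of vectors)^2 - sum of squared vectors) equals
# the sum of elementwise products x_i * x_j over all pairs i < j.
def pairwise_interaction(inputs):
    # inputs: (batch, num_features, dim)
    summed_square = np.square(inputs.sum(axis=1))   # (batch, dim)
    squared_sum = np.square(inputs).sum(axis=1)     # (batch, dim)
    return 0.5 * (summed_square - squared_sum)      # (batch, dim)

x = pairwise_interaction(np.ones((2, 3, 4)))
print(x.shape)  # (2, 4) -- matching compute_output_shape above
```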
27 changes: 27 additions & 0 deletions merlin/models/tf/prediction_tasks/classification.py
@@ -99,12 +99,39 @@ def __init__(
)

def call(self, inputs, training=False, **kwargs):
"""Projects the inputs to a single logit with the output layer and applies the output activation

Parameters
----------
inputs : tf.Tensor
Input tensor
training : bool, optional
Flag that indicates whether it is training or not, by default False

Returns
-------
tf.Tensor
Tensor with the classification probabilities
"""
return self.output_activation(self.output_layer(inputs))

def compute_output_shape(self, input_shape):
"""Computes the output shape based on the input shape

Parameters
----------
input_shape : tf.TensorShape
The input shape

Returns
-------
tf.TensorShape
The output shape
"""
return self.output_layer.compute_output_shape(input_shape)

def get_config(self):
"""Return a Python dict containing the configuration of the model."""
config = super().get_config()
config = maybe_serialize_keras_objects(
self,
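The `call` docstring above can be illustrated with a minimal NumPy sketch, assuming a Dense(1) output layer and a sigmoid activation (hypothetical standalone names, not the task's actual internals):

```python
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

# Hypothetical standalone version of the task's call(): the output layer
# projects each input row to one logit, and the activation turns the logit
# into a probability; compute_output_shape therefore reports (batch, 1).
def binary_head(inputs, w, b):
    logits = inputs @ w + b        # "output_layer": Dense(1) -> (batch, 1)
    return sigmoid(logits)         # "output_activation"

rng = np.random.default_rng(0)
probs = binary_head(rng.normal(size=(8, 16)), rng.normal(size=(16, 1)), 0.0)
print(probs.shape)  # (8, 1)
```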
13 changes: 13 additions & 0 deletions merlin/models/tf/prediction_tasks/regression.py
@@ -105,9 +105,22 @@ def call(self, inputs: tf.Tensor, training=False, **kwargs) -> tf.Tensor:
return self.output_activation(self.output_layer(inputs))

def compute_output_shape(self, input_shape):
"""Computes the output shape based on the input shape

Parameters
----------
input_shape : tf.TensorShape
The input shape

Returns
-------
tf.TensorShape
The output shape
"""
return self.output_layer.compute_output_shape(input_shape)

def get_config(self):
"""Return a Python dict containing the configuration of the model."""
config = super().get_config()
config = maybe_serialize_keras_objects(
self, config, {"output_layer": tf.keras.layers.serialize}
15 changes: 11 additions & 4 deletions merlin/models/tf/prediction_tasks/retrieval.py
@@ -39,10 +39,6 @@ class ItemRetrievalTask(MultiClassClassificationTask):
The schema object including features to use and their properties.
samplers: List[ItemSampler]
List of samplers for negative sampling, by default `[InBatchSampler()]`
post_logits: Optional[PredictionBlock]
Optional extra pre-call block for post-processing the logits, by default None.
You can for example use `post_logits = mm.PopularitySamplingBlock(item_fequency)`
for populariy sampling correction.
target_name: Optional[str]
If specified, name of the target tensor to retrieve from dataloader.
Defaults to None.
@@ -52,9 +48,17 @@
task_block: Block
The `Block` that applies additional layers op to inputs.
Defaults to None.
post_logits: Optional[PredictionBlock]
Optional extra pre-call block for post-processing the logits, by default None.
You can, for example, use `post_logits = mm.PopularitySamplingBlock(item_frequency)`
for popularity sampling correction.
logits_temperature: float
Temperature T used to reduce model overconfidence: the logits are divided by T.
Defaults to 1.
cache_query: bool
Add query embeddings to the context block, by default False
store_negative_ids: bool
Returns negative items ids as part of the output, by default False
Returns
-------
PredictionTask
@@ -112,6 +116,7 @@ def _build_prediction_call(
store_negative_ids: bool = False,
**kwargs,
):
"""Returns a SequentialBlock of ItemRetrievalScorer() and LogitsTemperatureScaler()"""
if samplers is None or len(samplers) == 0:
samplers = (InBatchSampler(),)

@@ -134,6 +139,7 @@
@property
@property
def retrieval_scorer(self):
def find_retrieval_scorer_block(block):
"""Returns the ItemRetrievalScorer layer"""
if isinstance(block, ItemRetrievalScorer):
return block

@@ -156,6 +162,7 @@ def set_retrieval_cache_query(self, value: bool):
self.retrieval_scorer.cache_query = value

def get_config(self):
"""Return a Python dict containing the configuration of the model."""
config = super(ItemRetrievalTask, self).get_config()
del config["pre"]
if self.samplers:
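The `logits_temperature` parameter documented in this file can be illustrated standalone — a sketch of temperature scaling in general, not the `LogitsTemperatureScaler` source:

```python
import numpy as np

def softmax(z):
    e = np.exp(z - z.max(axis=-1, keepdims=True))
    return e / e.sum(axis=-1, keepdims=True)

# Sketch of temperature scaling: dividing the logits by T > 1 flattens the
# softmax distribution, so the model's top prediction is less confident.
def scale_logits(logits, temperature=1.0):
    return logits / temperature

logits = np.array([2.0, 0.0, -1.0])
p1 = softmax(logits)                    # T = 1: sharper distribution
p2 = softmax(scale_logits(logits, 2.0)) # T = 2: softer distribution
print(p1.max() > p2.max())  # True
```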