
Commit 3fea11d

add more docs
moritzmeister committed Oct 29, 2020
1 parent 0d22cd6 commit 3fea11d
Showing 1 changed file with 57 additions and 2 deletions.
python/hsfs/training_dataset.py: 57 additions & 2 deletions
@@ -180,7 +180,41 @@ def save(
         )
         return self
 
-    def insert(self, features, overwrite, write_options={}):
+    def insert(
+        self,
+        features: Union[
+            query.Query,
+            pd.DataFrame,
+            TypeVar("pyspark.sql.DataFrame"),  # noqa: F821
+            TypeVar("pyspark.RDD"),  # noqa: F821
+            np.ndarray,
+            List[list],
+        ],
+        overwrite: bool,
+        write_options: Optional[Dict[Any, Any]] = {},
+    ):
+        """Insert additional feature data into the training dataset.
+
+        This method appends data to the training dataset either from a Feature Store
+        `Query`, a Spark or Pandas `DataFrame`, a Spark RDD, two-dimensional Python
+        lists or Numpy ndarrays. The schemas must match for this operation.
+
+        This can also be used to overwrite all data in an existing training dataset.
+
+        # Arguments
+            features: Feature data to be materialized.
+            overwrite: Whether to overwrite the entire data in the training dataset.
+            write_options: Additional write options as key/value pairs.
+                Defaults to `{}`.
+
+        # Returns
+            `TrainingDataset`: The updated training dataset metadata object; the
+                previous `TrainingDataset` object on which you call `save` is also
+                updated.
+
+        # Raises
+            `RestAPIError`: Unable to create training dataset metadata.
+        """
         if isinstance(features, query.Query):
            feature_dataframe = features.read()
         else:
@@ -194,6 +228,18 @@ def insert(self, features, overwrite, write_options={}):
         self.compute_statistics()
 
     def read(self, split=None, read_options={}):
+        """Read the training dataset into a dataframe.
+
+        It is also possible to read only a specific split.
+
+        # Arguments
+            split: Name of the split to read, defaults to `None`, reading the entire
+                training dataset.
+            read_options: Additional read options as key/value pairs, defaults to `{}`.
+
+        # Returns
+            `DataFrame`: The Spark dataframe containing the feature data of the
+                training dataset.
+        """
         return self._training_dataset_engine.read(self, split, read_options)

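A short sketch of reading the materialized data back, reusing the assumed `td` handle from the example above; the split name "train" is illustrative.

# Read the entire training dataset as a Spark dataframe.
df_all = td.read()

# Read a single split only; "train" is an assumed split name.
df_train = td.read(split="train")

Since `read_options` defaults to `{}`, it can be omitted; any key/value pairs passed are handed through to the training dataset engine.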
     def compute_statistics(self):

@@ -237,7 +283,16 @@ def tf_data(
             cycle_length=cycle_length,
         )

-    def show(self, n, split=None):
+    def show(self, n: int, split: str = None):
+        """Show the first `n` rows of the training dataset.
+
+        You can specify a split from which to retrieve the rows.
+
+        # Arguments
+            n: Number of rows to show.
+            split: Name of the split to show, defaults to `None`, showing the first rows
+                when taking all splits together.
+        """
         self.read(split).show(n)
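A sketch of the preview helper, again with the assumed `td` handle; the split name is illustrative.

# Show the first 10 rows across all splits taken together.
td.show(10)

# Show the first 10 rows of one split.
td.show(10, split="test")

As the one-line body above shows, `show` simply delegates to `read(split)` and calls `.show(n)` on the resulting dataframe.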

     def add_tag(self, name: str, value: str = None):
