diff --git a/.github/workflows/testing_ci.yml b/.github/workflows/testing_ci.yml index 10572236..d9380be9 100644 --- a/.github/workflows/testing_ci.yml +++ b/.github/workflows/testing_ci.yml @@ -78,8 +78,8 @@ jobs: - name: Test with pytest run: | - python tests/global_test_config.py rm -rf testing_results && rm -rf tests/__pycache__ && rm -rf tests/*/__pycache__ + python tests/global_test_config.py python -m pytest -rA tests/*/* -s -n auto --cov=pypots --dist=loadgroup --cov-config=.coveragerc - name: Generate the LCOV report diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 0be3b71a..2d79de72 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -31,6 +31,9 @@ build: - pip install ./TSDB_repo && pip install ./PyGrinder_repo && pip install . post_install: + # To fix the exception: This documentation is not using `furo.css` as the stylesheet. + # If you have set `html_style` in your conf.py file, remove it. + - pip install sphinx==7.2.6 + # this docutils version fixes issue#102, put it in post_install to avoid being + # overwritten by other versions (like 0.19) while installing other packages - pip install docutils==0.20 - # this version fixes issue#102, put it in post_install to avoid being - # overwritten by other versions (like 0.19) while installing other packages diff --git a/README.md b/README.md index 3152f3c4..b04fb585 100644 --- a/README.md +++ b/README.md @@ -228,6 +228,7 @@ the same as we did in [SAITS paper](https://arxiv.org/pdf/2202.08516).** | Neural Net | VaDER | Variational Deep Embedding with Recurrence [^7] | 2019 | | ***`Forecasting`*** | 🚥 | 🚥 | 🚥 | | **Type** | **Abbr.** | **Full name of the algorithm/model/paper** | **Year** | +| Neural Net | CSDI | Conditional Score-based Diffusion Models for Probabilistic Time Series Imputation [^12] | 2021 | | Probabilistic | BTTF | Bayesian Temporal Tensor Factorization [^8] | 2021 | diff --git a/docs/index.rst b/docs/index.rst index 61a6e9b6..9bfe69bf 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -193,7 +193,7 @@ Imputation Neural Net FEDformer (Frequency Enhanced De Imputation Neural Net Informer (Beyond Efficient Transformer for Long Sequence Time-Series Forecasting) 2021 :cite:`zhou2021informer` Imputation Neural Net Autoformer (Decomposition Transformers with Auto-Correlation for Long-Term Series Forecasting) 2021 :cite:`wu2021autoformer` Imputation Neural Net US-GAN (Unsupervised GAN for Multivariate Time Series Imputation) 2021 :cite:`miao2021SSGAN` -Imputation Neural Net CSDI (Conditional Score-based Diffusion Models for Probabilistic Time Series Imputation) 2021 :cite:`tashiro2021csdi` +Imputation, Forecasting Neural Net CSDI (Conditional Score-based Diffusion Models for Probabilistic Time Series Imputation) 2021 :cite:`tashiro2021csdi` Imputation Neural Net GP-VAE (Gaussian Process Variational Autoencoder) 2020 :cite:`fortuin2020gpvae` Imputation, Classification Neural Net BRITS (Bidirectional Recurrent Imputation for Time Series) 2018 :cite:`cao2018BRITS` Imputation Neural Net M-RNN (Multi-directional Recurrent Neural Network) 2019 :cite:`yoon2019MRNN` diff --git a/docs/pypots.data.rst b/docs/pypots.data.rst index 624f7b1d..79fde3fb 100644 --- a/docs/pypots.data.rst +++ b/docs/pypots.data.rst @@ -1,10 +1,10 @@ pypots.data package =================== -pypots.data.base +pypots.data.dataset ----------------------- -.. automodule:: pypots.data.base +.. 
automodule:: pypots.data.dataset :members: :undoc-members: :show-inheritance: diff --git a/pypots/base.py b/pypots/base.py index ac3287c5..699fc098 100644 --- a/pypots/base.py +++ b/pypots/base.py @@ -337,13 +337,13 @@ def fit( self, train_set: Union[dict, str], val_set: Optional[Union[dict, str]] = None, - file_type: str = "h5py", + file_type: str = "hdf5", ) -> None: """Train the classifier on the given data. Parameters ---------- - train_set : dict or str + train_set : The dataset for model training, should be a dictionary including keys as 'X', or a path string locating a data file supported by PyPOTS (e.g. h5 file). If it is a dict, X should be array-like of shape [n_samples, sequence length (time steps), n_features], @@ -352,7 +352,7 @@ def fit( If it is a path string, the path should point to a data file, e.g. a h5 file, which contains key-value pairs like a dict, and it has to include keys as 'X' and 'y'. - val_set : dict or str + val_set : The dataset for model validating, should be a dictionary including keys as 'X', or a path string locating a data file supported by PyPOTS (e.g. h5 file). If it is a dict, X should be array-like of shape [n_samples, sequence length (time steps), n_features], @@ -361,7 +361,7 @@ def fit( If it is a path string, the path should point to a data file, e.g. a h5 file, which contains key-value pairs like a dict, and it has to include keys as 'X' and 'y'. - file_type : str + file_type : The type of the given file if train_set and val_set are path strings. """ @@ -371,13 +371,13 @@ def fit( def predict( self, test_set: Union[dict, str], - file_type: str = "h5py", + file_type: str = "hdf5", ) -> dict: """Make predictions for the input data with the trained model. Parameters ---------- - test_set : dict or str + test_set : The dataset for model validating, should be a dictionary including keys as 'X', or a path string locating a data file supported by PyPOTS (e.g. h5 file). If it is a dict, X should be array-like of shape [n_samples, sequence length (time steps), n_features], @@ -386,12 +386,12 @@ def predict( If it is a path string, the path should point to a data file, e.g. a h5 file, which contains key-value pairs like a dict, and it has to include keys as 'X' and 'y'. - file_type : str + file_type : The type of the given file if test_set is a path string. Returns ------- - result_dict: dict + result_dict : Prediction results in a Python Dictionary for the given samples. It should be a dictionary including keys as 'imputation', 'classification', 'clustering', and 'forecasting'. For sure, only the keys that relevant tasks are supported by the model will be returned. @@ -512,7 +512,7 @@ def fit( self, train_set: Union[dict, str], val_set: Optional[Union[dict, str]] = None, - file_type: str = "h5py", + file_type: str = "hdf5", ) -> None: raise NotImplementedError @@ -520,6 +520,6 @@ def fit( def predict( self, test_set: Union[dict, str], - file_type: str = "h5py", + file_type: str = "hdf5", ) -> dict: raise NotImplementedError diff --git a/pypots/classification/base.py b/pypots/classification/base.py index 50bd5afd..817302ec 100644 --- a/pypots/classification/base.py +++ b/pypots/classification/base.py @@ -72,7 +72,7 @@ def fit( self, train_set: Union[dict, str], val_set: Optional[Union[dict, str]] = None, - file_type: str = "h5py", + file_type: str = "hdf5", ) -> None: """Train the classifier on the given data. 
@@ -106,7 +106,7 @@ def fit( def predict( self, test_set: Union[dict, str], - file_type: str = "h5py", + file_type: str = "hdf5", ) -> dict: raise NotImplementedError @@ -114,7 +114,7 @@ def predict( def classify( self, X: Union[dict, str], - file_type: str = "h5py", + file_type: str = "hdf5", ) -> np.ndarray: """Classify the input data with the trained model. @@ -214,12 +214,12 @@ def __init__( self.n_classes = n_classes @abstractmethod - def _assemble_input_for_training(self, data) -> dict: + def _assemble_input_for_training(self, data: list) -> dict: """Assemble the given data into a dictionary for training input. Parameters ---------- - data : list, + data : Input data from dataloader, should be list. Returns @@ -230,12 +230,12 @@ def _assemble_input_for_training(self, data) -> dict: raise NotImplementedError @abstractmethod - def _assemble_input_for_validating(self, data) -> dict: + def _assemble_input_for_validating(self, data: list) -> dict: """Assemble the given data into a dictionary for validating input. Parameters ---------- - data : list, + data : Data output from dataloader, should be list. Returns @@ -246,7 +246,7 @@ def _assemble_input_for_validating(self, data) -> dict: raise NotImplementedError @abstractmethod - def _assemble_input_for_testing(self, data) -> dict: + def _assemble_input_for_testing(self, data: list) -> dict: """Assemble the given data into a dictionary for testing input. Notes @@ -259,7 +259,7 @@ def _assemble_input_for_testing(self, data) -> dict: Parameters ---------- - data : list, + data : Data output from dataloader, should be list. Returns @@ -386,7 +386,7 @@ def fit( self, train_set: Union[dict, str], val_set: Optional[Union[dict, str]] = None, - file_type: str = "h5py", + file_type: str = "hdf5", ) -> None: """Train the classifier on the given data. @@ -420,7 +420,7 @@ def fit( def predict( self, test_set: Union[dict, str], - file_type: str = "h5py", + file_type: str = "hdf5", ) -> dict: raise NotImplementedError @@ -428,7 +428,7 @@ def predict( def classify( self, X: Union[dict, str], - file_type: str = "h5py", + file_type: str = "hdf5", ) -> np.ndarray: """Classify the input data with the trained model. diff --git a/pypots/classification/brits/data.py b/pypots/classification/brits/data.py index 2c5c2581..663ba81d 100644 --- a/pypots/classification/brits/data.py +++ b/pypots/classification/brits/data.py @@ -17,7 +17,7 @@ class DatasetForBRITS(DatasetForBRITS_Imputation): Parameters ---------- - data : dict or str, + data : The dataset for model input, should be a dictionary including keys as 'X' and 'y', or a path string locating a data file. If it is a dict, X should be array-like of shape [n_samples, sequence length (time steps), n_features], @@ -26,7 +26,7 @@ class DatasetForBRITS(DatasetForBRITS_Imputation): If it is a path string, the path should point to a data file, e.g. a h5 file, which contains key-value pairs like a dict, and it has to include keys as 'X' and 'y'. - return_labels : bool, default = True, + return_y : Whether to return labels in function __getitem__() if they exist in the given data. If `True`, for example, during training of classification models, the Dataset class will return labels in __getitem__() for model input. Otherwise, labels won't be included in the data returned by __getitem__(). This parameter exists because we @@ -35,14 +35,19 @@ class DatasetForBRITS(DatasetForBRITS_Imputation): with function _fetch_data_from_file(), which works for all three stages. Therefore, we need this parameter for distinction. 
- file_type : str, default = "h5py" + file_type : The type of the given file if train_set and val_set are path strings. """ def __init__( self, data: Union[dict, str], - return_labels: bool = True, - file_type: str = "h5py", + return_y: bool = True, + file_type: str = "hdf5", ): - super().__init__(data, False, return_labels, file_type) + super().__init__( + data=data, + return_X_ori=False, + return_y=return_y, + file_type=file_type, + ) diff --git a/pypots/classification/brits/model.py b/pypots/classification/brits/model.py index f95177a5..fa0ad349 100644 --- a/pypots/classification/brits/model.py +++ b/pypots/classification/brits/model.py @@ -208,7 +208,7 @@ def fit( self, train_set: Union[dict, str], val_set: Optional[Union[dict, str]] = None, - file_type: str = "h5py", + file_type: str = "hdf5", ) -> None: # Step 1: wrap the input data with classes Dataset and DataLoader training_set = DatasetForBRITS(train_set, file_type=file_type) @@ -239,10 +239,10 @@ def fit( def predict( self, test_set: Union[dict, str], - file_type: str = "h5py", + file_type: str = "hdf5", ) -> dict: self.model.eval() # set the model as eval status to freeze it. - test_set = DatasetForBRITS(test_set, return_labels=False, file_type=file_type) + test_set = DatasetForBRITS(test_set, return_y=False, file_type=file_type) test_loader = DataLoader( test_set, batch_size=self.batch_size, @@ -267,7 +267,7 @@ def predict( def classify( self, X: Union[dict, str], - file_type: str = "h5py", + file_type: str = "hdf5", ) -> np.ndarray: """Classify the input data with the trained model. diff --git a/pypots/classification/grud/data.py b/pypots/classification/grud/data.py index 34865428..99401310 100644 --- a/pypots/classification/grud/data.py +++ b/pypots/classification/grud/data.py @@ -10,7 +10,7 @@ import torch -from ...data.base import BaseDataset +from ...data.dataset import BaseDataset from ...data.utils import _parse_delta_torch from ...imputation.locf import locf_torch @@ -20,7 +20,7 @@ class DatasetForGRUD(BaseDataset): Parameters ---------- - data : dict or str, + data : The dataset for model input, should be a dictionary including keys as 'X' and 'y', or a path string locating a data file. If it is a dict, X should be array-like of shape [n_samples, sequence length (time steps), n_features], @@ -29,7 +29,7 @@ class DatasetForGRUD(BaseDataset): If it is a path string, the path should point to a data file, e.g. a h5 file, which contains key-value pairs like a dict, and it has to include keys as 'X' and 'y'. - return_labels : bool, default = True, + return_y : Whether to return labels in function __getitem__() if they exist in the given data. If `True`, for example, during training of classification models, the Dataset class will return labels in __getitem__() for model input. Otherwise, labels won't be included in the data returned by __getitem__(). This parameter exists because we @@ -38,17 +38,23 @@ class DatasetForGRUD(BaseDataset): with function _fetch_data_from_file(), which works for all three stages. Therefore, we need this parameter for distinction. - file_type : str, default = "h5py" + file_type : The type of the given file if train_set and val_set are path strings. 
""" def __init__( self, data: Union[dict, str], - return_labels: bool = True, - file_type: str = "h5py", + return_y: bool = True, + file_type: str = "hdf5", ): - super().__init__(data, False, return_labels, file_type) + super().__init__( + data=data, + return_X_ori=False, + return_X_pred=False, + return_y=return_y, + file_type=file_type, + ) if not isinstance(self.data, str): # data from array self.missing_mask = (~torch.isnan(self.X)).to(torch.float32) self.X_filledLOCF = locf_torch(self.X) @@ -63,12 +69,12 @@ def _fetch_data_from_array(self, idx: int) -> Iterable: Parameters ---------- - idx : int, + idx : The index to fetch the specified sample. Returns ------- - sample : list, + sample : A list contains index : int tensor, @@ -98,7 +104,7 @@ def _fetch_data_from_array(self, idx: int) -> Iterable: self.empirical_mean.to(torch.float32), ] - if self.y is not None and self.return_labels: + if self.return_y: sample.append(self.y[idx].to(torch.long)) return sample @@ -109,12 +115,12 @@ def _fetch_data_from_file(self, idx: int) -> Iterable: Parameters ---------- - idx : int, + idx : The index of the sample to be return. Returns ------- - sample : list, + sample : The collated data sample, a list including all necessary sample info. """ @@ -140,7 +146,7 @@ def _fetch_data_from_file(self, idx: int) -> Iterable: ] # if the dataset has labels and is for training, then fetch it from the file - if "y" in self.file_handle.keys() and self.return_labels: + if self.return_y: sample.append(torch.tensor(self.file_handle["y"][idx], dtype=torch.long)) return sample diff --git a/pypots/classification/grud/model.py b/pypots/classification/grud/model.py index deec8bd5..a18e78c8 100644 --- a/pypots/classification/grud/model.py +++ b/pypots/classification/grud/model.py @@ -180,7 +180,7 @@ def fit( self, train_set: Union[dict, str], val_set: Optional[Union[dict, str]] = None, - file_type: str = "h5py", + file_type: str = "hdf5", ) -> None: # Step 1: wrap the input data with classes Dataset and DataLoader training_set = DatasetForGRUD(train_set, file_type=file_type) @@ -211,10 +211,10 @@ def fit( def predict( self, test_set: Union[dict, str], - file_type: str = "h5py", + file_type: str = "hdf5", ) -> dict: self.model.eval() # set the model as eval status to freeze it. - test_set = DatasetForGRUD(test_set, return_labels=False, file_type=file_type) + test_set = DatasetForGRUD(test_set, return_y=False, file_type=file_type) test_loader = DataLoader( test_set, batch_size=self.batch_size, @@ -239,7 +239,7 @@ def predict( def classify( self, X: Union[dict, str], - file_type: str = "h5py", + file_type: str = "hdf5", ) -> np.ndarray: """Classify the input data with the trained model. diff --git a/pypots/classification/raindrop/data.py b/pypots/classification/raindrop/data.py index be1c32b3..9449976f 100644 --- a/pypots/classification/raindrop/data.py +++ b/pypots/classification/raindrop/data.py @@ -16,7 +16,7 @@ class DatasetForRaindrop(DatasetForGRUD): Parameters ---------- - data : dict or str, + data : The dataset for model input, should be a dictionary including keys as 'X' and 'y', or a path string locating a data file. If it is a dict, X should be array-like of shape [n_samples, sequence length (time steps), n_features], @@ -25,7 +25,7 @@ class DatasetForRaindrop(DatasetForGRUD): If it is a path string, the path should point to a data file, e.g. a h5 file, which contains key-value pairs like a dict, and it has to include keys as 'X' and 'y'. 
- return_labels : bool, default = True, + return_y : Whether to return labels in function __getitem__() if they exist in the given data. If `True`, for example, during training of classification models, the Dataset class will return labels in __getitem__() for model input. Otherwise, labels won't be included in the data returned by __getitem__(). This parameter exists because we @@ -34,14 +34,14 @@ class DatasetForRaindrop(DatasetForGRUD): with function _fetch_data_from_file(), which works for all three stages. Therefore, we need this parameter for distinction. - file_type : str, default = "h5py" + file_type : The type of the given file if train_set and val_set are path strings. """ def __init__( self, data: Union[dict, str], - return_labels: bool = True, - file_type: str = "h5py", + return_y: bool = True, + file_type: str = "hdf5", ): - super().__init__(data, return_labels, file_type) + super().__init__(data, return_y, file_type) diff --git a/pypots/classification/raindrop/model.py b/pypots/classification/raindrop/model.py index 179531e7..8def46d2 100644 --- a/pypots/classification/raindrop/model.py +++ b/pypots/classification/raindrop/model.py @@ -19,8 +19,8 @@ from torch.utils.data import DataLoader from .core import _Raindrop +from .data import DatasetForRaindrop from ...classification.base import BaseNNClassifier -from ...classification.grud.data import DatasetForGRUD from ...optim.adam import Adam from ...optim.base import Optimizer from ...utils.logging import logger @@ -224,10 +224,10 @@ def fit( self, train_set: Union[dict, str], val_set: Optional[Union[dict, str]] = None, - file_type="h5py", + file_type: str = "hdf5", ) -> None: # Step 1: wrap the input data with classes Dataset and DataLoader - training_set = DatasetForGRUD(train_set, file_type=file_type) + training_set = DatasetForRaindrop(train_set, file_type=file_type) training_loader = DataLoader( training_set, batch_size=self.batch_size, @@ -236,7 +236,7 @@ def fit( ) val_loader = None if val_set is not None: - val_set = DatasetForGRUD(val_set, file_type=file_type) + val_set = DatasetForRaindrop(val_set, file_type=file_type) val_loader = DataLoader( val_set, batch_size=self.batch_size, @@ -255,10 +255,10 @@ def fit( def predict( self, test_set: Union[dict, str], - file_type: str = "h5py", + file_type: str = "hdf5", ) -> dict: self.model.eval() # set the model as eval status to freeze it. - test_set = DatasetForGRUD(test_set, return_labels=False, file_type=file_type) + test_set = DatasetForRaindrop(test_set, return_y=False, file_type=file_type) test_loader = DataLoader( test_set, batch_size=self.batch_size, @@ -284,7 +284,7 @@ def predict( def classify( self, X: Union[dict, str], - file_type: str = "h5py", + file_type: str = "hdf5", ) -> np.ndarray: """Classify the input data with the trained model. diff --git a/pypots/classification/template/core.py b/pypots/classification/template/core.py new file mode 100644 index 00000000..157cc4d9 --- /dev/null +++ b/pypots/classification/template/core.py @@ -0,0 +1,42 @@ +""" +The implementation of YourNewModel for the partially-observed time-series classification task. + +Refer to the paper "Your paper citation". + +""" + +# Created by Your Name TODO: modify the author information. +# License: BSD-3-Clause + +import torch.nn as nn + +# from ...nn.modules import some_modules + + +# TODO: define your new model here. +# It could be a neural network model or a non-neural network algorithm (e.g. written in numpy). 
+# Your model should be implemented with PyTorch and subclass torch.nn.Module if it is a neural network. +# Note that your main algorithm is defined in this class, and this class usually won't be exposed to users. +class _YourNewModel(nn.Module): + def __init__(self): + super().__init__() + + # TODO: define your model's components here. If modules in pypots.nn.modules can be reused in your model, + # you can import them and use them here. AND if you think the modules you implemented can be reused by + # other models, you can also consider to contribute them to pypots.nn.modules + self.embedding = nn.Module + self.submodule = nn.Module + self.backbone = nn.Module + + def forward(self, inputs: dict) -> dict: + # TODO: define your model's forward propagation process here. + # The input is a dict, and the output `results` should also be a dict. + output = self.backbone() # replace this with your model's process + + # TODO: `results` must contains the key `loss` which is will be used for + # backward propagation to update the model. + loss = None + results = { + "loss": loss, + } + return results diff --git a/pypots/classification/template/data.py b/pypots/classification/template/data.py index c391740e..3c4ca97e 100644 --- a/pypots/classification/template/data.py +++ b/pypots/classification/template/data.py @@ -1,7 +1,7 @@ """ Dataset class for YourNewModel. -TODO: modify the above description with your model's information. +TODO: modify the above description for your model's dataset class. """ @@ -10,17 +10,26 @@ from typing import Union, Iterable -from ...data.base import BaseDataset +from ...data.dataset import BaseDataset +# TODO: define your new dataset class here. Remove or add arguments as needed. class DatasetForYourNewModel(BaseDataset): def __init__( self, data: Union[dict, str], - return_labels: bool = True, - file_type: str = "h5py", + return_X_ori: bool, + return_X_pred: bool, + return_y: bool, + file_type: str = "hdf5", ): - super().__init__(data, return_labels, file_type) + super().__init__( + data=data, + return_X_ori=return_X_ori, + return_X_pred=return_X_pred, + return_y=return_y, + file_type=file_type, + ) def _fetch_data_from_array(self, idx: int) -> Iterable: raise NotImplementedError diff --git a/pypots/classification/template/model.py b/pypots/classification/template/model.py index 584fe1d7..40f6b252 100644 --- a/pypots/classification/template/model.py +++ b/pypots/classification/template/model.py @@ -3,6 +3,8 @@ Refer to the paper "Your paper citation". +TODO: modify the above description with your model's information. + """ # Created by Your Name TODO: modify the author information. @@ -10,40 +12,19 @@ from typing import Union, Optional -import numpy as np import torch -import torch.nn as nn + +from .core import _YourNewModel # TODO: import the base class from the classification package in PyPOTS. # Here I suppose this is a neural-network classification model. # You should make your model inherent BaseClassifier if it is not a NN. # from ..base import BaseClassifier from ..base import BaseNNClassifier - from ...optim.adam import Adam from ...optim.base import Optimizer -# TODO: define your new model here. -# It could be a neural network model or a non-neural network algorithm (e.g. written in numpy). -# Your model should be implemented with PyTorch and subclass torch.nn.Module if it is a neural network. -# Note that your main algorithm is defined in this class, and this class usually won't be exposed to users. 
-class _YourNewModel(nn.Module): - def __init__(self): - super().__init__() - - def forward(self, inputs: dict) -> dict: - # TODO: define your model's forward propagation process here. - # The input is a dict, and the output `results` should also be a dict. - # `results` must contains the key `loss` which is will be used for backward propagation to update the model. - - loss = None - results = { - "loss": loss, - } - return results - - # TODO: define your new model's wrapper here. # It should be a subclass of a base class defined in PyPOTS task packages (e.g. # BaseNNClassifier of PyPOTS classification task package). It has to implement all abstract methods of the base class. @@ -53,13 +34,13 @@ def __init__( self, # TODO: add your model's hyper-parameters here n_classes: int, - batch_size: int, - epochs: int, - patience: int, - num_workers: int = 0, + batch_size: int = 32, + epochs: int = 100, + patience: Optional[int] = None, optimizer: Optional[Optimizer] = Adam(), + num_workers: int = 0, device: Optional[Union[str, torch.device, list]] = None, - saving_path: str = None, + saving_path: Optional[str] = None, model_saving_strategy: Optional[str] = "best", ): super().__init__( @@ -76,9 +57,11 @@ def __init__( # TODO: set up your model's hyper-parameters here # set up the model - self.model = _YourNewModel() - self.model = self.model.to(self.device) + self.model = _YourNewModel( + # pass the arguments to your model + ) self._print_model_size() + self._send_model_to_given_device() # set up the optimizer self.optimizer = optimizer @@ -97,13 +80,13 @@ def fit( self, train_set: Union[dict, str], val_set: Optional[Union[dict, str]] = None, - file_type: str = "h5py", + file_type: str = "hdf5", ) -> None: raise NotImplementedError def predict( self, test_set: Union[dict, str], - file_type: str = "h5py", + file_type: str = "hdf5", ) -> dict: raise NotImplementedError diff --git a/pypots/classification/template/module.py b/pypots/classification/template/module.py deleted file mode 100644 index fa20e4cd..00000000 --- a/pypots/classification/template/module.py +++ /dev/null @@ -1,13 +0,0 @@ -""" -The implementation of the modules for YourNewModel. - -Refer to the paper "Your paper citation". - -""" - -# Created by Your Name TODO: modify the author information. -# License: BSD-3-Clause - - -# TODO: this file is not necessary. If your new model has customized layers or modules, please put them here. -# Otherwise, please delete this modules.py file, don't commit it to the repository. diff --git a/pypots/clustering/base.py b/pypots/clustering/base.py index 47f70a18..2ecc46e3 100644 --- a/pypots/clustering/base.py +++ b/pypots/clustering/base.py @@ -72,7 +72,7 @@ def fit( self, train_set: Union[dict, str], val_set: Union[dict, str] = None, - file_type: str = "h5py", + file_type: str = "hdf5", ) -> None: """Train the cluster. @@ -105,7 +105,7 @@ def fit( def predict( self, test_set: Union[dict, str], - file_type: str = "h5py", + file_type: str = "hdf5", ) -> dict: raise NotImplementedError @@ -113,7 +113,7 @@ def predict( def cluster( self, X: Union[dict, str], - file_type="h5py", + file_type: str = "hdf5", ) -> np.ndarray: """Cluster the input with the trained model. @@ -379,7 +379,7 @@ def fit( self, train_set: Union[dict, str], val_set: Union[dict, str] = None, - file_type: str = "h5py", + file_type: str = "hdf5", ) -> None: """Train the cluster. 
@@ -412,7 +412,7 @@ def fit( def predict( self, test_set: Union[dict, str], - file_type: str = "h5py", + file_type: str = "hdf5", ) -> dict: raise NotImplementedError @@ -420,7 +420,7 @@ def predict( def cluster( self, X: Union[dict, str], - file_type="h5py", + file_type: str = "hdf5", ) -> np.ndarray: """Cluster the input with the trained model. diff --git a/pypots/clustering/crli/data.py b/pypots/clustering/crli/data.py index 6025752a..cf8976a2 100644 --- a/pypots/clustering/crli/data.py +++ b/pypots/clustering/crli/data.py @@ -8,7 +8,7 @@ from typing import Union, Iterable -from ...data.base import BaseDataset +from ...data.dataset import BaseDataset class DatasetForCRLI(BaseDataset): @@ -16,7 +16,7 @@ class DatasetForCRLI(BaseDataset): Parameters ---------- - data : dict or str, + data : The dataset for model input, should be a dictionary including keys as 'X' and 'y', or a path string locating a data file. If it is a dict, X should be array-like of shape [n_samples, sequence length (time steps), n_features], @@ -25,7 +25,7 @@ class DatasetForCRLI(BaseDataset): If it is a path string, the path should point to a data file, e.g. a h5 file, which contains key-value pairs like a dict, and it has to include keys as 'X' and 'y'. - return_labels : bool, default = True, + return_y : Whether to return labels in function __getitem__() if they exist in the given data. If `True`, for example, during training of classification models, the Dataset class will return labels in __getitem__() for model input. Otherwise, labels won't be included in the data returned by __getitem__(). This parameter exists because we @@ -34,17 +34,23 @@ class DatasetForCRLI(BaseDataset): with function _fetch_data_from_file(), which works for all three stages. Therefore, we need this parameter for distinction. - file_type : str, default = "h5py" + file_type : The type of the given file if train_set and val_set are path strings. 
""" def __init__( self, data: Union[dict, str], - return_labels: bool = True, - file_type: str = "h5py", + return_y: bool = True, + file_type: str = "hdf5", ): - super().__init__(data, False, return_labels, file_type) + super().__init__( + data=data, + return_X_ori=False, + return_X_pred=False, + return_y=return_y, + file_type=file_type, + ) def _fetch_data_from_array(self, idx: int) -> Iterable: return super()._fetch_data_from_array(idx) diff --git a/pypots/clustering/crli/model.py b/pypots/clustering/crli/model.py index 8c7805a1..90651ca0 100644 --- a/pypots/clustering/crli/model.py +++ b/pypots/clustering/crli/model.py @@ -343,12 +343,10 @@ def fit( self, train_set: Union[dict, str], val_set: Optional[Union[dict, str]] = None, - file_type: str = "h5py", + file_type: str = "hdf5", ) -> None: # Step 1: wrap the input data with classes Dataset and DataLoader - training_set = DatasetForCRLI( - train_set, return_labels=False, file_type=file_type - ) + training_set = DatasetForCRLI(train_set, return_y=False, file_type=file_type) training_loader = DataLoader( training_set, batch_size=self.batch_size, @@ -358,7 +356,7 @@ def fit( val_loader = None if val_set is not None: - val_set = DatasetForCRLI(val_set, return_labels=False, file_type=file_type) + val_set = DatasetForCRLI(val_set, return_y=False, file_type=file_type) val_loader = DataLoader( val_set, batch_size=self.batch_size, @@ -377,7 +375,7 @@ def fit( def predict( self, test_set: Union[dict, str], - file_type: str = "h5py", + file_type: str = "hdf5", return_latent_vars: bool = False, ) -> dict: """Make predictions for the input data with the trained model. @@ -393,7 +391,7 @@ def predict( If it is a path string, the path should point to a data file, e.g. a h5 file, which contains key-value pairs like a dict, and it has to include keys as 'X' and 'y'. - file_type : str + file_type : The type of the given file if test_set is a path string. return_latent_vars : bool @@ -402,13 +400,13 @@ def predict( Returns ------- - result_dict : dict, + file_type : The dictionary containing the clustering results and latent variables if necessary. """ self.model.eval() # set the model as eval status to freeze it. - test_set = DatasetForCRLI(test_set, return_labels=False, file_type=file_type) + test_set = DatasetForCRLI(test_set, return_y=False, file_type=file_type) test_loader = DataLoader( test_set, batch_size=self.batch_size, @@ -448,7 +446,7 @@ def predict( def cluster( self, X: Union[dict, str], - file_type: str = "h5py", + file_type: str = "hdf5", ) -> np.ndarray: """Cluster the input with the trained model. diff --git a/pypots/clustering/template/core.py b/pypots/clustering/template/core.py new file mode 100644 index 00000000..524a7a3c --- /dev/null +++ b/pypots/clustering/template/core.py @@ -0,0 +1,42 @@ +""" +The implementation of YourNewModel for the partially-observed time-series clustering task. + +Refer to the paper "Your paper citation". + +""" + +# Created by Your Name TODO: modify the author information. +# License: BSD-3-Clause + +import torch.nn as nn + +# from ...nn.modules import some_modules + + +# TODO: define your new model here. +# It could be a neural network model or a non-neural network algorithm (e.g. written in numpy). +# Your model should be implemented with PyTorch and subclass torch.nn.Module if it is a neural network. +# Note that your main algorithm is defined in this class, and this class usually won't be exposed to users. 
+class _YourNewModel(nn.Module): + def __init__(self): + super().__init__() + + # TODO: define your model's components here. If modules in pypots.nn.modules can be reused in your model, + # you can import them and use them here. AND if you think the modules you implemented can be reused by + # other models, you can also consider to contribute them to pypots.nn.modules + self.embedding = nn.Module + self.submodule = nn.Module + self.backbone = nn.Module + + def forward(self, inputs: dict) -> dict: + # TODO: define your model's forward propagation process here. + # The input is a dict, and the output `results` should also be a dict. + output = self.backbone() # replace this with your model's process + + # TODO: `results` must contains the key `loss` which is will be used for + # backward propagation to update the model. + loss = None + results = { + "loss": loss, + } + return results diff --git a/pypots/clustering/template/data.py b/pypots/clustering/template/data.py index c391740e..3c4ca97e 100644 --- a/pypots/clustering/template/data.py +++ b/pypots/clustering/template/data.py @@ -1,7 +1,7 @@ """ Dataset class for YourNewModel. -TODO: modify the above description with your model's information. +TODO: modify the above description for your model's dataset class. """ @@ -10,17 +10,26 @@ from typing import Union, Iterable -from ...data.base import BaseDataset +from ...data.dataset import BaseDataset +# TODO: define your new dataset class here. Remove or add arguments as needed. class DatasetForYourNewModel(BaseDataset): def __init__( self, data: Union[dict, str], - return_labels: bool = True, - file_type: str = "h5py", + return_X_ori: bool, + return_X_pred: bool, + return_y: bool, + file_type: str = "hdf5", ): - super().__init__(data, return_labels, file_type) + super().__init__( + data=data, + return_X_ori=return_X_ori, + return_X_pred=return_X_pred, + return_y=return_y, + file_type=file_type, + ) def _fetch_data_from_array(self, idx: int) -> Iterable: raise NotImplementedError diff --git a/pypots/clustering/template/model.py b/pypots/clustering/template/model.py index 06a4a38b..65bc8aa9 100644 --- a/pypots/clustering/template/model.py +++ b/pypots/clustering/template/model.py @@ -3,6 +3,8 @@ Refer to the paper "Your paper citation". +TODO: modify the above description with your model's information. + """ # Created by Your Name TODO: modify the author information. @@ -10,40 +12,19 @@ from typing import Union, Optional -import numpy as np import torch -import torch.nn as nn + +from .core import _YourNewModel # TODO: import the base class from the clustering package in PyPOTS. # Here I suppose this is a neural-network clustering model. # You should make your model inherent BaseClusterer if it is not a NN. # from ..base import BaseClusterer from ..base import BaseNNClusterer - from ...optim.adam import Adam from ...optim.base import Optimizer -# TODO: define your new model here. -# It could be a neural network model or a non-neural network algorithm (e.g. written in numpy). -# Your model should be implemented with PyTorch and subclass torch.nn.Module if it is a neural network. -# Note that your main algorithm is defined in this class, and this class usually won't be exposed to users. -class _YourNewModel(nn.Module): - def __init__(self): - super().__init__() - - def forward(self, inputs: dict) -> dict: - # TODO: define your model's forward propagation process here. - # The input is a dict, and the output `results` should also be a dict. 
- # `results` must contains the key `loss` which is will be used for backward propagation to update the model. - - loss = None - results = { - "loss": loss, - } - return results - - # TODO: define your new model's wrapper here. # It should be a subclass of a base class defined in PyPOTS task packages (e.g. # BaseNNClusterer of PyPOTS clustering task package), and it has to implement all abstract methods of the base class. @@ -53,13 +34,13 @@ def __init__( self, # TODO: add your model's hyper-parameters here n_clusters: int, - batch_size: int, - epochs: int, - patience: int, - num_workers: int = 0, + batch_size: int = 32, + epochs: int = 100, + patience: Optional[int] = None, optimizer: Optional[Optimizer] = Adam(), + num_workers: int = 0, device: Optional[Union[str, torch.device, list]] = None, - saving_path: str = None, + saving_path: Optional[str] = None, model_saving_strategy: Optional[str] = "best", ): super().__init__( @@ -76,9 +57,11 @@ def __init__( # TODO: set up your model's hyper-parameters here # set up the model - self.model = _YourNewModel() - self.model = self.model.to(self.device) + self.model = _YourNewModel( + # pass the arguments to your model + ) self._print_model_size() + self._send_model_to_given_device() # set up the optimizer self.optimizer = optimizer @@ -97,13 +80,13 @@ def fit( self, train_set: Union[dict, str], val_set: Optional[Union[dict, str]] = None, - file_type: str = "h5py", + file_type: str = "hdf5", ) -> None: raise NotImplementedError def predict( self, test_set: Union[dict, str], - file_type: str = "h5py", + file_type: str = "hdf5", ) -> dict: raise NotImplementedError diff --git a/pypots/clustering/template/module.py b/pypots/clustering/template/module.py deleted file mode 100644 index fa20e4cd..00000000 --- a/pypots/clustering/template/module.py +++ /dev/null @@ -1,13 +0,0 @@ -""" -The implementation of the modules for YourNewModel. - -Refer to the paper "Your paper citation". - -""" - -# Created by Your Name TODO: modify the author information. -# License: BSD-3-Clause - - -# TODO: this file is not necessary. If your new model has customized layers or modules, please put them here. -# Otherwise, please delete this modules.py file, don't commit it to the repository. diff --git a/pypots/clustering/vader/core.py b/pypots/clustering/vader/core.py index c2e0da99..3d6df970 100644 --- a/pypots/clustering/vader/core.py +++ b/pypots/clustering/vader/core.py @@ -15,8 +15,8 @@ import torch import torch.nn as nn -from pypots.utils.metrics import calc_mse from ...nn.modules.vader import BackboneVaDER +from ...utils.metrics import calc_mse def inverse_softplus(x: np.ndarray) -> np.ndarray: diff --git a/pypots/clustering/vader/data.py b/pypots/clustering/vader/data.py index 6a098774..ea718397 100644 --- a/pypots/clustering/vader/data.py +++ b/pypots/clustering/vader/data.py @@ -8,7 +8,7 @@ from typing import Union, Iterable -from ...data.base import BaseDataset +from ...data.dataset import BaseDataset class DatasetForVaDER(BaseDataset): @@ -16,7 +16,7 @@ class DatasetForVaDER(BaseDataset): Parameters ---------- - data : dict or str, + data : The dataset for model input, should be a dictionary including keys as 'X' and 'y', or a path string locating a data file. If it is a dict, X should be array-like of shape [n_samples, sequence length (time steps), n_features], @@ -25,7 +25,7 @@ class DatasetForVaDER(BaseDataset): If it is a path string, the path should point to a data file, e.g. 
a h5 file, which contains key-value pairs like a dict, and it has to include keys as 'X' and 'y'. - return_labels : bool, default = True, + return_y : Whether to return labels in function __getitem__() if they exist in the given data. If `True`, for example, during training of classification models, the Dataset class will return labels in __getitem__() for model input. Otherwise, labels won't be included in the data returned by __getitem__(). This parameter exists because we @@ -34,17 +34,23 @@ class DatasetForVaDER(BaseDataset): with function _fetch_data_from_file(), which works for all three stages. Therefore, we need this parameter for distinction. - file_type : str, default = "h5py" + file_type : The type of the given file if train_set and val_set are path strings. """ def __init__( self, data: Union[dict, str], - return_labels: bool = True, - file_type: str = "h5py", + return_y: bool = True, + file_type: str = "hdf5", ): - super().__init__(data, False, return_labels, file_type) + super().__init__( + data=data, + return_X_ori=False, + return_X_pred=False, + return_y=return_y, + file_type=file_type, + ) def _fetch_data_from_array(self, idx: int) -> Iterable: return super()._fetch_data_from_array(idx) diff --git a/pypots/clustering/vader/model.py b/pypots/clustering/vader/model.py index 26ae9687..a9b151dc 100644 --- a/pypots/clustering/vader/model.py +++ b/pypots/clustering/vader/model.py @@ -357,12 +357,10 @@ def fit( self, train_set: Union[dict, str], val_set: Optional[Union[dict, str]] = None, - file_type: str = "h5py", + file_type: str = "hdf5", ) -> None: # Step 1: wrap the input data with classes Dataset and DataLoader - training_set = DatasetForVaDER( - train_set, return_labels=False, file_type=file_type - ) + training_set = DatasetForVaDER(train_set, return_y=False, file_type=file_type) training_loader = DataLoader( training_set, batch_size=self.batch_size, @@ -372,7 +370,7 @@ def fit( val_loader = None if val_set is not None: - val_set = DatasetForVaDER(val_set, return_labels=False, file_type=file_type) + val_set = DatasetForVaDER(val_set, return_y=False, file_type=file_type) val_loader = DataLoader( val_set, batch_size=self.batch_size, @@ -391,7 +389,7 @@ def fit( def predict( self, test_set: Union[dict, str], - file_type: str = "h5py", + file_type: str = "hdf5", return_latent_vars: bool = False, ) -> dict: """Make predictions for the input data with the trained model. @@ -407,7 +405,7 @@ def predict( If it is a path string, the path should point to a data file, e.g. a h5 file, which contains key-value pairs like a dict, and it has to include keys as 'X' and 'y'. - file_type : str + file_type : The type of the given file if test_set is a path string. return_latent_vars : bool @@ -415,12 +413,12 @@ def predict( Returns ------- - result_dict : dict, + file_type : The dictionary containing the clustering results and latent variables if necessary. """ self.model.eval() # set the model as eval status to freeze it. - test_set = DatasetForVaDER(test_set, return_labels=False, file_type=file_type) + test_set = DatasetForVaDER(test_set, return_y=False, file_type=file_type) test_loader = DataLoader( test_set, batch_size=self.batch_size, @@ -501,7 +499,7 @@ def func_to_apply( def cluster( self, X: Union[dict, str], - file_type: str = "h5py", + file_type: str = "hdf5", ) -> Union[np.ndarray]: """Cluster the input with the trained model. 
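The dataset-related hunks above consistently rename the constructor argument `return_labels` to `return_y` and switch the default `file_type` from "h5py" to "hdf5" across the classification and clustering packages. A minimal usage sketch of the refactored interface follows; it is not part of this patch, and the toy array is made up purely for illustration:

    import numpy as np
    from pypots.clustering.vader.data import DatasetForVaDER

    # hypothetical toy data: 16 samples, 24 time steps, 5 features, NaN marks missing values
    X = np.random.randn(16, 24, 5)
    X[X < -1.5] = np.nan

    dataset = DatasetForVaDER(
        data={"X": X},
        return_y=False,    # renamed from `return_labels` in this patch
        file_type="hdf5",  # default changed from "h5py" in this patch
    )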
diff --git a/pypots/data/__init__.py b/pypots/data/__init__.py index d243b50e..73b274dd 100644 --- a/pypots/data/__init__.py +++ b/pypots/data/__init__.py @@ -5,7 +5,7 @@ # Created by Wenjie Du # License: BSD-3-Clause -from .base import BaseDataset +from .dataset import BaseDataset, SUPPORTED_DATASET_FILE_FORMATS from .generating import ( gene_complete_random_walk, gene_complete_random_walk_for_anomaly_detection, @@ -21,9 +21,10 @@ from .utils import parse_delta, sliding_window __all__ = [ - # datasets + # base dataset classes "BaseDataset", - # data generation + "SUPPORTED_DATASET_FILE_FORMATS", + # dataset generation functions "gene_complete_random_walk", "gene_complete_random_walk_for_anomaly_detection", "gene_complete_random_walk_for_classification", diff --git a/pypots/data/base.py b/pypots/data/base.py deleted file mode 100644 index 096bcd08..00000000 --- a/pypots/data/base.py +++ /dev/null @@ -1,337 +0,0 @@ -""" -The base class for PyPOTS datasets. -""" - -# Created by Wenjie Du -# License: BSD-3-Clause - -from abc import abstractmethod -from typing import Union, Optional, Tuple, Iterable - -import h5py -import numpy as np -import torch -from pygrinder import fill_and_get_mask_torch -from torch.utils.data import Dataset - -from .utils import turn_data_into_specified_dtype - -# Currently we only support h5 files -SUPPORTED_DATASET_FILE_TYPE = ["h5py"] - - -class BaseDataset(Dataset): - """Base dataset class in PyPOTS. - - Parameters - ---------- - data : - The dataset for model input, should be a dictionary including keys as 'X' and 'y', - or a path string locating a data file. - If it is a dict, X should be array-like of shape [n_samples, sequence length (time steps), n_features], - which is time-series data for input, can contain missing values, and y should be array-like of shape - [n_samples], which is classification labels of X. - If it is a path string, the path should point to a data file, e.g. a h5 file, which contains - key-value pairs like a dict, and it has to include keys as 'X' and 'y'. - - return_labels : - Whether to return labels in function __getitem__() if they exist in the given data. If `True`, for example, - during training of classification models, the Dataset class will return labels in __getitem__() for model input. - Otherwise, labels won't be included in the data returned by __getitem__(). This parameter exists because we - need the defined Dataset class for all training/validating/testing stages. For those big datasets stored in h5 - files, they already have both X and y saved. But we don't read labels from the file for validating and testing - with function _fetch_data_from_file(), which works for all three stages. Therefore, we need this parameter for - distinction. - - file_type : - The type of the given file if train_set and val_set are path strings. - - """ - - def __init__( - self, - data: Union[dict, str], - return_X_ori: bool, - return_labels: bool, - file_type: str = "h5py", - ): - super().__init__() - # types and shapes had been checked after X and y input into the model - # So they are safe to use here. No need to check again. 
- - self.data = data - self.return_X_ori = return_X_ori - self.return_labels = return_labels - - if isinstance(self.data, str): # data from file - # check if the given file type is supported - assert ( - file_type in SUPPORTED_DATASET_FILE_TYPE - ), f"file_type should be one of {SUPPORTED_DATASET_FILE_TYPE}, but got {file_type}" - self.file_type = file_type - - # open the file handle - self.file_handle = self._open_file_handle() - # check if X exists in the file - assert ( - "X" in self.file_handle.keys() - ), "The given dataset file doesn't contains X. Please double check." - - else: # data from array - X = data["X"] - X_ori = None if "X_ori" not in data.keys() else data["X_ori"] - y = None if "y" not in data.keys() else data["y"] - self.X, self.X_ori, self.y = self._check_array_input(X, X_ori, y) - - if self.X_ori is not None and self.return_X_ori: - # Only when X_ori is given and fixed, we fill the missing values in X here in advance. - # Otherwise, we may need original X with missing values to generate X_ori, e.g. in DatasetForSAITS. - self.X, self.missing_mask = fill_and_get_mask_torch(self.X) - - self.X_ori, X_ori_missing_mask = fill_and_get_mask_torch(self.X_ori) - indicating_mask = X_ori_missing_mask - self.missing_mask - self.indicating_mask = indicating_mask.to(torch.float32) - else: - self.missing_mask = None - self.indicating_mask = None - # if return_X_ori is false, set X_ori to None as well - self.X_ori = None - - self.n_samples, self.n_steps, self.n_features = self._get_data_sizes() - - # set up function fetch_data() - if isinstance(self.data, str): - self.fetch_data = self._fetch_data_from_file - else: - self.fetch_data = self._fetch_data_from_array - - def _get_data_sizes(self) -> Tuple[int, int, int]: - """Determine the number of samples in the dataset and return the number. - - Returns - ------- - n_samples : - The number of the samples in the given dataset. - """ - - if isinstance(self.data, str): - if self.file_handle is None: - self.file_handle = self._open_file_handle() - n_samples = len(self.file_handle["X"]) - first_sample = self.file_handle["X"][0] - n_steps = len(first_sample) - n_features = first_sample.shape[-1] - else: - n_samples = len(self.X) - n_steps = len(self.X[0]) - n_features = self.X[0].shape[-1] - - return n_samples, n_steps, n_features - - def __len__(self) -> int: - return self.n_samples - - @staticmethod - def _check_array_input( - X: Union[np.ndarray, torch.Tensor, list], - X_ori: Union[np.ndarray, torch.Tensor, list], - y: Optional[Union[np.ndarray, torch.Tensor, list]] = None, - out_dtype: str = "tensor", - ) -> Tuple[ - Union[np.ndarray, torch.Tensor], - Union[np.ndarray, torch.Tensor], - Optional[Union[np.ndarray, torch.Tensor, list]], - ]: - """Check value type and shape of input X and y - - Parameters - ---------- - X : - Time-series data that must have a shape like [n_samples, expected_n_steps, expected_n_features]. - - X_ori : - If X is with artificial missingness, X_ori is the original X without artificial missing values. - It must have the same shape as X. If X_ori is with original missing values, should be left as NaN. - - y : - Labels of time-series samples (X) that must have a shape like [n_samples] or [n_samples, n_classes]. 
- - out_dtype : - Data type of the output, should be np.ndarray or torch.Tensor - - Returns - ------- - X : - - X_ori : - - y : - - """ - assert out_dtype in [ - "tensor", - "ndarray", - ], f'out_dtype should be "tensor" or "ndarray", but got {out_dtype}' - - # change the data type of X - X = turn_data_into_specified_dtype(X, out_dtype) - X = X.to(torch.float32) - - # check the shape of X here - X_shape = X.shape - assert len(X_shape) == 3, ( - f"input should have 3 dimensions [n_samples, seq_len, n_features]," - f"but got X: {X_shape}" - ) - if X_ori is not None: - X_ori = turn_data_into_specified_dtype(X_ori, out_dtype) - X_ori = X_ori.to(torch.float32) - assert ( - X_shape == X_ori.shape - ), f"X and X_ori must have matched shape, but got X: f{X.shape} and X_ori: {X_ori.shape}" - if y is not None: - assert len(X) == len(y), ( - f"lengths of X and y must match, " f"but got f{len(X)} and {len(y)}" - ) - y = turn_data_into_specified_dtype(y, out_dtype) - - return X, X_ori, y - - @abstractmethod - def _fetch_data_from_array(self, idx: int) -> Iterable: - """Fetch data from self.X if it is given. - - Parameters - ---------- - idx : - The index of the sample to be return. - - Returns - ------- - sample : - The collated data sample, a list including all necessary sample info. - """ - - if self.X_ori is None: - X = self.X[idx] - X, missing_mask = fill_and_get_mask_torch(X) - else: - X = self.X[idx] - missing_mask = self.missing_mask[idx] - - sample = [ - torch.tensor(idx), - X, - missing_mask, - ] - - if self.X_ori is not None and self.return_X_ori: - X_ori = self.X_ori[idx] - indicating_mask = self.indicating_mask[idx] - sample.extend([X_ori, indicating_mask]) - - if self.y is not None and self.return_labels: - sample.append(self.y[idx].to(torch.long)) - - return sample - - def _open_file_handle(self) -> h5py.File: - """Open the file handle for reading data from the file. - - Notes - ----- - This function can also help confirm if the given file and file type match. - - Returns - ------- - file_handle : - - """ - data_file_path = self.data - try: - file_handler = h5py.File( - data_file_path, - "r", - ) # set swmr=True if the h5 file need to be written into new content during reading - except ImportError: - raise ImportError( - "h5py is missing and cannot be imported. Please install it first." - ) - except FileNotFoundError as e: - raise FileNotFoundError(f"{e}") - except OSError as e: - raise TypeError( - f"{e}\n" - f"Check out the above error log. This probably is caused by file type error. " - f"Please confirm that the given file {data_file_path} is an h5 file." - ) - except Exception as e: - raise RuntimeError(e) - return file_handler - - @abstractmethod - def _fetch_data_from_file(self, idx: int) -> Iterable: - """Fetch data with the lazy-loading strategy, i.e. only loading data from the file while requesting for samples. - Here the opened file handle doesn't load the entire dataset into RAM but only load the currently accessed slice. - - Notes - ----- - Multi workers reading from h5 file is tricky, and I was confronted with a problem similar to - https://discuss.pytorch.org/t/dataloader-when-num-worker-0-there-is-bug/25643/7 in 2020, please - refer to it for more details about the problem. 
- The implementation here is referred to - https://discuss.pytorch.org/t/dataloader-when-num-worker-0-there-is-bug/25643/10 - And according to https://discuss.pytorch.org/t/dataloader-when-num-worker-0-there-is-bug/25643/37, - pytorch v1.7.1 and h5py v3.2.0 work well, so probably updating to the latest version can avoid the - issue I met. After all, this implementation may need to be updated in the near future. - - Parameters - ---------- - idx : - The index of the sample to be return. - - Returns - ------- - sample : - The collated data sample, a list including all necessary sample info. - """ - - if self.file_handle is None: - self.file_handle = self._open_file_handle() - - X = torch.from_numpy(self.file_handle["X"][idx]).to(torch.float32) - X, missing_mask = fill_and_get_mask_torch(X) - sample = [ - torch.tensor(idx), - X, - missing_mask, - ] - - if "X_ori" in self.file_handle.keys() and self.return_X_ori: - X_ori = torch.from_numpy(self.file_handle["X_ori"][idx]).to(torch.float32) - X_ori, X_ori_missing_mask = fill_and_get_mask_torch(X_ori) - indicating_mask = (X_ori_missing_mask - missing_mask).to(torch.float32) - sample.extend([X_ori, indicating_mask]) - - # if the dataset has labels and is for training, then fetch it from the file - if "y" in self.file_handle.keys() and self.return_labels: - sample.append(self.file_handle["y"][idx].to(torch.long)) - - return sample - - def __getitem__(self, idx: int) -> Iterable: - """Fetch data according to index. - - Parameters - ---------- - idx : - The index to fetch the specified sample. - - Returns - ------- - sample : - The collated data sample, a list including all necessary sample info. - """ - - sample = self.fetch_data(idx) - return sample diff --git a/pypots/data/checking.py b/pypots/data/checking.py index af22958f..4f0e7767 100644 --- a/pypots/data/checking.py +++ b/pypots/data/checking.py @@ -11,11 +11,30 @@ import h5py -def check_X_ori_in_val_set(val_set: Union[str, dict]) -> bool: - if isinstance(val_set, str): - with h5py.File(val_set, "r") as f: - return "X_ori" in f.keys() - elif isinstance(val_set, dict): - return "X_ori" in val_set.keys() +def key_in_data_set(key: str, dataset: Union[str, dict]) -> bool: + """Check if the key is in the given dataset. + The dataset could be a path to an HDF5 file or a Python dictionary. + + Parameters + ---------- + key : + The key to check. + + dataset : + The dataset to be checked. + + Returns + ------- + bool + Whether the key is in the dataset. + """ + + if isinstance(dataset, str): + with h5py.File(dataset, "r") as f: + return key in f.keys() + elif isinstance(dataset, dict): + return key in dataset.keys() else: - raise TypeError("val_set must be a str or a Python dictionary.") + raise TypeError( + f"dataset must be a str or a Python dictionary, but got {type(dataset)}" + ) diff --git a/pypots/data/dataset/__init__.py b/pypots/data/dataset/__init__.py new file mode 100644 index 00000000..a29c06f5 --- /dev/null +++ b/pypots/data/dataset/__init__.py @@ -0,0 +1,14 @@ +""" +The package including dataset classes for PyPOTS. +""" + +# Created by Wenjie Du +# License: BSD-3-Clause + +from .base import BaseDataset +from .config import SUPPORTED_DATASET_FILE_FORMATS + +__all__ = [ + "BaseDataset", + "SUPPORTED_DATASET_FILE_FORMATS", +] diff --git a/pypots/data/dataset/base.py b/pypots/data/dataset/base.py new file mode 100644 index 00000000..9388c351 --- /dev/null +++ b/pypots/data/dataset/base.py @@ -0,0 +1,465 @@ +""" +The base dataset class. 
+""" + +# Created by Wenjie Du +# License: BSD-3-Clause + +from typing import Union, Optional, Tuple, Iterable + +import h5py +import numpy as np +import torch +from numpy import ndarray +from pygrinder import fill_and_get_mask_torch +from torch import Tensor +from torch.utils.data import Dataset + +from .config import SUPPORTED_DATASET_FILE_FORMATS +from ..utils import turn_data_into_specified_dtype + + +class BaseDataset(Dataset): + """Base dataset class for models in PyPOTS. + + Parameters + ---------- + data : + The dataset for model input, should be a dictionary or + a path string locating a data file that is in supported formats. + If it is a dict, 'X' is mandatory and 'X_ori', 'X_pred', and 'y' are optional. + ``X`` is time-series data for input and could contain missing values. + It should be array-like of shape [n_samples, n_steps (sequence length), n_features]. + ``X_ori`` is optional. If ``X`` is constructed from ``X_ori`` with specially designed artificial missingness, + your model may need ``X_ori`` for evaluation or loss calculation during training (e.g. SAITS). + It should have the same shape as ``X``. + ``X_pred`` is optional, and it is the forecasting results for the model to predict in forecasting tasks. + ``X_pred`` should be array-like of shape [n_samples, n_steps (sequence length), n_features], and its shape + could different from ``X``. But remember that ``X_pred`` contains time series forecasting results of ``X``, + hence it has the same number of samples as ``X``, i.e. n_samples of them are the same, but their n_steps + and n_features could be different. ``X_pred`` could have missing values as well as ``X``. + ``y`` should be array-like of shape [n_samples], which is classification labels of X. + If it is a path string, the path should point to a data file, e.g. a h5 file, which contains + key-value pairs like a dict, and it has to include keys as 'X', etc. + + return_X_ori : + Whether to return X_ori and indicating_mask in function __getitem__() if it is given. If `True`, for example, + during training of models that need the original X, the Dataset class will return X_ori in __getitem__() for + model input. Otherwise, X_ori and indicating mask won't be included in the data list returned by __getitem__(). + + return_X_pred : + Whether to return X_pred and X_pred_missing_mask in function __getitem__() if it is given. + If `True`, for example, during training of forecasting models, the Dataset class will return forecasting X + in __getitem__() for model input. Otherwise, X_pred and its missing mask X_pred_missing_mask won't be included + in the data list returned by __getitem__(). + + return_y : + Whether to return y (i.e. labels) in function __getitem__() if they exist in the given data. If `True`, + for example, during training of classification models, the Dataset class will return labels in __getitem__() + for model input. Otherwise, labels won't be included in the data returned by __getitem__(). + This parameter exists because we need the defined Dataset class for all training/validating/testing stages. + For those big datasets stored in h5 files, they already have both X and y saved. + But we don't read labels from the file for validating and testing with function _fetch_data_from_file(), + which works for all three stages. Therefore, we need this parameter for distinction. + + file_type : + The type of the given file if train_set and val_set are path strings. 
+ + """ + + def __init__( + self, + data: Union[dict, str], + return_X_ori: bool, + return_X_pred: bool, + return_y: bool, + file_type: str = "hdf5", + ): + super().__init__() + # types and shapes had been checked after X and y input into the model + # So they are safe to use here. No need to check again. + + self.data = data + self.return_X_ori = return_X_ori + self.return_X_pred = return_X_pred + self.return_y = return_y + self.file_type = file_type + + # initialize the following attributes + self.X = None + self.X_ori = None + self.missing_mask = None + self.indicating_mask = None + self.X_pred = None + self.X_pred_missing_mask = None + self.y = None + self.file_handle = None + self.fetch_data = None + self.n_samples: int = 0 # num of the samples in the dataset + self.n_steps: int = 0 # num of the time steps in each sample + self.n_features: int = 0 # num of the features in each sample + self.n_pred_steps: int = 0 # num of the time steps in each forecasting sample + self.n_pred_features: int = 0 # num of the features in each forecasting sample + + # check the data type and set up the fetch_data function + if isinstance(self.data, str): # data from file + # check if the given file type is supported + assert ( + file_type in SUPPORTED_DATASET_FILE_FORMATS + ), f"file_type should be one of {SUPPORTED_DATASET_FILE_FORMATS}, but got {file_type}" + self.file_type = file_type + + # open the file handle + self.file_handle = self._open_file_handle() + # check if X exists in the file + assert ( + "X" in self.file_handle.keys() + ), "The given dataset file doesn't contains X. Please double check." + # check whether X_ori, X_pred, and y exist in the file if they are required + if self.return_X_ori: + assert ( + "X_ori" in self.file_handle.keys() + ), "The given dataset file doesn't contains X_ori. Please double check." + if self.return_X_pred: + assert ( + "X_pred" in self.file_handle.keys() + ), "The given dataset file doesn't contains X_pred. Please double check." + if self.return_y: + assert ( + "y" in self.file_handle.keys() + ), "The given dataset file doesn't contains y. Please double check." + + # set up the function fetch_data() to fetch data from file + self.fetch_data = self._fetch_data_from_file + + else: # data from array + # check if X exists in the dictionary + assert ( + "X" in self.data.keys() + ), "The given dataset dictionary doesn't contains X. Please double check." + # check whether X_ori, X_pred, and y exist in the file if they are required + if self.return_X_ori: + assert ( + "X_ori" in self.data.keys() + ), "The given dataset dictionary doesn't contains X_ori. Please double check." + if self.return_X_pred: + assert ( + "X_pred" in self.data.keys() + ), "The given dataset dictionary doesn't contains X_pred. Please double check." + if self.return_y: + assert ( + "y" in self.data.keys() + ), "The given dataset dictionary doesn't contains y. Please double check." + + X = data["X"] + X_ori = None if "X_ori" not in data.keys() else data["X_ori"] + X_pred = None if "X_pred" not in data.keys() else data["X_pred"] + y = None if "y" not in data.keys() else data["y"] + self.X, self.X_ori, self.X_pred, self.y = self._check_array_input( + X, X_ori, X_pred, y, "tensor" + ) + + if self.return_X_ori: + # Only when X_ori is given and fixed, we fill the missing values in X here in advance. + # Otherwise, we may need original X with missing values to generate X_ori, e.g. in DatasetForSAITS. 
+ self.X, self.missing_mask = fill_and_get_mask_torch(self.X) + + self.X_ori, X_ori_missing_mask = fill_and_get_mask_torch(self.X_ori) + indicating_mask = X_ori_missing_mask - self.missing_mask + self.indicating_mask = indicating_mask.to(torch.float32) + + if self.return_X_pred: + self.X_pred, self.X_pred_missing_mask = fill_and_get_mask_torch( + self.X_pred + ) + + # set up the function fetch_data() to fetch data from array + self.fetch_data = self._fetch_data_from_array + + # get the sizes of the dataset + ( + self.n_samples, + self.n_steps, + self.n_features, + self.n_pred_steps, + self.n_pred_features, + ) = self._get_data_sizes() + + def _get_data_sizes(self) -> Tuple[int, ...]: + """Detect the data sample sizes in the dataset and return the numbers. + + Returns + ------- + n_samples : + The number of the samples in the given dataset. + + n_steps : + The number of each sample's time steps in the given dataset. + + n_features : + The number of each sample's features in the given dataset. + + n_pred_steps : + The number of each sample's forecasting time steps in the given dataset. + Return as 0 if the dataset does not contain X_pred which includes data samples for forecasting tasks. + + n_pred_features : + The number of each sample's forecasting features in the given dataset. + Return as 0 if the dataset does not contain X_pred which includes data samples for forecasting tasks. + """ + + # initialize the sizes + n_samples, n_steps, n_features, n_pred_steps, n_pred_features = 0, 0, 0, 0, 0 + + if isinstance(self.data, str): + if self.file_handle is None: + self.file_handle = self._open_file_handle() + n_samples = len(self.file_handle["X"]) + first_sample = self.file_handle["X"][0] + n_steps = len(first_sample) + n_features = first_sample.shape[-1] + + if self.return_X_pred: + first_pred_sample = self.file_handle["X_pred"][0] + n_pred_steps = len(first_pred_sample) + n_pred_features = first_pred_sample.shape[-1] + else: + n_samples = len(self.X) + n_steps = len(self.X[0]) + n_features = self.X[0].shape[-1] + + if self.return_X_pred: + n_pred_steps = len(self.X_pred[0]) + n_pred_features = self.X_pred[0].shape[-1] + + return n_samples, n_steps, n_features, n_pred_steps, n_pred_features + + def __len__(self) -> int: + return self.n_samples + + @staticmethod + def _check_array_input( + X: Union[np.ndarray, torch.Tensor], + X_ori: Optional[Union[np.ndarray, torch.Tensor]] = None, + X_pred: Optional[Union[np.ndarray, torch.Tensor]] = None, + y: Optional[Union[np.ndarray, torch.Tensor]] = None, + out_dtype: str = "tensor", + ) -> Tuple[ + Union[Tensor, ndarray], + Optional[Union[Tensor, ndarray]], + Optional[Union[Tensor, ndarray]], + Optional[Union[Tensor, ndarray]], + ]: + """Check value type and shape of input X and y + + Parameters + ---------- + X : + Time-series data that must have a shape like [n_samples, expected_n_steps, expected_n_features]. + + X_ori : + If X is with artificial missingness, X_ori is the original X without artificial missing values. + It must have the same shape as X. If X_ori is with original missing values, should be left as NaN. + + y : + Labels of time-series samples (X) that must have a shape like [n_samples] or [n_samples, n_classes]. 
+ + out_dtype : + Data type of the output, should be np.ndarray or torch.Tensor + + Returns + ------- + X : + + X_ori : + + X_pred : + + y : + + """ + assert out_dtype in [ + "tensor", + "ndarray", + ], f'out_dtype should be "tensor" or "ndarray", but got {out_dtype}' + + # change the data type of X + X = turn_data_into_specified_dtype(X, out_dtype) + X = X.to(torch.float32) if out_dtype == "tensor" else X + + # check the shape of X here + X_shape = X.shape + assert len(X_shape) == 3, ( + f"input should have 3 dimensions [n_samples, seq_len, n_features]," + f"but got X: {X_shape}" + ) + if X_ori is not None: + X_ori = turn_data_into_specified_dtype(X_ori, out_dtype) + X_ori = X_ori.to(torch.float32) if out_dtype == "tensor" else X_ori + assert ( + X_shape == X_ori.shape + ), f"X and X_ori must have matched shape, but got X: f{X.shape} and X_ori: {X_ori.shape}" + + if X_pred is not None: + X_pred = turn_data_into_specified_dtype(X_pred, out_dtype) + X_pred = X_pred.to(torch.float32) if out_dtype == "tensor" else X_pred + assert len(X) == len( + X_pred + ), f"X and X_pred must have the same number of samples, but got X: f{X.shape} and X_pred: {X_pred.shape}" + + if y is not None: + assert len(X) == len(y), ( + f"lengths of X and y must match, " f"but got f{len(X)} and {len(y)}" + ) + y = turn_data_into_specified_dtype(y, out_dtype) + y = y.to(torch.long) if out_dtype == "tensor" else y + + return X, X_ori, X_pred, y + + def _fetch_data_from_array(self, idx: int) -> Iterable: + """Fetch data from self.X if it is given. + + Parameters + ---------- + idx : + The index of the sample to be return. + + Returns + ------- + sample : + The collated data sample, a list including all necessary sample info. + """ + + X = self.X[idx] + + if self.return_X_ori: + # if X_ori is given, fetch missing mask from self.missing_mask that has been created in __init__() + missing_mask = self.missing_mask[idx] + else: + X, missing_mask = fill_and_get_mask_torch(X) + + sample = [ + torch.tensor(idx), + X, + missing_mask, + ] + + if self.return_X_ori: + X_ori = self.X_ori[idx] + indicating_mask = self.indicating_mask[idx] + sample.extend([X_ori, indicating_mask]) + + if self.return_X_pred: + X_pred = self.X_pred[idx] + pred_missing_mask = self.X_pred[idx] + sample.extend([X_pred, pred_missing_mask]) + + if self.return_y: + sample.append(self.y[idx].to(torch.long)) + + return sample + + def _open_file_handle(self) -> h5py.File: + """Open the file handle for reading data from the file. + + Notes + ----- + This function can also help confirm if the given file and file type match. + + Returns + ------- + file_handle : + + """ + data_file_path = self.data + try: + file_handler = h5py.File( + data_file_path, + "r", + ) # set swmr=True if the h5 file need to be written into new content during reading + except ImportError: + raise ImportError( + "h5py is missing and cannot be imported. Please install it first." + ) + except FileNotFoundError as e: + raise FileNotFoundError(f"{e}") + except OSError as e: + raise TypeError( + f"{e}\n" + f"Check out the above error log. This probably is caused by file type error. " + f"Please confirm that the given file {data_file_path} is an h5 file." + ) + except Exception as e: + raise RuntimeError(e) + return file_handler + + def _fetch_data_from_file(self, idx: int) -> Iterable: + """Fetch data with the lazy-loading strategy, i.e. only loading data from the file while requesting for samples. 
+ Here the opened file handle doesn't load the entire dataset into RAM but only load the currently accessed slice. + + Notes + ----- + Multi workers reading from h5 file is tricky, and I was confronted with a problem similar to + https://discuss.pytorch.org/t/dataloader-when-num-worker-0-there-is-bug/25643/7 in 2020, please + refer to it for more details about the problem. + The implementation here is referred to + https://discuss.pytorch.org/t/dataloader-when-num-worker-0-there-is-bug/25643/10 + And according to https://discuss.pytorch.org/t/dataloader-when-num-worker-0-there-is-bug/25643/37, + pytorch v1.7.1 and h5py v3.2.0 work well, so probably updating to the latest version can avoid the + issue I met. After all, this implementation may need to be updated in the near future. + + Parameters + ---------- + idx : + The index of the sample to be return. + + Returns + ------- + sample : + The collated data sample, a list including all necessary sample info. + """ + + if self.file_handle is None: + self.file_handle = self._open_file_handle() + + X = torch.from_numpy(self.file_handle["X"][idx]).to(torch.float32) + X, missing_mask = fill_and_get_mask_torch(X) + sample = [ + torch.tensor(idx), + X, + missing_mask, + ] + + if self.return_X_ori: + X_ori = torch.from_numpy(self.file_handle["X_ori"][idx]).to(torch.float32) + X_ori, X_ori_missing_mask = fill_and_get_mask_torch(X_ori) + indicating_mask = (X_ori_missing_mask - missing_mask).to(torch.float32) + sample.extend([X_ori, indicating_mask]) + + if self.return_X_pred: + X_pred = torch.from_numpy(self.file_handle["X_pred"][idx]).to(torch.float32) + X_pred, X_pred_missing_mask = fill_and_get_mask_torch(X_pred) + sample.extend([X_pred, X_pred_missing_mask]) + + # if the dataset has labels and is for training, then fetch it from the file + if self.return_y: + sample.append(self.file_handle["y"][idx].to(torch.long)) + + return sample + + def __getitem__(self, idx: int) -> Iterable: + """Fetch data according to index. + + Parameters + ---------- + idx : + The index to fetch the specified sample. + + Returns + ------- + sample : + The collated data sample, a list including all necessary sample info. + """ + + sample = self.fetch_data(idx) + return sample diff --git a/pypots/data/dataset/config.py b/pypots/data/dataset/config.py new file mode 100644 index 00000000..c8ec59cb --- /dev/null +++ b/pypots/data/dataset/config.py @@ -0,0 +1,11 @@ +""" +This module contains the configuration for the dataset module. 
+""" + +# Created by Wenjie Du +# License: BSD-3-Clause + +# Currently we only support h5 files +SUPPORTED_DATASET_FILE_FORMATS = [ + "hdf5", +] diff --git a/pypots/data/generating.py b/pypots/data/generating.py index 4094489c..1330128d 100644 --- a/pypots/data/generating.py +++ b/pypots/data/generating.py @@ -272,7 +272,9 @@ def gene_random_walk( if missing_rate > 0: # create random missing values + train_X_ori = train_X train_X = mcar(train_X, missing_rate) + val_X_ori = val_X val_X = mcar(val_X, missing_rate) # test set is left to mask after normalization @@ -302,21 +304,18 @@ def gene_random_walk( } if missing_rate > 0: - # mask values in the validation set as ground truth - val_X_ori = val_X - val_X = mcar(val_X, missing_rate) - # mask values in the test set as ground truth test_X_ori = test_X - test_X = mcar(test_X, 0.3) + test_X = mcar(test_X, missing_rate) + data["train_X"] = train_X + data["train_X_ori"] = train_X_ori data["val_X"] = val_X data["val_X_ori"] = val_X_ori # test_X is for model input data["test_X"] = test_X - # test_X_ori is for error calc, not for model input, hence mustn't have NaNs - data["test_X_ori"] = np.nan_to_num(test_X_ori) + data["test_X_ori"] = test_X_ori data["test_X_indicating_mask"] = ~np.isnan(test_X_ori) ^ ~np.isnan(test_X) return data diff --git a/pypots/data/utils.py b/pypots/data/utils.py index e01f744c..a33e2d01 100644 --- a/pypots/data/utils.py +++ b/pypots/data/utils.py @@ -136,8 +136,9 @@ def parse_delta( Parameters ---------- - missing_mask : shape of [n_steps, n_features] or [n_samples, n_steps, n_features] + missing_mask : Binary masks indicate missing data (0 means missing values, 1 means observed values). + Shape of [n_steps, n_features] or [n_samples, n_steps, n_features]. Returns ------- diff --git a/pypots/forecasting/__init__.py b/pypots/forecasting/__init__.py index d54032af..66efcf67 100644 --- a/pypots/forecasting/__init__.py +++ b/pypots/forecasting/__init__.py @@ -6,7 +6,9 @@ # License: BSD-3-Clause from .bttf import BTTF +from .csdi import CSDI __all__ = [ "BTTF", + "CSDI", ] diff --git a/pypots/forecasting/base.py b/pypots/forecasting/base.py index 2cdf641d..f1ef2e8f 100644 --- a/pypots/forecasting/base.py +++ b/pypots/forecasting/base.py @@ -67,7 +67,7 @@ def fit( self, train_set: Union[dict, str], val_set: Optional[Union[dict, str]] = None, - file_type: str = "h5py", + file_type: str = "hdf5", ) -> None: """Train the classifier on the given data. @@ -99,7 +99,7 @@ def fit( def predict( self, test_set: Union[dict, str], - file_type: str = "h5py", + file_type: str = "hdf5", ) -> dict: raise NotImplementedError @@ -107,7 +107,7 @@ def predict( def forecast( self, X: dict or str, - file_type: str = "h5py", + file_type: str = "hdf5", ) -> np.ndarray: """Forecast the future the input with the trained model. @@ -200,7 +200,7 @@ def __init__( ) @abstractmethod - def _assemble_input_for_training(self, data) -> dict: + def _assemble_input_for_training(self, data: list) -> dict: """Assemble the given data into a dictionary for training input. Parameters @@ -216,7 +216,7 @@ def _assemble_input_for_training(self, data) -> dict: raise NotImplementedError @abstractmethod - def _assemble_input_for_validating(self, data) -> dict: + def _assemble_input_for_validating(self, data: list) -> dict: """Assemble the given data into a dictionary for validating input. 
Parameters @@ -232,7 +232,7 @@ def _assemble_input_for_validating(self, data) -> dict: raise NotImplementedError @abstractmethod - def _assemble_input_for_testing(self, data) -> dict: + def _assemble_input_for_testing(self, data: list) -> dict: """Assemble the given data into a dictionary for testing input. Notes @@ -380,7 +380,7 @@ def fit( self, train_set: Union[dict, str], val_set: Optional[Union[dict, str]] = None, - file_type: str = "h5py", + file_type: str = "hdf5", ) -> None: """Train the classifier on the given data. @@ -412,7 +412,7 @@ def fit( def predict( self, test_set: Union[dict, str], - file_type: str = "h5py", + file_type: str = "hdf5", ) -> dict: raise NotImplementedError @@ -420,7 +420,7 @@ def predict( def forecast( self, X: dict or str, - file_type: str = "h5py", + file_type: str = "hdf5", ) -> np.ndarray: """Forecast the future the input with the trained model. diff --git a/pypots/forecasting/bttf/model.py b/pypots/forecasting/bttf/model.py index 6c81b995..ab530058 100644 --- a/pypots/forecasting/bttf/model.py +++ b/pypots/forecasting/bttf/model.py @@ -97,7 +97,7 @@ def fit( self, train_set: Union[dict, str], val_set: Optional[Union[dict, str]] = None, - file_type="h5py", + file_type: str = "hdf5", ) -> None: """Train the forecaster on the given data. @@ -112,7 +112,7 @@ def fit( def predict( self, test_set: Union[dict, str], - file_type: str = "h5py", + file_type: str = "hdf5", ) -> dict: assert not isinstance( test_set, str @@ -140,7 +140,7 @@ def predict( def forecast( self, X: Union[dict, str], - file_type: str = "h5py", + file_type: str = "hdf5", ) -> np.ndarray: """Forecast the future the input with the trained model. diff --git a/pypots/forecasting/csdi/__init__.py b/pypots/forecasting/csdi/__init__.py new file mode 100644 index 00000000..fbaae9a5 --- /dev/null +++ b/pypots/forecasting/csdi/__init__.py @@ -0,0 +1,19 @@ +""" +The implementation of CSDI for the partially-observed time-series forecasting task. + +Refer to the paper +`Yusuke Tashiro, Jiaming Song, Yang Song, and Stefano Ermon. +CSDI: Conditional Score-based Diffusion Models for Probabilistic Time Series Imputation. +In NeurIPS, 2021. 
+`_ + +""" + +# Created by Wenjie Du +# License: BSD-3-Clause + +from .model import CSDI + +__all__ = [ + "CSDI", +] diff --git a/pypots/forecasting/csdi/core.py b/pypots/forecasting/csdi/core.py new file mode 100644 index 00000000..c66c99fd --- /dev/null +++ b/pypots/forecasting/csdi/core.py @@ -0,0 +1,141 @@ +# Created by Wenjie Du +# License: BSD-3-Clause + +import torch +import torch.nn as nn + +from ...nn.modules.csdi import BackboneCSDI + + +class _CSDI(nn.Module): + def __init__( + self, + n_features, + n_pred_features, + n_layers, + n_heads, + n_channels, + d_time_embedding, + d_feature_embedding, + d_diffusion_embedding, + is_unconditional, + n_diffusion_steps, + schedule, + beta_start, + beta_end, + ): + super().__init__() + + self.n_features = n_features + self.n_pred_features = n_pred_features + self.d_time_embedding = d_time_embedding + self.is_unconditional = is_unconditional + + self.embed_layer = nn.Embedding( + num_embeddings=n_features, + embedding_dim=d_feature_embedding, + ) + self.backbone = BackboneCSDI( + n_layers, + n_heads, + n_channels, + n_pred_features, + d_time_embedding, + d_feature_embedding, + d_diffusion_embedding, + is_unconditional, + n_diffusion_steps, + schedule, + beta_start, + beta_end, + ) + + @staticmethod + def time_embedding(pos, d_model=128): + pe = torch.zeros(pos.shape[0], pos.shape[1], d_model).to(pos.device) + position = pos.unsqueeze(2) + div_term = 1 / torch.pow( + 10000.0, torch.arange(0, d_model, 2, device=pos.device) / d_model + ) + pe[:, :, 0::2] = torch.sin(position * div_term) + pe[:, :, 1::2] = torch.cos(position * div_term) + return pe + + def get_side_info(self, observed_tp, cond_mask, feature_id): + B, K, L = cond_mask.shape + device = observed_tp.device + time_embed = self.time_embedding( + observed_tp, self.d_time_embedding + ) # (B,L,emb) + time_embed = time_embed.to(device) + time_embed = time_embed.unsqueeze(2).expand(-1, -1, self.n_pred_features, -1) + + if self.n_pred_features == self.n_features: + feature_embed = self.embed_layer( + torch.arange(self.n_pred_features).to(device) + ) # (K,emb) + feature_embed = feature_embed.unsqueeze(0).unsqueeze(0).expand(B, L, -1, -1) + else: + feature_embed = ( + self.embed_layer(feature_id).unsqueeze(1).expand(-1, L, -1, -1) + ) + + side_info = torch.cat( + [time_embed, feature_embed], dim=-1 + ) # (B,L,K,emb+d_feature_embedding) + side_info = side_info.permute(0, 3, 2, 1) # (B,*,K,L) + + if not self.is_unconditional: + side_mask = cond_mask.unsqueeze(1) # (B,1,K,L) + side_info = torch.cat([side_info, side_mask], dim=1) + + return side_info + + def forward(self, inputs, training=True, n_sampling_times=1): + results = {} + if training: # for training + (observed_data, indicating_mask, cond_mask, observed_tp, feature_id) = ( + inputs["X_ori"], + inputs["indicating_mask"], + inputs["cond_mask"], + inputs["observed_tp"], + inputs["feature_id"], + ) + side_info = self.get_side_info(observed_tp, cond_mask, feature_id) + training_loss = self.backbone.calc_loss( + observed_data, cond_mask, indicating_mask, side_info, training + ) + results["loss"] = training_loss + elif not training and n_sampling_times == 0: # for validating + (observed_data, indicating_mask, cond_mask, observed_tp, feature_id) = ( + inputs["X_ori"], + inputs["indicating_mask"], + inputs["cond_mask"], + inputs["observed_tp"], + inputs["feature_id"], + ) + side_info = self.get_side_info(observed_tp, cond_mask, feature_id) + validating_loss = self.backbone.calc_loss_valid( + observed_data, cond_mask, indicating_mask, side_info, 
training + ) + results["loss"] = validating_loss + elif not training and n_sampling_times > 0: # for testing + observed_data, cond_mask, observed_tp, feature_id = ( + inputs["X"], + inputs["cond_mask"], + inputs["observed_tp"], + inputs["feature_id"], + ) + side_info = self.get_side_info(observed_tp, cond_mask, feature_id) + samples = self.backbone( + observed_data, cond_mask, side_info, n_sampling_times + ) # (n_samples, n_sampling_times, n_features, n_steps) + repeated_obs = observed_data.unsqueeze(1).repeat(1, n_sampling_times, 1, 1) + repeated_mask = cond_mask.unsqueeze(1).repeat(1, n_sampling_times, 1, 1) + forecasting = repeated_obs + samples * (1 - repeated_mask) + + results["forecasting_data"] = forecasting.permute( + 0, 1, 3, 2 + ) # (n_samples, n_sampling_times, n_steps, n_features) + + return results diff --git a/pypots/forecasting/csdi/data.py b/pypots/forecasting/csdi/data.py new file mode 100644 index 00000000..d39bfb92 --- /dev/null +++ b/pypots/forecasting/csdi/data.py @@ -0,0 +1,365 @@ +""" + +""" + +# Created by Wenjie Du +# License: BSD-3-Clause + +from typing import Union, Iterable + +import numpy as np +import torch +from pygrinder import fill_and_get_mask_torch + +from ...data.dataset import BaseDataset + + +class DatasetForForecastingCSDI(BaseDataset): + """Dataset for CSDI forecasting model.""" + + def __init__( + self, + data: Union[dict, str], + file_type: str = "hdf5", + ): + super().__init__( + data=data, + return_X_ori=False, + return_X_pred=True, + return_y=False, + file_type=file_type, + ) + + def sample_features(self, observed_data, observed_mask, feature_id, gt_mask): + ind = np.arange(self.n_pred_features) + np.random.shuffle(ind) + + extracted_data = observed_data[:, ind[: self.n_features]] + extracted_mask = observed_mask[:, ind[: self.n_features]] + extracted_feature_id = feature_id[ind[: self.n_features]] + extracted_gt_mask = gt_mask[:, ind[: self.n_features]] + + return extracted_data, extracted_mask, extracted_feature_id, extracted_gt_mask + + def _fetch_data_from_array(self, idx: int) -> Iterable: + """Fetch data according to index. + + Parameters + ---------- + idx : + The index to fetch the specified sample. + + Returns + ------- + sample : + A list contains + + index : int tensor, + The index of the sample. + + observed_data : tensor, + Time-series data with all observed values for model input. + + indicating_mask : tensor, + The mask records all artificially missing values to the model. + + cond_mask : tensor, + The mask records all originally and artificially missing values to the model. + + observed_tp : tensor, + The time points (timestamp) of the observed data. 
+ + """ + + feature_id = torch.arange(self.n_pred_features) + observed_data = self.X[idx] + observed_data, observed_mask = fill_and_get_mask_torch(observed_data) + + # apply specifically given mask or the hist masking strategy, rather than the random masking strategy + if "for_pattern_mask" in self.data.keys(): + for_pattern_mask = torch.from_numpy(self.data["for_pattern_mask"][idx]).to( + torch.float32 + ) + else: + previous_sample = self.X[idx - 1] + for_pattern_mask = (~torch.isnan(previous_sample)).to(torch.float32) + cond_mask = observed_mask * for_pattern_mask + + indicating_mask = observed_mask - cond_mask + + if self.n_pred_features > self.n_features: + ( + observed_data, + observed_mask, + feature_id, + cond_mask, + ) = self.sample_features( + observed_data, observed_mask, feature_id, cond_mask + ) + + X_pred = self.X_pred[idx] + X_pred_missing_mask = self.X_pred_missing_mask[idx] + + observed_data = torch.concat([observed_data, X_pred], dim=0) + indicating_mask = torch.concat([indicating_mask, X_pred_missing_mask], dim=0) + cond_mask = torch.concat([cond_mask, torch.zeros(X_pred.shape)], dim=0) + observed_tp = torch.arange( + 0, self.n_steps + self.n_pred_steps, dtype=torch.float32 + ) + + sample = [ + torch.tensor(idx), + observed_data, + indicating_mask, + cond_mask, + observed_tp, + feature_id, + ] + + if self.return_y: + sample.append(self.y[idx].to(torch.long)) + + return sample + + def _fetch_data_from_file(self, idx: int) -> Iterable: + """Fetch data with the lazy-loading strategy, i.e. only loading data from the file while requesting for samples. + Here the opened file handle doesn't load the entire dataset into RAM but only load the currently accessed slice. + + Parameters + ---------- + idx : + The index of the sample to be return. + + Returns + ------- + sample : + A list contains + + index : int tensor, + The index of the sample. + + observed_data : tensor, + Time-series data with all observed values for model input. + + indicating_mask : tensor, + The mask records all artificially missing values to the model. + + cond_mask : tensor, + The mask records all originally and artificially missing values to the model. + + observed_tp : tensor, + The time points (timestamp) of the observed data. 
+ + """ + + if self.file_handle is None: + self.file_handle = self._open_file_handle() + + feature_id = torch.arange(self.n_pred_features) + observed_data = torch.from_numpy(self.file_handle["X"][idx]).to(torch.float32) + observed_data, observed_mask = fill_and_get_mask_torch(observed_data) + + # apply specifically given mask or the hist masking strategy, rather than the random masking strategy + if "for_pattern_mask" in self.file_handle.keys(): + for_pattern_mask = torch.from_numpy( + self.file_handle["for_pattern_mask"][idx] + ).to(torch.float32) + else: + previous_sample = torch.from_numpy(self.file_handle["X"][idx - 1]).to( + torch.float32 + ) + for_pattern_mask = (~torch.isnan(previous_sample)).to(torch.float32) + cond_mask = observed_mask * for_pattern_mask + + indicating_mask = observed_mask - cond_mask + + if self.n_pred_features > self.n_features: + ( + observed_data, + observed_mask, + feature_id, + cond_mask, + ) = self.sample_features( + observed_data, observed_mask, feature_id, cond_mask + ) + + X_pred = torch.from_numpy(self.file_handle["X_pred"][idx]).to(torch.float32) + X_pred, X_pred_missing_mask = fill_and_get_mask_torch(X_pred) + + observed_data = torch.concat([observed_data, X_pred], dim=0) + indicating_mask = torch.concat([indicating_mask, X_pred_missing_mask], dim=0) + cond_mask = torch.concat([cond_mask, torch.zeros(X_pred.shape)], dim=0) + observed_tp = torch.arange( + 0, self.n_steps + self.n_pred_steps, dtype=torch.float32 + ) + + sample = [ + torch.tensor(idx), + observed_data, + indicating_mask, + cond_mask, + observed_tp, + feature_id, + ] + + if self.return_y: + sample.append(torch.tensor(self.file_handle["y"][idx], dtype=torch.long)) + + return sample + + +class TestDatasetForForecastingCSDI(DatasetForForecastingCSDI): + """Test dataset for CSDI forecasting model.""" + + def __init__( + self, + data: Union[dict, str], + n_pred_steps: int, + n_pred_features: int, + file_type: str = "hdf5", + ): + super().__init__( + data=data, + file_type=file_type, + ) + self.n_pred_steps = n_pred_steps + self.n_pred_features = n_pred_features + + def _fetch_data_from_array(self, idx: int) -> Iterable: + """Fetch data according to index. + + Parameters + ---------- + idx : + The index to fetch the specified sample. + + Returns + ------- + sample : + A list contains + + index : int tensor, + The index of the sample. + + observed_data : tensor, + Time-series data with all observed values for model input. + + cond_mask : tensor, + The mask records missing values to the model. + + observed_tp : tensor, + The time points (timestamp) of the observed data. 
+ """ + + feature_id = torch.arange(self.n_pred_features) + observed_data = self.X[idx] + observed_data, observed_mask = fill_and_get_mask_torch(observed_data) + cond_mask = observed_mask + + if self.n_pred_features > self.n_features: + ( + observed_data, + observed_mask, + feature_id, + cond_mask, + ) = self.sample_features( + observed_data, observed_mask, feature_id, cond_mask + ) + + observed_data = torch.concat( + [observed_data, torch.zeros([self.n_pred_steps, self.n_pred_features])], + dim=0, + ) + + cond_mask = torch.concat( + [cond_mask, torch.zeros([self.n_pred_steps, self.n_pred_features])], dim=0 + ) + observed_tp = torch.arange( + 0, self.n_steps + self.n_pred_steps, dtype=torch.float32 + ) + + sample = [ + torch.tensor(idx), + observed_data, + cond_mask, + observed_tp, + feature_id, + ] + + if self.return_y: + sample.append(self.y[idx].to(torch.long)) + + return sample + + def _fetch_data_from_file(self, idx: int) -> Iterable: + """Fetch data with the lazy-loading strategy, i.e. only loading data from the file while requesting for samples. + Here the opened file handle doesn't load the entire dataset into RAM but only load the currently accessed slice. + + Parameters + ---------- + idx : + The index of the sample to be return. + + Returns + ------- + sample : + A list contains + + index : int tensor, + The index of the sample. + + observed_data : tensor, + Time-series data with all observed values for model input. + + cond_mask : tensor, + The mask records missing values to the model. + + observed_tp : tensor, + The time points (timestamp) of the observed data. + + """ + + if self.file_handle is None: + self.file_handle = self._open_file_handle() + + feature_id = torch.arange(self.n_pred_features) + observed_data = torch.from_numpy(self.file_handle["X"][idx]).to(torch.float32) + observed_data, observed_mask = fill_and_get_mask_torch(observed_data) + cond_mask = observed_mask + + if self.n_pred_features > self.n_features: + ( + observed_data, + observed_mask, + feature_id, + cond_mask, + ) = self.sample_features( + observed_data, observed_mask, feature_id, cond_mask + ) + + observed_data = torch.concat( + [observed_data, torch.zeros([self.n_pred_steps, self.n_pred_features])], + dim=0, + ) + + cond_mask = torch.concat( + [cond_mask, torch.zeros([self.n_pred_steps, self.n_pred_features])], dim=0 + ) + observed_tp = torch.arange( + 0, self.n_steps + self.n_pred_steps, dtype=torch.float32 + ) + + feature_id = torch.arange(self.n_pred_features) + + sample = [ + torch.tensor(idx), + observed_data, + cond_mask, + observed_tp, + feature_id, + ] + + if self.return_y: + sample.append(torch.tensor(self.file_handle["y"][idx], dtype=torch.long)) + + return sample diff --git a/pypots/forecasting/csdi/model.py b/pypots/forecasting/csdi/model.py new file mode 100644 index 00000000..68f6a412 --- /dev/null +++ b/pypots/forecasting/csdi/model.py @@ -0,0 +1,496 @@ +""" +The implementation of CSDI for the partially-observed time-series forecasting task. + +Refer to the paper "Yusuke Tashiro, Jiaming Song, Yang Song, and Stefano Ermon. +CSDI: Conditional Score-based Diffusion Models for Probabilistic Time Series Imputation. +In NeurIPS, 2021." + +Notes +----- +Partial implementation uses code from the official implementation https://github.com/ermongroup/CSDI. 
+ +""" + +# Created by Wenjie Du +# License: BSD-3-Clause + +import os +from typing import Union, Optional + +import numpy as np +import torch +from torch.utils.data import DataLoader + +try: + import nni +except ImportError: + pass + +from .core import _CSDI +from .data import DatasetForForecastingCSDI, TestDatasetForForecastingCSDI +from ..base import BaseNNForecaster +from ...data.checking import key_in_data_set +from ...optim.adam import Adam +from ...optim.base import Optimizer +from ...utils.logging import logger + + +class CSDI(BaseNNForecaster): + """The PyTorch implementation of the CSDI model :cite:`tashiro2021csdi`. + + Parameters + ---------- + n_steps : + The number of time steps in the time-series data sample. + + n_features : + The number of features in the time-series data sample. + + n_pred_steps : + The number of steps in the forecasting time series. + + n_pred_features : + The number of features in the forecasting time series. + + n_layers : + The number of layers in the CSDI model. + + n_heads : + The number of heads in the multi-head attention mechanism. + + n_channels : + The number of residual channels. + + d_time_embedding : + The dimension number of the time (temporal) embedding. + + d_feature_embedding : + The dimension number of the feature embedding. + + d_diffusion_embedding : + The dimension number of the diffusion embedding. + + is_unconditional : + Whether the model is unconditional or conditional. + + target_strategy : + The strategy for selecting the target for the diffusion process. It has to be one of ["mix", "random"]. + + n_diffusion_steps : + The number of the diffusion step T in the original paper. + + schedule: + The schedule for other noise levels. It has to be one of ["quad", "linear"]. + + beta_start: + The minimum noise level. + + beta_end: + The maximum noise level. + + batch_size : + The batch size for training and evaluating the model. + + epochs : + The number of epochs for training the model. + + patience : + The patience for the early-stopping mechanism. Given a positive integer, the training process will be + stopped when the model does not perform better after that number of epochs. + Leaving it default as None will disable the early-stopping. + + optimizer : + The optimizer for model training. + If not given, will use a default Adam optimizer. + + num_workers : + The number of subprocesses to use for data loading. + `0` means data loading will be in the main process, i.e. there won't be subprocesses. + + device : + The device for the model to run on. It can be a string, a :class:`torch.device` object, or a list of them. + If not given, will try to use CUDA devices first (will use the default CUDA device if there are multiple), + then CPUs, considering CUDA and CPU are so far the main devices for people to train ML models. + If given a list of devices, e.g. ['cuda:0', 'cuda:1'], or [torch.device('cuda:0'), torch.device('cuda:1')] , the + model will be parallely trained on the multiple devices (so far only support parallel training on CUDA devices). + Other devices like Google TPU and Apple Silicon accelerator MPS may be added in the future. + + saving_path : + The path for automatically saving model checkpoints and tensorboard files (i.e. loss values recorded during + training into a tensorboard file). Will not save if not given. + + model_saving_strategy : + The strategy to save model checkpoints. It has to be one of [None, "best", "better", "all"]. + No model will be saved when it is set as None. 
+ The "best" strategy will only automatically save the best model after the training finished. + The "better" strategy will automatically save the model during training whenever the model performs + better than in previous epochs. + The "all" strategy will save every model after each epoch training. + + """ + + def __init__( + self, + n_steps: int, + n_features: int, + n_pred_steps: int, + n_pred_features: int, + n_layers: int, + n_heads: int, + n_channels: int, + d_time_embedding: int, + d_feature_embedding: int, + d_diffusion_embedding: int, + n_diffusion_steps: int = 50, + target_strategy: str = "random", + is_unconditional: bool = False, + schedule: str = "quad", + beta_start: float = 0.0001, + beta_end: float = 0.5, + batch_size: int = 32, + epochs: int = 100, + patience: Optional[int] = None, + optimizer: Optional[Optimizer] = Adam(), + num_workers: int = 0, + device: Optional[Union[str, torch.device, list]] = None, + saving_path: Optional[str] = None, + model_saving_strategy: Optional[str] = "best", + ): + super().__init__( + batch_size, + epochs, + patience, + num_workers, + device, + saving_path, + model_saving_strategy, + ) + assert n_pred_features == n_features, ( + f"currently n_pred_features of CSDI forecasting model should be equal to n_features, " + f"but got {n_pred_features} and {n_features}." + ) + assert target_strategy in ["mix", "random"] + assert schedule in ["quad", "linear"] + self.n_steps = n_steps + self.n_features = n_features + self.n_pred_steps = n_pred_steps + self.n_pred_features = n_pred_features + self.target_strategy = target_strategy + + # set up the model + self.model = _CSDI( + n_features, + n_pred_features, + n_layers, + n_heads, + n_channels, + d_time_embedding, + d_feature_embedding, + d_diffusion_embedding, + is_unconditional, + n_diffusion_steps, + schedule, + beta_start, + beta_end, + ) + self._print_model_size() + self._send_model_to_given_device() + + # set up the optimizer + self.optimizer = optimizer + self.optimizer.init_optimizer(self.model.parameters()) + + def _assemble_input_for_training(self, data: list) -> dict: + ( + indices, + X_ori, + indicating_mask, + cond_mask, + observed_tp, + feature_id, + ) = self._send_data_to_given_device(data) + + inputs = { + "X_ori": X_ori.permute(0, 2, 1), # ori observed part for model hint + "indicating_mask": indicating_mask.permute(0, 2, 1), # for loss calc + "cond_mask": cond_mask.permute(0, 2, 1), # for masking X_ori + "observed_tp": observed_tp, + "feature_id": feature_id, + } + return inputs + + def _assemble_input_for_validating(self, data: list) -> dict: + return self._assemble_input_for_training(data) + + def _assemble_input_for_testing(self, data: list) -> dict: + ( + indices, + X, + cond_mask, + observed_tp, + feature_id, + ) = self._send_data_to_given_device(data) + + inputs = { + "X": X.permute(0, 2, 1), # for model input + "cond_mask": cond_mask.permute(0, 2, 1), # missing mask + "observed_tp": observed_tp, + "feature_id": feature_id, + } + return inputs + + def _train_model( + self, + training_loader: DataLoader, + val_loader: DataLoader = None, + ) -> None: + # each training starts from the very beginning, so reset the loss and model dict here + self.best_loss = float("inf") + self.best_model_dict = None + + try: + training_step = 0 + for epoch in range(1, self.epochs + 1): + self.model.train() + epoch_train_loss_collector = [] + for idx, data in enumerate(training_loader): + training_step += 1 + inputs = self._assemble_input_for_training(data) + self.optimizer.zero_grad() + results = 
self.model.forward(inputs) + # use sum() before backward() in case of multi-gpu training + results["loss"].sum().backward() + self.optimizer.step() + epoch_train_loss_collector.append(results["loss"].sum().item()) + + # save training loss logs into the tensorboard file for every step if in need + if self.summary_writer is not None: + self._save_log_into_tb_file(training_step, "training", results) + + # mean training loss of the current epoch + mean_train_loss = np.mean(epoch_train_loss_collector) + + if val_loader is not None: + self.model.eval() + val_loss_collector = [] + with torch.no_grad(): + for idx, data in enumerate(val_loader): + inputs = self._assemble_input_for_validating(data) + results = self.model.forward( + inputs, training=False, n_sampling_times=0 + ) + val_loss_collector.append(results["loss"].sum().item()) + + mean_val_loss = np.asarray(val_loss_collector).mean() + + # save validation loss logs into the tensorboard file for every epoch if in need + if self.summary_writer is not None: + val_loss_dict = { + "validating_loss": mean_val_loss, + } + self._save_log_into_tb_file(epoch, "validating", val_loss_dict) + + logger.info( + f"Epoch {epoch:03d} - " + f"training loss: {mean_train_loss:.4f}, " + f"validation loss: {mean_val_loss:.4f}" + ) + mean_loss = mean_val_loss + else: + logger.info( + f"Epoch {epoch:03d} - training loss: {mean_train_loss:.4f}" + ) + mean_loss = mean_train_loss + + if np.isnan(mean_loss): + logger.warning( + f"‼️ Attention: got NaN loss in Epoch {epoch}. This may lead to unexpected errors." + ) + + if mean_loss < self.best_loss: + self.best_epoch = epoch + self.best_loss = mean_loss + self.best_model_dict = self.model.state_dict() + self.patience = self.original_patience + else: + self.patience -= 1 + + # save the model if necessary + self._auto_save_model_if_necessary( + confirm_saving=mean_loss < self.best_loss, + saving_name=f"{self.__class__.__name__}_epoch{epoch}_loss{mean_loss}", + ) + + if os.getenv("enable_tuning", False): + nni.report_intermediate_result(mean_loss) + if epoch == self.epochs - 1 or self.patience == 0: + nni.report_final_result(self.best_loss) + + if self.patience == 0: + logger.info( + "Exceeded the training patience. Terminating the training procedure..." + ) + break + + except Exception as e: + logger.error(f"❌ Exception: {e}") + if self.best_model_dict is None: + raise RuntimeError( + "Training got interrupted. Model was not trained. Please investigate the error printed above." + ) + else: + RuntimeWarning( + "Training got interrupted. Please investigate the error printed above.\n" + "Model got trained and will load the best checkpoint so far for testing.\n" + "If you don't want it, please try fit() again." + ) + + if np.isnan(self.best_loss): + raise ValueError("Something is wrong. best_loss is Nan after training.") + + logger.info( + f"Finished training. The best model is from epoch#{self.best_epoch}." 
+ ) + + def fit( + self, + train_set: Union[dict, str], + val_set: Optional[Union[dict, str]] = None, + file_type: str = "hdf5", + n_sampling_times: int = 1, + ) -> None: + # Step 1: wrap the input data with classes Dataset and DataLoader + training_set = DatasetForForecastingCSDI( + train_set, + file_type=file_type, + ) + training_loader = DataLoader( + training_set, + batch_size=self.batch_size, + shuffle=True, + num_workers=self.num_workers, + ) + val_loader = None + if val_set is not None: + if not key_in_data_set("X_pred", val_set): + raise ValueError("val_set must contain 'X_pred' for model validation.") + val_set = DatasetForForecastingCSDI( + val_set, + file_type=file_type, + ) + val_loader = DataLoader( + val_set, + batch_size=self.batch_size, + shuffle=False, + num_workers=self.num_workers, + ) + + # Step 2: train the model and freeze it + self._train_model(training_loader, val_loader) + self.model.load_state_dict(self.best_model_dict) + self.model.eval() # set the model as eval status to freeze it. + + # Step 3: save the model if necessary + self._auto_save_model_if_necessary(confirm_saving=True) + + def predict( + self, + test_set: Union[dict, str], + file_type: str = "hdf5", + n_sampling_times: int = 1, + ) -> dict: + """ + + Parameters + ---------- + test_set : dict or str + The dataset for model validating, should be a dictionary including keys as 'X' and 'y', + or a path string locating a data file. + If it is a dict, X should be array-like of shape [n_samples, sequence length (time steps), n_features], + which is time-series data for validating, can contain missing values, and y should be array-like of shape + [n_samples], which is classification labels of X. + If it is a path string, the path should point to a data file, e.g. a h5 file, which contains + key-value pairs like a dict, and it has to include keys as 'X' and 'y'. + + file_type : + The type of the given file if test_set is a path string. + + n_sampling_times: + The number of sampling times for the model to sample from the diffusion process. + + Returns + ------- + result_dict: dict + Prediction results in a Python Dictionary for the given samples. + It should be a dictionary including a key named 'imputation'. + + """ + assert n_sampling_times > 0, "n_sampling_times should be greater than 0." + + # Step 1: wrap the input data with classes Dataset and DataLoader + self.model.eval() # set the model as eval status to freeze it. + test_set = TestDatasetForForecastingCSDI( + test_set, + self.n_pred_steps, + self.n_pred_features, + file_type=file_type, + ) + test_loader = DataLoader( + test_set, + batch_size=self.batch_size, + shuffle=False, + num_workers=self.num_workers, + ) + forecasting_collector = [] + + # Step 2: process the data with the model + with torch.no_grad(): + for idx, data in enumerate(test_loader): + inputs = self._assemble_input_for_testing(data) + results = self.model( + inputs, + training=False, + n_sampling_times=n_sampling_times, + ) + forecasting_data = results["forecasting_data"][ + :, :, -self.n_pred_steps : + ] + forecasting_collector.append(forecasting_data) + + # Step 3: output collection and return + forecasting_data = torch.cat(forecasting_collector).cpu().detach().numpy() + result_dict = { + "forecasting": forecasting_data, # [bz, n_sampling_times, n_pred_steps, n_features] + } + return result_dict + + def forecast( + self, + X: Union[dict, str], + file_type: str = "hdf5", + ) -> np.ndarray: + """Impute missing values in the given data with the trained model. 
+ + Warnings + -------- + The method impute is deprecated. Please use `predict()` instead. + + Parameters + ---------- + X : + The data samples for testing, should be array-like of shape [n_samples, sequence length (time steps), + n_features], or a path string locating a data file, e.g. h5 file. + + file_type : + The type of the given file if X is a path string. + + Returns + ------- + array-like, shape [n_samples, sequence length (time steps), n_features], + Imputed data. + """ + logger.warning( + "🚨DeprecationWarning: The method impute is deprecated. Please use `predict` instead." + ) + results_dict = self.predict(X, file_type=file_type) + return results_dict["forecasting"] diff --git a/pypots/forecasting/template/core.py b/pypots/forecasting/template/core.py new file mode 100644 index 00000000..55cf3ada --- /dev/null +++ b/pypots/forecasting/template/core.py @@ -0,0 +1,42 @@ +""" +The implementation of YourNewModel for the partially-observed time-series forecasting task. + +Refer to the paper "Your paper citation". + +""" + +# Created by Your Name TODO: modify the author information. +# License: BSD-3-Clause + +import torch.nn as nn + +# from ...nn.modules import some_modules + + +# TODO: define your new model here. +# It could be a neural network model or a non-neural network algorithm (e.g. written in numpy). +# Your model should be implemented with PyTorch and subclass torch.nn.Module if it is a neural network. +# Note that your main algorithm is defined in this class, and this class usually won't be exposed to users. +class _YourNewModel(nn.Module): + def __init__(self): + super().__init__() + + # TODO: define your model's components here. If modules in pypots.nn.modules can be reused in your model, + # you can import them and use them here. AND if you think the modules you implemented can be reused by + # other models, you can also consider to contribute them to pypots.nn.modules + self.embedding = nn.Module + self.submodule = nn.Module + self.backbone = nn.Module + + def forward(self, inputs: dict) -> dict: + # TODO: define your model's forward propagation process here. + # The input is a dict, and the output `results` should also be a dict. + output = self.backbone() # replace this with your model's process + + # TODO: `results` must contains the key `loss` which is will be used for + # backward propagation to update the model. + loss = None + results = { + "loss": loss, + } + return results diff --git a/pypots/forecasting/template/data.py b/pypots/forecasting/template/data.py index c391740e..3c4ca97e 100644 --- a/pypots/forecasting/template/data.py +++ b/pypots/forecasting/template/data.py @@ -1,7 +1,7 @@ """ Dataset class for YourNewModel. -TODO: modify the above description with your model's information. +TODO: modify the above description for your model's dataset class. """ @@ -10,17 +10,26 @@ from typing import Union, Iterable -from ...data.base import BaseDataset +from ...data.dataset import BaseDataset +# TODO: define your new dataset class here. Remove or add arguments as needed. 
class DatasetForYourNewModel(BaseDataset): def __init__( self, data: Union[dict, str], - return_labels: bool = True, - file_type: str = "h5py", + return_X_ori: bool, + return_X_pred: bool, + return_y: bool, + file_type: str = "hdf5", ): - super().__init__(data, return_labels, file_type) + super().__init__( + data=data, + return_X_ori=return_X_ori, + return_X_pred=return_X_pred, + return_y=return_y, + file_type=file_type, + ) def _fetch_data_from_array(self, idx: int) -> Iterable: raise NotImplementedError diff --git a/pypots/forecasting/template/model.py b/pypots/forecasting/template/model.py index 099c617b..890c3fde 100644 --- a/pypots/forecasting/template/model.py +++ b/pypots/forecasting/template/model.py @@ -3,6 +3,8 @@ Refer to the paper "Your paper citation". +TODO: modify the above description with your model's information. + """ # Created by Your Name TODO: modify the author information. @@ -10,40 +12,19 @@ from typing import Union, Optional -import numpy as np import torch -import torch.nn as nn + +from .core import _YourNewModel # TODO: import the base class from the forecasting package in PyPOTS. # Here I suppose this is a neural-network forecasting model. # You should make your model inherent BaseForecaster if it is not a NN. # from ..base import BaseForecaster from ..base import BaseNNForecaster - from ...optim.adam import Adam from ...optim.base import Optimizer -# TODO: define your new model here. -# It could be a neural network model or a non-neural network algorithm (e.g. written in numpy). -# Your model should be implemented with PyTorch and subclass torch.nn.Module if it is a neural network. -# Note that your main algorithm is defined in this class, and this class usually won't be exposed to users. -class _YourNewModel(nn.Module): - def __init__(self): - super().__init__() - - def forward(self, inputs: dict) -> dict: - # TODO: define your model's forward propagation process here. - # The input is a dict, and the output `results` should also be a dict. - # `results` must contains the key `loss` which is will be used for backward propagation to update the model. - - loss = None - results = { - "loss": loss, - } - return results - - # TODO: define your new model's wrapper here. # It should be a subclass of a base class defined in PyPOTS task packages (e.g. # BaseNNForecaster of PyPOTS forecasting task package), and it has to implement all abstract methods of the base class. 
@@ -52,13 +33,13 @@ class YourNewModel(BaseNNForecaster): def __init__( self, # TODO: add your model's hyper-parameters here - batch_size: int, - epochs: int, - patience: int, - num_workers: int = 0, + batch_size: int = 32, + epochs: int = 100, + patience: Optional[int] = None, optimizer: Optional[Optimizer] = Adam(), + num_workers: int = 0, device: Optional[Union[str, torch.device, list]] = None, - saving_path: str = None, + saving_path: Optional[str] = None, model_saving_strategy: Optional[str] = "best", ): super().__init__( @@ -74,9 +55,11 @@ def __init__( # TODO: set up your model's hyper-parameters here # set up the model - self.model = _YourNewModel() - self.model = self.model.to(self.device) + self.model = _YourNewModel( + # pass the arguments to your model + ) self._print_model_size() + self._send_model_to_given_device() # set up the optimizer self.optimizer = optimizer @@ -95,13 +78,13 @@ def fit( self, train_set: Union[dict, str], val_set: Optional[Union[dict, str]] = None, - file_type: str = "h5py", + file_type: str = "hdf5", ) -> None: raise NotImplementedError def predict( self, test_set: Union[dict, str], - file_type: str = "h5py", + file_type: str = "hdf5", ) -> dict: raise NotImplementedError diff --git a/pypots/forecasting/template/module.py b/pypots/forecasting/template/module.py deleted file mode 100644 index fa20e4cd..00000000 --- a/pypots/forecasting/template/module.py +++ /dev/null @@ -1,13 +0,0 @@ -""" -The implementation of the modules for YourNewModel. - -Refer to the paper "Your paper citation". - -""" - -# Created by Your Name TODO: modify the author information. -# License: BSD-3-Clause - - -# TODO: this file is not necessary. If your new model has customized layers or modules, please put them here. -# Otherwise, please delete this modules.py file, don't commit it to the repository. 
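Side note on the template refactor above: dataset classes now declare up front which tensors __getitem__() should return. The sketch below is only an illustration of how a contributor might fill in the template for a forecasting model; the class name ToyDatasetForForecasting and the chosen flag values are hypothetical, not part of PyPOTS.

from pypots.data.dataset import BaseDataset


class ToyDatasetForForecasting(BaseDataset):
    # Hypothetical example of filling in DatasetForYourNewModel for a forecaster:
    # the horizon X_pred is needed, while X_ori and labels usually are not.
    def __init__(self, data, file_type="hdf5"):
        super().__init__(
            data=data,
            return_X_ori=False,
            return_X_pred=True,
            return_y=False,
            file_type=file_type,
        )


# Inside the wrapper's fit(), such a dataset is typically wrapped in a
# torch.utils.data.DataLoader, the same way the CSDI model above does it.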
diff --git a/pypots/imputation/autoformer/data.py b/pypots/imputation/autoformer/data.py index 1b9a8d0e..15eef9b3 100644 --- a/pypots/imputation/autoformer/data.py +++ b/pypots/imputation/autoformer/data.py @@ -17,8 +17,8 @@ def __init__( self, data: Union[dict, str], return_X_ori: bool, - return_labels: bool, - file_type: str = "h5py", + return_y: bool, + file_type: str = "hdf5", rate: float = 0.2, ): - super().__init__(data, return_X_ori, return_labels, file_type, rate) + super().__init__(data, return_X_ori, return_y, file_type, rate) diff --git a/pypots/imputation/autoformer/model.py b/pypots/imputation/autoformer/model.py index bce312e0..edafd0a5 100644 --- a/pypots/imputation/autoformer/model.py +++ b/pypots/imputation/autoformer/model.py @@ -20,11 +20,11 @@ import torch from torch.utils.data import DataLoader +from .core import _Autoformer from .data import DatasetForAutoformer -from pypots.imputation.autoformer.core import _Autoformer from ..base import BaseNNImputer -from ...data.base import BaseDataset -from ...data.checking import check_X_ori_in_val_set +from ...data.checking import key_in_data_set +from ...data.dataset import BaseDataset from ...optim.adam import Adam from ...optim.base import Optimizer from ...utils.logging import logger @@ -211,11 +211,11 @@ def fit( self, train_set: Union[dict, str], val_set: Optional[Union[dict, str]] = None, - file_type: str = "h5py", + file_type: str = "hdf5", ) -> None: # Step 1: wrap the input data with classes Dataset and DataLoader training_set = DatasetForAutoformer( - train_set, return_X_ori=False, return_labels=False, file_type=file_type + train_set, return_X_ori=False, return_y=False, file_type=file_type ) training_loader = DataLoader( training_set, @@ -225,10 +225,10 @@ def fit( ) val_loader = None if val_set is not None: - if not check_X_ori_in_val_set(val_set): + if not key_in_data_set("X_ori", val_set): raise ValueError("val_set must contain 'X_ori' for model validation.") val_set = DatasetForAutoformer( - val_set, return_X_ori=True, return_labels=False, file_type=file_type + val_set, return_X_ori=True, return_y=False, file_type=file_type ) val_loader = DataLoader( val_set, @@ -248,7 +248,7 @@ def fit( def predict( self, test_set: Union[dict, str], - file_type: str = "h5py", + file_type: str = "hdf5", ) -> dict: """Make predictions for the input data with the trained model. @@ -263,19 +263,23 @@ def predict( If it is a path string, the path should point to a data file, e.g. a h5 file, which contains key-value pairs like a dict, and it has to include keys as 'X' and 'y'. - file_type : str + file_type : The type of the given file if test_set is a path string. Returns ------- - result_dict : dict, + file_type : The dictionary containing the clustering results and latent variables if necessary. """ # Step 1: wrap the input data with classes Dataset and DataLoader self.model.eval() # set the model as eval status to freeze it. test_set = BaseDataset( - test_set, return_X_ori=False, return_labels=False, file_type=file_type + test_set, + return_X_ori=False, + return_X_pred=False, + return_y=False, + file_type=file_type, ) test_loader = DataLoader( test_set, @@ -302,7 +306,7 @@ def predict( def impute( self, X: Union[dict, str], - file_type="h5py", + file_type: str = "hdf5", ) -> np.ndarray: """Impute missing values in the given data with the trained model. 
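The key_in_data_set() helper that replaces check_X_ori_in_val_set() in the hunk above works on both dictionary datasets and paths to HDF5 files. A small self-contained illustration (array shapes and the file name are arbitrary toy values):

import h5py
import numpy as np

from pypots.data.checking import key_in_data_set

# dict-style dataset: the helper simply inspects the dictionary keys
val_set = {
    "X": np.random.randn(8, 24, 5),
    "X_ori": np.random.randn(8, 24, 5),
}
assert key_in_data_set("X_ori", val_set)       # present, so validation can proceed
assert not key_in_data_set("X_pred", val_set)  # an absent key just returns False

# file-style dataset: the same call opens the HDF5 file and checks its keys
with h5py.File("toy_val_set.h5", "w") as f:
    f.create_dataset("X", data=val_set["X"])
    f.create_dataset("X_ori", data=val_set["X_ori"])
assert key_in_data_set("X_ori", "toy_val_set.h5")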
diff --git a/pypots/imputation/base.py b/pypots/imputation/base.py index 284d1af2..18218c3e 100644 --- a/pypots/imputation/base.py +++ b/pypots/imputation/base.py @@ -68,7 +68,7 @@ def fit( self, train_set: Union[dict, str], val_set: Optional[Union[dict, str]] = None, - file_type: str = "h5py", + file_type: str = "hdf5", ) -> None: """Train the imputer on the given data. @@ -90,7 +90,7 @@ def fit( If it is a path string, the path should point to a data file, e.g. a h5 file, which contains key-value pairs like a dict, and it has to include the key 'X'. - file_type : str, default = "h5py", + file_type : The type of the given file if train_set and val_set are path strings. """ @@ -100,7 +100,7 @@ def fit( def predict( self, test_set: Union[dict, str], - file_type: str = "h5py", + file_type: str = "hdf5", ) -> dict: raise NotImplementedError @@ -108,7 +108,7 @@ def predict( def impute( self, X: Union[dict, str], - file_type: str = "h5py", + file_type: str = "hdf5", ) -> np.ndarray: """Impute missing values in the given data with the trained model. @@ -207,7 +207,7 @@ def _assemble_input_for_training(self, data: list) -> dict: Parameters ---------- - data : list, + data : Input data from dataloader, should be list. Returns @@ -223,7 +223,7 @@ def _assemble_input_for_validating(self, data: list) -> dict: Parameters ---------- - data : list, + data : Data output from dataloader, should be list. Returns @@ -247,7 +247,7 @@ def _assemble_input_for_testing(self, data: list) -> dict: Parameters ---------- - data : list, + data : Data output from dataloader, should be list. Returns @@ -383,7 +383,7 @@ def fit( self, train_set: Union[dict, str], val_set: Optional[Union[dict, str]] = None, - file_type: str = "h5py", + file_type: str = "hdf5", ) -> None: """Train the imputer on the given data. @@ -405,7 +405,7 @@ def fit( If it is a path string, the path should point to a data file, e.g. a h5 file, which contains key-value pairs like a dict, and it has to include the key 'X'. - file_type : str, default = "h5py", + file_type : The type of the given file if train_set and val_set are path strings. """ @@ -415,7 +415,7 @@ def fit( def predict( self, test_set: Union[dict, str], - file_type: str = "h5py", + file_type: str = "hdf5", ) -> dict: raise NotImplementedError @@ -423,7 +423,7 @@ def predict( def impute( self, X: Union[dict, str], - file_type: str = "h5py", + file_type: str = "hdf5", ) -> np.ndarray: """Impute missing values in the given data with the trained model. diff --git a/pypots/imputation/brits/data.py b/pypots/imputation/brits/data.py index 3144d8c1..589a5d5f 100644 --- a/pypots/imputation/brits/data.py +++ b/pypots/imputation/brits/data.py @@ -10,7 +10,7 @@ import torch from pygrinder import fill_and_get_mask_torch -from ...data.base import BaseDataset +from ...data.dataset import BaseDataset from ...data.utils import _parse_delta_torch @@ -19,7 +19,7 @@ class DatasetForBRITS(BaseDataset): Parameters ---------- - data : dict or str, + data : The dataset for model input, should be a dictionary including keys as 'X' and 'y', or a path string locating a data file. If it is a dict, X should be array-like of shape [n_samples, sequence length (time steps), n_features], @@ -28,7 +28,7 @@ class DatasetForBRITS(BaseDataset): If it is a path string, the path should point to a data file, e.g. a h5 file, which contains key-value pairs like a dict, and it has to include keys as 'X' and 'y'. 
- return_labels : bool, default = True, + return_y : Whether to return labels in function __getitem__() if they exist in the given data. If `True`, for example, during training of classification models, the Dataset class will return labels in __getitem__() for model input. Otherwise, labels won't be included in the data returned by __getitem__(). This parameter exists because we @@ -37,7 +37,7 @@ class DatasetForBRITS(BaseDataset): with function _fetch_data_from_file(), which works for all three stages. Therefore, we need this parameter for distinction. - file_type : str, default = "h5py" + file_type : The type of the given file if train_set and val_set are path strings. """ @@ -45,18 +45,24 @@ def __init__( self, data: Union[dict, str], return_X_ori: bool, - return_labels: bool, - file_type: str = "h5py", + return_y: bool, + file_type: str = "hdf5", ): - super().__init__(data, return_X_ori, return_labels, file_type) + super().__init__( + data=data, + return_X_ori=return_X_ori, + return_X_pred=False, + return_y=return_y, + file_type=file_type, + ) if not isinstance(self.data, str): # calculate all delta here. - if self.X_ori is None: - forward_X, forward_missing_mask = fill_and_get_mask_torch(self.X) - else: + if self.return_X_ori: forward_missing_mask = self.missing_mask forward_X = self.X + else: + forward_X, forward_missing_mask = fill_and_get_mask_torch(self.X) forward_delta = _parse_delta_torch(forward_missing_mask) backward_X = torch.flip(forward_X, dims=[1]) @@ -81,12 +87,12 @@ def _fetch_data_from_array(self, idx: int) -> Iterable: Parameters ---------- - idx : int, + idx : The index of the sample to be return. Returns ------- - sample : list, + sample : A list contains index : int tensor, @@ -116,10 +122,10 @@ def _fetch_data_from_array(self, idx: int) -> Iterable: self.processed_data["backward"]["delta"][idx], ] - if self.X_ori is not None and self.return_X_ori: + if self.return_X_ori: sample.extend([self.X_ori[idx], self.indicating_mask[idx]]) - if self.y is not None and self.return_labels: + if self.return_y: sample.append(self.y[idx].to(torch.long)) return sample @@ -130,12 +136,12 @@ def _fetch_data_from_file(self, idx: int) -> Iterable: Parameters ---------- - idx : int, + idx : The index of the sample to be return. Returns ------- - sample : list, + sample : The collated data sample, a list including all necessary sample info. 
""" @@ -169,14 +175,14 @@ def _fetch_data_from_file(self, idx: int) -> Iterable: backward["deltas"], ] - if "X_ori" in self.file_handle.keys() and self.return_X_ori: + if self.return_X_ori: X_ori = torch.from_numpy(self.file_handle["X_ori"][idx]).to(torch.float32) X_ori, X_ori_missing_mask = fill_and_get_mask_torch(X_ori) indicating_mask = X_ori_missing_mask - missing_mask sample.extend([X_ori, indicating_mask]) # if the dataset has labels and is for training, then fetch it from the file - if "y" in self.file_handle.keys() and self.return_labels: + if self.return_y: sample.append(torch.tensor(self.file_handle["y"][idx], dtype=torch.long)) return sample diff --git a/pypots/imputation/brits/model.py b/pypots/imputation/brits/model.py index b7dc6edd..68d71355 100644 --- a/pypots/imputation/brits/model.py +++ b/pypots/imputation/brits/model.py @@ -24,7 +24,7 @@ from .core import _BRITS from .data import DatasetForBRITS from ..base import BaseNNImputer -from ...data.checking import check_X_ori_in_val_set +from ...data.checking import key_in_data_set from ...optim.adam import Adam from ...optim.base import Optimizer from ...utils.logging import logger @@ -194,11 +194,11 @@ def fit( self, train_set: Union[dict, str], val_set: Optional[Union[dict, str]] = None, - file_type: str = "h5py", + file_type: str = "hdf5", ) -> None: # Step 1: wrap the input data with classes Dataset and DataLoader training_set = DatasetForBRITS( - train_set, return_X_ori=False, return_labels=False, file_type=file_type + train_set, return_X_ori=False, return_y=False, file_type=file_type ) training_loader = DataLoader( training_set, @@ -208,10 +208,10 @@ def fit( ) val_loader = None if val_set is not None: - if not check_X_ori_in_val_set(val_set): + if not key_in_data_set("X_ori", val_set): raise ValueError("val_set must contain 'X_ori' for model validation.") val_set = DatasetForBRITS( - val_set, return_X_ori=True, return_labels=False, file_type=file_type + val_set, return_X_ori=True, return_y=False, file_type=file_type ) val_loader = DataLoader( val_set, @@ -231,11 +231,11 @@ def fit( def predict( self, test_set: Union[dict, str], - file_type: str = "h5py", + file_type: str = "hdf5", ) -> dict: self.model.eval() # set the model as eval status to freeze it. test_set = DatasetForBRITS( - test_set, return_X_ori=False, return_labels=False, file_type=file_type + test_set, return_X_ori=False, return_y=False, file_type=file_type ) test_loader = DataLoader( test_set, @@ -261,7 +261,7 @@ def predict( def impute( self, X: Union[dict, str], - file_type="h5py", + file_type: str = "hdf5", ) -> np.ndarray: """Impute missing values in the given data with the trained model. 
diff --git a/pypots/imputation/crossformer/data.py b/pypots/imputation/crossformer/data.py index 056486f8..6bbc771d 100644 --- a/pypots/imputation/crossformer/data.py +++ b/pypots/imputation/crossformer/data.py @@ -17,8 +17,8 @@ def __init__( self, data: Union[dict, str], return_X_ori: bool, - return_labels: bool, - file_type: str = "h5py", + return_y: bool, + file_type: str = "hdf5", rate: float = 0.2, ): - super().__init__(data, return_X_ori, return_labels, file_type, rate) + super().__init__(data, return_X_ori, return_y, file_type, rate) diff --git a/pypots/imputation/crossformer/model.py b/pypots/imputation/crossformer/model.py index e79a957d..a2076d95 100644 --- a/pypots/imputation/crossformer/model.py +++ b/pypots/imputation/crossformer/model.py @@ -21,11 +21,11 @@ import torch from torch.utils.data import DataLoader +from .core import _Crossformer from .data import DatasetForCrossformer -from pypots.imputation.crossformer.core import _Crossformer from ..base import BaseNNImputer -from ...data.base import BaseDataset -from ...data.checking import check_X_ori_in_val_set +from ...data.checking import key_in_data_set +from ...data.dataset import BaseDataset from ...optim.adam import Adam from ...optim.base import Optimizer from ...utils.logging import logger @@ -218,11 +218,11 @@ def fit( self, train_set: Union[dict, str], val_set: Optional[Union[dict, str]] = None, - file_type: str = "h5py", + file_type: str = "hdf5", ) -> None: # Step 1: wrap the input data with classes Dataset and DataLoader training_set = DatasetForCrossformer( - train_set, return_X_ori=False, return_labels=False, file_type=file_type + train_set, return_X_ori=False, return_y=False, file_type=file_type ) training_loader = DataLoader( training_set, @@ -232,10 +232,10 @@ ) val_loader = None if val_set is not None: - if not check_X_ori_in_val_set(val_set): + if not key_in_data_set("X_ori", val_set): raise ValueError("val_set must contain 'X_ori' for model validation.") val_set = DatasetForCrossformer( - val_set, return_X_ori=True, return_labels=False, file_type=file_type + val_set, return_X_ori=True, return_y=False, file_type=file_type ) val_loader = DataLoader( val_set, @@ -255,7 +255,7 @@ def fit( def predict( self, test_set: Union[dict, str], - file_type: str = "h5py", + file_type: str = "hdf5", ) -> dict: """Make predictions for the input data with the trained model. @@ -270,19 +270,23 @@ If it is a path string, the path should point to a data file, e.g. a h5 file, which contains key-value pairs like a dict, and it has to include keys as 'X' and 'y'. - file_type : str + file_type : The type of the given file if test_set is a path string. Returns ------- - result_dict : dict, + result_dict : The dictionary containing the clustering results and latent variables if necessary. """ # Step 1: wrap the input data with classes Dataset and DataLoader self.model.eval() # set the model as eval status to freeze it. test_set = BaseDataset( - test_set, return_X_ori=False, return_labels=False, file_type=file_type + test_set, + return_X_ori=False, + return_X_pred=False, + return_y=False, + file_type=file_type, ) test_loader = DataLoader( test_set, @@ -309,7 +313,7 @@ def predict( def impute( self, X: Union[dict, str], - file_type="h5py", + file_type: str = "hdf5", ) -> np.ndarray: """Impute missing values in the given data with the trained model.
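The CSDI changes below move side-information construction (sinusoidal time embedding, learned feature embedding, optional conditional-mask channel) out of the backbone and into the _CSDI wrapper. As a quick shape sanity check of how those pieces compose into side_info (sizes here are illustrative, not CSDI defaults):

import torch

B, K, L = 2, 5, 10                               # batch size, n_features, n_steps (hypothetical)
d_time, d_feature = 128, 16                      # embedding sizes (hypothetical)
time_embed = torch.randn(B, L, K, d_time)        # time embedding expanded over features
feature_embed = torch.randn(B, L, K, d_feature)  # nn.Embedding output expanded over batch and steps
side_info = torch.cat([time_embed, feature_embed], dim=-1).permute(0, 3, 2, 1)  # (B, d_time+d_feature, K, L)
cond_mask = torch.ones(B, K, L)
side_info = torch.cat([side_info, cond_mask.unsqueeze(1)], dim=1)  # the conditional model appends one mask channel
assert side_info.shape == (B, d_time + d_feature + 1, K, L)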
diff --git a/pypots/imputation/csdi/core.py b/pypots/imputation/csdi/core.py index cfa975dc..fd3cdd84 100644 --- a/pypots/imputation/csdi/core.py +++ b/pypots/imputation/csdi/core.py @@ -1,6 +1,7 @@ # Created by Wenjie Du # License: BSD-3-Clause +import torch import torch.nn as nn from ...nn.modules.csdi import BackboneCSDI @@ -9,10 +10,10 @@ class _CSDI(nn.Module): def __init__( self, + n_features, n_layers, n_heads, n_channels, - d_target, d_time_embedding, d_feature_embedding, d_diffusion_embedding, @@ -24,11 +25,19 @@ def __init__( ): super().__init__() + self.n_features = n_features + self.d_time_embedding = d_time_embedding + self.is_unconditional = is_unconditional + + self.embed_layer = nn.Embedding( + num_embeddings=n_features, + embedding_dim=d_feature_embedding, + ) self.backbone = BackboneCSDI( n_layers, n_heads, n_channels, - d_target, + n_features, d_time_embedding, d_feature_embedding, d_diffusion_embedding, @@ -39,6 +48,41 @@ def __init__( beta_end, ) + @staticmethod + def time_embedding(pos, d_model=128): + pe = torch.zeros(pos.shape[0], pos.shape[1], d_model).to(pos.device) + position = pos.unsqueeze(2) + div_term = 1 / torch.pow( + 10000.0, torch.arange(0, d_model, 2, device=pos.device) / d_model + ) + pe[:, :, 0::2] = torch.sin(position * div_term) + pe[:, :, 1::2] = torch.cos(position * div_term) + return pe + + def get_side_info(self, observed_tp, cond_mask): + B, K, L = cond_mask.shape + device = observed_tp.device + time_embed = self.time_embedding( + observed_tp, self.d_time_embedding + ) # (B,L,emb) + time_embed = time_embed.to(device) + time_embed = time_embed.unsqueeze(2).expand(-1, -1, K, -1) + feature_embed = self.embed_layer( + torch.arange(self.n_features).to(device) + ) # (K,emb) + feature_embed = feature_embed.unsqueeze(0).unsqueeze(0).expand(B, L, -1, -1) + + side_info = torch.cat( + [time_embed, feature_embed], dim=-1 + ) # (B,L,K,emb+d_feature_embedding) + side_info = side_info.permute(0, 3, 2, 1) # (B,*,K,L) + + if not self.is_unconditional: + side_mask = cond_mask.unsqueeze(1) # (B,1,K,L) + side_info = torch.cat([side_info, side_mask], dim=1) + + return side_info + def forward(self, inputs, training=True, n_sampling_times=1): results = {} if training: # for training @@ -48,7 +92,7 @@ def forward(self, inputs, training=True, n_sampling_times=1): inputs["cond_mask"], inputs["observed_tp"], ) - side_info = self.backbone.get_side_info(observed_tp, cond_mask) + side_info = self.get_side_info(observed_tp, cond_mask) training_loss = self.backbone.calc_loss( observed_data, cond_mask, indicating_mask, side_info, training ) @@ -60,7 +104,7 @@ def forward(self, inputs, training=True, n_sampling_times=1): inputs["cond_mask"], inputs["observed_tp"], ) - side_info = self.backbone.get_side_info(observed_tp, cond_mask) + side_info = self.get_side_info(observed_tp, cond_mask) validating_loss = self.backbone.calc_loss_valid( observed_data, cond_mask, indicating_mask, side_info, training ) @@ -71,7 +115,7 @@ def forward(self, inputs, training=True, n_sampling_times=1): inputs["cond_mask"], inputs["observed_tp"], ) - side_info = self.backbone.get_side_info(observed_tp, cond_mask) + side_info = self.get_side_info(observed_tp, cond_mask) samples = self.backbone( observed_data, cond_mask, side_info, n_sampling_times ) # (n_samples, n_sampling_times, n_features, n_steps) diff --git a/pypots/imputation/csdi/data.py b/pypots/imputation/csdi/data.py index 8617e6bc..03d07923 100644 --- a/pypots/imputation/csdi/data.py +++ b/pypots/imputation/csdi/data.py @@ -11,21 +11,36 @@ 
import torch from pygrinder import fill_and_get_mask_torch -from ...data.base import BaseDataset +from ...data.dataset import BaseDataset class DatasetForCSDI(BaseDataset): - """Dataset for CSDI model.""" + """Dataset for CSDI model. + + Notes + ----- + In CSDI official code, `observed_mask` indicates all observed values in raw data. + `gt_mask` indicates all observed values in the input data. + `observed_mask` - `gt_mask` = `indicating_mask` in our code. + `cond_mask`, for testing, it is `gt_mask`; for training, it is `observed_mask` + includes some artificially missing values. + + """ def __init__( self, data: Union[dict, str], target_strategy: str, return_X_ori: bool, - return_labels: bool, - file_type: str = "h5py", + file_type: str = "hdf5", ): - super().__init__(data, return_X_ori, return_labels, file_type) + super().__init__( + data=data, + return_X_ori=return_X_ori, + return_X_pred=False, + return_y=False, + file_type=file_type, + ) assert target_strategy in ["random", "hist", "mix"] self.target_strategy = target_strategy @@ -55,12 +70,12 @@ def _fetch_data_from_array(self, idx: int) -> Iterable: Parameters ---------- - idx : int, + idx : The index to fetch the specified sample. Returns ------- - sample : list, + sample : A list contains index : int tensor, @@ -80,7 +95,7 @@ def _fetch_data_from_array(self, idx: int) -> Iterable: """ - if self.X_ori is not None and self.return_X_ori: + if self.return_X_ori: observed_data = self.X_ori[idx] cond_mask = self.missing_mask[idx] indicating_mask = self.indicating_mask[idx] @@ -117,7 +132,7 @@ def _fetch_data_from_array(self, idx: int) -> Iterable: observed_tp, ] - if self.y is not None and self.return_labels: + if self.return_y: sample.append(self.y[idx].to(torch.long)) return sample @@ -128,12 +143,12 @@ def _fetch_data_from_file(self, idx: int) -> Iterable: Parameters ---------- - idx : int, + idx : The index of the sample to be return. Returns ------- - sample : list, + sample : A list contains index : int tensor, @@ -156,7 +171,7 @@ def _fetch_data_from_file(self, idx: int) -> Iterable: if self.file_handle is None: self.file_handle = self._open_file_handle() - if "X_ori" in self.file_handle.keys() and self.return_X_ori: + if self.return_X_ori: observed_data = torch.from_numpy(self.file_handle["X_ori"][idx]).to( torch.float32 ) @@ -203,7 +218,7 @@ def _fetch_data_from_file(self, idx: int) -> Iterable: observed_tp, ] - if "y" in self.file_handle.keys() and self.return_labels: + if self.return_y: sample.append(torch.tensor(self.file_handle["y"][idx], dtype=torch.long)) return sample @@ -216,22 +231,21 @@ def __init__( self, data: Union[dict, str], return_X_ori: bool, - return_labels: bool, - file_type: str = "h5py", + file_type: str = "hdf5", ): - super().__init__(data, "random", return_X_ori, return_labels, file_type) + super().__init__(data, "random", return_X_ori, file_type) def _fetch_data_from_array(self, idx: int) -> Iterable: """Fetch data according to index. Parameters ---------- - idx : int, + idx : The index to fetch the specified sample. Returns ------- - sample : list, + sample : A list contains index : int tensor, @@ -264,7 +278,7 @@ def _fetch_data_from_array(self, idx: int) -> Iterable: observed_tp, ] - if self.y is not None and self.return_labels: + if self.return_y: sample.append(self.y[idx].to(torch.long)) return sample @@ -275,12 +289,12 @@ def _fetch_data_from_file(self, idx: int) -> Iterable: Parameters ---------- - idx : int, + idx : The index of the sample to be return. 
Returns ------- - sample : list, + sample : A list contains index : int tensor, @@ -319,7 +333,7 @@ def _fetch_data_from_file(self, idx: int) -> Iterable: observed_tp, ] - if "y" in self.file_handle.keys() and self.return_labels: + if self.return_y: sample.append(torch.tensor(self.file_handle["y"][idx], dtype=torch.long)) return sample diff --git a/pypots/imputation/csdi/model.py b/pypots/imputation/csdi/model.py index e43f6db4..bcfdc29d 100644 --- a/pypots/imputation/csdi/model.py +++ b/pypots/imputation/csdi/model.py @@ -29,7 +29,7 @@ from .core import _CSDI from .data import DatasetForCSDI, TestDatasetForCSDI from ..base import BaseNNImputer -from ...data.checking import check_X_ori_in_val_set +from ...data.checking import key_in_data_set from ...optim.adam import Adam from ...optim.base import Optimizer from ...utils.logging import logger @@ -40,6 +40,9 @@ class CSDI(BaseNNImputer): Parameters ---------- + n_steps : + The number of time steps in the time-series data sample. + n_features : The number of features in the time-series data sample. @@ -122,6 +125,7 @@ class CSDI(BaseNNImputer): def __init__( self, + n_steps: int, n_features: int, n_layers: int, n_heads: int, @@ -155,14 +159,15 @@ def __init__( ) assert target_strategy in ["mix", "random"] assert schedule in ["quad", "linear"] + self.n_steps = n_steps self.target_strategy = target_strategy # set up the model self.model = _CSDI( + n_features, n_layers, n_heads, n_channels, - n_features, d_time_embedding, d_feature_embedding, d_diffusion_embedding, @@ -196,10 +201,10 @@ def _assemble_input_for_training(self, data: list) -> dict: } return inputs - def _assemble_input_for_validating(self, data) -> dict: + def _assemble_input_for_validating(self, data: list) -> dict: return self._assemble_input_for_training(data) - def _assemble_input_for_testing(self, data) -> dict: + def _assemble_input_for_testing(self, data: list) -> dict: ( indices, X, @@ -331,7 +336,7 @@ def fit( self, train_set: Union[dict, str], val_set: Optional[Union[dict, str]] = None, - file_type: str = "h5py", + file_type: str = "hdf5", n_sampling_times: int = 1, ) -> None: # Step 1: wrap the input data with classes Dataset and DataLoader @@ -339,7 +344,6 @@ def fit( train_set, self.target_strategy, return_X_ori=False, - return_labels=False, file_type=file_type, ) training_loader = DataLoader( @@ -350,13 +354,12 @@ def fit( ) val_loader = None if val_set is not None: - if not check_X_ori_in_val_set(val_set): + if not key_in_data_set("X_ori", val_set): raise ValueError("val_set must contain 'X_ori' for model validation.") val_set = DatasetForCSDI( val_set, self.target_strategy, return_X_ori=True, - return_labels=False, file_type=file_type, ) val_loader = DataLoader( @@ -377,7 +380,7 @@ def fit( def predict( self, test_set: Union[dict, str], - file_type: str = "h5py", + file_type: str = "hdf5", n_sampling_times: int = 1, ) -> dict: """ @@ -393,7 +396,7 @@ def predict( If it is a path string, the path should point to a data file, e.g. a h5 file, which contains key-value pairs like a dict, and it has to include keys as 'X' and 'y'. - file_type : str + file_type : The type of the given file if test_set is a path string. n_sampling_times: @@ -410,9 +413,7 @@ def predict( # Step 1: wrap the input data with classes Dataset and DataLoader self.model.eval() # set the model as eval status to freeze it. 
- test_set = TestDatasetForCSDI( - test_set, return_X_ori=False, return_labels=False, file_type=file_type - ) + test_set = TestDatasetForCSDI(test_set, return_X_ori=False, file_type=file_type) test_loader = DataLoader( test_set, batch_size=self.batch_size, @@ -443,7 +444,7 @@ def predict( def impute( self, X: Union[dict, str], - file_type="h5py", + file_type: str = "hdf5", ) -> np.ndarray: """Impute missing values in the given data with the trained model. diff --git a/pypots/imputation/dlinear/data.py b/pypots/imputation/dlinear/data.py index 1884054f..b47cb439 100644 --- a/pypots/imputation/dlinear/data.py +++ b/pypots/imputation/dlinear/data.py @@ -17,8 +17,8 @@ def __init__( self, data: Union[dict, str], return_X_ori: bool, - return_labels: bool, - file_type: str = "h5py", + return_y: bool, + file_type: str = "hdf5", rate: float = 0.2, ): - super().__init__(data, return_X_ori, return_labels, file_type, rate) + super().__init__(data, return_X_ori, return_y, file_type, rate) diff --git a/pypots/imputation/dlinear/model.py b/pypots/imputation/dlinear/model.py index 3dc95445..af7ba286 100644 --- a/pypots/imputation/dlinear/model.py +++ b/pypots/imputation/dlinear/model.py @@ -21,11 +21,11 @@ import torch from torch.utils.data import DataLoader +from .core import _DLinear from .data import DatasetForDLinear -from pypots.imputation.dlinear.core import _DLinear from ..base import BaseNNImputer -from ...data.base import BaseDataset -from ...data.checking import check_X_ori_in_val_set +from ...data.checking import key_in_data_set +from ...data.dataset import BaseDataset from ...optim.adam import Adam from ...optim.base import Optimizer from ...utils.logging import logger @@ -189,11 +189,11 @@ def fit( self, train_set: Union[dict, str], val_set: Optional[Union[dict, str]] = None, - file_type: str = "h5py", + file_type: str = "hdf5", ) -> None: # Step 1: wrap the input data with classes Dataset and DataLoader training_set = DatasetForDLinear( - train_set, return_X_ori=False, return_labels=False, file_type=file_type + train_set, return_X_ori=False, return_y=False, file_type=file_type ) training_loader = DataLoader( training_set, @@ -203,10 +203,10 @@ ) val_loader = None if val_set is not None: - if not check_X_ori_in_val_set(val_set): + if not key_in_data_set("X_ori", val_set): raise ValueError("val_set must contain 'X_ori' for model validation.") val_set = DatasetForDLinear( - val_set, return_X_ori=True, return_labels=False, file_type=file_type + val_set, return_X_ori=True, return_y=False, file_type=file_type ) val_loader = DataLoader( val_set, @@ -226,7 +226,7 @@ def fit( def predict( self, test_set: Union[dict, str], - file_type: str = "h5py", + file_type: str = "hdf5", ) -> dict: """Make predictions for the input data with the trained model. @@ -241,19 +241,23 @@ If it is a path string, the path should point to a data file, e.g. a h5 file, which contains key-value pairs like a dict, and it has to include keys as 'X' and 'y'. - file_type : str + file_type : The type of the given file if test_set is a path string. Returns ------- - result_dict : dict, + result_dict : The dictionary containing the clustering results and latent variables if necessary. """ # Step 1: wrap the input data with classes Dataset and DataLoader self.model.eval() # set the model as eval status to freeze it.
test_set = BaseDataset( - test_set, return_X_ori=False, return_labels=False, file_type=file_type + test_set, + return_X_ori=False, + return_X_pred=False, + return_y=False, + file_type=file_type, ) test_loader = DataLoader( test_set, @@ -280,7 +284,7 @@ def predict( def impute( self, X: Union[dict, str], - file_type="h5py", + file_type: str = "hdf5", ) -> np.ndarray: """Impute missing values in the given data with the trained model. diff --git a/pypots/imputation/etsformer/data.py b/pypots/imputation/etsformer/data.py index f03a4e61..19503a4d 100644 --- a/pypots/imputation/etsformer/data.py +++ b/pypots/imputation/etsformer/data.py @@ -17,8 +17,8 @@ def __init__( self, data: Union[dict, str], return_X_ori: bool, - return_labels: bool, - file_type: str = "h5py", + return_y: bool, + file_type: str = "hdf5", rate: float = 0.2, ): - super().__init__(data, return_X_ori, return_labels, file_type, rate) + super().__init__(data, return_X_ori, return_y, file_type, rate) diff --git a/pypots/imputation/etsformer/model.py b/pypots/imputation/etsformer/model.py index 11a6e19f..94a253e1 100644 --- a/pypots/imputation/etsformer/model.py +++ b/pypots/imputation/etsformer/model.py @@ -20,11 +20,11 @@ import torch from torch.utils.data import DataLoader +from .core import _ETSformer from .data import DatasetForETSformer -from pypots.imputation.etsformer.core import _ETSformer from ..base import BaseNNImputer -from ...data.base import BaseDataset -from ...data.checking import check_X_ori_in_val_set +from ...data.checking import key_in_data_set +from ...data.dataset import BaseDataset from ...optim.adam import Adam from ...optim.base import Optimizer from ...utils.logging import logger @@ -211,11 +211,11 @@ def fit( self, train_set: Union[dict, str], val_set: Optional[Union[dict, str]] = None, - file_type: str = "h5py", + file_type: str = "hdf5", ) -> None: # Step 1: wrap the input data with classes Dataset and DataLoader training_set = DatasetForETSformer( - train_set, return_X_ori=False, return_labels=False, file_type=file_type + train_set, return_X_ori=False, return_y=False, file_type=file_type ) training_loader = DataLoader( training_set, @@ -225,10 +225,10 @@ ) val_loader = None if val_set is not None: - if not check_X_ori_in_val_set(val_set): + if not key_in_data_set("X_ori", val_set): raise ValueError("val_set must contain 'X_ori' for model validation.") val_set = DatasetForETSformer( - val_set, return_X_ori=True, return_labels=False, file_type=file_type + val_set, return_X_ori=True, return_y=False, file_type=file_type ) val_loader = DataLoader( val_set, @@ -248,7 +248,7 @@ def fit( def predict( self, test_set: Union[dict, str], - file_type: str = "h5py", + file_type: str = "hdf5", ) -> dict: """Make predictions for the input data with the trained model. @@ -263,19 +263,23 @@ If it is a path string, the path should point to a data file, e.g. a h5 file, which contains key-value pairs like a dict, and it has to include keys as 'X' and 'y'. - file_type : str + file_type : The type of the given file if test_set is a path string. Returns ------- - result_dict : dict, + result_dict : The dictionary containing the clustering results and latent variables if necessary. """ # Step 1: wrap the input data with classes Dataset and DataLoader self.model.eval() # set the model as eval status to freeze it.
test_set = BaseDataset( - test_set, return_X_ori=False, return_labels=False, file_type=file_type + test_set, + return_X_ori=False, + return_X_pred=False, + return_y=False, + file_type=file_type, ) test_loader = DataLoader( test_set, @@ -302,7 +306,7 @@ def predict( def impute( self, X: Union[dict, str], - file_type="h5py", + file_type: str = "hdf5", ) -> np.ndarray: """Impute missing values in the given data with the trained model. diff --git a/pypots/imputation/fedformer/data.py b/pypots/imputation/fedformer/data.py index a5982636..f8d79217 100644 --- a/pypots/imputation/fedformer/data.py +++ b/pypots/imputation/fedformer/data.py @@ -17,8 +17,8 @@ def __init__( self, data: Union[dict, str], return_X_ori: bool, - return_labels: bool, - file_type: str = "h5py", + return_y: bool, + file_type: str = "hdf5", rate: float = 0.2, ): - super().__init__(data, return_X_ori, return_labels, file_type, rate) + super().__init__(data, return_X_ori, return_y, file_type, rate) diff --git a/pypots/imputation/fedformer/model.py b/pypots/imputation/fedformer/model.py index d6f9746a..dfda3740 100644 --- a/pypots/imputation/fedformer/model.py +++ b/pypots/imputation/fedformer/model.py @@ -23,8 +23,8 @@ from .core import _FEDformer from .data import DatasetForFEDformer from ..base import BaseNNImputer -from ...data.base import BaseDataset -from ...data.checking import check_X_ori_in_val_set +from ...data.checking import key_in_data_set +from ...data.dataset import BaseDataset from ...optim.adam import Adam from ...optim.base import Optimizer from ...utils.logging import logger @@ -225,11 +225,11 @@ def fit( self, train_set: Union[dict, str], val_set: Optional[Union[dict, str]] = None, - file_type: str = "h5py", + file_type: str = "hdf5", ) -> None: # Step 1: wrap the input data with classes Dataset and DataLoader training_set = DatasetForFEDformer( - train_set, return_X_ori=False, return_labels=False, file_type=file_type + train_set, return_X_ori=False, return_y=False, file_type=file_type ) training_loader = DataLoader( training_set, @@ -239,10 +239,10 @@ ) val_loader = None if val_set is not None: - if not check_X_ori_in_val_set(val_set): + if not key_in_data_set("X_ori", val_set): raise ValueError("val_set must contain 'X_ori' for model validation.") val_set = DatasetForFEDformer( - val_set, return_X_ori=True, return_labels=False, file_type=file_type + val_set, return_X_ori=True, return_y=False, file_type=file_type ) val_loader = DataLoader( val_set, @@ -262,7 +262,7 @@ def fit( def predict( self, test_set: Union[dict, str], - file_type: str = "h5py", + file_type: str = "hdf5", ) -> dict: """Make predictions for the input data with the trained model. @@ -277,19 +277,23 @@ If it is a path string, the path should point to a data file, e.g. a h5 file, which contains key-value pairs like a dict, and it has to include keys as 'X' and 'y'. - file_type : str + file_type : The type of the given file if test_set is a path string. Returns ------- - result_dict : dict, + result_dict : The dictionary containing the clustering results and latent variables if necessary. """ # Step 1: wrap the input data with classes Dataset and DataLoader self.model.eval() # set the model as eval status to freeze it.
test_set = BaseDataset( - test_set, return_X_ori=False, return_labels=False, file_type=file_type + test_set, + return_X_ori=False, + return_X_pred=False, + return_y=False, + file_type=file_type, ) test_loader = DataLoader( test_set, @@ -316,7 +320,7 @@ def predict( def impute( self, X: Union[dict, str], - file_type="h5py", + file_type: str = "hdf5", ) -> np.ndarray: """Impute missing values in the given data with the trained model. diff --git a/pypots/imputation/gpvae/data.py b/pypots/imputation/gpvae/data.py index 24b5739b..27b3b456 100644 --- a/pypots/imputation/gpvae/data.py +++ b/pypots/imputation/gpvae/data.py @@ -9,7 +9,7 @@ import torch from pygrinder import fill_and_get_mask_torch -from ...data.base import BaseDataset +from ...data.dataset import BaseDataset class DatasetForGPVAE(BaseDataset): @@ -17,7 +17,7 @@ class DatasetForGPVAE(BaseDataset): Parameters ---------- - data : dict or str, + data : The dataset for model input, should be a dictionary including keys as 'X' and 'y', or a path string locating a data file. If it is a dict, X should be array-like of shape [n_samples, sequence length (time steps), n_features], @@ -26,7 +26,7 @@ class DatasetForGPVAE(BaseDataset): If it is a path string, the path should point to a data file, e.g. a h5 file, which contains key-value pairs like a dict, and it has to include keys as 'X' and 'y'. - return_labels : bool, default = True, + return_y : Whether to return labels in function __getitem__() if they exist in the given data. If `True`, for example, during training of classification models, the Dataset class will return labels in __getitem__() for model input. Otherwise, labels won't be included in the data returned by __getitem__(). This parameter exists because we @@ -35,7 +35,7 @@ class DatasetForGPVAE(BaseDataset): with function _fetch_data_from_file(), which works for all three stages. Therefore, we need this parameter for distinction. - file_type : str, default = "h5py" + file_type : The type of the given file if train_set and val_set are path strings. """ @@ -43,22 +43,28 @@ def __init__( self, data: Union[dict, str], return_X_ori: bool, - return_labels: bool, - file_type: str = "h5py", + return_y: bool, + file_type: str = "hdf5", ): - super().__init__(data, return_X_ori, return_labels, file_type) + super().__init__( + data=data, + return_X_ori=return_X_ori, + return_X_pred=False, + return_y=return_y, + file_type=file_type, + ) def _fetch_data_from_array(self, idx: int) -> Iterable: """Fetch data from self.X if it is given. Parameters ---------- - idx : int, + idx : The index of the sample to be return. Returns ------- - sample : list, + sample : A list contains index : int tensor, @@ -78,7 +84,7 @@ def _fetch_data_from_array(self, idx: int) -> Iterable: """ X = self.X[idx] - if self.X_ori is not None and self.return_X_ori: + if self.return_X_ori: X = self.X[idx] missing_mask = self.missing_mask[idx] X_ori = self.X_ori[idx] @@ -88,7 +94,7 @@ def _fetch_data_from_array(self, idx: int) -> Iterable: X, missing_mask = fill_and_get_mask_torch(X) sample = [torch.tensor(idx), X, missing_mask] - if self.y is not None and self.return_labels: + if self.return_y: sample.append(self.y[idx].to(torch.long)) return sample @@ -99,19 +105,19 @@ def _fetch_data_from_file(self, idx: int) -> Iterable: Parameters ---------- - idx : int, + idx : The index of the sample to be return. Returns ------- - sample : list, + sample : The collated data sample, a list including all necessary sample info. 
""" if self.file_handle is None: self.file_handle = self._open_file_handle() - if "X_ori" in self.file_handle.keys() and self.return_X_ori: + if self.return_X_ori: X = torch.from_numpy(self.file_handle["X"][idx]).to(torch.float32) X_ori = torch.from_numpy(self.file_handle["X_ori"][idx]).to(torch.float32) X_ori, X_ori_missing_mask = fill_and_get_mask_torch(X_ori) @@ -124,7 +130,7 @@ def _fetch_data_from_file(self, idx: int) -> Iterable: sample = [torch.tensor(idx), X, missing_mask] # if the dataset has labels and is for training, then fetch it from the file - if "y" in self.file_handle.keys() and self.return_labels: + if self.return_y: sample.append(torch.tensor(self.file_handle["y"][idx], dtype=torch.long)) return sample diff --git a/pypots/imputation/gpvae/model.py b/pypots/imputation/gpvae/model.py index 272010e3..1ff234c9 100644 --- a/pypots/imputation/gpvae/model.py +++ b/pypots/imputation/gpvae/model.py @@ -27,7 +27,7 @@ from .core import _GPVAE from .data import DatasetForGPVAE from ..base import BaseNNImputer -from ...data.checking import check_X_ori_in_val_set +from ...data.checking import key_in_data_set from ...optim.adam import Adam from ...optim.base import Optimizer from ...utils.logging import logger @@ -361,11 +361,11 @@ def fit( self, train_set: Union[dict, str], val_set: Optional[Union[dict, str]] = None, - file_type: str = "h5py", + file_type: str = "hdf5", ) -> None: # Step 1: wrap the input data with classes Dataset and DataLoader training_set = DatasetForGPVAE( - train_set, return_X_ori=False, return_labels=False, file_type=file_type + train_set, return_X_ori=False, return_y=False, file_type=file_type ) training_loader = DataLoader( training_set, @@ -375,10 +375,10 @@ def fit( ) val_loader = None if val_set is not None: - if not check_X_ori_in_val_set(val_set): + if not key_in_data_set("X_ori", val_set): raise ValueError("val_set must contain 'X_ori' for model validation.") val_set = DatasetForGPVAE( - val_set, return_X_ori=True, return_labels=False, file_type=file_type + val_set, return_X_ori=True, return_y=False, file_type=file_type ) val_loader = DataLoader( val_set, @@ -398,7 +398,7 @@ def fit( def predict( self, test_set: Union[dict, str], - file_type: str = "h5py", + file_type: str = "hdf5", n_sampling_times: int = 1, ) -> dict: """ @@ -414,7 +414,7 @@ def predict( If it is a path string, the path should point to a data file, e.g. a h5 file, which contains key-value pairs like a dict, and it has to include keys as 'X' and 'y'. - file_type : str + file_type : The type of the given file if test_set is a path string. n_sampling_times: @@ -431,7 +431,7 @@ def predict( self.model.eval() # set the model as eval status to freeze it. test_set = DatasetForGPVAE( - test_set, return_X_ori=False, return_labels=False, file_type=file_type + test_set, return_X_ori=False, return_y=False, file_type=file_type ) test_loader = DataLoader( test_set, @@ -459,7 +459,7 @@ def predict( def impute( self, X: Union[dict, str], - file_type="h5py", + file_type: str = "hdf5", n_sampling_times: int = 1, ) -> np.ndarray: """Impute missing values in the given data with the trained model. 
diff --git a/pypots/imputation/informer/data.py b/pypots/imputation/informer/data.py index bf6a146d..7624c41d 100644 --- a/pypots/imputation/informer/data.py +++ b/pypots/imputation/informer/data.py @@ -17,8 +17,8 @@ def __init__( self, data: Union[dict, str], return_X_ori: bool, - return_labels: bool, - file_type: str = "h5py", + return_y: bool, + file_type: str = "hdf5", rate: float = 0.2, ): - super().__init__(data, return_X_ori, return_labels, file_type, rate) + super().__init__(data, return_X_ori, return_y, file_type, rate) diff --git a/pypots/imputation/informer/model.py b/pypots/imputation/informer/model.py index 007920db..9429485e 100644 --- a/pypots/imputation/informer/model.py +++ b/pypots/imputation/informer/model.py @@ -24,8 +24,8 @@ from .core import _Informer from .data import DatasetForInformer from ..base import BaseNNImputer -from ...data.base import BaseDataset -from ...data.checking import check_X_ori_in_val_set +from ...data.checking import key_in_data_set +from ...data.dataset import BaseDataset from ...optim.adam import Adam from ...optim.base import Optimizer from ...utils.logging import logger @@ -206,11 +206,11 @@ def fit( self, train_set: Union[dict, str], val_set: Optional[Union[dict, str]] = None, - file_type: str = "h5py", + file_type: str = "hdf5", ) -> None: # Step 1: wrap the input data with classes Dataset and DataLoader training_set = DatasetForInformer( - train_set, return_X_ori=False, return_labels=False, file_type=file_type + train_set, return_X_ori=False, return_y=False, file_type=file_type ) training_loader = DataLoader( training_set, @@ -220,10 +220,10 @@ ) val_loader = None if val_set is not None: - if not check_X_ori_in_val_set(val_set): + if not key_in_data_set("X_ori", val_set): raise ValueError("val_set must contain 'X_ori' for model validation.") val_set = DatasetForInformer( - val_set, return_X_ori=True, return_labels=False, file_type=file_type + val_set, return_X_ori=True, return_y=False, file_type=file_type ) val_loader = DataLoader( val_set, @@ -243,7 +243,7 @@ def fit( def predict( self, test_set: Union[dict, str], - file_type: str = "h5py", + file_type: str = "hdf5", ) -> dict: """Make predictions for the input data with the trained model. @@ -258,19 +258,23 @@ If it is a path string, the path should point to a data file, e.g. a h5 file, which contains key-value pairs like a dict, and it has to include keys as 'X' and 'y'. - file_type : str + file_type : The type of the given file if test_set is a path string. Returns ------- - result_dict : dict, + result_dict : The dictionary containing the clustering results and latent variables if necessary. """ # Step 1: wrap the input data with classes Dataset and DataLoader self.model.eval() # set the model as eval status to freeze it. test_set = BaseDataset( - test_set, return_X_ori=False, return_labels=False, file_type=file_type + test_set, + return_X_ori=False, + return_X_pred=False, + return_y=False, + file_type=file_type, ) test_loader = DataLoader( test_set, @@ -297,7 +301,7 @@ def predict( def impute( self, X: Union[dict, str], - file_type="h5py", + file_type: str = "hdf5", ) -> np.ndarray: """Impute missing values in the given data with the trained model.
diff --git a/pypots/imputation/locf/model.py b/pypots/imputation/locf/model.py index e7c47366..b88e9e7a 100644 --- a/pypots/imputation/locf/model.py +++ b/pypots/imputation/locf/model.py @@ -55,7 +55,7 @@ def fit( self, train_set: Union[dict, str], val_set: Optional[Union[dict, str]] = None, - file_type: str = "h5py", + file_type: str = "hdf5", ) -> None: """Train the imputer on the given data. @@ -73,7 +73,7 @@ def fit( def predict( self, test_set: Union[dict, str], - file_type: str = "h5py", + file_type: str = "hdf5", ) -> dict: """Make predictions for the input data with the trained model. @@ -88,7 +88,7 @@ def predict( If it is a path string, the path should point to a data file, e.g. a h5 file, which contains key-value pairs like a dict, and it has to include keys as 'X' and 'y'. - file_type : str + file_type : The type of the given file if test_set is a path string. Returns @@ -128,7 +128,7 @@ def predict( def impute( self, X: Union[dict, str], - file_type="h5py", + file_type: str = "hdf5", ) -> np.ndarray: """Impute missing values in the given data with the trained model. diff --git a/pypots/imputation/mean/model.py b/pypots/imputation/mean/model.py index 2594df88..33582f8d 100644 --- a/pypots/imputation/mean/model.py +++ b/pypots/imputation/mean/model.py @@ -29,7 +29,7 @@ def fit( self, train_set: Union[dict, str], val_set: Optional[Union[dict, str]] = None, - file_type: str = "h5py", + file_type: str = "hdf5", ) -> None: """Train the imputer on the given data. @@ -47,7 +47,7 @@ def fit( def predict( self, test_set: Union[dict, str], - file_type: str = "h5py", + file_type: str = "hdf5", ) -> dict: """Make predictions for the input data with the trained model. @@ -62,7 +62,7 @@ def predict( If it is a path string, the path should point to a data file, e.g. a h5 file, which contains key-value pairs like a dict, and it has to include keys as 'X' and 'y'. - file_type : str + file_type : The type of the given file if test_set is a path string. Returns @@ -114,7 +114,7 @@ def predict( def impute( self, X: Union[dict, str], - file_type="h5py", + file_type: str = "hdf5", ) -> np.ndarray: """Impute missing values in the given data with the trained model. diff --git a/pypots/imputation/median/model.py b/pypots/imputation/median/model.py index 6d5db169..6295aa5f 100644 --- a/pypots/imputation/median/model.py +++ b/pypots/imputation/median/model.py @@ -29,7 +29,7 @@ def fit( self, train_set: Union[dict, str], val_set: Optional[Union[dict, str]] = None, - file_type: str = "h5py", + file_type: str = "hdf5", ) -> None: """Train the imputer on the given data. @@ -47,7 +47,7 @@ def fit( def predict( self, test_set: Union[dict, str], - file_type: str = "h5py", + file_type: str = "hdf5", ) -> dict: """Make predictions for the input data with the trained model. @@ -62,7 +62,7 @@ def predict( If it is a path string, the path should point to a data file, e.g. a h5 file, which contains key-value pairs like a dict, and it has to include keys as 'X' and 'y'. - file_type : str + file_type : The type of the given file if test_set is a path string. Returns @@ -115,7 +115,7 @@ def predict( def impute( self, X: Union[dict, str], - file_type="h5py", + file_type: str = "hdf5", ) -> np.ndarray: """Impute missing values in the given data with the trained model. 
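All of the hunks in this change replace the default file_type value "h5py" (the name of the reader library) with "hdf5" (the name of the file format). A data set passed as a path string is simply an HDF5 file whose top-level keys mirror the dict interface; a minimal sketch of preparing one (array shapes and the file name are illustrative):

import h5py
import numpy as np

X = np.random.randn(100, 48, 37)              # [n_samples, n_steps, n_features]; NaN marks missing values
X[np.random.rand(*X.shape) < 0.1] = np.nan

with h5py.File("dataset.h5", "w") as hf:
    hf.create_dataset("X", data=X)
# "dataset.h5" can then be passed to fit()/predict() as a path string with file_type="hdf5".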
diff --git a/pypots/imputation/mrnn/data.py b/pypots/imputation/mrnn/data.py index b8fcdbf4..cb228f53 100644 --- a/pypots/imputation/mrnn/data.py +++ b/pypots/imputation/mrnn/data.py @@ -10,7 +10,7 @@ import torch from pygrinder import fill_and_get_mask_torch -from ...data.base import BaseDataset +from ...data.dataset import BaseDataset from ...data.utils import _parse_delta_torch @@ -19,7 +19,7 @@ class DatasetForMRNN(BaseDataset): Parameters ---------- - data : dict or str, + data : The dataset for model input, should be a dictionary including keys as 'X' and 'y', or a path string locating a data file. If it is a dict, X should be array-like of shape [n_samples, sequence length (time steps), n_features], @@ -28,7 +28,7 @@ class DatasetForMRNN(BaseDataset): If it is a path string, the path should point to a data file, e.g. a h5 file, which contains key-value pairs like a dict, and it has to include keys as 'X' and 'y'. - return_labels : bool, default = True, + return_y : Whether to return labels in function __getitem__() if they exist in the given data. If `True`, for example, during training of classification models, the Dataset class will return labels in __getitem__() for model input. Otherwise, labels won't be included in the data returned by __getitem__(). This parameter exists because we @@ -37,7 +37,7 @@ class DatasetForMRNN(BaseDataset): with function _fetch_data_from_file(), which works for all three stages. Therefore, we need this parameter for distinction. - file_type : str, default = "h5py" + file_type : The type of the given file if train_set and val_set are path strings. """ @@ -45,18 +45,24 @@ def __init__( self, data: Union[dict, str], return_X_ori: bool, - return_labels: bool, - file_type: str = "h5py", + return_y: bool, + file_type: str = "hdf5", ): - super().__init__(data, return_X_ori, return_labels, file_type) + super().__init__( + data=data, + return_X_ori=return_X_ori, + return_X_pred=False, + return_y=return_y, + file_type=file_type, + ) if not isinstance(self.data, str): # calculate all delta here. - if self.X_ori is None: - forward_X, forward_missing_mask = fill_and_get_mask_torch(self.X) - else: + if self.return_X_ori: forward_missing_mask = self.missing_mask forward_X = self.X + else: + forward_X, forward_missing_mask = fill_and_get_mask_torch(self.X) forward_delta = _parse_delta_torch(forward_missing_mask) backward_X = torch.flip(forward_X, dims=[1]) @@ -81,12 +87,12 @@ def _fetch_data_from_array(self, idx: int) -> Iterable: Parameters ---------- - idx : int, + idx : The index of the sample to be return. Returns ------- - sample : list, + sample : A list contains index : int tensor, @@ -116,10 +122,10 @@ def _fetch_data_from_array(self, idx: int) -> Iterable: self.processed_data["backward"]["delta"][idx], ] - if self.X_ori is not None and self.return_X_ori: + if self.return_X_ori: sample.extend([self.X_ori[idx], self.indicating_mask[idx]]) - if self.y is not None and self.return_labels: + if self.return_y: sample.append(self.y[idx].to(torch.long)) return sample @@ -130,12 +136,12 @@ def _fetch_data_from_file(self, idx: int) -> Iterable: Parameters ---------- - idx : int, + idx : The index of the sample to be return. Returns ------- - sample : list, + sample : The collated data sample, a list including all necessary sample info. 
""" @@ -169,14 +175,14 @@ def _fetch_data_from_file(self, idx: int) -> Iterable: backward["deltas"], ] - if "X_ori" in self.file_handle.keys() and self.return_X_ori: + if self.return_X_ori: X_ori = torch.from_numpy(self.file_handle["X_ori"][idx]).to(torch.float32) X_ori, X_ori_missing_mask = fill_and_get_mask_torch(X_ori) indicating_mask = X_ori_missing_mask - missing_mask sample.extend([X_ori, indicating_mask]) # if the dataset has labels and is for training, then fetch it from the file - if "y" in self.file_handle.keys() and self.return_labels: + if self.return_y: sample.append(torch.tensor(self.file_handle["y"][idx], dtype=torch.long)) return sample diff --git a/pypots/imputation/mrnn/model.py b/pypots/imputation/mrnn/model.py index 7fb88e00..378cd5c3 100644 --- a/pypots/imputation/mrnn/model.py +++ b/pypots/imputation/mrnn/model.py @@ -23,7 +23,7 @@ from .core import _MRNN from .data import DatasetForMRNN from ..base import BaseNNImputer -from ...data.checking import check_X_ori_in_val_set +from ...data.checking import key_in_data_set from ...optim.adam import Adam from ...optim.base import Optimizer from ...utils.logging import logger @@ -194,11 +194,11 @@ def fit( self, train_set: Union[dict, str], val_set: Optional[Union[dict, str]] = None, - file_type: str = "h5py", + file_type: str = "hdf5", ) -> None: # Step 1: wrap the input data with classes Dataset and DataLoader training_set = DatasetForMRNN( - train_set, return_X_ori=False, return_labels=False, file_type=file_type + train_set, return_X_ori=False, return_y=False, file_type=file_type ) training_loader = DataLoader( training_set, @@ -208,10 +208,10 @@ def fit( ) val_loader = None if val_set is not None: - if not check_X_ori_in_val_set(val_set): + if not key_in_data_set("X_ori", val_set): raise ValueError("val_set must contain 'X_ori' for model validation.") val_set = DatasetForMRNN( - val_set, return_X_ori=True, return_labels=False, file_type=file_type + val_set, return_X_ori=True, return_y=False, file_type=file_type ) val_loader = DataLoader( val_set, @@ -231,11 +231,11 @@ def fit( def predict( self, test_set: Union[dict, str], - file_type="h5py", + file_type: str = "hdf5", ) -> dict: self.model.eval() # set the model as eval status to freeze it. test_set = DatasetForMRNN( - test_set, return_X_ori=False, return_labels=False, file_type=file_type + test_set, return_X_ori=False, return_y=False, file_type=file_type ) test_loader = DataLoader( test_set, @@ -261,7 +261,7 @@ def predict( def impute( self, X: Union[dict, str], - file_type="h5py", + file_type: str = "hdf5", ) -> np.ndarray: """Impute missing values in the given data with the trained model. 
diff --git a/pypots/imputation/patchtst/data.py b/pypots/imputation/patchtst/data.py index c8c0ea1f..4ccb1e72 100644 --- a/pypots/imputation/patchtst/data.py +++ b/pypots/imputation/patchtst/data.py @@ -17,8 +17,8 @@ def __init__( self, data: Union[dict, str], return_X_ori: bool, - return_labels: bool, - file_type: str = "h5py", + return_y: bool, + file_type: str = "hdf5", rate: float = 0.2, ): - super().__init__(data, return_X_ori, return_labels, file_type, rate) + super().__init__(data, return_X_ori, return_y, file_type, rate) diff --git a/pypots/imputation/patchtst/model.py b/pypots/imputation/patchtst/model.py index b2aceb4b..b4c72c4d 100644 --- a/pypots/imputation/patchtst/model.py +++ b/pypots/imputation/patchtst/model.py @@ -21,11 +21,11 @@ import torch from torch.utils.data import DataLoader +from .core import _PatchTST from .data import DatasetForPatchTST -from pypots.imputation.patchtst.core import _PatchTST from ..base import BaseNNImputer -from ...data.base import BaseDataset -from ...data.checking import check_X_ori_in_val_set +from ...data.checking import key_in_data_set +from ...data.dataset import BaseDataset from ...optim.adam import Adam from ...optim.base import Optimizer from ...utils.logging import logger @@ -239,11 +239,11 @@ def fit( self, train_set: Union[dict, str], val_set: Optional[Union[dict, str]] = None, - file_type: str = "h5py", + file_type: str = "hdf5", ) -> None: # Step 1: wrap the input data with classes Dataset and DataLoader training_set = DatasetForPatchTST( - train_set, return_X_ori=False, return_labels=False, file_type=file_type + train_set, return_X_ori=False, return_y=False, file_type=file_type ) training_loader = DataLoader( training_set, @@ -253,10 +253,10 @@ ) val_loader = None if val_set is not None: - if not check_X_ori_in_val_set(val_set): + if not key_in_data_set("X_ori", val_set): raise ValueError("val_set must contain 'X_ori' for model validation.") val_set = DatasetForPatchTST( - val_set, return_X_ori=True, return_labels=False, file_type=file_type + val_set, return_X_ori=True, return_y=False, file_type=file_type ) val_loader = DataLoader( val_set, @@ -276,7 +276,7 @@ def fit( def predict( self, test_set: Union[dict, str], - file_type: str = "h5py", + file_type: str = "hdf5", ) -> dict: """Make predictions for the input data with the trained model. @@ -291,19 +291,23 @@ If it is a path string, the path should point to a data file, e.g. a h5 file, which contains key-value pairs like a dict, and it has to include keys as 'X' and 'y'. - file_type : str + file_type : The type of the given file if test_set is a path string. Returns ------- - result_dict : dict, + result_dict : The dictionary containing the clustering results and latent variables if necessary. """ # Step 1: wrap the input data with classes Dataset and DataLoader self.model.eval() # set the model as eval status to freeze it. test_set = BaseDataset( - test_set, return_X_ori=False, return_labels=False, file_type=file_type + test_set, + return_X_ori=False, + return_X_pred=False, + return_y=False, + file_type=file_type, ) test_loader = DataLoader( test_set, @@ -330,7 +334,7 @@ def predict( def impute( self, X: Union[dict, str], - file_type="h5py", + file_type: str = "hdf5", ) -> np.ndarray: """Impute missing values in the given data with the trained model.
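The SAITS dataset in the hunks that follow (like the other *Dataset classes above that take a rate argument, default 0.2) builds its training pairs by randomly masking a fraction of the observed values MCAR-style with pygrinder.mcar. Written out with plain torch so the sampling step is explicit (the real call is pygrinder.mcar; its exact signature may differ):

import torch

def mask_mcar(X: torch.Tensor, rate: float = 0.2) -> torch.Tensor:
    # Return a copy of X in which `rate` of the currently observed values are additionally set to NaN.
    X_corrupted = X.clone()
    observed = ~torch.isnan(X_corrupted)
    drop = observed & (torch.rand_like(X_corrupted) < rate)
    X_corrupted[drop] = float("nan")
    return X_corrupted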
diff --git a/pypots/imputation/saits/core.py b/pypots/imputation/saits/core.py index 01465448..89c5e731 100644 --- a/pypots/imputation/saits/core.py +++ b/pypots/imputation/saits/core.py @@ -18,8 +18,8 @@ import torch import torch.nn as nn -from pypots.utils.metrics import calc_mae from ...nn.modules.saits import BackboneSAITS +from ...utils.metrics import calc_mae class _SAITS(nn.Module): diff --git a/pypots/imputation/saits/data.py b/pypots/imputation/saits/data.py index aeae871a..0c25b0f8 100644 --- a/pypots/imputation/saits/data.py +++ b/pypots/imputation/saits/data.py @@ -10,7 +10,7 @@ import torch from pygrinder import mcar, fill_and_get_mask_torch -from ...data.base import BaseDataset +from ...data.dataset import BaseDataset class DatasetForSAITS(BaseDataset): @@ -20,7 +20,7 @@ class DatasetForSAITS(BaseDataset): Parameters ---------- - data : dict or str, + data : The dataset for model input, should be a dictionary including keys as 'X' and 'y', or a path string locating a data file. If it is a dict, X should be array-like of shape [n_samples, sequence length (time steps), n_features], @@ -29,7 +29,7 @@ class DatasetForSAITS(BaseDataset): If it is a path string, the path should point to a data file, e.g. a h5 file, which contains key-value pairs like a dict, and it has to include keys as 'X' and 'y'. - return_labels : bool, default = True, + return_y : Whether to return labels in function __getitem__() if they exist in the given data. If `True`, for example, during training of classification models, the Dataset class will return labels in __getitem__() for model input. Otherwise, labels won't be included in the data returned by __getitem__(). This parameter exists because we @@ -38,7 +38,7 @@ class DatasetForSAITS(BaseDataset): with function _fetch_data_from_file(), which works for all three stages. Therefore, we need this parameter for distinction. - file_type : str, default = "h5py" + file_type : The type of the given file if train_set and val_set are path strings. rate : float, in (0,1), @@ -54,11 +54,17 @@ def __init__( self, data: Union[dict, str], return_X_ori: bool, - return_labels: bool, - file_type: str = "h5py", + return_y: bool, + file_type: str = "hdf5", rate: float = 0.2, ): - super().__init__(data, return_X_ori, return_labels, file_type) + super().__init__( + data=data, + return_X_ori=return_X_ori, + return_X_pred=False, + return_y=return_y, + file_type=file_type, + ) self.rate = rate def _fetch_data_from_array(self, idx: int) -> Iterable: @@ -66,31 +72,31 @@ def _fetch_data_from_array(self, idx: int) -> Iterable: Parameters ---------- - idx : int, + idx : The index to fetch the specified sample. Returns ------- - sample : list, + sample : A list contains - index : int tensor, + index : The index of the sample. - X_ori : tensor, + X_ori : Original time-series for calculating mask imputation loss. - X : tensor, + X : Time-series data with artificially missing values for model input. - missing_mask : tensor, + missing_mask : The mask records all missing values in X. - indicating_mask : tensor. + indicating_mask : The mask indicates artificially missing values in X. 
""" - if self.X_ori is not None and self.return_X_ori: + if self.return_X_ori: X = self.X[idx] X_ori = self.X_ori[idx] missing_mask = self.missing_mask[idx] @@ -110,7 +116,7 @@ def _fetch_data_from_array(self, idx: int) -> Iterable: indicating_mask, ] - if self.y is not None and self.return_labels: + if self.return_y: sample.append(self.y[idx].to(torch.long)) return sample @@ -121,19 +127,19 @@ def _fetch_data_from_file(self, idx: int) -> Iterable: Parameters ---------- - idx : int, + idx : The index of the sample to be return. Returns ------- - sample : list, + sample : The collated data sample, a list including all necessary sample info. """ if self.file_handle is None: self.file_handle = self._open_file_handle() - if "X_ori" in self.file_handle.keys() and self.return_X_ori: + if self.return_X_ori: X = torch.from_numpy(self.file_handle["X"][idx]).to(torch.float32) X_ori = torch.from_numpy(self.file_handle["X_ori"][idx]).to(torch.float32) X_ori, X_ori_missing_mask = fill_and_get_mask_torch(X_ori) @@ -149,7 +155,7 @@ def _fetch_data_from_file(self, idx: int) -> Iterable: sample = [torch.tensor(idx), X, missing_mask, X_ori, indicating_mask] # if the dataset has labels and is for training, then fetch it from the file - if "y" in self.file_handle.keys() and self.return_labels: + if self.return_y: sample.append(torch.tensor(self.file_handle["y"][idx], dtype=torch.long)) return sample diff --git a/pypots/imputation/saits/model.py b/pypots/imputation/saits/model.py index 4291c81a..a45927f6 100644 --- a/pypots/imputation/saits/model.py +++ b/pypots/imputation/saits/model.py @@ -20,11 +20,11 @@ import torch from torch.utils.data import DataLoader -from .data import DatasetForSAITS from .core import _SAITS +from .data import DatasetForSAITS from ..base import BaseNNImputer -from ...data.base import BaseDataset -from ...data.checking import check_X_ori_in_val_set +from ...data.checking import key_in_data_set +from ...data.dataset import BaseDataset from ...optim.adam import Adam from ...optim.base import Optimizer from ...utils.logging import logger @@ -246,11 +246,11 @@ def fit( self, train_set: Union[dict, str], val_set: Optional[Union[dict, str]] = None, - file_type: str = "h5py", + file_type: str = "hdf5", ) -> None: # Step 1: wrap the input data with classes Dataset and DataLoader training_set = DatasetForSAITS( - train_set, return_X_ori=False, return_labels=False, file_type=file_type + train_set, return_X_ori=False, return_y=False, file_type=file_type ) training_loader = DataLoader( training_set, @@ -260,10 +260,10 @@ def fit( ) val_loader = None if val_set is not None: - if not check_X_ori_in_val_set(val_set): + if not key_in_data_set("X_ori", val_set): raise ValueError("val_set must contain 'X_ori' for model validation.") val_set = DatasetForSAITS( - val_set, return_X_ori=True, return_labels=False, file_type=file_type + val_set, return_X_ori=True, return_y=False, file_type=file_type ) val_loader = DataLoader( val_set, @@ -283,7 +283,7 @@ def fit( def predict( self, test_set: Union[dict, str], - file_type: str = "h5py", + file_type: str = "hdf5", diagonal_attention_mask: bool = True, return_latent_vars: bool = False, ) -> dict: @@ -291,7 +291,7 @@ def predict( Parameters ---------- - test_set : dict or str + test_set : The dataset for model validating, should be a dictionary including keys as 'X', or a path string locating a data file supported by PyPOTS (e.g. h5 file). 
If it is a dict, X should be array-like of shape [n_samples, sequence length (time steps), n_features], @@ -300,26 +300,30 @@ def predict( If it is a path string, the path should point to a data file, e.g. a h5 file, which contains key-value pairs like a dict, and it has to include keys as 'X' and 'y'. - file_type : str + file_type : The type of the given file if test_set is a path string. - diagonal_attention_mask : bool + diagonal_attention_mask : Whether to apply a diagonal attention mask to the self-attention mechanism in the testing stage. - return_latent_vars : bool + return_latent_vars : Whether to return the latent variables in SAITS, e.g. attention weights of two DMSA blocks and the weight matrix from the combination block, etc. Returns ------- - result_dict : dict, + result_dict : The dictionary containing the clustering results and latent variables if necessary. """ # Step 1: wrap the input data with classes Dataset and DataLoader self.model.eval() # set the model as eval status to freeze it. test_set = BaseDataset( - test_set, return_X_ori=False, return_labels=False, file_type=file_type + test_set, + return_X_ori=False, + return_X_pred=False, + return_y=False, + file_type=file_type, ) test_loader = DataLoader( test_set, @@ -375,7 +379,7 @@ def predict( def impute( self, X: Union[dict, str], - file_type="h5py", + file_type: str = "hdf5", ) -> np.ndarray: """Impute missing values in the given data with the trained model. diff --git a/pypots/imputation/template/core.py b/pypots/imputation/template/core.py new file mode 100644 index 00000000..4bfd259d --- /dev/null +++ b/pypots/imputation/template/core.py @@ -0,0 +1,42 @@ +""" +The implementation of YourNewModel for the partially-observed time-series imputation task. + +Refer to the paper "Your paper citation". + +""" + +# Created by Your Name TODO: modify the author information. +# License: BSD-3-Clause + +import torch.nn as nn + +# from ...nn.modules import some_modules + + +# TODO: define your new model here. +# It could be a neural network model or a non-neural network algorithm (e.g. written in numpy). +# Your model should be implemented with PyTorch and subclass torch.nn.Module if it is a neural network. +# Note that your main algorithm is defined in this class, and this class usually won't be exposed to users. +class _YourNewModel(nn.Module): + def __init__(self): + super().__init__() + + # TODO: define your model's components here. If modules in pypots.nn.modules can be reused in your model, + # you can import them and use them here. AND if you think the modules you implemented can be reused by + # other models, you can also consider contributing them to pypots.nn.modules + self.embedding = nn.Module + self.submodule = nn.Module + self.backbone = nn.Module + + def forward(self, inputs: dict) -> dict: + # TODO: define your model's forward propagation process here. + # The input is a dict, and the output `results` should also be a dict. + output = self.backbone() # replace this with your model's process + + # TODO: `results` must contain the key `loss`, which will be used for + # backward propagation to update the model. + loss = None + results = { + "loss": loss, + } + return results diff --git a/pypots/imputation/template/data.py b/pypots/imputation/template/data.py index c391740e..3c4ca97e 100644 --- a/pypots/imputation/template/data.py +++ b/pypots/imputation/template/data.py @@ -1,7 +1,7 @@ """ Dataset class for YourNewModel. -TODO: modify the above description with your model's information.
+TODO: modify the above description for your model's dataset class. """ @@ -10,17 +10,26 @@ from typing import Union, Iterable -from ...data.base import BaseDataset +from ...data.dataset import BaseDataset +# TODO: define your new dataset class here. Remove or add arguments as needed. class DatasetForYourNewModel(BaseDataset): def __init__( self, data: Union[dict, str], - return_labels: bool = True, - file_type: str = "h5py", + return_X_ori: bool, + return_X_pred: bool, + return_y: bool, + file_type: str = "hdf5", ): - super().__init__(data, return_labels, file_type) + super().__init__( + data=data, + return_X_ori=return_X_ori, + return_X_pred=return_X_pred, + return_y=return_y, + file_type=file_type, + ) def _fetch_data_from_array(self, idx: int) -> Iterable: raise NotImplementedError diff --git a/pypots/imputation/template/model.py b/pypots/imputation/template/model.py index 496782bf..170e140b 100644 --- a/pypots/imputation/template/model.py +++ b/pypots/imputation/template/model.py @@ -3,6 +3,8 @@ Refer to the paper "Your paper citation". +TODO: modify the above description with your model's information. + """ # Created by Your Name TODO: modify the author information. @@ -10,40 +12,19 @@ from typing import Union, Optional -import numpy as np import torch -import torch.nn as nn + +from .core import _YourNewModel # TODO: import the base class from the imputation package in PyPOTS. # Here I suppose this is a neural-network imputation model. # You should make your model inherent BaseImputer if it is not a NN. # from ..base import BaseImputer from ..base import BaseNNImputer - from ...optim.adam import Adam from ...optim.base import Optimizer -# TODO: define your new model here. -# It could be a neural network model or a non-neural network algorithm (e.g. written in numpy). -# Your model should be implemented with PyTorch and subclass torch.nn.Module if it is a neural network. -# Note that your main algorithm is defined in this class, and this class usually won't be exposed to users. -class _YourNewModel(nn.Module): - def __init__(self): - super().__init__() - - def forward(self, inputs: dict) -> dict: - # TODO: define your model's forward propagation process here. - # The input is a dict, and the output `results` should also be a dict. - # `results` must contains the key `loss` which is will be used for backward propagation to update the model. - - loss = None - results = { - "loss": loss, - } - return results - - # TODO: define your new model's wrapper here. # It should be a subclass of a base class defined in PyPOTS task packages (e.g. # BaseNNImputer of PyPOTS imputation task package), and it has to implement all abstract methods of the base class. 
@@ -52,13 +33,13 @@ class YourNewModel(BaseNNImputer): def __init__( self, # TODO: add your model's hyper-parameters here - batch_size: int, - epochs: int, - patience: int, - num_workers: int = 0, + batch_size: int = 32, + epochs: int = 100, + patience: Optional[int] = None, optimizer: Optional[Optimizer] = Adam(), + num_workers: int = 0, device: Optional[Union[str, torch.device, list]] = None, - saving_path: str = None, + saving_path: Optional[str] = None, model_saving_strategy: Optional[str] = "best", ): super().__init__( @@ -74,9 +55,11 @@ def __init__( # TODO: set up your model's hyper-parameters here # set up the model - self.model = _YourNewModel() - self.model = self.model.to(self.device) + self.model = _YourNewModel( + # pass the arguments to your model + ) self._print_model_size() + self._send_model_to_given_device() # set up the optimizer self.optimizer = optimizer @@ -95,13 +78,13 @@ def fit( self, train_set: Union[dict, str], val_set: Optional[Union[dict, str]] = None, - file_type: str = "h5py", + file_type: str = "hdf5", ) -> None: raise NotImplementedError def predict( self, test_set: Union[dict, str], - file_type: str = "h5py", + file_type: str = "hdf5", ) -> dict: raise NotImplementedError diff --git a/pypots/imputation/template/module.py b/pypots/imputation/template/module.py deleted file mode 100644 index fa20e4cd..00000000 --- a/pypots/imputation/template/module.py +++ /dev/null @@ -1,13 +0,0 @@ -""" -The implementation of the modules for YourNewModel. - -Refer to the paper "Your paper citation". - -""" - -# Created by Your Name TODO: modify the author information. -# License: BSD-3-Clause - - -# TODO: this file is not necessary. If your new model has customized layers or modules, please put them here. -# Otherwise, please delete this modules.py file, don't commit it to the repository. 
diff --git a/pypots/imputation/timesnet/data.py b/pypots/imputation/timesnet/data.py index d30f8a53..f65632c9 100644 --- a/pypots/imputation/timesnet/data.py +++ b/pypots/imputation/timesnet/data.py @@ -17,8 +17,8 @@ def __init__( self, data: Union[dict, str], return_X_ori: bool, - return_labels: bool, - file_type: str = "h5py", + return_y: bool, + file_type: str = "hdf5", rate: float = 0.2, ): - super().__init__(data, return_X_ori, return_labels, file_type, rate) + super().__init__(data, return_X_ori, return_y, file_type, rate) diff --git a/pypots/imputation/timesnet/model.py b/pypots/imputation/timesnet/model.py index 699b55b4..419470b2 100644 --- a/pypots/imputation/timesnet/model.py +++ b/pypots/imputation/timesnet/model.py @@ -23,8 +23,8 @@ from .core import _TimesNet from .data import DatasetForTimesNet from ..base import BaseNNImputer -from ...data.base import BaseDataset -from ...data.checking import check_X_ori_in_val_set +from ...data.dataset import BaseDataset +from ...data.checking import key_in_data_set from ...optim.adam import Adam from ...optim.base import Optimizer from ...utils.logging import logger @@ -201,11 +201,11 @@ def fit( self, train_set: Union[dict, str], val_set: Optional[Union[dict, str]] = None, - file_type: str = "h5py", + file_type: str = "hdf5", ) -> None: # Step 1: wrap the input data with classes Dataset and DataLoader training_set = DatasetForTimesNet( - train_set, return_X_ori=False, return_labels=False, file_type=file_type + train_set, return_X_ori=False, return_y=False, file_type=file_type ) training_loader = DataLoader( training_set, @@ -215,10 +215,10 @@ def fit( ) val_loader = None if val_set is not None: - if not check_X_ori_in_val_set(val_set): + if not key_in_data_set("X_ori", val_set): raise ValueError("val_set must contain 'X_ori' for model validation.") val_set = DatasetForTimesNet( - val_set, return_X_ori=True, return_labels=False, file_type=file_type + val_set, return_X_ori=True, return_y=False, file_type=file_type ) val_loader = DataLoader( val_set, @@ -238,7 +238,7 @@ def fit( def predict( self, test_set: Union[dict, str], - file_type: str = "h5py", + file_type: str = "hdf5", ) -> dict: """Make predictions for the input data with the trained model. @@ -253,19 +253,23 @@ def predict( If it is a path string, the path should point to a data file, e.g. a h5 file, which contains key-value pairs like a dict, and it has to include keys as 'X' and 'y'. - file_type : str + file_type : The type of the given file if test_set is a path string. Returns ------- - result_dict : dict, + result_dict : The dictionary containing the clustering results and latent variables if necessary. """ # Step 1: wrap the input data with classes Dataset and DataLoader self.model.eval() # set the model as eval status to freeze it. test_set = BaseDataset( - test_set, return_X_ori=False, return_labels=False, file_type=file_type + test_set, + return_X_ori=False, + return_X_pred=False, + return_y=False, + file_type=file_type, ) test_loader = DataLoader( test_set, @@ -292,7 +296,7 @@ def predict( def impute( self, X: Union[dict, str], - file_type="h5py", + file_type: str = "hdf5", ) -> np.ndarray: """Impute missing values in the given data with the trained model.
diff --git a/pypots/imputation/transformer/data.py b/pypots/imputation/transformer/data.py index 6974991d..d8751050 100644 --- a/pypots/imputation/transformer/data.py +++ b/pypots/imputation/transformer/data.py @@ -15,8 +15,8 @@ def __init__( self, data: Union[dict, str], return_X_ori: bool, - return_labels: bool, - file_type: str = "h5py", + return_y: bool, + file_type: str = "hdf5", rate: float = 0.2, ): - super().__init__(data, return_X_ori, return_labels, file_type, rate) + super().__init__(data, return_X_ori, return_y, file_type, rate) diff --git a/pypots/imputation/transformer/model.py b/pypots/imputation/transformer/model.py index e465db30..46ee13ab 100644 --- a/pypots/imputation/transformer/model.py +++ b/pypots/imputation/transformer/model.py @@ -23,8 +23,8 @@ from .core import _Transformer from .data import DatasetForTransformer from ..base import BaseNNImputer -from ...data.base import BaseDataset -from ...data.checking import check_X_ori_in_val_set +from ...data.dataset import BaseDataset +from ...data.checking import key_in_data_set from ...optim.adam import Adam from ...optim.base import Optimizer from ...utils.logging import logger @@ -234,11 +234,11 @@ def fit( self, train_set: Union[dict, str], val_set: Optional[Union[dict, str]] = None, - file_type: str = "h5py", + file_type: str = "hdf5", ) -> None: # Step 1: wrap the input data with classes Dataset and DataLoader training_set = DatasetForTransformer( - train_set, return_X_ori=False, return_labels=False, file_type=file_type + train_set, return_X_ori=False, return_y=False, file_type=file_type ) training_loader = DataLoader( training_set, @@ -248,10 +248,10 @@ def fit( ) val_loader = None if val_set is not None: - if not check_X_ori_in_val_set(val_set): + if not key_in_data_set("X_ori", val_set): raise ValueError("val_set must contain 'X_ori' for model validation.") val_set = DatasetForTransformer( - val_set, return_X_ori=True, return_labels=False, file_type=file_type + val_set, return_X_ori=True, return_y=False, file_type=file_type ) val_loader = DataLoader( val_set, @@ -271,11 +271,15 @@ def fit( def predict( self, test_set: Union[dict, str], - file_type: str = "h5py", + file_type: str = "hdf5", ) -> dict: self.model.eval() # set the model as eval status to freeze it. test_set = BaseDataset( - test_set, return_X_ori=False, return_labels=False, file_type=file_type + test_set, + return_X_ori=False, + return_X_pred=False, + return_y=False, + file_type=file_type, ) test_loader = DataLoader( test_set, @@ -301,7 +305,7 @@ def predict( def impute( self, X: Union[dict, str], - file_type="h5py", + file_type: str = "hdf5", ) -> np.ndarray: """Impute missing values in the given data with the trained model. diff --git a/pypots/imputation/usgan/data.py b/pypots/imputation/usgan/data.py index 58e035c3..40e6ee77 100644 --- a/pypots/imputation/usgan/data.py +++ b/pypots/imputation/usgan/data.py @@ -15,7 +15,7 @@ class DatasetForUSGAN(DatasetForBRITS): Parameters ---------- - data : dict or str, + data : The dataset for model input, should be a dictionary including keys as 'X' and 'y', or a path string locating a data file. If it is a dict, X should be array-like of shape [n_samples, sequence length (time steps), n_features], @@ -24,7 +24,7 @@ class DatasetForUSGAN(DatasetForBRITS): If it is a path string, the path should point to a data file, e.g. a h5 file, which contains key-value pairs like a dict, and it has to include keys as 'X' and 'y'. 
- return_labels : bool, default = True, + return_y : Whether to return labels in function __getitem__() if they exist in the given data. If `True`, for example, during training of classification models, the Dataset class will return labels in __getitem__() for model input. Otherwise, labels won't be included in the data returned by __getitem__(). This parameter exists because we @@ -33,7 +33,7 @@ class DatasetForUSGAN(DatasetForBRITS): with function _fetch_data_from_file(), which works for all three stages. Therefore, we need this parameter for distinction. - file_type : str, default = "h5py" + file_type : The type of the given file if train_set and val_set are path strings. """ @@ -41,7 +41,12 @@ def __init__( self, data: Union[dict, str], return_X_ori: bool, - return_labels: bool, - file_type: str = "h5py", + return_y: bool, + file_type: str = "hdf5", ): - super().__init__(data, return_X_ori, return_labels, file_type) + super().__init__( + data=data, + return_X_ori=return_X_ori, + return_y=return_y, + file_type=file_type, + ) diff --git a/pypots/imputation/usgan/model.py b/pypots/imputation/usgan/model.py index 91aaff44..1f684e92 100644 --- a/pypots/imputation/usgan/model.py +++ b/pypots/imputation/usgan/model.py @@ -20,7 +20,7 @@ from .core import _USGAN from .data import DatasetForUSGAN from ..base import BaseNNImputer -from ...data.checking import check_X_ori_in_val_set +from ...data.checking import key_in_data_set from ...optim.adam import Adam from ...optim.base import Optimizer from ...utils.logging import logger @@ -375,11 +375,11 @@ def fit( self, train_set: Union[dict, str], val_set: Optional[Union[dict, str]] = None, - file_type: str = "h5py", + file_type: str = "hdf5", ) -> None: # Step 1: wrap the input data with classes Dataset and DataLoader training_set = DatasetForUSGAN( - train_set, return_X_ori=False, return_labels=False, file_type=file_type + train_set, return_X_ori=False, return_y=False, file_type=file_type ) training_loader = DataLoader( training_set, @@ -389,10 +389,10 @@ def fit( ) val_loader = None if val_set is not None: - if not check_X_ori_in_val_set(val_set): + if not key_in_data_set("X_ori", val_set): raise ValueError("val_set must contain 'X_ori' for model validation.") val_set = DatasetForUSGAN( - val_set, return_X_ori=True, return_labels=False, file_type=file_type + val_set, return_X_ori=True, return_y=False, file_type=file_type ) val_loader = DataLoader( val_set, @@ -412,11 +412,11 @@ def fit( def predict( self, test_set: Union[dict, str], - file_type="h5py", + file_type: str = "hdf5", ) -> dict: self.model.eval() # set the model as eval status to freeze it. test_set = DatasetForUSGAN( - test_set, return_X_ori=False, return_labels=False, file_type=file_type + test_set, return_X_ori=False, return_y=False, file_type=file_type ) test_loader = DataLoader( test_set, @@ -442,7 +442,7 @@ def predict( def impute( self, X: Union[dict, str], - file_type="h5py", + file_type: str = "hdf5", ) -> np.ndarray: """Impute missing values in the given data with the trained model. 
diff --git a/pypots/nn/modules/csdi/backbone.py b/pypots/nn/modules/csdi/backbone.py index 697e76a2..26051060 100644 --- a/pypots/nn/modules/csdi/backbone.py +++ b/pypots/nn/modules/csdi/backbone.py @@ -44,11 +44,6 @@ def __init__( d_side += 1 # for conditional mask d_input = 2 - self.embed_layer = nn.Embedding( - num_embeddings=d_target, - embedding_dim=d_feature_embedding, - ) - self.diff_model = CsdiDiffusionModel( n_diffusion_steps, d_diffusion_embedding, @@ -78,41 +73,6 @@ def __init__( "alpha_torch", torch.tensor(self.alpha).float().unsqueeze(1).unsqueeze(1) ) - @staticmethod - def time_embedding(pos, d_model=128): - pe = torch.zeros(pos.shape[0], pos.shape[1], d_model).to(pos.device) - position = pos.unsqueeze(2) - div_term = 1 / torch.pow( - 10000.0, torch.arange(0, d_model, 2, device=pos.device) / d_model - ) - pe[:, :, 0::2] = torch.sin(position * div_term) - pe[:, :, 1::2] = torch.cos(position * div_term) - return pe - - def get_side_info(self, observed_tp, cond_mask): - B, K, L = cond_mask.shape - device = observed_tp.device - time_embed = self.time_embedding( - observed_tp, self.d_time_embedding - ) # (B,L,emb) - time_embed = time_embed.to(device) - time_embed = time_embed.unsqueeze(2).expand(-1, -1, K, -1) - feature_embed = self.embed_layer( - torch.arange(self.d_target).to(device) - ) # (K,emb) - feature_embed = feature_embed.unsqueeze(0).unsqueeze(0).expand(B, L, -1, -1) - - side_info = torch.cat( - [time_embed, feature_embed], dim=-1 - ) # (B,L,K,emb+d_feature_embedding) - side_info = side_info.permute(0, 3, 2, 1) # (B,*,K,L) - - if not self.is_unconditional: - side_mask = cond_mask.unsqueeze(1) # (B,1,K,L) - side_info = torch.cat([side_info, side_mask], dim=1) - - return side_info - def set_input_to_diffmodel(self, noisy_data, observed_data, cond_mask): if self.is_unconditional: total_input = noisy_data.unsqueeze(1) # (B,1,K,L) diff --git a/pypots/utils/visual/data.py b/pypots/utils/visual/data.py index 338b1145..ca5a5f6e 100644 --- a/pypots/utils/visual/data.py +++ b/pypots/utils/visual/data.py @@ -29,26 +29,26 @@ def plot_data( Parameters ---------- - X : ndarray, + X : The observed values - X_ori : ndarray, + X_ori : The evaluated values - X_imputed : ndarray, + X_imputed : The imputed values - sample_idx : int, + sample_idx : The index of the sample to be plotted. If None, a randomly-selected sample will be plotted for visualization. - n_rows : int, + n_rows : The number of rows in the plot - n_cols : int, + n_cols : The number of columns in the plot - fig_size : list, + fig_size : The size of the figure """ @@ -95,9 +95,9 @@ def plot_data( def plot_missingness( - missing_mask, - min_step=0, - max_step=1, + missing_mask: np.ndarray, + min_step: int = 0, + max_step: int = 1, sample_idx: Optional[int] = None, ): """Plot the missingness pattern of one multivariate timeseries. For each feature, @@ -106,16 +106,16 @@ def plot_missingness( Parameters ---------- - missing_mask : ndarray, + missing_mask : The missing mask of multivariate time series. - min_step : int, + min_step : The minimum time step for visualization. - max_step : int, + max_step : The maximum time step for visualization. - sample_idx : int, + sample_idx : The index of the sample to be plotted, if None, a randomly-selected sample will be plotted for visualization.
""" mask_shape = missing_mask.shape diff --git a/tests/classification/brits.py b/tests/classification/brits.py index 0ec7b68d..7441e40e 100644 --- a/tests/classification/brits.py +++ b/tests/classification/brits.py @@ -21,9 +21,9 @@ TRAIN_SET, VAL_SET, TEST_SET, - H5_TRAIN_SET_PATH, - H5_VAL_SET_PATH, - H5_TEST_SET_PATH, + GENERAL_H5_TRAIN_SET_PATH, + GENERAL_H5_VAL_SET_PATH, + GENERAL_H5_TEST_SET_PATH, RESULT_SAVING_DIR_FOR_CLASSIFICATION, check_tb_and_model_checkpoints_existence, ) @@ -104,8 +104,8 @@ def test_3_saving_path(self): @pytest.mark.xdist_group(name="classification-brits") def test_4_lazy_loading(self): - self.brits.fit(H5_TRAIN_SET_PATH, H5_VAL_SET_PATH) - results = self.brits.predict(H5_TEST_SET_PATH) + self.brits.fit(GENERAL_H5_TRAIN_SET_PATH, GENERAL_H5_VAL_SET_PATH) + results = self.brits.predict(GENERAL_H5_TEST_SET_PATH) metrics = calc_binary_classification_metrics( results["classification"], DATA["test_y"] ) diff --git a/tests/classification/grud.py b/tests/classification/grud.py index 5c165e07..61f7d496 100644 --- a/tests/classification/grud.py +++ b/tests/classification/grud.py @@ -21,9 +21,9 @@ TRAIN_SET, VAL_SET, TEST_SET, - H5_TRAIN_SET_PATH, - H5_VAL_SET_PATH, - H5_TEST_SET_PATH, + GENERAL_H5_TRAIN_SET_PATH, + GENERAL_H5_VAL_SET_PATH, + GENERAL_H5_TEST_SET_PATH, RESULT_SAVING_DIR_FOR_CLASSIFICATION, check_tb_and_model_checkpoints_existence, ) @@ -101,8 +101,8 @@ def test_3_saving_path(self): @pytest.mark.xdist_group(name="classification-grud") def test_4_lazy_loading(self): - self.grud.fit(H5_TRAIN_SET_PATH, H5_VAL_SET_PATH) - results = self.grud.predict(H5_TEST_SET_PATH) + self.grud.fit(GENERAL_H5_TRAIN_SET_PATH, GENERAL_H5_VAL_SET_PATH) + results = self.grud.predict(GENERAL_H5_TEST_SET_PATH) metrics = calc_binary_classification_metrics( results["classification"], DATA["test_y"] ) diff --git a/tests/classification/raindrop.py b/tests/classification/raindrop.py index 64f6aa59..10363b78 100644 --- a/tests/classification/raindrop.py +++ b/tests/classification/raindrop.py @@ -20,9 +20,9 @@ TRAIN_SET, VAL_SET, TEST_SET, - H5_TRAIN_SET_PATH, - H5_VAL_SET_PATH, - H5_TEST_SET_PATH, + GENERAL_H5_TRAIN_SET_PATH, + GENERAL_H5_VAL_SET_PATH, + GENERAL_H5_TEST_SET_PATH, RESULT_SAVING_DIR_FOR_CLASSIFICATION, check_tb_and_model_checkpoints_existence, ) @@ -106,8 +106,8 @@ def test_3_saving_path(self): @pytest.mark.xdist_group(name="classification-raindrop") def test_4_lazy_loading(self): - self.raindrop.fit(H5_TRAIN_SET_PATH, H5_VAL_SET_PATH) - results = self.raindrop.predict(H5_TEST_SET_PATH) + self.raindrop.fit(GENERAL_H5_TRAIN_SET_PATH, GENERAL_H5_VAL_SET_PATH) + results = self.raindrop.predict(GENERAL_H5_TEST_SET_PATH) metrics = calc_binary_classification_metrics( results["classification"], DATA["test_y"] ) diff --git a/tests/clustering/crli.py b/tests/clustering/crli.py index 7046f792..6b3266ff 100644 --- a/tests/clustering/crli.py +++ b/tests/clustering/crli.py @@ -25,9 +25,9 @@ TRAIN_SET, VAL_SET, TEST_SET, - H5_TRAIN_SET_PATH, - H5_VAL_SET_PATH, - H5_TEST_SET_PATH, + GENERAL_H5_TRAIN_SET_PATH, + GENERAL_H5_VAL_SET_PATH, + GENERAL_H5_TEST_SET_PATH, RESULT_SAVING_DIR_FOR_CLUSTERING, check_tb_and_model_checkpoints_existence, ) @@ -165,9 +165,9 @@ def test_3_saving_path(self): @pytest.mark.xdist_group(name="clustering-crli") def test_4_lazy_loading(self): - self.crli_lstm.fit(H5_TRAIN_SET_PATH, H5_VAL_SET_PATH) + self.crli_lstm.fit(GENERAL_H5_TRAIN_SET_PATH, GENERAL_H5_VAL_SET_PATH) clustering_results = self.crli_lstm.predict( - H5_TEST_SET_PATH, return_latent_vars=True + 
GENERAL_H5_TEST_SET_PATH, return_latent_vars=True ) external_metrics = calc_external_cluster_validation_metrics( clustering_results["clustering"], DATA["test_y"] diff --git a/tests/clustering/vader.py b/tests/clustering/vader.py index bf0b0989..ba8a02de 100644 --- a/tests/clustering/vader.py +++ b/tests/clustering/vader.py @@ -26,9 +26,9 @@ TRAIN_SET, VAL_SET, TEST_SET, - H5_TRAIN_SET_PATH, - H5_VAL_SET_PATH, - H5_TEST_SET_PATH, + GENERAL_H5_TRAIN_SET_PATH, + GENERAL_H5_VAL_SET_PATH, + GENERAL_H5_TEST_SET_PATH, RESULT_SAVING_DIR_FOR_CLUSTERING, check_tb_and_model_checkpoints_existence, ) @@ -113,9 +113,9 @@ def test_3_saving_path(self): @pytest.mark.xdist_group(name="clustering-vader") def test_4_lazy_loading(self): - self.vader.fit(H5_TRAIN_SET_PATH, H5_VAL_SET_PATH) + self.vader.fit(GENERAL_H5_TRAIN_SET_PATH, GENERAL_H5_VAL_SET_PATH) clustering_results = self.vader.predict( - H5_TEST_SET_PATH, return_latent_vars=True + GENERAL_H5_TEST_SET_PATH, return_latent_vars=True ) external_metrics = calc_external_cluster_validation_metrics( clustering_results["clustering"], DATA["test_y"] diff --git a/tests/forecasting/bttf.py b/tests/forecasting/bttf.py index 2e9d21bb..5e87cac5 100644 --- a/tests/forecasting/bttf.py +++ b/tests/forecasting/bttf.py @@ -11,12 +11,8 @@ from pypots.forecasting import BTTF from pypots.utils.logging import logger -from pypots.utils.metrics import calc_mae -from tests.forecasting.config import ( - TEST_SET, - N_PRED_STEP, -) -from tests.global_test_config import DATA +from pypots.utils.metrics import calc_mse +from tests.global_test_config import DATA, FORECASTING_TEST_SET, N_PRED_STEPS class TestBTTF(unittest.TestCase): @@ -24,9 +20,9 @@ class TestBTTF(unittest.TestCase): # initialize a BTTF model bttf = BTTF( - n_steps=DATA["n_steps"] - N_PRED_STEP, + n_steps=DATA["n_steps"] - N_PRED_STEPS, n_features=DATA["n_features"], - pred_step=N_PRED_STEP, + pred_step=N_PRED_STEPS, rank=10, time_lags=[1, 2, 3, 2, 2 + 1, 2 + 2, 3, 3 + 1, 3 + 2], burn_iter=5, @@ -36,9 +32,9 @@ class TestBTTF(unittest.TestCase): @pytest.mark.xdist_group(name="forecasting-bttf") def test_0_forecasting(self): - predictions = self.bttf.predict(TEST_SET)["forecasting"] - mae = calc_mae(predictions, TEST_SET["X_ori"][:, -N_PRED_STEP:]) - logger.info(f"prediction MAE: {mae}") + predictions = self.bttf.predict(FORECASTING_TEST_SET)["forecasting"] + mse = calc_mse(predictions, FORECASTING_TEST_SET["X_pred"]) + logger.info(f"prediction MSE: {mse}") if __name__ == "__main__": diff --git a/tests/forecasting/config.py b/tests/forecasting/config.py deleted file mode 100644 index 3f2bc225..00000000 --- a/tests/forecasting/config.py +++ /dev/null @@ -1,18 +0,0 @@ -""" -Test configs for forecasting models. -""" - -# Created by Wenjie Du -# License: BSD-3-Clause - -from tests.global_test_config import DATA - -EPOCHS = 5 -N_PRED_STEP = 1 - -TRAIN_SET = {"X": DATA["train_X"]} -VAL_SET = {"X": DATA["val_X"]} -TEST_SET = { - "X": DATA["test_X"][:, :-N_PRED_STEP], - "X_ori": DATA["test_X_ori"], -} diff --git a/tests/forecasting/csdi.py b/tests/forecasting/csdi.py new file mode 100644 index 00000000..3df64ad8 --- /dev/null +++ b/tests/forecasting/csdi.py @@ -0,0 +1,149 @@ +""" +Test cases for CSDI forecasting model. 
+""" + +# Created by Wenjie Du +# License: BSD-3-Clause + + +import os.path +import unittest + +import numpy as np +import pytest + +from pypots.forecasting import CSDI +from pypots.optim import Adam +from pypots.utils.logging import logger +from pypots.utils.metrics import calc_mse, calc_quantile_crps + +from tests.global_test_config import ( + DATA, + EPOCHS, + DEVICE, + N_PRED_STEPS, + FORECASTING_TRAIN_SET, + FORECASTING_VAL_SET, + FORECASTING_TEST_SET, + FORECASTING_H5_TRAIN_SET_PATH, + FORECASTING_H5_VAL_SET_PATH, + FORECASTING_H5_TEST_SET_PATH, + RESULT_SAVING_DIR_FOR_FORECASTING, + check_tb_and_model_checkpoints_existence, +) + + +class TestCSDI(unittest.TestCase): + logger.info("Running tests for a forecasting model CSDI...") + + # set the log and model saving path + saving_path = os.path.join(RESULT_SAVING_DIR_FOR_FORECASTING, "CSDI") + model_save_name = "saved_csdi_model.pypots" + + # initialize an Adam optimizer + optimizer = Adam(lr=0.001, weight_decay=1e-5) + + # initialize a CSDI model + csdi = CSDI( + n_steps=DATA["n_steps"] - N_PRED_STEPS, + n_features=DATA["n_features"], + n_pred_steps=N_PRED_STEPS, + n_pred_features=DATA["n_features"], + n_layers=1, + n_channels=8, + d_time_embedding=32, + d_feature_embedding=3, + d_diffusion_embedding=32, + n_diffusion_steps=5, + n_heads=1, + epochs=EPOCHS, + saving_path=saving_path, + optimizer=optimizer, + device=DEVICE, + ) + + @pytest.mark.xdist_group(name="forecasting-csdi") + def test_0_fit(self): + self.csdi.fit(FORECASTING_TRAIN_SET, FORECASTING_VAL_SET) + + @pytest.mark.xdist_group(name="forecasting-csdi") + def test_1_forecasting(self): + forecasting_X = self.csdi.predict(FORECASTING_TEST_SET, n_sampling_times=2)[ + "forecasting" + ] + test_CRPS = calc_quantile_crps( + forecasting_X, + FORECASTING_TEST_SET["X_pred"], + ~np.isnan(FORECASTING_TEST_SET["X_pred"]), + ) + forecasting_X = forecasting_X.mean(axis=1) # mean over sampling times + assert not np.isnan( + forecasting_X + ).any(), ( + "Output has missing values in the forecasting results that should not be."
+ ) + test_MSE = calc_mse( + forecasting_X, + FORECASTING_TEST_SET["X_pred"], + ~np.isnan(FORECASTING_TEST_SET["X_pred"]), + ) + logger.info(f"CSDI test_MSE: {test_MSE}, test_CRPS: {test_CRPS}") + + @pytest.mark.xdist_group(name="forecasting-csdi") + def test_2_parameters(self): + assert hasattr(self.csdi, "model") and self.csdi.model is not None + + assert hasattr(self.csdi, "optimizer") and self.csdi.optimizer is not None + + assert hasattr(self.csdi, "best_loss") + self.assertNotEqual(self.csdi.best_loss, float("inf")) + + assert ( + hasattr(self.csdi, "best_model_dict") + and self.csdi.best_model_dict is not None + ) + + @pytest.mark.xdist_group(name="forecasting-csdi") + def test_3_saving_path(self): + # whether the root saving dir exists, which should be created by save_log_into_tb_file + assert os.path.exists( + self.saving_path + ), f"file {self.saving_path} does not exist" + + # check if the tensorboard file and model checkpoints exist + check_tb_and_model_checkpoints_existence(self.csdi) + + # save the trained model into file, and check if the path exists + saved_model_path = os.path.join(self.saving_path, self.model_save_name) + self.csdi.save(saved_model_path) + + # test loading the saved model, not necessary, but need to test + self.csdi.load(saved_model_path) + + @pytest.mark.xdist_group(name="forecasting-csdi") + def test_4_lazy_loading(self): + self.csdi.fit(FORECASTING_H5_TRAIN_SET_PATH, FORECASTING_H5_VAL_SET_PATH) + forecasting_results = self.csdi.predict(FORECASTING_H5_TEST_SET_PATH) + forecasting_X = forecasting_results["forecasting"] + test_CRPS = calc_quantile_crps( + forecasting_X, + FORECASTING_TEST_SET["X_pred"], + ~np.isnan(FORECASTING_TEST_SET["X_pred"]), + ) + forecasting_X = forecasting_X.mean(axis=1) # mean over sampling times + assert not np.isnan( + forecasting_X + ).any(), ( + "Output has missing values in the forecasting results that should not be." 
+ ) + + test_MSE = calc_mse( + forecasting_X, + FORECASTING_TEST_SET["X_pred"], + ~np.isnan(FORECASTING_TEST_SET["X_pred"]), + ) + logger.info(f"Lazy-loading CSDI test_MSE: {test_MSE}, test_CRPS: {test_CRPS}") + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/global_test_config.py b/tests/global_test_config.py index 2eba4c22..ad388f43 100644 --- a/tests/global_test_config.py +++ b/tests/global_test_config.py @@ -15,7 +15,39 @@ from pypots.utils.logging import logger from pypots.utils.random import set_random_seed -set_random_seed(2023) +# set the random seed for all test cases +RANDOM_SEED = 2023 +# set the number of epochs for all model training +EPOCHS = 2 +# set the number of prediction steps for forecasting models +N_PRED_STEPS = 1 +# tensorboard and model files saving directory +RESULT_SAVING_DIR = "testing_results" +MODEL_SAVING_DIR = f"{RESULT_SAVING_DIR}/models" +DATA_SAVING_DIR = f"{RESULT_SAVING_DIR}/datasets" +RESULT_SAVING_DIR_FOR_IMPUTATION = os.path.join(MODEL_SAVING_DIR, "imputation") +RESULT_SAVING_DIR_FOR_CLASSIFICATION = os.path.join(MODEL_SAVING_DIR, "classification") +RESULT_SAVING_DIR_FOR_CLUSTERING = os.path.join(MODEL_SAVING_DIR, "clustering") +RESULT_SAVING_DIR_FOR_FORECASTING = os.path.join(MODEL_SAVING_DIR, "forecasting") +# paths to save the generated dataset into files for testing the lazy-loading strategy +GENERAL_DATA_SAVING_DIR = f"{DATA_SAVING_DIR}/general_h5dataset" +GENERAL_H5_TRAIN_SET_PATH = os.path.abspath(f"{GENERAL_DATA_SAVING_DIR}/train_set.h5") +GENERAL_H5_VAL_SET_PATH = os.path.abspath(f"{GENERAL_DATA_SAVING_DIR}/val_set.h5") +GENERAL_H5_TEST_SET_PATH = os.path.abspath(f"{GENERAL_DATA_SAVING_DIR}/test_set.h5") +# paths to save the generated dataset for testing forecasting models with the lazy-loading strategy +FORECASTING_DATA_SAVING_DIR = f"{DATA_SAVING_DIR}/forecasting_h5dataset" +FORECASTING_H5_TRAIN_SET_PATH = os.path.abspath( + f"{FORECASTING_DATA_SAVING_DIR}/train_set.h5" +) +FORECASTING_H5_VAL_SET_PATH = os.path.abspath( + f"{FORECASTING_DATA_SAVING_DIR}/val_set.h5" +) +FORECASTING_H5_TEST_SET_PATH = os.path.abspath( + f"{FORECASTING_DATA_SAVING_DIR}/test_set.h5" +) + + +set_random_seed(RANDOM_SEED) # Generate the unified data for testing and cache it first, DATA here is a singleton # Otherwise, file lock will cause bug if running test parallely with pytest-xdist. @@ -43,15 +75,21 @@ "y": DATA["test_y"].astype(float), } -# tensorboard and model files saving directory -RESULT_SAVING_DIR = "testing_results" -RESULT_SAVING_DIR_FOR_IMPUTATION = os.path.join(RESULT_SAVING_DIR, "imputation") -RESULT_SAVING_DIR_FOR_CLASSIFICATION = os.path.join(RESULT_SAVING_DIR, "classification") -RESULT_SAVING_DIR_FOR_CLUSTERING = os.path.join(RESULT_SAVING_DIR, "clustering") -RESULT_SAVING_DIR_FOR_FORECASTING = os.path.join(RESULT_SAVING_DIR, "forecasting") - -# set the number of epochs for all model training -EPOCHS = 2 +assert ( + N_PRED_STEPS <= DATA["train_X"].shape[1] +), "N_PRED_STEPS should be less than the sequence length." 
+FORECASTING_TRAIN_SET = { + "X": DATA["train_X"][:, :-N_PRED_STEPS], + "X_pred": DATA["train_X_ori"][:, -N_PRED_STEPS:], +} +FORECASTING_VAL_SET = { + "X": DATA["val_X"][:, :-N_PRED_STEPS], + "X_pred": DATA["val_X_ori"][:, -N_PRED_STEPS:], +} +FORECASTING_TEST_SET = { + "X": DATA["test_X"][:, :-N_PRED_STEPS], + "X_pred": DATA["test_X_ori"][:, -N_PRED_STEPS:], +} # set DEVICES to None if no cuda device is available, to avoid initialization failed while importing test classes n_cuda_devices = torch.cuda.device_count() @@ -65,12 +103,6 @@ # if having no multiple cuda devices, leave it as None to use the default device DEVICE = None -# save the generated dataset into files for testing the lazy-loading strategy -DATA_SAVING_DIR = "h5data_for_tests" -H5_TRAIN_SET_PATH = f"{DATA_SAVING_DIR}/train_set.h5" -H5_VAL_SET_PATH = f"{DATA_SAVING_DIR}/val_set.h5" -H5_TEST_SET_PATH = f"{DATA_SAVING_DIR}/test_set.h5" - def check_tb_and_model_checkpoints_existence(model): # check the tensorboard file existence @@ -86,31 +118,23 @@ def check_tb_and_model_checkpoints_existence(model): if __name__ == "__main__": - if not os.path.exists(H5_TRAIN_SET_PATH): - save_dict_into_h5( - { - "X": DATA["train_X"], - "y": DATA["train_y"].astype(float), - }, - H5_TRAIN_SET_PATH, - ) - - if not os.path.exists(H5_VAL_SET_PATH): - save_dict_into_h5( - { - "X": DATA["val_X"], - "X_ori": DATA["val_X_ori"], - "y": DATA["val_y"].astype(float), - }, - H5_VAL_SET_PATH, - ) - - if not os.path.exists(H5_TEST_SET_PATH): - save_dict_into_h5( - { - "X": DATA["test_X"], - "X_ori": DATA["test_X_ori"], - "y": DATA["test_y"].astype(float), - }, - H5_TEST_SET_PATH, - ) + if not os.path.exists(GENERAL_H5_TRAIN_SET_PATH): + save_dict_into_h5(TRAIN_SET, GENERAL_H5_TRAIN_SET_PATH) + if not os.path.exists(GENERAL_H5_VAL_SET_PATH): + save_dict_into_h5(VAL_SET, GENERAL_H5_VAL_SET_PATH) + if not os.path.exists(GENERAL_H5_TEST_SET_PATH): + save_dict_into_h5(TEST_SET, GENERAL_H5_TEST_SET_PATH) + + if not os.path.exists(FORECASTING_H5_TRAIN_SET_PATH): + save_dict_into_h5(FORECASTING_TRAIN_SET, FORECASTING_H5_TRAIN_SET_PATH) + if not os.path.exists(FORECASTING_H5_VAL_SET_PATH): + save_dict_into_h5(FORECASTING_VAL_SET, FORECASTING_H5_VAL_SET_PATH) + if not os.path.exists(FORECASTING_H5_TEST_SET_PATH): + save_dict_into_h5(FORECASTING_TEST_SET, FORECASTING_H5_TEST_SET_PATH) + + logger.info( + f"Files under GENERAL_DATA_SAVING_DIR: {os.listdir(GENERAL_DATA_SAVING_DIR)}" + ) + logger.info( + f"Files under FORECASTING_DATA_SAVING_DIR: {os.listdir(FORECASTING_DATA_SAVING_DIR)}" + ) diff --git a/tests/imputation/autoformer.py b/tests/imputation/autoformer.py index 83610812..f68da280 100644 --- a/tests/imputation/autoformer.py +++ b/tests/imputation/autoformer.py @@ -23,9 +23,9 @@ TRAIN_SET, VAL_SET, TEST_SET, - H5_TRAIN_SET_PATH, - H5_VAL_SET_PATH, - H5_TEST_SET_PATH, + GENERAL_H5_TRAIN_SET_PATH, + GENERAL_H5_VAL_SET_PATH, + GENERAL_H5_TEST_SET_PATH, RESULT_SAVING_DIR_FOR_IMPUTATION, check_tb_and_model_checkpoints_existence, ) @@ -112,8 +112,8 @@ def test_3_saving_path(self): @pytest.mark.xdist_group(name="imputation-autoformer") def test_4_lazy_loading(self): - self.autoformer.fit(H5_TRAIN_SET_PATH, H5_VAL_SET_PATH) - imputation_results = self.autoformer.predict(H5_TEST_SET_PATH) + self.autoformer.fit(GENERAL_H5_TRAIN_SET_PATH, GENERAL_H5_VAL_SET_PATH) + imputation_results = self.autoformer.predict(GENERAL_H5_TEST_SET_PATH) assert not np.isnan( imputation_results["imputation"] ).any(), "Output still has missing values after running impute()." 
diff --git a/tests/imputation/brits.py b/tests/imputation/brits.py index 1e63ffa4..d69e4b1d 100644 --- a/tests/imputation/brits.py +++ b/tests/imputation/brits.py @@ -23,9 +23,9 @@ TRAIN_SET, VAL_SET, TEST_SET, - H5_TRAIN_SET_PATH, - H5_VAL_SET_PATH, - H5_TEST_SET_PATH, + GENERAL_H5_TRAIN_SET_PATH, + GENERAL_H5_VAL_SET_PATH, + GENERAL_H5_TEST_SET_PATH, RESULT_SAVING_DIR_FOR_IMPUTATION, check_tb_and_model_checkpoints_existence, ) @@ -100,8 +100,8 @@ def test_3_saving_path(self): @pytest.mark.xdist_group(name="imputation-brits") def test_4_lazy_loading(self): - self.brits.fit(H5_TRAIN_SET_PATH, H5_VAL_SET_PATH) - imputation_results = self.brits.predict(H5_TEST_SET_PATH) + self.brits.fit(GENERAL_H5_TRAIN_SET_PATH, GENERAL_H5_VAL_SET_PATH) + imputation_results = self.brits.predict(GENERAL_H5_TEST_SET_PATH) assert not np.isnan( imputation_results["imputation"] ).any(), "Output still has missing values after running impute()." diff --git a/tests/imputation/crossformer.py b/tests/imputation/crossformer.py index a6a6c55e..e33459ca 100644 --- a/tests/imputation/crossformer.py +++ b/tests/imputation/crossformer.py @@ -23,9 +23,9 @@ TRAIN_SET, VAL_SET, TEST_SET, - H5_TRAIN_SET_PATH, - H5_VAL_SET_PATH, - H5_TEST_SET_PATH, + GENERAL_H5_TRAIN_SET_PATH, + GENERAL_H5_VAL_SET_PATH, + GENERAL_H5_TEST_SET_PATH, RESULT_SAVING_DIR_FOR_IMPUTATION, check_tb_and_model_checkpoints_existence, ) @@ -113,8 +113,8 @@ def test_3_saving_path(self): @pytest.mark.xdist_group(name="imputation-crossformer") def test_4_lazy_loading(self): - self.crossformer.fit(H5_TRAIN_SET_PATH, H5_VAL_SET_PATH) - imputation_results = self.crossformer.predict(H5_TEST_SET_PATH) + self.crossformer.fit(GENERAL_H5_TRAIN_SET_PATH, GENERAL_H5_VAL_SET_PATH) + imputation_results = self.crossformer.predict(GENERAL_H5_TEST_SET_PATH) assert not np.isnan( imputation_results["imputation"] ).any(), "Output still has missing values after running impute()." 
diff --git a/tests/imputation/csdi.py b/tests/imputation/csdi.py index a0ee0f93..3023cea2 100644 --- a/tests/imputation/csdi.py +++ b/tests/imputation/csdi.py @@ -23,9 +23,9 @@ TRAIN_SET, VAL_SET, TEST_SET, - H5_TRAIN_SET_PATH, - H5_VAL_SET_PATH, - H5_TEST_SET_PATH, + GENERAL_H5_TRAIN_SET_PATH, + GENERAL_H5_VAL_SET_PATH, + GENERAL_H5_TEST_SET_PATH, RESULT_SAVING_DIR_FOR_IMPUTATION, check_tb_and_model_checkpoints_existence, ) @@ -43,6 +43,7 @@ class TestCSDI(unittest.TestCase): # initialize a CSDI model csdi = CSDI( + n_steps=DATA["n_steps"], n_features=DATA["n_features"], n_layers=1, n_channels=8, @@ -109,8 +110,8 @@ def test_3_saving_path(self): @pytest.mark.xdist_group(name="imputation-csdi") def test_4_lazy_loading(self): - self.csdi.fit(H5_TRAIN_SET_PATH, H5_VAL_SET_PATH) - imputation_results = self.csdi.predict(H5_TEST_SET_PATH) + self.csdi.fit(GENERAL_H5_TRAIN_SET_PATH, GENERAL_H5_VAL_SET_PATH) + imputation_results = self.csdi.predict(GENERAL_H5_TEST_SET_PATH) imputed_X = imputation_results["imputation"] test_CRPS = calc_quantile_crps( imputed_X, DATA["test_X_ori"], DATA["test_X_indicating_mask"] diff --git a/tests/imputation/dlinear.py b/tests/imputation/dlinear.py index c1351305..d8cdf858 100644 --- a/tests/imputation/dlinear.py +++ b/tests/imputation/dlinear.py @@ -23,9 +23,9 @@ TRAIN_SET, VAL_SET, TEST_SET, - H5_TRAIN_SET_PATH, - H5_VAL_SET_PATH, - H5_TEST_SET_PATH, + GENERAL_H5_TRAIN_SET_PATH, + GENERAL_H5_VAL_SET_PATH, + GENERAL_H5_TEST_SET_PATH, RESULT_SAVING_DIR_FOR_IMPUTATION, check_tb_and_model_checkpoints_existence, ) @@ -127,8 +127,8 @@ def test_3_saving_path(self): @pytest.mark.xdist_group(name="imputation-dlinear") def test_4_lazy_loading(self): - self.dlinear.fit(H5_TRAIN_SET_PATH, H5_VAL_SET_PATH) - imputation_results = self.dlinear.predict(H5_TEST_SET_PATH) + self.dlinear.fit(GENERAL_H5_TRAIN_SET_PATH, GENERAL_H5_VAL_SET_PATH) + imputation_results = self.dlinear.predict(GENERAL_H5_TEST_SET_PATH) assert not np.isnan( imputation_results["imputation"] ).any(), "Output still has missing values after running impute()." diff --git a/tests/imputation/etsformer.py b/tests/imputation/etsformer.py index 87b8ce49..3ade3dfd 100644 --- a/tests/imputation/etsformer.py +++ b/tests/imputation/etsformer.py @@ -23,9 +23,9 @@ TRAIN_SET, VAL_SET, TEST_SET, - H5_TRAIN_SET_PATH, - H5_VAL_SET_PATH, - H5_TEST_SET_PATH, + GENERAL_H5_TRAIN_SET_PATH, + GENERAL_H5_VAL_SET_PATH, + GENERAL_H5_TEST_SET_PATH, RESULT_SAVING_DIR_FOR_IMPUTATION, check_tb_and_model_checkpoints_existence, ) @@ -112,8 +112,8 @@ def test_3_saving_path(self): @pytest.mark.xdist_group(name="imputation-etsformer") def test_4_lazy_loading(self): - self.etsformer.fit(H5_TRAIN_SET_PATH, H5_VAL_SET_PATH) - imputation_results = self.etsformer.predict(H5_TEST_SET_PATH) + self.etsformer.fit(GENERAL_H5_TRAIN_SET_PATH, GENERAL_H5_VAL_SET_PATH) + imputation_results = self.etsformer.predict(GENERAL_H5_TEST_SET_PATH) assert not np.isnan( imputation_results["imputation"] ).any(), "Output still has missing values after running impute()." 
diff --git a/tests/imputation/fedformer.py b/tests/imputation/fedformer.py index 7a6b24e5..fe563582 100644 --- a/tests/imputation/fedformer.py +++ b/tests/imputation/fedformer.py @@ -23,9 +23,9 @@ TRAIN_SET, VAL_SET, TEST_SET, - H5_TRAIN_SET_PATH, - H5_VAL_SET_PATH, - H5_TEST_SET_PATH, + GENERAL_H5_TRAIN_SET_PATH, + GENERAL_H5_VAL_SET_PATH, + GENERAL_H5_TEST_SET_PATH, RESULT_SAVING_DIR_FOR_IMPUTATION, check_tb_and_model_checkpoints_existence, ) @@ -114,8 +114,8 @@ def test_3_saving_path(self): @pytest.mark.xdist_group(name="imputation-fedformer") def test_4_lazy_loading(self): - self.fedformer.fit(H5_TRAIN_SET_PATH, H5_VAL_SET_PATH) - imputation_results = self.fedformer.predict(H5_TEST_SET_PATH) + self.fedformer.fit(GENERAL_H5_TRAIN_SET_PATH, GENERAL_H5_VAL_SET_PATH) + imputation_results = self.fedformer.predict(GENERAL_H5_TEST_SET_PATH) assert not np.isnan( imputation_results["imputation"] ).any(), "Output still has missing values after running impute()." diff --git a/tests/imputation/gpvae.py b/tests/imputation/gpvae.py index 9db47e7e..c76170e8 100644 --- a/tests/imputation/gpvae.py +++ b/tests/imputation/gpvae.py @@ -23,9 +23,9 @@ TRAIN_SET, VAL_SET, TEST_SET, - H5_TRAIN_SET_PATH, - H5_VAL_SET_PATH, - H5_TEST_SET_PATH, + GENERAL_H5_TRAIN_SET_PATH, + GENERAL_H5_VAL_SET_PATH, + GENERAL_H5_TEST_SET_PATH, RESULT_SAVING_DIR_FOR_IMPUTATION, check_tb_and_model_checkpoints_existence, ) @@ -101,8 +101,8 @@ def test_3_saving_path(self): @pytest.mark.xdist_group(name="imputation-gpvae") def test_4_lazy_loading(self): - self.gp_vae.fit(H5_TRAIN_SET_PATH, H5_VAL_SET_PATH) - imputed_X = self.gp_vae.predict(H5_TEST_SET_PATH, n_sampling_times=2)[ + self.gp_vae.fit(GENERAL_H5_TRAIN_SET_PATH, GENERAL_H5_VAL_SET_PATH) + imputed_X = self.gp_vae.predict(GENERAL_H5_TEST_SET_PATH, n_sampling_times=2)[ "imputation" ] imputed_X = imputed_X.mean(axis=1) diff --git a/tests/imputation/informer.py b/tests/imputation/informer.py index 6f13680b..63689b03 100644 --- a/tests/imputation/informer.py +++ b/tests/imputation/informer.py @@ -23,9 +23,9 @@ TRAIN_SET, VAL_SET, TEST_SET, - H5_TRAIN_SET_PATH, - H5_VAL_SET_PATH, - H5_TEST_SET_PATH, + GENERAL_H5_TRAIN_SET_PATH, + GENERAL_H5_VAL_SET_PATH, + GENERAL_H5_TEST_SET_PATH, RESULT_SAVING_DIR_FOR_IMPUTATION, check_tb_and_model_checkpoints_existence, ) @@ -110,8 +110,8 @@ def test_3_saving_path(self): @pytest.mark.xdist_group(name="imputation-informer") def test_4_lazy_loading(self): - self.informer.fit(H5_TRAIN_SET_PATH, H5_VAL_SET_PATH) - imputation_results = self.informer.predict(H5_TEST_SET_PATH) + self.informer.fit(GENERAL_H5_TRAIN_SET_PATH, GENERAL_H5_VAL_SET_PATH) + imputation_results = self.informer.predict(GENERAL_H5_TEST_SET_PATH) assert not np.isnan( imputation_results["imputation"] ).any(), "Output still has missing values after running impute()." 
diff --git a/tests/imputation/locf.py b/tests/imputation/locf.py index 38ed1ce9..b22f4b42 100644 --- a/tests/imputation/locf.py +++ b/tests/imputation/locf.py @@ -19,9 +19,9 @@ DATA, DEVICE, TEST_SET, - H5_TRAIN_SET_PATH, - H5_VAL_SET_PATH, - H5_TEST_SET_PATH, + GENERAL_H5_TRAIN_SET_PATH, + GENERAL_H5_VAL_SET_PATH, + GENERAL_H5_TEST_SET_PATH, ) @@ -114,8 +114,8 @@ def test_0_impute(self): @pytest.mark.xdist_group(name="imputation-locf") def test_4_lazy_loading(self): - self.locf_backward.fit(H5_TRAIN_SET_PATH, H5_VAL_SET_PATH) - imputation_results = self.locf_backward.predict(H5_TEST_SET_PATH) + self.locf_backward.fit(GENERAL_H5_TRAIN_SET_PATH, GENERAL_H5_VAL_SET_PATH) + imputation_results = self.locf_backward.predict(GENERAL_H5_TEST_SET_PATH) assert not np.isnan( imputation_results["imputation"] ).any(), "Output still has missing values after running impute()." diff --git a/tests/imputation/mean.py b/tests/imputation/mean.py index 31747c71..04be2c9d 100644 --- a/tests/imputation/mean.py +++ b/tests/imputation/mean.py @@ -18,9 +18,9 @@ from tests.global_test_config import ( DATA, TEST_SET, - H5_TRAIN_SET_PATH, - H5_VAL_SET_PATH, - H5_TEST_SET_PATH, + GENERAL_H5_TRAIN_SET_PATH, + GENERAL_H5_VAL_SET_PATH, + GENERAL_H5_TEST_SET_PATH, ) @@ -56,8 +56,8 @@ def test_0_impute(self): @pytest.mark.xdist_group(name="imputation-mean") def test_4_lazy_loading(self): - self.mean.fit(H5_TRAIN_SET_PATH, H5_VAL_SET_PATH) - imputation_results = self.mean.predict(H5_TEST_SET_PATH) + self.mean.fit(GENERAL_H5_TRAIN_SET_PATH, GENERAL_H5_VAL_SET_PATH) + imputation_results = self.mean.predict(GENERAL_H5_TEST_SET_PATH) assert not np.isnan( imputation_results["imputation"] ).any(), "Output still has missing values after running impute()." diff --git a/tests/imputation/median.py b/tests/imputation/median.py index c11ab3d3..d4960449 100644 --- a/tests/imputation/median.py +++ b/tests/imputation/median.py @@ -18,9 +18,9 @@ from tests.global_test_config import ( DATA, TEST_SET, - H5_TRAIN_SET_PATH, - H5_VAL_SET_PATH, - H5_TEST_SET_PATH, + GENERAL_H5_TRAIN_SET_PATH, + GENERAL_H5_VAL_SET_PATH, + GENERAL_H5_TEST_SET_PATH, ) @@ -56,8 +56,8 @@ def test_0_impute(self): @pytest.mark.xdist_group(name="imputation-median") def test_4_lazy_loading(self): - self.median.fit(H5_TRAIN_SET_PATH, H5_VAL_SET_PATH) - imputation_results = self.median.predict(H5_TEST_SET_PATH) + self.median.fit(GENERAL_H5_TRAIN_SET_PATH, GENERAL_H5_VAL_SET_PATH) + imputation_results = self.median.predict(GENERAL_H5_TEST_SET_PATH) assert not np.isnan( imputation_results["imputation"] ).any(), "Output still has missing values after running impute()." diff --git a/tests/imputation/mrnn.py b/tests/imputation/mrnn.py index 4506e755..5e42e256 100644 --- a/tests/imputation/mrnn.py +++ b/tests/imputation/mrnn.py @@ -23,9 +23,9 @@ TRAIN_SET, VAL_SET, TEST_SET, - H5_TRAIN_SET_PATH, - H5_VAL_SET_PATH, - H5_TEST_SET_PATH, + GENERAL_H5_TRAIN_SET_PATH, + GENERAL_H5_VAL_SET_PATH, + GENERAL_H5_TEST_SET_PATH, RESULT_SAVING_DIR_FOR_IMPUTATION, check_tb_and_model_checkpoints_existence, ) @@ -100,8 +100,8 @@ def test_3_saving_path(self): @pytest.mark.xdist_group(name="imputation-mrnn") def test_4_lazy_loading(self): - self.mrnn.fit(H5_TRAIN_SET_PATH, H5_VAL_SET_PATH) - imputation_results = self.mrnn.predict(H5_TEST_SET_PATH) + self.mrnn.fit(GENERAL_H5_TRAIN_SET_PATH, GENERAL_H5_VAL_SET_PATH) + imputation_results = self.mrnn.predict(GENERAL_H5_TEST_SET_PATH) assert not np.isnan( imputation_results["imputation"] ).any(), "Output still has missing values after running impute()." 
diff --git a/tests/imputation/patchtst.py b/tests/imputation/patchtst.py index 161d3cca..fcfdff4b 100644 --- a/tests/imputation/patchtst.py +++ b/tests/imputation/patchtst.py @@ -23,9 +23,9 @@ TRAIN_SET, VAL_SET, TEST_SET, - H5_TRAIN_SET_PATH, - H5_VAL_SET_PATH, - H5_TEST_SET_PATH, + GENERAL_H5_TRAIN_SET_PATH, + GENERAL_H5_VAL_SET_PATH, + GENERAL_H5_TEST_SET_PATH, RESULT_SAVING_DIR_FOR_IMPUTATION, check_tb_and_model_checkpoints_existence, ) @@ -114,8 +114,8 @@ def test_3_saving_path(self): @pytest.mark.xdist_group(name="imputation-patchtst") def test_4_lazy_loading(self): - self.patchtst.fit(H5_TRAIN_SET_PATH, H5_VAL_SET_PATH) - imputation_results = self.patchtst.predict(H5_TEST_SET_PATH) + self.patchtst.fit(GENERAL_H5_TRAIN_SET_PATH, GENERAL_H5_VAL_SET_PATH) + imputation_results = self.patchtst.predict(GENERAL_H5_TEST_SET_PATH) assert not np.isnan( imputation_results["imputation"] ).any(), "Output still has missing values after running impute()." diff --git a/tests/imputation/saits.py b/tests/imputation/saits.py index 960e2bd4..325b28d2 100644 --- a/tests/imputation/saits.py +++ b/tests/imputation/saits.py @@ -23,9 +23,9 @@ TRAIN_SET, VAL_SET, TEST_SET, - H5_TRAIN_SET_PATH, - H5_VAL_SET_PATH, - H5_TEST_SET_PATH, + GENERAL_H5_TRAIN_SET_PATH, + GENERAL_H5_VAL_SET_PATH, + GENERAL_H5_TEST_SET_PATH, RESULT_SAVING_DIR_FOR_IMPUTATION, check_tb_and_model_checkpoints_existence, ) @@ -112,8 +112,8 @@ def test_3_saving_path(self): @pytest.mark.xdist_group(name="imputation-saits") def test_4_lazy_loading(self): - self.saits.fit(H5_TRAIN_SET_PATH, H5_VAL_SET_PATH) - imputation_results = self.saits.predict(H5_TEST_SET_PATH) + self.saits.fit(GENERAL_H5_TRAIN_SET_PATH, GENERAL_H5_VAL_SET_PATH) + imputation_results = self.saits.predict(GENERAL_H5_TEST_SET_PATH) assert not np.isnan( imputation_results["imputation"] ).any(), "Output still has missing values after running impute()." diff --git a/tests/imputation/timesnet.py b/tests/imputation/timesnet.py index 606d8747..8959cc9f 100644 --- a/tests/imputation/timesnet.py +++ b/tests/imputation/timesnet.py @@ -23,9 +23,9 @@ TRAIN_SET, VAL_SET, TEST_SET, - H5_TRAIN_SET_PATH, - H5_VAL_SET_PATH, - H5_TEST_SET_PATH, + GENERAL_H5_TRAIN_SET_PATH, + GENERAL_H5_VAL_SET_PATH, + GENERAL_H5_TEST_SET_PATH, RESULT_SAVING_DIR_FOR_IMPUTATION, check_tb_and_model_checkpoints_existence, ) @@ -110,8 +110,8 @@ def test_3_saving_path(self): @pytest.mark.xdist_group(name="imputation-timesnet") def test_4_lazy_loading(self): - self.timesnet.fit(H5_TRAIN_SET_PATH, H5_VAL_SET_PATH) - imputation_results = self.timesnet.predict(H5_TEST_SET_PATH) + self.timesnet.fit(GENERAL_H5_TRAIN_SET_PATH, GENERAL_H5_VAL_SET_PATH) + imputation_results = self.timesnet.predict(GENERAL_H5_TEST_SET_PATH) assert not np.isnan( imputation_results["imputation"] ).any(), "Output still has missing values after running impute()." 
diff --git a/tests/imputation/transformer.py b/tests/imputation/transformer.py index 2563680c..06839b95 100644 --- a/tests/imputation/transformer.py +++ b/tests/imputation/transformer.py @@ -23,9 +23,9 @@ TRAIN_SET, VAL_SET, TEST_SET, - H5_TRAIN_SET_PATH, - H5_VAL_SET_PATH, - H5_TEST_SET_PATH, + GENERAL_H5_TRAIN_SET_PATH, + GENERAL_H5_VAL_SET_PATH, + GENERAL_H5_TEST_SET_PATH, RESULT_SAVING_DIR_FOR_IMPUTATION, check_tb_and_model_checkpoints_existence, ) @@ -109,8 +109,8 @@ def test_3_saving_path(self): @pytest.mark.xdist_group(name="imputation-transformer") def test_4_lazy_loading(self): - self.transformer.fit(H5_TRAIN_SET_PATH, H5_VAL_SET_PATH) - imputation_results = self.transformer.predict(H5_TEST_SET_PATH) + self.transformer.fit(GENERAL_H5_TRAIN_SET_PATH, GENERAL_H5_VAL_SET_PATH) + imputation_results = self.transformer.predict(GENERAL_H5_TEST_SET_PATH) assert not np.isnan( imputation_results["imputation"] ).any(), "Output still has missing values after running impute()." diff --git a/tests/imputation/usgan.py b/tests/imputation/usgan.py index 934553a3..c9da6696 100644 --- a/tests/imputation/usgan.py +++ b/tests/imputation/usgan.py @@ -23,9 +23,9 @@ TRAIN_SET, VAL_SET, TEST_SET, - H5_TRAIN_SET_PATH, - H5_VAL_SET_PATH, - H5_TEST_SET_PATH, + GENERAL_H5_TRAIN_SET_PATH, + GENERAL_H5_VAL_SET_PATH, + GENERAL_H5_TEST_SET_PATH, RESULT_SAVING_DIR_FOR_IMPUTATION, check_tb_and_model_checkpoints_existence, ) @@ -103,8 +103,8 @@ def test_3_saving_path(self): @pytest.mark.xdist_group(name="imputation-usgan") def test_4_lazy_loading(self): - self.usgan.fit(H5_TRAIN_SET_PATH, H5_VAL_SET_PATH) - imputation_results = self.usgan.predict(H5_TEST_SET_PATH) + self.usgan.fit(GENERAL_H5_TRAIN_SET_PATH, GENERAL_H5_VAL_SET_PATH) + imputation_results = self.usgan.predict(GENERAL_H5_TEST_SET_PATH) assert not np.isnan( imputation_results["imputation"] ).any(), "Output still has missing values after running impute()."