Implement forecasting CSDI and update the templates #357

Merged · 15 commits · Apr 18, 2024
2 changes: 1 addition & 1 deletion .github/workflows/testing_ci.yml
@@ -78,8 +78,8 @@ jobs:

- name: Test with pytest
run: |
python tests/global_test_config.py
rm -rf testing_results && rm -rf tests/__pycache__ && rm -rf tests/*/__pycache__
python tests/global_test_config.py
python -m pytest -rA tests/*/* -s -n auto --cov=pypots --dist=loadgroup --cov-config=.coveragerc

- name: Generate the LCOV report
7 changes: 5 additions & 2 deletions .readthedocs.yaml
@@ -31,6 +31,9 @@ build:
- pip install ./TSDB_repo && pip install ./PyGrinder_repo && pip install .

post_install:
# To fix the exception: This documentation is not using `furo.css` as the stylesheet.
# If you have set `html_style` in your conf.py file, remove it.
- pip install sphinx==7.2.6
# this docutils version fixes issue#102, put it in post_install to avoid being
# overwritten by other versions (like 0.19) while installing other packages
- pip install docutils==0.20
# this version fixes issue#102, put it in post_install to avoid being
# overwritten by other versions (like 0.19) while installing other packages
1 change: 1 addition & 0 deletions README.md
@@ -228,6 +228,7 @@ the same as we did in [SAITS paper](https://arxiv.org/pdf/2202.08516).**
| Neural Net | VaDER | Variational Deep Embedding with Recurrence [^7] | 2019 |
| ***`Forecasting`*** | 🚥 | 🚥 | 🚥 |
| **Type** | **Abbr.** | **Full name of the algorithm/model/paper** | **Year** |
| Neural Net | CSDI | Conditional Score-based Diffusion Models for Probabilistic Time Series Imputation [^12] | 2021 |
| Probabilistic | BTTF | Bayesian Temporal Tensor Factorization [^8] | 2021 |


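For readers landing on this new table row, here is a minimal usage sketch of the forecasting CSDI added by this PR. It assumes the model is exposed as `pypots.forecasting.CSDI` and follows the usual PyPOTS `fit`/`predict` workflow; the constructor arguments, dictionary keys, and toy data below are illustrative assumptions, not the confirmed API.

```python
# Hypothetical sketch of the forecasting CSDI workflow; argument and key
# names are illustrative assumptions and may differ from the real API.
import numpy as np
from pypots.forecasting import CSDI

n_samples, n_steps, n_features, n_pred_steps = 64, 48, 10, 12

# Toy partially observed series: NaNs mark missing values.
X = np.random.randn(n_samples, n_steps, n_features)
X[np.random.rand(*X.shape) < 0.1] = np.nan
X_pred = np.random.randn(n_samples, n_pred_steps, n_features)  # assumed key

model = CSDI(
    n_steps=n_steps,
    n_features=n_features,
    n_pred_steps=n_pred_steps,
    n_pred_features=n_features,
    n_layers=2,
    n_heads=2,
    n_channels=32,
    d_time_embedding=64,
    d_feature_embedding=16,
    d_diffusion_embedding=64,
    epochs=5,
)
model.fit({"X": X, "X_pred": X_pred})
forecast = model.predict({"X": X})["forecasting"]  # assumed result key
```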
2 changes: 1 addition & 1 deletion docs/index.rst
@@ -193,7 +193,7 @@ Imputation Neural Net FEDformer (Frequency Enhanced De
Imputation Neural Net Informer (Beyond Efficient Transformer for Long Sequence Time-Series Forecasting) 2021 :cite:`zhou2021informer`
Imputation Neural Net Autoformer (Decomposition Transformers with Auto-Correlation for Long-Term Series Forecasting) 2021 :cite:`wu2021autoformer`
Imputation Neural Net US-GAN (Unsupervised GAN for Multivariate Time Series Imputation) 2021 :cite:`miao2021SSGAN`
Imputation Neural Net CSDI (Conditional Score-based Diffusion Models for Probabilistic Time Series Imputation) 2021 :cite:`tashiro2021csdi`
Imputation, Forecasting Neural Net CSDI (Conditional Score-based Diffusion Models for Probabilistic Time Series Imputation) 2021 :cite:`tashiro2021csdi`
Imputation Neural Net GP-VAE (Gaussian Process Variational Autoencoder) 2020 :cite:`fortuin2020gpvae`
Imputation, Classification Neural Net BRITS (Bidirectional Recurrent Imputation for Time Series) 2018 :cite:`cao2018BRITS`
Imputation Neural Net M-RNN (Multi-directional Recurrent Neural Network) 2019 :cite:`yoon2019MRNN`
4 changes: 2 additions & 2 deletions docs/pypots.data.rst
@@ -1,10 +1,10 @@
pypots.data package
===================

pypots.data.base
pypots.data.dataset
-----------------------

.. automodule:: pypots.data.base
.. automodule:: pypots.data.dataset
:members:
:undoc-members:
:show-inheritance:
20 changes: 10 additions & 10 deletions pypots/base.py
@@ -337,13 +337,13 @@ def fit(
self,
train_set: Union[dict, str],
val_set: Optional[Union[dict, str]] = None,
file_type: str = "h5py",
file_type: str = "hdf5",
) -> None:
"""Train the classifier on the given data.

Parameters
----------
train_set : dict or str
train_set :
The dataset for model training, should be a dictionary including keys as 'X',
or a path string locating a data file supported by PyPOTS (e.g. h5 file).
If it is a dict, X should be array-like of shape [n_samples, sequence length (time steps), n_features],
@@ -352,7 +352,7 @@ def fit(
If it is a path string, the path should point to a data file, e.g. a h5 file, which contains
key-value pairs like a dict, and it has to include keys as 'X' and 'y'.

val_set : dict or str
val_set :
The dataset for model validating, should be a dictionary including keys as 'X',
or a path string locating a data file supported by PyPOTS (e.g. h5 file).
If it is a dict, X should be array-like of shape [n_samples, sequence length (time steps), n_features],
@@ -361,7 +361,7 @@ def fit(
If it is a path string, the path should point to a data file, e.g. a h5 file, which contains
key-value pairs like a dict, and it has to include keys as 'X' and 'y'.

file_type : str
file_type :
The type of the given file if train_set and val_set are path strings.

"""
@@ -371,13 +371,13 @@ def fit(
def predict(
self,
test_set: Union[dict, str],
file_type: str = "h5py",
file_type: str = "hdf5",
) -> dict:
"""Make predictions for the input data with the trained model.

Parameters
----------
test_set : dict or str
test_set :
The dataset for model validating, should be a dictionary including keys as 'X',
or a path string locating a data file supported by PyPOTS (e.g. h5 file).
If it is a dict, X should be array-like of shape [n_samples, sequence length (time steps), n_features],
@@ -386,12 +386,12 @@ def predict(
If it is a path string, the path should point to a data file, e.g. a h5 file, which contains
key-value pairs like a dict, and it has to include keys as 'X' and 'y'.

file_type : str
file_type :
The type of the given file if test_set is a path string.

Returns
-------
result_dict: dict
result_dict :
Prediction results in a Python Dictionary for the given samples.
It should be a dictionary including keys as 'imputation', 'classification', 'clustering', and 'forecasting'.
For sure, only the keys that relevant tasks are supported by the model will be returned.
@@ -512,14 +512,14 @@ def fit(
self,
train_set: Union[dict, str],
val_set: Optional[Union[dict, str]] = None,
file_type: str = "h5py",
file_type: str = "hdf5",
) -> None:
raise NotImplementedError

@abstractmethod
def predict(
self,
test_set: Union[dict, str],
file_type: str = "h5py",
file_type: str = "hdf5",
) -> dict:
raise NotImplementedError
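The reworked docstrings above describe the two accepted input forms for `fit`/`predict`: an in-memory dict keyed by `'X'` (plus `'y'` where labels are needed), or a path to a file holding the same key-value pairs, now labeled with the default `file_type="hdf5"`. A minimal sketch of preparing both forms follows; the model placeholder and file name are illustrative.

```python
# Sketch of the two input forms described in the docstrings above.
# `SomeModel` is a placeholder, not a real PyPOTS class.
import h5py
import numpy as np

n_samples, n_steps, n_features = 32, 24, 5
X = np.random.randn(n_samples, n_steps, n_features)
X[np.random.rand(*X.shape) < 0.1] = np.nan  # missing values as NaN
y = np.random.randint(0, 2, n_samples)

# Form 1: an in-memory dict
train_set = {"X": X, "y": y}

# Form 2: a path to an HDF5 file holding the same key-value pairs
with h5py.File("train_set.h5", "w") as f:
    f.create_dataset("X", data=X)
    f.create_dataset("y", data=y)

# model = SomeModel(...)
# model.fit(train_set)                          # dict input
# model.fit("train_set.h5", file_type="hdf5")   # file input, renamed default
```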
24 changes: 12 additions & 12 deletions pypots/classification/base.py
@@ -72,7 +72,7 @@ def fit(
self,
train_set: Union[dict, str],
val_set: Optional[Union[dict, str]] = None,
file_type: str = "h5py",
file_type: str = "hdf5",
) -> None:
"""Train the classifier on the given data.

@@ -106,15 +106,15 @@ def fit(
def predict(
self,
test_set: Union[dict, str],
file_type: str = "h5py",
file_type: str = "hdf5",
) -> dict:
raise NotImplementedError

@abstractmethod
def classify(
self,
X: Union[dict, str],
file_type: str = "h5py",
file_type: str = "hdf5",
) -> np.ndarray:
"""Classify the input data with the trained model.

@@ -214,12 +214,12 @@ def __init__(
self.n_classes = n_classes

@abstractmethod
def _assemble_input_for_training(self, data) -> dict:
def _assemble_input_for_training(self, data: list) -> dict:
"""Assemble the given data into a dictionary for training input.

Parameters
----------
data : list,
data :
Input data from dataloader, should be list.

Returns
@@ -230,12 +230,12 @@ def _assemble_input_for_training(self, data) -> dict:
raise NotImplementedError

@abstractmethod
def _assemble_input_for_validating(self, data) -> dict:
def _assemble_input_for_validating(self, data: list) -> dict:
"""Assemble the given data into a dictionary for validating input.

Parameters
----------
data : list,
data :
Data output from dataloader, should be list.

Returns
@@ -246,7 +246,7 @@ def _assemble_input_for_validating(self, data) -> dict:
raise NotImplementedError

@abstractmethod
def _assemble_input_for_testing(self, data) -> dict:
def _assemble_input_for_testing(self, data: list) -> dict:
"""Assemble the given data into a dictionary for testing input.

Notes
@@ -259,7 +259,7 @@ def _assemble_input_for_testing(self, data) -> dict:

Parameters
----------
data : list,
data :
Data output from dataloader, should be list.

Returns
@@ -386,7 +386,7 @@ def fit(
self,
train_set: Union[dict, str],
val_set: Optional[Union[dict, str]] = None,
file_type: str = "h5py",
file_type: str = "hdf5",
) -> None:
"""Train the classifier on the given data.

@@ -420,15 +420,15 @@ def fit(
def predict(
self,
test_set: Union[dict, str],
file_type: str = "h5py",
file_type: str = "hdf5",
) -> dict:
raise NotImplementedError

@abstractmethod
def classify(
self,
X: Union[dict, str],
file_type: str = "h5py",
file_type: str = "hdf5",
) -> np.ndarray:
"""Classify the input data with the trained model.

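The `_assemble_input_for_*` hooks above now annotate their `data` argument as a plain `list` coming from the dataloader. A rough, hypothetical illustration of that contract follows; the batch layout and key names are assumptions, not the actual PyPOTS implementation.

```python
# Hypothetical illustration only: the batch layout and key names are
# assumptions, not the actual PyPOTS implementation.
import torch


def assemble_input_for_training(data: list) -> dict:
    """Turn a dataloader batch (a plain list of tensors) into a model-input dict."""
    indices, X, missing_mask, y = data  # assumed ordering of the collated batch
    return {
        "X": X,                        # observed values
        "missing_mask": missing_mask,  # 1 = observed, 0 = missing
        "y": y,                        # labels, used for training/validation only
    }


# A fake batch of 8 samples, 24 steps, 5 features:
batch = [
    torch.arange(8),                   # sample indices
    torch.randn(8, 24, 5),             # observations X
    torch.ones(8, 24, 5),              # missing mask
    torch.zeros(8, dtype=torch.long),  # class labels y
]
inputs = assemble_input_for_training(batch)
```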
17 changes: 11 additions & 6 deletions pypots/classification/brits/data.py
@@ -17,7 +17,7 @@ class DatasetForBRITS(DatasetForBRITS_Imputation):

Parameters
----------
data : dict or str,
data :
The dataset for model input, should be a dictionary including keys as 'X' and 'y',
or a path string locating a data file.
If it is a dict, X should be array-like of shape [n_samples, sequence length (time steps), n_features],
@@ -26,7 +26,7 @@ class DatasetForBRITS(DatasetForBRITS_Imputation):
If it is a path string, the path should point to a data file, e.g. a h5 file, which contains
key-value pairs like a dict, and it has to include keys as 'X' and 'y'.

return_labels : bool, default = True,
return_y :
Whether to return labels in function __getitem__() if they exist in the given data. If `True`, for example,
during training of classification models, the Dataset class will return labels in __getitem__() for model input.
Otherwise, labels won't be included in the data returned by __getitem__(). This parameter exists because we
@@ -35,14 +35,19 @@ class DatasetForBRITS(DatasetForBRITS_Imputation):
with function _fetch_data_from_file(), which works for all three stages. Therefore, we need this parameter for
distinction.

file_type : str, default = "h5py"
file_type :
The type of the given file if train_set and val_set are path strings.
"""

def __init__(
self,
data: Union[dict, str],
return_labels: bool = True,
file_type: str = "h5py",
return_y: bool = True,
file_type: str = "hdf5",
):
super().__init__(data, False, return_labels, file_type)
super().__init__(
data=data,
return_X_ori=False,
return_y=return_y,
file_type=file_type,
)
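Because both the `return_labels` keyword and the `"h5py"` file-type string are renamed in this file, downstream code constructing this dataset needs a one-line update. A small before/after sketch follows, using toy label-free data as at test time; the toy data itself is illustrative.

```python
# Toy test-time data without labels; NaNs mark missing values.
import numpy as np
from pypots.classification.brits.data import DatasetForBRITS

X = np.random.randn(16, 24, 5)
X[np.random.rand(*X.shape) < 0.1] = np.nan
test_set = {"X": X}

# Before this PR:
# dataset = DatasetForBRITS(test_set, return_labels=False, file_type="h5py")

# After this PR:
dataset = DatasetForBRITS(test_set, return_y=False, file_type="hdf5")
```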
8 changes: 4 additions & 4 deletions pypots/classification/brits/model.py
@@ -208,7 +208,7 @@ def fit(
self,
train_set: Union[dict, str],
val_set: Optional[Union[dict, str]] = None,
file_type: str = "h5py",
file_type: str = "hdf5",
) -> None:
# Step 1: wrap the input data with classes Dataset and DataLoader
training_set = DatasetForBRITS(train_set, file_type=file_type)
@@ -239,10 +239,10 @@ def fit(
def predict(
self,
test_set: Union[dict, str],
file_type: str = "h5py",
file_type: str = "hdf5",
) -> dict:
self.model.eval() # set the model as eval status to freeze it.
test_set = DatasetForBRITS(test_set, return_labels=False, file_type=file_type)
test_set = DatasetForBRITS(test_set, return_y=False, file_type=file_type)
test_loader = DataLoader(
test_set,
batch_size=self.batch_size,
@@ -267,7 +267,7 @@ def predict(
def classify(
self,
X: Union[dict, str],
file_type: str = "h5py",
file_type: str = "hdf5",
) -> np.ndarray:
"""Classify the input data with the trained model.

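To close the loop on the renamed defaults, here is a hedged end-to-end sketch of the classification workflow touched by this file: `predict` returns a task-keyed dictionary (as described in the base-class docstring), while `classify` is the array-returning shortcut. The BRITS constructor arguments and toy data are illustrative assumptions.

```python
# Hypothetical end-to-end sketch; constructor arguments are illustrative.
import numpy as np
from pypots.classification import BRITS

n_samples, n_steps, n_features, n_classes = 64, 24, 5, 2
X = np.random.randn(n_samples, n_steps, n_features)
X[np.random.rand(*X.shape) < 0.1] = np.nan
y = np.random.randint(0, n_classes, n_samples)

model = BRITS(
    n_steps=n_steps,
    n_features=n_features,
    rnn_hidden_size=64,
    n_classes=n_classes,
    epochs=5,
)
model.fit({"X": X, "y": y})

results = model.predict({"X": X})   # dict keyed by task, e.g. 'classification'
labels = model.classify({"X": X})   # np.ndarray of predicted labels
```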