Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor testing cases #189

Merged
merged 8 commits into from
Sep 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 38 additions & 21 deletions .github/workflows/testing_ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,43 +15,60 @@ jobs:
runs-on: ${{ matrix.os }}
defaults:
run:
shell: bash -l {0}
shell: bash {0}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest, macOS-latest]
python-version: ["3.7", "3.8", "3.9", "3.10"]
python-version: ["3.7", "3.10"]
torch-version: ["1.13.1"]

steps:
- name: Check out the repo code
uses: actions/checkout@v3

- name: Set up Conda
uses: conda-incubator/setup-miniconda@v2
- name: Determine the Python version
uses: haya14busa/action-cond@v1
id: condval
with:
activate-environment: pypots-test
python-version: ${{ matrix.python-version }}
environment-file: tests/environment_for_conda_test.yml
auto-activate-base: false
cond: ${{ matrix.python-version == 3.7 && matrix.os == 'macOS-latest' }}
# Note: the latest 3.7 patch release (3.7.17) for macOS raises "ModuleNotFoundError: No module named '_bz2'"
if_true: "3.7.16"
if_false: ${{ matrix.python-version }}

- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: ${{ steps.condval.outputs.value }}
check-latest: true
cache: pip
cache-dependency-path: |
setup.cfg

- name: Install PyTorch ${{ matrix.torch-version }}+cpu
# we have to install torch in advance because torch_sparse needs it for compilation,
# refer to https://github.com/rusty1s/pytorch_sparse/issues/156#issuecomment-1304869772 for details
run: |
which python
which pip
python -m pip install --upgrade pip
pip install torch==${{ matrix.torch-version }} -f https://download.pytorch.org/whl/cpu
python -c "import torch; print('PyTorch:', torch.__version__)"

- name: Install other dependencies
run: |
pip install pypots
pip install torch-geometric torch-scatter torch-sparse -f "https://data.pyg.org/whl/torch-${{ matrix.torch-version }}+cpu.html"
pip install -e ".[dev]"

- name: Fetch the test environment details
run: |
which python
conda info
conda list
pip list

- name: Test with pytest
run: |
# run the test suites separately here because running them all in one `pytest` command
# triggers a Segmentation Fault in test_clustering on macOS. The root cause has not been
# identified, so splitting the runs is a trade-off to avoid the crash.
python -m pytest -rA tests/test_classification.py -n auto --cov=pypots --dist=loadgroup --cov-config=.coveragerc
python -m pytest -rA tests/test_imputation.py -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
python -m pytest -rA tests/test_clustering.py -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
python -m pytest -rA tests/test_forecasting.py -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
python -m pytest -rA tests/test_optim.py -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
python -m pytest -rA tests/test_data.py -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
python -m pytest -rA tests/test_utils.py -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
python -m pytest -rA tests/test_cli.py -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
coverage run --source=pypots -m pytest -rA tests/*/*

- name: Generate the LCOV report
run: |
Expand All @@ -61,4 +78,4 @@ jobs:
uses: coverallsapp/github-action@master
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
path-to-lcov: 'coverage.lcov'
path-to-lcov: "coverage.lcov"
60 changes: 21 additions & 39 deletions .github/workflows/testing_daily.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,61 +10,43 @@ jobs:
runs-on: ${{ matrix.os }}
defaults:
run:
shell: bash {0}
shell: bash -l {0}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest, macOS-latest]
python-version: ["3.7", "3.8", "3.9", "3.10"]
torch-version: ["1.13.1"]
python-version: ["3.7", "3.10"]

steps:
- name: Check out the repo code
uses: actions/checkout@v3

- name: Determine the Python version
uses: haya14busa/action-cond@v1
id: condval
- name: Set up Conda
uses: conda-incubator/setup-miniconda@v2
with:
cond: ${{ matrix.python-version == 3.7 && matrix.os == 'macOS-latest' }}
# Note: the latest 3.7 patch release (3.7.17) for macOS raises "ModuleNotFoundError: No module named '_bz2'"
if_true: "3.7.16"
if_false: ${{ matrix.python-version }}

- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: ${{ steps.condval.outputs.value }}
check-latest: true
cache: pip
cache-dependency-path: |
setup.cfg

- name: Install PyTorch ${{ matrix.torch-version }}+cpu
# we have to install torch in advance because torch_sparse needs it for compilation,
# refer to https://github.com/rusty1s/pytorch_sparse/issues/156#issuecomment-1304869772 for details
run: |
which python
which pip
python -m pip install --upgrade pip
pip install torch==${{ matrix.torch-version }} -f https://download.pytorch.org/whl/cpu
python -c "import torch; print('PyTorch:', torch.__version__)"

- name: Install other dependencies
run: |
pip install pypots
pip install torch-geometric torch-scatter torch-sparse -f "https://data.pyg.org/whl/torch-${{ matrix.torch-version }}+cpu.html"
pip install -e ".[dev]"
activate-environment: pypots-test
python-version: ${{ matrix.python-version }}
environment-file: tests/environment_for_conda_test.yml
auto-activate-base: false

- name: Fetch the test environment details
run: |
which python
pip list
conda info
conda list

- name: Test with pytest
run: |
coverage run --source=pypots -m pytest --ignore tests/test_training_on_multi_gpus.py
# ignore test_training_on_multi_gpus.py because it requires multiple GPUs, which are not available on GitHub Actions
# run the test suites separately here because running them all in one `pytest` command
# triggers a Segmentation Fault in test_clustering on macOS. The root cause has not been
# identified, so splitting the runs is a trade-off to avoid the crash.
python -m pytest -rA tests/classification/* -n auto --cov=pypots --dist=loadgroup --cov-config=.coveragerc
python -m pytest -rA tests/imputation/* -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
python -m pytest -rA tests/clustering/* -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
python -m pytest -rA tests/forecasting/* -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
python -m pytest -rA tests/optim/* -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
python -m pytest -rA tests/data/* -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
python -m pytest -rA tests/utils/* -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc
python -m pytest -rA tests/cli/* -n auto --cov=pypots --cov-append --dist=loadgroup --cov-config=.coveragerc

- name: Generate the LCOV report
run: |
Expand All @@ -74,4 +56,4 @@ jobs:
uses: coverallsapp/github-action@master
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
path-to-lcov: "coverage.lcov"
path-to-lcov: 'coverage.lcov'
24 changes: 22 additions & 2 deletions docs/pypots.forecasting.rst
Original file line number Diff line number Diff line change
@@ -1,11 +1,31 @@
pypots.forecasting package
==========================

Subpackages
-----------

pypots.forecasting.bttf module
.. toctree::
:maxdepth: 4

pypots.forecasting.bttf
pypots.forecasting.template

Submodules
----------

pypots.forecasting.base module
------------------------------

.. automodule:: pypots.forecasting.bttf
.. automodule:: pypots.forecasting.base
:members:
:undoc-members:
:show-inheritance:
:inherited-members:

Module contents
---------------

.. automodule:: pypots.forecasting
:members:
:undoc-members:
:show-inheritance:
Expand Down
2 changes: 1 addition & 1 deletion pypots/classification/grud/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ def _fetch_data_from_file(self, idx: int) -> Iterable:
if self.file_handle is None:
self.file_handle = self._open_file_handle()

X = torch.from_numpy(self.file_handle["X"][idx])
X = torch.from_numpy(self.file_handle["X"][idx]).to(torch.float32)
missing_mask = (~torch.isnan(X)).to(torch.float32)
X_filledLOCF = self.locf._locf_torch(X.unsqueeze(dim=0)).squeeze()
X = torch.nan_to_num(X)
Expand Down
12 changes: 9 additions & 3 deletions pypots/clustering/vader/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
# License: GLP-v3


from typing import Union
from typing import Union, Iterable

from ..crli.data import DatasetForCRLI
from ...data.base import BaseDataset


class DatasetForVaDER(DatasetForCRLI):
class DatasetForVaDER(BaseDataset):
"""Dataset class for model VaDER.

Parameters
Expand Down Expand Up @@ -45,3 +45,9 @@ def __init__(
file_type: str = "h5py",
):
super().__init__(data, return_labels, file_type)

def _fetch_data_from_array(self, idx: int) -> Iterable:
return super()._fetch_data_from_array(idx)

def _fetch_data_from_file(self, idx: int) -> Iterable:
return super()._fetch_data_from_file(idx)
16 changes: 8 additions & 8 deletions pypots/data/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,13 +204,13 @@ def _fetch_data_from_array(self, idx: int) -> Iterable:
The collated data sample, a list including all necessary sample info.
"""

X = self.X[idx]
missing_mask = ~torch.isnan(X)
X = self.X[idx].to(torch.float32)
missing_mask = (~torch.isnan(X)).to(torch.float32)
X = torch.nan_to_num(X)
sample = [
torch.tensor(idx),
X.to(torch.float32),
missing_mask.to(torch.float32),
X,
missing_mask,
]

if self.y is not None and self.return_labels:
Expand Down Expand Up @@ -279,13 +279,13 @@ def _fetch_data_from_file(self, idx: int) -> Iterable:
if self.file_handle is None:
self.file_handle = self._open_file_handle()

X = torch.from_numpy(self.file_handle["X"][idx])
missing_mask = ~torch.isnan(X)
X = torch.from_numpy(self.file_handle["X"][idx]).to(torch.float32)
missing_mask = (~torch.isnan(X)).to(torch.float32)
X = torch.nan_to_num(X)
sample = [
torch.tensor(idx),
X.to(torch.float32),
missing_mask.to(torch.float32),
X,
missing_mask,
]

# if the dataset has labels and is for training, then fetch it from the file
Expand Down
11 changes: 9 additions & 2 deletions pypots/data/saving.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,11 @@
from pypots.utils.logging import logger


def save_dict_into_h5(data_dict: dict, saving_dir: str) -> None:
def save_dict_into_h5(
data_dict: dict,
saving_dir: str,
saving_name: str = "datasets.h5",
) -> None:
"""Save the given data (in a dictionary) into the given h5 file.

Parameters
Expand All @@ -25,6 +29,9 @@ def save_dict_into_h5(data_dict: dict, saving_dir: str) -> None:
saving_dir : str,
The h5 file to save the data.

saving_name : str, optional (default="datasets.h5")
The final name of the saved h5 file.

"""

def save_set(handle, name, data):
Expand All @@ -36,7 +43,7 @@ def save_set(handle, name, data):
handle.create_dataset(name, data=data)

create_dir_if_not_exist(saving_dir)
saving_path = os.path.join(saving_dir, "datasets.h5")
saving_path = os.path.join(saving_dir, saving_name)
with h5py.File(saving_path, "w") as hf:
for k, v in data_dict.items():
save_set(hf, k, v)
Expand Down
26 changes: 13 additions & 13 deletions pypots/imputation/brits/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,14 +59,14 @@ def __init__(

self.processed_data = {
"forward": {
"X": forward_X,
"missing_mask": forward_missing_mask,
"delta": forward_delta,
"X": forward_X.to(torch.float32),
"missing_mask": forward_missing_mask.to(torch.float32),
"delta": forward_delta.to(torch.float32),
},
"backward": {
"X": backward_X,
"missing_mask": backward_missing_mask,
"delta": backward_delta,
"X": backward_X.to(torch.float32),
"missing_mask": backward_missing_mask.to(torch.float32),
"delta": backward_delta.to(torch.float32),
},
}

Expand Down Expand Up @@ -101,13 +101,13 @@ def _fetch_data_from_array(self, idx: int) -> Iterable:
sample = [
torch.tensor(idx),
# for forward
self.processed_data["forward"]["X"][idx].to(torch.float32),
self.processed_data["forward"]["missing_mask"][idx].to(torch.float32),
self.processed_data["forward"]["delta"][idx].to(torch.float32),
self.processed_data["forward"]["X"][idx],
self.processed_data["forward"]["missing_mask"][idx],
self.processed_data["forward"]["delta"][idx],
# for backward
self.processed_data["backward"]["X"][idx].to(torch.float32),
self.processed_data["backward"]["missing_mask"][idx].to(torch.float32),
self.processed_data["backward"]["delta"][idx].to(torch.float32),
self.processed_data["backward"]["X"][idx],
self.processed_data["backward"]["missing_mask"][idx],
self.processed_data["backward"]["delta"][idx],
]

if self.y is not None and self.return_labels:
Expand All @@ -133,7 +133,7 @@ def _fetch_data_from_file(self, idx: int) -> Iterable:
if self.file_handle is None:
self.file_handle = self._open_file_handle()

X = torch.from_numpy(self.file_handle["X"][idx])
X = torch.from_numpy(self.file_handle["X"][idx]).to(torch.float32)
missing_mask = (~torch.isnan(X)).to(torch.float32)
X = torch.nan_to_num(X)

Expand Down
9 changes: 4 additions & 5 deletions pypots/imputation/gpvae/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import torch

from ...data.base import BaseDataset
from ...data.utils import torch_parse_delta


class DatasetForGPVAE(BaseDataset):
Expand Down Expand Up @@ -51,7 +50,7 @@ def __init__(
if not isinstance(self.data, str):
# calculate all delta here.
missing_mask = (~torch.isnan(self.X)).type(torch.float32)
X = torch.nan_to_num(self.X)
X = torch.nan_to_num(self.X).to(torch.float32)

self.processed_data = {
"X": X,
Expand Down Expand Up @@ -89,8 +88,8 @@ def _fetch_data_from_array(self, idx: int) -> Iterable:
sample = [
torch.tensor(idx),
# for forward
self.processed_data["X"][idx].to(torch.float32),
self.processed_data["missing_mask"][idx].to(torch.float32),
self.processed_data["X"][idx],
self.processed_data["missing_mask"][idx],
]

if self.y is not None and self.return_labels:
Expand All @@ -116,7 +115,7 @@ def _fetch_data_from_file(self, idx: int) -> Iterable:
if self.file_handle is None:
self.file_handle = self._open_file_handle()

X = torch.from_numpy(self.file_handle["X"][idx])
X = torch.from_numpy(self.file_handle["X"][idx]).to(torch.float32)
missing_mask = (~torch.isnan(X)).to(torch.float32)
X = torch.nan_to_num(X)

Expand Down
Loading