diff --git a/.github/workflows/test_full.yml b/.github/workflows/test_full.yml
index 0f977ba1..49346a25 100644
--- a/.github/workflows/test_full.yml
+++ b/.github/workflows/test_full.yml
@@ -2,14 +2,15 @@ name: Tests Full Python
 
 on:
   schedule:
-    - cron: '4 2 * * 3'
+    - cron: "4 2 * * 3"
+  workflow_dispatch:
 
 jobs:
   Library:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        python-version: ['3.7', '3.8', '3.9', '3.10']
+        python-version: ["3.7", "3.8", "3.9", "3.10"]
         os: [macos-latest, ubuntu-latest, windows-latest]
     steps:
       - uses: actions/checkout@v2
@@ -20,15 +21,15 @@ jobs:
         uses: actions/setup-python@v4
         with:
           python-version: ${{ matrix.python-version }}
-          cache: 'pip' # caching pip dependencies
+          cache: "pip" # caching pip dependencies
       - name: Install MacOS dependencies
         run: |
-          brew install libomp
+          brew install libomp
         if: ${{ matrix.os == 'macos-latest' }}
       - name: Install dependencies
         run: |
-          python -m pip install -U pip
-          pip install -r prereq.txt
+          python -m pip install -U pip
+          pip install -r prereq.txt
       - name: Test Core
         run: |
           pip install .[testing]
diff --git a/.github/workflows/test_pr.yml b/.github/workflows/test_pr.yml
index bac8905e..604617c4 100644
--- a/.github/workflows/test_pr.yml
+++ b/.github/workflows/test_pr.yml
@@ -5,14 +5,14 @@ on:
     branches: [main, release]
   pull_request:
     types: [opened, synchronize, reopened]
-
+  workflow_dispatch:
 
 jobs:
   Linter:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        python-version: ['3.10']
+        python-version: ["3.10"]
         os: [ubuntu-latest]
     steps:
       - uses: actions/checkout@v2
@@ -24,19 +24,19 @@
           python-version: ${{ matrix.python-version }}
       - name: Install dependencies
         run: |
-          pip install bandit pre-commit
+          pip install bandit pre-commit
       - name: pre-commit validation
         run: pre-commit run --all
       - name: Security checks
         run: |
-          bandit -r src/synthcity/*
+          bandit -r src/synthcity/*
 
   Library:
     needs: [Linter]
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        python-version: ['3.10']
+        python-version: ["3.10"]
         os: [macos-latest, ubuntu-latest, windows-latest]
     steps:
       - uses: actions/checkout@v2
@@ -47,15 +47,15 @@
         uses: actions/setup-python@v4
         with:
           python-version: ${{ matrix.python-version }}
-          cache: 'pip' # caching pip dependencies
+          cache: "pip" # caching pip dependencies
       - name: Install MacOS dependencies
         run: |
-          brew install libomp
+          brew install libomp
         if: ${{ matrix.os == 'macos-latest' }}
       - name: Install dependencies
         run: |
-          python -m pip install -U pip
-          pip install -r prereq.txt
+          python -m pip install -U pip
+          pip install -r prereq.txt
       - name: Test Core
         run: |
           pip install .[testing]
diff --git a/.github/workflows/test_tutorials.yml b/.github/workflows/test_tutorials.yml
index c93a0c35..ba1c7a4e 100644
--- a/.github/workflows/test_tutorials.yml
+++ b/.github/workflows/test_tutorials.yml
@@ -6,8 +6,8 @@ on:
   pull_request:
     types: [opened, synchronize, reopened]
   schedule:
-    - cron: '2 3 * * 4'
-
+    - cron: "2 3 * * 4"
+  workflow_dispatch:
 
 jobs:
   Tutorials:
@@ -25,19 +25,19 @@
         uses: actions/setup-python@v4
         with:
           python-version: ${{ matrix.python-version }}
-          cache: 'pip' # caching pip dependencies
+          cache: "pip" # caching pip dependencies
      - name: Install MacOS dependencies
         run: |
-          brew install libomp
+          brew install libomp
         if: ${{ matrix.os == 'macos-latest' }}
      - name: Install dependencies
         run: |
-          python -m pip install -U pip
-          pip install -r prereq.txt
+          python -m pip install -U pip
+          pip install -r prereq.txt
 
-          pip install .[all]
+          pip install .[all]
 
-          python -m pip install ipykernel
-          python -m ipykernel install --user
+          python -m pip install ipykernel
+          python -m ipykernel install --user
      - name: Run the tutorials
        run: python tests/nb_eval.py --nb_dir tutorials/
diff --git a/setup.cfg b/setup.cfg
index caa59d0e..54f40eac 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -37,7 +37,7 @@ install_requires =
     pandas>=1.3,<2.0
     torch>=1.10.0,<2.0
     numpy>=1.20
-    lifelines>=0.27
+    lifelines>=0.27,!= 0.27.5
     opacus>=1.3
     decaf-synthetic-data>=0.1.6
     optuna>=3.1
diff --git a/src/synthcity/plugins/core/dataloader.py b/src/synthcity/plugins/core/dataloader.py
index 2a13424b..40bee0a8 100644
--- a/src/synthcity/plugins/core/dataloader.py
+++ b/src/synthcity/plugins/core/dataloader.py
@@ -5,6 +5,7 @@
 
 # third party
 import numpy as np
+import numpy.ma as ma
 import pandas as pd
 import PIL
 import torch
@@ -928,10 +929,21 @@ def unpack(self, as_numpy: bool = False, pad: bool = False) -> Any:
             self.data["outcome"],
         )
         if as_numpy:
+            longest_observation_seq = max([len(seq) for seq in temporal_data])
             return (
                 np.asarray(static_data),
-                np.asarray(temporal_data),
-                np.asarray(observation_times),
+                np.asarray(pd.concat(temporal_data)),
+                # masked array to handle variable length sequences
+                ma.vstack(
+                    [
+                        ma.array(
+                            np.resize(ot, longest_observation_seq),
+                            mask=[True for i in range(len(ot))]
+                            + [False for j in range(longest_observation_seq - len(ot))],
+                        )
+                        for ot in observation_times
+                    ]
+                ),
                 np.asarray(outcome),
             )
         return (
@@ -1289,7 +1301,6 @@ def pack_raw_data(
         fill: Any = np.nan,
         seq_offset: int = 0,
     ) -> pd.DataFrame:
-
         # Temporal data: (subjects, temporal_sequence, temporal_feature)
         temporal_features = TimeSeriesDataLoader.unique_temporal_features(temporal_data)
         temporal_features, mask_features = TimeSeriesDataLoader.extract_masked_features(
diff --git a/src/synthcity/plugins/core/models/layers.py b/src/synthcity/plugins/core/models/layers.py
index 54ccf5d0..81c0578c 100644
--- a/src/synthcity/plugins/core/models/layers.py
+++ b/src/synthcity/plugins/core/models/layers.py
@@ -44,12 +44,21 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         return x.transpose(*self.dims)
 
 
+def _get_out_features(self: nn.Module) -> int:
+    """Get the number of output features of a model."""
+    for i in reversed(range(len(self._modules["model"]))):
+        if isinstance(self._modules["model"][i], nn.Linear):
+            return self._modules["model"][i].out_features
+    raise ValueError("No linear layer found in the model.")
+
+
 @validate_arguments(config=dict(arbitrary_types_allowed=True))
 def _forward_skip_connection(
     self: nn.Module, X: torch.Tensor, *args: Any, **kwargs: Any
 ) -> torch.Tensor:
-    # if X.shape[-1] == 0:
-    #     return torch.zeros((*X.shape[:-1], self.n_units_out)).to(self.device)
+    if X.shape[-1] == 0:
+        out_shape = _get_out_features(self)
+        return torch.zeros((*X.shape[:-1], out_shape)).to(self.device)
     X = X.float().to(self.device)
     out = self._forward(X, *args, **kwargs)
     return torch.cat([out, X], dim=-1)
@@ -76,23 +85,6 @@ class Wrapper(cls):  # type: ignore
     return Wrapper
 
 
-# class GLU(nn.Module):
-#     """Gated Linear Unit (GLU)."""
-
-#     def __init__(self, activation: Union[str, nn.Module] = "sigmoid") -> None:
-#         super().__init__()
-#         if type(activation) == str:
-#             self.non_lin = get_nonlin(activation)
-#         else:
-#             self.non_lin = activation
-
-#     def forward(self, x: Tensor) -> Tensor:
-#         if x.shape[-1] % 2:
-#             raise ValueError("The last dimension of the input tensor must be even.")
-#         a, b = x.chunk(2, dim=-1)
-#         return a * self.non_lin(b)
-
-
 class GumbelSoftmax(nn.Module):
     def __init__(
         self, tau: float = 0.2, hard: bool = False, eps: float = 1e-10, dim: int = -1
diff --git a/src/synthcity/plugins/core/models/tabular_ddpm/gaussian_multinomial_diffsuion.py b/src/synthcity/plugins/core/models/tabular_ddpm/gaussian_multinomial_diffsuion.py
index 72ff5879..6414a2af 100644
--- a/src/synthcity/plugins/core/models/tabular_ddpm/gaussian_multinomial_diffsuion.py
+++ b/src/synthcity/plugins/core/models/tabular_ddpm/gaussian_multinomial_diffsuion.py
@@ -157,7 +157,10 @@ def __init__(
         self.posterior_log_variance_clipped = (
             torch.from_numpy(
                 np.log(
-                    np.append(self.posterior_variance[1], self.posterior_variance[1:])
+                    np.append(
+                        self.posterior_variance[1].cpu(),
+                        self.posterior_variance[1:].cpu(),
+                    )
                 )
             )
             .float()
diff --git a/src/synthcity/plugins/core/models/time_series_survival/ts_surv_dynamic_deephit.py b/src/synthcity/plugins/core/models/time_series_survival/ts_surv_dynamic_deephit.py
index 062902ac..a8363799 100644
--- a/src/synthcity/plugins/core/models/time_series_survival/ts_surv_dynamic_deephit.py
+++ b/src/synthcity/plugins/core/models/time_series_survival/ts_surv_dynamic_deephit.py
@@ -89,7 +89,7 @@ def _merge_data(
         self,
         static: Optional[np.ndarray],
         temporal: np.ndarray,
-        observation_times: np.ndarray,
+        observation_times: Union[List, np.ndarray],
     ) -> np.ndarray:
         if static is None:
             static = np.zeros((len(temporal), 0))
@@ -121,7 +121,6 @@ def fit(
     ) -> TimeSeriesSurvivalPlugin:
         static = np.asarray(static)
         temporal = np.asarray(temporal)
-        observation_times = np.asarray(observation_times)
         T = np.asarray(T)
         E = np.asarray(E)
 
@@ -146,7 +145,6 @@ def predict(
         "Predict risk"
         static = np.asarray(static)
         temporal = np.asarray(temporal)
-        observation_times = np.asarray(observation_times)
 
         data = self._merge_data(static, temporal, observation_times)
 
diff --git a/src/synthcity/plugins/core/models/ts_model.py b/src/synthcity/plugins/core/models/ts_model.py
index 34f6ac4c..e9fbde4f 100644
--- a/src/synthcity/plugins/core/models/ts_model.py
+++ b/src/synthcity/plugins/core/models/ts_model.py
@@ -493,7 +493,6 @@ def _prepare_input(
     ) -> Tuple:
         static_data = np.asarray(static_data)
         temporal_data = np.asarray(temporal_data)
-        observation_times = np.asarray(observation_times)
         if outcome is not None:
             outcome = np.asarray(outcome)
 
@@ -519,7 +518,7 @@
             )
             temporal_data_t = self._check_tensor(local_temporal_data).float()
             local_observation_times = np.array(
-                observation_times[indices].tolist()
+                [observation_times[i] for i in indices]
             ).astype(float)
             observation_times_t = self._check_tensor(local_observation_times).float()
 
diff --git a/tests/plugins/core/models/test_mlp.py b/tests/plugins/core/models/test_mlp.py
index ac9a2db3..09b0c21a 100644
--- a/tests/plugins/core/models/test_mlp.py
+++ b/tests/plugins/core/models/test_mlp.py
@@ -71,9 +71,8 @@ def test_basic_network(
 @pytest.mark.parametrize("layer", [LinearLayer, ResidualLayer])
 def test_custom_layers(layer: torch.nn.Module) -> None:
     X, _ = load_digits(return_X_y=True)
-    Xt = torch.from_numpy(X)
-
-    mod = layer(Xt.shape[1], 10).cpu()
+    Xt = torch.from_numpy(X).cpu()
+    mod = layer(Xt.shape[1], 10, device="cpu")
 
     assert mod(Xt).shape[0] == Xt.shape[0]
     assert mod(Xt).shape[1] >= 10
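Note on the `TimeSeriesDataLoader.unpack` hunk above: ragged per-subject observation times are packed into a single rectangular `numpy.ma` masked array instead of a plain `np.asarray`, which cannot represent sequences of unequal length as a regular 2-D array. The snippet below is a minimal, self-contained sketch of that padding mechanism only; the helper name `pad_observation_times` and the example sequences are invented here, and the mask is written in the standard `numpy.ma` orientation (True marks the padded, invalid tail). The patch itself builds its mask the other way around, so refer to the hunk for the exact convention used in the library.

```python
# Illustrative sketch, not library code: pad ragged observation-time vectors
# into one masked 2-D array. Helper name and data are hypothetical.
import numpy as np
import numpy.ma as ma


def pad_observation_times(observation_times):
    """Stack variable-length time vectors into a (n_subjects, longest) masked array."""
    longest = max(len(ot) for ot in observation_times)
    return ma.vstack(
        [
            ma.array(
                # np.resize cycles the sequence to fill the padded tail
                np.resize(np.asarray(ot, dtype=float), longest),
                # True marks padded (invalid) positions; the observed prefix stays usable
                mask=[False] * len(ot) + [True] * (longest - len(ot)),
            )
            for ot in observation_times
        ]
    )


padded = pad_observation_times([[0, 1, 2, 5], [0, 3], [0, 1, 4]])
print(padded.shape)         # (3, 4)
print(padded.compressed())  # only the unmasked (observed) entries
```

Downstream code can then call `.filled(np.nan)` for a dense padded matrix or `.compressed()` for just the observed entries, depending on what the consumer expects.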