make release-tag: Merge branch 'master' into stable

sdv-dev · Sep 9, 2022 · 61adde8 · 61adde8
2 parents d728f5f + 1048531
commit 61adde8
Show file tree

Hide file tree

Showing 67 changed files with 970 additions and 1,129 deletions.
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md
@@ -2,7 +2,7 @@
 name: Feature request
 about: Request a new feature that you would like to see implemented in SDV
 title: ''
-labels: new feature, new
+labels: feature request, new
 assignees: ''
 
 ---

diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
@@ -1,16 +1,17 @@
 name: Integration Tests
 
 on:
-  - push
-  - pull_request
+  push:
+  pull_request:
+    types: [opened, reopened]
 
 jobs:
   unit:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
         python-version: [3.6, 3.7, 3.8, 3.9]
-        os: [ubuntu-latest, macos-10.15, windows-latest]
+        os: [ubuntu-latest, macos-latest, windows-latest]
     steps:
     - uses: actions/checkout@v1
     - name: Set up Python ${{ matrix.python-version }}

diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
@@ -1,8 +1,9 @@
 name: Style Checks
 
 on:
-  - push
-  - pull_request
+  push:
+  pull_request:
+    types: [opened, reopened]
 
 jobs:
   lint:

diff --git a/.github/workflows/minimum.yml b/.github/workflows/minimum.yml
@@ -1,16 +1,17 @@
 name: Unit Tests Minimum Versions
 
 on:
-  - push
-  - pull_request
+  push:
+  pull_request:
+    types: [opened, reopened]
 
 jobs:
   minimum:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
         python-version: [3.6, 3.7, 3.8, 3.9]
-        os: [ubuntu-latest, macos-10.15, windows-latest]
+        os: [ubuntu-latest, macos-latest, windows-latest]
     steps:
     - uses: actions/checkout@v1
     - name: Set up Python ${{ matrix.python-version }}

diff --git a/.github/workflows/readme.yml b/.github/workflows/readme.yml
@@ -1,16 +1,17 @@
 name: Test README
 
 on:
-  - push
-  - pull_request
+  push:
+  pull_request:
+    types: [opened, reopened]
 
 jobs:
   readme:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
         python-version: [3.6, 3.7, 3.8, 3.9]
-        os: [ubuntu-latest, macos-10.15]   # skip windows bc rundoc fails
+        os: [ubuntu-latest, macos-latest]   # skip windows bc rundoc fails
     steps:
     - uses: actions/checkout@v1
     - name: Set up Python ${{ matrix.python-version }}

diff --git a/.github/workflows/tutorials.yml b/.github/workflows/tutorials.yml
@@ -1,16 +1,17 @@
 name: Run Tutorials
 
 on:
-  - push
-  - pull_request
+  push:
+  pull_request:
+    types: [opened, reopened]
 
 jobs:
   tutorials:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
         python-version: [3.6, 3.7, 3.8, 3.9]
-        os: [ubuntu-latest, macos-10.15, windows-latest]
+        os: [ubuntu-latest, macos-latest, windows-latest]
     steps:
     - uses: actions/checkout@v1
     - name: Set up Python ${{ matrix.python-version }}
@@ -34,5 +35,9 @@ jobs:
       run: python -m pip install pywinpty==2.0.1
     - name: Install package and dependencies
       run: pip install invoke jupyter .
+
+    - if: matrix.python-version != 3.6
+      name: Install NBConvert
+      run: pip install nbconvert==6.4.5 nbformat==5.4.0
     - name: invoke tutorials
       run: invoke tutorials
diff --git a/.github/workflows/unit.yml b/.github/workflows/unit.yml
@@ -1,16 +1,17 @@
 name: Unit Tests
 
 on:
-  - push
-  - pull_request
+  push:
+  pull_request:
+    types: [opened, reopened]
 
 jobs:
   unit:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
         python-version: [3.6, 3.7, 3.8, 3.9]
-        os: [ubuntu-latest, macos-10.15, windows-latest]
+        os: [ubuntu-latest, macos-latest, windows-latest]
     steps:
     - uses: actions/checkout@v1
     - name: Set up Python ${{ matrix.python-version }}

diff --git a/HISTORY.md b/HISTORY.md
@@ -1,5 +1,28 @@
 # Release Notes
 
+## 0.17.0 - 2022-09-09
+
+This release updates the code to use RDT version 1.2.0 and greater, so that those new features are now available in SDV. This changes the transformers that are available in SDV models to be those that are in RDT version 1.2.0. As a result, some arguments for initializing models have changed.
+
+Additionally, this release fixes bugs related to loading models with custom constraints. It also fixes a bug that added `NaNs` to the index of sampled data when using `sample_remaining_columns`.
+
+### Bugs Fixed
+
+* Incorrect rounding in Custom Constraint example - Issue [#941](https://github.com/sdv-dev/SDV/issues/941) by @amontanez24
+* Can't save the model if use the custom constraint - Issue [#928](https://github.com/sdv-dev/SDV/issues/928) by @pvk-developer
+* User Guide code fixes - Issue [#983](https://github.com/sdv-dev/SDV/issues/983) by @amontanez24
+* Index contains NaNs when using sample_remaining_columns - Issue [#985](https://github.com/sdv-dev/SDV/issues/985) by @amontanez24
+* Cannot sample after loading a model with custom constraints: TypeError - Issue [#984](https://github.com/sdv-dev/SDV/issues/984) by @pvk-developer
+* Set HyperTransformer config manually, based on Metadata if given - Issue [#982](https://github.com/sdv-dev/SDV/issues/982) by @pvk-developer
+
+### New Features
+
+* Change default metrics for evaluate - Issue [#949](https://github.com/sdv-dev/SDV/issues/949) by @fealho
+
+### Maintenance
+
+* Update the RDT version to 1.0 - Issue [#897](https://github.com/sdv-dev/SDV/issues/897) by @pvk-developer
+
 ## 0.16.0 - 2022-07-21
 
 This release brings user friendly improvements and bug fixes on the `SDV` constraints, to help

diff --git a/Makefile b/Makefile
@@ -134,7 +134,7 @@ test-tutorials: ## run the tutorial notebooks
 	invoke tutorials
 
 .PHONY: test
-test: test-unit test-readme test-tutorials ## test everything that needs test dependencies
+test: test-unit test-integration test-readme test-tutorials ## test everything that needs test dependencies
 
 .PHONY: test-all
 test-all: ## run tests on every Python version with tox

diff --git a/conda/meta.yaml b/conda/meta.yaml
@@ -1,5 +1,5 @@
 {% set name = 'sdv' %}
-{% set version = '0.16.0' %}
+{% set version = '0.17.0.dev3' %}
 
 package:
   name: "{{ name|lower }}"
@@ -19,29 +19,29 @@ requirements:
     - pytest-runner
     - graphviz
     - python >=3.6,<3.10
-    - faker >=3.0.0,<10
+    - faker >=10,<15
     - python-graphviz >=0.13.2,<1
     - numpy >=1.18.0,<2
     - pandas >=1.1.3,<2
     - tqdm >=4.15,<5
-    - copulas >=0.6.0,<0.7
-    - ctgan >=0.5.0,<0.6
+    - copulas >=0.7.0,<0.8
+    - ctgan >=0.5.2,<0.6
     - deepecho >=0.3.0.post1,<0.4
-    - rdt >=0.6.1,<0.7
-    - sdmetrics >=0.4.1,<0.5
+    - rdt >=1.2.0,<2
+    - sdmetrics >=0.6.0,<0.7
   run:
     - graphviz
     - python >=3.6,<3.10
-    - faker >=3.0.0,<10
+    - faker >=10,<15
     - python-graphviz >=0.13.2,<1
     - numpy >=1.18.0,<2
     - pandas >=1.1.3,<2
     - tqdm >=4.15,<5
-    - copulas >=0.6.0,<0.7
-    - ctgan >=0.5.0,<0.6
+    - copulas >=0.7.0,<0.8
+    - ctgan >=0.5.2,<0.6
     - deepecho >=0.3.0.post1,<0.4
-    - rdt >=0.6.1,<0.7
-    - sdmetrics >=0.4.1,<0.5
+    - rdt >=1.2.0,<2
+    - sdmetrics >=0.6.0,<0.7
 
 about:
   home: "https://sdv.dev"

diff --git a/docs/api_reference/metrics/relational.rst b/docs/api_reference/metrics/relational.rst
@@ -35,12 +35,9 @@ Multi Table Statistical Metrics
     CSTest
     CSTest.get_subclasses
     CSTest.compute
-    KSTest
-    KSTest.get_subclasses
-    KSTest.compute
-    KSTestExtended
-    KSTestExtended.get_subclasses
-    KSTestExtended.compute
+    KSComplement
+    KSComplement.get_subclasses
+    KSComplement.compute
 
 Multi Table Detection Metrics
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

diff --git a/docs/api_reference/metrics/tabular.rst b/docs/api_reference/metrics/tabular.rst
@@ -37,12 +37,9 @@ Single Table Statistical Metrics
     CSTest
     CSTest.get_subclasses
     CSTest.compute
-    KSTest
-    KSTest.get_subclasses
-    KSTest.compute
-    KSTestExtended
-    KSTestExtended.get_subclasses
-    KSTestExtended.compute
+    KSComplement
+    KSComplement.get_subclasses
+    KSComplement.compute
     ContinuousKLDivergence
     ContinuousKLDivergence.get_subclasses
     ContinuousKLDivergence.compute

diff --git a/docs/developer_guides/sdv/tabular.rst b/docs/developer_guides/sdv/tabular.rst
@@ -58,8 +58,8 @@ A part from the previous steps, the ``BaseTabularModel`` also offers a couple of
 functionalities:
 
 * ``get_metadata``: Returns the Table metadata object that has been fitted to the data.
-* ``save``: Saves the complete Tabular Model in a file using ``pickle``.
-* ``load``: Loads a previously saved model from a ``pickle`` file.
+* ``save``: Saves the complete Tabular Model in a file using ``cloudpickle``.
+* ``load``: Loads a previously saved model from a ``cloudpickle`` file.
 
 Implementing a Tabular Model
 ----------------------------

diff --git a/docs/user_guides/evaluation/evaluation_framework.rst b/docs/user_guides/evaluation/evaluation_framework.rst
@@ -98,21 +98,21 @@ are included within the SDV Evaluation framework. However, the list of
 metrics that are applied can be controlled by passing a list with the
 names of the metrics that you want to apply.
 
-For example, if you were interested on obtaining only the ``CSTest`` and
-``KSTest`` metrics you can call the ``evaluate`` function as follows:
+For example, if you were interested in obtaining only the ``CSTest``
+metric, you can call the ``evaluate`` function as follows:
 
 .. ipython:: python
     :okwarning:
 
-    evaluate(synthetic_data, real_data, metrics=['CSTest', 'KSTest'])
+    evaluate(synthetic_data, real_data, metrics=['CSTest'])
 
 
 Or, if we want to see the scores separately:
 
 .. ipython:: python
     :okwarning:
 
-    evaluate(synthetic_data, real_data, metrics=['CSTest', 'KSTest'], aggregate=False)
+    evaluate(synthetic_data, real_data, metrics=['CSTest'], aggregate=False)
 
 
 For more details about all the metrics that exist for the different data modalities

diff --git a/docs/user_guides/evaluation/multi_table_metrics.rst b/docs/user_guides/evaluation/multi_table_metrics.rst
@@ -153,21 +153,20 @@ report back the average score obtained.
 The list of such metrics is:
 
 * ``CSTest``: Multi Single Table metric based on the Single Table CSTest metric.
-* ``KSTest``: Multi Single Table metric based on the Single Table KSTest metric.
-* ``KSTestExtended``: Multi Single Table metric based on the Single Table KSTestExtended metric.
+* ``KSComplement``: Multi Single Table metric based on the Single Table KSComplement metric.
 * ``LogisticDetection``: Multi Single Table metric based on the Single Table LogisticDetection metric.
 * ``SVCDetection``: Multi Single Table metric based on the Single Table SVCDetection metric.
 * ``BNLikelihood``: Multi Single Table metric based on the Single Table BNLikelihood metric.
 * ``BNLogLikelihood``: Multi Single Table metric based on the Single Table BNLogLikelihood metric.
 
-Let's try to use the ``KSTestExtended`` metric:
+Let's try to use the ``KSComplement`` metric:
 
 .. ipython::
     :verbatim:
 
-    In [6]: from sdv.metrics.relational import KSTestExtended
+    In [6]: from sdv.metrics.relational import KSComplement
 
-    In [7]: KSTestExtended.compute(real_data, synthetic_data)
+    In [7]: KSComplement.compute(real_data, synthetic_data)
     Out[7]: 0.8194444444444443
 
 Parent Child Detection Metrics

diff --git a/docs/user_guides/evaluation/single_table_metrics.rst b/docs/user_guides/evaluation/single_table_metrics.rst
@@ -136,7 +136,7 @@ outcome from the test.
 
 Such metrics are:
 
-* ``sdv.metrics.tabular.KSTest``: This metric uses the two-sample Kolmogorov–Smirnov test
+* ``sdv.metrics.tabular.KSComplement``: This metric uses the two-sample Kolmogorov–Smirnov test
   to compare the distributions of continuous columns using the empirical CDF.
   The output for each column is 1 minus the KS Test D statistic, which indicates the maximum
   distance between the expected CDF and the observed CDF values.
@@ -150,16 +150,16 @@ Let us execute these two metrics on the loaded data:
 .. ipython::
     :verbatim:
 
-    In [6]: from sdv.metrics.tabular import CSTest, KSTest
+    In [6]: from sdv.metrics.tabular import CSTest, KSComplement
 
     In [7]: CSTest.compute(real_data, synthetic_data)
     Out[7]: 0.8078084931103922
 
-    In [8]: KSTest.compute(real_data, synthetic_data)
+    In [8]: KSComplement.compute(real_data, synthetic_data)
     Out[8]: 0.6372093023255814
 
 In each case, the statistical test will be executed on all the compatible columns (so, categorical
-or boolean columns for ``CSTest`` and numerical columns for ``KSTest``), and report the average
+or boolean columns for ``CSTest`` and numerical columns for ``KSComplement``), and report the average
 score obtained.
 
 .. note:: If your table does not contain any column of the compatible type, the output of
@@ -173,11 +173,11 @@ metric classes or their names:
 
     In [9]: from sdv.evaluation import evaluate
 
-    In [10]: evaluate(synthetic_data, real_data, metrics=['CSTest', 'KSTest'], aggregate=False)
+    In [10]: evaluate(synthetic_data, real_data, metrics=['CSTest', 'KSComplement'], aggregate=False)
     Out[10]:
        metric                                     name  raw_score  normalized_score  min_value  max_value      goal
     0  CSTest                              Chi-Squared   0.807808          0.807808        0.0        1.0  MAXIMIZE
-    1  KSTest  Inverted Kolmogorov-Smirnov D statistic   0.637209          0.637209        0.0        1.0  MAXIMIZE
+    1  KSComplement  Inverted Kolmogorov-Smirnov D statistic   0.637209          0.637209        0.0        1.0  MAXIMIZE
 
 
 Likelihood Metrics

diff --git a/docs/user_guides/relational/hma1.rst b/docs/user_guides/relational/hma1.rst
@@ -125,7 +125,7 @@ method passing the name of the file in which you want to save the model.
 Note that the extension of the filename is not relevant, but we will be
 using the ``.pkl`` extension to highlight that the serialization
 protocol used is
-`pickle <https://docs.python.org/3/library/pickle.html>`__.
+`cloudpickle <https://github.com/cloudpipe/cloudpickle>`__.
 
 .. ipython:: python
     :okwarning: