unionai-oss · cosmicBboy · Jun 17, 2021 · May 24, 2021 · May 24, 2021 · May 24, 2021
diff --git a/.github/workflows/ci-tests.yml b/.github/workflows/ci-tests.yml
@@ -2,24 +2,24 @@ name: CI Tests
 on:
   push:
     branches:
-    - master
-    - dev
-    - bugfix
-    - 'release/*'
-    - dtypes
+      - master
+      - dev
+      - bugfix
+      - "release/*"
+      - dtypes
   pull_request:
     branches:
-    - master
-    - dev
-    - bugfix
-    - 'release/*'
-    - dtypes
+      - master
+      - dev
+      - bugfix
+      - "release/*"
+      - dtypes
 
 env:
   DEFAULT_PYTHON: 3.8
   CI: "true"
   # Increase this value to reset cache if environment.yml has not changed
-  CACHE_VERSION: 2
+  CACHE_VERSION: 3
 
 jobs:
   codestyle:
@@ -73,7 +73,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.6", "3.7", "3.8", "3.9"]
+        python-version: ["3.7", "3.8", "3.9"]
     defaults:
       run:
         shell: bash -l {0}
@@ -135,16 +135,13 @@ jobs:
 
   tests:
     name: >
-      CI Tests (${{ matrix.python-version }},
-      ${{ matrix.os }},
-      pandas-${{ matrix.pandas-version }})
+      CI Tests (${{ matrix.python-version }}, ${{ matrix.os }})
     runs-on: ${{ matrix.os }}
     strategy:
       fail-fast: false
       matrix:
         os: ["ubuntu-latest", "macos-latest", "windows-latest"]
-        python-version: ["3.6", "3.7", "3.8", "3.9"]
-        pandas-version: ["latest", "0.25.3"]
+        python-version: ["3.7", "3.8", "3.9"]
 
     defaults:
       run:
@@ -186,28 +183,28 @@ jobs:
           nox
           -db conda -r -v
           --non-interactive
-          --session "tests-${{ matrix.python-version }}(extra='core', pandas='${{ matrix.pandas-version }}')"
+          --session "tests-${{ matrix.python-version }}(extra='core')"
 
       - name: Unit Tests - Hypotheses
         run: >
           nox
           -db conda -r -v
           --non-interactive
-          --session "tests-${{ matrix.python-version }}(extra='hypotheses', pandas='${{ matrix.pandas-version }}')"
+          --session "tests-${{ matrix.python-version }}(extra='hypotheses')"
 
       - name: Unit Tests - IO
         run: >
           nox
           -db conda -r -v
           --non-interactive
-          --session "tests-${{ matrix.python-version }}(extra='io', pandas='${{ matrix.pandas-version }}')"
+          --session "tests-${{ matrix.python-version }}(extra='io')"
 
       - name: Unit Tests - Strategies
         run: >
           nox
           -db conda -r -v
           --non-interactive
-          --session "tests-${{ matrix.python-version }}(extra='strategies', pandas='${{ matrix.pandas-version }}')"
+          --session "tests-${{ matrix.python-version }}(extra='strategies')"
 
       - name: Upload coverage to Codecov
         uses: "codecov/codecov-action@v1"
@@ -217,4 +214,4 @@ jobs:
           nox
           -db conda -r -v
           --non-interactive
-          --session "docs-${{ matrix.python-version }}(pandas='${{ matrix.pandas-version }}')"
+          --session "docs-${{ matrix.python-version }}"
diff --git a/docs/source/API_reference.rst b/docs/source/API_reference.rst
@@ -93,7 +93,7 @@ Pandas Data Types
    :template: pandas_dtype_class.rst
    :nosignatures:
 
-   pandera.dtypes.PandasDtype
+   pandera.dtypes.DataType
 
 
 Decorators

diff --git a/docs/source/_templates/enum_class.rst b/docs/source/_templates/enum_class.rst
diff --git a/docs/source/_templates/pandas_dtype_class.rst b/docs/source/_templates/pandas_dtype_class.rst
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -9,14 +9,14 @@
 import doctest
 import inspect
 import logging as pylogging
-import subprocess
 
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 #
 import os
 import shutil
+import subprocess
 import sys
 
 from sphinx.util import logging

diff --git a/docs/source/dataframe_schemas.rst b/docs/source/dataframe_schemas.rst
@@ -80,7 +80,7 @@ nullable. In order to accept null values, you need to explicitly specify
    df = pd.DataFrame({"column1": [5, 1, np.nan]})
 
    non_null_schema = DataFrameSchema({
-       "column1": Column(pa.Int, Check(lambda x: x > 0))
+       "column1": Column(pa.Float, Check(lambda x: x > 0))
    })
 
    non_null_schema.validate(df)
@@ -91,18 +91,11 @@ nullable. In order to accept null values, you need to explicitly specify
     ...
     SchemaError: non-nullable series contains null values: {2: nan}
 
-.. note:: Due to a known limitation in
-    `pandas prior to version 0.24.0 <https://pandas.pydata.org/pandas-docs/stable/user_guide/integer_na.html>`_,
-    integer arrays cannot contain ``NaN`` values, so this schema will return
-    a DataFrame where ``column1`` is of type ``float``.
-    :class:`~pandera.dtypes.PandasDtype` does not currently support the nullable integer
-    array type, but you can still use the "Int64" string alias for nullable
-    integer arrays
 
 .. testcode:: null_values_in_columns
 
    null_schema = DataFrameSchema({
-       "column1": Column(pa.Int, Check(lambda x: x > 0), nullable=True)
+       "column1": Column(pa.Float, Check(lambda x: x > 0), nullable=True)
    })
 
    print(null_schema.validate(df))
@@ -401,7 +394,7 @@ schema, specify ``strict=True``:
 
     Traceback (most recent call last):
     ...
-    SchemaError: column 'column2' not in DataFrameSchema {'column1': <Schema Column: 'None' type=int>}
+    SchemaError: column 'column2' not in DataFrameSchema {'column1': <Schema Column: 'None' type=DataType(int64)>}
 
 Alternatively, if your DataFrame contains columns that are not in the schema,
 and you would like these to be dropped on validation,
@@ -626,13 +619,17 @@ Some examples of where this can be provided to pandas are:
       },
   )
 
-  df = pd.DataFrame.from_dict(
-    {
-        "a": {"column1": 1, "column2": "valueA", "column3": True},
-        "b": {"column1": 1, "column2": "valueB", "column3": True},
-    },
-    orient="index"
-  ).astype(schema.dtype).sort_index(axis=1)
+  df = (
+      pd.DataFrame.from_dict(
+          {
+              "a": {"column1": 1, "column2": "valueA", "column3": True},
+              "b": {"column1": 1, "column2": "valueB", "column3": True},
+          },
+          orient="index",
+      )
+      .astype({col: str(dtype) for col, dtype in schema.dtypes.items()})
+      .sort_index(axis=1)
+  )
 
   print(schema.validate(df))
 
@@ -718,11 +715,11 @@ data pipeline:
 
     <Schema DataFrameSchema(
         columns={
-            'col1': <Schema Column(name=col1, type=int)>
+            'col1': <Schema Column(name=col1, type=DataType(int64))>
         },
         checks=[],
         coerce=False,
-        pandas_dtype=None,
+        dtype=None,
         index=None,
         strict=True
         name=None,
@@ -756,15 +753,15 @@ the pipeline output.
 
     <Schema DataFrameSchema(
         columns={
-            'column2': <Schema Column(name=column2, type=float)>
+            'column2': <Schema Column(name=column2, type=DataType(float64))>
         },
         checks=[],
         coerce=True,
-        pandas_dtype=None,
+        dtype=None,
         index=<Schema MultiIndex(
             indexes=[
-                <Schema Index(name=column3, type=int)>
-                <Schema Index(name=column1, type=int)>
+                <Schema Index(name=column3, type=DataType(int64))>
+                <Schema Index(name=column1, type=DataType(int64))>
             ]
             coerce=False,
             strict=False,

diff --git a/docs/source/extensions.rst b/docs/source/extensions.rst
@@ -94,20 +94,20 @@ The corresponding strategy for this check would be:
    import pandera.strategies as st
 
    def equals_strategy(
-       pandas_dtype: pa.PandasDtype,
+       pandera_dtype: pa.DataType,
        strategy: Optional[st.SearchStrategy] = None,
        *,
        value,
    ):
        if strategy is None:
            return st.pandas_dtype_strategy(
-               pandas_dtype, strategy=hypothesis.strategies.just(value),
+               pandera_dtype, strategy=hypothesis.strategies.just(value),
            )
        return strategy.filter(lambda x: x == value)
 
 As you may notice, the ``pandera`` strategy interface has two arguments
 followed by keyword-only arguments that match the check function keyword-only
-check statistics. The ``pandas_dtype`` positional argument is useful for
+check statistics. The ``pandera_dtype`` positional argument is useful for
 ensuring the correct data type. In the above example, we're using the
 :func:`~pandera.strategies.pandas_dtype_strategy` strategy to make sure the
 generated ``value`` is of the correct data type.
@@ -147,15 +147,15 @@ would look like:
    :skipif: SKIP_STRATEGY
 
    def in_between_strategy(
-       pandas_dtype: pa.PandasDtype,
+       pandera_dtype: pa.DataType,
        strategy: Optional[st.SearchStrategy] = None,
        *,
        min_value,
        max_value
    ):
        if strategy is None:
            return st.pandas_dtype_strategy(
-               pandas_dtype,
+               pandera_dtype,
                min_value=min_value,
                max_value=max_value,
                exclude_min=False,

diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -155,7 +155,7 @@ Quick Start
 You can pass the built-in python types that are supported by
 pandas, or strings representing the
 `legal pandas datatypes <https://pandas.pydata.org/docs/user_guide/basics.html#dtypes>`_,
-or pandera's ``PandasDtype`` enum:
+or pandera's ``DataType``:
 
 .. testcode:: quick_start
 
@@ -171,13 +171,13 @@ or pandera's ``PandasDtype`` enum:
         # pandas > 1.0.0 support native "string" type
         "str_column2": pa.Column("str"),
 
-        # pandera PandasDtype enum
+        # pandera DataType
         "int_column3": pa.Column(pa.Int),
         "float_column3": pa.Column(pa.Float),
         "str_column3": pa.Column(pa.String),
     })
 
-For more details on data types, see :class:`~pandera.dtypes.PandasDtype`
+For more details on data types, see :class:`~pandera.dtypes.DataType`
 
 
 Schema Model