From 043c1c2e5188b5266090042a4048367594434c24 Mon Sep 17 00:00:00 2001 From: Niels Bantilan Date: Wed, 19 Oct 2022 09:42:54 -0400 Subject: [PATCH 1/8] move jupyterlite_sphinx to pip deps Signed-off-by: Niels Bantilan --- requirements-dev.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index 790bf826b..03797698f 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -54,4 +54,4 @@ types-pyyaml types-pkg_resources types-requests types-pytz -jupyterlite_sphinx \ No newline at end of file +jupyterlite_sphinx From 7e4c8f4d7ee46ec3552fbd67afa5c732f1aed39b Mon Sep 17 00:00:00 2001 From: Philippe Prados Date: Mon, 25 Jul 2022 12:26:30 +0200 Subject: [PATCH 2/8] Add compatibility with cudf Missing: unit tests --- pandera/typing/cudf.py | 350 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 350 insertions(+) create mode 100644 pandera/typing/cudf.py diff --git a/pandera/typing/cudf.py b/pandera/typing/cudf.py new file mode 100644 index 000000000..426211393 --- /dev/null +++ b/pandera/typing/cudf.py @@ -0,0 +1,350 @@ +"""Typing definitions and helpers.""" +# pylint:disable=abstract-method,disable=too-many-ancestors +import io +from typing import _type_check # type: ignore[attr-defined] +from typing import TYPE_CHECKING, Any, Generic, TypeVar + +<<<<<<< HEAD +======= +import cudf +>>>>>>> Add compatibility with cudf +import pandas as pd + +from ..errors import SchemaError, SchemaInitError +from .common import DataFrameBase, GenericDtype, IndexBase, Schema, SeriesBase +from .formats import Formats + +try: + from typing import _GenericAlias # type: ignore[attr-defined] +except ImportError: # pragma: no cover + _GenericAlias = None + +<<<<<<< HEAD +try: + import cudf + + try: + from pydantic.fields import ModelField + except ImportError: + ModelField = Any # type: ignore + + + # pylint:disable=too-few-public-methods + class Index(IndexBase, cudf.Index, Generic[GenericDtype]): + """Representation of pandas.Index, only used for type annotation. + + *new in 0.5.0* + """ + + + # pylint:disable=too-few-public-methods + class Series(SeriesBase, cudf.Series, Generic[GenericDtype]): # type: ignore + """Representation of pandas.Series, only used for type annotation. + + *new in 0.5.0* + """ + + if hasattr(pd.Series, "__class_getitem__") and _GenericAlias: + def __class_getitem__(cls, item): + """Define this to override the patch that pyspark.pandas performs on pandas. + https://github.com/apache/spark/blob/master/python/pyspark/pandas/__init__.py#L124-L144 + """ + _type_check(item, "Parameters to generic types must be types.") + return _GenericAlias(cls, item) + + + # pylint:disable=invalid-name + if TYPE_CHECKING: + T = TypeVar("T") # pragma: no cover + else: + T = Schema + + + # pylint:disable=too-few-public-methods + class DataFrame(DataFrameBase, cudf.DataFrame, Generic[T]): + """ + A generic type for pandas.DataFrame. + + *new in 0.5.0* + """ + + if hasattr(pd.DataFrame, "__class_getitem__") and _GenericAlias: + def __class_getitem__(cls, item): + """Define this to override the patch that pyspark.pandas performs on pandas. 
+ https://github.com/apache/spark/blob/master/python/pyspark/pandas/__init__.py#L124-L144 + """ + _type_check(item, "Parameters to generic types must be types.") + return _GenericAlias(cls, item) + + @classmethod + def __get_validators__(cls): + yield cls.pydantic_validate + + @classmethod + def from_format(cls, obj: Any, config) -> pd.DataFrame: + """ + Converts serialized data from a specific format + specified in the :py:class:`pandera.model.SchemaModel` config options + ``from_format`` and ``from_format_kwargs``. + + :param obj: object representing a serialized dataframe. + :param config: schema model configuration object. + """ + if config.from_format is None: + if not isinstance(obj, pd.DataFrame): + try: + obj = pd.DataFrame(obj) + except Exception as exc: + raise ValueError( + f"Expected pd.DataFrame, found {type(obj)}" + ) from exc + return obj + + reader = { + Formats.dict: pd.DataFrame, + Formats.csv: pd.read_csv, + Formats.json: pd.read_json, + Formats.feather: pd.read_feather, + Formats.parquet: pd.read_parquet, + Formats.pickle: pd.read_pickle, + }[Formats(config.from_format)] + + return reader(obj, **(config.from_format_kwargs or {})) + + @classmethod + def to_format(cls, data: pd.DataFrame, config) -> Any: + """ + Converts a dataframe to the format specified in the + :py:class:`pandera.model.SchemaModel` config options ``to_format`` + and ``to_format_kwargs``. + + :param data: convert this data to the specified format + :param config: :py:cl + """ + if config.to_format is None: + return data + + writer, buffer = { + Formats.dict: (data.to_dict, None), + Formats.csv: (data.to_csv, None), + Formats.json: (data.to_json, None), + Formats.feather: (data.to_feather, io.BytesIO()), + Formats.parquet: (data.to_parquet, io.BytesIO()), + Formats.pickle: (data.to_pickle, io.BytesIO()), + }[Formats(config.to_format)] + + args = [] if buffer is None else [buffer] + out = writer(*args, **(config.to_format_kwargs or {})) + if buffer is None: + return out + elif buffer.closed: + raise IOError( + f"pandas=={pd.__version__} closed the buffer automatically " + f"using the serialization method {writer}. Use a later " + "version of pandas or use a different the serialization " + "format." + ) + buffer.seek(0) + return buffer + + @classmethod + def _get_schema(cls, field: ModelField): + if not field.sub_fields: + raise TypeError( + "Expected a typed pandera.typing.DataFrame," + " e.g. DataFrame[Schema]" + ) + schema_model = field.sub_fields[0].type_ + try: + schema = schema_model.to_schema() + except SchemaInitError as exc: + raise ValueError( + f"Cannot use {cls.__name__} as a pydantic type as its " + "SchemaModel cannot be converted to a DataFrameSchema.\n" + f"Please revisit the model to address the following errors:" + f"\n{exc}" + ) from exc + return schema_model, schema + + @classmethod + def pydantic_validate(cls, obj: Any, field: ModelField) -> pd.DataFrame: + """ + Verify that the input can be converted into a pandas dataframe that + meets all schema requirements. 
+ """ + schema_model, schema = cls._get_schema(field) + data = cls.from_format(obj, schema_model.__config__) + + try: + valid_data = schema.validate(data) + except SchemaError as exc: + raise ValueError(str(exc)) from exc + + return cls.to_format(valid_data, schema_model.__config__) + +except ImportError: + pass # Ignore + +======= + +try: + from pydantic.fields import ModelField +except ImportError: + ModelField = Any # type: ignore + + +# pylint:disable=too-few-public-methods +class Index(IndexBase, cudf.Index, Generic[GenericDtype]): + """Representation of pandas.Index, only used for type annotation. + + *new in 0.5.0* + """ + + +# pylint:disable=too-few-public-methods +class Series(SeriesBase, cudf.Series, Generic[GenericDtype]): # type: ignore + """Representation of pandas.Series, only used for type annotation. + + *new in 0.5.0* + """ + + if hasattr(pd.Series, "__class_getitem__") and _GenericAlias: + + def __class_getitem__(cls, item): + """Define this to override the patch that pyspark.pandas performs on pandas. + https://github.com/apache/spark/blob/master/python/pyspark/pandas/__init__.py#L124-L144 + """ + _type_check(item, "Parameters to generic types must be types.") + return _GenericAlias(cls, item) + + +# pylint:disable=invalid-name +if TYPE_CHECKING: + T = TypeVar("T") # pragma: no cover +else: + T = Schema + + +# pylint:disable=too-few-public-methods +class DataFrame(DataFrameBase, cudf.DataFrame, Generic[T]): + """ + A generic type for pandas.DataFrame. + + *new in 0.5.0* + """ + + if hasattr(pd.DataFrame, "__class_getitem__") and _GenericAlias: + + def __class_getitem__(cls, item): + """Define this to override the patch that pyspark.pandas performs on pandas. + https://github.com/apache/spark/blob/master/python/pyspark/pandas/__init__.py#L124-L144 + """ + _type_check(item, "Parameters to generic types must be types.") + return _GenericAlias(cls, item) + + @classmethod + def __get_validators__(cls): + yield cls.pydantic_validate + + @classmethod + def from_format(cls, obj: Any, config) -> pd.DataFrame: + """ + Converts serialized data from a specific format + specified in the :py:class:`pandera.model.SchemaModel` config options + ``from_format`` and ``from_format_kwargs``. + + :param obj: object representing a serialized dataframe. + :param config: schema model configuration object. + """ + if config.from_format is None: + if not isinstance(obj, pd.DataFrame): + try: + obj = pd.DataFrame(obj) + except Exception as exc: + raise ValueError( + f"Expected pd.DataFrame, found {type(obj)}" + ) from exc + return obj + + reader = { + Formats.dict: pd.DataFrame, + Formats.csv: pd.read_csv, + Formats.json: pd.read_json, + Formats.feather: pd.read_feather, + Formats.parquet: pd.read_parquet, + Formats.pickle: pd.read_pickle, + }[Formats(config.from_format)] + + return reader(obj, **(config.from_format_kwargs or {})) + + @classmethod + def to_format(cls, data: pd.DataFrame, config) -> Any: + """ + Converts a dataframe to the format specified in the + :py:class:`pandera.model.SchemaModel` config options ``to_format`` + and ``to_format_kwargs``. 
+ + :param data: convert this data to the specified format + :param config: :py:cl + """ + if config.to_format is None: + return data + + writer, buffer = { + Formats.dict: (data.to_dict, None), + Formats.csv: (data.to_csv, None), + Formats.json: (data.to_json, None), + Formats.feather: (data.to_feather, io.BytesIO()), + Formats.parquet: (data.to_parquet, io.BytesIO()), + Formats.pickle: (data.to_pickle, io.BytesIO()), + }[Formats(config.to_format)] + + args = [] if buffer is None else [buffer] + out = writer(*args, **(config.to_format_kwargs or {})) + if buffer is None: + return out + elif buffer.closed: + raise IOError( + f"pandas=={pd.__version__} closed the buffer automatically " + f"using the serialization method {writer}. Use a later " + "version of pandas or use a different the serialization " + "format." + ) + buffer.seek(0) + return buffer + + @classmethod + def _get_schema(cls, field: ModelField): + if not field.sub_fields: + raise TypeError( + "Expected a typed pandera.typing.DataFrame," + " e.g. DataFrame[Schema]" + ) + schema_model = field.sub_fields[0].type_ + try: + schema = schema_model.to_schema() + except SchemaInitError as exc: + raise ValueError( + f"Cannot use {cls.__name__} as a pydantic type as its " + "SchemaModel cannot be converted to a DataFrameSchema.\n" + f"Please revisit the model to address the following errors:" + f"\n{exc}" + ) from exc + return schema_model, schema + + @classmethod + def pydantic_validate(cls, obj: Any, field: ModelField) -> pd.DataFrame: + """ + Verify that the input can be converted into a pandas dataframe that + meets all schema requirements. + """ + schema_model, schema = cls._get_schema(field) + data = cls.from_format(obj, schema_model.__config__) + + try: + valid_data = schema.validate(data) + except SchemaError as exc: + raise ValueError(str(exc)) from exc + + return cls.to_format(valid_data, schema_model.__config__) +>>>>>>> Add compatibility with cudf From 2acdfcf0797c1938dad1ece18d07b4c49747b007 Mon Sep 17 00:00:00 2001 From: Philippe Prados Date: Fri, 29 Jul 2022 11:10:09 +0200 Subject: [PATCH 3/8] Accept the absence of cudf --- pandera/typing/cudf.py | 168 ----------------------------------------- 1 file changed, 168 deletions(-) diff --git a/pandera/typing/cudf.py b/pandera/typing/cudf.py index 426211393..ad260a3c6 100644 --- a/pandera/typing/cudf.py +++ b/pandera/typing/cudf.py @@ -4,10 +4,6 @@ from typing import _type_check # type: ignore[attr-defined] from typing import TYPE_CHECKING, Any, Generic, TypeVar -<<<<<<< HEAD -======= -import cudf ->>>>>>> Add compatibility with cudf import pandas as pd from ..errors import SchemaError, SchemaInitError @@ -19,7 +15,6 @@ except ImportError: # pragma: no cover _GenericAlias = None -<<<<<<< HEAD try: import cudf @@ -185,166 +180,3 @@ def pydantic_validate(cls, obj: Any, field: ModelField) -> pd.DataFrame: except ImportError: pass # Ignore -======= - -try: - from pydantic.fields import ModelField -except ImportError: - ModelField = Any # type: ignore - - -# pylint:disable=too-few-public-methods -class Index(IndexBase, cudf.Index, Generic[GenericDtype]): - """Representation of pandas.Index, only used for type annotation. - - *new in 0.5.0* - """ - - -# pylint:disable=too-few-public-methods -class Series(SeriesBase, cudf.Series, Generic[GenericDtype]): # type: ignore - """Representation of pandas.Series, only used for type annotation. 
- - *new in 0.5.0* - """ - - if hasattr(pd.Series, "__class_getitem__") and _GenericAlias: - - def __class_getitem__(cls, item): - """Define this to override the patch that pyspark.pandas performs on pandas. - https://github.com/apache/spark/blob/master/python/pyspark/pandas/__init__.py#L124-L144 - """ - _type_check(item, "Parameters to generic types must be types.") - return _GenericAlias(cls, item) - - -# pylint:disable=invalid-name -if TYPE_CHECKING: - T = TypeVar("T") # pragma: no cover -else: - T = Schema - - -# pylint:disable=too-few-public-methods -class DataFrame(DataFrameBase, cudf.DataFrame, Generic[T]): - """ - A generic type for pandas.DataFrame. - - *new in 0.5.0* - """ - - if hasattr(pd.DataFrame, "__class_getitem__") and _GenericAlias: - - def __class_getitem__(cls, item): - """Define this to override the patch that pyspark.pandas performs on pandas. - https://github.com/apache/spark/blob/master/python/pyspark/pandas/__init__.py#L124-L144 - """ - _type_check(item, "Parameters to generic types must be types.") - return _GenericAlias(cls, item) - - @classmethod - def __get_validators__(cls): - yield cls.pydantic_validate - - @classmethod - def from_format(cls, obj: Any, config) -> pd.DataFrame: - """ - Converts serialized data from a specific format - specified in the :py:class:`pandera.model.SchemaModel` config options - ``from_format`` and ``from_format_kwargs``. - - :param obj: object representing a serialized dataframe. - :param config: schema model configuration object. - """ - if config.from_format is None: - if not isinstance(obj, pd.DataFrame): - try: - obj = pd.DataFrame(obj) - except Exception as exc: - raise ValueError( - f"Expected pd.DataFrame, found {type(obj)}" - ) from exc - return obj - - reader = { - Formats.dict: pd.DataFrame, - Formats.csv: pd.read_csv, - Formats.json: pd.read_json, - Formats.feather: pd.read_feather, - Formats.parquet: pd.read_parquet, - Formats.pickle: pd.read_pickle, - }[Formats(config.from_format)] - - return reader(obj, **(config.from_format_kwargs or {})) - - @classmethod - def to_format(cls, data: pd.DataFrame, config) -> Any: - """ - Converts a dataframe to the format specified in the - :py:class:`pandera.model.SchemaModel` config options ``to_format`` - and ``to_format_kwargs``. - - :param data: convert this data to the specified format - :param config: :py:cl - """ - if config.to_format is None: - return data - - writer, buffer = { - Formats.dict: (data.to_dict, None), - Formats.csv: (data.to_csv, None), - Formats.json: (data.to_json, None), - Formats.feather: (data.to_feather, io.BytesIO()), - Formats.parquet: (data.to_parquet, io.BytesIO()), - Formats.pickle: (data.to_pickle, io.BytesIO()), - }[Formats(config.to_format)] - - args = [] if buffer is None else [buffer] - out = writer(*args, **(config.to_format_kwargs or {})) - if buffer is None: - return out - elif buffer.closed: - raise IOError( - f"pandas=={pd.__version__} closed the buffer automatically " - f"using the serialization method {writer}. Use a later " - "version of pandas or use a different the serialization " - "format." - ) - buffer.seek(0) - return buffer - - @classmethod - def _get_schema(cls, field: ModelField): - if not field.sub_fields: - raise TypeError( - "Expected a typed pandera.typing.DataFrame," - " e.g. 
DataFrame[Schema]" - ) - schema_model = field.sub_fields[0].type_ - try: - schema = schema_model.to_schema() - except SchemaInitError as exc: - raise ValueError( - f"Cannot use {cls.__name__} as a pydantic type as its " - "SchemaModel cannot be converted to a DataFrameSchema.\n" - f"Please revisit the model to address the following errors:" - f"\n{exc}" - ) from exc - return schema_model, schema - - @classmethod - def pydantic_validate(cls, obj: Any, field: ModelField) -> pd.DataFrame: - """ - Verify that the input can be converted into a pandas dataframe that - meets all schema requirements. - """ - schema_model, schema = cls._get_schema(field) - data = cls.from_format(obj, schema_model.__config__) - - try: - valid_data = schema.validate(data) - except SchemaError as exc: - raise ValueError(str(exc)) from exc - - return cls.to_format(valid_data, schema_model.__config__) ->>>>>>> Add compatibility with cudf From 5886fac07951aa21d446210bf73bc7cde1775d09 Mon Sep 17 00:00:00 2001 From: Philippe Prados Date: Wed, 19 Oct 2022 17:42:44 +0200 Subject: [PATCH 4/8] Add cudf unit test --- pandera/typing/__init__.py | 5 +++++ pandera/typing/cudf.py | 4 ++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/pandera/typing/__init__.py b/pandera/typing/__init__.py index d28d50522..63150e4c3 100644 --- a/pandera/typing/__init__.py +++ b/pandera/typing/__init__.py @@ -57,6 +57,11 @@ SERIES_TYPES.update({modin.Series}) INDEX_TYPES.update({modin.Index}) +if cudf.CUDF_INSTALLED: + DATAFRAME_TYPES.update({cudf.DataFrame}) + SERIES_TYPES.update({cudf.Series}) + INDEX_TYPES.update({cudf.Index}) + if pyspark.PYSPARK_INSTALLED: DATAFRAME_TYPES.update({pyspark.DataFrame}) SERIES_TYPES.update({pyspark.Series}) diff --git a/pandera/typing/cudf.py b/pandera/typing/cudf.py index ad260a3c6..2d93762e3 100644 --- a/pandera/typing/cudf.py +++ b/pandera/typing/cudf.py @@ -176,7 +176,7 @@ def pydantic_validate(cls, obj: Any, field: ModelField) -> pd.DataFrame: raise ValueError(str(exc)) from exc return cls.to_format(valid_data, schema_model.__config__) - + CUDF_INSTALLED=True except ImportError: - pass # Ignore + CUDF_INSTALLED=False From ae2853b89792347de7672c8efecc3bdfedc25434 Mon Sep 17 00:00:00 2001 From: Philippe Prados Date: Thu, 20 Oct 2022 09:40:00 +0200 Subject: [PATCH 5/8] Add cudf unit test and rebase from fix-dependencies --- environment.yml | 3 + pandera/core/pandas/types.py | 8 + pandera/cudf_accessor.py | 98 ++++++ pandera/errors.py | 16 + pandera/typing/__init__.py | 2 +- pandera/typing/cudf.py | 7 +- tests/cudf/conftest.py | 9 + tests/cudf/test_cudf_accessor.py | 21 ++ tests/cudf/test_schemas_on_cudf.py | 458 +++++++++++++++++++++++++++++ 9 files changed, 618 insertions(+), 4 deletions(-) create mode 100644 pandera/cudf_accessor.py create mode 100644 tests/cudf/conftest.py create mode 100644 tests/cudf/test_cudf_accessor.py create mode 100644 tests/cudf/test_schemas_on_cudf.py diff --git a/environment.yml b/environment.yml index 304e99ce5..74e624185 100644 --- a/environment.yml +++ b/environment.yml @@ -31,6 +31,9 @@ dependencies: - modin - protobuf <= 3.20.3 + # cudf extra + - cudf + # dask extra - dask - distributed diff --git a/pandera/core/pandas/types.py b/pandera/core/pandas/types.py index 661ba20e0..31ee7acbe 100644 --- a/pandera/core/pandas/types.py +++ b/pandera/core/pandas/types.py @@ -74,6 +74,14 @@ def supported_types() -> SupportedTypes: index_types.append(dd.Index) except ImportError: pass + try: + import cudf + + table_types.append(cudf.DataFrame) + field_types.append(cudf.Series) 
+        index_types.append(cudf.Index)
+    except ImportError:
+        pass
 
     return SupportedTypes(
         tuple(table_types),
diff --git a/pandera/cudf_accessor.py b/pandera/cudf_accessor.py
new file mode 100644
index 000000000..e72d07483
--- /dev/null
+++ b/pandera/cudf_accessor.py
@@ -0,0 +1,98 @@
+"""Custom accessor functionality for cudf.
+
+Source code adapted from the pyspark.pandas implementation:
+https://spark.apache.org/docs/3.2.0/api/python/reference/pyspark.pandas/api/pyspark.pandas.extensions.register_dataframe_accessor.html?highlight=register_dataframe_accessor#pyspark.pandas.extensions.register_dataframe_accessor
+"""
+
+import warnings
+
+from pandera.pandas_accessor import (
+    PanderaDataFrameAccessor,
+    PanderaSeriesAccessor,
+)
+
+
+# pylint: disable=too-few-public-methods
+class CachedAccessor:
+    """
+    Custom property-like object.
+
+    A descriptor for caching accessors:
+
+    :param name: Namespace that the accessor's methods, properties, etc. will
+        be accessed under, e.g. "foo" for a dataframe accessor yields the
+        accessor ``df.foo``
+    :param cls: Class with the extension methods.
+
+    For accessors, the class's __init__ method assumes that you are
+    registering an accessor for one of ``Series``, ``DataFrame``, or
+    ``Index``.
+    """
+
+    def __init__(self, name, accessor):
+        self._name = name
+        self._accessor = accessor
+
+    def __get__(self, obj, cls):
+        if obj is None:  # pragma: no cover
+            return self._accessor
+        accessor_obj = self._accessor(obj)
+        object.__setattr__(obj, self._name, accessor_obj)
+        return accessor_obj
+
+
+def _register_accessor(name, cls):
+    """
+    Register a custom accessor on {class} objects.
+
+    :param name: Name under which the accessor should be registered. A warning
+        is issued if this name conflicts with a preexisting attribute.
+    :returns: A class decorator callable.
+    """
+
+    def decorator(accessor):
+        if hasattr(cls, name):
+            msg = (
+                f"registration of accessor {accessor} under name '{name}' for "
+                f"type {cls.__name__} is overriding a preexisting attribute "
+                "with the same name."
+            )
+
+            warnings.warn(
+                msg,
+                UserWarning,
+                stacklevel=2,
+            )
+        setattr(cls, name, CachedAccessor(name, accessor))
+        return accessor
+
+    return decorator
+
+
+def register_dataframe_accessor(name):
+    """
+    Register a custom accessor with a DataFrame.
+
+    :param name: name used when calling the accessor after it's registered
+    :returns: a class decorator callable
+    """
+    # pylint: disable=import-outside-toplevel
+    from cudf import DataFrame
+
+    return _register_accessor(name, DataFrame)
+
+
+def register_series_accessor(name):
+    """
+    Register a custom accessor with a Series object.
+
+    :param name: name used when calling the accessor after it's registered
+    :returns: a class decorator callable
+    """
+    # pylint: disable=import-outside-toplevel
+    from cudf import Series
+
+    return _register_accessor(name, Series)
+
+
+register_dataframe_accessor("pandera")(PanderaDataFrameAccessor)
+register_series_accessor("pandera")(PanderaSeriesAccessor)
diff --git a/pandera/errors.py b/pandera/errors.py
index 035ed50bd..c98e3219a 100644
--- a/pandera/errors.py
+++ b/pandera/errors.py
@@ -289,6 +289,22 @@ def _parse_schema_errors(schema_errors: List[Dict[str, Any]]):
                 for x in check_failure_cases
             ]
 
+        elif any(
+            type(x).__module__.startswith("cudf")
+            for x in check_failure_cases
+        ):
+            # pylint: disable=import-outside-toplevel
+            # The current version of cudf is not compatible with sort_values() of strings.
+            # The workaround is to convert cudf dataframes to pandas.
+ import cudf + + # concat_fn = cudf.concat + check_failure_cases = [ + # x if isinstance(x, cudf.DataFrame) else cudf.DataFrame(x) + x.to_pandas() if isinstance(x, cudf.DataFrame) else x + for x in check_failure_cases + ] + failure_cases = ( concat_fn(check_failure_cases) .reset_index(drop=True) diff --git a/pandera/typing/__init__.py b/pandera/typing/__init__.py index 63150e4c3..68491309d 100644 --- a/pandera/typing/__init__.py +++ b/pandera/typing/__init__.py @@ -6,7 +6,7 @@ from typing import Set, Type -from pandera.typing import dask, fastapi, geopandas, modin, pyspark +from pandera.typing import dask, fastapi, geopandas, modin, cudf, pyspark from pandera.typing.common import ( BOOL, INT8, diff --git a/pandera/typing/cudf.py b/pandera/typing/cudf.py index 2d93762e3..8efcd6411 100644 --- a/pandera/typing/cudf.py +++ b/pandera/typing/cudf.py @@ -176,7 +176,8 @@ def pydantic_validate(cls, obj: Any, field: ModelField) -> pd.DataFrame: raise ValueError(str(exc)) from exc return cls.to_format(valid_data, schema_model.__config__) - CUDF_INSTALLED=True -except ImportError: - CUDF_INSTALLED=False + + CUDF_INSTALLED = True +except ImportError: + CUDF_INSTALLED = False diff --git a/tests/cudf/conftest.py b/tests/cudf/conftest.py new file mode 100644 index 000000000..852eea3fb --- /dev/null +++ b/tests/cudf/conftest.py @@ -0,0 +1,9 @@ +"""Registers fixtures for core""" + +import os + +import pytest + +# pylint: disable=unused-import +from tests.core.checks_fixtures import custom_check_teardown # noqa + diff --git a/tests/cudf/test_cudf_accessor.py b/tests/cudf/test_cudf_accessor.py new file mode 100644 index 000000000..8f9a1b861 --- /dev/null +++ b/tests/cudf/test_cudf_accessor.py @@ -0,0 +1,21 @@ +"""Unit tests of cudf accessor functionality. +""" + +import pytest + +from pandera import cudf_accessor + + +# pylint: disable=too-few-public-methods +class CustomAccessor: + """Mock accessor class""" + + def __init__(self, obj): + self._obj = obj + + +def test_cudf_accessor_warning(): + """Test that cudf accessor raises warning when name already exists.""" + cudf_accessor.register_dataframe_accessor("foo")(CustomAccessor) + with pytest.warns(UserWarning): + cudf_accessor.register_dataframe_accessor("foo")(CustomAccessor) diff --git a/tests/cudf/test_schemas_on_cudf.py b/tests/cudf/test_schemas_on_cudf.py new file mode 100644 index 000000000..2199c889f --- /dev/null +++ b/tests/cudf/test_schemas_on_cudf.py @@ -0,0 +1,458 @@ +"""Unit tests for cudf data structures.""" + +import typing +from unittest.mock import MagicMock + +import cudf +import pandas as pd +import pytest + +import pandera as pa +from pandera import extensions +from pandera.engines import numpy_engine, pandas_engine +from pandera.typing.modin import DataFrame, Index, Series, modin_version +from tests.strategies.test_strategies import NULLABLE_DTYPES +from tests.strategies.test_strategies import ( + SUPPORTED_DTYPES as SUPPORTED_STRATEGY_DTYPES, +) +from tests.strategies.test_strategies import ( + UNSUPPORTED_DTYPE_CLS as UNSUPPORTED_STRATEGY_DTYPE_CLS, +) + +try: + import hypothesis + import hypothesis.strategies as st +except ImportError: + hypothesis = MagicMock() + st = MagicMock() + + +UNSUPPORTED_STRATEGY_DTYPE_CLS = set(UNSUPPORTED_STRATEGY_DTYPE_CLS) +UNSUPPORTED_STRATEGY_DTYPE_CLS.add(numpy_engine.Object) + +TEST_DTYPES_ON_CUDF = [] +# pylint: disable=redefined-outer-name +# for dtype_cls in pandas_engine.Engine.get_registered_dtypes(): +# if ( +# dtype_cls in UNSUPPORTED_STRATEGY_DTYPE_CLS +# or ( +# 
pandas_engine.Engine.dtype(dtype_cls) +# not in SUPPORTED_STRATEGY_DTYPES +# ) +# or not ( +# pandas_engine.GEOPANDAS_INSTALLED +# and dtype_cls == pandas_engine.Geometry +# ) +# ): +# continue +# TEST_DTYPES_ON_CUDF.append(pandas_engine.Engine.dtype(dtype_cls)) + + +@pytest.mark.parametrize("coerce", [True, False]) +def test_dataframe_schema_case(coerce): + """Test a simple schema case.""" + schema = pa.DataFrameSchema( + { + "int_column": pa.Column(int, pa.Check.ge(0)), + "float_column": pa.Column(float, pa.Check.le(0)), + # cudf not implemented "str_column": pa.Column(str, pa.Check.isin(list("abcde"))), + }, + coerce=coerce, + ) + cdf = cudf.DataFrame( + { + "int_column": range(10), + "float_column": [float(-x) for x in range(10)], + # cudf not implemented "str_column": list("aabbcceedd"), + } + ) + assert isinstance(schema.validate(cdf), cudf.DataFrame) + + +def _test_datatype_with_schema( + schema: typing.Union[pa.DataFrameSchema, pa.SeriesSchema], + data: st.DataObject, +): + """Test pandera datatypes against modin data containers.""" + data_container_cls = { + pa.DataFrameSchema: cudf.DataFrame, + pa.SeriesSchema: cudf.Series, + pa.Column: cudf.DataFrame, + }[type(schema)] + + sample = data.draw(schema.strategy(size=3)) + assert isinstance(schema(data_container_cls(sample)), data_container_cls) + + +@pytest.mark.parametrize("dtype_cls", TEST_DTYPES_ON_CUDF) +@pytest.mark.parametrize("coerce", [True, False]) +@hypothesis.given(st.data()) +def test_dataframe_schema_dtypes( + dtype_cls: pandas_engine.DataType, + coerce: bool, + data: st.DataObject, +): + """ + Test that all supported modin data types work as expected for dataframes. + """ + dtype = pandas_engine.Engine.dtype(dtype_cls) + schema = pa.DataFrameSchema({"column": pa.Column(dtype)}, coerce=coerce) + with pytest.warns( + UserWarning, match="Distributing .+ object. This may take some time." + ): + _test_datatype_with_schema(schema, data) + + +@pytest.mark.parametrize("dtype_cls", TEST_DTYPES_ON_CUDF) +@pytest.mark.parametrize("coerce", [True, False]) +@pytest.mark.parametrize("schema_cls", [pa.SeriesSchema, pa.Column]) +@hypothesis.given(st.data()) +def test_field_schema_dtypes( + dtype_cls: pandas_engine.DataType, + coerce: bool, + schema_cls, + data: st.DataObject, +): + """ + Test that all supported modin data types work as expected for series. + """ + schema = schema_cls(dtype_cls, name="field", coerce=coerce) + _test_datatype_with_schema(schema, data) + + +@pytest.mark.parametrize( + "dtype", + [ + int, + float, + bool, + # str, + # pandas_engine.DateTime, + ], +) +@pytest.mark.parametrize("coerce", [True, False]) +@pytest.mark.parametrize("schema_cls", [pa.Index]) +@hypothesis.given(st.data()) +def test_index_dtypes( + dtype: pandas_engine.DataType, + coerce: bool, + schema_cls, + data: st.DataObject, +): + """Test cudf Index and MultiIndex on subset of datatypes. + + Only test basic datatypes since index handling in pandas is already a + little finicky. 
+ """ + if schema_cls is pa.Index: + schema = schema_cls(dtype, name="field", coerce=coerce) + else: + schema = schema_cls(indexes=[pa.Index(dtype, name="field")]) + schema.coerce = coerce + sample = data.draw(schema.strategy(size=3)) + assert isinstance( + schema(cudf.DataFrame(pd.DataFrame(index=sample))), cudf.DataFrame + ) + + +@pytest.mark.parametrize( + "dtype", + [ + dt + for dt in TEST_DTYPES_ON_CUDF + # pylint: disable=no-value-for-parameter + if dt in NULLABLE_DTYPES + and not ( + pandas_engine.GEOPANDAS_INSTALLED + and dt == pandas_engine.Engine.dtype(pandas_engine.Geometry) + ) + ], +) +@hypothesis.given(st.data()) +@hypothesis.settings( + suppress_health_check=[hypothesis.HealthCheck.too_slow], +) +def test_nullable( + dtype: pandas_engine.DataType, + data: st.DataObject, +): + """Test nullable checks on cudf dataframes.""" + checks = None + nullable_schema = pa.DataFrameSchema( + {"field": pa.Column(dtype, checks=checks, nullable=True)} + ) + nonnullable_schema = pa.DataFrameSchema( + {"field": pa.Column(dtype, checks=checks, nullable=False)} + ) + null_sample = data.draw(nullable_schema.strategy(size=5)) + nonnull_sample = data.draw(nonnullable_schema.strategy(size=5)) + + ks_null_sample = cudf.DataFrame(null_sample) + ks_nonnull_sample = cudf.DataFrame(nonnull_sample) + n_nulls = ks_null_sample.isna().sum().item() + assert ks_nonnull_sample.notna().all().item() + assert n_nulls >= 0 + if n_nulls > 0: + with pytest.raises(pa.errors.SchemaError): + nonnullable_schema(ks_null_sample) + + +def test_required_column(): + """Test the required column raises error.""" + required_schema = pa.DataFrameSchema( + {"field": pa.Column(int, required=True)} + ) + schema = pa.DataFrameSchema({"field_": pa.Column(int, required=False)}) + + data = cudf.DataFrame({"field": [1, 2, 3]}) + + assert isinstance(required_schema(data), cudf.DataFrame) + assert isinstance(schema(data), cudf.DataFrame) + + with pytest.raises(pa.errors.SchemaError): + required_schema(cudf.DataFrame({"another_field": [1, 2, 3]})) + schema(cudf.DataFrame({"another_field": [1, 2, 3]})) + + +@pytest.mark.parametrize("from_dtype", [bool, float, int]) +@pytest.mark.parametrize("to_dtype", [float, int, str, bool]) +@hypothesis.given(st.data()) +def test_dtype_coercion(from_dtype, to_dtype, data): + """Test the datatype coercion provides informative errors.""" + from_schema = pa.DataFrameSchema({"field": pa.Column(from_dtype)}) + to_schema = pa.DataFrameSchema({"field": pa.Column(to_dtype, coerce=True)}) + + pd_sample = data.draw(from_schema.strategy(size=3)) + sample = cudf.DataFrame(pd_sample) + + if from_dtype is to_dtype: + assert isinstance(to_schema(sample), cudf.DataFrame) + return + + if from_dtype is str and to_dtype in {int, float}: + try: + result = to_schema(sample) + assert result["field"].dtype == to_dtype + except pa.errors.SchemaError as err: + for x in err.failure_cases.failure_case: + with pytest.raises(ValueError): + to_dtype(x) + return + + assert isinstance(to_schema(sample), cudf.DataFrame) + + +def test_strict_schema(): + """Test schema strictness.""" + strict_schema = pa.DataFrameSchema({"field": pa.Column()}, strict=True) + non_strict_schema = pa.DataFrameSchema({"field": pa.Column()}) + + strict_df = cudf.DataFrame({"field": [1]}) + non_strict_df = cudf.DataFrame({"field": [1], "foo": [2]}) + + strict_schema(strict_df) + non_strict_schema(strict_df) + + with pytest.raises( + pa.errors.SchemaError, match="column 'foo' not in DataFrameSchema" + ): + strict_schema(non_strict_df) + + 
non_strict_schema(non_strict_df)
+
+
+# pylint: disable=unused-argument
+def test_custom_checks(custom_check_teardown):
+    """Test that custom checks can be executed."""
+
+    # @extensions.register_check_method(statistics=["value"])
+    # def cudf_eq(cudf_obj, *, value):  # PPR
+    #     return cudf_obj == value
+    #
+    # custom_schema = pa.DataFrameSchema(
+    #     {"field": pa.Column(checks=pa.Check(lambda s: s == 0, name="custom"))}
+    # )
+    #
+    # custom_registered_schema = pa.DataFrameSchema(
+    #     {"field": pa.Column(checks=pa.Check.cudf_eq(0))}
+    # )
+    #
+    # for schema in (custom_schema, custom_registered_schema):
+    #     schema(cudf.DataFrame({"field": [0] * 100}))
+    #
+    # try:
+    #     schema(cudf.DataFrame({"field": [-1] * 100}))
+    # except pa.errors.SchemaError as err:
+    #     assert (err.failure_cases["failure_case"] == -1).all()
+    pass
+
+def test_schema_model():
+    # pylint: disable=missing-class-docstring
+    """Test that SchemaModel subclasses work on cudf dataframes."""
+
+    # pylint: disable=too-few-public-methods
+    class Schema(pa.SchemaModel):
+        int_field: pa.typing.cudf.Series[int] = pa.Field(gt=0)
+        float_field: pa.typing.cudf.Series[float] = pa.Field(lt=0)
+        # in_field: pa.typing.cudf.Series[str] = pa.Field(isin=[1, 2, 3])
+
+    valid_df = cudf.DataFrame(
+        {
+            "int_field": [1, 2, 3],
+            "float_field": [-1.1, -2.1, -3.1],
+            # "in_field": [1, 2, 3],
+        }
+    )
+    invalid_df = cudf.DataFrame(
+        {
+            "int_field": [-1],
+            "field_field": [1.0],
+            # "in_field": [4],
+        }
+    )
+
+    Schema.validate(valid_df)
+    try:
+        Schema.validate(invalid_df, lazy=True)
+    except pa.errors.SchemaErrors as err:
+        expected_failures = {-1, "float_field"}
+        assert (
+            set(err.failure_cases["failure_case"].tolist())
+            == expected_failures
+        )
+
+
+@pytest.mark.parametrize(
+    "check,valid,invalid",
+    [
+        [pa.Check.eq(0), 0, -1],
+        [pa.Check.ne(0), 1, 0],
+        [pa.Check.gt(0), 1, -1],
+        [pa.Check.ge(0), 0, -1],
+        [pa.Check.lt(0), -1, 0],
+        [pa.Check.le(0), 0, 1],
+        [pa.Check.in_range(0, 10), 5, -1],
+        # FIXME: to be validated
+        # [pa.Check.isin(["a"]), "a", "b"],
+        # [pa.Check.notin(["a"]), "b", "a"],
+        # [pa.Check.str_matches("^a$"), "a", "b"],
+        # [pa.Check.str_contains("a"), "faa", "foo"],
+        # [pa.Check.str_startswith("a"), "ab", "ba"],
+        # [pa.Check.str_endswith("a"), "ba", "ab"],
+        # [pa.Check.str_length(1, 2), "a", ""],
+    ],
+)
+def test_check_comparison_operators(check, valid, invalid):
+    """Test simple comparison operators."""
+    valid_check_result = check(cudf.Series([valid] * 3))
+    invalid_check_result = check(cudf.Series([invalid] * 3))
+    assert valid_check_result.check_passed
+    assert not invalid_check_result.check_passed
+
+
+def test_check_decorators():
+    # pylint: disable=missing-class-docstring
+    """Test that pandera decorators work with cudf."""
+    in_schema = pa.DataFrameSchema({"a": pa.Column(int)})
+    out_schema = in_schema.add_columns({"b": pa.Column(int)})
+
+    # pylint: disable=too-few-public-methods
+    class InSchema(pa.SchemaModel):
+        a: pa.typing.cudf.Series[int]
+
+    class OutSchema(InSchema):
+        b: pa.typing.cudf.Series[int]
+
+    @pa.check_input(in_schema)
+    @pa.check_output(out_schema)
+    def function_check_input_output(df: cudf.DataFrame) -> cudf.DataFrame:
+        df["b"] = df["a"] + 1
+        return df
+
+    @pa.check_input(in_schema)
+    @pa.check_output(out_schema)
+    def function_check_input_output_invalid(
+        df: cudf.DataFrame,
+    ) -> cudf.DataFrame:
+        return df
+
+    @pa.check_io(df=in_schema, out=out_schema)
+    def function_check_io(df: cudf.DataFrame) -> cudf.DataFrame:
+        df["b"] = df["a"] + 1
+        return df
+
+    @pa.check_io(df=in_schema, out=out_schema)
+    def function_check_io_invalid(df: cudf.DataFrame) -> cudf.DataFrame:
+        return df
+
+    @pa.check_types
+    def function_check_types(
+        df: pa.typing.cudf.DataFrame[InSchema],
+    ) -> pa.typing.cudf.DataFrame[OutSchema]:
+        df["b"] = df["a"] + 1
+        return df
+
+    @pa.check_types
+    def function_check_types_invalid(
+        df: pa.typing.cudf.DataFrame[InSchema],
+    ) -> pa.typing.cudf.DataFrame[OutSchema]:
+        return df
+
+    valid_df = cudf.DataFrame({"a": [1, 2, 3]})
+    invalid_df = cudf.DataFrame({"b": [1, 2, 3]})
+
+    function_check_input_output(valid_df)
+    function_check_io(valid_df)
+    function_check_types(valid_df)
+
+    for fn in (
+        function_check_input_output,
+        function_check_input_output_invalid,
+        function_check_io,
+        function_check_io_invalid,
+        function_check_types,
+        function_check_types_invalid,
+    ):
+        with pytest.raises(pa.errors.SchemaError):
+            fn(invalid_df)
+
+    for fn in (
+        function_check_input_output_invalid,
+        function_check_io_invalid,
+        function_check_types_invalid,
+    ):
+        with pytest.raises(pa.errors.SchemaError):
+            fn(valid_df)
+
+
+# pylint: disable=too-few-public-methods
+class InitSchema(pa.SchemaModel):
+    """Schema used for dataframe initialization."""
+
+    col1: Series[int]
+    col2: Series[float]
+    col3: Series[str]
+    index: Index[int]
+
+
+def test_init_cudf_dataframe():
+    """Test initialization of pandera.typing.cudf.DataFrame with Schema."""
+    assert isinstance(
+        DataFrame[InitSchema]({"col1": [1], "col2": [1.0], "col3": ["1"]}),
+        DataFrame,
+    )
+
+
+@pytest.mark.parametrize(
+    "invalid_data",
+    [
+        {"col1": [1.0], "col2": [1.0], "col3": ["1"]},
+        {"col1": [1], "col2": [1], "col3": ["1"]},
+        {"col1": [1], "col2": [1.0], "col3": [1]},
+        {"col1": [1]},
+    ],
+)
+def test_init_cudf_dataframe_errors(invalid_data):
+    """Test errors from initializing a pandera.typing.DataFrame with Schema."""
+    with pytest.raises(pa.errors.SchemaError):
+        DataFrame[InitSchema](invalid_data)

From 28730689c680c0cd74e172555cc32d73ae6b598f Mon Sep 17 00:00:00 2001
From: Philippe Prados
Date: Fri, 21 Oct 2022 10:25:51 +0200
Subject: [PATCH 6/8] Fix strings tests

---
 pandera/core/pandas/checks.py        |  12 +++
 pandera/errors.py                    |   3 +-
 pandera/typing/cudf.py               |  11 +--
 tests/conftest.py                    |   3 +
 tests/cudf/conftest.py               |   9 --
 tests/cudf/test_schemas_on_cudf.py   | 132 +++++++++++++++------------
 tests/modin/test_schemas_on_modin.py |   2 +-
 7 files changed, 97 insertions(+), 75 deletions(-)
 delete mode 100644 tests/cudf/conftest.py

diff --git a/pandera/core/pandas/checks.py b/pandera/core/pandas/checks.py
index e9ab11dc3..9598da7d0 100644
--- a/pandera/core/pandas/checks.py
+++ b/pandera/core/pandas/checks.py
@@ -300,6 +300,9 @@ def str_matches(
     :param pattern: Regular expression pattern to use for matching
     :param kwargs: key-word arguments passed into the `Check` initializer.
     """
+    if data.__module__.startswith("cudf"):
+        # This should be in its own backend implementation
+        return data.str.match(cast(str, pattern))
     return data.str.match(cast(str, pattern), na=False)
 
 
@@ -317,6 +320,9 @@ def str_contains(
     :param pattern: Regular expression pattern to use for searching
     :param kwargs: key-word arguments passed into the `Check` initializer.
""" + if data.__module__.startswith("cudf"): + # This should be in its own backend implementation + return data.str.contains(cast(str, pattern)) return data.str.contains(cast(str, pattern), na=False) @@ -330,6 +336,9 @@ def str_startswith(data: PandasData, string: str) -> PandasData: :param string: String all values should start with :param kwargs: key-word arguments passed into the `Check` initializer. """ + if data.__module__.startswith("cudf"): + # This should be in its own backend implementation + return data.str.startswith(string) return data.str.startswith(string, na=False) @@ -342,6 +351,9 @@ def str_endswith(data: PandasData, string: str) -> PandasData: :param string: String all values should end with :param kwargs: key-word arguments passed into the `Check` initializer. """ + if data.__module__.startswith("cudf"): + # This should be in its own backend implementation + return data.str.endswith(string, na=False) return data.str.endswith(string, na=False) diff --git a/pandera/errors.py b/pandera/errors.py index c98e3219a..35b77a770 100644 --- a/pandera/errors.py +++ b/pandera/errors.py @@ -290,8 +290,7 @@ def _parse_schema_errors(schema_errors: List[Dict[str, Any]]): ] elif any( - type(x).__module__.startswith("cudf") - for x in check_failure_cases + type(x).__module__.startswith("cudf") for x in check_failure_cases ): # pylint: disable=import-outside-toplevel # The current version of cudf is not compatible with sort_values() of strings. diff --git a/pandera/typing/cudf.py b/pandera/typing/cudf.py index 8efcd6411..a49f57e79 100644 --- a/pandera/typing/cudf.py +++ b/pandera/typing/cudf.py @@ -23,7 +23,6 @@ except ImportError: ModelField = Any # type: ignore - # pylint:disable=too-few-public-methods class Index(IndexBase, cudf.Index, Generic[GenericDtype]): """Representation of pandas.Index, only used for type annotation. @@ -31,7 +30,6 @@ class Index(IndexBase, cudf.Index, Generic[GenericDtype]): *new in 0.5.0* """ - # pylint:disable=too-few-public-methods class Series(SeriesBase, cudf.Series, Generic[GenericDtype]): # type: ignore """Representation of pandas.Series, only used for type annotation. @@ -40,6 +38,7 @@ class Series(SeriesBase, cudf.Series, Generic[GenericDtype]): # type: ignore """ if hasattr(pd.Series, "__class_getitem__") and _GenericAlias: + def __class_getitem__(cls, item): """Define this to override the patch that pyspark.pandas performs on pandas. https://github.com/apache/spark/blob/master/python/pyspark/pandas/__init__.py#L124-L144 @@ -47,14 +46,12 @@ def __class_getitem__(cls, item): _type_check(item, "Parameters to generic types must be types.") return _GenericAlias(cls, item) - # pylint:disable=invalid-name if TYPE_CHECKING: T = TypeVar("T") # pragma: no cover else: T = Schema - # pylint:disable=too-few-public-methods class DataFrame(DataFrameBase, cudf.DataFrame, Generic[T]): """ @@ -64,6 +61,7 @@ class DataFrame(DataFrameBase, cudf.DataFrame, Generic[T]): """ if hasattr(pd.DataFrame, "__class_getitem__") and _GenericAlias: + def __class_getitem__(cls, item): """Define this to override the patch that pyspark.pandas performs on pandas. 
https://github.com/apache/spark/blob/master/python/pyspark/pandas/__init__.py#L124-L144 @@ -162,7 +160,9 @@ def _get_schema(cls, field: ModelField): return schema_model, schema @classmethod - def pydantic_validate(cls, obj: Any, field: ModelField) -> pd.DataFrame: + def pydantic_validate( + cls, obj: Any, field: ModelField + ) -> pd.DataFrame: """ Verify that the input can be converted into a pandas dataframe that meets all schema requirements. @@ -177,7 +177,6 @@ def pydantic_validate(cls, obj: Any, field: ModelField) -> pd.DataFrame: return cls.to_format(valid_data, schema_model.__config__) - CUDF_INSTALLED = True except ImportError: CUDF_INSTALLED = False diff --git a/tests/conftest.py b/tests/conftest.py index 07e327a63..87fb9459b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,6 +2,9 @@ import os +# pylint: disable=unused-import +from tests.core.checks_fixtures import custom_check_teardown + try: # pylint: disable=unused-import import hypothesis # noqa F401 diff --git a/tests/cudf/conftest.py b/tests/cudf/conftest.py deleted file mode 100644 index 852eea3fb..000000000 --- a/tests/cudf/conftest.py +++ /dev/null @@ -1,9 +0,0 @@ -"""Registers fixtures for core""" - -import os - -import pytest - -# pylint: disable=unused-import -from tests.core.checks_fixtures import custom_check_teardown # noqa - diff --git a/tests/cudf/test_schemas_on_cudf.py b/tests/cudf/test_schemas_on_cudf.py index 2199c889f..5e0260649 100644 --- a/tests/cudf/test_schemas_on_cudf.py +++ b/tests/cudf/test_schemas_on_cudf.py @@ -10,11 +10,8 @@ import pandera as pa from pandera import extensions from pandera.engines import numpy_engine, pandas_engine -from pandera.typing.modin import DataFrame, Index, Series, modin_version +from pandera.typing.modin import DataFrame, Index, Series from tests.strategies.test_strategies import NULLABLE_DTYPES -from tests.strategies.test_strategies import ( - SUPPORTED_DTYPES as SUPPORTED_STRATEGY_DTYPES, -) from tests.strategies.test_strategies import ( UNSUPPORTED_DTYPE_CLS as UNSUPPORTED_STRATEGY_DTYPE_CLS, ) @@ -26,26 +23,10 @@ hypothesis = MagicMock() st = MagicMock() - UNSUPPORTED_STRATEGY_DTYPE_CLS = set(UNSUPPORTED_STRATEGY_DTYPE_CLS) UNSUPPORTED_STRATEGY_DTYPE_CLS.add(numpy_engine.Object) -TEST_DTYPES_ON_CUDF = [] -# pylint: disable=redefined-outer-name -# for dtype_cls in pandas_engine.Engine.get_registered_dtypes(): -# if ( -# dtype_cls in UNSUPPORTED_STRATEGY_DTYPE_CLS -# or ( -# pandas_engine.Engine.dtype(dtype_cls) -# not in SUPPORTED_STRATEGY_DTYPES -# ) -# or not ( -# pandas_engine.GEOPANDAS_INSTALLED -# and dtype_cls == pandas_engine.Geometry -# ) -# ): -# continue -# TEST_DTYPES_ON_CUDF.append(pandas_engine.Engine.dtype(dtype_cls)) +TEST_DTYPES_ON_CUDF: typing.List[str] = [] @pytest.mark.parametrize("coerce", [True, False]) @@ -55,7 +36,8 @@ def test_dataframe_schema_case(coerce): { "int_column": pa.Column(int, pa.Check.ge(0)), "float_column": pa.Column(float, pa.Check.le(0)), - # cudf not implemented "str_column": pa.Column(str, pa.Check.isin(list("abcde"))), + # not implemented in cudf 22.08.00 + # "str_column": pa.Column(str, pa.Check.isin(list("abcde"))), }, coerce=coerce, ) @@ -63,7 +45,7 @@ def test_dataframe_schema_case(coerce): { "int_column": range(10), "float_column": [float(-x) for x in range(10)], - # cudf not implemented "str_column": list("aabbcceedd"), + # "str_column": list("aabbcceedd"), # not implemented in cudf 22.08.00 } ) assert isinstance(schema.validate(cdf), cudf.DataFrame) @@ -126,12 +108,14 @@ def test_field_schema_dtypes( int, 
float, bool, - # str, - # pandas_engine.DateTime, + # str, # not implemented in cudf 22.08.00 + # pandas_engine.DateTime, # not implemented in cudf 22.08.00 ], ) @pytest.mark.parametrize("coerce", [True, False]) -@pytest.mark.parametrize("schema_cls", [pa.Index]) +@pytest.mark.parametrize( + "schema_cls", [pa.Index] +) # Multiindex not implemented in cudf 22.08.00 @hypothesis.given(st.data()) def test_index_dtypes( dtype: pandas_engine.DataType, @@ -197,6 +181,39 @@ def test_nullable( nonnullable_schema(ks_null_sample) +# def test_unique(): # cudf 22.08.00 not implemented `df.duplicated()` +# """Test uniqueness checks on modin dataframes.""" +# schema = pa.DataFrameSchema({"field": pa.Column(int)}, unique=["field"]) +# column_schema = pa.Column(int, unique=True, name="field") +# series_schema = pa.SeriesSchema(int, unique=True, name="field") +# +# data_unique = cudf.DataFrame({"field": [1, 2, 3]}) +# data_non_unique = cudf.DataFrame({"field": [1, 1, 1]}) +# +# assert isinstance(schema(data_unique), cudf.DataFrame) +# assert isinstance(column_schema(data_unique), cudf.DataFrame) +# assert isinstance(series_schema(data_unique["field"]), cudf.Series) +# +# with pytest.raises(pa.errors.SchemaError, match="columns .+ not unique"): +# schema(data_non_unique) +# with pytest.raises( +# pa.errors.SchemaError, match="series .+ contains duplicate values" +# ): +# column_schema(data_non_unique) +# with pytest.raises( +# pa.errors.SchemaError, match="series .+ contains duplicate values" +# ): +# series_schema(data_non_unique["field"]) +# +# schema.unique = None +# column_schema.unique = False +# series_schema.unique = False +# +# assert isinstance(schema(data_non_unique), mpd.DataFrame) +# assert isinstance(column_schema(data_non_unique), mpd.DataFrame) +# assert isinstance(series_schema(data_non_unique["field"]), mpd.Series) + + def test_required_column(): """Test the required column raises error.""" required_schema = pa.DataFrameSchema( @@ -214,7 +231,9 @@ def test_required_column(): schema(cudf.DataFrame({"another_field": [1, 2, 3]})) -@pytest.mark.parametrize("from_dtype", [bool, float, int]) +@pytest.mark.parametrize( + "from_dtype", [bool, float, int] +) # str not implemented in cudf 22.08.00 @pytest.mark.parametrize("to_dtype", [float, int, str, bool]) @hypothesis.given(st.data()) def test_dtype_coercion(from_dtype, to_dtype, data): @@ -265,26 +284,26 @@ def test_strict_schema(): def test_custom_checks(custom_check_teardown): """Test that custom checks can be executed.""" - # @extensions.register_check_method(statistics=["value"]) - # def cudf_eq(cudf_obj, *, value): # PPR - # return cudf_obj == value - # - # custom_schema = pa.DataFrameSchema( - # {"field": pa.Column(checks=pa.Check(lambda s: s == 0, name="custom"))} - # ) - # - # custom_registered_schema = pa.DataFrameSchema( - # {"field": pa.Column(checks=pa.Check.cudf_eq(0))} - # ) - # - # for schema in (custom_schema, custom_registered_schema): - # schema(cudf.DataFrame({"field": [0] * 100})) - # - # try: - # schema(cudf.DataFrame({"field": [-1] * 100})) - # except pa.errors.SchemaError as err: - # assert (err.failure_cases["failure_case"] == -1).all() - pass + @extensions.register_check_method(statistics=["value"]) + def cudf_eq(cudf_obj, *, value): + return cudf_obj == value + + custom_schema = pa.DataFrameSchema( + {"field": pa.Column(checks=pa.Check(lambda s: s == 0, name="custom"))} + ) + + custom_registered_schema = pa.DataFrameSchema( + {"field": pa.Column(checks=pa.Check.cudf_eq(0))} + ) + + for schema in (custom_schema, 
custom_registered_schema):
+        schema(cudf.DataFrame({"field": [0] * 100}))
+
+    try:
+        schema(cudf.DataFrame({"field": [-1] * 100}))
+    except pa.errors.SchemaError as err:
+        assert (err.failure_cases["failure_case"] == -1).all()
+
 
 def test_schema_model():
     # pylint: disable=missing-class-docstring
@@ -300,14 +319,14 @@ class Schema(pa.SchemaModel):
         {
             "int_field": [1, 2, 3],
             "float_field": [-1.1, -2.1, -3.1],
-            # "in_field": [1, 2, 3],
+            # "str_field": ["a", "b", "c"],  # not implemented in cudf 22.08.00
         }
     )
     invalid_df = cudf.DataFrame(
         {
             "int_field": [-1],
             "field_field": [1.0],
-            # "in_field": [4],
+            # "str_field": ["d"],  # not implemented in cudf 22.08.00
         }
     )
@@ -332,14 +351,13 @@ class Schema(pa.SchemaModel):
         [pa.Check.lt(0), -1, 0],
         [pa.Check.le(0), 0, 1],
         [pa.Check.in_range(0, 10), 5, -1],
-        # FIXME: to be validated
-        # [pa.Check.isin(["a"]), "a", "b"],
-        # [pa.Check.notin(["a"]), "b", "a"],
-        # [pa.Check.str_matches("^a$"), "a", "b"],
-        # [pa.Check.str_contains("a"), "faa", "foo"],
-        # [pa.Check.str_startswith("a"), "ab", "ba"],
-        # [pa.Check.str_endswith("a"), "ba", "ab"],
-        # [pa.Check.str_length(1, 2), "a", ""],
+        # [pa.Check.isin(["a"]), "a", "b"],  # Not implemented by cudf
+        # [pa.Check.notin(["a"]), "b", "a"],  # Not implemented by cudf
+        [pa.Check.str_matches("^a$"), "a", "b"],
+        [pa.Check.str_contains("a"), "faa", "foo"],
+        [pa.Check.str_startswith("a"), "ab", "ba"],
+        [pa.Check.str_endswith("a"), "ba", "ab"],
+        [pa.Check.str_length(1, 2), "a", ""],
     ],
 )
 def test_check_comparison_operators(check, valid, invalid):
diff --git a/tests/modin/test_schemas_on_modin.py b/tests/modin/test_schemas_on_modin.py
index d8393d39e..f24346da1 100644
--- a/tests/modin/test_schemas_on_modin.py
+++ b/tests/modin/test_schemas_on_modin.py
@@ -251,7 +251,7 @@ def test_required_column():
         schema(mpd.DataFrame({"another_field": [1, 2, 3]}))
 
 
-@pytest.mark.parametrize("from_dtype", [str])
+@pytest.mark.parametrize("from_dtype", [bool, float, int, str])
 @pytest.mark.parametrize("to_dtype", [float, int, str, bool])
 @hypothesis.given(st.data())
 def test_dtype_coercion(from_dtype, to_dtype, data):

From aee1811dcf1b0cee6c32a901f8feb98e4ec62c08 Mon Sep 17 00:00:00 2001
From: cosmicBboy
Date: Thu, 26 Jan 2023 22:00:04 -0500
Subject: [PATCH 7/8] move cudf_accessor.py

---
 pandera/{ => accessors}/cudf_accessor.py |  0
 pandera/core/extensions.py               |  2 +-
 pandera/typing/common.py                 |  4 ++--
 pandera/typing/cudf.py                   | 12 +++++++++---
 tests/cudf/test_cudf_accessor.py         |  2 +-
 5 files changed, 13 insertions(+), 7 deletions(-)
 rename pandera/{ => accessors}/cudf_accessor.py (100%)

diff --git a/pandera/cudf_accessor.py b/pandera/accessors/cudf_accessor.py
similarity index 100%
rename from pandera/cudf_accessor.py
rename to pandera/accessors/cudf_accessor.py
diff --git a/pandera/core/extensions.py b/pandera/core/extensions.py
index f440971b3..ddecd4605 100644
--- a/pandera/core/extensions.py
+++ b/pandera/core/extensions.py
@@ -3,7 +3,7 @@
 import warnings
 from enum import Enum
 from functools import partial, wraps
-from inspect import signature, Parameter, Signature, _empty
+from inspect import signature, Parameter, Signature, _empty  # type: ignore
 from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union
 
 import pandas as pd
diff --git a/pandera/typing/common.py b/pandera/typing/common.py
index 0490de0df..fd5c4b6c9 100644
--- a/pandera/typing/common.py
+++ b/pandera/typing/common.py
@@ -95,7 +95,7 @@
 else:
     GenericDtype = TypeVar(  # type: ignore
         "GenericDtype",
-        bound=Union[
+        bound=Union[  # type: ignore
            bool,
            int,
            str,
@@ -134,7 +134,7 @@
     ],
 )
 
-DataFrameModel = TypeVar("Schema", bound="DataFrameModel")  # type: ignore
+DataFrameModel = TypeVar("DataFrameModel", bound="DataFrameModel")  # type: ignore
 
 
 # pylint:disable=invalid-name
diff --git a/pandera/typing/cudf.py b/pandera/typing/cudf.py
index a49f57e79..ba9e38bdd 100644
--- a/pandera/typing/cudf.py
+++ b/pandera/typing/cudf.py
@@ -6,8 +6,14 @@
 
 import pandas as pd
 
-from ..errors import SchemaError, SchemaInitError
-from .common import DataFrameBase, GenericDtype, IndexBase, Schema, SeriesBase
+from pandera.errors import SchemaError, SchemaInitError
+from pandera.typing.common import (
+    DataFrameBase,
+    GenericDtype,
+    IndexBase,
+    DataFrameModel,
+    SeriesBase,
+)
 from .formats import Formats
 
 try:
@@ -50,7 +56,7 @@ def __class_getitem__(cls, item):
     if TYPE_CHECKING:
         T = TypeVar("T")  # pragma: no cover
     else:
-        T = Schema
+        T = DataFrameModel
 
     # pylint:disable=too-few-public-methods
     class DataFrame(DataFrameBase, cudf.DataFrame, Generic[T]):
diff --git a/tests/cudf/test_cudf_accessor.py b/tests/cudf/test_cudf_accessor.py
index 8f9a1b861..693a913fd 100644
--- a/tests/cudf/test_cudf_accessor.py
+++ b/tests/cudf/test_cudf_accessor.py
@@ -3,7 +3,7 @@
 
 import pytest
 
-from pandera import cudf_accessor
+from pandera.accessors import cudf_accessor
 
 
 # pylint: disable=too-few-public-methods

From 9192da71fdb879d1ba8f24b9e004228a583ef607 Mon Sep 17 00:00:00 2001
From: cosmicBboy
Date: Thu, 26 Jan 2023 22:02:06 -0500
Subject: [PATCH 8/8] uncomment and skip test

---
 tests/cudf/test_schemas_on_cudf.py | 63 +++++++++++++++---------------
 1 file changed, 32 insertions(+), 31 deletions(-)

diff --git a/tests/cudf/test_schemas_on_cudf.py b/tests/cudf/test_schemas_on_cudf.py
index 5e0260649..40acb290a 100644
--- a/tests/cudf/test_schemas_on_cudf.py
+++ b/tests/cudf/test_schemas_on_cudf.py
@@ -181,37 +181,38 @@ def test_nullable(
     nonnullable_schema(ks_null_sample)
 
 
-# def test_unique():  # cudf 22.08.00 not implemented `df.duplicated()`
-#     """Test uniqueness checks on modin dataframes."""
-#     schema = pa.DataFrameSchema({"field": pa.Column(int)}, unique=["field"])
-#     column_schema = pa.Column(int, unique=True, name="field")
-#     series_schema = pa.SeriesSchema(int, unique=True, name="field")
-#
-#     data_unique = cudf.DataFrame({"field": [1, 2, 3]})
-#     data_non_unique = cudf.DataFrame({"field": [1, 1, 1]})
-#
-#     assert isinstance(schema(data_unique), cudf.DataFrame)
-#     assert isinstance(column_schema(data_unique), cudf.DataFrame)
-#     assert isinstance(series_schema(data_unique["field"]), cudf.Series)
-#
-#     with pytest.raises(pa.errors.SchemaError, match="columns .+ not unique"):
-#         schema(data_non_unique)
-#     with pytest.raises(
-#         pa.errors.SchemaError, match="series .+ contains duplicate values"
-#     ):
-#         column_schema(data_non_unique)
-#     with pytest.raises(
-#         pa.errors.SchemaError, match="series .+ contains duplicate values"
-#     ):
-#         series_schema(data_non_unique["field"])
-#
-#     schema.unique = None
-#     column_schema.unique = False
-#     series_schema.unique = False
-#
-#     assert isinstance(schema(data_non_unique), mpd.DataFrame)
-#     assert isinstance(column_schema(data_non_unique), mpd.DataFrame)
-#     assert isinstance(series_schema(data_non_unique["field"]), mpd.Series)
+@pytest.mark.skip(reason="cudf 22.08.00 does not implement `df.duplicated()`")
+def test_unique():
+    """Test uniqueness checks on cudf dataframes."""
+    schema = pa.DataFrameSchema({"field": pa.Column(int)}, unique=["field"])
+    column_schema = pa.Column(int, unique=True, name="field")
+    series_schema = pa.SeriesSchema(int, unique=True, name="field")
+
+    data_unique = cudf.DataFrame({"field": [1, 2, 3]})
+    data_non_unique = cudf.DataFrame({"field": [1, 1, 1]})
+
+    assert isinstance(schema(data_unique), cudf.DataFrame)
+    assert isinstance(column_schema(data_unique), cudf.DataFrame)
+    assert isinstance(series_schema(data_unique["field"]), cudf.Series)
+
+    with pytest.raises(pa.errors.SchemaError, match="columns .+ not unique"):
+        schema(data_non_unique)
+    with pytest.raises(
+        pa.errors.SchemaError, match="series .+ contains duplicate values"
+    ):
+        column_schema(data_non_unique)
+    with pytest.raises(
+        pa.errors.SchemaError, match="series .+ contains duplicate values"
+    ):
+        series_schema(data_non_unique["field"])
+
+    schema.unique = None
+    column_schema.unique = False
+    series_schema.unique = False
+
+    assert isinstance(schema(data_non_unique), cudf.DataFrame)
+    assert isinstance(column_schema(data_non_unique), cudf.DataFrame)
+    assert isinstance(series_schema(data_non_unique["field"]), cudf.Series)
 
 
 def test_required_column():
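
--
Usage sketch (not part of the patches above): with this series applied, cudf
validation is expected to mirror the existing pandas/modin integrations. A
minimal example based on the unit tests added in this series, assuming a
working cudf installation:

    import cudf
    import pandera as pa

    class Schema(pa.SchemaModel):
        int_field: pa.typing.cudf.Series[int] = pa.Field(gt=0)
        float_field: pa.typing.cudf.Series[float] = pa.Field(lt=0)

    df = cudf.DataFrame(
        {"int_field": [1, 2, 3], "float_field": [-1.1, -2.1, -3.1]}
    )

    # validate() returns the validated cudf.DataFrame; on failure it raises
    # pandera.errors.SchemaError (or SchemaErrors when called with lazy=True)
    Schema.validate(df)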