-
-
Notifications
You must be signed in to change notification settings - Fork 314
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
internals rewrite: clean up checks and hypothesis functionality (#1109)
* rename core.pandas.checks to *.builtin_checks * handle strategies better * wip re-implement builtin checks * clean up hypotheses * add docstrings * move builtin checks/hypotheses modules * clean up register_{check, hypothesis} * minor import cleanup * clean up check registration, error implementation * create backends.base subpackage * remove debugging script * cleanup, fix codecov
- Loading branch information
1 parent
0ef450b
commit d6c2078
Showing
32 changed files
with
1,311 additions
and
819 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,6 +15,13 @@ good-names= | |
fp, | ||
bar, | ||
_IS_INFERRED, | ||
eq, | ||
ne, | ||
gt, | ||
ge, | ||
lt, | ||
le, | ||
dt | ||
|
||
[MESSAGES CONTROL] | ||
disable= | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
"""Pandera backends.""" | ||
|
||
# ensure that base builtin checks and hypotheses are registered | ||
import pandera.backends.base.builtin_checks | ||
import pandera.backends.base.builtin_hypotheses | ||
|
||
import pandera.backends.pandas |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
# pylint: disable=missing-function-docstring | ||
"""Built-in check functions base implementation. | ||
This module contains check function abstract definitions that correspond to | ||
the pandera.core.checks.Check methods. These functions do not actually | ||
implement any validation logic and serve as the entrypoint for dispatching | ||
specific implementations based on the data object type, e.g. | ||
`pandas.DataFrame`s. | ||
""" | ||
|
||
import re | ||
from typing import Any, Iterable, TypeVar, Union | ||
|
||
from pandera.core.checks import Check | ||
|
||
|
||
T = TypeVar("T") | ||
|
||
|
||
@Check.register_builtin_check_fn
def equal_to(
    data: Any,
    value: Any,
) -> Any:
    # Abstract entry point only: registered as a built-in check function and
    # carrying no validation logic. Presumably backends compare `data`
    # against `value` for equality -- confirm in the backend implementation.
    raise NotImplementedError
|
||
|
||
@Check.register_builtin_check_fn
def not_equal_to(
    data: Any,
    value: Any,
) -> Any:
    # Abstract entry point only: registered as a built-in check function and
    # carrying no validation logic. Presumably backends test `data` for
    # inequality with `value` -- confirm in the backend implementation.
    raise NotImplementedError
|
||
|
||
@Check.register_builtin_check_fn
def greater_than(
    data: Any,
    min_value: Any,
) -> Any:
    # Abstract entry point only: registered as a built-in check function and
    # carrying no validation logic. Presumably `min_value` is an exclusive
    # lower bound -- confirm in the backend implementation.
    raise NotImplementedError
|
||
|
||
@Check.register_builtin_check_fn
def greater_than_or_equal_to(
    data: Any,
    min_value: Any,
) -> Any:
    # Abstract entry point only: registered as a built-in check function and
    # carrying no validation logic. Presumably `min_value` is an inclusive
    # lower bound -- confirm in the backend implementation.
    raise NotImplementedError
|
||
|
||
@Check.register_builtin_check_fn
def less_than(
    data: Any,
    max_value: Any,
) -> Any:
    # Abstract entry point only: registered as a built-in check function and
    # carrying no validation logic. Presumably `max_value` is an exclusive
    # upper bound -- confirm in the backend implementation.
    raise NotImplementedError
|
||
|
||
@Check.register_builtin_check_fn
def less_than_or_equal_to(
    data: Any,
    max_value: Any,
) -> Any:
    # Abstract entry point only: registered as a built-in check function and
    # carrying no validation logic. Presumably `max_value` is an inclusive
    # upper bound -- confirm in the backend implementation.
    raise NotImplementedError
|
||
|
||
@Check.register_builtin_check_fn
def in_range(
    data: Any,
    min_value: T,
    max_value: T,
    include_min: bool = True,
    include_max: bool = True,
) -> Any:
    # Abstract entry point only: registered as a built-in check function and
    # carrying no validation logic. Both bounds share the type variable `T`.
    # Presumably `include_min` / `include_max` toggle whether each bound is
    # inclusive -- confirm in the backend implementation.
    raise NotImplementedError
|
||
|
||
@Check.register_builtin_check_fn
def isin(
    data: Any,
    allowed_values: Iterable,
) -> Any:
    # Abstract entry point only: registered as a built-in check function and
    # carrying no validation logic. Presumably backends test membership of
    # `data` values in `allowed_values` -- confirm in the backend
    # implementation.
    raise NotImplementedError
|
||
|
||
@Check.register_builtin_check_fn
def notin(
    data: Any,
    forbidden_values: Iterable,
) -> Any:
    # Abstract entry point only: registered as a built-in check function and
    # carrying no validation logic. Presumably backends test that `data`
    # values are absent from `forbidden_values` -- confirm in the backend
    # implementation.
    raise NotImplementedError
|
||
|
||
@Check.register_builtin_check_fn
def str_matches(
    data: Any,
    pattern: Union[str, re.Pattern],
) -> Any:
    # Abstract entry point only: registered as a built-in check function and
    # carrying no validation logic. `pattern` may be a string or a compiled
    # regex. NOTE(review): anchoring semantics (match vs. search) are decided
    # by the backend implementation -- confirm there.
    raise NotImplementedError
|
||
|
||
@Check.register_builtin_check_fn
def str_contains(
    data: Any,
    pattern: Union[str, re.Pattern],
) -> Any:
    # Abstract entry point only: registered as a built-in check function and
    # carrying no validation logic. `pattern` may be a string or a compiled
    # regex. Presumably backends search for the pattern anywhere in each
    # string -- confirm in the backend implementation.
    raise NotImplementedError
|
||
|
||
@Check.register_builtin_check_fn
def str_startswith(
    data: Any,
    string: str,
) -> Any:
    # Abstract entry point only: registered as a built-in check function and
    # carrying no validation logic. Presumably backends test that each string
    # in `data` begins with `string` -- confirm in the backend
    # implementation.
    raise NotImplementedError
|
||
|
||
@Check.register_builtin_check_fn
def str_endswith(
    data: Any,
    string: str,
) -> Any:
    # Abstract entry point only: registered as a built-in check function and
    # carrying no validation logic. Presumably backends test that each string
    # in `data` ends with `string` -- confirm in the backend implementation.
    raise NotImplementedError
|
||
|
||
@Check.register_builtin_check_fn
def str_length(
    data: Any,
    min_value: Union[int, None] = None,
    max_value: Union[int, None] = None,
) -> Any:
    # Abstract entry point only: registered as a built-in check function and
    # carrying no validation logic.
    #
    # Both bounds default to None, so they are annotated as explicitly
    # optional (`Union[int, None]`) rather than relying on the deprecated
    # implicit-Optional reading of `int = None`. Presumably a None bound
    # means "unbounded on that side" -- confirm in the backend
    # implementation.
    raise NotImplementedError
|
||
|
||
@Check.register_builtin_check_fn
def unique_values_eq(
    data: Any,
    values: Iterable,
) -> Any:
    # Abstract entry point only: registered as a built-in check function and
    # carrying no validation logic. Presumably backends compare the set of
    # unique values in `data` with `values` -- confirm in the backend
    # implementation.
    raise NotImplementedError
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
# pylint: disable=missing-function-docstring | ||
"""Built-in hypothesis functions base implementation. | ||
This module contains hypothesis function abstract definitions that | ||
correspond to the pandera.core.hypotheses.Hypothesis methods. These functions do not | ||
actually implement any validation logic and serve as the entrypoint for | ||
dispatching specific implementations based on the data object type, e.g. | ||
`pandas.DataFrame`s. | ||
""" | ||
|
||
from typing import Any, Tuple | ||
|
||
from pandera.core.hypotheses import Hypothesis | ||
|
||
|
||
@Hypothesis.register_builtin_check_fn
def two_sample_ttest(
    *samples: Tuple[Any, ...],
    equal_var: bool = True,
    nan_policy: str = "propagate",
):
    # Abstract entry point only: registered as a built-in hypothesis function
    # and carrying no test logic. `equal_var` and `nan_policy` are
    # keyword-only because they follow `*samples`.
    # NOTE(review): the keyword names look like scipy.stats t-test options --
    # confirm against the backend implementation.
    raise NotImplementedError
|
||
|
||
@Hypothesis.register_builtin_check_fn
def one_sample_ttest(
    *samples: Tuple[Any, ...],
    popmean: float,
    nan_policy: str = "propagate",
):
    # Abstract entry point only: registered as a built-in hypothesis function
    # and carrying no test logic. `popmean` is a required keyword-only
    # argument (it follows `*samples` and has no default).
    # NOTE(review): presumably the population mean under the null
    # hypothesis -- confirm against the backend implementation.
    raise NotImplementedError
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
"""Pandas backend implementation for schemas and checks.""" | ||
|
||
import pandas as pd | ||
|
||
import pandera.typing | ||
from pandera.core.checks import Check | ||
from pandera.core.hypotheses import Hypothesis | ||
|
||
from pandera.backends.pandas.checks import PandasCheckBackend | ||
from pandera.backends.pandas.hypotheses import PandasHypothesisBackend | ||
from pandera.backends.pandas import builtin_checks, builtin_hypotheses | ||
|
||
|
||
# Container types handled by the pandas backend. Plain pandas is always
# included; each optional library below is imported, and its frame/series
# types appended, only when the matching *_INSTALLED flag under
# pandera.typing is truthy.
data_types = [pd.DataFrame, pd.Series]

if pandera.typing.dask.DASK_INSTALLED:
    import dask.dataframe as dd

    data_types += [dd.DataFrame, dd.Series]

if pandera.typing.modin.MODIN_INSTALLED:
    import modin.pandas as mpd

    data_types += [mpd.DataFrame, mpd.Series]

if pandera.typing.pyspark.PYSPARK_INSTALLED:
    import pyspark.pandas as ps

    data_types += [ps.DataFrame, ps.Series]

if pandera.typing.geopandas.GEOPANDAS_INSTALLED:
    import geopandas as gpd

    data_types += [gpd.GeoDataFrame, gpd.GeoSeries]

# Register the pandas check and hypothesis backends for every collected
# type, in list order (check first, then hypothesis, per type).
for t in data_types:
    Check.register_backend(t, PandasCheckBackend)
    Hypothesis.register_backend(t, PandasHypothesisBackend)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.