-
-
Notifications
You must be signed in to change notification settings - Fork 315
/
Copy pathcontainer.py
121 lines (96 loc) · 3.85 KB
/
container.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
"""DataFrame Schema for Polars."""
import warnings
from typing import Optional, Type
import polars as pl
from pandera.api.dataframe.container import DataFrameSchema as _DataFrameSchema
from pandera.api.polars.types import PolarsCheckObjects
from pandera.api.polars.utils import get_validation_depth
from pandera.backends.polars.register import register_polars_backends
from pandera.config import config_context, get_config_context
from pandera.dtypes import DataType
from pandera.engines import polars_engine
class DataFrameSchema(_DataFrameSchema[PolarsCheckObjects]):
    """A polars LazyFrame or DataFrame validator."""

    def _validate_attributes(self):
        """Validate schema attributes, warning about options polars ignores."""
        super()._validate_attributes()

        if self.unique_column_names:
            # polars enforces unique column names itself, so this option is a no-op
            warnings.warn(
                "unique_column_names=True will have no effect on validation "
                "since polars DataFrames does not support duplicate column "
                "names."
            )

        if self.report_duplicates != "all":
            # the polars backend always reports every duplicate value
            warnings.warn(
                "Setting report_duplicates to 'exclude_first' or "
                "'exclude_last' will have no effect on validation. With the "
                "polars backend, all duplicate values will be reported."
            )

    @staticmethod
    def register_default_backends(
        check_obj_cls: Type,
    ):  # pylint: disable=unused-argument
        """Register the polars validation backends for this schema type."""
        register_polars_backends()

    def validate(
        self,
        check_obj: PolarsCheckObjects,
        head: Optional[int] = None,
        tail: Optional[int] = None,
        sample: Optional[int] = None,
        random_state: Optional[int] = None,
        lazy: bool = False,
        inplace: bool = False,
    ) -> PolarsCheckObjects:
        """Validate a polars DataFrame or LazyFrame against the schema.

        :param check_obj: polars ``DataFrame`` or ``LazyFrame`` to validate.
        :param head: validate the first ``head`` number of rows.
        :param tail: validate the last ``tail`` number of rows.
        :param sample: validate a random sample of ``sample`` rows.
        :param random_state: random seed for the ``sample`` argument.
        :param lazy: if True, lazily evaluates all checks before raising.
        :param inplace: if True, applies coercion to the object of validation,
            otherwise creates a copy of the data.
        :returns: validated object of the same type as ``check_obj``.
        """
        if not get_config_context().validation_enabled:
            # validation globally disabled: pass the object through untouched
            return check_obj

        # eager DataFrames are validated via the lazy API and collected at the
        # end so the caller receives the same type they passed in
        is_dataframe = isinstance(check_obj, pl.DataFrame)

        with config_context(validation_depth=get_validation_depth(check_obj)):
            if is_dataframe:
                # if validating a polars DataFrame, use the global config setting
                check_obj = check_obj.lazy()

            output = self.get_backend(check_obj).validate(
                check_obj=check_obj,
                schema=self,
                head=head,
                tail=tail,
                sample=sample,
                random_state=random_state,
                lazy=lazy,
                inplace=inplace,
            )
            if is_dataframe:
                output = output.collect()

        return output

    @property
    def dtype(
        self,
    ) -> DataType:
        """Get the dtype property."""
        return self._dtype  # type: ignore

    @dtype.setter
    def dtype(self, value) -> None:
        """Set the polars dtype property."""
        self._dtype = polars_engine.Engine.dtype(value) if value else None

    def strategy(
        self, *, size: Optional[int] = None, n_regex_columns: int = 1
    ):
        """Create a ``hypothesis`` strategy for generating a DataFrame.

        :param size: number of elements to generate
        :param n_regex_columns: number of regex columns to generate.
        :returns: a strategy that generates polars DataFrame objects.

        .. warning::

            This method is not implemented in the polars backend.
        """
        raise NotImplementedError(
            "Data synthesis is not supported with polars schemas."
        )

    def example(self, size: Optional[int] = None, n_regex_columns: int = 1):
        """Generate an example of a particular size.

        :param size: number of elements in the generated DataFrame.
        :param n_regex_columns: number of regex columns to generate.
        :returns: polars DataFrame object.

        .. warning::

            This method is not implemented in the polars backend.
        """
        raise NotImplementedError(
            "Data synthesis is not supported with polars schemas."
        )