11from abc import ABC , abstractmethod
2- from typing import cast
2+ from typing import List , Optional , cast
33
44from pandas import DataFrame as PandasDataFrame
55from pyspark .sql import DataFrame as PySparkDataFrame
1212 PySparkConnectDataFrame = None # type: ignore[misc,assignment]
1313
1414from dataframe_expectations .core .types import DataFrameLike , DataFrameType
15+ from dataframe_expectations .core .tagging import TagSet
1516from dataframe_expectations .result_message import (
1617 DataFrameExpectationResultMessage ,
1718)
@@ -22,6 +23,20 @@ class DataFrameExpectation(ABC):
2223 Base class for DataFrame expectations.
2324 """
2425
def __init__(self, tags: Optional[List[str]] = None):
    """
    Set up the base expectation, wrapping any supplied tags in a TagSet.

    :param tags: Optional list of tag strings in "key:value" format,
        for example ["priority:high", "env:test"]. The value (including
        None) is handed to TagSet as-is.
    """
    tag_set = TagSet(tags)
    # Double-underscore attribute: name-mangled to the defining class,
    # so it is read back through get_tags() rather than directly.
    self.__tags = tag_set
33+
def get_tags(self) -> TagSet:
    """
    Return the TagSet held by this expectation.
    """
    tag_set = self.__tags
    return tag_set
39+
2540 def get_expectation_name (self ) -> str :
2641 """
2742 Returns the class name as the expectation name.
@@ -48,29 +63,31 @@ def infer_data_frame_type(cls, data_frame: DataFrameLike) -> DataFrameType:
4863 """
4964 Infer the DataFrame type based on the provided DataFrame.
5065 """
51- if isinstance (data_frame , PandasDataFrame ):
52- return DataFrameType .PANDAS
53- elif isinstance (data_frame , PySparkDataFrame ):
54- return DataFrameType .PYSPARK
55- elif PySparkConnectDataFrame is not None and isinstance (
56- data_frame , PySparkConnectDataFrame
57- ):
58- return DataFrameType .PYSPARK
59- else :
60- raise ValueError (f"Unsupported DataFrame type: { type (data_frame )} " )
66+ match data_frame :
67+ case PandasDataFrame ():
68+ return DataFrameType .PANDAS
69+ case PySparkDataFrame ():
70+ return DataFrameType .PYSPARK
71+ case _ if PySparkConnectDataFrame is not None and isinstance (
72+ data_frame , PySparkConnectDataFrame
73+ ):
74+ return DataFrameType .PYSPARK
75+ case _:
76+ raise ValueError (f"Unsupported DataFrame type: { type (data_frame )} " )
6177
def validate(self, data_frame: DataFrameLike, **kwargs):
    """
    Run this expectation against ``data_frame`` using the matching backend.

    The DataFrame type is determined via ``infer_data_frame_type`` and the
    call is forwarded, together with ``kwargs``, to ``validate_pandas`` or
    ``validate_pyspark``.

    :param data_frame: The DataFrame to validate.
    :param kwargs: Extra keyword arguments passed through to the
        backend-specific validator.
    :return: Whatever the selected backend validator returns.
    :raises ValueError: If the inferred type is neither PANDAS nor PYSPARK.
    """
    detected_type = self.infer_data_frame_type(data_frame)

    # Guard-clause dispatch: each supported backend returns immediately.
    if detected_type == DataFrameType.PANDAS:
        return self.validate_pandas(data_frame=data_frame, **kwargs)
    if detected_type == DataFrameType.PYSPARK:
        return self.validate_pyspark(data_frame=data_frame, **kwargs)

    raise ValueError(f"Unsupported DataFrame type: {detected_type}")
7491
7592 @abstractmethod
7693 def validate_pandas (
0 commit comments