From 1fc650b86694888c5ce5312af86dc1c6c937e99c Mon Sep 17 00:00:00 2001 From: Herminio Vazquez Date: Thu, 7 Mar 2024 22:26:30 +0100 Subject: [PATCH] Feature duckdb10 (#172) * Removal of unnecessary libraries * Added license and authors * Resolved unnecessary dependencies --- cuallee/__init__.py | 36 +++++++++++------------------------ cuallee/cloud/__init__.py | 8 +++++--- cuallee/pyspark_validation.py | 7 ------- pyproject.toml | 21 +++++++++----------- setup.cfg | 2 +- 5 files changed, 26 insertions(+), 48 deletions(-) diff --git a/cuallee/__init__.py b/cuallee/__init__.py index 32d55a85..a94494bc 100644 --- a/cuallee/__init__.py +++ b/cuallee/__init__.py @@ -9,59 +9,42 @@ from types import ModuleType from typing import Any, Dict, List, Literal, Optional, Protocol, Tuple, Union from .iso.checks import ISO - -from colorama import Fore, Style # type: ignore from toolz import compose, valfilter # type: ignore logger = logging.getLogger("cuallee") -__version__ = "0.8.7" +__version__ = "0.8.8" # Verify Libraries Available # ========================== try: from pandas import DataFrame as pandas_dataframe # type: ignore - - logger.debug(Fore.GREEN + "[OK]" + Fore.WHITE + " Pandas") except (ModuleNotFoundError, ImportError): - logger.debug(Fore.RED + "[KO]" + Fore.WHITE + " Pandas") + logger.debug("KO: Pandas") try: from polars.dataframe.frame import DataFrame as polars_dataframe # type: ignore - - logger.debug(Fore.GREEN + "[OK]" + Fore.WHITE + " Polars") except (ModuleNotFoundError, ImportError): - logger.debug(Fore.RED + "[KO]" + Fore.WHITE + " Polars") + logger.debug("KO: Polars") try: from pyspark.sql import DataFrame as pyspark_dataframe - - logger.debug(Fore.GREEN + "[OK]" + Fore.WHITE + " PySpark") - except (ModuleNotFoundError, ImportError): - logger.debug(Fore.RED + "[KO]" + Fore.WHITE + " PySpark") + logger.debug("KO: PySpark") try: from snowflake.snowpark import DataFrame as snowpark_dataframe # type: ignore - - logger.debug(Fore.GREEN + "[OK]" + Fore.WHITE + " 
Snowpark") except (ModuleNotFoundError, ImportError): - logger.debug(Fore.RED + "[KO]" + Fore.WHITE + " Snowpark") + logger.debug("KO: Snowpark") try: from duckdb import DuckDBPyConnection as duckdb_dataframe # type: ignore - - logger.debug(Fore.GREEN + "[OK]" + Fore.WHITE + " DuckDB") except (ModuleNotFoundError, ImportError): - logger.debug(Fore.RED + "[KO]" + Fore.WHITE + " DuckDB") + logger.debug("KO: DuckDB") try: from google.cloud import bigquery - - logger.debug(Fore.GREEN + "[OK]" + Fore.WHITE + " BigQuery") except (ModuleNotFoundError, ImportError): - logger.debug(Fore.RED + "[KO]" + Fore.WHITE + " BigQuery") - + logger.debug("KO: BigQuery") -logger.debug(Style.RESET_ALL) class CheckLevel(enum.Enum): @@ -225,7 +208,10 @@ def __init__( self.rows = -1 self.config: Dict[str, str] = {} self.table_name = table_name - self.iso = ISO(self) + try: + self.iso = ISO(self) + except (ModuleNotFoundError, ImportError): + logger.error("ISO module requires requests") self.session = session def __repr__(self): diff --git a/cuallee/cloud/__init__.py b/cuallee/cloud/__init__.py index 57ad00e8..ef9b3d5e 100644 --- a/cuallee/cloud/__init__.py +++ b/cuallee/cloud/__init__.py @@ -1,15 +1,17 @@ import os -import msgpack +import logging import requests from requests.exceptions import ConnectionError -import logging - logger = logging.getLogger("cuallee") CUALLEE_CLOUD_HEADERS = { "Content-Type": "application/octet-stream", "Authorization": f"Bearer {os.getenv('CUALLEE_CLOUD_TOKEN')}", } +try: + import msgpack +except (ModuleNotFoundError, ImportError): + logger.error("Module msgpack missing for cloud operations") def standardize(check): return { diff --git a/cuallee/pyspark_validation.py b/cuallee/pyspark_validation.py index 98db9335..d0f0d4cc 100644 --- a/cuallee/pyspark_validation.py +++ b/cuallee/pyspark_validation.py @@ -12,7 +12,6 @@ import cuallee.utils as cuallee_utils from cuallee import Check, ComputeEngine, Rule -from colorama import Fore, Style # type: ignore import os 
@@ -567,12 +566,6 @@ def _field_type_filter( def _replace_observe_compute(computed_expressions: dict) -> dict: """Replace observe based check with select""" - print( - "[😔]" - + Fore.YELLOW - + " PySpark < 3.3.0 | When you upgrade checks will run 2x faster." - ) - print(Style.RESET_ALL) select_only_expressions = {} for k, v in computed_expressions.items(): instruction = v diff --git a/pyproject.toml b/pyproject.toml index 764f5652..65939397 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,25 +4,18 @@ build-backend = "setuptools.build_meta" [project] name = "cuallee" -version = "0.8.7" -authors = [ - { name="Herminio Vazquez", email="canimus@gmail.com"}, - { name="Virginie Grosboillot", email="vestalisvirginis@gmail.com" } -] +version = "0.8.8" +authors = ["Herminio Vazquez ", "Virginie Grosboillot "] +license = "Apache-2.0" description = "Python library for data validation on DataFrame APIs including Snowflake/Snowpark, Apache/PySpark and Pandas/DataFrame." readme = "README.md" requires-python = ">=3.8" classifiers = [ "Programming Language :: Python :: 3", - "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", ] dependencies = [ - "colorama >= 0.4.6", "toolz >= 0.12.0", - "pygments >= 2.15.1", - "requests >= 2.28.2", - "pandas>=1.5.3", ] [project.optional-dependencies] @@ -30,6 +23,9 @@ dev = [ "black==24.2.0", "ruff==0.3.0" ] +iso = [ + "requests>=2.28" +] pyspark = [ "pyspark>=3.4.0" ] @@ -45,7 +41,7 @@ bigquery =[ "pyarrow >= 11.0.0" ] duckdb = [ - "duckdb==0.9.2" + "duckdb==0.10.0" ] polars = [ "polars>=0.19.6" @@ -59,7 +55,8 @@ dagster = [ "dagster == 1.6.8" ] cloud = [ - "msgpack == 1.0.8" + "msgpack == 1.0.8", + "requests>=2.28", ] [tool.ruff.lint] diff --git a/setup.cfg b/setup.cfg index 30813aa5..c93f03ba 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [metadata] name = cuallee -version = 0.8.7 +version = 0.8.8 [options] packages = find: \ No newline at end of file