Skip to content

Commit

Permalink
Feature duckdb10 (#172)
Browse files Browse the repository at this point in the history
* Removal of unnecessary libraries

* Added license and authors

* Resolved unnecessary dependencies
  • Loading branch information
canimus authored Mar 7, 2024
1 parent 4971242 commit 1fc650b
Show file tree
Hide file tree
Showing 5 changed files with 26 additions and 48 deletions.
36 changes: 11 additions & 25 deletions cuallee/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,59 +9,42 @@
from types import ModuleType
from typing import Any, Dict, List, Literal, Optional, Protocol, Tuple, Union
from .iso.checks import ISO

from colorama import Fore, Style # type: ignore
from toolz import compose, valfilter # type: ignore

logger = logging.getLogger("cuallee")
__version__ = "0.8.7"
__version__ = "0.8.8"
# Verify Libraries Available
# ==========================
try:
from pandas import DataFrame as pandas_dataframe # type: ignore

logger.debug(Fore.GREEN + "[OK]" + Fore.WHITE + " Pandas")
except (ModuleNotFoundError, ImportError):
logger.debug(Fore.RED + "[KO]" + Fore.WHITE + " Pandas")
logger.debug("KO: Pandas")

try:
from polars.dataframe.frame import DataFrame as polars_dataframe # type: ignore

logger.debug(Fore.GREEN + "[OK]" + Fore.WHITE + " Polars")
except (ModuleNotFoundError, ImportError):
logger.debug(Fore.RED + "[KO]" + Fore.WHITE + " Polars")
logger.debug("KO: Polars")

try:
from pyspark.sql import DataFrame as pyspark_dataframe

logger.debug(Fore.GREEN + "[OK]" + Fore.WHITE + " PySpark")

except (ModuleNotFoundError, ImportError):
logger.debug(Fore.RED + "[KO]" + Fore.WHITE + " PySpark")
logger.debug("KO: PySpark")

try:
from snowflake.snowpark import DataFrame as snowpark_dataframe # type: ignore

logger.debug(Fore.GREEN + "[OK]" + Fore.WHITE + " Snowpark")
except (ModuleNotFoundError, ImportError):
logger.debug(Fore.RED + "[KO]" + Fore.WHITE + " Snowpark")
logger.debug("KO: Snowpark")

try:
from duckdb import DuckDBPyConnection as duckdb_dataframe # type: ignore

logger.debug(Fore.GREEN + "[OK]" + Fore.WHITE + " DuckDB")
except (ModuleNotFoundError, ImportError):
logger.debug(Fore.RED + "[KO]" + Fore.WHITE + " DuckDB")
logger.debug("KO: DuckDB")

try:
from google.cloud import bigquery

logger.debug(Fore.GREEN + "[OK]" + Fore.WHITE + " BigQuery")
except (ModuleNotFoundError, ImportError):
logger.debug(Fore.RED + "[KO]" + Fore.WHITE + " BigQuery")

logger.debug("KO: BigQuery")

logger.debug(Style.RESET_ALL)


class CheckLevel(enum.Enum):
Expand Down Expand Up @@ -225,7 +208,10 @@ def __init__(
self.rows = -1
self.config: Dict[str, str] = {}
self.table_name = table_name
self.iso = ISO(self)
try:
self.iso = ISO(self)
except (ModuleNotFoundError, ImportError):
logger.error("ISO module requires requests")
self.session = session

def __repr__(self):
Expand Down
8 changes: 5 additions & 3 deletions cuallee/cloud/__init__.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
import os
import msgpack
import logging
import requests
from requests.exceptions import ConnectionError
import logging

logger = logging.getLogger("cuallee")
CUALLEE_CLOUD_HEADERS = {
"Content-Type": "application/octet-stream",
"Authorization": f"Bearer {os.getenv('CUALLEE_CLOUD_TOKEN')}",
}

try:
import msgpack
except (ModuleNotFoundError, ImportError):
logger.error("Module msgpack missing for cloud operations")

def standardize(check):
return {
Expand Down
7 changes: 0 additions & 7 deletions cuallee/pyspark_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@

import cuallee.utils as cuallee_utils
from cuallee import Check, ComputeEngine, Rule
from colorama import Fore, Style # type: ignore

import os

Expand Down Expand Up @@ -567,12 +566,6 @@ def _field_type_filter(

def _replace_observe_compute(computed_expressions: dict) -> dict:
"""Replace observe based check with select"""
print(
"[😔]"
+ Fore.YELLOW
+ " PySpark < 3.3.0 | When you upgrade checks will run 2x faster."
)
print(Style.RESET_ALL)
select_only_expressions = {}
for k, v in computed_expressions.items():
instruction = v
Expand Down
21 changes: 9 additions & 12 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,32 +4,28 @@ build-backend = "setuptools.build_meta"

[project]
name = "cuallee"
version = "0.8.7"
authors = [
{ name="Herminio Vazquez", email="canimus@gmail.com"},
{ name="Virginie Grosboillot", email="vestalisvirginis@gmail.com" }
]
version = "0.8.8"
authors = ["Herminio Vazquez <canimus@gmail.com>", "Virginie Grosboillot <vestalisvirginis@gmail.com>"]
license = "Apache-2.0"
description = "Python library for data validation on DataFrame APIs including Snowflake/Snowpark, Apache/PySpark and Pandas/DataFrame."
readme = "README.md"
requires-python = ">=3.8"
classifiers = [
"Programming Language :: Python :: 3",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
]
dependencies = [
"colorama >= 0.4.6",
"toolz >= 0.12.0",
"pygments >= 2.15.1",
"requests >= 2.28.2",
"pandas>=1.5.3",
]

[project.optional-dependencies]
dev = [
"black==24.2.0",
"ruff==0.3.0"
]
iso = [
"requests>=2.28"
]
pyspark = [
"pyspark>=3.4.0"
]
Expand All @@ -45,7 +41,7 @@ bigquery =[
"pyarrow >= 11.0.0"
]
duckdb = [
"duckdb==0.9.2"
"duckdb==0.10.0"
]
polars = [
"polars>=0.19.6"
Expand All @@ -59,7 +55,8 @@ dagster = [
"dagster == 1.6.8"
]
cloud = [
"msgpack == 1.0.8"
"msgpack == 1.0.8",
"requests>=2.28",
]

[tool.ruff.lint]
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[metadata]
name = cuallee
version = 0.8.7
version = 0.8.8
[options]
packages = find:

0 comments on commit 1fc650b

Please sign in to comment.