Feature duckdb10 #172

Merged: 3 commits, Mar 7, 2024
Changes from all commits
36 changes: 11 additions & 25 deletions cuallee/__init__.py
@@ -9,59 +9,42 @@
from types import ModuleType
from typing import Any, Dict, List, Literal, Optional, Protocol, Tuple, Union
from .iso.checks import ISO

from colorama import Fore, Style # type: ignore
from toolz import compose, valfilter # type: ignore

logger = logging.getLogger("cuallee")
__version__ = "0.8.7"
__version__ = "0.8.8"
# Verify Libraries Available
# ==========================
try:
    from pandas import DataFrame as pandas_dataframe # type: ignore

    logger.debug(Fore.GREEN + "[OK]" + Fore.WHITE + " Pandas")
except (ModuleNotFoundError, ImportError):
    logger.debug(Fore.RED + "[KO]" + Fore.WHITE + " Pandas")
    logger.debug("KO: Pandas")

try:
    from polars.dataframe.frame import DataFrame as polars_dataframe # type: ignore

    logger.debug(Fore.GREEN + "[OK]" + Fore.WHITE + " Polars")
except (ModuleNotFoundError, ImportError):
    logger.debug(Fore.RED + "[KO]" + Fore.WHITE + " Polars")
    logger.debug("KO: Polars")

try:
    from pyspark.sql import DataFrame as pyspark_dataframe

    logger.debug(Fore.GREEN + "[OK]" + Fore.WHITE + " PySpark")

except (ModuleNotFoundError, ImportError):
    logger.debug(Fore.RED + "[KO]" + Fore.WHITE + " PySpark")
    logger.debug("KO: PySpark")

try:
    from snowflake.snowpark import DataFrame as snowpark_dataframe # type: ignore

    logger.debug(Fore.GREEN + "[OK]" + Fore.WHITE + " Snowpark")
except (ModuleNotFoundError, ImportError):
    logger.debug(Fore.RED + "[KO]" + Fore.WHITE + " Snowpark")
    logger.debug("KO: Snowpark")

try:
    from duckdb import DuckDBPyConnection as duckdb_dataframe # type: ignore

    logger.debug(Fore.GREEN + "[OK]" + Fore.WHITE + " DuckDB")
except (ModuleNotFoundError, ImportError):
    logger.debug(Fore.RED + "[KO]" + Fore.WHITE + " DuckDB")
    logger.debug("KO: DuckDB")

try:
    from google.cloud import bigquery

    logger.debug(Fore.GREEN + "[OK]" + Fore.WHITE + " BigQuery")
except (ModuleNotFoundError, ImportError):
    logger.debug(Fore.RED + "[KO]" + Fore.WHITE + " BigQuery")

    logger.debug("KO: BigQuery")

logger.debug(Style.RESET_ALL)


class CheckLevel(enum.Enum):
@@ -225,7 +208,10 @@ def __init__(
        self.rows = -1
        self.config: Dict[str, str] = {}
        self.table_name = table_name
        self.iso = ISO(self)
        try:
            self.iso = ISO(self)
        except (ModuleNotFoundError, ImportError):
            logger.error("ISO module requires requests")
        self.session = session

    def __repr__(self):
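With colorama removed, backend detection in cuallee/__init__.py now logs plain "KO: <backend>" messages, and the ISO helper is built inside a try/except so a missing requests dependency no longer breaks Check construction. A minimal sketch of surfacing those messages; the DataFrame, check name, and rule below are illustrative only, not part of this PR:

# Sketch (assumption): configure logging before importing cuallee so the
# import-time "KO: <backend>" debug messages are emitted to a handler.
import logging

logging.basicConfig(level=logging.DEBUG)

import pandas as pd
from cuallee import Check, CheckLevel

check = Check(CheckLevel.WARNING, "smoke_test")  # hypothetical check name
check.is_complete("id")                          # rule: no nulls in column "id"
print(check.validate(pd.DataFrame({"id": [1, 2, 3]})))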
8 changes: 5 additions & 3 deletions cuallee/cloud/__init__.py
@@ -1,15 +1,17 @@
import os
import msgpack
import logging
import requests
from requests.exceptions import ConnectionError
import logging

logger = logging.getLogger("cuallee")
CUALLEE_CLOUD_HEADERS = {
    "Content-Type": "application/octet-stream",
    "Authorization": f"Bearer {os.getenv('CUALLEE_CLOUD_TOKEN')}",
}

try:
    import msgpack
except (ModuleNotFoundError, ImportError):
    logger.error("Module msgpack missing for cloud operations")

def standardize(check):
    return {
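Guarding the msgpack import means importing cuallee.cloud no longer fails outright when the cloud extra is absent; the missing module is logged instead. A hedged, caller-side sketch (not part of cuallee) for verifying the optional cloud dependencies before publishing results:

# Hypothetical helper: confirm the modules pulled in by the "cloud" extra
# (msgpack and requests, per the pyproject.toml change below) are importable.
import importlib.util

def cloud_dependencies_ready() -> bool:
    return all(
        importlib.util.find_spec(module) is not None
        for module in ("msgpack", "requests")
    )

if not cloud_dependencies_ready():
    raise SystemExit("Install the extra first: pip install 'cuallee[cloud]'")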
7 changes: 0 additions & 7 deletions cuallee/pyspark_validation.py
@@ -12,7 +12,6 @@

import cuallee.utils as cuallee_utils
from cuallee import Check, ComputeEngine, Rule
from colorama import Fore, Style # type: ignore

import os

@@ -567,12 +566,6 @@ def _field_type_filter(

def _replace_observe_compute(computed_expressions: dict) -> dict:
"""Replace observe based check with select"""
print(
"[😔]"
+ Fore.YELLOW
+ " PySpark < 3.3.0 | When you upgrade checks will run 2x faster."
)
print(Style.RESET_ALL)
select_only_expressions = {}
for k, v in computed_expressions.items():
instruction = v
21 changes: 9 additions & 12 deletions pyproject.toml
@@ -4,32 +4,28 @@ build-backend = "setuptools.build_meta"

[project]
name = "cuallee"
version = "0.8.7"
authors = [
{ name="Herminio Vazquez", email="canimus@gmail.com"},
{ name="Virginie Grosboillot", email="vestalisvirginis@gmail.com" }
]
version = "0.8.8"
authors = ["Herminio Vazquez <canimus@gmail.com>", "Virginie Grosboillot <vestalisvirginis@gmail.com>"]
license = "Apache-2.0"
description = "Python library for data validation on DataFrame APIs including Snowflake/Snowpark, Apache/PySpark and Pandas/DataFrame."
readme = "README.md"
requires-python = ">=3.8"
classifiers = [
"Programming Language :: Python :: 3",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
]
dependencies = [
"colorama >= 0.4.6",
"toolz >= 0.12.0",
"pygments >= 2.15.1",
"requests >= 2.28.2",
"pandas>=1.5.3",
]

[project.optional-dependencies]
dev = [
"black==24.2.0",
"ruff==0.3.0"
]
iso = [
"requests>=2.28"
]
pyspark = [
"pyspark>=3.4.0"
]
@@ -45,7 +41,7 @@ bigquery =[
"pyarrow >= 11.0.0"
]
duckdb = [
"duckdb==0.9.2"
"duckdb==0.10.0"
]
polars = [
"polars>=0.19.6"
@@ -59,7 +55,8 @@ dagster = [
"dagster == 1.6.8"
]
cloud = [
"msgpack == 1.0.8"
"msgpack == 1.0.8",
"requests>=2.28",
]

[tool.ruff.lint]
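The packaging changes pin the duckdb extra to 0.10.0 and move requests out of the core dependencies into the new iso extra and the cloud extra. A minimal usage sketch against DuckDB 0.10, assuming `pip install 'cuallee[duckdb]'` and cuallee's documented DuckDB entry point (validate() receives a DuckDBPyConnection and the table is named via table_name); the table and data are illustrative:

# Sketch under the assumptions stated above.
import duckdb
from cuallee import Check, CheckLevel

conn = duckdb.connect(":memory:")
conn.execute("CREATE TABLE taxi AS SELECT * FROM range(10) t(vendor_id)")

check = Check(CheckLevel.WARNING, "duckdb_0_10", table_name="taxi")
check.is_complete("vendor_id")  # rule: no nulls in column "vendor_id"
print(check.validate(conn))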
2 changes: 1 addition & 1 deletion setup.cfg
@@ -1,5 +1,5 @@
[metadata]
name = cuallee
version = 0.8.7
version = 0.8.8
[options]
packages = find: