Skip to content

Commit

Permalink
1.0 release to main (#115)
Browse files Browse the repository at this point in the history
Signed-off-by: Varun Mittal <varunmittal91@gmail.com>
Signed-off-by: Joshua Kwan <joshk@ternary.app>
Signed-off-by: Mike Fuller <mike@finops.org>
Signed-off-by: Justin Ohrenberger <justinohrenberger@gmail.com>
Co-authored-by: Varun Mittal <varunmittal91@gmail.com>
Co-authored-by: Joshua Kwan <joshua.m.kwan@gmail.com>
Co-authored-by: johrenberger <122705393+johrenberger@users.noreply.github.com>
  • Loading branch information
4 people authored Jun 6, 2024
1 parent c58c2b0 commit 7f80091
Show file tree
Hide file tree
Showing 263 changed files with 1,629 additions and 395 deletions.
4 changes: 3 additions & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.5.0
current_version = 1.0.0
commit = True
tag = True
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>(rc|dev))(?P<build>\d+))?
Expand All @@ -18,3 +18,5 @@ values =
first_value = 1

[bumpversion:file:pyproject.toml]
search = version = "{current_version}"
replace = version = "{new_version}"
8 changes: 5 additions & 3 deletions .github/workflows/coverage.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
name: Coverage

on:
push:
branches:
Expand All @@ -9,20 +8,23 @@ on:
pull_request:
branches:
- main

- dev
jobs:
coverage:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
- name: Set up Python 3.9
uses: actions/setup-python@v4
with:
python-version: 3.9
- name: Install poetry
uses: abatilo/actions-poetry@v2
- name: Setup a local virtual environment
run: |
poetry config virtualenvs.create true --local
poetry config virtualenvs.in-project true --local
poetry lock
- uses: actions/cache@v3
name: Define a cache for the virtual environment based on the dependencies lock file
with:
Expand Down
11 changes: 8 additions & 3 deletions .github/workflows/lint.yaml
Original file line number Diff line number Diff line change
@@ -1,21 +1,26 @@
name: Lint

on:
push:

pull_request:
branches:
- main
- dev
jobs:
lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
- name: Set up Python 3.9
uses: actions/setup-python@v4
with:
python-version: 3.9
- name: Install poetry
uses: abatilo/actions-poetry@v2
- name: Setup a local virtual environment
run: |
poetry config virtualenvs.create true --local
poetry config virtualenvs.in-project true --local
poetry lock
- uses: actions/cache@v3
name: Define a cache for the virtual environment based on the dependencies lock file
with:
Expand Down
5 changes: 3 additions & 2 deletions .github/workflows/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ on:
- main
- dev
- issue/**

jobs:
validate_focus:
runs-on: ubuntu-latest
Expand All @@ -14,8 +13,10 @@ jobs:
steps:
- name: Check out repository code
uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
- name: Set up Python 3.9
uses: actions/setup-python@v4
with:
python-version: 3.9
- name: Install poetry
uses: abatilo/actions-poetry@v2
- name: Setup a local virtual environment
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/publish.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,22 @@ on:
tags:
- 'v\d\.\d\.\d'
- 'v\d\.\d\.\d-(dev|rc)\d'

jobs:
publish:
permissions:
id-token: write
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Set up Python 3.8
- name: Set up Python 3.11
uses: actions/setup-python@v4
with:
python-version: 3.8
python-version: 3.11
- name: Install poetry
uses: abatilo/actions-poetry@v2
- name: Install dependencies
run: |
poetry build
find -type l -exec bash -c 'ln -f "$(readlink -m "$0")" "$0"' {} \;
poetry build --format=sdist
- name: Publish package distributions to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
12 changes: 5 additions & 7 deletions .github/workflows/unittest.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
name: Unittest

on:
push:
branches:
Expand All @@ -9,15 +8,14 @@ on:
pull_request:
branches:
- main

- dev
jobs:
test:
runs-on: ubuntu-latest

runs-on: ${{ matrix.os }}
strategy:
matrix:
python-version: [ "3.8", "3.9", "3.10", "3.11" ]

python-version: [ "3.9", "3.10", "3.11", "3.12" ]
os: [ windows-latest, ubuntu-latest, macos-latest ]
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
Expand All @@ -35,7 +33,7 @@ jobs:
name: Define a cache for the virtual environment based on the dependencies lock file
with:
path: ./.venv
key: venv-${{ hashFiles('poetry.lock') }}
key: venv-${{ hashFiles('poetry.lock') }}-${{ matrix.os }}-${{ matrix.python-version }}
- name: Install dependencies
run: |
poetry install
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ tbd

### Prerequisites

- Python 3.8+
- Python 3.9+
- Poetry (Package & Dependency Manager)

### Installation
Expand Down
21 changes: 21 additions & 0 deletions build.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import os
import pathlib
import shutil
import yaml

def copy_rules(basedir):
    """Rebuild the per-version rule directories under ``basedir``.

    Loads ``<basedir>/version_sets.yaml`` and iterates it as a mapping of
    version name to a list of rule file names. For each version, the
    directory ``<basedir>/version_sets/<version>`` is removed if it already
    exists, recreated, and populated with copies of the listed files taken
    from ``<basedir>/base_rule_definitions``.

    Args:
        basedir: Path of the directory containing ``version_sets.yaml`` and
            the ``base_rule_definitions`` folder.

    Raises:
        OSError: If the manifest or one of the listed rule files cannot be
            read, or a destination cannot be written.
    """
    # Decode the manifest as UTF-8 explicitly; the platform default encoding
    # (for example cp1252 on Windows) can mis-decode non-ASCII YAML content.
    with open(
        os.path.join(basedir, 'version_sets.yaml'), 'r', encoding='utf-8'
    ) as file:
        version_sets = yaml.safe_load(file)

    for version, base_files in version_sets.items():
        dest = os.path.join(basedir, 'version_sets', version)
        # Drop any previous output so the directory ends up containing only
        # the files listed for this version right now.
        if os.path.exists(dest):
            shutil.rmtree(dest)
        pathlib.Path(dest).mkdir(parents=True)
        for f in base_files:
            src_file = os.path.join(basedir, 'base_rule_definitions', f)
            dest_file = os.path.join(dest, f)
            shutil.copyfile(src_file, dest_file)

if __name__ == "__main__":
    # Script entry point: regenerate the version-set rule folders that live
    # inside the package's rules directory (path is relative to the CWD).
    rules_root = 'focus_validator/rules'
    copy_rules(basedir=rules_root)
25 changes: 24 additions & 1 deletion focus_validator/config_objects/common.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from enum import Enum
from typing import List, Literal

from pydantic import BaseModel
import sqlglot
from pydantic import BaseModel, field_validator


class AllowNullsCheck(BaseModel):
Expand All @@ -12,6 +13,22 @@ class ValueInCheck(BaseModel):
value_in: List[str]


class SQLQueryCheck(BaseModel):
    """Check definition that carries a SQL query to be evaluated.

    The query is accepted only when the aliases found in its parsed form are
    exactly one alias named ``check_output``.
    """

    # Raw SQL text of the check; validated by ``check_sql_query`` below.
    sql_query: str

    @field_validator("sql_query")
    @classmethod
    def check_sql_query(cls, sql_query):
        """Ensure the query exposes a single alias called ``check_output``.

        Args:
            sql_query: SQL text supplied for the ``sql_query`` field.

        Returns:
            The unchanged ``sql_query`` when it passes validation.

        Raises:
            ValueError: If the aliases found in the query are anything other
                than exactly ``["check_output"]``. Pydantic reports this to
                the caller as a validation error.
        """
        returned_columns = [
            column.alias
            for column in sqlglot.parse_one(sql_query).find_all(sqlglot.exp.Alias)
        ]

        # Raise explicitly rather than ``assert``: assert statements are
        # removed when Python runs with ``-O``, which would silently turn
        # this validator into a no-op.
        if returned_columns != ["check_output"]:
            raise ValueError(
                "SQL query must only return a column called 'check_output'"
            )
        return sql_query


SIMPLE_CHECKS = Literal["check_unique", "column_required"]


Expand All @@ -20,6 +37,7 @@ class DataTypes(Enum):
DECIMAL = "decimal"
DATETIME = "datetime"
CURRENCY_CODE = "currency-code"
STRINGIFIED_JSON_OBJECT = "stringified-json-object"


class DataTypeCheck(BaseModel):
Expand Down Expand Up @@ -50,3 +68,8 @@ def generate_check_friendly_name(check, column_id):
return f"{column_id} does not allow null values."
elif isinstance(check, DataTypeCheck):
return f"{column_id} requires values of type {check.data_type.value}."
elif isinstance(check, SQLQueryCheck):
sql_query = " ".join([word.strip() for word in check.sql_query.split()])
return f"{column_id} requires values that return true when evaluated by the following SQL query: {sql_query}"
else:
raise NotImplementedError(f"Check {check} not implemented.")
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import os
from itertools import groupby
from typing import Dict, List, Optional, Set, Union

import pandas as pd
import pandera as pa
import sqlglot
from pandera.api.pandas.types import PandasDtypeInputTypes

from focus_validator.config_objects import ChecklistObject, InvalidRule, Rule
Expand All @@ -10,11 +13,25 @@
ChecklistObjectStatus,
DataTypeCheck,
DataTypes,
SQLQueryCheck,
ValueInCheck,
)
from focus_validator.config_objects.override import Override
from focus_validator.exceptions import FocusNotImplementedError

# Name of a helper column holding a running row number. Grouping on it in
# addition to the requested columns puts every row in its own group, so no
# rows (and therefore no row numbers) are lost by the groupby.
GROUP_INDEX_COLUMN = "group_index_column"


def __groupby_fnc__(df: pd.DataFrame, column_alias: List[str]):
    """
    Group ``df`` by ``column_alias`` plus a per-row index column, keeping
    rows whose grouping keys are null.

    Used as the custom groupby callable for the pandera ``check_sql_query``
    check. Note that the index column is written onto ``df`` itself before
    grouping; ``dropna=False`` keeps null keys, which pandas drops by default.
    """
    row_count = len(df)
    df[GROUP_INDEX_COLUMN] = range(row_count)
    grouping_keys = column_alias + [GROUP_INDEX_COLUMN]
    return df.groupby(grouping_keys, dropna=False)


class FocusToPanderaSchemaConverter:
@staticmethod
Expand All @@ -40,9 +57,22 @@ def __generate_pandera_check__(rule: Rule, check_id):
return pa.Check.check_value_in(
allowed_values=check.value_in, error=error_string
)
elif isinstance(check, SQLQueryCheck):
column_alias = [
column.alias_or_name
for column in sqlglot.parse_one(check.sql_query).find_all(
sqlglot.exp.Column
)
]
return pa.Check.check_sql_query(
sql_query=check.sql_query,
error=error_string,
column_alias=column_alias,
groupby=lambda df: __groupby_fnc__(df=df, column_alias=column_alias),
)
elif isinstance(check, AllowNullsCheck):
return pa.Check.check_not_null(
error=error_string, ignore_na=False, allow_nulls=check.allow_nulls
error=error_string, ignore_na=check.allow_nulls
)
else:
raise FocusNotImplementedError(
Expand Down Expand Up @@ -77,6 +107,14 @@ def __generate_column_definition__(
error=f"{rule.check_id}:::Ensures that column is of {data_type.value} type.",
)
)
elif data_type == DataTypes.STRINGIFIED_JSON_OBJECT:
pandera_type = None
column_checks.append(
pa.Check.check_stringified_json_object_dtype(
ignore_na=True,
error=f"{rule.check_id}:::Ensures that column is of {data_type.value} type.",
)
)
else:
pandera_type = pa.String

Expand Down Expand Up @@ -151,7 +189,7 @@ def generate_pandera_schema(
for rule in rules:
if isinstance(rule, InvalidRule):
checklist[rule.rule_path] = ChecklistObject(
check_name=rule.rule_path,
check_name=os.path.splitext(os.path.basename(rule.rule_path))[0],
column_id="Unknown",
error=f"{rule.error_type}: {rule.error}",
status=ChecklistObjectStatus.ERRORED,
Expand Down Expand Up @@ -180,4 +218,7 @@ def generate_pandera_schema(
overrides=overrides,
schema_dict=schema_dict,
)
return pa.DataFrameSchema(schema_dict, strict=False), checklist
return (
pa.DataFrameSchema(schema_dict, strict=False),
checklist,
)
2 changes: 1 addition & 1 deletion focus_validator/config_objects/override.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from typing import List

import yaml
from pydantic import BaseModel
from pydantic.v1 import BaseModel


class Override(BaseModel):
Expand Down
Loading

0 comments on commit 7f80091

Please sign in to comment.