Skip to content

Commit

Permalink
1.0.7
Browse files: browse the repository at this point in the history
  • Loading branch information
akanz1 committed Nov 24, 2022
1 parent 2d9b50a commit 59a8c63
Show file tree
Hide file tree
Showing 6 changed files with 21 additions and 21 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "klib"
version = "1.0.6"
version = "1.0.7"
description = "Customized data preprocessing functions for frequent tasks."
authors = ["Andreas Kanz <andreas@akanz.de>"]
license = "MIT"
Expand Down
2 changes: 1 addition & 1 deletion src/klib/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""
Data Science Module for Python
==================================
===============================
klib is an easy to use Python library of customized functions for cleaning and \
analyzing data.
"""
Expand Down
5 changes: 3 additions & 2 deletions src/klib/_version.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
"""Current version of klib"""
__version__ = "1.0.6"
"""Current version of klib."""

__version__ = "1.0.7"
5 changes: 2 additions & 3 deletions src/klib/clean.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
"""
Functions for data cleaning.
"""Functions for data cleaning.
:author: Andreas Kanz
"""
Expand Down Expand Up @@ -279,7 +278,7 @@ def data_cleaning(
changed to categorical, by default 0.03
cat_exclude : Optional[list[str]], optional
List of columns to exclude from categorical conversion, by default None
clean_column_names: bool, optional
clean_col_names: bool, optional
Cleans the column names and provides hints on duplicate and long names, by \
default True
show : Optional[Literal["all", "changes"]], optional
Expand Down
12 changes: 6 additions & 6 deletions src/klib/describe.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
"""
Functions for descriptive analytics.
"""Functions for descriptive analytics.
:author: Andreas Kanz
Expand All @@ -18,6 +17,7 @@
from matplotlib import ticker
from matplotlib.colors import LinearSegmentedColormap
from matplotlib.colors import to_rgb
from matplotlib.gridspec import GridSpec

from klib.utils import _corr_selector
from klib.utils import _missing_vals
Expand All @@ -38,7 +38,7 @@ def cat_plot(
bottom: int = 3,
bar_color_top: str = "#5ab4ac",
bar_color_bottom: str = "#d8b365",
):
) -> GridSpec:
"""Two-dimensional visualization of the number and frequency of categorical \
features.
Expand Down Expand Up @@ -263,7 +263,7 @@ def corr_plot(
annot: bool = True,
dev: bool = False,
**kwargs,
):
) -> plt.Axes:
"""Two-dimensional visualization of the correlation between feature-columns \
excluding NA values.
Expand Down Expand Up @@ -311,7 +311,7 @@ def corr_plot(
Display figure settings in the plot by setting dev = True. If False, the \
settings are not displayed, by default False
Keyword Arguments : optional
kwargs : optional
Additional elements to control the visualization of the plot, e.g.:
* mask: bool, default True
Expand Down Expand Up @@ -615,7 +615,7 @@ def missingval_plot(
figsize: tuple = (20, 20),
sort: bool = False,
spine_color: str = "#EEEEEE",
):
) -> GridSpec:
"""Two-dimensional visualization of the missing values in a dataset.
Parameters
Expand Down
16 changes: 8 additions & 8 deletions src/klib/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ def _diff_report(
print(f"Reduced memory by at least: {round(mem_change,3)} MB (-{mem_perc}%)\n")


def _print_cleaning_details(arg0, arg1, arg2, arg3):
def _print_cleaning_details(arg0, arg1, arg2, arg3) -> None:
print(arg0)
print(f"dtypes:\n{arg1.dtypes.value_counts()}")
print(f"\nNumber of rows: {str(arg1.shape[0]).rjust(8)}")
Expand Down Expand Up @@ -232,49 +232,49 @@ def _missing_vals(data: pd.DataFrame) -> MVResult:
}


def _validate_input_bool(value: bool, desc):
def _validate_input_bool(value: bool, desc) -> None:
if not isinstance(value, bool):
raise TypeError(
f"Input value for '{desc}' is {type(value)} but should be a boolean."
)


def _validate_input_int(value: int, desc):
def _validate_input_int(value: int, desc) -> None:
if not isinstance(value, int):
raise TypeError(
f"Input value for '{desc}' is {type(value)} but should be an integer."
)


def _validate_input_range(value, desc, lower, upper):
def _validate_input_range(value, desc, lower, upper) -> None:
if value < lower or value > upper:
raise ValueError(
f"'{desc}' = {value} but should be {lower} <= '{desc}' <= {upper}."
)


def _validate_input_smaller(value1, value2, desc):
def _validate_input_smaller(value1, value2, desc) -> None:
if value1 > value2:
raise ValueError(
f"The first input for '{desc}' should be smaller or equal to the second."
)


def _validate_input_sum_smaller(limit, desc, *args):
def _validate_input_sum_smaller(limit, desc, *args) -> None:
if sum(args) > limit:
raise ValueError(
f"The sum of input values for '{desc}' should be less or equal to {limit}."
)


def _validate_input_sum_larger(limit, desc, *args):
def _validate_input_sum_larger(limit, desc, *args) -> None:
if sum(args) < limit:
raise ValueError(
f"The sum of input values for '{desc}' should be larger/equal to {limit}."
)


def _validate_input_num_data(value: pd.DataFrame, desc):
def _validate_input_num_data(value: pd.DataFrame, desc) -> None:
if value.select_dtypes(include=["number"]).empty:
raise TypeError(
f"Input value for '{desc}' should contain at least one numerical column."
Expand Down

0 comments on commit 59a8c63

Please sign in to comment.