Skip to content

Commit

Permalink
Merge pull request #157 from shakedzy/153-add-type-hints-to-functions
Browse files Browse the repository at this point in the history
#153 add type hints to functions
#156 ImportError: cannot import name 'interp' from 'scipy'
  • Loading branch information
shakedzy authored Jan 27, 2024
2 parents d4593b0 + 2d1655f commit 9e65935
Show file tree
Hide file tree
Showing 12 changed files with 602 additions and 334 deletions.
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ syntax: glob
env/*
venv/*
ENV/*
.idea/*
.vscode/*
.idea
.vscode
.DS_Store
dython.egg*/*
*__pycache__*
Expand Down
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Change Log

## 0.7.5 _(dev)_
* Adding type hints to all functions (issue [#153](https://github.com/shakedzy/dython/issues/153))
* Dropping the dependency on `scikit-plot`, as it is no longer maintained (issue [#156](https://github.com/shakedzy/dython/issues/156))

## 0.7.4
* Handling running plotting functions with `plot=False` in Jupyter and truly avoid plotting (issue [#147](https://github.com/shakedzy/dython/issues/147))

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ If you wish to install from source:
pip install git+https://github.com/shakedzy/dython.git
```

**Dependencies:** `numpy`, `pandas`, `seaborn`, `scipy`, `matplotlib`, `sklearn`, `scikit-plot`
**Dependencies:** `numpy`, `pandas`, `seaborn`, `scipy`, `matplotlib`, `sklearn`

## Contributing:
Contributions are always welcome - if you found something you can fix, or have an idea for a new feature, feel free to write it and open a pull request. Please make sure to go over the [contribution guidelines](https://github.com/shakedzy/dython/blob/master/CONTRIBUTING.md).
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.7.4
0.7.5.dev
30 changes: 22 additions & 8 deletions dython/_private.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,34 @@
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from numpy.typing import NDArray
from typing import Optional, Any, Tuple, Union, List, Literal
from .typing import Number, OneDimArray

IS_JUPYTER = None

IS_JUPYTER: bool = False

def set_is_jupyter(force_to=None):

def set_is_jupyter(force_to: Optional[bool] = None) -> None:
global IS_JUPYTER
if force_to is not None:
IS_JUPYTER = force_to
else:
IS_JUPYTER = "ipykernel_launcher.py" in sys.argv[0]


def plot_or_not(plot):
def plot_or_not(plot: bool) -> None:
if plot:
plt.show()
elif not plot and IS_JUPYTER:
plt.close()


def convert(data, to, copy=True):
def convert(
data: Union[List[Number], NDArray, pd.DataFrame],
to: Literal["array", "list", "dataframe"],
copy: bool = True,
) -> Union[List[Number], NDArray, pd.DataFrame]:
converted = None
if to == "array":
if isinstance(data, np.ndarray):
Expand All @@ -31,7 +39,7 @@ def convert(data, to, copy=True):
elif isinstance(data, list):
converted = np.array(data)
elif isinstance(data, pd.DataFrame):
converted = data.values()
converted = data.values() # type: ignore
elif to == "list":
if isinstance(data, list):
converted = data.copy() if copy else data
Expand All @@ -53,10 +61,12 @@ def convert(data, to, copy=True):
)
)
else:
return converted
return converted # type: ignore


def remove_incomplete_samples(x, y):
def remove_incomplete_samples(
x: Union[List[Any], OneDimArray], y: Union[List[Any], OneDimArray]
) -> Tuple[Union[List[Any], OneDimArray], Union[List[Any], OneDimArray]]:
x = [v if v is not None else np.nan for v in x]
y = [v if v is not None else np.nan for v in y]
arr = np.array([x, y]).transpose()
Expand All @@ -67,7 +77,11 @@ def remove_incomplete_samples(x, y):
return arr[0], arr[1]


def replace_nan_with_value(x, y, value):
def replace_nan_with_value(
x: Union[List[Any], OneDimArray],
y: Union[List[Any], OneDimArray],
value: Any,
) -> Tuple[NDArray, NDArray]:
x = np.array(
[v if v == v and v is not None else value for v in x]
) # NaN != NaN
Expand Down
53 changes: 31 additions & 22 deletions dython/data_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from typing import Optional, Tuple, List, Any, Union
from numpy.typing import NDArray
from .typing import Number, TwoDimArray
from ._private import convert, plot_or_not


Expand All @@ -12,7 +15,10 @@
]


def one_hot_encode(arr, classes=None):
def one_hot_encode(
array: Union[List[Union[Number, str]], NDArray],
classes: Optional[int] = None,
) -> NDArray:
"""
One-hot encode a 1D array.
Based on this StackOverflow answer: https://stackoverflow.com/a/29831596/5863503
Expand All @@ -35,31 +41,31 @@ def one_hot_encode(arr, classes=None):
[1., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 1.]])
"""
arr = convert(arr, "array").astype(int)
arr: NDArray = convert(array, "array").astype(int) # type: ignore
if not len(arr.shape) == 1:
raise ValueError(
f"array must have only one dimension, but has shape: {arr.shape}"
)
if arr.min() < 0:
raise ValueError("array cannot contain negative values")
classes = classes if classes is not None else arr.max() + 1
h = np.zeros((arr.size, classes))
h = np.zeros((arr.size, classes)) # type: ignore
h[np.arange(arr.size), arr] = 1
return h


def split_hist(
dataset,
values,
split_by,
title="",
xlabel="",
ylabel=None,
figsize=None,
legend="best",
plot=True,
dataset: pd.DataFrame,
values: str,
split_by: str,
title: Optional[str] = "",
xlabel: Optional[str] = "",
ylabel: Optional[str] = None,
figsize: Optional[Tuple[int, int]] = None,
legend: Optional[str] = "best",
plot: bool = True,
**hist_kwargs,
):
) -> plt.Axes:
"""
Plot a histogram of values from a given dataset, split by the values of a chosen column
Expand Down Expand Up @@ -88,7 +94,7 @@ def split_hist(
Returns:
--------
A Matplotlib `Axe`
A Matplotlib `Axes`
Example:
--------
Expand All @@ -111,13 +117,16 @@ def split_hist(
if title == "":
title = values + " by " + split_by
plt.title(title)
plt.ylabel(ylabel)
if ylabel:
plt.ylabel(ylabel)
ax = plt.gca()
plot_or_not(plot)
return ax


def identify_columns_by_type(dataset, include):
def identify_columns_by_type(
dataset: TwoDimArray, include: List[str]
) -> List[Any]:
"""
Given a dataset, identify columns of the types requested.
Expand All @@ -138,12 +147,12 @@ def identify_columns_by_type(dataset, include):
['col2', 'col3']
"""
dataset = convert(dataset, "dataframe")
columns = list(dataset.select_dtypes(include=include).columns)
df: pd.DataFrame = convert(dataset, "dataframe") # type: ignore
columns = list(df.select_dtypes(include=include).columns)
return columns


def identify_columns_with_na(dataset):
def identify_columns_with_na(dataset: TwoDimArray) -> pd.DataFrame:
"""
Return the names of columns having NA values, sorted in descending order by their number of NAs
Expand All @@ -164,10 +173,10 @@ def identify_columns_with_na(dataset):
1 col2 2
0 col1 1
"""
dataset = convert(dataset, "dataframe")
na_count = [sum(dataset[cc].isnull()) for cc in dataset.columns]
df: pd.DataFrame = convert(dataset, "dataframe") # type: ignore
na_count = [sum(df[cc].isnull()) for cc in df.columns]
return (
pd.DataFrame({"column": dataset.columns, "na_count": na_count})
pd.DataFrame({"column": df.columns, "na_count": na_count})
.query("na_count > 0")
.sort_values("na_count", ascending=False)
)
8 changes: 6 additions & 2 deletions dython/examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,9 @@ def roc_graph_example():
y_score = classifier.fit(X_train, y_train).predict_proba(X_test)

# Plot ROC graphs
return metric_graph(y_test, y_score, "roc", class_names=iris.target_names)
return metric_graph(
y_test, y_score, "roc", class_names_list=iris.target_names
)


def pr_graph_example():
Expand Down Expand Up @@ -73,7 +75,9 @@ def pr_graph_example():
y_score = classifier.fit(X_train, y_train).predict_proba(X_test)

# Plot PR graphs
return metric_graph(y_test, y_score, "pr", class_names=iris.target_names)
return metric_graph(
y_test, y_score, "pr", class_names_list=iris.target_names
)


def associations_iris_example():
Expand Down
Loading

0 comments on commit 9e65935

Please sign in to comment.