Skip to content

Commit

Permalink
Merge pull request #23 from neptune-ai/aw/improve-tests
Browse files Browse the repository at this point in the history
Improve e2e tests
  • Loading branch information
AleksanderWWW authored Jan 19, 2024
2 parents 6dd775a + 549a798 commit 72797b9
Show file tree
Hide file tree
Showing 2 changed files with 87 additions and 56 deletions.
44 changes: 44 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from dataclasses import dataclass
from typing import Optional

import numpy as np
from pytest import fixture
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.utils import Bunch


@dataclass
class Dataset:
    """Feature and target arrays of one dataset plus a single train/test split of them.

    The fixtures in this file build instances positionally, so the field order
    below is part of the interface.
    """

    x: np.ndarray  # complete feature array
    y: np.ndarray  # complete target array
    x_train: np.ndarray  # feature rows of the training part
    x_test: np.ndarray  # feature rows of the test part
    y_train: np.ndarray  # targets of the training part
    y_test: np.ndarray  # targets of the test part


# Module-level caches for the loaded datasets; each stays None until its fixture first runs.
_IRIS_DATASET: Optional[Bunch] = None  # filled with the result of datasets.load_iris()
# Filled with datasets.load_diabetes(return_X_y=True), which the diabetes fixture
# unpacks as an (x, y) pair - so the cached value is a tuple rather than a Bunch.
_DIABETES_DATASET: Optional[tuple] = None


@fixture(scope="session")
def iris() -> Dataset:
    """Provide the iris data, cut down to its first two feature columns, with a 50/50 split.

    The loaded dataset is stored in the module-level ``_IRIS_DATASET`` so that it
    is loaded at most once per process.

    Returns:
        Dataset: the two-column feature array, the targets, and their
        train/test halves.
    """
    global _IRIS_DATASET
    if _IRIS_DATASET is None:
        _IRIS_DATASET = datasets.load_iris()

    x = _IRIS_DATASET.data[:, :2]
    y = _IRIS_DATASET.target
    # A fixed seed makes the split identical from one test session to the next,
    # so a failure can be reproduced instead of depending on a random shuffle.
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.5, random_state=0)
    return Dataset(x, y, x_train, x_test, y_train, y_test)


@fixture(scope="session")
def diabetes() -> Dataset:
    """Provide the diabetes regression data with a 50/50 train/test split.

    The ``(x, y)`` pair returned by ``load_diabetes(return_X_y=True)`` is stored in
    the module-level ``_DIABETES_DATASET`` so that it is loaded at most once per
    process.

    Returns:
        Dataset: the feature array, the targets, and their train/test halves.
    """
    global _DIABETES_DATASET
    if _DIABETES_DATASET is None:
        _DIABETES_DATASET = datasets.load_diabetes(return_X_y=True)
    x, y = _DIABETES_DATASET

    # A fixed seed makes the split identical from one test session to the next,
    # so a failure can be reproduced instead of depending on a random shuffle.
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.5, random_state=0)
    return Dataset(x, y, x_train, x_test, y_train, y_test)
99 changes: 43 additions & 56 deletions tests/test_e2e.py
Original file line number Diff line number Diff line change
@@ -1,99 +1,86 @@
try:
from neptune import init_run
from neptune import (
Run,
init_run,
)
except ImportError:
from neptune.new import init_run
from neptune.new import Run, init_run

import pytest
from sklearn import datasets
from sklearn.cluster import KMeans
from sklearn.dummy import DummyRegressor
from sklearn.linear_model import (
LinearRegression,
LogisticRegression,
)
from sklearn.model_selection import (
GridSearchCV,
train_test_split,
from sklearn.dummy import (
DummyClassifier,
DummyRegressor,
)
from sklearn.model_selection import GridSearchCV

import neptune_sklearn as npt_utils


def test_classifier_summary():
run = init_run()

iris = datasets.load_iris()
X = iris.data[:, :2]
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

model = LogisticRegression(C=1e5)
model.fit(X_train, y_train)

run["summary"] = npt_utils.create_classifier_summary(model, X_train, X_test, y_train, y_test)

run.wait()
validate_run(run, log_charts=True)

def test_classifier_summary(iris):
with init_run() as run:
model = DummyClassifier()
model.fit(iris.x_train, iris.y_train)

def test_regressor_summary():
run = init_run()
run["summary"] = npt_utils.create_classifier_summary(
model, iris.x_train, iris.x_test, iris.y_train, iris.y_test
)

X, y = datasets.load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)
run.wait()
validate_run(run, log_charts=True)

model = LinearRegression()
model.fit(X_train, y_train)

run["summary"] = npt_utils.create_regressor_summary(model, X_train, X_test, y_train, y_test)
def test_regressor_summary(diabetes):
with init_run() as run:
model = DummyRegressor()
model.fit(diabetes.x_train, diabetes.y_train)

run.wait()
validate_run(run, log_charts=True)
run["summary"] = npt_utils.create_regressor_summary(
model, diabetes.x_train, diabetes.x_test, diabetes.y_train, diabetes.y_test
)

run.wait()
validate_run(run, log_charts=True)

def test_kmeans_summary():
run = init_run()

iris = datasets.load_iris()
X = iris.data[:, :2]
def test_kmeans_summary(iris):
with init_run() as run:

model = KMeans()
model.fit(X)
model = KMeans()
model.fit(iris.x)

run["summary"] = npt_utils.create_kmeans_summary(model, X, n_clusters=3)
run["summary"] = npt_utils.create_kmeans_summary(model, iris.x, n_clusters=3)

run.wait()
validate_run(run, log_charts=True)
run.wait()
validate_run(run, log_charts=True)


@pytest.mark.filterwarnings("error::neptune.common.warnings.NeptuneUnsupportedType")
def test_unsupported_object():
def test_unsupported_object(diabetes):
"""This method checks if Neptune throws a `NeptuneUnsupportedType` warning if expected metadata
is not found or skips trying to log such metadata"""

with init_run() as run:

X, y = datasets.load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

model = DummyRegressor()
model.fit(diabetes.x_train, diabetes.y_train)

param_grid = {
"strategy": ["mean", "median", "quantile"],
"quantile": [0.1, 0.5, 1.0],
}

X, y = datasets.fetch_california_housing(return_X_y=True)[:10]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

grid_cv = GridSearchCV(model, param_grid, scoring="neg_mean_absolute_error", cv=2).fit(X_train, y_train)
grid_cv = GridSearchCV(model, param_grid, scoring="neg_mean_absolute_error", cv=2).fit(
diabetes.x_train, diabetes.y_train
)

run["regressor_summary"] = npt_utils.create_regressor_summary(grid_cv, X_train, X_test, y_train, y_test)
run["regressor_summary"] = npt_utils.create_regressor_summary(
grid_cv, diabetes.x_train, diabetes.x_test, diabetes.y_train, diabetes.y_test
)

run.wait()


def validate_run(run, log_charts):
def validate_run(run: Run, log_charts: bool) -> None:
assert run.exists("summary/all_params")
assert run.exists("summary/pickled_model")
assert run.exists("summary/integration/about/neptune-sklearn")
Expand Down

0 comments on commit 72797b9

Please sign in to comment.