Commit

fix: Update example test files (#216)
Signed-off-by: Ahdra Merali <ahdra.merali@quantumblack.com>
AhdraMeraliQB authored Apr 17, 2024
1 parent 835c559 commit 580f7f0
Showing 10 changed files with 220 additions and 80 deletions.
@@ -4,8 +4,6 @@
 Tests should be placed in ``src/tests``, in modules that mirror your
 project's structure, and in files named test_*.py. They are simply functions
 named ``test_*`` which test a unit of logic.
-
-To run the tests, run ``kedro test`` from the project root directory.
 """
 from pathlib import Path
 
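The removed docstring lines pointed at the ``kedro test`` command; the text that remains describes plain ``pytest`` conventions (files named test_*.py containing ``test_*`` functions), so the example suite is presumably run with ``pytest`` directly from the project root. Below is a minimal sketch of doing the same programmatically; it assumes the tests live under src/tests as the docstring says, and the runner script itself is hypothetical, not part of this commit.

# Hypothetical convenience script (not part of the commit): run the example
# tests programmatically, equivalent to `pytest src/tests` from the project root.
import sys

import pytest

if __name__ == "__main__":
    sys.exit(pytest.main(["src/tests"]))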
@@ -1,29 +1,67 @@
+import logging
 import pandas as pd
 import pytest
+from kedro.io import DataCatalog
+from kedro.runner import SequentialRunner
+from {{ cookiecutter.python_package }}.pipelines.data_science import create_pipeline as create_ds_pipeline
 from {{ cookiecutter.python_package }}.pipelines.data_science.nodes import split_data
 
 
 @pytest.fixture
 def dummy_data():
     return pd.DataFrame(
-        {"engines": [1, 2, 3],
-         "crew": [4, 5, 6],
-         "passenger_capacity": [5, 6, 7],
-         "price": [120, 290, 30]})
+        {
+            "engines": [1, 2, 3],
+            "crew": [4, 5, 6],
+            "passenger_capacity": [5, 6, 7],
+            "price": [120, 290, 30],
+        }
+    )
 
 @pytest.fixture
 def dummy_parameters():
-    parameters = {"model_options":
-                      {"test_size": 0.2,
-                       "random_state": 3,
-                       "features": ["engines", "passenger_capacity", "crew"]}
-                  }
+    parameters = {
+        "model_options": {
+            "test_size": 0.2,
+            "random_state": 3,
+            "features": ["engines", "passenger_capacity", "crew"],
+        }
+    }
     return parameters
 
-class TestDataScienceNodes:
-    def test_split_data(self, dummy_data, dummy_parameters):
-        X_train, X_test, y_train, y_test = split_data(dummy_data, dummy_parameters["model_options"])
-        assert len(X_train) == 2  # noqa: PLR2004
-        assert len(y_train) == 2  # noqa: PLR2004
-        assert len(X_test) == 1
-        assert len(y_test) == 1
+
+def test_split_data(dummy_data, dummy_parameters):
+    X_train, X_test, y_train, y_test = split_data(
+        dummy_data, dummy_parameters["model_options"]
+    )
+    assert len(X_train) == 2
+    assert len(y_train) == 2
+    assert len(X_test) == 1
+    assert len(y_test) == 1
+
+
+def test_split_data_missing_price(dummy_data, dummy_parameters):
+    dummy_data_missing_price = dummy_data.drop(columns="price")
+    with pytest.raises(KeyError) as e_info:
+        X_train, X_test, y_train, y_test = split_data(dummy_data_missing_price, dummy_parameters["model_options"])
+
+    assert "price" in str(e_info.value)
+
+
+def test_data_science_pipeline(caplog, dummy_data, dummy_parameters):
+    pipeline = (
+        create_ds_pipeline()
+        .from_nodes("split_data_node")
+        .to_nodes("evaluate_model_node")
+    )
+    catalog = DataCatalog()
+    catalog.add_feed_dict(
+        {
+            "model_input_table" : dummy_data,
+            "params:model_options": dummy_parameters["model_options"],
+        }
+    )
+
+    caplog.set_level(logging.DEBUG, logger="kedro")
+    successful_run_msg = "Pipeline execution completed successfully."
+
+    SequentialRunner().run(pipeline, catalog)
+
+    assert successful_run_msg in caplog.text
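The new ``test_data_science_pipeline`` asserts on the Kedro logger output rather than on the runner's return value: it raises the ``kedro`` logger to DEBUG via pytest's ``caplog`` fixture, runs the pipeline, and checks the captured text. The snippet below is a minimal, self-contained illustration of that ``caplog`` pattern; the ``emit_success`` helper is hypothetical and merely stands in for the pipeline run.

import logging


def emit_success(logger_name: str = "kedro") -> None:
    # Hypothetical stand-in for a pipeline run that logs a success message.
    logging.getLogger(logger_name).info("Pipeline execution completed successfully.")


def test_success_message_is_logged(caplog):
    # caplog captures records from the named logger once its level is set.
    caplog.set_level(logging.DEBUG, logger="kedro")
    emit_success()
    assert "Pipeline execution completed successfully." in caplog.text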
