Skip to content

Commit

Permalink
Add unit test for pandas DataFrame input
Browse files (browse the repository at this point in the history)
  • Loading branch information
weixuanfu committed Apr 9, 2018
1 parent 8fccfa4 commit 013622e
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 2 deletions.
28 changes: 28 additions & 0 deletions tests/tpot_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
from tpot.config.classifier_sparse import classifier_config_sparse

import numpy as np
import pandas as pd
from scipy import sparse
import inspect
import random
Expand Down Expand Up @@ -91,6 +92,16 @@ def closing(arg):
training_features_r, testing_features_r, training_target_r, testing_target_r = \
train_test_split(boston_data.data, boston_data.target, random_state=42)

# Build the pandas fixtures used by the DataFrame-based tests: the CSV is
# loaded with every column as float64, then split into the 'class' target
# column and the remaining feature columns.
input_data = pd.read_csv('tests/tests.csv', sep=',', dtype=np.float64)
pd_target = input_data['class']
pd_features = input_data.drop('class', axis=1)


# Set up the sparse matrix for testing
sparse_features = sparse.csr_matrix(training_features)
sparse_target = training_target
Expand Down Expand Up @@ -751,6 +762,23 @@ def test_fit_4():
assert not (tpot_obj._start_datetime is None)


def test_fit_5():
    """Assert that TPOT's fit produces an optimized pipeline from pandas inputs."""
    # Smallest practical search: one individual, two offspring, one generation.
    settings = dict(
        random_state=42,
        population_size=1,
        offspring_size=2,
        generations=1,
        verbosity=0,
    )
    classifier = TPOTClassifier(**settings)

    classifier.fit(pd_features, pd_target)

    # The fixture is expected to be a real DataFrame, not a bare array.
    assert isinstance(pd_features, pd.DataFrame)
    assert isinstance(classifier._optimized_pipeline, creator.Individual)
    assert classifier._start_datetime is not None


def test_memory():
"""Assert that the TPOT fit function runs normally with memory=\'auto\'."""
tpot_obj = TPOTClassifier(
Expand Down
5 changes: 3 additions & 2 deletions tpot/driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,10 +499,11 @@ def tpot_driver(args):
_print_args(args)

input_data = _read_data_file(args)
features = input_data.drop(args.TARGET_NAME, axis=1).values
features = input_data.drop(args.TARGET_NAME, axis=1)

training_features, testing_features, training_target, testing_target = \
train_test_split(features, input_data[args.TARGET_NAME].values, random_state=args.RANDOM_STATE)
train_test_split(features, input_data[args.TARGET_NAME], random_state=args.RANDOM_STATE)


tpot_type = TPOTClassifier if args.TPOT_MODE == 'classification' else TPOTRegressor

Expand Down

0 comments on commit 013622e

Please sign in to comment.