Add unit test for pandas DataFrame input

EpistasisLab · Apr 9, 2018 · 013622e · 013622e
1 parent 8fccfa4
commit 013622e
Show file tree

Hide file tree

Showing 2 changed files with 31 additions and 2 deletions.
diff --git a/tests/tpot_tests.py b/tests/tpot_tests.py
@@ -41,6 +41,7 @@
 from tpot.config.classifier_sparse import classifier_config_sparse
 
 import numpy as np
+import pandas as pd
 from scipy import sparse
 import inspect
 import random
@@ -91,6 +92,16 @@ def closing(arg):
 training_features_r, testing_features_r, training_target_r, testing_target_r = \
     train_test_split(boston_data.data, boston_data.target, random_state=42)
 
+# Set up pandas DataFrame for testing
+
+input_data = pd.read_csv(
+    'tests/tests.csv',
+    sep=',',
+    dtype=np.float64,
+)
+pd_features = input_data.drop('class', axis=1)
+pd_target = input_data['class']
+
 # Set up the sparse matrix for testing
 sparse_features = sparse.csr_matrix(training_features)
 sparse_target = training_target
@@ -751,6 +762,23 @@ def test_fit_4():
     assert not (tpot_obj._start_datetime is None)
 
 
+def test_fit_5():
+    """Assert that the TPOT fit function provides an optimized pipeline with pandas DataFrame"""
+    tpot_obj = TPOTClassifier(
+        random_state=42,
+        population_size=1,
+        offspring_size=2,
+        generations=1,
+        verbosity=0
+    )
+
+    tpot_obj.fit(pd_features, pd_target)
+
+    assert isinstance(pd_features, pd.DataFrame)
+    assert isinstance(tpot_obj._optimized_pipeline, creator.Individual)
+    assert not (tpot_obj._start_datetime is None)
+
+
 def test_memory():
     """Assert that the TPOT fit function runs normally with memory=\'auto\'."""
     tpot_obj = TPOTClassifier(

diff --git a/tpot/driver.py b/tpot/driver.py
@@ -499,10 +499,11 @@ def tpot_driver(args):
         _print_args(args)
 
     input_data = _read_data_file(args)
-    features = input_data.drop(args.TARGET_NAME, axis=1).values
+    features = input_data.drop(args.TARGET_NAME, axis=1)
 
     training_features, testing_features, training_target, testing_target = \
-        train_test_split(features, input_data[args.TARGET_NAME].values, random_state=args.RANDOM_STATE)
+        train_test_split(features, input_data[args.TARGET_NAME], random_state=args.RANDOM_STATE)
+
 
     tpot_type = TPOTClassifier if args.TPOT_MODE == 'classification' else TPOTRegressor