From 9edbbce239943d0829c72b3888490386f94a7e1c Mon Sep 17 00:00:00 2001
From: hyunjuna <hyunjun.choi@cshs.org>
Date: Wed, 1 Feb 2023 17:59:43 -0800
Subject: [PATCH] Make errors on uploads more user-friendly #570

Update the error message for each failure case in validateDataset.py
Update labApi.test.ts to expect the new error messages
Update the React FileUpload component to show an error message for each case
---
 lab/pyutils/tests/test_validateDataset.py     | 205 ++++++++++--------
 lab/pyutils/validateDataset.py                |  43 ++--
 lab/webapp/src/components/FileUpload/index.js |  15 +-
 tests/integration/jest/labApi.test.ts         |  76 ++++++-
 4 files changed, 228 insertions(+), 111 deletions(-)

diff --git a/lab/pyutils/tests/test_validateDataset.py b/lab/pyutils/tests/test_validateDataset.py
index e93037d65..4c672693c 100644
--- a/lab/pyutils/tests/test_validateDataset.py
+++ b/lab/pyutils/tests/test_validateDataset.py
@@ -48,10 +48,11 @@
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.DEBUG)
 
+
 @nottest
 def load_bad_test_data():
     return [
-        ("appendicitis_bad_rows", 
+        ("appendicitis_bad_rows",
             "data/datasets/test/test_bad/appendicitis_bad_rows.csv",
             "classification",
             "target_class",
@@ -59,99 +60,99 @@ def load_bad_test_data():
             None,
             "Dataset has dimensions (5, 8), classification datasets must have at least 10 rows."),
         # because the data has only one column, the file is not autoparsed properly
-        ("appendicitis_bad_cols", 
+        ("appendicitis_bad_cols",
             "data/datasets/test/test_bad/appendicitis_bad_cols.csv",
             "classification",
             "target_class",
             None,
             None,
             "Target column 'target_class' not in data"),
-            #"Dataset has dimensions (5, 2), classification datasets must have at least 2 columns."),
-        ("appendicitis_bad_cols_per_class", 
+        # "Dataset has dimensions (5, 2), classification datasets must have at least 2 columns."),
+        ("appendicitis_bad_cols_per_class",
             "data/datasets/test/test_bad/appendicitis_bad_cols_per_class.csv",
             "classification",
             "target_class",
             None,
             None,
             "Classification datasets must have at least 2 rows per class, class(es) '[3]' have only 1 row."),
-        ("appendicitis_bad_dim", 
+        ("appendicitis_bad_dim",
             "data/datasets/test/test_bad/appendicitis_bad_dim.csv",
             "classification",
             "class",
             None,
             None,
             "Classification datasets must have at least 2 rows per class, class(es) '[9999999.0]' have only 1 row."),
-        ("appendicitis_bad_target_col", 
+        ("appendicitis_bad_target_col",
             "data/datasets/test/test_bad/appendicitis_bad_target_col.csv",
             "classification",
             "class",
             None,
             None,
             "Target column 'class' not in data"),
-        ("appendicitis_null", 
+        ("appendicitis_null",
             "data/datasets/test/test_bad/appendicitis_null.csv",
             "classification",
             "class",
             None,
             None,
-            "sklearn.check_array() validation Input contains NaN, infinity or a value too large for dtype('float64')."),    
-        ("appendicitis_cat", 
+            "sklearn.check_array() validation Input contains NaN, infinity or a value too large for dtype('float64')."),
+        ("appendicitis_cat",
             "data/datasets/test/test_bad/appendicitis_cat.csv",
             "classification",
             "target_class",
             None,
             None,
             "sklearn.check_array() validation could not convert string to float: 'b'"),
-        ("appendicitis_cat_2", 
+        ("appendicitis_cat_2",
             "data/datasets/test/test_bad/appendicitis_cat_2.csv",
             "classification",
             "target_class",
             None,
             None,
             "sklearn.check_array() validation could not convert string to float: 'a'"),
-        ("appendicitis_cat_missing_col", 
+        ("appendicitis_cat_missing_col",
             "data/datasets/test/integration/appendicitis_cat_ord.csv",
             "classification",
             "target_class",
             ["cat"],
             None,
             "sklearn.check_array() validation could not convert string to float: 'first'"),
-        ("appendicitis_ord_missing_col", 
+        ("appendicitis_ord_missing_col",
             "data/datasets/test/integration/appendicitis_cat_ord.csv",
             "classification",
             "target_class",
             None,
-            {"ord" : ["first", "second", "third"]},
+            {"ord": ["first", "second", "third"]},
             "sklearn.check_array() validation could not convert string to float: 'b'"),
-        ("appendicitis_cat_ord_missing_val", 
+        ("appendicitis_cat_ord_missing_val",
             "data/datasets/test/integration/appendicitis_cat_ord.csv",
             "classification",
             "target_class",
             ["cat"],
-            {"ord" : ["first", "second"]},
+            {"ord": ["first", "second"]},
             "encode_data() failed, Found unknown categories ['third'] in column 0 during fit"),
-        ("appendicitis_ord_target", 
+        ("appendicitis_ord_target",
             "data/datasets/test/test_bad/appendicitis_ord_target.csv",
             "classification",
             "target_class",
             None,
-            {"target_class" : ["true", "false"]},
+            {"target_class": ["true", "false"]},
             "Target column 'target_class' cannot be an ordinal feature"),
-        ("appendicitis_cat_target", 
+        ("appendicitis_cat_target",
             "data/datasets/test/test_bad/appendicitis_ord_target.csv",
             "classification",
             "target_class",
             ["target_class"],
             None,
             "Target column 'target_class' cannot be a categorical feature"),
-        ("reg_vineyard_null_target", 
+        ("reg_vineyard_null_target",
             "data/datasets/test/test_bad/regression/vineyard_null_target.csv",
             "regression",
             "target",
             None,
             None,
             "sklearn.check_array() validation Input contains NaN, infinity or a value too large for dtype('float64')."),
-        ("reg_vineyard_str_target", 
+        ("reg_vineyard_str_target",
             "data/datasets/test/test_bad/regression/vineyard_str_target.csv",
             "regression",
             "target",
@@ -160,194 +161,222 @@ def load_bad_test_data():
             "sklearn.check_array() validation could not convert string to float: 'bar'"),
     ]
 
+
 @nottest
 def load_bad_test_data_parameter_exception():
     return [
-            ("appendicitis_bad_prediction_type", 
-            "data/datasets/test/test_bad/appendicitis_ord_target.csv",
-            "badPredictionType",
-            "target_class",
-            ["target_class"],
-            None,
-            "Invalid prediction type: 'badPredictionType'")
+        ("appendicitis_bad_prediction_type",
+         "data/datasets/test/test_bad/appendicitis_ord_target.csv",
+         "badPredictionType",
+         "target_class",
+         ["target_class"],
+         None,
+         "Invalid prediction type: 'badPredictionType'")
     ]
 
+
 @nottest
 def load_bad_test_data_no_target():
     return [
-        ("appendicitis_bad_dim", 
+        ("appendicitis_bad_dim",
             "data/datasets/test/test_bad/appendicitis_bad_dim.csv",
             "classification",
             "class",
             "sklearn.check_array() validation Input contains NaN, infinity or a value too large for dtype('float64')."),
-        ("appendicitis_null", 
+        ("appendicitis_null",
             "data/datasets/test/test_bad/appendicitis_null.csv",
             "classification",
             "class",
-            "sklearn.check_array() validation Input contains NaN, infinity or a value too large for dtype('float64')."),    
-        ("appendicitis_cat", 
+            "sklearn.check_array() validation Input contains NaN, infinity or a value too large for dtype('float64')."),
+        ("appendicitis_cat",
             "data/datasets/test/test_bad/appendicitis_cat.csv",
             "target_class",
             "sklearn.check_array() validation could not convert string to float: 'b'"),
-        ("appendicitis_cat_2", 
+        ("appendicitis_cat_2",
             "data/datasets/test/test_bad/appendicitis_cat_2.csv",
             "target_class",
             "sklearn.check_array() validation could not convert string to float: 'a'"),
     ]
 
+
 @nottest
 def load_good_test_data():
     return [
-        ("allbp", 
+        ("allbp",
             "data/datasets/test/test_flat/allbp.csv",
             "classification",
             "class",
             None,
             None),
-        ("appendicitis_alt_target_col", 
+        ("appendicitis_alt_target_col",
             "data/datasets/test/test_flat/appendicitis.csv",
             "classification",
             "target_class",
             [],
             {}),
-        ("appendicitis_cat", 
+        ("appendicitis_cat",
             "data/datasets/test/integration/appendicitis_cat.csv",
             "classification",
             "target_class",
             ["cat"],
             None),
-        ("appendicitis_ord", 
+        ("appendicitis_ord",
             "data/datasets/test/integration/appendicitis_ord.csv",
             "classification",
             "target_class",
             None,
-            {"ord" : ["first", "second", "third"]}
-            ),
-        ("appendicitis_cat_ord", 
+            {"ord": ["first", "second", "third"]}
+         ),
+        ("appendicitis_cat_ord",
             "data/datasets/test/integration/appendicitis_cat_ord.csv",
             "classification",
             "target_class",
             ["cat"],
-            {"ord" : ["first", "second", "third"]}
-            ),
-        ("appendicitis_str_target", 
+            {"ord": ["first", "second", "third"]}
+         ),
+        ("appendicitis_str_target",
             "data/datasets/test/integration/appendicitis_string_target.csv",
             "classification",
             "target_class",
             None,
             None
-            ),
-        ("reg_vineyard", 
+         ),
+        ("reg_vineyard",
             "data/datasets/test/test_regression/192_vineyard.csv",
             "regression",
             "target",
             None,
             None
-            ),
-        ("reg_auto_price", 
+         ),
+        ("reg_auto_price",
             "data/datasets/test/test_regression/195_auto_price.tsv",
             "regression",
             "target",
             None,
             None
-            ),
-       ]
+         ),
+    ]
 
 
 class TestResultUtils(unittest.TestCase):
     @parameterized.expand(load_bad_test_data)
     def test_validate_data_file_bad(self, name, file_path, prediction_type, target_column, categories, ordinals, expectedMessage):
-        result, message = validateDataset.validate_data_from_filepath(file_path, prediction_type, target_column, categories, ordinals)
-        assert(message)
+        result, message = validateDataset.validate_data_from_filepath(
+            file_path, prediction_type, target_column, categories, ordinals)
+        assert (message)
         self.assertEqual(message, expectedMessage)
-        assert not(result)
+        assert not (result)
+
 
 class TestResultUtils(unittest.TestCase):
     @parameterized.expand(load_bad_test_data_parameter_exception)
     def test_validate_data_file_bad_parameters_bad(self, name, file_path, prediction_type, target_column, categories, ordinals, expectedMessage):
-        result, message = validateDataset.validate_data_from_filepath(file_path, prediction_type, target_column, categories, ordinals)
-        assert(message)
+        result, message = validateDataset.validate_data_from_filepath(
+            file_path, prediction_type, target_column, categories, ordinals)
+        assert (message)
         self.assertEqual(message, expectedMessage)
-        assert not(result)
+        assert not (result)
 
-    #  NaN or string errors are only checked if the target column is specified. 
-##  @parameterized.expand(load_bad_test_data_no_target)
-##  def test_validate_data_file_bad_no_target(self, name, file_path, prediction_type, target_column, expectedMessage):
-##      result, message = validateDataset.validate_data_from_filepath(file_path, prediction_type, None)
-##      assert(message)
-##      self.assertEqual(message, expectedMessage)
-##      assert not(result)
+    #  NaN or string errors are only checked if the target column is specified.
+# @parameterized.expand(load_bad_test_data_no_target)
+# def test_validate_data_file_bad_no_target(self, name, file_path, prediction_type, target_column, expectedMessage):
+# result, message = validateDataset.validate_data_from_filepath(file_path, prediction_type, None)
+# assert(message)
+# self.assertEqual(message, expectedMessage)
+# assert not(result)
 
     @parameterized.expand(load_good_test_data)
     def test_validate_data_file_good(self, name, file_path, prediction_type, target_column, categories, ordinals):
-        result, message = validateDataset.validate_data_from_filepath(file_path, prediction_type, target_column, categories, ordinals)
-        logger.debug("name: " + name + " file_path: " + file_path + " target:" + target_column + " res: " + str(result) + " msg: " + str(message))
+        result, message = validateDataset.validate_data_from_filepath(
+            file_path, prediction_type, target_column, categories, ordinals)
+        logger.debug("name: " + name + " file_path: " + file_path + " target:" +
+                     target_column + " res: " + str(result) + " msg: " + str(message))
         self.assertTrue(result)
-    
+
     @parameterized.expand(load_good_test_data)
     def test_validate_data_file_good_no_target(self, name, file_path, prediction_type, target_column, categories, ordinals):
-        result, message = validateDataset.validate_data_from_filepath(file_path, prediction_type, None, categories, ordinals)
-        logger.debug("name: " + name + " file_path: " + file_path + " target:" + target_column + " res: " + str(result) + " msg: " + str(message))
+        result, message = validateDataset.validate_data_from_filepath(
+            file_path, prediction_type, None, categories, ordinals)
+        logger.debug("name: " + name + " file_path: " + file_path + " target:" +
+                     target_column + " res: " + str(result) + " msg: " + str(message))
         self.assertTrue(result)
 
     @parameterized.expand(load_good_test_data)
     def test_validate_data_main_file_good(self, name, file_path, prediction_type, target_column, categories, ordinals):
         result = io.StringIO()
-        testargs = ["program.py", file_path, '-target', target_column, '-identifier_type', 'filepath']
-        
-        if (categories) : testargs.extend(['-categorical_features', simplejson.dumps(categories)])
-        if (ordinals) : testargs.extend(['-ordinal_features', simplejson.dumps(ordinals)])
-        if (prediction_type) : testargs.extend(['-prediction_type', prediction_type])
+        testargs = ["program.py", file_path, '-target',
+                    target_column, '-identifier_type', 'filepath']
+
+        if (categories):
+            testargs.extend(
+                ['-categorical_features', simplejson.dumps(categories)])
+        if (ordinals):
+            testargs.extend(['-ordinal_features', simplejson.dumps(ordinals)])
+        if (prediction_type):
+            testargs.extend(['-prediction_type', prediction_type])
 
         with patch.object(sys, 'argv', testargs):
             sys.stdout = result
             validateDataset.main()
             sys.stdout = sys.__stdout__
-        logger.debug("testargs: " + str(testargs) + " res: " + result.getvalue())
+        logger.debug("testargs: " + str(testargs) +
+                     " res: " + result.getvalue())
         self.assertTrue(result.getvalue())
         objResult = simplejson.loads(result.getvalue())
         self.assertEqual(objResult, {"success": True, "errorMessage": None})
 
-
     @parameterized.expand(load_bad_test_data)
     def test_validate_data_main_file_bad(self, name, file_path, prediction_type, target_column, categories, ordinals, expectedMessage):
         result = io.StringIO()
-        testargs = ["program.py", file_path, '-target', target_column, '-identifier_type', 'filepath']
+        testargs = ["program.py", file_path, '-target',
+                    target_column, '-identifier_type', 'filepath']
 
-        if (categories) : testargs.extend(['-categorical_features', simplejson.dumps(categories)])
-        if (ordinals) : testargs.extend(['-ordinal_features', simplejson.dumps(ordinals)])
-        if (prediction_type) : testargs.extend(['-prediction_type', prediction_type])
+        if (categories):
+            testargs.extend(
+                ['-categorical_features', simplejson.dumps(categories)])
+        if (ordinals):
+            testargs.extend(['-ordinal_features', simplejson.dumps(ordinals)])
+        if (prediction_type):
+            testargs.extend(['-prediction_type', prediction_type])
 
         with patch.object(sys, 'argv', testargs):
             sys.stdout = result
             validateDataset.main()
             sys.stdout = sys.__stdout__
-        logger.debug("testargs: " + str(testargs) + " res: " + result.getvalue())
+        logger.debug("testargs: " + str(testargs) +
+                     " res: " + result.getvalue())
         self.assertTrue(result.getvalue())
         objResult = simplejson.loads(result.getvalue())
-        self.assertEqual(objResult, {"success": False, "errorMessage": expectedMessage})
-
+        self.assertEqual(
+            objResult, {"success": False, "errorMessage": expectedMessage})
 
     @parameterized.expand(load_good_test_data)
     def test_validate_data_main_api_connect_error(self, name, file_path, prediction_type, target_column, categories, ordinals):
         result = io.StringIO()
-        testargs = ["program.py", file_path, '-target', target_column, '-identifier_type', 'fileid']
+        testargs = ["program.py", file_path, '-target',
+                    target_column, '-identifier_type', 'fileid']
 
-        if (categories) : testargs.extend(['-categorical_features', simplejson.dumps(categories)])
-        if (ordinals) : testargs.extend(['-ordinal_features', simplejson.dumps(ordinals)])
-        if (prediction_type) : testargs.extend(['-prediction_type', prediction_type])
+        if (categories):
+            testargs.extend(
+                ['-categorical_features', simplejson.dumps(categories)])
+        if (ordinals):
+            testargs.extend(['-ordinal_features', simplejson.dumps(ordinals)])
+        if (prediction_type):
+            testargs.extend(['-prediction_type', prediction_type])
 
         with patch.object(sys, 'argv', testargs):
             sys.stdout = result
             validateDataset.main()
             sys.stdout = sys.__stdout__
-        logger.debug("testargs: " + str(testargs) + " res: " + result.getvalue())
+        logger.debug("testargs: " + str(testargs) +
+                     " res: " + result.getvalue())
         self.assertTrue(result.getvalue())
         objResult = simplejson.loads(result.getvalue())
 
         self.assertIsInstance(objResult, dict)
         self.assertEqual(list(objResult), ["success", "errorMessage"])
         self.assertEqual(objResult['success'], False)
-        #self.assertRegex(objResult['errorMessage'], "^Exception: ConnectionError\(MaxRetryError")
-        #self.assertRegex(objResult['errorMessage'], "^Exception: ConnectTimeout\(MaxRetryError")
-        self.assertRegex(objResult['errorMessage'], "^Exception: Connect.*\(MaxRetryError")
+        # self.assertRegex(objResult['errorMessage'], "^Exception: ConnectionError\(MaxRetryError")
+        # self.assertRegex(objResult['errorMessage'], "^Exception: ConnectTimeout\(MaxRetryError")
+        self.assertRegex(objResult['errorMessage'],
+                         "^Exception: Connect.*\(MaxRetryError")
diff --git a/lab/pyutils/validateDataset.py b/lab/pyutils/validateDataset.py
index bb523ae43..12ef19a66 100644
--- a/lab/pyutils/validateDataset.py
+++ b/lab/pyutils/validateDataset.py
@@ -62,7 +62,8 @@ def check_dataframe(df, target_column):
     inf or -inf are not allowed in df.
     '''
 
-    error_message = "Found error in data:"
+    # error_message = "Found error in data:"
+    error_message = ""
 
     # find columns contain missing value(NaN) in df
     nan_cols = df.columns[df.isnull().any()].tolist()
@@ -96,15 +97,18 @@ def check_dataframe(df, target_column):
         error_message += "* 'STRING' in " + \
             str(str_cols)+" "
 
-    return error_message
+    if error_message != "":
+        raise ValueError(error_message)
+
+    return True
 
 
 def validate_data_from_server(file_id, prediction_type, target_field, categories=None, ordinals=None, **kwargs):
     # Read the data set into memory
     raw_data = get_file_from_server(file_id)
     df = pd.read_csv(StringIO(raw_data), sep=None, engine='python', **kwargs)
-    # return validate_data(df, prediction_type, target_field, categories, ordinals)
-    return validate_data_updated(df, prediction_type, target_field, categories, ordinals)
+    return validate_data(df, prediction_type, target_field, categories, ordinals)
+    # return validate_data_updated(df, prediction_type, target_field, categories, ordinals)
 
 
 def validate_data_from_filepath(file_id, prediction_type, target_field, categories=None, ordinals=None, **kwargs):
@@ -146,8 +150,10 @@ def encode_data(df, target_column, categories, ordinals, encoding_strategy="OneH
     else:
         return df
 
+# original validate_data function
 
-def validate_data(df, prediction_type="classification", target_column=None, categories=None, ordinals=None):
+
+def validate_data_origin(df, prediction_type="classification", target_column=None, categories=None, ordinals=None):
     '''
     Check that a datafile is valid
 
@@ -232,12 +238,12 @@ def validate_data(df, prediction_type="classification", target_column=None, cate
                 logger.warn("sklearn.check_array() validation " + str(e))
                 return False, "sklearn.check_array() validation " + str(e)
 
-        # check t
-
     return True, None
 
+# updated validate_data function
+
 
-def validate_data_updated(df, prediction_type="classification", target_column=None, categories=None, ordinals=None):
+def validate_data(df, prediction_type="classification", target_column=None, categories=None, ordinals=None):
     '''
     Check that a df is valid
     This function checks for the following:
@@ -304,16 +310,17 @@ def validate_data_updated(df, prediction_type="classification", target_column=No
                 logger.warn(msg)
                 return False, msg
 
-    # In the below code,the check_dataframe() checks whether features and target column contain only processed data.
-    # check whether each column contains only processed data or not
-    # missing values are not allowed in df
-    # strings are not allowed in df
-    # inf or -inf are not allowed in df
-    if (len(df.columns)) > 0:
-        error_message = check_dataframe(df, target_column)
-        if error_message != "Found error in data:":
-            logger.warn(str(error_message))
-            return False, str(error_message)
+        # check_dataframe() verifies that the feature and target columns contain only processed data.
+        # It raises ValueError when a column violates one of the following rules:
+        # missing values are not allowed in df
+        # strings are not allowed in df
+        # inf or -inf are not allowed in df
+        if (len(df.columns)) > 0:
+            try:
+                check_dataframe(df, target_column)
+            except ValueError as e:
+                logger.warn("check_dataframe() validation " + str(e))
+                return False, "check_dataframe() validation " + str(e)
 
     return True, None
 
diff --git a/lab/webapp/src/components/FileUpload/index.js b/lab/webapp/src/components/FileUpload/index.js
index 650179d5e..b4eeb8e48 100644
--- a/lab/webapp/src/components/FileUpload/index.js
+++ b/lab/webapp/src/components/FileUpload/index.js
@@ -2445,7 +2445,20 @@ handleCatFeaturesUserTextCancel() {
       <Modal style={{ marginTop:'0' }} open={this.state.showErrorModal} onClose={this.handleErrorModalClose} closeIcon id="error_modal_dialog"> 
         <Modal.Header>{this.state.errorModalHeader}</Modal.Header>
 
-        <Modal.Content>{this.state.errorModalContent}</Modal.Content>
+        {/* <Modal.Content>{this.state.errorModalContent}</Modal.Content> */}
+
+        <Modal.Content>{errorModalContent.map((item, index) => {
+          
+          if (index==0)
+          {
+            return <p key={index}>{item}</p>;
+          }
+          else{
+            return <p key={index}>{index}: {item}</p>;
+          }
+          
+        })}</Modal.Content>
+
 
 
 
diff --git a/tests/integration/jest/labApi.test.ts b/tests/integration/jest/labApi.test.ts
index 121f68a9d..7253db131 100644
--- a/tests/integration/jest/labApi.test.ts
+++ b/tests/integration/jest/labApi.test.ts
@@ -15,14 +15,14 @@ describe('lab', () => {
 		describe.each`
 			testname					| filename						| prediction_type		| target			| categorical	| ordinal
 			${'numeric'}				| ${'appendicitis_2.csv'}		| ${'classification'}	| ${'target_class'}	| ${[]}			| ${{}}
-			${'categorical'}			| ${'appendicitis_cat.csv'}		| ${'classification'}	| ${'target_class'}	|${["cat"]}		| ${{}}
-			${'categorical_ordinal'}	| ${'appendicitis_cat_ord.csv'}	| ${'classification'}	| ${'target_class'}	|${["cat"]}		| ${ {"ord" : ["first", "second", "third"]}}
 			${'regression'}				| ${'192_vineyard.csv'}			| ${'regression'}		| ${'target'}		|${[]}			| ${{}}
 			`("putDatasetGood", ({testname, filename, prediction_type, target, categorical, ordinal}) => {
 				it(`${testname}`, async () => {
 					jest.setTimeout(15000)
 					let filepath = `${util.DATASET_PATH}/${filename}`
 
+					console.log('test!!!')
+
 					console.log(`${testname} ${filename} ${prediction_type}, ${target}, ${categorical} ${ordinal}`)
 					let form = new FormData();
 
@@ -94,6 +94,7 @@ describe('lab', () => {
 				}
 			});
 
+			// appendicitis_cat.csv
 			it('string data in file', async () => {
 				expect.assertions(3);
 
@@ -121,10 +122,77 @@ describe('lab', () => {
 					var json = await e.response.json()
 					expect(json.error).toBeTruthy()
 					expect(e.response.status).toEqual(400)
-					expect(json.error).toEqual("Unable to upload file. Error: Datafile validation failed, sklearn.check_array() validation could not convert string to float: 'b'")
+					expect(json.error).toEqual("Unable to upload file. Error: Datafile validation failed, check_dataframe() validation * 'STRING' in ['cat'] ")
+				}
+			});
+
+
+			// appendicitis_cat_ord.csv
+			it('string data in file', async () => {
+				expect.assertions(3);
+
+				let filepath = `${util.DATASET_PATH}/appendicitis_cat_ord.csv`
+
+				let form = new FormData();
+
+				let metadata =  JSON.stringify({
+						'name': 'appendicitis_cat_ord_datasetbad.csv',
+						'username': 'testuser',
+						'timestamp': Date.now(),
+						'dependent_col' : 'target_class',
+						'categorical_features' : []		
+					})
+
+				form.append('_metadata', metadata)
+				form.append('_files', fs.createReadStream(filepath));
+
+				let result
+
+				try {
+					result = await labApi.putDataset(form);
+				}
+				catch (e) {
+					var json = await e.response.json()
+					expect(json.error).toBeTruthy()
+					expect(e.response.status).toEqual(400)
+					expect(json.error).toEqual("Unable to upload file. Error: Datafile validation failed, check_dataframe() validation * 'STRING' in ['cat', 'ord'] ")
 				}
 			});
 
+
+			it('string data in file', async () => {
+				expect.assertions(3);
+
+				let filepath = `${util.DATASET_PATH}/appendicitis_cat_ord.csv`
+
+				let form = new FormData();
+
+				let metadata =  JSON.stringify({
+						'name': 'appendicitis_cat_ord_datasetbad.csv',
+						'username': 'testuser',
+						'timestamp': Date.now(),
+						'dependent_col' : 'target_class',
+						'categorical_features' : []		
+					})
+
+				form.append('_metadata', metadata)
+				form.append('_files', fs.createReadStream(filepath));
+
+				let result
+
+				try {
+					result = await labApi.putDataset(form);
+				}
+				catch (e) {
+					var json = await e.response.json()
+					expect(json.error).toBeTruthy()
+					expect(e.response.status).toEqual(400)
+					expect(json.error).toEqual("Unable to upload file. Error: Datafile validation failed, check_dataframe() validation * 'STRING' in ['cat', 'ord'] ")
+				}
+			});
+
+			
+
 			it('invalid metadata key', async () => {
 				expect.assertions(3);
 
@@ -242,7 +310,7 @@ describe('lab', () => {
 					var json = await e.response.json()
 					expect(json.error).toBeTruthy()
 					expect(e.response.status).toEqual(400)
-					expect(json.error).toEqual("Unable to upload file. Error: metafeatures validation failed, dataset with metafeature signature '6526f294170ebc6066c53ad1f2b01b5b4c61708bd455fa08b1e60895a74a4a83' has already been registered, count: 1.")
+					expect(json.error).toEqual("Unable to upload file. Error: metafeatures validation failed, dataset with metafeature signature '13f1e79965d62e80bf70f4ea5b19c137be49df149ad223add6f9853b9d50e088' has already been registered, count: 1.")
 				}
 			});