diff --git a/fedot/core/data/data_preprocessing.py b/fedot/core/data/data_preprocessing.py
index b0076e9f0a..49650e71c7 100644
--- a/fedot/core/data/data_preprocessing.py
+++ b/fedot/core/data/data_preprocessing.py
@@ -72,11 +72,24 @@ def find_categorical_columns(table: np.ndarray, column_type_ids: Optional[np.nda
 
 def force_categorical_determination(table: np.ndarray):
-    """ Find string columns using 'computationally expensive' approach """
-    real_columns_selector = np.all(np.isreal(table), axis=0)
-    non_categorical_ids = np.flatnonzero(real_columns_selector).tolist()
-    categorical_ids = np.flatnonzero(~real_columns_selector).tolist()
+    """ Find string columns using 'computationally expensive' approach
+
+    Every column of ``table`` (iterated via ``table.T``) is checked with
+    ``pd.api.types.infer_dtype``; columns inferred as 'string' are categorical.
+
+    :param table: array whose columns are checked one by one
+    :return: ``(categorical_ids, non_categorical_ids)`` lists of column indices
+    """
+    categorical_ids = []
+    non_categorical_ids = []
+    # For every column in table make check
+    for column_id, column in enumerate(table.T):
+        # Check if column is of string objects
+        if pd.api.types.infer_dtype(column, skipna=True) == 'string':
+            categorical_ids.append(column_id)
+        else:
+            non_categorical_ids.append(column_id)
 
     return categorical_ids, non_categorical_ids
 
 
 def data_has_missing_values(data: InputData) -> bool:
diff --git a/fedot/preprocessing/preprocessing.py b/fedot/preprocessing/preprocessing.py
index b45e2ceabc..95985539d0 100644
--- a/fedot/preprocessing/preprocessing.py
+++ b/fedot/preprocessing/preprocessing.py
@@ -314,9 +314,11 @@ def _clean_extra_spaces(data: InputData) -> InputData:
         """
 
         def strip_all_strs(item: Union[object, str]):
-            if isinstance(item, str):
+            try:
                 return item.strip()
-            return item
+            except AttributeError:
+                # not an str object
+                return item
 
         features_df = pd.DataFrame(data.features)
         mixed_or_str = features_df.select_dtypes(object)