Skip to content

Commit

Permalink
Return fixes
Browse files (browse the repository at this point in the history)
  • Loading branch information
aPovidlo committed Dec 5, 2023
1 parent 8e046f5 commit cac26f6
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 7 deletions.
14 changes: 9 additions & 5 deletions fedot/core/data/data_preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,15 @@ def find_categorical_columns(table: np.ndarray, column_type_ids: Optional[np.nda

def force_categorical_determination(table: np.ndarray):
""" Find string columns using 'computationally expensive' approach """
real_columns_selector = np.all(np.isreal(table), axis=0)
non_categorical_ids = np.flatnonzero(real_columns_selector).tolist()
categorical_ids = np.flatnonzero(~real_columns_selector).tolist()

return categorical_ids, non_categorical_ids
categorical_ids = []
non_categorical_ids = []
# For every column in table make check
for column_id, column in enumerate(table.T):
# Check if column is of string objects
if pd.api.types.infer_dtype(column, skipna=True) == 'string':
categorical_ids.append(column_id)
else:
non_categorical_ids.append(column_id)


def data_has_missing_values(data: InputData) -> bool:
Expand Down
6 changes: 4 additions & 2 deletions fedot/preprocessing/preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,9 +314,11 @@ def _clean_extra_spaces(data: InputData) -> InputData:
"""

def strip_all_strs(item: Union[object, str]):
if isinstance(item, str):
try:
return item.strip()
return item
except AttributeError:
# not an str object
return item

features_df = pd.DataFrame(data.features)
mixed_or_str = features_df.select_dtypes(object)
Expand Down

0 comments on commit cac26f6

Please sign in to comment.