Skip to content

Commit

Permalink
Simplify InputValidator: Allows pandas frame to directly reach the pi…
Browse files Browse the repository at this point in the history
…peline (#1135)

* [ADD] Move encoder to pipeline

* [Fix] Unit Tests

* [ADD] mypy support for preprocessing

* [FIX] unit test

* Feedback from PR

* [Fix] unit test

* [Fix] pre-commit

* Add better unit testing for anneal

* Fix unit testing

* Fix metalearning script

* Fix feat check

* Feedback from PR

* feat_type in testing

* sparse dataframe

* fix pandas landmarking meta-features

* Feedback from comments

* np.nan columns to category

* [Fix] Mypy

Co-authored-by: Matthias Feurer <feurerm@informatik.uni-freiburg.de>
  • Loading branch information
franchuterivera and mfeurer authored Jun 25, 2021
1 parent 01e6e60 commit 4a482de
Show file tree
Hide file tree
Showing 52 changed files with 2,352 additions and 1,499 deletions.
4 changes: 4 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@ repos:
args: [--show-error-codes]
name: mypy auto-sklearn-evaluation
files: autosklearn/evaluation
- id: mypy
args: [--show-error-codes]
name: mypy auto-sklearn-datapreprocessing
files: autosklearn/pipeline/components/data_preprocessing/
- repo: https://gitlab.com/pycqa/flake8
rev: 3.8.3
hooks:
Expand Down
6 changes: 2 additions & 4 deletions autosklearn/automl.py
Original file line number Diff line number Diff line change
Expand Up @@ -533,10 +533,8 @@ def fit(
self._dataset_name = dataset_name
self._stopwatch.start_task(self._dataset_name)

if feat_type is None and self.InputValidator.feature_validator.feat_type:
self._feat_type = self.InputValidator.feature_validator.feat_type
elif feat_type is not None:
self._feat_type = feat_type
# Take the feature types from the validator
self._feat_type = self.InputValidator.feature_validator.feat_type

# Produce debug information to the logfile
self._logger.debug('Starting to print environment information')
Expand Down
35 changes: 3 additions & 32 deletions autosklearn/data/abstract_data_manager.py
Original file line number Diff line number Diff line change
@@ -1,41 +1,12 @@
import abc
from typing import Any, Dict, List, Tuple
from typing import Any, Dict, Union

import numpy as np

import scipy.sparse

from autosklearn.pipeline.components.data_preprocessing.data_preprocessing \
import DataPreprocessor
from autosklearn.util.data import predict_RAM_usage


def perform_one_hot_encoding(
    sparse: bool,
    categorical: List[bool],
    data: List
) -> Tuple[List, bool]:
    """Run the DataPreprocessor over a list of datasets when needed.

    The preprocessor is fitted on ``data[0]`` and then applied to every
    remaining entry of ``data`` in order.

    Parameters
    ----------
    sparse : bool
        Requested value for the preprocessor's ``force_sparse_output``.
        It is overridden to ``True`` when the scaled result of
        ``predict_RAM_usage`` exceeds 1000.
    categorical : List[bool]
        Flags forwarded as ``categorical_features``. If none of them is
        truthy, no preprocessing happens.
    data : List
        Datasets to process; the first entry is used for fitting.

    Returns
    -------
    Tuple[List, bool]
        The processed datasets (or ``data`` itself, untouched, when no
        flag in ``categorical`` is set) and the effective ``sparse`` flag.
    """
    # The estimate is divided by 1024 twice before being compared with the
    # threshold; above it, sparse output is forced regardless of the argument.
    estimated_usage = float(predict_RAM_usage(data[0], categorical))
    estimated_usage = estimated_usage / 1024 / 1024
    if estimated_usage > 1000:
        sparse = True

    # Nothing flagged as categorical: hand the input back unchanged.
    if not any(categorical):
        return data, sparse

    preprocessor = DataPreprocessor(
        categorical_features=categorical, force_sparse_output=sparse)

    # Fit on the first dataset, then reuse the fitted preprocessor on the rest.
    encoded = [preprocessor.fit_transform(data[0])]
    for remaining in data[1:]:
        encoded.append(preprocessor.transform(remaining))

    # The caller asked for dense output but the preprocessor produced sparse
    # matrices: densify every result.
    if not sparse and scipy.sparse.issparse(encoded[0]):
        encoded = [matrix.todense() for matrix in encoded]

    return encoded, sparse


class AbstractDataManager():
Expand All @@ -60,11 +31,11 @@ def info(self) -> Dict[str, Any]:
return self._info

@property
def feat_type(self) -> List[str]:
def feat_type(self) -> Dict[Union[str, int], str]:
return self._feat_type

@feat_type.setter
def feat_type(self, value: List[str]) -> None:
def feat_type(self, value: Dict[Union[str, int], str]) -> None:
self._feat_type = value

@property
Expand Down
Loading

0 comments on commit 4a482de

Please sign in to comment.