Fix regression preprocessing #955

Merged: 3 commits, Oct 21, 2022
20 changes: 16 additions & 4 deletions cases/credit_scoring/credit_scoring_problem_multiobj.py
@@ -7,11 +7,14 @@
from cases.credit_scoring.credit_scoring_problem import get_scoring_data
from fedot.core.composer.composer_builder import ComposerBuilder
from fedot.core.data.data import InputData
from fedot.core.optimisers.gp_comp.gp_optimizer import GeneticSchemeTypesEnum
from fedot.core.optimisers.gp_comp.gp_params import GPGraphOptimizerParameters
from fedot.core.optimisers.gp_comp.operators.inheritance import GeneticSchemeTypesEnum
from fedot.core.optimisers.gp_comp.operators.selection import SelectionTypesEnum
from fedot.core.optimisers.gp_comp.pipeline_composer_requirements import PipelineComposerRequirements
from fedot.core.pipelines.node import PrimaryNode
from fedot.core.pipelines.pipeline import Pipeline
from fedot.core.pipelines.tuning.sequential import SequentialTuner
from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder
from fedot.core.repository.operation_types_repository import get_operations_for_task
from fedot.core.repository.quality_metrics_repository import ClassificationMetricsEnum, ComplexityMetricsEnum
from fedot.core.repository.tasks import Task, TaskTypesEnum
@@ -57,7 +60,8 @@ def run_credit_scoring_problem(train_file_path, test_file_path,
composer_requirements = PipelineComposerRequirements(
primary=available_model_types,
secondary=available_model_types,
timeout=timeout
timeout=timeout,
num_of_generations=20
)
params = GPGraphOptimizerParameters(
selection_types=[SelectionTypesEnum.spea2],
@@ -80,10 +84,18 @@ def run_credit_scoring_problem(train_file_path, test_file_path,
results_visualization(composed_pipelines=pipelines_evo_composed, history=composer.history)

pipelines_roc_auc = []

for pipeline_num, pipeline_evo_composed in enumerate(pipelines_evo_composed):

pipeline_evo_composed.fine_tune_primary_nodes(input_data=dataset_to_compose,
iterations=50)
tuner = TunerBuilder(task)\
.with_tuner(SequentialTuner)\
.with_iterations(50)\
.with_metric(metrics[0])\
.build(dataset_to_compose)
nodes = pipeline_evo_composed.nodes
for node_index, node in enumerate(nodes):
if isinstance(node, PrimaryNode):
pipeline_evo_composed = tuner.tune_node(pipeline_evo_composed, node_index)

pipeline_evo_composed.fit(input_data=dataset_to_compose)

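The hunk above replaces the removed Pipeline.fine_tune_primary_nodes helper with an explicit per-node loop over a SequentialTuner. A minimal sketch of the same pattern, assuming task, metric, train_data and pipeline objects like the ones in the example:

from fedot.core.pipelines.node import PrimaryNode
from fedot.core.pipelines.tuning.sequential import SequentialTuner
from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder

# Build a tuner bound to the training data
tuner = TunerBuilder(task) \
    .with_tuner(SequentialTuner) \
    .with_iterations(50) \
    .with_metric(metric) \
    .build(train_data)

# Tune each primary (input) node individually, addressing it by index
for node_index, node in enumerate(pipeline.nodes):
    if isinstance(node, PrimaryNode):
        pipeline = tuner.tune_node(pipeline, node_index)

# Tuning leaves the pipeline unfitted, so refit before predicting
pipeline.fit(train_data)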
5 changes: 4 additions & 1 deletion cases/river_levels_prediction/river_level_case_manual.py
@@ -44,7 +44,7 @@ def run_river_experiment(file_path, pipeline, iterations=20, tuner=None,
current_pipeline = copy(pipeline)

# Fit it
current_pipeline.fit_from_scratch(train_input)
current_pipeline.fit(train_input)

# Predict
predicted_values = current_pipeline.predict(predict_input)
@@ -68,6 +68,9 @@ def run_river_experiment(file_path, pipeline, iterations=20, tuner=None,
.build(train_input)
tuned_pipeline = pipeline_tuner.tune(current_pipeline)

# Fit it
tuned_pipeline.fit(train_input)

# Predict
predicted_values_tuned = tuned_pipeline.predict(predict_input)
preds_tuned = predicted_values_tuned.predict
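This file and the remaining examples all adopt the same convention introduced by this PR: tune() returns a tuned but unfitted pipeline (the objective evaluator now unfits graphs after cross-validation; see the data_objective_eval.py hunk below), so every script refits explicitly before predicting. A minimal sketch of the flow, assuming a tuner built via TunerBuilder as above:

tuned_pipeline = pipeline_tuner.tune(current_pipeline)  # hyperparameter search only

# The returned pipeline carries the tuned parameters but no fitted operations
tuned_pipeline.fit(train_input)

predicted_values_tuned = tuned_pipeline.predict(predict_input)
preds_tuned = predicted_values_tuned.predict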
@@ -74,6 +74,9 @@ def run_refinement_scoring_example(train_path, test_path, with_tuning=False):
no_decompose_c = tuner.tune(no_decompose_c)
decompose_c = tuner.tune(decompose_c)

no_decompose_c.fit(test_dataset)
decompose_c.fit(test_dataset)

display_roc_auc(no_decompose_c, test_dataset, 'Non decomposition pipeline after tuning')
display_roc_auc(decompose_c, test_dataset, 'With decomposition pipeline after tuning')

8 changes: 4 additions & 4 deletions examples/advanced/decompose/regression_refinement_example.py
@@ -95,10 +95,6 @@ def run_river_experiment(file_path, with_tuning=False):
r_pipeline = get_refinement_pipeline()
non_pipeline = get_non_refinement_pipeline()

# Fit it
r_pipeline.fit(train_input)
non_pipeline.fit(train_input)

if with_tuning:
tuner = TunerBuilder(task)\
.with_tuner(PipelineTuner)\
@@ -108,6 +104,10 @@
r_pipeline = tuner.tune(r_pipeline)
non_pipeline = tuner.tune(non_pipeline)

# Fit it
r_pipeline.fit(train_input)
non_pipeline.fit(train_input)

# Predict
predicted_values = r_pipeline.predict(predict_input)
r_preds = predicted_values.predict
5 changes: 4 additions & 1 deletion examples/simple/classification/classification_with_tuning.py
@@ -109,7 +109,7 @@ def run_classification_tuning_experiment(pipeline, tuner=None):
data_type=DataTypesEnum.table)

# Fit it
pipeline.fit_from_scratch(train_input)
pipeline.fit(train_input)

# Predict
predicted_labels = pipeline.predict(predict_input)
@@ -126,6 +126,9 @@ def run_classification_tuning_experiment(pipeline, tuner=None):
.build(train_input)
tuned_pipeline = pipeline_tuner.tune(pipeline)

# Fit it
pipeline.fit(train_input)

# Predict
print('predict')
predicted_values_tuned = tuned_pipeline.predict(predict_input)
5 changes: 3 additions & 2 deletions examples/simple/classification/resample_example.py
@@ -74,7 +74,7 @@ def run_resample_example(path_to_data=None, tune=False):

print('Begin fit Pipeline with balancing')
# pipeline.fit(train_input)
pipeline.fit_from_scratch(train_input)
pipeline.fit(train_input)

# Predict
predict_labels = pipeline.predict(predict_input)
@@ -91,7 +91,8 @@ def run_resample_example(path_to_data=None, tune=False):
.with_timeout(timedelta(minutes=1))\
.build(train_input)
tuned_pipeline = tuner.tune(pipeline)

# Fit
pipeline.fit(train_input)
# Predict
predicted_values_tuned = tuned_pipeline.predict(predict_input)
preds_tuned = predicted_values_tuned.predict
7 changes: 6 additions & 1 deletion examples/simple/regression/regression_with_tuning.py
@@ -89,7 +89,7 @@ def run_experiment(pipeline, tuner):
data_type=DataTypesEnum.table)

# Fit it
pipeline.fit_from_scratch(train_input)
pipeline.fit(train_input)

# Predict
predicted_values = pipeline.predict(predict_input)
@@ -108,6 +108,9 @@ def run_experiment(pipeline, tuner):
.build(train_input)
tuned_pipeline = pipeline_tuner.tune(pipeline)

# Fit it
tuned_pipeline.fit(train_input)

# Predict
predicted_values_tuned = tuned_pipeline.predict(predict_input)
preds_tuned = predicted_values_tuned.predict
@@ -117,6 +120,8 @@ def run_experiment(pipeline, tuner):
print('Obtained metrics after tuning:')
print(f'MAE - {mae_value:.4f}\n')

pipeline.unfit()


# Script for testing is pipeline can process different datasets for regression task
if __name__ == '__main__':
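The trailing pipeline.unfit() complements the switch from fit_from_scratch() to fit(): it drops the fitted operations cached on the pipeline, so reusing the same pipeline object on the next dataset starts from a clean state. A sketch of the intent, where datasets is a hypothetical list of InputData objects:

for train_input in datasets:    # hypothetical loop over regression datasets
    pipeline.fit(train_input)   # fit() caches fitted operations on the pipeline
    # ... predict and evaluate ...
    pipeline.unfit()            # reset the cache so the next fit starts fresh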
@@ -77,7 +77,7 @@ def run_experiment(dataset: str, pipeline: Pipeline, len_forecast=250, tuning=True):
.with_iterations(100) \
.build(train_data)
pipeline = tuner.tune(pipeline)

pipeline.fit(train_data)
prediction_after = pipeline.predict(test_data)
predict_after = np.ravel(np.array(prediction_after.predict))

7 changes: 2 additions & 5 deletions fedot/core/optimisers/objective/data_objective_eval.py
@@ -41,16 +41,14 @@ def __init__(self,
validation_blocks: Optional[int] = None,
pipelines_cache: Optional[OperationsCache] = None,
preprocessing_cache: Optional[PreprocessingCache] = None,
eval_n_jobs: int = 1,
do_unfit: bool = True):
eval_n_jobs: int = 1):
super().__init__(objective, eval_n_jobs=eval_n_jobs)
self._data_producer = data_producer
self._time_constraint = time_constraint
self._validation_blocks = validation_blocks
self._pipelines_cache = pipelines_cache
self._preprocessing_cache = preprocessing_cache
self._log = default_log(self)
self._do_unfit = do_unfit

def evaluate(self, graph: Pipeline) -> Fitness:
# Seems like a workaround for situation when logger is lost
@@ -83,8 +81,7 @@ def evaluate(self, graph: Pipeline) -> Fitness:
raise ValueError(f'Fitness {evaluated_fitness} is not valid')
else:
continue
if self._do_unfit:
graph.unfit()
graph.unfit()
if folds_metrics:
folds_metrics = tuple(np.mean(folds_metrics, axis=0)) # averages for each metric over folds
self._log.debug(f'Pipeline {graph_id} with evaluated metrics: {folds_metrics}')
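With the do_unfit flag removed, evaluate() now unconditionally unfits the graph once the fold loop finishes, and callers such as the tuner can no longer opt out; this is why the example scripts above all gained an explicit fit() after tuning. A simplified sketch of the resulting control flow, with the per-fold logic that the diff elides collapsed into a hypothetical evaluate_fold placeholder:

folds_metrics = []
for fold_id, fold_data in enumerate(self._data_producer()):
    evaluated_fitness = evaluate_fold(graph, fold_data)  # placeholder for the elided fold logic
    if evaluated_fitness.valid:
        folds_metrics.append(evaluated_fitness.values)
graph.unfit()  # always executed now; no do_unfit escape hatch
if folds_metrics:
    folds_metrics = tuple(np.mean(folds_metrics, axis=0))  # averages for each metric over folds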
2 changes: 1 addition & 1 deletion fedot/core/pipelines/tuning/tuner_builder.py
@@ -83,7 +83,7 @@ def build(self, data: InputData) -> HyperoptTuner:
data_producer = DataSourceSplitter(self.cv_folds, self.validation_blocks).build(data)
objective_evaluate = PipelineObjectiveEvaluate(objective, data_producer,
validation_blocks=self.validation_blocks,
do_unfit=False, time_constraint=self.eval_time_constraint)
time_constraint=self.eval_time_constraint)
tuner = self.tuner_class(objective_evaluate=objective_evaluate,
iterations=self.iterations,
early_stopping_rounds=self.early_stopping_rounds,
2 changes: 1 addition & 1 deletion fedot/core/pipelines/tuning/tuner_interface.py
@@ -120,4 +120,4 @@ def final_check(self, tuned_pipeline: Pipeline):
else:
self.log.info(f'{prefix_init_phrase} {abs(self.obtained_metric):.3f} '
f'worse than initial (+ 5% deviation) {abs(init_metric):.3f}')
return self.init_pipeline
return self.init_pipeline