diff --git a/app.ipynb b/app.ipynb index 757da07..a116aed 100644 --- a/app.ipynb +++ b/app.ipynb @@ -640,7 +640,7 @@ "from callbacks import sidebar_callbacks\n", "\n", "# import pages\n", - "from view import page_data_loading, page_overview, page_categorical_feature, page_na_value, page_outlier_detection, page_transformation_table_data, page_transformation_time_series, page_supervised_classification, page_supervised_regression\n", + "from view import page_data_loading, page_overview, page_categorical_feature, page_na_value, page_outlier_detection, page_transformation_time_series, page_supervised_classification, page_supervised_regression\n", "\n", "# the styles for the main content position it to the right of the sidebar and\n", "# add some padding.\n", @@ -669,7 +669,7 @@ " id=\"page_content\", style=CONTENT_STYLE\n", ")\n", "\n", - "app.layout = html.Div([dcc.Location(id=\"url\"), sidebar_, content])\n", + "app.layout = html.Div([dcc.Location(id=\"url\", refresh=False), sidebar_, content])\n", "\n", "\n", "\n", @@ -703,7 +703,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.13" + "version": "3.8.15" } }, "nbformat": 4, diff --git a/app.py b/app.py index b02dce7..6709c43 100644 --- a/app.py +++ b/app.py @@ -12,7 +12,7 @@ from callbacks import sidebar_callbacks # import pages -from view import page_data_loading, page_overview, page_categorical_feature, page_na_value, page_outlier_detection, page_transformation_table_data, page_transformation_time_series, page_supervised_classification, page_supervised_regression +from view import page_data_loading, page_overview, page_categorical_feature, page_na_value, page_outlier_detection, page_transformation_time_series, page_supervised_classification, page_supervised_regression # the styles for the main content position it to the right of the sidebar and # add some padding. 
@@ -41,7 +41,7 @@ id="page_content", style=CONTENT_STYLE ) -app.layout = html.Div([dcc.Location(id="url"), sidebar_, content]) +app.layout = html.Div([dcc.Location(id="url", refresh=False), sidebar_, content]) diff --git a/assets/img/link.png b/assets/img/link.png new file mode 100644 index 0000000..e5ff6f6 Binary files /dev/null and b/assets/img/link.png differ diff --git a/assets/style.css b/assets/style.css index b1af71f..95a1841 100644 --- a/assets/style.css +++ b/assets/style.css @@ -1,3 +1,39 @@ +.sidebar_link { + background-color: blue; + color: white; + text-transform: none; + margin-top: 0.5rem; + font-size: 16pt; + text-decoration: none; + padding: 1rem; +} + +.sidebar_logo { + width: 30rem; +} + +.sidebar_hr { + border-width: 0.3vh; + width: 100%; + border-color: black; + border-style: solid; +} + +.sidebar_nav { + padding: 50px 50px 50px 50px +} + +.sidebar_kit { + width: 10rem; + display: flex; + justify-content: center +} + +.sidebar_report_bugs { + display: flex; + justify-content: center +} + .no-gutters .col { padding-right: 0; padding-left: 0; @@ -224,13 +260,13 @@ margin-left: 1rem; margin-top: 3rem; margin-bottom: 2rem; - background-color: seagreen; + background-color: royalblue; color: white; } .btn_apply, .btn_apply:hover, .btn_apply:active, .btn_apply:focus { width: 10rem; - background-color: seagreen; + background-color: royalblue; color: white; margin-right: 1rem; } @@ -241,7 +277,7 @@ margin-left: 1rem; margin-top: 1rem; margin-bottom: 2rem; - background-color: seagreen; + background-color: royalblue; color: white; } @@ -289,10 +325,41 @@ } .dropdown_overview_single_feature { - width: 20rem; + width: 26.5rem; margin-right: 1rem; } +.dropdown_overview_target { + width: 21rem; + margin-left: 1rem; + margin-right: 1rem; + display: inline-block; + margin-bottom: 1rem; + align-items: center; +} + +.dropdown_overview_target_large { + width: 50rem; + margin-left: 1rem; + margin-right: 1rem; + display: inline-block; + margin-bottom: 1rem; + 
align-items: center; +} + +.dropdown_overview_index { + display: inline-block; + width: 20rem; + margin-bottom: 1rem; + align-items: center; +} + +.text_overview_index { + display: inline-block; + margin-right: 0.5rem; + align-items: center; +} + .dropdown_overview_single_feature_full_width { margin-right: 1rem; diff --git a/callbacks/helper_callbacks/__pycache__/disable_component.cpython-36.pyc b/callbacks/helper_callbacks/__pycache__/disable_component.cpython-36.pyc new file mode 100644 index 0000000..15ac30b Binary files /dev/null and b/callbacks/helper_callbacks/__pycache__/disable_component.cpython-36.pyc differ diff --git a/callbacks/helper_callbacks/disable_component.py b/callbacks/helper_callbacks/disable_component.py new file mode 100644 index 0000000..9f19b0d --- /dev/null +++ b/callbacks/helper_callbacks/disable_component.py @@ -0,0 +1,17 @@ +def disable_link(style): + if style is None: + style = {} + + style['pointerEvents'] = 'none' + style['color'] = 'gray' + + return style + +def enable_link(style): + if style is None: + style = {} + + style['pointerEvents'] = 'auto' + style['color'] = 'royalblue' + + return style \ No newline at end of file diff --git a/callbacks/page_categorical_feature_callbacks.py b/callbacks/page_categorical_feature_callbacks.py index f856b34..e475798 100644 --- a/callbacks/page_categorical_feature_callbacks.py +++ b/callbacks/page_categorical_feature_callbacks.py @@ -18,18 +18,17 @@ # import figures -from view.page_helper_components.plots import get_numeric_categorical_ratio_plot, get_categorical_feature_pie_plot +from view.page_helper_components.plots import get_numeric_categorical_ratio_plot, get_categorical_feature_pie_plot, get_overview_histogram_plot # import utility from methods.util import count_unique_values @app.callback( - Output("figure_categorical_feature_pie", "figure"), - Output("container_feature_encoding", "style"), + Output("figure_categorical_feature_pie", 
"figure", allow_duplicate=True), Input("dropdown_categorical_feature", "value"), - State("container_feature_encoding", "style"), + prevent_initial_call=True ) -def update_categorical_plot(col, style): +def update_categorical_plot(col): if table_data.DF_RAW is None: return dash.no_update @@ -39,39 +38,51 @@ def update_categorical_plot(col, style): if col is None or col == "" or triggered_id is None: return dash.no_update - if col not in list(df_cat.columns): - if style is None: - style = {'display': 'none'} - else: - style['display'] = 'none' - return dash.no_update, style + df = compute_plot(table_data.DF_RAW, None, col, reset_index=True) + # draw Figure + figure = get_overview_histogram_plot(df, col) - counts = count_unique_values(df_cat, col) + return figure + +@app.callback( + Output("container_feature_encoding", "style", allow_duplicate=True), + Input("dropdown_categorical_feature", "options"), + State("container_feature_encoding", "style"), + prevent_initial_call=True +) +def update_categorical_plot(options, style): + if table_data.DF_RAW is None: + return dash.no_update - figure = get_categorical_feature_pie_plot(counts) + if style is None: + style = {} + + if options == []: + style['display'] = 'none' - return figure, style + return style @app.callback( # update categorical page - Output("alert_categorical_unconvertable_string", "is_open"), - Output("figure_categorical_feature_pie", "figure"), - Output("dropdown_categorical_feature", "options"), - Output("dropdown_categorical_feature", "value"), - Output("dropdown_replace_value1", "options"), - Output("dropdown_replace_value1", "value"), - Output("dropdown_replace_value2", "options"), - Output("dropdown_replace_value2", "value"), + Output("alert_categorical_unconvertable_string", "is_open", allow_duplicate=True), + Output("figure_categorical_feature_pie", "figure", allow_duplicate=True), + Output("dropdown_categorical_feature", "options", allow_duplicate=True), + Output("dropdown_categorical_feature", 
"value", allow_duplicate=True), + Output("dropdown_replace_value1", "options", allow_duplicate=True), + Output("dropdown_replace_value1", "value", allow_duplicate=True), + Output("dropdown_replace_value2", "options", allow_duplicate=True), + Output("dropdown_replace_value2", "value", allow_duplicate=True), # update overview page - Output("datatable_overview", "data"), - Output("datatable_overview", "columns"), + Output("datatable_overview", "data", allow_duplicate=True), + Output("datatable_overview", "columns", allow_duplicate=True), # inputs Input("button_categorical_apply", "n_clicks"), State("dropdown_categorical_feature", "value"), State("dropdown_categorical_strategy", "value"), State("dropdown_replace_value1", "value"), State("dropdown_replace_value2", "value"), + prevent_initial_call=True ) def update_df_after_encoding(n_clicks, col, strategy, in_str, out_str): if table_data.DF_RAW is None: @@ -103,7 +114,7 @@ def update_df_after_encoding(n_clicks, col, strategy, in_str, out_str): # hide feature encoding when no more categorical features otherwise update parameter if len(options_cat) > 0: # if replacement selected update dropdowns - if strategy == ENCODING_STRATEGIES[3]: + if strategy == ENCODING_STRATEGIES[4]: unique_values = table_data.DF_RAW[col].unique().tolist() options_replacement = unique_values @@ -147,8 +158,9 @@ def update_df_after_encoding(n_clicks, col, strategy, in_str, out_str): return False, figure, options_cat, value, options_replacement, in_str, options_replacement, out_str, data_datatable_overview, columns_datatable_overview @app.callback( - Output("exploration_categorical_feature_ratio_bar_plot", "figure"), + Output("exploration_categorical_feature_ratio_bar_plot", "figure", allow_duplicate=True), Input("button_categorical_apply", "n_clicks"), + prevent_initial_call=True ) def update_ratio(n_clicks): if table_data.DF_RAW is None: @@ -161,11 +173,12 @@ def update_ratio(n_clicks): return figure @app.callback( - 
Output("dropdown_replace_value1", "options"), - Output("dropdown_replace_value1", "value"), - Output("dropdown_replace_value2", "options"), - Output("dropdown_replace_value2", "value"), + Output("dropdown_replace_value1", "options", allow_duplicate=True), + Output("dropdown_replace_value1", "value", allow_duplicate=True), + Output("dropdown_replace_value2", "options", allow_duplicate=True), + Output("dropdown_replace_value2", "value", allow_duplicate=True), Input("dropdown_categorical_feature", "value"), + prevent_initial_call=True ) def update_replacement(col): triggered_id = ctx.triggered_id @@ -184,17 +197,50 @@ def update_replacement(col): return options, value1, options, value2 @app.callback( - Output("card_categorical_replacement", "style"), + Output("card_categorical_replacement", "style", allow_duplicate=True), Input("dropdown_categorical_strategy", "value"), State("card_categorical_replacement", "style"), + prevent_initial_call=True ) def update_parameter(strategy, style): - if strategy != ENCODING_STRATEGIES[3]: + if strategy != ENCODING_STRATEGIES[4]: style['display'] = 'none' else: - style['display'] = 'inline-block' + style['display'] = 'block' return style +@app.callback( + Output("img_categorical_strategy", "src", allow_duplicate=True), + Output("link_categorical_strategy", "href", allow_duplicate=True), + Output("tooltip_categorical_strategy", "children", allow_duplicate=True), + Input("dropdown_categorical_strategy", "value"), + prevent_initial_call=True +) +def update_info(strategy): + if strategy == ENCODING_STRATEGIES[0]: + src = '/assets/img/link.png' + href = ENCODING_LINKS[0] + children = ENCODING_DESCRIPTIONS[0] + elif strategy == ENCODING_STRATEGIES[1]: + src = '/assets/img/link.png' + href = ENCODING_LINKS[1] + children = ENCODING_DESCRIPTIONS[1] + elif strategy == ENCODING_STRATEGIES[2]: + src = '/assets/img/link.png' + href = ENCODING_LINKS[2] + children = ENCODING_DESCRIPTIONS[2] + elif strategy == ENCODING_STRATEGIES[3]: + src = 
'/assets/img/tooltip.png' + href = ENCODING_LINKS[3] + children = ENCODING_DESCRIPTIONS[3] + elif strategy == ENCODING_STRATEGIES[4]: + src = '/assets/img/tooltip.png' + href = ENCODING_LINKS[4] + children = ENCODING_DESCRIPTIONS[4] + else: + return dash.no_update + + return src, href, children diff --git a/callbacks/page_data_loading_callbacks.py b/callbacks/page_data_loading_callbacks.py index 569f4e6..a588924 100644 --- a/callbacks/page_data_loading_callbacks.py +++ b/callbacks/page_data_loading_callbacks.py @@ -25,57 +25,68 @@ # import data from data import table_data -@app.callback(Output('text_loading_selected_data', 'children'), - Output('button_load', 'disabled'), - Output('container_loading_table_data_parameter', 'style'), +@app.callback(Output('text_loading_selected_data', 'children', allow_duplicate=True), + Output('button_load', 'disabled', allow_duplicate=True), + Output('container_loading_table_data_parameter', 'style', allow_duplicate=True), + Output('container_parameter_loading', 'style', allow_duplicate=True), Input('upload_data', 'contents'), State('upload_data', 'filename'), Input('dropdown_loading_datatype', 'value'), State('container_loading_table_data_parameter', 'style'), + State('container_parameter_loading', 'style'), + prevent_initial_call=True ) -def update_parameter(contents, filename, datatype, style_table_data): +def update_parameter(contents, filename, datatype, style_table_data, style_loading_parameter): if style_table_data is None: style_table_data = {} + + if style_loading_parameter is None: + style_loading_parameter = {} if contents is not None: if datatype == DATA_TYPES[0] or datatype == DATA_TYPES[1]: style_table_data['display'] = 'block' + else: style_table_data['display'] = 'none' disabled = False + style_loading_parameter['display'] = 'block' else: filename = "" style_table_data['display'] = 'none' disabled = True - return filename, disabled, style_table_data + style_loading_parameter['display'] = 'none' + return filename, disabled, 
style_table_data, style_loading_parameter -@app.callback(Output('button_load', 'disabled'), - Output('alert_loading_files', 'is_open'), - Output("datatable_overview", "data"), - Output("button_overview", 'disabled'), - Output("button_categorical", 'disabled'), - Output("button_na_values", 'disabled'), - Output("button_outlier", 'disabled'), - Output("button_ts", 'disabled'), - Output("button_sc", 'disabled'), - Output("button_sr", 'disabled'), - Output("button_usl", 'disabled'), +@app.callback(Output('button_load', 'disabled', allow_duplicate=True), + Output('alert_loading_files', 'is_open', allow_duplicate=True), + Output("datatable_overview", "data", allow_duplicate=True), + Output("button_overview", 'disabled', allow_duplicate=True), + Output("button_categorical", 'disabled', allow_duplicate=True), + Output("button_na_values", 'disabled', allow_duplicate=True), + Output("button_outlier", 'disabled', allow_duplicate=True), + Output("button_ts", 'disabled', allow_duplicate=True), + Output("button_sc", 'disabled', allow_duplicate=True), + Output("button_sr", 'disabled', allow_duplicate=True), + Output("url", "pathname", allow_duplicate=True), Input('button_load', 'n_clicks'), State('upload_data', 'contents'), State('upload_data', 'filename'), State('dropdown_loading_datatype', 'value'), State('dropdown_loading_table_data_seperator', 'value'), State('dropdown_loading_table_data_index', 'value'), + prevent_initial_call=True ) def load_data(n_clicks, contents, filename, datatype, sep, index): is_open = False data_datatable_overview = [] columns_datatable_overview = [] params = {} + path_name = dash.no_update if n_clicks is None or n_clicks == 0: if table_data.DF_RAW is None: - return 3 * [dash.no_update] + 8 * [True] + return 3 * [dash.no_update] + 7 * [True] + [dash.no_update] else: cat_cols = table_data.DF_RAW.select_dtypes(include='object').columns nan_cols = table_data.DF_RAW.columns[table_data.DF_RAW.isna().any()].tolist() @@ -84,7 +95,7 @@ def 
load_data(n_clicks, contents, filename, datatype, sep, index): categorical_disabled = len(cat_cols) == 0 na_disabled = len(nan_cols) == 0 or len(cat_cols) > 0 rest_disabled = len(nan_cols) > 0 != [] or len(cat_cols) > 0 - return 3 * [dash.no_update] + [overview_disabled, categorical_disabled, na_disabled, rest_disabled, rest_disabled, rest_disabled, rest_disabled, rest_disabled] + return 3 * [dash.no_update] + [overview_disabled, categorical_disabled, na_disabled, rest_disabled, rest_disabled, rest_disabled, rest_disabled, path_name] is_open = False if contents is not None: @@ -103,6 +114,7 @@ def load_data(n_clicks, contents, filename, datatype, sep, index): table_data.ALL_DATASETS = {} delete_dataset(IN_PROCESSING_DATASETNAME) delete_dataset_states() + path_name = "/page-1/1" else: return [False] + [True] + 9 * [dash.no_update] # TODO add more datatypes @@ -115,7 +127,7 @@ def load_data(n_clicks, contents, filename, datatype, sep, index): na_disabled = len(nan_cols) == 0 or len(cat_cols) > 0 rest_disabled = len(nan_cols) > 0 != [] or len(cat_cols) > 0 - training_disabled = True + training_disabled = True - return False, False, data_datatable_overview, overview_disabled, categorical_disabled, na_disabled, rest_disabled, rest_disabled, training_disabled, training_disabled, training_disabled + return False, False, data_datatable_overview, overview_disabled, categorical_disabled, na_disabled, rest_disabled, rest_disabled, training_disabled, training_disabled, path_name diff --git a/callbacks/page_na_value_callbacks.py b/callbacks/page_na_value_callbacks.py index b2f0fc6..0cae8da 100644 --- a/callbacks/page_na_value_callbacks.py +++ b/callbacks/page_na_value_callbacks.py @@ -15,6 +15,9 @@ # import plots from view.page_helper_components.plots import * +# import sliders +from view.page_helper_components.sliders import * + # import app from view.app import app @@ -27,9 +30,10 @@ # update simple style @app.callback( - Output("container_na_simple", "style"), + 
Output("container_na_simple", "style", allow_duplicate=True), Input("dropdown_na_method", "value"), - State("container_na_simple", "style") + State("container_na_simple", "style"), + prevent_initial_call=True ) def update_style_simple(method, style): if style is None: @@ -41,10 +45,11 @@ def update_style_simple(method, style): return style @app.callback( - Output("container_na_simple_fill_value", "style"), + Output("container_na_simple_fill_value", "style", allow_duplicate=True), Input("dropdown_na_simple_strategy", "value"), Input("dropdown_na_method", "value"), - State("container_na_simple_fill_value", "style") + State("container_na_simple_fill_value", "style"), + prevent_initial_call=True ) def update_style_simple_fill_value(strategy, method, style): if style is None: @@ -55,12 +60,31 @@ def update_style_simple_fill_value(strategy, method, style): style['display'] = 'none' return style +@app.callback( + Output("container_na_imputer", "style", allow_duplicate=True), + Input("dropdown_na_feature", "options"), + State("container_na_imputer", "style"), + prevent_initial_call=True +) +def update_categorical_plot(options, style): + if table_data.DF_RAW is None: + return dash.no_update + + if style is None: + style = {} + + if options == []: + style['display'] = 'none' + + return style + # update iterative style @app.callback( - Output("container_na_iterative", "style"), + Output("container_na_iterative", "style", allow_duplicate=True), Input("dropdown_na_method", "value"), - State("container_na_iterative", "style") + State("container_na_iterative", "style"), + prevent_initial_call=True ) def update_style_iterative(method, style): if style is None: @@ -72,10 +96,11 @@ def update_style_iterative(method, style): return style @app.callback( - Output("container_na_iterative_fill_value", "style"), + Output("container_na_iterative_fill_value", "style", allow_duplicate=True), Input("dropdown_na_iterative_initial_strategy", "value"), Input("dropdown_na_method", "value"), - 
State("container_na_iterative_fill_value", "style") + State("container_na_iterative_fill_value", "style"), + prevent_initial_call=True ) def update_style_iterative_fill_value(strategy, method, style): if style is None: @@ -88,9 +113,10 @@ def update_style_iterative_fill_value(strategy, method, style): # update knn style @app.callback( - Output("container_na_knn", "style"), + Output("container_na_knn", "style", allow_duplicate=True), Input("dropdown_na_method", "value"), - State("container_na_knn", "style") + State("container_na_knn", "style"), + prevent_initial_call=True ) def update_style_knn(method, style): if style is None: @@ -103,9 +129,10 @@ def update_style_knn(method, style): # update manual style @app.callback( - Output("container_na_manual", "style"), + Output("container_na_manual", "style", allow_duplicate=True), Input("dropdown_na_method", "value"), - State("container_na_manual", "style") + State("container_na_manual", "style"), + prevent_initial_call=True ) def update_style_manual(method, style): if style is None: @@ -116,27 +143,48 @@ def update_style_manual(method, style): style['display'] = 'none' return style +# update fill value simple imputer +@app.callback( + Output("input_na_manual_fill_value", "style", allow_duplicate=True), + Input("dropdown_na_method", "value"), + State("input_na_manual_fill_value", "style"), + prevent_initial_call=True +) +def update_style_fill_value(method, style): + if style is None: + style = {} + if method == IMPUTER_METHODS[3]: + style['display'] = 'block' + else: + style['display'] = 'none' + return style + # update button styles @app.callback( - Output("button_na_apply", "style"), - Output("button_na_show", "style"), + Output("button_na_apply", "style", allow_duplicate=True), + Output("button_na_show", "style", allow_duplicate=True), Input("button_na_show", "n_clicks"), + Input("button_na_apply", "n_clicks"), Input("dropdown_na_feature", "value"), Input("dropdown_na_method", "value"), 
Input("dropdown_na_simple_strategy", "value"), Input("input_na_simple_fill_value", "value"), + Input("dropdown_na_iterative_filling_feature", "value"), Input("slider_na_iterative_max_iter", "value"), + Input("dropdown_na_iterative_tol", "value"), Input("slider_na_iterative_n_nearest_features", "value"), Input("dropdown_na_iterative_initial_strategy", "value"), Input("dropdown_na_iterative_imputation_order", "value"), + Input("dropdown_na_knn_filling_feature", "value"), Input("slider_na_knn_n_neighbors", "value"), - Input("dropdown_na_iterative_weights", "value"), + Input("dropdown_na_knn_weights", "value"), Input("input_na_manual_index", "value"), Input("input_na_manual_fill_value", "value"), State("button_na_apply", "style"), - State("button_na_show", "style") + State("button_na_show", "style"), + prevent_initial_call=True ) -def update_style_buttons(n_clicks, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, style_apply, style_show): +def update_style_buttons(n_clicks1, n_clicks2, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, style_apply, style_show): triggered_id = ctx.triggered_id if style_apply is None: style_apply = {} @@ -153,23 +201,27 @@ def update_style_buttons(n_clicks, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, # update line plot @app.callback( - Output("loading_na_imputer_preview", "children"), + Output("loading_na_imputer_preview", "children", allow_duplicate=True), Input("button_na_show", "n_clicks"), State("dropdown_na_feature", "value"), State("dropdown_na_method", "value"), State("dropdown_na_simple_strategy", "value"), State("input_na_simple_fill_value", "value"), + State("dropdown_na_iterative_filling_feature", "value"), State("slider_na_iterative_max_iter", "value"), + State("dropdown_na_iterative_tol", "value"), State("slider_na_iterative_n_nearest_features", "value"), State("dropdown_na_iterative_initial_strategy", "value"), State("dropdown_na_iterative_fill_value", "value"), 
State("dropdown_na_iterative_imputation_order", "value"), + State("dropdown_na_knn_filling_feature", "value"), State("slider_na_knn_n_neighbors", "value"), - State("dropdown_na_iterative_weights", "value"), + State("dropdown_na_knn_weights", "value"), State("input_na_manual_index", "value"), State("input_na_manual_fill_value", "value"), + prevent_initial_call=True ) -def update_line_plot_after_imputing(n_clicks, col, method, simple_strategy, simple_fill_value, iterative_max_iter, iterative_n_nearest_features, iterative_initial_strategy, iterative_fill_value, iterative_imputation_order, knn_n_neighbors, iterative_weights, manual_index, manual_fill_value): +def update_line_plot_after_imputing(n_clicks, col, method, simple_strategy, simple_fill_value, iterative_filling_feature, iterative_max_iter, iterative_tol, iterative_n_nearest_features, iterative_initial_strategy, iterative_fill_value, iterative_imputation_order, knn_filling_feature, knn_n_neighbors, knn_weights, manual_index, manual_fill_value): if col is None: return dash.no_update @@ -181,17 +233,23 @@ def update_line_plot_after_imputing(n_clicks, col, method, simple_strategy, simp if simple_strategy == list(IMPUTER_STRATEGIES.keys())[3]: params['fill_value'] = simple_fill_value elif method == IMPUTER_METHODS[1]: # iterativ imputer + col = [col] + iterative_filling_feature params['missing_values'] = np.nan params['max_iter'] = iterative_max_iter - params['n_nearest_features'] = iterative_n_nearest_features - #params['initial_strategy'] = IMPUTER_STRATEGIES[iterative_initial_strategy] - #if iterative_initial_strategy == list(IMPUTER_STRATEGIES.keys())[3]: - # params['fill_value'] = iterative_fill_value + params['tol'] = iterative_tol + if iterative_n_nearest_features < len(iterative_filling_feature): + params['n_nearest_features'] = iterative_n_nearest_features + else: + params['n_nearest_features'] = None + params['initial_strategy'] = IMPUTER_STRATEGIES[iterative_initial_strategy] + if 
iterative_initial_strategy == list(IMPUTER_STRATEGIES.keys())[3]: + params['fill_value'] = iterative_fill_value params['imputation_order'] = IMPUTER_ORDER[iterative_imputation_order] elif method == IMPUTER_METHODS[2]: # knn imputer + col = [col] + knn_filling_feature params['missing_values'] = np.nan params['n_neighbors'] = knn_n_neighbors - params['weights'] = IMPUTER_WEIGHTS[iterative_weights] + params['weights'] = IMPUTER_WEIGHTS[knn_weights] elif method == IMPUTER_METHODS[3]: # manual params['index'] = manual_index params['fill_value'] = manual_fill_value @@ -200,6 +258,10 @@ def update_line_plot_after_imputing(n_clicks, col, method, simple_strategy, simp df_num = table_data.DF_RAW.select_dtypes(include=NUMERICS) df_num = apply_imputing(df_num, col, method, params) + # remove helping columns + if method == IMPUTER_METHODS[1] or method == IMPUTER_METHODS[2]: + col = col[0] + # get nan positions in the dataframe if method != IMPUTER_METHODS[3]: nan_positions = get_nan_positions(table_data.DF_RAW, col)[0] @@ -211,42 +273,125 @@ def update_line_plot_after_imputing(n_clicks, col, method, simple_strategy, simp return graph +# update selected features +@app.callback( + Output('dropdown_na_knn_filling_feature', 'value', allow_duplicate=True), + Input("checkbox_na_knn_filling_feature", "value"), + State('dropdown_na_knn_filling_feature', 'options'), + prevent_initial_call=True +) +def update_selected_features(all_features, options): + if all_features is None or all_features == []: + return dash.no_update + + return options + +# update selected features +@app.callback( + Output('dropdown_na_iterative_filling_feature', 'value', allow_duplicate=True), + Input("checkbox_na_iterative_filling_feature", "value"), + State('dropdown_na_iterative_filling_feature', 'options'), + prevent_initial_call=True +) +def update_selected_features(all_features, options): + if all_features is None or all_features == []: + return dash.no_update + + return options + +# update features 
+@app.callback( + Output('dropdown_na_iterative_filling_feature', 'value', allow_duplicate=True), + Output('dropdown_na_iterative_filling_feature', 'options', allow_duplicate=True), + Input("dropdown_na_feature", "value"), + Input("dropdown_na_feature", "options"), + prevent_initial_call=True +) +def update_selected_features(na_value, na_options): + if na_options is None or na_options == []: + return dash.no_update + + options = list(table_data.DF_RAW.columns) + + options.remove(na_value) + + return options, options + +# update n nearest features +@app.callback( + Output('slider_na_iterative_n_nearest_features', 'value', allow_duplicate=True), + Output('slider_na_iterative_n_nearest_features', 'marks', allow_duplicate=True), + Input("dropdown_na_iterative_filling_feature", "value"), + prevent_initial_call=True +) +def update_selected_features(filling_features): + if filling_features is None or filling_features == []: + return dash.no_update + + value = len(filling_features) + max_features = len(filling_features) - 1 + marks = get_slider_marks_nearest_feature(max_features) + + return value, marks + +# update features +@app.callback( + Output('dropdown_na_knn_filling_feature', 'value', allow_duplicate=True), + Output('dropdown_na_knn_filling_feature', 'options', allow_duplicate=True), + Input("dropdown_na_feature", "value"), + Input("dropdown_na_feature", "options"), + prevent_initial_call=True +) +def update_selected_features(na_value, na_options): + if na_options is None or na_options == []: + return dash.no_update + + options = list(table_data.DF_RAW.columns) + + options.remove(na_value) + + return options, options + # update line plot @app.callback( # update parameter - Output("dropdown_na_feature", "options"), - Output("dropdown_na_feature", "value"), - Output("slider_na_iterative_n_nearest_features", "max"), - Output("slider_na_iterative_n_nearest_features", "value"), - Output("slider_na_iterative_n_nearest_features", "marks"), + Output("dropdown_na_feature", 
"options", allow_duplicate=True), + Output("dropdown_na_feature", "value", allow_duplicate=True), + Output("slider_na_iterative_n_nearest_features", "max", allow_duplicate=True), + Output("slider_na_iterative_n_nearest_features", "value", allow_duplicate=True), + Output("slider_na_iterative_n_nearest_features", "marks", allow_duplicate=True), # update na plots - Output("figure_na_bar_plot", "figure"), - Output("figure_na_heatmap", "figure"), + Output("figure_na_bar_plot", "figure", allow_duplicate=True), + Output("figure_na_heatmap", "figure", allow_duplicate=True), # update overview page - Output("datatable_overview", "data"), - Output("datatable_overview", "columns"), + Output("datatable_overview", "data", allow_duplicate=True), + Output("datatable_overview", "columns", allow_duplicate=True), # update sidebar - Output("button_outlier", "disabled"), - Output("button_ts", "disabled"), - Output("button_sc", "disabled"), - Output("button_sr", "disabled"), + Output("button_outlier", "disabled", allow_duplicate=True), + Output("button_ts", "disabled", allow_duplicate=True), + Output("button_sc", "disabled", allow_duplicate=True), + Output("button_sr", "disabled", allow_duplicate=True), # inputs Input("button_na_apply", "n_clicks"), State("dropdown_na_feature", "value"), State("dropdown_na_method", "value"), State("dropdown_na_simple_strategy", "value"), - State("input_na_simple_fill_value", "value"), + State("input_na_simple_fill_value", "value"), + State("dropdown_na_iterative_filling_feature", "value"), State("slider_na_iterative_max_iter", "value"), + State("dropdown_na_iterative_tol", "value"), State("slider_na_iterative_n_nearest_features", "value"), State("dropdown_na_iterative_initial_strategy", "value"), State("dropdown_na_iterative_fill_value", "value"), State("dropdown_na_iterative_imputation_order", "value"), + State("dropdown_na_knn_filling_feature", "value"), State("slider_na_knn_n_neighbors", "value"), - State("dropdown_na_iterative_weights", "value"), + 
State("dropdown_na_knn_weights", "value"), State("input_na_manual_index", "value"), State("input_na_manual_fill_value", "value"), + prevent_initial_call=True ) -def update_df_after_imputing(n_clicks, col, method, simple_strategy, simple_fill_value, iterative_max_iter, iterative_n_nearest_features, iterative_initial_strategy, iterative_fill_value, iterative_imputation_order, knn_n_neighbors, iterative_weights, manual_index, manual_fill_value): +def update_df_after_imputing(n_clicks, col, method, simple_strategy, simple_fill_value, iterative_filling_feature, iterative_max_iter, iterative_tol, iterative_n_nearest_features, iterative_initial_strategy, iterative_fill_value, iterative_imputation_order, knn_filling_feature, knn_n_neighbors, knn_weights, manual_index, manual_fill_value): if n_clicks is None or n_clicks == 0: return dash.no_update # read out parameter @@ -257,17 +402,23 @@ def update_df_after_imputing(n_clicks, col, method, simple_strategy, simple_fill if simple_strategy == list(IMPUTER_STRATEGIES.keys())[3]: params['fill_value'] = simple_fill_value elif method == IMPUTER_METHODS[1]: # iterativ imputer + col = [col] + iterative_filling_feature params['missing_values'] = np.nan params['max_iter'] = iterative_max_iter - params['n_nearest_features'] = iterative_n_nearest_features + params['tol'] = iterative_tol + if iterative_n_nearest_features < len(iterative_filling_feature): + params['n_nearest_features'] = iterative_n_nearest_features + else: + params['n_nearest_features'] = None params['initial_strategy'] = IMPUTER_STRATEGIES[iterative_initial_strategy] if iterative_initial_strategy == list(IMPUTER_STRATEGIES.keys())[3]: params['fill_value'] = iterative_fill_value params['imputation_order'] = IMPUTER_ORDER[iterative_imputation_order] elif method == IMPUTER_METHODS[2]: # knn imputer + col = [col] + knn_filling_feature params['missing_values'] = np.nan params['n_neighbors'] = knn_n_neighbors - params['weights'] = IMPUTER_WEIGHTS[iterative_weights] + 
params['weights'] = IMPUTER_WEIGHTS[knn_weights] elif method == IMPUTER_METHODS[3]: # manual params['index'] = manual_index params['fill_value'] = manual_fill_value @@ -278,6 +429,9 @@ def update_df_after_imputing(n_clicks, col, method, simple_strategy, simple_fill for i in tmp.columns: table_data.DF_RAW[i] = tmp[i] + # remove helping columns + if method == IMPUTER_METHODS[1] or method == IMPUTER_METHODS[2]: + col = col[0] # update parameter df_num = table_data.DF_RAW.select_dtypes(include=NUMERICS) @@ -313,4 +467,33 @@ def update_df_after_imputing(n_clicks, col, method, simple_strategy, simple_fill # update sidebar sidebar_disabled = nan_cols != [] - return options, value, max_nearest_features, iterative_n_nearest_features, marks, figure_na, figure_na_heatmap, data_datatable_overview, columns_datatable_overview, sidebar_disabled, sidebar_disabled, sidebar_disabled, sidebar_disabled \ No newline at end of file + return options, value, max_nearest_features, iterative_n_nearest_features, marks, figure_na, figure_na_heatmap, data_datatable_overview, columns_datatable_overview, sidebar_disabled, sidebar_disabled, sidebar_disabled, sidebar_disabled + +@app.callback( + Output("img_na_strategy", "src", allow_duplicate=True), + Output("link_na_strategy", "href", allow_duplicate=True), + Output("tooltip_na_strategy", "children", allow_duplicate=True), + Input("dropdown_na_method", "value"), + prevent_initial_call=True +) +def update_info(strategy): + if strategy == IMPUTER_METHODS[0]: + src = '/assets/img/link.png' + href = IMPUTER_LINKS[0] + children = IMPUTER_DESCRIPTIONS[0] + elif strategy == IMPUTER_METHODS[1]: + src = '/assets/img/link.png' + href = IMPUTER_LINKS[1] + children = IMPUTER_DESCRIPTIONS[1] + elif strategy == IMPUTER_METHODS[2]: + src = '/assets/img/link.png' + href = IMPUTER_LINKS[2] + children = IMPUTER_DESCRIPTIONS[2] + elif strategy == IMPUTER_METHODS[3]: + src = '/assets/img/tooltip.png' + href = IMPUTER_LINKS[3] + children = IMPUTER_DESCRIPTIONS[3] + 
else: + return dash.no_update + + return src, href, children \ No newline at end of file diff --git a/callbacks/page_outlier_detection_callbacks.py b/callbacks/page_outlier_detection_callbacks.py index 38b026e..a4658e4 100644 --- a/callbacks/page_outlier_detection_callbacks.py +++ b/callbacks/page_outlier_detection_callbacks.py @@ -27,9 +27,10 @@ # update random forest detector style @app.callback( - Output("container_outlier_random_forest", "style"), + Output("container_outlier_random_forest", "style", allow_duplicate=True), Input("dropdown_outlier_method", "value"), - State("container_outlier_random_forest", "style") + State("container_outlier_random_forest", "style"), + prevent_initial_call=True ) def update_style_rf(method, style): if style is None: @@ -42,9 +43,10 @@ def update_style_rf(method, style): # update density detector style @app.callback( - Output("container_outlier_densitiy", "style"), + Output("container_outlier_densitiy", "style", allow_duplicate=True), Input("dropdown_outlier_method", "value"), - State("container_outlier_densitiy", "style") + State("container_outlier_densitiy", "style"), + prevent_initial_call=True ) def update_style_density(method, style): if style is None: @@ -57,9 +59,10 @@ def update_style_density(method, style): # update kv detector style @app.callback( - Output("container_outlier_kv", "style"), + Output("container_outlier_kv", "style", allow_duplicate=True), Input("dropdown_outlier_method", "value"), - State("container_outlier_kv", "style") + State("container_outlier_kv", "style"), + prevent_initial_call=True ) def update_style_kv(method, style): if style is None: @@ -70,21 +73,39 @@ def update_style_kv(method, style): style['display'] = 'none' return style +# update selected features +@app.callback( + Output('dropdown_outlier_feature', 'value', allow_duplicate=True), + Input("checklist_outlier_all_features", "value"), + State('dropdown_outlier_feature', 'options'), + prevent_initial_call=True +) +def 
update_selected_features(all_features, options): + if all_features is None or all_features == []: + return dash.no_update + + return options + # update button styles @app.callback( - Output("button_outlier_apply", "style"), - Output("button_outlier_show", "style"), + Output("button_outlier_apply", "style", allow_duplicate=True), + Output("button_outlier_show", "style", allow_duplicate=True), Input("button_outlier_show", "n_clicks"), Input("button_outlier_apply", "n_clicks"), + Input("dropdown_outlier_feature", "value"), Input("dropdown_outlier_method", "value"), + Input("dropdown_outlier_random_forest_contamination", "value"), Input("slider_outlier_random_forest_n_estimators", "value"), Input("slider_outlier_densitiy_n_neighbors", "value"), + Input("dropdown_outlier_densitiy_contamination", "value"), + Input("dropdown_outlier_densitiy_metric", "value"), + Input("dropdown_outlier_densitiy_p", "value"), Input("dropdown_outlier_densitiy_algorithm", "value"), - Input("dropdown_outlier_kv_feature", "value"), State("button_outlier_apply", "style"), - State("button_outlier_show", "style") + State("button_outlier_show", "style"), + prevent_initial_call=True ) -def update_style_buttons(n_clicks1, n_clicks2, v1, v2, v3, v4, v5, style_apply, style_show): +def update_style_buttons(n_clicks1, n_clicks2, v1, v2, v3, v4, v5, v6, v7, v8, v9, style_apply, style_show): triggered_id = ctx.triggered_id if style_apply is None: style_apply = {} @@ -104,34 +125,47 @@ def update_style_buttons(n_clicks1, n_clicks2, v1, v2, v3, v4, v5, style_apply, # update outlier plot @app.callback( - Output("loading_outlier_preview", "children"), - Output("table_outlier_detection", "data"), - Output("table_outlier_detection", "columns"), - Output("table_outlier_detection", "selected_rows"), + Output("loading_outlier_preview", "children", allow_duplicate=True), + Output("table_outlier_detection", "data", allow_duplicate=True), + Output("table_outlier_detection", "columns", allow_duplicate=True), + 
Output("table_outlier_detection", "selected_rows", allow_duplicate=True), Input("button_outlier_show", "n_clicks"), - State("dropdown_outlier_method", "value"), + State("dropdown_outlier_feature", "value"), + State("dropdown_outlier_method", "value"),# + State("dropdown_outlier_random_forest_contamination", "value"), State("slider_outlier_random_forest_n_estimators", "value"), State("slider_outlier_densitiy_n_neighbors", "value"), + State("dropdown_outlier_densitiy_contamination", "value"), + State("dropdown_outlier_densitiy_metric", "value"), + State("dropdown_outlier_densitiy_p", "value"), State("dropdown_outlier_densitiy_algorithm", "value"), - State("dropdown_outlier_kv_feature", "value"), State("table_outlier_detection", "data"), + prevent_initial_call=True ) -def update_outlier_plot(n_clicks, method, rf_n_estimators, densitiy_n_neighbors, densitiy_algorithm, kv_feature, data): +def update_outlier_plot(n_clicks, cols, method, rf_contamination, rf_n_estimators, densitiy_n_neighbors, density_contamination, density_metric, density_p, densitiy_algorithm, data): if n_clicks is None or n_clicks == 0: return dash.no_update # read out parameter params = {} if method == OUTLIER_DETECTION_METHODS[0]: # isolation forest detector params['n_estimators'] = rf_n_estimators + if rf_contamination != 'auto': + rf_contamination = float(rf_contamination) + params['contamination'] = rf_contamination elif method == OUTLIER_DETECTION_METHODS[1]: # density detector params['n_neighbors'] = densitiy_n_neighbors + if density_contamination != 'auto': + density_contamination = float(density_contamination) + params['contamination'] = density_contamination + params['metric'] = OUTLIER_DETECTION_LOCAL_OUTLIER_FACTOR_METRIC[density_metric] + params['p'] = density_p params['algorithm'] = OUTLIER_DETECTION_LOCAL_ALGORITHM[densitiy_algorithm] elif method == OUTLIER_DETECTION_METHODS[2]: # kv detector - params['feature'] = kv_feature + pass # apply detector - df_num = 
table_data.DF_RAW.select_dtypes(include=NUMERICS) - df_outlier, is_outlier= apply_outlier_detection(df_num, method, params) + df = table_data.DF_RAW[cols] + df_outlier, is_outlier= apply_outlier_detection(df, method, params) # update figure figure = get_outlier_plot(df_outlier) @@ -139,7 +173,7 @@ def update_outlier_plot(n_clicks, method, rf_n_estimators, densitiy_n_neighbors, # update outlier datatable indices = is_outlier.index[is_outlier == True] - df_outlier = df_num.loc[indices] + df_outlier = df.loc[indices] df_outlier = df_outlier.reset_index() data_datatable = [{col: df_outlier.loc[i, col] for col in df_outlier.columns} for i in df_outlier.index] @@ -150,33 +184,44 @@ def update_outlier_plot(n_clicks, method, rf_n_estimators, densitiy_n_neighbors, # update outlier plot @app.callback( - Output("table_outlier_detection", "data"), - Output("figure_outlier_preview", "figure"), + Output("table_outlier_detection", "data", allow_duplicate=True), + Output("figure_outlier_preview", "figure", allow_duplicate=True), # update overview page - Output("datatable_overview", "data"), - Output("datatable_overview", "columns"), + Output("datatable_overview", "data", allow_duplicate=True), + Output("datatable_overview", "columns", allow_duplicate=True), # inputes Input("button_outlier_apply", "n_clicks"), + State("dropdown_outlier_feature", "value"), State("dropdown_outlier_method", "value"), + State("dropdown_outlier_random_forest_contamination", "value"), State("slider_outlier_random_forest_n_estimators", "value"), State("slider_outlier_densitiy_n_neighbors", "value"), + State("dropdown_outlier_densitiy_contamination", "value"), + State("dropdown_outlier_densitiy_metric", "value"), + State("dropdown_outlier_densitiy_p", "value"), State("dropdown_outlier_densitiy_algorithm", "value"), - State("dropdown_outlier_kv_feature", "value"), State("table_outlier_detection", "data"), State("table_outlier_detection", "selected_rows"), + prevent_initial_call=True ) -def 
update_outlier_df(n_clicks, method, rf_n_estimators, densitiy_n_neighbors, densitiy_algorithm, kv_feature, data, selected_rows): +def update_outlier_df(n_clicks, cols, method, rf_contamination, rf_n_estimators, densitiy_n_neighbors, density_contamination, density_metric, density_p, densitiy_algorithm, data, selected_rows): if n_clicks is None or n_clicks == 0: return dash.no_update # read out parameter params = {} if method == OUTLIER_DETECTION_METHODS[0]: # isolation forest detector + params['contamination'] = rf_contamination params['n_estimators'] = rf_n_estimators elif method == OUTLIER_DETECTION_METHODS[1]: # density detector params['n_neighbors'] = densitiy_n_neighbors + if density_contamination != 'auto': + density_contamination = float(density_contamination) + params['contamination'] = density_contamination + params['metric'] = OUTLIER_DETECTION_LOCAL_OUTLIER_FACTOR_METRIC[density_metric] + params['p'] = density_p params['algorithm'] = OUTLIER_DETECTION_LOCAL_ALGORITHM[densitiy_algorithm] elif method == OUTLIER_DETECTION_METHODS[2]: # kv detector - params['feature'] = kv_feature + pass # update dataframe indices = [] @@ -194,3 +239,28 @@ def update_outlier_df(n_clicks, method, rf_n_estimators, densitiy_n_neighbors, d columns_datatable_overview = [{"name": col, "id": col} for col in df.columns] return data, figure, data_datatable_overview, columns_datatable_overview + +@app.callback( + Output("img_outlier_strategy", "src", allow_duplicate=True), + Output("link_outlier_strategy", "href", allow_duplicate=True), + Output("tooltip_outlier_strategy", "children", allow_duplicate=True), + Input("dropdown_outlier_method", "value"), + prevent_initial_call=True +) +def update_info(strategy): + if strategy == OUTLIER_DETECTION_METHODS[0]: + src = '/assets/img/link.png' + href = OUTLIER_LINKS[0] + children = OUTLIER_DESCRIPTIONS[0] + elif strategy == OUTLIER_DETECTION_METHODS[1]: + src = '/assets/img/link.png' + href = OUTLIER_LINKS[1] + children = 
OUTLIER_DESCRIPTIONS[1] + elif strategy == OUTLIER_DETECTION_METHODS[2]: + src = '/assets/img/tooltip.png' + href = OUTLIER_LINKS[2] + children = OUTLIER_DESCRIPTIONS[2] + else: + return dash.no_update + + return src, href, children diff --git a/callbacks/page_overview_callbacks.py b/callbacks/page_overview_callbacks.py index 422a5c4..459d72e 100644 --- a/callbacks/page_overview_callbacks.py +++ b/callbacks/page_overview_callbacks.py @@ -30,72 +30,87 @@ # update after feature removal @app.callback( # update textboard - Output('text_board_shape', 'children'), - Output('text_board_memory', 'children'), - Output('text_board_na', 'children'), - Output('text_board_num', 'children'), + Output('text_board_shape', 'children', allow_duplicate=True), + Output('text_board_memory', 'children', allow_duplicate=True), + Output('text_board_na', 'children', allow_duplicate=True), + Output('text_board_num', 'children', allow_duplicate=True), # update histogram - Output("dropdown_overview_features_selection_histogramgraph", "options"), - Output("dropdown_overview_features_selection_histogramgraph", "value"), - Output("dropdown_overview_feature_selection_rangeslider_histogram", "options"), - Output("dropdown_overview_feature_selection_rangeslider_histogram", "value"), + Output("dropdown_overview_features_selection_histogram", "options", allow_duplicate=True), + Output("dropdown_overview_features_selection_histogram", "value", allow_duplicate=True), + Output("dropdown_overview_target_selection_histogram", "options", allow_duplicate=True), + Output("dropdown_overview_target_selection_histogram", "value", allow_duplicate=True), + Output("dropdown_overview_class_selection_histogram", "options", allow_duplicate=True), + Output("dropdown_overview_class_selection_histogram", "value", allow_duplicate=True), + # update violinplot + Output("dropdown_overview_features_selection_violinplot", "options", allow_duplicate=True), + Output("dropdown_overview_features_selection_violinplot", "value", 
allow_duplicate=True), + Output("dropdown_overview_target_selection_violinplot", "options", allow_duplicate=True), + Output("dropdown_overview_target_selection_violinplot", "value", allow_duplicate=True), + Output("dropdown_overview_class_selection_violinplot", "options", allow_duplicate=True), + Output("dropdown_overview_class_selection_violinplot", "value", allow_duplicate=True), # update line plot - Output("dropdown_overview_features_selection_linegraph", "options"), - Output("dropdown_overview_features_selection_linegraph", "value"), - Output("dropdown_overview_feature_selection_rangeslider_linegraph", "options"), - Output("dropdown_overview_feature_selection_rangeslider_linegraph", "value"), + Output("dropdown_overview_features_selection_linegraph", "options", allow_duplicate=True), + Output("dropdown_overview_features_selection_linegraph", "value", allow_duplicate=True), + Output("dropdown_overview_target_selection_linegraph", "options", allow_duplicate=True), + Output("dropdown_overview_target_selection_linegraph", "value", allow_duplicate=True), + Output("dropdown_overview_class_selection_linegraph", "options", allow_duplicate=True), + Output("dropdown_overview_class_selection_linegraph", "value", allow_duplicate=True), + Output("dropdown_overview_feature_selection_rangeslider_linegraph", "options", allow_duplicate=True), + Output("dropdown_overview_feature_selection_rangeslider_linegraph", "value", allow_duplicate=True), # update scatter plot - Output("dropdown1_overview_feature_selection_scattergraph", "options"), - Output("dropdown1_overview_feature_selection_scattergraph", "value"), - Output("dropdown2_overview_feature_selection_scattergraph", "options"), - Output("dropdown2_overview_feature_selection_scattergraph", "value"), + Output("dropdown1_overview_feature_selection_scattergraph", "options", allow_duplicate=True), + Output("dropdown1_overview_feature_selection_scattergraph", "value", allow_duplicate=True), + 
Output("dropdown2_overview_feature_selection_scattergraph", "options", allow_duplicate=True), + Output("dropdown2_overview_feature_selection_scattergraph", "value", allow_duplicate=True), + Output("dropdown_overview_target_selection_scattergraph", "options", allow_duplicate=True), + Output("dropdown_overview_target_selection_scattergraph", "value", allow_duplicate=True), + Output("dropdown_overview_class_selection_scattergraph", "options", allow_duplicate=True), + Output("dropdown_overview_class_selection_scattergraph", "value", allow_duplicate=True), # update correlation heatmap - Output("dropdown_overview_feature_selection_heatmap", "options"), - Output("dropdown_overview_feature_selection_heatmap", "value"), + Output("dropdown_overview_feature_selection_heatmap", "options", allow_duplicate=True), + Output("dropdown_overview_feature_selection_heatmap", "value", allow_duplicate=True), + Output("dropdown_overview_target_selection_heatmap", "options", allow_duplicate=True), + Output("dropdown_overview_target_selection_heatmap", "value", allow_duplicate=True), + Output("dropdown_overview_class_selection_heatmap", "options", allow_duplicate=True), + Output("dropdown_overview_class_selection_heatmap", "value", allow_duplicate=True), # update categorical page - Output("dropdown_categorical_feature", "options"), - Output("dropdown_categorical_feature", "value"), - Output("exploration_categorical_feature_ratio_bar_plot", "figure"), + Output("dropdown_categorical_feature", "options", allow_duplicate=True), + Output("dropdown_categorical_feature", "value", allow_duplicate=True), + Output("exploration_categorical_feature_ratio_bar_plot", "figure", allow_duplicate=True), # update na page - Output("figure_na_bar_plot", "figure"), - Output("figure_na_heatmap", "figure"), - Output("dropdown_na_feature", "options"), - Output("dropdown_na_feature", "value"), - Output("slider_na_iterative_n_nearest_features", "max"), - Output("slider_na_iterative_n_nearest_features", "value"), - 
Output("slider_na_iterative_n_nearest_features", "marks"), + Output("figure_na_bar_plot", "figure", allow_duplicate=True), + Output("figure_na_heatmap", "figure", allow_duplicate=True), + Output("dropdown_na_feature", "options", allow_duplicate=True), + Output("dropdown_na_feature", "value", allow_duplicate=True), # update outlier page - Output("figure_outlier_violin_plot", "figure"), - Output("table_outlier_detection", "data"), - Output("figure_outlier_preview", "figure"), - Output("dropdown_outlier_kv_feature", "options"), - Output("dropdown_outlier_kv_feature", "value"), + Output("figure_outlier_violin_plot", "figure", allow_duplicate=True), + Output("table_outlier_detection", "data", allow_duplicate=True), + Output("figure_outlier_preview", "figure", allow_duplicate=True), + Output("dropdown_outlier_feature", "options", allow_duplicate=True), + Output("dropdown_outlier_feature", "value", allow_duplicate=True), # update transformation - Output("dropdown_transformation_time_series_dataset", "options"), - Output("dropdown_transformation_time_series_dataset", "value"), + Output("dropdown_transformation_time_series_dataset", "options", allow_duplicate=True), + Output("dropdown_transformation_time_series_dataset", "value", allow_duplicate=True), # update sidebar - Output("button_categorical", 'disabled'), - Output("button_na_values", 'disabled'), - Output("button_outlier", 'disabled'), - Output("button_ts", 'disabled'), - Output("button_sc", 'disabled'), - Output("button_sr", 'disabled'), - Output("button_usl", 'disabled'), + Output("button_categorical", 'disabled', allow_duplicate=True), + Output("button_na_values", 'disabled', allow_duplicate=True), + Output("button_outlier", 'disabled', allow_duplicate=True), + Output("button_ts", 'disabled', allow_duplicate=True), + Output("button_sc", 'disabled', allow_duplicate=True), + Output("button_sr", 'disabled', allow_duplicate=True), # input State('datatable_overview', 'data_previous'), Input('datatable_overview', 
'data'), - State("dropdown_overview_features_selection_histogramgraph", "value"), - State("dropdown_overview_feature_selection_rangeslider_histogram", "value"), + State("dropdown_overview_features_selection_histogram", "value"), State("dropdown_overview_features_selection_linegraph", "value"), State("dropdown_overview_feature_selection_rangeslider_linegraph", "value"), State("dropdown1_overview_feature_selection_scattergraph", "value"), State("dropdown2_overview_feature_selection_scattergraph", "value"), State("dropdown_overview_feature_selection_heatmap", "value"), State("dropdown_na_feature", "value"), - State("slider_na_iterative_n_nearest_features", "value"), - State("dropdown_outlier_kv_feature", "value"), + prevent_initial_call=True ) -def delete_feature(previous, current, histogram_values, histogram_index_value, linegraph_values, linegraph_index_value, scatter1_value, scatter2_value, heatmap_value, na_value, iterative_n_nearest_features, outlier_kv_feature): +def delete_feature(previous, current, histogram_values, linegraph_values, linegraph_index_value, scatter1_value, scatter2_value, heatmap_value, na_value): if table_data.DF_RAW is None: return dash.no_update @@ -110,6 +125,9 @@ def delete_feature(previous, current, histogram_values, histogram_index_value, l # update table table_data.DF_RAW = delete_columns(table_data.DF_RAW, row) + # TODO delete later + #table_data.DF_RAW.fillna(0, inplace=True) + # update components df_num = table_data.DF_RAW.select_dtypes(include=NUMERICS) @@ -120,31 +138,61 @@ def delete_feature(previous, current, histogram_values, histogram_index_value, l # update textboard - value_shape = str(table_data.DF_RAW.shape) - value_memory = str(get_memory_usage(table_data.DF_RAW)) - value_na = str(get_percentage_nan_total(table_data.DF_RAW)) - value_num = str(get_percentage_numeric(table_data.DF_RAW)) + value_shape = html.H6(children=str(table_data.DF_RAW.shape), className='text_board_font1') + value_memory = 
html.H6(children=str(get_memory_usage(table_data.DF_RAW)), className='text_board_font1') + value_na = html.H6(children=str(get_percentage_nan_total(table_data.DF_RAW)), className='text_board_font1') + value_num = html.H6(children=str(get_percentage_numeric(table_data.DF_RAW)), className='text_board_font1') # update histogram histogram_options = options_all - histogram_index_options = options_int - histogram_values = histogram_options[:3] - histogram_index_value = options_int[0] + histogram_target_options = options_all + histogram_class_options = table_data.DF_RAW[options_all[0]].unique().tolist() + ['ALL'] + histogram_values = histogram_options[0] + histogram_target_value = options_all[0] + histogram_class_value = 'ALL' + + # update violinplot + violinplot_options = options_all + violinplot_target_options = options_all + violinplot_class_options = table_data.DF_RAW[options_all[0]].unique().tolist() + ['ALL'] + violinplot_values = violinplot_options[:3] + violinplot_target_value = violinplot_target_options[0] + violinplot_class_value = 'ALL' # update linegraph - linegraph_options = options_all - linegraph_index_options = options_int + linegraph_options = options_num + linegraph_target_options = options_all + linegraph_class_options = table_data.DF_RAW[options_all[0]].unique().tolist() + ['ALL'] + linegraph_index_options = ['index_auto'] + options_int linegraph_values = linegraph_options[:3] - linegraph_index_value = options_int[0] + linegraph_target_value = options_all[0] + linegraph_class_value = 'ALL' + linegraph_index_value = linegraph_index_options[0] # update scatter plot scatter_options = options_num + scatter_target_options = options_all + # Create an empty list to store column names with less than 50 unique values + scatter_target_options = [] + + # Loop through the columns in the DataFrame + for column in table_data.DF_RAW.columns: + unique_values = table_data.DF_RAW[column].nunique() + if unique_values < 50: + scatter_target_options.append(column) + 
scatter_class_options = table_data.DF_RAW[options_all[0]].unique().tolist() + ['ALL'] scatter1_value = options_num[0] scatter2_value = options_num[1] + scatter_target_value = scatter_target_options[0] + scatter_class_value = 'ALL' # update correlation heatmap heat_map_options = options_num + heat_map_target_options = options_all + heat_map_class_options = table_data.DF_RAW[options_all[0]].unique().tolist() + ['ALL'] heatmap_value = heat_map_options[:3] + heat_map_target_value = options_all[0] + heat_map_class_value = 'ALL' ### update categorical page df_cat = table_data.DF_RAW.select_dtypes(include='object').dropna(axis=1, how='all') @@ -169,11 +217,6 @@ def delete_feature(previous, current, histogram_values, histogram_index_value, l if row is not None and na_value == row or (na_value is None and len(options_na) > 0): na_value = options_na[0] - - max_nearest_features = len(df_num.columns) - marks = {i: {'label': str(round(i))} for i in np.arange(1, max_nearest_features, (max_nearest_features-1)/5)} - if iterative_n_nearest_features > max_nearest_features: - iterative_n_nearest_features = max_nearest_features # update overview plots na_count = get_num_nan(table_data.DF_RAW) @@ -184,10 +227,12 @@ def delete_feature(previous, current, histogram_values, histogram_index_value, l # update violin distibution figure_violin = get_violin_plot(df_num, df_num.columns) + # update features + outlier_options = options_num + outlier_value = options_num + # update datatable data_outlier = [] - if row is not None and outlier_kv_feature == row or outlier_kv_feature is None: - outlier_kv_feature = options_num[0] # update figure figure_outlier = {} @@ -216,134 +261,203 @@ def delete_feature(previous, current, histogram_values, histogram_index_value, l training_disabled = True - return value_shape, value_memory, value_na, value_num, histogram_options, histogram_values, histogram_index_options, histogram_index_value, linegraph_options, linegraph_values, linegraph_index_options, 
linegraph_index_value, scatter_options, scatter1_value, scatter_options, scatter2_value, heat_map_options, heatmap_value, options_cat, value_cat, figure_cat, figure_num_na, figure_heatmap_na, options_na, na_value, max_nearest_features, iterative_n_nearest_features, marks, figure_violin, data_outlier, figure_outlier, options_num, outlier_kv_feature, options_transformation, value_transformation, categorical_disabled, na_disabled, rest_disabled, rest_disabled, training_disabled, training_disabled, training_disabled + return value_shape, value_memory, value_na, value_num, histogram_options, histogram_values, histogram_target_options, histogram_target_value, histogram_class_options, histogram_class_value, violinplot_options, violinplot_values, violinplot_target_options, violinplot_target_value, violinplot_class_options, violinplot_class_value, linegraph_options, linegraph_values, linegraph_target_options, linegraph_target_value, linegraph_class_options, linegraph_class_value, linegraph_index_options, linegraph_index_value, scatter_options, scatter1_value, scatter_options, scatter2_value, scatter_target_options, scatter_target_value, scatter_class_options, scatter_class_value, heat_map_options, heatmap_value, heat_map_target_options, heat_map_target_value, heat_map_class_options, heat_map_class_value, options_cat, value_cat, figure_cat, figure_num_na, figure_heatmap_na, options_na, na_value, figure_violin, data_outlier, figure_outlier, outlier_options, outlier_value, options_transformation, value_transformation, categorical_disabled, na_disabled, rest_disabled, rest_disabled, training_disabled, training_disabled # update the histogram and update the rangeslider in the histogram board according to the dropdown @app.callback( - [ - Output("figure_overview_histogram", "figure"), - Output("rangeslider_overview_value_constraint_histogram", "min"), - Output("rangeslider_overview_value_constraint_histogram", "max"), - Output("rangeslider_overview_value_constraint_histogram", 
"marks"), - Output("rangeslider_overview_value_constraint_histogram", "value"), - ], - [ - Input("dropdown_overview_features_selection_histogramgraph", "value"), - Input("dropdown_overview_features_selection_histogramgraph", "options"), - Input("dropdown_overview_feature_selection_rangeslider_histogram", "value"), - Input("rangeslider_overview_value_constraint_histogram", "value"), - State("rangeslider_overview_value_constraint_histogram", "marks"), - State("rangeslider_overview_value_constraint_histogram", "min"), - State("rangeslider_overview_value_constraint_histogram", "max"), - ] + Output("figure_overview_histogram", "children", allow_duplicate=True), + Input("dropdown_overview_features_selection_histogram", "value"), + Input("dropdown_overview_target_selection_histogram", "value"), + Input("dropdown_overview_class_selection_histogram", "value"), + prevent_initial_call=True ) -def update_histogram_figure_under_constraint(cols, options, col_index, values, marks, curr_min, curr_max): +def update_histogram_figure_under_constraint(cols, target, target_class): if table_data.DF_RAW is None: return dash.no_update - triggered_id = ctx.triggered_id - - if triggered_id == "rangeslider_overview_value_constraint_histogram": - value_min = values[0] - value_max = values[1] - else: - value_min, value_max, curr_min, curr_max, _, values = get_marks_for_rangeslider(table_data.DF_RAW, col_index) - - marks = get_slider_marks((curr_min, curr_max)) + if target_class == 'ALL': + target_class = None + target = None - df = compute_plot(table_data.DF_RAW, col_index, cols, value_min, value_max) + df = compute_plot(table_data.DF_RAW, None, cols, reset_index=True, target=target, target_class=target_class) # draw Figure figure = get_overview_histogram_plot(df, cols) + graph = dcc.Graph(figure=figure, className='figure_overview') - return figure, curr_min, curr_max, marks, values + return graph -# update the line plot and update the rangeslider in the line plot board according to the 
dropdown +# update the violinplot @app.callback( - [ - Output("figure_overview_linegraph", "figure"), - Output("rangeslider_overview_value_constraint_linegraph", "min"), - Output("rangeslider_overview_value_constraint_linegraph", "max"), - Output("rangeslider_overview_value_constraint_linegraph", "marks"), - Output("rangeslider_overview_value_constraint_linegraph", "value"), - ], - [ - Input("dropdown_overview_features_selection_linegraph", "value"), - Input("dropdown_overview_features_selection_linegraph", "options"), - Input("dropdown_overview_feature_selection_rangeslider_linegraph", "value"), - Input("rangeslider_overview_value_constraint_linegraph", "value"), - State("rangeslider_overview_value_constraint_linegraph", "marks"), - State("rangeslider_overview_value_constraint_linegraph", "min"), - State("rangeslider_overview_value_constraint_linegraph", "max"), - ] + Output("figure_overview_violinplot", "children", allow_duplicate=True), + Input("dropdown_overview_features_selection_violinplot", "value"), + Input("dropdown_overview_target_selection_violinplot", "value"), + Input("dropdown_overview_class_selection_violinplot", "value"), + prevent_initial_call=True ) -def update_line_plot_under_constraint(cols, options, col_index, values, marks, curr_min, curr_max): +def update_histogram_figure_under_constraint(cols, target, target_class): if table_data.DF_RAW is None: return dash.no_update - triggered_id = ctx.triggered_id + if target_class == 'ALL': + target_class = None + target = None + + df = compute_plot(table_data.DF_RAW, None, cols, reset_index=True, target=target, target_class=target_class) + # draw Figure + figure = get_overview_violin_plot(df, cols) + graph = dcc.Graph(figure=figure ,className='figure_overview') + + return graph - if triggered_id == "rangeslider_overview_value_constraint_linegraph": - value_min = values[0] - value_max = values[1] - else: - value_min, value_max, curr_min, curr_max, _, values = get_marks_for_rangeslider(table_data.DF_RAW, 
col_index) +# update the line plot and update the rangeslider in the line plot board according to the dropdown +@app.callback( + Output("figure_overview_linegraph", "children", allow_duplicate=True), + Input("dropdown_overview_features_selection_linegraph", "value"), + Input("dropdown_overview_target_selection_linegraph", "value"), + Input("dropdown_overview_class_selection_linegraph", "value"), + Input("dropdown_overview_feature_selection_rangeslider_linegraph", "value"), + prevent_initial_call=True +) +def update_line_plot_under_constraint(cols, target, target_class, col_index): + if table_data.DF_RAW is None: + return dash.no_update + + if target_class == 'ALL': + target_class = None + target = None - marks = get_slider_marks((curr_min, curr_max)) + if col_index == 'index_auto': + col_index = None - df = compute_plot(table_data.DF_RAW, col_index, cols, value_min, value_max, reset_index=True) + df = compute_plot(table_data.DF_RAW, col_index, cols, reset_index=True, target=target, target_class=target_class) # draw Figure figure = get_overview_line_plot(df, cols) + graph = dcc.Graph(figure=figure ,className='figure_overview') - return figure, curr_min, curr_max, marks, values + return graph -# update the scatter plot and update the rangeslider in the scatter plot board according to the dropdown +# update the scatter plot according to the dropdown @app.callback( - [ - Output("figure_overview_scattergraph", "figure"), - Output("rangeslider_overview_value_constraint_scattergraph", "min"), - Output("rangeslider_overview_value_constraint_scattergraph", "max"), - Output("rangeslider_overview_value_constraint_scattergraph", "marks"), - Output("rangeslider_overview_value_constraint_scattergraph", "value"), - ], - [ - Input("dropdown1_overview_feature_selection_scattergraph", "value"), - Input("dropdown2_overview_feature_selection_scattergraph", "value"), - Input("rangeslider_overview_value_constraint_scattergraph", "value"), - 
State("rangeslider_overview_value_constraint_scattergraph", "marks"), - State("rangeslider_overview_value_constraint_scattergraph", "min"), - State("rangeslider_overview_value_constraint_scattergraph", "max"), - ] + Output("figure_overview_scattergraph", "children", allow_duplicate=True), + Input("dropdown1_overview_feature_selection_scattergraph", "value"), + Input("dropdown2_overview_feature_selection_scattergraph", "value"), + Input("dropdown_overview_target_selection_scattergraph", "value"), + Input("dropdown_overview_class_selection_scattergraph", "value"), + prevent_initial_call=True ) -def update_scatter_figure_under_constraint(col1, col2, values, marks, curr_min, curr_max): +def update_scatter_figure(col1, col2, target, target_class): if table_data.DF_RAW is None: return dash.no_update - triggered_id = ctx.triggered_id - - if triggered_id == "rangeslider_overview_value_constraint_scattergraph": - value_min = values[0] - value_max = values[1] - else: - value_min, value_max, curr_min, curr_max, _, values = get_marks_for_rangeslider(table_data.DF_RAW, col1) - - marks = get_slider_marks((curr_min, curr_max)) + df = compute_scatter(table_data.DF_RAW, col1, target=target, target_class=target_class) - df = compute_scatter(table_data.DF_RAW, col1, value_min, value_max) - figure = get_overview_scatter_plot(df, col1, col2) + if target_class != 'ALL': + target = None - return figure, curr_min, curr_max, marks, values + figure = get_overview_scatter_plot(df, col1, col2, target) + graph = dcc.Graph(figure=figure ,className='figure_overview') + + return graph # update heatmap @app.callback( - Output("figure_overview_heatmap", "figure"), - Input("dropdown_overview_feature_selection_heatmap", "value") + Output("figure_overview_heatmap", "figure", allow_duplicate=True), + Input("dropdown_overview_feature_selection_heatmap", "value"), + Input("dropdown_overview_target_selection_heatmap", "value"), + Input("dropdown_overview_class_selection_heatmap", "value"), + 
prevent_initial_call=True ) -def update_heatmap(cols): +def update_heatmap(cols, target, target_class): if table_data.DF_RAW is None: return dash.no_update - corr = analyse_correlation(table_data.DF_RAW[cols]) + if target_class == 'ALL': + target_class = None + target = None + + corr = analyse_correlation(table_data.DF_RAW, cols, target=target, target_class=target_class) figure = get_overview_heatmap(corr) return figure +# update class options histogram +@app.callback( + Output("dropdown_overview_class_selection_histogram", "options", allow_duplicate=True), + Output("dropdown_overview_class_selection_histogram", "value", allow_duplicate=True), + Input("dropdown_overview_target_selection_histogram", "value"), + prevent_initial_call=True +) +def update_histogram(target): + if target is None: + return dash.no_update + + options = table_data.DF_RAW[target].unique().tolist() + ['ALL'] + options = sorted(options, key=lambda x: (isinstance(x, (int, float)), x)) + value = 'ALL' + return options, value + + + +# update class options linegraph +@app.callback( + Output("dropdown_overview_class_selection_linegraph", "options", allow_duplicate=True), + Output("dropdown_overview_class_selection_linegraph", "value", allow_duplicate=True), + Input("dropdown_overview_target_selection_linegraph", "value"), + prevent_initial_call=True +) +def update_linegraph(target): + if target is None: + return dash.no_update + + options = table_data.DF_RAW[target].unique().tolist() + ['ALL'] + options = sorted(options, key=lambda x: (isinstance(x, (int, float)), x)) + value = 'ALL' + return options, value + +# update class options scatter +@app.callback( + Output("dropdown_overview_class_selection_scattergraph", "options", allow_duplicate=True), + Output("dropdown_overview_class_selection_scattergraph", "value", allow_duplicate=True), + Input("dropdown_overview_target_selection_scattergraph", "value"), + prevent_initial_call=True +) +def update_scatter(target): + if target is None: + return 
dash.no_update + + options = table_data.DF_RAW[target].unique().tolist() + ['ALL'] + options = sorted(options, key=lambda x: (isinstance(x, (int, float)), x)) + value = 'ALL' + return options, value + +# update class options heatmap +@app.callback( + Output("dropdown_overview_class_selection_heatmap", "options", allow_duplicate=True), + Output("dropdown_overview_class_selection_heatmap", "value", allow_duplicate=True), + Input("dropdown_overview_target_selection_heatmap", "value"), + prevent_initial_call=True +) +def update_heatmap_class_options(target): + if target is None: + return dash.no_update + + options = table_data.DF_RAW[target].unique().tolist() + ['ALL'] + options = sorted(options, key=lambda x: (isinstance(x, (int, float)), x)) + value = 'ALL' + return options, value + +# update class options violinplot +@app.callback( + Output("dropdown_overview_class_selection_violinplot", "options", allow_duplicate=True), + Output("dropdown_overview_class_selection_violinplot", "value", allow_duplicate=True), + Input("dropdown_overview_target_selection_violinplot", "value"), + prevent_initial_call=True +) +def update_violinplot(target): + if target is None: + return dash.no_update + + options = table_data.DF_RAW[target].unique().tolist() + ['ALL'] + options = sorted(options, key=lambda x: (isinstance(x, (int, float)), x)) + value = 'ALL' + return options, value diff --git a/callbacks/page_supervised_classification_callbacks.py b/callbacks/page_supervised_classification_callbacks.py index 045274c..c3bacee 100644 --- a/callbacks/page_supervised_classification_callbacks.py +++ b/callbacks/page_supervised_classification_callbacks.py @@ -15,6 +15,9 @@ # import plots from view.page_helper_components.plots import * +# import tables +from view.page_helper_components.tables import * + # import app from view.app import app @@ -27,9 +30,10 @@ # update baseline style @app.callback( -
Output("container_classification_baseline_strategy", "style", allow_duplicate=True), Input("dropdown_classification_model", "value"), - State("container_classification_baseline_strategy", "style") + State("container_classification_baseline_strategy", "style"), + prevent_initial_call=True ) def update_style_baseline(method, style): if style is None: @@ -41,10 +45,11 @@ def update_style_baseline(method, style): return style @app.callback( - Output("container_classification_baseline_constant", "style"), + Output("container_classification_baseline_constant", "style", allow_duplicate=True), Input("dropdown_classification_model", "value"), Input("dropdown_classification_baseline_strategy", "value"), - State("container_classification_baseline_constant", "style") + State("container_classification_baseline_constant", "style"), + prevent_initial_call=True ) def update_style_baseline_constant(method, strategy, style): if style is None: @@ -56,10 +61,11 @@ def update_style_baseline_constant(method, strategy, style): return style @app.callback( - Output("container_classification_baseline_look_back", "style"), + Output("container_classification_baseline_look_back", "style", allow_duplicate=True), Input("dropdown_classification_model", "value"), Input("dropdown_classification_baseline_strategy", "value"), - State("container_classification_baseline_look_back", "style") + State("container_classification_baseline_look_back", "style"), + prevent_initial_call=True ) def update_style_baseline_look_back(method, strategy, style): if style is None: @@ -72,9 +78,10 @@ def update_style_baseline_look_back(method, strategy, style): # update knn style @app.callback( - Output("container_classification_knn", "style"), + Output("container_classification_knn", "style", allow_duplicate=True), Input("dropdown_classification_model", "value"), - State("container_classification_knn", "style") + State("container_classification_knn", "style"), + prevent_initial_call=True ) def update_style_knn(method, 
style): if style is None: @@ -87,9 +94,10 @@ def update_style_knn(method, style): # update random forest style @app.callback( - Output("container_classification_random_forest", "style"), + Output("container_classification_random_forest", "style", allow_duplicate=True), Input("dropdown_classification_model", "value"), - State("container_classification_random_forest", "style") + State("container_classification_random_forest", "style"), + prevent_initial_call=True ) def update_style_rf(method, style): if style is None: @@ -100,26 +108,10 @@ def update_style_rf(method, style): style['display'] = 'none' return style - -# update xgboost style -@app.callback( - Output("container_classification_xgboost", "style"), - Input("dropdown_classification_model", "value"), - State("container_classification_xgboost", "style") -) -def update_style_xgboost(method, style): - if style is None: - style = {} - if method == CLASSFIER[3]: - style['display'] = 'block' - else: - style['display'] = 'none' - return style - # update button styles @app.callback( - Output("button_classification_apply", "style"), - Output("button_classification_show", "style"), + Output("button_classification_apply", "style", allow_duplicate=True), + Output("button_classification_show", "style", allow_duplicate=True), Input("button_classification_show", "n_clicks"), Input("button_classification_apply", "n_clicks"), # general @@ -152,6 +144,7 @@ def update_style_xgboost(method, style): Input("alert_classification_missing_classes", "is_open"), Input("alert_classification_invalid_neighbors", "is_open"), Input("alert_classification", "is_open"), + prevent_initial_call=True ) def update_style_buttons(n_clicks1, n_clicks2, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, style_apply, style_show, is_open_invalid_splits, is_open_missing_classes, is_open_invalid_neighbors, is_open_alert): triggered_id = ctx.triggered_id @@ -159,27 +152,28 @@ def update_style_buttons(n_clicks1, n_clicks2, v1, 
v2, v3, v4, v5, v6, v7, v8, v style_apply = {} if style_show is None: style_show = {} - - if n_clicks1 is None or n_clicks1 == 0 or is_open_invalid_splits or is_open_missing_classes or is_open_invalid_neighbors or is_open_alert: + + if is_open_invalid_splits or is_open_missing_classes or is_open_invalid_neighbors or is_open_alert: style_apply['display'] = 'none' style_show['display'] = 'block' - elif triggered_id is None or triggered_id == 'button_classification_show': + elif triggered_id is None or triggered_id == 'button_classification_show' or triggered_id == 'alert_classification_invalid_splits' or triggered_id == 'alert_classification_missing_classes' or triggered_id == 'alert_classification_invalid_neighbors' or triggered_id == 'alert_classification': style_apply['display'] = 'block' - style_show['display'] = 'none' + style_show['display'] = 'none' else: + style_apply['display'] = 'none' style_show['display'] = 'block' - style_apply['display'] = 'none' return style_apply, style_show # update summary @app.callback( - Output("analysis_classification_summary", "figure"), - Output("input_classification_model_name", "value"), + Output("analysis_classification_summary", "figure", allow_duplicate=True), + Output("input_classification_model_name", "value", allow_duplicate=True), Input("button_classification_apply", "n_clicks"), State("input_classification_model_name", "value"), State("dropdown_classification_model", "value"), State("dropdown_classification_scoring", "value"), + prevent_initial_call=True ) def update_classification_summary(n_clicks, model_name, model, scoring): if n_clicks is None or n_clicks == 0: @@ -204,12 +198,14 @@ def update_classification_summary(n_clicks, model_name, model, scoring): # apply classifier @app.callback( - Output("alert_classification_invalid_splits", "is_open"), - Output("alert_classification_missing_classes", "is_open"), - Output("alert_classification_invalid_neighbors", "is_open"), - Output("alert_classification", "children"), 
- Output("alert_classification", "is_open"), - Output("loading_classification_prediction", "children"), + Output("alert_classification_invalid_splits", "is_open", allow_duplicate=True), + Output("alert_classification_missing_classes", "is_open", allow_duplicate=True), + Output("alert_classification_invalid_neighbors", "is_open", allow_duplicate=True), + Output("alert_classification", "children", allow_duplicate=True), + Output("alert_classification", "is_open", allow_duplicate=True), + Output("loading_classification_prediction", "children", allow_duplicate=True), + Output("loading_classification_feature_importance", "children", allow_duplicate=True), + Output("loading_classification_preview", "children", allow_duplicate=True), Input("button_classification_show", "n_clicks"), # general State("dropdown_classification_dataset", "value"), @@ -234,8 +230,9 @@ def update_classification_summary(n_clicks, model_name, model, scoring): State("slider_classification_xgboost_n_estimators", "value"), State("slider_classification_xgboost_max_depth", "value"), State("slider_classification_xgboost_learning_rate", "value"), + prevent_initial_call=True ) -def update_current_results(n_clicks, dataset_name, target, train_test_split, model, ts_cross_val, scoring, baseline_strategy, baseline_constant, baseline_look_back, knn_n_neighbors, knn_algorithm, knn_weights, rf_n_estimators, rf_criterion, rf_max_depth, xgb_n_estimators, xgb_max_depth, xgb_learning_rate): +def update_current_result_overview(n_clicks, dataset_name, target, train_test_split, model, ts_cross_val, scoring, baseline_strategy, baseline_constant, baseline_look_back, knn_n_neighbors, knn_algorithm, knn_weights, rf_n_estimators, rf_criterion, rf_max_depth, xgb_n_estimators, xgb_max_depth, xgb_learning_rate): if n_clicks is None or n_clicks == 0: return dash.no_update # read out parameter @@ -273,7 +270,8 @@ def update_current_results(n_clicks, dataset_name, target, train_test_split, mod df = 
df.loc[min_index:max_index].copy() try: - y_train, y_train_pred, y_test, y_test_pred = apply_classifier_prediction(df, target, train_test_split, model, params, ts_cross_val=ts_cross_val) + y_train, y_train_pred, y_test, y_test_pred, feature_importance = apply_classifier_prediction(df, target, train_test_split, model, params, ts_cross_val=ts_cross_val) + scores = apply_classifier(df, target, train_test_split, model, params, ts_cross_val=ts_cross_val, scoring=CLASSIFIER_SCORING[scoring]) except ValueError as e: print(e) alert_splits = False @@ -290,118 +288,27 @@ def update_current_results(n_clicks, dataset_name, target, train_test_split, mod else: alert = True - return alert_splits, alert_missing_classes, alert_neighbors, alert_str, alert, dash.no_update + return alert_splits, alert_missing_classes, alert_neighbors, alert_str, alert, dash.no_update, dash.no_update, dash.no_update figure = get_prediction_plot(y_train, y_train_pred, y_test, y_test_pred, title="Original Data vs Predictions") - graph = dcc.Graph(id="figure_classification_prediction", className='graph_categorical', figure=figure) - - return False, False, False, dash.no_update, False, graph - -# apply classifier -@app.callback( - Output("alert_classification_invalid_splits", "is_open"), - Output("alert_classification_missing_classes", "is_open"), - Output("alert_classification_invalid_neighbors", "is_open"), - Output("alert_classification", "children"), - Output("alert_classification", "is_open"), - Output("loading_classification_preview", "children"), - Input("button_classification_show", "n_clicks"), - # general - State("dropdown_classification_dataset", "value"), - State("dropdown_classification_target", "value"), - State("slider_classification_train_test_split", "value"), - State("dropdown_classification_model", "value"), - State("checklist_classification_time_series_crossvalidation", "value"), - State("dropdown_classification_scoring", "value"), - # baseline - 
State("dropdown_classification_baseline_strategy", "value"), - State("input_classification_baseline_constant", "value"), - State("slider_classification_baseline_look_back", "value"), - # knn - State("slider_classification_knn_n_neighbors", "value"), - State("dropdown_classification_knn_algorithm", "value"), - State("dropdown_classification_knn_weights", "value"), - # random forest - State("slider_classification_random_forest_n_estimators", "value"), - State("slider_classification_random_forest_criterion", "value"), - State("slider_classification_random_forest_max_depth", "value"), - # xgboost - State("slider_classification_xgboost_n_estimators", "value"), - State("slider_classification_xgboost_max_depth", "value"), - State("slider_classification_xgboost_learning_rate", "value"), -) -def update_current_results(n_clicks, dataset_name, target, train_test_split, model, ts_cross_val, scoring, baseline_strategy, baseline_constant, baseline_look_back, knn_n_neighbors, knn_algorithm, knn_weights, rf_n_estimators, rf_criterion, rf_max_depth, xgb_n_estimators, xgb_max_depth, xgb_learning_rate): - if n_clicks is None or n_clicks == 0: - return dash.no_update - # read out parameter - params = {} - if model == CLASSFIER[0]: # baseline - params['strategy'] = CLASSIFIER_BASELINE_STRATEGY[baseline_strategy] - if baseline_strategy == list(CLASSIFIER_BASELINE_STRATEGY.keys())[3]: - params['constant'] = baseline_constant - elif baseline_strategy == list(CLASSIFIER_BASELINE_STRATEGY.keys())[4]: - params['look_back'] = baseline_look_back - elif model == CLASSFIER[1]: # knn - params['n_neighbors'] = knn_n_neighbors - params['algorithm'] = CLASSIFIER_KNN_ALGORITHM[knn_algorithm] - params['weights'] = CLASSIFIER_KNN_WEIGHTS[knn_weights] - elif model == CLASSFIER[2]: # random forest - params['n_estimators'] = rf_n_estimators - params['criterion'] = CLASSIFIER_RF_CRITERION[rf_criterion] - if rf_max_depth == 36: - params['max_depth'] = None - else: - params['max_depth'] = rf_max_depth - elif 
model == CLASSFIER[3]: # xgboost - params['n_estimators'] = xgb_n_estimators - params['learning_rate'] = xgb_learning_rate - if xgb_max_depth == 36: - params['max_depth'] = None - else: - params['max_depth'] = xgb_max_depth - - df = table_data.ALL_DATASETS[dataset_name] - - # use data between defined ranges - min_index = table_data.ALL_RANGES[dataset_name][0] - max_index = table_data.ALL_RANGES[dataset_name][1] - df = df.loc[min_index:max_index].copy() - - try: - scores = apply_classifier(df, target, train_test_split, model, params, ts_cross_val=ts_cross_val, scoring=CLASSIFIER_SCORING[scoring]) - except ValueError as e: - print(e) - alert_splits = False - alert_missing_classes = False - alert_neighbors = False - alert_str = str(e) - alert = False - if alert_str.startswith('n_splits='): - alert_splits = True - elif alert_str.startswith('Expected n_neighbors'): - alert_neighbors = True - elif "fits failed with the following error" in alert_str: - alert_missing_classes = True - else: - alert = True - - return alert_splits, alert_missing_classes, alert_neighbors, alert_str, alert, dash.no_update + graph_prediction = dcc.Graph(id="figure_classification_prediction", className='graph_categorical', figure=figure) figure = get_cross_validation_plot(scores) - graph = dcc.Graph(id="figure_classification_result", className='graph_categorical', figure=figure) + graph_overview = dcc.Graph(id="figure_classification_result", className='graph_categorical', figure=figure) + + datatable_feature_importance = get_feature_importance_table(id_table='datatable_classification_feature_importance', data=feature_importance) global CURR_RESULT CURR_RESULT = (model, scores["Score"].mean()) - - return False, False, False, dash.no_update, False, graph - - + + return False, False, False, dash.no_update, False, graph_prediction, datatable_feature_importance, graph_overview # update model name @app.callback( - Output("input_classification_model_name", "value"), + 
Output("input_classification_model_name", "value", allow_duplicate=True), Input("dropdown_classification_model", "value"), Input("dropdown_classification_scoring", "value"), + prevent_initial_call=True ) def update_update_model_name(model, scoring): @@ -414,9 +321,10 @@ def update_update_model_name(model, scoring): # update after selected dataset changes @app.callback( - Output("dropdown_classification_target", "options"), - Output("dropdown_classification_target", "value"), + Output("dropdown_classification_target", "options", allow_duplicate=True), + Output("dropdown_classification_target", "value", allow_duplicate=True), Input("dropdown_classification_dataset", "value"), + prevent_initial_call=True ) def update_after_dataset_changes(dataset_name): if dataset_name is None or dataset_name == "": @@ -431,11 +339,41 @@ def update_after_dataset_changes(dataset_name): # update after options for selected dataset changes @app.callback( - Output("button_classification_show", "disabled"), + Output("button_classification_show", "disabled", allow_duplicate=True), Input("dropdown_classification_dataset", "options"), + prevent_initial_call=True ) -def update_after_dataset_changes(options): +def update_after_dataset_options_changes(options): return len(options) < 1 +@app.callback( + Output("img_classification_strategy", "src", allow_duplicate=True), + Output("link_classification_strategy", "href", allow_duplicate=True), + Output("tooltip_classification_strategy", "children", allow_duplicate=True), + Input("dropdown_classification_model", "value"), + Input("dropdown_classification_baseline_strategy", "value"), + prevent_initial_call=True +) +def update_info(strategy, baseline_strategy): + if strategy == CLASSFIER[0] and baseline_strategy != list(CLASSIFIER_BASELINE_STRATEGY.keys())[-1]: + src = '/assets/img/link.png' + href = CLASSIFIER_LINKS[0] + children = CLASSIFIER_DESCRIPTION[0] + elif strategy == CLASSFIER[0]: + src = '/assets/img/tooltip.png' + href = CLASSIFIER_LINKS[1] 
+ children = CLASSIFIER_DESCRIPTION[1] + elif strategy == CLASSFIER[1]: + src = '/assets/img/link.png' + href = CLASSIFIER_LINKS[2] + children = CLASSIFIER_DESCRIPTION[2] + elif strategy == CLASSFIER[2]: + src = '/assets/img/link.png' + href = CLASSIFIER_LINKS[3] + children = CLASSIFIER_DESCRIPTION[3] + else: + return dash.no_update + + return src, href, children \ No newline at end of file diff --git a/callbacks/page_supervised_regression_callbacks.py b/callbacks/page_supervised_regression_callbacks.py index fde507a..866953e 100644 --- a/callbacks/page_supervised_regression_callbacks.py +++ b/callbacks/page_supervised_regression_callbacks.py @@ -15,6 +15,9 @@ # import plots from view.page_helper_components.plots import * +# import tables +from view.page_helper_components.tables import * + # import app from view.app import app @@ -27,9 +30,10 @@ # update baseline style @app.callback( - Output("container_regression_baseline_strategy", "style"), + Output("container_regression_baseline_strategy", "style", allow_duplicate=True), Input("dropdown_regression_model", "value"), - State("container_regression_baseline_strategy", "style") + State("container_regression_baseline_strategy", "style"), + prevent_initial_call=True ) def update_style_baseline(method, style): if style is None: @@ -41,10 +45,11 @@ def update_style_baseline(method, style): return style @app.callback( - Output("container_regression_baseline_constant", "style"), + Output("container_regression_baseline_constant", "style", allow_duplicate=True), Input("dropdown_regression_model", "value"), Input("dropdown_regression_baseline_strategy", "value"), - State("container_regression_baseline_constant", "style") + State("container_regression_baseline_constant", "style"), + prevent_initial_call=True ) def update_style_baseline_constant(method, strategy, style): if style is None: @@ -56,10 +61,11 @@ def update_style_baseline_constant(method, strategy, style): return style @app.callback( - 
Output("container_regression_baseline_quantile", "style"), + Output("container_regression_baseline_quantile", "style", allow_duplicate=True), Input("dropdown_regression_model", "value"), Input("dropdown_regression_baseline_strategy", "value"), - State("container_regression_baseline_quantile", "style") + State("container_regression_baseline_quantile", "style"), + prevent_initial_call=True ) def update_style_baseline_quantile(method, strategy, style): if style is None: @@ -71,10 +77,11 @@ def update_style_baseline_quantile(method, strategy, style): return style @app.callback( - Output("container_regression_baseline_look_back", "style"), + Output("container_regression_baseline_look_back", "style", allow_duplicate=True), Input("dropdown_regression_model", "value"), Input("dropdown_regression_baseline_strategy", "value"), - State("container_regression_baseline_look_back", "style") + State("container_regression_baseline_look_back", "style"), + prevent_initial_call=True ) def update_style_baseline_look_back(method, strategy, style): if style is None: @@ -87,9 +94,10 @@ def update_style_baseline_look_back(method, strategy, style): # update random forest style @app.callback( - Output("container_regression_random_forest", "style"), + Output("container_regression_random_forest", "style", allow_duplicate=True), Input("dropdown_regression_model", "value"), - State("container_regression_random_forest", "style") + State("container_regression_random_forest", "style"), + prevent_initial_call=True ) def update_style_rf(method, style): if style is None: @@ -100,26 +108,10 @@ def update_style_rf(method, style): style['display'] = 'none' return style - -# update xgboost style -@app.callback( - Output("container_regression_xgboost", "style"), - Input("dropdown_regression_model", "value"), - State("container_regression_xgboost", "style") -) -def update_style_xgboost(method, style): - if style is None: - style = {} - if method == CLASSFIER[3]: - style['display'] = 'block' - else: - 
style['display'] = 'none' - return style - # update button styles @app.callback( - Output("button_regression_apply", "style"), - Output("button_regression_show", "style"), + Output("button_regression_apply", "style", allow_duplicate=True), + Output("button_regression_show", "style", allow_duplicate=True), Input("button_regression_show", "n_clicks"), Input("button_regression_apply", "n_clicks"), # general @@ -143,9 +135,10 @@ def update_style_xgboost(method, style): Input("slider_regression_xgboost_max_depth", "value"), Input("slider_regression_xgboost_learning_rate", "value"), # alerts - Input("alert_classification", "is_open"), + Input("alert_regression", "is_open"), State("button_regression_apply", "style"), - State("button_regression_show", "style") + State("button_regression_show", "style"), + prevent_initial_call=True ) def update_style_buttons(n_clicks1, n_clicks2, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, is_open_alert, style_apply, style_show): triggered_id = ctx.triggered_id @@ -154,25 +147,26 @@ def update_style_buttons(n_clicks1, n_clicks2, v1, v2, v3, v4, v5, v6, v7, v8, v if style_apply is None: style_apply = {} if style_show is None: style_show = {} - if n_clicks1 is None or n_clicks1 == 0 or is_open_alert: + if n_clicks1 is None or n_clicks1 == 0 or is_open_alert: style_apply['display'] = 'none' style_show['display'] = 'block' - elif triggered_id is None or triggered_id == 'button_regression_show': + elif triggered_id is None or triggered_id == 'button_regression_show' or triggered_id == 'alert_regression': style_apply['display'] = 'block' style_show['display'] = 'none' else: - style_show['display'] = 'block' style_apply['display'] = 'none' + style_apply['display'] = 'none' + style_show['display'] = 'block' return style_apply, style_show # update summary @app.callback( - Output("analysis_regression_summary", "figure"), - Output("input_regression_model_name", "value"), + Output("analysis_regression_summary", "figure", allow_duplicate=True), + Output("input_regression_model_name", "value", allow_duplicate=True),
Input("button_regression_apply", "n_clicks"), State("input_regression_model_name", "value"), State("dropdown_regression_model", "value"), State("dropdown_regression_scoring", "value"), + prevent_initial_call=True ) def update_regression_summary(n_clicks, model_name, model, scoring): if n_clicks is None or n_clicks == 0: @@ -197,9 +191,11 @@ def update_regression_summary(n_clicks, model_name, model, scoring): # apply regressor @app.callback( - Output("alert_regression", "children"), - Output("alert_regression", "is_open"), - Output("loading_regression_prediction", "children"), + Output("alert_regression", "children", allow_duplicate=True), + Output("alert_regression", "is_open", allow_duplicate=True), + Output("loading_regression_prediction", "children", allow_duplicate=True), + Output("loading_regression_feature_importance", "children", allow_duplicate=True), + Output("loading_regression_preview", "children", allow_duplicate=True), Input("button_regression_show", "n_clicks"), # general State("dropdown_regression_dataset", "value"), @@ -221,6 +217,7 @@ def update_regression_summary(n_clicks, model_name, model, scoring): State("slider_regression_xgboost_n_estimators", "value"), State("slider_regression_xgboost_max_depth", "value"), State("slider_regression_xgboost_learning_rate", "value"), + prevent_initial_call=True ) def update_current_prediction(n_clicks, dataset_name, target, train_test_split, model, ts_cross_val, scoring, baseline_strategy, baseline_constant, baseline_quantile, baseline_look_back, rf_n_estimators, rf_criterion, rf_max_depth, xgb_n_estimators, xgb_max_depth, xgb_learning_rate): if n_clicks is None or n_clicks == 0: @@ -265,112 +262,34 @@ def update_current_prediction(n_clicks, dataset_name, target, train_test_split, ts_cross_val = True try: - y_train, y_train_pred, y_test, y_test_pred = apply_regressor_prediction(df, target, train_test_split, model, params, ts_cross_val=ts_cross_val) + y_train, y_train_pred, y_test, y_test_pred, 
feature_importance = apply_regressor_prediction(df, target, train_test_split, model, params, ts_cross_val=ts_cross_val) + scores = apply_regressor(df, target, train_test_split, model, params, ts_cross_val=ts_cross_val, scoring=REGRESSOR_SCORING[scoring]) except ValueError as e: print(e) alert = True alert_str = str(e) - return alert_str, alert, dash.no_update + return alert_str, alert, dash.no_update, dash.no_update, dash.no_update figure = get_prediction_plot(y_train, y_train_pred, y_test, y_test_pred, title="Original Data vs Predictions") - graph = dcc.Graph(id="figure_regression_prediction", className='graph_categorical', figure=figure) - - return dash.no_update, False, graph - -# apply regressor -@app.callback( - Output("alert_regression", "children"), - Output("alert_regression", "is_open"), - Output("loading_regression_preview", "children"), - Input("button_regression_show", "n_clicks"), - # general - State("dropdown_regression_dataset", "value"), - State("dropdown_regression_target", "value"), - State("slider_regression_train_test_split", "value"), - State("dropdown_regression_model", "value"), - State("checklist_regression_time_series_crossvalidation", "value"), - State("dropdown_regression_scoring", "value"), - # baseline - State("dropdown_regression_baseline_strategy", "value"), - State("input_regression_baseline_constant", "value"), - State("input_regression_baseline_quantile", "value"), - State("slider_regression_baseline_look_back", "value"), - # random forest - State("slider_regression_random_forest_n_estimators", "value"), - State("slider_regression_random_forest_criterion", "value"), - State("slider_regression_random_forest_max_depth", "value"), - # xgboost - State("slider_regression_xgboost_n_estimators", "value"), - State("slider_regression_xgboost_max_depth", "value"), - State("slider_regression_xgboost_learning_rate", "value"), -) -def update_current_results(n_clicks, dataset_name, target, train_test_split, model, ts_cross_val, scoring, 
baseline_strategy, baseline_constant, baseline_quantile, baseline_look_back, rf_n_estimators, rf_criterion, rf_max_depth, xgb_n_estimators, xgb_max_depth, xgb_learning_rate): - if n_clicks is None or n_clicks == 0: - return dash.no_update - # read out parameter - params = {} - if model == REGRESSOR[0]: # baseline - params['strategy'] = REGRESSOR_BASELINE_STRATEGY[baseline_strategy] - if baseline_strategy == list(REGRESSOR_BASELINE_STRATEGY.keys())[3]: - params['constant'] = baseline_constant - elif baseline_strategy == list(REGRESSOR_BASELINE_STRATEGY.keys())[2]: - params['quantile'] = baseline_quantile - elif baseline_strategy == list(REGRESSOR_BASELINE_STRATEGY.keys())[4]: - params['look_back'] = baseline_look_back - elif model == REGRESSOR[1]: # linear - pass - elif model == REGRESSOR[2]: # random forest - params['n_estimators'] = rf_n_estimators - params['criterion'] = REGRESSOR_RF_CRITERION[rf_criterion] - if rf_max_depth == 36: - params['max_depth'] = None - else: - params['max_depth'] = rf_max_depth - elif model == REGRESSOR[3]: # xgboost - params['n_estimators'] = xgb_n_estimators - params['learning_rate'] = xgb_learning_rate - if xgb_max_depth == 36: - params['max_depth'] = None - else: - params['max_depth'] = xgb_max_depth - - df = table_data.ALL_DATASETS[dataset_name] - - scoring = REGRESSOR_SCORING[scoring] - - # use data between defined ranges - min_index = table_data.ALL_RANGES[dataset_name][0] - max_index = table_data.ALL_RANGES[dataset_name][1] - df = df.loc[min_index:max_index].copy() - - if len(ts_cross_val) == 0: - ts_cross_val = False - else: - ts_cross_val = True - - try: - scores = apply_regressor(df, target, train_test_split, model, params, ts_cross_val=ts_cross_val, scoring=scoring) - except ValueError as e: - print(e) - alert = True - alert_str = str(e) - return alert_str, alert, dash.no_update + graph_predictions = dcc.Graph(id="figure_regression_prediction", className='graph_categorical', figure=figure) figure = 
get_cross_validation_plot(scores, title="Results Cross Validation Scores") - graph = dcc.Graph(id="figure_regression_result", className='graph_categorical', figure=figure) + graph_overview = dcc.Graph(id="figure_regression_result", className='graph_categorical', figure=figure) + + datatable_feature_importance = get_feature_importance_table(id_table='datatable_regression_feature_importance', data=feature_importance) global CURR_RESULT CURR_RESULT = (model, scores["Score"].mean()) - return dash.no_update, False, graph - + return dash.no_update, False, graph_predictions, datatable_feature_importance, graph_overview # update model name @app.callback( - Output("input_regression_model_name", "value"), + Output("input_regression_model_name", "value", allow_duplicate=True), Input("dropdown_regression_model", "value"), Input("dropdown_regression_scoring", "value"), + prevent_initial_call=True ) def update_update_model_name(model, scoring): @@ -383,9 +302,10 @@ def update_update_model_name(model, scoring): # update after selected dataset changes @app.callback( - Output("dropdown_regression_target", "options"), - Output("dropdown_regression_target", "value"), + Output("dropdown_regression_target", "options", allow_duplicate=True), + Output("dropdown_regression_target", "value", allow_duplicate=True), Input("dropdown_regression_dataset", "value"), + prevent_initial_call=True ) def update_after_dataset_changes(dataset_name): if dataset_name is None or dataset_name == "": @@ -400,9 +320,40 @@ def update_after_dataset_changes(dataset_name): # update after options for selected dataset changes @app.callback( - Output("button_regression_show", "disabled"), + Output("button_regression_show", "disabled", allow_duplicate=True), Input("dropdown_regression_dataset", "options"), + prevent_initial_call=True ) -def update_after_dataset_changes(options): +def update_after_dataset_options_changes(options): return len(options) < 1 + +@app.callback( + Output("img_regression_strategy", "src", 
allow_duplicate=True), + Output("link_regression_strategy", "href", allow_duplicate=True), + Output("tooltip_regression_strategy", "children", allow_duplicate=True), + Input("dropdown_regression_model", "value"), + Input("dropdown_regression_baseline_strategy", "value"), + prevent_initial_call=True +) +def update_info(strategy, baseline_strategy): + if strategy == REGRESSOR[0] and baseline_strategy != list(REGRESSOR_BASELINE_STRATEGY.keys())[-1]: + src = '/assets/img/link.png' + href = REGRESSOR_LINKS[0] + children = REGRESSOR_DESCRIPTION[0] + elif strategy == REGRESSOR[0]: + src = '/assets/img/tooltip.png' + href = REGRESSOR_LINKS[1] + children = REGRESSOR_DESCRIPTION[1] + elif strategy == REGRESSOR[1]: + src = '/assets/img/link.png' + href = REGRESSOR_LINKS[2] + children = REGRESSOR_DESCRIPTION[2] + elif strategy == REGRESSOR[2]: + src = '/assets/img/link.png' + href = REGRESSOR_LINKS[3] + children = REGRESSOR_DESCRIPTION[3] + else: + return dash.no_update + + return src, href, children diff --git a/callbacks/page_transformation_time_series_callbacks.py b/callbacks/page_transformation_time_series_callbacks.py index 5aeb0b1..0dbb4ec 100644 --- a/callbacks/page_transformation_time_series_callbacks.py +++ b/callbacks/page_transformation_time_series_callbacks.py @@ -25,12 +25,16 @@ # import util from methods.util import remove_item_if_exist +# import slider marks +from view.page_helper_components.sliders import get_slider_marks + # update datasets @app.callback( - Output("modal_transformation_time_series_delete_dataset", "is_open"), + Output("modal_transformation_time_series_delete_dataset", "is_open", allow_duplicate=True), Input("button_transformation_time_series_delete_dataset", "n_clicks"), Input("button_transformation_time_series_delete_dataset_no", "n_clicks"), State("modal_transformation_time_series_delete_dataset", "is_open"), + prevent_initial_call=True ) def toggle_deletion_modal(n_clicks1, n_clicks2, is_open): if n_clicks1 is None or n_clicks1 == 0: @@ 
-42,22 +46,24 @@ def toggle_deletion_modal(n_clicks1, n_clicks2, is_open): @app.callback( Output("button_transformation_time_series_save_dataset", "disabled"), Input("dropdown_transformation_time_series_dataset", "options"), + prevent_initial_call=True ) def disable_save_dataset(options): return not len(options) > 0 @app.callback( - Output("modal_transformation_time_series_delete_dataset", "is_open"), - Output("dropdown_transformation_time_series_dataset", "options"), - Output("dropdown_transformation_time_series_dataset", "value"), - Output("dropdown_transformation_time_series_overview_feature", "options"), - Output("dropdown_transformation_time_series_overview_feature", "value"), - Output("dropdown_transformation_time_series_features", "options"), - Output("dropdown_transformation_time_series_features", "value"), - Output("button_transformation_time_series_delete_dataset", "disabled"), + Output("modal_transformation_time_series_delete_dataset", "is_open", allow_duplicate=True), + Output("dropdown_transformation_time_series_dataset", "options", allow_duplicate=True), + Output("dropdown_transformation_time_series_dataset", "value", allow_duplicate=True), + Output("dropdown_transformation_time_series_overview_feature", "options", allow_duplicate=True), + Output("dropdown_transformation_time_series_overview_feature", "value", allow_duplicate=True), + Output("dropdown_transformation_time_series_features", "options", allow_duplicate=True), + Output("dropdown_transformation_time_series_features", "value", allow_duplicate=True), + Output("button_transformation_time_series_delete_dataset", "disabled", allow_duplicate=True), Input("button_transformation_time_series_delete_dataset_yes", "n_clicks"), State("modal_transformation_time_series_delete_dataset", "is_open"), State("dropdown_transformation_time_series_dataset", "value"), + prevent_initial_call=True ) def delete_datset(n_clicks, is_open, dataset_name): if n_clicks is None or n_clicks == 0: @@ -86,9 +92,10 @@ def 
delete_datset(n_clicks, is_open, dataset_name): return not is_open, options_datasets, curr_dataset, options_features, value_overview, options_features, value_parameter, disabled @app.callback( - Output("modal_transformation_time_series_add_dataset", "is_open"), + Output("modal_transformation_time_series_add_dataset", "is_open", allow_duplicate=True), Input("button_transformation_time_series_plus_dataset", "n_clicks"), State("modal_transformation_time_series_add_dataset", "is_open"), + prevent_initial_call=True ) def toggle_modal(n_clicks, is_open): if n_clicks is None: @@ -97,8 +104,9 @@ def toggle_modal(n_clicks, is_open): return not is_open @app.callback( - Output("dropdown_transformation_time_series_dataset", "value"), + Output("dropdown_transformation_time_series_dataset", "value", allow_duplicate=True), Input("modal_transformation_time_series_add_dataset", "is_open"), + prevent_initial_call=True ) def close_modal(is_open): if is_open: @@ -110,9 +118,10 @@ def close_modal(is_open): return list(table_data.ALL_DATASETS.keys())[-1] @app.callback( - Output("button_transformation_time_series_save_dataset", "n_clicks"), + Output("button_transformation_time_series_save_dataset", "n_clicks", allow_duplicate=True), Input("button_transformation_time_series_save_dataset", "n_clicks"), State("dropdown_transformation_time_series_dataset", "value"), + prevent_initial_call=True ) def toggle_modal(n_clicks, dataset_name): if n_clicks is None or n_clicks == 0: @@ -125,32 +134,35 @@ def toggle_modal(n_clicks, dataset_name): return dash.no_update @app.callback( - Output("alert_transformation_time_series_duplicate_dataset", "is_open"), - Output("modal_transformation_time_series_add_dataset", "is_open"), - Output("dropdown_transformation_time_series_dataset", "options"), - Output("dropdown_transformation_time_series_dataset", "value"), - Output("input_transformation_time_series_new_dataset", "value"), - Output("dropdown_transformation_time_series_overview_feature", "options"), - 
Output("dropdown_transformation_time_series_overview_feature", "value"), - Output("dropdown_transformation_time_series_features", "options"), - Output("dropdown_transformation_time_series_features", "value"), - Output("button_transformation_time_series_delete_dataset", "disabled"), + Output("alert_transformation_time_series_duplicate_dataset", "is_open", allow_duplicate=True), + Output("modal_transformation_time_series_add_dataset", "is_open", allow_duplicate=True), + Output("dropdown_transformation_time_series_dataset", "options", allow_duplicate=True), + Output("dropdown_transformation_time_series_dataset", "value", allow_duplicate=True), + Output("input_transformation_time_series_new_dataset", "value", allow_duplicate=True), + Output("dropdown_transformation_time_series_overview_feature", "options", allow_duplicate=True), + Output("dropdown_transformation_time_series_overview_feature", "value", allow_duplicate=True), + Output("dropdown_transformation_time_series_features", "options", allow_duplicate=True), + Output("dropdown_transformation_time_series_features", "value", allow_duplicate=True), + Output("button_transformation_time_series_delete_dataset", "disabled", allow_duplicate=True), + Output("button_outlier", "disabled", allow_duplicate=True), Input("button_transformation_time_series_add_dataset", "n_clicks"), State("modal_transformation_time_series_add_dataset", "is_open"), State("input_transformation_time_series_new_dataset", "value"), + prevent_initial_call=True ) def add_dataset(n_clicks, is_open, dataset_name): if n_clicks is None or n_clicks == 0: return dash.no_update if dataset_name in list(table_data.ALL_DATASETS.keys()): - return [True] + 9 * [dash.no_update] + return [True] + 10 * [dash.no_update] is_open = not is_open # update datasets table_data.ALL_DATASETS[dataset_name] = table_data.DF_RAW.copy(deep=True) table_data.ALL_RANGES[dataset_name] = [table_data.DF_RAW.index.min(), table_data.DF_RAW.index.max()] + 
table_data.ALL_MAX_RANGES[dataset_name] = [table_data.DF_RAW.index.min(), table_data.DF_RAW.index.max()] options_datasets = list(table_data.ALL_DATASETS.keys()) curr_dataset = dataset_name @@ -170,15 +182,16 @@ def add_dataset(n_clicks, is_open, dataset_name): #save_dataset_states(table_data.ALL_DATASETS, table_data.ALL_RANGES) - return False, is_open, options_datasets, dataset_name, new_dataset_name, options_features, value_overview, options_features, value_parameter, disabled + return False, is_open, options_datasets, dataset_name, new_dataset_name, options_features, value_overview, options_features, value_parameter, disabled, True @app.callback( - Output("alert_transformation_time_series_duplicate_feature_name", "is_open"), + Output("alert_transformation_time_series_duplicate_feature_name", "is_open", allow_duplicate=True), Input("button_transformation_time_series_show", "n_clicks"), State("dropdown_transformation_time_series_feature_transformation", "value"), State("input_transformation_time_series_pca_feature_name", "value"), State("input_transformation_time_series_dwt_feature_name", "value"), State("dropdown_transformation_time_series_dataset", "value"), + prevent_initial_call=True ) def toggle_feature_alert(n_clicks, method, pca_feature_name, dwt_feature_name, dataset_name): if n_clicks is None or n_clicks == 0: @@ -194,11 +207,12 @@ def toggle_feature_alert(n_clicks, method, pca_feature_name, dwt_feature_name, d return False @app.callback( - Output("alert_transformation_time_series_polyorder", "is_open"), + Output("alert_transformation_time_series_polyorder", "is_open", allow_duplicate=True), Input("button_transformation_time_series_show", "n_clicks"), State("dropdown_transformation_time_series_feature_transformation", "value"), State("slider_transformation_time_series_sgf_polyorder", "value"), State("slider_transformation_time_series_sgf_periods", "value"), + prevent_initial_call=True ) def toggle_sgf_alert(n_clicks, method, polyorder, window_size): if 
n_clicks is None or n_clicks == 0: @@ -211,14 +225,14 @@ def toggle_sgf_alert(n_clicks, method, polyorder, window_size): # update after feature removal @app.callback( - Output("dropdown_transformation_time_series_overview_feature", "options"), - Output("dropdown_transformation_time_series_overview_feature", "value"), - Output("dropdown_transformation_time_series_features", "options"), - Output("dropdown_transformation_time_series_features", "value"), - Output("dropdown_classification_target", "options"), - Output("dropdown_classification_target", "value"), - Output("dropdown_regression_target", "options"), - Output("dropdown_regression_target", "value"), + Output("dropdown_transformation_time_series_overview_feature", "options", allow_duplicate=True), + Output("dropdown_transformation_time_series_overview_feature", "value", allow_duplicate=True), + Output("dropdown_transformation_time_series_features", "options", allow_duplicate=True), + Output("dropdown_transformation_time_series_features", "value", allow_duplicate=True), + Output("dropdown_classification_target", "options", allow_duplicate=True), + Output("dropdown_classification_target", "value", allow_duplicate=True), + Output("dropdown_regression_target", "options", allow_duplicate=True), + Output("dropdown_regression_target", "value", allow_duplicate=True), # input State('datatable_transformation_time_series_features', 'data_previous'), Input('datatable_transformation_time_series_features', 'data'), @@ -231,6 +245,7 @@ def toggle_sgf_alert(n_clicks, method, polyorder, window_size): State("dropdown_regression_dataset", "value"), State("dropdown_regression_target", "options"), State("dropdown_regression_target", "value"), + prevent_initial_call=True ) def delete_feature(previous, current, dataset_name, value_overview, value_parameter, value_dataset_name_classification, options_target_classification, value_target_classification, value_dataset_name_regression, options_target_regression, value_target_regression): 
if dataset_name is None: @@ -280,15 +295,19 @@ def delete_feature(previous, current, dataset_name, value_overview, value_parame # update after selected dataset changes @app.callback( - Output('datatable_transformation_time_series_features', 'data'), - Output('datatable_transformation_time_series_features', 'data_previous'), - Output("dropdown_transformation_time_series_overview_feature", "options"), - Output("dropdown_transformation_time_series_overview_feature", "value"), - Output("rangeslider_transformation_time_series_overview", "value"), - Output("dropdown_transformation_time_series_features", "options"), - Output("dropdown_transformation_time_series_features", "value"), - Output('checklist_transformation_time_series_all_features', 'value'), + Output('datatable_transformation_time_series_features', 'data', allow_duplicate=True), + Output('datatable_transformation_time_series_features', 'data_previous', allow_duplicate=True), + Output("dropdown_transformation_time_series_overview_feature", "options", allow_duplicate=True), + Output("dropdown_transformation_time_series_overview_feature", "value", allow_duplicate=True), + Output("dropdown_transformation_time_series_features", "options", allow_duplicate=True), + Output("dropdown_transformation_time_series_features", "value", allow_duplicate=True), + Output('checklist_transformation_time_series_all_features', 'value', allow_duplicate=True), + Output("rangeslider_transformation_time_series_overview", "value", allow_duplicate=True), + Output("rangeslider_transformation_time_series_overview", "min", allow_duplicate=True), + Output("rangeslider_transformation_time_series_overview", "max", allow_duplicate=True), + Output("rangeslider_transformation_time_series_overview", "marks", allow_duplicate=True), Input("dropdown_transformation_time_series_dataset", "value"), + prevent_initial_call=True ) def update_after_dataset_changes(dataset_name): if dataset_name is None or dataset_name == '': @@ -303,19 +322,27 @@ def 
update_after_dataset_changes(dataset_name): df = table_data.ALL_DATASETS[dataset_name] options_features = list(df.columns) value_overview = options_features[:4] - range_values = table_data.ALL_RANGES[dataset_name] + + #range_values = table_data.ALL_RANGES[dataset_name] + + # update rangeslider + range_values = table_data.ALL_RANGES[dataset_name] + range_min, range_max = table_data.ALL_MAX_RANGES[dataset_name] + + marks = get_slider_marks((range_min, range_max)) # update parameter checklist_value = [] value_parameter = options_features[:3] - return data, None, options_features, value_overview, range_values, options_features, value_parameter, checklist_value + return data, None, options_features, value_overview, options_features, value_parameter, checklist_value, range_values, range_min, range_max, marks # update selected features @app.callback( - Output('dropdown_transformation_time_series_features', 'value'), + Output('dropdown_transformation_time_series_features', 'value', allow_duplicate=True), Input("checklist_transformation_time_series_all_features", "value"), State('dropdown_transformation_time_series_features', 'options'), + prevent_initial_call=True ) def update_selected_features(all_features, options): if all_features is None or all_features == []: @@ -323,17 +350,77 @@ def update_selected_features(all_features, options): return options +# update selected features when plot changes +@app.callback( + Output('dropdown_transformation_time_series_overview_feature', 'value', allow_duplicate=True), + Input("dropdown_transformation_time_series_plots", "value"), + Input('dropdown_transformation_time_series_overview_feature', 'value'), + prevent_initial_call=True +) +def update_selected_features_after_plot_changes(plot, cols): + if cols is None or cols == []: + return dash.no_update + + if plot == PLOTS[1]: + # histogram only needs one feature + cols = [cols[0]] + elif plot == PLOTS[3]: + if len(cols) > 2: + cols = cols[:2] + else: + return dash.no_update + + return 
cols + +# update after selected dataset changes +@app.callback( + Output("dropdown_classification_dataset", "options", allow_duplicate=True), + Output("dropdown_classification_dataset", "value", allow_duplicate=True), + Output("dropdown_regression_dataset", "options", allow_duplicate=True), + Output("dropdown_regression_dataset", "value", allow_duplicate=True), + Input("dropdown_transformation_time_series_dataset", "options"), + State("dropdown_classification_dataset", "value"), + State("dropdown_regression_dataset", "value"), + prevent_initial_call=True +) +def update_after_dataset_changes(datasets, dataset_name_classification, dataset_name_regression): + if datasets is None or datasets == []: + return dash.no_update + + + options = list(table_data.ALL_DATASETS.keys()) + if dataset_name_classification not in options: + dataset_name_classification = options[0] + + if dataset_name_regression not in options: + dataset_name_regression = options[0] + + df = table_data.ALL_DATASETS[dataset_name_classification] + + # update rangeslider + #min_range = df.index.min() + #max_range = df.index.max() + #value_range = table_data.ALL_RANGES[dataset_name_classification] + #range_min, range_max = table_data.ALL_MAX_RANGES[dataset_name] + + #marks = get_slider_marks((min_range, max_range)) + + return options, dataset_name_classification, options, dataset_name_regression + # update overview plot @app.callback( - Output("figure_transformation_time_series_overview", "figure"), + Output("figure_transformation_time_series_overview", "figure", allow_duplicate=True), Input("dropdown_transformation_time_series_overview_feature", "options"), Input("dropdown_transformation_time_series_overview_feature", "value"), Input("dropdown_transformation_time_series_plots", "value"), State("dropdown_transformation_time_series_dataset", "value"), Input("rangeslider_transformation_time_series_overview", "value"), + Input("rangeslider_transformation_time_series_overview", "min"), + 
Input("rangeslider_transformation_time_series_overview", "max"), + prevent_initial_call=True ) -def update_overview_plot(options, cols, plot, dataset_name, values_range): - if cols is None or cols == "": +def update_overview_plot(options, cols, plot, dataset_name, values_range, range_min, range_max): + if cols is None or cols == "" or cols == []: return dash.no_update if dataset_name is None or dataset_name == '': return dash.no_update @@ -354,7 +441,7 @@ def update_overview_plot(options, cols, plot, dataset_name, values_range): df = compute_plot(df, None, cols, value_min, value_max, reset_index=True) figure = get_overview_histogram_plot(df, cols) elif plot == PLOTS[2]: - corr = analyse_correlation(df[cols]) + corr = analyse_correlation(df, cols) figure = get_overview_heatmap(corr) elif plot == PLOTS[3]: if len(cols) >= 2: @@ -370,16 +457,20 @@ def update_overview_plot(options, cols, plot, dataset_name, values_range): figure = get_violin_plot(df, cols, max_index=None) # save range - table_data.ALL_RANGES[dataset_name] = values_range + triggered_id = ctx.triggered_id + if triggered_id == "rangeslider_transformation_time_series_overview": + table_data.ALL_RANGES[dataset_name] = values_range + table_data.ALL_MAX_RANGES[dataset_name] = (range_min, range_max) #save_dataset_states(table_data.ALL_DATASETS, table_data.ALL_RANGES) return figure # update parameter @app.callback( - Output("container_transformation_time_series_pca", "style"), + Output("container_transformation_time_series_pca", "style", allow_duplicate=True), Input("dropdown_transformation_time_series_feature_transformation", "value"), - State("container_transformation_time_series_pca", "style") + State("container_transformation_time_series_pca", "style"), + prevent_initial_call=True ) def update_style_pca(method, style): if style is None: @@ -391,9 +482,10 @@ def update_style_pca(method, style): return style @app.callback( - Output("slider_transformation_time_series_pca_n_components", "max"), - 
Output("input_transformation_time_series_pca_feature_name", "value"), + Output("slider_transformation_time_series_pca_n_components", "max", allow_duplicate=True), + Output("input_transformation_time_series_pca_feature_name", "value", allow_duplicate=True), Input("dropdown_transformation_time_series_features", "value"), + prevent_initial_call=True ) def update_style_pca_components(value): if value is None or value == "": @@ -408,8 +500,9 @@ def update_style_pca_components(value): return max_n_components, feature_name @app.callback( - Output("input_transformation_time_series_dwt_feature_name", "value"), + Output("input_transformation_time_series_dwt_feature_name", "value", allow_duplicate=True), Input("dropdown_transformation_time_series_features", "value"), + prevent_initial_call=True ) def update_style_pca_components(value): if value is None or value == "": @@ -420,8 +513,9 @@ def update_style_pca_components(value): return feature_name @app.callback( - Output("slider_transformation_time_series_dwt_vanishing_moments", "min"), - Input("dropdown_transformation_time_series_dwt_wavelet", "value") + Output("slider_transformation_time_series_dwt_vanishing_moments", "min", allow_duplicate=True), + Input("dropdown_transformation_time_series_dwt_wavelet", "value"), + prevent_initial_call=True ) def update_style_dwt_n(wavelet): if wavelet == list(WAVELETS.keys())[1]: @@ -432,9 +526,10 @@ def update_style_dwt_n(wavelet): return min_n @app.callback( - Output("container_transformation_time_series_dwt", "style"), + Output("container_transformation_time_series_dwt", "style", allow_duplicate=True), Input("dropdown_transformation_time_series_feature_transformation", "value"), - State("container_transformation_time_series_dwt", "style") + State("container_transformation_time_series_dwt", "style"), + prevent_initial_call=True ) def update_style_dwt(method, style): if style is None: @@ -447,12 +542,13 @@ def update_style_dwt(method, style): return style @app.callback( - 
Output("container_transformation_time_series_dwt_vanishing_moments", "style"), + Output("container_transformation_time_series_dwt_vanishing_moments", "style", allow_duplicate=True), Input("dropdown_transformation_time_series_feature_transformation", "value"), Input("dropdown_transformation_time_series_dwt_wavelet", "value"), - State("container_transformation_time_series_dwt_vanishing_moments", "style") + State("container_transformation_time_series_dwt_vanishing_moments", "style"), + prevent_initial_call=True ) -def update_style_dwt(method, wavelet, style): +def update_style_dwt_parameter(method, wavelet, style): if style is None: style = {} if method == TRANSFORMATIONS_TS[3] and wavelet != list(WAVELETS.keys())[3]: @@ -462,9 +558,10 @@ def update_style_dwt(method, wavelet, style): return style @app.callback( - Output("container_transformation_time_series_shift", "style"), + Output("container_transformation_time_series_shift", "style", allow_duplicate=True), Input("dropdown_transformation_time_series_feature_transformation", "value"), - State("container_transformation_time_series_shift", "style") + State("container_transformation_time_series_shift", "style"), + prevent_initial_call=True ) def update_style_shifting(method, style): if style is None: @@ -477,9 +574,10 @@ def update_style_shifting(method, style): return style @app.callback( - Output("container_transformation_time_series_sw", "style"), + Output("container_transformation_time_series_sw", "style", allow_duplicate=True), Input("dropdown_transformation_time_series_feature_transformation", "value"), - State("container_transformation_time_series_sw", "style") + State("container_transformation_time_series_sw", "style"), + prevent_initial_call=True ) def update_style_sliding_window(method, style): if style is None: @@ -492,9 +590,10 @@ def update_style_sliding_window(method, style): return style @app.callback( - Output("container_transformation_time_series_diff", "style"), + 
Output("container_transformation_time_series_diff", "style", allow_duplicate=True), Input("dropdown_transformation_time_series_feature_transformation", "value"), - State("container_transformation_time_series_diff", "style") + State("container_transformation_time_series_diff", "style"), + prevent_initial_call=True ) def update_style_differencing(method, style): if style is None: @@ -507,9 +606,10 @@ def update_style_differencing(method, style): return style @app.callback( - Output("container_transformation_time_series_sgf", "style"), + Output("container_transformation_time_series_sgf", "style", allow_duplicate=True), Input("dropdown_transformation_time_series_feature_transformation", "value"), - State("container_transformation_time_series_sgf", "style") + State("container_transformation_time_series_sgf", "style"), + prevent_initial_call=True ) def update_style_differencing(method, style): if style is None: @@ -523,9 +623,10 @@ def update_style_differencing(method, style): # update parameter @app.callback( - Output("slider_transformation_time_series_sgf_periods", "value"), + Output("slider_transformation_time_series_sgf_periods", "value", allow_duplicate=True), Input("slider_transformation_time_series_sgf_polyorder", "value"), State("slider_transformation_time_series_sgf_periods", "value"), + prevent_initial_call=True ) def update_sgf_periods(polyorder, periods): if polyorder is None or periods is None: @@ -542,9 +643,10 @@ def update_sgf_periods(polyorder, periods): # update parameter @app.callback( - Output("slider_transformation_time_series_sgf_polyorder", "value"), + Output("slider_transformation_time_series_sgf_polyorder", "value", allow_duplicate=True), State("slider_transformation_time_series_sgf_polyorder", "value"), Input("slider_transformation_time_series_sgf_periods", "value"), + prevent_initial_call=True ) def update_sgf_polyorder(polyorder, periods): if polyorder is None or periods is None: @@ -556,45 +658,11 @@ def update_sgf_polyorder(polyorder, 
periods): else: return periods - 1 -# update after selected dataset changes -@app.callback( - Output("dropdown_classification_dataset", "options"), - Output("dropdown_classification_dataset", "value"), - Output("dropdown_regression_dataset", "options"), - Output("dropdown_regression_dataset", "value"), - #Output("rangeslider_transformation_time_series_overview", "min"), - #Output("rangeslider_transformation_time_series_overview", "max"), - #Output("rangeslider_transformation_time_series_overview", "value"), - Input("dropdown_transformation_time_series_dataset", "options"), - State("dropdown_classification_dataset", "value"), - State("dropdown_regression_dataset", "value") -) -def update_after_dataset_changes(datasets, dataset_name_classification, dataset_name_regression): - if datasets is None or datasets == []: - return dash.no_update - - - options = list(table_data.ALL_DATASETS.keys()) - if dataset_name_classification not in options: - dataset_name_classification = options[0] - - if dataset_name_regression not in options: - dataset_name_regression = options[0] - - df = table_data.ALL_DATASETS[dataset_name_classification] - - # update rangeslider - min_range = df.index.min() - max_range = df.index.max() - value_range = table_data.ALL_RANGES[dataset_name_classification] - - - return options, dataset_name_classification, options, dataset_name_regression#, min_range, max_range, value_range # update button styles @app.callback( - Output("button_transformation_time_series_apply", "style"), - Output("button_transformation_time_series_show", "style"), + Output("button_transformation_time_series_apply", "style", allow_duplicate=True), + Output("button_transformation_time_series_show", "style", allow_duplicate=True), Input("button_transformation_time_series_show", "n_clicks"), Input("dropdown_transformation_time_series_dataset", "value"), Input("dropdown_transformation_time_series_features", "value"), @@ -616,7 +684,8 @@ def update_after_dataset_changes(datasets, 
dataset_name_classification, dataset_ Input('slider_transformation_time_series_sgf_polyorder', 'value'), Input('slider_transformation_time_series_sgf_periods', 'value'), State("button_transformation_time_series_apply", "style"), - State("button_transformation_time_series_show", "style") + State("button_transformation_time_series_show", "style"), + prevent_initial_call=True ) def update_style_buttons(n_clicks, dataset_name, features, method, v4, pca_feature_name, dwt_feature_name, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, sgf_poly_order, sgf_periods, style_apply, style_show): if style_apply is None: @@ -659,7 +728,7 @@ def update_style_buttons(n_clicks, dataset_name, features, method, v4, pca_featu # update line plot @app.callback( # update preview plot - Output("loading_transformation_time_series_preview", "children"), + Output("loading_transformation_time_series_preview", "children", allow_duplicate=True), # inputs Input("button_transformation_time_series_show", "n_clicks"), State("dropdown_transformation_time_series_dataset", "value"), @@ -679,6 +748,7 @@ def update_style_buttons(n_clicks, dataset_name, features, method, v4, pca_featu State('slider_transformation_time_series_diff_periods', 'value'), State('slider_transformation_time_series_sgf_polyorder', 'value'), State('slider_transformation_time_series_sgf_periods', 'value'), + prevent_initial_call=True ) def update_preview_plot(n_clicks, dataset_name, cols, method, pca_n_components, pca_feature_name, dwt_wavelet, dwt_mode, dwt_level, dwt_vanishing_moments, dwt_feature_name, shift_steps, shift_multi, sw_operations, sw_periods, diff_periods, sgf_poly_order, sgf_periods): if n_clicks is None or n_clicks == 0: @@ -730,14 +800,14 @@ def update_preview_plot(n_clicks, dataset_name, cols, method, pca_n_components, # update df and overview @app.callback( # update preview plot - Output('datatable_transformation_time_series_features', 'data'), - Output('datatable_transformation_time_series_features', 
'data_previous'), + Output('datatable_transformation_time_series_features', 'data', allow_duplicate=True), + Output('datatable_transformation_time_series_features', 'data_previous', allow_duplicate=True), # update parameter - Output("dropdown_transformation_time_series_features", "options"), - Output("dropdown_transformation_time_series_features", "value"), + Output("dropdown_transformation_time_series_features", "options", allow_duplicate=True), + Output("dropdown_transformation_time_series_features", "value", allow_duplicate=True), # update overview - Output("dropdown_transformation_time_series_overview_feature", "options"), - Output("dropdown_transformation_time_series_overview_feature", "value"), + Output("dropdown_transformation_time_series_overview_feature", "options", allow_duplicate=True), + Output("dropdown_transformation_time_series_overview_feature", "value", allow_duplicate=True), # inputs Input("button_transformation_time_series_apply", "n_clicks"), State("dropdown_transformation_time_series_dataset", "value"), @@ -758,6 +828,7 @@ def update_preview_plot(n_clicks, dataset_name, cols, method, pca_n_components, State('slider_transformation_time_series_sgf_polyorder', 'value'), State('slider_transformation_time_series_sgf_periods', 'value'), State("dropdown_transformation_time_series_overview_feature", "value"), + prevent_initial_call=True ) def update_overview(n_clicks, dataset_name, cols, method, pca_n_components, pca_feature_name, dwt_wavelet, dwt_mode, dwt_level, dwt_vanishing_moments, dwt_feature_name, shift_steps, shift_multi, sw_operations, sw_periods, diff_periods, sgf_poly_order, sgf_periods, value_overview): if n_clicks is None or n_clicks == 0: @@ -809,3 +880,55 @@ def update_overview(n_clicks, dataset_name, cols, method, pca_n_components, pca_ return data, None, options_features, value_parameter, options_features, value_overview +@app.callback( + Output("img_time_series_strategy", "src", allow_duplicate=True), + 
Output("link_time_series_strategy", "href", allow_duplicate=True), + Output("tooltip_time_series_strategy", "children", allow_duplicate=True), + Input("dropdown_transformation_time_series_feature_transformation", "value"), + prevent_initial_call=True +) +def update_info(strategy): + if strategy == TRANSFORMATIONS_TS[0]: + src = '/assets/img/tooltip.png' + href = TRANSFORMATIONS_LINKS[0] + children = TRANSFORMATIONS_DESCRIPTIONS[0] + elif strategy == TRANSFORMATIONS_TS[1]: + src = '/assets/img/tooltip.png' + href = TRANSFORMATIONS_LINKS[1] + children = TRANSFORMATIONS_DESCRIPTIONS[1] + elif strategy == TRANSFORMATIONS_TS[2]: + src = '/assets/img/link.png' + href = TRANSFORMATIONS_LINKS[2] + children = TRANSFORMATIONS_DESCRIPTIONS[2] + elif strategy == TRANSFORMATIONS_TS[3]: + src = '/assets/img/link.png' + href = TRANSFORMATIONS_LINKS[3] + children = TRANSFORMATIONS_DESCRIPTIONS[3] + elif strategy == TRANSFORMATIONS_TS[4]: + src = '/assets/img/link.png' + href = TRANSFORMATIONS_LINKS[4] + children = TRANSFORMATIONS_DESCRIPTIONS[4] + elif strategy == TRANSFORMATIONS_TS[5]: + src = '/assets/img/link.png' + href = TRANSFORMATIONS_LINKS[5] + children = TRANSFORMATIONS_DESCRIPTIONS[5] + elif strategy == TRANSFORMATIONS_TS[6]: + src = '/assets/img/link.png' + href = TRANSFORMATIONS_LINKS[6] + children = TRANSFORMATIONS_DESCRIPTIONS[6] + elif strategy == TRANSFORMATIONS_TS[7]: + src = '/assets/img/link.png' + href = TRANSFORMATIONS_LINKS[7] + children = TRANSFORMATIONS_DESCRIPTIONS[7] + elif strategy == TRANSFORMATIONS_TS[8]: + src = '/assets/img/link.png' + href = TRANSFORMATIONS_LINKS[8] + children = TRANSFORMATIONS_DESCRIPTIONS[8] + elif strategy == TRANSFORMATIONS_TS[9]: + src = '/assets/img/link.png' + href = TRANSFORMATIONS_LINKS[9] + children = TRANSFORMATIONS_DESCRIPTIONS[9] + else: + return dash.no_update + + return src, href, children diff --git a/callbacks/sidebar_callbacks.py b/callbacks/sidebar_callbacks.py index 49d1d9f..d308c55 100644 --- 
a/callbacks/sidebar_callbacks.py +++ b/callbacks/sidebar_callbacks.py @@ -22,82 +22,154 @@ def set_navitem_class(is_open): for i in [1, 2, 3]: app.callback( - Output(f"submenu-{i}-collapse", "is_open"), + Output(f"submenu-{i}-collapse", "is_open", allow_duplicate=True), [Input(f"submenu-{i}", "n_clicks")], [State(f"submenu-{i}-collapse", "is_open")], + prevent_initial_call=True )(toggle_collapse) app.callback( - Output(f"submenu-{i}", "className"), + Output(f"submenu-{i}", "className", allow_duplicate=True), [Input(f"submenu-{i}-collapse", "is_open")], + prevent_initial_call=True )(set_navitem_class) @app.callback( - #Output("page-content", "children"), - [ - Output("data_loading_container", "style"), - Output("data_overview_container", "style"), - Output("data_categorical_container", "style"), - Output("data_na_value_container", "style"), - Output("data_outlier_detection_container", "style"), -# Output("data_transformation_table_data_container", "style"), - Output("data_transformation_time_series_container", "style"), -# Output("data-visualization-container", "style"), - Output("data_supervised_classification_container", "style"), - Output("data_supervised_regression_container", "style"), -# Output("data-unsupervised-learning-container", "style"), - ], - [Input("url", "pathname")], + Output("data_loading_container", "style", allow_duplicate=True), + Output("data_overview_container", "style", allow_duplicate=True), + Output("data_categorical_container", "style", allow_duplicate=True), + Output("data_na_value_container", "style", allow_duplicate=True), + Output("data_outlier_detection_container", "style", allow_duplicate=True), + Output("data_transformation_time_series_container", "style", allow_duplicate=True), + Output("data_supervised_classification_container", "style", allow_duplicate=True), + Output("data_supervised_regression_container", "style", allow_duplicate=True), + Output("button_loading", "style", allow_duplicate=True), + Output("button_overview", 
"style", allow_duplicate=True), + Output("button_categorical", "style", allow_duplicate=True), + Output("button_na_values", "style", allow_duplicate=True), + Output("button_outlier", "style", allow_duplicate=True), + Output("button_ts", "style", allow_duplicate=True), + Output("button_sc", "style", allow_duplicate=True), + Output("button_sr", "style", allow_duplicate=True), + Input("url", "pathname"), + State("button_loading", "style"), + State("button_overview", "style"), + State("button_categorical", "style"), + State("button_na_values", "style"), + State("button_outlier", "style"), + State("button_ts", "style"), + State("button_sc", "style"), + State("button_sr", "style"), + prevent_initial_call=True ) -def render_page_content(pathname): - print(pathname) +def render_page_content(pathname, style_loading, style_overview, style_categorical, style_na, style_outlier, style_transformation, style_sc, style_sr): on = {"display": "block"} off = {"display": "none"} if pathname in ["/", "/page-1/0"]: - return on, off, off, off, off, off, off, off - elif pathname in ["/", "/page-1/1"]: - return off, on, off, off, off, off, off, off - elif pathname in ["/", "/page-1/2"]: # categorical feature - return off, off, on, off, off, off, off, off + style_loading['background-color'] = 'navy' + style_overview['background-color'] = 'royalblue' + style_categorical['background-color'] = 'royalblue' + style_na['background-color'] = 'royalblue' + style_outlier['background-color'] = 'royalblue' + style_transformation['background-color'] = 'royalblue' + style_sc['background-color'] = 'royalblue' + style_sr['background-color'] = 'royalblue' + return on, off, off, off, off, off, off, off, style_loading, style_overview, style_categorical, style_na, style_outlier,style_transformation, style_sc,style_sr + elif pathname == "/page-1/1": + style_loading['background-color'] = 'royalblue' + style_overview['background-color'] = 'navy' + style_categorical['background-color'] = 'royalblue' + 
style_na['background-color'] = 'royalblue' + style_outlier['background-color'] = 'royalblue' + style_transformation['background-color'] = 'royalblue' + style_sc['background-color'] = 'royalblue' + style_sr['background-color'] = 'royalblue' + return off, on, off, off, off, off, off, off, style_loading, style_overview, style_categorical, style_na, style_outlier,style_transformation, style_sc,style_sr + elif pathname == "/page-1/2": # categorical feature + style_loading['background-color'] = 'royalblue' + style_overview['background-color'] = 'royalblue' + style_categorical['background-color'] = 'navy' + style_na['background-color'] = 'royalblue' + style_outlier['background-color'] = 'royalblue' + style_transformation['background-color'] = 'royalblue' + style_sc['background-color'] = 'royalblue' + style_sr['background-color'] = 'royalblue' + return off, off, on, off, off, off, off, off, style_loading, style_overview, style_categorical, style_na, style_outlier,style_transformation, style_sc,style_sr elif pathname == "/page-1/3": # na value - return off, off, off, on, off, off, off, off + style_loading['background-color'] = 'royalblue' + style_overview['background-color'] = 'royalblue' + style_categorical['background-color'] = 'royalblue' + style_na['background-color'] = 'navy' + style_outlier['background-color'] = 'royalblue' + style_transformation['background-color'] = 'royalblue' + style_sc['background-color'] = 'royalblue' + style_sr['background-color'] = 'royalblue' + return off, off, off, on, off, off, off, off, style_loading, style_overview, style_categorical, style_na, style_outlier,style_transformation, style_sc,style_sr elif pathname == "/page-1/4": # outlier - return off, off, off, off, on, off, off, off -# elif pathname == "/page-2/1": # table data -# return off, off, off, off, off, on, off, off + style_loading['background-color'] = 'royalblue' + style_overview['background-color'] = 'royalblue' + style_categorical['background-color'] = 'royalblue' + 
style_na['background-color'] = 'royalblue' + style_outlier['background-color'] = 'navy' + style_transformation['background-color'] = 'royalblue' + style_sc['background-color'] = 'royalblue' + style_sr['background-color'] = 'royalblue' + return off, off, off, off, on, off, off, off, style_loading, style_overview, style_categorical, style_na, style_outlier,style_transformation, style_sc,style_sr elif pathname == "/page-2/2": # ts - return off, off, off, off, off, on, off, off - -# elif pathname == "/page-2/3": -# return html.P("Oh cool, this is page 2.3!") -# elif pathname == "/page-2/4": -# return html.P("No way! This is page 2.4!") -# elif pathname == "/page-2/5": -# return html.P("No way! This is page 2.5!") + style_loading['background-color'] = 'royalblue' + style_overview['background-color'] = 'royalblue' + style_categorical['background-color'] = 'royalblue' + style_na['background-color'] = 'royalblue' + style_outlier['background-color'] = 'royalblue' + style_transformation['background-color'] = 'navy' + style_sc['background-color'] = 'royalblue' + style_sr['background-color'] = 'royalblue' + return off, off, off, off, off, on, off, off, style_loading, style_overview, style_categorical, style_na, style_outlier,style_transformation, style_sc,style_sr elif pathname == "/page-3/1": # data supervised classification training - return off, off, off, off, off, off, on, off + style_loading['background-color'] = 'royalblue' + style_overview['background-color'] = 'royalblue' + style_categorical['background-color'] = 'royalblue' + style_na['background-color'] = 'royalblue' + style_outlier['background-color'] = 'royalblue' + style_transformation['background-color'] = 'royalblue' + style_sc['background-color'] = 'navy' + style_sr['background-color'] = 'royalblue' + return off, off, off, off, off, off, on, off, style_loading, style_overview, style_categorical, style_na, style_outlier,style_transformation, style_sc,style_sr elif pathname == "/page-3/2": # data supervised 
classification training - return off, off, off, off, off, off, off, on -# elif pathname == "/page-3/2": # data unsupervised training -# return off, off, off, off, off, off - elif pathname in ["/regression","/classification"]: - return dash.no_update + style_loading['background-color'] = 'royalblue' + style_overview['background-color'] = 'royalblue' + style_categorical['background-color'] = 'royalblue' + style_na['background-color'] = 'royalblue' + style_outlier['background-color'] = 'royalblue' + style_transformation['background-color'] = 'royalblue' + style_sc['background-color'] = 'royalblue' + style_sr['background-color'] = 'navy' + return off, off, off, off, off, off, off, on, style_loading, style_overview, style_categorical, style_na, style_outlier,style_transformation, style_sc,style_sr else: - style_loading['background-color'] = 'navy' + style_overview['background-color'] = 'royalblue' + style_categorical['background-color'] = 'royalblue' + style_na['background-color'] = 'royalblue' + style_outlier['background-color'] = 'royalblue' + style_transformation['background-color'] = 'royalblue' + style_sc['background-color'] = 'royalblue' + style_sr['background-color'] = 'royalblue' + return on, off, off, off, off, off, off, off, style_loading, style_overview, style_categorical, style_na, style_outlier,style_transformation, style_sc,style_sr -@app.callback(Output("button_sc", 'disabled'), - Output("button_sr", 'disabled'), - Input('dropdown_transformation_time_series_dataset', 'options'), +@app.callback( + Output("button_sc", 'disabled', allow_duplicate=True), + Output("button_sr", 'disabled', allow_duplicate=True), + Input('dropdown_transformation_time_series_dataset', 'options'), + prevent_initial_call=True ) def load_data(options): if len(options) < 1: disabled = True else: disabled = False - return disabled, disabled + return disabled, disabled \ No newline at end of file diff --git a/data/table_data.py 
b/data/table_data.py index e07987c..7189afd 100644 --- a/data/table_data.py +++ b/data/table_data.py @@ -2,10 +2,6 @@ import numpy as np from pandas.api.types import is_string_dtype -import sqlite3 -import mysql.connector -import psycopg2 -import pickle # the load data and set first column to index with name import os @@ -17,11 +13,7 @@ SEP = ',' index = 'auto' -if not check_existence(IN_PROCESSING_DATASETNAME): - DF_RAW = None -else: - index = None - DF_RAW = load_dataset(IN_PROCESSING_DATASETNAME, SEP, index=index) +DF_RAW = None def find_problematic_columns(dataframe): cols = [] @@ -31,6 +23,6 @@ def find_problematic_columns(dataframe): return cols -ALL_RANGES, ALL_DATASETS = load_dataset_states() +ALL_RANGES, ALL_MAX_RANGES, ALL_DATASETS = load_dataset_states() ALL_RESULTS_CLASSIFICATION = [] diff --git a/index.ipynb b/index.ipynb deleted file mode 100644 index 2fcd1ef..0000000 --- a/index.ipynb +++ /dev/null @@ -1,102 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "697572a0", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "import dash\n", - "import dash_bootstrap_components as dbc\n", - "from dash import Input, Output, State, dcc, html\n", - "\n", - "# Connect to main app.py file\n", - "from view.app import app\n", - "\n", - "# Connect the navbar to the index\n", - "from view import sidebar\n", - "\n", - "# import callbacks\n", - "from callbacks import sidebar_callbacks\n", - "\n", - "# import pages\n", - "from view import page_data_loading, page_overview, page_categorical_feature, page_na_value, page_outlier_detection, page_transformation_table_data, page_transformation_time_series, page_supervised_classification, page_supervised_regression\n", - "\n", - "# the styles for the main content position it to the right of the sidebar and\n", - "# add some padding.\n", - "CONTENT_STYLE = {\n", - " \"margin-left\": \"32rem\",\n", - " \"margin-right\": \"2rem\",\n", - " \"padding\": \"2rem 1rem\",\n", - 
"}\n", - "\n", - "\n", - "sidebar_ = sidebar.sidebar()\n", - "\n", - "\n", - "content = html.Div(\n", - " [\n", - " page_data_loading.layout,\n", - " page_overview.layout,\n", - " page_categorical_feature.layout,\n", - " page_na_value.layout,\n", - " page_outlier_detection.layout,\n", - " page_transformation_time_series.layout,\n", - " page_supervised_classification.layout,\n", - " page_supervised_regression.layout,\n", - "\n", - " ],\n", - " id=\"page_content\", style=CONTENT_STYLE\n", - ")\n", - "\n", - "app.layout = html.Div([dcc.Location(id=\"url\"), sidebar_, content])\n", - "\n", - "\n", - "\n", - "\n", - "if __name__ == \"__main__\":\n", - " app.run_server()\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9afd5660", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e7a64b73", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "dfenv", - "language": "python", - "name": "dfenv" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.13" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/index.py b/index.py deleted file mode 100644 index b02dce7..0000000 --- a/index.py +++ /dev/null @@ -1,50 +0,0 @@ -import dash -import dash_bootstrap_components as dbc -from dash import Input, Output, State, dcc, html - -# Connect to main app.py file -from view.app import app - -# Connect the navbar to the index -from view import sidebar - -# import callbacks -from callbacks import sidebar_callbacks - -# import pages -from view import page_data_loading, page_overview, page_categorical_feature, page_na_value, page_outlier_detection, page_transformation_table_data, page_transformation_time_series, 
page_supervised_classification, page_supervised_regression - -# the styles for the main content position it to the right of the sidebar and -# add some padding. -CONTENT_STYLE = { - "margin-left": "32rem", - "margin-right": "2rem", - "padding": "2rem 1rem", -} - - -sidebar_ = sidebar.sidebar() - - -content = html.Div( - [ - page_data_loading.layout, - page_overview.layout, - page_categorical_feature.layout, - page_na_value.layout, - page_outlier_detection.layout, - page_transformation_time_series.layout, - page_supervised_classification.layout, - page_supervised_regression.layout, - - ], - id="page_content", style=CONTENT_STYLE -) - -app.layout = html.Div([dcc.Location(id="url"), sidebar_, content]) - - - - -if __name__ == "__main__": - app.run_server() diff --git a/methods/data_analysis/supervised_learning.py b/methods/data_analysis/supervised_learning.py index a851c8b..66286b5 100644 --- a/methods/data_analysis/supervised_learning.py +++ b/methods/data_analysis/supervised_learning.py @@ -1,6 +1,6 @@ import numpy as np import pandas as pd -from xgboost import XGBClassifier, XGBRegressor +#from xgboost import XGBClassifier, XGBRegressor from sklearn.model_selection import train_test_split from sklearn.dummy import DummyClassifier, DummyRegressor from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor @@ -14,8 +14,8 @@ -CLASSFIER = ["Baseline", "KNN", "Random Forest", "Gradient Boosting"] -REGRESSOR = ["Baseline", "Linear", "Random Forest", "Gradient Boosting"] +CLASSFIER = ["Baseline", "KNN", "Random Forest"] +REGRESSOR = ["Baseline", "Linear", "Random Forest"] CLASSIFIER_BASELINE_STRATEGY = {"Prior": "prior", "Most Frequent": "most_frequent", "Stratified": "stratified", "Constant": "constant", "Look Back": "look_back"} REGRESSOR_BASELINE_STRATEGY = {"Mean": "mean", "Median": "median", "Quantile": "quantile", "Constant": "constant", "Look Back": "look_back"} @@ -24,10 +24,19 @@ CLASSIFIER_KNN_WEIGHTS = {'Uniform': 'uniform', 'Distance': 
'distance'} CLASSIFIER_RF_CRITERION = {"Gini": "gini", "Entropy": "entropy", "Log Loss": "log_loss"} -REGRESSOR_RF_CRITERION = {"Squared Error": "squared_error", "Absolute error": "absolute_error", "Friedman MSE": "friedman_mse", "Poisson": "poisson"} +REGRESSOR_RF_CRITERION = {"MSE": "mse", "Friedman MSE": "friedman_mse", "Poisson": "poisson"} CLASSIFIER_SCORING = {"Accuracy": "accuracy", "Accuracy Balanced": "balanced_accuracy", "F1 (Binary)": "f1", "F1 Micro": "f1_micro", "F1 Macro": "f1_macro", "F1 Weighted": "f1_weighted", "Precision (Binary)": "precision", "Precision Micro": "precision_micro", "Precision Macro": "precision_macro", "Precision Weighted": "precision_weighted", "Recall (Binary)": "recall", "Recall Micro": "recall_micro", "Recall Macro": "recall_macro", "Recall Weighted": "recall_weighted", "MAE": "neg_mean_absolute_error", "MSE": "neg_mean_squared_error" , "RMSE": "rmse"} -REGRESSOR_SCORING ={"MAE": "neg_mean_absolute_error", "MSE": "neg_mean_squared_error" , "RMSE": "rmse", "R2": "r2", "Explained Variance": "explained_variance"} +REGRESSOR_SCORING ={"MAE": "neg_mean_absolute_error", "MSE": "neg_mean_squared_error" , "R2": "r2", "Explained Variance": "explained_variance"} + +CLASSIFIER_LINKS = ['https://scikit-learn.org/stable/modules/generated/sklearn.dummy.DummyClassifier.html', '', 'https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html', 'https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html'] +CLASSIFIER_DESCRIPTION = ['Read more', 'This method allows to use the mean of the previous values as baseline.', 'Read more', 'Read more'] + +REGRESSOR_LINKS = ['https://scikit-learn.org/stable/modules/generated/sklearn.dummy.DummyRegressor.html', '', 'https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html', 'https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestRegressor.html'] +REGRESSOR_DESCRIPTION = ['Read 
more', 'This method allows to use the mean of the previous values as baseline.', 'Read more', 'Read more'] + +TS_CROSS_VALIDATION_LINKS = ['https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.TimeSeriesSplit.html'] +TS_CROSS_VALIDATION_DESCRIPTION = ['Read more'] def apply_classifier_prediction(df, target, train_size, model, params, ts_cross_val=False): # Extract the target values @@ -54,16 +63,37 @@ def apply_classifier_prediction(df, target, train_size, model, params, ts_cross_ clf = KNeighborsClassifier(**params) elif model == CLASSFIER[2]: clf = RandomForestClassifier(**params) - elif model == CLASSFIER[3]: - clf = XGBClassifier(**params) + #elif model == CLASSFIER[3]: + # clf = XGBClassifier(**params) clf.fit(X_train, y_train) y_train_pred = clf.predict(X_train) y_test_pred = clf.predict(X_test) + + # extract feature importance + feature_importance = [] + feature_names = list(df.columns) + + if isinstance(clf, RandomForestClassifier): + if hasattr(clf, "feature_importances_"): + for feature_name, importance_score in zip(feature_names, clf.feature_importances_): + dict_importance = {} + dict_importance['Feature'] = feature_name + dict_importance['Importance'] = importance_score + feature_importance.append(dict_importance) + elif model == CLASSFIER[0]: + for feature_name in feature_names: + dict_importance = {} + dict_importance['Feature'] = feature_name + if target == feature_name: + dict_importance['Importance'] = 1.0 + else: + dict_importance['Importance'] = 0.0 + feature_importance.append(dict_importance) - return y_train, y_train_pred, y_test, y_test_pred + return y_train, y_train_pred, y_test, y_test_pred, feature_importance def apply_classifier(df, target, train_size, model, params, ts_cross_val=False, scoring='f1_macro'): # Extract the target values @@ -101,8 +131,8 @@ def apply_classifier(df, target, train_size, model, params, ts_cross_val=False, clf = KNeighborsClassifier(**params) elif model == CLASSFIER[2]: clf = 
RandomForestClassifier(**params) - elif model == CLASSFIER[3]: - clf = XGBClassifier(**params) + #elif model == CLASSFIER[3]: + # clf = XGBClassifier(**params) # evalutate classifier if ts_cross_val: @@ -143,16 +173,44 @@ def apply_regressor_prediction(df, target, train_size, model, params, ts_cross_v reg = LinearRegression(**params) elif model == REGRESSOR[2]: reg = RandomForestRegressor(**params) - elif model == REGRESSOR[3]: - reg = XGBRegressor(**params) + #elif model == REGRESSOR[3]: + # reg = XGBRegressor(**params) reg.fit(X_train, y_train) y_train_pred = reg.predict(X_train) y_test_pred = reg.predict(X_test) + + # extract feature importance + feature_importance = [] + feature_names = list(df.columns) + + if isinstance(reg, RandomForestRegressor): + if hasattr(reg, "feature_importances_"): + for feature_name, importance_score in zip(feature_names, reg.feature_importances_): + dict_importance = {} + dict_importance['Feature'] = feature_name + dict_importance['Importance'] = importance_score + feature_importance.append(dict_importance) + elif isinstance(reg, LinearRegression): + if hasattr(reg, "coef_"): + for feature_name, coef in zip(feature_names, reg.coef_): + dict_importance = {} + dict_importance['Feature'] = feature_name + dict_importance['Importance'] = abs(coef) + feature_importance.append(dict_importance) + elif model == REGRESSOR[0]: + for feature_name in feature_names: + dict_importance = {} + dict_importance['Feature'] = feature_name + if target == feature_name: + dict_importance['Importance'] = 1.0 + else: + dict_importance['Importance'] = 0.0 + feature_importance.append(dict_importance) - return y_train, y_train_pred, y_test, y_test_pred + return y_train, y_train_pred, y_test, y_test_pred, feature_importance def apply_regressor(df, target, train_size, model, params, ts_cross_val=False, scoring='neg_mean_squared_error'): @@ -191,8 +249,8 @@ def apply_regressor(df, target, train_size, model, params, ts_cross_val=False, s reg = 
LinearRegression(**params) elif model == REGRESSOR[2]: reg = RandomForestRegressor(**params) - elif model == REGRESSOR[3]: - reg = XGBRegressor(**params) + #elif model == REGRESSOR[3]: + # reg = XGBRegressor(**params) # evalutate classifier if ts_cross_val: @@ -223,6 +281,12 @@ def cross_val(X, y, cv, model, scoring): scores.append(fold_score) + # Convert negative mean squared errors to positive + if scoring == 'neg_mean_squared_error' or scoring == 'neg_mean_absolute_error': + scores = [(-score) for score in scores] + elif scoring == 'rmse': + scores = [(-score) ** 0.5 for score in scores] + df_scores = pd.DataFrame({'Fold': range(1, cv + 1), 'Score': scores}) return df_scores diff --git a/methods/data_exploration/analyse.py b/methods/data_exploration/analyse.py index 553bb3e..1e26fca 100644 --- a/methods/data_exploration/analyse.py +++ b/methods/data_exploration/analyse.py @@ -120,34 +120,52 @@ def get_dtypes(df): dtypes = dtypes.astype(str).replace('object', 'categorical') return dtypes -def compute_plot(df, col_index, cols, value_min, value_max, reset_index=False): +def compute_plot(df, col_index, cols, value_min=None, value_max=None, reset_index=False, target=None, target_class=None): + if not target is None and not target_class is None: + df = df[df[target] == target_class] + if col_index is None: - sel = df.index.map(lambda x: x >= value_min and x <= value_max) + if value_min is None or value_max is None: + sel = df.index + else: + sel = df.index.map(lambda x: x >= value_min and x <= value_max) else: - sel = df[col_index].map(lambda x: x >= value_min and x <= value_max) + if value_min is None or value_max is None: + sel = df[col_index] + else: + sel = df[col_index].map(lambda x: x >= value_min and x <= value_max) + if reset_index: df = df.loc[sel, cols].reset_index() else: df = df.loc[sel, cols] + return df -def analyse_correlation(df): +def analyse_correlation(df, cols, target=None, target_class=None): + if not target is None and not target_class is None: + df = 
df[df[target] == target_class] + # only use numeric data - cols = df.select_dtypes(include=NUMERICS).columns df = df[cols] corr = df.corr() return corr -def compute_scatter(df, col, value_min, value_max): - # only use numeric data - cols = df.select_dtypes(include=NUMERICS).columns - df = df[cols] +def compute_scatter(df, col, value_min=None, value_max=None, target=None, target_class='ALL'): + if target_class != 'ALL': + df = df[df[target] == target_class] # filter data + if value_min is None: + value_min = df.index.min() + if value_max is None: + value_max = df.index.max() + sel = table_data.DF_RAW[col].map(lambda x: x >= value_min and x <= value_max) df = table_data.DF_RAW.loc[sel, table_data.DF_RAW.columns].reset_index() + return df \ No newline at end of file diff --git a/methods/data_exploration/encoder.py b/methods/data_exploration/encoder.py index 491aaca..71a72b9 100644 --- a/methods/data_exploration/encoder.py +++ b/methods/data_exploration/encoder.py @@ -2,8 +2,10 @@ from sklearn.preprocessing import LabelEncoder, OneHotEncoder import numpy as np import pandas as pd +import math import plotly.express as px import plotly.graph_objs as go +from dateutil import parser # import data from data import table_data @@ -11,7 +13,11 @@ # import utility from methods.util import is_float -ENCODING_STRATEGIES = ['One Hot Encoding', 'Label Encoding', 'Already Numeric', 'Replace Value'] +ENCODING_STRATEGIES = ['One Hot Encoding', 'Label Encoding', 'Date Encoding', 'Already Numeric', 'Replace Value'] + +ENCODING_LINKS = ['https://scikit-learn.org/0.16/modules/generated/sklearn.preprocessing.OneHotEncoder.html', 'https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.LabelEncoder.html', 'https://ianlondon.github.io/blog/encoding-cyclical-features-24hour-time/', '', ''] + +ENCODING_DESCRIPTIONS = ['Read more', 'Read more', 'Read more', 'This method is for categorical features that already contain only numbers.', 'This method is for replace certain feature 
values with another feature value of the same feature.'] def apply_encoding(df, col, strategy, in_str='', out_str=''): df = df.copy(deep=True) @@ -23,8 +29,10 @@ def apply_encoding(df, col, strategy, in_str='', out_str=''): elif strategy == ENCODING_STRATEGIES[1]: return apply_label_encoder(df, col) elif strategy == ENCODING_STRATEGIES[2]: - return apply_numeric(df, col) + return apply_date_encoder(df, col) elif strategy == ENCODING_STRATEGIES[3]: + return apply_numeric(df, col) + elif strategy == ENCODING_STRATEGIES[4]: return apply_replace_value(df, col, in_str, out_str) else: print(f"Unknown encoding strategy: {strategy}. Apply One Hot Encoding instead.") @@ -49,11 +57,30 @@ def apply_label_encoder(df, cols): original = df[i] mask = df[i].isnull() df[i] = LabelEncoder().fit_transform(df[i].astype(str)) - print(df[cols]) - print(df[i].dtypes) df[i] = df[i].where(~mask, original) df[i] = df[i].apply(lambda x: int(x) if str(x) != 'nan' else np.nan) return df + +def apply_date_encoder(df, date_cols): + for col in date_cols: + df[col] = df[col].apply(lambda x: parser.parse(x) if isinstance(x, str) else x) + + # Encode year linearly + df[col + ' year'] = df[col].dt.year + + # Encode other components using sine and cosine functions + components = ['month', 'day', 'hour', 'minute', 'second', 'microsecond'] + for comp in components: + df[col + ' ' + comp + ' sin'] = np.sin(2 * math.pi * df[col].dt.__getattribute__(comp) / df[col].dt.__getattribute__(comp).max()) + df[col + ' ' + comp + ' cos'] = np.cos(2 * math.pi * df[col].dt.__getattribute__(comp) / df[col].dt.__getattribute__(comp).max()) + + # Remove the original date columns + df.drop(date_cols, axis=1, inplace=True) + + # Remove columns with NaN values + df.dropna(axis=1, how='all', inplace=True) + + return df def apply_numeric(df, cols): diff --git a/methods/data_exploration/imputer.py b/methods/data_exploration/imputer.py index 3c3fb54..13ab7e1 100644 --- a/methods/data_exploration/imputer.py +++ 
b/methods/data_exploration/imputer.py @@ -10,6 +10,9 @@ IMPUTER_STRATEGIES = {'Mean': 'mean', 'Median': 'median', 'Most Frequent': 'most_frequent', 'Constant': 'constant'} IMPUTER_ORDER = {'Ascending': 'ascending', 'Descending': 'descending', 'Roman': 'roman', 'Arabic': 'arabic', 'Random': 'random'} IMPUTER_WEIGHTS = {'Uniform': 'uniform', 'Distance': 'distance'} + +IMPUTER_LINKS = ['https://scikit-learn.org/stable/modules/generated/sklearn.impute.SimpleImputer.html', 'https://scikit-learn.org/stable/modules/generated/sklearn.impute.IterativeImputer.html', 'https://scikit-learn.org/stable/modules/generated/sklearn.impute.KNNImputer.html', ''] +IMPUTER_DESCRIPTIONS = ['Read more', 'Read more', 'Read more', 'Allows to change the feature value of a certain index.'] def apply_imputing(df, cols, method, params): df = df.copy(deep=True) @@ -41,15 +44,16 @@ def apply_iterative_imputer(df, cols, params): transformed_data = imputer.fit_transform(df[cols]) - iterated_df = pd.DataFrame(transformed_data, columns=cols, index=df.index) - df.loc[:, cols] = iterated_df + df = pd.DataFrame(transformed_data[:, 0], columns=[cols[0]], index=df.index) return df def apply_knn_imputer(df, cols, params): imputer = KNNImputer(**params) - df = pd.DataFrame(imputer.fit_transform(df[cols]), columns=cols) + transformed_data = imputer.fit_transform(df[cols]) + + df = pd.DataFrame(transformed_data[:, 0], columns=[cols[0]], index=df.index) return df diff --git a/methods/data_exploration/outlier_detection.py b/methods/data_exploration/outlier_detection.py index 6885737..7bbf51d 100644 --- a/methods/data_exploration/outlier_detection.py +++ b/methods/data_exploration/outlier_detection.py @@ -6,8 +6,12 @@ from sklearn.ensemble import IsolationForest from sklearn.neighbors import LocalOutlierFactor -OUTLIER_DETECTION_METHODS = ["Random Forest Detector", "Densitiy Detector", "KV Detector"] +OUTLIER_DETECTION_METHODS = ["Isolation Forest", "Local Outlier Detector", "KV Detector"] 
OUTLIER_DETECTION_LOCAL_ALGORITHM = {"Auto": "auto", "Ball Tree": "ball_tree", "KD Tree": "kd_tree", "Brute": "brute"} +OUTLIER_DETECTION_LOCAL_OUTLIER_FACTOR_METRIC = {'Minkowski': 'minkowski', 'Euclidean': 'euclidean', 'Manhattan': 'manhattan', 'Chebyshev': 'chebyshev', 'Mahalanobis': 'mahalanobis'} + +OUTLIER_LINKS = ['https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.IsolationForest.html', 'https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.LocalOutlierFactor.html', ''] +OUTLIER_DESCRIPTIONS = ['Read more', 'Read more', 'This method is using the interquartile range (IQR) and mean. Datapoints that are more than 3 times the IQR away from the mean in either direction are considered outliers.'] def apply_outlier_detection(df, method, params): if method == OUTLIER_DETECTION_METHODS[0]: diff --git a/methods/data_transformation/transformations_table_data.py b/methods/data_transformation/transformations_table_data.py index 780843a..219e94a 100644 --- a/methods/data_transformation/transformations_table_data.py +++ b/methods/data_transformation/transformations_table_data.py @@ -35,7 +35,8 @@ SLIDING_WINDOW_OPERATIONS = {'Sum': 'sum', 'Mean': 'mean', 'Median': 'median', 'Min': 'min', 'Max': 'max', 'Std': 'std'} - +TRANSFORMATIONS_LINKS = ['', '', 'https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html', 'https://pywavelets.readthedocs.io/en/latest/ref/dwt-discrete-wavelet-transform.html', 'https://numpy.org/doc/stable/reference/generated/numpy.fft.rfft.html', 'https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.shift.html', 'https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.rolling.html', 'https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.diff.html', 'https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.savgol_filter.html', 'https://pykalman.github.io/class_docs.html'] +TRANSFORMATIONS_DESCRIPTIONS = ['This method normalizes the values of the selected features 
between 0 and 1.','This method rescales the selected features to have a mean of 0 and a standard deviation of 1.', 'Read more', 'Read more', 'Read more', 'Read more', 'Read more', 'Read more', 'Read more', 'Read more'] def apply_transformation_table_data(df, cols, method, params): df = df.copy(deep=True) diff --git a/states/states.py b/states/states.py index bce797b..29168d3 100644 --- a/states/states.py +++ b/states/states.py @@ -160,6 +160,15 @@ def load_dataset_states(): else: with open(save_path, 'rb') as f: all_ranges= pickle.load(f) + + # load max ranges + save_file = 'max_ranges.pkl' + save_path = os.path.join(save_dirs[-1], save_file) + if not os.path.exists(save_path): + all_max_ranges = {} + else: + with open(save_path, 'rb') as f: + all_max_ranges= pickle.load(f) # load datasets save_file = 'data.pkl' @@ -170,7 +179,7 @@ def load_dataset_states(): with open(save_path, 'rb') as f: all_datasets = pickle.load(f) - return all_ranges, all_datasets + return all_ranges, all_max_ranges, all_datasets def check_existence(dataset_name, save_dir='./states/data_states'): save_path = os.path.join(save_dir, dataset_name) + ".csv" diff --git a/view/app.py b/view/app.py index 1004468..08067bc 100644 --- a/view/app.py +++ b/view/app.py @@ -1,15 +1,13 @@ import dash import dash_bootstrap_components as dbc -from dash_extensions.enrich import DashProxy, MultiplexerTransform import os assets_path = os.getcwd() +'/assets' -app = DashProxy( +app = dash.Dash( __name__, #suppress_callback_exceptions=True, external_stylesheets=[dbc.themes.BOOTSTRAP, dbc.icons.FONT_AWESOME], - transforms=[MultiplexerTransform()], #prevent_initial_callbacks='initial_duplicate', assets_folder=assets_path, # these meta_tags ensure content is scaled correctly on different devices diff --git a/view/page_categorical_feature.py b/view/page_categorical_feature.py index 05d8a95..d65eb07 100644 --- a/view/page_categorical_feature.py +++ b/view/page_categorical_feature.py @@ -14,7 +14,9 @@ from data import 
table_data # import methods -from methods.data_exploration.analyse import get_num_numeric_categorical +from methods.data_exploration.encoder import * +from methods.data_exploration.analyse import * +from methods.cleaning import delete_columns # import figures from view.page_helper_components.plots import get_numeric_categorical_ratio_plot, get_categorical_feature_pie_plot @@ -114,7 +116,16 @@ def create_container_for_parameter(): ), dbc.Card([ - dbc.CardHeader("Method:", className='card_subheader'), + dbc.CardHeader([ + "Method:", + dcc.Link( + html.Img(src='/assets/img/link.png', id='img_categorical_strategy', className='tooltip_img'), + id='link_categorical_strategy', + href='https://scikit-learn.org/0.16/modules/generated/sklearn.preprocessing.OneHotEncoder.html', + target='_blank', + ), + dbc.Tooltip("Read more", target='img_categorical_strategy', id='tooltip_categorical_strategy'), + ], className='card_subheader'), dbc.CardBody([ dcc.Dropdown( id='dropdown_categorical_strategy', diff --git a/view/page_data_loading.py b/view/page_data_loading.py index 186ba51..26a79a3 100644 --- a/view/page_data_loading.py +++ b/view/page_data_loading.py @@ -131,7 +131,8 @@ def create_container_for_parameter(): ], ), - + style={'display': 'none'}, + id='container_parameter_loading', width=4 ) @@ -152,7 +153,7 @@ def create_container_for_table_data_parameter(): return layout def add_container_for_index(id_container, id_dropdown): - index_options = [{'label': 'Auto', 'value': 'auto'}, {'label': 'None', 'value': 'none'}] + index_options = [{'label': 'None', 'value': 'none'}, {'label': 'Auto', 'value': 'auto'}] layout = dbc.Card([ dbc.CardHeader([ @@ -176,7 +177,7 @@ def add_container_for_index(id_container, id_dropdown): ), ], className='card_subcontainer', - style={'display': 'block'}, + style={'display': 'none'}, id=id_container, ) diff --git a/view/page_helper_components/plots.py b/view/page_helper_components/plots.py index e65d318..e523f11 100644 --- 
a/view/page_helper_components/plots.py +++ b/view/page_helper_components/plots.py @@ -1,39 +1,33 @@ import plotly.express as px import plotly.graph_objs as go import numpy as np +import random # import util from methods.util import is_close +from methods.data_exploration.analyse import NUMERICS + PLOTS = ['Line Plot', 'Histogram', 'Correlations', 'Scatter Plot', 'Violin Plot'] -def get_overview_histogram_plot(df, cols): - if type(cols) == str: - cols = [cols] - - figure = go.Figure() - - for i, col in enumerate(cols): - if i > 5: - is_visible = 'legendonly' - else: - is_visible = True - - figure.add_trace( - go.Histogram( - x=df[col], - name=col, - visible=is_visible) - ) +def get_overview_histogram_plot(df, col): + if type(col) == list: + col = col[0] - figure.update_layout(barmode='overlay') - figure.update_traces(opacity=0.75) + figure = px.histogram(df, x=col) + + figure.update_layout( + xaxis_title=col, + yaxis_title='frequency', + bargap=0.1) return figure def get_overview_line_plot(df, cols, index='index'): if type(cols) == str: cols = [cols] + + df = df.select_dtypes(include=NUMERICS) figure = go.Figure() @@ -43,27 +37,62 @@ def get_overview_line_plot(df, cols, index='index'): x = df.index for i, col in enumerate(cols): - if i > 5: - is_visible = 'legendonly' - else: - is_visible = True - figure.add_trace( go.Scatter( x=x, y=df[col], - name=col, - visible=is_visible) + name=col) ) + figure.update_yaxes(title_text="features") + figure.update_xaxes(title_text=index) + + return figure + +def get_overview_violin_plot(df, cols, index='index'): + if type(cols) == str: + cols = [cols] + + figure = go.Figure() + + for index, col in enumerate(cols): + figure.add_trace(go.Violin(y=df[col], box_visible=True, meanline_visible=True, opacity=0.6, name=col)) + + figure.update_xaxes(title_text='features') + figure.update_yaxes(title_text='distribution') + return figure -def get_overview_scatter_plot(df, col1, col2): - figure = px.scatter(df, x=col1, y=col2) +def 
get_overview_scatter_plot(df, col1, col2, target=None): + if target is None: + df = df.select_dtypes(include=NUMERICS) + figure = px.scatter(df, x=col1, y=col2) + else: + random.seed(42) + + unique_classes = df[target].unique().tolist() + + # Generate a random color for each class + class_colors = {cls: f'{str(cls)}' for cls in unique_classes} + + # Map class labels to colors for each data point + df['color'] = df[target].map(class_colors) + + # Create the scatterplot using Plotly Express + figure = px.scatter(df, x=col1, y=col2, color='color') + + # Customize the legend + figure.update_layout(legend_title_text=target) # Set the legend title to the target column name + + # Update the legend labels to show class mappings + legend_labels = {cls: f'{cls}: {class_colors[cls]}' for cls in unique_classes} + #figure.update_layout(legend=dict(title_text=target, title=list(legend_labels.items()))) + return figure def get_overview_heatmap(df): + figure = px.imshow(df) return figure @@ -129,6 +158,8 @@ def get_na_bar_plot(df): y = '#NA' ) + figure.update_yaxes(title_text="# missing") + return figure def get_na_heatmap(df): diff --git a/view/page_helper_components/sliders.py b/view/page_helper_components/sliders.py index f5ca49e..9608676 100644 --- a/view/page_helper_components/sliders.py +++ b/view/page_helper_components/sliders.py @@ -1,6 +1,9 @@ def get_slider_marks(value): start, end = value - num_marks = 6 + if end <= 300: + num_marks = 5 + else: + num_marks = 6 # Calculate the step size based on the number of desired marks step_size = ((end - start) // (num_marks - 1)) // 10 * 10 @@ -9,4 +12,20 @@ def get_slider_marks(value): marks = {int(start + i * step_size): str(int(start + i * step_size)) for i in range(num_marks - 1)} marks[end] = str(end) + return marks + +def get_slider_marks_nearest_feature(max_value, min_value=1, value='None'): + if max_value < 10: + num_marks = max_value + else: + num_marks = 5 + + # Calculate the step size based on the number of desired marks 
+ step_size = ((max_value - min_value) // (num_marks - 1)) + + # Generate evenly spaced marks that are divisible by 10 + marks = {int(min_value + i * step_size): str(int(min_value + i * step_size)) for i in range(num_marks - 1)} + marks[max_value] = max_value + marks[max_value + 1] = "None" + return marks \ No newline at end of file diff --git a/view/page_helper_components/tables.py b/view/page_helper_components/tables.py new file mode 100644 index 0000000..d947743 --- /dev/null +++ b/view/page_helper_components/tables.py @@ -0,0 +1,27 @@ +from dash import dash_table + +def get_feature_importance_table(id_table, data): + table = dash_table.DataTable(id=id_table, + columns= [{"name": "Feature", "id": "Feature"}, {"name": "Importance", "id": "Importance"}], + data=data, + filter_action='native', + page_size=14, + fill_width=True, + sort_action="native", + style_header={ + 'backgroundColor': 'rgb(30, 30, 30)', + 'color': 'white', + 'fontWeight': 'bold', + 'fontSize' : "13pt" + }, + fixed_rows={'headers': True}, + style_cell={'textAlign': 'left', 'color': 'black'}, + style_data={ + 'whiteSpace': 'normal', + 'height': 'auto', + 'fontSize' : "13pt", + 'minWidth': "50%" + }, + ), + + return table diff --git a/view/page_na_value.py b/view/page_na_value.py index 0bc7240..2e26597 100644 --- a/view/page_na_value.py +++ b/view/page_na_value.py @@ -41,7 +41,7 @@ def create_data_navalue_panel(): [ dbc.CardHeader( [ - "NA Imputing" + "Impute Missing Values" ], className='card_header' ), @@ -79,7 +79,7 @@ def create_na_bar_plot(): [ dbc.CardHeader( [ - "Number of NA Values", + "Number of Missing Values", ], className='card_header'), dbc.CardBody( @@ -104,7 +104,7 @@ def create_na_heatmap_plot(): [ dbc.CardHeader( [ - "Position of NA Values", + "Position of Missing Values", ], className='card_header'), dbc.CardBody( @@ -130,7 +130,7 @@ def create_container_for_parameter(): dbc.CardBody( [ dbc.Card([ - dbc.CardHeader("Feature:", className='card_subheader'), + dbc.CardHeader("Missing 
Features:", className='card_subheader'), dbc.CardBody([ dcc.Dropdown( id='dropdown_na_feature', @@ -138,14 +138,25 @@ def create_container_for_parameter(): value=None, className='dropdown_overview_multi_feature', clearable=False, + multi=False, ), ]), + ], className='card_subcontainer', ), dbc.Card([ - dbc.CardHeader("Method:", className='card_subheader'), + dbc.CardHeader([ + "Method:", + dcc.Link( + html.Img(src='/assets/img/link.png', id='img_na_strategy', className='tooltip_img'), + id='link_na_strategy', + href='https://scikit-learn.org/stable/modules/generated/sklearn.impute.SimpleImputer.html', + target='_blank', + ), + dbc.Tooltip("Read more", target='img_na_strategy', id='tooltip_na_strategy'), + ], className='card_subheader'), dbc.CardBody([ dcc.Dropdown( id='dropdown_na_method', @@ -203,7 +214,9 @@ def create_container_for_iterative_parameter(): layout = html.Div( [ + add_container_for_fill_features('container_na_iterative_filling_feature', 'dropdown_na_iterative_filling_feature', 'checkbox_na_iterative_filling_feature'), add_container_for_max_iter('container_na_iterative_max_iter', 'slider_na_iterative_max_iter'), + add_container_for_tolerance('container_na_iterative_tol', 'dropdown_na_iterative_tol'), add_container_for_n_nearest_features('container_na_iterative_n_nearest_features', 'slider_na_iterative_n_nearest_features'), add_container_for_initial_strategy('container_na_iterative_initial_strategy', 'dropdown_na_iterative_initial_strategy'), add_container_for_initial_fill_value('container_na_iterative_fill_value', 'dropdown_na_iterative_fill_value'), @@ -220,8 +233,9 @@ def create_container_for_knn_parameter(): layout = html.Div( [ + add_container_for_fill_features('container_na_knn_filling_feature', 'dropdown_na_knn_filling_feature', 'checkbox_na_knn_filling_feature'), add_container_for_n_neighbors('container_na_knn_n_neighbors', 'slider_na_knn_n_neighbors'), - add_container_for_weights('container_na_iterative_weights', 
'dropdown_na_iterative_weights') + add_container_for_weights('container_na_knn_weights', 'dropdown_na_knn_weights') ], style={'display': 'none'}, id='container_na_knn' @@ -235,7 +249,7 @@ def create_container_for_manual_parameter(): layout = html.Div( [ add_container_for_index('container_na_manual_index', 'input_na_manual_index'), - add_container_for_fill_value('container_na_manual_fill_value', 'input_na_manual_fill_value') + add_container_for_fill_value('container_na_manual_fill_value', 'input_na_manual_fill_value', 'block') ], style={'display': 'none'}, id='container_na_manual' @@ -266,7 +280,35 @@ def add_container_for_strategy(id_container, id_dropdown): return layout -def add_container_for_fill_value(id_container, id_input): +def add_container_for_fill_features(id_container, id_dropdown, id_checkbox): + layout = dbc.Card([ + dbc.CardHeader("Features Used For Filling:", className='card_subheader'), + dbc.CardBody([ + dcc.Dropdown( + id=id_dropdown, + options=[], + value=None, + className='dropdown_overview_multi_feature', + multi=True, + ), + dcc.Checklist( + id=id_checkbox, + options=['Select all features'], + value=[], + inputStyle={"margin-right": "0.5rem",}, + ) + ], + + ), + ], + className='card_subcontainer', + style={'display': 'block'}, + id=id_container, + ) + + return layout + +def add_container_for_fill_value(id_container, id_input, display='none'): layout = dbc.Card([ dbc.CardHeader("Fill Value:", className='card_subheader'), dbc.CardBody([ @@ -282,7 +324,7 @@ def add_container_for_fill_value(id_container, id_input): ), ], className='card_subcontainer', - style={'display': 'block'}, + style={'display': display}, id=id_container, ) @@ -296,7 +338,15 @@ def add_container_for_max_iter(id_container, id_slider, min_iter=1, max_iter=20, id=id_slider, min=min_iter, max=max_iter, - marks = {i: {'label': str(round(i))} for i in np.arange(min_iter, max_iter, (max_iter-min_iter)/5)}, + marks={ + 1:"1", + 5:"5", + 10:"10", + 15:"15", + 20:"20", + 25:"25", + 
30:"30", + }, step=1, value=value, tooltip={"placement": "top", "always_visible": False}, @@ -306,6 +356,7 @@ def add_container_for_max_iter(id_container, id_slider, min_iter=1, max_iter=20, ), ], className='card_subcontainer', + style={'display': 'block'}, id=id_container, ) @@ -314,15 +365,16 @@ def add_container_for_max_iter(id_container, id_slider, min_iter=1, max_iter=20, def add_container_for_n_nearest_features(id_container, id_slider, min_features=1, max_features=2): value = max_features layout = dbc.Card([ - dbc.CardHeader("Number of Nearest Features", className='card_subheader'), + dbc.CardHeader("Number of Nearest Features:", className='card_subheader'), dbc.CardBody([ dcc.Slider( - id=id_slider, - min=min_features, - max=max_features, - marks = {i: {'label': str(round(i))} for i in np.arange(min_features, max_features, (max_features-min_features)/5)}, + id=id_slider, + marks={ + 1:"1", + 2:"None", + }, step=1, - value=value, + value=2, tooltip={"placement": "top", "always_visible": False}, ), ], @@ -330,10 +382,33 @@ def add_container_for_n_nearest_features(id_container, id_slider, min_features=1 ), ], className='card_subcontainer', + style={'display': 'block'}, id=id_container, ) - return layout + return layout + +def add_container_for_tolerance(id_container, id_dropdown): + layout = dbc.Card([ + dbc.CardHeader("Tolerance:", className='card_subheader'), + dbc.CardBody([ + dcc.Dropdown( + id=id_dropdown, + options=[0.1, 0.01, 0.001], + value=0.001, + className='dropdown_overview_multi_feature', + clearable=False, + ), + ], + + ), + ], + className='card_subcontainer', + style={'display': 'block'}, + id=id_container, + ) + + return layout def add_container_for_initial_strategy(id_container, id_dropdown): layout = dbc.Card([ @@ -408,8 +483,7 @@ def add_container_for_n_neighbors(id_container, id_slider, min_neighbors=1, max_ dcc.Slider( id=id_slider, min=min_neighbors, - max=max_neighbors, - marks = {i: {'label': str(round(i))} for i in 
np.arange(min_neighbors, max_neighbors, (max_neighbors-min_neighbors)/5)}, + max=max_neighbors, step=1, value=value, tooltip={"placement": "top", "always_visible": False}, @@ -419,6 +493,7 @@ def add_container_for_n_neighbors(id_container, id_slider, min_neighbors=1, max_ ), ], className='card_subcontainer', + style={'display': 'block'}, id=id_container, ) @@ -426,7 +501,7 @@ def add_container_for_n_neighbors(id_container, id_slider, min_neighbors=1, max_ def add_container_for_weights(id_container, id_dropdown): layout = dbc.Card([ - dbc.CardHeader("Imputation Order:", className='card_subheader'), + dbc.CardHeader("Weight Function:", className='card_subheader'), dbc.CardBody([ dcc.Dropdown( id=id_dropdown, diff --git a/view/page_outlier_detection.py b/view/page_outlier_detection.py index fdeda18..2a59833 100644 --- a/view/page_outlier_detection.py +++ b/view/page_outlier_detection.py @@ -18,12 +18,12 @@ # import callbacks from callbacks.page_outlier_detection_callbacks import * -# import analyse methods -from methods.data_exploration.analyse import * - # import detection methods from methods.data_exploration.outlier_detection import * +# import analyse methods +from methods.data_exploration.analyse import * + ###################################### ## First layer method: create panel ## ###################################### @@ -139,7 +139,38 @@ def create_container_for_parameter(): dbc.CardBody( [ dbc.Card([ - dbc.CardHeader("Method:", className='card_subheader'), + dbc.CardHeader("Features:", className='card_subheader'), + dbc.CardBody([ + dcc.Dropdown( + id='dropdown_outlier_feature', + options=[], + value=None, + className='dropdown_overview_multi_feature', + clearable=False, + multi=True, + ), + dcc.Checklist( + id='checklist_outlier_all_features', + options=['Select all features'], + value=[], + inputStyle={"margin-right": "0.5rem",}, + ) + ]), + + ], + className='card_subcontainer', + ), + dbc.Card([ + dbc.CardHeader([ + "Method:", + dcc.Link( + 
html.Img(src='/assets/img/link.png', id='img_outlier_strategy', className='tooltip_img'), + id='link_outlier_strategy', + href='https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.IsolationForest.html', + target='_blank', + ), + dbc.Tooltip("This method normalizes the values of the selected features between 0 and 1.", target='img_outlier_strategy', id='tooltip_outlier_strategy'), + ], className='card_subheader'), dbc.CardBody([ dcc.Dropdown( id='dropdown_outlier_method', @@ -158,7 +189,7 @@ def create_container_for_parameter(): create_container_for_kv_detector(), html.Div([ - dbc.Button("Remove", color = "primary", id="button_outlier_apply", className='btn_apply') + dbc.Button("Remove", color = "primary", id="button_outlier_apply", className='btn_apply', style={'display': 'none'}) ], className="btn_aligned_right"), html.Div([ @@ -180,7 +211,7 @@ def create_container_for_parameter(): def create_container_for_random_forest_detector(): layout = html.Div( [ - #add_container_for_warm_start('container_outlier_random_forest_warm_start', 'check_outlier_random_forest_warm_start'), + add_container_for_contamination('container_outlier_random_forest_contamination', 'dropdown_outlier_random_forest_contamination'), add_container_for_n_estimators('container_outlier_random_forest_n_estimators', 'slider_outlier_random_forest_n_estimators'), ], style={'display': 'block'}, @@ -254,7 +285,9 @@ def create_container_for_density_detector(): layout = html.Div( [ add_container_for_n_neighbors('container_outlier_densitiy_n_neighbors', 'slider_outlier_densitiy_n_neighbors'), -# add_container_for_contamination('container_outlier_densitiy_contamination', 'slider_outlier_densitiy_contamination') + add_container_for_contamination('container_outlier_densitiy_contamination', 'dropdown_outlier_densitiy_contamination'), + add_container_for_metric('container_outlier_densitiy_metric', 'dropdown_outlier_densitiy_metric'), + add_container_for_n_p('container_outlier_densitiy_p', 
'dropdown_outlier_densitiy_p'), add_container_for_algorithm('container_outlier_densitiy_algorithm', 'dropdown_outlier_densitiy_algorithm'), ], style={'display': 'none'}, @@ -314,23 +347,42 @@ def add_container_for_algorithm(id_container, id_dropdown): return layout -def add_container_for_contamination(id_container, id_slider): +def add_container_for_metric(id_container, id_dropdown): layout = dbc.Card([ - dbc.CardHeader("Proportion of Outliers:", className='card_subheader'), + dbc.CardHeader("Metric:", className='card_subheader'), dbc.CardBody([ + dcc.Dropdown( + id=id_dropdown, + options=[{'label': i, 'value': i} for i in OUTLIER_DETECTION_LOCAL_OUTLIER_FACTOR_METRIC.keys()], + value=list(OUTLIER_DETECTION_LOCAL_OUTLIER_FACTOR_METRIC.keys())[0], + className='dropdown_overview_multi_feature', + clearable=False, + ), + ], + + ), + ], + className='card_subcontainer', + style={'display': 'block'}, + id=id_container, + ) + + return layout + +def add_container_for_n_p(id_container, id_slider): + layout = dbc.Card([ + dbc.CardHeader("P:", className='card_subheader'), + dbc.CardBody([ dcc.Slider( - id="id_slider", - min=0.01, - max=0.99, - step=0.01, - value=0.1, + id=id_slider, + step=1, + value=2, marks={ - 0.01:"0.01", - 0.1:"0.1", - 0.2:"0.2", - 0.3:"0.3", - 0.4:"0.4", - 0.5:"0.5", + 1:"1", + 2:"2", + 3:"3", + 4:"4", + 5:"5", }, tooltip={"placement": "bottom", "always_visible": False} ), @@ -348,7 +400,6 @@ def add_container_for_contamination(id_container, id_slider): def create_container_for_kv_detector(): layout = html.Div( [ - add_container_for_feature('container_outlier_kv_feature', 'dropdown_outlier_kv_feature'), ], style={'display': 'none'}, id='container_outlier_kv' @@ -377,6 +428,28 @@ def add_container_for_feature(id_container, id_dropdown): return layout +def add_container_for_contamination(id_container, id_dropdown): + layout = dbc.Card([ + dbc.CardHeader("Contamination:", className='card_subheader'), + dbc.CardBody([ + dcc.Dropdown( + id=id_dropdown, + 
options={'auto': 'Auto', '0.01': 0.01, '0.02': 0.02, '0.03': 0.03, '0.04': 0.04, '0.5': 0.05, '0.1': 0.1, '0.2': 0.2, '0.3': 0.3, '0.4': 0.4, '0.5': 0.5}, + value='auto', + className='dropdown_overview_multi_feature', + clearable=False, + ), + ], + + ), + ], + className='card_subcontainer', + style={'display': 'block'}, + id=id_container, + ) + + return layout + # outlier table def create_outlier_table(): diff --git a/view/page_overview.py b/view/page_overview.py index 6baa5fb..2d4a7b5 100644 --- a/view/page_overview.py +++ b/view/page_overview.py @@ -33,7 +33,7 @@ def create_data_overview_panel(): [ generate_text_board('text_board_shape', '(#Rows, #Features)', '(0,0)'), generate_text_board('text_board_memory', 'Memory used', '0MB'), - generate_text_board('text_board_na', 'NA', '0%'), + generate_text_board('text_board_na', 'Missing', '0%'), generate_text_board('text_board_num','Numeric', '0%'), ], align = "start", @@ -51,14 +51,24 @@ def create_data_overview_panel(): className='row_overview_plots' ), + + dbc.Row( + [ + generate_lineplot('dropdown_overview_features_selection_linegraph', 'dropdown_overview_feature_selection_rangeslider_linegraph', 'dropdown_overview_target_selection_linegraph', 'dropdown_overview_class_selection_linegraph', 'figure_overview_linegraph', 'Line Plot'), + ], + align = "start", + justify = 'center', + className='row_overview_plots' + + ), dbc.Row( [ # histogram - generate_histogram_with_rangeslide('dropdown_overview_features_selection_histogramgraph', 'dropdown_overview_feature_selection_rangeslider_histogram', 'figure_overview_histogram', 'rangeslider_overview_value_constraint_histogram', 'Histogram'), + generate_histogram('dropdown_overview_features_selection_histogram', 'dropdown_overview_target_selection_histogram', 'dropdown_overview_class_selection_histogram', 'figure_overview_histogram', 'Histogram'), # line graph - generate_line_with_rangeslide('dropdown_overview_features_selection_linegraph', 
'dropdown_overview_feature_selection_rangeslider_linegraph', 'figure_overview_linegraph', 'rangeslider_overview_value_constraint_linegraph', 'Line Plot'), + generate_violinplot('dropdown_overview_features_selection_violinplot', 'dropdown_overview_feature_selection_rangeslider_violinplot', 'dropdown_overview_target_selection_violinplot', 'dropdown_overview_class_selection_violinplot', 'figure_overview_violinplot', 'Violin Plot'), ], align = "start", @@ -70,9 +80,9 @@ def create_data_overview_panel(): dbc.Row( [ # generate correlation heatmap - generate_correlation_heatmap('dropdown_overview_feature_selection_heatmap', 'figure_overview_heatmap', 'Correlations'), + generate_correlation_heatmap('dropdown_overview_feature_selection_heatmap', 'dropdown_overview_target_selection_heatmap', 'dropdown_overview_class_selection_heatmap', 'figure_overview_heatmap', 'Correlations'), # scatter graph - generate_scatter_with_rangeslide('dropdown1_overview_feature_selection_scattergraph', 'dropdown2_overview_feature_selection_scattergraph', 'figure_overview_scattergraph', 'rangeslider_overview_value_constraint_scattergraph', 'Scatter Plot'), + generate_scatter('dropdown1_overview_feature_selection_scattergraph', 'dropdown2_overview_feature_selection_scattergraph', 'dropdown_overview_target_selection_scattergraph', 'dropdown_overview_class_selection_scattergraph', 'figure_overview_scattergraph','Scatter Plot'), ], align = "start", @@ -97,7 +107,10 @@ def generate_text_board(id, text, value): dbc.Col( dbc.CardBody( [ - html.H6(id=id, children=str(value), className='text_board_font1'), + dcc.Loading( + id=id, + children=html.H6(children=str(value), className='text_board_font1'), + ), html.P(str(text), className='text_board_font2') ] ), @@ -111,19 +124,124 @@ def generate_text_board(id, text, value): width=3, ) +#### violinplot +def generate_lineplot(id_fs1, id_fs2, id_fs3, id_fs4, id_figure, title=''): + if table_data.DF_RAW is not None: + df = table_data.DF_RAW + options_all = 
list(df.columns) + value_features = options_all[:2] + value_target = options_all[0] + df_num = df.select_dtypes(include=NUMERICS) + options_num = ['index_auto'] + list(df_num.columns) + value_index= options_num[0] + else: + options_all = [] + value_features = None + value_target = None + options_num = [] + value_index = None + + tooltip = "Plot the features in a line plot. If your data contains a class feature you can filter the data for a specific class to only show data points that belong to this class." + + layout = dbc.Col( + dbc.Card( + [ + add_cardheader_for_fullscreen_and_close(title, 'img_overview_lineplot', tooltip), + dbc.CardBody( + [ + html.Div([ + dcc.Dropdown( + id = id_fs1, + options=options_all, + value= value_features, + multi=True, + className='dropdown_overview_multi_feature', + clearable=False + ), + ]), + html.Div( + [ + dbc.Col( + [ + html.Div("Target:", className='text_overview_index', style={'display': 'inline-block', 'vertical-align': 'middle'}), + dcc.Dropdown( + id = id_fs3, + options=options_all, + value=value_target, + clearable=False, + multi=False, + className='dropdown_overview_target_large', + style={'display': 'inline-block', 'vertical-align': 'middle'} + ), + ], + width = 6 + ), + dbc.Col( + [ + html.Div("Class:", className='text_overview_index', style={'display': 'inline-block', 'vertical-align': 'middle'}), + dcc.Dropdown( + id = id_fs4, + options=['ALL'] + options_all, + value='ALL', + clearable=False, + multi=False, + className='dropdown_overview_target_large', + style={'display': 'inline-block', 'vertical-align': 'middle'} + ), + ], + width = 6 + ), + ], + className='div_overview_single_feature' + ), + dcc.Loading( + id=id_figure, + type="default", + children=dcc.Graph(className='figure_overview') + ), + html.Div( + [ + dbc.Col( + [ + html.Div("Index:", className='text_overview_index', style={'display': 'inline-block', 'vertical-align': 'middle'}), + dcc.Dropdown( + id=id_fs2, + options=options_num, + value=value_index, + 
clearable=False, + className='dropdown_overview_index', + style={'display': 'inline-block', 'vertical-align': 'middle'} + ), + ], + width=6, + ) + ], + className='row', + ) + ], + #className="four columns", + ) + ], + ), + width = 12, + className='card_container', + id = 'card_overview_linegraph' + ) + return layout ##### histogram -def generate_histogram_with_rangeslide(id_fs1, id_fs2, id_figure, id_slider, title=''): +def generate_histogram(id_fs1, id_fs3, id_fs4, id_figure, title=''): if table_data.DF_RAW is not None: df = table_data.DF_RAW options_all = list(df.columns) - value_features = options_all[:2] + value_features = options_all[0] + value_target = options_all[0] df_num = df.select_dtypes(include=NUMERICS) - options_num = list(df_num.columns) + options_num = ['index_auto'] + list(df_num.columns) value_index= options_num[0] min_first_num = df_num.iloc[:, 0].min() @@ -131,17 +249,20 @@ def generate_histogram_with_rangeslide(id_fs1, id_fs2, id_figure, id_slider, tit else: options_all = [] value_features = None + value_target = None options_num = [] value_index = None min_first_num = 0 max_first_num = 1 - + + tooltip = "Plot a the frequency of feature values of a feature in a histogram. If your data contains a class feature you can filter the data for a specific class to only show data points that belong to this class." 
+ marks = get_slider_marks((min_first_num, max_first_num)) layout = dbc.Col( dbc.Card( [ - add_cardheader_for_fullscreen_and_close('button_overview_histogram_close', 'button_overview_histogram_fullscreen', title, 'card_overview_histogram'), + add_cardheader_for_fullscreen_and_close(title, 'img_overview_histogramm', tooltip), dbc.CardBody( [ html.Div([ @@ -149,43 +270,51 @@ def generate_histogram_with_rangeslide(id_fs1, id_fs2, id_figure, id_slider, tit id = id_fs1, options=options_all, value=value_features, - multi=True, + multi=False, className='dropdown_overview_multi_feature', clearable=False ), ]), - dcc.Graph( - id = id_figure, - className='figure_overview' - ), html.Div( [ dbc.Col( - dcc.Dropdown( - id = id_fs2, - options=options_num, - value=value_index, - clearable=False - ), - width = 4 + [ + html.Div("Target:", className='text_overview_index', style={'display': 'inline-block', 'vertical-align': 'middle'}), + dcc.Dropdown( + id = id_fs3, + options=options_all, + value=value_target, + clearable=False, + multi=False, + className='dropdown_overview_target', + style={'display': 'inline-block', 'vertical-align': 'middle'} + ), + ], + width = 6 ), dbc.Col( - html.Div( - dcc.RangeSlider( - id = id_slider, - min= min_first_num, - max= max_first_num, - marks = marks, - tooltip={"placement": "top", "always_visible": False}, - value=[min_first_num, max_first_num], + [ + html.Div("Class:", className='text_overview_index', style={'display': 'inline-block', 'vertical-align': 'middle'}), + dcc.Dropdown( + id = id_fs4, + options=['ALL'] + options_all, + value='ALL', + clearable=False, + multi=False, + className='dropdown_overview_target', + style={'display': 'inline-block', 'vertical-align': 'middle'} ), - ), - width = 8 - ) - + ], + width = 6 + ), ], - className = 'row', - ) + className='div_overview_single_feature' + ), + dcc.Loading( + id=id_figure, + type="default", + children=dcc.Graph(className='figure_overview') + ), ], ) @@ -198,32 +327,29 @@ def 
generate_histogram_with_rangeslide(id_fs1, id_fs2, id_figure, id_slider, tit return layout -#### line graph -def generate_line_with_rangeslide(id_fs1, id_fs2, id_figure, id_slider, title=''): +#### violinplot +def generate_violinplot(id_fs1, id_fs2, id_fs3, id_fs4, id_figure, title=''): if table_data.DF_RAW is not None: df = table_data.DF_RAW options_all = list(df.columns) value_features = options_all[:2] - - + value_target = options_all[0] df_num = df.select_dtypes(include=NUMERICS) - options_num = list(df_num.columns) + options_num = ['index_auto'] + list(df_num.columns) value_index= options_num[0] - - min_first_num = df_num.iloc[:, 0].min() - max_first_num = df_num.iloc[:, 0].max() else: options_all = [] value_features = None + value_target = None options_num = [] value_index = None - min_first_num = 0 - max_first_num = 1 + + tooltip = "Plot the distribution of the feature values in a violin plot. If your data contains a class feature you can filter the data for a specific class to only show data points that belong to this class." 
layout = dbc.Col( dbc.Card( [ - add_cardheader_for_fullscreen_and_close('button_overview_linegraph_close', 'button_overview_linegraph_fullscreen', title, 'card_overview_linegraph'), + add_cardheader_for_fullscreen_and_close(title, 'img_overview_violinplot', tooltip), dbc.CardBody( [ html.Div([ @@ -236,37 +362,46 @@ def generate_line_with_rangeslide(id_fs1, id_fs2, id_figure, id_slider, title='' clearable=False ), ]), - dcc.Graph( - id = id_figure, - className='figure_overview' - ), html.Div( [ dbc.Col( - dcc.Dropdown( - id = id_fs2, - options=options_num, - value=value_index, - clearable=False - ), - width = 4, + [ + html.Div("Target:", className='text_overview_index', style={'display': 'inline-block', 'vertical-align': 'middle'}), + dcc.Dropdown( + id = id_fs3, + options=options_all, + value=value_target, + clearable=False, + multi=False, + className='dropdown_overview_target', + style={'display': 'inline-block', 'vertical-align': 'middle'} + ), + ], + width = 6 ), dbc.Col( - html.Div( - dcc.RangeSlider( - id = id_slider, - min= min_first_num, - max= max_first_num, - marks = {i: {'label': str(round(i))} for i in np.arange(min_first_num, max_first_num, (max_first_num-min_first_num)/5)}, - tooltip={"placement": "top", "always_visible": False}, - value=[min_first_num, max_first_num], + [ + html.Div("Class:", className='text_overview_index', style={'display': 'inline-block', 'vertical-align': 'middle'}), + dcc.Dropdown( + id = id_fs4, + options=['ALL'] + options_all, + value='ALL', + clearable=False, + multi=False, + className='dropdown_overview_target', + style={'display': 'inline-block', 'vertical-align': 'middle'} ), - ), - width = 8 + ], + width = 6 ), ], - className = 'row', - ) + className='div_overview_single_feature' + ), + dcc.Loading( + id=id_figure, + type="default", + children=dcc.Graph(className='figure_overview') + ), ], #className="four columns", @@ -275,33 +410,35 @@ def generate_line_with_rangeslide(id_fs1, id_fs2, id_figure, id_slider, title='' ), 
width = 6, className='card_container', - id = 'card_overview_linegraph' + id = 'card_overview_violinplot' ) return layout #### scatter plot -def generate_scatter_with_rangeslide(id_fs1, id_fs2, id_figure, id_slider, title=''): +def generate_scatter(id_fs1, id_fs2, id_fs3, id_fs4, id_figure, title=''): if table_data.DF_RAW is not None: df = table_data.DF_RAW + options_all = list(df.columns) + value_target = options_all[0] df_num = df.select_dtypes(include=NUMERICS) options_num = list(df_num.columns) value_num_1 = options_num[0] value_num_2 = options_num[1] - - min_first_num = df_num.iloc[:, 0].min() - max_first_num = df_num.iloc[:, 0].max() else: + options_all = [] + value_target = None options_num = [] value_num_1 = None value_num_2 = None - min_first_num = 0 - max_first_num = 1 + + tooltip = "Plot two features in a scatter plot. If your data contains a class feature you can filter the data for a specific class to only show data points that belong to this class. Or you can plot all classes and the datapoints are colored in different according to the corresponding class." 
+ layout = dbc.Col( dbc.Card( [ - add_cardheader_for_fullscreen_and_close('button_overview_scatter_close', 'button_overview_scatter_fullscreen', title, 'card_overview_scatter'), + add_cardheader_for_fullscreen_and_close(title, 'img_overview_scatter', tooltip), dbc.CardBody( [ html.Div( @@ -327,28 +464,46 @@ def generate_scatter_with_rangeslide(id_fs1, id_fs2, id_figure, id_slider, title ], className='div_overview_single_feature' ), - dcc.Graph( - id = id_figure, - className='figure_overview' - ), html.Div( [ dbc.Col( - html.Div( - dcc.RangeSlider( - id = id_slider, - min= min_first_num, - max= max_first_num, - marks = {i: {'label': str(round(i))} for i in np.arange(min_first_num, max_first_num, (max_first_num-min_first_num)/5)}, - tooltip={"placement": "top", "always_visible": False}, - value=[min_first_num, max_first_num], + [ + html.Div("Target:", className='text_overview_index', style={'display': 'inline-block', 'vertical-align': 'middle'}), + dcc.Dropdown( + id = id_fs3, + options=options_all, + value=value_target, + clearable=False, + multi=False, + className='dropdown_overview_target', + style={'display': 'inline-block', 'vertical-align': 'middle'} ), - ), - width = 16 - ) + ], + width = 6 + ), + dbc.Col( + [ + html.Div("Class:", className='text_overview_index', style={'display': 'inline-block', 'vertical-align': 'middle'}), + dcc.Dropdown( + id = id_fs4, + options=['ALL'] + options_all, + value='ALL', + clearable=False, + multi=False, + className='dropdown_overview_target', + style={'display': 'inline-block', 'vertical-align': 'middle'} + ), + ], + width = 6 + ), ], - className = 'row', - ) + className='div_overview_single_feature' + ), + dcc.Loading( + id=id_figure, + type="default", + children=dcc.Graph(className='figure_overview') + ), ], ) @@ -372,12 +527,12 @@ def generate_datatable_with_dataframe(id, title=''): cols = [{"name": i, "id": i} for i in OVERVIEW_COLUMNS] data = None - tooltip = "In this table you can delete features. 
You can also use the search function (first row under the header) to filter for values. In case of numeric values you can also use '<', '>','<=', '>=' and '!='. E.g. use '< 60.8' to filter for values less than 60.8. " + tooltip = "In this table you can delete features by clicking the cross beside the feature. You can also use the search function (first row under the header) to filter for values. In case of numeric values you can also use '<', '>','<=', '>=' and '!='. E.g. use '< 60.8' to filter for values less than 60.8." layout = dbc.Col( dbc.Card( [ - add_cardheader_for_fullscreen_and_close('button_overview_datatable_close', 'button_overview_datatable_fullscreen', title, 'card_overview_datatable', 'img_overview_datatable', tooltip), + add_cardheader_for_fullscreen_and_close(title, 'img_overview_datatable', tooltip), dbc.CardBody( [ dash_table.DataTable( @@ -386,7 +541,7 @@ def generate_datatable_with_dataframe(id, title=''): data = data, filter_action='native', row_deletable=True, - editable=True, + editable=False, page_size=14, fill_width=True, sort_action="native", @@ -417,9 +572,11 @@ def generate_datatable_with_dataframe(id, title=''): ) return layout -def generate_correlation_heatmap(id_dropdown, id_figure, title=''): +def generate_correlation_heatmap(id_dropdown, id_fs2, id_fs3, id_figure, title=''): if table_data.DF_RAW is not None: df = table_data.DF_RAW + options_all = list(df.columns) + value_target = options_all[0] df_num = df.select_dtypes(include=NUMERICS) options_num =list(df_num.columns) value_num = options_num[:5] @@ -427,15 +584,19 @@ def generate_correlation_heatmap(id_dropdown, id_figure, title=''): min_first_num = df_num.iloc[:, 0].min() max_first_num = df_num.iloc[:, 0].max() else: + options_all = [] options_num = [] value_num = None + value_target = None min_first_num = 0 max_first_num = 1 + + tooltip = "Plot the correlations between the different features. 
If your data contains a class feature you can filter the data for a specific class to only show data points that belong to this class." layout = dbc.Col( dbc.Card( [ - add_cardheader_for_fullscreen_and_close('button_overview_heatmap_close', 'button_overview_heatmap_fullscreen', title, 'card_overview_heatmap'), + add_cardheader_for_fullscreen_and_close(title, 'img_overview_heatmap', tooltip), dbc.CardBody( [ html.Div([ @@ -448,7 +609,42 @@ def generate_correlation_heatmap(id_dropdown, id_figure, title=''): clearable=False ), ]), - dcc.Graph(id=id_figure,className='figure_overview') + html.Div( + [ + dbc.Col( + [ + html.Div("Target:", className='text_overview_index', style={'display': 'inline-block', 'vertical-align': 'middle'}), + dcc.Dropdown( + id = id_fs2, + options=options_all, + value=value_target, + clearable=False, + multi=False, + className='dropdown_overview_target', + style={'display': 'inline-block', 'vertical-align': 'middle'} + ), + ], + width = 6 + ), + dbc.Col( + [ + html.Div("Class:", className='text_overview_index', style={'display': 'inline-block', 'vertical-align': 'middle'}), + dcc.Dropdown( + id = id_fs3, + options=['ALL'] + options_all, + value='ALL', + clearable=False, + multi=False, + className='dropdown_overview_target', + style={'display': 'inline-block', 'vertical-align': 'middle'} + ), + ], + width = 6 + ), + ], + className='div_overview_single_feature' + ), + dcc.Graph(id=id_figure,className='figure_overview'), ], ) ], @@ -495,31 +691,13 @@ def generate_heatmap_figure_with_dataframe(df): ############# util -def add_cardheader_for_fullscreen_and_close(id_close, id_fullscreen, title, id_target=None, id_tooltip="", tooltip=""): +def add_cardheader_for_fullscreen_and_close(title, id_target="", tooltip=""): if tooltip == "": layout = dbc.CardHeader( [ dbc.Row( [ dbc.Col(html.H5(title, className='h5'), width=11), - dbc.Col( - [ - dbc.Button( - html.Img(src="/assets/img/fullscreen.png", className='btn_img'), - id=id_fullscreen, - 
color="#FDFCFC", - style={'display': 'block'} - ), - dbc.Button( - html.Img(src="/assets/img/close.png", className='btn_img'), - id=id_close, - color="#FDFCFC", - style={'display': 'block'} - ) - ], - width=1, - style={'display': 'flex', 'flex-direction': 'row', 'align-items': 'center'} - ), ], className='card_header_fullscreen_and_close' ) @@ -528,78 +706,53 @@ def add_cardheader_for_fullscreen_and_close(id_close, id_fullscreen, title, id_t else: layout = dbc.CardHeader( [ - dbc.Row( - [ - dbc.Col(html.H5(title, className='h5'), width=11), - dbc.Col( - [ - html.Img(id=id_tooltip, src="/assets/img/tooltip.png", className='tooltip_img'), - dbc.Tooltip( - tooltip, - target='img_overview_datatable', - ), - dbc.Button( - html.Img(src="/assets/img/fullscreen.png", className='btn_img'), - id=id_fullscreen, - color="#FDFCFC", - style={'display': 'block'} - ), - dbc.Button( - html.Img(src="/assets/img/close.png", className='btn_img'), - id=id_close, - color="#FDFCFC", - style={'display': 'block'} - ) - ], - width=1, - style={'display': 'flex', 'flex-direction': 'row', 'align-items': 'center'} - ), - ], - className='card_header_fullscreen_and_close' - ) + html.Img(id=id_target, src="/assets/img/tooltip.png", className='tooltip_img'), + dbc.Tooltip(tooltip, target=id_target), + html.H5(title, className='h5'), ], ) if id_target: + pass # regist callback - @app.callback( - Output(id_target, "style"), - Output(id_close, "style"), - Input(id_close, 'n_clicks'), - Input(id_fullscreen, 'n_clicks'), - State(id_target, "style"), - State(id_close, "style") - - ) - def fullscreen_target(n_clicks1, n_clicks2, style, style_close): - triggered_id = ctx.triggered_id - if triggered_id == id_close: - if n_clicks1 and n_clicks1%2 == 1: - style['display'] = 'none' - else: - style['display'] = 'block' - return style - else: - style_card_full = { - 'height': '90vh', - 'width': 'auto', - 'zIndex': 998, - 'position': 'fixed', 'top': '55px', - 'bottom': 0, 'left': 0, 'right': 0, - 
'background-color': 'white' - } - style_card_small = { - - } - style_component = { - 'height': '89vh', 'maxHeight': '89vh', - } - if n_clicks2 and n_clicks2%2 == 1: - style_close['display'] = 'none' - return style_card_full, style_close - else: - style_close['display'] = 'block' - return style_card_small, style_close + #@app.callback( + # Output(id_target, "style"), + # Output(id_close, "style"), + # Input(id_close, 'n_clicks'), + # Input(id_fullscreen, 'n_clicks'), + # State(id_target, "style"), + # State(id_close, "style") + + #) + #def fullscreen_target(n_clicks1, n_clicks2, style, style_close): + # triggered_id = ctx.triggered_id + # if triggered_id == id_close: + # if n_clicks1 and n_clicks1%2 == 1: + # style['display'] = 'none' + # else: + # style['display'] = 'block' + # return style + # else: + # style_card_full = { + # 'height': '90vh', + # 'width': 'auto', + # 'zIndex': 998, + # 'position': 'fixed', 'top': '55px', + # 'bottom': 0, 'left': 0, 'right': 0, + # 'background-color': 'white' + # } + # style_card_small = { + + # } + # style_component = { + # 'height': '89vh', 'maxHeight': '89vh', + # } + # if n_clicks2 and n_clicks2%2 == 1: + # style_close['display'] = 'none' + # return style_card_full, style_close + # else: + # style_close['display'] = 'block' + # return style_card_small, style_close return layout diff --git a/view/page_supervised_classification.py b/view/page_supervised_classification.py index 098d4a5..54a3ac7 100644 --- a/view/page_supervised_classification.py +++ b/view/page_supervised_classification.py @@ -2,6 +2,7 @@ from dash import dcc from dash import Input, Output, html, State, no_update, ctx import dash +from dash import dash_table # import app from view.app import app @@ -71,7 +72,14 @@ def create_data_supervised_panel(): ), dbc.CardBody( [ - create_prediction_plot(), + dbc.Row( + [ + create_prediction_plot(), + create_feature_importance_table(), + ], + align = "start", + justify = 'center', + ), ], ), ], @@ -138,7 +146,52 @@ def 
create_prediction_plot(): ) ), - width=12 + width=8 + ) + + + return layout + +def create_feature_importance_table(): + + layout = dbc.Col( + dbc.Card( + dbc.CardBody( + [ + dcc.Loading( + id="loading_classification_feature_importance", + type="default", + children=[ + dash_table.DataTable( + id='datatable_classification_feature_importance', + columns= [{"name": "Feature", "id": "Feature"}, {"name": "Importance", "id": "Importance"}], + filter_action='native', + page_size=14, + fill_width=True, + sort_action="native", + style_header={ + 'backgroundColor': 'rgb(30, 30, 30)', + 'color': 'white', + 'fontWeight': 'bold', + 'fontSize' : "13pt" + }, + fixed_rows={'headers': True}, + style_cell={'textAlign': 'left', 'color': 'black'}, + style_data={ + 'whiteSpace': 'normal', + 'height': 'auto', + 'fontSize' : "13pt", + 'minWidth': 50 + }, + ), + ] + ) + + ] + ) + ), + + width=4 ) @@ -202,11 +255,13 @@ def create_container_for_parameter(): id='slider_classification_train_test_split', ), - html.Img(id='img_classification_time_series_crossvalidation', src="/assets/img/tooltip.png", className='tooltip_img'), - dbc.Tooltip( - "If checked it keeps the order of the datapoints, otherwise it shuffles the dataset.", - target='img_classification_time_series_crossvalidation', + dcc.Link( + html.Img(src='/assets/img/link.png', id='img_classification_time_series_cv', className='tooltip_img'), + id='link_classification_time_series_cv', + href=TS_CROSS_VALIDATION_LINKS[0], + target='_blank', ), + dbc.Tooltip(TS_CROSS_VALIDATION_DESCRIPTION[0], target='img_classification_time_series_cv', id='tooltip_classification_time_series_cv'), dcc.Checklist( id='checklist_classification_time_series_crossvalidation', @@ -238,7 +293,16 @@ def create_container_for_parameter(): ), dbc.Card([ - dbc.CardHeader("Model:", className='card_subheader'), + dbc.CardHeader([ + "Model:", + dcc.Link( + html.Img(src='/assets/img/link.png', id='img_classification_strategy', className='tooltip_img'), + 
id='link_classification_strategy', + href=CLASSIFIER_LINKS[0], + target='_blank', + ), + dbc.Tooltip(CLASSIFIER_DESCRIPTION[0], target='img_classification_strategy', id='tooltip_classification_strategy'), + ], className='card_subheader'), dbc.CardBody([ dcc.Dropdown( id='dropdown_classification_model', @@ -363,6 +427,18 @@ def add_container_for_look_back(id_container, id_slider): min=1, max=20, step=1, + marks={ + 1:"1", + 3:"3", + 5:"5", + 7:"7", + 9:"9", + 11:"11", + 13:"13", + 15:"15", + 17:"17", + 19:"19", + }, value=5, tooltip={"placement": "bottom", "always_visible": False} ), @@ -385,7 +461,7 @@ def create_container_for_knn(): add_container_for_algorithm('container_classification_knn_algorithm', 'dropdown_classification_knn_algorithm'), add_container_for_weights('container_classification_knn_weights', 'dropdown_classification_knn_weights'), ], - style={'display': 'block'}, + style={'display': 'none'}, id='container_classification_knn' ) @@ -502,17 +578,14 @@ def add_container_for_n_estimators(id_container, id_slider): dbc.CardBody([ dcc.Slider( id=id_slider, - min=50, - max=300, step=10, value=50, marks={ + 10:"10", 50:"50", 100:"100", 150:"150", 200:"200", - 250:"250", - 300:"300", }, tooltip={"placement": "bottom", "always_visible": False} ), diff --git a/view/page_supervised_regression.py b/view/page_supervised_regression.py index db4095a..6640b06 100644 --- a/view/page_supervised_regression.py +++ b/view/page_supervised_regression.py @@ -2,6 +2,7 @@ from dash import dcc from dash import Input, Output, html, State, no_update, ctx import dash +from dash import dash_table # import app from view.app import app @@ -69,7 +70,14 @@ def create_data_supervised_panel(): ), dbc.CardBody( [ - create_prediction_plot(), + dbc.Row( + [ + create_prediction_plot(), + create_feature_importance_table(), + ], + align = "start", + justify = 'center', + ), ], ), ], @@ -136,7 +144,52 @@ def create_prediction_plot(): ) ), - width=12 + width=8 + ) + + + return layout + +def 
create_feature_importance_table(): + + layout = dbc.Col( + dbc.Card( + dbc.CardBody( + [ + dcc.Loading( + id="loading_regression_feature_importance", + type="default", + children=[ + dash_table.DataTable( + id='datatable_regression_feature_importance', + columns= [{"name": "Feature", "id": "Feature"}, {"name": "Importance", "id": "Importance"}], + filter_action='native', + page_size=14, + fill_width=True, + sort_action="native", + style_header={ + 'backgroundColor': 'rgb(30, 30, 30)', + 'color': 'white', + 'fontWeight': 'bold', + 'fontSize' : "13pt" + }, + fixed_rows={'headers': True}, + style_cell={'textAlign': 'left', 'color': 'black'}, + style_data={ + 'whiteSpace': 'normal', + 'height': 'auto', + 'fontSize' : "13pt", + 'minWidth': 50 + }, + ), + ] + ) + + ] + ) + ), + + width=4 ) @@ -200,11 +253,13 @@ def create_container_for_parameter(): id='slider_regression_train_test_split', ), - html.Img(id='img_regression_time_series_crossvalidation', src="/assets/img/tooltip.png", className='tooltip_img'), - dbc.Tooltip( - "If checked it keeps the order of the datapoints, otherwise it shuffles the dataset.", - target='img_regression_time_series_crossvalidation', + dcc.Link( + html.Img(src='/assets/img/link.png', id='img_regression_time_series_cv', className='tooltip_img'), + id='link_regression_time_series_cv', + href=TS_CROSS_VALIDATION_LINKS[0], + target='_blank', ), + dbc.Tooltip(TS_CROSS_VALIDATION_DESCRIPTION[0], target='img_regression_time_series_cv', id='tooltip_regression_time_series_cv'), dcc.Checklist( id='checklist_regression_time_series_crossvalidation', @@ -238,7 +293,16 @@ def create_container_for_parameter(): ), dbc.Card([ - dbc.CardHeader("Model:", className='card_subheader'), + dbc.CardHeader([ + "Model:", + dcc.Link( + html.Img(src='/assets/img/link.png', id='img_regression_strategy', className='tooltip_img'), + id='link_regression_strategy', + href=REGRESSOR_LINKS[0], + target='_blank', + ), + dbc.Tooltip(REGRESSOR_DESCRIPTION[0], 
target='img_regression_strategy', id='tooltip_regression_strategy'), + ], className='card_subheader'), dbc.CardBody([ dcc.Dropdown( id='dropdown_regression_model', @@ -402,6 +466,18 @@ def add_container_for_look_back(id_container, id_slider): max=20, step=1, value=5, + marks={ + 1:"1", + 3:"3", + 5:"5", + 7:"7", + 9:"9", + 11:"11", + 13:"13", + 15:"15", + 17:"17", + 19:"19", + }, tooltip={"placement": "bottom", "always_visible": False} ), ], @@ -449,17 +525,14 @@ def add_container_for_n_estimators(id_container, id_slider): dbc.CardBody([ dcc.Slider( id=id_slider, - min=50, - max=300, step=10, value=50, marks={ + 10:"10", 50:"50", 100:"100", 150:"150", 200:"200", - 250:"250", - 300:"300", }, tooltip={"placement": "bottom", "always_visible": False} ), diff --git a/view/page_transformation_time_series.py b/view/page_transformation_time_series.py index aad6158..5b1a991 100644 --- a/view/page_transformation_time_series.py +++ b/view/page_transformation_time_series.py @@ -357,7 +357,16 @@ def create_container_for_parameter(): ), dbc.Card([ - dbc.CardHeader("Transformation:", className='card_subheader'), + dbc.CardHeader([ + "Transformation:", + dcc.Link( + html.Img(src='/assets/img/tooltip.png', id='img_time_series_strategy', className='tooltip_img'), + id='link_time_series_strategy', + href='', + target='_blank', + ), + dbc.Tooltip(TRANSFORMATIONS_DESCRIPTIONS[0], target='img_time_series_strategy', id='tooltip_time_series_strategy'), + ], className='card_subheader'), dbc.CardBody([ dcc.Dropdown( id='dropdown_transformation_time_series_feature_transformation', @@ -458,7 +467,7 @@ def create_container_for_pca_parameter(): add_container_for_n_components('container_transformation_time_series_pca_n_components', 'slider_transformation_time_series_pca_n_components'), add_container_for_feature_name('container_transformation_time_series_pca_feature_name', 'input_transformation_time_series_pca_feature_name') ], - style={'display': 'block'}, + style={'display': 'none'}, 
id='container_transformation_time_series_pca' ) @@ -501,7 +510,6 @@ def add_container_for_feature_name(id_container, id_input): ), ], className='card_subcontainer', - style={'display': 'block'}, id=id_container, ) @@ -517,7 +525,7 @@ def create_container_for_dwt_parameter(): add_container_for_vanishing_moments('container_transformation_time_series_dwt_vanishing_moments', 'slider_transformation_time_series_dwt_vanishing_moments'), add_container_for_feature_name('container_transformation_time_series_dwt_feature_name', 'input_transformation_time_series_dwt_feature_name') ], - style={'display': 'block'}, + style={'display': 'none'}, id='container_transformation_time_series_dwt' ) @@ -648,6 +656,18 @@ def add_container_for_steps(id_container, id_slider): max=20, step=1, value=1, + marks={ + 1:"1", + 3:"3", + 5:"5", + 7:"7", + 9:"9", + 11:"11", + 13:"13", + 15:"15", + 17:"17", + 19:"19", + }, tooltip={"placement": "top", "always_visible": False}, ), ], @@ -726,6 +746,18 @@ def add_container_for_periods(id_container, id_slider): max=20, step=1, value=2, + marks={ + 1:"1", + 3:"3", + 5:"5", + 7:"7", + 9:"9", + 11:"11", + 13:"13", + 15:"15", + 17:"17", + 19:"19", + }, tooltip={"placement": "top", "always_visible": False}, ), ], @@ -766,6 +798,19 @@ def add_container_for_diff_periods(id_container, id_slider): max=10, step=1, value=1, + marks={ + -10:"-10", + -8:"-8", + -6:"-6", + -4:"-4", + -2:"-2", + 0:"0", + 2:"2", + 4:"4", + 6:"6", + 8:"8", + 10:"10", + }, tooltip={"placement": "top", "always_visible": False}, ), ], @@ -802,6 +847,18 @@ def add_container_for_polyorder(id_container, id_slider): max=20, step=1, value=1, + marks={ + 1:"1", + 3:"3", + 5:"5", + 7:"7", + 9:"9", + 11:"11", + 13:"13", + 15:"15", + 17:"17", + 19:"19", + }, tooltip={"placement": "top", "always_visible": False}, ), ], @@ -859,3 +916,4 @@ def create_container_for_alerts(): style = {"display": "none"} ) + diff --git a/view/sidebar.py b/view/sidebar.py index 4b3a9f7..6224fe5 100644 --- 
a/view/sidebar.py +++ b/view/sidebar.py @@ -4,50 +4,37 @@ from view.app import app -# the style arguments for the sidebar. We use position:fixed and a fixed width -SIDEBAR_STYLE = { - "position": "fixed", - "top": 0, - "left": 0, - "bottom": 0, - "width": "33rem", - "padding": "2rem 1rem", - "background-color": "#f8f9fa", - "overflow": "scroll" -} - # ---------------------------------- SUBMENU 1 --------------------------------------- +BUTTON_STYLE = { + "text-transform": "none", + 'margin-top': "0.5rem", + 'background-color': 'royalblue', + "font-family": "Times New Roman", + "font-size": "16pt"} + +BUTTON_STYLE2 = { + "text-transform": "none", + 'margin-top': "0.5rem", + 'background-color': 'navy', + "font-family": "Times New Roman", + "font-size": "16pt"} button_dl = dbc.Button( "Data Loading", - outline=False, - #active=True, - color="primary", + id="button_loading", href="/page-1/0", - #id="gh-link", - style={ - "text-transform": "none", - 'margin-top': "0.5rem", - "font-family": - "Times New Roman","font-size": "16pt"}, + style=BUTTON_STYLE2, ) button_do = dbc.Button( "Data Overview", id="button_overview", - outline=False, - #active=True, color="primary", href="/page-1/1", disabled=True, - #id="gh-link", - style={ - "text-transform": "none", - 'margin-top': "0.5rem", - "font-family": - "Times New Roman","font-size": "16pt"}, + style=BUTTON_STYLE, ) @@ -60,15 +47,11 @@ href="/page-1/2", disabled=True, #id="gh-link", - style={ - "text-transform": "none", - 'margin-top': "0.5rem", - "font-family": - "Times New Roman","font-size": "16pt"}, + style=BUTTON_STYLE, ) button_na = dbc.Button( - "NA Values", + "Missing Values", id="button_na_values", outline=False, #active=True, @@ -76,11 +59,7 @@ href="/page-1/3", disabled=True, #id="gh-link", - style={ - "text-transform": "none", - 'margin-top': "0.5rem", - "font-family": - "Times New Roman","font-size": "16pt"}, + style=BUTTON_STYLE, ) button_od = dbc.Button( @@ -92,11 +71,7 @@ href="/page-1/4", disabled=True, 
#id="gh-link", - style={ - "text-transform": "none", - 'margin-top': "0.5rem", - "font-family": - "Times New Roman","font-size": "16pt"}, + style=BUTTON_STYLE, ) submenu_1 = [ @@ -156,11 +131,7 @@ href="/page-2/1", disabled=True, #id="gh-link", - style={ - "text-transform": "none", - 'margin-top': "0.5rem", - "font-family": - "Times New Roman","font-size": "16pt"}, + style=BUTTON_STYLE, ) @@ -187,11 +158,7 @@ href="/page-2/5", disabled=True, #id="gh-link", - style={ - "text-transform": "none", - 'margin-top': "0.5rem", - "font-family": - "Times New Roman","font-size": "16pt"}, + style=BUTTON_STYLE, ) @@ -203,11 +170,7 @@ href="/page-2/3", disabled=True, #id="gh-link", - style={ - "text-transform": "none", - 'margin-top': "0.5rem", - "font-family": - "Times New Roman","font-size": "16pt"}, + style=BUTTON_STYLE, ) button_id = dbc.Button( @@ -218,11 +181,7 @@ href="/page-2/4", disabled=True, #id="gh-link", - style={ - "text-transform": "none", - 'margin-top': "0.5rem", - "font-family": - "Times New Roman","font-size": "16pt"}, + style=BUTTON_STYLE, ) button_ts = dbc.Button( @@ -234,11 +193,7 @@ href="/page-2/2", disabled=True, #id="gh-link", - style={ - "text-transform": "none", - 'margin-top': "0.5rem", - "font-family": - "Times New Roman","font-size": "16pt"}, + style=BUTTON_STYLE, ) submenu_2 = [ @@ -301,11 +256,7 @@ href="/page-3/1", disabled=True, #id="gh-link", - style={ - "text-transform": "none", - 'margin-top': "0.5rem", - "font-family": - "Times New Roman","font-size": "16pt"}, + style=BUTTON_STYLE, ) button_sr = dbc.Button( @@ -317,11 +268,7 @@ href="/page-3/2", disabled=True, #id="gh-link", - style={ - "text-transform": "none", - 'margin-top': "0.5rem", - "font-family": - "Times New Roman","font-size": "16pt"}, + style=BUTTON_STYLE, ) button_usl = dbc.Button( @@ -333,11 +280,7 @@ href="/page-3/3", disabled=True, #id="gh-link", - style={ - "text-transform": "none", - 'margin-top': "0.5rem", - "font-family": - "Times New Roman","font-size": "16pt"}, + 
style=BUTTON_STYLE, ) submenu_3 = [ @@ -403,34 +346,75 @@ +#def sidebar(): +# layout = html.Div( +# [ +# #html.H2("WEFA Inotec", className="display-7"), +# html.A( +# href = "https://wefa.com/", +# children=[ +# html.Img(src=app.get_asset_url('./img/logo.png'), style={"width": "30rem"}), +# ] +# ), +# +# html.Hr(style={'borderWidth': "0.3vh", "width": "100%", "borderColor": "black", "borderStyle":"solid"}), +# dbc.Nav( +# submenu_1 + submenu_2 + submenu_3, +# vertical=True, +# pills=False, + #style = {"padding": "50px 50px 50px 50px"} +# ), + #html.Div(submenu_1 + submenu_2 + submenu_3), + #html.Div(submenu_2), + #html.Div(submenu_3), +# html.Div( +# dbc.Row( +# [ +# dbc.Col(html.Img(src=app.get_asset_url('./img/kit.png'),style={"width": "10rem", "display": "flex", +# "justify-content": "center"})), +# dbc.Col(button_report_bugs,style={"display": "flex", +# "justify-content": "center"}), +# ] +# ), +# style={"margin":"2rem"}, +# ), +# ], +# style=SIDEBAR_STYLE, +# id="sidebar", +# ) +# return layout + +SIDEBAR_STYLE = { + "position": "fixed", + "top": 0, + "left": 0, + "bottom": 0, + "width": "33rem", + "padding": "2rem 1rem", + "background-color": "#f8f9fa", + "overflow": "scroll" +} + def sidebar(): layout = html.Div( [ - #html.H2("WEFA Inotec", className="display-7"), html.A( - href = "https://wefa.com/", + href = "https://www.sdsc-bw.de/", children=[ - html.Img(src=app.get_asset_url('./img/logo.png'), style={"width": "30rem"}), + html.Img(src=app.get_asset_url('./img/logo.png'), className="siderbar_logo"), ] ), - - html.Hr(style={'borderWidth': "0.3vh", "width": "100%", "borderColor": "black", "borderStyle":"solid"}), + html.Hr(className="sidebar_hr"), dbc.Nav( submenu_1 + submenu_2 + submenu_3, vertical=True, pills=False, - #style = {"padding": "50px 50px 50px 50px"} - ), - #html.Div(submenu_1 + submenu_2 + submenu_3), - #html.Div(submenu_2), - #html.Div(submenu_3), + ), html.Div( dbc.Row( [ - 
dbc.Col(html.Img(src=app.get_asset_url('./img/kit.png'),style={"width": "10rem", "display": "flex", - "justify-content": "center"})), - dbc.Col(button_report_bugs,style={"display": "flex", - "justify-content": "center"}), + dbc.Col(html.Img(src=app.get_asset_url('./img/kit.png'), className="sidebar_kit")), + dbc.Col(button_report_bugs, className="sidebar_report_bugs"), ] ), style={"margin":"2rem"}, @@ -439,4 +423,9 @@ def sidebar(): style=SIDEBAR_STYLE, id="sidebar", ) + return layout + + + + \ No newline at end of file