diff --git a/erroranalysis/erroranalysis/_internal/surrogate_error_tree.py b/erroranalysis/erroranalysis/_internal/surrogate_error_tree.py
index dd6e1433ff..157dbb2056 100644
--- a/erroranalysis/erroranalysis/_internal/surrogate_error_tree.py
+++ b/erroranalysis/erroranalysis/_internal/surrogate_error_tree.py
@@ -729,7 +729,7 @@ def node_to_dict(df, tree, nodeid, categories, json,
     metric_name = metric_to_display_name[metric]
     is_error_metric = metric in error_metrics
     if SPLIT_FEATURE in tree:
-        node_name = feature_names[tree[SPLIT_FEATURE]]
+        node_name = str(feature_names[tree[SPLIT_FEATURE]])
     else:
         node_name = None
     json.append(get_json_node(arg, condition, error, nodeid, method,
diff --git a/erroranalysis/erroranalysis/report/error_report.py b/erroranalysis/erroranalysis/report/error_report.py
index aa7570b194..7dc14ed14a 100644
--- a/erroranalysis/erroranalysis/report/error_report.py
+++ b/erroranalysis/erroranalysis/report/error_report.py
@@ -4,6 +4,8 @@
 import json
 import uuid

+from raiutils.data_processing import serialize_json_safe
+
 _ErrorReportVersion1 = '1.0'
 _ErrorReportVersion2 = '2.0'
 _ErrorReportVersion3 = '3.0'
@@ -35,10 +37,7 @@ def json_converter(obj):
     if isinstance(obj, ErrorReport):
         rdict = obj.__dict__
         return rdict
-    try:
-        return obj.to_json()
-    except AttributeError:
-        return obj.__dict__
+    return serialize_json_safe(obj)


 def as_error_report(error_dict):
diff --git a/erroranalysis/erroranalysis/version.py b/erroranalysis/erroranalysis/version.py
index 356ccd1018..808a6b661f 100644
--- a/erroranalysis/erroranalysis/version.py
+++ b/erroranalysis/erroranalysis/version.py
@@ -4,5 +4,5 @@
 name = 'erroranalysis'
 _major = '0'
 _minor = '3'
-_patch = '3'
+_patch = '4'
 version = '{}.{}.{}'.format(_major, _minor, _patch)
diff --git a/erroranalysis/requirements.txt b/erroranalysis/requirements.txt
index 21e02c0977..831152f71c 100644
--- a/erroranalysis/requirements.txt
+++ b/erroranalysis/requirements.txt
@@ -3,3 +3,4 @@ pandas>=0.25.1
 scipy>=1.4.1
 scikit-learn>=0.22.1
 lightgbm>=2.0.11
+raiutils>=0.1.0
\ No newline at end of file
diff --git a/erroranalysis/tests/test_error_report.py b/erroranalysis/tests/test_error_report.py
index 412d18a7e9..cd41f7474d 100644
--- a/erroranalysis/tests/test_error_report.py
+++ b/erroranalysis/tests/test_error_report.py
@@ -30,6 +30,18 @@ def test_error_report_iris(self, alter_feature_names):
                            categorical_features,
                            expect_user_warnings=alter_feature_names)

+    def test_error_report_iris_numpy_int64_features(self):
+        X_train, X_test, y_train, y_test, _, _ = create_iris_data()
+        # Test with numpy feature indexes instead of string feature names
+        feature_names = range(0, X_train.shape[1])
+        feature_names = [np.int64(i) for i in feature_names]
+        models = create_models_classification(X_train, y_train)
+
+        for model in models:
+            categorical_features = []
+            run_error_analyzer(model, X_test, y_test, feature_names,
+                               categorical_features)
+
     def test_error_report_cancer(self):
         X_train, X_test, y_train, y_test, feature_names, _ = \
             create_cancer_data()
diff --git a/raiwidgets/raiwidgets/error_analysis_dashboard_input.py b/raiwidgets/raiwidgets/error_analysis_dashboard_input.py
index d6a27e27dc..745d4df90d 100644
--- a/raiwidgets/raiwidgets/error_analysis_dashboard_input.py
+++ b/raiwidgets/raiwidgets/error_analysis_dashboard_input.py
@@ -11,9 +11,8 @@
 from erroranalysis._internal.error_analyzer import (ModelAnalyzer,
                                                     PredictionsAnalyzer)
 from erroranalysis._internal.utils import is_spark
-from raiutils.data_processing import serialize_json_safe
+from raiutils.data_processing import convert_to_list, serialize_json_safe
 from raiutils.models import is_classifier
-from responsibleai._input_processing import _convert_to_list
 from responsibleai._interfaces import ErrorAnalysisData

 from .constants import ModelTask
@@ -183,14 +182,14 @@ def setup_pyspark(self, model, dataset, true_y, classes,
             predicted_y = self.predicted_y_to_list(predicted_y)
         true_y = pd_sample[true_y]
         pd_sample = pd_sample[features]
-        list_dataset = _convert_to_list(pd_sample)
+        list_dataset = convert_to_list(pd_sample)
         self.setup_visualization_input(classes, predicted_y,
                                        list_dataset, true_y, features)

     def setup_visualization_input(self, classes, predicted_y,
                                   list_dataset, true_y, features):
         if classes is not None:
-            classes = _convert_to_list(classes)
+            classes = convert_to_list(classes)
             self.dashboard_input[
                 ExplanationDashboardInterface.CLASS_NAMES
             ] = classes
@@ -222,7 +221,7 @@ def setup_visualization_input(self, classes, predicted_y,
             ] = serialize_json_safe(list_dataset)

         if true_y is not None and len(true_y) == row_length:
-            list_true_y = _convert_to_list(true_y)
+            list_true_y = convert_to_list(true_y)
             # If classes specified, convert true_y to numeric representation
             if classes is not None and list_true_y[0] in class_to_index:
                 for i in range(len(list_true_y)):
@@ -232,7 +231,7 @@ def setup_visualization_input(self, classes, predicted_y,
             ] = list_true_y

         if features is not None:
-            features = _convert_to_list(features)
+            features = convert_to_list(features)
             if feature_length is not None and len(features) != feature_length:
                 raise ValueError("Feature vector length mismatch:"
                                  " feature names length differs"
@@ -277,7 +276,7 @@ def setup_local(self, explanation, model, dataset, true_y, classes,
             self._dataframeColumns = dataset.columns
             self._dfdtypes = dataset.dtypes
         try:
-            list_dataset = _convert_to_list(dataset)
+            list_dataset = convert_to_list(dataset)
         except Exception as ex:
             ex_str = _format_exception(ex)
             raise ValueError(
@@ -306,7 +305,7 @@ def setup_local(self, explanation, model, dataset, true_y, classes,
                     " for given dataset type,"
                     " inner error: {}".format(ex_str))
             try:
-                probability_y = _convert_to_list(probability_y)
+                probability_y = convert_to_list(probability_y)
             except Exception as ex:
                 ex_str = _format_exception(ex)
                 raise ValueError(
@@ -407,7 +406,7 @@ def compute_predicted_y(self, model, dataset):

     def predicted_y_to_list(self, predicted_y):
         try:
-            predicted_y = _convert_to_list(predicted_y)
+            predicted_y = convert_to_list(predicted_y)
         except Exception as ex:
             ex_str = _format_exception(ex)
             raise ValueError(
@@ -447,13 +446,13 @@ def input_explanation_data(self, list_dataset, classes):

         if local_explanation is not None:
             try:
-                local_explanation["scores"] = _convert_to_list(
+                local_explanation["scores"] = convert_to_list(
                     local_explanation["scores"])
                 if np.shape(local_explanation["scores"])[-1] > 1000:
                     raise ValueError("Exceeds maximum number of features for "
                                      "visualization (1000). Please regenerate"
                                      " the explanation using fewer features.")
-                local_explanation["intercept"] = _convert_to_list(
+                local_explanation["intercept"] = convert_to_list(
                     local_explanation["intercept"])
                 # We can ignore perf explanation data.
                 # Note if it is added back at any point,
@@ -490,10 +489,10 @@
                     "local explanations dimension")
         if local_explanation is None and global_explanation is not None:
             try:
-                global_explanation["scores"] = _convert_to_list(
+                global_explanation["scores"] = convert_to_list(
                     global_explanation["scores"])
                 if 'intercept' in global_explanation:
-                    global_explanation["intercept"] = _convert_to_list(
+                    global_explanation["intercept"] = convert_to_list(
                         global_explanation["intercept"])
                 self.dashboard_input[
                     ExplanationDashboardInterface.GLOBAL_EXPLANATION
@@ -583,10 +582,10 @@ def on_predict(self, data):
             data = data.astype(dict(self._dfdtypes))
             if (self._is_classifier):
                 model_pred_proba = self._model.predict_proba(data)
-                prediction = _convert_to_list(model_pred_proba)
+                prediction = convert_to_list(model_pred_proba)
             else:
                 model_predict = self._model.predict(data)
-                prediction = _convert_to_list(model_predict)
+                prediction = convert_to_list(model_predict)
             return {
                 WidgetRequestResponseConstants.DATA: prediction
             }
diff --git a/raiwidgets/raiwidgets/explanation_dashboard_input.py b/raiwidgets/raiwidgets/explanation_dashboard_input.py
index ddf4586bcf..90c6e0ff28 100644
--- a/raiwidgets/raiwidgets/explanation_dashboard_input.py
+++ b/raiwidgets/raiwidgets/explanation_dashboard_input.py
@@ -6,9 +6,8 @@
 import numpy as np
 import pandas as pd

-from raiutils.data_processing import serialize_json_safe
+from raiutils.data_processing import convert_to_list, serialize_json_safe
 from raiutils.models import is_classifier
-from responsibleai._input_processing import _convert_to_list

 from .constants import ErrorMessages
 from .error_handling import _format_exception
@@ -94,7 +93,7 @@ class and for the regression case a method of predict()
             self._dataframeColumns = dataset.columns
             self._dfdtypes = dataset.dtypes
         try:
-            list_dataset = _convert_to_list(dataset, EXP_VIZ_ERR_MSG)
+            list_dataset = convert_to_list(dataset, EXP_VIZ_ERR_MSG)
         except Exception as ex:
             ex_str = _format_exception(ex)
             raise ValueError(
@@ -109,7 +108,7 @@ class and for the regression case a method of predict()
                     ex_str)
                 raise ValueError(msg)
             try:
-                predicted_y = _convert_to_list(predicted_y, EXP_VIZ_ERR_MSG)
+                predicted_y = convert_to_list(predicted_y, EXP_VIZ_ERR_MSG)
             except Exception as ex:
                 ex_str = _format_exception(ex)
                 raise ValueError(
@@ -147,13 +146,13 @@ class and for the regression case a method of predict()
         if true_y is not None and len(true_y) == row_length:
             self.dashboard_input[
                 ExplanationDashboardInterface.TRUE_Y
-            ] = _convert_to_list(true_y, EXP_VIZ_ERR_MSG)
+            ] = convert_to_list(true_y, EXP_VIZ_ERR_MSG)

         if local_explanation is not None:
             try:
-                local_explanation["scores"] = _convert_to_list(
+                local_explanation["scores"] = convert_to_list(
                     local_explanation["scores"], EXP_VIZ_ERR_MSG)
-                local_explanation["intercept"] = _convert_to_list(
+                local_explanation["intercept"] = convert_to_list(
                     local_explanation["intercept"], EXP_VIZ_ERR_MSG)
                 # We can ignore perf explanation data.
                 # Note if it is added back at any point,
@@ -185,10 +184,10 @@ class and for the regression case a method of predict()
                     " length differs from dataset")
         if local_explanation is None and global_explanation is not None:
             try:
-                global_explanation["scores"] = _convert_to_list(
+                global_explanation["scores"] = convert_to_list(
                     global_explanation["scores"], EXP_VIZ_ERR_MSG)
                 if 'intercept' in global_explanation:
-                    global_explanation["intercept"] = _convert_to_list(
+                    global_explanation["intercept"] = convert_to_list(
                         global_explanation["intercept"], EXP_VIZ_ERR_MSG)
                 self.dashboard_input[
                     ExplanationDashboardInterface.GLOBAL_EXPLANATION
@@ -213,7 +212,7 @@ class and for the regression case a method of predict()
                 and explanation.features is not None:
             features = explanation.features
         if features is not None:
-            features = _convert_to_list(features, EXP_VIZ_ERR_MSG)
+            features = convert_to_list(features, EXP_VIZ_ERR_MSG)
             if feature_length is not None and len(features) != feature_length:
                 raise ValueError("Feature vector length mismatch:"
                                  " feature names length differs"
@@ -227,7 +226,7 @@ class and for the regression case a method of predict()
                 and explanation.classes is not None:
             classes = explanation.classes
         if classes is not None:
-            classes = _convert_to_list(classes, EXP_VIZ_ERR_MSG)
+            classes = convert_to_list(classes, EXP_VIZ_ERR_MSG)
             if local_dim is not None and len(classes) != local_dim[0]:
                 raise ValueError("Class vector length mismatch:"
                                  "class names length differs from"
@@ -244,8 +243,8 @@ class and for the regression case a method of predict()
                     " for given dataset type,"
                     " inner error: {}".format(ex_str))
             try:
-                probability_y = _convert_to_list(probability_y,
-                                                 EXP_VIZ_ERR_MSG)
+                probability_y = convert_to_list(probability_y,
+                                                EXP_VIZ_ERR_MSG)
             except Exception as ex:
                 ex_str = _format_exception(ex)
                 raise ValueError(
@@ -261,11 +260,11 @@ def on_predict(self, data):
                 data = pd.DataFrame(data, columns=self._dataframeColumns)
             data = data.astype(dict(self._dfdtypes))
             if (self._is_classifier):
-                prediction = _convert_to_list(
+                prediction = convert_to_list(
                     self._model.predict_proba(data), EXP_VIZ_ERR_MSG)
             else:
-                prediction = _convert_to_list(self._model.predict(data),
-                                              EXP_VIZ_ERR_MSG)
+                prediction = convert_to_list(self._model.predict(data),
+                                             EXP_VIZ_ERR_MSG)
             return {
                 WidgetRequestResponseConstants.DATA: prediction
             }
diff --git a/raiwidgets/raiwidgets/fairness_dashboard.py b/raiwidgets/raiwidgets/fairness_dashboard.py
index 5da16f3953..be8307944e 100644
--- a/raiwidgets/raiwidgets/fairness_dashboard.py
+++ b/raiwidgets/raiwidgets/fairness_dashboard.py
@@ -6,8 +6,8 @@
 import numpy as np
 from flask import jsonify, request

-from responsibleai._input_processing import (_convert_to_list,
-                                             _convert_to_string_list_dict)
+from raiutils.data_processing import (convert_to_list,
+                                      convert_to_string_list_dict)

 from .dashboard import Dashboard
 from .fairness_metric_calculation import FairnessMetricModule
@@ -57,15 +57,15 @@ def __init__(
         if sensitive_features is None or y_true is None or y_pred is None:
             raise ValueError("Required parameters not provided")

-        model_dict = _convert_to_string_list_dict("Model {0}",
-                                                  y_pred,
-                                                  y_true)
-        sf_dict = _convert_to_string_list_dict("Sensitive Feature {0}",
-                                               sensitive_features,
-                                               y_true)
+        model_dict = convert_to_string_list_dict("Model {0}",
+                                                 y_pred,
+                                                 y_true)
+        sf_dict = convert_to_string_list_dict("Sensitive Feature {0}",
+                                              sensitive_features,
+                                              y_true)

         # Make sure that things are as the TS layer expects
-        self._y_true = _convert_to_list(y_true)
+        self._y_true = convert_to_list(y_true)
         self._y_pred = list(model_dict.values())
         # Note transpose in the following
         dataset = (np.array(list(sf_dict.values())).T).tolist()
diff --git a/raiwidgets/raiwidgets/responsibleai_dashboard_input.py b/raiwidgets/raiwidgets/responsibleai_dashboard_input.py
index 799ca41a1c..ea51797b65 100644
--- a/raiwidgets/raiwidgets/responsibleai_dashboard_input.py
+++ b/raiwidgets/raiwidgets/responsibleai_dashboard_input.py
@@ -8,9 +8,9 @@
 import pandas as pd

 from erroranalysis._internal.constants import ModelTask, display_name_to_metric
+from raiutils.data_processing import convert_to_list
 from raiutils.models import is_classifier
 from responsibleai import RAIInsights
-from responsibleai._input_processing import _convert_to_list
 from responsibleai._internal.constants import ManagerNames
 from responsibleai.exceptions import UserConfigValidationException

@@ -101,10 +101,10 @@ def on_predict(self, data):
             data = pd.DataFrame(
                 data, columns=self.dashboard_input.dataset.feature_names)
             if (self._is_classifier):
-                prediction = _convert_to_list(
+                prediction = convert_to_list(
                     self._analysis.model.predict_proba(data), EXP_VIZ_ERR_MSG)
             else:
-                prediction = _convert_to_list(
+                prediction = convert_to_list(
                     self._analysis.model.predict(data), EXP_VIZ_ERR_MSG)
             return {
                 WidgetRequestResponseConstants.data: prediction
diff --git a/raiwidgets/requirements.txt b/raiwidgets/requirements.txt
index d43297c3c8..1b15419576 100644
--- a/raiwidgets/requirements.txt
+++ b/raiwidgets/requirements.txt
@@ -5,6 +5,6 @@ rai-core-flask==0.3.0
 itsdangerous==2.0.1
 scikit-learn>=0.22.1
 lightgbm>=2.0.11
-erroranalysis>=0.3.2
+erroranalysis>=0.3.3
 fairlearn>=0.7.0
 raiutils>=0.1.0
diff --git a/responsibleai/requirements.txt b/responsibleai/requirements.txt
index 2b7c5a42e6..83d18b8c77 100644
--- a/responsibleai/requirements.txt
+++ b/responsibleai/requirements.txt
@@ -1,7 +1,7 @@
 dice-ml>=0.8,<0.9
 econml~=0.13.0
 jsonschema
-erroranalysis>=0.3.2
+erroranalysis>=0.3.3
 interpret-community>=0.26.0
 lightgbm>=2.0.11
 numpy>=1.17.2
diff --git a/responsibleai/responsibleai/_input_processing.py b/responsibleai/responsibleai/_input_processing.py
deleted file mode 100644
index 9209a5e1ac..0000000000
--- a/responsibleai/responsibleai/_input_processing.py
+++ /dev/null
@@ -1,109 +0,0 @@
-# Copyright (c) Microsoft Corporation
-# Licensed under the MIT License.
-
-from typing import Dict, List
-
-import numpy as np
-import pandas as pd
-from scipy.sparse import issparse
-from sklearn.utils import check_consistent_length
-
-_DF_COLUMN_BAD_NAME = "DataFrame column names must be strings."\
-    " Name '{0}' is of type {1}"
-_LIST_NONSCALAR = "Lists must be of scalar types"
-_TOO_MANY_DIMS = "Array must have at most two dimensions"
-
-
-def _convert_to_list(array, custom_err_msg=None):
-    if issparse(array):
-        if array.shape[1] > 1000:
-            if custom_err_msg is None:
-                raise ValueError("Exceeds maximum number of features for "
-                                 "visualization (1000)")
-            else:
-                raise ValueError(custom_err_msg)
-        return array.toarray().tolist()
-    if isinstance(array, pd.DataFrame):
-        return array.values.tolist()
-    if isinstance(array, pd.Series):
-        return array.values.tolist()
-    if isinstance(array, np.ndarray):
-        return array.tolist()
-    if isinstance(array, pd.Index):
-        return array.tolist()
-    return array
-
-
-def _convert_to_string_list_dict(
-        base_name_format: str,
-        ys,
-        sample_array) -> Dict[str, List]:
-    """Convert the given input to a string-list dictionary.
-
-    This function is used to convert arrays in a variety of types
-    into a dictionary mapping column names to regular Python lists
-    (in preparation for JSON serialisation). It is a modification
-    of the feature processing code in :class:`fairlearn.metrics.MetricFrame`.
-
-    The array to be converted is passed in :code:`ys`, and a variety
-    of types are supported. The :code:`sample_array` argument is
-    used in a call to :func:`sklearn.utils.check_consistent_length`
-    to ensure that the resultant lists are of the right length.
-    Finally `base_name_format` is used to generate sequential
-    keys for the dictionary if none are in the supplied :code:`ys`.
-    It must be of the form :code:`'Base String {0}'`, with the
-    :code:`{0}` being replaced by a sequential integer.
-
-    It is not possible to list out all the possible underlying types
-    for :code:`ys`. A brief summary:
-    - :class:`pd.Series`
-    - :class:`pd.DataFrame`
-    - A simple Python list
-    - A Python dictionary with string keys and values which are
-      convertible to lists
-    - Anything convertible to a :class:`np.ndarray`
-    """
-    result = {}
-
-    if isinstance(ys, pd.Series):
-        check_consistent_length(ys, sample_array)
-        if ys.name is not None:
-            result[ys.name] = _convert_to_list(ys)
-        else:
-            result[base_name_format.format(0)] = _convert_to_list(ys)
-    elif isinstance(ys, pd.DataFrame):
-        for i in range(len(ys.columns)):
-            col_name = ys.columns[i]
-            if not isinstance(col_name, str):
-                msg = _DF_COLUMN_BAD_NAME.format(col_name, type(col_name))
-                raise ValueError(msg)
-            column = ys.iloc[:, i]
-            check_consistent_length(column, sample_array)
-            result[col_name] = _convert_to_list(column)
-    elif isinstance(ys, list):
-        if np.isscalar(ys[0]):
-            f_arr = np.atleast_1d(np.squeeze(np.asarray(ys)))
-            assert len(f_arr.shape) == 1  # Sanity check
-            check_consistent_length(f_arr, sample_array)
-            result[base_name_format.format(0)] = _convert_to_list(f_arr)
-        else:
-            raise ValueError(_LIST_NONSCALAR)
-    elif isinstance(ys, dict):
-        for k, v in ys.items():
-            result[k] = _convert_to_list(v)
-    else:
-        # Assume it's something which can go into np.as_array
-        f_arr = np.squeeze(np.asarray(ys, dtype=object))
-        if len(f_arr.shape) == 1:
-            check_consistent_length(f_arr, sample_array)
-            result[base_name_format.format(0)] = _convert_to_list(f_arr)
-        elif len(f_arr.shape) == 2:
-            # Work similarly to pd.DataFrame(data=ndarray)
-            for i in range(f_arr.shape[1]):
-                col = f_arr[:, i]
-                check_consistent_length(col, sample_array)
-                result[base_name_format.format(i)] = _convert_to_list(col)
-        else:
-            raise ValueError(_TOO_MANY_DIMS)
-
-    return result
diff --git a/responsibleai/responsibleai/managers/explainer_manager.py b/responsibleai/responsibleai/managers/explainer_manager.py
index 751af85643..2524cc3bc0 100644
--- a/responsibleai/responsibleai/managers/explainer_manager.py
+++ b/responsibleai/responsibleai/managers/explainer_manager.py
@@ -20,7 +20,7 @@
     LinearExplainableModel
 from scipy.sparse import issparse

-from responsibleai._input_processing import _convert_to_list
+from raiutils.data_processing import convert_to_list
 from responsibleai._interfaces import (EBMGlobalExplanation, FeatureImportance,
                                        ModelExplanationData,
                                        PrecomputedExplanations)
@@ -211,13 +211,13 @@ def _get_interpret(self, explanation):
         if local_explanation is not None:
             try:
                 local_feature_importance = FeatureImportance()
-                local_feature_importance.scores = _convert_to_list(
+                local_feature_importance.scores = convert_to_list(
                     local_explanation["scores"])
                 if np.shape(local_feature_importance.scores)[-1] > 1000:
                     raise ValueError("Exceeds maximum number of features for "
                                      "visualization (1000). Please regenerate"
                                      " the explanation using fewer features.")
-                local_feature_importance.intercept = _convert_to_list(
+                local_feature_importance.intercept = convert_to_list(
                     local_explanation["intercept"])
                 # We can ignore perf explanation data.
                 # Note if it is added back at any point,
@@ -252,11 +252,11 @@ def _get_interpret(self, explanation):
         if global_explanation is not None:
             try:
                 global_feature_importance = FeatureImportance()
-                global_feature_importance.scores = _convert_to_list(
+                global_feature_importance.scores = convert_to_list(
                     global_explanation["scores"])
                 if 'intercept' in global_explanation:
                     global_feature_importance.intercept\
-                        = _convert_to_list(
+                        = convert_to_list(
                             global_explanation["intercept"])
                 interpretation.precomputedExplanations.globalFeatureImportance\
                     = global_explanation
diff --git a/responsibleai/responsibleai/rai_insights/rai_insights.py b/responsibleai/responsibleai/rai_insights/rai_insights.py
index 4752f7f229..30f6d6cafa 100644
--- a/responsibleai/responsibleai/rai_insights/rai_insights.py
+++ b/responsibleai/responsibleai/rai_insights/rai_insights.py
@@ -12,8 +12,8 @@
 import numpy as np
 import pandas as pd

+from raiutils.data_processing import convert_to_list
 from raiutils.models import SKLearn, is_classifier
-from responsibleai._input_processing import _convert_to_list
 from responsibleai._interfaces import Dataset, RAIInsightsData
 from responsibleai._internal.constants import ManagerNames, Metadata
 from responsibleai.exceptions import UserConfigValidationException
@@ -404,7 +404,7 @@ def _get_dataset(self):
         dashboard_dataset = Dataset()
         dashboard_dataset.task_type = self.task_type
         dashboard_dataset.categorical_features = self.categorical_features
-        dashboard_dataset.class_names = _convert_to_list(
+        dashboard_dataset.class_names = convert_to_list(
             self._classes)

         predicted_y = None
@@ -416,7 +416,7 @@ def _get_dataset(self):
         if isinstance(dataset, pd.DataFrame) and hasattr(dataset, 'columns'):
             self._dataframeColumns = dataset.columns
         try:
-            list_dataset = _convert_to_list(dataset)
+            list_dataset = convert_to_list(dataset)
         except Exception as ex:
             raise ValueError(
                 "Unsupported dataset type") from ex
@@ -428,7 +428,7 @@ def _get_dataset(self):
                       "dataset type"
                 raise ValueError(msg) from ex
             try:
-                predicted_y = _convert_to_list(predicted_y)
+                predicted_y = convert_to_list(predicted_y)
             except Exception as ex:
                 raise ValueError(
                     "Model prediction output of unsupported type,") from ex
@@ -461,12 +461,12 @@ def _get_dataset(self):
                     dashboard_dataset.class_names is not None):
                 true_y = [dashboard_dataset.class_names.index(
                     y) for y in true_y]
-            dashboard_dataset.true_y = _convert_to_list(true_y)
+            dashboard_dataset.true_y = convert_to_list(true_y)

         features = dataset.columns

         if features is not None:
-            features = _convert_to_list(features)
+            features = convert_to_list(features)
             if feature_length is not None and len(features) != feature_length:
                 raise ValueError("Feature vector length mismatch:"
                                  " feature names length differs"
@@ -480,7 +480,7 @@ def _get_dataset(self):
                 raise ValueError("Model does not support predict_proba method"
                                  " for given dataset type,") from ex
            try:
-                probability_y = _convert_to_list(probability_y)
+                probability_y = convert_to_list(probability_y)
            except Exception as ex:
                raise ValueError(
                    "Model predict_proba output of unsupported type,") from ex
@@ -523,7 +523,7 @@ def _save_metadata(self, path):
         :type path: str
         """
         top_dir = Path(path)
-        classes = _convert_to_list(self._classes)
+        classes = convert_to_list(self._classes)
         meta = {
             _TARGET_COLUMN: self.target_column,
             _TASK_TYPE: self.task_type,
diff --git a/responsibleai/responsibleai/serialization_utilities.py b/responsibleai/responsibleai/serialization_utilities.py
index e8140d6b00..43df549b57 100644
--- a/responsibleai/responsibleai/serialization_utilities.py
+++ b/responsibleai/responsibleai/serialization_utilities.py
@@ -17,8 +17,8 @@ def serialize_json_safe(o: Any):
     :return: Serialized object.
     """
     warnings.warn(
-        "FUNCTION-DEPRECATION-WARNING: The function serialize_json_safe will be "
-        "deprecated in responsibleai. "
+        "FUNCTION-DEPRECATION-WARNING: The function serialize_json_safe "
+        "will be deprecated in responsibleai. "
        "Please import this function from raiutils.data_processing instead.",
        DeprecationWarning)

diff --git a/responsibleai/tests/test_input_processing.py b/responsibleai/tests/test_input_processing.py
deleted file mode 100644
index 7cebadfb47..0000000000
--- a/responsibleai/tests/test_input_processing.py
+++ /dev/null
@@ -1,171 +0,0 @@
-# Copyright (c) Microsoft Corporation
-# Licensed under the MIT License.
-
-import numpy as np
-import pandas as pd
-import pytest
-from scipy.sparse import csr_matrix
-
-from responsibleai._input_processing import (_convert_to_list,
-                                             _convert_to_string_list_dict)
-
-
-class TestConvertToStringListDict:
-    def test_unnamed_series(self):
-        input = pd.Series(data=[0, 1, 2])
-        sample_array = [4, 5, 6]
-        result = _convert_to_string_list_dict("Base {0}", input, sample_array)
-        assert isinstance(result, dict)
-        assert len(result) == 1
-        assert "Base 0" in result
-        arr = result["Base 0"]
-        assert isinstance(arr, list)
-        assert np.array_equal(arr, [0, 1, 2])
-
-    def test_named_series(self):
-        input = pd.Series(data=[1, 3, 5], name="Something")
-        sample_array = [4, 5, 6]
-        result = _convert_to_string_list_dict("Base {0}", input, sample_array)
-        assert isinstance(result, dict)
-        assert len(result) == 1
-        assert "Something" in result
-        arr = result["Something"]
-        assert isinstance(arr, list)
-        assert np.array_equal(arr, [1, 3, 5])
-
-    def test_dataframe(self):
-        input = pd.DataFrame.from_dict({"a": [0, 1, 2], "b": [4, 5, 6]})
-        sample_array = [3, 6, 9]
-        result = _convert_to_string_list_dict("Base {0}", input, sample_array)
-        assert isinstance(result, dict)
-        assert len(result) == 2
-        assert "a" in result
-        arr = result["a"]
-        assert isinstance(arr, list)
-        assert np.array_equal(arr, [0, 1, 2])
-        assert "b" in result
-        arr = result["b"]
-        assert isinstance(arr, list)
-        assert np.array_equal(arr, [4, 5, 6])
-
-    def test_simplelist(self):
-        input = [0, 1, 4]
-        sample_array = [2, 3, 4]
-        result = _convert_to_string_list_dict("Base {0}", input, sample_array)
-        assert isinstance(result, dict)
-        assert len(result) == 1
-        assert "Base 0" in result
-        arr = result["Base 0"]
-        assert isinstance(arr, list)
-        assert np.array_equal(arr, [0, 1, 4])
-
-    def test_dict(self):
-        input = {"a": np.array([0, 1, 2]), "b": pd.Series(data=[3, 4, 5])}
-        sample_array = [2, 3, 4]
-        result = _convert_to_string_list_dict("Base {0}", input, sample_array)
-        assert isinstance(result, dict)
-        assert len(result) == 2
-        assert "a" in result
-        arr = result["a"]
-        assert isinstance(arr, list)
-        assert np.array_equal(arr, [0, 1, 2])
-        assert "b" in result
-        arr = result["b"]
-        assert isinstance(arr, list)
-        assert np.array_equal(arr, [3, 4, 5])
-
-    def test_numpy1d(self):
-        input = np.array([0, 1, 4])
-        sample_array = [2, 3, 4]
-        result = _convert_to_string_list_dict("Base {0}", input, sample_array)
-        assert isinstance(result, dict)
-        assert len(result) == 1
-        assert "Base 0" in result
-        arr = result["Base 0"]
-        assert isinstance(arr, list)
-        assert np.array_equal(arr, [0, 1, 4])
-
-    def test_numpy2d(self):
-        # Note transpose on the end
-        input = np.array([[0, 1, 4], [2, 6, 7]]).T
-        sample_array = [2, 3, 4]
-        result = _convert_to_string_list_dict("Base {0}", input, sample_array)
-        assert isinstance(result, dict)
-        assert len(result) == 2
-        assert "Base 0" in result
-        arr = result["Base 0"]
-        assert isinstance(arr, list)
-        assert np.array_equal(arr, [0, 1, 4])
-        assert "Base 1" in result
-        arr = result["Base 1"]
-        assert isinstance(arr, list)
-        assert np.array_equal(arr, [2, 6, 7])
-
-
-class TestConvertToList:
-    def test_pandas_dataframe_to_list(self):
-        input_dataframe = pd.DataFrame.from_dict(
-            {"a": [0, 1, 2], "b": [4, 5, 6]}
-        )
-        expected_list = [[0, 4], [1, 5], [2, 6]]
-        input_as_list = _convert_to_list(input_dataframe)
-
-        assert input_as_list is not None
-        assert input_as_list == expected_list
-
-    def test_array_to_list(self):
-        input_array = np.array([[0, 4], [1, 5], [2, 6]])
-        expected_list = [[0, 4], [1, 5], [2, 6]]
-        input_as_list = _convert_to_list(input_array)
-
-        assert input_as_list is not None
-        assert input_as_list == expected_list
-
-    def test_list_to_list(self):
-        input_list = [[0, 4], [1, 5], [2, 6]]
-        expected_list = [[0, 4], [1, 5], [2, 6]]
-        input_as_list = _convert_to_list(input_list)
-
-        assert input_as_list is not None
-        assert input_as_list == expected_list
-
-    def test_series_to_list(self):
-        input_series = pd.Series(data=[[0, 4], [1, 5], [2, 6]])
-        expected_list = [[0, 4], [1, 5], [2, 6]]
-        input_as_list = _convert_to_list(input_series)
-
-        assert input_as_list is not None
-        assert input_as_list == expected_list
-
-    def test_index_to_list(self):
-        input_index = pd.Index(data=[[0, 4], [1, 5], [2, 6]])
-        expected_list = [[0, 4], [1, 5], [2, 6]]
-        input_as_list = _convert_to_list(input_index)
-
-        assert input_as_list is not None
-        assert input_as_list == expected_list
-
-    def test_csr_matrix_to_list(self):
-        input_sparse_matrix = csr_matrix((3, 10000),
-                                         dtype=np.int8)
-        with pytest.raises(ValueError) as ve:
-            _convert_to_list(input_sparse_matrix)
-        assert "Exceeds maximum number of features for " + \
-            "visualization (1000)" in str(ve.value)
-
-        with pytest.raises(ValueError) as ve:
-            _convert_to_list(input_sparse_matrix,
-                             custom_err_msg="Error occurred")
-        assert "Error occurred" in str(ve.value)
-
-        row = np.array([0, 0, 1, 2, 2, 2])
-        col = np.array([0, 2, 2, 0, 1, 2])
-        data = np.array([1, 2, 3, 4, 5, 6])
-        sparse_matrix = csr_matrix((data, (row, col)), shape=(3, 3))
-        expected_list = [[1, 0, 2],
-                         [0, 0, 3],
-                         [4, 5, 6]]
-        input_as_list = _convert_to_list(sparse_matrix)
-
-        assert input_as_list is not None
-        assert input_as_list == expected_list