Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unify responsibleai constants #1925

Merged
merged 7 commits into from
Jan 27, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 38 additions & 1 deletion responsibleai/responsibleai/_internal/constants.py
Original file line number Diff line number Diff line change
@@ -19,6 +19,19 @@ class Metadata(object):

META_JSON = 'meta.json'
MODEL = 'model'
TRAIN = 'train'
TEST = 'test'
TASK_TYPE = 'task_type'
TARGET_COLUMN = 'target_column'
CLASSES = 'classes'
FEATURE_COLUMNS = 'feature_columns'
FEATURE_METADATA = 'feature_metadata'
FEATURE_RANGES = 'feature_ranges'
CATEGORICAL_FEATURES = 'categorical_features'
romanlutz marked this conversation as resolved.
Show resolved Hide resolved
CATEGORIES = 'categories'
CATEGORY_DICTIONARY = 'category_dictionary'
CATEGORICAL_INDEXES = 'categorical_indexes'
STRING_IND_DATA = 'string_ind_data'


class ListProperties(object):
@@ -83,14 +96,38 @@ class SerializationAttributes:

# File structure
RESULTS_DIRECTORY = 'results'
PREDICTIONS_DIRECTORY = 'predictions'
DATA_DIRECTORY = 'data'
DASHBOARD_SCHEMAS = 'dashboard_schemas'

# Metadata keys
ID_KEY = 'id'
VERSION_KEY = 'version'

# Metadata filnames
# Metadata filenames
ID_FILENAME = 'id.json'
VERSION_FILENAME = 'version.json'
META_JSON = 'meta.json'
RAI_VERSION_JSON = 'rai_version.json'

# Dashboard filenames
DASHBOARD_FILENAME = 'dashboard.json'

# Model filenames
MODEL_PKL = 'model.pkl'

# Prediction filenames
PREDICT_JSON = "predict.json"
LARGE_PREDICT_JSON = "large_predict.json"
PREDICT_PROBA_JSON = "predict_proba.json"
LARGE_PREDICT_PROBA_JSON = "large_predict_proba.json"

# Data filenames
LARGE_TEST_JSON = "large_test.json"


class FileFormats:
    """File-extension constants used when serializing RAI artifacts.

    Each value includes the leading dot so it can be appended directly
    to a base filename (e.g. ``f'schema_{version}{FileFormats.JSON}'``).
    """

    # JSON documents (metadata, predictions, dashboard schemas).
    JSON = '.json'
    # Pickled Python objects (models, explainers).
    PKL = '.pkl'
    # Plain-text artifacts.
    TXT = '.txt'
7 changes: 5 additions & 2 deletions responsibleai/responsibleai/_tools/causal/causal_result.py
Original file line number Diff line number Diff line change
@@ -16,6 +16,8 @@
CausalPolicyGains,
CausalPolicyTreeInternal,
CausalPolicyTreeLeaf, ComparisonTypes)
from responsibleai._internal.constants import (FileFormats,
SerializationAttributes)
from responsibleai._tools.causal.causal_config import CausalConfig
from responsibleai._tools.causal.causal_constants import ResultAttributes
from responsibleai._tools.shared.base_result import BaseResult
@@ -311,8 +313,9 @@ def _parse_comparison(
def _get_schema(cls, version: str):
cls._validate_version(version)

schema_directory = Path(__file__).parent / 'dashboard_schemas'
schema_filename = f'schema_{version}.json'
schema_directory = Path(__file__).parent / \
SerializationAttributes.DASHBOARD_SCHEMAS
schema_filename = f'schema_{version}{FileFormats.JSON}'
schema_filepath = schema_directory / schema_filename
with open(schema_filepath, 'r') as f:
return json.load(f)
Original file line number Diff line number Diff line change
@@ -8,6 +8,8 @@
from pathlib import Path
from typing import Any, List, Union

from responsibleai._internal.constants import FileFormats


class SerializationFormats:
PICKLE = 'pickle'
@@ -16,18 +18,14 @@ class SerializationFormats:


class SerializationExtensions:
PKL = 'pkl'
JSON = 'json'
TXT = 'txt'

@classmethod
def from_format(cls, file_format: str) -> str:
if file_format == SerializationFormats.PICKLE:
return cls.PKL
return FileFormats.PKL
elif file_format == SerializationFormats.JSON:
return cls.JSON
return FileFormats.JSON
elif file_format == SerializationFormats.TEXT:
return cls.TXT
return FileFormats.TXT
else:
raise ValueError(f"Unknown format: {file_format}")

@@ -55,7 +53,7 @@ def save_attributes(
attribute_format = file_format[i] if is_format_list else file_format
value = getattr(o, attribute)
extension = SerializationExtensions.from_format(attribute_format)
path = dir_path / f'{attribute}.{extension}'
path = dir_path / f'{attribute}{extension}'
_save_attribute(value, path, attribute_format)
paths.append(path)
return paths
@@ -102,7 +100,7 @@ def load_attributes(
for i, attribute in enumerate(attributes):
attribute_format = file_format[i] if is_format_list else file_format
extension = SerializationExtensions.from_format(attribute_format)
path = dir_path / f'{attribute}.{extension}'
path = dir_path / f'{attribute}{extension}'
if not fail_on_missing and (not path.exists() or not path.is_file()):
continue
value = _load_attribute(path, attribute_format)
54 changes: 30 additions & 24 deletions responsibleai/responsibleai/managers/counterfactual_manager.py
Original file line number Diff line number Diff line change
@@ -20,7 +20,8 @@
from responsibleai._data_validations import validate_train_test_categories
from responsibleai._interfaces import CounterfactualData
from responsibleai._internal.constants import (CounterfactualManagerKeys,
ListProperties, ManagerNames)
FileFormats, ListProperties,
ManagerNames)
from responsibleai._tools.shared.state_directory_management import \
DirectoryManager
from responsibleai.exceptions import (DuplicateManagerConfigException,
@@ -129,9 +130,9 @@ class CounterfactualConfig(BaseConfig):
HAS_COMPUTATION_FAILED = 'has_computation_failed'
FAILURE_REASON = 'failure_reason'

CONFIG_FILE_NAME = 'config.json'
RESULT_FILE_NAME = 'result.json'
EXPLAINER_FILE_NAME = 'explainer.pkl'
CONFIG_FILE_NAME = f'config{FileFormats.JSON}'
RESULT_FILE_NAME = f'result{FileFormats.JSON}'
EXPLAINER_FILE_NAME = f'explainer{FileFormats.PKL}'

def __init__(self, method, continuous_features, total_CFs,
desired_class=CounterfactualConstants.OPPOSITE,
@@ -264,36 +265,37 @@ def save_result(self, data_directory_path):

for counterfactual_examples_key in cf_schema_keys:
file_path = (data_directory_path /
(counterfactual_examples_key + '.json'))
(counterfactual_examples_key + FileFormats.JSON))
with open(file_path, 'w') as file_path:
json.dump(
counterfactuals_dict[counterfactual_examples_key],
file_path)

file_path = (data_directory_path /
(CounterfactualConfig.HAS_COMPUTATION_FAILED + '.json'))
file_path = data_directory_path / (
CounterfactualConfig.HAS_COMPUTATION_FAILED + FileFormats.JSON)
with open(file_path, 'w') as file_path:
json.dump(
cf_result[CounterfactualConfig.HAS_COMPUTATION_FAILED],
file_path)

file_path = (data_directory_path /
(CounterfactualConfig.FAILURE_REASON + '.json'))
(CounterfactualConfig.FAILURE_REASON + FileFormats.JSON))
with open(file_path, 'w') as file_path:
json.dump(
cf_result[CounterfactualConfig.FAILURE_REASON],
file_path)

file_path = (data_directory_path /
(CounterfactualConfig.IS_COMPUTED + '.json'))
(CounterfactualConfig.IS_COMPUTED + FileFormats.JSON))
with open(file_path, 'w') as file_path:
json.dump(
cf_result[CounterfactualConfig.IS_COMPUTED],
file_path)

def load_result(self, data_directory_path):
metadata_file_path = (data_directory_path /
(_CommonSchemaConstants.METADATA + '.json'))
metadata_file_path = (
data_directory_path /
(_CommonSchemaConstants.METADATA + FileFormats.JSON))

if metadata_file_path.exists():
with open(metadata_file_path, 'r') as result_file:
@@ -306,8 +308,9 @@ def load_result(self, data_directory_path):

counterfactual_examples_dict = {}
for counterfactual_examples_key in cf_schema_keys:
result_path = (data_directory_path /
(counterfactual_examples_key + '.json'))
result_path = (
data_directory_path /
(counterfactual_examples_key + FileFormats.JSON))
with open(result_path, 'r') as result_file:
counterfactual_examples_dict[
counterfactual_examples_key] = json.load(result_file)
@@ -318,18 +321,21 @@ def load_result(self, data_directory_path):
else:
self.counterfactual_obj = None

result_path = (data_directory_path /
(CounterfactualConfig.HAS_COMPUTATION_FAILED + '.json'))
result_path = (
data_directory_path /
(CounterfactualConfig.HAS_COMPUTATION_FAILED + FileFormats.JSON))
with open(result_path, 'r') as result_file:
self.has_computation_failed = json.load(result_file)

result_path = (data_directory_path /
(CounterfactualConfig.FAILURE_REASON + '.json'))
result_path = (
data_directory_path /
(CounterfactualConfig.FAILURE_REASON + FileFormats.JSON))
with open(result_path, 'r') as result_file:
self.failure_reason = json.load(result_file)

result_path = (data_directory_path /
(CounterfactualConfig.IS_COMPUTED + '.json'))
result_path = (
data_directory_path /
(CounterfactualConfig.IS_COMPUTED + FileFormats.JSON))
with open(result_path, 'r') as result_file:
self.is_computed = json.load(result_file)

@@ -398,9 +404,10 @@ def _create_diceml_explainer(self, method, continuous_features):
dice_data = dice_ml.Data(dataframe=self._train,
continuous_features=continuous_features,
outcome_name=self._target_column)
model_type = CounterfactualConstants.CLASSIFIER \
if self._task_type == ModelTask.CLASSIFICATION else \
CounterfactualConstants.REGRESSOR
model_type = (
CounterfactualConstants.CLASSIFIER
if self._task_type == ModelTask.CLASSIFICATION else
CounterfactualConstants.REGRESSOR)
dice_model = dice_ml.Model(model=self._model,
backend=CounterfactualConstants.SKLEARN,
model_type=model_type)
@@ -569,8 +576,7 @@ def compute(self):
jsonschema.validate(
json.loads(counterfactual_obj.to_json()), schema)

cf_config.counterfactual_obj = \
counterfactual_obj
cf_config.counterfactual_obj = counterfactual_obj

except Exception as e:
cf_config.has_computation_failed = True
Original file line number Diff line number Diff line change
@@ -13,7 +13,8 @@

from responsibleai._interfaces import TaskType
from responsibleai._internal.constants import DataBalanceManagerKeys as Keys
from responsibleai._internal.constants import ListProperties, ManagerNames
from responsibleai._internal.constants import (FileFormats, ListProperties,
ManagerNames)
from responsibleai._tools.shared.state_directory_management import \
DirectoryManager
from responsibleai.databalanceanalysis import (AggregateBalanceMeasures,
@@ -23,9 +24,9 @@
prepare_df, transform_measures_to_dict)
from responsibleai.managers.base_manager import BaseManager

DATA_JSON = "data.json"
MANAGER_JSON = "manager.json"
MEASURES_JSON = "measures.json"
DATA_JSON = f"data{FileFormats.JSON}"
MANAGER_JSON = f"manager{FileFormats.JSON}"
MEASURES_JSON = f"measures{FileFormats.JSON}"


class DataBalanceManager(BaseManager):
13 changes: 7 additions & 6 deletions responsibleai/responsibleai/managers/error_analysis_manager.py
Original file line number Diff line number Diff line change
@@ -18,7 +18,8 @@
from responsibleai._config.base_config import BaseConfig
from responsibleai._interfaces import ErrorAnalysisData
from responsibleai._internal.constants import ErrorAnalysisManagerKeys as Keys
from responsibleai._internal.constants import ListProperties, ManagerNames
from responsibleai._internal.constants import (FileFormats, ListProperties,
ManagerNames)
from responsibleai._tools.shared.state_directory_management import \
DirectoryManager
from responsibleai.exceptions import (ConfigAndResultMismatchException,
@@ -342,7 +343,7 @@ def _get_error_analysis_schema():
"""Get the schema for validating the error analysis output."""
schema_directory = (Path(__file__).parent.parent / '_tools' /
'error_analysis' / 'dashboard_schemas')
schema_filename = 'error_analysis_output_v0.0.json'
schema_filename = f'error_analysis_output_v0.0{FileFormats.JSON}'
schema_filepath = schema_directory / schema_filename
with open(schema_filepath, 'r') as f:
return json.load(f)
@@ -420,15 +421,15 @@ def _save(self, path):
# save the configs
directory_manager = DirectoryManager(parent_directory_path=path)
config_path = (directory_manager.create_config_directory() /
'config.json')
f'config{FileFormats.JSON}')
ea_config = self._ea_config_list[index]
with open(config_path, 'w') as file:
json.dump(ea_config, file,
default=config_json_converter)

# save the reports
report_path = (directory_manager.create_data_directory() /
'report.json')
f'report{FileFormats.JSON}')
ea_report = self._ea_report_list[index]
with open(report_path, 'w') as file:
json.dump(ea_report, file,
@@ -458,13 +459,13 @@ def _load(path, rai_insights):
sub_directory_name=ea_dir)

config_path = (directory_manager.get_config_directory() /
'config.json')
f'config{FileFormats.JSON}')
with open(config_path, 'r') as file:
ea_config = json.load(file, object_hook=as_error_config)
ea_config_list.append(ea_config)

report_path = (directory_manager.get_data_directory() /
'report.json')
f'report{FileFormats.JSON}')
with open(report_path, 'r') as file:
ea_report = json.load(file, object_hook=as_error_report)
# Validate the serialized output against schema
10 changes: 4 additions & 6 deletions responsibleai/responsibleai/managers/explainer_manager.py
Original file line number Diff line number Diff line change
@@ -40,8 +40,6 @@
U_EVALUATION_EXAMPLES = '_evaluation_examples'
FEATURES = 'features'
CATEGORICAL_FEATURES = 'categorical_features'
META_JSON = Metadata.META_JSON
MODEL = Metadata.MODEL
EXPLANATION = '_explanation'


@@ -290,7 +288,7 @@ def _get_interpret(self, explanation, evaluation_examples=None):
raise ValueError(
"Shape mismatch: local explanation"
"length differs from dataset")
if(len(local_dim) == 3 and
if (len(local_dim) == 3 and
(local_dim[2] != _feature_length or
local_dim[1] != _row_length)):
raise ValueError(
@@ -350,7 +348,7 @@ def _save(self, path):

meta = {IS_RUN: self._is_run,
IS_ADDED: self._is_added}
with open(data_directory / META_JSON, 'w') as file:
with open(data_directory / Metadata.META_JSON, 'w') as file:
json.dump(meta, file)

@staticmethod
@@ -375,7 +373,7 @@ def _load(path, rai_insights):
sub_directory_name=all_cf_dirs[0])
data_directory = directory_manager.get_data_directory()

with open(data_directory / META_JSON, 'r') as meta_file:
with open(data_directory / Metadata.META_JSON, 'r') as meta_file:
meta = meta_file.read()
meta = json.loads(meta)
inst.__dict__['_' + IS_RUN] = meta[IS_RUN]
@@ -391,7 +389,7 @@ def _load(path, rai_insights):
inst.__dict__['_' + IS_ADDED] = False
inst.__dict__[EXPLANATION] = None

inst.__dict__['_' + MODEL] = rai_insights.model
inst.__dict__['_' + Metadata.MODEL] = rai_insights.model
inst.__dict__['_' + CLASSES] = rai_insights._classes
inst.__dict__['_' + CATEGORICAL_FEATURES] = \
rai_insights.categorical_features
Loading