Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unify responsibleai constants #1925

Merged
merged 7 commits into from
Jan 27, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 38 additions & 1 deletion responsibleai/responsibleai/_internal/constants.py
Original file line number Diff line number Diff line change
@@ -19,6 +19,19 @@ class Metadata(object):

META_JSON = 'meta.json'
MODEL = 'model'
TRAIN = 'train'
TEST = 'test'
TASK_TYPE = 'task_type'
TARGET_COLUMN = 'target_column'
CLASSES = 'classes'
FEATURE_COLUMNS = 'feature_columns'
FEATURE_METADATA = 'feature_metadata'
FEATURE_RANGES = 'feature_ranges'
CATEGORICAL_FEATURES = 'categorical_features'
romanlutz marked this conversation as resolved.
Show resolved Hide resolved
CATEGORIES = 'categories'
CATEGORY_DICTIONARY = 'category_dictionary'
CATEGORICAL_INDEXES = 'categorical_indexes'
STRING_IND_DATA = 'string_ind_data'


class ListProperties(object):
@@ -83,14 +96,38 @@ class SerializationAttributes:

# File structure
RESULTS_DIRECTORY = 'results'
PREDICTIONS_DIRECTORY = 'predictions'
DATA_DIRECTORY = 'data'
DASHBOARD_SCHEMAS = 'dashboard_schemas'

# Metadata keys
ID_KEY = 'id'
VERSION_KEY = 'version'

# Metadata filnames
# Metadata filenames
ID_FILENAME = 'id.json'
VERSION_FILENAME = 'version.json'
META_JSON = 'meta.json'
RAI_VERSION_JSON = 'rai_version.json'

# Dashboard filenames
DASHBOARD_FILENAME = 'dashboard.json'

# Model filenames
MODEL_PKL = 'model.pkl'

# Prediction filenames
PREDICT_JSON = "predict.json"
LARGE_PREDICT_JSON = "large_predict.json"
PREDICT_PROBA_JSON = "predict_proba.json"
LARGE_PREDICT_PROBA_JSON = "large_predict_proba.json"

# Data filenames
LARGE_TEST_JSON = "large_test.json"


class FileFormats:
    """File-extension constants used when serializing RAI artifacts.

    Each value includes the leading dot so it can be appended directly
    to a base filename (e.g. ``f'schema_{version}{FileFormats.JSON}'``).
    """

    # JSON documents (metadata, predictions, dashboard schemas).
    JSON = '.json'
    # Pickled Python objects (models, explainers).
    PKL = '.pkl'
    # Plain-text artifacts.
    TXT = '.txt'
7 changes: 5 additions & 2 deletions responsibleai/responsibleai/_tools/causal/causal_result.py
Original file line number Diff line number Diff line change
@@ -16,6 +16,8 @@
CausalPolicyGains,
CausalPolicyTreeInternal,
CausalPolicyTreeLeaf, ComparisonTypes)
from responsibleai._internal.constants import (FileFormats,
SerializationAttributes)
from responsibleai._tools.causal.causal_config import CausalConfig
from responsibleai._tools.causal.causal_constants import ResultAttributes
from responsibleai._tools.shared.base_result import BaseResult
@@ -311,8 +313,9 @@ def _parse_comparison(
def _get_schema(cls, version: str):
cls._validate_version(version)

schema_directory = Path(__file__).parent / 'dashboard_schemas'
schema_filename = f'schema_{version}.json'
schema_directory = Path(__file__).parent / \
SerializationAttributes.DASHBOARD_SCHEMAS
schema_filename = f'schema_{version}{FileFormats.JSON}'
schema_filepath = schema_directory / schema_filename
with open(schema_filepath, 'r') as f:
return json.load(f)
Original file line number Diff line number Diff line change
@@ -8,6 +8,8 @@
from pathlib import Path
from typing import Any, List, Union

from responsibleai._internal.constants import FileFormats


class SerializationFormats:
PICKLE = 'pickle'
@@ -16,18 +18,14 @@ class SerializationFormats:


class SerializationExtensions:
PKL = 'pkl'
JSON = 'json'
TXT = 'txt'

@classmethod
def from_format(cls, file_format: str) -> str:
if file_format == SerializationFormats.PICKLE:
return cls.PKL
return FileFormats.PKL
elif file_format == SerializationFormats.JSON:
return cls.JSON
return FileFormats.JSON
elif file_format == SerializationFormats.TEXT:
return cls.TXT
return FileFormats.TXT
else:
raise ValueError(f"Unknown format: {file_format}")

@@ -55,7 +53,7 @@ def save_attributes(
attribute_format = file_format[i] if is_format_list else file_format
value = getattr(o, attribute)
extension = SerializationExtensions.from_format(attribute_format)
path = dir_path / f'{attribute}.{extension}'
path = dir_path / f'{attribute}{extension}'
_save_attribute(value, path, attribute_format)
paths.append(path)
return paths
@@ -102,7 +100,7 @@ def load_attributes(
for i, attribute in enumerate(attributes):
attribute_format = file_format[i] if is_format_list else file_format
extension = SerializationExtensions.from_format(attribute_format)
path = dir_path / f'{attribute}.{extension}'
path = dir_path / f'{attribute}{extension}'
if not fail_on_missing and (not path.exists() or not path.is_file()):
continue
value = _load_attribute(path, attribute_format)
54 changes: 30 additions & 24 deletions responsibleai/responsibleai/managers/counterfactual_manager.py
Original file line number Diff line number Diff line change
@@ -20,7 +20,8 @@
from responsibleai._data_validations import validate_train_test_categories
from responsibleai._interfaces import CounterfactualData
from responsibleai._internal.constants import (CounterfactualManagerKeys,
ListProperties, ManagerNames)
FileFormats, ListProperties,
ManagerNames)
from responsibleai._tools.shared.state_directory_management import \
DirectoryManager
from responsibleai.exceptions import (DuplicateManagerConfigException,
@@ -129,9 +130,9 @@ class CounterfactualConfig(BaseConfig):
HAS_COMPUTATION_FAILED = 'has_computation_failed'
FAILURE_REASON = 'failure_reason'

CONFIG_FILE_NAME = 'config.json'
RESULT_FILE_NAME = 'result.json'
EXPLAINER_FILE_NAME = 'explainer.pkl'
CONFIG_FILE_NAME = f'config{FileFormats.JSON}'
RESULT_FILE_NAME = f'result{FileFormats.JSON}'
EXPLAINER_FILE_NAME = f'explainer{FileFormats.PKL}'

def __init__(self, method, continuous_features, total_CFs,
desired_class=CounterfactualConstants.OPPOSITE,
@@ -264,36 +265,37 @@ def save_result(self, data_directory_path):

for counterfactual_examples_key in cf_schema_keys:
file_path = (data_directory_path /
(counterfactual_examples_key + '.json'))
(counterfactual_examples_key + FileFormats.JSON))
with open(file_path, 'w') as file_path:
json.dump(
counterfactuals_dict[counterfactual_examples_key],
file_path)

file_path = (data_directory_path /
(CounterfactualConfig.HAS_COMPUTATION_FAILED + '.json'))
file_path = data_directory_path / (
CounterfactualConfig.HAS_COMPUTATION_FAILED + FileFormats.JSON)
with open(file_path, 'w') as file_path:
json.dump(
cf_result[CounterfactualConfig.HAS_COMPUTATION_FAILED],
file_path)

file_path = (data_directory_path /
(CounterfactualConfig.FAILURE_REASON + '.json'))
(CounterfactualConfig.FAILURE_REASON + FileFormats.JSON))
with open(file_path, 'w') as file_path:
json.dump(
cf_result[CounterfactualConfig.FAILURE_REASON],
file_path)

file_path = (data_directory_path /
(CounterfactualConfig.IS_COMPUTED + '.json'))
(CounterfactualConfig.IS_COMPUTED + FileFormats.JSON))
with open(file_path, 'w') as file_path:
json.dump(
cf_result[CounterfactualConfig.IS_COMPUTED],
file_path)

def load_result(self, data_directory_path):
metadata_file_path = (data_directory_path /
(_CommonSchemaConstants.METADATA + '.json'))
metadata_file_path = (
data_directory_path /
(_CommonSchemaConstants.METADATA + FileFormats.JSON))

if metadata_file_path.exists():
with open(metadata_file_path, 'r') as result_file:
@@ -306,8 +308,9 @@ def load_result(self, data_directory_path):

counterfactual_examples_dict = {}
for counterfactual_examples_key in cf_schema_keys:
result_path = (data_directory_path /
(counterfactual_examples_key + '.json'))
result_path = (
data_directory_path /
(counterfactual_examples_key + FileFormats.JSON))
with open(result_path, 'r') as result_file:
counterfactual_examples_dict[
counterfactual_examples_key] = json.load(result_file)
@@ -318,18 +321,21 @@ def load_result(self, data_directory_path):
else:
self.counterfactual_obj = None

result_path = (data_directory_path /
(CounterfactualConfig.HAS_COMPUTATION_FAILED + '.json'))
result_path = (
data_directory_path /
(CounterfactualConfig.HAS_COMPUTATION_FAILED + FileFormats.JSON))
with open(result_path, 'r') as result_file:
self.has_computation_failed = json.load(result_file)

result_path = (data_directory_path /
(CounterfactualConfig.FAILURE_REASON + '.json'))
result_path = (
data_directory_path /
(CounterfactualConfig.FAILURE_REASON + FileFormats.JSON))
with open(result_path, 'r') as result_file:
self.failure_reason = json.load(result_file)

result_path = (data_directory_path /
(CounterfactualConfig.IS_COMPUTED + '.json'))
result_path = (
data_directory_path /
(CounterfactualConfig.IS_COMPUTED + FileFormats.JSON))
with open(result_path, 'r') as result_file:
self.is_computed = json.load(result_file)

@@ -398,9 +404,10 @@ def _create_diceml_explainer(self, method, continuous_features):
dice_data = dice_ml.Data(dataframe=self._train,
continuous_features=continuous_features,
outcome_name=self._target_column)
model_type = CounterfactualConstants.CLASSIFIER \
if self._task_type == ModelTask.CLASSIFICATION else \
CounterfactualConstants.REGRESSOR
model_type = (
CounterfactualConstants.CLASSIFIER
if self._task_type == ModelTask.CLASSIFICATION else
CounterfactualConstants.REGRESSOR)
dice_model = dice_ml.Model(model=self._model,
backend=CounterfactualConstants.SKLEARN,
model_type=model_type)
@@ -569,8 +576,7 @@ def compute(self):
jsonschema.validate(
json.loads(counterfactual_obj.to_json()), schema)

cf_config.counterfactual_obj = \
counterfactual_obj
cf_config.counterfactual_obj = counterfactual_obj

except Exception as e:
cf_config.has_computation_failed = True
Original file line number Diff line number Diff line change
@@ -13,7 +13,8 @@

from responsibleai._interfaces import TaskType
from responsibleai._internal.constants import DataBalanceManagerKeys as Keys
from responsibleai._internal.constants import ListProperties, ManagerNames
from responsibleai._internal.constants import (FileFormats, ListProperties,
ManagerNames)
from responsibleai._tools.shared.state_directory_management import \
DirectoryManager
from responsibleai.databalanceanalysis import (AggregateBalanceMeasures,
@@ -23,9 +24,9 @@
prepare_df, transform_measures_to_dict)
from responsibleai.managers.base_manager import BaseManager

DATA_JSON = "data.json"
MANAGER_JSON = "manager.json"
MEASURES_JSON = "measures.json"
DATA_JSON = f"data{FileFormats.JSON}"
MANAGER_JSON = f"manager{FileFormats.JSON}"
MEASURES_JSON = f"measures{FileFormats.JSON}"


class DataBalanceManager(BaseManager):
13 changes: 7 additions & 6 deletions responsibleai/responsibleai/managers/error_analysis_manager.py
Original file line number Diff line number Diff line change
@@ -18,7 +18,8 @@
from responsibleai._config.base_config import BaseConfig
from responsibleai._interfaces import ErrorAnalysisData
from responsibleai._internal.constants import ErrorAnalysisManagerKeys as Keys
from responsibleai._internal.constants import ListProperties, ManagerNames
from responsibleai._internal.constants import (FileFormats, ListProperties,
ManagerNames)
from responsibleai._tools.shared.state_directory_management import \
DirectoryManager
from responsibleai.exceptions import (ConfigAndResultMismatchException,
@@ -342,7 +343,7 @@ def _get_error_analysis_schema():
"""Get the schema for validating the error analysis output."""
schema_directory = (Path(__file__).parent.parent / '_tools' /
'error_analysis' / 'dashboard_schemas')
schema_filename = 'error_analysis_output_v0.0.json'
schema_filename = f'error_analysis_output_v0.0{FileFormats.JSON}'
schema_filepath = schema_directory / schema_filename
with open(schema_filepath, 'r') as f:
return json.load(f)
@@ -420,15 +421,15 @@ def _save(self, path):
# save the configs
directory_manager = DirectoryManager(parent_directory_path=path)
config_path = (directory_manager.create_config_directory() /
'config.json')
f'config{FileFormats.JSON}')
ea_config = self._ea_config_list[index]
with open(config_path, 'w') as file:
json.dump(ea_config, file,
default=config_json_converter)

# save the reports
report_path = (directory_manager.create_data_directory() /
'report.json')
f'report{FileFormats.JSON}')
ea_report = self._ea_report_list[index]
with open(report_path, 'w') as file:
json.dump(ea_report, file,
@@ -458,13 +459,13 @@ def _load(path, rai_insights):
sub_directory_name=ea_dir)

config_path = (directory_manager.get_config_directory() /
'config.json')
f'config{FileFormats.JSON}')
with open(config_path, 'r') as file:
ea_config = json.load(file, object_hook=as_error_config)
ea_config_list.append(ea_config)

report_path = (directory_manager.get_data_directory() /
'report.json')
f'report{FileFormats.JSON}')
with open(report_path, 'r') as file:
ea_report = json.load(file, object_hook=as_error_report)
# Validate the serialized output against schema
10 changes: 4 additions & 6 deletions responsibleai/responsibleai/managers/explainer_manager.py
Original file line number Diff line number Diff line change
@@ -40,8 +40,6 @@
U_EVALUATION_EXAMPLES = '_evaluation_examples'
FEATURES = 'features'
CATEGORICAL_FEATURES = 'categorical_features'
META_JSON = Metadata.META_JSON
MODEL = Metadata.MODEL
EXPLANATION = '_explanation'


@@ -290,7 +288,7 @@ def _get_interpret(self, explanation, evaluation_examples=None):
raise ValueError(
"Shape mismatch: local explanation"
"length differs from dataset")
if(len(local_dim) == 3 and
if (len(local_dim) == 3 and
(local_dim[2] != _feature_length or
local_dim[1] != _row_length)):
raise ValueError(
@@ -350,7 +348,7 @@ def _save(self, path):

meta = {IS_RUN: self._is_run,
IS_ADDED: self._is_added}
with open(data_directory / META_JSON, 'w') as file:
with open(data_directory / Metadata.META_JSON, 'w') as file:
json.dump(meta, file)

@staticmethod
@@ -375,7 +373,7 @@ def _load(path, rai_insights):
sub_directory_name=all_cf_dirs[0])
data_directory = directory_manager.get_data_directory()

with open(data_directory / META_JSON, 'r') as meta_file:
with open(data_directory / Metadata.META_JSON, 'r') as meta_file:
meta = meta_file.read()
meta = json.loads(meta)
inst.__dict__['_' + IS_RUN] = meta[IS_RUN]
@@ -391,7 +389,7 @@ def _load(path, rai_insights):
inst.__dict__['_' + IS_ADDED] = False
inst.__dict__[EXPLANATION] = None

inst.__dict__['_' + MODEL] = rai_insights.model
inst.__dict__['_' + Metadata.MODEL] = rai_insights.model
inst.__dict__['_' + CLASSES] = rai_insights._classes
inst.__dict__['_' + CATEGORICAL_FEATURES] = \
rai_insights.categorical_features
Loading