Merge pull request #562 from sibyl-dev/release-v0.4.10
Bump version
zyteka authored Aug 5, 2024
2 parents 5f6019c + 2445b11 commit f4b1ca2
Showing 28 changed files with 1,761 additions and 574 deletions.
4 changes: 4 additions & 0 deletions README.md
@@ -37,6 +37,10 @@ An easier approach to understanding your model's predictions.
**Pyreal** gives you easy-to-understand explanations of your machine learning models in a low-code manner.
Pyreal wraps full ML pipelines in a RealApp object that makes it easy to use, understand, and interact with your ML model — regardless of your ML expertise.

See our tutorial series for an example of using Pyreal for house-price prediction:
- [Part 1: Learn about feature engineering and modelling](https://medium.com/mit-data-to-ai-lab/using-and-understanding-machine-learning-ml-models-ada6525cf192)
- [Part 2: Learn how to use Pyreal to use and understand ML models](https://medium.com/mit-data-to-ai-lab/using-and-understanding-machine-learning-ml-models-0ce7c227837e)

# Install

## Requirements
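For readers new to the library, here is a minimal sketch of the RealApp workflow the README describes. The constructor and method names follow Pyreal's documented usage, but treat them as assumptions: they are not part of this diff.

```python
# Hedged sketch of the low-code RealApp workflow described in the README.
# RealApp's signature and produce_feature_contributions are assumptions
# based on Pyreal's docs, not on this diff.
import pandas as pd
from sklearn.linear_model import LinearRegression

from pyreal import RealApp

X = pd.DataFrame({"size_sqft": [1000, 1500, 2000], "age_years": [10, 5, 1]})
y = pd.Series([200_000, 300_000, 400_000])
model = LinearRegression().fit(X, y)

app = RealApp(model, X, y)  # wrap the fitted model with its training data
print(app.produce_feature_contributions(X.iloc[[0]]))  # per-feature contributions
```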
14 changes: 14 additions & 0 deletions docs/api_reference/transformer.rst
@@ -89,3 +89,17 @@ Geo Transformers

LatLongToPlace

Narrative Transformers
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autosummary::
   :toctree: api/

   NarrativeTransformer

Aggregating Transformers
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autosummary::
   :toctree: api/

   Aggregator

2 changes: 1 addition & 1 deletion pyproject.toml
@@ -10,7 +10,7 @@ maintainers = [

description = "Library for evaluating and deploying human readable machine learning explanations."
name = "pyreal"
version = "0.4.9"
version = "0.4.10"

license = ""

2 changes: 1 addition & 1 deletion pyreal/__init__.py
@@ -6,7 +6,7 @@

__author__ = "MIT Data To AI Lab"
__email__ = "dailabmit@gmail.com"
__version__ = "0.4.9"
__version__ = "0.4.10"


__all__ = [
6 changes: 6 additions & 0 deletions pyreal/explainers/base.py
@@ -387,6 +387,10 @@ def transform_explanation(self, explanation, x_orig=None):

# Iterate through algorithm transformers
for i, t in enumerate(a_transformers[::-1]):
if t.require_values:
explanation.update_values( # UNTESTED
run_transformers(a_transformers[0 : len(a_transformers) - i], x), inplace=True
)
try:
explanation = t.inverse_transform_explanation(explanation)
# If this is a breaking transformer, transform x to the current point and return
@@ -404,6 +408,8 @@
return explanation
# Iterate through interpret transformers
for t in i_transformers:
if t.require_values and x is not None:
explanation.update_values(x, inplace=True)
if not t.algorithm:
try:
explanation = t.transform_explanation(explanation)
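For orientation, here is a self-contained sketch of the `require_values` pattern this hunk introduces: while walking the transformers in reverse to undo an explanation, any transformer that needs concrete feature values gets the explanation's values refreshed by running the forward pipeline up through that transformer. All class and method names below are illustrative stand-ins, not Pyreal's actual API.

```python
# Hypothetical sketch of the require_values pattern; names are illustrative.
class Explanation:
    """Toy stand-in for Pyreal's explanation types."""
    def __init__(self, data, values=None):
        self.data = data
        self.values = values

    def update_values(self, values, inplace=False):
        if inplace:
            self.values = values
            return self
        return Explanation(self.data, values)

class AddOne:
    """Toy transformer whose explanation inverse needs concrete values."""
    require_values = True

    def transform(self, x):
        return [v + 1 for v in x]

    def inverse_transform_explanation(self, explanation):
        explanation.data = explanation.data + " <-undo AddOne"  # toy inverse
        return explanation

def run_transformers(transformers, x):
    for t in transformers:
        x = t.transform(x)
    return x

def inverse_transform_explanation(explanation, transformers, x):
    n = len(transformers)
    for i, t in enumerate(transformers[::-1]):
        if getattr(t, "require_values", False):
            # Refresh values to the space the explanation currently occupies:
            # the forward pipeline applied up through this transformer.
            explanation.update_values(run_transformers(transformers[: n - i], x), inplace=True)
        explanation = t.inverse_transform_explanation(explanation)
    return explanation

exp = inverse_transform_explanation(Explanation("contribs"), [AddOne(), AddOne()], [1, 2])
print(exp.data, exp.values)  # values end up in the space of the last step undone
```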
141 changes: 20 additions & 121 deletions pyreal/explainers/lfc/base.py
@@ -3,6 +3,7 @@
import numpy as np

from pyreal.explainers import ExplainerBase
from pyreal.transformers import NarrativeTransformer


class LocalFeatureContributionsBase(ExplainerBase, ABC):
@@ -108,17 +109,20 @@ def produce_narrative_explanation(
raise ValueError("OpenAI API key or client must be provided to produce narrative")
openai_client = self.openai_client

return self.narrify(
openai_client,
self.produce(x_orig),
narrative_transformer = NarrativeTransformer(
openai_client=openai_client,
num_features=num_features,
max_tokens=max_tokens,
temperature=temperature,
llm_model=llm_model,
detail_level=detail_level,
context_description=context_description,
few_shot_training_examples=self.llm_training_data,
max_tokens=max_tokens,
temperature=temperature,
training_examples={"feature_contributions": self.llm_training_data},
)
self.transformers.append(narrative_transformer)
result = self.produce(x_orig)
self.transformers.pop()
return result

def train_llm(
self, x_train=None, live=True, provide_examples=False, num_inputs=5, num_features=3
@@ -163,7 +167,9 @@ def train_llm(
else:
x_train = x_train.sample(num_inputs)
explanation = self.produce(x_train)
parsed_explanations = parse_explanation_for_llm(explanation, num_features=num_features)
parsed_explanations = NarrativeTransformer.parse_feature_contribution_explanation_for_llm(
explanation, num_features=num_features
)
narratives = []
print("For each of the following inputs, please provide an appropriate narrative version.")
for i in range(num_inputs):
@@ -173,9 +179,13 @@
f"Input {i+1} (feature, value, contribution):\n{parsed_explanation_formatted}\n"
)
if provide_examples:
example = LocalFeatureContributionsBase.narrify(
self.openai_client, explanation[i], num_features=num_features
)[0]
example = (
NarrativeTransformer(
openai_client=self.openai_client, num_features=num_features
)
.transform_explanation_feature_contribution(explanation[i])
.get()[0]
)
instruction += f"Example: {example}\n"
instruction += "Narrative explanation ('k' to keep example, 'q' to quit): "
narrative = input(instruction)
@@ -210,114 +220,3 @@ def set_llm_training_data(self, training_data):
trained on these examples before generating the explanation.
"""
self.llm_training_data = training_data

@staticmethod
def narrify(
openai_client,
explanation,
num_features=None,
llm_model="gpt3.5",
detail_level="high",
context_description="",
max_tokens=200,
temperature=0.5,
few_shot_training_examples=None,
):
"""
Generate a narrative explanation from a feature contribution explanation
Args:
openai_client (OpenAI API client):
OpenAI API client, with API key set
explanation (LocalFeatureContributionExplanation):
Feature contribution explanations. Each row represents an instance, and each
column a feature.
num_features (int):
Number of features to include in the explanation. If None, all features will be
included
llm_model (string):
One of ["gpt3.5", "gpt4"]. LLM model to use to generate the explanation.
GPT4 may provide better results, but is more expensive.
detail_level (string):
One of ["high", "low"]. Level of detail to include in the explanation.
High detail should include precise contribution values. Low detail
will include only basic information about features used.
context_description (string):
Description of the model's prediction task, in sentence format. This will be
passed to the LLM and may help produce more accurate explanations.
For example: "The model predicts the price of houses."
max_tokens (int):
Maximum number of tokens to use in the explanation
temperature (float):
LLM Temperature to use. Values closer to 1 will produce more creative values.
Values closer to 0 will produce more consistent or conservative explanations.
few_shot_training_examples (list of (explanation, narrative) pairs):
Training examples to use for few-shot learning. If provided, the LLM will be
trained on these examples before generating the explanation.
Returns:
DataFrame of shape (n_instances, n_features)
Narrative explanation
"""
if llm_model == "gpt3.5":
model = "gpt-3.5-turbo-0125"
elif llm_model == "gpt4":
model = "gpt-4-0125-preview"
else:
raise ValueError(
"Invalid LLM model %s. Expected one of ['gpt3.5', 'gpt4']" % llm_model
)
if context_description is None:
context_description = ""
if context_description:
context_description = context_description.strip()
if not context_description.endswith("."):
context_description += "."
prompt = (
"You are helping users who do not have experience working with ML understand an ML"
f" model's predictions. {context_description} I will give you feature contribution"
" explanations, generated using SHAP, in (feature, feature_value, contribution )"
" format. Convert the explanations into simple narratives. Do not use more tokens"
" than necessary. Make your answers sound very natural, as if said in conversation. "
)
if detail_level == "low":
prompt += (
"Keep the explanations simple and easy to understand. Do not include exact"
" contribution values. "
)
elif detail_level == "high":
prompt += "Include all exact contribution values in your response. "
else:
raise ValueError(
"Invalid detail_level %s. Expected one of ['high', 'low']" % detail_level
)
# explanation = explanation.get_top_features(num_features=num_features)
narrative_explanations = []
base_messages = [{"role": "system", "content": prompt}]
if few_shot_training_examples is not None:
for training_exp, training_narr in few_shot_training_examples:
base_messages.append({"role": "user", "content": training_exp})
base_messages.append({"role": "assistant", "content": training_narr})
parsed_explanations = parse_explanation_for_llm(explanation, num_features=num_features)
for parsed_explanation in parsed_explanations:
messages = base_messages + [{"role": "user", "content": parsed_explanation}]
response = openai_client.chat.completions.create(
model=model,
messages=messages,
max_tokens=max_tokens,
temperature=temperature,
)
narrative_explanations.append(response.choices[0].message.content)
return narrative_explanations


def parse_explanation_for_llm(explanation, num_features=None):
explanations = explanation.get_top_features(num_features=num_features)
parsed_explanations = []
for explanation in explanations:
strings = []
for feature, value, contribution in zip(
explanation[0].index, explanation[1], explanation[0]
):
strings.append(f"({feature}, {value}, {contribution})")
parsed_explanations.append(", ".join(strings))
return parsed_explanations
32 changes: 28 additions & 4 deletions pyreal/explainers/lfc/shap_feature_contribution.py
@@ -7,6 +7,23 @@
from pyreal.explanation_types import AdditiveFeatureContributionExplanation


def _get_average_or_mode(df):
"""
Gets the average of numeric features and the mode of categorical features
Args:
df (DataFrame):
Input
Returns:
Series
Average or mode of every column in df
"""
s = df.select_dtypes(np.number).mean()
if len(s) == df.shape[1]: # all columns are numeric
return s
return pd.concat((df.drop(s.index, axis=1).mode().iloc[0], s))


class ShapFeatureContribution(LocalFeatureContributionsBase):
"""
ShapFeatureContribution object.
@@ -20,13 +37,13 @@ class ShapFeatureContribution(LocalFeatureContributionsBase):
Filepath to the pickled model to explain, or model object with .predict() function
x_train_orig (DataFrame of size (n_instances, n_features)):
Training set in original form.
shap_type (string, one of ["kernel", "linear"]):
shap_type (string, one of ["kernel", "linear", "tree"]):
Type of shap algorithm to use. If None, SHAP will pick one.
**kwargs: see base Explainer args
"""

def __init__(self, model, x_train_orig=None, shap_type=None, **kwargs):
supported_types = ["kernel", "linear"]
supported_types = ["kernel", "linear", "tree"]
if shap_type is not None and shap_type not in supported_types:
raise ValueError(
"Shap type not supported, given %s, expected one of %s or None"
@@ -37,6 +54,7 @@ def __init__(self, model, x_train_orig=None, shap_type=None, **kwargs):

self.explainer = None
self.explainer_input_size = None
self.average_values = None
super(ShapFeatureContribution, self).__init__(model, x_train_orig, **kwargs)

def fit(self, x_train_orig=None, y_train=None):
@@ -50,6 +68,7 @@
Targets of training set, required if not provided on initialization
"""
x_train_orig = self._get_x_train_orig(x_train_orig)
self.average_values = _get_average_or_mode(x_train_orig)

dataset = self.transform_to_x_algorithm(x_train_orig)
self.explainer_input_size = dataset.shape[1]
@@ -58,6 +77,8 @@
# Note: we manually check for linear model here because of SHAP bug
elif self.shap_type == "linear":
self.explainer = LinearExplainer(self.model, dataset)
elif self.shap_type == "tree":
self.explainer = TreeExplainer(self.model, dataset)
else:
self.explainer = ShapExplainer(self.model, dataset) # SHAP will pick an algorithm
return self
@@ -83,6 +104,7 @@ def produce_explanation(self, x_orig, **kwargs):
)
)
columns = x.columns
index = x.index
x = np.asanyarray(x)

if isinstance(self.explainer, TreeExplainer):
@@ -93,13 +115,15 @@
raise RuntimeError("Something went wrong with SHAP - expected at least 2 dimensions")
if shap_values.ndim == 2:
return AdditiveFeatureContributionExplanation(
pd.DataFrame(shap_values, columns=columns)
pd.DataFrame(shap_values, columns=columns, index=index),
other_properties={"average_values": self.average_values},
)
if shap_values.ndim > 2:
predictions = self.model_predict(x_orig)
if self.classes is not None:
predictions = [np.where(self.classes == i)[0][0] for i in predictions]
shap_values = shap_values[predictions, np.arange(shap_values.shape[1]), :]
return AdditiveFeatureContributionExplanation(
pd.DataFrame(shap_values, columns=columns)
pd.DataFrame(shap_values, columns=columns, index=index),
other_properties={"average_values": self.average_values},
)
2 changes: 2 additions & 0 deletions pyreal/explanation_types/__init__.py
@@ -18,6 +18,7 @@
PartialDependenceExplanation,
)
from pyreal.explanation_types.time_series_saliency import TimeSeriesSaliency
from pyreal.explanation_types.narrative import NarrativeExplanation

__all__ = [
"Explanation",
@@ -34,4 +35,5 @@
"FeatureValueBased",
"PartialDependenceExplanation",
"TimeSeriesSaliency",
"NarrativeExplanation",
]
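With the export added above, the new explanation type is importable from the subpackage root; this one-liner assumes pyreal >= 0.4.10 is installed.

```python
# Grounded in the __all__ change above.
from pyreal.explanation_types import NarrativeExplanation
```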