Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Relative probability metric #5

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions zone_model/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,7 @@ def correlate(observed, predicted):

# Score the model with the `correlate` scoring function (which takes
# observed and predicted values) and report the result.
corr = model.score(scoring_function=correlate, aggregate=True)
print(" Correlation is {}".format(corr))

# Wrap the per-variable mapping returned by model.relative_probabilities()
# in a Series so it can be printed sorted from largest to smallest value.
relative_probabilities = pd.Series(model.relative_probabilities())
print(" Variables by probability influence:")
print(relative_probabilities.sort_values(ascending=False))
91 changes: 91 additions & 0 deletions zone_model/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,97 @@ def score(self, scoring_function=accuracy_score, choosers=None,

return scoring_function(observed_choices, predicted_choices)

def single_alternative_proba(self, alternative_data, choosers=None,
                             alternatives=None):
    """
    Probability of a single alternative with user-supplied attributes
    being selected.  For use in diagnostic settings.

    The supplied alternative is added as one extra row at the end of the
    alternatives table, probabilities are calculated for the extended
    table, and the entry for that last row is returned.  The
    ``alternatives`` DataFrame passed in is not modified.

    Parameters
    ----------
    alternative_data : dict or pd.Series
        The single alternative's attributes.  A mapping between variable
        name and variable value.  Should contain key for each explanatory
        variable in the model specification.
    choosers : pandas.DataFrame, optional
        DataFrame of choosers.
    alternatives : pandas.DataFrame, optional
        DataFrame of alternatives.  If either ``choosers`` or
        ``alternatives`` is omitted, both are taken from
        ``self.calculate_model_variables()``.

    Returns
    -------
    probability : float
        Probability of alternative with user-supplied characteristics
        being selected (the last entry of the result of
        ``self.calculate_probabilities``).
    """
    # Function-scope import so the method does not rely on a module-level
    # alias that is not visible from this part of the file.
    import pandas as pd

    if choosers is None or alternatives is None:
        choosers, alternatives = self.calculate_model_variables()

    # DataFrame.append was removed in pandas 2.0.  Building a one-row
    # frame and concatenating it is the supported equivalent: columns are
    # aligned by name and the combined frame is re-indexed from 0.
    mock_row = pd.DataFrame([alternative_data])
    alternatives_plus = pd.concat([alternatives, mock_row],
                                  ignore_index=True)

    probabilities = self.calculate_probabilities(choosers,
                                                 alternatives_plus)

    # The mock alternative was added last, so its probability is last.
    return probabilities.iloc[-1]

def relative_probabilities(self, low_percentile=.05, high_percentile=.95,
                           choosers=None, alternatives=None):
    """
    Indicator of explanatory variable influence.

    For each explanatory variable, build a mock alternative in which all
    other variables are held at their median value, let the variable of
    interest take on its ``low_percentile`` and ``high_percentile``
    values (5th and 95th percentile by default), and record the
    difference between the two resulting probabilities
    (high minus low).

    Parameters
    ----------
    low_percentile : float, optional
        The percentile that represents the value variable takes on in the
        low end of its range.
    high_percentile : float, optional
        The percentile that represents the value variable takes on in the
        high end of its range.
    choosers : pandas.DataFrame, optional
        DataFrame of choosers.
    alternatives : pandas.DataFrame, optional
        DataFrame of alternatives.  If either ``choosers`` or
        ``alternatives`` is omitted, both are taken from
        ``self.calculate_model_variables()``.

    Returns
    -------
    relative_probabilities : dict
        Mapping between variable name and its contribution to
        probability.
    """
    if choosers is None or alternatives is None:
        choosers, alternatives = self.calculate_model_variables()

    # NOTE(review): assumes iterating self.model_expression yields the
    # explanatory variables' column names -- confirm against the model
    # specification format.
    explanatory_variables = list(self.model_expression)
    alternatives = alternatives[explanatory_variables]

    # The medians do not depend on which variable is being measured, so
    # compute them once instead of once per loop iteration.
    medians = alternatives.median()

    relative_probabilities = {}
    for var_to_measure in explanatory_variables:
        column = alternatives[var_to_measure]
        low_percentile_value = column.quantile(low_percentile)
        high_percentile_value = column.quantile(high_percentile)

        # Start from the all-medians observation; only the measured
        # variable is overwritten below.
        mock_observation = medians.copy()

        mock_observation[var_to_measure] = high_percentile_value
        high_proba = self.single_alternative_proba(mock_observation,
                                                   choosers, alternatives)

        mock_observation[var_to_measure] = low_percentile_value
        low_proba = self.single_alternative_proba(mock_observation,
                                                  choosers, alternatives)

        relative_probabilities[var_to_measure] = high_proba - low_proba

    return relative_probabilities


class SimpleEnsemble(SimulationChoiceModel):
"""
Expand Down