UDST · janowicz · Apr 17, 2017 · Apr 17, 2017
diff --git a/zone_model/evaluate.py b/zone_model/evaluate.py
@@ -35,3 +35,7 @@ def correlate(observed, predicted):
 
     corr = model.score(scoring_function=correlate, aggregate=True)
     print("  Correlation is {}".format(corr))
+
+    relative_probabilities = pd.Series(model.relative_probabilities())
+    print("  Variables by probability influence:")
+    print(relative_probabilities.sort_values(ascending=False))
diff --git a/zone_model/utils.py b/zone_model/utils.py
@@ -445,6 +445,97 @@ def score(self, scoring_function=accuracy_score, choosers=None,
 
         return scoring_function(observed_choices, predicted_choices)
 
+    def single_alternative_proba(self, alternative_data, choosers=None,
+                                 alternatives=None):
+        """
+        Probability of a single alternative with user-supplied attributes
+        being selected. For use in diagnostic settings.
+        Parameters
+        ----------
+        alternative_data : dict or pd.Series
+            The single alternative's attributes.  A mapping between variable
+            name and variable value.  Should contain key for each explanatory
+            variable in the model specification.
+        choosers : pandas.DataFrame, optional
+            DataFrame of choosers.
+        alternatives : pandas.DataFrame, optional
+            DataFrame of alternatives.  If either table is omitted, both are
+            taken from ``self.calculate_model_variables()``.
+        Returns
+        -------
+        probability : float
+            Probability of alternative with user-supplied characteristics
+            being selected (last entry of the calculated probabilities).
+        """
+        import pandas as pd  # local import keeps this method self-contained
+
+        if choosers is None or alternatives is None:
+            choosers, alternatives = self.calculate_model_variables()
+
+        # DataFrame.append was removed in pandas 2.0; concat a one-row frame.
+        mock_row = pd.DataFrame([alternative_data])
+        augmented = pd.concat([alternatives, mock_row], ignore_index=True)
+        probabilities = self.calculate_probabilities(choosers, augmented)
+        return probabilities.iloc[-1]
+
+    def relative_probabilities(self, low_percentile=.05, high_percentile=.95,
+                               choosers=None, alternatives=None):
+        """
+        Indicator of explanatory variable influence.  For each variable,
+        calculate relative variable probability contribution by holding all
+        other variables at their median value and having the variable of
+        interest take on its low and high percentile values (5th and 95th
+        by default), then calculating the difference in resulting
+        probabilities (high minus low).
+        Parameters
+        ----------
+        low_percentile : float, optional
+            The percentile that represents the value variable takes on in the
+            low end of its range.
+        high_percentile : float, optional
+            The percentile that represents the value variable takes on in the
+            high end of its range.
+        choosers : pandas.DataFrame, optional
+            DataFrame of choosers.
+        alternatives : pandas.DataFrame, optional
+            DataFrame of alternatives.  If either table is omitted, both are
+            taken from ``self.calculate_model_variables()``.
+        Returns
+        -------
+        relative_probabilities : dict
+            Mapping between variable name and its contribution to
+            probability.
+        """
+        if choosers is None or alternatives is None:
+            choosers, alternatives = self.calculate_model_variables()
+
+        explanatory_variables = list(self.model_expression)
+        alternatives = alternatives[explanatory_variables]
+
+        # Medians do not depend on the variable being measured, so compute
+        # them once instead of once per loop iteration.
+        medians = alternatives.median()
+        relative_probabilities = {}
+        for var_to_measure in explanatory_variables:
+            column = alternatives[var_to_measure]
+            low_value = column.quantile(low_percentile)
+            high_value = column.quantile(high_percentile)
+
+            # Start from the all-median observation; only the measured
+            # variable is moved between its high and low values.
+            mock_observation = medians.copy()
+            mock_observation[var_to_measure] = high_value
+            high_proba = self.single_alternative_proba(
+                mock_observation, choosers, alternatives)
+
+            mock_observation[var_to_measure] = low_value
+            low_proba = self.single_alternative_proba(
+                mock_observation, choosers, alternatives)
+
+            relative_probabilities[var_to_measure] = high_proba - low_proba
+
+        return relative_probabilities
+
 
 class SimpleEnsemble(SimulationChoiceModel):
     """