From 7e386949de5a71d380f5a715557d7775b5eee599 Mon Sep 17 00:00:00 2001 From: rdnfn <75615911+rdnfn@users.noreply.github.com> Date: Wed, 17 Apr 2024 17:17:42 +0100 Subject: [PATCH 1/4] update if statement to only apply if no generator column --- src/alpaca_eval/analyze.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/alpaca_eval/analyze.py b/src/alpaca_eval/analyze.py index 74b4e421..cd09df1c 100644 --- a/src/alpaca_eval/analyze.py +++ b/src/alpaca_eval/analyze.py @@ -275,7 +275,7 @@ def estimate_correlations( is_add_generator = annotations_2 == "gold_crossannotations" annotations_2 = self._get_annotations(annotations_2) - if is_add_generator: + if "generator" not in annotations_2.columns and is_add_generator: # TODO clean: following is because we don't save generator in HF crossannotation dataset => reconstructs it. # takes only eval set for the leaderboard merge_kwargs = dict(right=self.df_gold_annotations[self.keys + ["generator"]], on=self.keys) From 0402542c2b6f0433e76e49fed313ed37e18f882d Mon Sep 17 00:00:00 2001 From: rdnfn <75615911+rdnfn@users.noreply.github.com> Date: Sat, 27 Apr 2024 10:39:17 +0100 Subject: [PATCH 2/4] add try/except statement to correlation computation --- src/alpaca_eval/analyze.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/alpaca_eval/analyze.py b/src/alpaca_eval/analyze.py index cd09df1c..302981aa 100644 --- a/src/alpaca_eval/analyze.py +++ b/src/alpaca_eval/analyze.py @@ -307,8 +307,13 @@ def estimate_correlations( left_index=True, right_index=True, ) - s = spearmanr(df["win_rate_2"], df["win_rate_1"]).statistic - r = pearsonr(df["win_rate_2"], df["win_rate_1"]).statistic + try: + s = spearmanr(df["win_rate_2"], df["win_rate_1"]).statistic + r = pearsonr(df["win_rate_2"], df["win_rate_1"]).statistic + except ValueError: + logging.warning("Could not compute correlations. This issue may be due to a lack of different generators models in the data (see 'generator' column).") + s = np.nan + r = np.nan return dict(spearman=s, pearson=r) From 52432540c4e645728c7daaf649a5b44c95d2f817 Mon Sep 17 00:00:00 2001 From: rdnfn <75615911+rdnfn@users.noreply.github.com> Date: Sat, 27 Apr 2024 10:42:28 +0100 Subject: [PATCH 3/4] fix spelling --- src/alpaca_eval/analyze.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/alpaca_eval/analyze.py b/src/alpaca_eval/analyze.py index 302981aa..50d9f9c2 100644 --- a/src/alpaca_eval/analyze.py +++ b/src/alpaca_eval/analyze.py @@ -263,7 +263,7 @@ def estimate_correlations( corresponding gold annotations. groupby: list[str], optional - Columns to groupby for computing the ldeaderboard. + Columns to groupby for computing the leaderboard. Returns ------- From 9eb4277c3db17ec27dcdffc07feed956b1f2d554 Mon Sep 17 00:00:00 2001 From: rdnfn <75615911+rdnfn@users.noreply.github.com> Date: Sat, 27 Apr 2024 10:56:43 +0100 Subject: [PATCH 4/4] add more informative logging message if correlation computation fails --- src/alpaca_eval/analyze.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/alpaca_eval/analyze.py b/src/alpaca_eval/analyze.py index 50d9f9c2..f3d44338 100644 --- a/src/alpaca_eval/analyze.py +++ b/src/alpaca_eval/analyze.py @@ -311,7 +311,13 @@ def estimate_correlations( s = spearmanr(df["win_rate_2"], df["win_rate_1"]).statistic r = pearsonr(df["win_rate_2"], df["win_rate_1"]).statistic except ValueError: - logging.warning("Could not compute correlations. This issue may be due to a lack of different generators models in the data (see 'generator' column).") + logging.warning( + ( + "Could not compute correlations. This issue may be due to a lack of different " + f"values of the column data is grouped by (using {groupby} column of dataset). " + f"The computation failed for the following dataframe:\n{df}" + ), + ) s = np.nan r = np.nan