Skip to content

Commit

Permalink
Fix loglikelihood_rolling caching (EleutherAI#1821) (EleutherAI#2187)
Browse files Browse the repository at this point in the history

* fix revision type

* allow for None-input loglikelihood reqs to be cached

* handle no remaining cache items

* pre-commit

* change cache_hook.add_partial(loglikelihood_rolling...) convention

---------

Co-authored-by: Baber Abbasi <baber@eleuther.ai>
  • Loading branch information
haileyschoelkopf and baberabb authored Aug 28, 2024
1 parent 2de3688 commit 8138fd5
Show file tree
Hide file tree
Showing 7 changed files with 42 additions and 7 deletions.
7 changes: 5 additions & 2 deletions lm_eval/api/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,8 +283,11 @@ def fn(requests):
eval_logger.info(
f"Cached requests: {len(requests) - len(remaining_reqs)}, Requests remaining: {len(remaining_reqs)}"
)
# actually run the LM on the requests that do not have cached results
rem_res = getattr(self.lm, attr)(remaining_reqs)
if remaining_reqs:
# actually run the LM on the requests that do not have cached results
rem_res = getattr(self.lm, attr)(remaining_reqs)
else:
rem_res = []

# stick the new ones back into the list and also cache any of the new ones
resptr = 0
Expand Down
5 changes: 4 additions & 1 deletion lm_eval/models/api_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -510,7 +510,7 @@ def _collate(req: LogLikelihoodInputs):
):
if answer_ is not None:
res.append(answer_)
# partial caching
# cache requests that aren't from a loglikelihood_rolling request
if cache_key is not None:
self.cache_hook.add_partial(
"loglikelihood", cache_key, answer_
Expand Down Expand Up @@ -638,4 +638,7 @@ def loglikelihood_rolling(

string_nll = sum(string_nll)
loglikelihoods.append(string_nll)

# cache this loglikelihood_rolling request
self.cache_hook.add_partial("loglikelihood_rolling", (string,), string_nll)
return loglikelihoods
11 changes: 10 additions & 1 deletion lm_eval/models/huggingface.py
Original file line number Diff line number Diff line change
Expand Up @@ -1018,6 +1018,9 @@ def loglikelihood_rolling(
string_nll = sum(string_nll)
loglikelihoods.append(string_nll)

# cache this loglikelihood_rolling request
self.cache_hook.add_partial("loglikelihood_rolling", (string,), string_nll)

return loglikelihoods

def _batch_scheduler(self, pos, n_reordered_requests):
Expand Down Expand Up @@ -1246,7 +1249,13 @@ def _lookup_one_token_cont(req: Tuple[Tuple[str, str], List[int], List[int]]):

res.append(answer)

self.cache_hook.add_partial("loglikelihood", request_str, answer)
if request_str is not None:
# special case: loglikelihood_rolling produces a number of loglikelihood requests
# all with cache key None. instead do add_partial on the per-example level
# in the loglikelihood_rolling() function for those.
self.cache_hook.add_partial(
"loglikelihood", request_str, answer
)
pbar.update(1)

pbar.close()
Expand Down
6 changes: 6 additions & 0 deletions lm_eval/models/nemo_lm.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,9 @@ def loglikelihood_rolling(

string_nll = sum(string_nll)
loglikelihoods.append(string_nll)

# cache this loglikelihood_rolling request
self.cache_hook.add_partial("loglikelihood_rolling", (string,), string_nll)
return loglikelihoods

def _loglikelihood_tokens(self, requests, disable_tqdm=False):
Expand Down Expand Up @@ -468,6 +471,9 @@ def _collate(x):
answer = (logprob, is_greedy)

if cache_key is not None:
# special case: loglikelihood_rolling produces a number of loglikelihood requests
# all with cache key None. instead do add_partial on the per-example level
# in the loglikelihood_rolling() function for those.
self.cache_hook.add_partial("loglikelihood", cache_key, answer)

res.append(answer)
Expand Down
3 changes: 3 additions & 0 deletions lm_eval/models/neuralmagic.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,9 @@ def _collate(x):
res.append(answer)

if cache_key is not None:
# special case: loglikelihood_rolling produces a number of loglikelihood requests
# all with cache key None. instead do add_partial on the per-example level
# in the loglikelihood_rolling() function for those.
self.cache_hook.add_partial("loglikelihood", cache_key, answer)

return re_ord.get_original(res)
Expand Down
9 changes: 7 additions & 2 deletions lm_eval/models/neuron_optimum.py
Original file line number Diff line number Diff line change
Expand Up @@ -502,7 +502,8 @@ def loglikelihood_rolling(self, requests, disable_tqdm: bool = False):

string_nll = sum(string_nll)
loglikelihoods.append(string_nll)

# cache this loglikelihood_rolling request
self.cache_hook.add_partial("loglikelihood_rolling", (string,), string_nll)
return loglikelihoods

def _loglikelihood_tokens(
Expand Down Expand Up @@ -620,7 +621,11 @@ def _collate(x):

res.append(answer)

self.cache_hook.add_partial("loglikelihood", cache_key, answer)
if cache_key is not None:
# special case: loglikelihood_rolling produces a number of loglikelihood requests
# all with cache key None. instead do add_partial on the per-example level
# in the loglikelihood_rolling() function for those.
self.cache_hook.add_partial("loglikelihood", cache_key, answer)

return re_ord.get_original(res)

Expand Down
8 changes: 7 additions & 1 deletion lm_eval/models/vllm_causallms.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,10 @@ def loglikelihood_rolling(

string_nll = sum(string_nll)
loglikelihoods.append(string_nll)

# cache this loglikelihood_rolling request
self.cache_hook.add_partial("loglikelihood_rolling", (string,), string_nll)

return loglikelihoods

def generate_until(
Expand Down Expand Up @@ -453,8 +457,10 @@ def _collate(x):

res.append(answer)

# partial caching
if cache_key is not None:
# special case: loglikelihood_rolling produces a number of loglikelihood requests
# all with cache key None. instead do add_partial on the per-example level
# in the loglikelihood_rolling() function for those.
self.cache_hook.add_partial("loglikelihood", cache_key, answer)
pbar.update(1)
pbar.close()
Expand Down

0 comments on commit 8138fd5

Please sign in to comment.