Update the prompt to be consistent with the current LiveBench design (EvolvingLMMs-Lab#319)

* Fix typo in lmms-eval command and update error message
* Update subtask name to "Concrete Recognition"
* Chinese " symbol
* Refactor prompt.md to remove unnecessary references
* Update prompts to be consistent with the paper
ZhaoCinyu · Dec 9, 2024 · 81ce488 · 81ce488
1 parent dc1181e
commit 81ce488
Show file tree

Hide file tree

Showing 17 changed files with 1,929 additions and 1,307 deletions.
diff --git a/lmms_eval/__main__.py b/lmms_eval/__main__.py
@@ -457,10 +457,10 @@ def cli_evaluate_single(args: Union[argparse.Namespace, None] = None) -> None:
             if task_missing:
                 missing = ", ".join(task_missing)
                 eval_logger.error(
-                    f"Tasks were not found: {missing}\n" f"{utils.SPACING}Try `lm-eval --tasks list` for list of available tasks",
+                    f"Tasks were not found: {missing}\n" f"{utils.SPACING}Try `lmms-eval --tasks list` for list of available tasks",
                 )
                 raise ValueError(
-                    f"Tasks not found: {missing}. Try `lm-eval --tasks {{list_groups,list_subtasks,list_tags,list}}` to list out all available names for task groupings; only (sub)tasks; tags; or all of the above, or pass '--verbosity DEBUG' to troubleshoot task registration issues."
+                    f"Tasks not found: {missing}. Try `lmms-eval --tasks {{list_groups,list_subtasks,list_tags,list}}` to list out all available names for task groupings; only (sub)tasks; tags; or all of the above, or pass '--verbosity DEBUG' to troubleshoot task registration issues."
                 )
 
     eval_logger.info(f"Selected Tasks: {task_names}")

diff --git a/tools/live_bench/data_summary.ipynb b/tools/live_bench/data_summary.ipynb
@@ -59,7 +59,7 @@
        "Contextual Analysis     103\n",
        "Deeper Implications      97\n",
        "Broader Implications     79\n",
-       "Basic Understanding      59\n",
+       "Concrete Recognition      59\n",
        "Further Insights         55\n",
        "Name: count, dtype: int64"
       ]
@@ -130,7 +130,7 @@
      "data": {
       "text/plain": [
        "subtask\n",
-       "Basic Understanding     50\n",
+       "Concrete Recognition     50\n",
        "Broader Implications    50\n",
        "Contextual Analysis     50\n",
        "Deeper Implications     50\n",

diff --git a/tools/live_bench/example.ipynb b/tools/live_bench/example.ipynb