From 4917c3930b9465c4ba3c764de952047ef728da2a Mon Sep 17 00:00:00 2001
From: Jose Javier <26491792+josejg@users.noreply.github.com>
Date: Thu, 11 Jul 2024 17:14:58 -0700
Subject: [PATCH 01/10] Allow model key in eval script

---
 scripts/eval/eval.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/scripts/eval/eval.py b/scripts/eval/eval.py
index 9667db6308..daef2f6e03 100644
--- a/scripts/eval/eval.py
+++ b/scripts/eval/eval.py
@@ -184,6 +184,12 @@ def main(cfg: DictConfig) -> Tuple[List[Trainer], pd.DataFrame]:
     for code_path in cfg.get('code_paths', []):
         import_file(code_path)
 
+    # Allow for single model to be specified in the config to be consistent with train.py
+    if 'model' in cfg:
+        if 'models' in cfg:
+            raise ValueError('Please specify either model or models in the config, not both')
+        cfg['models'] = [cfg.pop('model')]
+
     logged_cfg, eval_config = make_dataclass_and_log_config(
         cfg,
         EvalConfig,

From 2b5145ca2c56a67e36f83bae8e706ce5b301e5a3 Mon Sep 17 00:00:00 2001
From: Jose Javier <26491792+josejg@users.noreply.github.com>
Date: Thu, 11 Jul 2024 17:36:33 -0700
Subject: [PATCH 02/10] Compatibility

---
 scripts/eval/eval.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/scripts/eval/eval.py b/scripts/eval/eval.py
index daef2f6e03..dec8d3987c 100644
--- a/scripts/eval/eval.py
+++ b/scripts/eval/eval.py
@@ -184,11 +184,18 @@ def main(cfg: DictConfig) -> Tuple[List[Trainer], pd.DataFrame]:
     for code_path in cfg.get('code_paths', []):
         import_file(code_path)
 
-    # Allow for single model to be specified in the config to be consistent with train.py
+    # Allow for single model to be specified in the config to be compatible with train.py syntax
     if 'model' in cfg:
         if 'models' in cfg:
             raise ValueError('Please specify either model or models in the config, not both')
-        cfg['models'] = [cfg.pop('model')]
+        model_cfg = {}
+        top_level_keys = ['model', 'tokenizer', 'load_path']
+        for key in top_level_keys:
+            if key not in cfg:
+                raise ValueError(f'When specifying model, "{key}" must be provided in the config')
+            model_cfg[key] = cfg.pop(key)
+        model_cfg['model_name'] = cfg.pop('model_name', 'unnamed')
+        cfg['models'] = [model_cfg]
 
     logged_cfg, eval_config = make_dataclass_and_log_config(
         cfg,

From 9b47caa522ed029d4ba205f580824f2a2d270b12 Mon Sep 17 00:00:00 2001
From: Jose Javier <26491792+josejg@users.noreply.github.com>
Date: Thu, 11 Jul 2024 18:03:35 -0700
Subject: [PATCH 03/10] pre-commit fix

---
 scripts/eval/eval.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/scripts/eval/eval.py b/scripts/eval/eval.py
index dec8d3987c..18aa7a8270 100644
--- a/scripts/eval/eval.py
+++ b/scripts/eval/eval.py
@@ -187,12 +187,16 @@ def main(cfg: DictConfig) -> Tuple[List[Trainer], pd.DataFrame]:
     # Allow for single model to be specified in the config to be compatible with train.py syntax
     if 'model' in cfg:
         if 'models' in cfg:
-            raise ValueError('Please specify either model or models in the config, not both')
+            raise ValueError(
+                'Please specify either model or models in the config, not both'
+            )
         model_cfg = {}
         top_level_keys = ['model', 'tokenizer', 'load_path']
         for key in top_level_keys:
             if key not in cfg:
-                raise ValueError(f'When specifying model, "{key}" must be provided in the config')
+                raise ValueError(
+                    f'When specifying model, "{key}" must be provided in the config'
+                )
             model_cfg[key] = cfg.pop(key)
         model_cfg['model_name'] = cfg.pop('model_name', 'unnamed')
         cfg['models'] = [model_cfg]

From e88af4898a33015719fe5b6ff5d94f7387eefe35 Mon Sep 17 00:00:00 2001
From: Jose Javier <26491792+josejg@users.noreply.github.com>
Date: Mon, 15 Jul 2024 09:42:27 -0700
Subject: [PATCH 04/10] Fix load_path

---
 scripts/eval/eval.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/scripts/eval/eval.py b/scripts/eval/eval.py
index 18aa7a8270..8b0a8d6e80 100644
--- a/scripts/eval/eval.py
+++ b/scripts/eval/eval.py
@@ -191,13 +191,13 @@ def main(cfg: DictConfig) -> Tuple[List[Trainer], pd.DataFrame]:
                 'Please specify either model or models in the config, not both'
             )
         model_cfg = {}
-        top_level_keys = ['model', 'tokenizer', 'load_path']
-        for key in top_level_keys:
-            if key not in cfg:
-                raise ValueError(
-                    f'When specifying model, "{key}" must be provided in the config'
-                )
-            model_cfg[key] = cfg.pop(key)
+        model_cfg['model'] = cfg.pop('model')
+        if 'tokenizer' not in cfg:
+            raise ValueError(
+                'When specifying model, "tokenizer" must be provided in the config'
+            )
+        if 'load_path' in cfg:
+            model_cfg['load_path'] = cfg.pop('load_path')
         model_cfg['model_name'] = cfg.pop('model_name', 'unnamed')
         cfg['models'] = [model_cfg]
 

From 222b68921eaee93bdc610950429f930b6122ec53 Mon Sep 17 00:00:00 2001
From: Jose Javier <26491792+josejg@users.noreply.github.com>
Date: Mon, 15 Jul 2024 12:09:19 -0700
Subject: [PATCH 05/10] fix

---
 scripts/eval/eval.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/eval/eval.py b/scripts/eval/eval.py
index 8b0a8d6e80..07f70905ce 100644
--- a/scripts/eval/eval.py
+++ b/scripts/eval/eval.py
@@ -196,6 +196,7 @@ def main(cfg: DictConfig) -> Tuple[List[Trainer], pd.DataFrame]:
             raise ValueError(
                 'When specifying model, "tokenizer" must be provided in the config'
             )
+        model_cfg['tokenizer'] = cfg.pop('tokenizer')
         if 'load_path' in cfg:
             model_cfg['load_path'] = cfg.pop('load_path')
         model_cfg['model_name'] = cfg.pop('model_name', 'unnamed')

From e96055dbb3943c8141bea51fd1990946ec0879b7 Mon Sep 17 00:00:00 2001
From: Jose Javier <26491792+josejg@users.noreply.github.com>
Date: Wed, 17 Jul 2024 15:21:52 -0400
Subject: [PATCH 06/10] Refactor as a config transform

---
 scripts/eval/eval.py | 53 +++++++++++++++++++++++++++++++++++---------
 1 file changed, 43 insertions(+), 10 deletions(-)

diff --git a/scripts/eval/eval.py b/scripts/eval/eval.py
index 07f70905ce..4d71d37868 100644
--- a/scripts/eval/eval.py
+++ b/scripts/eval/eval.py
@@ -179,33 +179,66 @@ def evaluate_model(
     return (trainer, logger_keys, eval_gauntlet_callback, eval_gauntlet_df)
 
 
-def main(cfg: DictConfig) -> Tuple[List[Trainer], pd.DataFrame]:
-    # Run user provided code if specified
-    for code_path in cfg.get('code_paths', []):
-        import_file(code_path)
+def allow_toplevel_keys(cfg: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Transform the config to allow top-level keys for model configuration.
+
+    This function allows users to use the 'train.py' syntax in 'eval.py'.
+    It converts a config with top-level 'model', 'tokenizer', and (optionally) 'load_path' keys
+    into the nested 'models' list format required by 'eval.py'.
+
+    Input config format (train.py style):
+    ```yaml
+    model:
+      <model_kwargs>
+    load_path: /path/to/checkpoint
+    tokenizer:
+      <tokenizer_kwargs>
+    ```
+
+    Output config format (eval.py style):
+    ```yaml
+    models:
+      - model:
+          <model_kwargs>
+        tokenizer:
+          <tokenizer_kwargs>
+        load_path: /path/to/checkpoint
+    ```
+    """
 
-    # Allow for single model to be specified in the config to be compatible with train.py syntax
     if 'model' in cfg:
         if 'models' in cfg:
             raise ValueError(
                 'Please specify either model or models in the config, not both'
             )
-        model_cfg = {}
-        model_cfg['model'] = cfg.pop('model')
-        if 'tokenizer' not in cfg:
+        model_cfg = {
+            'model': cfg.pop('model'),
+            'tokenizer': cfg.pop('tokenizer', None),
+            'model_name': cfg.pop('model_name', 'unnamed')
+        }
+        if 'tokenizer' not in model_cfg or model_cfg['tokenizer'] is None:
             raise ValueError(
                 'When specifying model, "tokenizer" must be provided in the config'
             )
-        model_cfg['tokenizer'] = cfg.pop('tokenizer')
         if 'load_path' in cfg:
             model_cfg['load_path'] = cfg.pop('load_path')
-        model_cfg['model_name'] = cfg.pop('model_name', 'unnamed')
         cfg['models'] = [model_cfg]
 
+    return cfg
+
+
+def main(cfg: DictConfig) -> Tuple[List[Trainer], pd.DataFrame]:
+    # Run user provided code if specified
+    for code_path in cfg.get('code_paths', []):
+        import_file(code_path)
+
+
     logged_cfg, eval_config = make_dataclass_and_log_config(
         cfg,
         EvalConfig,
         EVAL_CONFIG_KEYS,
+        transforms=[allow_toplevel_keys],
         icl_tasks_required=True,
     )
 

From 5c9c1ac9d09aebec520200a236607eea766da4e9 Mon Sep 17 00:00:00 2001
From: Jose Javier <26491792+josejg@users.noreply.github.com>
Date: Wed, 17 Jul 2024 15:24:02 -0400
Subject: [PATCH 07/10] formatting

---
 scripts/eval/eval.py | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/scripts/eval/eval.py b/scripts/eval/eval.py
index 4d71d37868..741fbcf6d7 100644
--- a/scripts/eval/eval.py
+++ b/scripts/eval/eval.py
@@ -180,8 +180,7 @@ def evaluate_model(
 
 
 def allow_toplevel_keys(cfg: Dict[str, Any]) -> Dict[str, Any]:
-    """
-    Transform the config to allow top-level keys for model configuration.
+    """Transform the config to allow top-level keys for model configuration.
 
     This function allows users to use the 'train.py' syntax in 'eval.py'.
     It converts a config with top-level 'model', 'tokenizer', and (optionally) 'load_path' keys
@@ -206,20 +205,19 @@ def allow_toplevel_keys(cfg: Dict[str, Any]) -> Dict[str, Any]:
         load_path: /path/to/checkpoint
     ```
     """
-
     if 'model' in cfg:
         if 'models' in cfg:
             raise ValueError(
-                'Please specify either model or models in the config, not both'
+                'Please specify either model or models in the config, not both',
             )
         model_cfg = {
             'model': cfg.pop('model'),
             'tokenizer': cfg.pop('tokenizer', None),
-            'model_name': cfg.pop('model_name', 'unnamed')
+            'model_name': cfg.pop('model_name', 'unnamed'),
         }
         if 'tokenizer' not in model_cfg or model_cfg['tokenizer'] is None:
             raise ValueError(
-                'When specifying model, "tokenizer" must be provided in the config'
+                'When specifying model, "tokenizer" must be provided in the config',
             )
         if 'load_path' in cfg:
             model_cfg['load_path'] = cfg.pop('load_path')
@@ -233,7 +231,6 @@ def main(cfg: DictConfig) -> Tuple[List[Trainer], pd.DataFrame]:
     for code_path in cfg.get('code_paths', []):
         import_file(code_path)
 
-
     logged_cfg, eval_config = make_dataclass_and_log_config(
         cfg,
         EvalConfig,

From 946560338a24b3549de3e31ca66ba342269d2824 Mon Sep 17 00:00:00 2001
From: Jose Javier <26491792+josejg@users.noreply.github.com>
Date: Wed, 17 Jul 2024 16:55:34 -0400
Subject: [PATCH 08/10] fix

---
 scripts/eval/eval.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/scripts/eval/eval.py b/scripts/eval/eval.py
index 741fbcf6d7..29a03b72cc 100644
--- a/scripts/eval/eval.py
+++ b/scripts/eval/eval.py
@@ -210,10 +210,11 @@ def allow_toplevel_keys(cfg: Dict[str, Any]) -> Dict[str, Any]:
             raise ValueError(
                 'Please specify either model or models in the config, not both',
             )
+        default_name = cfg.get('model').get('name')
         model_cfg = {
             'model': cfg.pop('model'),
             'tokenizer': cfg.pop('tokenizer', None),
-            'model_name': cfg.pop('model_name', 'unnamed'),
+            'model_name': cfg.pop('model_name', default_name),
         }
         if 'tokenizer' not in model_cfg or model_cfg['tokenizer'] is None:
             raise ValueError(

From 5d5efcf407d152983940801d59e130089f13de92 Mon Sep 17 00:00:00 2001
From: Jose Javier <26491792+josejg@users.noreply.github.com>
Date: Thu, 18 Jul 2024 18:00:16 -0400
Subject: [PATCH 09/10] fix pyright

---
 llmfoundry/command_utils/eval.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llmfoundry/command_utils/eval.py b/llmfoundry/command_utils/eval.py
index 630d418ad0..031a9d053a 100644
--- a/llmfoundry/command_utils/eval.py
+++ b/llmfoundry/command_utils/eval.py
@@ -206,7 +206,7 @@ def allow_toplevel_keys(cfg: Dict[str, Any]) -> Dict[str, Any]:
             raise ValueError(
                 'Please specify either model or models in the config, not both',
             )
-        default_name = cfg.get('model').get('name')
+        default_name = cfg.get('model').get('name') # type: ignore
         model_cfg = {
             'model': cfg.pop('model'),
             'tokenizer': cfg.pop('tokenizer', None),

From 34861365ef22add5b99d9400221f11b79ce3c9e2 Mon Sep 17 00:00:00 2001
From: Jose Javier <26491792+josejg@users.noreply.github.com>
Date: Thu, 18 Jul 2024 18:35:50 -0400
Subject: [PATCH 10/10] fix

---
 llmfoundry/command_utils/eval.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llmfoundry/command_utils/eval.py b/llmfoundry/command_utils/eval.py
index 031a9d053a..bddd592dba 100644
--- a/llmfoundry/command_utils/eval.py
+++ b/llmfoundry/command_utils/eval.py
@@ -206,7 +206,7 @@ def allow_toplevel_keys(cfg: Dict[str, Any]) -> Dict[str, Any]:
             raise ValueError(
                 'Please specify either model or models in the config, not both',
             )
-        default_name = cfg.get('model').get('name') # type: ignore
+        default_name = cfg.get('model').get('name')  # type: ignore
         model_cfg = {
             'model': cfg.pop('model'),
             'tokenizer': cfg.pop('tokenizer', None),