From 49380f1b80408cac4ca3b4f534f74bd9f898208e Mon Sep 17 00:00:00 2001
From: Saikat Mitra <saikatmitra91@gmail.com>
Date: Fri, 26 Apr 2024 06:25:51 +0530
Subject: [PATCH 1/2] feat: ability to add scorers outside of runs in
 empiricalrc config

---
 docs/quickstart.mdx                 | 21 ++++++----------
 docs/scoring/basics.mdx             |  5 ----
 examples/basic/empiricalrc.json     | 21 ++++++----------
 examples/humaneval/empiricalrc.json | 18 +++++++-------
 examples/rag/empiricalrc.json       | 24 +++++++-----------
 examples/spider/empiricalrc.json    | 38 ++++++++---------------------
 packages/cli/src/bin/index.ts       | 37 +++++++++++++++++++---------
 packages/cli/src/types/index.ts     |  5 ++--
 8 files changed, 73 insertions(+), 96 deletions(-)
diff --git a/docs/quickstart.mdx b/docs/quickstart.mdx
index 3a6c9d79..6ca56f91 100644
--- a/docs/quickstart.mdx
+++ b/docs/quickstart.mdx
@@ -80,12 +80,7 @@ Our test will succeed if the model outputs valid JSON.
           "type": "model",
           "provider": "openai",
           "model": "gpt-3.5-turbo",
-          "prompt": "Extract the name, age and location from the message, and respond with a JSON object. If an entity is missing, respond with null.\n\nMessage: {{user_message}}",
-          "scorers": [
-            {
-              "type": "is-json"
-            }
-          ]
+          "prompt": "Extract the name, age and location from the message, and respond with a JSON object. If an entity is missing, respond with null.\n\nMessage: {{user_message}}"
         },
         {
           "type": "model",
@@ -96,12 +91,7 @@ Our test will succeed if the model outputs valid JSON.
               "type": "json_object"
             }
           },
-          "prompt": "Extract the name, age and location from the message, and respond with a JSON object. If an entity is missing, respond with null.\n\nMessage: {{user_message}}",
-          "scorers": [
-            {
-              "type": "is-json"
-            }
-          ]
+          "prompt": "Extract the name, age and location from the message, and respond with a JSON object. If an entity is missing, respond with null.\n\nMessage: {{user_message}}"
         }
       ],
       "dataset": {
@@ -117,7 +107,12 @@ Our test will succeed if the model outputs valid JSON.
             }
           }
         ]
-      }
+      },
+      "scorers": [
+        {
+          "type": "is-json"
+        }
+      ]
     }
     ```
     </Accordion>
diff --git a/docs/scoring/basics.mdx b/docs/scoring/basics.mdx
index 2bda1808..71634c27 100644
--- a/docs/scoring/basics.mdx
+++ b/docs/scoring/basics.mdx
@@ -12,11 +12,6 @@ as you like.
 
 ```json empiricalrc.json
 {
-    "type": "model",
-    "name": "gpt-3.5-turbo run",
-    "provider": "openai",
-    "model": "gpt-3.5-turbo",
-    "prompt": "Always respond with a JSON object.",
     "scorers": [
         {
             "type": "is-json"
diff --git a/examples/basic/empiricalrc.json b/examples/basic/empiricalrc.json
index 86c6f742..02029126 100644
--- a/examples/basic/empiricalrc.json
+++ b/examples/basic/empiricalrc.json
@@ -5,12 +5,7 @@
       "type": "model",
       "provider": "openai",
       "model": "gpt-3.5-turbo",
-      "prompt": "Extract the name, age and location from the message, and respond with a JSON object. If an entity is missing, respond with null.\n\nMessage: {{user_message}}",
-      "scorers": [
-        {
-          "type": "is-json"
-        }
-      ]
+      "prompt": "Extract the name, age and location from the message, and respond with a JSON object. If an entity is missing, respond with null.\n\nMessage: {{user_message}}"
     },
     {
       "type": "model",
@@ -21,12 +16,7 @@
         "response_format": {
           "type": "json_object"
         }
-      },
-      "scorers": [
-        {
-          "type": "is-json"
-        }
-      ]
+      }
     }
   ],
   "dataset": {
@@ -42,5 +32,10 @@
         }
       }
     ]
-  }
+  },
+  "scorers": [
+    {
+      "type": "is-json"
+    }
+  ]
 }
\ No newline at end of file
diff --git a/examples/humaneval/empiricalrc.json b/examples/humaneval/empiricalrc.json
index 824a56a4..467de651 100644
--- a/examples/humaneval/empiricalrc.json
+++ b/examples/humaneval/empiricalrc.json
@@ -8,17 +8,17 @@
       "prompt": "Complete the following python function. Return only the completed function so that it can be directly run on a Python shell, including imports like from typing import List.\n```python\n{{prompt}}\n```",
       "parameters": {
         "temperature": 0.1
-      },
-      "scorers": [
-        {
-          "type": "py-script",
-          "path": "score.py",
-          "name": "unit-tests"
-        }
-      ]
+      }
     }
   ],
   "dataset": {
     "path": "HumanEval.jsonl"
-  }
+  },
+  "scorers": [
+    {
+      "type": "py-script",
+      "path": "score.py",
+      "name": "unit-tests"
+    }
+  ]
 }
\ No newline at end of file
diff --git a/examples/rag/empiricalrc.json b/examples/rag/empiricalrc.json
index 807768a2..85452af2 100644
--- a/examples/rag/empiricalrc.json
+++ b/examples/rag/empiricalrc.json
@@ -6,29 +6,23 @@
       "path": "rag.py",
       "parameters": {
         "model": "gpt-3.5-turbo"
-      },
-      "scorers": [
-        {
-          "type": "py-script",
-          "path": "score.py"
-        }
-      ]
+      }
     },
     {
       "type": "py-script",
       "path": "rag.py",
       "parameters": {
         "model": "gpt-4-turbo-preview"
-      },
-      "scorers": [
-        {
-          "type": "py-script",
-          "path": "score.py"
-        }
-      ]
+      }
     }
   ],
   "dataset": {
     "path": ".empiricalrun/dataset.jsonl"
-  }
+  },
+  "scorers": [
+    {
+      "type": "py-script",
+      "path": "score.py"
+    }
+  ]
 }
\ No newline at end of file
diff --git a/examples/spider/empiricalrc.json b/examples/spider/empiricalrc.json
index 5ee48adc..b8182c0f 100644
--- a/examples/spider/empiricalrc.json
+++ b/examples/spider/empiricalrc.json
@@ -14,15 +14,6 @@
                     "role": "user",
                     "content": "Question: {{question}} \n\nAnswer the above question with only the SQL query."
                 }
-            ],
-            "scorers": [
-                {
-                    "type": "sql-syntax"
-                },
-                {
-                    "type": "py-script",
-                    "path": "execution_accuracy.py"
-                }
             ]
         },
         {
@@ -38,15 +29,6 @@
                     "role": "user",
                     "content": "Question: {{question}} \n\nAnswer the above question with only the SQL query."
                 }
-            ],
-            "scorers": [
-                {
-                    "type": "sql-syntax"
-                },
-                {
-                    "type": "py-script",
-                    "path": "execution_accuracy.py"
-                }
             ]
         },
         {
@@ -62,19 +44,19 @@
                     "role": "user",
                     "content": "Question: {{question}} \n\nAnswer the above question with only the SQL query."
                 }
-            ],
-            "scorers": [
-                {
-                    "type": "sql-syntax"
-                },
-                {
-                    "type": "py-script",
-                    "path": "execution_accuracy.py"
-                }
             ]
         }
     ],
     "dataset": {
         "path": "https://docs.google.com/spreadsheets/d/1x_p0lX2pJEyGkFoe1A9nY3q87qOJUd547f2lz99ugiM/edit#gid=0"
-    }
+    },
+    "scorers": [
+        {
+            "type": "sql-syntax"
+        },
+        {
+            "type": "py-script",
+            "path": "execution_accuracy.py"
+        }
+    ]
 }
\ No newline at end of file
diff --git a/packages/cli/src/bin/index.ts b/packages/cli/src/bin/index.ts
index 3c82b2e5..e42c7173 100644
--- a/packages/cli/src/bin/index.ts
+++ b/packages/cli/src/bin/index.ts
@@ -49,6 +49,31 @@ const cacheDir = ".empiricalrun";
 const outputFilePath = `${cwd}/${cacheDir}/${outputFileName}`;
 const runtimeOptionsPath = `${cwd}/${cacheDir}/runtime.json`;
 
+/** Loads the config; a run without `scorers` inherits the top-level list. */
+const readConfig = async (): Promise<RunsConfig> => {
+  let config: RunsConfig;
+  try {
+    const data = (await fs.readFile(configFileFullPath)).toString();
+    console.log(buildSuccessLog(`read ${configFileName} file successfully`));
+    config = JSON.parse(data) as RunsConfig;
+  } catch (err) {
+    if (err instanceof SyntaxError) {
+      console.log(buildErrorLog(`Failed to parse ${configFileName} file`));
+      console.log(yellow(err.message)); // says where the JSON is malformed
+    } else {
+      console.log(buildErrorLog(`Failed to read ${configFileName} file`));
+      console.log(yellow("Please ensure running init command first"));
+    }
+    process.exit(1);
+  }
+  const { dataset, scorers } = config;
+  // copy per run so that runs never share (or mutate) one scorers array
+  const runs = config.runs.map((r) =>
+    r.scorers || !scorers ? r : { ...r, scorers: [...scorers] },
+  );
+  return { runs, dataset };
+};
+
 program
   .name("Empirical.run CLI")
   .description(
@@ -90,19 +115,9 @@ program
     dotenv.config({ path: runTimeOptions.envFilePath });
     console.log(yellow("Initiating run..."));
 
-    let data;
     const startTime = performance.now();
-    try {
-      data = await fs.readFile(configFileFullPath);
-    } catch (err) {
-      console.log(buildErrorLog(`Failed to read ${configFileName} file`));
-      console.log(yellow("Please ensure running init command first"));
-      process.exit(1);
-    }
+    const { runs, dataset: datasetConfig } = await readConfig();
 
-    console.log(buildSuccessLog(`read ${configFileName} file successfully`));
-    const jsonStr = data.toString();
-    const { runs, dataset: datasetConfig } = JSON.parse(jsonStr) as RunsConfig;
     // TODO: add check here for empty runs config. Add validator of the file
     let dataset: Dataset;
     const store = new EmpiricalStore();
diff --git a/packages/cli/src/types/index.ts b/packages/cli/src/types/index.ts
index bd84e0f2..fc1f7f7b 100644
--- a/packages/cli/src/types/index.ts
+++ b/packages/cli/src/types/index.ts
@@ -1,7 +1,8 @@
-import { RunConfig, DatasetConfig } from "@empiricalrun/types";
+import { RunConfig, DatasetConfig, Scorer } from "@empiricalrun/types";
 
 export type RunsConfig = {
+  $schema?: string;
   runs: RunConfig[];
   dataset: DatasetConfig;
-  $schema?: string;
+  scorers?: Scorer[]; // top-level scorers, applied to any run that sets none
 };

From 28251d81695de7006b3d5721a7f142e5ff852ab5 Mon Sep 17 00:00:00 2001
From: Saikat Mitra <saikatmitra91@gmail.com>
Date: Fri, 26 Apr 2024 06:30:03 +0530
Subject: [PATCH 2/2] chore: add changeset

---
 .changeset/yellow-trees-pull.md | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 .changeset/yellow-trees-pull.md

diff --git a/.changeset/yellow-trees-pull.md b/.changeset/yellow-trees-pull.md
new file mode 100644
index 00000000..ed5dd72a
--- /dev/null
+++ b/.changeset/yellow-trees-pull.md
@@ -0,0 +1,5 @@
+---
+"@empiricalrun/cli": minor
+---
+
+feat: support top-level (global) `scorers` in the config, applied to every run that does not define its own