From 49380f1b80408cac4ca3b4f534f74bd9f898208e Mon Sep 17 00:00:00 2001 From: Saikat Mitra Date: Fri, 26 Apr 2024 06:25:51 +0530 Subject: [PATCH 1/2] feat: ability to add scorers outside of runs in empiricalrc config --- docs/quickstart.mdx | 21 ++++++---------- docs/scoring/basics.mdx | 5 ---- examples/basic/empiricalrc.json | 21 ++++++---------- examples/humaneval/empiricalrc.json | 18 +++++++------- examples/rag/empiricalrc.json | 24 +++++++----------- examples/spider/empiricalrc.json | 38 ++++++++--------------------- packages/cli/src/bin/index.ts | 37 +++++++++++++++++++--------- packages/cli/src/types/index.ts | 5 ++-- 8 files changed, 73 insertions(+), 96 deletions(-) diff --git a/docs/quickstart.mdx b/docs/quickstart.mdx index 3a6c9d79..6ca56f91 100644 --- a/docs/quickstart.mdx +++ b/docs/quickstart.mdx @@ -80,12 +80,7 @@ Our test will succeed if the model outputs valid JSON. "type": "model", "provider": "openai", "model": "gpt-3.5-turbo", - "prompt": "Extract the name, age and location from the message, and respond with a JSON object. If an entity is missing, respond with null.\n\nMessage: {{user_message}}", - "scorers": [ - { - "type": "is-json" - } - ] + "prompt": "Extract the name, age and location from the message, and respond with a JSON object. If an entity is missing, respond with null.\n\nMessage: {{user_message}}" }, { "type": "model", @@ -96,12 +91,7 @@ Our test will succeed if the model outputs valid JSON. "type": "json_object" } }, - "prompt": "Extract the name, age and location from the message, and respond with a JSON object. If an entity is missing, respond with null.\n\nMessage: {{user_message}}", - "scorers": [ - { - "type": "is-json" - } - ] + "prompt": "Extract the name, age and location from the message, and respond with a JSON object. If an entity is missing, respond with null.\n\nMessage: {{user_message}}" } ], "dataset": { @@ -117,7 +107,12 @@ Our test will succeed if the model outputs valid JSON. 
} } ] - } + }, + "scorers": [ + { + "type": "is-json" + } + ] } ``` diff --git a/docs/scoring/basics.mdx b/docs/scoring/basics.mdx index 2bda1808..71634c27 100644 --- a/docs/scoring/basics.mdx +++ b/docs/scoring/basics.mdx @@ -12,11 +12,6 @@ as you like. ```json empiricalrc.json { - "type": "model", - "name": "gpt-3.5-turbo run", - "provider": "openai", - "model": "gpt-3.5-turbo", - "prompt": "Always respond with a JSON object.", "scorers": [ { "type": "is-json" diff --git a/examples/basic/empiricalrc.json b/examples/basic/empiricalrc.json index 86c6f742..02029126 100644 --- a/examples/basic/empiricalrc.json +++ b/examples/basic/empiricalrc.json @@ -5,12 +5,7 @@ "type": "model", "provider": "openai", "model": "gpt-3.5-turbo", - "prompt": "Extract the name, age and location from the message, and respond with a JSON object. If an entity is missing, respond with null.\n\nMessage: {{user_message}}", - "scorers": [ - { - "type": "is-json" - } - ] + "prompt": "Extract the name, age and location from the message, and respond with a JSON object. If an entity is missing, respond with null.\n\nMessage: {{user_message}}" }, { "type": "model", @@ -21,12 +16,7 @@ "response_format": { "type": "json_object" } - }, - "scorers": [ - { - "type": "is-json" - } - ] + } } ], "dataset": { @@ -42,5 +32,10 @@ } } ] - } + }, + "scorers": [ + { + "type": "is-json" + } + ] } \ No newline at end of file diff --git a/examples/humaneval/empiricalrc.json b/examples/humaneval/empiricalrc.json index 824a56a4..467de651 100644 --- a/examples/humaneval/empiricalrc.json +++ b/examples/humaneval/empiricalrc.json @@ -8,17 +8,17 @@ "prompt": "Complete the following python function. 
Return only the completed function so that it can be directly run on a Python shell, including imports like from typing import List.\n```python\n{{prompt}}\n```", "parameters": { "temperature": 0.1 - }, - "scorers": [ - { - "type": "py-script", - "path": "score.py", - "name": "unit-tests" - } - ] + } } ], "dataset": { "path": "HumanEval.jsonl" - } + }, + "scorers": [ + { + "type": "py-script", + "path": "score.py", + "name": "unit-tests" + } + ] } \ No newline at end of file diff --git a/examples/rag/empiricalrc.json b/examples/rag/empiricalrc.json index 807768a2..85452af2 100644 --- a/examples/rag/empiricalrc.json +++ b/examples/rag/empiricalrc.json @@ -6,29 +6,23 @@ "path": "rag.py", "parameters": { "model": "gpt-3.5-turbo" - }, - "scorers": [ - { - "type": "py-script", - "path": "score.py" - } - ] + } }, { "type": "py-script", "path": "rag.py", "parameters": { "model": "gpt-4-turbo-preview" - }, - "scorers": [ - { - "type": "py-script", - "path": "score.py" - } - ] + } } ], "dataset": { "path": ".empiricalrun/dataset.jsonl" - } + }, + "scorers": [ + { + "type": "py-script", + "path": "score.py" + } + ] } \ No newline at end of file diff --git a/examples/spider/empiricalrc.json b/examples/spider/empiricalrc.json index 5ee48adc..b8182c0f 100644 --- a/examples/spider/empiricalrc.json +++ b/examples/spider/empiricalrc.json @@ -14,15 +14,6 @@ "role": "user", "content": "Question: {{question}} \n\nAnswer the above question with only the SQL query." } - ], - "scorers": [ - { - "type": "sql-syntax" - }, - { - "type": "py-script", - "path": "execution_accuracy.py" - } ] }, { @@ -38,15 +29,6 @@ "role": "user", "content": "Question: {{question}} \n\nAnswer the above question with only the SQL query." } - ], - "scorers": [ - { - "type": "sql-syntax" - }, - { - "type": "py-script", - "path": "execution_accuracy.py" - } ] }, { @@ -62,19 +44,19 @@ "role": "user", "content": "Question: {{question}} \n\nAnswer the above question with only the SQL query." 
} - ], - "scorers": [ - { - "type": "sql-syntax" - }, - { - "type": "py-script", - "path": "execution_accuracy.py" - } ] } ], "dataset": { "path": "https://docs.google.com/spreadsheets/d/1x_p0lX2pJEyGkFoe1A9nY3q87qOJUd547f2lz99ugiM/edit#gid=0" - } + }, + "scorers": [ + { + "type": "sql-syntax" + }, + { + "type": "py-script", + "path": "execution_accuracy.py" + } + ] } \ No newline at end of file diff --git a/packages/cli/src/bin/index.ts b/packages/cli/src/bin/index.ts index 3c82b2e5..e42c7173 100644 --- a/packages/cli/src/bin/index.ts +++ b/packages/cli/src/bin/index.ts @@ -49,6 +49,31 @@ const cacheDir = ".empiricalrun"; const outputFilePath = `${cwd}/${cacheDir}/${outputFileName}`; const runtimeOptionsPath = `${cwd}/${cacheDir}/runtime.json`; +const readConfig = async (): Promise<RunsConfig> => { + let data: string; + try { + data = (await fs.readFile(configFileFullPath)).toString(); + console.log(buildSuccessLog(`read ${configFileName} file successfully`)); + } catch (err) { + console.log(buildErrorLog(`Failed to read ${configFileName} file`)); + console.log(yellow("Please ensure running init command first")); + process.exit(1); + } + const { runs, dataset, scorers } = JSON.parse(data) as RunsConfig; + + runs.forEach((r) => { + // if scorers are not set for a run, then override it with the global scorers + if (!r.scorers && scorers) { + r.scorers = scorers; + } + }); + + return { + runs, + dataset, + }; +}; + program .name("Empirical.run CLI") .description( @@ -90,19 +115,9 @@ program dotenv.config({ path: runTimeOptions.envFilePath }); console.log(yellow("Initiating run...")); - let data; const startTime = performance.now(); - try { - data = await fs.readFile(configFileFullPath); - } catch (err) { - console.log(buildErrorLog(`Failed to read ${configFileName} file`)); - console.log(yellow("Please ensure running init command first")); - process.exit(1); - } + const { runs, dataset: datasetConfig } = await readConfig(); - console.log(buildSuccessLog(`read ${configFileName} 
file successfully`)); - const jsonStr = data.toString(); - const { runs, dataset: datasetConfig } = JSON.parse(jsonStr) as RunsConfig; // TODO: add check here for empty runs config. Add validator of the file let dataset: Dataset; const store = new EmpiricalStore(); diff --git a/packages/cli/src/types/index.ts b/packages/cli/src/types/index.ts index bd84e0f2..fc1f7f7b 100644 --- a/packages/cli/src/types/index.ts +++ b/packages/cli/src/types/index.ts @@ -1,7 +1,8 @@ -import { RunConfig, DatasetConfig } from "@empiricalrun/types"; +import { RunConfig, DatasetConfig, Scorer } from "@empiricalrun/types"; export type RunsConfig = { + $schema?: string; runs: RunConfig[]; dataset: DatasetConfig; - $schema?: string; + scorers?: Scorer[]; }; From 28251d81695de7006b3d5721a7f142e5ff852ab5 Mon Sep 17 00:00:00 2001 From: Saikat Mitra Date: Fri, 26 Apr 2024 06:30:03 +0530 Subject: [PATCH 2/2] chore: add changeset --- .changeset/yellow-trees-pull.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/yellow-trees-pull.md diff --git a/.changeset/yellow-trees-pull.md b/.changeset/yellow-trees-pull.md new file mode 100644 index 00000000..ed5dd72a --- /dev/null +++ b/.changeset/yellow-trees-pull.md @@ -0,0 +1,5 @@ +--- +"@empiricalrun/cli": minor +--- + +feat: ability to add global scorers