Skip to content

Commit

Permalink
feat: ability to add global scorers (#178)
Browse files Browse the repository at this point in the history
  • Loading branch information
saikatmitra91 authored Apr 26, 2024
1 parent 93f995b commit db945c2
Show file tree
Hide file tree
Showing 9 changed files with 78 additions and 96 deletions.
5 changes: 5 additions & 0 deletions .changeset/yellow-trees-pull.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@empiricalrun/cli": minor
---

feat: ability to add global scorers
21 changes: 8 additions & 13 deletions docs/quickstart.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -80,12 +80,7 @@ Our test will succeed if the model outputs valid JSON.
"type": "model",
"provider": "openai",
"model": "gpt-3.5-turbo",
"prompt": "Extract the name, age and location from the message, and respond with a JSON object. If an entity is missing, respond with null.\n\nMessage: {{user_message}}",
"scorers": [
{
"type": "is-json"
}
]
"prompt": "Extract the name, age and location from the message, and respond with a JSON object. If an entity is missing, respond with null.\n\nMessage: {{user_message}}"
},
{
"type": "model",
Expand All @@ -96,12 +91,7 @@ Our test will succeed if the model outputs valid JSON.
"type": "json_object"
}
},
"prompt": "Extract the name, age and location from the message, and respond with a JSON object. If an entity is missing, respond with null.\n\nMessage: {{user_message}}",
"scorers": [
{
"type": "is-json"
}
]
"prompt": "Extract the name, age and location from the message, and respond with a JSON object. If an entity is missing, respond with null.\n\nMessage: {{user_message}}"
}
],
"dataset": {
Expand All @@ -117,7 +107,12 @@ Our test will succeed if the model outputs valid JSON.
}
}
]
}
},
"scorers": [
{
"type": "is-json"
}
]
}
```
</Accordion>
Expand Down
5 changes: 0 additions & 5 deletions docs/scoring/basics.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,6 @@ as you like.

```json empiricalrc.json
{
"type": "model",
"name": "gpt-3.5-turbo run",
"provider": "openai",
"model": "gpt-3.5-turbo",
"prompt": "Always respond with a JSON object.",
"scorers": [
{
"type": "is-json"
Expand Down
21 changes: 8 additions & 13 deletions examples/basic/empiricalrc.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,7 @@
"type": "model",
"provider": "openai",
"model": "gpt-3.5-turbo",
"prompt": "Extract the name, age and location from the message, and respond with a JSON object. If an entity is missing, respond with null.\n\nMessage: {{user_message}}",
"scorers": [
{
"type": "is-json"
}
]
"prompt": "Extract the name, age and location from the message, and respond with a JSON object. If an entity is missing, respond with null.\n\nMessage: {{user_message}}"
},
{
"type": "model",
Expand All @@ -21,12 +16,7 @@
"response_format": {
"type": "json_object"
}
},
"scorers": [
{
"type": "is-json"
}
]
}
}
],
"dataset": {
Expand All @@ -42,5 +32,10 @@
}
}
]
}
},
"scorers": [
{
"type": "is-json"
}
]
}
18 changes: 9 additions & 9 deletions examples/humaneval/empiricalrc.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,17 @@
"prompt": "Complete the following python function. Return only the completed function so that it can be directly run on a Python shell, including imports like from typing import List.\n```python\n{{prompt}}\n```",
"parameters": {
"temperature": 0.1
},
"scorers": [
{
"type": "py-script",
"path": "score.py",
"name": "unit-tests"
}
]
}
}
],
"dataset": {
"path": "HumanEval.jsonl"
}
},
"scorers": [
{
"type": "py-script",
"path": "score.py",
"name": "unit-tests"
}
]
}
24 changes: 9 additions & 15 deletions examples/rag/empiricalrc.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,29 +6,23 @@
"path": "rag.py",
"parameters": {
"model": "gpt-3.5-turbo"
},
"scorers": [
{
"type": "py-script",
"path": "score.py"
}
]
}
},
{
"type": "py-script",
"path": "rag.py",
"parameters": {
"model": "gpt-4-turbo-preview"
},
"scorers": [
{
"type": "py-script",
"path": "score.py"
}
]
}
}
],
"dataset": {
"path": ".empiricalrun/dataset.jsonl"
}
},
"scorers": [
{
"type": "py-script",
"path": "score.py"
}
]
}
38 changes: 10 additions & 28 deletions examples/spider/empiricalrc.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,6 @@
"role": "user",
"content": "Question: {{question}} \n\nAnswer the above question with only the SQL query."
}
],
"scorers": [
{
"type": "sql-syntax"
},
{
"type": "py-script",
"path": "execution_accuracy.py"
}
]
},
{
Expand All @@ -38,15 +29,6 @@
"role": "user",
"content": "Question: {{question}} \n\nAnswer the above question with only the SQL query."
}
],
"scorers": [
{
"type": "sql-syntax"
},
{
"type": "py-script",
"path": "execution_accuracy.py"
}
]
},
{
Expand All @@ -62,19 +44,19 @@
"role": "user",
"content": "Question: {{question}} \n\nAnswer the above question with only the SQL query."
}
],
"scorers": [
{
"type": "sql-syntax"
},
{
"type": "py-script",
"path": "execution_accuracy.py"
}
]
}
],
"dataset": {
"path": "https://docs.google.com/spreadsheets/d/1x_p0lX2pJEyGkFoe1A9nY3q87qOJUd547f2lz99ugiM/edit#gid=0"
}
},
"scorers": [
{
"type": "sql-syntax"
},
{
"type": "py-script",
"path": "execution_accuracy.py"
}
]
}
37 changes: 26 additions & 11 deletions packages/cli/src/bin/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,31 @@ const cacheDir = ".empiricalrun";
const outputFilePath = `${cwd}/${cacheDir}/${outputFileName}`;
const runtimeOptionsPath = `${cwd}/${cacheDir}/runtime.json`;

/**
 * Reads the config file at `configFileFullPath`, parses it as JSON and
 * applies the top-level `scorers` to every run that does not define its own.
 *
 * The process exits with code 1 (after logging an error) when the file
 * cannot be read, is not valid JSON, or does not contain a `runs` array.
 *
 * @returns the `runs` (with global scorers applied) and the `dataset` config.
 */
const readConfig = async (): Promise<RunsConfig> => {
  let data: string;
  try {
    data = (await fs.readFile(configFileFullPath)).toString();
    console.log(buildSuccessLog(`read ${configFileName} file successfully`));
  } catch (err) {
    console.log(buildErrorLog(`Failed to read ${configFileName} file`));
    console.log(yellow("Please ensure running init command first"));
    process.exit(1);
  }

  // A malformed config should produce the same kind of friendly failure as a
  // missing one, rather than an unhandled SyntaxError stack trace.
  let config: RunsConfig;
  try {
    config = JSON.parse(data) as RunsConfig;
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    console.log(
      buildErrorLog(`Failed to parse ${configFileName} file: ${reason}`),
    );
    process.exit(1);
  }

  const { runs, dataset, scorers } = config;

  // The type assertion above is unchecked at runtime, so verify the one
  // property that is dereferenced below before iterating over it.
  if (!Array.isArray(runs)) {
    console.log(
      buildErrorLog(`${configFileName} file must contain a "runs" array`),
    );
    process.exit(1);
  }

  runs.forEach((run) => {
    // if scorers are not set for a run, then override it with the global scorers
    if (!run.scorers && scorers) {
      // Copy the array so runs do not share one mutable list of scorers.
      run.scorers = [...scorers];
    }
  });

  return {
    runs,
    dataset,
  };
};

program
.name("Empirical.run CLI")
.description(
Expand Down Expand Up @@ -90,19 +115,9 @@ program
dotenv.config({ path: runTimeOptions.envFilePath });
console.log(yellow("Initiating run..."));

let data;
const startTime = performance.now();
try {
data = await fs.readFile(configFileFullPath);
} catch (err) {
console.log(buildErrorLog(`Failed to read ${configFileName} file`));
console.log(yellow("Please ensure running init command first"));
process.exit(1);
}
const { runs, dataset: datasetConfig } = await readConfig();

console.log(buildSuccessLog(`read ${configFileName} file successfully`));
const jsonStr = data.toString();
const { runs, dataset: datasetConfig } = JSON.parse(jsonStr) as RunsConfig;
// TODO: add check here for empty runs config. Add validator of the file
let dataset: Dataset;
const store = new EmpiricalStore();
Expand Down
5 changes: 3 additions & 2 deletions packages/cli/src/types/index.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import { RunConfig, DatasetConfig } from "@empiricalrun/types";
import { RunConfig, DatasetConfig, Scorer } from "@empiricalrun/types";

/**
 * Shape of the top-level config file consumed by the CLI.
 */
export type RunsConfig = {
  /** Optional schema reference (presumably a JSON schema URL — confirm). */
  $schema?: string;
  /** The run configurations declared in the config file. */
  runs: RunConfig[];
  /** The dataset configuration declared in the config file. */
  dataset: DatasetConfig;
  /** Global scorers, used for any run that does not set its own `scorers`. */
  scorers?: Scorer[];
};

0 comments on commit db945c2

Please sign in to comment.