From df73d5e6ea8628a96a91b7ac01136415436966d5 Mon Sep 17 00:00:00 2001 From: Saikat Mitra Date: Wed, 3 Apr 2024 18:39:48 +0530 Subject: [PATCH 1/8] fix: rename py script executor value property to path --- development/schema/schema.json | 7 +++++-- docs/models/custom.mdx | 2 +- examples/rag/empiricalrc.json | 2 +- packages/cli/src/types/index.ts | 4 ++-- packages/core/src/run/executors/index.ts | 4 ++-- packages/core/src/run/executors/interface.ts | 4 ++-- packages/core/src/run/executors/model.ts | 4 ++-- packages/core/src/run/executors/script.ts | 2 +- packages/core/src/run/index.ts | 12 ++++++------ packages/types/src/index.ts | 8 ++++---- 10 files changed, 26 insertions(+), 23 deletions(-) diff --git a/development/schema/schema.json b/development/schema/schema.json index a68ec8fd..02160e14 100644 --- a/development/schema/schema.json +++ b/development/schema/schema.json @@ -128,9 +128,12 @@ "js-script" ] }, - "value": { + "path": { "type": "string" }, + "config": { + "type": "object" + }, "scorers": { "type": "array", "items": { @@ -208,7 +211,7 @@ }, "required": [ "type", - "value" + "path" ], "additionalProperties": false } diff --git a/docs/models/custom.mdx b/docs/models/custom.mdx index 4989677a..6ee97cbe 100644 --- a/docs/models/custom.mdx +++ b/docs/models/custom.mdx @@ -16,7 +16,7 @@ path in the `value` field. 
"runs": [ { "type": "py-script", - "value": "rag.py" + "path": "rag.py" } ] ``` diff --git a/examples/rag/empiricalrc.json b/examples/rag/empiricalrc.json index df022283..5f65178b 100644 --- a/examples/rag/empiricalrc.json +++ b/examples/rag/empiricalrc.json @@ -4,7 +4,7 @@ { "name": "rag script run", "type": "py-script", - "value": "rag.py", + "path": "rag.py", "scorers": [ { "type": "py-script", diff --git a/packages/cli/src/types/index.ts b/packages/cli/src/types/index.ts index f82e94a4..2fa3f87b 100644 --- a/packages/cli/src/types/index.ts +++ b/packages/cli/src/types/index.ts @@ -1,6 +1,6 @@ -import { IRunConfig, DatasetConfig } from "@empiricalrun/types"; +import { RunConfig, DatasetConfig } from "@empiricalrun/types"; export type RunsConfig = { - runs: IRunConfig[]; + runs: RunConfig[]; dataset: DatasetConfig; }; diff --git a/packages/core/src/run/executors/index.ts b/packages/core/src/run/executors/index.ts index 25eeae07..55be0fba 100644 --- a/packages/core/src/run/executors/index.ts +++ b/packages/core/src/run/executors/index.ts @@ -1,9 +1,9 @@ -import { IRunConfig } from "@empiricalrun/types"; +import { RunConfig } from "@empiricalrun/types"; import { modelExecutor } from "./model"; import { Executor } from "./interface"; import { scriptExecutor } from "./script"; -export const getExecutor = (runConfig: IRunConfig): Executor | undefined => { +export const getExecutor = (runConfig: RunConfig): Executor | undefined => { if (runConfig.type === "model") { return modelExecutor; } else if (runConfig.type === "py-script") { diff --git a/packages/core/src/run/executors/interface.ts b/packages/core/src/run/executors/interface.ts index 55285962..feb0f242 100644 --- a/packages/core/src/run/executors/interface.ts +++ b/packages/core/src/run/executors/interface.ts @@ -1,8 +1,8 @@ -import { DatasetSample, IRunConfig, RunOutput } from "@empiricalrun/types"; +import { DatasetSample, RunConfig, RunOutput } from "@empiricalrun/types"; export interface Executor { ( - 
runConfig: IRunConfig, + runConfig: RunConfig, sample: DatasetSample, ): Promise<{ output: RunOutput; diff --git a/packages/core/src/run/executors/model.ts b/packages/core/src/run/executors/model.ts index 66e518e6..aaa1342a 100644 --- a/packages/core/src/run/executors/model.ts +++ b/packages/core/src/run/executors/model.ts @@ -1,10 +1,10 @@ -import { IRunConfig } from "@empiricalrun/types"; +import { RunConfig } from "@empiricalrun/types"; import { Executor } from "./interface"; import { ChatCompletionMessageParam } from "openai/resources/index.mjs"; import { AIError, EmpiricalAI, replacePlaceholders } from "@empiricalrun/ai"; export const modelExecutor: Executor = async function ( - runConfig: IRunConfig, + runConfig: RunConfig, sample, ) { if (runConfig.type !== "model") { diff --git a/packages/core/src/run/executors/script.ts b/packages/core/src/run/executors/script.ts index 965be9b4..40bd2dee 100644 --- a/packages/core/src/run/executors/script.ts +++ b/packages/core/src/run/executors/script.ts @@ -19,7 +19,7 @@ export const scriptExecutor: Executor = async (runConfig, sample) => { }, }; } - const scriptPath = runConfig.value; + const scriptPath = runConfig.path; if (!scriptPath) { return { output, diff --git a/packages/core/src/run/index.ts b/packages/core/src/run/index.ts index 546a8b6b..682f636a 100644 --- a/packages/core/src/run/index.ts +++ b/packages/core/src/run/index.ts @@ -1,7 +1,7 @@ import { Dataset, DatasetSample, - IRunConfig, + RunConfig, RunCompletion, RunOutputSample, } from "@empiricalrun/types"; @@ -10,9 +10,9 @@ import score from "@empiricalrun/scorer"; import { getExecutor } from "./executors"; export async function execute( - run: IRunConfig, + run: RunConfig, dataset: Dataset, - progressCallback: (sample: RunOutputSample) => void, + progressCallback?: (sample: RunOutputSample) => void, ): Promise { const runCreationDate = new Date(); const sampleCompletions: RunOutputSample[] = []; @@ -37,7 +37,7 @@ export async function execute( 
sampleCompletions.push(sample); try { - progressCallback(sample); + progressCallback?.(sample); } catch (e) { console.warn(e); } @@ -88,12 +88,12 @@ export async function execute( }; } -function getDefaultRunName(run: IRunConfig, id: string): string { +function getDefaultRunName(run: RunConfig, id: string): string { let name = ""; if (run.type === "model") { name = run.model; } else if (run.type === "py-script" || run.type === "js-script") { - name = run.value; + name = run.path; } return `Run #${id}: ${name}`; } diff --git a/packages/types/src/index.ts b/packages/types/src/index.ts index 9d0e24a7..496fcbc7 100644 --- a/packages/types/src/index.ts +++ b/packages/types/src/index.ts @@ -56,16 +56,16 @@ export interface ModelRunConfig extends RunConfigBase { export interface JSScriptRunConfig extends RunConfigBase { type: "js-script"; - value: string; + path: string; } export interface PyScriptRunConfig extends RunConfigBase { type: "py-script"; - value: string; + path: string; pythonPath?: string; } -export type IRunConfig = ModelRunConfig | PyScriptRunConfig | JSScriptRunConfig; +export type RunConfig = ModelRunConfig | PyScriptRunConfig | JSScriptRunConfig; export interface ScoreStats { name: string; @@ -84,7 +84,7 @@ export interface RunOutputStats { export interface RunCompletion { id: string; - run_config: IRunConfig; + run_config: RunConfig; dataset_config: { id: string }; samples: RunOutputSample[]; stats?: RunOutputStats; From cfa1aa2cb3f604ed3487d1025341d76a96e6238a Mon Sep 17 00:00:00 2001 From: Saikat Mitra Date: Wed, 3 Apr 2024 18:52:45 +0530 Subject: [PATCH 2/8] chore: update changeset --- .changeset/fair-bottles-taste.md | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 .changeset/fair-bottles-taste.md diff --git a/.changeset/fair-bottles-taste.md b/.changeset/fair-bottles-taste.md new file mode 100644 index 00000000..e6c5a855 --- /dev/null +++ b/.changeset/fair-bottles-taste.md @@ -0,0 +1,9 @@ +--- +"@empiricalrun/scorer": patch 
+"@empiricalrun/types": patch +"@empiricalrun/core": patch +"@empiricalrun/cli": patch +"@empiricalrun/ai": patch +--- + +fix: change script value property to path in run From 51714856c59d01c3f0760475b13398f1cdfc2147 Mon Sep 17 00:00:00 2001 From: Saikat Mitra Date: Wed, 3 Apr 2024 19:24:23 +0530 Subject: [PATCH 3/8] feat: add executor config --- examples/basic/empiricalrc.json | 2 +- examples/chatbot/empiricalrc.json | 2 +- examples/humaneval/empiricalrc.json | 2 +- examples/rag/empiricalrc.json | 2 +- examples/rag/rag.py | 2 +- packages/core/src/python/executor_wrapper.py | 2 +- packages/core/src/run/executors/script.ts | 7 ++++++- packages/types/src/index.ts | 2 ++ 8 files changed, 14 insertions(+), 7 deletions(-) diff --git a/examples/basic/empiricalrc.json b/examples/basic/empiricalrc.json index 54c7fb48..8fa72a8c 100644 --- a/examples/basic/empiricalrc.json +++ b/examples/basic/empiricalrc.json @@ -1,5 +1,5 @@ { - "$schema": "https://assets.empirical.run/config/schema/v1.12.json", + "$schema": "https://assets.empirical.run/config/schema/v1.13.json", "runs": [ { "type": "model", diff --git a/examples/chatbot/empiricalrc.json b/examples/chatbot/empiricalrc.json index 6ce6feaf..faa0bc55 100644 --- a/examples/chatbot/empiricalrc.json +++ b/examples/chatbot/empiricalrc.json @@ -1,5 +1,5 @@ { - "$schema": "https://assets.empirical.run/config/schema/v1.12.json", + "$schema": "https://assets.empirical.run/config/schema/v1.13.json", "runs": [ { "name": "less context setting", diff --git a/examples/humaneval/empiricalrc.json b/examples/humaneval/empiricalrc.json index 56ec76a7..e30a8fe3 100644 --- a/examples/humaneval/empiricalrc.json +++ b/examples/humaneval/empiricalrc.json @@ -1,5 +1,5 @@ { - "$schema": "https://assets.empirical.run/config/schema/v1.12.json", + "$schema": "https://assets.empirical.run/config/schema/v1.13.json", "runs": [ { "type": "model", diff --git a/examples/rag/empiricalrc.json b/examples/rag/empiricalrc.json index 5f65178b..b3e703db 100644 --- 
a/examples/rag/empiricalrc.json +++ b/examples/rag/empiricalrc.json @@ -1,5 +1,5 @@ { - "$schema": "https://assets.empirical.run/config/schema/v1.12.json", + "$schema": "https://assets.empirical.run/config/schema/v1.13.json", "runs": [ { "name": "rag script run", diff --git a/examples/rag/rag.py b/examples/rag/rag.py index 47522820..8c9b6658 100644 --- a/examples/rag/rag.py +++ b/examples/rag/rag.py @@ -6,7 +6,7 @@ nest_asyncio.apply() -def execute(inputs): +def execute(inputs, config): # load documents question = inputs["question"] reader = SimpleDirectoryReader("./arxiv-papers/", num_files_limit=30) diff --git a/packages/core/src/python/executor_wrapper.py b/packages/core/src/python/executor_wrapper.py index f068ca0c..e375852f 100644 --- a/packages/core/src/python/executor_wrapper.py +++ b/packages/core/src/python/executor_wrapper.py @@ -6,5 +6,5 @@ sys.path.append(sys.argv[1]) user_module = importlib.import_module(sys.argv[2]) -result = user_module.execute(json.loads(sys.argv[3])) +result = user_module.execute(json.loads(sys.argv[3]), json.loads(sys.argv[4])) print("execution_output:", json.dumps(result)) diff --git a/packages/core/src/run/executors/script.ts b/packages/core/src/run/executors/script.ts index 40bd2dee..e9f39dcc 100644 --- a/packages/core/src/run/executors/script.ts +++ b/packages/core/src/run/executors/script.ts @@ -32,7 +32,12 @@ export const scriptExecutor: Executor = async (runConfig, sample) => { let basePath = path.dirname(scriptPath); let moduleName = path.basename(scriptPath).replace(".py", ""); - let pythonArgs = [basePath, moduleName, JSON.stringify(sample.inputs)]; + let pythonArgs = [ + basePath, + moduleName, + JSON.stringify(sample.inputs), + JSON.stringify(runConfig.config || {}), + ]; const runOutput = await new Promise((resolve) => { let output: string[] = []; diff --git a/packages/types/src/index.ts b/packages/types/src/index.ts index 496fcbc7..97216234 100644 --- a/packages/types/src/index.ts +++ b/packages/types/src/index.ts @@ 
-57,12 +57,14 @@ export interface ModelRunConfig extends RunConfigBase { export interface JSScriptRunConfig extends RunConfigBase { type: "js-script"; path: string; + config?: any; } export interface PyScriptRunConfig extends RunConfigBase { type: "py-script"; path: string; pythonPath?: string; + config?: any; } export type RunConfig = ModelRunConfig | PyScriptRunConfig | JSScriptRunConfig; From 4f723a1c03eb240246325dc2192e0ca21d73ad8f Mon Sep 17 00:00:00 2001 From: Saikat Mitra Date: Wed, 3 Apr 2024 19:28:15 +0530 Subject: [PATCH 4/8] chore: add changeset --- .changeset/warm-ravens-raise.md | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 .changeset/warm-ravens-raise.md diff --git a/.changeset/warm-ravens-raise.md b/.changeset/warm-ravens-raise.md new file mode 100644 index 00000000..f5d3ab9d --- /dev/null +++ b/.changeset/warm-ravens-raise.md @@ -0,0 +1,7 @@ +--- +"@empiricalrun/types": minor +"@empiricalrun/core": minor +"@empiricalrun/cli": patch +--- + +feat: add support for passthrough config for script executor From a5a8597765a6a5277b9aa6c8522f8d07c25c4fd9 Mon Sep 17 00:00:00 2001 From: Saikat Mitra Date: Wed, 3 Apr 2024 20:17:21 +0530 Subject: [PATCH 5/8] fix: change the config property to parameters --- development/schema/schema.json | 2 +- docs/models/custom.mdx | 9 ++++++-- examples/rag/rag.py | 3 ++- packages/cli/src/bin/index.ts | 5 ++--- packages/core/src/run/executors/script.ts | 4 ++-- packages/core/src/run/index.ts | 25 +++++++++++++---------- packages/types/src/index.ts | 10 ++++++--- 7 files changed, 35 insertions(+), 23 deletions(-) diff --git a/development/schema/schema.json b/development/schema/schema.json index 02160e14..b26c6403 100644 --- a/development/schema/schema.json +++ b/development/schema/schema.json @@ -131,7 +131,7 @@ "path": { "type": "string" }, - "config": { + "parameters": { "type": "object" }, "scorers": { diff --git a/docs/models/custom.mdx b/docs/models/custom.mdx index 6ee97cbe..f7078b4f 100644 --- 
a/docs/models/custom.mdx +++ b/docs/models/custom.mdx @@ -10,22 +10,27 @@ does pre or post-processing around the LLM call or chains multiple LLM calls tog ## Run configuration In your config file, set `type` as `py-script` and specify the Python file -path in the `value` field. +path in the `path` field. ```json "runs": [ { "type": "py-script", - "path": "rag.py" + "path": "rag.py", } ] ``` +You can additionally pass the following properties in the run configuration: +`name`: Name of the custom run. +`parameters`: Parameters to be passed to the script file to modify its behavior. + The Python file is expected to have a method called `execute` with the following signature: - **Arguments** - inputs: dict of key-value pairs with [sample inputs](../dataset/basics) + - parameters: dict of key-value pairs of run parameters. - **Returns**: a dict with - output (string): The response from the model/application - metadata (dict): Custom key-value pairs that are passed on to the scorer and diff --git a/examples/rag/rag.py b/examples/rag/rag.py index 8c9b6658..b0317e78 100644 --- a/examples/rag/rag.py +++ b/examples/rag/rag.py @@ -6,7 +6,8 @@ nest_asyncio.apply() -def execute(inputs, config): +def execute(inputs, parameters): + print(parameters) # load documents question = inputs["question"] reader = SimpleDirectoryReader("./arxiv-papers/", num_files_limit=30) diff --git a/packages/cli/src/bin/index.ts b/packages/cli/src/bin/index.ts index 7a963d45..60e8e532 100644 --- a/packages/cli/src/bin/index.ts +++ b/packages/cli/src/bin/index.ts @@ -100,9 +100,8 @@ program ); const completion = await Promise.all( runs.map((r) => { - if (r.type === "py-script") { - r.pythonPath = options.pythonPath; - } + r.parameters = r.parameters ? 
r.parameters : {}; + r.parameters.pythonPath = options.pythonPath; return execute(r, dataset, () => { progressBar.increment(); }); diff --git a/packages/core/src/run/executors/script.ts b/packages/core/src/run/executors/script.ts index e9f39dcc..bf7c78b7 100644 --- a/packages/core/src/run/executors/script.ts +++ b/packages/core/src/run/executors/script.ts @@ -36,13 +36,13 @@ export const scriptExecutor: Executor = async (runConfig, sample) => { basePath, moduleName, JSON.stringify(sample.inputs), - JSON.stringify(runConfig.config || {}), + JSON.stringify(runConfig.parameters || {}), ]; const runOutput = await new Promise((resolve) => { let output: string[] = []; const shell = new PythonShell(wrapperScriptFile, { - pythonPath: runConfig.pythonPath || undefined, + pythonPath: runConfig.parameters?.pythonPath || undefined, scriptPath: wrapperScriptDirectory, args: pythonArgs, }); diff --git a/packages/core/src/run/index.ts b/packages/core/src/run/index.ts index 682f636a..4fc0a7b0 100644 --- a/packages/core/src/run/index.ts +++ b/packages/core/src/run/index.ts @@ -10,21 +10,21 @@ import score from "@empiricalrun/scorer"; import { getExecutor } from "./executors"; export async function execute( - run: RunConfig, + runConfig: RunConfig, dataset: Dataset, progressCallback?: (sample: RunOutputSample) => void, ): Promise { const runCreationDate = new Date(); const sampleCompletions: RunOutputSample[] = []; const runId = generateHex(4); - const { scorers } = run; + const { scorers } = runConfig; const completionsPromises = []; for (const datasetSample of dataset.samples) { - const executor = getExecutor(run); + const executor = getExecutor(runConfig); if (executor) { // if llm error then add to the completion object but if something else throw error and stop the run completionsPromises.push( - executor(run, datasetSample).then(({ output, error }) => { + executor(runConfig, datasetSample).then(({ output, error }) => { const sample: RunOutputSample = { inputs: 
datasetSample.inputs, output, @@ -65,7 +65,7 @@ export async function execute( output: sampleCompletion.output, scorers, options: { - pythonPath: run.type === "py-script" ? run.pythonPath : undefined, + pythonPath: runConfig.parameters?.pythonPath, }, }).then((scores) => { sampleCompletion.scores = scores; @@ -79,7 +79,10 @@ export async function execute( } return { id: runId, - run_config: { ...run, name: run.name || getDefaultRunName(run, runId) }, + run_config: { + ...runConfig, + name: runConfig.name || getDefaultRunName(runConfig, runId), + }, dataset_config: { id: dataset.id, }, @@ -88,12 +91,12 @@ export async function execute( }; } -function getDefaultRunName(run: RunConfig, id: string): string { +function getDefaultRunName(runConfig: RunConfig, id: string): string { let name = ""; - if (run.type === "model") { - name = run.model; - } else if (run.type === "py-script" || run.type === "js-script") { - name = run.path; + if (runConfig.type === "model") { + name = runConfig.model; + } else if (runConfig.type === "py-script" || runConfig.type === "js-script") { + name = runConfig.path; } return `Run #${id}: ${name}`; } diff --git a/packages/types/src/index.ts b/packages/types/src/index.ts index 97216234..0291c233 100644 --- a/packages/types/src/index.ts +++ b/packages/types/src/index.ts @@ -45,6 +45,9 @@ interface RunConfigBase { type: string; name?: string; scorers?: Scorer[]; + parameters?: { + [key: string]: any; + }; } export interface ModelRunConfig extends RunConfigBase { @@ -57,14 +60,15 @@ export interface ModelRunConfig extends RunConfigBase { export interface JSScriptRunConfig extends RunConfigBase { type: "js-script"; path: string; - config?: any; } export interface PyScriptRunConfig extends RunConfigBase { type: "py-script"; path: string; - pythonPath?: string; - config?: any; + parameters?: { + pythonPath?: string; + [key: string]: any; + }; } export type RunConfig = ModelRunConfig | PyScriptRunConfig | JSScriptRunConfig; From 
0bf7124b0756b392330934fe15b4027d09f789b9 Mon Sep 17 00:00:00 2001 From: Saikat Mitra Date: Wed, 3 Apr 2024 20:25:32 +0530 Subject: [PATCH 6/8] fix: review comments --- examples/rag/rag.py | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/rag/rag.py b/examples/rag/rag.py index b0317e78..84a73a14 100644 --- a/examples/rag/rag.py +++ b/examples/rag/rag.py @@ -7,7 +7,6 @@ def execute(inputs, parameters): - print(parameters) # load documents question = inputs["question"] reader = SimpleDirectoryReader("./arxiv-papers/", num_files_limit=30) From b220ea44c036c43c5b8b70e028cd8a4d9a3f0c63 Mon Sep 17 00:00:00 2001 From: Saikat Mitra Date: Wed, 3 Apr 2024 20:34:46 +0530 Subject: [PATCH 7/8] fix: review comments Co-authored-by: Arjun Attam --- docs/models/custom.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/models/custom.mdx b/docs/models/custom.mdx index f7078b4f..7bde9f87 100644 --- a/docs/models/custom.mdx +++ b/docs/models/custom.mdx @@ -30,7 +30,7 @@ signature: - **Arguments** - inputs: dict of key-value pairs with [sample inputs](../dataset/basics) - - parameters: dict of key-value pairs of run parameters. + - parameters: dict of key-value pairs with the run parameters - **Returns**: a dict with - output (string): The response from the model/application - metadata (dict): Custom key-value pairs that are passed on to the scorer and From 8c007f6b29922a20f605ec0371b80f9caccf5441 Mon Sep 17 00:00:00 2001 From: Saikat Mitra Date: Wed, 3 Apr 2024 20:36:29 +0530 Subject: [PATCH 8/8] fix: review comments --- docs/models/custom.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/models/custom.mdx b/docs/models/custom.mdx index f7078b4f..1cc7e788 100644 --- a/docs/models/custom.mdx +++ b/docs/models/custom.mdx @@ -16,7 +16,7 @@ path in the `path` field. "runs": [ { "type": "py-script", - "path": "rag.py", + "path": "rag.py" } ] ```