docs: add readme support

empirical-run · May 22, 2024 · 48319c7 · 48319c7
1 parent 8308e78
commit 48319c7
Show file tree

Hide file tree

Showing 3 changed files with 44 additions and 5 deletions.
diff --git a/examples/spider-using-ts/README.md b/examples/spider-using-ts/README.md
@@ -0,0 +1,33 @@
+# Scoring Text-to-SQL outputs using TypeScript
+
+LLMs are good at converting natural language questions to SQL queries. This example uses that
+scenario to demo Empirical.
+This example is based on the [Spider](https://github.com/taoyds/spider) dataset and uses a TypeScript config to score the outputs.
+
+In this example, we generate SQL queries, and score them on
+
+1. SQL syntax (with the `sql-syntax` scorer): Checks if the output syntax is valid SQL. For example, if the output is in
+   markdown syntax (with backticks), it is not a valid SQL query.
+2. Execution accuracy: We run the generated SQL query against a test database, and check
+   if the query returns a result. This scorer cleans query outputs that have backticks
+   ([see code](./empiricalrc.ts)).
+
+This example requires TypeScript.
+
+## Usage
+
+1. Review the `empiricalrc.ts` configuration and make changes if needed. The current configuration runs models
+   from OpenAI and Fireworks (Llama), and thus requires the [relevant environment variables](https://docs.empirical.run/models/basic).
+   ```sh
+   cat empiricalrc.ts
+   ```
+
+1. Run with Empirical
+   ```sh
+   npx empiricalrun
+   ```
+
+1. See results on the Empirical web reporter
+   ```sh
+   npx empiricalrun ui
+   ```
diff --git a/examples/spider-using-ts/empiricalrc.ts b/examples/spider-using-ts/empiricalrc.ts
@@ -1,4 +1,4 @@
-import { Config, loadDataset } from "empiricalrun";
+import { Config, loadDataset, JSScriptScorer } from "empiricalrun";
 import { executeQuery, getConnection, getSchema } from "./db";
 
 async function datasetLoader() {
@@ -38,11 +38,17 @@ const config: Config = {
     {
       provider: "openai",
       type: "model",
-      name: "default-prompt-gpt3.5",
       model: "gpt-3.5-turbo",
       prompt:
         "You are an SQLite expert who can convert natural language questions to SQL queries for the database schema given below.\n\nDatabase schema:\n{{schema}}\n\nAnswer the following question with only the SQL query.\n\nQuestion: {{question}}",
     },
+    {
+      type: "model",
+      provider: "fireworks",
+      model: "llama-v3-8b-instruct",
+      prompt:
+        "You are an SQLite expert who can convert natural language questions to SQL queries for the database schema given below.\n\nDatabase schema:\n{{schema}}\n\nAnswer the following question with only the SQL query.\n\nQuestion: {{question}}",
+    }
   ],
   dataset: datasetLoader,
   scorers: [

diff --git a/packages/empiricalrun/src/bin/dataset/loaders.ts b/packages/empiricalrun/src/bin/dataset/loaders.ts
@@ -1,4 +1,4 @@
-import { DatasetSample } from "@empiricalrun/types";
+import { DatasetSample, DatasetSampleInputs } from "@empiricalrun/types";
 import crypto from "crypto";
 import { DatasetError, DatasetErrorEnum } from "../error";
 import csv from "csvtojson";
@@ -7,9 +7,9 @@ type LoaderFunction = (contents: string) => Promise<DatasetSample[]>;
 
 async function jsonLoader(contents: string): Promise<DatasetSample[]> {
   try {
-    const parsed: Record<string, unknown>[] = JSON.parse(contents);
+    const parsed: DatasetSampleInputs[] = JSON.parse(contents);
     const datasetSamples: DatasetSample[] = parsed.map(
-      (inputs: Record<string, unknown>, index: number) => {
+      (inputs, index: number) => {
         return {
           id: `${index + 1}`,
           inputs,