Merge branch 'main' into spider-full

empirical-run · May 21, 2024 · 9dba00b · 9dba00b
2 parents 1b26a43 + 5aaf4cc
commit 9dba00b
Show file tree

Hide file tree

Showing 209 changed files with 9,147 additions and 1,913 deletions.
diff --git a/.eslintrc.js b/.eslintrc.js
@@ -3,6 +3,12 @@
 module.exports = {
   ignorePatterns: ["apps/**", "packages/**"],
   extends: ["@empiricalrun/eslint-config/library.js"],
+  ignorePatterns: [
+    // Ignore dotfiles
+    ".*.js?(x)",
+    "node_modules/",
+    "examples/"
+  ],
   parser: "@typescript-eslint/parser",
   parserOptions: {
     project: true,

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -36,9 +36,17 @@ jobs:
 
       - name: Build
         run: pnpm build
-
+        env:
+          POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
+
       - name: Test
         run: pnpm test
         env:
           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
           MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
+          GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+          AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
+          AZURE_OPENAI_RESOURCE_NAME: ${{ secrets.AZURE_OPENAI_RESOURCE_NAME }}
+          FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
+          AZURE_OPENAI_BASE_URL: ${{ secrets.AZURE_OPENAI_BASE_URL }}
diff --git a/.github/workflows/example.yml b/.github/workflows/example.yml
@@ -24,11 +24,16 @@ jobs:
           node-version: 20
 
       - name: Run Empirical
-        run: npx @empiricalrun/cli run
+        run: npx empiricalrun
         working-directory: ./examples/basic
         id: empirical-run
         env:
           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
+          MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
+          AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
+          AZURE_OPENAI_RESOURCE_NAME: ${{ secrets.AZURE_OPENAI_RESOURCE_NAME }}
+          AZURE_OPENAI_BASE_URL: ${{ secrets.AZURE_OPENAI_BASE_URL }}
 
       - name: Find Comment
         uses: peter-evans/find-comment@v3

diff --git a/.github/workflows/json-schema-upload.yml b/.github/workflows/json-schema-upload.yml
@@ -1,12 +1,17 @@
 name: Upload JSON Schema
 
-on: workflow_dispatch
+on:
+  workflow_run:
+    workflows: [Publish packages]
+    types:
+      - completed
 
 jobs:
   upload:
     name: Upload JSON Schema
     timeout-minutes: 8
     runs-on: ubuntu-latest
+    if: ${{ github.event.workflow_run.conclusion == 'success' }}
     steps:
       - name: Checkout code
         uses: actions/checkout@v4

diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
@@ -6,6 +6,7 @@ on:
 jobs:
   publish:
     name: Publish packages
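+    # json-schema-upload.yml triggers via workflow_run on "Publish packages"; that trigger matches the workflow-level name, so keep both in sync when renaming (TODO: confirm the top-level name of this file also matches)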
     timeout-minutes: 8
     runs-on: ubuntu-latest
     steps:
@@ -32,12 +33,20 @@ jobs:
 
       - name: Build
         run: pnpm build
-
+        env:
+          POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
+
       - name: Test
         run: pnpm test
         env:
           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
           MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
+          GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+          AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
+          AZURE_OPENAI_RESOURCE_NAME: ${{ secrets.AZURE_OPENAI_RESOURCE_NAME }}
+          FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
+          AZURE_OPENAI_BASE_URL: ${{  secrets.AZURE_OPENAI_BASE_URL }}
 
       - name: Creating .npmrc
         run: |

diff --git a/README.md b/README.md
@@ -1,10 +1,9 @@
 # Empirical
 
-[![npm](https://img.shields.io/npm/v/@empiricalrun/cli)](https://npmjs.com/package/@empiricalrun/cli)
+[![npm](https://img.shields.io/npm/v/empiricalrun)](https://npmjs.com/package/empiricalrun)
 [![Discord](https://img.shields.io/badge/discord-empirical.run-blue?logo=discord&logoColor=white&color=5d68e8)](https://discord.gg/NeR6jj8dw9)
 
-Empirical is the fastest way to test different LLMs, prompts and other model configurations, across all the scenarios
-that matter for your application.
+Empirical is the fastest way to test your LLM app and iterate over prompts and other model configurations.
 
 With Empirical, you can:
 
@@ -14,8 +13,7 @@ With Empirical, you can:
 - Score your outputs with [scoring functions](https://docs.empirical.run/scoring/basics)
 - Run [tests on CI/CD](https://docs.empirical.run/running-in-ci)
 
-[Watch demo video](https://www.loom.com/share/5992fdf0edc443e282f44936e6c32672) | [See all docs](https://docs.empirical.run)
-
+https://github.com/empirical-run/empirical/assets/284612/3309283c-ddad-4c4e-8175-08a32460686c
 
 ## Usage
 
@@ -39,7 +37,7 @@ Our test will succeed if the model outputs valid JSON.
 1. Use the CLI to create a sample configuration file called `empiricalrc.json`.
 
     ```sh
-    npx @empiricalrun/cli init
+    npx empiricalrun init
     cat empiricalrc.json
     ```
 
@@ -48,13 +46,13 @@ Our test will succeed if the model outputs valid JSON.
    execution will cost $0.0026, based on the selected models.
 
     ```sh
-    npx @empiricalrun/cli run
+    npx empiricalrun
     ```
 
 3. Use the `ui` command to open the reporter web app and see side-by-side results.
 
     ```sh
-    npx @empiricalrun/cli ui
+    npx empiricalrun ui
     ```
 
 ### Make it yours

diff --git a/apps/web/CHANGELOG.md b/apps/web/CHANGELOG.md
@@ -1,5 +1,63 @@
 # web
 
+## 0.12.1
+
+### Patch Changes
+
+- d45cc70: feat: add support for js and ts config
+
+## 0.12.0
+
+### Minor Changes
+
+- 740a844: feat: add support for merging inputs and add multi-turn chat example
+
+## 0.11.0
+
+### Minor Changes
+
+- 973c9b2: feat: add support for tool calls
+
+### Patch Changes
+
+- 73e35c5: feat: support editing of dataset sample in UI
+
+## 0.10.0
+
+### Minor Changes
+
+- 2b03d24: feat: dedicated edit UI for assistant instructions
+- bde6bf0: feat: add support for configuring assistant tools
+- 93e12e0: feat: add support for assistant tool calls
+
+### Patch Changes
+
+- d8842ef: fix: output and dataset sample alignment issues
+
+## 0.9.0
+
+### Minor Changes
+
+- 2517c74: feat: add support for openai assistants
+
+## 0.8.0
+
+### Minor Changes
+
+- ba98ebb: feat: add support for chat format prompt
+
+## 0.7.3
+
+### Patch Changes
+
+- 65eec6a: feat: get aggregate latency and token numbers for the run
+
+## 0.7.2
+
+### Patch Changes
+
+- 837528d: feat: add support for sharing results using empirical link
+
 ## 0.7.1
 
 ### Patch Changes