Merge branch 'stanford-crfm:main' into DecodingTrust

AI-secure · Jan 7, 2024 · 2a4bdc4 · 2a4bdc4
2 parents 88b3e50 + 2a112cb
commit 2a4bdc4
Show file tree

Hide file tree

Showing 347 changed files with 15,818 additions and 12,294 deletions.
diff --git a/.github/workflows/frontend.yml b/.github/workflows/frontend.yml
@@ -5,12 +5,12 @@ on:
     branches:
       - '*'
     paths:
-      - 'src/helm-frontend/**'
+      - 'helm-frontend/**'
   pull_request:
     branches:
       - '*'
     paths:
-      - 'src/helm-frontend/**'
+      - 'helm-frontend/**'
 
 jobs:
   test:
@@ -22,18 +22,20 @@ jobs:
       uses: actions/setup-node@v3
       with:
         node-version: '18'
+    - name: Install Yarn
+      run: npm install --global yarn
     - name: Install dependencies
-      working-directory: ./src/helm-frontend
-      run: npm ci
+      working-directory: ./helm-frontend
+      run: yarn install
     - name: Run lint
-      working-directory: ./src/helm-frontend
-      run: npm run lint
+      working-directory: ./helm-frontend
+      run: yarn lint
     - name: Run check format
-      working-directory: ./src/helm-frontend
-      run: npm run format:check
+      working-directory: ./helm-frontend
+      run: yarn format:check
     - name: Run tests
-      working-directory: ./src/helm-frontend
-      run: npm run test
+      working-directory: ./helm-frontend
+      run: yarn test
 
   build:
     runs-on: ubuntu-latest
@@ -53,12 +55,15 @@ jobs:
       uses: actions/setup-node@v3
       with:
         node-version: '18'
+    - name: Install Yarn
+      working-directory: ./helm-frontend
+      run: npm install --global yarn
     - name: Install dependencies
-      working-directory: ./src/helm-frontend
-      run: npm ci
+      working-directory: ./helm-frontend
+      run: yarn install
     - name: Build app
-      working-directory: ./src/helm-frontend
-      run: npm run build
+      working-directory: ./helm-frontend
+      run: yarn build
     - name: Upload artifact
       uses: actions/upload-pages-artifact@v2
       with:

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -3,10 +3,10 @@ on:
   push:
     branches: [ main ]
     paths-ignore:
-      - 'src/helm-frontend/**'
+      - 'helm-frontend/**'
   pull_request:
     paths-ignore:
-      - 'src/helm-frontend/**'
+      - 'helm-frontend/**'
 
 jobs:
   install:

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,75 @@
 
 ## [Upcoming]
 
+## [v0.4.0] - 2023-12-20
+
+### Models
+
+- Added Google PaLM 2 (#2087, #2111, #2139)
+- Added Anthropic Claude 2.1 and Claude Instant 1.2 (#2095, #2123)
+- Added Writer Palmyra-X v2 and v3 (#2104)
+- Added OpenAI GPT-4 Turbo preview (#2092)
+- Added 01.AI Yi (#2009)
+- Added Mistral AI Mixtral-8x7B (#2130)
+- Fixed race condition with "Already borrowed" error for Hugging Face tokenizers (#2088, #2091, #2116)
+- Support configuration precision and quantization in HuggingFaceClient (#1912)
+- Support LanguageModelingAdapter for HuggingFaceClient (#1964)
+
+### Scenarios
+
+- Added VizWiz Scenario (#1983)
+- Added LegalBench scenario (#2129)
+- Refactored CommonSenseScenario into HellaSwagScenario, OpenBookQA, SiqaScenario, and PiqaScenario (#2117, #2118, #2119)
+- Added run specs configuration for HELM Lite (#2009)
+- Changed the default metric in GSM8K to check exact match of the final number in the response (#2130)
+
+### Framework
+
+- Added tutorial for computing the leaderboard rank of a model using the method from "Efficient Benchmarking (of Language Models)" (#1968, #1986, #1985)
+- Refactored ModelMetadata, ModelDeployment and Tokenizer, and moved configuration to YAML files (#1903, #1994)
+- Fixed a bug regarding writing `runs_to_run_suites.json` when using `helm-release` with `--release` (#2012)
+- Made pymongo an optional dependency (#1882)
+- Made SlurmRunner retry some failed Slurm requests (#2077)
+- Shortened cache retry time (#2081)
+- Added retrying to AutoTokenizer (#2090)
+- Added support for user configuration of model deployments and tokenizer configurations (#1996, #2142)
+- Added support for passing in an arbitrary schema file to `helm-summarize` (#2075)
+- Changed the prompt format for some instruction following models (#2130)
+- Added py.typed to package type information (#2169)
+
+### Frontend
+
+- Made visual improvements and bugfixes for the new React frontend (#1947, #2000, #2005, #2018)
+- Changed front page on React frontend to display a mini leaderboard (#2113, #2128)
+- Added a dropdown menu for switching between different HELM results websites (#1947)
+- Added a dropdown menu for switching between different versions (#2135)
+
+### Evaluation Results
+
+- Launched new React frontend
+- [HELM Classic v0.4.0](https://crfm.stanford.edu/helm/classic/v0.4.0/)
+    - Added evaluation results for Mistral
+- [HELM Lite v1.0.0](https://crfm.stanford.edu/helm/lite/v1.0.0/)
+    - Launched new [HELM Lite leaderboard](https://crfm.stanford.edu/2023/12/19/helm-lite.html) with 30 models and 10 scenarios
+
+### Contributors
+
+Thank you to the following contributors for your work on this HELM release!
+
+- @brianwgoldman
+- @dlwh
+- @farzaank
+- @JosselinSomervilleRoberts
+- @krh26
+- @neelguha
+- @percyliang
+- @perlitz
+- @pettter
+- @ruixin31
+- @teetone
+- @yifanmai
+- @yotamp
+
 ## [v0.3.0] - 2023-11-01
 
 ### Models
@@ -236,7 +305,8 @@ Thank you to the following contributors for your contributions to this HELM rele
 
 - Initial release
 
-[upcoming]: https://github.com/stanford-crfm/helm/compare/v0.3.0...HEAD
+[upcoming]: https://github.com/stanford-crfm/helm/compare/v0.4.0...HEAD
+[v0.4.0]: https://github.com/stanford-crfm/helm/releases/tag/v0.4.0
 [v0.3.0]: https://github.com/stanford-crfm/helm/releases/tag/v0.3.0
 [v0.2.4]: https://github.com/stanford-crfm/helm/releases/tag/v0.2.4
 [v0.2.3]: https://github.com/stanford-crfm/helm/releases/tag/v0.2.3

diff --git a/MANIFEST.in b/MANIFEST.in
@@ -1,3 +1,4 @@
+recursive-include src/helm/ py.typed
 recursive-include src/helm/proxy/clients/ *.sp
 recursive-include src/helm/benchmark/ *.json
 recursive-include src/helm/benchmark/static/ *.css *.html *.js *.png *.yaml

diff --git a/README.md b/README.md
@@ -49,3 +49,36 @@ The directory structure for this repo is as follows
 │
 └── helm-frontend # New React Front-end
 ```
+
+# Holistic Evaluation of Text-To-Image Models
+
+<img src="https://github.com/stanford-crfm/helm/raw/heim/src/helm/benchmark/static/heim/images/heim-logo.png" alt=""  width="800"/>
+
+Significant effort has recently been made in developing text-to-image generation models, which take textual prompts as 
+input and generate images. As these models are widely used in real-world applications, there is an urgent need to 
+comprehensively understand their capabilities and risks. However, existing evaluations primarily focus on image-text 
+alignment and image quality. To address this limitation, we introduce a new benchmark, 
+**Holistic Evaluation of Text-To-Image Models (HEIM)**.
+
+We identify 12 different aspects that are important in real-world model deployment, including:
+
+- image-text alignment
+- image quality
+- aesthetics
+- originality
+- reasoning
+- knowledge
+- bias
+- toxicity
+- fairness
+- robustness
+- multilinguality
+- efficiency
+
+By curating scenarios encompassing these aspects, we evaluate state-of-the-art text-to-image models using this benchmark. 
+Unlike previous evaluations that focused on alignment and quality, HEIM significantly improves coverage by evaluating all 
+models across all aspects. Our results reveal that no single model excels in all aspects, with different models 
+demonstrating strengths in different aspects.
+
+This repository contains the code used to produce the [results on the website](https://crfm.stanford.edu/heim/latest/) 
+and [paper](https://arxiv.org/abs/2311.04287).
diff --git a/docs/code.md b/docs/code.md
@@ -152,3 +152,11 @@ multiple perturbations and applying it onto a single instance.
 4. Add a new class `<Name of tokenizer>WindowService` in file `<Name of tokenizer>_window_service.py`.
    Follow what we did for `GPTJWindowService`.
 5. Import the new `WindowService` and map the model(s) to it in `WindowServiceFactory`.
+
+
+## HEIM (text-to-image evaluation)
+
+The overall code structure is the same as HELM's.
+
+When adding new scenarios and metrics for image generation, place the Python files under the `image_generation` package 
+(e.g., `src/helm/benchmark/scenarios/image_generation`).
diff --git a/docs/heim.md b/docs/heim.md
@@ -0,0 +1,16 @@
+# HEIM Quick Start (text-to-image evaluation)
+
+To run HEIM, follow these steps:
+
+1. Create a run specs configuration file. For example, to evaluate 
+[Stable Diffusion v1.4](https://huggingface.co/CompVis/stable-diffusion-v1-4) against the 
+[MS-COCO scenario](https://github.com/stanford-crfm/heim/blob/main/src/helm/benchmark/scenarios/image_generation/mscoco_scenario.py), run:
+```
+echo 'entries: [{description: "mscoco:model=huggingface/stable-diffusion-v1-4", priority: 1}]' > run_specs.conf
+```
+2. Run the benchmark with a certain number of instances (e.g., 10 instances):
+`helm-run --conf-paths run_specs.conf --suite heim_v1 --max-eval-instances 10`
+
+Examples of run specs configuration files can be found [here](https://github.com/stanford-crfm/helm/tree/main/src/helm/benchmark/presentation).
+We used [this configuration file](https://github.com/stanford-crfm/helm/blob/main/src/helm/benchmark/presentation/run_specs_heim.conf) 
+to produce the results in the paper.
diff --git a/docs/index.md b/docs/index.md
@@ -17,3 +17,7 @@ To add new models and scenarios, refer to the Developer Guide's chapters:
 
 - [Developer Setup](developer_setup.md)
 - [Code Structure](code.md)
+
+
+We also support evaluating text-to-image models as introduced in **Holistic Evaluation of Text-to-Image Models (HEIM)**
+([paper](https://arxiv.org/abs/2311.04287), [website](https://crfm.stanford.edu/heim/latest)).
diff --git a/docs/installation.md b/docs/installation.md
@@ -34,3 +34,19 @@ Within this virtual environment, run:
 ```
 pip install crfm-helm
 ```
+
+### For HEIM (text-to-image evaluation)
+
+To install the additional dependencies to run HEIM, run:
+
+```
+pip install "crfm-helm[heim]"
+``` 
+
+Some models (e.g., DALLE-mini/mega) and metrics (`DetectionMetric`) require extra dependencies that are 
+not available on PyPI. To install these dependencies, download and run the 
+[extra install script](https://github.com/stanford-crfm/helm/blob/main/install-heim-extras.sh):
+
+```
+bash install-heim-extras.sh
+```
diff --git a/docs/models.md b/docs/models.md
@@ -1,3 +1,9 @@
 # Models
 
-Please visit the models [page](https://crfm.stanford.edu/helm/latest/?models) of HELM's website for a list of available models and their descriptions.
+Please visit the models [page](https://crfm.stanford.edu/helm/latest/?models) of HELM's website 
+for a list of available models and their descriptions.
+
+
+## HEIM (text-to-image evaluation)
+
+Please visit the [models page](https://crfm.stanford.edu/heim/latest/?models) of the HEIM results website.
diff --git a/docs/quick_start.md b/docs/quick_start.md
@@ -18,4 +18,9 @@ helm-server
 
 Then go to http://localhost:8000/ in your browser.
 
-**Next steps:** click [here](get_helm_rank.md) to find out how to to run the full benchmark and get your model's leaderboard rank.
+
+## Next steps
+
+Click [here](get_helm_rank.md) to find out how to run the full benchmark and get your model's leaderboard rank.
+
+For the quick start page for HEIM, visit [here](heim.md).
diff --git a/src/helm-frontend/.eslintrc.cjs → helm-frontend/.eslintrc.cjs b/src/helm-frontend/.eslintrc.cjs → helm-frontend/.eslintrc.cjs
diff --git a/src/helm-frontend/.gitignore → helm-frontend/.gitignore b/src/helm-frontend/.gitignore → helm-frontend/.gitignore
diff --git a/src/helm-frontend/README.md → helm-frontend/README.md b/src/helm-frontend/README.md → helm-frontend/README.md
diff --git a/src/helm-frontend/index.html → helm-frontend/index.html b/src/helm-frontend/index.html → helm-frontend/index.html
diff --git a/src/helm-frontend/package.json → helm-frontend/package.json b/src/helm-frontend/package.json → helm-frontend/package.json
diff --git a/src/helm-frontend/postcss.config.js → helm-frontend/postcss.config.js b/src/helm-frontend/postcss.config.js → helm-frontend/postcss.config.js
diff --git a/src/helm-frontend/public/config.js → helm-frontend/public/config.js b/src/helm-frontend/public/config.js → helm-frontend/public/config.js
diff --git a/src/helm-frontend/public/helm.svg → helm-frontend/public/helm.svg b/src/helm-frontend/public/helm.svg → helm-frontend/public/helm.svg
diff --git a/src/helm-frontend/src/App.css → helm-frontend/src/App.css b/src/helm-frontend/src/App.css → helm-frontend/src/App.css
diff --git a/src/helm-frontend/src/App.tsx → helm-frontend/src/App.tsx b/src/helm-frontend/src/App.tsx → helm-frontend/src/App.tsx
diff --git a/src/helm-frontend/src/assets/crfm-logo.png → helm-frontend/src/assets/crfm-logo.png b/src/helm-frontend/src/assets/crfm-logo.png → helm-frontend/src/assets/crfm-logo.png
diff --git a/src/helm-frontend/src/assets/heim-logo.png → helm-frontend/src/assets/heim-logo.png b/src/helm-frontend/src/assets/heim-logo.png → helm-frontend/src/assets/heim-logo.png
diff --git a/...-frontend/src/assets/helm-logo-simple.png → ...-frontend/src/assets/helm-logo-simple.png b/...-frontend/src/assets/helm-logo-simple.png → ...-frontend/src/assets/helm-logo-simple.png
diff --git a/src/helm-frontend/src/assets/helm-logo.png → helm-frontend/src/assets/helm-logo.png b/src/helm-frontend/src/assets/helm-logo.png → helm-frontend/src/assets/helm-logo.png
diff --git a/src/helm-frontend/src/assets/helmhero.png → helm-frontend/src/assets/helmhero.png b/src/helm-frontend/src/assets/helmhero.png → helm-frontend/src/assets/helmhero.png
diff --git a/...ontend/src/assets/language-model-helm.png → ...ontend/src/assets/language-model-helm.png b/...ontend/src/assets/language-model-helm.png → ...ontend/src/assets/language-model-helm.png
diff --git a/src/helm-frontend/src/assets/logos/ai21.png → helm-frontend/src/assets/logos/ai21.png b/src/helm-frontend/src/assets/logos/ai21.png → helm-frontend/src/assets/logos/ai21.png
diff --git a/...m-frontend/src/assets/logos/anthropic.png → helm-frontend/src/assets/logos/anthropic.png b/...m-frontend/src/assets/logos/anthropic.png → helm-frontend/src/assets/logos/anthropic.png
diff --git a/...-frontend/src/assets/logos/bigscience.png → ...-frontend/src/assets/logos/bigscience.png b/...-frontend/src/assets/logos/bigscience.png → ...-frontend/src/assets/logos/bigscience.png
diff --git a/...helm-frontend/src/assets/logos/cohere.png → helm-frontend/src/assets/logos/cohere.png b/...helm-frontend/src/assets/logos/cohere.png → helm-frontend/src/assets/logos/cohere.png
diff --git a/...-frontend/src/assets/logos/eleutherai.png → ...-frontend/src/assets/logos/eleutherai.png b/...-frontend/src/assets/logos/eleutherai.png → ...-frontend/src/assets/logos/eleutherai.png
diff --git a/...helm-frontend/src/assets/logos/google.png → helm-frontend/src/assets/logos/google.png b/...helm-frontend/src/assets/logos/google.png → helm-frontend/src/assets/logos/google.png
diff --git a/src/helm-frontend/src/assets/logos/index.ts → helm-frontend/src/assets/logos/index.ts b/src/helm-frontend/src/assets/logos/index.ts → helm-frontend/src/assets/logos/index.ts
diff --git a/src/helm-frontend/src/assets/logos/meta.png → helm-frontend/src/assets/logos/meta.png b/src/helm-frontend/src/assets/logos/meta.png → helm-frontend/src/assets/logos/meta.png
diff --git a/...m-frontend/src/assets/logos/microsoft.png → helm-frontend/src/assets/logos/microsoft.png b/...m-frontend/src/assets/logos/microsoft.png → helm-frontend/src/assets/logos/microsoft.png
diff --git a/...helm-frontend/src/assets/logos/nvidia.png → helm-frontend/src/assets/logos/nvidia.png b/...helm-frontend/src/assets/logos/nvidia.png → helm-frontend/src/assets/logos/nvidia.png
diff --git a/...helm-frontend/src/assets/logos/openai.png → helm-frontend/src/assets/logos/openai.png b/...helm-frontend/src/assets/logos/openai.png → helm-frontend/src/assets/logos/openai.png
diff --git a/...lm-frontend/src/assets/logos/together.png → helm-frontend/src/assets/logos/together.png b/...lm-frontend/src/assets/logos/together.png → helm-frontend/src/assets/logos/together.png
diff --git a/...rontend/src/assets/logos/tsinghua-keg.png → ...rontend/src/assets/logos/tsinghua-keg.png b/...rontend/src/assets/logos/tsinghua-keg.png → ...rontend/src/assets/logos/tsinghua-keg.png
diff --git a/...helm-frontend/src/assets/logos/yandex.png → helm-frontend/src/assets/logos/yandex.png b/...helm-frontend/src/assets/logos/yandex.png → helm-frontend/src/assets/logos/yandex.png
diff --git a/...ntend/src/assets/scenarios-by-metrics.png → ...ntend/src/assets/scenarios-by-metrics.png b/...ntend/src/assets/scenarios-by-metrics.png → ...ntend/src/assets/scenarios-by-metrics.png
diff --git a/...rontend/src/assets/taxonomy-scenarios.png → ...rontend/src/assets/taxonomy-scenarios.png b/...rontend/src/assets/taxonomy-scenarios.png → ...rontend/src/assets/taxonomy-scenarios.png
diff --git a/...mponents/AccessBadge/AccessBadge.test.tsx → ...mponents/AccessBadge/AccessBadge.test.tsx b/...mponents/AccessBadge/AccessBadge.test.tsx → ...mponents/AccessBadge/AccessBadge.test.tsx
diff --git a/...rc/components/AccessBadge/AccessBadge.tsx → ...rc/components/AccessBadge/AccessBadge.tsx b/...rc/components/AccessBadge/AccessBadge.tsx → ...rc/components/AccessBadge/AccessBadge.tsx
diff --git a/...tend/src/components/AccessBadge/index.tsx → ...tend/src/components/AccessBadge/index.tsx b/...tend/src/components/AccessBadge/index.tsx → ...tend/src/components/AccessBadge/index.tsx
diff --git a/src/helm-frontend/src/components/Alert.tsx → helm-frontend/src/components/Alert.tsx b/src/helm-frontend/src/components/Alert.tsx → helm-frontend/src/components/Alert.tsx
diff --git a/src/helm-frontend/src/components/Card.tsx → helm-frontend/src/components/Card.tsx b/src/helm-frontend/src/components/Card.tsx → helm-frontend/src/components/Card.tsx
diff --git a/src/helm-frontend/src/components/Footer.tsx → helm-frontend/src/components/Footer.tsx b/src/helm-frontend/src/components/Footer.tsx → helm-frontend/src/components/Footer.tsx
diff --git a/...-frontend/src/components/GroupsCharts.tsx → ...-frontend/src/components/GroupsCharts.tsx b/...-frontend/src/components/GroupsCharts.tsx → ...-frontend/src/components/GroupsCharts.tsx
@@ -26,8 +26,8 @@ const colorNames = [
   "warning",
   "error",
 ];
-// eslint-disable-next-line @typescript-eslint/no-unsafe-member-access
-const theme = daisyuiColors["[data-theme=business]"] as Record<string, string>;
+
+const theme = daisyuiColors["[data-theme=business]"];
 
 interface Props {
   groupsTables: GroupsTable[];

diff --git a/...-frontend/src/components/GroupsTables.tsx → ...-frontend/src/components/GroupsTables.tsx b/...-frontend/src/components/GroupsTables.tsx → ...-frontend/src/components/GroupsTables.tsx
diff --git a/...elm-frontend/src/components/HeadTitle.tsx → helm-frontend/src/components/HeadTitle.tsx b/...elm-frontend/src/components/HeadTitle.tsx → helm-frontend/src/components/HeadTitle.tsx
diff --git a/src/helm-frontend/src/components/Hero.tsx → helm-frontend/src/components/Hero.tsx b/src/helm-frontend/src/components/Hero.tsx → helm-frontend/src/components/Hero.tsx
diff --git a/...c/components/Indicator/Indicator.test.tsx → ...c/components/Indicator/Indicator.test.tsx b/...c/components/Indicator/Indicator.test.tsx → ...c/components/Indicator/Indicator.test.tsx
diff --git a/...nd/src/components/Indicator/Indicator.tsx → ...nd/src/components/Indicator/Indicator.tsx b/...nd/src/components/Indicator/Indicator.tsx → ...nd/src/components/Indicator/Indicator.tsx
diff --git a/...ontend/src/components/Indicator/index.tsx → ...ontend/src/components/Indicator/index.tsx b/...ontend/src/components/Indicator/index.tsx → ...ontend/src/components/Indicator/index.tsx
diff --git a/...-frontend/src/components/InstanceData.tsx → ...-frontend/src/components/InstanceData.tsx b/...-frontend/src/components/InstanceData.tsx → ...-frontend/src/components/InstanceData.tsx
diff --git a/...tend/src/components/LeaderboardTables.tsx → ...tend/src/components/LeaderboardTables.tsx b/...tend/src/components/LeaderboardTables.tsx → ...tend/src/components/LeaderboardTables.tsx
diff --git a/src/helm-frontend/src/components/Link.tsx → helm-frontend/src/components/Link.tsx b/src/helm-frontend/src/components/Link.tsx → helm-frontend/src/components/Link.tsx
diff --git a/src/helm-frontend/src/components/Loading.tsx → helm-frontend/src/components/Loading.tsx b/src/helm-frontend/src/components/Loading.tsx → helm-frontend/src/components/Loading.tsx
diff --git a/...ents/MarkdownValue/MarkdownValue.test.tsx → ...ents/MarkdownValue/MarkdownValue.test.tsx b/...ents/MarkdownValue/MarkdownValue.test.tsx → ...ents/MarkdownValue/MarkdownValue.test.tsx
diff --git a/...omponents/MarkdownValue/MarkdownValue.tsx → ...omponents/MarkdownValue/MarkdownValue.tsx b/...omponents/MarkdownValue/MarkdownValue.tsx → ...omponents/MarkdownValue/MarkdownValue.tsx
diff --git a/...nd/src/components/MarkdownValue/index.tsx → ...nd/src/components/MarkdownValue/index.tsx b/...nd/src/components/MarkdownValue/index.tsx → ...nd/src/components/MarkdownValue/index.tsx
diff --git a/...m-frontend/src/components/MetricsList.tsx → helm-frontend/src/components/MetricsList.tsx b/...m-frontend/src/components/MetricsList.tsx → helm-frontend/src/components/MetricsList.tsx
diff --git a/...ontend/src/components/MiniLeaderboard.tsx → ...ontend/src/components/MiniLeaderboard.tsx b/...ontend/src/components/MiniLeaderboard.tsx → ...ontend/src/components/MiniLeaderboard.tsx
diff --git a/...lm-frontend/src/components/ModelsList.tsx → helm-frontend/src/components/ModelsList.tsx b/...lm-frontend/src/components/ModelsList.tsx → helm-frontend/src/components/ModelsList.tsx
diff --git a/...end/src/components/NavBar/NavBar.test.tsx → ...end/src/components/NavBar/NavBar.test.tsx b/...end/src/components/NavBar/NavBar.test.tsx → ...end/src/components/NavBar/NavBar.test.tsx
diff --git a/...frontend/src/components/NavBar/NavBar.tsx → ...frontend/src/components/NavBar/NavBar.tsx b/...frontend/src/components/NavBar/NavBar.tsx → ...frontend/src/components/NavBar/NavBar.tsx
diff --git a/...-frontend/src/components/NavBar/index.tsx → ...-frontend/src/components/NavBar/index.tsx b/...-frontend/src/components/NavBar/index.tsx → ...-frontend/src/components/NavBar/index.tsx
diff --git a/...m-frontend/src/components/NavDropdown.tsx → helm-frontend/src/components/NavDropdown.tsx b/...m-frontend/src/components/NavDropdown.tsx → helm-frontend/src/components/NavDropdown.tsx
diff --git a/...c/components/PageTitle/PageTitle.test.tsx → ...c/components/PageTitle/PageTitle.test.tsx b/...c/components/PageTitle/PageTitle.test.tsx → ...c/components/PageTitle/PageTitle.test.tsx
diff --git a/...nd/src/components/PageTitle/PageTitle.tsx → ...nd/src/components/PageTitle/PageTitle.tsx b/...nd/src/components/PageTitle/PageTitle.tsx → ...nd/src/components/PageTitle/PageTitle.tsx
diff --git a/...ontend/src/components/PageTitle/index.tsx → ...ontend/src/components/PageTitle/index.tsx b/...ontend/src/components/PageTitle/index.tsx → ...ontend/src/components/PageTitle/index.tsx
diff --git a/...lm-frontend/src/components/Pagination.tsx → helm-frontend/src/components/Pagination.tsx b/...lm-frontend/src/components/Pagination.tsx → helm-frontend/src/components/Pagination.tsx
diff --git a/...m-frontend/src/components/Predictions.tsx → helm-frontend/src/components/Predictions.tsx b/...m-frontend/src/components/Predictions.tsx → helm-frontend/src/components/Predictions.tsx
diff --git a/src/helm-frontend/src/components/Preview.tsx → helm-frontend/src/components/Preview.tsx b/src/helm-frontend/src/components/Preview.tsx → helm-frontend/src/components/Preview.tsx
diff --git a/...ontend/src/components/ReleaseDropdown.tsx → ...ontend/src/components/ReleaseDropdown.tsx b/...ontend/src/components/ReleaseDropdown.tsx → ...ontend/src/components/ReleaseDropdown.tsx
diff --git a/...d/src/components/Request/Request.test.tsx → ...d/src/components/Request/Request.test.tsx b/...d/src/components/Request/Request.test.tsx → ...d/src/components/Request/Request.test.tsx
diff --git a/...ontend/src/components/Request/Request.tsx → ...ontend/src/components/Request/Request.tsx b/...ontend/src/components/Request/Request.tsx → ...ontend/src/components/Request/Request.tsx
diff --git a/...frontend/src/components/Request/index.tsx → ...frontend/src/components/Request/index.tsx b/...frontend/src/components/Request/index.tsx → ...frontend/src/components/Request/index.tsx
diff --git a/...helm-frontend/src/components/RowValue.tsx → helm-frontend/src/components/RowValue.tsx b/...helm-frontend/src/components/RowValue.tsx → helm-frontend/src/components/RowValue.tsx
diff --git a/...frontend/src/components/ScenariosList.tsx → ...frontend/src/components/ScenariosList.tsx b/...frontend/src/components/ScenariosList.tsx → ...frontend/src/components/ScenariosList.tsx
diff --git a/...ontend/src/components/StatNameDisplay.tsx → ...ontend/src/components/StatNameDisplay.tsx b/...ontend/src/components/StatNameDisplay.tsx → ...ontend/src/components/StatNameDisplay.tsx
diff --git a/src/helm-frontend/src/components/Tab.tsx → helm-frontend/src/components/Tab.tsx b/src/helm-frontend/src/components/Tab.tsx → helm-frontend/src/components/Tab.tsx
diff --git a/...rontend/src/components/Tabs/Tabs.test.tsx → ...rontend/src/components/Tabs/Tabs.test.tsx b/...rontend/src/components/Tabs/Tabs.test.tsx → ...rontend/src/components/Tabs/Tabs.test.tsx
diff --git a/...elm-frontend/src/components/Tabs/Tabs.tsx → helm-frontend/src/components/Tabs/Tabs.tsx b/...elm-frontend/src/components/Tabs/Tabs.tsx → helm-frontend/src/components/Tabs/Tabs.tsx
diff --git a/...lm-frontend/src/components/Tabs/index.tsx → helm-frontend/src/components/Tabs/index.tsx b/...lm-frontend/src/components/Tabs/index.tsx → helm-frontend/src/components/Tabs/index.tsx
diff --git a/src/helm-frontend/src/layouts/Main.tsx → helm-frontend/src/layouts/Main.tsx b/src/helm-frontend/src/layouts/Main.tsx → helm-frontend/src/layouts/Main.tsx
diff --git a/src/helm-frontend/src/main.tsx → helm-frontend/src/main.tsx b/src/helm-frontend/src/main.tsx → helm-frontend/src/main.tsx
diff --git a/src/helm-frontend/src/routes/Group.tsx → helm-frontend/src/routes/Group.tsx b/src/helm-frontend/src/routes/Group.tsx → helm-frontend/src/routes/Group.tsx
diff --git a/src/helm-frontend/src/routes/Groups.tsx → helm-frontend/src/routes/Groups.tsx b/src/helm-frontend/src/routes/Groups.tsx → helm-frontend/src/routes/Groups.tsx
diff --git a/src/helm-frontend/src/routes/Landing.tsx → helm-frontend/src/routes/Landing.tsx b/src/helm-frontend/src/routes/Landing.tsx → helm-frontend/src/routes/Landing.tsx
diff --git a/src/helm-frontend/src/routes/Leaderboard.tsx → helm-frontend/src/routes/Leaderboard.tsx b/src/helm-frontend/src/routes/Leaderboard.tsx → helm-frontend/src/routes/Leaderboard.tsx
diff --git a/src/helm-frontend/src/routes/Models.tsx → helm-frontend/src/routes/Models.tsx b/src/helm-frontend/src/routes/Models.tsx → helm-frontend/src/routes/Models.tsx
diff --git a/src/helm-frontend/src/routes/Run.tsx → helm-frontend/src/routes/Run.tsx b/src/helm-frontend/src/routes/Run.tsx → helm-frontend/src/routes/Run.tsx
diff --git a/src/helm-frontend/src/routes/Runs.tsx → helm-frontend/src/routes/Runs.tsx b/src/helm-frontend/src/routes/Runs.tsx → helm-frontend/src/routes/Runs.tsx
diff --git a/src/helm-frontend/src/routes/Scenarios.tsx → helm-frontend/src/routes/Scenarios.tsx b/src/helm-frontend/src/routes/Scenarios.tsx → helm-frontend/src/routes/Scenarios.tsx
diff --git a/...c/services/getDisplayPredictionsByName.ts → ...c/services/getDisplayPredictionsByName.ts b/...c/services/getDisplayPredictionsByName.ts → ...c/services/getDisplayPredictionsByName.ts
diff --git a/.../src/services/getDisplayRequestsByName.ts → .../src/services/getDisplayRequestsByName.ts b/.../src/services/getDisplayRequestsByName.ts → .../src/services/getDisplayRequestsByName.ts
diff --git a/...tend/src/services/getGroupTablesByName.ts → ...tend/src/services/getGroupTablesByName.ts b/...tend/src/services/getGroupTablesByName.ts → ...tend/src/services/getGroupTablesByName.ts
diff --git a/...rontend/src/services/getGroupsMetadata.ts → ...rontend/src/services/getGroupsMetadata.ts b/...rontend/src/services/getGroupsMetadata.ts → ...rontend/src/services/getGroupsMetadata.ts
diff --git a/...-frontend/src/services/getGroupsTables.ts → ...-frontend/src/services/getGroupsTables.ts b/...-frontend/src/services/getGroupsTables.ts → ...-frontend/src/services/getGroupsTables.ts
diff --git a/...elm-frontend/src/services/getInstances.ts → helm-frontend/src/services/getInstances.ts b/...elm-frontend/src/services/getInstances.ts → helm-frontend/src/services/getInstances.ts
diff --git a/...frontend/src/services/getRunSpecByName.ts → ...frontend/src/services/getRunSpecByName.ts b/...frontend/src/services/getRunSpecByName.ts → ...frontend/src/services/getRunSpecByName.ts
diff --git a/...helm-frontend/src/services/getRunSpecs.ts → helm-frontend/src/services/getRunSpecs.ts b/...helm-frontend/src/services/getRunSpecs.ts → helm-frontend/src/services/getRunSpecs.ts
diff --git a/...ontend/src/services/getRunsToRunSuites.ts → ...ontend/src/services/getRunsToRunSuites.ts b/...ontend/src/services/getRunsToRunSuites.ts → ...ontend/src/services/getRunsToRunSuites.ts
diff --git a/...rontend/src/services/getScenarioByName.ts → ...rontend/src/services/getScenarioByName.ts b/...rontend/src/services/getScenarioByName.ts → ...rontend/src/services/getScenarioByName.ts
diff --git a/...nd/src/services/getScenarioStateByName.ts → ...nd/src/services/getScenarioStateByName.ts b/...nd/src/services/getScenarioStateByName.ts → ...nd/src/services/getScenarioStateByName.ts
diff --git a/src/helm-frontend/src/services/getSchema.ts → helm-frontend/src/services/getSchema.ts b/src/helm-frontend/src/services/getSchema.ts → helm-frontend/src/services/getSchema.ts
diff --git a/...m-frontend/src/services/getStatsByName.ts → helm-frontend/src/services/getStatsByName.ts b/...m-frontend/src/services/getStatsByName.ts → helm-frontend/src/services/getStatsByName.ts
diff --git a/...m-frontend/src/services/getSuiteForRun.ts → helm-frontend/src/services/getSuiteForRun.ts b/...m-frontend/src/services/getSuiteForRun.ts → helm-frontend/src/services/getSuiteForRun.ts
diff --git a/src/helm-frontend/src/types/Adapter.ts → helm-frontend/src/types/Adapter.ts b/src/helm-frontend/src/types/Adapter.ts → helm-frontend/src/types/Adapter.ts
diff --git a/...m-frontend/src/types/DisplayPrediction.ts → helm-frontend/src/types/DisplayPrediction.ts b/...m-frontend/src/types/DisplayPrediction.ts → helm-frontend/src/types/DisplayPrediction.ts
diff --git a/...ontend/src/types/DisplayPredictionsMap.ts → ...ontend/src/types/DisplayPredictionsMap.ts b/...ontend/src/types/DisplayPredictionsMap.ts → ...ontend/src/types/DisplayPredictionsMap.ts
diff --git a/...helm-frontend/src/types/DisplayRequest.ts → helm-frontend/src/types/DisplayRequest.ts b/...helm-frontend/src/types/DisplayRequest.ts → helm-frontend/src/types/DisplayRequest.ts
diff --git a/...-frontend/src/types/DisplayRequestsMap.ts → ...-frontend/src/types/DisplayRequestsMap.ts b/...-frontend/src/types/DisplayRequestsMap.ts → ...-frontend/src/types/DisplayRequestsMap.ts
diff --git a/src/helm-frontend/src/types/GroupMetadata.ts → helm-frontend/src/types/GroupMetadata.ts b/src/helm-frontend/src/types/GroupMetadata.ts → helm-frontend/src/types/GroupMetadata.ts
diff --git a/...helm-frontend/src/types/GroupsMetadata.ts → helm-frontend/src/types/GroupsMetadata.ts b/...helm-frontend/src/types/GroupsMetadata.ts → helm-frontend/src/types/GroupsMetadata.ts
diff --git a/src/helm-frontend/src/types/GroupsTable.ts → helm-frontend/src/types/GroupsTable.ts b/src/helm-frontend/src/types/GroupsTable.ts → helm-frontend/src/types/GroupsTable.ts
diff --git a/src/helm-frontend/src/types/HeaderValue.ts → helm-frontend/src/types/HeaderValue.ts b/src/helm-frontend/src/types/HeaderValue.ts → helm-frontend/src/types/HeaderValue.ts
diff --git a/src/helm-frontend/src/types/Instance.ts → helm-frontend/src/types/Instance.ts b/src/helm-frontend/src/types/Instance.ts → helm-frontend/src/types/Instance.ts
diff --git a/src/helm-frontend/src/types/LinkValue.ts → helm-frontend/src/types/LinkValue.ts b/src/helm-frontend/src/types/LinkValue.ts → helm-frontend/src/types/LinkValue.ts
diff --git a/src/helm-frontend/src/types/Metric.ts → helm-frontend/src/types/Metric.ts b/src/helm-frontend/src/types/Metric.ts → helm-frontend/src/types/Metric.ts
diff --git a/src/helm-frontend/src/types/MetricGroup.ts → helm-frontend/src/types/MetricGroup.ts b/src/helm-frontend/src/types/MetricGroup.ts → helm-frontend/src/types/MetricGroup.ts
diff --git a/src/helm-frontend/src/types/Model.ts → helm-frontend/src/types/Model.ts b/src/helm-frontend/src/types/Model.ts → helm-frontend/src/types/Model.ts
diff --git a/src/helm-frontend/src/types/Perturbation.ts → helm-frontend/src/types/Perturbation.ts b/src/helm-frontend/src/types/Perturbation.ts → helm-frontend/src/types/Perturbation.ts
diff --git a/src/helm-frontend/src/types/RowValue.ts → helm-frontend/src/types/RowValue.ts b/src/helm-frontend/src/types/RowValue.ts → helm-frontend/src/types/RowValue.ts
diff --git a/src/helm-frontend/src/types/RunGroup.ts → helm-frontend/src/types/RunGroup.ts b/src/helm-frontend/src/types/RunGroup.ts → helm-frontend/src/types/RunGroup.ts
diff --git a/src/helm-frontend/src/types/RunSpec.ts → helm-frontend/src/types/RunSpec.ts b/src/helm-frontend/src/types/RunSpec.ts → helm-frontend/src/types/RunSpec.ts
diff --git a/src/helm-frontend/src/types/Scenario.ts → helm-frontend/src/types/Scenario.ts b/src/helm-frontend/src/types/Scenario.ts → helm-frontend/src/types/Scenario.ts
diff --git a/src/helm-frontend/src/types/Schema.ts → helm-frontend/src/types/Schema.ts b/src/helm-frontend/src/types/Schema.ts → helm-frontend/src/types/Schema.ts
diff --git a/src/helm-frontend/src/types/Stat.ts → helm-frontend/src/types/Stat.ts b/src/helm-frontend/src/types/Stat.ts → helm-frontend/src/types/Stat.ts
diff --git a/src/helm-frontend/src/types/Taxonomy.ts → helm-frontend/src/types/Taxonomy.ts b/src/helm-frontend/src/types/Taxonomy.ts → helm-frontend/src/types/Taxonomy.ts
diff --git a/src/helm-frontend/src/types/global.d.ts → helm-frontend/src/types/global.d.ts b/src/helm-frontend/src/types/global.d.ts → helm-frontend/src/types/global.d.ts
diff --git a/...rontend/src/utils/getBenchmarkEndpoint.ts → ...rontend/src/utils/getBenchmarkEndpoint.ts b/...rontend/src/utils/getBenchmarkEndpoint.ts → ...rontend/src/utils/getBenchmarkEndpoint.ts
diff --git a/...frontend/src/utils/getBenchmarkRelease.ts → ...frontend/src/utils/getBenchmarkRelease.ts b/...frontend/src/utils/getBenchmarkRelease.ts → ...frontend/src/utils/getBenchmarkRelease.ts
diff --git a/...m-frontend/src/utils/getBenchmarkSuite.ts → helm-frontend/src/utils/getBenchmarkSuite.ts b/...m-frontend/src/utils/getBenchmarkSuite.ts → helm-frontend/src/utils/getBenchmarkSuite.ts
diff --git a/...m-frontend/src/utils/getVersionBaseUrl.ts → helm-frontend/src/utils/getVersionBaseUrl.ts b/...m-frontend/src/utils/getVersionBaseUrl.ts → helm-frontend/src/utils/getVersionBaseUrl.ts
diff --git a/...m-frontend/src/utils/underscoreToTitle.ts → helm-frontend/src/utils/underscoreToTitle.ts b/...m-frontend/src/utils/underscoreToTitle.ts → helm-frontend/src/utils/underscoreToTitle.ts
diff --git a/src/helm-frontend/src/vite-env.d.ts → helm-frontend/src/vite-env.d.ts b/src/helm-frontend/src/vite-env.d.ts → helm-frontend/src/vite-env.d.ts
diff --git a/src/helm-frontend/tailwind.config.js → helm-frontend/tailwind.config.js b/src/helm-frontend/tailwind.config.js → helm-frontend/tailwind.config.js
diff --git a/src/helm-frontend/tsconfig.json → helm-frontend/tsconfig.json b/src/helm-frontend/tsconfig.json → helm-frontend/tsconfig.json
diff --git a/src/helm-frontend/tsconfig.node.json → helm-frontend/tsconfig.node.json b/src/helm-frontend/tsconfig.node.json → helm-frontend/tsconfig.node.json
diff --git a/src/helm-frontend/vite.config.ts → helm-frontend/vite.config.ts b/src/helm-frontend/vite.config.ts → helm-frontend/vite.config.ts
@@ -15,6 +15,6 @@ export default defineConfig({
     environment: "jsdom",
   },
   build: {
-    outDir: `${__dirname}/../helm/benchmark/static_build`,
+    outDir: `${__dirname}/../src/helm/benchmark/static_build`,
   },
 });
diff --git a/src/helm-frontend/yarn.lock → helm-frontend/yarn.lock b/src/helm-frontend/yarn.lock → helm-frontend/yarn.lock
diff --git a/install-heim-extras.sh b/install-heim-extras.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+# Installs the extra dependencies HEIM needs when evaluating the following:
+# Models: craiyon/dalle-mini, craiyon/dalle-mega, thudm/cogview2
+# Scenarios: detection with the `DetectionMetric`
+
+# Exit as soon as any command below fails, instead of continuing with a partial install.
+set -e
+
+# For DALLE-mini/mega, install PyTorch and JAX.
+# On macOS, skip both installs below: they are CUDA builds and CUDA is not supported there.
+if [[ $OSTYPE != 'darwin'* ]]; then
+  # Install PyTorch manually (with --no-cache-dir) to avoid pip getting killed: https://stackoverflow.com/a/54329850
+  pip install --no-cache-dir --find-links https://download.pytorch.org/whl/torch_stable.html torch==1.12.1+cu113 torchvision==0.13.1+cu113
+
+  # DALLE-mini requires JAX: install jax together with the matching cuda11.cudnn805 build of jaxlib.
+  pip install jax==0.3.25 jaxlib==0.3.25+cuda11.cudnn805 -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
+fi
+
+# For CogView2, manually install Image-Local-Attention and apex, each pinned to a specific commit. NOTE: this must be run on a GPU machine.
+echo "Installing CogView2 dependencies..."
+pip install localAttention@git+https://github.com/Sleepychord/Image-Local-Attention.git@43fee310cb1c6f64fb0ed77404ba3b01fa586026
+pip install --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" apex@git+https://github.com/michiyasunaga/apex.git@9395ba2aab3c05e0e36ef0b7fe48d42de9f10bcf
+
+# For Detectron2, install from the git repository (no commit is pinned here), following https://detectron2.readthedocs.io/en/latest/tutorials/install.html
+python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'
+
+echo "Done."
diff --git a/mkdocs.yml b/mkdocs.yml
@@ -35,6 +35,7 @@ nav:
   - 'User Guide':
     - 'installation.md'
     - 'quick_start.md'
+    - 'heim.md'
     - 'get_helm_rank.md'
     - 'tutorial.md'
     - 'benchmark.md'

diff --git a/requirements.txt b/requirements.txt
@@ -50,7 +50,7 @@ google-api-core==2.10.1
 google-api-python-client==2.64.0
 google-auth==2.12.0
 google-auth-httplib2==0.1.0
-google-cloud-aiplatform==1.36.4
+google-cloud-aiplatform==1.38.1
 googleapis-common-protos==1.56.4
 greenlet==1.1.3
 gunicorn==20.1.0
@@ -88,6 +88,7 @@ nodeenv==1.7.0
 numba==0.56.4
 numpy==1.23.3
 openai==0.27.8
+opencv-python==4.8.1.78
 openpyxl==3.0.10
 outcome==1.2.0
 packaging==21.3

diff --git a/scripts/offline_eval/deepfloyd/__init__.py b/scripts/offline_eval/deepfloyd/__init__.py