# added some flexibility to create your custom benchmark splits (#307) #825
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Unit-test workflow.
# Runs on pushes to `main`, on every pull request, and on manual dispatch.
# Every job uses `bash -l {0}` (login shell) as its default shell for `run:` steps.
name: Unit tests

on:
  push:
    branches:
      - main
  pull_request:
  workflow_dispatch:

jobs:
  # Formatting gate: fails if `black` would reformat any file in the repository.
  code-format:
    runs-on: ubuntu-latest
    defaults:
      run:
        shell: bash -l {0}
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '>=3.10'
          cache: 'pip'  # caching pip dependencies
      - name: Pip install
        # The requirement is quoted so bash never treats `[jupyter]` as a glob pattern.
        run: pip install 'black[jupyter]==24.2.0' blacken-docs
      - name: Code Formatting
        run: black . --check

  # Installs AgentLab (checked out from its `main` branch into ./agentlab) next to
  # this repository and serves MiniWob locally. The test step itself is commented out.
  agentlab:
    runs-on: ubuntu-22.04
    defaults:
      run:
        shell: bash -l {0}
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4
      - name: Checkout AgentLab
        uses: actions/checkout@v4
        with:
          repository: 'ServiceNow/AgentLab'
          ref: 'main'
          path: 'agentlab'
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '>=3.10'
          cache: 'pip'  # caching pip dependencies
      - name: Install AgentLab
        working-directory: ./agentlab
        run: pip install -e .
      - name: Install BrowserGym
        run: make install
      - name: Pip list
        run: pip list
      - name: Fetch MiniWob
        uses: actions/checkout@v4
        with:
          repository: "Farama-Foundation/miniwob-plusplus"
          # Pinned to a specific commit of miniwob-plusplus.
          ref: "7fd85d71a4b60325c6585396ec4f48377d049838"
          path: "miniwob-plusplus"
      - name: Serve MiniWob
        uses: Eun/http-server-action@v1
        with:
          directory: "${{ github.workspace }}/miniwob-plusplus/miniwob/html"
          port: 8080
      - name: Pre-download tokenizer resources (for WebArena)
        run: python -c "import nltk; nltk.download('punkt_tab')"
      # - name: Run AgentLab Unit Tests
      #   env:
      #     MINIWOB_URL: "http://localhost:8080/miniwob/"
      #   run: pytest -n 5 --durations=10 -m 'not pricy' -v agentlab/tests/experiments/test_launch_exp.py

  # Runs tests/core with 5 pytest-xdist workers, excluding tests marked `pricy`.
  browsergym-core:
    runs-on: ubuntu-22.04
    defaults:
      run:
        shell: bash -l {0}
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '>=3.10'
          cache: 'pip'  # caching pip dependencies
      - name: Pip install
        working-directory: ./dev
        run: pip install -r requirements.txt
      - name: Pip list
        run: pip list
      - name: Install Playwright
        run: playwright install chromium --with-deps
      - name: Run browsergym-core Unit Tests
        run: pytest -n 5 --durations=10 -m 'not pricy' -v tests/core

  # Runs tests/miniwob against a MiniWob instance served on localhost:8080.
  browsergym-miniwob:
    runs-on: ubuntu-22.04
    defaults:
      run:
        shell: bash -l {0}
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '>=3.10'
          cache: 'pip'  # caching pip dependencies
      - name: Pip install
        working-directory: ./dev
        run: pip install -r requirements.txt
      - name: Pip list
        run: pip list
      - name: Install Playwright
        run: playwright install chromium --with-deps
      - name: Fetch MiniWob
        uses: actions/checkout@v4
        with:
          repository: "Farama-Foundation/miniwob-plusplus"
          # Pinned to a specific commit of miniwob-plusplus.
          ref: "7fd85d71a4b60325c6585396ec4f48377d049838"
          path: "miniwob-plusplus"
      - name: Serve MiniWob
        uses: Eun/http-server-action@v1
        with:
          directory: "${{ github.workspace }}/miniwob-plusplus/miniwob/html"
          port: 8080
      - name: Run browsergym-miniwob Unit Tests
        env:
          MINIWOB_URL: "http://localhost:8080/miniwob/"
        run: pytest -n 5 --durations=10 -m 'not pricy' -v tests/miniwob

  # Runs tests/experiments; needs the local MiniWob server and a WebLINX cache
  # directory placed under the runner's temp directory.
  browsergym-experiments:
    runs-on: ubuntu-22.04
    defaults:
      run:
        shell: bash -l {0}
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '>=3.10'
          cache: 'pip'  # caching pip dependencies
      - name: Pip install
        working-directory: ./dev
        run: pip install -r requirements.txt
      - name: Pip list
        run: pip list
      - name: Install Playwright
        run: playwright install chromium --with-deps
      - name: Fetch MiniWob
        uses: actions/checkout@v4
        with:
          repository: "Farama-Foundation/miniwob-plusplus"
          # Pinned to a specific commit of miniwob-plusplus.
          ref: "7fd85d71a4b60325c6585396ec4f48377d049838"
          path: "miniwob-plusplus"
      - name: Serve MiniWob
        uses: Eun/http-server-action@v1
        with:
          directory: "${{ github.workspace }}/miniwob-plusplus/miniwob/html"
          port: 8080
      - name: Run browsergym-experiments Unit Tests
        env:
          MINIWOB_URL: "http://localhost:8080/miniwob/"
          BROWSERGYM_WEBLINX_CACHE_DIR: "${{ runner.temp }}/weblinx_data"
        run: pytest -n 5 --durations=10 -m 'not pricy' -v tests/experiments

  # Currently disabled: the leading `false &&` makes the condition always false,
  # so this job is skipped on every run.
  browsergym-webarena-fast:
    runs-on: ubuntu-22.04
    if: ${{ false && startsWith(github.ref, 'refs/heads/main') }}
    defaults:
      run:
        shell: bash -l {0}
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '>=3.10'
          cache: 'pip'  # caching pip dependencies
      - name: Pip install
        working-directory: ./dev
        run: pip install -r requirements.txt
      - name: Pip list
        run: pip list
      - name: Install Playwright
        run: playwright install chromium --with-deps
      - name: Run browsergym-webarena not slow Unit Tests
        env:
          WA_SHOPPING: "${{ vars.WA_SHOPPING }}"
          WA_SHOPPING_ADMIN: "${{ vars.WA_SHOPPING_ADMIN }}"
          WA_REDDIT: "${{ vars.WA_REDDIT }}"
          WA_GITLAB: "${{ vars.WA_GITLAB }}"
          WA_WIKIPEDIA: "${{ vars.WA_WIKIPEDIA }}"
          WA_MAP: "${{ vars.WA_MAP }}"
          WA_HOMEPAGE: "${{ vars.WA_HOMEPAGE }}"
          # Explicitly empty. NOTE(review): presumably so tests cannot reach the
          # real OpenAI API - confirm.
          OPENAI_API_KEY: ""
        run: pytest -n 5 --durations=10 -m 'not slow and not pricy' --slowmo 1000 -v tests/webarena

  # Depends on browsergym-webarena-fast. GitHub skips a job whose `needs`
  # dependency was skipped, so while the fast job is disabled this one does not
  # run either.
  browsergym-webarena-slow:
    runs-on: ubuntu-22.04
    needs:
      - browsergym-webarena-fast
    defaults:
      run:
        shell: bash -l {0}
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '>=3.10'
          cache: 'pip'  # caching pip dependencies
      - name: Pip install
        working-directory: ./dev
        run: pip install -r requirements.txt
      - name: Pip list
        run: pip list
      - name: Install Playwright
        run: playwright install chromium --with-deps
      - name: Run browsergym-webarena slow Unit Tests
        env:
          WA_SHOPPING: "${{ vars.WA_SHOPPING }}"
          WA_SHOPPING_ADMIN: "${{ vars.WA_SHOPPING_ADMIN }}"
          WA_REDDIT: "${{ vars.WA_REDDIT }}"
          WA_GITLAB: "${{ vars.WA_GITLAB }}"
          WA_WIKIPEDIA: "${{ vars.WA_WIKIPEDIA }}"
          WA_MAP: "${{ vars.WA_MAP }}"
          WA_HOMEPAGE: "${{ vars.WA_HOMEPAGE }}"
          # Explicitly empty. NOTE(review): presumably so tests cannot reach the
          # real OpenAI API - confirm.
          OPENAI_API_KEY: ""
        run: pytest -n 5 --durations=10 -m 'slow and not pricy' --slowmo 1000 -v tests/webarena

  # Currently disabled: the leading `false &&` makes the condition always false,
  # so this job is skipped on every run.
  browsergym-visualwebarena-fast:
    runs-on: ubuntu-22.04
    if: ${{ false && startsWith(github.ref, 'refs/heads/main') }}
    defaults:
      run:
        shell: bash -l {0}
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '>=3.10'
          cache: 'pip'  # caching pip dependencies
      - name: Pip install
        working-directory: ./dev
        run: pip install -r requirements.txt
      - name: Pip list
        run: pip list
      - name: Install Playwright
        run: playwright install chromium --with-deps
      - name: Run browsergym-visualwebarena not slow Unit Tests
        env:
          VWA_CLASSIFIEDS: "${{ vars.VWA_CLASSIFIEDS }}"
          VWA_CLASSIFIEDS_RESET_TOKEN: "${{ vars.VWA_CLASSIFIEDS_RESET_TOKEN }}"
          VWA_SHOPPING: "${{ vars.VWA_SHOPPING }}"
          VWA_REDDIT: "${{ vars.VWA_REDDIT }}"
          VWA_WIKIPEDIA: "${{ vars.VWA_WIKIPEDIA }}"
          VWA_HOMEPAGE: "${{ vars.VWA_HOMEPAGE }}"
          # Explicitly empty. NOTE(review): presumably so tests cannot reach the
          # real OpenAI API - confirm.
          OPENAI_API_KEY: ""
        run: |
          pytest -n 5 --durations=10 -m 'not slow and not pricy' --slowmo 1000 -v tests/visualwebarena

  # Depends on browsergym-visualwebarena-fast. GitHub skips a job whose `needs`
  # dependency was skipped, so while the fast job is disabled this one does not
  # run either.
  browsergym-visualwebarena-slow:
    runs-on: ubuntu-22.04
    needs:
      - browsergym-visualwebarena-fast
    defaults:
      run:
        shell: bash -l {0}
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '>=3.10'
          cache: 'pip'  # caching pip dependencies
      - name: Pip install
        working-directory: ./dev
        run: pip install -r requirements.txt
      - name: Pip list
        run: pip list
      - name: Install Playwright
        run: playwright install chromium --with-deps
      - name: Run browsergym-visualwebarena slow Unit Tests
        env:
          VWA_CLASSIFIEDS: "${{ vars.VWA_CLASSIFIEDS }}"
          VWA_CLASSIFIEDS_RESET_TOKEN: "${{ vars.VWA_CLASSIFIEDS_RESET_TOKEN }}"
          VWA_SHOPPING: "${{ vars.VWA_SHOPPING }}"
          VWA_REDDIT: "${{ vars.VWA_REDDIT }}"
          VWA_WIKIPEDIA: "${{ vars.VWA_WIKIPEDIA }}"
          VWA_HOMEPAGE: "${{ vars.VWA_HOMEPAGE }}"
          # Explicitly empty. NOTE(review): presumably so tests cannot reach the
          # real OpenAI API - confirm.
          OPENAI_API_KEY: ""
        # First invocation: tests not marked `serial`, on 5 workers.
        # Second invocation: tests marked `serial`, without `-n`.
        run: |
          pytest -n 5 --durations=10 -m 'slow and not pricy and not serial' --slowmo 1000 -v tests/visualwebarena
          pytest --durations=10 -m 'slow and not pricy and serial' --slowmo 1000 -v tests/visualwebarena

  # Runs tests/assistantbench with 5 pytest-xdist workers, excluding `pricy` tests.
  browsergym-assistantbench:
    runs-on: ubuntu-22.04
    defaults:
      run:
        shell: bash -l {0}
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '>=3.10'
          cache: 'pip'  # caching pip dependencies
      - name: Pip install
        working-directory: ./dev
        run: pip install -r requirements.txt
      - name: Pip list
        run: pip list
      - name: Install Playwright
        run: playwright install chromium --with-deps
      - name: Run browsergym-assistantbench Unit Tests
        env:
          # NOTE(review): these VWA_* variables are the same set used by the
          # visualwebarena jobs - confirm the assistantbench tests need them.
          VWA_CLASSIFIEDS: "${{ vars.VWA_CLASSIFIEDS }}"
          VWA_CLASSIFIEDS_RESET_TOKEN: "${{ vars.VWA_CLASSIFIEDS_RESET_TOKEN }}"
          VWA_SHOPPING: "${{ vars.VWA_SHOPPING }}"
          VWA_REDDIT: "${{ vars.VWA_REDDIT }}"
          VWA_WIKIPEDIA: "${{ vars.VWA_WIKIPEDIA }}"
          VWA_HOMEPAGE: "${{ vars.VWA_HOMEPAGE }}"
          # Explicitly empty. NOTE(review): presumably so tests cannot reach the
          # real OpenAI API - confirm.
          OPENAI_API_KEY: ""
        run: |
          pytest -n 5 --durations=10 -m 'not pricy' --slowmo 1000 -v tests/assistantbench