Skip to content

added some flexibility to create your custom benchmark splits (#307) #825

added some flexibility to create your custom benchmark splits (#307)

added some flexibility to create your custom benchmark splits (#307) #825

Workflow file for this run

# Unit-test workflow: runs on pushes to `main`, on every pull request,
# and on manual dispatch.
name: Unit tests

on:
  push:
    branches:
      - main
  pull_request:
  workflow_dispatch:

# Least-privilege token: the jobs in this workflow only check out code and
# run tests, so read access to repository contents is sufficient.
permissions:
  contents: read

jobs:
# Formatting gate: installs a pinned `black` and runs it in `--check` mode
# over the whole repository.
code-format:
  runs-on: ubuntu-latest
  defaults:
    run:
      # Every `run` step of this job executes in a bash login shell.
      shell: bash -l {0}
  steps:
    - name: Checkout Repository
      uses: actions/checkout@v4

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '>=3.10'
        cache: 'pip' # caching pip dependencies

    - name: Pip install
      # The requirement is quoted so the shell never interprets `[jupyter]`
      # as a glob character class.
      run: pip install "black[jupyter]==24.2.0" blacken-docs

    - name: Code Formatting
      run: black . --check
# Installs AgentLab (from its `main` branch) next to this repository and
# prepares a local MiniWob server.
# NOTE(review): the only test step is commented out below, so this job
# currently verifies installation and setup only.
agentlab:
  runs-on: ubuntu-22.04
  defaults:
    run:
      # Every `run` step of this job executes in a bash login shell.
      shell: bash -l {0}
  steps:
    - name: Checkout Repository
      uses: actions/checkout@v4

    # AgentLab is cloned into ./agentlab inside the workspace.
    - name: Checkout AgentLab
      uses: actions/checkout@v4
      with:
        repository: 'ServiceNow/AgentLab'
        ref: 'main'
        path: 'agentlab'

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '>=3.10'
        cache: 'pip' # caching pip dependencies

    - name: Install AgentLab
      working-directory: ./agentlab
      run: pip install -e .

    - name: Install BrowserGym
      run: make install

    - name: Pip list
      run: pip list

    # MiniWob is pinned to an exact commit and cloned into ./miniwob-plusplus.
    - name: Fetch MiniWob
      uses: actions/checkout@v4
      with:
        repository: "Farama-Foundation/miniwob-plusplus"
        ref: "7fd85d71a4b60325c6585396ec4f48377d049838"
        path: "miniwob-plusplus"

    - name: Serve MiniWob
      uses: Eun/http-server-action@v1
      with:
        directory: "${{ github.workspace }}/miniwob-plusplus/miniwob/html"
        port: 8080

    - name: Pre-download tokenizer resources (for WebArena)
      run: python -c "import nltk; nltk.download('punkt_tab')"

    # Disabled test step, kept for reference:
    # - name: Run AgentLab Unit Tests
    #   env:
    #     MINIWOB_URL: "http://localhost:8080/miniwob/"
    #   run: pytest -n 5 --durations=10 -m 'not pricy' -v agentlab/tests/experiments/test_launch_exp.py
# Runs the test suite under tests/core, excluding tests marked `pricy`.
browsergym-core:
  runs-on: ubuntu-22.04
  defaults:
    run:
      # All `run` steps use a bash login shell.
      shell: bash -l {0}
  steps:
    - name: Checkout Repository
      uses: actions/checkout@v4

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: ">=3.10"
        cache: pip # caching pip dependencies

    # Dependencies come from dev/requirements.txt.
    - name: Pip install
      working-directory: ./dev
      run: pip install -r requirements.txt

    - name: Pip list
      run: pip list

    - name: Install Playwright
      run: playwright install chromium --with-deps

    - name: Run browsergym-core Unit Tests
      run: pytest -n 5 --durations=10 -m 'not pricy' -v tests/core
# Runs the test suite under tests/miniwob against a MiniWob instance served
# locally on port 8080, excluding tests marked `pricy`.
browsergym-miniwob:
  runs-on: ubuntu-22.04
  defaults:
    run:
      # All `run` steps use a bash login shell.
      shell: bash -l {0}
  steps:
    - name: Checkout Repository
      uses: actions/checkout@v4

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: ">=3.10"
        cache: pip # caching pip dependencies

    # Dependencies come from dev/requirements.txt.
    - name: Pip install
      working-directory: ./dev
      run: pip install -r requirements.txt

    - name: Pip list
      run: pip list

    - name: Install Playwright
      run: playwright install chromium --with-deps

    # MiniWob is pinned to an exact commit and cloned into ./miniwob-plusplus.
    - name: Fetch MiniWob
      uses: actions/checkout@v4
      with:
        repository: 'Farama-Foundation/miniwob-plusplus'
        ref: '7fd85d71a4b60325c6585396ec4f48377d049838'
        path: 'miniwob-plusplus'

    - name: Serve MiniWob
      uses: Eun/http-server-action@v1
      with:
        directory: '${{ github.workspace }}/miniwob-plusplus/miniwob/html'
        port: 8080

    - name: Run browsergym-miniwob Unit Tests
      env:
        # Points the tests at the server started by the previous step.
        MINIWOB_URL: 'http://localhost:8080/miniwob/'
      run: pytest -n 5 --durations=10 -m 'not pricy' -v tests/miniwob
# Runs the test suite under tests/experiments with a MiniWob instance served
# locally on port 8080, excluding tests marked `pricy`.
browsergym-experiments:
  runs-on: ubuntu-22.04
  defaults:
    run:
      # All `run` steps use a bash login shell.
      shell: bash -l {0}
  steps:
    - name: Checkout Repository
      uses: actions/checkout@v4

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: ">=3.10"
        cache: pip # caching pip dependencies

    # Dependencies come from dev/requirements.txt.
    - name: Pip install
      working-directory: ./dev
      run: pip install -r requirements.txt

    - name: Pip list
      run: pip list

    - name: Install Playwright
      run: playwright install chromium --with-deps

    # MiniWob is pinned to an exact commit and cloned into ./miniwob-plusplus.
    - name: Fetch MiniWob
      uses: actions/checkout@v4
      with:
        repository: 'Farama-Foundation/miniwob-plusplus'
        ref: '7fd85d71a4b60325c6585396ec4f48377d049838'
        path: 'miniwob-plusplus'

    - name: Serve MiniWob
      uses: Eun/http-server-action@v1
      with:
        directory: '${{ github.workspace }}/miniwob-plusplus/miniwob/html'
        port: 8080

    - name: Run browsergym-experiments Unit Tests
      env:
        # Points the tests at the server started by the previous step.
        MINIWOB_URL: 'http://localhost:8080/miniwob/'
        # Set to a directory under the runner's temporary folder.
        BROWSERGYM_WEBLINX_CACHE_DIR: '${{ runner.temp }}/weblinx_data'
      run: pytest -n 5 --durations=10 -m 'not pricy' -v tests/experiments
# Runs the tests under tests/webarena that are marked neither `slow` nor
# `pricy`.
browsergym-webarena-fast:
  runs-on: ubuntu-22.04
  # The leading `false &&` makes this condition always false, so the job is
  # currently disabled.
  if: ${{ false && startsWith(github.ref, 'refs/heads/main') }}
  defaults:
    run:
      # All `run` steps use a bash login shell.
      shell: bash -l {0}
  steps:
    - name: Checkout Repository
      uses: actions/checkout@v4

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: ">=3.10"
        cache: pip # caching pip dependencies

    # Dependencies come from dev/requirements.txt.
    - name: Pip install
      working-directory: ./dev
      run: pip install -r requirements.txt

    - name: Pip list
      run: pip list

    - name: Install Playwright
      run: playwright install chromium --with-deps

    - name: Run browsergym-webarena not slow Unit Tests
      env:
        # WA_* values are read from repository/organization variables.
        WA_SHOPPING: '${{ vars.WA_SHOPPING }}'
        WA_SHOPPING_ADMIN: '${{ vars.WA_SHOPPING_ADMIN }}'
        WA_REDDIT: '${{ vars.WA_REDDIT }}'
        WA_GITLAB: '${{ vars.WA_GITLAB }}'
        WA_WIKIPEDIA: '${{ vars.WA_WIKIPEDIA }}'
        WA_MAP: '${{ vars.WA_MAP }}'
        WA_HOMEPAGE: '${{ vars.WA_HOMEPAGE }}'
        # Deliberately set to the empty string.
        OPENAI_API_KEY: ''
      run: pytest -n 5 --durations=10 -m 'not slow and not pricy' --slowmo 1000 -v tests/webarena
# Runs the tests under tests/webarena that are marked `slow` but not `pricy`.
browsergym-webarena-slow:
  runs-on: ubuntu-22.04
  # Gated on the fast WebArena job.
  needs: [browsergym-webarena-fast]
  defaults:
    run:
      # All `run` steps use a bash login shell.
      shell: bash -l {0}
  steps:
    - name: Checkout Repository
      uses: actions/checkout@v4

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: ">=3.10"
        cache: pip # caching pip dependencies

    # Dependencies come from dev/requirements.txt.
    - name: Pip install
      working-directory: ./dev
      run: pip install -r requirements.txt

    - name: Pip list
      run: pip list

    - name: Install Playwright
      run: playwright install chromium --with-deps

    - name: Run browsergym-webarena slow Unit Tests
      env:
        # WA_* values are read from repository/organization variables.
        WA_SHOPPING: '${{ vars.WA_SHOPPING }}'
        WA_SHOPPING_ADMIN: '${{ vars.WA_SHOPPING_ADMIN }}'
        WA_REDDIT: '${{ vars.WA_REDDIT }}'
        WA_GITLAB: '${{ vars.WA_GITLAB }}'
        WA_WIKIPEDIA: '${{ vars.WA_WIKIPEDIA }}'
        WA_MAP: '${{ vars.WA_MAP }}'
        WA_HOMEPAGE: '${{ vars.WA_HOMEPAGE }}'
        # Deliberately set to the empty string.
        OPENAI_API_KEY: ''
      run: pytest -n 5 --durations=10 -m 'slow and not pricy' --slowmo 1000 -v tests/webarena
# Runs the tests under tests/visualwebarena that are marked neither `slow`
# nor `pricy`.
browsergym-visualwebarena-fast:
  runs-on: ubuntu-22.04
  # The leading `false &&` makes this condition always false, so the job is
  # currently disabled.
  if: ${{ false && startsWith(github.ref, 'refs/heads/main') }}
  defaults:
    run:
      # All `run` steps use a bash login shell.
      shell: bash -l {0}
  steps:
    - name: Checkout Repository
      uses: actions/checkout@v4

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: ">=3.10"
        cache: pip # caching pip dependencies

    # Dependencies come from dev/requirements.txt.
    - name: Pip install
      working-directory: ./dev
      run: pip install -r requirements.txt

    - name: Pip list
      run: pip list

    - name: Install Playwright
      run: playwright install chromium --with-deps

    - name: Run browsergym-visualwebarena not slow Unit Tests
      env:
        # VWA_* values are read from repository/organization variables.
        VWA_CLASSIFIEDS: '${{ vars.VWA_CLASSIFIEDS }}'
        VWA_CLASSIFIEDS_RESET_TOKEN: '${{ vars.VWA_CLASSIFIEDS_RESET_TOKEN }}'
        VWA_SHOPPING: '${{ vars.VWA_SHOPPING }}'
        VWA_REDDIT: '${{ vars.VWA_REDDIT }}'
        VWA_WIKIPEDIA: '${{ vars.VWA_WIKIPEDIA }}'
        VWA_HOMEPAGE: '${{ vars.VWA_HOMEPAGE }}'
        # Deliberately set to the empty string.
        OPENAI_API_KEY: ''
      run: |
        pytest -n 5 --durations=10 -m 'not slow and not pricy' --slowmo 1000 -v tests/visualwebarena
# Runs the tests under tests/visualwebarena that are marked `slow` but not
# `pricy`: first the non-`serial` ones in parallel (`-n 5`), then the
# `serial` ones in a single pytest process.
browsergym-visualwebarena-slow:
  runs-on: ubuntu-22.04
  # Gated on the fast VisualWebArena job.
  needs:
    - browsergym-visualwebarena-fast
  defaults:
    run:
      # Every `run` step of this job executes in a bash login shell.
      shell: bash -l {0}
  steps:
    - name: Checkout Repository
      uses: actions/checkout@v4

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '>=3.10'
        cache: 'pip' # caching pip dependencies

    - name: Pip install
      working-directory: ./dev
      run: pip install -r requirements.txt

    - name: Pip list
      run: pip list

    - name: Install Playwright
      run: playwright install chromium --with-deps

    - name: Run browsergym-visualwebarena slow Unit Tests
      env:
        VWA_CLASSIFIEDS: "${{ vars.VWA_CLASSIFIEDS }}"
        VWA_CLASSIFIEDS_RESET_TOKEN: "${{ vars.VWA_CLASSIFIEDS_RESET_TOKEN }}"
        VWA_SHOPPING: "${{ vars.VWA_SHOPPING }}"
        VWA_REDDIT: "${{ vars.VWA_REDDIT }}"
        VWA_WIKIPEDIA: "${{ vars.VWA_WIKIPEDIA }}"
        VWA_HOMEPAGE: "${{ vars.VWA_HOMEPAGE }}"
        OPENAI_API_KEY: ""
      run: |
        # The custom `bash -l {0}` shell template does not receive GitHub's
        # default fail-fast flags. Without them, only the exit status of the
        # last command decides the step result, so a failure in the first
        # pytest run would be masked whenever the second run passes.
        set -eo pipefail
        pytest -n 5 --durations=10 -m 'slow and not pricy and not serial' --slowmo 1000 -v tests/visualwebarena
        pytest --durations=10 -m 'slow and not pricy and serial' --slowmo 1000 -v tests/visualwebarena
# Runs the test suite under tests/assistantbench, excluding tests marked
# `pricy`.
browsergym-assistantbench:
  runs-on: ubuntu-22.04
  defaults:
    run:
      # All `run` steps use a bash login shell.
      shell: bash -l {0}
  steps:
    - name: Checkout Repository
      uses: actions/checkout@v4

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: ">=3.10"
        cache: pip # caching pip dependencies

    # Dependencies come from dev/requirements.txt.
    - name: Pip install
      working-directory: ./dev
      run: pip install -r requirements.txt

    - name: Pip list
      run: pip list

    - name: Install Playwright
      run: playwright install chromium --with-deps

    - name: Run browsergym-assistantbench Unit Tests
      env:
        # NOTE(review): these VWA_* variables match the ones used by the
        # VisualWebArena jobs; confirm whether the AssistantBench tests
        # actually need them.
        VWA_CLASSIFIEDS: '${{ vars.VWA_CLASSIFIEDS }}'
        VWA_CLASSIFIEDS_RESET_TOKEN: '${{ vars.VWA_CLASSIFIEDS_RESET_TOKEN }}'
        VWA_SHOPPING: '${{ vars.VWA_SHOPPING }}'
        VWA_REDDIT: '${{ vars.VWA_REDDIT }}'
        VWA_WIKIPEDIA: '${{ vars.VWA_WIKIPEDIA }}'
        VWA_HOMEPAGE: '${{ vars.VWA_HOMEPAGE }}'
        # Deliberately set to the empty string.
        OPENAI_API_KEY: ''
      run: |
        pytest -n 5 --durations=10 -m 'not pricy' --slowmo 1000 -v tests/assistantbench