Skip to content

Commit

Permalink
Merge pull request #70 from ServiceNow/gasse/patch_20
Browse files Browse the repository at this point in the history
Infeasible actions for WebArena
  • Loading branch information
recursix authored Jun 18, 2024
2 parents 1f948d5 + eede5d7 commit cb5dc86
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 0 deletions.
1 change: 1 addition & 0 deletions core/src/browsergym/core/action/highlevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ def __init__(
self,
subsets: Optional[ActionSubset | list[ActionSubset]] = [
"chat",
"infeas",
"bid",
"nav",
"tab",
Expand Down
2 changes: 2 additions & 0 deletions webarena/src/browsergym/webarena/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,8 @@ def validate(
# Map the last chat message onto the WebArena action used for evaluation:
#   - an "assistant" message becomes a STOP action carrying that message,
#   - an "infeasible" message becomes a STOP action with the answer "N/A",
#   - anything else (or no message at all) yields a NONE action.
# NOTE: this has to be a single if/elif/else chain. With two independent
# `if` statements the trailing `else` also runs for "assistant" messages
# and overwrites their STOP action with a NONE action.
if chat_messages and chat_messages[-1]["role"] == "assistant":
    last_action = {"action_type": ActionTypes.STOP, "answer": chat_messages[-1]["message"]}
elif chat_messages and chat_messages[-1]["role"] == "infeasible":
    last_action = {"action_type": ActionTypes.STOP, "answer": "N/A"}
else:
    last_action = {"action_type": ActionTypes.NONE, "answer": ""}
# llm_fuzzy_match() bugfix
Expand Down
53 changes: 53 additions & 0 deletions webarena/tests/test_infeasible.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import gymnasium as gym
import logging
import os
import pytest
import random

from tenacity import retry, stop_after_attempt, retry_if_exception_type

# register gym environments
import browsergym.webarena

# bugfix: use same playwright instance in browsergym and pytest
from utils import setup_playwright


# Browser settings forwarded to gym.make() by the test below. Setting the
# DISPLAY_BROWSER environment variable (to any value) switches to a visible
# browser with slow_mo=1000; without it the browser is headless and slow_mo
# is left unset.
__SLOW_MO = None if "DISPLAY_BROWSER" not in os.environ else 1000
__HEADLESS = "DISPLAY_BROWSER" not in os.environ

# WebArena task ids parametrized as infeasible=True in the test below.
INFEAS_TASK_IDS = [101, 115, 166]
# WebArena task ids parametrized as infeasible=False in the test below.
FEAS_TASK_IDS = [165, 187, 199]


@retry(
    stop=stop_after_attempt(5),
    retry=retry_if_exception_type(TimeoutError),
    reraise=True,
    before_sleep=lambda _: logging.info("Retrying due to a TimeoutError..."),
)
@pytest.mark.parametrize(
    "task_id,infeasible",
    [(task_id, True) for task_id in INFEAS_TASK_IDS]
    + [(task_id, False) for task_id in FEAS_TASK_IDS],
)
@pytest.mark.slow
def test_infeasible(task_id, infeasible):
    """Check how a `report_infeasible` action is scored on a WebArena task.

    The environment for ``task_id`` is created and reset, then a single
    ``report_infeasible(...)`` action is sent. The episode is expected to
    terminate in both cases, with a reward of 1.0 when ``infeasible`` is
    true and 0.0 otherwise.

    The whole test is retried (up to 5 attempts) when a ``TimeoutError``
    is raised.
    """
    env = gym.make(
        f"browsergym/webarena.{task_id}",
        headless=__HEADLESS,
        slow_mo=__SLOW_MO,
    )
    # Close the environment on every exit path: a failed assertion or a
    # TimeoutError (which triggers a retry with a brand-new environment)
    # must not leave the previous environment open.
    try:
        env.reset()

        action = 'report_infeasible("Unachievable task.")'
        _, reward, term, _, _ = env.step(action)

        expected_reward = 1.0 if infeasible else 0.0
        assert term, "episode should terminate after report_infeasible"
        assert reward == expected_reward
    finally:
        env.close()

0 comments on commit cb5dc86

Please sign in to comment.