|
10 | 10 | from openhands.core.main import run_controller
|
11 | 11 | from openhands.core.schema.agent import AgentState
|
12 | 12 | from openhands.events.action.empty import NullAction
|
| 13 | +from openhands.events.action.message import MessageAction |
| 14 | +from openhands.events.event import EventSource |
13 | 15 | from openhands.events.observation.commands import CmdOutputObservation
|
14 | 16 |
|
15 | 17 |
|
@@ -46,6 +48,36 @@ def test_simple_replay(temp_dir, runtime_cls, run_as_openhands):
|
46 | 48 | _close_test_runtime(runtime)
|
47 | 49 |
|
48 | 50 |
|
def test_simple_gui_replay(temp_dir, runtime_cls, run_as_openhands):
    """
    A simple replay test that involves simple terminal operations and edits
    (writing a Vue.js App), using the default agent.

    Note:
    1. This trajectory is exported from GUI mode, meaning it has extra
       environmental actions that don't appear in headless mode's trajectories.
    2. In GUI mode, agents typically don't finish; rather, they wait for the next
       task from the user, so this exported trajectory ends with
       awaiting_user_input.
    """
    runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)

    # Always release the runtime, even when the replay raises or an assertion
    # fails; otherwise a failing test leaves the runtime open.
    try:
        config = _get_config('basic_gui_mode')

        state: State | None = asyncio.run(
            run_controller(
                config=config,
                initial_user_action=NullAction(),
                runtime=runtime,
                # exit on message, otherwise this would be stuck on waiting for
                # user input
                exit_on_message=True,
            )
        )

        # The annotation allows None; fail with a readable message instead of
        # an AttributeError on the next line.
        assert state is not None, 'run_controller returned no state'
        assert state.agent_state == AgentState.FINISHED
    finally:
        _close_test_runtime(runtime)
| 79 | + |
| 80 | + |
49 | 81 | def test_replay_wrong_initial_state(temp_dir, runtime_cls, run_as_openhands):
|
50 | 82 | """
|
51 | 83 | Replay requires a consistent initial state to start with, otherwise it might
|
@@ -78,3 +110,43 @@ def test_replay_wrong_initial_state(temp_dir, runtime_cls, run_as_openhands):
|
78 | 110 | assert has_error_in_action
|
79 | 111 |
|
80 | 112 | _close_test_runtime(runtime)
|
| 113 | + |
| 114 | + |
def test_replay_basic_interactions(temp_dir, runtime_cls, run_as_openhands):
    """
    Replay a trajectory that involves interactions, i.e. with user messages
    in the middle. This tests two things:
    1) The controller should be able to replay all actions without human
       interference (no asking for user input).
    2) The user messages in the trajectory should appear in the history.
    """
    runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)

    # Always release the runtime, even when the replay raises or an assertion
    # fails; otherwise a failing test leaves the runtime open.
    try:
        config = _get_config('basic_interactions')

        state: State | None = asyncio.run(
            run_controller(
                config=config,
                initial_user_action=NullAction(),
                runtime=runtime,
            )
        )

        # The annotation allows None; fail with a readable message instead of
        # an AttributeError on the next line.
        assert state is not None, 'run_controller returned no state'
        assert state.agent_state == AgentState.FINISHED

        # all user messages appear in the history, so that after a replay
        # (assuming the trajectory doesn't end with `finish` action), LLM knows
        # about all the context and can continue
        expected_user_messages = [
            "what's 1+1?",
            "No, I mean by Goldbach's conjecture!",
            'Finish please',
        ]
        # NOTE(review): this reads the private `_source` attribute, as the
        # original test did; switch to a public accessor if the event class
        # exposes one.
        actual_user_messages = [
            event.message
            for event in state.history
            if isinstance(event, MessageAction)
            and event._source == EventSource.USER
        ]
        # A single list comparison checks both order and count, and reports a
        # readable diff instead of an IndexError when extra messages appear.
        assert actual_user_messages == expected_user_messages
    finally:
        _close_test_runtime(runtime)
0 commit comments