diff --git a/README.md b/README.md index e5cb681b..d84a7785 100644 --- a/README.md +++ b/README.md @@ -161,6 +161,42 @@ Terminal selection is configured through the `terminal_config` in your script co --- +#### 2.5. Timeouts + +`debug-gym` provides several timeout mechanisms to ensure agents don't hang indefinitely: + +| Timeout Type | Description | Default | Configuration | +| :-: | :----- | :-: | :----- | +| **Command Timeout** | Maximum time for a single command (e.g., `bash`, `eval`) to execute. Prevents blocking commands like `serve_forever()` or infinite loops from hanging the agent. | 300s (5 min) | `terminal.command_timeout` | +| **Run Timeout** | Maximum time for a single eval/run (e.g., pytest execution). | 300s (5 min) | `env.run_timeout` | +| **Agent Step Timeout** | Maximum time for the LLM to generate a response. | varies | LLM provider settings | +| **Session Lifetime** | Total time an agent can interact with the environment. | unlimited | Application-level | + +**Command Timeout** is particularly important for exploration agents that might accidentally run blocking scripts. When a command times out, it returns `(False, "Command timed out after X seconds")` with any partial output. + +Example terminal configuration with custom timeout: + +```yaml +terminal: + type: docker + command_timeout: 600 # 10 minutes per command (default: 300) +``` + +For Kubernetes deployments: + +```yaml +terminal: + type: kubernetes + command_timeout: 900 # 15 minutes for longer-running tests + namespace: debug-gym + base_image: your-image:tag +``` + +> [!TIP] +> If your agent runs `eval` or `submit` tools that execute large test suites, consider increasing `command_timeout` to accommodate longer test runs. + +--- + ## 3. Running Baselines We use `.yaml` files to specify configurations. Example config files can be found in `configs/`. 
To run an agent: diff --git a/analysis/json_log_viewer/README.md b/analysis/json_log_viewer/README.md new file mode 100644 index 00000000..d7d4cf25 --- /dev/null +++ b/analysis/json_log_viewer/README.md @@ -0,0 +1,40 @@ +# JSON Log Viewer + +A Flask-based web viewer for debug-gym trajectory JSON files. Visualize agent exploration sessions with step-by-step action breakdowns. + +## Installation + +```bash +cd analysis/json_log_viewer +pip install -r requirements.txt +``` + +## Usage + +Start the server: + +```bash +python json_log_viewer.py -p 5050 +``` + +Then open http://127.0.0.1:5050 in your browser. + +### Loading Trajectories + +You can load trajectory files in several ways: + +1. **Upload**: Click "Upload" and select a JSON file +2. **Browse**: Click "Browse Files" to navigate your filesystem +3. **API**: Load programmatically via `GET /load_file_from_path?path=/path/to/trajectory.json` + +### Integration with Gray Tree Frog + +The viewer supports CORS requests, allowing Gray Tree Frog's lineage visualization to open trajectories directly. When viewing the lineage graph, click "View trajectory" on any discovery to open its exploration session. + +## Features + +- Step-by-step trajectory visualization +- Color-coded action types (bash, view, edit, etc.) 
+- Detailed bash command classification +- Statistics view showing action distribution +- Keyboard navigation between steps diff --git a/analysis/json_log_viewer/json_log_viewer.py b/analysis/json_log_viewer/json_log_viewer.py index 4672a16d..9facb05a 100644 --- a/analysis/json_log_viewer/json_log_viewer.py +++ b/analysis/json_log_viewer/json_log_viewer.py @@ -5,6 +5,7 @@ import shlex from flask import Flask, jsonify, redirect, render_template, request, url_for +from flask_cors import cross_origin from werkzeug.utils import secure_filename app = Flask(__name__) @@ -581,6 +582,7 @@ def browse_directory(): @app.route("/load_file_from_path") +@cross_origin() # Allow cross-origin requests (for Gray Tree Frog visualization) def load_file_from_path(): """Load a JSON file from a specific path""" global data, current_file diff --git a/analysis/json_log_viewer/requirements.txt b/analysis/json_log_viewer/requirements.txt new file mode 100644 index 00000000..f1b79921 --- /dev/null +++ b/analysis/json_log_viewer/requirements.txt @@ -0,0 +1,3 @@ +flask +flask-cors +werkzeug diff --git a/configs/free_env.yaml b/configs/free_env.yaml index f154ed5b..6d03189a 100644 --- a/configs/free_env.yaml +++ b/configs/free_env.yaml @@ -20,6 +20,7 @@ task_data: terminal: type: docker + # command_timeout: 300 # Max time (seconds) for a single command (default: 300 = 5 min) agent: type: froggy diff --git a/configs/r2egym.yaml b/configs/r2egym.yaml index 43cb6374..30718c90 100644 --- a/configs/r2egym.yaml +++ b/configs/r2egym.yaml @@ -23,6 +23,7 @@ dataset: terminal: type: docker + # command_timeout: 300 # Max time (seconds) for a single command (default: 300 = 5 min) agent: type: froggy diff --git a/configs/simple.yaml b/configs/simple.yaml index a4896f8c..9250782f 100644 --- a/configs/simple.yaml +++ b/configs/simple.yaml @@ -19,6 +19,7 @@ task_data: terminal: type: docker + # command_timeout: 300 # Max time (seconds) for a single command (default: 300 = 5 min) agent: type: simple_agent diff 
--git a/configs/swebench.yaml b/configs/swebench.yaml index b67bfeaf..f56a8ab9 100644 --- a/configs/swebench.yaml +++ b/configs/swebench.yaml @@ -25,6 +25,7 @@ dataset: terminal: type: docker + # command_timeout: 300 # Max time (seconds) for a single command (default: 300 = 5 min) agent: type: froggy diff --git a/configs/swesmith.yaml b/configs/swesmith.yaml index acd6f647..93a29ad3 100644 --- a/configs/swesmith.yaml +++ b/configs/swesmith.yaml @@ -22,6 +22,7 @@ dataset: terminal: type: docker + # command_timeout: 300 # Max time (seconds) for a single command (default: 300 = 5 min) agent: type: froggy diff --git a/debug_gym/gym/terminals/docker.py b/debug_gym/gym/terminals/docker.py index 787a0274..c3d0a6b8 100644 --- a/debug_gym/gym/terminals/docker.py +++ b/debug_gym/gym/terminals/docker.py @@ -1,5 +1,6 @@ import atexit import os +import shlex import tarfile import uuid from io import BytesIO @@ -29,16 +30,25 @@ def __init__( base_image: str | None = None, registry: str = "", setup_commands: list[str] | None = None, + command_timeout: int = 300, **kwargs, ): """ - volumes (dict or list): A dictionary to configure volumes mounted - inside the container. The key is either the host path or a - volume name, and the value is a dictionary with the keys: - - - ``bind`` The path to mount the volume inside the container - - ``mode`` Either ``rw`` to mount the volume read/write, or - ``ro`` to mount it read-only. + Args: + working_dir: Working directory inside the container. + session_commands: Commands to run at the start of each session. + env_vars: Environment variables to set in the container. + logger: Logger instance. + base_image: Docker image to use. + registry: Docker registry URL. + setup_commands: Commands to run once when setting up the container. + command_timeout: Default timeout in seconds for individual command execution + (default: 300 = 5 minutes). This is NOT the terminal session lifetime. + Commands that exceed this timeout will be killed. 
Can be configured via YAML: + terminal_config: + type: docker + command_timeout: 60 + **kwargs: Additional arguments (ignored with debug log). """ super().__init__( working_dir=working_dir, @@ -50,9 +60,17 @@ def __init__( self.base_image = base_image self.registry = registry.rstrip("/") + "/" if registry else "" self.setup_commands = setup_commands or [] - self.docker_client = docker.from_env(timeout=600) + self.command_timeout = command_timeout + self._docker_client = None # Lazily initialized self._container = None + @property + def docker_client(self): + """Lazy initialization of Docker client.""" + if self._docker_client is None: + self._docker_client = docker.from_env(timeout=600) + return self._docker_client + def _ensure_container_running(self): """Verify that the container exists and is running.""" container = self.container @@ -111,15 +129,34 @@ def new_shell_session(self): self.sessions.append(session) return session - def prepare_command(self, entrypoint: str | list[str]) -> list[str]: + def prepare_command( + self, entrypoint: str | list[str], timeout: int | None = None + ) -> list[str]: """Prepares a shell command by combining session commands and entrypoint commands. - Then wraps the command in a shell call.""" + Then wraps the command in a shell call with optional timeout. + + Args: + entrypoint: Command(s) to run. + timeout: Optional timeout in seconds. If provided, the command is wrapped + with the Unix `timeout` command to ensure it doesn't block forever. 
+ """ if isinstance(entrypoint, str): entrypoint = [entrypoint] if self.session_commands: entrypoint = self.session_commands + entrypoint - entrypoint = " && ".join(entrypoint) - command = ["/bin/bash", "-c", entrypoint] + entrypoint_str = " && ".join(entrypoint) + + # Wrap with timeout command if specified + if timeout is not None: + # Use timeout command to kill the process if it exceeds the limit + # Exit code 124 indicates timeout was reached + entrypoint_str = ( + f"timeout {timeout} /bin/bash -c {shlex.quote(entrypoint_str)}" + ) + command = ["/bin/bash", "-c", entrypoint_str] + else: + command = ["/bin/bash", "-c", entrypoint_str] + return command def run( @@ -129,14 +166,27 @@ def run( raises: bool = False, strip_output: bool = True, ) -> tuple[bool, str]: - """Run a command in the terminal. Return command status and output.""" - command = self.prepare_command(entrypoint) + """Run a command in the terminal. Return command status and output. + + Args: + entrypoint: Command(s) to run. + timeout: Timeout in seconds for this command. If the command exceeds this + time, it will be killed and the method returns (False, timeout_message). + If None, uses self.command_timeout. + raises: If True, raise ValueError on command failure. + strip_output: If True, strip trailing newlines from output. + + Returns: + Tuple of (success, output). Success is False if command failed or timed out. + """ + # Use command_timeout if not specified per-call + effective_timeout = timeout if timeout is not None else self.command_timeout + command = self.prepare_command(entrypoint, timeout=effective_timeout) - self.logger.debug(f"Exec run: {command}") + self.logger.debug(f"Exec run (timeout={effective_timeout}s): {command}") self._ensure_container_running() - # TODO: docker exec_run timeout? try: status, output = self.container.exec_run( command, @@ -153,12 +203,25 @@ def run( raise UnrecoverableTerminalError( "Docker exec failed due to an unexpected container error." 
) from exc - success = status == 0 output = output.decode() if strip_output: output = output.strip("\r\n").strip("\n") + # Check for timeout (exit code 124 from the timeout command) + if status == 124: + self.logger.warning( + f"Command timed out after {effective_timeout}s: {entrypoint}" + ) + timeout_msg = f"Command timed out after {effective_timeout} seconds" + if output: + output = f"{timeout_msg}\nPartial output:\n{output}" + else: + output = timeout_msg + return False, output + + success = status == 0 + if raises and not success: # Command includes the entrypoint + session commands self.logger.debug(f"Failed to run command `{command}`:\n{output}") @@ -244,6 +307,13 @@ def clean_up(self): def close(self): super().close() self.clean_up() + # Close the Docker client to release connection pool resources + if self._docker_client is not None: + try: + self._docker_client.close() + except Exception as exc: + self.logger.debug(f"Failed to close Docker client: {exc}") + self._docker_client = None def __str__(self): return f"DockerTerminal[{self.container}, {self.working_dir}]" diff --git a/debug_gym/gym/terminals/kubernetes.py b/debug_gym/gym/terminals/kubernetes.py index 5ec7f73a..ddddf786 100644 --- a/debug_gym/gym/terminals/kubernetes.py +++ b/debug_gym/gym/terminals/kubernetes.py @@ -2,6 +2,7 @@ import json import os import random +import shlex import subprocess import time import uuid @@ -261,6 +262,8 @@ def __str__(self): class KubernetesTerminal(Terminal): """ + Kubernetes-based terminal for running commands in pods. + Note: reads values of env variables K8S_NAMESPACE, K8S_DOCKER_SECRET, K8S_DOCKER_CONSTRAINT. """ @@ -281,8 +284,33 @@ def __init__( kube_context: str | None = None, extra_labels: dict | None = None, pod_spec_kwargs: dict = None, + command_timeout: int = 300, **kwargs, ): + """ + Args: + working_dir: Working directory inside the pod. + session_commands: Commands to run at the start of each session. 
+ env_vars: Environment variables to set in the pod. + logger: Logger instance. + setup_commands: Commands to run once when setting up the pod. + pod_name: Custom name for the pod. + base_image: Docker image to use for the pod. + image_pull_secret: Kubernetes secret for pulling images. + registry: Docker registry URL. + namespace: Kubernetes namespace. + kube_config: Path to kubeconfig or "incluster". + kube_context: Kubernetes context to use. + extra_labels: Additional labels for the pod. + pod_spec_kwargs: Additional pod spec configuration. + command_timeout: Default timeout in seconds for individual command execution + (default: 300 = 5 minutes). This is NOT the pod lifetime. Commands that + exceed this timeout will be killed. Can be configured via YAML: + terminal_config: + type: kubernetes + command_timeout: 60 + **kwargs: Additional arguments passed to pod spec. + """ super().__init__( working_dir=working_dir, session_commands=session_commands, @@ -293,6 +321,7 @@ def __init__( self.base_image = base_image self._task_name = base_image self.setup_commands = setup_commands or [] + self.command_timeout = command_timeout self.namespace = namespace or os.environ.get("K8S_NAMESPACE", "default") self.image_pull_secret = image_pull_secret or os.environ.get( "K8S_DOCKER_SECRET" @@ -443,9 +472,17 @@ def new_shell_session(self): self.sessions.append(session) return session - def prepare_command(self, entrypoint: str | list[str]) -> list[str]: + def prepare_command( + self, entrypoint: str | list[str], timeout: int | None = None + ) -> str: """Prepares a shell command by combining session commands and entrypoint commands. - Then wraps the command in a shell call.""" + Then wraps the command in a shell call with optional timeout. + + Args: + entrypoint: Command(s) to run. + timeout: Optional timeout in seconds. If provided, the command is wrapped + with the Unix `timeout` command to ensure it doesn't block forever. 
+ """ if isinstance(entrypoint, str): entrypoint = [entrypoint] if self.session_commands: @@ -465,6 +502,12 @@ def prepare_command(self, entrypoint: str | list[str]) -> list[str]: elif env_prefix: command = f"{env_prefix}{command}" + # Wrap with timeout command if specified + if timeout is not None: + # Use timeout command to kill the process if it exceeds the limit + # Exit code 124 indicates timeout was reached + command = f"timeout {timeout} /bin/bash -c {shlex.quote(command)}" + return command def run( @@ -474,13 +517,30 @@ def run( raises: bool = False, strip_output: bool = True, ) -> tuple[bool, str]: - """Run a command in the pod. Return command status and output.""" + """Run a command in the pod. Return command status and output. + + Args: + entrypoint: Command(s) to run. + timeout: Timeout in seconds for this command. If the command exceeds this + time, it will be killed and the method returns (False, timeout_message). + If None, uses self.command_timeout. + raises: If True, raise ValueError on command failure. + strip_output: If True, strip trailing newlines from output. + + Returns: + Tuple of (success, output). Success is False if command failed or timed out. + """ if not self.pod.is_running(): raise UnrecoverableTerminalError("Pod is not running. 
Cannot run commands.") - command = self.prepare_command(entrypoint) + # Use command_timeout if not specified per-call + effective_timeout = timeout if timeout is not None else self.command_timeout + command = self.prepare_command(entrypoint, timeout=effective_timeout) - self.logger.debug(f"[{self.pod.name}] Kubernetes exec run: {command}") + self.logger.debug( + f"[{self.pod.name}] Kubernetes exec run (timeout={effective_timeout}s): {command}" + ) + exit_code = None for _ in range(NB_RETRIES_RUN): try: # Execute command using Kubernetes stream API @@ -508,11 +568,25 @@ def run( error_channel = resp.read_channel(ERROR_CHANNEL) # Error channel self.logger.debug(f"[{self.pod.name}] error channel: {error_channel}") status = json.loads(error_channel) - success = status["status"] == "Success" + + # Parse exit code from status + if status["status"] == "Success": + exit_code = 0 + success = True + else: + # Try to extract exit code from status details + exit_code = 1 # Default to 1 for failure + if "details" in status and "causes" in status["details"]: + for cause in status["details"]["causes"]: + if cause.get("reason") == "ExitCode": + exit_code = int(cause.get("message", "1")) + break + success = False break # Command executed successfully, exit the retry loop except ApiException as e: success = False + exit_code = None self.logger.debug( f"[{self.pod.name}] Exception during command `{command}`: {e}" ) @@ -540,6 +614,18 @@ def run( if strip_output: output = output.strip("\r\n").strip("\n") + # Check for timeout (exit code 124 from the timeout command) + if exit_code == 124: + self.logger.warning( + f"[{self.pod.name}] Command timed out after {effective_timeout}s: {entrypoint}" + ) + timeout_msg = f"Command timed out after {effective_timeout} seconds" + if output: + output = f"{timeout_msg}\nPartial output:\n{output}" + else: + output = timeout_msg + return False, output + if raises and not success: self.logger.error(f"Failed to run command `{command}`:\n{output}") raise 
ValueError(f"Failed to run command `{entrypoint}`") diff --git a/debug_gym/gym/terminals/local.py b/debug_gym/gym/terminals/local.py index 2aabc91d..7c4e0efd 100644 --- a/debug_gym/gym/terminals/local.py +++ b/debug_gym/gym/terminals/local.py @@ -18,8 +18,21 @@ def __init__( logger: DebugGymLogger | None = None, # Local-specific parameters include_os_env_vars: bool = True, + command_timeout: int = 300, **kwargs, ): + """ + Args: + working_dir: Working directory for command execution. + session_commands: Commands to run at the start of each session. + env_vars: Environment variables to set. + logger: Logger instance. + include_os_env_vars: Whether to include current OS environment variables. + command_timeout: Default timeout in seconds for individual command execution + (default: 300 = 5 minutes). This is NOT the terminal session lifetime. + Commands that exceed this timeout will be killed. + **kwargs: Additional arguments (ignored with debug log). + """ env_vars = env_vars or {} if include_os_env_vars: env_vars = env_vars | dict(os.environ) @@ -31,6 +44,7 @@ def __init__( logger=logger, **kwargs, ) + self.command_timeout = command_timeout @property def working_dir(self): @@ -59,9 +73,25 @@ def run( raises: bool = False, strip_output: bool = True, ) -> tuple[bool, str]: - """Run a list of commands in the terminal. Return command status and output.""" + """Run a list of commands in the terminal. Return command status and output. + + Args: + entrypoint: Command(s) to run. + timeout: Timeout in seconds for this command. If the command exceeds this + time, it will be killed and the method returns (False, timeout_message). + If None, uses self.command_timeout. + raises: If True, raise ValueError on command failure. + strip_output: If True, strip trailing newlines from output. + + Returns: + Tuple of (success, output). Success is False if command failed or timed out. 
+ """ + # Use command_timeout if not specified per-call + effective_timeout = timeout if timeout is not None else self.command_timeout command = self.prepare_command(entrypoint) - self.logger.debug(f"Running command in terminal: {command}") + self.logger.debug( + f"Running command in terminal (timeout={effective_timeout}s): {command}" + ) process = subprocess.Popen( command, env=self.env_vars, @@ -71,12 +101,21 @@ def run( text=True, ) try: - stdout, stderr = process.communicate(timeout=timeout) + stdout, stderr = process.communicate(timeout=effective_timeout) success = process.returncode == 0 except subprocess.TimeoutExpired: process.kill() - stdout, stderr = "", "Timeout expired." - success = False + stdout, stderr = process.communicate() # Collect any partial output + self.logger.warning( + f"Command timed out after {effective_timeout}s: {entrypoint}" + ) + timeout_msg = f"Command timed out after {effective_timeout} seconds" + partial = (stdout + stderr).strip() + if partial: + output = f"{timeout_msg}\nPartial output:\n{partial}" + else: + output = timeout_msg + return False, output if raises and not success: # Command includes the entrypoint + session commands diff --git a/debug_gym/gym/terminals/shell_session.py b/debug_gym/gym/terminals/shell_session.py index 76aa2cf0..9f8f67f8 100644 --- a/debug_gym/gym/terminals/shell_session.py +++ b/debug_gym/gym/terminals/shell_session.py @@ -138,6 +138,16 @@ def close(self): if self.process: self.process.terminate() + try: + # Wait for process to actually terminate to avoid zombies + self.process.wait(timeout=5) + except Exception: + # If wait times out or fails, try to kill forcefully + try: + self.process.kill() + self.process.wait(timeout=1) + except Exception: + pass self.process = None def read( diff --git a/debug_gym/llms/azure_openai.py b/debug_gym/llms/azure_openai.py index ef32e802..8534f7e4 100644 --- a/debug_gym/llms/azure_openai.py +++ b/debug_gym/llms/azure_openai.py @@ -60,7 +60,7 @@ def 
_get_azure_oai_client(self): kwargs = { "azure_endpoint": self.config.endpoint, "api_version": self.config.api_version, - "timeout": None, + "timeout": 300.0, # 5 minute timeout to prevent CLOSE_WAIT hangs } if api_key not in [LLM_API_KEY_PLACEHOLDER, None]: # api key kwargs["api_key"] = api_key diff --git a/debug_gym/logger.py b/debug_gym/logger.py index 201a6668..5b2aeb07 100644 --- a/debug_gym/logger.py +++ b/debug_gym/logger.py @@ -641,6 +641,11 @@ def close(self): if self._log_listener_thread is not None: self._log_listener_stop_event.set() self._log_listener_thread.join() + # Close all file handlers to release file handles + for handler in self.handlers[:]: + if isinstance(handler, logging.FileHandler): + handler.close() + self.removeHandler(handler) def __del__(self): self.close() diff --git a/tests/gym/envs/test_env.py b/tests/gym/envs/test_env.py index 01f9b4f3..b09f636d 100644 --- a/tests/gym/envs/test_env.py +++ b/tests/gym/envs/test_env.py @@ -300,7 +300,9 @@ def test_eval_timeout(tmp_path): env = LocalEnv(path=working_dir, entrypoint="python file.py", run_timeout=1) env.reset() output = env.eval() - assert output == EvalOutput(success=False, output="Timeout expired.") + assert output == EvalOutput( + success=False, output="Command timed out after 1 seconds" + ) def test_event_hooks_initialization(): diff --git a/tests/gym/terminals/test_docker.py b/tests/gym/terminals/test_docker.py index 7a2775ea..270c9248 100644 --- a/tests/gym/terminals/test_docker.py +++ b/tests/gym/terminals/test_docker.py @@ -222,3 +222,53 @@ def test_unrecoverable_error_when_container_stops(tmp_path): terminal.run("echo after stop", timeout=1) finally: terminal.clean_up() + + +@pytest.if_docker_running +def test_docker_terminal_run_timeout(tmp_path): + """Test that commands that exceed the timeout are killed and return failure.""" + working_dir = str(tmp_path) + terminal = DockerTerminal(working_dir=working_dir, base_image="ubuntu:latest") + try: + # Run a command that takes 
longer than the timeout + entrypoint = "sleep 10 && echo done" + success, output = terminal.run(entrypoint, timeout=2) + assert success is False + assert "timed out" in output.lower() + assert "2 seconds" in output + finally: + terminal.clean_up() + + +@pytest.if_docker_running +def test_docker_terminal_run_default_timeout(tmp_path): + """Test that the default timeout is applied when none is specified.""" + working_dir = str(tmp_path) + terminal = DockerTerminal(working_dir=working_dir, base_image="ubuntu:latest") + try: + # Run a quick command without specifying timeout + entrypoint = "echo 'Hello'" + success, output = terminal.run(entrypoint) # No timeout specified + assert success is True + assert output == "Hello" + # Default command_timeout should be 300 seconds (5 minutes) + assert terminal.command_timeout == 300 + finally: + terminal.clean_up() + + +@pytest.if_docker_running +def test_docker_terminal_custom_command_timeout(tmp_path): + """Test that custom command_timeout can be set via constructor.""" + working_dir = str(tmp_path) + terminal = DockerTerminal( + working_dir=working_dir, base_image="ubuntu:latest", command_timeout=60 + ) + try: + assert terminal.command_timeout == 60 + # Quick command should still work + success, output = terminal.run("echo 'test'") + assert success is True + assert output == "test" + finally: + terminal.clean_up() diff --git a/tests/gym/terminals/test_kubernetes.py b/tests/gym/terminals/test_kubernetes.py index 6a29e330..b9476173 100644 --- a/tests/gym/terminals/test_kubernetes.py +++ b/tests/gym/terminals/test_kubernetes.py @@ -311,6 +311,56 @@ def test_select_terminal_kubernetes(): terminal.close() +@if_kubernetes_available +def test_kubernetes_terminal_run_timeout(tmp_path): + """Test that commands that exceed the timeout are killed and return failure.""" + working_dir = str(tmp_path) + terminal = KubernetesTerminal(working_dir=working_dir, base_image="ubuntu:latest") + try: + # Run a command that takes longer than the 
timeout + entrypoint = "sleep 10 && echo done" + success, output = terminal.run(entrypoint, timeout=2) + assert success is False + assert "timed out" in output.lower() + assert "2 seconds" in output + finally: + terminal.close() + + +@if_kubernetes_available +def test_kubernetes_terminal_run_default_timeout(tmp_path): + """Test that the default timeout is applied when none is specified.""" + working_dir = str(tmp_path) + terminal = KubernetesTerminal(working_dir=working_dir, base_image="ubuntu:latest") + try: + # Run a quick command without specifying timeout + entrypoint = "echo 'Hello'" + success, output = terminal.run(entrypoint) # No timeout specified + assert success is True + assert output == "Hello" + # Default command_timeout should be 300 seconds (5 minutes) + assert terminal.command_timeout == 300 + finally: + terminal.close() + + +@if_kubernetes_available +def test_kubernetes_terminal_custom_command_timeout(tmp_path): + """Test that custom command_timeout can be set via constructor.""" + working_dir = str(tmp_path) + terminal = KubernetesTerminal( + working_dir=working_dir, base_image="ubuntu:latest", command_timeout=120 + ) + try: + assert terminal.command_timeout == 120 + # Quick command should still work + success, output = terminal.run("echo 'test'") + assert success is True + assert output == "test" + finally: + terminal.close() + + def test_kubernetes_terminal_readonly_properties_after_pod_creation(): """Test that working directory cannot be changed after pod creation.""" terminal = KubernetesTerminal(base_image="ubuntu:latest") diff --git a/tests/gym/terminals/test_local_terminal.py b/tests/gym/terminals/test_local_terminal.py index d5b0232c..240d9964 100644 --- a/tests/gym/terminals/test_local_terminal.py +++ b/tests/gym/terminals/test_local_terminal.py @@ -50,6 +50,42 @@ def test_terminal_run_failure(tmp_path): assert re.search(pattern, output) +def test_terminal_run_timeout(tmp_path): + """Test that commands that exceed the timeout are killed 
and return failure.""" + working_dir = str(tmp_path) + terminal = LocalTerminal(working_dir=working_dir) + # Run a command that takes longer than the timeout + entrypoint = "sleep 10 && echo done" + success, output = terminal.run(entrypoint, timeout=1) + assert success is False + assert "timed out" in output.lower() + assert "1 seconds" in output + + +def test_terminal_run_default_timeout(tmp_path): + """Test that the default timeout is applied when none is specified.""" + working_dir = str(tmp_path) + terminal = LocalTerminal(working_dir=working_dir) + # Run a quick command without specifying timeout + entrypoint = "echo 'Hello'" + success, output = terminal.run(entrypoint) # No timeout specified + assert success is True + assert output == "Hello" + # Default command_timeout should be 300 seconds (5 minutes) + assert terminal.command_timeout == 300 + + +def test_terminal_run_custom_command_timeout(tmp_path): + """Test that custom command_timeout can be set via constructor.""" + working_dir = str(tmp_path) + terminal = LocalTerminal(working_dir=working_dir, command_timeout=60) + assert terminal.command_timeout == 60 + # Quick command should still work + success, output = terminal.run("echo 'test'") + assert success is True + assert output == "test" + + def test_terminal_session(tmp_path): working_dir = str(tmp_path) command = "echo Hello World"