diff --git a/debug_gym/agents/solution_agent.py b/debug_gym/agents/solution_agent.py index 87f97317..f0e6c6ad 100644 --- a/debug_gym/agents/solution_agent.py +++ b/debug_gym/agents/solution_agent.py @@ -63,4 +63,14 @@ def execute_action(self, llm_response, **kwargs): return info def init(self, info: EnvInfo) -> None: + if self.env.has_tool("eval"): + tool_call = ToolCall(name="eval", id="eval", arguments={}) + info = self.env.step(tool_call, None, None) + assert ( + info.resolved is False + ), "Eval tool should not resolve before applying the gold patch." + assert ( + info.score < info.max_score + ), "Score should be less than max score before applying the gold patch." + self._run_pdb_sanity_checks(info) diff --git a/debug_gym/gym/envs/swe_bench.py b/debug_gym/gym/envs/swe_bench.py index f0341281..5837d5d6 100644 --- a/debug_gym/gym/envs/swe_bench.py +++ b/debug_gym/gym/envs/swe_bench.py @@ -114,9 +114,13 @@ def setup_terminal(self): self.terminal.run( "pip install httpbin[mainapp]==0.10.2 pytest-httpbin==2.1.0" ) - self.terminal.run("nohup gunicorn -b 127.0.0.1:80 -k gevent httpbin:app &") + # Use subshell () with background to properly detach in non-TTY mode + # The subshell exits immediately after launching the background process self.terminal.run( - "nohup gunicorn -b 127.0.0.1:443 --certfile=/opt/miniconda3/envs/testbed/lib/python3.9/site-packages/pytest_httpbin/certs/server.pem --keyfile=/opt/miniconda3/envs/testbed/lib/python3.9/site-packages/pytest_httpbin/certs/server.key -k gevent httpbin:app &" + "(nohup gunicorn -b 127.0.0.1:80 -k gevent httpbin:app > /dev/null 2>&1 &)" + ) + self.terminal.run( + "(nohup gunicorn -b 127.0.0.1:443 --certfile=/opt/miniconda3/envs/testbed/lib/python3.9/site-packages/pytest_httpbin/certs/server.pem --keyfile=/opt/miniconda3/envs/testbed/lib/python3.9/site-packages/pytest_httpbin/certs/server.key -k gevent httpbin:app > /dev/null 2>&1 &)" ) self.terminal.run('echo "127.0.0.1 httpbin.org" >> /etc/hosts') elif self.task_name == "pylint-dev__pylint-4661": diff --git a/tests/gym/terminals/test_docker.py b/tests/gym/terminals/test_docker.py index 270c9248..1c01a289 100644 --- a/tests/gym/terminals/test_docker.py +++ b/tests/gym/terminals/test_docker.py @@ -272,3 +272,74 @@ def test_docker_terminal_custom_command_timeout(tmp_path): assert output == "test" finally: terminal.clean_up() + + +@pytest.if_docker_running +def test_docker_terminal_nohup_with_subshell_returns_immediately(tmp_path): + """Test that nohup commands with subshell return immediately in non-TTY mode. + + This test verifies the fix for issue #325 where nohup commands would cause + the timeout wrapper to wait in non-TTY mode. Using (...) subshell creates + a subprocess that exits immediately after backgrounding. + """ + working_dir = str(tmp_path) + terminal = DockerTerminal( + working_dir=working_dir, base_image="ubuntu:latest", command_timeout=10 + ) + try: + # Warm up the terminal with a dummy command to exclude container startup time + terminal.run("echo 'warming up'") + + # Test that subshell with nohup returns immediately + start_time = time.time() + success, output = terminal.run("(nohup sleep 100 > /dev/null 2>&1 &)") + elapsed = time.time() - start_time + + # Should return almost immediately (within 2 seconds, excluding container startup) + assert success is True + assert elapsed < 2, f"nohup command took {elapsed:.2f}s, expected < 2s" + + # Verify the background process is actually running + success, output = terminal.run("pgrep -f 'sleep 100'") + assert success is True + # Should have at least one PID (may have multiple due to process hierarchy) + pids = [line.strip() for line in output.strip().split("\n") if line.strip()] + assert ( + len(pids) >= 1 + ), f"Expected to find at least one sleep process, got: {output}" + assert all( + pid.isdigit() for pid in pids + ), f"Expected PIDs to be digits, got: {pids}" + + # Clean up the background process + terminal.run("pkill -f 'sleep 100'") + finally: + terminal.clean_up() + + +@pytest.if_docker_running +def test_docker_terminal_nohup_without_redirection_may_timeout(tmp_path): + """Test that nohup commands without redirection may not return immediately. + + This test demonstrates the problem that was fixed: without output redirection, + the timeout command waits for file descriptors to close. + """ + working_dir = str(tmp_path) + terminal = DockerTerminal( + working_dir=working_dir, base_image="ubuntu:latest", command_timeout=3 + ) + try: + # Test that nohup WITHOUT redirection may hit the timeout + start_time = time.time() + success, output = terminal.run("nohup sleep 100 &") + elapsed = time.time() - start_time + + # This will likely timeout after 3 seconds + # The exact behavior depends on the shell, but it should take longer + # than the properly redirected version + assert elapsed >= 2, f"Expected command to take longer, took {elapsed:.2f}s" + + # Clean up any background processes + terminal.run("pkill -f 'sleep 100'") + finally: + terminal.clean_up() diff --git a/tests/gym/terminals/test_kubernetes.py b/tests/gym/terminals/test_kubernetes.py index b9476173..31361cbd 100644 --- a/tests/gym/terminals/test_kubernetes.py +++ b/tests/gym/terminals/test_kubernetes.py @@ -1,6 +1,7 @@ import os import platform import subprocess +import time import pytest @@ -361,6 +362,77 @@ def test_kubernetes_terminal_custom_command_timeout(tmp_path): terminal.close() +@if_kubernetes_available +def test_kubernetes_terminal_nohup_with_subshell_returns_immediately(tmp_path): + """Test that nohup commands with subshell return immediately in non-TTY mode. + + This test verifies the fix for issue #325 where nohup commands would cause + the timeout wrapper to wait in non-TTY mode. Using (...) subshell creates + a subprocess that exits immediately after backgrounding. + """ + working_dir = str(tmp_path) + terminal = KubernetesTerminal( + working_dir=working_dir, base_image="ubuntu:latest", command_timeout=10 + ) + try: + # Warm up the terminal with a dummy command to exclude pod startup time + terminal.run("echo 'warming up'") + + # Test that subshell with nohup returns immediately + start_time = time.time() + success, output = terminal.run("(nohup sleep 100 > /dev/null 2>&1 &)") + elapsed = time.time() - start_time + + # Should return almost immediately (within 2 seconds, excluding pod startup) + assert success is True + assert elapsed < 2, f"nohup command took {elapsed:.2f}s, expected < 2s" + + # Verify the background process is actually running + success, output = terminal.run("pgrep -f 'sleep 100'") + assert success is True + # Should have at least one PID (may have multiple due to process hierarchy) + pids = [line.strip() for line in output.strip().split("\n") if line.strip()] + assert ( + len(pids) >= 1 + ), f"Expected to find at least one sleep process, got: {output}" + assert all( + pid.isdigit() for pid in pids + ), f"Expected PIDs to be digits, got: {pids}" + + # Clean up the background process + terminal.run("pkill -f 'sleep 100'") + finally: + terminal.close() + + +@if_kubernetes_available +def test_kubernetes_terminal_nohup_without_redirection_may_timeout(tmp_path): + """Test that nohup commands without redirection may not return immediately. + + This test demonstrates the problem that was fixed: without output redirection, + the timeout command waits for file descriptors to close. + """ + working_dir = str(tmp_path) + terminal = KubernetesTerminal( + working_dir=working_dir, base_image="ubuntu:latest", command_timeout=3 + ) + try: + # Test that nohup WITHOUT redirection may hit the timeout + start_time = time.time() + success, output = terminal.run("nohup sleep 100 &") + elapsed = time.time() - start_time + + # This will likely timeout after 3 seconds + # The exact behavior depends on the shell, but it should take longer + # than the properly redirected version + assert elapsed >= 2, f"Expected command to take longer, took {elapsed:.2f}s" + + # Clean up any background processes + terminal.run("pkill -f 'sleep 100'") + finally: + terminal.close() + + def test_kubernetes_terminal_readonly_properties_after_pod_creation(): """Test that working directory cannot be changed after pod creation.""" terminal = KubernetesTerminal(base_image="ubuntu:latest")