From e33c21c2d6eb3b4612425b611a8f07179559d8b1 Mon Sep 17 00:00:00 2001 From: "Xingdi (Eric) Yuan" Date: Thu, 8 Jan 2026 14:49:35 -0500 Subject: [PATCH 1/6] Fix nohup commands not returning immediately with timeout wrapper When nohup commands with background execution (&) are run through the timeout wrapper in Docker/Kubernetes terminals, the timeout command doesn't return immediately because background processes inherit the shell's stdout/stderr file descriptors. This fix adds proper output redirection (> /dev/null 2>&1) to the gunicorn nohup commands in SWE-Bench setup, ensuring the timeout wrapper returns immediately after the shell exits instead of waiting for the full timeout period. Also adds comprehensive tests for both Docker and Kubernetes terminals to verify nohup commands with proper redirection return immediately. Fixes issue reported in #325 where requests package setup would hang for 300 seconds when starting background gunicorn servers. Co-Authored-By: Claude Sonnet 4.5 --- debug_gym/gym/envs/swe_bench.py | 6 ++- tests/gym/terminals/test_docker.py | 61 +++++++++++++++++++++++++ tests/gym/terminals/test_kubernetes.py | 62 ++++++++++++++++++++++++++ 3 files changed, 127 insertions(+), 2 deletions(-) diff --git a/debug_gym/gym/envs/swe_bench.py b/debug_gym/gym/envs/swe_bench.py index f0341281..f1ec05e6 100644 --- a/debug_gym/gym/envs/swe_bench.py +++ b/debug_gym/gym/envs/swe_bench.py @@ -114,9 +114,11 @@ def setup_terminal(self): self.terminal.run( "pip install httpbin[mainapp]==0.10.2 pytest-httpbin==2.1.0" ) - self.terminal.run("nohup gunicorn -b 127.0.0.1:80 -k gevent httpbin:app &") self.terminal.run( - "nohup gunicorn -b 127.0.0.1:443 --certfile=/opt/miniconda3/envs/testbed/lib/python3.9/site-packages/pytest_httpbin/certs/server.pem --keyfile=/opt/miniconda3/envs/testbed/lib/python3.9/site-packages/pytest_httpbin/certs/server.key -k gevent httpbin:app &" + "nohup gunicorn -b 127.0.0.1:80 -k gevent httpbin:app > /dev/null 2>&1 &" + ) + 
self.terminal.run( + "nohup gunicorn -b 127.0.0.1:443 --certfile=/opt/miniconda3/envs/testbed/lib/python3.9/site-packages/pytest_httpbin/certs/server.pem --keyfile=/opt/miniconda3/envs/testbed/lib/python3.9/site-packages/pytest_httpbin/certs/server.key -k gevent httpbin:app > /dev/null 2>&1 &" ) self.terminal.run('echo "127.0.0.1 httpbin.org" >> /etc/hosts') elif self.task_name == "pylint-dev__pylint-4661": diff --git a/tests/gym/terminals/test_docker.py b/tests/gym/terminals/test_docker.py index 270c9248..5d88d05f 100644 --- a/tests/gym/terminals/test_docker.py +++ b/tests/gym/terminals/test_docker.py @@ -272,3 +272,64 @@ def test_docker_terminal_custom_command_timeout(tmp_path): assert output == "test" finally: terminal.clean_up() + + +@pytest.if_docker_running +def test_docker_terminal_nohup_with_redirection_returns_immediately(tmp_path): + """Test that nohup commands with proper redirection return immediately. + + This test verifies the fix for issue #325 where nohup commands without + output redirection would cause the timeout wrapper to wait for the full + timeout period instead of returning immediately. 
+ """ + working_dir = str(tmp_path) + terminal = DockerTerminal( + working_dir=working_dir, base_image="ubuntu:latest", command_timeout=10 + ) + try: + # Test that nohup with proper redirection returns immediately + start_time = time.time() + success, output = terminal.run("nohup sleep 100 > /dev/null 2>&1 &") + elapsed = time.time() - start_time + + # Should return almost immediately (within 2 seconds) + assert success is True + assert elapsed < 2, f"nohup command took {elapsed:.2f}s, expected < 2s" + + # Verify the background process is actually running + success, output = terminal.run("pgrep -f 'sleep 100'") + assert success is True + assert output.strip().isdigit(), "Expected to find sleep process PID" + + # Clean up the background process + terminal.run("pkill -f 'sleep 100'") + finally: + terminal.clean_up() + + +@pytest.if_docker_running +def test_docker_terminal_nohup_without_redirection_may_timeout(tmp_path): + """Test that nohup commands without redirection may not return immediately. + + This test demonstrates the problem that was fixed: without output redirection, + the timeout command waits for file descriptors to close. 
+ """ + working_dir = str(tmp_path) + terminal = DockerTerminal( + working_dir=working_dir, base_image="ubuntu:latest", command_timeout=3 + ) + try: + # Test that nohup WITHOUT redirection may hit the timeout + start_time = time.time() + success, output = terminal.run("nohup sleep 100 &") + elapsed = time.time() - start_time + + # This will likely timeout after 3 seconds + # The exact behavior depends on the shell, but it should take longer + # than the properly redirected version + assert elapsed >= 2, f"Expected command to take longer, took {elapsed:.2f}s" + + # Clean up any background processes + terminal.run("pkill -f 'sleep 100'") + finally: + terminal.clean_up() diff --git a/tests/gym/terminals/test_kubernetes.py b/tests/gym/terminals/test_kubernetes.py index b9476173..91cefc4e 100644 --- a/tests/gym/terminals/test_kubernetes.py +++ b/tests/gym/terminals/test_kubernetes.py @@ -1,6 +1,7 @@ import os import platform import subprocess +import time import pytest @@ -361,6 +362,67 @@ def test_kubernetes_terminal_custom_command_timeout(tmp_path): terminal.close() +@if_kubernetes_available +def test_kubernetes_terminal_nohup_with_redirection_returns_immediately(tmp_path): + """Test that nohup commands with proper redirection return immediately. + + This test verifies the fix for issue #325 where nohup commands without + output redirection would cause the timeout wrapper to wait for the full + timeout period instead of returning immediately. 
+ """ + working_dir = str(tmp_path) + terminal = KubernetesTerminal( + working_dir=working_dir, base_image="ubuntu:latest", command_timeout=10 + ) + try: + # Test that nohup with proper redirection returns immediately + start_time = time.time() + success, output = terminal.run("nohup sleep 100 > /dev/null 2>&1 &") + elapsed = time.time() - start_time + + # Should return almost immediately (within 2 seconds) + assert success is True + assert elapsed < 2, f"nohup command took {elapsed:.2f}s, expected < 2s" + + # Verify the background process is actually running + success, output = terminal.run("pgrep -f 'sleep 100'") + assert success is True + assert output.strip().isdigit(), "Expected to find sleep process PID" + + # Clean up the background process + terminal.run("pkill -f 'sleep 100'") + finally: + terminal.close() + + +@if_kubernetes_available +def test_kubernetes_terminal_nohup_without_redirection_may_timeout(tmp_path): + """Test that nohup commands without redirection may not return immediately. + + This test demonstrates the problem that was fixed: without output redirection, + the timeout command waits for file descriptors to close. 
+ """ + working_dir = str(tmp_path) + terminal = KubernetesTerminal( + working_dir=working_dir, base_image="ubuntu:latest", command_timeout=3 + ) + try: + # Test that nohup WITHOUT redirection may hit the timeout + start_time = time.time() + success, output = terminal.run("nohup sleep 100 &") + elapsed = time.time() - start_time + + # This will likely timeout after 3 seconds + # The exact behavior depends on the shell, but it should take longer + # than the properly redirected version + assert elapsed >= 2, f"Expected command to take longer, took {elapsed:.2f}s" + + # Clean up any background processes + terminal.run("pkill -f 'sleep 100'") + finally: + terminal.close() + + def test_kubernetes_terminal_readonly_properties_after_pod_creation(): """Test that working directory cannot be changed after pod creation.""" terminal = KubernetesTerminal(base_image="ubuntu:latest") From 4939cddcc5161a42b0af1e3d435050c3c4042ce1 Mon Sep 17 00:00:00 2001 From: "Xingdi (Eric) Yuan" Date: Thu, 8 Jan 2026 14:54:54 -0500 Subject: [PATCH 2/6] Use setsid to properly detach background processes in non-TTY mode The previous fix using only output redirection was insufficient because in non-TTY mode, the timeout command monitors the entire process group, not just file descriptors. Even with > /dev/null 2>&1, backgrounded processes remain in the same process group as the shell. Using setsid creates a new session, completely detaching the process from timeout's process group. This ensures the timeout-wrapped command returns immediately after the shell exits, even in non-TTY execution contexts like docker exec and kubectl exec. 
Changes: - Added setsid before nohup gunicorn commands - Updated test names and documentation to reflect setsid usage - Tests verify processes detach properly in non-TTY mode Co-Authored-By: Claude Sonnet 4.5 --- debug_gym/gym/envs/swe_bench.py | 5 +++-- tests/gym/terminals/test_docker.py | 14 +++++++------- tests/gym/terminals/test_kubernetes.py | 14 +++++++------- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/debug_gym/gym/envs/swe_bench.py b/debug_gym/gym/envs/swe_bench.py index f1ec05e6..380edfa3 100644 --- a/debug_gym/gym/envs/swe_bench.py +++ b/debug_gym/gym/envs/swe_bench.py @@ -114,11 +114,12 @@ def setup_terminal(self): self.terminal.run( "pip install httpbin[mainapp]==0.10.2 pytest-httpbin==2.1.0" ) + # Use setsid to create new session, detaching from timeout's process group in non-TTY mode self.terminal.run( - "nohup gunicorn -b 127.0.0.1:80 -k gevent httpbin:app > /dev/null 2>&1 &" + "setsid nohup gunicorn -b 127.0.0.1:80 -k gevent httpbin:app > /dev/null 2>&1 &" ) self.terminal.run( - "nohup gunicorn -b 127.0.0.1:443 --certfile=/opt/miniconda3/envs/testbed/lib/python3.9/site-packages/pytest_httpbin/certs/server.pem --keyfile=/opt/miniconda3/envs/testbed/lib/python3.9/site-packages/pytest_httpbin/certs/server.key -k gevent httpbin:app > /dev/null 2>&1 &" + "setsid nohup gunicorn -b 127.0.0.1:443 --certfile=/opt/miniconda3/envs/testbed/lib/python3.9/site-packages/pytest_httpbin/certs/server.pem --keyfile=/opt/miniconda3/envs/testbed/lib/python3.9/site-packages/pytest_httpbin/certs/server.key -k gevent httpbin:app > /dev/null 2>&1 &" ) self.terminal.run('echo "127.0.0.1 httpbin.org" >> /etc/hosts') elif self.task_name == "pylint-dev__pylint-4661": diff --git a/tests/gym/terminals/test_docker.py b/tests/gym/terminals/test_docker.py index 5d88d05f..2994c2fc 100644 --- a/tests/gym/terminals/test_docker.py +++ b/tests/gym/terminals/test_docker.py @@ -275,21 +275,21 @@ def test_docker_terminal_custom_command_timeout(tmp_path): 
@pytest.if_docker_running -def test_docker_terminal_nohup_with_redirection_returns_immediately(tmp_path): - """Test that nohup commands with proper redirection return immediately. +def test_docker_terminal_nohup_with_setsid_returns_immediately(tmp_path): + """Test that nohup commands with setsid return immediately in non-TTY mode. - This test verifies the fix for issue #325 where nohup commands without - output redirection would cause the timeout wrapper to wait for the full - timeout period instead of returning immediately. + This test verifies the fix for issue #325 where nohup commands would cause + the timeout wrapper to wait in non-TTY mode. Using setsid creates a new + session, detaching the process from timeout's process group. """ working_dir = str(tmp_path) terminal = DockerTerminal( working_dir=working_dir, base_image="ubuntu:latest", command_timeout=10 ) try: - # Test that nohup with proper redirection returns immediately + # Test that setsid + nohup returns immediately start_time = time.time() - success, output = terminal.run("nohup sleep 100 > /dev/null 2>&1 &") + success, output = terminal.run("setsid nohup sleep 100 > /dev/null 2>&1 &") elapsed = time.time() - start_time # Should return almost immediately (within 2 seconds) diff --git a/tests/gym/terminals/test_kubernetes.py b/tests/gym/terminals/test_kubernetes.py index 91cefc4e..71958561 100644 --- a/tests/gym/terminals/test_kubernetes.py +++ b/tests/gym/terminals/test_kubernetes.py @@ -363,21 +363,21 @@ def test_kubernetes_terminal_custom_command_timeout(tmp_path): @if_kubernetes_available -def test_kubernetes_terminal_nohup_with_redirection_returns_immediately(tmp_path): - """Test that nohup commands with proper redirection return immediately. +def test_kubernetes_terminal_nohup_with_setsid_returns_immediately(tmp_path): + """Test that nohup commands with setsid return immediately in non-TTY mode. 
- This test verifies the fix for issue #325 where nohup commands without - output redirection would cause the timeout wrapper to wait for the full - timeout period instead of returning immediately. + This test verifies the fix for issue #325 where nohup commands would cause + the timeout wrapper to wait in non-TTY mode. Using setsid creates a new + session, detaching the process from timeout's process group. """ working_dir = str(tmp_path) terminal = KubernetesTerminal( working_dir=working_dir, base_image="ubuntu:latest", command_timeout=10 ) try: - # Test that nohup with proper redirection returns immediately + # Test that setsid + nohup returns immediately start_time = time.time() - success, output = terminal.run("nohup sleep 100 > /dev/null 2>&1 &") + success, output = terminal.run("setsid nohup sleep 100 > /dev/null 2>&1 &") elapsed = time.time() - start_time # Should return almost immediately (within 2 seconds) From 80f6ad519762d1a8afcc8542f0663d2cde87789b Mon Sep 17 00:00:00 2001 From: "Xingdi (Eric) Yuan" Date: Thu, 8 Jan 2026 15:06:24 -0500 Subject: [PATCH 3/6] Accept multiple PIDs from pgrep in nohup terminal tests --- tests/gym/terminals/test_docker.py | 9 ++++++++- tests/gym/terminals/test_kubernetes.py | 9 ++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/tests/gym/terminals/test_docker.py b/tests/gym/terminals/test_docker.py index 2994c2fc..c8c5ceb6 100644 --- a/tests/gym/terminals/test_docker.py +++ b/tests/gym/terminals/test_docker.py @@ -299,7 +299,14 @@ def test_docker_terminal_nohup_with_setsid_returns_immediately(tmp_path): # Verify the background process is actually running success, output = terminal.run("pgrep -f 'sleep 100'") assert success is True - assert output.strip().isdigit(), "Expected to find sleep process PID" + # Should have at least one PID (may have multiple due to process hierarchy) + pids = [line.strip() for line in output.strip().split("\n") if line.strip()] + assert ( + len(pids) >= 1 + ), f"Expected to find at least one sleep process, got: {output}" + 
assert all( + pid.isdigit() for pid in pids + ), f"Expected PIDs to be digits, got: {pids}" # Clean up the background process terminal.run("pkill -f 'sleep 100'") diff --git a/tests/gym/terminals/test_kubernetes.py b/tests/gym/terminals/test_kubernetes.py index 71958561..a5c2f3f0 100644 --- a/tests/gym/terminals/test_kubernetes.py +++ b/tests/gym/terminals/test_kubernetes.py @@ -387,7 +387,14 @@ def test_kubernetes_terminal_nohup_with_setsid_returns_immediately(tmp_path): # Verify the background process is actually running success, output = terminal.run("pgrep -f 'sleep 100'") assert success is True - assert output.strip().isdigit(), "Expected to find sleep process PID" + # Should have at least one PID (may have multiple due to process hierarchy) + pids = [line.strip() for line in output.strip().split("\n") if line.strip()] + assert ( + len(pids) >= 1 + ), f"Expected to find at least one sleep process, got: {output}" + assert all( + pid.isdigit() for pid in pids + ), f"Expected PIDs to be digits, got: {pids}" # Clean up the background process terminal.run("pkill -f 'sleep 100'") From df4e650f4e9404bd48060429c39822966631000c Mon Sep 17 00:00:00 2001 From: "Xingdi (Eric) Yuan" Date: Thu, 8 Jan 2026 15:42:14 -0500 Subject: [PATCH 4/6] nested shell --- debug_gym/gym/envs/swe_bench.py | 7 ++++--- tests/gym/terminals/test_docker.py | 12 ++++++------ tests/gym/terminals/test_kubernetes.py | 12 ++++++------ 3 files changed, 16 insertions(+), 15 deletions(-) diff --git a/debug_gym/gym/envs/swe_bench.py b/debug_gym/gym/envs/swe_bench.py index 380edfa3..f3f483c5 100644 --- a/debug_gym/gym/envs/swe_bench.py +++ b/debug_gym/gym/envs/swe_bench.py @@ -114,12 +114,13 @@ def setup_terminal(self): self.terminal.run( "pip install httpbin[mainapp]==0.10.2 pytest-httpbin==2.1.0" ) - # Use setsid to create new session, detaching from timeout's process group in non-TTY mode + # Use sh -c with background to properly detach in non-TTY mode + # The nested shell exits immediately after 
launching the background process self.terminal.run( - "setsid nohup gunicorn -b 127.0.0.1:80 -k gevent httpbin:app > /dev/null 2>&1 &" + "sh -c 'nohup gunicorn -b 127.0.0.1:80 -k gevent httpbin:app > /dev/null 2>&1 &'" ) self.terminal.run( - "setsid nohup gunicorn -b 127.0.0.1:443 --certfile=/opt/miniconda3/envs/testbed/lib/python3.9/site-packages/pytest_httpbin/certs/server.pem --keyfile=/opt/miniconda3/envs/testbed/lib/python3.9/site-packages/pytest_httpbin/certs/server.key -k gevent httpbin:app > /dev/null 2>&1 &" + "sh -c 'nohup gunicorn -b 127.0.0.1:443 --certfile=/opt/miniconda3/envs/testbed/lib/python3.9/site-packages/pytest_httpbin/certs/server.pem --keyfile=/opt/miniconda3/envs/testbed/lib/python3.9/site-packages/pytest_httpbin/certs/server.key -k gevent httpbin:app > /dev/null 2>&1 &'" ) self.terminal.run('echo "127.0.0.1 httpbin.org" >> /etc/hosts') elif self.task_name == "pylint-dev__pylint-4661": diff --git a/tests/gym/terminals/test_docker.py b/tests/gym/terminals/test_docker.py index c8c5ceb6..d4e0cc5e 100644 --- a/tests/gym/terminals/test_docker.py +++ b/tests/gym/terminals/test_docker.py @@ -275,21 +275,21 @@ def test_docker_terminal_custom_command_timeout(tmp_path): @pytest.if_docker_running -def test_docker_terminal_nohup_with_setsid_returns_immediately(tmp_path): - """Test that nohup commands with setsid return immediately in non-TTY mode. +def test_docker_terminal_nohup_with_nested_shell_returns_immediately(tmp_path): + """Test that nohup commands with nested shell return immediately in non-TTY mode. This test verifies the fix for issue #325 where nohup commands would cause - the timeout wrapper to wait in non-TTY mode. Using setsid creates a new - session, detaching the process from timeout's process group. + the timeout wrapper to wait in non-TTY mode. Using sh -c creates a nested + shell that exits immediately after backgrounding the process. 
""" working_dir = str(tmp_path) terminal = DockerTerminal( working_dir=working_dir, base_image="ubuntu:latest", command_timeout=10 ) try: - # Test that setsid + nohup returns immediately + # Test that sh -c with nohup returns immediately start_time = time.time() - success, output = terminal.run("setsid nohup sleep 100 > /dev/null 2>&1 &") + success, output = terminal.run("sh -c 'nohup sleep 100 > /dev/null 2>&1 &'") elapsed = time.time() - start_time # Should return almost immediately (within 2 seconds) diff --git a/tests/gym/terminals/test_kubernetes.py b/tests/gym/terminals/test_kubernetes.py index a5c2f3f0..69ca950d 100644 --- a/tests/gym/terminals/test_kubernetes.py +++ b/tests/gym/terminals/test_kubernetes.py @@ -363,21 +363,21 @@ def test_kubernetes_terminal_custom_command_timeout(tmp_path): @if_kubernetes_available -def test_kubernetes_terminal_nohup_with_setsid_returns_immediately(tmp_path): - """Test that nohup commands with setsid return immediately in non-TTY mode. +def test_kubernetes_terminal_nohup_with_nested_shell_returns_immediately(tmp_path): + """Test that nohup commands with nested shell return immediately in non-TTY mode. This test verifies the fix for issue #325 where nohup commands would cause - the timeout wrapper to wait in non-TTY mode. Using setsid creates a new - session, detaching the process from timeout's process group. + the timeout wrapper to wait in non-TTY mode. Using sh -c creates a nested + shell that exits immediately after backgrounding the process. 
""" working_dir = str(tmp_path) terminal = KubernetesTerminal( working_dir=working_dir, base_image="ubuntu:latest", command_timeout=10 ) try: - # Test that setsid + nohup returns immediately + # Test that sh -c with nohup returns immediately start_time = time.time() - success, output = terminal.run("setsid nohup sleep 100 > /dev/null 2>&1 &") + success, output = terminal.run("sh -c 'nohup sleep 100 > /dev/null 2>&1 &'") elapsed = time.time() - start_time # Should return almost immediately (within 2 seconds) From 0dbcbb4faa9d58341f1854886d7a101076e42208 Mon Sep 17 00:00:00 2001 From: "Xingdi (Eric) Yuan" Date: Thu, 8 Jan 2026 15:53:15 -0500 Subject: [PATCH 5/6] subshell --- debug_gym/gym/envs/swe_bench.py | 8 ++++---- tests/gym/terminals/test_docker.py | 17 ++++++++++------- tests/gym/terminals/test_kubernetes.py | 17 ++++++++++------- 3 files changed, 24 insertions(+), 18 deletions(-) diff --git a/debug_gym/gym/envs/swe_bench.py b/debug_gym/gym/envs/swe_bench.py index f3f483c5..5837d5d6 100644 --- a/debug_gym/gym/envs/swe_bench.py +++ b/debug_gym/gym/envs/swe_bench.py @@ -114,13 +114,13 @@ def setup_terminal(self): self.terminal.run( "pip install httpbin[mainapp]==0.10.2 pytest-httpbin==2.1.0" ) - # Use sh -c with background to properly detach in non-TTY mode - # The nested shell exits immediately after launching the background process + # Use subshell () with background to properly detach in non-TTY mode + # The subshell exits immediately after launching the background process self.terminal.run( - "sh -c 'nohup gunicorn -b 127.0.0.1:80 -k gevent httpbin:app > /dev/null 2>&1 &'" + "(nohup gunicorn -b 127.0.0.1:80 -k gevent httpbin:app > /dev/null 2>&1 &)" ) self.terminal.run( - "sh -c 'nohup gunicorn -b 127.0.0.1:443 --certfile=/opt/miniconda3/envs/testbed/lib/python3.9/site-packages/pytest_httpbin/certs/server.pem --keyfile=/opt/miniconda3/envs/testbed/lib/python3.9/site-packages/pytest_httpbin/certs/server.key -k gevent httpbin:app > /dev/null 2>&1 &'" + "(nohup 
gunicorn -b 127.0.0.1:443 --certfile=/opt/miniconda3/envs/testbed/lib/python3.9/site-packages/pytest_httpbin/certs/server.pem --keyfile=/opt/miniconda3/envs/testbed/lib/python3.9/site-packages/pytest_httpbin/certs/server.key -k gevent httpbin:app > /dev/null 2>&1 &)" ) self.terminal.run('echo "127.0.0.1 httpbin.org" >> /etc/hosts') elif self.task_name == "pylint-dev__pylint-4661": diff --git a/tests/gym/terminals/test_docker.py b/tests/gym/terminals/test_docker.py index d4e0cc5e..1c01a289 100644 --- a/tests/gym/terminals/test_docker.py +++ b/tests/gym/terminals/test_docker.py @@ -275,24 +275,27 @@ def test_docker_terminal_custom_command_timeout(tmp_path): @pytest.if_docker_running -def test_docker_terminal_nohup_with_nested_shell_returns_immediately(tmp_path): - """Test that nohup commands with nested shell return immediately in non-TTY mode. +def test_docker_terminal_nohup_with_subshell_returns_immediately(tmp_path): + """Test that nohup commands with subshell return immediately in non-TTY mode. This test verifies the fix for issue #325 where nohup commands would cause - the timeout wrapper to wait in non-TTY mode. Using sh -c creates a nested - shell that exits immediately after backgrounding the process. + the timeout wrapper to wait in non-TTY mode. Using (...) subshell creates + a subprocess that exits immediately after backgrounding. 
""" working_dir = str(tmp_path) terminal = DockerTerminal( working_dir=working_dir, base_image="ubuntu:latest", command_timeout=10 ) try: - # Test that sh -c with nohup returns immediately + # Warm up the terminal with a dummy command to exclude container startup time + terminal.run("echo 'warming up'") + + # Test that subshell with nohup returns immediately start_time = time.time() - success, output = terminal.run("sh -c 'nohup sleep 100 > /dev/null 2>&1 &'") + success, output = terminal.run("(nohup sleep 100 > /dev/null 2>&1 &)") elapsed = time.time() - start_time - # Should return almost immediately (within 2 seconds) + # Should return almost immediately (within 2 seconds, excluding container startup) assert success is True assert elapsed < 2, f"nohup command took {elapsed:.2f}s, expected < 2s" diff --git a/tests/gym/terminals/test_kubernetes.py b/tests/gym/terminals/test_kubernetes.py index 69ca950d..31361cbd 100644 --- a/tests/gym/terminals/test_kubernetes.py +++ b/tests/gym/terminals/test_kubernetes.py @@ -363,24 +363,27 @@ def test_kubernetes_terminal_custom_command_timeout(tmp_path): @if_kubernetes_available -def test_kubernetes_terminal_nohup_with_nested_shell_returns_immediately(tmp_path): - """Test that nohup commands with nested shell return immediately in non-TTY mode. +def test_kubernetes_terminal_nohup_with_subshell_returns_immediately(tmp_path): + """Test that nohup commands with subshell return immediately in non-TTY mode. This test verifies the fix for issue #325 where nohup commands would cause - the timeout wrapper to wait in non-TTY mode. Using sh -c creates a nested - shell that exits immediately after backgrounding the process. + the timeout wrapper to wait in non-TTY mode. Using (...) subshell creates + a subprocess that exits immediately after backgrounding. 
""" working_dir = str(tmp_path) terminal = KubernetesTerminal( working_dir=working_dir, base_image="ubuntu:latest", command_timeout=10 ) try: - # Test that sh -c with nohup returns immediately + # Warm up the terminal with a dummy command to exclude pod startup time + terminal.run("echo 'warming up'") + + # Test that subshell with nohup returns immediately start_time = time.time() - success, output = terminal.run("sh -c 'nohup sleep 100 > /dev/null 2>&1 &'") + success, output = terminal.run("(nohup sleep 100 > /dev/null 2>&1 &)") elapsed = time.time() - start_time - # Should return almost immediately (within 2 seconds) + # Should return almost immediately (within 2 seconds, excluding pod startup) assert success is True assert elapsed < 2, f"nohup command took {elapsed:.2f}s, expected < 2s" From ad9975c8466ae071ab49adeefb02b71d1a69b828 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Alexandre=20C=C3=B4t=C3=A9?= Date: Fri, 9 Jan 2026 06:50:59 -0800 Subject: [PATCH 6/6] Run eval (if available) before applying the gold patch in the solution agent. --- debug_gym/agents/solution_agent.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/debug_gym/agents/solution_agent.py b/debug_gym/agents/solution_agent.py index 87f97317..f0e6c6ad 100644 --- a/debug_gym/agents/solution_agent.py +++ b/debug_gym/agents/solution_agent.py @@ -63,4 +63,14 @@ def execute_action(self, llm_response, **kwargs): return info def init(self, info: EnvInfo) -> None: + if self.env.has_tool("eval"): + tool_call = ToolCall(name="eval", id="eval", arguments={}) + info = self.env.step(tool_call, None, None) + assert ( + info.resolved is False + ), "Eval tool should not resolve before applying the gold patch." + assert ( + info.score < info.max_score + ), "Score should be less than max score before applying the gold patch." + self._run_pdb_sanity_checks(info)