Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions debug_gym/agents/solution_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,4 +63,14 @@ def execute_action(self, llm_response, **kwargs):
return info

def init(self, info: EnvInfo) -> None:
if self.env.has_tool("eval"):
tool_call = ToolCall(name="eval", id="eval", arguments={})
info = self.env.step(tool_call, None, None)
assert (
info.resolved is False
), "Eval tool should not resolve before applying the gold patch."
assert (
info.score < info.max_score
), "Score should be less than max score before applying the gold patch."

self._run_pdb_sanity_checks(info)
8 changes: 6 additions & 2 deletions debug_gym/gym/envs/swe_bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,9 +114,13 @@ def setup_terminal(self):
self.terminal.run(
"pip install httpbin[mainapp]==0.10.2 pytest-httpbin==2.1.0"
)
self.terminal.run("nohup gunicorn -b 127.0.0.1:80 -k gevent httpbin:app &")
# Use subshell () with background to properly detach in non-TTY mode
# The subshell exits immediately after launching the background process
self.terminal.run(
"nohup gunicorn -b 127.0.0.1:443 --certfile=/opt/miniconda3/envs/testbed/lib/python3.9/site-packages/pytest_httpbin/certs/server.pem --keyfile=/opt/miniconda3/envs/testbed/lib/python3.9/site-packages/pytest_httpbin/certs/server.key -k gevent httpbin:app &"
"(nohup gunicorn -b 127.0.0.1:80 -k gevent httpbin:app > /dev/null 2>&1 &)"
)
self.terminal.run(
"(nohup gunicorn -b 127.0.0.1:443 --certfile=/opt/miniconda3/envs/testbed/lib/python3.9/site-packages/pytest_httpbin/certs/server.pem --keyfile=/opt/miniconda3/envs/testbed/lib/python3.9/site-packages/pytest_httpbin/certs/server.key -k gevent httpbin:app > /dev/null 2>&1 &)"
)
self.terminal.run('echo "127.0.0.1 httpbin.org" >> /etc/hosts')
elif self.task_name == "pylint-dev__pylint-4661":
Expand Down
71 changes: 71 additions & 0 deletions tests/gym/terminals/test_docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,3 +272,74 @@ def test_docker_terminal_custom_command_timeout(tmp_path):
assert output == "test"
finally:
terminal.clean_up()


@pytest.if_docker_running
def test_docker_terminal_nohup_with_subshell_returns_immediately(tmp_path):
"""Test that nohup commands with subshell return immediately in non-TTY mode.

This test verifies the fix for issue #325 where nohup commands would cause
the timeout wrapper to wait in non-TTY mode. Using (...) subshell creates
a subprocess that exits immediately after backgrounding.
"""
working_dir = str(tmp_path)
terminal = DockerTerminal(
working_dir=working_dir, base_image="ubuntu:latest", command_timeout=10
)
try:
# Warm up the terminal with a dummy command to exclude container startup time
terminal.run("echo 'warming up'")

# Test that subshell with nohup returns immediately
start_time = time.time()
success, output = terminal.run("(nohup sleep 100 > /dev/null 2>&1 &)")
elapsed = time.time() - start_time

# Should return almost immediately (within 2 seconds, excluding container startup)
assert success is True
assert elapsed < 2, f"nohup command took {elapsed:.2f}s, expected < 2s"

# Verify the background process is actually running
success, output = terminal.run("pgrep -f 'sleep 100'")
assert success is True
# Should have at least one PID (may have multiple due to process hierarchy)
pids = [line.strip() for line in output.strip().split("\n") if line.strip()]
assert (
len(pids) >= 1
), f"Expected to find at least one sleep process, got: {output}"
assert all(
pid.isdigit() for pid in pids
), f"Expected PIDs to be digits, got: {pids}"

# Clean up the background process
terminal.run("pkill -f 'sleep 100'")
finally:
terminal.clean_up()


@pytest.if_docker_running
def test_docker_terminal_nohup_without_redirection_may_timeout(tmp_path):
"""Test that nohup commands without redirection may not return immediately.

This test demonstrates the problem that was fixed: without output redirection,
the timeout command waits for file descriptors to close.
"""
working_dir = str(tmp_path)
terminal = DockerTerminal(
working_dir=working_dir, base_image="ubuntu:latest", command_timeout=3
)
try:
# Test that nohup WITHOUT redirection may hit the timeout
start_time = time.time()
success, output = terminal.run("nohup sleep 100 &")
elapsed = time.time() - start_time

# This will likely timeout after 3 seconds
# The exact behavior depends on the shell, but it should take longer
# than the properly redirected version
assert elapsed >= 2, f"Expected command to take longer, took {elapsed:.2f}s"

# Clean up any background processes
terminal.run("pkill -f 'sleep 100'")
finally:
terminal.clean_up()
72 changes: 72 additions & 0 deletions tests/gym/terminals/test_kubernetes.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
import platform
import subprocess
import time

import pytest

Expand Down Expand Up @@ -361,6 +362,77 @@ def test_kubernetes_terminal_custom_command_timeout(tmp_path):
terminal.close()


@if_kubernetes_available
def test_kubernetes_terminal_nohup_with_subshell_returns_immediately(tmp_path):
"""Test that nohup commands with subshell return immediately in non-TTY mode.

This test verifies the fix for issue #325 where nohup commands would cause
the timeout wrapper to wait in non-TTY mode. Using (...) subshell creates
a subprocess that exits immediately after backgrounding.
"""
working_dir = str(tmp_path)
terminal = KubernetesTerminal(
working_dir=working_dir, base_image="ubuntu:latest", command_timeout=10
)
try:
# Warm up the terminal with a dummy command to exclude pod startup time
terminal.run("echo 'warming up'")

# Test that subshell with nohup returns immediately
start_time = time.time()
success, output = terminal.run("(nohup sleep 100 > /dev/null 2>&1 &)")
elapsed = time.time() - start_time

# Should return almost immediately (within 2 seconds, excluding pod startup)
assert success is True
assert elapsed < 2, f"nohup command took {elapsed:.2f}s, expected < 2s"

# Verify the background process is actually running
success, output = terminal.run("pgrep -f 'sleep 100'")
assert success is True
# Should have at least one PID (may have multiple due to process hierarchy)
pids = [line.strip() for line in output.strip().split("\n") if line.strip()]
assert (
len(pids) >= 1
), f"Expected to find at least one sleep process, got: {output}"
assert all(
pid.isdigit() for pid in pids
), f"Expected PIDs to be digits, got: {pids}"

# Clean up the background process
terminal.run("pkill -f 'sleep 100'")
finally:
terminal.close()


@if_kubernetes_available
def test_kubernetes_terminal_nohup_without_redirection_may_timeout(tmp_path):
"""Test that nohup commands without redirection may not return immediately.

This test demonstrates the problem that was fixed: without output redirection,
the timeout command waits for file descriptors to close.
"""
working_dir = str(tmp_path)
terminal = KubernetesTerminal(
working_dir=working_dir, base_image="ubuntu:latest", command_timeout=3
)
try:
# Test that nohup WITHOUT redirection may hit the timeout
start_time = time.time()
success, output = terminal.run("nohup sleep 100 &")
elapsed = time.time() - start_time

# This will likely timeout after 3 seconds
# The exact behavior depends on the shell, but it should take longer
# than the properly redirected version
assert elapsed >= 2, f"Expected command to take longer, took {elapsed:.2f}s"

# Clean up any background processes
terminal.run("pkill -f 'sleep 100'")
finally:
terminal.close()


def test_kubernetes_terminal_readonly_properties_after_pod_creation():
"""Test that working directory cannot be changed after pod creation."""
terminal = KubernetesTerminal(base_image="ubuntu:latest")
Expand Down
Loading