Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

roslaunch: Allow passing _TIMEOUT_SIGINT and _TIMEOUT_SIGTERM as parameters #1937

Merged
merged 12 commits into from
May 15, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions tools/roslaunch/resources/timeouts.launch
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<launch>

<node name="signals" pkg="roslaunch" type="signal_logger.py" />

</launch>
11 changes: 11 additions & 0 deletions tools/roslaunch/scripts/roscore
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import sys
from optparse import OptionParser
from rosmaster.master_api import NUM_WORKERS
from roslaunch.nodeprocess import DEFAULT_TIMEOUT_SIGINT, DEFAULT_TIMEOUT_SIGTERM

NAME = 'roscore'

Expand All @@ -59,6 +60,16 @@ def _get_optparse():
parser.add_option("--master-logger-level",
dest="master_logger_level", default=False, type=str,
help="set rosmaster.master logger level ('debug', 'info', 'warn', 'error', 'fatal')")
parser.add_option("--sigint-timeout",
dest="sigint_timeout",
default=DEFAULT_TIMEOUT_SIGINT, type=float,
help="the SIGINT timeout used when killing the core (in seconds).",
metavar="SIGINT_TIMEOUT")
parser.add_option("--sigterm-timeout",
dest="sigterm_timeout",
default=DEFAULT_TIMEOUT_SIGTERM, type=float,
help="the SIGTERM timeout used when killing core if SIGINT does not stop the core (in seconds).",
metavar="SIGTERM_TIMEOUT")
return parser


Expand Down
19 changes: 17 additions & 2 deletions tools/roslaunch/src/roslaunch/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
DEFAULT_MASTER_PORT = 11311

from rosmaster.master_api import NUM_WORKERS
from roslaunch.nodeprocess import DEFAULT_TIMEOUT_SIGINT, DEFAULT_TIMEOUT_SIGTERM

NAME = 'roslaunch'

Expand Down Expand Up @@ -193,6 +194,16 @@ def _get_optparse():
parser.add_option("--master-logger-level",
dest="master_logger_level", default=False, type=str,
help="set rosmaster.master logger level ('debug', 'info', 'warn', 'error', 'fatal')")
parser.add_option("--sigint-timeout",
dest="sigint_timeout",
default=DEFAULT_TIMEOUT_SIGINT, type=float,
help="the SIGINT timeout used when killing nodes (in seconds).",
metavar="SIGINT_TIMEOUT")
parser.add_option("--sigterm-timeout",
dest="sigterm_timeout",
default=DEFAULT_TIMEOUT_SIGTERM, type=float,
help="the SIGTERM timeout used when killing nodes if SIGINT does not stop the node (in seconds).",
metavar="SIGTERM_TIMEOUT")

return parser

Expand Down Expand Up @@ -296,7 +307,9 @@ def main(argv=sys.argv):
# client spins up an XML-RPC server that waits for
# commands and configuration from the server.
from . import child as roslaunch_child
c = roslaunch_child.ROSLaunchChild(uuid, options.child_name, options.server_uri)
c = roslaunch_child.ROSLaunchChild(uuid, options.child_name, options.server_uri,
sigint_timeout=options.sigint_timeout,
sigterm_timeout=options.sigterm_timeout)
c.run()
else:
logger.info('starting in server mode')
Expand Down Expand Up @@ -326,7 +339,9 @@ def main(argv=sys.argv):
num_workers=options.num_workers, timeout=options.timeout,
master_logger_level=options.master_logger_level,
show_summary=not options.no_summary,
force_required=options.force_required)
force_required=options.force_required,
sigint_timeout=options.sigint_timeout,
sigterm_timeout=options.sigterm_timeout)
p.start()
p.spin()

Expand Down
14 changes: 12 additions & 2 deletions tools/roslaunch/src/roslaunch/child.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
import roslaunch.core
import roslaunch.pmon
import roslaunch.server
from roslaunch.nodeprocess import DEFAULT_TIMEOUT_SIGINT, DEFAULT_TIMEOUT_SIGTERM

class ROSLaunchChild(object):
"""
Expand All @@ -57,7 +58,7 @@ class ROSLaunchChild(object):
This must be called from the Python Main thread due to signal registration.
"""

def __init__(self, run_id, name, server_uri):
def __init__(self, run_id, name, server_uri, sigint_timeout=DEFAULT_TIMEOUT_SIGINT, sigterm_timeout=DEFAULT_TIMEOUT_SIGTERM):
"""
Startup roslaunch remote client XML-RPC services. Blocks until shutdown
@param run_id: UUID of roslaunch session
Expand All @@ -66,6 +67,11 @@ def __init__(self, run_id, name, server_uri):
@type name: str
@param server_uri: XML-RPC URI of roslaunch server
@type server_uri: str
@param sigint_timeout: The SIGINT timeout used when killing nodes (in seconds).
@type sigint_timeout: float
@param sigterm_timeout: The SIGTERM timeout used when killing nodes if SIGINT does not stop the node (
in seconds).
@type sigterm_timeout: float
@return: XML-RPC URI
@rtype: str
"""
Expand All @@ -77,6 +83,8 @@ def __init__(self, run_id, name, server_uri):
self.server_uri = server_uri
self.child_server = None
self.pm = None
self.sigint_timeout = sigint_timeout
self.sigterm_timeout = sigterm_timeout

roslaunch.pmon._init_signal_handlers()

Expand All @@ -102,7 +110,9 @@ def run(self):
try:
self.logger.info("starting roslaunch child process [%s], server URI is [%s]", self.name, self.server_uri)
self._start_pm()
self.child_server = roslaunch.server.ROSLaunchChildNode(self.run_id, self.name, self.server_uri, self.pm)
self.child_server = roslaunch.server.ROSLaunchChildNode(self.run_id, self.name, self.server_uri,
self.pm, sigint_timeout=self.sigint_timeout,
sigterm_timeout=self.sigterm_timeout)
self.logger.info("... creating XMLRPC server for child")
self.child_server.start()
self.logger.info("... started XMLRPC server for child")
Expand Down
22 changes: 18 additions & 4 deletions tools/roslaunch/src/roslaunch/launch.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@

from roslaunch.core import *
#from roslaunch.core import setup_env
from roslaunch.nodeprocess import create_master_process, create_node_process
from roslaunch.nodeprocess import create_master_process, create_node_process, DEFAULT_TIMEOUT_SIGINT, DEFAULT_TIMEOUT_SIGTERM
from roslaunch.pmon import start_process_monitor, ProcessListener

from roslaunch.rlutil import update_terminal_name
Expand Down Expand Up @@ -235,7 +235,8 @@ class ROSLaunchRunner(object):
monitored.
"""

def __init__(self, run_id, config, server_uri=None, pmon=None, is_core=False, remote_runner=None, is_child=False, is_rostest=False, num_workers=NUM_WORKERS, timeout=None, master_logger_level=False):
def __init__(self, run_id, config, server_uri=None, pmon=None, is_core=False, remote_runner=None, is_child=False, is_rostest=False, num_workers=NUM_WORKERS, timeout=None,
master_logger_level=False, sigint_timeout=DEFAULT_TIMEOUT_SIGINT, sigterm_timeout=DEFAULT_TIMEOUT_SIGTERM):
"""
@param run_id: /run_id for this launch. If the core is not
running, this value will be used to initialize /run_id. If
Expand Down Expand Up @@ -266,9 +267,19 @@ def __init__(self, run_id, config, server_uri=None, pmon=None, is_core=False, re
@type timeout: Float or None
@param master_logger_level: Specify roscore's rosmaster.master logger level, use default if it is False.
@type master_logger_level: str or False
@param sigint_timeout: The SIGINT timeout used when killing nodes (in seconds).
@type sigint_timeout: float
@param sigterm_timeout: The SIGTERM timeout used when killing nodes if SIGINT does not stop the node (in seconds).
@type sigterm_timeout: float
@raise RLException: If sigint_timeout or sigterm_timeout are nonpositive.
"""
if run_id is None:
raise RLException("run_id is None")
if sigint_timeout <= 0:
raise RLException("sigint_timeout must be a positive number, received %f" % sigint_timeout)
if sigterm_timeout <= 0:
raise RLException("sigterm_timeout must be a positive number, received %f" % sigterm_timeout)

self.run_id = run_id

# In the future we should can separate the notion of a core
Expand All @@ -286,6 +297,8 @@ def __init__(self, run_id, config, server_uri=None, pmon=None, is_core=False, re
self.master_logger_level = master_logger_level
self.logger = logging.getLogger('roslaunch')
self.pm = pmon or start_process_monitor()
self.sigint_timeout = sigint_timeout
self.sigterm_timeout = sigterm_timeout

# wire in ProcessMonitor events to our listeners
# aggregator. We similarly wire in the remote events when we
Expand Down Expand Up @@ -402,7 +415,8 @@ def _launch_master(self):
printlog("auto-starting new master")
p = create_master_process(
self.run_id, m.type, get_ros_root(), m.get_port(), self.num_workers,
self.timeout, master_logger_level=self.master_logger_level)
self.timeout, master_logger_level=self.master_logger_level,
sigint_timeout=self.sigint_timeout, sigterm_timeout=self.sigterm_timeout)
self.pm.register_core_proc(p)
success = p.start()
if not success:
Expand Down Expand Up @@ -541,7 +555,7 @@ def launch_node(self, node, core=False):
master = self.config.master
import roslaunch.node_args
try:
process = create_node_process(self.run_id, node, master.uri)
process = create_node_process(self.run_id, node, master.uri, sigint_timeout=self.sigint_timeout, sigterm_timeout=self.sigterm_timeout)
except roslaunch.node_args.NodeParamsException as e:
self.logger.error(e)
printerrlog("ERROR: cannot launch node of type [%s/%s]: %s"%(node.package, node.type, str(e)))
Expand Down
48 changes: 36 additions & 12 deletions tools/roslaunch/src/roslaunch/nodeprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,16 +55,16 @@
import logging
_logger = logging.getLogger("roslaunch")

_TIMEOUT_SIGINT = 15.0 #seconds
_TIMEOUT_SIGTERM = 2.0 #seconds
DEFAULT_TIMEOUT_SIGINT = 15.0 #seconds
DEFAULT_TIMEOUT_SIGTERM = 2.0 #seconds

_counter = 0
def _next_counter():
global _counter
_counter += 1
return _counter

def create_master_process(run_id, type_, ros_root, port, num_workers=NUM_WORKERS, timeout=None, master_logger_level=False):
def create_master_process(run_id, type_, ros_root, port, num_workers=NUM_WORKERS, timeout=None, master_logger_level=False, sigint_timeout=DEFAULT_TIMEOUT_SIGINT, sigterm_timeout=DEFAULT_TIMEOUT_SIGTERM):
"""
Launch a master
@param type_: name of master executable (currently just Master.ZENMASTER)
Expand All @@ -77,10 +77,14 @@ def create_master_process(run_id, type_, ros_root, port, num_workers=NUM_WORKERS
@type num_workers: int
@param timeout: socket timeout for connections.
@type timeout: float
@raise RLException: if type_ or port is invalid
@param master_logger_level: rosmaster.master logger debug level
@type master_logger_level=: str or False
"""
@param sigint_timeout: The SIGINT timeout used when killing nodes (in seconds).
@type sigint_timeout: float
@param sigterm_timeout: The SIGTERM timeout used when killing nodes if SIGINT does not stop the node (in seconds).
@type sigterm_timeout: float
@raise RLException: if type_ or port is invalid or sigint_timeout or sigterm_timeout are nonpositive.
"""
if port < 1 or port > 65535:
raise RLException("invalid port assignment: %s"%port)

Expand All @@ -100,9 +104,10 @@ def create_master_process(run_id, type_, ros_root, port, num_workers=NUM_WORKERS

_logger.info("process[master]: launching with args [%s]"%args)
log_output = False
return LocalProcess(run_id, package, 'master', args, os.environ, log_output, None, required=True)
return LocalProcess(run_id, package, 'master', args, os.environ, log_output, None, required=True,
sigint_timeout=sigint_timeout, sigterm_timeout=sigterm_timeout)

def create_node_process(run_id, node, master_uri):
def create_node_process(run_id, node, master_uri, sigint_timeout=DEFAULT_TIMEOUT_SIGINT, sigterm_timeout=DEFAULT_TIMEOUT_SIGTERM):
"""
Factory for generating processes for launching local ROS
nodes. Also registers the process with the L{ProcessMonitor} so that
Expand All @@ -114,9 +119,14 @@ def create_node_process(run_id, node, master_uri):
@type node: L{Node}
@param master_uri: API URI for master node
@type master_uri: str
@param sigint_timeout: The SIGINT timeout used when killing nodes (in seconds).
@type sigint_timeout: float
@param sigterm_timeout: The SIGTERM timeout used when killing nodes if SIGINT does not stop the node (in seconds).
@type sigterm_timeout: float
@return: local process instance
@rtype: L{LocalProcess}
@raise NodeParamsException: If the node's parameters are improperly specific
@raise RLException: If sigint_timeout or sigterm_timeout are nonpositive.
"""
_logger.info("create_node_process: package[%s] type[%s] machine[%s] master_uri[%s]", node.package, node.type, node.machine, master_uri)
# check input args
Expand Down Expand Up @@ -150,7 +160,8 @@ def create_node_process(run_id, node, master_uri):
_logger.debug('process[%s]: returning LocalProcess wrapper')
return LocalProcess(run_id, node.package, name, args, env, log_output, \
respawn=node.respawn, respawn_delay=node.respawn_delay, \
required=node.required, cwd=node.cwd)
required=node.required, cwd=node.cwd, \
sigint_timeout=sigint_timeout, sigterm_timeout=sigterm_timeout)


class LocalProcess(Process):
Expand All @@ -160,7 +171,7 @@ class LocalProcess(Process):

def __init__(self, run_id, package, name, args, env, log_output,
respawn=False, respawn_delay=0.0, required=False, cwd=None,
is_node=True):
is_node=True, sigint_timeout=DEFAULT_TIMEOUT_SIGINT, sigterm_timeout=DEFAULT_TIMEOUT_SIGTERM):
"""
@param run_id: unique run ID for this roslaunch. Used to
generate log directory location. run_id may be None if this
Expand All @@ -184,9 +195,20 @@ def __init__(self, run_id, package, name, args, env, log_output,
@type cwd: str
@param is_node: (optional) if True, process is ROS node and accepts ROS node command-line arguments. Default: True
@type is_node: False
@param sigint_timeout: The SIGINT timeout used when killing nodes (in seconds).
@type sigint_timeout: float
@param sigterm_timeout: The SIGTERM timeout used when killing nodes if SIGINT does not stop the node (in seconds).
@type sigterm_timeout: float
@raise RLException: If sigint_timeout or sigterm_timeout are nonpositive.
"""
super(LocalProcess, self).__init__(package, name, args, env,
respawn, respawn_delay, required)

if sigint_timeout <= 0:
raise RLException("sigint_timeout must be a positive number, received %f" % sigint_timeout)
dirk-thomas marked this conversation as resolved.
Show resolved Hide resolved
if sigterm_timeout <= 0:
raise RLException("sigterm_timeout must be a positive number, received %f" % sigterm_timeout)

self.run_id = run_id
self.popen = None
self.log_output = log_output
Expand All @@ -196,6 +218,8 @@ def __init__(self, run_id, package, name, args, env, log_output,
self.log_dir = None
self.pid = -1
self.is_node = is_node
self.sigint_timeout = sigint_timeout
self.sigterm_timeout = sigterm_timeout

# NOTE: in the future, info() is going to have to be sufficient for relaunching a process
def get_info(self):
Expand Down Expand Up @@ -406,15 +430,15 @@ def _stop_unix(self, errors):
_logger.info("[%s] sending SIGINT to pgid [%s]", self.name, pgid)
os.killpg(pgid, signal.SIGINT)
_logger.info("[%s] sent SIGINT to pgid [%s]", self.name, pgid)
timeout_t = time.time() + _TIMEOUT_SIGINT
timeout_t = time.time() + self.sigint_timeout
retcode = self.popen.poll()
while time.time() < timeout_t and retcode is None:
time.sleep(0.1)
retcode = self.popen.poll()
# Escalate non-responsive process
if retcode is None:
printerrlog("[%s] escalating to SIGTERM"%self.name)
timeout_t = time.time() + _TIMEOUT_SIGTERM
timeout_t = time.time() + self.sigterm_timeout
os.killpg(pgid, signal.SIGTERM)
_logger.info("[%s] sent SIGTERM to pgid [%s]"%(self.name, pgid))
retcode = self.popen.poll()
Expand Down Expand Up @@ -474,7 +498,7 @@ def _stop_win32(self, errors):
_logger.info("[%s] running taskkill pid tree [%s]", self.name, pid)
subprocess.call(['taskkill', '/F', '/T', '/PID', str(pid)])
_logger.info("[%s] run taskkill pid tree [%s]", self.name, pid)
timeout_t = time.time() + _TIMEOUT_SIGINT
timeout_t = time.time() + self.sigint_timeout
retcode = self.popen.poll()
while time.time() < timeout_t and retcode is None:
time.sleep(0.1)
Expand Down
Loading