diff --git a/tools/roslaunch/src/roslaunch/core.py b/tools/roslaunch/src/roslaunch/core.py index fafb021911..d12090af58 100644 --- a/tools/roslaunch/src/roslaunch/core.py +++ b/tools/roslaunch/src/roslaunch/core.py @@ -419,13 +419,15 @@ class Node(object): """ __slots__ = ['package', 'type', 'name', 'namespace', \ 'machine_name', 'machine', 'args', 'respawn', \ + 'respawn_delay', \ 'remap_args', 'env_args',\ 'process_name', 'output', 'cwd', 'launch_prefix', 'required', 'filename'] def __init__(self, package, node_type, name=None, namespace='/', \ - machine_name=None, args='', respawn=False, \ + machine_name=None, args='', \ + respawn=False, respawn_delay=0.0, \ remap_args=None,env_args=None, output=None, cwd=None, \ launch_prefix=None, required=False, filename=''): """ @@ -436,6 +438,7 @@ def __init__(self, package, node_type, name=None, namespace='/', \ :param machine_name: name of machine to run node on, ``str`` :param args: argument string to pass to node executable, ``str`` :param respawn: if True, respawn node if it dies, ``bool`` + :param respawn_delay: if respawn is True, respawn node after delay, ``float`` :param remap_args: list of [(from, to)] remapping arguments, ``[(str, str)]`` :param env_args: list of [(key, value)] of additional environment vars to set for node, ``[(str, str)]`` @@ -454,6 +457,7 @@ def __init__(self, package, node_type, name=None, namespace='/', \ self.namespace = rosgraph.names.make_global_ns(namespace or '/') self.machine_name = machine_name or None self.respawn = respawn + self.respawn_delay = respawn_delay self.args = args or '' self.remap_args = remap_args or [] self.env_args = env_args or [] @@ -514,6 +518,7 @@ def xmlattrs(self): ('output', self.output), ('cwd', cwd_str), ('respawn', self.respawn), #not valid on + ('respawn_delay', self.respawn_delay), # not valid on ('name', name_str), ('launch-prefix', self.launch_prefix), ('required', self.required), @@ -616,7 +621,8 @@ def xmlattrs(self): to what it was initialized with, though
the properties are the same """ attrs = Node.xmlattrs(self) - attrs = [(a, v) for (a, v) in attrs if a != 'respawn'] + attrs = [(a, v) for (a, v) in attrs if a not in ['respawn', \ + 'respawn_delay']] attrs.append(('test-name', self.test_name)) if self.retry: diff --git a/tools/roslaunch/src/roslaunch/nodeprocess.py b/tools/roslaunch/src/roslaunch/nodeprocess.py index 36fc712905..441d0375e8 100644 --- a/tools/roslaunch/src/roslaunch/nodeprocess.py +++ b/tools/roslaunch/src/roslaunch/nodeprocess.py @@ -135,7 +135,9 @@ def create_node_process(run_id, node, master_uri): # default for node.output not set is 'log' log_output = node.output != 'screen' _logger.debug('process[%s]: returning LocalProcess wrapper') - return LocalProcess(run_id, node.package, name, args, env, log_output, respawn=node.respawn, required=node.required, cwd=node.cwd) + return LocalProcess(run_id, node.package, name, args, env, log_output, \ + respawn=node.respawn, respawn_delay=node.respawn_delay, \ + required=node.required, cwd=node.cwd) class LocalProcess(Process): @@ -143,7 +145,9 @@ class LocalProcess(Process): Process launched on local machine """ - def __init__(self, run_id, package, name, args, env, log_output, respawn=False, required=False, cwd=None, is_node=True): + def __init__(self, run_id, package, name, args, env, log_output, + respawn=False, respawn_delay=0.0, required=False, cwd=None, + is_node=True): """ @param run_id: unique run ID for this roslaunch. Used to generate log directory location. run_id may be None if this @@ -161,12 +165,15 @@ def __init__(self, run_id, package, name, args, env, log_output, respawn=False, @type log_output: bool @param respawn: respawn process if it dies (default is False) @type respawn: bool + @param respawn_delay: respawn process after a delay + @type respawn_delay: float @param cwd: working directory of process, or None @type cwd: str @param is_node: (optional) if True, process is ROS node and accepts ROS node command-line arguments. 
Default: True @type is_node: False """ - super(LocalProcess, self).__init__(package, name, args, env, respawn, required) + super(LocalProcess, self).__init__(package, name, args, env, + respawn, respawn_delay, required) self.run_id = run_id self.popen = None self.log_output = log_output @@ -322,9 +329,13 @@ def is_alive(self): if not self.started: #not started yet return True if self.stopped or self.popen is None: + if self.time_of_death is None: + self.time_of_death = time.time() return False self.exit_code = self.popen.poll() if self.exit_code is not None: + if self.time_of_death is None: + self.time_of_death = time.time() return False return True diff --git a/tools/roslaunch/src/roslaunch/pmon.py b/tools/roslaunch/src/roslaunch/pmon.py index 828209891c..b20a90485b 100644 --- a/tools/roslaunch/src/roslaunch/pmon.py +++ b/tools/roslaunch/src/roslaunch/pmon.py @@ -183,17 +183,20 @@ class Process(object): for signal handlers to register properly. """ - def __init__(self, package, name, args, env, respawn=False, required=False): + def __init__(self, package, name, args, env, + respawn=False, respawn_delay=0.0, required=False): self.package = package self.name = name self.args = args self.env = env self.respawn = respawn + self.respawn_delay = respawn_delay self.required = required self.lock = Lock() self.exit_code = None # for keeping track of respawning self.spawn_count = 0 + self.time_of_death = None _init_signal_handlers() @@ -217,6 +220,7 @@ def get_info(self): 'name': self.name, 'alive': self.is_alive(), 'respawn': self.respawn, + 'respawn_delay': self.respawn_delay, 'required': self.required, } if self.exit_code is not None: @@ -224,11 +228,26 @@ def get_info(self): return info def start(self): + self.time_of_death = None self.spawn_count += 1 def is_alive(self): + if self.time_of_death is None: + self.time_of_death = time.time() return False + def should_respawn(self): + """ + @return: False if process should not respawn + floating point seconds until respawn 
otherwise + """ + if not self.respawn: + return False + if self.time_of_death is None: + if self.is_alive(): + return False + return (self.time_of_death + self.respawn_delay) - time.time() + def stop(self, errors=None): """ Stop the process. Record any significant error messages in the errors parameter @@ -254,7 +273,8 @@ class DeadProcess(Process): container allows us to delete the actual Process but still maintain the metadata """ def __init__(self, p): - super(DeadProcess, self).__init__(p.package, p.name, p.args, p.env, p.respawn) + super(DeadProcess, self).__init__(p.package, p.name, p.args, p.env, + p.respawn, p.respawn_delay) self.exit_code = p.exit_code self.lock = None self.spawn_count = p.spawn_count @@ -538,15 +558,15 @@ def _run(self): for p in procs: try: if not p.is_alive(): - logger.debug("Process[%s] has died, respawn=%s, required=%s, exit_code=%s",p.name, p.respawn, p.required, p.exit_code) + logger.debug("Process[%s] has died, respawn=%s, required=%s, exit_code=%s", + p.name, + "True(%f)" % p.respawn_delay if p.respawn else p.respawn, + p.required, p.exit_code) exit_code_str = p.get_exit_description() - if p.respawn: - printlog_bold("[%s] %s\nrespawning..."%(p.name, exit_code_str)) - respawn.append(p) - elif p.required: + if p.required: printerrlog('='*80+"REQUIRED process [%s] has died!\n%s\nInitiating shutdown!\n"%(p.name, exit_code_str)+'='*80) self.is_shutdown = True - else: + elif not p in respawn: if p.exit_code: printerrlog("[%s] %s"%(p.name, exit_code_str)) else: @@ -566,13 +586,15 @@ def _run(self): break #stop polling for d in dead: try: - self.unregister(d) - # stop process, don't accumulate errors - d.stop([]) - - # save process data to dead list - with plock: - self.dead_list.append(DeadProcess(d)) + if d.should_respawn(): + respawn.append(d) + else: + self.unregister(d) + # stop process, don't accumulate errors + d.stop([]) + # save process data to dead list + with plock: + self.dead_list.append(DeadProcess(d)) except: 
logger.error(traceback.format_exc()) @@ -582,18 +604,23 @@ def _run(self): printlog("all processes on machine have died, roslaunch will exit") self.is_shutdown = True del dead[:] + _respawn=[] for r in respawn: try: if self.is_shutdown: break - printlog("[%s] restarting process"%r.name) - # stop process, don't accumulate errors - r.stop([]) - r.start() + if r.should_respawn() <= 0.0: + printlog("[%s] restarting process" % r.name) + # stop process, don't accumulate errors + r.stop([]) + r.start() + else: + # not ready yet, keep it around + _respawn.append(r) except: traceback.print_exc() logger.error("Restart failed %s",traceback.format_exc()) - del respawn[:] + respawn = _respawn time.sleep(0.1) #yield thread #moved this to finally block of _post_run #self._post_run() #kill all processes diff --git a/tools/roslaunch/src/roslaunch/xmlloader.py b/tools/roslaunch/src/roslaunch/xmlloader.py index 7a683cbfb9..671461517f 100644 --- a/tools/roslaunch/src/roslaunch/xmlloader.py +++ b/tools/roslaunch/src/roslaunch/xmlloader.py @@ -127,6 +127,30 @@ def _bool_attr(v, default, label): else: raise XmlParseException("invalid bool value for %s: %s"%(label, v)) +def _float_attr(v, default, label): + """ + Validate float xml attribute. + @param v: parameter value or None if no value provided + @type v: any + @param default: default value + @type default: float + @param label: parameter name/label + @type label: str + @return: float value for attribute + @rtype: float + @raise XmlParseException: if v is not in correct range or is empty. 
+ """ + if v is None: + return default + if not v: + raise XmlParseException("float value for %s must be non-empty"%(label)) + try: + x = float(v) + except ValueError: + raise XmlParseException("invalid float value for %s: %s"%(label, v)) + return x + + # maps machine 'default' attribute to Machine default property _is_default = {'true': True, 'false': False, 'never': False } # maps machine 'default' attribute to Machine assignable property @@ -284,7 +308,7 @@ def _test_attrs(self, tag, context): @return: test_name, time_limit @rtype: str, int """ - for attr in ['respawn', 'output']: + for attr in ['respawn', 'respawn_delay', 'output']: if tag.hasAttribute(attr): raise XmlParseException(" tags cannot have '%s' attribute"%attr) @@ -306,7 +330,9 @@ def _test_attrs(self, tag, context): return test_name, time_limit, retry - NODE_ATTRS = ['pkg', 'type', 'machine', 'name', 'args', 'output', 'respawn', 'cwd', NS, CLEAR_PARAMS, 'launch-prefix', 'required'] + NODE_ATTRS = ['pkg', 'type', 'machine', 'name', 'args', 'output', \ + 'respawn', 'respawn_delay', 'cwd', NS, CLEAR_PARAMS, \ + 'launch-prefix', 'required'] TEST_ATTRS = NODE_ATTRS + ['test-name','time-limit', 'retry'] @ifunless @@ -347,8 +373,10 @@ def _node_tag(self, tag, context, ros_config, default_machine, is_test=False, ve pkg, node_type = self.reqd_attrs(tag, context, ('pkg', 'type')) # optional attributes - machine, args, output, respawn, cwd, launch_prefix, required = \ - self.opt_attrs(tag, context, ('machine', 'args', 'output', 'respawn', 'cwd', 'launch-prefix', 'required')) + machine, args, output, respawn, respawn_delay, cwd, launch_prefix, \ + required = self.opt_attrs(tag, context, ('machine', 'args', + 'output', 'respawn', 'respawn_delay', 'cwd', + 'launch-prefix', 'required')) if tag.hasAttribute('machine') and not len(machine.strip()): raise XmlParseException(" 'machine' must be non-empty: [%s]"%machine) if not machine and default_machine: @@ -356,6 +384,7 @@ def _node_tag(self, tag, context, 
ros_config, default_machine, is_test=False, ve # validate respawn, required required, respawn = [_bool_attr(*rr) for rr in ((required, False, 'required'),\ (respawn, False, 'respawn'))] + respawn_delay = _float_attr(respawn_delay, 0.0, 'respawn_delay') # each node gets its own copy of arguments, which # it inherits from its parent @@ -394,7 +423,8 @@ def _node_tag(self, tag, context, ros_config, default_machine, is_test=False, ve if not is_test: return Node(pkg, node_type, name=name, namespace=child_ns.ns, machine_name=machine, - args=args, respawn=respawn, + args=args, respawn=respawn, + respawn_delay=respawn_delay, remap_args=remap_context.remap_args(), env_args=env_context.env_args, output=output, cwd=cwd, launch_prefix=launch_prefix, required=required, filename=context.filename) diff --git a/tools/roslaunch/test/unit/test_roslaunch_core.py b/tools/roslaunch/test/unit/test_roslaunch_core.py index 07c57b3619..9307cb7f38 100644 --- a/tools/roslaunch/test/unit/test_roslaunch_core.py +++ b/tools/roslaunch/test/unit/test_roslaunch_core.py @@ -83,7 +83,11 @@ def test_Node(): assert n.package == 'package' assert n.type == 'node_type' assert n.xmltype() == 'node' - assert n.xmlattrs() == [('pkg', 'package'), ('type', 'node_type'), ('machine', None), ('ns', '/'), ('args', ''), ('output', None), ('cwd', None), ('respawn', False), ('name', None), ('launch-prefix', None), ('required', False)], n.xmlattrs() + assert n.xmlattrs() == [('pkg', 'package'), ('type', 'node_type'), + ('machine', None), ('ns', '/'), ('args', ''), ('output', None), + ('cwd', None), ('respawn', False), ('respawn_delay', 0.0), + ('name', None), ('launch-prefix', None), ('required', False)], \ + n.xmlattrs() assert n.output == None #tripwire for now diff --git a/tools/roslaunch/test/unit/test_roslaunch_pmon.py b/tools/roslaunch/test/unit/test_roslaunch_pmon.py index 26b90ac536..abca7cdba6 100644 --- a/tools/roslaunch/test/unit/test_roslaunch_pmon.py +++ b/tools/roslaunch/test/unit/test_roslaunch_pmon.py 
@@ -116,19 +116,41 @@ def __init__(self, package, name, args, env, respawn=False): self.stopped = False def stop(self, errors): self.stopped = True - + class RespawnOnceProcessMock(ProcessMock): - def __init__(self, package, name, args, env, respawn=False): + def __init__(self, package, name, args, env, respawn=True): super(ProcessMock, self).__init__(package, name, args, env, respawn) self.spawn_count = 0 def is_alive(self): + self.time_of_death = time.time() return False def start(self): self.spawn_count += 1 if self.spawn_count > 1: self.respawn = False + self.time_of_death = None + +class RespawnOnceWithDelayProcessMock(ProcessMock): + def __init__(self, package, name, args, env, respawn=True, + respawn_delay=1.0): + super(ProcessMock, self).__init__(package, name, args, env, respawn, + respawn_delay=respawn_delay) + self.spawn_count = 0 + self.respawn_interval = None + + def is_alive(self): + if self.time_of_death is None: + self.time_of_death = time.time() + return False + + def start(self): + self.spawn_count += 1 + if self.spawn_count > 1: + self.respawn = False + self.respawn_interval = time.time() - self.time_of_death + self.time_of_death = None ## Test roslaunch.server class TestRoslaunchPmon(unittest.TestCase): @@ -137,12 +159,13 @@ def setUp(self): self.pmon = roslaunch.pmon.ProcessMonitor() ## test all apis of Process instance. 
part coverage/sanity test - def _test_Process(self, p, package, name, args, env, respawn): + def _test_Process(self, p, package, name, args, env, respawn, respawn_delay): self.assertEquals(package, p.package) self.assertEquals(name, p.name) self.assertEquals(args, p.args) self.assertEquals(env, p.env) self.assertEquals(respawn, p.respawn) + self.assertEquals(respawn_delay, p.respawn_delay) self.assertEquals(0, p.spawn_count) self.assertEquals(None, p.exit_code) self.assert_(p.get_exit_description()) @@ -154,6 +177,7 @@ def _test_Process(self, p, package, name, args, env, respawn): self.assertEquals(args, info['args']) self.assertEquals(env, info['env']) self.assertEquals(respawn, info['respawn']) + self.assertEquals(respawn_delay, info['respawn_delay']) self.assertEquals(0, info['spawn_count']) self.failIf('exit_code' in info) @@ -184,14 +208,17 @@ def test_Process(self): args = [time.time(), time.time(), time.time()] env = { 'key': time.time(), 'key2': time.time() } - p = Process(package, name, args, env) - self._test_Process(p, package, name, args, env, False) - p = Process(package, name, args, env, True) - self._test_Process(p, package, name, args, env, True) - p = Process(package, name, args, env, False) - self._test_Process(p, package, name, args, env, False) - - def _test_DeadProcess(self, p0, package, name, args, env, respawn): + p = Process(package, name, args, env, 0.0) + self._test_Process(p, package, name, args, env, False, 0.0) + p = Process(package, name, args, env, True, 0.0) + self._test_Process(p, package, name, args, env, True, 0.0) + p = Process(package, name, args, env, False, 0.0) + self._test_Process(p, package, name, args, env, False, 0.0) + p = Process(package, name, args, env, True, 1.0) + self._test_Process(p, package, name, args, env, True, 1.0) + + def _test_DeadProcess(self, p0, package, name, args, env, respawn, + respawn_delay): from roslaunch.pmon import DeadProcess p0.exit_code = -1 dp = DeadProcess(p0) @@ -200,6 +227,7 @@ def 
_test_DeadProcess(self, p0, package, name, args, env, respawn): self.assertEquals(args, dp.args) self.assertEquals(env, dp.env) self.assertEquals(respawn, dp.respawn) + self.assertEquals(respawn_delay, dp.respawn_delay) self.assertEquals(0, dp.spawn_count) self.assertEquals(-1, dp.exit_code) self.failIf(dp.is_alive()) @@ -211,6 +239,7 @@ def _test_DeadProcess(self, p0, package, name, args, env, respawn): self.assertEquals(info0['args'], info['args']) self.assertEquals(info0['env'], info['env']) self.assertEquals(info0['respawn'], info['respawn']) + self.assertEquals(info0['respawn_delay'], info['respawn_delay']) self.assertEquals(0, info['spawn_count']) try: @@ -245,12 +274,14 @@ def test_DeadProcess(self): args = [time.time(), time.time(), time.time()] env = { 'key': time.time(), 'key2': time.time() } - p = Process(package, name, args, env) - self._test_DeadProcess(p, package, name, args, env, False) - p = Process(package, name, args, env, True) - self._test_DeadProcess(p, package, name, args, env, True) - p = Process(package, name, args, env, False) - self._test_DeadProcess(p, package, name, args, env, False) + p = Process(package, name, args, env, 0.0) + self._test_DeadProcess(p, package, name, args, env, False, 0.0) + p = Process(package, name, args, env, True, 0.0) + self._test_DeadProcess(p, package, name, args, env, True, 0.0) + p = Process(package, name, args, env, False, 0.0) + self._test_DeadProcess(p, package, name, args, env, False, 0.0) + p = Process(package, name, args, env, True, 1.0) + self._test_DeadProcess(p, package, name, args, env, True, 1.0) def test_start_shutdown_process_monitor(self): def failer(): @@ -411,6 +442,10 @@ def f(): p3 = RespawnOnceProcessMock('bar', 'name3', [], {}) pmon.register(p3) + # give pmon a process that wants to respawn once after a delay + p4 = RespawnOnceWithDelayProcessMock('bar', 'name4', [], {}) + pmon.register(p4) + # test assumptions about pmon's internal data structures # before we begin test self.assert_(p1 in 
pmon.procs) @@ -427,6 +462,12 @@ def f(): self.failIf(marker.marked, "pmon had to be externally killed") + self.failIf(p3.spawn_count < 2, "process did not respawn") + + self.failIf(p4.respawn_interval < p4.respawn_delay, + "Respawn delay not respected: %s %s" % (p4.respawn_interval, + p4.respawn_delay)) + # retest assumptions self.failIf(pmon.procs) self.assert_(pmon.is_shutdown) diff --git a/tools/roslaunch/test/unit/test_roslaunch_remote.py b/tools/roslaunch/test/unit/test_roslaunch_remote.py index 41c416fac1..3fbed6abd9 100644 --- a/tools/roslaunch/test/unit/test_roslaunch_remote.py +++ b/tools/roslaunch/test/unit/test_roslaunch_remote.py @@ -44,55 +44,57 @@ def test_remote_node_xml(self): # these are fairly brittle tests, but need to make sure there aren't regressions here Node = roslaunch.core.Node n = Node('pkg1', 'type1') - self.assertEquals('\n', n.to_remote_xml()) + self.assertEquals('\n', n.to_remote_xml()) n = Node('pkg2', 'type2', namespace="/ns2/") - self.assertEquals('\n', n.to_remote_xml()) + self.assertEquals('\n', n.to_remote_xml()) # machine_name should be a noop for remote xml n = Node('pkg3', 'type3', namespace="/ns3/", machine_name="machine3") - self.assertEquals('\n', n.to_remote_xml()) + self.assertEquals('\n', n.to_remote_xml()) # test args n = Node('pkg4', 'type4', args="arg4a arg4b") - self.assertEquals('\n', n.to_remote_xml()) + self.assertEquals('\n', n.to_remote_xml()) # test respawn n = Node('pkg5', 'type5', respawn=True) - self.assertEquals('\n', n.to_remote_xml()) + self.assertEquals('\n', n.to_remote_xml()) + n = Node('pkg5', 'type5', respawn=True, respawn_delay=1.0) + self.assertEquals('\n', n.to_remote_xml()) n = Node('pkg6', 'type6', respawn=False) - self.assertEquals('\n', n.to_remote_xml()) + self.assertEquals('\n', n.to_remote_xml()) # test remap_args n = Node('pkg6', 'type6', remap_args=[('from6a', 'to6a'), ('from6b', 'to6b')]) - self.assertEquals(""" + self.assertEquals(""" """, n.to_remote_xml()) # test env args n = 
Node('pkg7', 'type7', env_args=[('key7a', 'val7a'), ('key7b', 'val7b')]) - self.assertEquals(""" + self.assertEquals(""" """, n.to_remote_xml()) # test cwd n = Node('pkg8', 'type8', cwd='ROS_HOME') - self.assertEquals('\n', n.to_remote_xml()) + self.assertEquals('\n', n.to_remote_xml()) n = Node('pkg9', 'type9', cwd='node') - self.assertEquals('\n', n.to_remote_xml()) + self.assertEquals('\n', n.to_remote_xml()) # test output n = Node('pkg10', 'type10', output='screen') - self.assertEquals('\n', n.to_remote_xml()) + self.assertEquals('\n', n.to_remote_xml()) n = Node('pkg11', 'type11', output='log') - self.assertEquals('\n', n.to_remote_xml()) + self.assertEquals('\n', n.to_remote_xml()) # test launch-prefix n = Node('pkg12', 'type12', launch_prefix='xterm -e') - self.assertEquals('\n', n.to_remote_xml()) + self.assertEquals('\n', n.to_remote_xml()) # test required n = Node('pkg13', 'type13', required=True) - self.assertEquals('\n', n.to_remote_xml()) + self.assertEquals('\n', n.to_remote_xml()) #test everything n = Node('pkg20', 'type20', namespace="/ns20/", machine_name="foo", remap_args=[('from20a', 'to20a'), ('from20b', 'to20b')], env_args=[('key20a', 'val20a'), ('key20b', 'val20b')], output="screen", cwd="ROS_HOME", respawn=True, args="arg20a arg20b", launch_prefix="nice", required=False) - self.assertEquals(""" + self.assertEquals("""