Skip to content

Commit

Permalink
improvement(remoter): re-running command on retriable exceptions
Browse files Browse the repository at this point in the history
We can safely retry a command when it did not run on the remote host.
This can happen when the SSH connection or channel was not
successfully initiated.
Related issues: scylladb#1793, scylladb#1631, scylladb#1815
  • Loading branch information
Bentsi Magidovich committed Feb 18, 2020
1 parent a98fb4c commit a654cc8
Showing 1 changed file with 38 additions and 5 deletions.
43 changes: 38 additions & 5 deletions sdcm/remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from invoke.exceptions import UnexpectedExit, Failure
from invoke.watchers import StreamWatcher, Responder
from paramiko import SSHException, RSAKey
from paramiko.ssh_exception import NoValidConnectionsError
from paramiko.ssh_exception import NoValidConnectionsError, AuthenticationException

from sdcm.log import SDCMAdapter
from sdcm.utils.common import retrying
Expand All @@ -53,10 +53,28 @@ class SSHConnectTimeoutError(Exception):
"""


NETWORK_EXCEPTIONS = (NoValidConnectionsError, SSHException, SSHConnectTimeoutError, EOFError,
NETWORK_EXCEPTIONS = (NoValidConnectionsError, SSHException, SSHConnectTimeoutError, EOFError, AuthenticationException,
ConnectionResetError, ConnectionAbortedError, ConnectionError, ConnectionRefusedError)


class RetriableNetworkException(Exception):
    """
    SSH protocol exception that can be safely retried.

    Raised in place of the original error when is_exception_retriable()
    recognizes the error text. Methods decorated with
    @retrying(..., allowed_exceptions=(RetriableNetworkException, ))
    name it as the exception type they allow for a retry.
    """


def is_exception_retriable(err_str):
    """Check whether an error message describes a failure that can be safely retried.

    The text is searched for a fixed set of substrings that identify
    SSH connection/channel set-up failures (authentication or key-exchange
    timeouts, banner read errors, channel open failures, connection closed
    during identification exchange).

    :param err_str: error text to inspect, e.g. ``str(exception)`` or the
        stderr of a failed command. ``None`` and empty values are treated as
        "not retriable"; ``bytes`` are decoded before matching.
    :return: True if any known marker occurs in the text, False otherwise.
    """
    # This helper is called from inside ``except`` handlers; raising a
    # TypeError here (e.g. on a missing stderr) would hide the real failure.
    if not err_str:
        return False
    if isinstance(err_str, (bytes, bytearray)):
        err_str = bytes(err_str).decode("utf-8", errors="replace")

    retriable_markers = (
        "Authentication timeout",
        "Error reading SSH protocol banner",
        "Timeout opening channel",
        "Unable to open channel",
        "Key-exchange timed out waiting for key negotiation",
        "ssh_exchange_identification: Connection closed by remote host",
    )
    return any(marker in err_str for marker in retriable_markers)


def _scp_remote_escape(filename):
"""
Escape special chars for SCP use.
Expand Down Expand Up @@ -252,6 +270,7 @@ def ssh_debug_cmd(self):
return "SSH access -> 'ssh %s@%s'" % (self.user,
self.hostname)

@retrying(n=3, sleep_time=5, allowed_exceptions=(RetriableNetworkException, ))
def run(self, cmd, timeout=None, ignore_status=False, # pylint: disable=too-many-arguments
verbose=True, new_session=False, log_file=None, retry=1, watchers=None):

Expand Down Expand Up @@ -280,9 +299,11 @@ def _run():
setattr(result, 'duration', time.time() - start_time)
setattr(result, 'exit_status', result.exited)
return result
except SSHException as ex:
except NETWORK_EXCEPTIONS as ex:
LOGGER.error(ex)
self._ssh_is_up.clear()
if is_exception_retriable(str(ex)):
raise RetriableNetworkException(str(ex))
raise
except Exception as details: # pylint: disable=broad-except
if hasattr(details, "result"):
Expand Down Expand Up @@ -327,6 +348,7 @@ def stop_ssh_up_thread(self):
self._ssh_up_thread.join(5)
self._ssh_up_thread = None

@retrying(n=3, sleep_time=5, allowed_exceptions=(RetriableNetworkException, ))
def receive_files(self, src, dst, delete_dst=False, # pylint: disable=too-many-arguments
preserve_perm=True, preserve_symlinks=False):
"""
Expand Down Expand Up @@ -392,7 +414,12 @@ def receive_files(self, src, dst, delete_dst=False, # pylint: disable=too-many-
escape=False)
local_dest = quote(dst)
scp = self._make_scp_cmd([remote_source], local_dest)
result = LocalCmdRunner().run(scp)
try:
result = LocalCmdRunner().run(scp)
except UnexpectedExit as ex:
if is_exception_retriable(ex.result.stderr):
raise RetriableNetworkException(ex.result.stderr)
raise
self.log.info("Command {} with status {}".format(result.command, result.exited))
if result.exited:
files_received = False
Expand All @@ -409,6 +436,7 @@ def receive_files(self, src, dst, delete_dst=False, # pylint: disable=too-many-
self._set_umask_perms(dst)
return files_received

@retrying(n=3, sleep_time=5, allowed_exceptions=(RetriableNetworkException, ))
def send_files(self, src, dst, delete_dst=False, # pylint: disable=too-many-arguments,too-many-statements
preserve_symlinks=False, verbose=False):
"""
Expand Down Expand Up @@ -499,7 +527,12 @@ def send_files(self, src, dst, delete_dst=False, # pylint: disable=too-many-arg
local_sources = self._make_rsync_compatible_source(src, True)
if local_sources:
scp = self._make_scp_cmd(local_sources, remote_dest)
result = LocalCmdRunner().run(scp)
try:
result = LocalCmdRunner().run(scp)
except UnexpectedExit as ex:
if is_exception_retriable(ex.result.stderr):
raise RetriableNetworkException(ex.result.stderr)
raise
self.log.info('Command {} with status {}'.format(result.command, result.exited))
if result.exited:
files_sent = False
Expand Down

0 comments on commit a654cc8

Please sign in to comment.