Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Retry tar operations after 'Unexpected EOF in archive' during node setup #2891

Merged
merged 41 commits into from
Aug 15, 2023
Merged
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
da72c37
Update version to dummy 1.0.0.0'
maddieford Nov 8, 2022
59dbd22
Revert version change
maddieford Nov 8, 2022
633a826
Merge remote-tracking branch 'upstream/develop' into develop
maddieford Nov 21, 2022
14a743f
Merge remote-tracking branch 'upstream/develop' into develop
maddieford Dec 8, 2022
54ea0f3
Merge remote-tracking branch 'upstream/develop' into develop
maddieford Jan 10, 2023
e79c4c5
Merge remote-tracking branch 'upstream/develop' into develop
maddieford Feb 8, 2023
498b612
Merge remote-tracking branch 'upstream/develop' into develop
maddieford Feb 14, 2023
1e269f4
Merge remote-tracking branch 'upstream/develop' into develop
maddieford Mar 13, 2023
7b49e76
Merge remote-tracking branch 'upstream/develop' into develop
maddieford Mar 24, 2023
0a426cc
Merge remote-tracking branch 'upstream/develop' into develop
maddieford Apr 6, 2023
17fbf6a
Merge remote-tracking branch 'upstream/develop' into develop
maddieford Apr 7, 2023
995cbb9
Merge remote-tracking branch 'upstream/develop' into develop
maddieford Apr 13, 2023
eaadc83
Merge remote-tracking branch 'upstream/develop' into develop
maddieford Apr 24, 2023
fb03e07
Merge remote-tracking branch 'upstream/develop' into develop
maddieford Apr 27, 2023
6a8e0d6
Merge remote-tracking branch 'upstream/develop' into develop
maddieford May 19, 2023
b4951c8
Merge branch 'develop' of github.com:Azure/WALinuxAgent into develop
maddieford Jun 6, 2023
c6d9300
Merge branch 'develop' of github.com:maddieford/WALinuxAgent into dev…
maddieford Jun 23, 2023
f650fe4
Merge remote-tracking branch 'upstream/develop' into develop
maddieford Jul 10, 2023
a10bdfa
Merge branch 'develop' of github.com:maddieford/WALinuxAgent into dev…
maddieford Jul 10, 2023
50dcec5
Merge remote-tracking branch 'upstream/develop' into develop
maddieford Jul 18, 2023
b87db37
merge changes
maddieford Jul 20, 2023
56e84c1
Merge remote-tracking branch 'upstream/develop' into develop
maddieford Jul 25, 2023
fd5d0f5
Merge remote-tracking branch 'upstream/develop' into develop
maddieford Aug 2, 2023
67ad5b0
Capture output of the copy commands during setup
maddieford Aug 2, 2023
d49fe22
Add verbose to copy command
maddieford Aug 2, 2023
a1972b4
Update typing for copy to node methods
maddieford Aug 3, 2023
013ccac
Print contents of tar before extracting
maddieford Aug 3, 2023
e017ed8
Print contents of tar before extracting
maddieford Aug 3, 2023
3a041de
Print contents of tar before extracting
maddieford Aug 3, 2023
5deea8c
Print contents of tar before extracting
maddieford Aug 3, 2023
83cf5e3
Retry copying tarball if contents on test node do not match
maddieford Aug 3, 2023
6cafb43
Revert copy method def
maddieford Aug 3, 2023
530a950
Revert copy method def
maddieford Aug 3, 2023
5b0f0b1
Merge branch 'develop' into tar_eof
maddieford Aug 14, 2023
cbd8321
Catch EOF error
maddieford Aug 14, 2023
531cf58
Retry tar operations if we see failure
maddieford Aug 14, 2023
1447fb3
Revert target_path
maddieford Aug 14, 2023
a88a2ab
Remove accidental copy of exception
maddieford Aug 14, 2023
d408f4f
Remove blank line
maddieford Aug 14, 2023
68e1c10
tar cvf and copy commands overwrite
maddieford Aug 14, 2023
a229177
Merge branch 'develop' into tar_eof
maddieford Aug 14, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 56 additions & 33 deletions tests_e2e/orchestrator/lib/agent_test_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
from tests_e2e.tests.lib.logging import log
from tests_e2e.tests.lib.logging import set_current_thread_log
from tests_e2e.tests.lib.agent_log import AgentLogRecord
from tests_e2e.tests.lib.shell import run_command
from tests_e2e.tests.lib.shell import run_command, CommandError
from tests_e2e.tests.lib.ssh_client import SshClient


Expand Down Expand Up @@ -304,29 +304,6 @@ def _setup_node(self, install_test_agent: bool) -> None:
log.info("Downloading %s to %s", pypy_download, pypy_path)
run_command(["wget", pypy_download, "-O", pypy_path])

#
# Create a tarball with the files we need to copy to the test node. The tarball includes two directories:
#
# * bin - Executable files (Bash and Python scripts)
# * lib - Library files (Python modules)
#
# After extracting the tarball on the test node, 'bin' will be added to PATH and PYTHONPATH will be set to 'lib'.
#
# Note that executables are placed directly under 'bin', while the path for Python modules is preserved under 'lib'.
#
tarball_path: Path = Path("/tmp/waagent.tar")
log.info("Creating %s with the files need on the test node", tarball_path)
log.info("Adding orchestrator/scripts")
command = "cd {0} ; tar cvf {1} --transform='s,^,bin/,' *".format(self.context.test_source_directory/"orchestrator"/"scripts", str(tarball_path))
log.info("%s\n%s", command, run_command(command, shell=True))
log.info("Adding tests/scripts")
command = "cd {0} ; tar rvf {1} --transform='s,^,bin/,' *".format(self.context.test_source_directory/"tests"/"scripts", str(tarball_path))
log.info("%s\n%s", command, run_command(command, shell=True))
log.info("Adding tests/lib")
command = "cd {0} ; tar rvf {1} --transform='s,^,lib/,' --exclude=__pycache__ tests_e2e/tests/lib".format(self.context.test_source_directory.parent, str(tarball_path))
log.info("%s\n%s", command, run_command(command, shell=True))
log.info("Contents of %s:\n\n%s", tarball_path, run_command(['tar', 'tvf', str(tarball_path)]))

#
# Cleanup the test node (useful for developer runs)
#
Expand All @@ -335,24 +312,70 @@ def _setup_node(self, install_test_agent: bool) -> None:
self.context.ssh_client.run_command("rm -rvf ~/{bin,lib,tmp}", use_sudo=True)

#
# Copy the tarball, Pypy and the test Agent to the test node
# Copy Pypy and the test Agent to the test node
#
target_path = Path("~")/"tmp"
self.context.ssh_client.run_command(f"mkdir {target_path}")
log.info("Copying %s to %s:%s", tarball_path, self.context.node.name, target_path)
self.context.ssh_client.copy_to_node(tarball_path, target_path)
log.info("Copying %s to %s:%s", pypy_path, self.context.node.name, target_path)
self.context.ssh_client.copy_to_node(pypy_path, target_path)
agent_package_path: Path = self._get_agent_package_path()
log.info("Copying %s to %s:%s", agent_package_path, self.context.node.name, target_path)
self.context.ssh_client.copy_to_node(agent_package_path, target_path)

#
# Extract the tarball and execute the install scripts
#
log.info('Installing tools on the test node')
command = f"tar xvf {target_path/tarball_path.name} && ~/bin/install-tools"
log.info("Remote command [%s] completed:\n%s", command, self.context.ssh_client.run_command(command))
# tar commands sometimes fail with 'tar: Unexpected EOF in archive' error. Retry tarball creation, copy, and
# extraction if we hit this error
tar_retries = 3
while tar_retries > 0:
try:
#
# Create a tarball with the files we need to copy to the test node. The tarball includes two directories:
#
# * bin - Executable files (Bash and Python scripts)
# * lib - Library files (Python modules)
#
# After extracting the tarball on the test node, 'bin' will be added to PATH and PYTHONPATH will be set to 'lib'.
#
# Note that executables are placed directly under 'bin', while the path for Python modules is preserved under 'lib'.
#
tarball_path: Path = Path("/tmp/waagent.tar")
log.info("Creating %s with the files need on the test node", tarball_path)
log.info("Adding orchestrator/scripts")
command = "cd {0} ; tar cvf {1} --transform='s,^,bin/,' *".format(self.context.test_source_directory/"orchestrator"/"scripts", str(tarball_path))
log.info("%s\n%s", command, run_command(command, shell=True))
log.info("Adding tests/scripts")
command = "cd {0} ; tar rvf {1} --transform='s,^,bin/,' *".format(self.context.test_source_directory/"tests"/"scripts", str(tarball_path))
log.info("%s\n%s", command, run_command(command, shell=True))
log.info("Adding tests/lib")
command = "cd {0} ; tar rvf {1} --transform='s,^,lib/,' --exclude=__pycache__ tests_e2e/tests/lib".format(self.context.test_source_directory.parent, str(tarball_path))
log.info("%s\n%s", command, run_command(command, shell=True))
log.info("Contents of %s:\n\n%s", tarball_path, run_command(['tar', 'tvf', str(tarball_path)]))

#
# Copy the tarball to the test node
#
log.info("Copying %s to %s:%s", tarball_path, self.context.node.name, target_path)
self.context.ssh_client.copy_to_node(tarball_path, target_path)

#
# Extract the tarball and execute the install scripts
#
log.info('Installing tools on the test node')
command = f"tar xvf {target_path/tarball_path.name} && ~/bin/install-tools"
log.info("Remote command [%s] completed:\n%s", command, self.context.ssh_client.run_command(command))

# Tarball creation and extraction was successful - no need to retry
tar_retries = 0

except CommandError as error:
if "tar: Unexpected EOF in archive" in error.stderr:
tar_retries -= 1
# Log the error with traceback to see which tar operation failed
log.info(f"Tarball creation or extraction failed: \n{error}")
# Retry tar operations
if tar_retries > 0:
nagworld9 marked this conversation as resolved.
Show resolved Hide resolved
log.info("Retrying tarball creation and extraction...")
else:
raise Exception(f"Unexpected error when creating or extracting tarball during node setup: {error}")

if self.context.is_vhd:
log.info("Using a VHD; will not install the Test Agent.")
Expand Down