Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added retries for agent cgroups test #3075

Merged
merged 4 commits into from
Mar 1, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 20 additions & 16 deletions tests_e2e/tests/lib/cgroup_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from azurelinuxagent.common.version import DISTRO_NAME, DISTRO_VERSION
from tests_e2e.tests.lib.agent_log import AgentLog
from tests_e2e.tests.lib.logging import log
from tests_e2e.tests.lib.retry import retry_if_false

BASE_CGROUP = '/sys/fs/cgroup'
AGENT_CGROUP_NAME = 'WALinuxAgent'
Expand Down Expand Up @@ -93,23 +94,26 @@ def verify_agent_cgroup_assigned_correctly():
This method checks agent is running and assigned to the correct cgroup using service status output
"""
log.info("===== Verifying the daemon and the agent are assigned to the same correct cgroup using systemd")
service_status = shellutil.run_command(["systemctl", "status", systemd.get_agent_unit_name()])
log.info("Agent service status output:\n%s", service_status)
is_active = False
is_cgroup_assigned = False
cgroup_mount_path = get_agent_cgroup_mount_path()
is_active_pattern = re.compile(r".*Active:\s+active.*")

for line in service_status.splitlines():
if re.match(is_active_pattern, line):
is_active = True
elif cgroup_mount_path in line:
is_cgroup_assigned = True

if not is_active:
fail('walinuxagent service was not active/running. Service status:{0}'.format(service_status))
if not is_cgroup_assigned:
fail('walinuxagent service was not assigned to the expected cgroup:{0}'.format(cgroup_mount_path))
service_status = ""

def check_agent_service_cgroup():
    """
    Query systemd for the agent unit's status and report whether it looks healthy.

    Runs `systemctl status <agent unit>` through shellutil.run_command, logs the
    output, and scans it line by line. Returns True only when one line matches the
    "Active: active" pattern and a different line contains cgroup_mount_path; a line
    that matches the active pattern is not also checked for the cgroup path.
    """
    # NOTE(review): this assignment binds a name local to this function. If the
    # enclosing scope also keeps a `service_status` for its failure message, that
    # variable is not updated from here without a `nonlocal` declaration - confirm.
    service_status = shellutil.run_command(["systemctl", "status", systemd.get_agent_unit_name()])
    log.info("Agent service status output:\n%s", service_status)

    active_line = re.compile(r".*Active:\s+active.*")
    saw_active = False
    saw_cgroup = False

    for status_line in service_status.splitlines():
        if active_line.match(status_line):
            saw_active = True
            continue
        # Only lines that did not match the active pattern reach this check.
        if cgroup_mount_path in status_line:
            saw_cgroup = True

    return saw_active and saw_cgroup

if not retry_if_false(check_agent_service_cgroup):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we add a comment explaining why the retry is needed?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

addressed

fail('walinuxagent service was not assigned to the expected cgroup:{0}. Current agent status:{1}'.format(cgroup_mount_path, service_status))

log.info("Successfully verified the agent cgroup assigned correctly by systemd\n")

Expand Down
23 changes: 13 additions & 10 deletions tests_e2e/tests/scripts/agent_cgroups-check_cgroups_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,22 +61,25 @@ def verify_agent_cgroup_created_on_file_system():
"""
log.info("===== Verifying the agent cgroup paths exist on file system")
agent_cgroup_mount_path = get_agent_cgroup_mount_path()
all_agent_cgroup_controllers_path_exist = True
log.info("expected agent cgroup mount path: %s", agent_cgroup_mount_path)

missing_agent_cgroup_controllers_path = []
verified_agent_cgroup_controllers_path = []

log.info("expected agent cgroup mount path: %s", agent_cgroup_mount_path)
def is_agent_cgroup_controllers_path_exist():
# Returns True only when, for every controller in AGENT_CONTROLLERS, the path built from
# BASE_CGROUP, the controller name and agent_cgroup_mount_path[1:] exists on the file system.
# NOTE(review): this span is diff output with indentation stripped; the loop header and the
# exists-check each appear twice (presumably the removed and the added version of the same
# lines) - confirm against the actual file before relying on the statement order shown here.
# NOTE(review): the missing/verified lists are created before this def and are only appended
# to here, never reset; if retry_if_false calls this more than once, entries may repeat - confirm.
all_controllers_path_exist = True

for controller in AGENT_CONTROLLERS:
agent_controller_path = os.path.join(BASE_CGROUP, controller, agent_cgroup_mount_path[1:])
for controller in AGENT_CONTROLLERS:
# [1:] drops the first character of the mount path - presumably a leading '/' so that
# os.path.join does not treat it as an absolute path; verify.
agent_controller_path = os.path.join(BASE_CGROUP, controller, agent_cgroup_mount_path[1:])

if not os.path.exists(agent_controller_path):
all_agent_cgroup_controllers_path_exist = False
missing_agent_cgroup_controllers_path.append(agent_controller_path)
else:
verified_agent_cgroup_controllers_path.append(agent_controller_path)
if not os.path.exists(agent_controller_path):
# A single missing controller path makes the overall result False; the path is recorded.
all_controllers_path_exist = False
missing_agent_cgroup_controllers_path.append(agent_controller_path)
else:
verified_agent_cgroup_controllers_path.append(agent_controller_path)
return all_controllers_path_exist

if not all_agent_cgroup_controllers_path_exist:
if not retry_if_false(is_agent_cgroup_controllers_path_exist):
fail("Agent's cgroup paths couldn't be found on file system. Missing agent cgroups path :{0}.\n Verified agent cgroups path:{1}".format(missing_agent_cgroup_controllers_path, verified_agent_cgroup_controllers_path))

log.info('Verified all agent cgroup paths are present.\n {0}'.format(verified_agent_cgroup_controllers_path))
Expand Down
Loading