Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

move setupslice after cgroupsv2 check, remove unit file for log collector and remove fiirewall daemon-reload #3223

Merged
merged 7 commits into from
Sep 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 9 additions & 21 deletions azurelinuxagent/ga/cgroupconfigurator.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,16 +68,6 @@
LOGCOLLECTOR_SLICE = "azure-walinuxagent-logcollector.slice"
# More info on resource limits properties in systemd here:
# https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/7/html/resource_management_guide/sec-modifying_control_groups
_LOGCOLLECTOR_SLICE_CONTENTS_FMT = """
[Unit]
Description=Slice for Azure VM Agent Periodic Log Collector
DefaultDependencies=no
Before=slices.target
[Slice]
CPUAccounting=yes
CPUQuota={cpu_quota}
MemoryAccounting=yes
"""
LOGCOLLECTOR_CPU_QUOTA_FOR_V1_AND_V2 = "5%"
LOGCOLLECTOR_MEMORY_THROTTLE_LIMIT_FOR_V2 = "170M"
LOGCOLLECTOR_MAX_THROTTLED_EVENTS_FOR_V2 = 10
Expand Down Expand Up @@ -181,6 +171,11 @@ def initialize(self):
log_cgroup_warning("Unable to determine which cgroup version to use: {0}".format(ustr(e)), send_event=True)
return

# TODO: Move this and systemd system check to cgroups_supported logic above
if self.using_cgroup_v2():
log_cgroup_info("Agent and extensions resource monitoring is not currently supported on cgroup v2")
return

# We check the agent unit 'Slice' property before setting up azure.slice. This check is done first
# because the agent's Slice unit property will be 'azure.slice' if the slice drop-in file exists, even
# though systemd has not moved the agent to azure.slice yet. Systemd will only move the agent to
Expand All @@ -192,18 +187,12 @@ def initialize(self):
return

# Notes about slice setup:
# 1. On first agent update (for machines where daemon version did not already create azure.slice), the
# On first agent update (for machines where daemon version did not already create azure.slice), the
# agent creates azure.slice and the agent unit Slice drop-in file, but systemd does not move the agent
# unit to azure.slice until service restart. It is ok to enable cgroup usage in this case if agent is
# running in system.slice.
# 2. We setup the slices before v2 check. Cgroup v2 usage is disabled for agent and extensions, but
# can be enabled for log collector in waagent.conf. The log collector slice should be created in case
# v2 usage is enabled for log collector.
self.__setup_azure_slice()

if self.using_cgroup_v2():
log_cgroup_info("Agent and extensions resource monitoring is not currently supported on cgroup v2")
return
self.__setup_azure_slice()

# Log mount points/root paths for cgroup controllers
self._cgroups_api.log_root_paths()
Expand Down Expand Up @@ -295,9 +284,8 @@ def __setup_azure_slice():
if not os.path.exists(vmextensions_slice):
files_to_create.append((vmextensions_slice, _VMEXTENSIONS_SLICE_CONTENTS))

# Update log collector slice contents
slice_contents = _LOGCOLLECTOR_SLICE_CONTENTS_FMT.format(cpu_quota=LOGCOLLECTOR_CPU_QUOTA_FOR_V1_AND_V2)
files_to_create.append((logcollector_slice, slice_contents))
# New agent will setup limits for scope instead slice, so removing existing logcollector slice.
CGroupConfigurator._Impl.__cleanup_unit_file(logcollector_slice)
maddieford marked this conversation as resolved.
Show resolved Hide resolved

if fileutil.findre_in_file(agent_unit_file, r"Slice=") is not None:
CGroupConfigurator._Impl.__cleanup_unit_file(agent_drop_in_file_slice)
Expand Down
10 changes: 1 addition & 9 deletions azurelinuxagent/ga/persist_firewall_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,8 +199,7 @@ def _setup_network_setup_service(self):

# Create unit file with default values
self.__set_service_unit_file()
# Reload systemd configurations when we setup the service for the first time to avoid systemctl warnings
self.__reload_systemd_conf()
# After modifying the service, systemctl may issue a warning when checking the service, and daemon-reload should not be used to clear the warning, since it can affect other services
logger.info("Successfully added and enabled the {0}".format(self._network_setup_service_name))

def __setup_binary_file(self):
Expand Down Expand Up @@ -297,13 +296,6 @@ def __log_network_setup_service_logs(self):
message=msg,
log_event=False)

def __reload_systemd_conf(self):
try:
logger.info("Executing systemctl daemon-reload for setting up {0}".format(self._network_setup_service_name))
shellutil.run_command(["systemctl", "daemon-reload"])
except Exception as exception:
logger.warn("Unable to reload systemctl configurations: {0}".format(ustr(exception)))

def __get_unit_file_version(self):
if not os.path.exists(self.get_service_file_path()):
raise OSError("{0} not found".format(self.get_service_file_path()))
Expand Down
18 changes: 6 additions & 12 deletions tests/ga/test_cgroupconfigurator.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,26 +221,20 @@ def test_initialize_should_create_unit_files_when_the_agent_service_file_is_not_
self.assertTrue(os.path.exists(agent_drop_in_file_cpu_accounting), "{0} was not created".format(agent_drop_in_file_cpu_accounting))
self.assertTrue(os.path.exists(agent_drop_in_file_memory_accounting), "{0} was not created".format(agent_drop_in_file_memory_accounting))

def test_initialize_should_update_logcollector_memorylimit(self):
def test_initialize_should_clear_logcollector_slice(self):
with self._get_cgroup_configurator(initialize=False) as configurator:
log_collector_unit_file = configurator.mocks.get_mapped_path(UnitFilePaths.logcollector)
original_memory_limit = "MemoryLimit=30M"

# The mock creates the slice unit file with memory limit
# The mock creates the slice unit file
configurator.mocks.add_data_file(os.path.join(data_dir, 'init', "azure-walinuxagent-logcollector.slice"),
UnitFilePaths.logcollector)
if not os.path.exists(log_collector_unit_file):
raise Exception("{0} should have been created during test setup".format(log_collector_unit_file))
if not fileutil.findre_in_file(log_collector_unit_file, original_memory_limit):
raise Exception("MemoryLimit was not set correctly. Expected: {0}. Got:\n{1}".format(
original_memory_limit, fileutil.read_file(log_collector_unit_file)))

self.assertTrue(os.path.exists(log_collector_unit_file), "{0} was not created".format(log_collector_unit_file))

configurator.initialize()

# initialize() should update the unit file to remove the memory limit
self.assertFalse(fileutil.findre_in_file(log_collector_unit_file, original_memory_limit),
"Log collector slice unit file was not updated correctly. Expected no memory limit. Got:\n{0}".format(
fileutil.read_file(log_collector_unit_file)))
# initialize() should remove the unit file
self.assertFalse(os.path.exists(log_collector_unit_file), "{0} should not have been created".format(log_collector_unit_file))

def test_setup_extension_slice_should_create_unit_files(self):
with self._get_cgroup_configurator() as configurator:
Expand Down
11 changes: 0 additions & 11 deletions tests/ga/test_persist_firewall_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,13 +127,6 @@ def __assert_systemctl_called(self, cmd="enable", validate_command_called=True):
else:
self.assertNotIn(systemctl_command, self.__executed_commands, "Systemctl command {0} found".format(cmd))

def __assert_systemctl_reloaded(self, validate_command_called=True):
systemctl_reload = ["systemctl", "daemon-reload"]
if validate_command_called:
self.assertIn(systemctl_reload, self.__executed_commands, "Systemctl config not reloaded")
else:
self.assertNotIn(systemctl_reload, self.__executed_commands, "Systemctl config reloaded")

def __assert_firewall_cmd_running_called(self, validate_command_called=True):
cmd = PersistFirewallRulesHandler._FIREWALLD_RUNNING_CMD
if validate_command_called:
Expand All @@ -144,7 +137,6 @@ def __assert_firewall_cmd_running_called(self, validate_command_called=True):
def __assert_network_service_setup_properly(self):
self.__assert_systemctl_called(cmd="is-enabled", validate_command_called=True)
self.__assert_systemctl_called(cmd="enable", validate_command_called=True)
self.__assert_systemctl_reloaded()
self.__assert_firewall_called(cmd=FirewallCmdDirectCommands.PassThrough, validate_command_called=False)
self.assertTrue(os.path.exists(self._network_service_unit_file), "Service unit file should be there")
self.assertTrue(os.path.exists(self._binary_file), "Binary file should be there")
Expand Down Expand Up @@ -200,7 +192,6 @@ def __setup_and_assert_network_service_setup_scenario(self, handler, mock_popen=

self.__assert_systemctl_called(cmd="is-enabled", validate_command_called=True)
self.__assert_systemctl_called(cmd="enable", validate_command_called=True)
self.__assert_systemctl_reloaded(validate_command_called=True)
self.__assert_firewall_cmd_running_called(validate_command_called=True)
self.__assert_firewall_called(cmd=FirewallCmdDirectCommands.QueryPassThrough, validate_command_called=False)
self.__assert_firewall_called(cmd=FirewallCmdDirectCommands.RemovePassThrough, validate_command_called=False)
Expand Down Expand Up @@ -234,7 +225,6 @@ def test_it_should_skip_setup_if_agent_network_setup_service_already_enabled_and

self.__assert_systemctl_called(cmd="is-enabled", validate_command_called=True)
self.__assert_systemctl_called(cmd="enable", validate_command_called=False)
self.__assert_systemctl_reloaded(validate_command_called=False)
self.__assert_firewall_cmd_running_called(validate_command_called=True)
self.__assert_firewall_called(cmd=FirewallCmdDirectCommands.QueryPassThrough, validate_command_called=False)
self.__assert_firewall_called(cmd=FirewallCmdDirectCommands.RemovePassThrough, validate_command_called=False)
Expand Down Expand Up @@ -396,7 +386,6 @@ def test_it_should_delete_custom_service_files_if_firewalld_enabled(self):
self.__assert_firewall_called(cmd=FirewallCmdDirectCommands.PassThrough, validate_command_called=True)
self.__assert_systemctl_called(cmd="is-enabled", validate_command_called=False)
self.__assert_systemctl_called(cmd="enable", validate_command_called=False)
self.__assert_systemctl_reloaded(validate_command_called=False)
self.assertFalse(os.path.exists(handler.get_service_file_path()), "Service unit file found")
self.assertFalse(os.path.exists(os.path.join(conf.get_lib_dir(), handler.BINARY_FILE_NAME)), "Binary file found")

Expand Down
10 changes: 8 additions & 2 deletions tests/lib/mock_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,18 @@
import os
import sys

if len(sys.argv) != 4:
if len(sys.argv) < 4:
sys.stderr.write("usage: {0} <stdout> <return_value> <stderr>".format(os.path.basename(__file__)))

# W0632: Possible unbalanced tuple unpacking with sequence: left side has 3 label(s), right side has 0 value(s) (unbalanced-tuple-unpacking)
# Disabled: Unpacking is balanced: there is a check for the length on line 5
stdout, return_value, stderr = sys.argv[1:] # pylint: disable=W0632

# This script will be used for mocking cgroups commands in test, when popen called this script will be executed instead of actual commands
# We pass stdout, return_value, stderr of the mocked command output as arguments to this script and this script will print them to stdout, stderr and exit with the return value
# So that popen gets the output of the mocked command. Ideally we should get 4 arguments in sys.argv, first one is the script name, next 3 are the actual command output
# But somehow when we run the tests from pycharm, it adds extra arguments next to the script name, so we need to handle that when reading the arguments
# ex: /home/nag/Documents/repos/WALinuxAgent/tests/lib/mock_command.py /snap/pycharm-professional/412/plugins/python-ce/helpers/py... +BLKID +ELFUTILS +KMOD -IDN2 +IDN -PCRE2 default-hierarchy=hybrid\n 0
stdout, return_value, stderr = sys.argv[-3:] # pylint: disable=W0632

if stdout != '':
sys.stdout.write(stdout)
Expand Down
5 changes: 5 additions & 0 deletions tests/lib/mock_environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,12 +76,14 @@ def __init__(self, tmp_dir, commands=None, paths=None, files=None, data_files=No
self._original_popen = subprocess.Popen
self._original_mkdir = fileutil.mkdir
self._original_path_exists = os.path.exists
self._original_os_remove = os.remove
self._original_open = open

self.patchers = [
patch_builtin("open", side_effect=self._mock_open),
patch("subprocess.Popen", side_effect=self._mock_popen),
patch("os.path.exists", side_effect=self._mock_path_exists),
patch("os.remove", side_effect=self._mock_os_remove),
patch("azurelinuxagent.common.utils.fileutil.mkdir", side_effect=self._mock_mkdir)
]

Expand Down Expand Up @@ -166,3 +168,6 @@ def _mock_open(self, path, *args, **kwargs):
def _mock_path_exists(self, path):
return self._original_path_exists(self.get_mapped_path(path))

def _mock_os_remove(self, path):
return self._original_os_remove(self.get_mapped_path(path))

Loading