-
Notifications
You must be signed in to change notification settings - Fork 375
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
agent and ext cgroups scenario (#2866)
* agent-cgroups scenario * address comments * address comments * fix-pylint * pylint warn * address comments * improved logging" * improved ext cgroups scenario * new changes * pylint fix * updated * address comments * pylint warn * address comment * merge conflicts
- Loading branch information
Showing
12 changed files
with
710 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# | ||
# The test suite verifies that the agent is running in the expected cgroups and also checks that the agent is tracking those cgroups for polling resource metrics. | ||
# | ||
name: "AgentCgroups" | ||
tests: | ||
- "agent_cgroups/agent_cgroups.py" | ||
images: "cgroups-endorsed" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# | ||
# The test suite installs a few extensions and | ||
# verifies that those extensions are running in the expected cgroups; it also checks that the agent is tracking those cgroups for polling resource metrics. | ||
# | ||
name: "ExtCgroups" | ||
tests: | ||
- "ext_cgroups/ext_cgroups.py" | ||
images: "cgroups-endorsed" | ||
# The DCR test extension installs a sample service, so this test suite uses it to test service cgroups; however, the extension is only published in the southcentralus region of the public cloud. | ||
locations: "AzureCloud:southcentralus" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
#!/usr/bin/env python3 | ||
|
||
# Microsoft Azure Linux Agent | ||
# | ||
# Copyright 2018 Microsoft Corporation | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# | ||
from tests_e2e.tests.lib.agent_test import AgentTest | ||
from tests_e2e.tests.lib.agent_test_context import AgentTestContext | ||
from tests_e2e.tests.lib.logging import log | ||
|
||
|
||
class AgentCgroups(AgentTest):
    """
    Scenario that validates the cgroups the agent is running in by executing
    the agent_cgroups-check_cgroups_agent.py remote test script.
    """

    def __init__(self, context: AgentTestContext):
        super().__init__(context)
        # SSH client created from the test context for this scenario.
        self._ssh_client = self._context.create_ssh_client()

    def run(self):
        """
        Runs the remote cgroups check and logs the outcome.
        """
        remote_script = "agent_cgroups-check_cgroups_agent.py"

        log.info("=====Validating agent cgroups=====")
        self._run_remote_test(remote_script)
        log.info("Successfully Verified that agent present in correct cgroups")


if __name__ == "__main__":
    AgentCgroups.run_from_command_line()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
#!/usr/bin/env python3 | ||
|
||
# Microsoft Azure Linux Agent | ||
# | ||
# Copyright 2018 Microsoft Corporation | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# | ||
from tests_e2e.tests.ext_cgroups.install_extensions import InstallExtensions | ||
from tests_e2e.tests.lib.agent_test import AgentTest | ||
from tests_e2e.tests.lib.agent_test_context import AgentTestContext | ||
from tests_e2e.tests.lib.logging import log | ||
|
||
|
||
class ExtCgroups(AgentTest):
    """
    Scenario that installs a set of extensions and then validates the cgroups
    they were assigned to by executing the ext_cgroups-check_cgroups_extensions.py
    remote test script.
    """

    def __init__(self, context: AgentTestContext):
        super().__init__(context)
        # SSH client created from the test context for this scenario.
        self._ssh_client = self._context.create_ssh_client()

    def run(self):
        """
        Installs the extensions first, then runs the remote check with sudo.
        """
        remote_script = "ext_cgroups-check_cgroups_extensions.py"

        log.info("=====Installing extensions to validate ext cgroups scenario")
        installer = InstallExtensions(self._context)
        installer.run()

        log.info("=====Executing remote script check_cgroups_extensions.py to validate extension cgroups")
        self._run_remote_test(remote_script, use_sudo=True)
        log.info("Successfully verified that extensions present in correct cgroup")


if __name__ == "__main__":
    ExtCgroups.run_from_command_line()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
#!/usr/bin/env python3 | ||
|
||
# Microsoft Azure Linux Agent | ||
# | ||
# Copyright 2018 Microsoft Corporation | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# | ||
from datetime import datetime, timedelta | ||
from pathlib import Path | ||
|
||
from tests_e2e.tests.lib.agent_test_context import AgentTestContext | ||
from tests_e2e.tests.lib.identifiers import VmExtensionIds | ||
from tests_e2e.tests.lib.logging import log | ||
from tests_e2e.tests.lib.virtual_machine_extension_client import VirtualMachineExtensionClient | ||
|
||
|
||
class InstallExtensions:
    """
    Installs the extensions whose cgroups are verified by the ext cgroups scenario.

    Call run() to update the agent configuration and then install each extension in turn.
    """

    def __init__(self, context: AgentTestContext):
        self._context = context
        self._ssh_client = self._context.create_ssh_client()

    def run(self):
        """
        Prepares the agent configuration and installs every extension used by the scenario.
        """
        self._prepare_agent()
        # Install the GATest extension to test service cgroups
        self._install_gatest_extension()
        # Install the Azure Monitor Agent to test long running process cgroup
        self._install_ama()
        # Install the VM Access extension to test sample extension
        self._install_vmaccess()
        # Install the CSE extension to test extension cgroup
        self._install_cse()

    def _prepare_agent(self):
        """
        Sets Debug.CgroupMonitorExpiryTime in the agent configuration to a date two days in the future.
        """
        log.info("=====Executing update-waagent-conf remote script to update monitoring deadline flag for tracking azuremonitoragent service")
        future_date = datetime.utcnow() + timedelta(days=2)
        expiry_time = future_date.date().strftime("%Y-%m-%d")
        # The agent needs the extension info and its services info in handlermanifest.xml to monitor and limit
        # the resource usage. As part of pilot testing, the agent hardcoded the azuremonitoragent service name so
        # that it could be monitored for some time in production without a manifest update from the extension
        # side, giving the extension owners a sense of their resource usage. That was done for a few months and
        # the service is no longer monitored in production.
        # This test moves the expiry time of the config flag to a future date so that the test agent starts
        # tracking the cgroups used by the service.
        result = self._ssh_client.run_command(f"update-waagent-conf Debug.CgroupMonitorExpiryTime={expiry_time}", use_sudo=True)
        log.info(result)
        log.info("Updated agent cgroups config(CgroupMonitorExpiryTime)")

    def _install_ama(self):
        """
        Enables the Azure Monitor Linux Agent extension and asserts its instance view.
        """
        ama_extension = VirtualMachineExtensionClient(
            self._context.vm, VmExtensionIds.AzureMonitorLinuxAgent,
            resource_name="AMAAgent")
        log.info("Installing %s", ama_extension)
        ama_extension.enable()
        ama_extension.assert_instance_view()

    def _install_vmaccess(self):
        """
        Enables the VM Access extension, passing the test user's public SSH key, and asserts its instance view.
        """
        # The public key is read from the file next to the private key, with a ".pub" suffix.
        public_key_file: Path = Path(self._context.private_key_file).with_suffix(".pub")
        with public_key_file.open() as f:
            public_key = f.read()
        # Invoke the extension
        vm_access = VirtualMachineExtensionClient(self._context.vm, VmExtensionIds.VmAccess, resource_name="VmAccess")
        log.info("Installing %s", vm_access)
        vm_access.enable(
            protected_settings={
                'username': self._context.username,
                'ssh_key': public_key,
                'reset_ssh': 'false'
            }
        )
        vm_access.assert_instance_view()

    def _install_gatest_extension(self):
        """
        Enables the GATest extension and asserts its instance view.
        """
        gatest_extension = VirtualMachineExtensionClient(
            self._context.vm, VmExtensionIds.GATestExtension,
            resource_name="GATestExt")
        log.info("Installing %s", gatest_extension)
        gatest_extension.enable()
        gatest_extension.assert_instance_view()

    def _install_cse(self):
        """
        Enables the Custom Script extension with a script that records the cgroups it runs in.
        """
        # Use custom script to output the cgroups assigned to it at runtime and save to /var/lib/waagent/tmp/custom_script_check.
        # 'mkdir -p' keeps the script quiet and idempotent when the directory already exists.
        script_contents = """
mkdir -p /var/lib/waagent/tmp
cp /proc/$$/cgroup /var/lib/waagent/tmp/custom_script_check
"""
        custom_script_2_0 = VirtualMachineExtensionClient(
            self._context.vm,
            VmExtensionIds.CustomScript,
            resource_name="CustomScript")

        log.info("Installing %s", custom_script_2_0)
        # The script is single-quoted in the command, so $$ is expanded by the bash process that
        # receives it on stdin rather than by the shell running echo.
        custom_script_2_0.enable(
            protected_settings={
                'commandToExecute': f"echo '{script_contents}' | bash"
            }
        )
        custom_script_2_0.assert_instance_view()
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,149 @@ | ||
import os | ||
import re | ||
|
||
from assertpy import assert_that, fail | ||
|
||
from azurelinuxagent.common.osutil import systemd | ||
from azurelinuxagent.common.utils import shellutil | ||
from azurelinuxagent.common.version import DISTRO_NAME, DISTRO_VERSION | ||
from tests_e2e.tests.lib.agent_log import AgentLog | ||
from tests_e2e.tests.lib.logging import log | ||
|
||
# Root of the cgroup file system checked by verify_if_distro_supports_cgroup().
BASE_CGROUP = '/sys/fs/cgroup'

# Agent cgroup/unit names and the controllers of interest for the agent and for extensions.
AGENT_CGROUP_NAME = 'WALinuxAgent'
AGENT_SERVICE_NAME = systemd.get_agent_unit_name()
AGENT_CONTROLLERS = ['cpu', 'memory']
EXT_CONTROLLERS = ['cpu', 'memory']

# Matches text of the form "Started tracking cgroup <name> [<path>]"; the bracketed value is captured as 'path'.
CGROUP_TRACKED_PATTERN = re.compile(r'Started tracking cgroup ([^\s]+)\s+\[(?P<path>[^\s]+)\]')

# Extension names and the services associated with them in these tests.
GATESTEXT_FULL_NAME = "Microsoft.Azure.Extensions.Edp.GATestExtGo"
GATESTEXT_SERVICE = "gatestext.service"
AZUREMONITOREXT_FULL_NAME = "Microsoft.Azure.Monitor.AzureMonitorLinuxAgent"
AZUREMONITORAGENT_SERVICE = "azuremonitoragent.service"
MDSD_SERVICE = "mdsd.service"
|
||
|
||
def verify_if_distro_supports_cgroup():
    """
    Checks that the agent is running in a distro that supports cgroups.

    Fails the assertion when the cgroup file system (BASE_CGROUP) does not exist.
    """
    log.info("===== Checking if distro supports cgroups")

    base_cgroup_fs_exists = os.path.exists(BASE_CGROUP)

    # described_as() must precede the assertion: assertpy raises as soon as is_true() fails,
    # so a description chained after it never reaches the error message.
    assert_that(base_cgroup_fs_exists).described_as(
        "Cgroup file system:{0} not found in Distro {1}-{2}".format(BASE_CGROUP, DISTRO_NAME, DISTRO_VERSION)
    ).is_true()

    log.info('Distro %s-%s supports cgroups\n', DISTRO_NAME, DISTRO_VERSION)
|
||
|
||
def print_cgroups():
    """
    Logs every line of the 'mount' output that describes a cgroup mount.
    """
    log.info("====== Currently mounted cgroups ======")
    # The output of 'mount' is similar to
    #   sysfs on /sys type sysfs (rw,nosuid,nodev,noexec,relatime,seclabel)
    #   proc on /proc type proc (rw,nosuid,nodev,noexec,relatime)
    #   devtmpfs on /dev type devtmpfs (rw,nosuid,seclabel,size=1842988k,nr_inodes=460747,mode=755)
    #   cgroup on /sys/fs/cgroup/systemd type cgroup (rw,nosuid,nodev,noexec,relatime,seclabel,xattr,release_agent=/usr/lib/systemd/systemd-cgroups-agent,name=systemd)
    #   cgroup on /sys/fs/cgroup/pids type cgroup (rw,nosuid,nodev,noexec,relatime,seclabel,pids)
    #   cgroup on /sys/fs/cgroup/memory type cgroup (rw,nosuid,nodev,noexec,relatime,seclabel,memory)
    #   cgroup on /sys/fs/cgroup/blkio type cgroup (rw,nosuid,nodev,noexec,relatime,seclabel,blkio)
    #   cgroup on /sys/fs/cgroup/hugetlb type cgroup (rw,nosuid,nodev,noexec,relatime,seclabel,hugetlb)
    mount_lines = shellutil.run_command(['mount']).splitlines()
    cgroup_mounts = (entry for entry in mount_lines if 'type cgroup' in entry)
    for entry in cgroup_mounts:
        log.info('\t%s', entry)
|
||
|
||
def print_service_status():
    """
    Logs the output of 'systemctl status' for the agent unit, one tab-indented line at a time.
    """
    log.info("====== Agent Service status ======")
    status_command = ["systemctl", "status", systemd.get_agent_unit_name()]
    status_lines = shellutil.run_command(status_command).splitlines()
    for status_line in status_lines:
        log.info("\t%s", status_line)
|
||
|
||
def get_agent_cgroup_mount_path():
    """
    Returns the expected cgroup path of the agent service: /azure.slice/<agent unit name>
    """
    path_components = ('/', 'azure.slice', AGENT_SERVICE_NAME)
    return os.path.join(*path_components)
|
||
|
||
def get_extension_cgroup_mount_path(extension_name):
    """
    Returns the expected cgroup path of the slice for the given extension:
    /azure.slice/azure-vmextensions.slice/azure-vmextensions-<extension_name>.slice
    """
    extension_slice = "".join(("azure-vmextensions-", extension_name, ".slice"))
    return os.path.join('/', 'azure.slice/azure-vmextensions.slice', extension_slice)
|
||
|
||
def get_unit_cgroup_mount_path(unit_name):
    """
    Returns the ControlGroup property that 'systemctl show' reports for the given unit,
    or None when the output is not of the form <name>=<value>.
    """
    show_command = ["systemctl", "show", unit_name, "--property", "ControlGroup"]
    # The command output is similar to
    #   ControlGroup=/azure.slice/walinuxagent.service
    # and the value on the right-hand side of '=' is what gets returned.
    property_match = re.match("[^=]+=(?P<value>.+)", shellutil.run_command(show_command))
    if property_match is None:
        return None
    return property_match.group('value')
|
||
|
||
def verify_agent_cgroup_assigned_correctly():
    """
    Uses the 'systemctl status' output of the agent unit to check that the service is active
    and that the expected agent cgroup path appears in that output; fails otherwise.
    """
    log.info("===== Verifying the daemon and the agent are assigned to the same correct cgroup using systemd")
    service_status = shellutil.run_command(["systemctl", "status", systemd.get_agent_unit_name()])
    log.info("Agent service status output:\n%s", service_status)

    expected_cgroup = get_agent_cgroup_mount_path()
    active_line = re.compile(r".*Active:\s+active.*")
    status_lines = service_status.splitlines()

    found_active = any(active_line.match(entry) for entry in status_lines)
    # Lines reporting the active state are not considered when looking for the cgroup path.
    found_cgroup = any(expected_cgroup in entry for entry in status_lines if not active_line.match(entry))

    if not found_active:
        fail('walinuxagent service was not active/running. Service status:{0}'.format(service_status))
    if not found_cgroup:
        fail('walinuxagent service was not assigned to the expected cgroup:{0}'.format(expected_cgroup))

    log.info("Successfully verified the agent cgroup assigned correctly by systemd\n")
|
||
|
||
def get_agent_cpu_quota():
    """
    Returns the CPUQuotaPerSecUSec property that 'systemctl show' reports for the agent service,
    or None when the output is not of the form <name>=<value>.
    """
    show_command = ["systemctl", "show", AGENT_SERVICE_NAME, "--property", "CPUQuotaPerSecUSec"]
    # The command output is similar to
    #   CPUQuotaPerSecUSec=infinity
    quota_match = re.match("[^=]+=(?P<value>.+)", shellutil.run_command(show_command))
    if quota_match is None:
        return None
    return quota_match.group('value')
|
||
|
||
def check_agent_quota_disabled():
    """
    Returns True when the cpu quota reported for the agent service is 'infinity'.
    """
    return get_agent_cpu_quota() == 'infinity'
|
||
|
||
def check_cgroup_disabled_with_unknown_process():
    """
    Returns True when the agent log contains a record saying that resource usage monitoring
    was disabled because the cgroups check failed and the message mentions UNKNOWN.
    """
    # DOTALL lets '.+' span line breaks inside a multi-line log message.
    disabled_pattern = re.compile(
        "Disabling resource usage monitoring. Reason: Check on cgroups failed:.+UNKNOWN",
        flags=re.DOTALL)

    for entry in AgentLog().read():
        if disabled_pattern.search(entry.message) is None:
            continue
        log.info("Found message:\n\t%s", entry.text.replace("\n", "\n\t"))
        return True
    return False
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.