Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initialize CPU usage #1725

Merged
merged 3 commits into from
Dec 10, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 45 additions & 47 deletions azurelinuxagent/common/cgroup.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,13 +61,9 @@ def _get_file_contents(self, file_name):
:return: Entire contents of the file
:rtype: str
"""

parameter_file = self._get_cgroup_file(file_name)

try:
return fileutil.read_file(parameter_file)
except Exception:
raise
return fileutil.read_file(parameter_file)

def _get_parameters(self, parameter_name, first_line_only=False):
"""
Expand Down Expand Up @@ -118,76 +114,78 @@ def is_active(self):

class CpuCgroup(CGroup):
def __init__(self, name, cgroup_path):
"""
Initialize _data collection for the Cpu controller. User must call update() before attempting to get
any useful metrics.

:return: CpuCgroup
"""
super(CpuCgroup, self).__init__(name, cgroup_path, "cpu")

self._osutil = get_osutil()
self._current_cpu_total = 0
self._previous_cpu_total = 0
self._current_system_cpu = self._osutil.get_total_cpu_ticks_since_boot()
self._previous_system_cpu = 0
self._previous_cgroup_cpu = None
self._previous_system_cpu = None
self._current_cgroup_cpu = None
self._current_system_cpu = None

# Human-readable description of this cgroup: its name, its path and its controller.
def __str__(self):
return "cgroup: Name: {0}, cgroup_path: {1}; Controller: {2}".format(
self.name, self.path, self.controller
)

def _get_current_cpu_total(self):
def _get_cpu_ticks(self, allow_no_such_file_or_directory_error=False):
"""
Compute the number of USER_HZ of CPU time (user and system) consumed by this cgroup since boot.
Returns the number of USER_HZ of CPU time (user and system) consumed by this cgroup.

:return: int
If allow_no_such_file_or_directory_error is set to True and cpuacct.stat does not exist the function
returns 0; this is useful when the function can be called before the cgroup has been created.
"""
cpu_total = 0
try:
cpu_stat = self._get_file_contents('cpuacct.stat')
except Exception as e:
if isinstance(e, (IOError, OSError)) and e.errno == errno.ENOENT:
if not isinstance(e, (IOError, OSError)) or e.errno != errno.ENOENT:
raise CGroupsException("Failed to read cpuacct.stat: {0}".format(ustr(e)))
if not allow_no_such_file_or_directory_error:
raise e
raise CGroupsException("Exception while attempting to read {0}".format("cpuacct.stat"), e)
cpu_stat = None

cpu_ticks = 0

if cpu_stat is not None:
match = re_user_system_times.match(cpu_stat)
if not match:
raise CGroupsException("The contents of {0} are invalid: {1}".format(self._get_cgroup_file('cpuacct.stat'), cpu_stat))
cpu_ticks = int(match.groups()[0]) + int(match.groups()[1])

return cpu_ticks

if cpu_stat:
m = re_user_system_times.match(cpu_stat)
if m:
cpu_total = int(m.groups()[0]) + int(m.groups()[1])
return cpu_total
# True once both the cgroup and the system tick counters hold a value (i.e. neither is None);
# initialize_cpu_usage() and get_cpu_usage() use this check to enforce their call order.
def _cpu_usage_initialized(self):
return self._current_cgroup_cpu is not None and self._current_system_cpu is not None

def _update_cpu_data(self):
def initialize_cpu_usage(self):
Copy link
Member

@vrdmr vrdmr Dec 6, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we call initialize_cpu_usage() from within __init__ itself? Asking for my own understanding.

Copy link
Member Author

@narrieta narrieta Dec 6, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The get*cgroup functions instantiate cgroups that may never be tracked, so I thought it was not worth initializing the CPU usage for those. In addition, the actual cgroups may not exist on disk yet; if we initialized the CPU usage in __init__, we would need to ensure the cgroups exist before calling the getter.

"""
Update all raw _data required to compute metrics of interest. The intent is to call update() once, then
call the various get_*() methods which use this _data, which we've collected exactly once.
Sets the initial values of CPU usage. This function must be invoked before calling get_cpu_usage().
"""
self._previous_cpu_total = self._current_cpu_total
self._previous_system_cpu = self._current_system_cpu
self._current_cpu_total = self._get_current_cpu_total()
if self._cpu_usage_initialized():
raise CGroupsException("initialize_cpu_usage() should be invoked only once")
self._current_cgroup_cpu = self._get_cpu_ticks(allow_no_such_file_or_directory_error=True)
self._current_system_cpu = self._osutil.get_total_cpu_ticks_since_boot()

def _get_cpu_percent(self):
def get_cpu_usage(self):
"""
Compute the percent CPU time used by this cgroup over the elapsed time since the last time this instance was
update()ed. If the cgroup fully consumed 2 cores on a 4 core system, return 200.
Computes the CPU used by the cgroup since the last call to this function.

The usage is measured as a percentage of utilization of all cores in the system. For example,
using 1 core at 100% on a 4-core system would be reported as 25%.

:return: CPU usage in percent of a single core
:rtype: float
NOTE: initialize_cpu_usage() must be invoked before calling get_cpu_usage()
"""
cpu_delta = self._current_cpu_total - self._previous_cpu_total
system_delta = max(1, self._current_system_cpu - self._previous_system_cpu)
if not self._cpu_usage_initialized():
raise CGroupsException("initialize_cpu_usage() must be invoked before the first call to get_cpu_usage()")

return round(float(cpu_delta * self._osutil.get_processor_cores() * 100) / float(system_delta), 3)
self._previous_cgroup_cpu = self._current_cgroup_cpu
self._previous_system_cpu = self._current_system_cpu
self._current_cgroup_cpu = self._get_cpu_ticks()
self._current_system_cpu = self._osutil.get_total_cpu_ticks_since_boot()

def get_cpu_usage(self):
"""
Collects and return the cpu usage.
cgroup_delta = self._current_cgroup_cpu - self._previous_cgroup_cpu
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we not need the absolute value here?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Update to my comment: since we care about the ratio of the deltas, I assume that if the cgroup delta is negative then the system delta will be negative too, and the signs will cancel each other out.
I'm not sure this is always the case, though: do both deltas always have the same sign?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The CPU usage is measured in ticks. More recent values are never less than older values, so the difference (for both the cgroup and the system) is always non-negative.

system_delta = max(1, self._current_system_cpu - self._previous_system_cpu)

:rtype: float
"""
self._update_cpu_data()
return self._get_cpu_percent()
return round(100.0 * float(cgroup_delta) / float(system_delta), 3)


class MemoryCgroup(CGroup):
Expand Down
5 changes: 5 additions & 0 deletions azurelinuxagent/common/cgroupstelemetry.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from datetime import datetime as dt

from azurelinuxagent.common import logger
from azurelinuxagent.common.cgroup import CpuCgroup
from azurelinuxagent.common.exception import CGroupsException
from azurelinuxagent.common.future import ustr

Expand Down Expand Up @@ -68,6 +69,10 @@ def track_cgroup(cgroup):
"""
Adds the given item to the dictionary of tracked cgroups
"""
if isinstance(cgroup, CpuCgroup):
# set the current cpu usage
cgroup.initialize_cpu_usage()

with CGroupsTelemetry._rlock:
if not CGroupsTelemetry.is_tracked(cgroup.path):
CGroupsTelemetry._tracked.append(cgroup)
Expand Down
2 changes: 1 addition & 1 deletion azurelinuxagent/common/osutil/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -1354,7 +1354,7 @@ def get_total_cpu_ticks_since_boot():
if proc_stat is not None:
for line in proc_stat.splitlines():
if ALL_CPUS_REGEX.match(line):
system_cpu = sum(int(i) for i in line.split()[1:7])
system_cpu = sum(int(i) for i in line.split()[1:8]) # see "man proc" for a description of these fields
break
return system_cpu

Expand Down
21 changes: 12 additions & 9 deletions tests/common/test_cgroupconfigurator.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,15 +203,18 @@ def test_start_extension_command_should_forward_to_cgroups_api_when_groups_are_e
self.assertEqual(mock_start_extension_command.call_count, 1)

def test_start_extension_command_should_start_tracking_the_extension_cgroups(self):
CGroupConfigurator.get_instance().start_extension_command(
extension_name="Microsoft.Compute.TestExtension-1.2.3",
command="date",
timeout=300,
shell=False,
cwd=self.tmp_dir,
env={},
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
# CPU usage is initialized when we begin tracking a CPU cgroup; since this test does not retrieve the
# CPU usage, there is no need for initialization
with patch("azurelinuxagent.common.cgroup.CpuCgroup.initialize_cpu_usage"):
CGroupConfigurator.get_instance().start_extension_command(
extension_name="Microsoft.Compute.TestExtension-1.2.3",
command="date",
timeout=300,
shell=False,
cwd=self.tmp_dir,
env={},
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)

self.assertTrue(CGroupsTelemetry.is_tracked(os.path.join(
self.cgroups_file_system_root, "cpu", "walinuxagent.extensions/Microsoft.Compute.TestExtension_1.2.3")))
Expand Down
Loading