Skip to content

Commit

Permalink
Fix agent memory usage check (#2903)
Browse files Browse the repository at this point in the history
* fix memory usage check

* add test

* added comment

* fix test
  • Loading branch information
nagworld9 authored Sep 7, 2023
1 parent 601954a commit 7df5506
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 5 deletions.
10 changes: 7 additions & 3 deletions azurelinuxagent/ga/update.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,8 @@ def __init__(self):
self._heartbeat_id = str(uuid.uuid4()).upper()
self._heartbeat_counter = 0

self._last_check_memory_usage = datetime.min
self._initial_attempt_check_memory_usage = True
self._last_check_memory_usage_time = time.time()
self._check_memory_usage_last_error_report = datetime.min

# VM Size is reported via the heartbeat, default it here.
Expand Down Expand Up @@ -1016,8 +1017,11 @@ def _check_agent_memory_usage(self):
"""
try:
if conf.get_enable_agent_memory_usage_check() and self._extensions_summary.converged:
if self._last_check_memory_usage == datetime.min or datetime.utcnow() >= (self._last_check_memory_usage + UpdateHandler.CHECK_MEMORY_USAGE_PERIOD):
self._last_check_memory_usage = datetime.utcnow()
# Delay the first memory usage check so that the current agent does not get blacklisted for restarting too frequently (each restart being triggered by reaching the memory limit)
if (self._initial_attempt_check_memory_usage and time.time() - self._last_check_memory_usage_time > CHILD_LAUNCH_INTERVAL) or \
(not self._initial_attempt_check_memory_usage and time.time() - self._last_check_memory_usage_time > conf.get_cgroup_check_period()):
self._last_check_memory_usage_time = time.time()
self._initial_attempt_check_memory_usage = False
CGroupConfigurator.get_instance().check_agent_memory_usage()
except AgentMemoryExceededException as exception:
msg = "Check on agent memory usage:\n{0}".format(ustr(exception))
Expand Down
13 changes: 11 additions & 2 deletions tests/ga/test_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -2378,7 +2378,7 @@ def test_check_agent_memory_usage_raises_exit_exception(self, patch_add_event, p
with patch('azurelinuxagent.common.conf.get_enable_agent_memory_usage_check', return_value=True):
with self.assertRaises(ExitException) as context_manager:
update_handler = get_update_handler()

update_handler._last_check_memory_usage_time = time.time() - 24 * 60
update_handler._check_agent_memory_usage()
self.assertEqual(1, patch_add_event.call_count)
self.assertTrue(any("Check on agent memory usage" in call_args[0]
Expand All @@ -2393,7 +2393,7 @@ def test_check_agent_memory_usage_fails(self, patch_add_event, patch_warn, *_):
with patch("azurelinuxagent.ga.cgroupconfigurator.CGroupConfigurator._Impl.check_agent_memory_usage", side_effect=Exception()):
with patch('azurelinuxagent.common.conf.get_enable_agent_memory_usage_check', return_value=True):
update_handler = get_update_handler()

update_handler._last_check_memory_usage_time = time.time() - 24 * 60
update_handler._check_agent_memory_usage()
self.assertTrue(any("Error checking the agent's memory usage" in call_args[0]
for call_args in patch_warn.call_args),
Expand All @@ -2409,6 +2409,15 @@ def test_check_agent_memory_usage_fails(self, patch_add_event, patch_warn, *_):
add_events[0]["message"],
"The error message is not correct when memory usage check failed")

@patch("azurelinuxagent.ga.cgroupconfigurator.CGroupConfigurator._Impl.check_agent_memory_usage")
@patch("azurelinuxagent.ga.update.add_event")
def test_check_agent_memory_usage_not_called(self, patch_add_event, patch_memory_usage, *_):
# This test ensures that the agent memory usage check is not called immediately on startup; instead, it waits for CHILD_LAUNCH_INTERVAL
with patch('azurelinuxagent.common.conf.get_enable_agent_memory_usage_check', return_value=True):
update_handler = get_update_handler()
update_handler._check_agent_memory_usage()
self.assertEqual(0, patch_memory_usage.call_count)
self.assertEqual(0, patch_add_event.call_count)

class GoalStateIntervalTestCase(AgentTestCase):
def test_initial_goal_state_period_should_default_to_goal_state_period(self):
Expand Down

0 comments on commit 7df5506

Please sign in to comment.