Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add time window for agent manifest download #2860

Merged
merged 4 commits into from
Jun 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 10 additions & 7 deletions azurelinuxagent/common/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,17 +129,17 @@ def load_conf_from_file(conf_file_path, conf=__conf__):
"ResourceDisk.EnableSwapEncryption": False,
"AutoUpdate.Enabled": True,
"EnableOverProvisioning": True,
"GAUpdates.Enabled": True,
#
# "Debug" options are experimental and may be removed in later
# versions of the Agent.
#
"Debug.CgroupLogMetrics": False,
"Debug.CgroupDisableOnProcessCheckFailure": True,
"Debug.CgroupDisableOnQuotaCheckFailure": True,
"Debug.DownloadNewAgents": True,
"Debug.EnableAgentMemoryUsageCheck": False,
"Debug.EnableFastTrack": True,
"Debug.EnableGAVersioning": False
"Debug.EnableGAVersioning": True
}


Expand Down Expand Up @@ -503,12 +503,15 @@ def get_monitor_network_configuration_changes(conf=__conf__):
return conf.get_switch("Monitor.NetworkConfigurationChanges", False)


def get_ga_updates_enabled(conf=__conf__):
def get_download_new_agents(conf=__conf__):
"""
If True, the agent go through update logic to look for new agents otherwise it will stop agent updates.
NOTE: This option is needed in e2e tests to control agent updates.
If True, the agent goes through the update logic to look for new agents to download; otherwise it will stop agent updates.
NOTE: AutoUpdate.Enabled controls whether the Agent downloads new updates and also whether any downloaded updates are started or not, while DownloadNewAgents controls only the former.
AutoUpdate.Enabled == false -> the Agent preinstalled on the image will process extensions and will not update (regardless of the DownloadNewAgents flag)
AutoUpdate.Enabled == true and DownloadNewAgents == true -> any update already downloaded will be started, and the agent will look for future updates
AutoUpdate.Enabled == true and DownloadNewAgents == false -> any update already downloaded will be started, but the agent will not look for future updates
"""
return conf.get_switch("GAUpdates.Enabled", True)
return conf.get_switch("Debug.DownloadNewAgents", True)


def get_cgroup_check_period(conf=__conf__):
Expand Down Expand Up @@ -637,7 +640,7 @@ def get_normal_upgrade_frequency(conf=__conf__):

def get_enable_ga_versioning(conf=__conf__):
    """
    Read the "Debug.EnableGAVersioning" switch from the given configuration.

    If True, the agent looks for RSM updates (checking the requested version in GS);
    otherwise it falls back to self-update and finds the highest version from PIR.
    True is passed to get_switch as the fallback value.

    NOTE: This option is experimental and may be removed in later versions of the Agent.
    """
    switch_name = "Debug.EnableGAVersioning"
    return conf.get_switch(switch_name, True)
Expand Down
23 changes: 22 additions & 1 deletion azurelinuxagent/ga/agent_update_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ def __init__(self):
self.last_attempted_requested_version_update_time = datetime.datetime.min
self.last_attempted_hotfix_update_time = datetime.datetime.min
self.last_attempted_normal_update_time = datetime.datetime.min
self.last_attempted_manifest_download_time = datetime.datetime.min


class AgentUpdateHandler(object):
Expand Down Expand Up @@ -86,6 +87,23 @@ def __update_last_attempt_update_times(self):
else:
self.update_state.last_attempted_normal_update_time = now
self.update_state.last_attempted_hotfix_update_time = now
self.update_state.last_attempted_manifest_download_time = now

def __should_agent_attempt_manifest_download(self):
    """
    Decide whether the agent may attempt to download the agent manifest now.

    Returns True when no download attempt has been recorded yet (the stored
    timestamp is still datetime.datetime.min), or when at least
    conf.get_autoupdate_frequency() seconds have passed since the last
    recorded attempt. Returns False otherwise.
    """
    current_time = datetime.datetime.now()
    last_attempt = self.update_state.last_attempted_manifest_download_time

    # datetime.min is the "never attempted" marker, so there is nothing to wait for.
    if last_attempt == datetime.datetime.min:
        return True

    wait_period = datetime.timedelta(seconds=conf.get_autoupdate_frequency())
    return last_attempt + wait_period <= current_time

@staticmethod
def __get_agent_upgrade_type(requested_version):
Expand Down Expand Up @@ -256,7 +274,7 @@ def __log_event(level, msg, success=True):
def run(self, goal_state):
try:
# Ignore new agents if update is disabled. The latter flag is only used in e2e tests.
if not self._autoupdate_enabled or not conf.get_ga_updates_enabled():
if not self._autoupdate_enabled or not conf.get_download_new_agents():
return

self._gs_id = goal_state.extensions_goal_state.id
Expand All @@ -265,6 +283,9 @@ def run(self, goal_state):
agent_manifest = None # This is to make sure fetch agent manifest once per update
warn_msg = ""
if requested_version is None:
# Do not proceed with the update if self-update would need to download the manifest again within an hour
if not self.__should_agent_attempt_manifest_download():
return
if conf.get_enable_ga_versioning(): # log the warning only when ga versioning is enabled
warn_msg = "Missing requested version in agent family: {0} for incarnation: {1}, fallback to largest version update".format(self._ga_family, self._gs_id)
GAUpdateReportState.report_error_msg = warn_msg
Expand Down
76 changes: 37 additions & 39 deletions tests/ga/test_agent_update_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def setUp(self):
clear_singleton_instances(ProtocolUtil)

@contextlib.contextmanager
def __get_agent_update_handler(self, test_data=None, autoupdate_frequency=0.001, autoupdate_enabled=True):
def __get_agent_update_handler(self, test_data=None, autoupdate_frequency=0.001, autoupdate_enabled=True, protocol_get_error=False):
# Defaulting the test_data parameter to DATA_FILE raises the pylint warning
# W0102: Dangerous default value DATA_FILE (builtins.dict) as argument (dangerous-default-value)
test_data = DATA_FILE if test_data is None else test_data
Expand All @@ -37,9 +37,12 @@ def __get_agent_update_handler(self, test_data=None, autoupdate_frequency=0.001,

def get_handler(url, **kwargs):
if HttpRequestPredicates.is_agent_package_request(url):
agent_pkg = load_bin_data(self._get_agent_file_name(), self._agent_zip_dir)
protocol.mock_wire_data.call_counts['agentArtifact'] += 1
return MockHttpResponse(status=httpclient.OK, body=agent_pkg)
if not protocol_get_error:
agent_pkg = load_bin_data(self._get_agent_file_name(), self._agent_zip_dir)
return MockHttpResponse(status=httpclient.OK, body=agent_pkg)
else:
return MockHttpResponse(status=httpclient.SERVICE_UNAVAILABLE)

return protocol.mock_wire_data.mock_http_get(url, **kwargs)

def put_handler(url, *args, **_):
Expand All @@ -59,6 +62,7 @@ def put_handler(url, *args, **_):
agent_update_handler._protocol = protocol
yield agent_update_handler, mock_telemetry


def __assert_agent_directories_available(self, versions):
for version in versions:
self.assertTrue(os.path.exists(self.agent_dir(version)), "Agent directory {0} not found".format(version))
Expand Down Expand Up @@ -174,6 +178,33 @@ def test_it_should_update_to_largest_version_if_requested_version_not_available(
self.__assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION), "99999.0.0.0"])
self.assertIn("Agent update found, exiting current process", ustr(context.exception.reason))

def test_it_should_not_download_manifest_again_if_last_attempted_download_time_not_elapsed(self):
    """
    With an autoupdate frequency of 10 and failing agent package downloads,
    three consecutive update attempts must fetch the agent manifest only once.
    """
    self.prepare_agents(count=1)
    test_data = dict(DATA_FILE, ext_conf="wire/ext_conf.xml")

    with self.__get_agent_update_handler(test_data=test_data, autoupdate_frequency=10, protocol_get_error=True) as (handler, _):
        # Make several update attempts back to back, fetching the goal state each time.
        for _ in range(3):
            handler.run(handler._protocol.get_goal_state())

        manifest_downloads = handler._protocol.mock_wire_data.call_counts['manifest_of_ga.xml']
        self.assertEqual(1, manifest_downloads, "Agent manifest should not be downloaded again")

def test_it_should_download_manifest_if_last_attempted_download_time_is_elapsed(self):
    """
    With a very small autoupdate frequency (0.00001) and failing agent package
    downloads, each of three update attempts must fetch the agent manifest again.
    """
    self.prepare_agents(count=1)
    test_data = dict(DATA_FILE, ext_conf="wire/ext_conf.xml")

    with self.__get_agent_update_handler(test_data=test_data, autoupdate_frequency=0.00001, protocol_get_error=True) as (handler, _):
        # Make several update attempts back to back, fetching the goal state each time.
        for _ in range(3):
            handler.run(handler._protocol.get_goal_state())

        manifest_downloads = handler._protocol.mock_wire_data.call_counts['manifest_of_ga.xml']
        self.assertEqual(3, manifest_downloads, "Agent manifest should be downloaded in all attempts")

def test_it_should_not_agent_update_if_requested_version_is_same_as_current_version(self):
data_file = DATA_FILE.copy()
data_file["ext_conf"] = "wire/ext_conf_requested_version.xml"
Expand Down Expand Up @@ -313,24 +344,7 @@ def test_it_should_report_update_status_with_error_on_download_fail(self):
data_file = DATA_FILE.copy()
data_file["ext_conf"] = "wire/ext_conf_requested_version.xml"

@contextlib.contextmanager
def mock_agent_update_handler(test_data):
with mock_wire_protocol(test_data) as protocol:

def get_handler(url, **kwargs):
if HttpRequestPredicates.is_agent_package_request(url):
return MockHttpResponse(status=httpclient.SERVICE_UNAVAILABLE)
return protocol.mock_wire_data.mock_http_get(url, **kwargs)

protocol.set_http_handlers(http_get_handler=get_handler)

with patch("azurelinuxagent.common.conf.get_autoupdate_enabled", return_value=True):
with patch("azurelinuxagent.common.conf.get_autoupdate_frequency", return_value=0.001):
with patch("azurelinuxagent.common.conf.get_autoupdate_gafamily", return_value="Prod"):
agent_update_handler_local = get_agent_update_handler(protocol)
yield agent_update_handler_local

with mock_agent_update_handler(test_data=data_file) as (agent_update_handler):
with self.__get_agent_update_handler(test_data=data_file, protocol_get_error=True) as (agent_update_handler, _):
GAUpdateReportState.report_error_msg = ""
agent_update_handler.run(agent_update_handler._protocol.get_goal_state())
vm_agent_update_status = agent_update_handler.get_vmagent_update_status()
Expand All @@ -343,23 +357,7 @@ def test_it_should_report_update_status_with_missing_requested_version_error(sel
data_file = DATA_FILE.copy()
data_file['ext_conf'] = "wire/ext_conf.xml"

@contextlib.contextmanager
def mock_agent_update_handler(test_data):
with mock_wire_protocol(test_data) as protocol:
def get_handler(url, **kwargs):
if HttpRequestPredicates.is_agent_package_request(url):
return MockHttpResponse(status=httpclient.SERVICE_UNAVAILABLE)
return protocol.mock_wire_data.mock_http_get(url, **kwargs)

protocol.set_http_handlers(http_get_handler=get_handler)

with patch("azurelinuxagent.common.conf.get_autoupdate_enabled", return_value=True):
with patch("azurelinuxagent.common.conf.get_autoupdate_frequency", return_value=0.001):
with patch("azurelinuxagent.common.conf.get_autoupdate_gafamily", return_value="Prod"):
agent_update_handler_local = get_agent_update_handler(protocol)
yield agent_update_handler_local

with mock_agent_update_handler(test_data=data_file) as (agent_update_handler):
with self.__get_agent_update_handler(test_data=data_file, protocol_get_error=True) as (agent_update_handler, _):
GAUpdateReportState.report_error_msg = ""
agent_update_handler.run(agent_update_handler._protocol.get_goal_state())
vm_agent_update_status = agent_update_handler.get_vmagent_update_status()
Expand Down
4 changes: 2 additions & 2 deletions tests/test_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,10 @@
Debug.CgroupLogMetrics = False
Debug.CgroupMonitorExpiryTime = 2022-03-31
Debug.CgroupMonitorExtensionName = Microsoft.Azure.Monitor.AzureMonitorLinuxAgent
Debug.DownloadNewAgents = True
Debug.EnableAgentMemoryUsageCheck = False
Debug.EnableFastTrack = True
Debug.EnableGAVersioning = False
Debug.EnableGAVersioning = True
Debug.EtpCollectionPeriod = 300
Debug.FirewallRulesLogPeriod = 86400
DetectScvmmEnv = False
Expand All @@ -51,7 +52,6 @@
Extensions.Enabled = True
Extensions.GoalStatePeriod = 6
Extensions.InitialGoalStatePeriod = 6
GAUpdates.Enabled = True
HttpProxy.Host = None
HttpProxy.Port = None
Lib.Dir = /var/lib/waagent
Expand Down
2 changes: 1 addition & 1 deletion tests_e2e/orchestrator/scripts/install-agent
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ echo "Agent's conf path: $waagent_conf_path"
sed -i 's/AutoUpdate.Enabled=n/AutoUpdate.Enabled=y/g' "$waagent_conf_path"
# By default the Debug.DownloadNewAgents flag is set to True, so that the agent goes through the update logic to look for new agents.
# But in e2e tests this flag needs to be off in test version 9.9.9.9 to stop the agent updates, so that our scenarios run on 9.9.9.9.
sed -i '$a GAUpdates.Enabled=n' "$waagent_conf_path"
sed -i '$a Debug.DownloadNewAgents=n' "$waagent_conf_path"

#
# Restart the service
Expand Down