Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

logging manifest type and don't log same error until next period in agent update. #2778

Merged
merged 4 commits into from
Mar 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion azurelinuxagent/common/protocol/goal_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ def fetch_extension_manifest(self, extension_name, uris):
def _fetch_manifest(self, manifest_type, name, uris):
try:
is_fast_track = self.extensions_goal_state.source == GoalStateSource.FastTrack
xml_text = self._wire_client.fetch_manifest(uris, use_verify_header=is_fast_track)
xml_text = self._wire_client.fetch_manifest(manifest_type, uris, use_verify_header=is_fast_track)
self._history.save_manifest(name, xml_text)
return ExtensionManifest(xml_text)
except Exception as e:
Expand Down
4 changes: 2 additions & 2 deletions azurelinuxagent/common/protocol/wire.py
Original file line number Diff line number Diff line change
Expand Up @@ -582,8 +582,8 @@ def call_storage_service(http_req, *args, **kwargs):
def fetch_artifacts_profile_blob(self, uri):
return self._fetch_content("artifacts profile blob", [uri], use_verify_header=False)[1] # _fetch_content returns a (uri, content) tuple

def fetch_manifest(self, uris, use_verify_header):
uri, content = self._fetch_content("manifest", uris, use_verify_header=use_verify_header)
def fetch_manifest(self, manifest_type, uris, use_verify_header):
uri, content = self._fetch_content("{0} manifest".format(manifest_type), uris, use_verify_header=use_verify_header)
self.get_host_plugin().update_manifest_uri(uri)
return content

Expand Down
27 changes: 19 additions & 8 deletions azurelinuxagent/ga/agent_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ def __init__(self, protocol):
self._gs_id = self._protocol.get_goal_state().extensions_goal_state.id
self._last_attempted_update_time = datetime.datetime.min
self._last_attempted_update_version = FlexibleVersion("0.0.0.0")
self._last_warning = ""
self._last_warning_time = datetime.datetime.min

def __should_update_agent(self, requested_version):
"""
Expand Down Expand Up @@ -170,15 +172,24 @@ def __get_all_agents_on_disk():
path = os.path.join(conf.get_lib_dir(), "{0}-*".format(AGENT_NAME))
return [GuestAgent.from_installed_agent(path=agent_dir) for agent_dir in glob.iglob(path) if os.path.isdir(agent_dir)]

@staticmethod
def __log_event(level, msg_, success_=True):
if level == LogLevel.WARNING:
logger.warn(msg_)
elif level == LogLevel.ERROR:
logger.error(msg_)
elif level == LogLevel.INFO:
def __log_event(self, level, msg_, success_=True):
if level == LogLevel.INFO:
logger.info(msg_)
add_event(op=WALAEventOperation.AgentUpgrade, is_success=success_, message=msg_, log_event=False)
add_event(op=WALAEventOperation.AgentUpgrade, is_success=success_, message=msg_, log_event=False)
else:
msg_ += "[NOTE: Will not log the same error for the next 6 hours]"
# Incarnation may change if we get new goal state that would make whole string unique every time. So comparing only the substring until Incarnation if Incarnation included in msg
# Example msg "Unable to update Agent: No manifest links found for agent family: Prod for incarnation: incarnation_1, skipping agent update"
prefix_msg = msg_.split("incarnation", 1)[0]
prefix_last_warning_msg = self._last_warning.split("incarnation", 1)[0]
if prefix_msg != prefix_last_warning_msg or self._last_warning_time == datetime.datetime.min or datetime.datetime.now() >= self._last_warning_time + datetime.timedelta(hours=6):
if level == LogLevel.WARNING:
logger.warn(msg_)
elif level == LogLevel.ERROR:
logger.error(msg_)
add_event(op=WALAEventOperation.AgentUpgrade, is_success=success_, message=msg_, log_event=False)
self._last_warning_time = datetime.datetime.now()
self._last_warning = msg_

def run(self, goal_state):
try:
Expand Down
26 changes: 26 additions & 0 deletions tests/ga/test_agent_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,3 +309,29 @@ def get_handler(url, **kwargs):
self.assertEqual(VMAgentUpdateStatuses.Error, vm_agent_update_status.status)
self.assertEqual(1, vm_agent_update_status.code)
self.assertIn("Missing requested version", vm_agent_update_status.message)

def test_it_should_not_log_same_error_next_hours(self):
data_file = DATA_FILE.copy()
data_file["ext_conf"] = "wire/ext_conf_missing_family.xml"

# Set the test environment by adding 20 random agents to the agent directory
self.prepare_agents()
self.assertEqual(20, self.agent_count(), "Agent directories not set properly")

with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry):
agent_update_handler.run(agent_update_handler._protocol.get_goal_state())

self.assertFalse(os.path.exists(self.agent_dir("99999.0.0.0")),
"New agent directory should not be found")

self.assertEqual(1, len([kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if
"No manifest links found for agent family" in kwarg[
'message'] and kwarg[
'op'] == WALAEventOperation.AgentUpgrade]), "Agent manifest should not be in GS")

agent_update_handler.run(agent_update_handler._protocol.get_goal_state())

self.assertEqual(1, len([kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if
"No manifest links found for agent family" in kwarg[
'message'] and kwarg[
'op'] == WALAEventOperation.AgentUpgrade]), "Agent manifest should not be in GS")
2 changes: 1 addition & 1 deletion tests/ga/test_extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -3185,7 +3185,7 @@ def manifest_location_handler(url, **kwargs):
wire._DOWNLOAD_TIMEOUT = datetime.timedelta(minutes=0)
try:
with self.assertRaises(ExtensionDownloadError):
protocol.client.fetch_manifest(ext_handlers[0].manifest_uris, use_verify_header=False)
protocol.client.fetch_manifest("extension", ext_handlers[0].manifest_uris, use_verify_header=False)
finally:
wire._DOWNLOAD_TIMEOUT = download_timeout

Expand Down
8 changes: 4 additions & 4 deletions tests/protocol/test_wire.py
Original file line number Diff line number Diff line change
Expand Up @@ -684,7 +684,7 @@ def http_get_handler(url, *_, **__):
with mock_wire_protocol(mockwiredata.DATA_FILE, http_get_handler=http_get_handler) as protocol:
HostPluginProtocol.is_default_channel = False

manifest = protocol.client.fetch_manifest([manifest_url], use_verify_header=False)
manifest = protocol.client.fetch_manifest("test", [manifest_url], use_verify_header=False)

urls = protocol.get_tracked_urls()
self.assertEqual(manifest, manifest_xml, 'The expected manifest was not downloaded')
Expand All @@ -707,7 +707,7 @@ def http_get_handler(url, *_, **kwargs):
HostPluginProtocol.is_default_channel = False

try:
manifest = protocol.client.fetch_manifest([manifest_url], use_verify_header=False)
manifest = protocol.client.fetch_manifest("test", [manifest_url], use_verify_header=False)

urls = protocol.get_tracked_urls()
self.assertEqual(manifest, manifest_xml, 'The expected manifest was not downloaded')
Expand Down Expand Up @@ -744,7 +744,7 @@ def http_get_handler(url, *_, **kwargs):
protocol.client.get_host_plugin()

protocol.set_http_handlers(http_get_handler=http_get_handler)
manifest = protocol.client.fetch_manifest([manifest_url], use_verify_header=False)
manifest = protocol.client.fetch_manifest("test", [manifest_url], use_verify_header=False)

urls = protocol.get_tracked_urls()
self.assertEqual(manifest, manifest_xml)
Expand Down Expand Up @@ -778,7 +778,7 @@ def http_get_handler(url, *_, **kwargs):
protocol.set_http_handlers(http_get_handler=http_get_handler)

with self.assertRaises(ExtensionDownloadError):
protocol.client.fetch_manifest([manifest_url], use_verify_header=False)
protocol.client.fetch_manifest("test", [manifest_url], use_verify_header=False)

urls = protocol.get_tracked_urls()
self.assertEqual(len(urls), 4, "Unexpected number of HTTP requests: [{0}]".format(urls))
Expand Down
2 changes: 1 addition & 1 deletion tests_e2e/pipeline/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ parameters:
- name: test_suites
displayName: Test Suites
type: string
default: agent_bvt
default: agent_bvt,agent_update
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added for daily monitoring agent update. After this merge, I'll schedule daily run for agent_update


# NOTES:
# * 'image', 'location' and 'vm_size' override any values in the test suites/images definition
Expand Down