From da72c37017784a87bf7876bd5066b3ad44b8d38e Mon Sep 17 00:00:00 2001 From: "Maddie Ford (SHE/HER)" Date: Tue, 8 Nov 2022 15:09:09 -0800 Subject: [PATCH 01/15] Update version to dummy 1.0.0.0' --- azurelinuxagent/common/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azurelinuxagent/common/version.py b/azurelinuxagent/common/version.py index ff9c903b93..9a41c2e109 100644 --- a/azurelinuxagent/common/version.py +++ b/azurelinuxagent/common/version.py @@ -209,7 +209,7 @@ def has_logrotate(): # # When doing a release, be sure to use the actual agent version. Current agent version: 2.4.0.0 # -AGENT_VERSION = '9.9.9.9' +AGENT_VERSION = '1.0.0.0' AGENT_LONG_VERSION = "{0}-{1}".format(AGENT_NAME, AGENT_VERSION) AGENT_DESCRIPTION = """ The Azure Linux Agent supports the provisioning and running of Linux From 59dbd2245d6f55e8e4c0eb494bd0d01dcd44a2e3 Mon Sep 17 00:00:00 2001 From: "Maddie Ford (SHE/HER)" Date: Tue, 8 Nov 2022 15:40:54 -0800 Subject: [PATCH 02/15] Revert version change --- azurelinuxagent/common/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azurelinuxagent/common/version.py b/azurelinuxagent/common/version.py index 9a41c2e109..ff9c903b93 100644 --- a/azurelinuxagent/common/version.py +++ b/azurelinuxagent/common/version.py @@ -209,7 +209,7 @@ def has_logrotate(): # # When doing a release, be sure to use the actual agent version. Current agent version: 2.4.0.0 # -AGENT_VERSION = '1.0.0.0' +AGENT_VERSION = '9.9.9.9' AGENT_LONG_VERSION = "{0}-{1}".format(AGENT_NAME, AGENT_VERSION) AGENT_DESCRIPTION = """ The Azure Linux Agent supports the provisioning and running of Linux From af3e2117378c2448dac462b17286a1fcd9abd63b Mon Sep 17 00:00:00 2001 From: Maddie Ford Date: Thu, 17 Aug 2023 11:44:10 -0700 Subject: [PATCH 03/15] Barebones for etp --- tests_e2e/orchestrator/runbook.yml | 2 +- .../test_suites/ext_telemetry_pipeline.yml | 8 +++ .../ext_telemetry_pipeline.py | 72 +++++++++++++++++++ 3 files changed, 81 insertions(+), 1 deletion(-) create mode 100644 tests_e2e/test_suites/ext_telemetry_pipeline.yml create mode 100755 tests_e2e/tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py diff --git a/tests_e2e/orchestrator/runbook.yml b/tests_e2e/orchestrator/runbook.yml index 7e19205747..df817044b3 100644 --- a/tests_e2e/orchestrator/runbook.yml +++ b/tests_e2e/orchestrator/runbook.yml @@ -51,7 +51,7 @@ variable: # # The test suites to execute - name: test_suites - value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, fips, agent_ext_workflow, agent_update" + value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, fips, agent_ext_workflow, agent_update, ext_telemetry_pipeline" - name: cloud value: "AzureCloud" is_case_visible: true diff --git a/tests_e2e/test_suites/ext_telemetry_pipeline.yml b/tests_e2e/test_suites/ext_telemetry_pipeline.yml new file mode 100644 index 0000000000..dc9bfb2547 --- /dev/null +++ b/tests_e2e/test_suites/ext_telemetry_pipeline.yml @@ -0,0 +1,8 @@ +# +# This test ensures that the agent does not throw any errors while trying to transmit events to wireserver. It does not +# validate if the events actually make it to wireserver +# +name: "ExtTelemetryPipeline" +tests: + - "ext_telemetry_pipeline/ext_telemetry_pipeline.py" +images: "random(endorsed)" diff --git a/tests_e2e/tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py b/tests_e2e/tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py new file mode 100755 index 0000000000..9172760e38 --- /dev/null +++ b/tests_e2e/tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +# This test ensures that the agent does not throw any errors while trying to transmit events to wireserver. It does not +# # validate if the events actually make it to wireserver +# + +# from tests_e2e.tests.agent_bvt.vm_access import VmAccessBvt +from tests_e2e.tests.lib.agent_test import AgentTest +from tests_e2e.tests.lib.identifiers import VmExtensionIds +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.ssh_client import SshClient +from tests_e2e.tests.lib.virtual_machine_client import VirtualMachineClient +from tests_e2e.tests.lib.virtual_machine_extension_client import VirtualMachineExtensionClient + + +class ExtTelemetryPipeline(AgentTest): + def run(self): + ssh_client: SshClient = self._context.create_ssh_client() + + # Set the etp collection period to 30 seconds instead of default 5 minutes + log.info("") + log.info("Set ETP collection period to 30 seconds on the test VM [%s]", self._context.vm.name) + output = ssh_client.run_command("update-waagent-conf Debug.EtpCollectionPeriod=30", use_sudo=True) + log.info("Updated waagent conf with Debug.ETPCollectionPeriod=30 completed:\n%s", output) + + # Add VmAccess to the test VM + log.info("") + log.info("Add VmAccess ext to the test VM and ensure vm is accessible with the new credentials...") + # TODO VmAccessBvt.run() + + # Add CSE to the test VM + log.info("") + log.info("Add CSE to the test VM...") + cse = VirtualMachineExtensionClient(self._context.vm, VmExtensionIds.CustomScript, resource_name="CustomScript") + cse.enable(settings={'commandToExecute': "date"}) + cse.assert_instance_view() + + # Check agent log to verify ETP is enabled + command = f"agent_ext_workflow-check_data_in_agent_log.py --data 'Extension Telemetry pipeline enabled: True'" + log.info("") + log.info("Check agent log to verify ETP is enabled...".format(command)) + log.info("Remote command [%s] completed:\n%s", command, ssh_client.run_command(command)) + + # TODO: Validate there is an events dir for each extension on the machine + + # TODO: Ensure good events are reported + + # TODO: Ensure bad events are reported + + # TODO: Ensure all events are deleted + + +if __name__ == "__main__": + ExtTelemetryPipeline.run_from_command_line() From 854d26d951bc06019abb39bf585ede178d454d84 Mon Sep 17 00:00:00 2001 From: Maddie Ford Date: Sat, 19 Aug 2023 13:02:44 -0700 Subject: [PATCH 04/15] Scenario should own VM because of conf change --- tests_e2e/test_suites/ext_telemetry_pipeline.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/tests_e2e/test_suites/ext_telemetry_pipeline.yml b/tests_e2e/test_suites/ext_telemetry_pipeline.yml index dc9bfb2547..769312c840 100644 --- a/tests_e2e/test_suites/ext_telemetry_pipeline.yml +++ b/tests_e2e/test_suites/ext_telemetry_pipeline.yml @@ -6,3 +6,4 @@ name: "ExtTelemetryPipeline" tests: - "ext_telemetry_pipeline/ext_telemetry_pipeline.py" images: "random(endorsed)" +owns_vm: true From 0042de64a67197d8aaeb6040d357a85cdd6a692b Mon Sep 17 00:00:00 2001 From: Maddie Ford Date: Sat, 19 Aug 2023 17:43:59 -0700 Subject: [PATCH 05/15] Add extension telemetry pipeline test --- ...telemetry_pipeline-add_extension_events.py | 227 ++++++++++++++++++ 1 file changed, 227 insertions(+) create mode 100755 tests_e2e/tests/scripts/ext_telemetry_pipeline-add_extension_events.py diff --git a/tests_e2e/tests/scripts/ext_telemetry_pipeline-add_extension_events.py b/tests_e2e/tests/scripts/ext_telemetry_pipeline-add_extension_events.py new file mode 100755 index 0000000000..6080afeb74 --- /dev/null +++ b/tests_e2e/tests/scripts/ext_telemetry_pipeline-add_extension_events.py @@ -0,0 +1,227 @@ +#!/usr/bin/env pypy3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Asserts that goal state processing completed no more than 15 seconds after agent start +# + +import argparse +import json +import os +import sys +import time +import uuid + +from assertpy import fail +from datetime import datetime, timedelta +from random import choice +from typing import List + +from tests_e2e.tests.lib.agent_log import AgentLog +from tests_e2e.tests.lib.logging import log + + +def add_extension_events(extensions: List[str], bad_event_count=0, no_of_events_per_extension=50): + def missing_key(bad_event): + key = choice(list(bad_event.keys())) + del bad_event[key] + return "MissingKeyError: {0}".format(key) + + def oversize_error(bad_event): + bad_event["EventLevel"] = "ThisIsAnOversizeError\n" * 300 + return "OversizeEventError" + + def empty_message(bad_event): + bad_event["Message"] = "" + return "EmptyMessageError" + + errors = [ + missing_key, + oversize_error, + empty_message + ] + + sample_ext_event = { + "EventLevel": "INFO", + "Message": "Starting IaaS ScriptHandler Extension v1", + "Version": "1.0", + "TaskName": "Extension Info", + "EventPid": "3228", + "EventTid": "1", + "OperationId": "519e4beb-018a-4bd9-8d8e-c5226cf7f56e", + "TimeStamp": "2019-12-12T01:20:05.0950244Z" + } + + sample_messages = [ + "Starting IaaS ScriptHandler Extension v1", + "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.", + "The quick brown fox jumps over the lazy dog", + "Cursus risus at ultrices mi.", + "Doing Something", + "Iaculis eu non diam phasellus.", + "Doing other thing", + "Look ma, lemons", + "Pretium quam vulputate dignissim suspendisse.", + "Man this is insane", + "I wish it worked as it should and not as it ain't", + "Ut faucibus pulvinar elementum integer enim neque volutpat ac tincidunt." + "Did you get any of that?", + "Non-English message - 此文字不是英文的" + "κόσμε", + "�", + "Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen Wolther spillede på xylofon.", + "Falsches Üben von Xylophonmusik quält jeden größeren Zwerg", + "Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich", + "Heizölrückstoßabdämpfung", + "Γαζέες καὶ μυρτιὲς δὲν θὰ βρῶ πιὰ στὸ χρυσαφὶ ξέφωτο", + "Ξεσκεπάζω τὴν ψυχοφθόρα βδελυγμία", + "El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y frío, añoraba a su querido cachorro.", + "Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à côté de l'alcôve ovoïde, où les bûches", + "se consument dans l'âtre, ce qui lui permet de penser à la cænogenèse de l'être dont il est question", + "dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, pense-t-il, diminue çà et là la qualité de son œuvre.", + "D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh", + "Árvíztűrő tükörfúrógép", + "Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa", + "Sævör grét áðan því úlpan var ónýt", + "いろはにほへとちりぬるを わかよたれそつねならむ うゐのおくやまけふこえて あさきゆめみしゑひもせす", + "イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン", + "? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה" + "Pchnąć w tę łódź jeża lub ośm skrzyń fig", + "В чащах юга жил бы цитрус? Да, но фальшивый экземпляр!", + "๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน", + "Pijamalı hasta, yağız şoföre çabucak güvendi." + ] + + for ext in extensions: + bad_count = bad_event_count + event_dir = os.path.join("/var/log/azure/", ext, "events") + if not os.path.isdir(event_dir): + # TODO check this fails correctly + fail(f"Expected events dir: {event_dir} does not exist") + else: + log.info("Expected dir: {0} exists".format(event_dir)) + + log.info("") + log.info("Creating random extension events for {0}. No of Good Events: {1}, No of Bad Events: {2}".format( + ext, no_of_events_per_extension - bad_event_count, bad_event_count)) + + new_opr_id = str(uuid.uuid4()) + event_list = [] + + for _ in range(no_of_events_per_extension): + event = sample_ext_event.copy() + event["OperationId"] = new_opr_id + event["TimeStamp"] = datetime.utcnow().strftime(u'%Y-%m-%dT%H:%M:%S.%fZ') + event["Message"] = choice(sample_messages) + + if bad_count != 0: + # Make this event a bad event + reason = choice(errors)(event) + bad_count -= 1 + + # Missing key error might delete the TaskName key from the event + if "TaskName" in event: + event["TaskName"] = "{0}. This is a bad event: {1}".format(event["TaskName"], reason) + else: + event["EventLevel"] = "{0}. This is a bad event: {1}".format(event["EventLevel"], reason) + + event_list.append(event) + + file_name = os.path.join(event_dir, '{0}.json'.format(int(time.time() * 1000000))) + log.info("Create json with extension events: {0}".format(file_name)) + with open("{0}.tmp".format(file_name), 'w+') as f: + json.dump(event_list, f) + os.rename("{0}.tmp".format(file_name), file_name) + + +def wait_for_extension_events_dir_empty(extensions: List[str]): + # By ensuring events dir to be empty, we verify that the telemetry events collector has completed its run + start_time = datetime.now() + timeout = timedelta(minutes=2) + ext_event_dirs = [os.path.join("/var/log/azure/", ext, "events") for ext in extensions] + + while (start_time + timeout) >= datetime.now(): + log.info("") + log.info("Checking that extension event directories are empty...") + all_dir_empty = True + for event_dir in ext_event_dirs: + if not os.path.exists(event_dir) or len(os.listdir(event_dir)) != 0: + log.info("Dir: {0} is not yet empty".format(event_dir)) + all_dir_empty = False + + if all_dir_empty: + log.info("Extension event directories are empty") + return + + time.sleep(20) + # TODO check this fails correctly + fail("Extension events dir not empty before 2 minute timeout") + + +def main(): + # This test is a best effort test to ensure that the agent does not throw any errors while trying to transmit + # events to wireserver. We're not validating if the events actually make it to wireserver. + + parser = argparse.ArgumentParser() + parser.add_argument("--extensions", dest='extensions', type=str, required=True) + parser.add_argument("--num_events_total", dest='num_events_total', type=int, required=True) + parser.add_argument("--num_events_bad", dest='num_events_bad', type=int, required=False, default=0) + args, _ = parser.parse_known_args() + + extensions = [ext for ext in args.extensions.split(',')] + add_extension_events(extensions=extensions, bad_event_count=args.num_events_bad, + no_of_events_per_extension=args.num_events_total) + + # Ensure that the event collector ran after adding the events + wait_for_extension_events_dir_empty(extensions=extensions) + + # Sleep for a min to ensure that the TelemetryService has enough time to send events and report errors if any + time.sleep(60) + found_error = False + agent_log = AgentLog() + + # TODO determine if bad num check is needed + if args.num_events_bad != 0: + log.info("") + log.info("Check that the TelemetryEventsCollector did not emit any errors...") + telemetry_event_collector_name = "TelemetryEventsCollector" + for agent_record in agent_log.read(): + if agent_record.thread == telemetry_event_collector_name and agent_record.level == "ERROR": + found_error = True + log.info("waagent.log contains the following errors emitted by the {0} thread: \n{1}".format(telemetry_event_collector_name, agent_record)) + + if found_error: + fail("Found error(s) emitted by the TelemetryEventsCollector, but none were expected.") + + for ext in extensions: + good_count = args.num_events_total - args.num_events_bad + log.info("") + if not agent_log.agent_log_contains("Collected {0} events for extension: {1}".format(good_count, ext)): + fail("The TelemetryEventsCollector did not collect the expected number of events: {0} for {1}".format(good_count, ext)) + log.info("All good events for {0} were collected by the TelemetryEventsCollector".format(ext)) + + log.info("") + if args.num_events_bad != 0: + if not agent_log.agent_log_contains("Dropped events for Extension: {0}".format(ext)): + fail("The TelemetryEventsCollector did not drop bad events for {0} as expected".format(ext)) + log.info("The TelemetryEventsCollector dropped bad events for {0} as expected".format(ext)) + + sys.exit(0) + + +if __name__ == "__main__": + main() From 7dabf43b7572edb6d78b22f565e06d1d057f3e1d Mon Sep 17 00:00:00 2001 From: Maddie Ford Date: Sun, 20 Aug 2023 15:45:43 -0700 Subject: [PATCH 06/15] Clean up code --- ...telemetry_pipeline-add_extension_events.py | 28 ++++++++----------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/tests_e2e/tests/scripts/ext_telemetry_pipeline-add_extension_events.py b/tests_e2e/tests/scripts/ext_telemetry_pipeline-add_extension_events.py index 6080afeb74..c6c94d9a3a 100755 --- a/tests_e2e/tests/scripts/ext_telemetry_pipeline-add_extension_events.py +++ b/tests_e2e/tests/scripts/ext_telemetry_pipeline-add_extension_events.py @@ -110,10 +110,8 @@ def empty_message(bad_event): bad_count = bad_event_count event_dir = os.path.join("/var/log/azure/", ext, "events") if not os.path.isdir(event_dir): - # TODO check this fails correctly fail(f"Expected events dir: {event_dir} does not exist") - else: - log.info("Expected dir: {0} exists".format(event_dir)) + log.info("Expected dir: {0} exists".format(event_dir)) log.info("") log.info("Creating random extension events for {0}. No of Good Events: {1}, No of Bad Events: {2}".format( @@ -168,7 +166,7 @@ def wait_for_extension_events_dir_empty(extensions: List[str]): return time.sleep(20) - # TODO check this fails correctly + fail("Extension events dir not empty before 2 minute timeout") @@ -194,18 +192,16 @@ def main(): found_error = False agent_log = AgentLog() - # TODO determine if bad num check is needed - if args.num_events_bad != 0: - log.info("") - log.info("Check that the TelemetryEventsCollector did not emit any errors...") - telemetry_event_collector_name = "TelemetryEventsCollector" - for agent_record in agent_log.read(): - if agent_record.thread == telemetry_event_collector_name and agent_record.level == "ERROR": - found_error = True - log.info("waagent.log contains the following errors emitted by the {0} thread: \n{1}".format(telemetry_event_collector_name, agent_record)) - - if found_error: - fail("Found error(s) emitted by the TelemetryEventsCollector, but none were expected.") + log.info("") + log.info("Check that the TelemetryEventsCollector did not emit any errors...") + telemetry_event_collector_name = "TelemetryEventsCollector" + for agent_record in agent_log.read(): + if agent_record.thread == telemetry_event_collector_name and agent_record.level == "ERROR": + found_error = True + log.info("waagent.log contains the following errors emitted by the {0} thread: \n{1}".format(telemetry_event_collector_name, agent_record)) + + if found_error: + fail("Found error(s) emitted by the TelemetryEventsCollector, but none were expected.") for ext in extensions: good_count = args.num_events_total - args.num_events_bad From d3e0bcfc088a1bba3a98efa4f57f8e5677c1a8ec Mon Sep 17 00:00:00 2001 From: Maddie Ford Date: Sun, 20 Aug 2023 18:37:33 -0700 Subject: [PATCH 07/15] Improve log messages --- ...ext_telemetry_pipeline-add_extension_events.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/tests_e2e/tests/scripts/ext_telemetry_pipeline-add_extension_events.py b/tests_e2e/tests/scripts/ext_telemetry_pipeline-add_extension_events.py index c6c94d9a3a..3d2bb0840b 100755 --- a/tests_e2e/tests/scripts/ext_telemetry_pipeline-add_extension_events.py +++ b/tests_e2e/tests/scripts/ext_telemetry_pipeline-add_extension_events.py @@ -111,9 +111,9 @@ def empty_message(bad_event): event_dir = os.path.join("/var/log/azure/", ext, "events") if not os.path.isdir(event_dir): fail(f"Expected events dir: {event_dir} does not exist") - log.info("Expected dir: {0} exists".format(event_dir)) log.info("") + log.info("Expected dir: {0} exists".format(event_dir)) log.info("Creating random extension events for {0}. No of Good Events: {1}, No of Bad Events: {2}".format( ext, no_of_events_per_extension - bad_event_count, bad_event_count)) @@ -140,7 +140,7 @@ def empty_message(bad_event): event_list.append(event) file_name = os.path.join(event_dir, '{0}.json'.format(int(time.time() * 1000000))) - log.info("Create json with extension events: {0}".format(file_name)) + log.info("Create json with extension events in event directory: {0}".format(file_name)) with open("{0}.tmp".format(file_name), 'w+') as f: json.dump(event_list, f) os.rename("{0}.tmp".format(file_name), file_name) @@ -154,7 +154,7 @@ def wait_for_extension_events_dir_empty(extensions: List[str]): while (start_time + timeout) >= datetime.now(): log.info("") - log.info("Checking that extension event directories are empty...") + log.info("Waiting for extension event directories to be empty...") all_dir_empty = True for event_dir in ext_event_dirs: if not os.path.exists(event_dir) or len(os.listdir(event_dir)) != 0: @@ -162,7 +162,7 @@ def wait_for_extension_events_dir_empty(extensions: List[str]): all_dir_empty = False if all_dir_empty: - log.info("Extension event directories are empty") + log.info("Extension event directories are empty: \n{0}".format(ext_event_dirs)) return time.sleep(20) @@ -193,7 +193,7 @@ def main(): agent_log = AgentLog() log.info("") - log.info("Check that the TelemetryEventsCollector did not emit any errors...") + log.info("Check that the TelemetryEventsCollector did not emit any errors while collecting and reporting events...") telemetry_event_collector_name = "TelemetryEventsCollector" for agent_record in agent_log.read(): if agent_record.thread == telemetry_event_collector_name and agent_record.level == "ERROR": @@ -202,16 +202,17 @@ def main(): if found_error: fail("Found error(s) emitted by the TelemetryEventsCollector, but none were expected.") + log.info("The TelemetryEventsCollector did not emit any errors while collecting and reporting events") for ext in extensions: good_count = args.num_events_total - args.num_events_bad log.info("") if not agent_log.agent_log_contains("Collected {0} events for extension: {1}".format(good_count, ext)): fail("The TelemetryEventsCollector did not collect the expected number of events: {0} for {1}".format(good_count, ext)) - log.info("All good events for {0} were collected by the TelemetryEventsCollector".format(ext)) + log.info("All {0} good events for {1} were collected by the TelemetryEventsCollector".format(good_count, ext)) - log.info("") if args.num_events_bad != 0: + log.info("") if not agent_log.agent_log_contains("Dropped events for Extension: {0}".format(ext)): fail("The TelemetryEventsCollector did not drop bad events for {0} as expected".format(ext)) log.info("The TelemetryEventsCollector dropped bad events for {0} as expected".format(ext)) From d515c4f20f9b6099cd66b69c46e46456be444122 Mon Sep 17 00:00:00 2001 From: Maddie Ford Date: Sun, 20 Aug 2023 18:46:52 -0700 Subject: [PATCH 08/15] Fix pylint errors --- .../scripts/ext_telemetry_pipeline-add_extension_events.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests_e2e/tests/scripts/ext_telemetry_pipeline-add_extension_events.py b/tests_e2e/tests/scripts/ext_telemetry_pipeline-add_extension_events.py index 3d2bb0840b..20c8753e92 100755 --- a/tests_e2e/tests/scripts/ext_telemetry_pipeline-add_extension_events.py +++ b/tests_e2e/tests/scripts/ext_telemetry_pipeline-add_extension_events.py @@ -180,7 +180,7 @@ def main(): parser.add_argument("--num_events_bad", dest='num_events_bad', type=int, required=False, default=0) args, _ = parser.parse_known_args() - extensions = [ext for ext in args.extensions.split(',')] + extensions = args.extensions.split(',') add_extension_events(extensions=extensions, bad_event_count=args.num_events_bad, no_of_events_per_extension=args.num_events_total) From f4725a96651d6b4f3c97c0b75fe76b5fc935fd7e Mon Sep 17 00:00:00 2001 From: Maddie Ford Date: Sun, 20 Aug 2023 18:49:29 -0700 Subject: [PATCH 09/15] Improve logging --- .../test_suites/ext_telemetry_pipeline.yml | 1 + .../ext_telemetry_pipeline.py | 50 +++++++++++++------ 2 files changed, 36 insertions(+), 15 deletions(-) diff --git a/tests_e2e/test_suites/ext_telemetry_pipeline.yml b/tests_e2e/test_suites/ext_telemetry_pipeline.yml index 769312c840..4230569b60 100644 --- a/tests_e2e/test_suites/ext_telemetry_pipeline.yml +++ b/tests_e2e/test_suites/ext_telemetry_pipeline.yml @@ -4,6 +4,7 @@ # name: "ExtTelemetryPipeline" tests: + - "agent_bvt/vm_access.py" - "ext_telemetry_pipeline/ext_telemetry_pipeline.py" images: "random(endorsed)" owns_vm: true diff --git a/tests_e2e/tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py b/tests_e2e/tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py index 9172760e38..9d4bd21566 100755 --- a/tests_e2e/tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py +++ b/tests_e2e/tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py @@ -22,12 +22,13 @@ # # validate if the events actually make it to wireserver # -# from tests_e2e.tests.agent_bvt.vm_access import VmAccessBvt +import random +from typing import List, Dict, Any + from tests_e2e.tests.lib.agent_test import AgentTest from tests_e2e.tests.lib.identifiers import VmExtensionIds from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.ssh_client import SshClient -from tests_e2e.tests.lib.virtual_machine_client import VirtualMachineClient from tests_e2e.tests.lib.virtual_machine_extension_client import VirtualMachineExtensionClient @@ -41,31 +42,50 @@ def run(self): output = ssh_client.run_command("update-waagent-conf Debug.EtpCollectionPeriod=30", use_sudo=True) log.info("Updated waagent conf with Debug.ETPCollectionPeriod=30 completed:\n%s", output) - # Add VmAccess to the test VM - log.info("") - log.info("Add VmAccess ext to the test VM and ensure vm is accessible with the new credentials...") - # TODO VmAccessBvt.run() - - # Add CSE to the test VM + # Add CSE to the test VM twice to ensure its events directory still exists after re-enabling log.info("") log.info("Add CSE to the test VM...") cse = VirtualMachineExtensionClient(self._context.vm, VmExtensionIds.CustomScript, resource_name="CustomScript") - cse.enable(settings={'commandToExecute': "date"}) + cse.enable(settings={'commandToExecute': "echo 'enable'"}) + cse.assert_instance_view() + + log.info("") + log.info("Add CSE to the test VM again...") + cse.enable(settings={'commandToExecute': "echo 'enable again'"}) cse.assert_instance_view() # Check agent log to verify ETP is enabled - command = f"agent_ext_workflow-check_data_in_agent_log.py --data 'Extension Telemetry pipeline enabled: True'" + command = "agent_ext_workflow-check_data_in_agent_log.py --data 'Extension Telemetry pipeline enabled: True'" log.info("") - log.info("Check agent log to verify ETP is enabled...".format(command)) + log.info("Check agent log to verify ETP is enabled...") log.info("Remote command [%s] completed:\n%s", command, ssh_client.run_command(command)) - # TODO: Validate there is an events dir for each extension on the machine + extensions = ["Microsoft.OSTCExtensions.VMAccessForLinux", "Microsoft.Azure.Extensions.CustomScript"] - # TODO: Ensure good events are reported + log.info("") + log.info("Add good extension events and check they are reported...") + max_events = random.randint(10, 50) + output = ssh_client.run_command(f"ext_telemetry_pipeline-add_extension_events.py " + f"--extensions {','.join(extensions)} " + f"--num_events_total {max_events}", use_sudo=True) + log.info(output) + log.info("") + log.info("Good extension events were successfully reported.") - # TODO: Ensure bad events are reported + log.info("") + log.info("Add bad extension events and check they are reported...") + output = ssh_client.run_command(f"ext_telemetry_pipeline-add_extension_events.py " + f"--extensions {','.join(extensions)} " + f"--num_events_total {max_events} " + f"--num_events_bad {random.randint(5, max_events-5)}", use_sudo=True) + log.info(output) + log.info("") + log.info("Bad extension events were successfully dropped.") - # TODO: Ensure all events are deleted + def get_ignore_error_rules(self) -> List[Dict[str, Any]]: + return [ + {'message': r"Dropped events for Extension.*"} + ] if __name__ == "__main__": From a9748e8a5f58dba4836eb67ed6eb5ed9f21f25a0 Mon Sep 17 00:00:00 2001 From: Maddie Ford Date: Sun, 20 Aug 2023 18:59:00 -0700 Subject: [PATCH 10/15] Improve code comments --- .../tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py | 3 ++- .../scripts/ext_telemetry_pipeline-add_extension_events.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tests_e2e/tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py b/tests_e2e/tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py index 9d4bd21566..6d03d3abe2 100755 --- a/tests_e2e/tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py +++ b/tests_e2e/tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py @@ -60,8 +60,8 @@ def run(self): log.info("Check agent log to verify ETP is enabled...") log.info("Remote command [%s] completed:\n%s", command, ssh_client.run_command(command)) + # Add good extension events for each extension and check that the TelemetryEventsCollector collects them extensions = ["Microsoft.OSTCExtensions.VMAccessForLinux", "Microsoft.Azure.Extensions.CustomScript"] - log.info("") log.info("Add good extension events and check they are reported...") max_events = random.randint(10, 50) @@ -72,6 +72,7 @@ def run(self): log.info("") log.info("Good extension events were successfully reported.") + # Add invalid events for each extension and check that the TelemetryEventsCollector drops them log.info("") log.info("Add bad extension events and check they are reported...") output = ssh_client.run_command(f"ext_telemetry_pipeline-add_extension_events.py " diff --git a/tests_e2e/tests/scripts/ext_telemetry_pipeline-add_extension_events.py b/tests_e2e/tests/scripts/ext_telemetry_pipeline-add_extension_events.py index 20c8753e92..2e5776c714 100755 --- a/tests_e2e/tests/scripts/ext_telemetry_pipeline-add_extension_events.py +++ b/tests_e2e/tests/scripts/ext_telemetry_pipeline-add_extension_events.py @@ -16,7 +16,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # -# Asserts that goal state processing completed no more than 15 seconds after agent start +# Adds extension events for each provided extension and verifies the TelemetryEventsCollector collected or dropped them # import argparse From 66789776ff74ef444ffadae4ed742435322eb41c Mon Sep 17 00:00:00 2001 From: Maddie Ford Date: Sun, 20 Aug 2023 19:09:35 -0700 Subject: [PATCH 11/15] VmAccess is not supported on flatcar --- .../tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests_e2e/tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py b/tests_e2e/tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py index 6d03d3abe2..a6d10636b2 100755 --- a/tests_e2e/tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py +++ b/tests_e2e/tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py @@ -36,6 +36,12 @@ class ExtTelemetryPipeline(AgentTest): def run(self): ssh_client: SshClient = self._context.create_ssh_client() + # Extensions we will create events for + extensions = ["Microsoft.OSTCExtensions.VMAccessForLinux", "Microsoft.Azure.Extensions.CustomScript"] + if "-flatcar" in ssh_client.run_command("uname -a"): + # Currently VmAccess is not supported on flatcar + extensions = ["Microsoft.Azure.Extensions.CustomScript"] + # Set the etp collection period to 30 seconds instead of default 5 minutes log.info("") log.info("Set ETP collection period to 30 seconds on the test VM [%s]", self._context.vm.name) @@ -61,7 +67,6 @@ def run(self): log.info("Remote command [%s] completed:\n%s", command, ssh_client.run_command(command)) # Add good extension events for each extension and check that the TelemetryEventsCollector collects them - extensions = ["Microsoft.OSTCExtensions.VMAccessForLinux", "Microsoft.Azure.Extensions.CustomScript"] log.info("") log.info("Add good extension events and check they are reported...") max_events = random.randint(10, 50) From 3627b2ed93a59fbb41bd4dc3dc0d359ecb93070b Mon Sep 17 00:00:00 2001 From: Maddie Ford Date: Mon, 21 Aug 2023 17:36:47 -0700 Subject: [PATCH 12/15] Address PR comments --- .../test_suites/ext_telemetry_pipeline.yml | 1 - tests_e2e/tests/agent_bvt/vm_access.py | 4 ++-- .../ext_telemetry_pipeline.py | 23 +++++++++++++++---- tests_e2e/tests/lib/identifiers.py | 16 +++++++++++++ 4 files changed, 36 insertions(+), 8 deletions(-) diff --git a/tests_e2e/test_suites/ext_telemetry_pipeline.yml b/tests_e2e/test_suites/ext_telemetry_pipeline.yml index 4230569b60..f309f5cb8a 100644 --- a/tests_e2e/test_suites/ext_telemetry_pipeline.yml +++ b/tests_e2e/test_suites/ext_telemetry_pipeline.yml @@ -7,4 +7,3 @@ tests: - "agent_bvt/vm_access.py" - "ext_telemetry_pipeline/ext_telemetry_pipeline.py" images: "random(endorsed)" -owns_vm: true diff --git a/tests_e2e/tests/agent_bvt/vm_access.py b/tests_e2e/tests/agent_bvt/vm_access.py index 7983d41479..9b52ac2453 100755 --- a/tests_e2e/tests/agent_bvt/vm_access.py +++ b/tests_e2e/tests/agent_bvt/vm_access.py @@ -39,8 +39,8 @@ class VmAccessBvt(AgentTest): def run(self): ssh: SshClient = self._context.create_ssh_client() - if "-flatcar" in ssh.run_command("uname -a"): - raise TestSkipped("Currently VMAccess is not supported on Flatcar") + if not VmExtensionIds.VmAccess.supports_distro(ssh.run_command("uname -a")): + raise TestSkipped("Currently VMAccess is not supported on this distro") # Try to use a unique username for each test run (note that we truncate to 32 chars to # comply with the rules for usernames) diff --git a/tests_e2e/tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py b/tests_e2e/tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py index a6d10636b2..5a30989182 100755 --- a/tests_e2e/tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py +++ b/tests_e2e/tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py @@ -19,12 +19,16 @@ # # This test ensures that the agent does not throw any errors while trying to transmit events to wireserver. It does not -# # validate if the events actually make it to wireserver +# validate if the events actually make it to wireserver +# TODO: Update this test suite to verify that the agent picks up AND sends telemetry produced by extensions +# (work item https://dev.azure.com/msazure/One/_workitems/edit/24903999) # import random from typing import List, Dict, Any +from azurelinuxagent.common.conf import get_etp_collection_period + from tests_e2e.tests.lib.agent_test import AgentTest from tests_e2e.tests.lib.identifiers import VmExtensionIds from tests_e2e.tests.lib.logging import log @@ -37,12 +41,12 @@ def run(self): ssh_client: SshClient = self._context.create_ssh_client() # Extensions we will create events for - extensions = ["Microsoft.OSTCExtensions.VMAccessForLinux", "Microsoft.Azure.Extensions.CustomScript"] - if "-flatcar" in ssh_client.run_command("uname -a"): - # Currently VmAccess is not supported on flatcar - extensions = ["Microsoft.Azure.Extensions.CustomScript"] + extensions = ["Microsoft.Azure.Extensions.CustomScript"] + if VmExtensionIds.VmAccess.supports_distro(ssh_client.run_command("uname -a")): + extensions.append("Microsoft.OSTCExtensions.VMAccessForLinux") # Set the etp collection period to 30 seconds instead of default 5 minutes + default_collection_period = get_etp_collection_period() log.info("") log.info("Set ETP collection period to 30 seconds on the test VM [%s]", self._context.vm.name) output = ssh_client.run_command("update-waagent-conf Debug.EtpCollectionPeriod=30", use_sudo=True) @@ -67,6 +71,9 @@ def run(self): log.info("Remote command [%s] completed:\n%s", command, ssh_client.run_command(command)) # Add good extension events for each extension and check that the TelemetryEventsCollector collects them + # TODO: Update test suite to check that the agent is picking up the events generated by the extension, instead + # of generating on the extensions' behalf + # (work item - https://dev.azure.com/msazure/One/_workitems/edit/24903999) log.info("") log.info("Add good extension events and check they are reported...") max_events = random.randint(10, 50) @@ -88,6 +95,12 @@ def run(self): log.info("") log.info("Bad extension events were successfully dropped.") + # Reset the etp collection period to the default value so this VM can be shared with other suites + log.info("") + log.info("Reset ETP collection period to {0} seconds on the test VM [{1}]".format(default_collection_period, self._context.vm.name)) + output = ssh_client.run_command("update-waagent-conf Debug.EtpCollectionPeriod={0}".format(default_collection_period), use_sudo=True) + log.info("Updated waagent conf with default collection period completed:\n%s", output) + def get_ignore_error_rules(self) -> List[Dict[str, Any]]: return [ {'message': r"Dropped events for Extension.*"} diff --git a/tests_e2e/tests/lib/identifiers.py b/tests_e2e/tests/lib/identifiers.py index 149d89ce3b..0850871899 100644 --- a/tests_e2e/tests/lib/identifiers.py +++ b/tests_e2e/tests/lib/identifiers.py @@ -15,6 +15,8 @@ # limitations under the License. # +from typing import Dict, List + class VmIdentifier(object): def __init__(self, cloud: str, location: str, subscription: str, resource_group: str, name: str): @@ -45,6 +47,20 @@ def __init__(self, publisher: str, ext_type: str, version: str): self.type: str = ext_type self.version: str = version + unsupported_distros: Dict[str, List[str]] = { + "Microsoft.OSTCExtensions.VMAccessForLinux": ["flatcar"] + } + + def supports_distro(self, system_info: str) -> bool: + """ + Returns true if an unsupported distro name for the extension is found in the provided system info + """ + ext_unsupported_distros = VmExtensionIdentifier.unsupported_distros.get(self.publisher + self.type) + for d in ext_unsupported_distros: + if d in system_info: + return False + return True + def __str__(self): return f"{self.publisher}.{self.type}" From 9b6952b13536ac44b69bf0e7afe7c03593a525ad Mon Sep 17 00:00:00 2001 From: Maddie Ford Date: Mon, 21 Aug 2023 17:56:08 -0700 Subject: [PATCH 13/15] Add support_distros in VmExtensionIdentifier --- tests_e2e/tests/lib/identifiers.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests_e2e/tests/lib/identifiers.py b/tests_e2e/tests/lib/identifiers.py index 0850871899..3751fc0f5a 100644 --- a/tests_e2e/tests/lib/identifiers.py +++ b/tests_e2e/tests/lib/identifiers.py @@ -55,10 +55,9 @@ def supports_distro(self, system_info: str) -> bool: """ Returns true if an unsupported distro name for the extension is found in the provided system info """ - ext_unsupported_distros = VmExtensionIdentifier.unsupported_distros.get(self.publisher + self.type) - for d in ext_unsupported_distros: - if d in system_info: - return False + ext_unsupported_distros = VmExtensionIdentifier.unsupported_distros.get(self.publisher + "." + self.type) + if ext_unsupported_distros is not None: + return any(distro in system_info for distro in ext_unsupported_distros) return True def __str__(self): From bf43a65ec9c13ace70880bcb1bb81c9818b3ac7b Mon Sep 17 00:00:00 2001 From: Maddie Ford Date: Mon, 21 Aug 2023 18:05:07 -0700 Subject: [PATCH 14/15] Fix logic for support_distros in VmExtensionIdentifier --- tests_e2e/tests/lib/identifiers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests_e2e/tests/lib/identifiers.py b/tests_e2e/tests/lib/identifiers.py index 3751fc0f5a..491f308b64 100644 --- a/tests_e2e/tests/lib/identifiers.py +++ b/tests_e2e/tests/lib/identifiers.py @@ -56,8 +56,8 @@ def supports_distro(self, system_info: str) -> bool: Returns true if an unsupported distro name for the extension is found in the provided system info """ ext_unsupported_distros = VmExtensionIdentifier.unsupported_distros.get(self.publisher + "." + self.type) - if ext_unsupported_distros is not None: - return any(distro in system_info for distro in ext_unsupported_distros) + if ext_unsupported_distros is not None and any(distro in system_info for distro in ext_unsupported_distros): + return False return True def __str__(self): From 4bf9c7a7a1fe9d34d135d932088bb6a4f010a81e Mon Sep 17 00:00:00 2001 From: Maddie Ford Date: Wed, 23 Aug 2023 16:38:44 -0700 Subject: [PATCH 15/15] Use run_remote_test for remote script --- .../ext_telemetry_pipeline.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/tests_e2e/tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py b/tests_e2e/tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py index 5a30989182..de051485ad 100755 --- a/tests_e2e/tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py +++ b/tests_e2e/tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py @@ -77,21 +77,19 @@ def run(self): log.info("") log.info("Add good extension events and check they are reported...") max_events = random.randint(10, 50) - output = ssh_client.run_command(f"ext_telemetry_pipeline-add_extension_events.py " - f"--extensions {','.join(extensions)} " - f"--num_events_total {max_events}", use_sudo=True) - log.info(output) + self._run_remote_test(f"ext_telemetry_pipeline-add_extension_events.py " + f"--extensions {','.join(extensions)} " + f"--num_events_total {max_events}", use_sudo=True) log.info("") log.info("Good extension events were successfully reported.") # Add invalid events for each extension and check that the TelemetryEventsCollector drops them log.info("") log.info("Add bad extension events and check they are reported...") - output = ssh_client.run_command(f"ext_telemetry_pipeline-add_extension_events.py " - f"--extensions {','.join(extensions)} " - f"--num_events_total {max_events} " - f"--num_events_bad {random.randint(5, max_events-5)}", use_sudo=True) - log.info(output) + self._run_remote_test(f"ext_telemetry_pipeline-add_extension_events.py " + f"--extensions {','.join(extensions)} " + f"--num_events_total {max_events} " + f"--num_events_bad {random.randint(5, max_events-5)}", use_sudo=True) log.info("") log.info("Bad extension events were successfully dropped.")