Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use self-hosted pool for automation runs #3007

Merged
merged 4 commits into from
Jan 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions tests_e2e/orchestrator/lib/agent_test_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,17 +47,16 @@
import makepkg
from azurelinuxagent.common.version import AGENT_VERSION

from tests_e2e.tests.lib.add_network_security_group import AddNetworkSecurityGroup
from tests_e2e.tests.lib.virtual_machine_client import VirtualMachineClient
from tests_e2e.tests.lib.virtual_machine_scale_set_client import VirtualMachineScaleSetClient

import tests_e2e
from tests_e2e.orchestrator.lib.agent_test_loader import TestSuiteInfo
from tests_e2e.tests.lib.agent_log import AgentLog
from tests_e2e.tests.lib.agent_log import AgentLog, AgentLogRecord
from tests_e2e.tests.lib.agent_test import TestSkipped, RemoteTestError
from tests_e2e.tests.lib.agent_test_context import AgentTestContext, AgentVmTestContext, AgentVmssTestContext
from tests_e2e.tests.lib.logging import log, set_thread_name, set_current_thread_log
from tests_e2e.tests.lib.agent_log import AgentLogRecord
from tests_e2e.tests.lib.network_security_rule import NetworkSecurityRule
from tests_e2e.tests.lib.resource_group_client import ResourceGroupClient
from tests_e2e.tests.lib.shell import run_command, CommandError
from tests_e2e.tests.lib.ssh_client import SshClient
Expand Down Expand Up @@ -161,6 +160,9 @@ def __init__(self, metadata: TestSuiteMetadata) -> None:
self._user: str
self._identity_file: str

# If not empty, adds a Network Security Rule allowing SSH access from the specified IP address to any test VMs created by the test suite.
self._allow_ssh: str

self._skip_setup: bool # If True, skip the setup of the test VMs
self._collect_logs: str # Whether to collect logs from the test VMs (one of 'always', 'failed', or 'no')
self._keep_environment: str # Whether to skip deletion of the resources created by the test suite (one of 'always', 'failed', or 'no')
Expand Down Expand Up @@ -218,6 +220,8 @@ def _initialize(self, environment: Environment, variables: Dict[str, Any], lisa_
self._user = variables["user"]
self._identity_file = variables["identity_file"]

self._allow_ssh = variables["allow_ssh"]

self._skip_setup = variables["skip_setup"]
self._keep_environment = variables["keep_environment"]
self._collect_logs = variables["collect_logs"]
Expand Down Expand Up @@ -923,7 +927,8 @@ def read_file(path: str) -> str:
"publisher": "[parameters('publisher')]"
}

AddNetworkSecurityGroup().update(template, is_lisa_template=False)
if self._allow_ssh != '':
nagworld9 marked this conversation as resolved.
Show resolved Hide resolved
NetworkSecurityRule(template, is_lisa_template=False).add_allow_ssh_rule(self._allow_ssh)

return template, {
"username": {"value": self._user},
Expand Down
7 changes: 5 additions & 2 deletions tests_e2e/orchestrator/lib/agent_test_suite_combinator.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ class AgentTestSuitesCombinatorSchema(schema.Combinator):
The runbook is a static document and always passes all these parameters to the combinator, so they are all
marked as required. Optional parameters can pass an empty value to indicate that they are not specified.
"""
allow_ssh: str = field(default_factory=str, metadata=field_metadata(required=True))
cloud: str = field(default_factory=str, metadata=field_metadata(required=True))
identity_file: str = field(default_factory=str, metadata=field_metadata(required=True))
image: str = field(default_factory=str, metadata=field_metadata(required=True))
Expand Down Expand Up @@ -332,14 +333,16 @@ def create_existing_vmss_environment(self) -> Dict[str, Any]:
def create_vm_environment(self, env_name: str, marketplace_image: str, vhd: str, location: str, vm_size: str, test_suite_info: TestSuiteInfo) -> Dict[str, Any]:
#
# Custom ARM templates (to create the test VMs) require special handling. These templates are processed by the azure_update_arm_template
# hook, which does not have access to the runbook variables. Instead, we use a dummy VM tag named "template" and pass the
# hook, which does not have access to the runbook variables. Instead, we use a dummy VM tag named "templates" and pass the
# names of the custom templates in its value. The hook can then retrieve the value from the Platform object (see wiki for more details).
# We also use a dummy item, "vm_tags" in the environment dictionary in order to concatenate templates from multiple test suites when they
# share the same test environment.
# share the same test environment. Similarly, we use a dummy VM tag named "allow_ssh" to pass the value of the "allow_ssh" runbook parameter.
#
vm_tags = {}
if test_suite_info.template != '':
vm_tags["templates"] = test_suite_info.template
if self.runbook.allow_ssh != '':
vm_tags["allow_ssh"] = self.runbook.allow_ssh
return {
"c_platform": [
{
Expand Down
15 changes: 9 additions & 6 deletions tests_e2e/orchestrator/lib/update_arm_template_hook.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
# pylint: enable=E0401

import tests_e2e
from tests_e2e.tests.lib.add_network_security_group import AddNetworkSecurityGroup
from tests_e2e.tests.lib.network_security_rule import NetworkSecurityRule
from tests_e2e.tests.lib.update_arm_template import UpdateArmTemplate


Expand All @@ -42,17 +42,20 @@ class UpdateArmTemplateHook:
def azure_update_arm_template(self, template: Any, environment: Environment) -> None:
log: logging.Logger = logging.getLogger("lisa")

azure_runbook: AzurePlatformSchema = environment.platform.runbook.get_extended_runbook(AzurePlatformSchema)
vm_tags = azure_runbook.vm_tags

#
# Add the network security group for the test VM. This group includes a rule allowing SSH access from the current machine.
# Add the allow SSH security rule if requested by the runbook
#
log.info("******** Waagent: Adding network security rule to the ARM template")
AddNetworkSecurityGroup().update(template, is_lisa_template=True)
allow_ssh: str = vm_tags.get("allow_ssh")
if allow_ssh is not None:
log.info("******** Waagent: Adding network security rule to allow SSH connections from %s", allow_ssh)
NetworkSecurityRule(template, is_lisa_template=True).add_allow_ssh_rule(allow_ssh)

#
# Apply any template customizations provided by the tests.
#
azure_runbook: AzurePlatformSchema = environment.platform.runbook.get_extended_runbook(AzurePlatformSchema)
vm_tags = azure_runbook.vm_tags
# The "templates" tag is a comma-separated list of the template customizations provided by the tests
test_templates = vm_tags.get("templates")
if test_templates is not None:
Expand Down
8 changes: 8 additions & 0 deletions tests_e2e/orchestrator/runbook.yml
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,13 @@ variable:
value: false
is_case_visible: true

#
# Takes an IP address as value; if not empty, it adds a Network Security Rule allowing SSH access from the specified IP address to any test VMs created by the runbook execution.
#
- name: allow_ssh
value: ""
is_case_visible: true

#
# These variables are handled by LISA to use an SSH proxy when executing the runbook
#
Expand Down Expand Up @@ -177,6 +184,7 @@ platform: $(c_platform)

combinator:
type: agent_test_suites
allow_ssh: $(allow_ssh)
cloud: $(cloud)
identity_file: $(identity_file)
image: $(image)
Expand Down
4 changes: 2 additions & 2 deletions tests_e2e/pipeline/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ parameters:
- name: collect_lisa_logs
displayName: Collect LISA logs
type: boolean
default: true
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've been meaning to disable this for several PRs; I finally remembered to do it in this one.

default: false

- name: keep_environment
displayName: Keep the test VMs (do not delete them)
Expand All @@ -58,7 +58,7 @@ parameters:
- no

pool:
vmImage: ubuntu-latest
name: waagent-pool

jobs:
- job: "ExecuteTests"
Expand Down
21 changes: 18 additions & 3 deletions tests_e2e/pipeline/scripts/execute_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

set -euxo pipefail

echo "Hostname: $(hostname)"
echo "\$USER: $USER"

#
# UID of 'waagent' in the Docker container
#
Expand All @@ -10,7 +13,7 @@ WAAGENT_UID=1000
#
# Set the correct mode and owner for the private SSH key and generate the public key.
#
cd "$HOME"
cd "$AGENT_TEMPDIRECTORY"
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Initially I was re-using the agent VMs (currently they are destroyed after each run), so I moved these from $HOME to the temp directory.

mkdir ssh
cp "$DOWNLOADSSHKEY_SECUREFILEPATH" ssh
chmod 700 ssh/id_rsa
Expand All @@ -26,11 +29,17 @@ chmod a+w "$BUILD_SOURCESDIRECTORY"
#
# Create the directory where the Docker container will create the test logs and give ownership to 'waagent'
#
LOGS_DIRECTORY="$HOME/logs"
LOGS_DIRECTORY="$AGENT_TEMPDIRECTORY/logs"
echo "##vso[task.setvariable variable=logs_directory]$LOGS_DIRECTORY"
mkdir "$LOGS_DIRECTORY"
sudo chown "$WAAGENT_UID" "$LOGS_DIRECTORY"

#
# Give the current user access to the Docker daemon
#
sudo usermod -aG docker $USER
newgrp docker < /dev/null

#
# Pull the container image used to execute the tests
#
Expand All @@ -55,9 +64,14 @@ if [[ $VM_SIZE == "-" ]]; then
VM_SIZE=""
fi

#
# Get the external IP address of the VM.
#
IP_ADDRESS=$(curl -4 ifconfig.io/ip)

docker run --rm \
--volume "$BUILD_SOURCESDIRECTORY:/home/waagent/WALinuxAgent" \
--volume "$HOME"/ssh:/home/waagent/.ssh \
--volume "$AGENT_TEMPDIRECTORY"/ssh:/home/waagent/.ssh \
--volume "$LOGS_DIRECTORY":/home/waagent/logs \
--env AZURE_CLIENT_ID \
--env AZURE_CLIENT_SECRET \
Expand All @@ -77,4 +91,5 @@ docker run --rm \
-v image:\"$IMAGE\" \
-v location:\"$LOCATION\" \
-v vm_size:\"$VM_SIZE\" \
-v allow_ssh:\"$IP_ADDRESS\" \
$TEST_SUITES"
50 changes: 50 additions & 0 deletions tests_e2e/pipeline/scripts/setup-agent.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#!/usr/bin/env bash

# Microsoft Azure Linux Agent
#
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

#
# Script to setup the agent VM for the Azure Pipelines agent pool; it simply installs the Azure CLI and the Docker Engine.
#

# Abort on any error, on use of an unset variable, and on a failure anywhere in a pipeline;
# echo each command (-x) so the setup log shows exactly what was executed.
set -euox pipefail

# Add delay per Azure Pipelines documentation
sleep 30

# Install Azure CLI
curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash

# Install the prerequisites needed to add Docker's apt repository.
# NOTE: -y is required because this script runs unattended; without it apt-get prompts for
# confirmation (and aborts when there is no terminal) whenever extra dependencies must be installed.
sudo apt-get update
sudo apt-get install -y ca-certificates curl gnupg

# Add Docker's official GPG key:
sudo install -m 0755 -d /etc/apt/keyrings
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg
sudo chmod a+r /etc/apt/keyrings/docker.gpg

# Add the repository to Apt sources.
# The architecture and release codename are computed in separate assignments so that a failure
# in either command stops the script (set -e does not apply to substitutions embedded in the
# arguments of another command).
docker_arch="$(dpkg --print-architecture)"
ubuntu_codename="$(. /etc/os-release && echo "$VERSION_CODENAME")"
echo "deb [arch=${docker_arch} signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu ${ubuntu_codename} stable" | \
    sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
sudo apt-get update

# Install Docker Engine
sudo apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin

# Verify that Docker Engine is installed correctly by running the hello-world image.
sudo docker run hello-world
Original file line number Diff line number Diff line change
Expand Up @@ -19,59 +19,71 @@

from typing import Any, Dict, List

from azurelinuxagent.common.utils import shellutil
from tests_e2e.tests.lib.logging import log
from tests_e2e.tests.lib.retry import retry
from tests_e2e.tests.lib.update_arm_template import UpdateArmTemplate

# Name of the security group added by this class
NETWORK_SECURITY_GROUP: str = "waagent-nsg"


class AddNetworkSecurityGroup(UpdateArmTemplate):
class NetworkSecurityRule:
"""
Updates the ARM template to add a network security group allowing SSH access from the current machine.
Provides methods to add network security rules to the given ARM template.

The security rules are added under _NETWORK_SECURITY_GROUP, which is also added to the template.
"""
def update(self, template: Dict[str, Any], is_lisa_template: bool) -> None:
resources: List[Dict[str, Any]] = template["resources"]
def __init__(self, template: Dict[str, Any], is_lisa_template: bool):
self._template = template
self._is_lisa_template = is_lisa_template

# Append the NSG to the list of resources
network_security_group = json.loads(f"""{{
"type": "Microsoft.Network/networkSecurityGroups",
"name": "{NETWORK_SECURITY_GROUP}",
"location": "[resourceGroup().location]",
"apiVersion": "2020-05-01",
"properties": {{
"securityRules": []
}}
}}""")
resources.append(network_security_group)
_NETWORK_SECURITY_GROUP: str = "waagent-nsg"

# Add the SSH rule, but if anything fails just go ahead without it
try:
network_security_group["properties"]["securityRules"].append(json.loads(f"""{{
def add_allow_ssh_rule(self, ip_address: str) -> None:
self.add_security_rule(
json.loads(f"""{{
"name": "waagent-ssh",
"properties": {{
"description": "Allows inbound SSH connections from the orchestrator machine.",
"protocol": "Tcp",
"sourcePortRange": "*",
"destinationPortRange": "22",
"sourceAddressPrefix": "{self._my_ip_address}",
"sourceAddressPrefix": "{ip_address}",
"destinationAddressPrefix": "*",
"access": "Allow",
"priority": 100,
"direction": "Inbound"
}}
}}"""))
except Exception as e:
log.warning("******** Waagent: Failed to create Allow security rule for SSH, skipping rule: %s", e)

def add_security_rule(self, security_rule: Dict[str, Any]) -> None:
self._get_network_security_group()["properties"]["securityRules"].append(security_rule)

def _get_network_security_group(self) -> Dict[str, Any]:
resources: List[Dict[str, Any]] = self._template["resources"]
#
# If the NSG already exists, just return it
#
try:
return UpdateArmTemplate.get_resource_by_name(resources, self._NETWORK_SECURITY_GROUP, "Microsoft.Network/networkSecurityGroups")
except KeyError:
pass

#
# Otherwise, create it and append it to the list of resources
#
network_security_group = json.loads(f"""{{
"type": "Microsoft.Network/networkSecurityGroups",
"name": "{self._NETWORK_SECURITY_GROUP}",
"location": "[resourceGroup().location]",
"apiVersion": "2020-05-01",
"properties": {{
"securityRules": []
}}
}}""")
resources.append(network_security_group)

#
# Add a dependency on the NSG to the virtual network
#
network_resource = self._get_resource(resources, "Microsoft.Network/virtualNetworks")
network_resource = UpdateArmTemplate.get_resource(resources, "Microsoft.Network/virtualNetworks")
network_resource_dependencies = network_resource.get("dependsOn")
nsg_reference = f"[resourceId('Microsoft.Network/networkSecurityGroups', '{NETWORK_SECURITY_GROUP}')]"
nsg_reference = f"[resourceId('Microsoft.Network/networkSecurityGroups', '{self._NETWORK_SECURITY_GROUP}')]"
if network_resource_dependencies is None:
network_resource["dependsOn"] = [nsg_reference]
else:
Expand All @@ -82,11 +94,11 @@ def update(self, template: Dict[str, Any], is_lisa_template: bool) -> None:
#
nsg_reference = json.loads(f"""{{
"networkSecurityGroup": {{
"id": "[resourceId('Microsoft.Network/networkSecurityGroups', '{NETWORK_SECURITY_GROUP}')]"
"id": "[resourceId('Microsoft.Network/networkSecurityGroups', '{self._NETWORK_SECURITY_GROUP}')]"
}}
}}""")

if is_lisa_template:
if self._is_lisa_template:
# The subnets are a copy property of the virtual network in LISA's ARM template:
#
# {
Expand Down Expand Up @@ -167,18 +179,4 @@ def update(self, template: Dict[str, Any], is_lisa_template: bool) -> None:
else:
subnets_properties.update(nsg_reference)

@property
def _my_ip_address(self) -> str:
"""
Gets the IP address of the current machine.
"""
if self.__my_ip_address is None:
def get_my_address():
# Forcing -4 option to fetch the ipv4 address
cmd = ["curl", "-4", "ifconfig.io/ip"]
stdout = shellutil.run_command(cmd)
return stdout.strip()
self.__my_ip_address = retry(get_my_address, attempts=3, delay=10)
return self.__my_ip_address

__my_ip_address: str = None
return network_security_group
Loading
Loading