Skip to content

Commit

Permalink
[vm-repair] arm64 support And Bug Fix for telemetry (#6649)
Browse files Browse the repository at this point in the history
  • Loading branch information
haagha authored Sep 1, 2023
1 parent 99418f8 commit 99f1152
Show file tree
Hide file tree
Showing 8 changed files with 125 additions and 9 deletions.
6 changes: 6 additions & 0 deletions src/vm-repair/HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@
Release History
===============

0.5.5
++++++
Adding ARM64 support.
Fix for telemetry for repair-and-restore command.
Fix for Gen1 repair VMs attaching the disk to the SCSI controller, which prevented the nested VM from booting (by Ryan McCallum).

0.5.4
++++++
Adding repair-and-restore command to create a one command flow for vm-repair with fstab scripts.
Expand Down
5 changes: 4 additions & 1 deletion src/vm-repair/azext_vm_repair/command_helper_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,14 @@

from azure.cli.core.commands.client_factory import get_subscription_id

from .telemetry import _track_command_telemetry, _track_run_command_telemetry
from .telemetry import _track_command_telemetry, _track_run_command_telemetry, _track_command_telemetry_repair_and_restore

from .repair_utils import _get_function_param_dict

# Status strings for a vm-repair command.
STATUS_SUCCESS = 'SUCCESS'
STATUS_ERROR = 'ERROR'
# Command names that command_helper compares against its command_name
# to choose which telemetry function to call.
VM_REPAIR_RUN_COMMAND = 'vm repair run'
VM_REPAIR_AND_RESTORE_COMMAND = 'vm repair repair-and-restore'


class command_helper:
Expand Down Expand Up @@ -88,6 +89,8 @@ def __del__(self):
elapsed_time = timeit.default_timer() - self.start_time
# Send exactly one telemetry event per command. The branches must form a
# single if/elif/else chain: with two independent `if` statements the
# trailing `else` pairs only with the second one, so 'vm repair run' would
# send both its run-command event and the generic command event.
if self.command_name == VM_REPAIR_RUN_COMMAND:
    _track_run_command_telemetry(self.logger, self.command_name, self.command_params, self.status, self.message, self.error_message, self.error_stack_trace, elapsed_time, get_subscription_id(self.cmd.cli_ctx), self.return_dict, self.script.run_id, self.script.status, self.script.output, self.script.run_time)
elif self.command_name == VM_REPAIR_AND_RESTORE_COMMAND:
    _track_command_telemetry_repair_and_restore(self.logger, self.command_name, self.status, self.message, self.error_message, self.error_stack_trace, elapsed_time, get_subscription_id(self.cmd.cli_ctx))
else:
    _track_command_telemetry(self.logger, self.command_name, self.command_params, self.status, self.message, self.error_message, self.error_stack_trace, elapsed_time, get_subscription_id(self.cmd.cli_ctx), self.return_dict)

Expand Down
25 changes: 23 additions & 2 deletions src/vm-repair/azext_vm_repair/custom.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,9 @@
_unlock_encrypted_vm_run,
_create_repair_vm,
_check_n_start_vm,
_check_existing_rg
_check_existing_rg,
_fetch_architecture,
_select_distro_linux_Arm64
)
from .exceptions import AzCommandError, RunScriptNotFoundForIdError, SupportingResourceNotFoundError, CommandCanceledByUserError
logger = get_logger(__name__)
Expand All @@ -70,15 +72,19 @@ def create(cmd, vm_name, resource_group_name, repair_password=None, repair_usern
copy_disk_id = None
resource_tag = _get_repair_resource_tag(resource_group_name, vm_name)
created_resources = []
architecture_type = _fetch_architecture(source_vm)

# Fetch OS image urn and set OS type for disk create
if is_linux and _uses_managed_disk(source_vm):
# os_image_urn = "UbuntuLTS"
os_type = 'Linux'
hyperV_generation_linux = _check_linux_hyperV_gen(source_vm)
if hyperV_generation_linux == 'V2':
logger.info('Generation 2 VM detected, RHEL/Centos/Oracle 6 distros not available to be used for rescue VM ')
logger.info('Generation 2 VM detected')
os_image_urn = _select_distro_linux_gen2(distro)
if architecture_type == 'Arm64':
logger.info('ARM64 VM detected')
os_image_urn = _select_distro_linux_Arm64(distro)
else:
os_image_urn = _select_distro_linux(distro)
else:
Expand Down Expand Up @@ -732,3 +738,18 @@ def repair_and_restore(cmd, vm_name, resource_group_name, repair_password=None,
repair_vm_id = _call_az_command(show_vm_id)

restore(cmd, vm_name, resource_group_name, copy_disk_name, repair_vm_id, yes=True)

command.message = 'fstab script has been applied to the source VM. A new repair VM \'{n}\' was created in the resource group \'{repair_rg}\' with disk \'{d}\' attached as data disk. ' \
'The repairs were complete using the fstab script and the repair VM was then deleted. ' \
'The repair disk was restored to the source VM. ' \
.format(n=repair_vm_name, repair_rg=repair_group_name, d=copy_disk_name)

command.set_status_success()
if command.error_stack_trace:
logger.debug(command.error_stack_trace)
# Generate return object and log errors if needed
return_dict = command.init_return_dict()

logger.info('\n%s\n', command.message)

return return_dict
38 changes: 36 additions & 2 deletions src/vm-repair/azext_vm_repair/repair_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -538,13 +538,32 @@ def _select_distro_linux(distro):
os_image_urn = distro
else:
logger.info('No specific distro was provided , using the default Ubuntu distro')
os_image_urn = "UbuntuLTS"
os_image_urn = "Ubuntu2204"
return os_image_urn


def _select_distro_linux_Arm64(distro):
    """
    Returns the image URN to use for an ARM64 Linux repair VM.

    distro may be one of the known aliases in the lookup table below, a custom
    image URN (a string containing exactly three ':' separators), or anything
    else (including None), in which case the default ARM64 Ubuntu image is used.
    """
    image_lookup = {
        'rhel8': 'RedHat:rhel-arm64:8_8-arm64:latest',
        'rhel9': 'RedHat:rhel-arm64:9_2-arm64:latest',
        'ubuntu18': 'Canonical:UbuntuServer:18_04-lts-arm64:latest',
        'ubuntu20': 'Canonical:0001-com-ubuntu-server-focal:20_04-lts-arm64:latest',
        'centos7': 'OpenLogic:CentOS:7_9-arm64:latest',
    }
    if distro in image_lookup:
        os_image_urn = image_lookup[distro]
    # The isinstance guard keeps a missing (None) distro from raising
    # AttributeError on .count(); it falls through to the default image instead.
    elif isinstance(distro, str) and distro.count(":") == 3:
        logger.info('A custom URN was provided , will be used as distro for the recovery VM')
        os_image_urn = distro
    else:
        logger.info('No specific distro was provided , using the default ARM64 Ubuntu distro')
        os_image_urn = "Canonical:UbuntuServer:18_04-lts-arm64:latest"
    return os_image_urn


def _select_distro_linux_gen2(distro):
# base on the document : https://docs.microsoft.com/en-us/azure/virtual-machines/generation-2#generation-2-vm-images-in-azure-marketplace
# RHEL/Centos/Oracle 6 are not supported for Gen 2
image_lookup = {
'rhel6': 'RedHat:rhel-raw:7-raw-gen2:latest',
'rhel7': 'RedHat:rhel-raw:7-raw-gen2:latest',
Expand Down Expand Up @@ -720,3 +739,18 @@ def _create_repair_vm(copy_disk_id, create_repair_vm_command, repair_password, r
_call_az_command(create_repair_vm_command + ' --validate', secure_params=[repair_password, repair_username])
logger.info('Creating repair VM...')
_call_az_command(create_repair_vm_command, secure_params=[repair_password, repair_username])


def _fetch_architecture(source_vm):
    """
    Returns the CPU architecture of the source VM.

    The value is read from the 'CpuArchitectureType' capability reported by
    'az vm list-skus' for the VM's location and size. Returns None (after
    logging a warning) when the lookup reports no architecture value, so a
    caller that compares the result against 'Arm64' treats the VM as non-ARM64
    instead of failing with an IndexError.
    """
    location = source_vm.location
    vm_size = source_vm.hardware_profile.vm_size
    architecture_type_cmd = 'az vm list-skus -l {loc} --size {vm_size} --query "[].capabilities[?name==\'CpuArchitectureType\'].value" -o json' \
                            .format(loc=location, vm_size=vm_size)

    logger.info('Fetching architecture type of the source VM...')
    architecture = loads(_call_az_command(architecture_type_cmd).strip('\n'))

    # The query yields one list of values per returned SKU. Return the first
    # value that is present; indexing [0][0] directly fails when no SKU is
    # returned or when the first SKU does not report the capability.
    for sku_values in architecture or []:
        if sku_values:
            return sku_values[0]

    logger.warning('Could not determine the CPU architecture for VM size %s in location %s.', vm_size, location)
    return None
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ if ($hyperv.Installed -and $hypervTools.Installed -and $hypervPowerShell.Install
$return = Set-DhcpServerv4OptionValue -DnsServer 168.63.129.16 -Router 192.168.0.1 -ErrorAction Stop

# Create the nested guest VM
if (!$gen) {
if (!$gen -or ($gen -eq 1)) {
Log-Info 'Creating Gen1 VM with 4GB memory' | Out-File -FilePath $logFile -Append
$return = New-VM -Name $nestedGuestVmName -MemoryStartupBytes 4GB -NoVHD -BootDevice IDE -Generation 1 -ErrorAction Stop
}
Expand All @@ -84,7 +84,7 @@ if ($hyperv.Installed -and $hypervTools.Installed -and $hypervPowerShell.Install
$disk = get-disk -ErrorAction Stop | where {$_.FriendlyName -eq 'Msft Virtual Disk'}
$return = $disk | set-disk -IsOffline $true -ErrorAction Stop

if (!$gen) {
if (!$gen -or ($gen -eq 1)) {
Log-Info "Gen1: Adding hard drive to IDE controller" | Out-File -FilePath $logFile -Append
$return = $disk | Add-VMHardDiskDrive -VMName $nestedGuestVmName -ErrorAction Stop
}
Expand Down
17 changes: 17 additions & 0 deletions src/vm-repair/azext_vm_repair/telemetry.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,20 @@ def _track_run_command_telemetry(logger, command_name, parameters, status, messa
tc.flush()
except Exception as exception:
logger.error('Unexpected error sending telemetry with exception: %s', str(exception))


def _track_command_telemetry_repair_and_restore(logger, command_name, status, message, error_message, error_stack_trace, duration, subscription_id):
    """
    Sends the telemetry event for the repair-and-restore command.

    The event is named after the command. Status, messages, stack trace and
    subscription id are sent as properties and the duration as a measurement.
    Any exception raised while sending is logged through the given logger and
    is not re-raised.
    """
    try:
        event_measurements = {'command_duration': duration}
        event_properties = {
            'command_name': command_name,
            'command_status': status,
            'message': message,
            'error_message': error_message,
            'error_stack_trace': error_stack_trace,
            'subscription_id': subscription_id,
        }
        tc.track_event(command_name, event_properties, event_measurements)
        tc.flush()
    except Exception as err:
        logger.error('Unexpected error sending telemetry with exception: %s', str(err))
Original file line number Diff line number Diff line change
Expand Up @@ -741,4 +741,39 @@ def test_vmrepair_RepairAndRestoreLinuxVM(self, resource_group):
# Check swapped OS disk
vms = self.cmd('vm list -g {rg} -o json').get_output_in_json()
source_vm = vms[0]
assert source_vm['storageProfile']['osDisk']['name'] == result['copied_disk_name']
assert source_vm['storageProfile']['osDisk']['name'] == result['copied_disk_name']


@pytest.mark.arm64
class LinuxARMManagedDiskCreateRestoreTest(LiveScenarioTest):
    """Live scenario: 'vm repair create' followed by 'vm repair restore' on an ARM64 Linux VM."""

    @ResourceGroupPreparer(location='eastus')
    def test_vmrepair_LinuxManagedCreateRestore(self, resource_group):
        self.kwargs.update({'vm': 'vm1'})

        # Create the source VM from an ARM64 image; exactly one VM must exist
        # afterwards, otherwise 'vm create' itself went wrong.
        self.cmd('vm create -g {rg} -n {vm} --image Canonical:UbuntuServer:18_04-lts-arm64:latest --admin-username azureadmin --admin-password !Passw0rd2018')
        vms_after_create = self.cmd('vm list -g {rg} -o json').get_output_in_json()
        assert len(vms_after_create) == 1

        # Run 'vm repair create' and require a successful status.
        create_result = self.cmd('vm repair create -g {rg} -n {vm} --repair-username azureadmin --repair-password !Passw0rd2018 --yes -o json').get_output_in_json()
        assert create_result['status'] == STATUS_SUCCESS, create_result['error_message']

        # The repair resource group must hold exactly one VM, with the copied
        # disk attached as its first data disk.
        list_repair_vms = 'vm list -g {} -o json'.format(create_result['repair_resource_group'])
        repair_vm_list = self.cmd(list_repair_vms).get_output_in_json()
        assert len(repair_vm_list) == 1
        attached_data_disks = repair_vm_list[0]['storageProfile']['dataDisks']
        assert attached_data_disks[0]['name'] == create_result['copied_disk_name']

        # Run 'vm repair restore'.
        self.cmd('vm repair restore -g {rg} -n {vm} --yes')

        # The source VM's OS disk must now be the copied disk.
        vms_after_restore = self.cmd('vm list -g {rg} -o json').get_output_in_json()
        restored_os_disk = vms_after_restore[0]['storageProfile']['osDisk']
        assert restored_os_disk['name'] == create_result['copied_disk_name']
2 changes: 1 addition & 1 deletion src/vm-repair/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from codecs import open
from setuptools import setup, find_packages

VERSION = "0.5.4"
VERSION = "0.5.5"

CLASSIFIERS = [
'Development Status :: 4 - Beta',
Expand Down

0 comments on commit 99f1152

Please sign in to comment.