From 99af5576e2a89bcf4d38c667a9be73f67da74130 Mon Sep 17 00:00:00 2001 From: Sergio Vega Date: Wed, 8 May 2019 16:17:51 -0500 Subject: [PATCH] F #2453: Catch monitor errors --- .../remotes/lib/vcenter_driver/host.rb | 330 +++++++++--------- 1 file changed, 167 insertions(+), 163 deletions(-) diff --git a/src/vmm_mad/remotes/lib/vcenter_driver/host.rb b/src/vmm_mad/remotes/lib/vcenter_driver/host.rb index 7ce13e92b28..39d726daa09 100644 --- a/src/vmm_mad/remotes/lib/vcenter_driver/host.rb +++ b/src/vmm_mad/remotes/lib/vcenter_driver/host.rb @@ -344,199 +344,203 @@ def monitor_host_systems end def monitor_vms(host_id) + begin + vc_uuid = @vi_client.vim.serviceContent.about.instanceUuid + cluster_name = self["name"] + cluster_ref = self["_ref"] + + # Get info of the host where the VM/template is located + one_host = VCenterDriver::VIHelper.one_item(OpenNebula::Host, host_id) + if !one_host + STDERR.puts "Failed to retieve host with id #{host.id}" + STDERR.puts e.inspect + STDERR.puts e.backtrace + end - vc_uuid = @vi_client.vim.serviceContent.about.instanceUuid - cluster_name = self["name"] - cluster_ref = self["_ref"] - - # Get info of the host where the VM/template is located - one_host = VCenterDriver::VIHelper.one_item(OpenNebula::Host, host_id) - if !one_host - STDERR.puts "Failed to retieve host with id #{host.id}" - STDERR.puts e.inspect - STDERR.puts e.backtrace - end - - host_id = one_host["ID"] if one_host - - - # Extract CPU info and name for each esx host in cluster - esx_hosts = {} - @item.host.each do |esx_host| - info = {} - info[:name] = esx_host.name - info[:cpu] = esx_host.summary.hardware.cpuMhz.to_f - esx_hosts[esx_host._ref] = info - end - - @monitored_vms = Set.new - str_info = "" - - view = @vi_client.vim.serviceContent.viewManager.CreateContainerView({ - container: @item, #View for VMs inside this cluster - type: ['VirtualMachine'], - recursive: true - }) + host_id = one_host["ID"] if one_host - pc = @vi_client.vim.serviceContent.propertyCollector - monitored_properties = [ - "name", #VM name - "config.template", #To filter out templates - "summary.runtime.powerState", #VM power state - "summary.quickStats.hostMemoryUsage", #Memory usage - "summary.quickStats.overallCpuUsage", #CPU used by VM - "runtime.host", #ESX host - "resourcePool", #RP - "guest.guestFullName", - "guest.net", #IP addresses as seen by guest tools, - "guest.guestState", - "guest.toolsVersion", - "guest.toolsRunningStatus", - "guest.toolsVersionStatus2", #IP addresses as seen by guest tools, - "config.extraConfig", #VM extraconfig info e.g opennebula.vm.running - "config.hardware.numCPU", - "config.hardware.memoryMB", - "config.annotation", - "datastore" - ] + # Extract CPU info and name for each esx host in cluster + esx_hosts = {} + @item.host.each do |esx_host| + info = {} + info[:name] = esx_host.name + info[:cpu] = esx_host.summary.hardware.cpuMhz.to_f + esx_hosts[esx_host._ref] = info + end - filterSpec = RbVmomi::VIM.PropertyFilterSpec( - :objectSet => [ - :obj => view, - :skip => true, - :selectSet => [ - RbVmomi::VIM.TraversalSpec( - :name => 'traverseEntities', - :type => 'ContainerView', - :path => 'view', - :skip => false - ) - ] - ], - :propSet => [ - { :type => 'VirtualMachine', :pathSet => monitored_properties } + @monitored_vms = Set.new + str_info = "" + + view = @vi_client.vim.serviceContent.viewManager.CreateContainerView({ + container: @item, #View for VMs inside this cluster + type: ['VirtualMachine'], + recursive: true + }) + + pc = @vi_client.vim.serviceContent.propertyCollector + + monitored_properties = [ + "name", #VM name + "config.template", #To filter out templates + "summary.runtime.powerState", #VM power state + "summary.quickStats.hostMemoryUsage", #Memory usage + "summary.quickStats.overallCpuUsage", #CPU used by VM + "runtime.host", #ESX host + "resourcePool", #RP + "guest.guestFullName", + "guest.net", #IP addresses as seen by guest tools, + "guest.guestState", + "guest.toolsVersion", + "guest.toolsRunningStatus", + "guest.toolsVersionStatus2", #IP addresses as seen by guest tools, + "config.extraConfig", #VM extraconfig info e.g opennebula.vm.running + "config.hardware.numCPU", + "config.hardware.memoryMB", + "config.annotation", + "datastore" ] - ) - result = pc.RetrieveProperties(:specSet => [filterSpec]) + filterSpec = RbVmomi::VIM.PropertyFilterSpec( + :objectSet => [ + :obj => view, + :skip => true, + :selectSet => [ + RbVmomi::VIM.TraversalSpec( + :name => 'traverseEntities', + :type => 'ContainerView', + :path => 'view', + :skip => false + ) + ] + ], + :propSet => [ + { :type => 'VirtualMachine', :pathSet => monitored_properties } + ] + ) - vms = {} - vm_objects = [] - result.each do |r| - hashed_properties = r.to_hash - if r.obj.is_a?(RbVmomi::VIM::VirtualMachine) - #Only take care of VMs, not templates - if !hashed_properties["config.template"] - vms[r.obj._ref] = hashed_properties - vm_objects << r.obj + result = pc.RetrieveProperties(:specSet => [filterSpec]) + + vms = {} + vm_objects = [] + result.each do |r| + hashed_properties = r.to_hash + if r.obj.is_a?(RbVmomi::VIM::VirtualMachine) + #Only take care of VMs, not templates + if !hashed_properties["config.template"] + vms[r.obj._ref] = hashed_properties + vm_objects << r.obj + end end end - end - pm = @vi_client.vim.serviceContent.perfManager + pm = @vi_client.vim.serviceContent.perfManager - stats = {} + stats = {} - max_samples = 9 - refresh_rate = 20 #Real time stats takes samples every 20 seconds + max_samples = 9 + refresh_rate = 20 #Real time stats takes samples every 20 seconds - last_mon_time = one_host["TEMPLATE/VCENTER_LAST_PERF_POLL"] + last_mon_time = one_host["TEMPLATE/VCENTER_LAST_PERF_POLL"] - if last_mon_time - interval = (Time.now.to_i - last_mon_time.to_i) - interval = 3601 if interval < 0 - samples = (interval / refresh_rate) - samples = 1 if samples == 0 - max_samples = interval > 3600 ? 9 : samples - end + if last_mon_time + interval = (Time.now.to_i - last_mon_time.to_i) + interval = 3601 if interval < 0 + samples = (interval / refresh_rate) + samples = 1 if samples == 0 + max_samples = interval > 3600 ? 9 : samples + end - if !vm_objects.empty? - stats = pm.retrieve_stats( - vm_objects, - ['net.transmitted','net.bytesRx','net.bytesTx','net.received', - 'virtualDisk.numberReadAveraged','virtualDisk.numberWriteAveraged', - 'virtualDisk.read','virtualDisk.write'], - {max_samples: max_samples} - ) rescue {} - end + if !vm_objects.empty? + stats = pm.retrieve_stats( + vm_objects, + ['net.transmitted','net.bytesRx','net.bytesTx','net.received', + 'virtualDisk.numberReadAveraged','virtualDisk.numberWriteAveraged', + 'virtualDisk.read','virtualDisk.write'], + {max_samples: max_samples} + ) rescue {} + end - if !stats.empty? - last_mon_time = Time.now.to_i.to_s - end + if !stats.empty? + last_mon_time = Time.now.to_i.to_s + end - get_resource_pool_list if !@rp_list + get_resource_pool_list if !@rp_list - vm_pool = VCenterDriver::VIHelper.one_pool(OpenNebula::VirtualMachinePool) + vm_pool = VCenterDriver::VIHelper.one_pool(OpenNebula::VirtualMachinePool) - # opts common to all vms - opts = { - pool: vm_pool, - vc_uuid: vc_uuid, - } + # opts common to all vms + opts = { + pool: vm_pool, + vc_uuid: vc_uuid, + } - vms.each do |vm_ref,info| - vm_info = "" - begin - esx_host = esx_hosts[info["runtime.host"]._ref] - info[:esx_host_name] = esx_host[:name] - info[:esx_host_cpu] = esx_host[:cpu] - info[:cluster_name] = cluster_name - info[:cluster_ref] = cluster_ref - info[:vc_uuid] = vc_uuid - info[:host_id] = host_id - info[:rp_list] = @rp_list - - # Check the running flag - running_flag = info["config.extraConfig"].select do |val| - val[:key] == "opennebula.vm.running" - end + vms.each do |vm_ref,info| + vm_info = "" + begin + esx_host = esx_hosts[info["runtime.host"]._ref] + info[:esx_host_name] = esx_host[:name] + info[:esx_host_cpu] = esx_host[:cpu] + info[:cluster_name] = cluster_name + info[:cluster_ref] = cluster_ref + info[:vc_uuid] = vc_uuid + info[:host_id] = host_id + info[:rp_list] = @rp_list + + # Check the running flag + running_flag = info["config.extraConfig"].select do |val| + val[:key] == "opennebula.vm.running" + end - if !running_flag.empty? && running_flag.first - running_flag = running_flag[0][:value] - end + if !running_flag.empty? && running_flag.first + running_flag = running_flag[0][:value] + end - next if running_flag == "no" + next if running_flag == "no" - # retrieve vcenter driver machine - vm = VCenterDriver::VirtualMachine.new_from_ref(@vi_client, vm_ref, info["name"], opts) - id = vm.vm_id + # retrieve vcenter driver machine + vm = VCenterDriver::VirtualMachine.new_from_ref(@vi_client, vm_ref, info["name"], opts) + id = vm.vm_id - #skip if it's already monitored - if vm.one_exist? - next if @monitored_vms.include? id - @monitored_vms << id - end + #skip if it's already monitored + if vm.one_exist? + next if @monitored_vms.include? id + @monitored_vms << id + end - vm.vm_info = info - vm.monitor(stats) - - vm_name = "#{info["name"]} - #{cluster_name}" - vm_info << %Q{ - VM = [ - ID="#{id}", - VM_NAME="#{vm_name}", - DEPLOY_ID="#{vm_ref}", - } - - # if the machine does not exist in opennebula it means that is a wild: - unless vm.one_exist? - vm_template_64 = Base64.encode64(vm.vm_to_one(vm_name)).gsub("\n","") - vm_info << "VCENTER_TEMPLATE=\"YES\"," - vm_info << "IMPORT_TEMPLATE=\"#{vm_template_64}\"," + vm.vm_info = info + vm.monitor(stats) + + vm_name = "#{info["name"]} - #{cluster_name}" + vm_info << %Q{ + VM = [ + ID="#{id}", + VM_NAME="#{vm_name}", + DEPLOY_ID="#{vm_ref}", + } + + # if the machine does not exist in opennebula it means that is a wild: + unless vm.one_exist? + vm_template_64 = Base64.encode64(vm.vm_to_one(vm_name)).gsub("\n","") + vm_info << "VCENTER_TEMPLATE=\"YES\"," + vm_info << "IMPORT_TEMPLATE=\"#{vm_template_64}\"," + end + + vm_info << "POLL=\"#{vm.info.gsub('"', "\\\"")}\"]" + rescue Exception => e + vm_info = error_monitoring(e, vm_ref, info) end - vm_info << "POLL=\"#{vm.info.gsub('"', "\\\"")}\"]" - rescue Exception => e - vm_info = error_monitoring(e, vm_ref, info) + str_info << vm_info end - str_info << vm_info - end + view.DestroyView # Destroy the view - view.DestroyView # Destroy the view - - return str_info, last_mon_time + return str_info, last_mon_time + + rescue + return "", "" + end end def error_monitoring(e, vm_ref, info = {})