diff --git a/vsphere/datadog_checks/vsphere/api.py b/vsphere/datadog_checks/vsphere/api.py index 629e2e8c8598f..36276b806bff7 100644 --- a/vsphere/datadog_checks/vsphere/api.py +++ b/vsphere/datadog_checks/vsphere/api.py @@ -80,7 +80,12 @@ def check_health(self): @smart_retry def get_perf_counter_by_level(self, collection_level): - """Requests and returns the list of counter available for a given collection_level.""" + """ + Requests and returns the list of counters available for a given collection_level. + + :return list of vim.PerformanceManager.CounterInfo: + https://vdc-download.vmware.com/vmwb-repository/dcr-public/fe08899f-1eec-4d8d-b3bc-a6664c168c2c/7fdf97a1-4c0d-4be0-9d43-2ceebbc174d9/doc/vim.PerformanceManager.CounterInfo.html + """ return self._conn.content.perfManager.QueryPerfCounterByLevel(collection_level) @smart_retry diff --git a/vsphere/datadog_checks/vsphere/config.py b/vsphere/datadog_checks/vsphere/config.py index 653e022e6f20e..c2173b3a64d5e 100644 --- a/vsphere/datadog_checks/vsphere/config.py +++ b/vsphere/datadog_checks/vsphere/config.py @@ -60,7 +60,11 @@ def __init__(self, instance, log): # Filters self.resource_filters = self._parse_resource_filters(instance.get("resource_filters", {})) - self.metric_filters = self._parse_metric_filters(instance.get("metric_filters", {})) + self.metric_filters = self._parse_metric_regex_filters(instance.get("metric_filters", {})) + # Since `collect_per_instance_filters` has the same structure as `metric_filters` we use the same parser + self.collect_per_instance_filters = self._parse_metric_regex_filters( + instance.get("collect_per_instance_filters", {}) + ) self.validate_config() @@ -147,7 +151,7 @@ def _parse_resource_filters(self, all_resource_filters): return formatted_resource_filters - def _parse_metric_filters(self, all_metric_filters): + def _parse_metric_regex_filters(self, all_metric_filters): allowed_resource_types = [MOR_TYPE_AS_STRING[k] for k in self.collected_resource_types] metric_filters 
= {} for resource_type, filters in iteritems(all_metric_filters): diff --git a/vsphere/datadog_checks/vsphere/data/conf.yaml.example b/vsphere/datadog_checks/vsphere/data/conf.yaml.example index e1b06ab8f9438..8aca89a0250c1 100644 --- a/vsphere/datadog_checks/vsphere/data/conf.yaml.example +++ b/vsphere/datadog_checks/vsphere/data/conf.yaml.example @@ -101,7 +101,7 @@ instances: ## you can choose which metric you want to collect using a list of regex. ## If you do not specify a regex for the resource, all metrics will be collected. ## See https://github.com/DataDog/integrations-core/blob/master/vsphere/datadog_checks/vsphere/metrics.py - ## for the list of collected metrics (do not prefix then with `vsphere`) + ## for the list of collected metrics (do not prefix them with `vsphere`) ## Note1: 'datastore', 'datacenter' and 'cluster' filters are ignored when collecting realtime metrics as those ## resources do not have realtime metrics ## Note2: 'vm' and 'host' filters are ignored when collecting historical metrics as those resources do no @@ -119,6 +119,25 @@ instances: # cluster: # - # Only possible with "collection_type: historical" + ## @param collect_per_instance_filters - object - optional - default: none + ## For each resource type (vm, host, datastore, datacenter, cluster) to collect, + ## you can choose the metrics for which per-instance values are collected, using a list of regexes. + ## See https://github.com/DataDog/integrations-core/blob/master/vsphere/datadog_checks/vsphere/metrics.py + ## for the list of collected metrics (do not prefix them with `vsphere`) + ## /!\ Use sparingly: collecting per-instance metrics might be very expensive for big environments. 
+ # + # collect_per_instance_filters: + # vm: + # - # Only possible with "collection_type: realtime" + # host: + # - # Only possible with "collection_type: realtime" + # datastore: + # - # Only possible with "collection_type: historical" + # datacenter: + # - # Only possible with "collection_type: historical" + # cluster: + # - # Only possible with "collection_type: historical" + ## @param use_guest_hostname - boolean - optional - default: false ## If true, the check will use the guest hostname for VMs instead of the VM name ## This requires the VM to have VMware tools installed in it. If the guest hostname is diff --git a/vsphere/datadog_checks/vsphere/metrics.py b/vsphere/datadog_checks/vsphere/metrics.py index 9706c1aa3ba6c..4b839e5d81c13 100644 --- a/vsphere/datadog_checks/vsphere/metrics.py +++ b/vsphere/datadog_checks/vsphere/metrics.py @@ -77,551 +77,539 @@ } # All metrics that can be collected from VirtualMachines. -# The table maps a dd-formatted metric_name to a tuple containing: -# (collection_level, per_instance_collection_level, (optional)is_available_per_instance) -# Note: Only the keys of the dict are used, but the values can be useful later -# to decide how and when to collect per-instance level metrics. 
VM_METRICS = { - 'cpu.costop.sum': (2, 3), - 'cpu.demand.avg': (2, 3), - 'cpu.demandEntitlementRatio.latest': (4, 4), - 'cpu.entitlement.latest': (2, 3), - 'cpu.idle.sum': (2, 3, True), - 'cpu.latency.avg': (2, 3), - 'cpu.maxlimited.sum': (2, 3, True), - 'cpu.overlap.sum': (3, 3, True), - 'cpu.readiness.avg': (4, 4), - 'cpu.ready.sum': (1, 3), - 'cpu.run.sum': (2, 3, True), - 'cpu.swapwait.sum': (3, 3), - 'cpu.system.sum': (3, 3, True), - 'cpu.usage.avg': (1, 3, True), - 'cpu.usage.max': (4, 4, True), - 'cpu.usage.min': (4, 4, True), - 'cpu.usage.raw': (4, 4, True), - 'cpu.usagemhz.avg': (1, 3), - 'cpu.usagemhz.max': (4, 4), - 'cpu.usagemhz.min': (4, 4), - 'cpu.usagemhz.raw': (4, 4), - 'cpu.used.sum': (3, 3, True), - 'cpu.wait.sum': (3, 3), - 'datastore.maxTotalLatency.latest': (3, 3), - 'datastore.numberReadAveraged.avg': (1, 3), - 'datastore.numberWriteAveraged.avg': (1, 3), - 'datastore.read.avg': (2, 2, True), - 'datastore.totalReadLatency.avg': (1, 3, True), - 'datastore.totalWriteLatency.avg': (1, 3, True), - 'datastore.write.avg': (2, 2, True), - 'disk.busResets.sum': (2, 3, True), - 'disk.commands.sum': (2, 3, True), - 'disk.commandsAborted.sum': (2, 3, True), - 'disk.commandsAveraged.avg': (2, 3, True), - 'disk.maxTotalLatency.latest': (1, 3), - 'disk.numberRead.sum': (3, 3, True), - 'disk.numberReadAveraged.avg': (1, 3), - 'disk.numberWrite.sum': (3, 3, True), - 'disk.numberWriteAveraged.avg': (1, 3), - 'disk.read.avg': (2, 3, True), - 'disk.usage.avg': (1, 3), - 'disk.usage.max': (4, 4), - 'disk.usage.min': (4, 4), - 'disk.usage.raw': (4, 4), - 'disk.write.avg': (2, 3, True), - 'hbr.hbrNetRx.avg': (4, 4), - 'hbr.hbrNetTx.avg': (4, 4), - 'mem.active.avg': (2, 3), - 'mem.active.max': (4, 4), - 'mem.active.min': (4, 4), - 'mem.active.raw': (4, 4), - 'mem.activewrite.avg': (2, 3), - 'mem.compressed.avg': (2, 3), - 'mem.compressionRate.avg': (2, 3), - 'mem.consumed.avg': (1, 3), - 'mem.consumed.max': (4, 4), - 'mem.consumed.min': (4, 4), - 'mem.consumed.raw': 
(4, 4), - 'mem.decompressionRate.avg': (2, 3), - 'mem.entitlement.avg': (2, 3), - 'mem.granted.avg': (2, 3), - 'mem.granted.max': (4, 4), - 'mem.granted.min': (4, 4), - 'mem.granted.raw': (4, 4), - 'mem.latency.avg': (2, 3), - 'mem.llSwapInRate.avg': (2, 3), - 'mem.llSwapOutRate.avg': (2, 3), - 'mem.llSwapUsed.avg': (4, 4), - 'mem.llSwapUsed.max': (4, 4), - 'mem.llSwapUsed.min': (4, 4), - 'mem.llSwapUsed.raw': (4, 4), - 'mem.overhead.avg': (1, 1), - 'mem.overhead.max': (4, 4), - 'mem.overhead.min': (4, 4), - 'mem.overhead.raw': (4, 4), - 'mem.overheadMax.avg': (2, 3), - 'mem.overheadTouched.avg': (4, 4), - 'mem.shared.avg': (2, 3), - 'mem.shared.max': (4, 4), - 'mem.shared.min': (4, 4), - 'mem.shared.raw': (4, 4), - 'mem.swapin.avg': (2, 3), - 'mem.swapin.max': (4, 4), - 'mem.swapin.min': (4, 4), - 'mem.swapin.raw': (4, 4), - 'mem.swapinRate.avg': (1, 3), - 'mem.swapout.avg': (2, 3), - 'mem.swapout.max': (4, 4), - 'mem.swapout.min': (4, 4), - 'mem.swapout.raw': (4, 4), - 'mem.swapoutRate.avg': (1, 3), - 'mem.swapped.avg': (2, 3), - 'mem.swapped.max': (4, 4), - 'mem.swapped.min': (4, 4), - 'mem.swapped.raw': (4, 4), - 'mem.swaptarget.avg': (2, 3), - 'mem.swaptarget.max': (4, 4), - 'mem.swaptarget.min': (4, 4), - 'mem.swaptarget.raw': (4, 4), - 'mem.usage.avg': (1, 3), - 'mem.usage.max': (4, 4), - 'mem.usage.min': (4, 4), - 'mem.usage.raw': (4, 4), - 'mem.vmmemctl.avg': (1, 3), - 'mem.vmmemctl.max': (4, 4), - 'mem.vmmemctl.min': (4, 4), - 'mem.vmmemctl.raw': (4, 4), - 'mem.vmmemctltarget.avg': (2, 3), - 'mem.vmmemctltarget.max': (4, 4), - 'mem.vmmemctltarget.min': (4, 4), - 'mem.vmmemctltarget.raw': (4, 4), - 'mem.zero.avg': (2, 3), - 'mem.zero.max': (4, 4), - 'mem.zero.min': (4, 4), - 'mem.zero.raw': (4, 4), - 'mem.zipSaved.latest': (2, 3), - 'mem.zipped.latest': (2, 3), - 'net.broadcastRx.sum': (2, 3, True), - 'net.broadcastTx.sum': (2, 3, True), - 'net.bytesRx.avg': (2, 3, True), - 'net.bytesTx.avg': (2, 3, True), - 'net.droppedRx.sum': (2, 3, True), - 
'net.droppedTx.sum': (2, 3, True), - 'net.multicastRx.sum': (2, 3, True), - 'net.multicastTx.sum': (2, 3, True), - 'net.packetsRx.sum': (2, 3, True), - 'net.packetsTx.sum': (2, 3, True), - 'net.pnicBytesRx.avg': (4, 4, True), - 'net.pnicBytesTx.avg': (4, 4, True), - 'net.received.avg': (2, 3, True), - 'net.transmitted.avg': (2, 3, True), - 'net.usage.avg': (1, 3, True), - 'net.usage.max': (4, 4, True), - 'net.usage.min': (4, 4, True), - 'net.usage.raw': (4, 4, True), - 'power.energy.sum': (3, 3), - 'power.power.avg': (2, 3), - 'rescpu.actav1.latest': (3, 3), - 'rescpu.actav15.latest': (3, 3), - 'rescpu.actav5.latest': (3, 3), - 'rescpu.actpk1.latest': (3, 3), - 'rescpu.actpk15.latest': (3, 3), - 'rescpu.actpk5.latest': (3, 3), - 'rescpu.maxLimited1.latest': (3, 3), - 'rescpu.maxLimited15.latest': (3, 3), - 'rescpu.maxLimited5.latest': (3, 3), - 'rescpu.runav1.latest': (3, 3), - 'rescpu.runav15.latest': (3, 3), - 'rescpu.runav5.latest': (3, 3), - 'rescpu.runpk1.latest': (3, 3), - 'rescpu.runpk15.latest': (3, 3), - 'rescpu.runpk5.latest': (3, 3), - 'rescpu.sampleCount.latest': (3, 3), - 'rescpu.samplePeriod.latest': (3, 3), - 'sys.heartbeat.latest': (4, 4), - 'sys.heartbeat.sum': (1, 3), - 'sys.osUptime.latest': (4, 4), - 'sys.uptime.latest': (1, 3), - 'virtualDisk.busResets.sum': (2, 4, True), - 'virtualDisk.commandsAborted.sum': (2, 4, True), - 'virtualDisk.largeSeeks.latest': (4, 4, True), - 'virtualDisk.mediumSeeks.latest': (4, 4, True), - 'virtualDisk.numberReadAveraged.avg': (1, 3, True), - 'virtualDisk.numberWriteAveraged.avg': (1, 3, True), - 'virtualDisk.read.avg': (2, 2, True), - 'virtualDisk.readIOSize.latest': (4, 4, True), - 'virtualDisk.readLatencyUS.latest': (4, 4, True), - 'virtualDisk.readLoadMetric.latest': (2, 2, True), - 'virtualDisk.readOIO.latest': (2, 2, True), - 'virtualDisk.smallSeeks.latest': (4, 4, True), - 'virtualDisk.totalReadLatency.avg': (1, 3, True), - 'virtualDisk.totalWriteLatency.avg': (1, 3, True), - 'virtualDisk.write.avg': (2, 
2, True), - 'virtualDisk.writeIOSize.latest': (4, 4, True), - 'virtualDisk.writeLatencyUS.latest': (4, 4, True), - 'virtualDisk.writeLoadMetric.latest': (2, 2, True), - 'virtualDisk.writeOIO.latest': (2, 2, True), + 'cpu.costop.sum', + 'cpu.demand.avg', + 'cpu.demandEntitlementRatio.latest', + 'cpu.entitlement.latest', + 'cpu.idle.sum', + 'cpu.latency.avg', + 'cpu.maxlimited.sum', + 'cpu.overlap.sum', + 'cpu.readiness.avg', + 'cpu.ready.sum', + 'cpu.run.sum', + 'cpu.swapwait.sum', + 'cpu.system.sum', + 'cpu.usage.avg', + 'cpu.usage.max', + 'cpu.usage.min', + 'cpu.usage.raw', + 'cpu.usagemhz.avg', + 'cpu.usagemhz.max', + 'cpu.usagemhz.min', + 'cpu.usagemhz.raw', + 'cpu.used.sum', + 'cpu.wait.sum', + 'datastore.maxTotalLatency.latest', + 'datastore.numberReadAveraged.avg', + 'datastore.numberWriteAveraged.avg', + 'datastore.read.avg', + 'datastore.totalReadLatency.avg', + 'datastore.totalWriteLatency.avg', + 'datastore.write.avg', + 'disk.busResets.sum', + 'disk.commands.sum', + 'disk.commandsAborted.sum', + 'disk.commandsAveraged.avg', + 'disk.maxTotalLatency.latest', + 'disk.numberRead.sum', + 'disk.numberReadAveraged.avg', + 'disk.numberWrite.sum', + 'disk.numberWriteAveraged.avg', + 'disk.read.avg', + 'disk.usage.avg', + 'disk.usage.max', + 'disk.usage.min', + 'disk.usage.raw', + 'disk.write.avg', + 'hbr.hbrNetRx.avg', + 'hbr.hbrNetTx.avg', + 'mem.active.avg', + 'mem.active.max', + 'mem.active.min', + 'mem.active.raw', + 'mem.activewrite.avg', + 'mem.compressed.avg', + 'mem.compressionRate.avg', + 'mem.consumed.avg', + 'mem.consumed.max', + 'mem.consumed.min', + 'mem.consumed.raw', + 'mem.decompressionRate.avg', + 'mem.entitlement.avg', + 'mem.granted.avg', + 'mem.granted.max', + 'mem.granted.min', + 'mem.granted.raw', + 'mem.latency.avg', + 'mem.llSwapInRate.avg', + 'mem.llSwapOutRate.avg', + 'mem.llSwapUsed.avg', + 'mem.llSwapUsed.max', + 'mem.llSwapUsed.min', + 'mem.llSwapUsed.raw', + 'mem.overhead.avg', + 'mem.overhead.max', + 'mem.overhead.min', + 
'mem.overhead.raw', + 'mem.overheadMax.avg', + 'mem.overheadTouched.avg', + 'mem.shared.avg', + 'mem.shared.max', + 'mem.shared.min', + 'mem.shared.raw', + 'mem.swapin.avg', + 'mem.swapin.max', + 'mem.swapin.min', + 'mem.swapin.raw', + 'mem.swapinRate.avg', + 'mem.swapout.avg', + 'mem.swapout.max', + 'mem.swapout.min', + 'mem.swapout.raw', + 'mem.swapoutRate.avg', + 'mem.swapped.avg', + 'mem.swapped.max', + 'mem.swapped.min', + 'mem.swapped.raw', + 'mem.swaptarget.avg', + 'mem.swaptarget.max', + 'mem.swaptarget.min', + 'mem.swaptarget.raw', + 'mem.usage.avg', + 'mem.usage.max', + 'mem.usage.min', + 'mem.usage.raw', + 'mem.vmmemctl.avg', + 'mem.vmmemctl.max', + 'mem.vmmemctl.min', + 'mem.vmmemctl.raw', + 'mem.vmmemctltarget.avg', + 'mem.vmmemctltarget.max', + 'mem.vmmemctltarget.min', + 'mem.vmmemctltarget.raw', + 'mem.zero.avg', + 'mem.zero.max', + 'mem.zero.min', + 'mem.zero.raw', + 'mem.zipSaved.latest', + 'mem.zipped.latest', + 'net.broadcastRx.sum', + 'net.broadcastTx.sum', + 'net.bytesRx.avg', + 'net.bytesTx.avg', + 'net.droppedRx.sum', + 'net.droppedTx.sum', + 'net.multicastRx.sum', + 'net.multicastTx.sum', + 'net.packetsRx.sum', + 'net.packetsTx.sum', + 'net.pnicBytesRx.avg', + 'net.pnicBytesTx.avg', + 'net.received.avg', + 'net.transmitted.avg', + 'net.usage.avg', + 'net.usage.max', + 'net.usage.min', + 'net.usage.raw', + 'power.energy.sum', + 'power.power.avg', + 'rescpu.actav1.latest', + 'rescpu.actav15.latest', + 'rescpu.actav5.latest', + 'rescpu.actpk1.latest', + 'rescpu.actpk15.latest', + 'rescpu.actpk5.latest', + 'rescpu.maxLimited1.latest', + 'rescpu.maxLimited15.latest', + 'rescpu.maxLimited5.latest', + 'rescpu.runav1.latest', + 'rescpu.runav15.latest', + 'rescpu.runav5.latest', + 'rescpu.runpk1.latest', + 'rescpu.runpk15.latest', + 'rescpu.runpk5.latest', + 'rescpu.sampleCount.latest', + 'rescpu.samplePeriod.latest', + 'sys.heartbeat.latest', + 'sys.heartbeat.sum', + 'sys.osUptime.latest', + 'sys.uptime.latest', + 'virtualDisk.busResets.sum', + 
'virtualDisk.commandsAborted.sum', + 'virtualDisk.largeSeeks.latest', + 'virtualDisk.mediumSeeks.latest', + 'virtualDisk.numberReadAveraged.avg', + 'virtualDisk.numberWriteAveraged.avg', + 'virtualDisk.read.avg', + 'virtualDisk.readIOSize.latest', + 'virtualDisk.readLatencyUS.latest', + 'virtualDisk.readLoadMetric.latest', + 'virtualDisk.readOIO.latest', + 'virtualDisk.smallSeeks.latest', + 'virtualDisk.totalReadLatency.avg', + 'virtualDisk.totalWriteLatency.avg', + 'virtualDisk.write.avg', + 'virtualDisk.writeIOSize.latest', + 'virtualDisk.writeLatencyUS.latest', + 'virtualDisk.writeLoadMetric.latest', + 'virtualDisk.writeOIO.latest', } # All metrics that can be collected from ESXi Hosts. -# The table maps a dd-formatted metric_name to a tuple containing: -# (collection_level, per_instance_collection_level, (optional)is_available_per_instance) HOST_METRICS = { - 'cpu.coreUtilization.avg': (2, 3, True), - 'cpu.coreUtilization.max': (4, 4, True), - 'cpu.coreUtilization.min': (4, 4, True), - 'cpu.coreUtilization.raw': (4, 4, True), - 'cpu.costop.sum': (2, 3), - 'cpu.demand.avg': (2, 3), - 'cpu.idle.sum': (2, 3, True), - 'cpu.latency.avg': (2, 3), - 'cpu.readiness.avg': (4, 4), - 'cpu.ready.sum': (1, 3), - 'cpu.reservedCapacity.avg': (2, 3), - 'cpu.swapwait.sum': (3, 3), - 'cpu.totalCapacity.avg': (2, 3), - 'cpu.usage.avg': (1, 3, True), - 'cpu.usage.max': (4, 4, True), - 'cpu.usage.min': (4, 4, True), - 'cpu.usage.raw': (4, 4, True), - 'cpu.usagemhz.avg': (1, 3), - 'cpu.usagemhz.max': (4, 4), - 'cpu.usagemhz.min': (4, 4), - 'cpu.usagemhz.raw': (4, 4), - 'cpu.used.sum': (3, 3, True), - 'cpu.utilization.avg': (2, 3, True), - 'cpu.utilization.max': (4, 4, True), - 'cpu.utilization.min': (4, 4, True), - 'cpu.utilization.raw': (4, 4, True), - 'cpu.wait.sum': (3, 3), - 'datastore.datastoreIops.avg': (1, 3, True), - 'datastore.datastoreMaxQueueDepth.latest': (1, 3, True), - 'datastore.datastoreNormalReadLatency.latest': (2, 2, True), - 
'datastore.datastoreNormalWriteLatency.latest': (2, 2, True), - 'datastore.datastoreReadBytes.latest': (2, 2, True), - 'datastore.datastoreReadIops.latest': (1, 3, True), - 'datastore.datastoreReadLoadMetric.latest': (4, 4, True), - 'datastore.datastoreReadOIO.latest': (1, 3, True), - 'datastore.datastoreVMObservedLatency.latest': (1, 3, True), - 'datastore.datastoreWriteBytes.latest': (2, 2, True), - 'datastore.datastoreWriteIops.latest': (1, 3, True), - 'datastore.datastoreWriteLoadMetric.latest': (4, 4, True), - 'datastore.datastoreWriteOIO.latest': (1, 3, True), - 'datastore.maxTotalLatency.latest': (3, 3), - 'datastore.numberReadAveraged.avg': (1, 3), - 'datastore.numberWriteAveraged.avg': (1, 3), - 'datastore.read.avg': (2, 2, True), - 'datastore.siocActiveTimePercentage.avg': (1, 3, True), - 'datastore.sizeNormalizedDatastoreLatency.avg': (1, 3, True), - 'datastore.totalReadLatency.avg': (1, 3, True), - 'datastore.totalWriteLatency.avg': (1, 3, True), - 'datastore.write.avg': (2, 2, True), - 'disk.busResets.sum': (2, 3, True), - 'disk.commands.sum': (2, 3, True), - 'disk.commandsAborted.sum': (2, 3, True), - 'disk.commandsAveraged.avg': (2, 3, True), - 'disk.deviceLatency.avg': (1, 3, True), - 'disk.deviceReadLatency.avg': (2, 3, True), - 'disk.deviceWriteLatency.avg': (2, 3, True), - 'disk.kernelLatency.avg': (2, 3, True), - 'disk.kernelReadLatency.avg': (2, 3, True), - 'disk.kernelWriteLatency.avg': (2, 3, True), - 'disk.maxQueueDepth.avg': (1, 3, True), - 'disk.maxTotalLatency.latest': (1, 3), - 'disk.numberRead.sum': (3, 3, True), - 'disk.numberReadAveraged.avg': (1, 3), - 'disk.numberWrite.sum': (3, 3, True), - 'disk.numberWriteAveraged.avg': (1, 3), - 'disk.queueLatency.avg': (2, 3, True), - 'disk.queueReadLatency.avg': (2, 3, True), - 'disk.queueWriteLatency.avg': (2, 3, True), - 'disk.read.avg': (2, 3, True), - 'disk.scsiReservationCnflctsPct.avg': (4, 4, True), - 'disk.scsiReservationConflicts.sum': (2, 2, True), - 'disk.totalLatency.avg': (3, 3, 
True), - 'disk.totalReadLatency.avg': (2, 3, True), - 'disk.totalWriteLatency.avg': (2, 3, True), - 'disk.usage.avg': (1, 3), - 'disk.usage.max': (4, 4), - 'disk.usage.min': (4, 4), - 'disk.usage.raw': (4, 4), - 'disk.write.avg': (2, 3, True), - 'hbr.hbrNetRx.avg': (4, 4), - 'hbr.hbrNetTx.avg': (4, 4), - 'hbr.hbrNumVms.avg': (4, 4), - 'mem.active.avg': (2, 3), - 'mem.active.max': (4, 4), - 'mem.active.min': (4, 4), - 'mem.active.raw': (4, 4), - 'mem.activewrite.avg': (2, 3), - 'mem.compressed.avg': (2, 3), - 'mem.compressionRate.avg': (2, 3), - 'mem.consumed.avg': (1, 3), - 'mem.consumed.max': (4, 4), - 'mem.consumed.min': (4, 4), - 'mem.consumed.raw': (4, 4), - 'mem.consumed.userworlds.avg': (2, 4), - 'mem.consumed.vms.avg': (2, 4), - 'mem.decompressionRate.avg': (2, 3), - 'mem.granted.avg': (2, 3), - 'mem.granted.max': (4, 4), - 'mem.granted.min': (4, 4), - 'mem.granted.raw': (4, 4), - 'mem.heap.avg': (4, 4), - 'mem.heap.max': (4, 4), - 'mem.heap.min': (4, 4), - 'mem.heap.raw': (4, 4), - 'mem.heapfree.avg': (4, 4), - 'mem.heapfree.max': (4, 4), - 'mem.heapfree.min': (4, 4), - 'mem.heapfree.raw': (4, 4), - 'mem.latency.avg': (2, 3), - 'mem.llSwapIn.avg': (4, 4), - 'mem.llSwapIn.max': (4, 4), - 'mem.llSwapIn.min': (4, 4), - 'mem.llSwapIn.raw': (4, 4), - 'mem.llSwapInRate.avg': (2, 3), - 'mem.llSwapOut.avg': (4, 4), - 'mem.llSwapOut.max': (4, 4), - 'mem.llSwapOut.min': (4, 4), - 'mem.llSwapOut.raw': (4, 4), - 'mem.llSwapOutRate.avg': (2, 3), - 'mem.llSwapUsed.avg': (4, 4), - 'mem.llSwapUsed.max': (4, 4), - 'mem.llSwapUsed.min': (4, 4), - 'mem.llSwapUsed.raw': (4, 4), - 'mem.lowfreethreshold.avg': (2, 3), - 'mem.overhead.avg': (1, 1), - 'mem.overhead.max': (4, 4), - 'mem.overhead.min': (4, 4), - 'mem.overhead.raw': (4, 4), - 'mem.reservedCapacity.avg': (2, 3), - 'mem.shared.avg': (2, 3), - 'mem.shared.max': (4, 4), - 'mem.shared.min': (4, 4), - 'mem.shared.raw': (4, 4), - 'mem.sharedcommon.avg': (2, 3), - 'mem.sharedcommon.max': (4, 4), - 'mem.sharedcommon.min': (4, 
4), - 'mem.sharedcommon.raw': (4, 4), - 'mem.state.latest': (2, 3), - 'mem.swapin.avg': (2, 3), - 'mem.swapin.max': (4, 4), - 'mem.swapin.min': (4, 4), - 'mem.swapin.raw': (4, 4), - 'mem.swapinRate.avg': (1, 3), - 'mem.swapout.avg': (2, 3), - 'mem.swapout.max': (4, 4), - 'mem.swapout.min': (4, 4), - 'mem.swapout.raw': (4, 4), - 'mem.swapoutRate.avg': (1, 3), - 'mem.swapused.avg': (2, 3), - 'mem.swapused.max': (4, 4), - 'mem.swapused.min': (4, 4), - 'mem.swapused.raw': (4, 4), - 'mem.sysUsage.avg': (2, 3), - 'mem.sysUsage.max': (4, 4), - 'mem.sysUsage.min': (4, 4), - 'mem.sysUsage.raw': (4, 4), - 'mem.totalCapacity.avg': (2, 3), - 'mem.unreserved.avg': (2, 3), - 'mem.unreserved.max': (4, 4), - 'mem.unreserved.min': (4, 4), - 'mem.unreserved.raw': (4, 4), - 'mem.usage.avg': (1, 3), - 'mem.usage.max': (4, 4), - 'mem.usage.min': (4, 4), - 'mem.usage.raw': (4, 4), - 'mem.vmfs.pbc.capMissRatio.latest': (4, 4), - 'mem.vmfs.pbc.overhead.latest': (4, 4), - 'mem.vmfs.pbc.size.latest': (4, 4), - 'mem.vmfs.pbc.sizeMax.latest': (4, 4), - 'mem.vmfs.pbc.workingSet.latest': (4, 4), - 'mem.vmfs.pbc.workingSetMax.latest': (4, 4), - 'mem.vmmemctl.avg': (1, 3), - 'mem.vmmemctl.max': (4, 4), - 'mem.vmmemctl.min': (4, 4), - 'mem.vmmemctl.raw': (4, 4), - 'mem.zero.avg': (2, 3), - 'mem.zero.max': (4, 4), - 'mem.zero.min': (4, 4), - 'mem.zero.raw': (4, 4), - 'net.broadcastRx.sum': (2, 3, True), - 'net.broadcastTx.sum': (2, 3, True), - 'net.bytesRx.avg': (2, 3, True), - 'net.bytesTx.avg': (2, 3, True), - 'net.droppedRx.sum': (2, 3, True), - 'net.droppedTx.sum': (2, 3, True), - 'net.errorsRx.sum': (2, 3, True), - 'net.errorsTx.sum': (2, 3, True), - 'net.multicastRx.sum': (2, 3, True), - 'net.multicastTx.sum': (2, 3, True), - 'net.packetsRx.sum': (2, 3, True), - 'net.packetsTx.sum': (2, 3, True), - 'net.received.avg': (2, 3, True), - 'net.transmitted.avg': (2, 3, True), - 'net.unknownProtos.sum': (2, 3, True), - 'net.usage.avg': (1, 3, True), - 'net.usage.max': (4, 4, True), - 
'net.usage.min': (4, 4, True), - 'net.usage.raw': (4, 4, True), - 'power.energy.sum': (3, 3), - 'power.power.avg': (2, 3), - 'power.powerCap.avg': (3, 3), - 'rescpu.actav1.latest': (3, 3), - 'rescpu.actav15.latest': (3, 3), - 'rescpu.actav5.latest': (3, 3), - 'rescpu.actpk1.latest': (3, 3), - 'rescpu.actpk15.latest': (3, 3), - 'rescpu.actpk5.latest': (3, 3), - 'rescpu.maxLimited1.latest': (3, 3), - 'rescpu.maxLimited15.latest': (3, 3), - 'rescpu.maxLimited5.latest': (3, 3), - 'rescpu.runav1.latest': (3, 3), - 'rescpu.runav15.latest': (3, 3), - 'rescpu.runav5.latest': (3, 3), - 'rescpu.runpk1.latest': (3, 3), - 'rescpu.runpk15.latest': (3, 3), - 'rescpu.runpk5.latest': (3, 3), - 'rescpu.sampleCount.latest': (3, 3), - 'rescpu.samplePeriod.latest': (3, 3), - 'storageAdapter.commandsAveraged.avg': (2, 2, True), - 'storageAdapter.maxTotalLatency.latest': (3, 3), - 'storageAdapter.numberReadAveraged.avg': (2, 2, True), - 'storageAdapter.numberWriteAveraged.avg': (2, 2, True), - 'storageAdapter.outstandingIOs.avg': (2, 2, True), - 'storageAdapter.queueDepth.avg': (2, 2, True), - 'storageAdapter.queueLatency.avg': (2, 2, True), - 'storageAdapter.queued.avg': (2, 2, True), - 'storageAdapter.read.avg': (2, 2, True), - 'storageAdapter.totalReadLatency.avg': (2, 2, True), - 'storageAdapter.totalWriteLatency.avg': (2, 2, True), - 'storageAdapter.write.avg': (2, 2, True), - 'storagePath.busResets.sum': (2, 3, True), - 'storagePath.commandsAborted.sum': (2, 3, True), - 'storagePath.commandsAveraged.avg': (3, 3, True), - 'storagePath.maxTotalLatency.latest': (3, 3), - 'storagePath.numberReadAveraged.avg': (3, 3, True), - 'storagePath.numberWriteAveraged.avg': (3, 3, True), - 'storagePath.read.avg': (3, 3, True), - 'storagePath.totalReadLatency.avg': (3, 3, True), - 'storagePath.totalWriteLatency.avg': (3, 3, True), - 'storagePath.write.avg': (3, 3, True), - 'sys.resourceCpuAct1.latest': (3, 3, True), - 'sys.resourceCpuAct5.latest': (3, 3, True), - 'sys.resourceCpuAllocMax.latest': 
(3, 3, True), - 'sys.resourceCpuAllocMin.latest': (3, 3, True), - 'sys.resourceCpuAllocShares.latest': (3, 3, True), - 'sys.resourceCpuMaxLimited1.latest': (3, 3, True), - 'sys.resourceCpuMaxLimited5.latest': (3, 3, True), - 'sys.resourceCpuRun1.latest': (3, 3, True), - 'sys.resourceCpuRun5.latest': (3, 3, True), - 'sys.resourceCpuUsage.avg': (3, 3, True), - 'sys.resourceCpuUsage.max': (4, 4, True), - 'sys.resourceCpuUsage.min': (4, 4, True), - 'sys.resourceCpuUsage.raw': (4, 4, True), - 'sys.resourceFdUsage.latest': (4, 4, True), - 'sys.resourceMemAllocMax.latest': (3, 3, True), - 'sys.resourceMemAllocMin.latest': (3, 3, True), - 'sys.resourceMemAllocShares.latest': (3, 3, True), - 'sys.resourceMemConsumed.latest': (4, 4, True), - 'sys.resourceMemCow.latest': (3, 3, True), - 'sys.resourceMemMapped.latest': (3, 3, True), - 'sys.resourceMemOverhead.latest': (3, 3, True), - 'sys.resourceMemShared.latest': (3, 3, True), - 'sys.resourceMemSwapped.latest': (3, 3, True), - 'sys.resourceMemTouched.latest': (3, 3, True), - 'sys.resourceMemZero.latest': (3, 3, True), - 'sys.uptime.latest': (1, 3), - 'virtualDisk.busResets.sum': (2, 4, True), - 'virtualDisk.commandsAborted.sum': (2, 4, True), + 'cpu.coreUtilization.avg', + 'cpu.coreUtilization.max', + 'cpu.coreUtilization.min', + 'cpu.coreUtilization.raw', + 'cpu.costop.sum', + 'cpu.demand.avg', + 'cpu.idle.sum', + 'cpu.latency.avg', + 'cpu.readiness.avg', + 'cpu.ready.sum', + 'cpu.reservedCapacity.avg', + 'cpu.swapwait.sum', + 'cpu.totalCapacity.avg', + 'cpu.usage.avg', + 'cpu.usage.max', + 'cpu.usage.min', + 'cpu.usage.raw', + 'cpu.usagemhz.avg', + 'cpu.usagemhz.max', + 'cpu.usagemhz.min', + 'cpu.usagemhz.raw', + 'cpu.used.sum', + 'cpu.utilization.avg', + 'cpu.utilization.max', + 'cpu.utilization.min', + 'cpu.utilization.raw', + 'cpu.wait.sum', + 'datastore.datastoreIops.avg', + 'datastore.datastoreMaxQueueDepth.latest', + 'datastore.datastoreNormalReadLatency.latest', + 'datastore.datastoreNormalWriteLatency.latest', + 
'datastore.datastoreReadBytes.latest', + 'datastore.datastoreReadIops.latest', + 'datastore.datastoreReadLoadMetric.latest', + 'datastore.datastoreReadOIO.latest', + 'datastore.datastoreVMObservedLatency.latest', + 'datastore.datastoreWriteBytes.latest', + 'datastore.datastoreWriteIops.latest', + 'datastore.datastoreWriteLoadMetric.latest', + 'datastore.datastoreWriteOIO.latest', + 'datastore.maxTotalLatency.latest', + 'datastore.numberReadAveraged.avg', + 'datastore.numberWriteAveraged.avg', + 'datastore.read.avg', + 'datastore.siocActiveTimePercentage.avg', + 'datastore.sizeNormalizedDatastoreLatency.avg', + 'datastore.totalReadLatency.avg', + 'datastore.totalWriteLatency.avg', + 'datastore.write.avg', + 'disk.busResets.sum', + 'disk.commands.sum', + 'disk.commandsAborted.sum', + 'disk.commandsAveraged.avg', + 'disk.deviceLatency.avg', + 'disk.deviceReadLatency.avg', + 'disk.deviceWriteLatency.avg', + 'disk.kernelLatency.avg', + 'disk.kernelReadLatency.avg', + 'disk.kernelWriteLatency.avg', + 'disk.maxQueueDepth.avg', + 'disk.maxTotalLatency.latest', + 'disk.numberRead.sum', + 'disk.numberReadAveraged.avg', + 'disk.numberWrite.sum', + 'disk.numberWriteAveraged.avg', + 'disk.queueLatency.avg', + 'disk.queueReadLatency.avg', + 'disk.queueWriteLatency.avg', + 'disk.read.avg', + 'disk.scsiReservationCnflctsPct.avg', + 'disk.scsiReservationConflicts.sum', + 'disk.totalLatency.avg', + 'disk.totalReadLatency.avg', + 'disk.totalWriteLatency.avg', + 'disk.usage.avg', + 'disk.usage.max', + 'disk.usage.min', + 'disk.usage.raw', + 'disk.write.avg', + 'hbr.hbrNetRx.avg', + 'hbr.hbrNetTx.avg', + 'hbr.hbrNumVms.avg', + 'mem.active.avg', + 'mem.active.max', + 'mem.active.min', + 'mem.active.raw', + 'mem.activewrite.avg', + 'mem.compressed.avg', + 'mem.compressionRate.avg', + 'mem.consumed.avg', + 'mem.consumed.max', + 'mem.consumed.min', + 'mem.consumed.raw', + 'mem.consumed.userworlds.avg', + 'mem.consumed.vms.avg', + 'mem.decompressionRate.avg', + 'mem.granted.avg', + 
'mem.granted.max', + 'mem.granted.min', + 'mem.granted.raw', + 'mem.heap.avg', + 'mem.heap.max', + 'mem.heap.min', + 'mem.heap.raw', + 'mem.heapfree.avg', + 'mem.heapfree.max', + 'mem.heapfree.min', + 'mem.heapfree.raw', + 'mem.latency.avg', + 'mem.llSwapIn.avg', + 'mem.llSwapIn.max', + 'mem.llSwapIn.min', + 'mem.llSwapIn.raw', + 'mem.llSwapInRate.avg', + 'mem.llSwapOut.avg', + 'mem.llSwapOut.max', + 'mem.llSwapOut.min', + 'mem.llSwapOut.raw', + 'mem.llSwapOutRate.avg', + 'mem.llSwapUsed.avg', + 'mem.llSwapUsed.max', + 'mem.llSwapUsed.min', + 'mem.llSwapUsed.raw', + 'mem.lowfreethreshold.avg', + 'mem.overhead.avg', + 'mem.overhead.max', + 'mem.overhead.min', + 'mem.overhead.raw', + 'mem.reservedCapacity.avg', + 'mem.shared.avg', + 'mem.shared.max', + 'mem.shared.min', + 'mem.shared.raw', + 'mem.sharedcommon.avg', + 'mem.sharedcommon.max', + 'mem.sharedcommon.min', + 'mem.sharedcommon.raw', + 'mem.state.latest', + 'mem.swapin.avg', + 'mem.swapin.max', + 'mem.swapin.min', + 'mem.swapin.raw', + 'mem.swapinRate.avg', + 'mem.swapout.avg', + 'mem.swapout.max', + 'mem.swapout.min', + 'mem.swapout.raw', + 'mem.swapoutRate.avg', + 'mem.swapused.avg', + 'mem.swapused.max', + 'mem.swapused.min', + 'mem.swapused.raw', + 'mem.sysUsage.avg', + 'mem.sysUsage.max', + 'mem.sysUsage.min', + 'mem.sysUsage.raw', + 'mem.totalCapacity.avg', + 'mem.unreserved.avg', + 'mem.unreserved.max', + 'mem.unreserved.min', + 'mem.unreserved.raw', + 'mem.usage.avg', + 'mem.usage.max', + 'mem.usage.min', + 'mem.usage.raw', + 'mem.vmfs.pbc.capMissRatio.latest', + 'mem.vmfs.pbc.overhead.latest', + 'mem.vmfs.pbc.size.latest', + 'mem.vmfs.pbc.sizeMax.latest', + 'mem.vmfs.pbc.workingSet.latest', + 'mem.vmfs.pbc.workingSetMax.latest', + 'mem.vmmemctl.avg', + 'mem.vmmemctl.max', + 'mem.vmmemctl.min', + 'mem.vmmemctl.raw', + 'mem.zero.avg', + 'mem.zero.max', + 'mem.zero.min', + 'mem.zero.raw', + 'net.broadcastRx.sum', + 'net.broadcastTx.sum', + 'net.bytesRx.avg', + 'net.bytesTx.avg', + 'net.droppedRx.sum', + 
'net.droppedTx.sum', + 'net.errorsRx.sum', + 'net.errorsTx.sum', + 'net.multicastRx.sum', + 'net.multicastTx.sum', + 'net.packetsRx.sum', + 'net.packetsTx.sum', + 'net.received.avg', + 'net.transmitted.avg', + 'net.unknownProtos.sum', + 'net.usage.avg', + 'net.usage.max', + 'net.usage.min', + 'net.usage.raw', + 'power.energy.sum', + 'power.power.avg', + 'power.powerCap.avg', + 'rescpu.actav1.latest', + 'rescpu.actav15.latest', + 'rescpu.actav5.latest', + 'rescpu.actpk1.latest', + 'rescpu.actpk15.latest', + 'rescpu.actpk5.latest', + 'rescpu.maxLimited1.latest', + 'rescpu.maxLimited15.latest', + 'rescpu.maxLimited5.latest', + 'rescpu.runav1.latest', + 'rescpu.runav15.latest', + 'rescpu.runav5.latest', + 'rescpu.runpk1.latest', + 'rescpu.runpk15.latest', + 'rescpu.runpk5.latest', + 'rescpu.sampleCount.latest', + 'rescpu.samplePeriod.latest', + 'storageAdapter.commandsAveraged.avg', + 'storageAdapter.maxTotalLatency.latest', + 'storageAdapter.numberReadAveraged.avg', + 'storageAdapter.numberWriteAveraged.avg', + 'storageAdapter.outstandingIOs.avg', + 'storageAdapter.queueDepth.avg', + 'storageAdapter.queueLatency.avg', + 'storageAdapter.queued.avg', + 'storageAdapter.read.avg', + 'storageAdapter.totalReadLatency.avg', + 'storageAdapter.totalWriteLatency.avg', + 'storageAdapter.write.avg', + 'storagePath.busResets.sum', + 'storagePath.commandsAborted.sum', + 'storagePath.commandsAveraged.avg', + 'storagePath.maxTotalLatency.latest', + 'storagePath.numberReadAveraged.avg', + 'storagePath.numberWriteAveraged.avg', + 'storagePath.read.avg', + 'storagePath.totalReadLatency.avg', + 'storagePath.totalWriteLatency.avg', + 'storagePath.write.avg', + 'sys.resourceCpuAct1.latest', + 'sys.resourceCpuAct5.latest', + 'sys.resourceCpuAllocMax.latest', + 'sys.resourceCpuAllocMin.latest', + 'sys.resourceCpuAllocShares.latest', + 'sys.resourceCpuMaxLimited1.latest', + 'sys.resourceCpuMaxLimited5.latest', + 'sys.resourceCpuRun1.latest', + 'sys.resourceCpuRun5.latest', + 
'sys.resourceCpuUsage.avg', + 'sys.resourceCpuUsage.max', + 'sys.resourceCpuUsage.min', + 'sys.resourceCpuUsage.raw', + 'sys.resourceFdUsage.latest', + 'sys.resourceMemAllocMax.latest', + 'sys.resourceMemAllocMin.latest', + 'sys.resourceMemAllocShares.latest', + 'sys.resourceMemConsumed.latest', + 'sys.resourceMemCow.latest', + 'sys.resourceMemMapped.latest', + 'sys.resourceMemOverhead.latest', + 'sys.resourceMemShared.latest', + 'sys.resourceMemSwapped.latest', + 'sys.resourceMemTouched.latest', + 'sys.resourceMemZero.latest', + 'sys.uptime.latest', + 'virtualDisk.busResets.sum', + 'virtualDisk.commandsAborted.sum', } # All metrics that can be collected from Datastores. -# The table maps a dd-formatted metric_name to a tuple containing: -# (collection_level, per_instance_collection_level, (optional)is_available_per_instance) DATASTORE_METRICS = { - 'datastore.busResets.sum': (2, 2, True), - 'datastore.commandsAborted.sum': (2, 2, True), - 'datastore.numberReadAveraged.avg': (1, 3), - 'datastore.numberWriteAveraged.avg': (1, 3), - 'datastore.throughput.contention.avg': (4, 4, True), - 'datastore.throughput.usage.avg': (4, 4, True), - 'disk.busResets.sum': (2, 3, True), - 'disk.capacity.contention.avg': (4, 4), - 'disk.capacity.latest': (1, 3), - 'disk.capacity.provisioned.avg': (4, 4), - 'disk.capacity.usage.avg': (4, 4, True), - 'disk.numberReadAveraged.avg': (1, 3), - 'disk.numberWriteAveraged.avg': (1, 3), - 'disk.provisioned.latest': (1, 1, True), - 'disk.unshared.latest': (1, 1, True), - 'disk.used.latest': (1, 1, True), + 'datastore.busResets.sum', + 'datastore.commandsAborted.sum', + 'datastore.numberReadAveraged.avg', + 'datastore.numberWriteAveraged.avg', + 'datastore.throughput.contention.avg', + 'datastore.throughput.usage.avg', + 'disk.busResets.sum', + 'disk.capacity.contention.avg', + 'disk.capacity.latest', + 'disk.capacity.provisioned.avg', + 'disk.capacity.usage.avg', + 'disk.numberReadAveraged.avg', + 'disk.numberWriteAveraged.avg', + 
'disk.provisioned.latest', + 'disk.unshared.latest', + 'disk.used.latest', } # All metrics that can be collected from Datacenters. -# The table maps a dd-formatted metric_name to a tuple containing: -# (collection_level, per_instance_collection_level, (optional)is_available_per_instance) DATACENTER_METRICS = { - 'vmop.numChangeDS.latest': (1, 3), - 'vmop.numChangeHost.latest': (1, 3), - 'vmop.numChangeHostDS.latest': (1, 3), - 'vmop.numClone.latest': (1, 3), - 'vmop.numCreate.latest': (1, 3), - 'vmop.numDeploy.latest': (1, 3), - 'vmop.numDestroy.latest': (1, 3), - 'vmop.numPoweroff.latest': (1, 3), - 'vmop.numPoweron.latest': (1, 3), - 'vmop.numRebootGuest.latest': (1, 3), - 'vmop.numReconfigure.latest': (1, 3), - 'vmop.numRegister.latest': (1, 3), - 'vmop.numReset.latest': (1, 3), - 'vmop.numSVMotion.latest': (1, 3), - 'vmop.numShutdownGuest.latest': (1, 3), - 'vmop.numStandbyGuest.latest': (1, 3), - 'vmop.numSuspend.latest': (1, 3), - 'vmop.numUnregister.latest': (1, 3), - 'vmop.numVMotion.latest': (1, 3), - 'vmop.numXVMotion.latest': (1, 3), + 'vmop.numChangeDS.latest', + 'vmop.numChangeHost.latest', + 'vmop.numChangeHostDS.latest', + 'vmop.numClone.latest', + 'vmop.numCreate.latest', + 'vmop.numDeploy.latest', + 'vmop.numDestroy.latest', + 'vmop.numPoweroff.latest', + 'vmop.numPoweron.latest', + 'vmop.numRebootGuest.latest', + 'vmop.numReconfigure.latest', + 'vmop.numRegister.latest', + 'vmop.numReset.latest', + 'vmop.numSVMotion.latest', + 'vmop.numShutdownGuest.latest', + 'vmop.numStandbyGuest.latest', + 'vmop.numSuspend.latest', + 'vmop.numUnregister.latest', + 'vmop.numVMotion.latest', + 'vmop.numXVMotion.latest', } # All metrics that can be collected from Clusters. -# The table maps a dd-formatted metric_name to a tuple containing: -# (collection_level, per_instance_collection_level, (optional)is_available_per_instance) CLUSTER_METRICS = { # clusterServices are only available for DRS and HA clusters, and are causing errors. 
Let's deactivate for now # but they were collected before so investigate why - 'clusterServices.cpufairness.latest': (1, 3), - 'clusterServices.effectivecpu.avg': (1, 3), - 'clusterServices.effectivemem.avg': (1, 3), - 'clusterServices.failover.latest': (1, 3), - 'clusterServices.memfairness.latest': (1, 3), - 'cpu.totalmhz.avg': (1, 3), - 'cpu.usage.avg': (1, 3, True), - 'cpu.usagemhz.avg': (1, 3), - 'mem.consumed.avg': (1, 3), - 'mem.overhead.avg': (1, 1), - 'mem.totalmb.avg': (1, 3), - 'mem.usage.avg': (1, 3), - 'mem.vmmemctl.avg': (1, 3), - 'vmop.numChangeDS.latest': (1, 3), - 'vmop.numChangeHost.latest': (1, 3), - 'vmop.numChangeHostDS.latest': (1, 3), - 'vmop.numClone.latest': (1, 3), - 'vmop.numCreate.latest': (1, 3), - 'vmop.numDeploy.latest': (1, 3), - 'vmop.numDestroy.latest': (1, 3), - 'vmop.numPoweroff.latest': (1, 3), - 'vmop.numPoweron.latest': (1, 3), - 'vmop.numRebootGuest.latest': (1, 3), - 'vmop.numReconfigure.latest': (1, 3), - 'vmop.numRegister.latest': (1, 3), - 'vmop.numReset.latest': (1, 3), - 'vmop.numSVMotion.latest': (1, 3), - 'vmop.numShutdownGuest.latest': (1, 3), - 'vmop.numStandbyGuest.latest': (1, 3), - 'vmop.numSuspend.latest': (1, 3), - 'vmop.numUnregister.latest': (1, 3), - 'vmop.numVMotion.latest': (1, 3), - 'vmop.numXVMotion.latest': (1, 3), + 'clusterServices.cpufairness.latest', + 'clusterServices.effectivecpu.avg', + 'clusterServices.effectivemem.avg', + 'clusterServices.failover.latest', + 'clusterServices.memfairness.latest', + 'cpu.totalmhz.avg', + 'cpu.usage.avg', + 'cpu.usagemhz.avg', + 'mem.consumed.avg', + 'mem.overhead.avg', + 'mem.totalmb.avg', + 'mem.usage.avg', + 'mem.vmmemctl.avg', + 'vmop.numChangeDS.latest', + 'vmop.numChangeHost.latest', + 'vmop.numChangeHostDS.latest', + 'vmop.numClone.latest', + 'vmop.numCreate.latest', + 'vmop.numDeploy.latest', + 'vmop.numDestroy.latest', + 'vmop.numPoweroff.latest', + 'vmop.numPoweron.latest', + 'vmop.numRebootGuest.latest', + 'vmop.numReconfigure.latest', + 
'vmop.numRegister.latest', + 'vmop.numReset.latest', + 'vmop.numSVMotion.latest', + 'vmop.numShutdownGuest.latest', + 'vmop.numStandbyGuest.latest', + 'vmop.numSuspend.latest', + 'vmop.numUnregister.latest', + 'vmop.numVMotion.latest', + 'vmop.numXVMotion.latest', } ALLOWED_METRICS_FOR_MOR = { diff --git a/vsphere/datadog_checks/vsphere/utils.py b/vsphere/datadog_checks/vsphere/utils.py index a53d2cecf03e9..84e531416d86e 100644 --- a/vsphere/datadog_checks/vsphere/utils.py +++ b/vsphere/datadog_checks/vsphere/utils.py @@ -2,10 +2,32 @@ # All rights reserved # Licensed under Simplified BSD License (see LICENSE) from pyVmomi import vim +from six import iteritems from datadog_checks.base import to_string from datadog_checks.vsphere.constants import MOR_TYPE_AS_STRING, REFERENCE_METRIC, SHORT_ROLLUP +METRIC_TO_INSTANCE_TAG_MAPPING = { + # Structure: + # prefix: tag key used for instance value + 'cpu.': 'cpu_core', + # Examples: 0, 15 + 'datastore.': 'vmfs_uuid', + # Examples: fd3f776b-2ca26041, 5deed40f-cef2b3f6-0bcd-000c2927ce06 + 'disk.': 'device_path', + # Examples: mpx.vmhba0:C0:T1:L0, mpx.vmhba0:C0:T1:L0 + 'net.': 'nic', + # Examples: vmnic1, 4000 + 'storageAdapter.': 'storage_adapter', + # Examples: vmhba1, vmhba64 + 'storagePath.': 'storage_path', + # Examples: ide.vmhba64-ide.0:0-mpx.vmhba64:C0:T0:L0, pscsi.vmhba0-pscsi.0:1-mpx.vmhba0:C0:T1:L0 + 'sys.resource': 'resource_path', + # Examples: host/system/vmotion, host/system + 'virtualDisk.': 'disk', + # Examples: scsi0:0, scsi0:0 +} + def format_metric_name(counter): return "{}.{}.{}".format( @@ -115,22 +137,19 @@ def get_parent_tags_recursively(mor, infrastructure_data): return [] -def should_collect_per_instance_values(metric_name, resource_type): - # TODO: Implement. For now we don't collect per-instance level metrics (aka per-core for cpu, per-vm for disk etc.) - # TODO: Collecting per-instance metrics is really expensive for big environments and has usually little value. 
- # TODO: Also that adds an extra layer of complexity where users have to set `instance:none` to see the correct - # value. - return False +def should_collect_per_instance_values(config, metric_name, resource_type): + filters = config.collect_per_instance_filters.get(MOR_TYPE_AS_STRING[resource_type], []) + metric_matched = match_any_regex(metric_name, filters) + return metric_matched def get_mapped_instance_tag(metric_name): - """When collecting per-instance metric, the `instance` tag can mean a lot of different things. The meaning of the - tag cannot be guessed by looking at the api results and has to be infered using documentation or experience. + """ + When collecting per-instance metric, the `instance` tag can mean a lot of different things. The meaning of the + tag cannot be guessed by looking at the api results and has to be inferred using documentation or experience. This method acts as a utility to map a metric_name to the meaning of its instance tag. - TODO: More """ - if metric_name.startswith('cpu'): - return 'cpu_core' - elif metric_name.startswith('disk'): - return 'vm' + for prefix, tag_key in iteritems(METRIC_TO_INSTANCE_TAG_MAPPING): + if metric_name.startswith(prefix): + return tag_key return 'instance' diff --git a/vsphere/datadog_checks/vsphere/vsphere.py b/vsphere/datadog_checks/vsphere/vsphere.py index 5e89f7f1439b7..18694e78c58f7 100644 --- a/vsphere/datadog_checks/vsphere/vsphere.py +++ b/vsphere/datadog_checks/vsphere/vsphere.py @@ -174,7 +174,24 @@ def refresh_infrastructure_cache(self): self.infrastructure_cache.set_mor_data(mor, mor_payload) def submit_metrics_callback(self, query_results): - """Callback of the collection of metrics. This is run in the main thread!""" + """ + Callback of the collection of metrics. This is run in the main thread! + + `query_results` currently contain results of one resource type in practice, but this function is generic + and can handle results with mixed resource types. 
+ """ + + # `have_instance_value` is used later to avoid collecting aggregated metrics + # when instance metrics are collected. + have_instance_value = defaultdict(set) + for results_per_mor in query_results: + resource_type = type(results_per_mor.entity) + metadata = self.metrics_metadata_cache.get_metadata(resource_type) + for result in results_per_mor.value: + metric_name = metadata.get(result.id.counterId) + if result.id.instance: + have_instance_value[resource_type].add(metric_name) + for results_per_mor in query_results: mor_props = self.infrastructure_cache.get_mor_props(results_per_mor.entity) if mor_props is None: @@ -210,16 +227,18 @@ def submit_metrics_callback(self, query_results): to_string(metric_name), ) continue - value = valid_values[-1] - if metric_name in PERCENT_METRICS: - # Convert the percentage to a float. - value /= 100.0 tags = [] - if should_collect_per_instance_values(metric_name, resource_type): + if should_collect_per_instance_values(self.config, metric_name, resource_type) and ( + metric_name in have_instance_value[resource_type] + ): + instance_value = result.id.instance + # When collecting per instance values, it's possible that both aggregated metric and per instance + # metrics are received. In that case, the metric with no instance value is skipped. + if not instance_value: + continue instance_tag_key = get_mapped_instance_tag(metric_name) - instance_tag_value = result.id.instance or 'none' - tags.append('{}:{}'.format(instance_tag_key, instance_tag_value)) + tags.append('{}:{}'.format(instance_tag_key, instance_value)) if resource_type in HISTORICAL_RESOURCES: # Tags are attached to the metrics @@ -235,8 +254,12 @@ def submit_metrics_callback(self, query_results): tags.extend(self.config.base_tags) - # vsphere "rates" should be submitted as gauges (rate is - # precomputed). + value = valid_values[-1] + if metric_name in PERCENT_METRICS: + # Convert the percentage to a float. 
+ value /= 100.0 + + # vSphere "rates" should be submitted as gauges (rate is precomputed). self.gauge(to_string(metric_name), value, hostname=hostname, tags=tags) def query_metrics_wrapper(self, query_specs): @@ -249,14 +272,26 @@ def query_metrics_wrapper(self, query_specs): return metrics_values def make_query_specs(self): + """ + Build query specs using MORs and metrics metadata. + + :returns a list of vim.PerformanceManager.QuerySpec: + https://www.vmware.com/support/developer/vc-sdk/visdk41pubs/ApiReference/vim.PerformanceManager.QuerySpec.html + """ for resource_type in self.config.collected_resource_types: mors = self.infrastructure_cache.get_mors(resource_type) counters = self.metrics_metadata_cache.get_metadata(resource_type) metric_ids = [] for counter_key, metric_name in iteritems(counters): - instance = "" - if should_collect_per_instance_values(metric_name, resource_type): + # PerformanceManager.MetricId `instance` kwarg: + # - An asterisk (*) to specify all instances of the metric for the specified counterId + # - Double-quotes ("") to specify aggregated statistics + # More info https://code.vmware.com/apis/704/vsphere/vim.PerformanceManager.MetricId.html + if should_collect_per_instance_values(self.config, metric_name, resource_type): instance = "*" + else: + instance = '' + metric_ids.append(vim.PerformanceManager.MetricId(counterId=counter_key, instance=instance)) for batch in self.make_batch(mors, metric_ids, resource_type): diff --git a/vsphere/tests/fixtures/metrics_realtime.json b/vsphere/tests/fixtures/metrics_realtime.json index 57769389e5569..21d78203d51ae 100644 --- a/vsphere/tests/fixtures/metrics_realtime.json +++ b/vsphere/tests/fixtures/metrics_realtime.json @@ -5,7 +5,7 @@ 1581 ], "counterId": 1, - "instance": "" + "instance": "4" }, { "entity": "VM4-2", @@ -13,7 +13,7 @@ 1581 ], "counterId": 2, - "instance": "" + "instance": "4" }, { "entity": "VM4-2", @@ -21,7 +21,7 @@ 1581 ], "counterId": 3, - "instance": "" + "instance": "4" }, { 
"entity": "VM4-2", @@ -29,7 +29,7 @@ 1581 ], "counterId": 4, - "instance": "" + "instance": "4" }, { "entity": "VM4-2", @@ -11853,7 +11853,7 @@ 9181 ], "counterId": 389, - "instance": "" + "instance": "16" }, { "entity": "10.0.0.104", @@ -11861,7 +11861,7 @@ 9181 ], "counterId": 390, - "instance": "" + "instance": "16" }, { "entity": "10.0.0.104", @@ -11869,7 +11869,7 @@ 9181 ], "counterId": 391, - "instance": "" + "instance": "16" }, { "entity": "10.0.0.104", @@ -11877,7 +11877,7 @@ 9181 ], "counterId": 392, - "instance": "" + "instance": "16" }, { "entity": "10.0.0.104", @@ -13333,7 +13333,7 @@ 29028344 ], "counterId": 155, - "instance": "" + "instance": "sys-uptime-instance-value" }, { "entity": "VM4-11", @@ -22751,6 +22751,22 @@ "counterId": 127, "instance": "" }, + { + "entity": "VM4-1", + "value": [ + 0 + ], + "counterId": 130, + "instance": "value-aa" + }, + { + "entity": "VM4-1", + "value": [ + 0 + ], + "counterId": 130, + "instance": "value-bb" + }, { "entity": "VM4-1", "value": [ diff --git a/vsphere/tests/mocked_api.py b/vsphere/tests/mocked_api.py index 06476095e76b9..6b9807dd480b9 100644 --- a/vsphere/tests/mocked_api.py +++ b/vsphere/tests/mocked_api.py @@ -82,7 +82,7 @@ def query_metrics(self, query_specs): results = [m for m in self.metrics_data if m.entity == entity_name and m.counterId in counter_ids] values = [] for r in results: - values.append(MagicMock(id=MagicMock(counterId=r.counterId), value=r.value)) + values.append(MagicMock(id=MagicMock(counterId=r.counterId, instance=r.instance), value=r.value)) if results: data.append(MagicMock(entity=spec.entity, value=values)) diff --git a/vsphere/tests/test_check.py b/vsphere/tests/test_check.py index 04dc1c2b80114..eea8ac7b9dfc3 100644 --- a/vsphere/tests/test_check.py +++ b/vsphere/tests/test_check.py @@ -81,3 +81,48 @@ def test_external_host_tags(aggregator, realtime_instance): check.set_external_tags = MagicMock() check.submit_external_host_tags() + + +@pytest.mark.usefixtures('mock_type', 
'mock_threadpool', 'mock_api') +def test_collect_metric_instance_values(aggregator, dd_run_check, realtime_instance): + realtime_instance.update( + { + 'collect_per_instance_filters': { + 'vm': [r'cpu\.usage\.raw', r'disk\..*'], + 'host': [r'cpu\.coreUtilization\..*', r'sys\.uptime\..*', r'disk\..*'], + } + } + ) + check = VSphereCheck('vsphere', {}, [realtime_instance]) + dd_run_check(check) + + # Following metrics should match and have instance value tag + aggregator.assert_metric( + 'vsphere.cpu.usage.raw', tags=['cpu_core:4', 'vcenter_server:FAKE'], + ) + for suffix in ['min', 'max', 'raw', 'avg']: + aggregator.assert_metric( + 'vsphere.cpu.coreUtilization.{}'.format(suffix), + hostname='10.0.0.104', + tags=['cpu_core:16', 'vcenter_server:FAKE'], + ) + + # Following metrics should NOT match and do NOT have instance value tag + aggregator.assert_metric( + 'vsphere.cpu.usage.min', tags=['vcenter_server:FAKE'], + ) + aggregator.assert_metric( + 'vsphere.cpu.totalCapacity.avg', tags=['vcenter_server:FAKE'], + ) + + # None of `vsphere.disk.usage.avg` metrics have instance values for specific metric+resource_type + # Hence the aggregated metric IS collected + aggregator.assert_metric('vsphere.disk.usage.avg', tags=['vcenter_server:FAKE'], hostname='VM4-1', count=1) + + # Some of `vsphere.disk.read.avg` metrics have instance values for specific metric+resource_type + # Hence the aggregated metric IS NOT collected + aggregator.assert_metric('vsphere.disk.read.avg', tags=['vcenter_server:FAKE'], hostname='VM4-1', count=0) + for instance_tag in ['device_path:value-aa', 'device_path:value-bb']: + aggregator.assert_metric( + 'vsphere.disk.read.avg', tags=['vcenter_server:FAKE'] + [instance_tag], hostname='VM4-1', count=1 + ) diff --git a/vsphere/tests/test_utils.py b/vsphere/tests/test_utils.py new file mode 100644 index 0000000000000..5d532c2edeef4 --- /dev/null +++ b/vsphere/tests/test_utils.py @@ -0,0 +1,52 @@ +# -*- coding: utf-8 -*- +# (C) Datadog, Inc. 
2019-present +# All rights reserved +# Licensed under Simplified BSD License (see LICENSE) + +import pytest +from pytest import param +from pyVmomi import vim + +from datadog_checks.vsphere.config import VSphereConfig +from datadog_checks.vsphere.utils import get_mapped_instance_tag, should_collect_per_instance_values + + +@pytest.mark.parametrize( + 'metric_name, expected_tag_key', + [ + ('cpu.coreUtilization.avg', 'cpu_core'), + ('datastore.datastoreIops.avg', 'vmfs_uuid'), + ('disk.busResets.sum', 'device_path'), + ('net.broadcastRx.sum', 'nic'), + ('storageAdapter.commandsAveraged.avg', 'storage_adapter'), + ('storagePath.commandsAveraged.avg', 'storage_path'), + ('sys.resourceCpuAct1.latest', 'resource_path'), + ('virtualDisk.largeSeeks.latest', 'disk'), + ('foo.bar', 'instance'), + ], +) +def test_get_mapped_instance_tag(metric_name, expected_tag_key): + assert expected_tag_key == get_mapped_instance_tag(metric_name) + + +@pytest.mark.parametrize( + 'metric_name, resource_type, expect_match', + [ + param('cpu.idle.sum', vim.VirtualMachine, True, id='found_1'), + param('cpu.overlap.sum', vim.VirtualMachine, True, id='found_2'), + param('cpu.usage.avg', vim.VirtualMachine, False, id='does_not_match'), + param('cpu.overlap.sum', vim.ClusterComputeResource, False, id='wrong_resource_type'), + ], +) +def test_should_collect_per_instance_values(metric_name, resource_type, expect_match): + config = VSphereConfig( + { + 'host': 'foo', + 'username': 'bar', + 'password': 'baz', + 'collect_per_instance_filters': {'vm': [r'cpu\..*\.sum']}, + }, + None, + ) + + assert expect_match == should_collect_per_instance_values(config, metric_name, resource_type)