Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

vSphere new implementation #5251

Merged
merged 23 commits into from
Feb 3, 2020
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ ddtrace==0.13.0
dnspython==1.16.0
flup==1.0.3.dev-20110405; python_version < '3.0'
flup-py3==1.0.3; python_version > '3.0'
futures==3.3.0; python_version < '3.0'
gearman==2.0.2; sys_platform != 'win32' and python_version < '3.0'
httplib2==0.10.3
in-toto==0.4.1
Expand Down
3 changes: 3 additions & 0 deletions vsphere/datadog_checks/vsphere/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# (C) Datadog, Inc. 2018-present
# All rights reserved
# Licensed under Simplified BSD License (see LICENSE)
from .__about__ import __version__
from .vsphere import VSphereCheck

Expand Down
176 changes: 176 additions & 0 deletions vsphere/datadog_checks/vsphere/api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
# (C) Datadog, Inc. 2019-present
# All rights reserved
# Licensed under Simplified BSD License (see LICENSE)
import functools
import ssl

from pyVim import connect
from pyVmomi import vim, vmodl

from datadog_checks.vsphere.constants import ALL_RESOURCES, MAX_QUERY_METRICS_OPTION, UNLIMITED_HIST_METRICS_PER_QUERY

# `ssl.PROTOCOL_TLS_CLIENT` is not defined by every supported interpreter (the original note
# marks it as Python 3 only). Fall back to `ssl.PROTOCOL_TLS` when the attribute is missing so
# that code referencing this module-level name works on both.
PROTOCOL_TLS_CLIENT = getattr(ssl, 'PROTOCOL_TLS_CLIENT', ssl.PROTOCOL_TLS)


def smart_retry(f):
    """Decorate an API method so that one failure triggers a reconnect followed by a single retry.

    The wrapped callable must take the API object as its first positional argument. That object
    is expected to expose a `log` attribute with a `debug` method and a `smart_connect()` method.
    If the first call raises any `Exception`, the failure is logged at debug level, the
    connection is refreshed through `smart_connect()` and the call is attempted once more.
    An exception raised by the reconnection or by the second attempt propagates to the caller.
    """

    @functools.wraps(f)
    def wrapper(api_instance, *args, **kwargs):
        try:
            result = f(api_instance, *args, **kwargs)
        except Exception as e:
            # First attempt failed: report it, re-authenticate, then try exactly once more.
            api_instance.log.debug(
                "An exception occurred when executing %s: %s. Refreshing the connection to vCenter and retrying",
                f.__name__,
                e,
            )
            api_instance.smart_connect()
            result = f(api_instance, *args, **kwargs)
        return result

    return wrapper


class APIConnectionError(Exception):
    """Raised when a connection to the vSphere API cannot be established."""


class VSphereAPI(object):
    """Abstraction class over the vSphere SOAP api using the pyvmomi library.

    A connection is opened as soon as the object is created. Every public query method is
    wrapped with `@smart_retry`, so a failing call triggers one reconnection and one retry.
    """

    def __init__(self, config, log):
        """Store the configuration and logger, then open the connection.

        :param config: object exposing `hostname`, `username`, `password`, `ssl_verify`,
            `ssl_capath` and `batch_collector_size` attributes.
        :param log: logger used by `smart_retry` to report failed calls.
        :raises APIConnectionError: if the initial connection cannot be established.
        """
        self.config = config
        self.log = log

        self._conn = None
        self.smart_connect()

    def smart_connect(self):
        """Creates the connection object to the vSphere API using parameters supplied from the configuration.

        :raises APIConnectionError: if connecting or the follow-up health call fails.
        """
        context = None
        if not self.config.ssl_verify:
            context = ssl.SSLContext(ssl.PROTOCOL_TLS)
            context.verify_mode = ssl.CERT_NONE
        elif self.config.ssl_capath:
            # Use the module-level PROTOCOL_TLS_CLIENT, which falls back to ssl.PROTOCOL_TLS on
            # interpreters where `ssl.PROTOCOL_TLS_CLIENT` is not defined. Reading the attribute
            # straight from `ssl` would raise AttributeError there.
            context = ssl.SSLContext(PROTOCOL_TLS_CLIENT)
            context.verify_mode = ssl.CERT_REQUIRED
            context.load_verify_locations(capath=self.config.ssl_capath)

        try:
            # Object returned by SmartConnect is a ServiceInstance
            # https://www.vmware.com/support/developer/vc-sdk/visdk2xpubs/ReferenceGuide/vim.ServiceInstance.html
            conn = connect.SmartConnect(
                host=self.config.hostname, user=self.config.username, pwd=self.config.password, sslContext=context
            )
            # Next line tries a simple API call to check the health of the connection.
            conn.CurrentTime()
        except Exception as e:
            err_msg = "Connection to {} failed: {}".format(self.config.hostname, e)
            raise APIConnectionError(err_msg)

        # Only replace the stored connection once the new one has proven usable.
        self._conn = conn

    @smart_retry
    def check_health(self):
        """Issue a `CurrentTime` call on the current connection; raises if the call fails."""
        self._conn.CurrentTime()

    @smart_retry
    def get_perf_counter_by_level(self, collection_level):
        """Requests and returns the list of counter available for a given collection_level."""
        return self._conn.content.perfManager.QueryPerfCounterByLevel(collection_level)

    @smart_retry
    def get_infrastructure(self):
        """Traverse the whole vSphere infrastructure and outputs a dict mapping the mors to their properties.

        :return: {
            'vim.VirtualMachine-VM0': {
              'name': 'VM-0',
              ...
            }
            ...
        }
        """
        content = self._conn.content  # vim.ServiceInstanceContent reference from the connection

        property_specs = []
        # Specify which attributes we want to retrieve per object
        for resource in ALL_RESOURCES:
            property_spec = vmodl.query.PropertyCollector.PropertySpec()
            property_spec.type = resource
            property_spec.pathSet = ["name", "parent", "customValue"]
            if resource == vim.VirtualMachine:
                property_spec.pathSet.append("runtime.powerState")
                property_spec.pathSet.append("runtime.host")
                property_spec.pathSet.append("guest.hostName")
            property_specs.append(property_spec)

        # Specify the attribute of the root object to traverse to obtain all the attributes
        traversal_spec = vmodl.query.PropertyCollector.TraversalSpec()
        traversal_spec.path = "view"
        traversal_spec.skip = False
        traversal_spec.type = vim.view.ContainerView

        retr_opts = vmodl.query.PropertyCollector.RetrieveOptions()
        # To limit the number of objects retrieved per call.
        # If batch_collector_size is 0, collect maximum number of objects.
        retr_opts.maxObjects = self.config.batch_collector_size

        # Specify the root object from where we collect the rest of the objects
        obj_spec = vmodl.query.PropertyCollector.ObjectSpec()
        obj_spec.skip = True
        obj_spec.selectSet = [traversal_spec]

        # Create our filter spec from the above specs
        filter_spec = vmodl.query.PropertyCollector.FilterSpec()
        filter_spec.propSet = property_specs

        view_ref = content.viewManager.CreateContainerView(content.rootFolder, ALL_RESOURCES, True)
        try:
            obj_spec.obj = view_ref
            filter_spec.objectSet = [obj_spec]

            # Collect the objects and their properties
            res = content.propertyCollector.RetrievePropertiesEx([filter_spec], retr_opts)
            mors = []
            # Guard against an empty result: without it, a missing result object would fail on
            # `res.objects` instead of yielding an empty infrastructure.
            if res is not None:
                mors.extend(res.objects)
                # Results can be paginated
                while res.token is not None:
                    res = content.propertyCollector.ContinueRetrievePropertiesEx(res.token)
                    mors.extend(res.objects)
        finally:
            # Always release the server-side view, even when the retrieval fails.
            view_ref.Destroy()

        infrastructure_data = {mor.obj: {prop.name: prop.val for prop in mor.propSet} for mor in mors if mor.propSet}

        # The root folder is added explicitly, with no parent.
        root_folder = self._conn.content.rootFolder
        infrastructure_data[root_folder] = {"name": root_folder.name, "parent": None}
        return infrastructure_data

    @smart_retry
    def query_metrics(self, query_specs):
        """Run `QueryPerf` on the performance manager with the given specs and return its result."""
        perf_manager = self._conn.content.perfManager
        values = perf_manager.QueryPerf(query_specs)
        return values

    @smart_retry
    def get_new_events(self, start_time):
        """Query the event manager for events, filtering by time with `start_time` as begin time."""
        event_manager = self._conn.content.eventManager
        query_filter = vim.event.EventFilterSpec()
        time_filter = vim.event.EventFilterSpec.ByTime(beginTime=start_time)
        query_filter.time = time_filter
        return event_manager.QueryEvents(query_filter)

    @smart_retry
    def get_latest_event_timestamp(self):
        """Return the `createdTime` of the event manager's `latestEvent`."""
        event_manager = self._conn.content.eventManager
        return event_manager.latestEvent.createdTime

    @smart_retry
    def get_max_query_metrics(self):
        """Return the vCenter setting named by MAX_QUERY_METRICS_OPTION as an int.

        When the configured value is not strictly positive, UNLIMITED_HIST_METRICS_PER_QUERY
        is returned instead.
        """
        vcenter_settings = self._conn.content.setting.QueryOptions(MAX_QUERY_METRICS_OPTION)
        max_historical_metrics = int(vcenter_settings[0].value)
        return max_historical_metrics if max_historical_metrics > 0 else UNLIMITED_HIST_METRICS_PER_QUERY
90 changes: 90 additions & 0 deletions vsphere/datadog_checks/vsphere/cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# (C) Datadog, Inc. 2019-present
# All rights reserved
# Licensed under Simplified BSD License (see LICENSE)
import time
from contextlib import contextmanager


class VSphereCache(object):
    """
    Base class holding the content, refresh interval and last-refresh timestamp shared by the
    Morlist and Metadata caches.
    VSphereCache is *not* threadsafe.
    """

    def __init__(self, interval_sec):
        self._interval = interval_sec
        self._content = {}
        # A timestamp of 0 makes a freshly created cache report itself as expired.
        self._last_ts = 0

    @contextmanager
    def update(self):
        """Context manager wrapping a full refresh of the cache.

        The content is emptied before the body runs. If the body raises an `Exception`, the
        previous content is put back and the exception is re-raised; otherwise the refresh
        timestamp is set to the current time.
        Usage:
        ```
        with cache.update():
            cache.set_XXX(SOME_DATA)
        ```
        """
        # Keep a handle on the current data and start from an empty cache.
        previous, self._content = self._content, {}
        try:
            # The caller fills the cache while the context is active.
            yield
            # Reaching this point means the refresh succeeded.
            self._last_ts = time.time()
        except Exception:
            # Roll back to the data held before the refresh started.
            self._content = previous
            raise

    def is_expired(self):
        """The cache has a single time to live: every element expires at the same moment.
        :return True if more than the configured interval has elapsed since the last refresh."""
        return time.time() - self._last_ts > self._interval


class MetricsMetadataCache(VSphereCache):
    """Cache specialised for the metrics metadata of a user environment.
    The content is keyed by resource type:

    _content = {
        vim.HostSystem: {
            <COUNTER_KEY>: <DD_METRIC_NAME>,
            ...
        },
        vim.VirtualMachine: {...},
        ...
    }
    """

    def get_metadata(self, resource_type):
        """Return the metadata stored for `resource_type`, or None when nothing is stored."""
        stored = self._content.get(resource_type)
        return stored

    def set_metadata(self, resource_type, metadata):
        """Store `metadata` for `resource_type`, replacing any previous entry."""
        self._content.update({resource_type: metadata})


class InfrastructureCache(VSphereCache):
    """Cache specialised for the infrastructure data of a user environment.
    The content is grouped by the type of each mor:

    _content = {
        vim.VirtualMachine: {
            <MOR_REFERENCE>: <MOR_PROPS_DICT>
        },
        ...
    }
    """

    def get_mor_props(self, mor, default=None):
        """Return the properties stored for `mor`, or `default` when the mor is not cached."""
        same_type_mors = self._content.get(type(mor), {})
        return same_type_mors.get(mor, default)

    def get_mors(self, resource_type):
        """Return the keys (mors) cached under `resource_type`; empty when the type is absent."""
        same_type_mors = self._content.get(resource_type, {})
        return same_type_mors.keys()

    def set_mor_data(self, mor, mor_data):
        """Store `mor_data` for `mor` under the bucket of its type, creating the bucket if needed."""
        self._content.setdefault(type(mor), {})[mor] = mor_data
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we use something other than a MOR object as the cache key for self._content[mor_type][mor]?

Maybe use a "MOR Id".

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I looked into pyvmomi code https://github.com/vmware/pyvmomi/blob/master/pyVmomi/VmomiSupport.py#L596-L610.

What this means is that it is safe to use the mor directly as the cache key: its string representation should be unique, so there should be no hash collision, and even if two different mors had the same string, the __eq__ method should prevent collisions.

So another possibility would be to use str(mor) as the cache key, but note that we would lose that protection in case two mors have the same string representation. Note also that the __str__ method can be quite complex compared with the very simple __hash__ and __eq__ methods.

Loading