ceilometer.compute.discovery

Source code for ceilometer.compute.discovery

#
# Copyright 2014 Red Hat, Inc
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

import hashlib
from lxml import etree
import operator
import threading

import cachetools
from novaclient import exceptions
from oslo_config import cfg
from oslo_log import log
from oslo_utils import timeutils


try:
    import libvirt
except ImportError:
    libvirt = None

from ceilometer.compute.virt.libvirt import utils as libvirt_utils
from ceilometer import nova_client
from ceilometer.polling import plugin_base

# Configuration options for compute instance discovery. InstanceDiscovery
# reads them as conf.compute.<name>; the registration of this list under the
# "compute" group is not visible in this file (presumably done elsewhere).
OPTS = [
    # Selects how instances are discovered. 'libvirt_metadata' makes
    # InstanceDiscovery.discover() read libvirt domain metadata; any other
    # value makes it poll the Nova API.
    # NOTE(review): the help text below opens "(recommended for Gnocchi"
    # but never closes the parenthesis -- confirm and fix the literal.
    cfg.StrOpt('instance_discovery_method',
               default='libvirt_metadata',
               choices=['naive', 'workload_partitioning', 'libvirt_metadata'],
               help="Ceilometer offers many methods to discover the instance "
                    "running on a compute node: \n"
                    "* naive: poll nova to get all instances\n"
                    "* workload_partitioning: poll nova to get instances of "
                    "the compute\n"
                    "* libvirt_metadata: get instances from libvirt metadata "
                    "  but without instance metadata (recommended for Gnocchi "
                    "  backend"),
    # Minimum number of seconds between two Nova API queries; 0 means a
    # query is issued on every discovery call (Nova polling only).
    cfg.IntOpt('resource_update_interval',
               default=0,
               min=0,
               help="New instances will be discovered periodically based"
                    " on this option (in seconds). By default, "
                    "the agent discovers instances according to pipeline "
                    "polling interval. If option is greater than 0, "
                    "the instance list to poll will be updated based "
                    "on this option's interval. Measurements relating "
                    "to the instances will match intervals "
                    "defined in pipeline. This option is only used "
                    "for agent polling to Nova API, so it will work only "
                    "when 'instance_discovery_method' is set to 'naive'."),
    # Number of seconds after which the locally cached instance list is
    # dropped and rebuilt from a full (non-incremental) Nova query.
    cfg.IntOpt('resource_cache_expiry',
               default=3600,
               min=0,
               help="The expiry to totally refresh the instances resource "
                    "cache, since the instance may be migrated to another "
                    "host, we need to clean the legacy instances info in "
                    "local cache by totally refreshing the local cache. "
                    "The minimum should be the value of the config option "
                    "of resource_update_interval. This option is only used "
                    "for agent polling to Nova API, so it will work only "
                    "when 'instance_discovery_method' is set to 'naive'.")
]

# Module-level logger shared by the discovery classes in this file.
LOG = log.getLogger(__name__)


class NovaLikeServer(object):
    """Minimal stand-in for a Nova server record.

    Every keyword argument passed to the constructor becomes an instance
    attribute. ``id`` is mandatory and is the only attribute that takes part
    in equality and hashing.
    """

    def __init__(self, **kwargs):
        """Store ``id`` and every other keyword argument as attributes.

        :param kwargs: attribute names and values; must contain ``id``.
        :raises KeyError: if ``id`` is not supplied.
        """
        self.id = kwargs.pop('id')
        for k, v in kwargs.items():
            setattr(self, k, v)

    def __repr__(self):
        """Return a short representation based on ``name`` when present."""
        return '<NovaLikeServer: %s>' % getattr(self, 'name', 'unknown-name')

    def __eq__(self, other):
        """Compare by ``id`` with any object that exposes an ``id``.

        Objects without an ``id`` attribute are reported as not comparable
        (``NotImplemented``) instead of raising ``AttributeError``.
        """
        try:
            return self.id == other.id
        except AttributeError:
            return NotImplemented

    def __hash__(self):
        """Hash on ``id`` so that equal servers hash equally.

        Defining ``__eq__`` alone would make instances unhashable on
        Python 3, preventing their use in sets and as dict keys.
        """
        return hash(self.id)
class InstanceDiscovery(plugin_base.DiscoveryBase):
    """Discover the instances running on this compute host.

    Instances are read either from libvirt domain metadata (method
    ``libvirt_metadata``) or from the Nova API (any other method).
    """

    # Subclasses may pin a discovery method here; when left unset the value
    # of conf.compute.instance_discovery_method is used.
    method = None

    def __init__(self, conf):
        """Set up the state needed by the selected discovery method.

        :param conf: configuration object exposing the ``compute`` options
            ``instance_discovery_method``, ``resource_update_interval`` and
            ``resource_cache_expiry``.
        """
        super(InstanceDiscovery, self).__init__(conf)
        if not self.method:
            self.method = conf.compute.instance_discovery_method
        self.nova_cli = nova_client.Client(conf)
        self.expiration_time = conf.compute.resource_update_interval
        self.cache_expiry = conf.compute.resource_cache_expiry
        if self.method == "libvirt_metadata":
            # 4096 instances on a compute should be enough :)
            self._flavor_cache = cachetools.LRUCache(4096)
        else:
            # State used only by Nova polling: the instance cache, the
            # lock guarding it and the timestamps of the last query and
            # of the last full cache refresh.
            self.lock = threading.Lock()
            self.instances = {}
            self.last_run = None
            self.last_cache_expire = None

    @property
    def connection(self):
        """Return the libvirt connection from ``libvirt_utils``."""
        # NOTE(review): self.conf is not assigned in this class; presumably
        # the base class stores it -- confirm against DiscoveryBase.
        return libvirt_utils.refresh_libvirt_connection(self.conf, self)

    def discover(self, manager, param=None):
        """Discover resources to monitor.

        :param manager: passed through to the selected discovery method.
        :param param: passed through to the selected discovery method.
        :returns: the instances found by the selected discovery method.
        """
        # Forward the caller's ``param`` rather than a hard-coded None.
        if self.method != "libvirt_metadata":
            return self.discover_nova_polling(manager, param=param)
        return self.discover_libvirt_polling(manager, param=param)

    @staticmethod
    def _safe_find_int(xml, path):
        """Return the integer text of the child element at ``path``.

        :param xml: element exposing ``find()``.
        :param path: path of the child, relative to ``xml``.
        :returns: the parsed integer, or 0 when the element is missing,
            empty or does not contain an integer.
        """
        elem = xml.find("./%s" % path)
        if elem is None:
            return 0
        try:
            return int(elem.text)
        except (TypeError, ValueError):
            # An empty element has text None (TypeError) and garbage text
            # raises ValueError; neither should abort the discovery run.
            return 0

    @cachetools.cachedmethod(operator.attrgetter('_flavor_cache'))
    def get_flavor_id(self, name):
        """Return the Nova flavor id for ``name``, or None if not found.

        Results are memoized in ``self._flavor_cache``.
        """
        try:
            return self.nova_cli.nova_client.flavors.find(name=name).id
        except exceptions.NotFound:
            return None

    @libvirt_utils.retry_on_disconnect
    def discover_libvirt_polling(self, manager, param=None):
        """Build instance records from the libvirt domains on this host.

        Domains whose Nova metadata cannot be read or lacks an expected
        element or attribute are logged and skipped.

        :param manager: unused.
        :param param: unused.
        :returns: list of :class:`NovaLikeServer`, one per usable domain.
        :raises libvirt.libvirtError: re-raised when it is a disconnection
            error, so that the decorator can handle it.
        """
        instances = []
        for domain in self.connection.listAllDomains():
            try:
                xml_string = domain.metadata(
                    libvirt.VIR_DOMAIN_METADATA_ELEMENT,
                    "http://openstack.org/xmlns/libvirt/nova/1.0")
            except libvirt.libvirtError as e:
                if libvirt_utils.is_disconnection_exception(e):
                    # Re-raise the exception so it's handled and retries
                    raise
                # Log the exception itself: Python 3 exceptions have no
                # ``message`` attribute.
                LOG.error(
                    "Fail to get domain uuid %s metadata, libvirtError: %s",
                    domain.UUIDString(), e)
                continue

            full_xml = etree.fromstring(domain.XMLDesc())
            os_type_xml = full_xml.find("./os/type")

            metadata_xml = etree.fromstring(xml_string)
            # TODO(sileht): We don't have the flavor ID here So the Gnocchi
            # resource update will fail for compute sample (or put None ?)
            # We currently poll nova to get the flavor ID, but storing the
            # flavor_id doesn't have any sense because the flavor description
            # can change over the time, we should store the detail of the
            # flavor. this is why nova doesn't put the id in the libvirt
            # metadata
            try:
                flavor_xml = metadata_xml.find("./flavor")
                user_id = metadata_xml.find("./owner/user").attrib["uuid"]
                project_id = metadata_xml.find(
                    "./owner/project").attrib["uuid"]
                instance_name = metadata_xml.find("./name").text
                instance_arch = os_type_xml.attrib["arch"]

                flavor_name = flavor_xml.attrib["name"]
                flavor = {
                    "id": self.get_flavor_id(flavor_name),
                    "name": flavor_name,
                    "vcpus": self._safe_find_int(flavor_xml, "vcpus"),
                    "ram": self._safe_find_int(flavor_xml, "memory"),
                    "disk": self._safe_find_int(flavor_xml, "disk"),
                    "ephemeral": self._safe_find_int(flavor_xml, "ephemeral"),
                    "swap": self._safe_find_int(flavor_xml, "swap"),
                }

                # The image description is partial, but Gnocchi only care
                # about the id, so we are fine
                image_xml = metadata_xml.find("./root[@type='image']")
                image = ({'id': image_xml.attrib['uuid']}
                         if image_xml is not None else None)
            except (AttributeError, KeyError):
                # AttributeError: an expected element is missing (find()
                # returned None). KeyError: the element exists but lacks
                # the expected XML attribute.
                LOG.error(
                    "Fail to get domain uuid %s metadata: "
                    "metadata was missing expected attributes",
                    domain.UUIDString())
                continue

            dom_state = domain.state()[0]
            vm_state = libvirt_utils.LIBVIRT_POWER_STATE.get(dom_state)
            status = libvirt_utils.LIBVIRT_STATUS.get(dom_state)

            # From:
            # https://github.com/openstack/nova/blob/852f40fd0c6e9d8878212ff3120556668023f1c4/nova/api/openstack/compute/views/servers.py#L214-L220
            host_id = hashlib.sha224(
                (project_id + self.conf.host).encode('utf-8')).hexdigest()

            instance_data = {
                "id": domain.UUIDString(),
                "name": instance_name,
                "flavor": flavor,
                "image": image,
                "os_type": os_type_xml.text,
                "architecture": instance_arch,

                "OS-EXT-SRV-ATTR:instance_name": domain.name(),
                "OS-EXT-SRV-ATTR:host": self.conf.host,
                "OS-EXT-STS:vm_state": vm_state,

                "tenant_id": project_id,
                "user_id": user_id,

                "hostId": host_id,
                "status": status,

                # NOTE(sileht): Other fields that Ceilometer tracks
                # where we can't get the value here, but their are
                # retrieved by notification
                "metadata": {},
                # "OS-EXT-STS:task_state"
                # 'reservation_id',
                # 'OS-EXT-AZ:availability_zone',
                # 'kernel_id',
                # 'ramdisk_id',
                # some image detail
            }

            LOG.debug("instance data: %s", instance_data)
            instances.append(NovaLikeServer(**instance_data))
        return instances

    def discover_nova_polling(self, manager, param=None):
        """Return the instances of this host as reported by the Nova API.

        Nova is queried only on the first call or once
        ``resource_update_interval`` seconds have passed since the last
        successful query. Queries are incremental (changes since the last
        run) until ``resource_cache_expiry`` seconds have passed since the
        last full refresh; then the local cache is cleared and rebuilt.

        :param manager: unused.
        :param param: unused.
        :returns: list of cached instances, or an empty list when the Nova
            query raised.
        """
        secs_from_last_update = 0
        utc_now = timeutils.utcnow(True)
        secs_from_last_expire = 0
        if self.last_run:
            secs_from_last_update = timeutils.delta_seconds(
                self.last_run, utc_now)
        if self.last_cache_expire:
            secs_from_last_expire = timeutils.delta_seconds(
                self.last_cache_expire, utc_now)

        instances = []
        # NOTE(ityaptin) we make a nova request only if
        # it's a first discovery or resources expired
        with self.lock:
            if (not self.last_run or
                    secs_from_last_update >= self.expiration_time):
                try:
                    if (secs_from_last_expire < self.cache_expiry and
                            self.last_run):
                        since = self.last_run.isoformat()
                    else:
                        # Full refresh: forget everything cached so far.
                        since = None
                        self.instances.clear()
                        self.last_cache_expire = utc_now
                    instances = self.nova_cli.instance_get_all_by_host(
                        self.conf.host, since)
                    self.last_run = utc_now
                except Exception:
                    # NOTE(zqfan): instance_get_all_by_host is wrapped and will
                    # log exception when there is any error. It is no need to
                    # raise it again and print one more time.
                    return []

            for instance in instances:
                if getattr(instance, 'OS-EXT-STS:vm_state', None) in [
                        'deleted', 'error']:
                    self.instances.pop(instance.id, None)
                else:
                    self.instances[instance.id] = instance

            # Return a snapshot taken under the lock, not the live dict
            # view, so callers never iterate a dict that is being mutated.
            return list(self.instances.values())

    @property
    def group_id(self):
        """Return the configured host name (``conf.host``)."""
        return self.conf.host
Creative Commons Attribution 3.0 License

Except where otherwise noted, this document is licensed under Creative Commons Attribution 3.0 License. See all OpenStack Legal Documents.