Source code for watcher.decision_engine.strategy.strategies.noisy_neighbor

# -*- encoding: utf-8 -*-
# Copyright (c) 2017 Intel Corp
#
# Authors: Prudhvi Rao Shedimbi <prudhvi.rao.shedimbi@intel.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from oslo_config import cfg
from oslo_log import log

from watcher._i18n import _
from watcher.common import exception as wexc
from watcher.decision_engine.strategy.strategies import base

LOG = log.getLogger(__name__)
CONF = cfg.CONF


class NoisyNeighbor(base.NoisyNeighborBaseStrategy):
    """Noisy Neighbor strategy using live migration

    *Description*

    This strategy can identify and migrate a Noisy Neighbor -
    a low priority VM that negatively affects performance of a high
    priority VM in terms of IPC by over utilizing Last Level Cache.

    *Requirements*

    To enable LLC metric, latest Intel server with CMT support is required.

    *Limitations*

    This is a proof of concept that is not meant to be used in production.

    *Spec URL*

    http://specs.openstack.org/openstack/watcher-specs/specs/pike/implemented/noisy_neighbor_strategy.html
    """

    MIGRATION = "migrate"

    DATASOURCE_METRICS = ['instance_l3_cache_usage']

    # The meter to report L3 cache in ceilometer
    METER_NAME_L3 = "cpu_l3_cache"

    DEFAULT_WATCHER_PRIORITY = 5

    def __init__(self, config, osc=None):
        super(NoisyNeighbor, self).__init__(config, osc)

        self.meter_name = self.METER_NAME_L3
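    # Overall flow, for orientation (a summary of the methods below):
    # pre_execute() validates the compute model, do_execute() calls
    # group_hosts() to pair a suffering high-priority VM with the noisy
    # low-priority VM on the same node, filter_dest_servers() keeps only
    # nodes with enough spare capacity, and a single live "migrate"
    # action is added to the solution for the noisy VM.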
    @classmethod
    def get_name(cls):
        return "noisy_neighbor"
    @classmethod
    def get_display_name(cls):
        return _("Noisy Neighbor")
    @classmethod
    def get_translatable_display_name(cls):
        return "Noisy Neighbor"
    @classmethod
    def get_schema(cls):
        # Mandatory default setting for each element
        return {
            "properties": {
                "cache_threshold": {
                    "description": "Performance drop in L3_cache threshold "
                                   "for migration",
                    "type": "number",
                    "default": 35.0
                },
                "period": {
                    "description": "Aggregate time period of "
                                   "ceilometer and gnocchi",
                    "type": "number",
                    "default": 100.0
                },
            },
        }
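    # A minimal sketch of overriding these parameters when launching an
    # audit with python-watcherclient. The audit template name "at1" is
    # just an example, and the goal name is assumed to match the strategy
    # name from get_name() above:
    #
    #   openstack optimize audittemplate create at1 noisy_neighbor \
    #       --strategy noisy_neighbor
    #   openstack optimize audit create -a at1 \
    #       -p cache_threshold=30.0 -p period=120.0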
    @classmethod
    def get_config_opts(cls):
        return [
            cfg.ListOpt(
                "datasources",
                help="Datasources to use in order to query the needed"
                     " metrics. If one of the strategy's metrics isn't"
                     " available in the first datasource, the next"
                     " datasource will be chosen.",
                item_type=cfg.types.String(choices=['gnocchi', 'ceilometer',
                                                    'monasca']),
                default=['gnocchi', 'ceilometer', 'monasca'])
        ]
    def get_current_and_previous_cache(self, instance):
        try:
            curr_cache = self.datasource_backend.get_instance_l3_cache_usage(
                instance.uuid, self.period, 'mean', granularity=300)
            previous_cache = 2 * (
                self.datasource_backend.get_instance_l3_cache_usage(
                    instance.uuid, 2 * self.period, 'mean',
                    granularity=300)) - curr_cache
        except Exception as exc:
            LOG.exception(exc)
            return None, None

        return curr_cache, previous_cache
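    # Why "2 * mean(2T) - current" recovers the previous window: if the
    # mean over the last period T is m1 (curr_cache) and the mean over the
    # last 2T is m2, then for two equal-length windows m2 = (m1 + prev) / 2,
    # so prev = 2 * m2 - m1. For example, m1 = 40 and m2 = 50 imply a
    # previous-window mean of 60.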
    def find_priority_instance(self, instance):
        current_cache, previous_cache = \
            self.get_current_and_previous_cache(instance)

        if None in (current_cache, previous_cache):
            LOG.warning("Datasource unable to pick L3 Cache "
                        "values. Skipping the instance")
            return None

        if (current_cache < (1 - (self.cache_threshold / 100.0)) *
                previous_cache):
            return instance
        else:
            return None
    def find_noisy_instance(self, instance):
        noisy_current_cache, noisy_previous_cache = \
            self.get_current_and_previous_cache(instance)

        if None in (noisy_current_cache, noisy_previous_cache):
            LOG.warning("Datasource unable to pick "
                        "L3 Cache. Skipping the instance")
            return None

        if (noisy_current_cache > (1 + (self.cache_threshold / 100.0)) *
                noisy_previous_cache):
            return instance
        else:
            return None
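    # Worked example with the default cache_threshold of 35.0: a priority
    # VM is flagged when its current L3 usage drops below
    # (1 - 0.35) * previous = 0.65 * previous, and a noisy VM when its
    # usage rises above (1 + 0.35) * previous = 1.35 * previous. With a
    # previous-window mean of 100 units, that means below 65 and above
    # 135 respectively.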
    def group_hosts(self):
        nodes = self.compute_model.get_all_compute_nodes()
        size_cluster = len(nodes)
        if size_cluster == 0:
            raise wexc.ClusterEmpty()

        hosts_need_release = {}
        hosts_target = []

        for node in nodes.values():
            instances_of_node = self.compute_model.get_node_instances(node)
            node_instance_count = len(instances_of_node)

            # Flag that tells us whether to skip the node or not. If True,
            # the node is skipped. Will be true if we find a noisy instance
            # or when potential priority instance will be same as potential
            # noisy instance
            loop_break_flag = False

            if node_instance_count > 1:
                instance_priority_list = []

                for instance in instances_of_node:
                    instance_priority_list.append(instance)

                # If there is no metadata regarding watcher-priority, it
                # takes DEFAULT_WATCHER_PRIORITY as priority.
                instance_priority_list.sort(key=lambda a: (
                    a.get('metadata').get('watcher-priority'),
                    self.DEFAULT_WATCHER_PRIORITY))

                instance_priority_list_reverse = list(instance_priority_list)
                instance_priority_list_reverse.reverse()

                for potential_priority_instance in instance_priority_list:
                    priority_instance = self.find_priority_instance(
                        potential_priority_instance)

                    if priority_instance is not None:
                        for potential_noisy_instance in (
                                instance_priority_list_reverse):
                            if (potential_noisy_instance ==
                                    potential_priority_instance):
                                loop_break_flag = True
                                break

                            noisy_instance = self.find_noisy_instance(
                                potential_noisy_instance)

                            if noisy_instance is not None:
                                hosts_need_release[node.uuid] = {
                                    'priority_vm':
                                        potential_priority_instance,
                                    'noisy_vm': potential_noisy_instance}
                                LOG.debug("Priority VM found: %s",
                                          potential_priority_instance.uuid)
                                LOG.debug("Noisy VM found: %s",
                                          potential_noisy_instance.uuid)
                                loop_break_flag = True
                                break

                    # No need to check other instances in the node
                    if loop_break_flag is True:
                        break

            if node.uuid not in hosts_need_release:
                hosts_target.append(node)

        return hosts_need_release, hosts_target
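    # The sort above reads the "watcher-priority" key from instance
    # metadata: instances are scanned as priority-VM candidates in
    # ascending order of this value and as noisy-VM candidates in
    # descending order. One way an operator might set it (an assumption
    # about the workflow, not part of this module):
    #
    #   openstack server create ... --property watcher-priority=1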
    def calc_used_resource(self, node):
        """Calculate the used vcpus, memory and disk based on VM flavors"""
        instances = self.compute_model.get_node_instances(node)
        vcpus_used = 0
        memory_mb_used = 0
        disk_gb_used = 0
        for instance in instances:
            vcpus_used += instance.vcpus
            memory_mb_used += instance.memory
            disk_gb_used += instance.disk

        return vcpus_used, memory_mb_used, disk_gb_used
    def filter_dest_servers(self, hosts, instance_to_migrate):
        required_cores = instance_to_migrate.vcpus
        required_disk = instance_to_migrate.disk
        required_memory = instance_to_migrate.memory

        dest_servers = []
        for host in hosts:
            cores_used, mem_used, disk_used = self.calc_used_resource(host)
            cores_available = host.vcpus - cores_used
            disk_available = host.disk - disk_used
            mem_available = host.memory - mem_used
            if (cores_available >= required_cores and
                    disk_available >= required_disk and
                    mem_available >= required_memory):
                dest_servers.append(host)

        return dest_servers
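    # Example of the capacity check: a host with 16 vcpus of which 10 are
    # used by existing instances has 6 available, so it can receive an
    # instance whose flavor requires up to 6 vcpus; the same test is
    # applied to disk (GB) and memory (MB).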
    def pre_execute(self):
        LOG.debug("Initializing Noisy Neighbor strategy")

        if not self.compute_model:
            raise wexc.ClusterStateNotDefined()

        if self.compute_model.stale:
            raise wexc.ClusterStateStale()

        LOG.debug(self.compute_model.to_string())
    def do_execute(self):
        self.cache_threshold = self.input_parameters.cache_threshold
        self.period = self.input_parameters.period

        hosts_need_release, hosts_target = self.group_hosts()

        if len(hosts_need_release) == 0:
            LOG.debug("No hosts require optimization")
            return

        if len(hosts_target) == 0:
            LOG.debug("No hosts available to migrate")
            return

        mig_source_node_name = max(
            hosts_need_release.keys(),
            key=lambda a: hosts_need_release[a]['priority_vm'])
        instance_to_migrate = hosts_need_release[mig_source_node_name][
            'noisy_vm']

        if instance_to_migrate is None:
            return

        dest_servers = self.filter_dest_servers(hosts_target,
                                                instance_to_migrate)

        if len(dest_servers) == 0:
            LOG.info("No proper target host could be found")
            return

        # Destination node will be the first available node in the list.
        mig_destination_node = dest_servers[0]
        mig_source_node = self.compute_model.get_node_by_uuid(
            mig_source_node_name)

        if self.compute_model.migrate_instance(instance_to_migrate,
                                               mig_source_node,
                                               mig_destination_node):
            parameters = {'migration_type': 'live',
                          'source_node': mig_source_node.uuid,
                          'destination_node': mig_destination_node.uuid}
            self.solution.add_action(
                action_type=self.MIGRATION,
                resource_id=instance_to_migrate.uuid,
                input_parameters=parameters)
    def post_execute(self):
        self.solution.model = self.compute_model

        LOG.debug(self.compute_model.to_string())
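
The detection rule itself is independent of the rest of Watcher and can be tried in isolation. The sketch below re-implements the two threshold tests from find_priority_instance() and find_noisy_instance() as a standalone function under the same default (cache_threshold=35.0); the function name and the sample numbers are hypothetical, not taken from a real datasource.

    def classify(current, previous, cache_threshold=35.0):
        """Classify one VM's L3 cache trend the way the strategy does."""
        drop = (1 - cache_threshold / 100.0) * previous   # priority-VM bound
        rise = (1 + cache_threshold / 100.0) * previous   # noisy-VM bound
        if current < drop:
            return 'priority'   # cache usage collapsed: likely a victim
        if current > rise:
            return 'noisy'      # cache usage ballooned: likely the aggressor
        return 'neutral'

    print(classify(60, 100))   # 'priority' (60 < 65)
    print(classify(140, 100))  # 'noisy'    (140 > 135)
    print(classify(100, 100))  # 'neutral'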