# Source code for ironic.drivers.modules.agent_power

# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

"""
The agent power interface.
"""

import time

from oslo_config import cfg
from oslo_log import log
import tenacity

from ironic.common import exception
from ironic.common.i18n import _
from ironic.common import states
from ironic.common import utils
from ironic.conductor import utils as cond_utils
from ironic.drivers import base
from ironic.drivers.modules import agent_client


# Global oslo.config object; the [agent] group is read when waiting for a
# rebooted agent to come back.
CONF = cfg.CONF

# Module-level logger.
LOG = log.getLogger(__name__)

# Number of seconds to sleep after requesting a reboot through the agent,
# giving the node time to power down before any further polling.
_POWER_WAIT = 30


class AgentPower(base.PowerInterface):
    """Power interface using the running agent for power actions.

    Only (soft) reboot is implemented: the reboot request is sent to the
    agent through :class:`agent_client.AgentClient`.
    """

    def __init__(self):
        super(AgentPower, self).__init__()
        self._client = agent_client.AgentClient()

    def get_properties(self):
        """Return the properties of the interface.

        :returns: dictionary of <property name>:<property description>
            entries. Always empty for this interface.
        """
        return {}

    def validate(self, task):
        """Validate the driver-specific Node deployment info.

        :param task: A TaskManager instance containing the node to act on.
        :raises: InvalidParameterValue if fast track mode is not enabled
            for the node or if the agent does not appear to be alive.
        """
        # NOTE(dtantsur): the fast_track option is mutable, so we have to
        # check it again on validation.
        if not utils.fast_track_enabled(task.node):
            raise exception.InvalidParameterValue(
                _('Fast track mode must be enabled to use the agent '
                  'power interface'))
        # TODO(dtantsur): support ACTIVE nodes
        if not cond_utils.agent_is_alive(task.node):
            raise exception.InvalidParameterValue(
                _('Agent seems offline for node %s, the agent power interface '
                  'cannot be used') % task.node.uuid)

    def supports_power_sync(self, task):
        """Check if power sync is supported for the given node.

        Not supported for the agent power since it is not possible to power
        on/off nodes.

        :param task: A TaskManager instance containing the node to act on
            with a **shared** lock.
        :returns: boolean, whether power sync is supported. Always False.
        """
        return False

    def get_supported_power_states(self, task):
        """Get a list of the supported power states.

        Only contains REBOOT and SOFT_REBOOT.

        :param task: A TaskManager instance containing the node to act on.
        :returns: A list with the supported power states defined
            in :mod:`ironic.common.states`.
        """
        return [states.REBOOT, states.SOFT_REBOOT]

    def get_power_state(self, task):
        """Return the power state of the task's node.

        Essentially, the only known state is POWER ON, everything else is
        an error (or more precisely ``None``).

        :param task: A TaskManager instance containing the node to act on.
        :returns: ``states.POWER_ON`` if the agent is alive, otherwise
            ``None``.
        """
        # TODO(dtantsur): support ACTIVE nodes
        if cond_utils.agent_is_alive(task.node):
            return states.POWER_ON

        LOG.error('Node %s is not fast-track-able, cannot determine '
                  'its power state via the "agent" power interface',
                  task.node.uuid)
        return None

    def set_power_state(self, task, power_state, timeout=None):
        """Set the power state of the task's node.

        :param task: A TaskManager instance containing the node to act on.
        :param power_state: Power state from :mod:`ironic.common.states`.
            Only REBOOT and SOFT_REBOOT are supported and are synonymous.
        :param timeout: timeout (in seconds) positive integer (> 0) for any
            power state. ``None`` indicates to use default timeout.
        :raises: PowerStateFailure on non-supported power state or if the
            reboot itself fails.
        """
        if power_state in (states.REBOOT, states.SOFT_REBOOT):
            # Forward the caller's timeout; otherwise it would be silently
            # replaced by the configured default.
            return self.reboot(task, timeout=timeout)

        LOG.error('Power state %(state)s is not implemented for node '
                  '%(node)s using the "agent" power interface',
                  {'node': task.node.uuid, 'state': power_state})
        raise exception.PowerStateFailure(pstate=power_state)

    def reboot(self, task, timeout=None):
        """Perform a reboot of the task's node.

        Only soft reboot is implemented.

        :param task: A TaskManager instance containing the node to act on.
        :param timeout: timeout (in seconds) positive integer (> 0) for any
            power state. ``None`` indicates to use default timeout.
        :raises: PowerStateFailure if waiting for the rebooted agent fails
            (only attempted for deployment/cleaning reboots).
        """
        node = task.node

        self._client.reboot(node)
        # NOTE(dtantsur): wipe the agent token, otherwise the rebooted agent
        # won't be able to heartbeat. This is mostly a precaution since the
        # calling code in conductor is expected to handle it.
        if not node.driver_internal_info.get(
                'agent_secret_token_pregenerated'):
            node.del_driver_internal_info('agent_secret_token')
        # NOTE(dtantsur): the URL may change on reboot, wipe it as well (but
        # only after we call reboot).
        node.del_driver_internal_info('agent_url')
        node.save()

        LOG.debug('Requested reboot of node %(node)s via the agent, waiting '
                  '%(wait)d seconds for the node to power down',
                  {'node': task.node.uuid, 'wait': _POWER_WAIT})
        time.sleep(_POWER_WAIT)

        if (node.provision_state in (states.DEPLOYING, states.CLEANING)
                and (node.driver_internal_info.get('deployment_reboot')
                     or node.driver_internal_info.get('cleaning_reboot'))):
            # NOTE(dtantsur): we need to downgrade the lock otherwise
            # heartbeats won't be processed. It should not have side effects
            # for nodes in DEPLOYING/CLEANING.
            task.downgrade_lock()
            try:
                self._wait_for_reboot(task, timeout)
            finally:
                # The caller probably expects a lock, so re-acquire it
                task.upgrade_lock()

    def _wait_for_reboot(self, task, timeout):
        """Wait until the agent no longer reports a pending reboot command.

        Polls the agent's command status at a fixed interval taken from
        ``[agent]post_deploy_get_power_state_retry_interval``. Polling
        continues while the connection fails or while the reboot command is
        still present in the returned command list.

        :param task: A TaskManager instance containing the node to act on.
        :param timeout: total time (in seconds) to keep polling. A falsy
            value selects the default of
            ``[agent]post_deploy_get_power_state_retries`` multiplied by the
            retry interval.
        :raises: PowerStateFailure if the agent does not come back or any
            other error happens while waiting.
        """
        wait = CONF.agent.post_deploy_get_power_state_retry_interval
        if not timeout:
            timeout = CONF.agent.post_deploy_get_power_state_retries * wait

        @tenacity.retry(
            stop=tenacity.stop_after_delay(timeout),
            retry=(tenacity.retry_if_result(lambda result: not result)
                   | tenacity.retry_if_exception_type(
                       exception.AgentConnectionFailed)),
            wait=tenacity.wait_fixed(wait),
            reraise=True)
        def _wait_until_rebooted(task):
            try:
                status = self._client.get_commands_status(
                    task.node, retry_connection=False, expect_errors=True)
            except exception.AgentConnectionFailed:
                LOG.debug('Still waiting for the agent to come back on the '
                          'node %s', task.node.uuid)
                raise

            # The reboot command still being listed means the agent that
            # received it has not gone away yet, so keep polling.
            if any(cmd['command_name'] == agent_client.REBOOT_COMMAND
                   for cmd in status):
                LOG.debug('Still waiting for the agent to power off on the '
                          'node %s', task.node.uuid)
                return False

            return True

        try:
            _wait_until_rebooted(task)
        except exception.AgentConnectionFailed as exc:
            msg = _('Agent failed to come back on %(node)s with the "agent" '
                    'power interface: %(exc)s') % {
                        'node': task.node.uuid, 'exc': exc}
            LOG.error(msg)
            raise exception.PowerStateFailure(msg) from exc
        except Exception as exc:
            # Also reached when the retry budget is exhausted while the
            # reboot command is still being reported.
            LOG.error('Could not reboot node %(node)s with the "agent" power '
                      'interface: %(exc)s',
                      {'node': task.node.uuid, 'exc': exc})
            raise exception.PowerStateFailure(
                _('Unexpected error when rebooting through the agent: %s')
                % exc) from exc