Source code for ironic.drivers.modules.agent_client

# Copyright 2014 Rackspace, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from http import client as http_client
import os
import ssl
import time

from oslo_log import log
from oslo_serialization import jsonutils
from oslo_utils import excutils
from oslo_utils import strutils
import requests
import tenacity

from ironic.common import exception
from ironic.common.i18n import _
from ironic.common import metrics_utils
from ironic.common import utils
from ironic.conf import CONF

# Module-level logger used for all log output in this file.
LOG = log.getLogger(__name__)

# Metrics logger; supplies the ``@METRICS.timer`` decorators applied to the
# AgentClient methods below.
METRICS = metrics_utils.get_metrics_logger(__name__)

# NOTE(review): not referenced in the visible part of this module; presumably
# the default portal port used when talking to the agent - TODO confirm.
DEFAULT_IPA_PORTAL_PORT = 3260

# NOTE(review): presumably the agent command name issued by
# AgentClient.reboot() - TODO confirm against that method.
REBOOT_COMMAND = 'run_image'



[docs]
def get_client(task):
    """Return the agent client cached on the task, creating it on first use.

    :param task: An object on which the client is cached as the
        ``cached_agent_client`` attribute.
    :returns: The cached (or newly created) AgentClient.
    """
    # A private sentinel distinguishes "attribute missing" from any real
    # cached value.
    missing = object()
    cached = getattr(task, 'cached_agent_client', missing)
    if cached is not missing:
        return cached

    task.cached_agent_client = AgentClient()
    return task.cached_agent_client




[docs]
def get_command_error(command):
    """Build a human-readable error string from an agent command result.

    :param command: Command information (a dict) returned by the agent.
    :return: For a dict error, the first non-empty value among its
        ``details`` and ``message`` keys, falling back to the string form
        of the dict; for any other non-None error, the error itself; a
        generic message when ``command_error`` is missing.
    """
    error = command.get('command_error')
    if error is None:
        LOG.error('Agent returned invalid response: missing command_error in '
                  '%s', command)
        return _('Invalid agent response')

    if not isinstance(error, dict):
        return error

    # Prefer the most specific description available.
    for key in ('details', 'message'):
        text = error.get(key)
        if text:
            return text
    return str(error)



def _sanitize_for_logging(var):
    if not var:
        return var
    elif isinstance(var, str):
        return strutils.mask_password(var)
    else:
        return utils.remove_large_keys(strutils.mask_dict_password(var))



[docs]
class AgentClient(object):
    """Client for interacting with nodes via a REST API."""
    @METRICS.timer('AgentClient.__init__')
    def __init__(self):
        """Create the HTTP session used for all requests to the agent."""
        session = requests.Session()
        # Every request body sent to the agent is JSON.
        session.headers.update({'Content-Type': 'application/json'})
        self.session = session

    def _get_command_url(self, node):
        """Build the URL of the agent's commands endpoint for a node.

        :param node: A Node object; ``agent_url`` is read from its
            ``driver_internal_info``.
        :raises: AgentConnectionFailed if no ``agent_url`` is recorded.
        :returns: The commands endpoint URL as a string.
        """
        base_url = node.driver_internal_info.get('agent_url')
        if base_url:
            url_parts = {'agent_url': base_url,
                         'api_version': CONF.agent.agent_api_version}
            return '%(agent_url)s/%(api_version)s/commands/' % url_parts

        raise exception.AgentConnectionFailed(
            _('Agent driver requires agent_url in driver_internal_info'))

    def _get_command_body(self, method, params):
        """Serialize a command name and its parameters into a JSON body.

        :param method: Name of the command to execute.
        :param params: Parameters for the command.
        :returns: A JSON string with the keys ``name`` and ``params``.
        """
        payload = {'name': method, 'params': params}
        return jsonutils.dumps(payload)

    def _get_verify(self, node):
        """Work out the TLS ``verify`` value for requests to the agent.

        The value is taken from the first of these that is not None:
        ``driver_internal_info['agent_verify_ca']``,
        ``driver_info['agent_verify_ca']``, ``CONF.agent.verify_ca``.

        :param node: A Node object.
        :raises: InvalidParameterValue if a string value is neither a
            boolean string nor an existing path.
        :returns: A boolean, an existing path, or the non-string value
            unchanged.
        """
        # Explicit None checks: False is a legitimate setting, so a plain
        # ``or`` chain would skip it.
        verify = node.driver_internal_info.get('agent_verify_ca')
        if verify is None:
            verify = node.driver_info.get('agent_verify_ca')
            if verify is None:
                verify = CONF.agent.verify_ca

        if not isinstance(verify, str):
            return verify

        try:
            return strutils.bool_from_string(verify, strict=True)
        except ValueError:
            # Not a boolean string, so it has to be a path to a CA file.
            if os.path.exists(verify):
                return verify
            raise exception.InvalidParameterValue(
                _('Agent CA %s is neither a path nor a boolean')
                % verify)

    def _raise_if_typeerror(self, result, node, method):
        """Raise AgentAPIError if the agent reported an internal TypeError.

        :param result: Command result dict returned by the agent.
        :param node: A Node object.
        :param method: Name of the command that was executed.
        :raises: AgentAPIError if ``command_error`` is a dict whose ``type``
            is ``'TypeError'``.
        """
        error = result.get('command_error')
        # get_command_error() accepts non-dict errors (e.g. plain strings),
        # so only inspect the error type when the error really is a dict;
        # calling .get() on a string would raise AttributeError here.
        if isinstance(error, dict) and error.get('type') == 'TypeError':
            LOG.error('Agent command %(method)s for node %(node)s failed. '
                      'Internal TypeError detected: Error %(error)s',
                      {'method': method, 'node': node.uuid, 'error': error})
            raise exception.AgentAPIError(node=node.uuid,
                                          status=error.get('code'),
                                          error=get_command_error(result))

    @METRICS.timer('AgentClient._wait_for_command')
    @tenacity.retry(
        retry=tenacity.retry_if_exception_type(
            exception.AgentCommandTimeout),
        stop=tenacity.stop_after_attempt(CONF.agent.command_wait_attempts),
        wait=tenacity.wait_fixed(CONF.agent.command_wait_interval),
        reraise=True)
    def _wait_for_command(self, node, method):
        """Poll the agent until the given command is no longer running.

        :param node: A Node object.
        :param method: Name of the command executed by the agent.
        :raises: AgentCommandTimeout if the command is not found or is still
            running once the retry attempts are exhausted.
        :returns: The finished command's status dict.
        """
        # NOTE: AgentCommandTimeout is raised for every "not done yet"
        # outcome; the retry decorator on this method catches it and polls
        # again until its attempt limit is reached.
        last = self.get_last_command_status(node, method)

        if last is not None and last.get('command_status') != 'RUNNING':
            LOG.debug('Command %(cmd)s has finished for node %(node)s with '
                      'result %(result)s',
                      {'cmd': method, 'node': node.uuid, 'result': last})
            self._raise_if_typeerror(last, node, method)
            return last

        if last is not None:
            LOG.debug('Command %(cmd)s has not finished yet for node %(node)s',
                      {'cmd': method, 'node': node.uuid})
        raise exception.AgentCommandTimeout(command=method, node=node.uuid)

    @METRICS.timer('AgentClient._command')
    @tenacity.retry(
        retry=tenacity.retry_if_exception_type(
            exception.AgentConnectionFailed),
        stop=tenacity.stop_after_attempt(CONF.agent.max_command_attempts),
        reraise=True)
    def _command(self, node, method, params, wait=False, poll=False):
        """Sends command to agent.

        :param node: A Node object.
        :param method: A string represents the command to be executed by
                       agent.
        :param params: A dictionary containing params used to form the request
                       body.
        :param wait: True to wait for the command to finish executing, False
                     otherwise.
        :param poll: Whether to poll the command until completion. Provides
                     a better alternative to `wait` for long-running commands.
                     Mutually exclusive with `wait`.
        :raises: IronicException when failed to issue the request or there was
                 a malformed response from the agent.
        :raises: AgentAPIError when agent failed to execute specified command.
        :raises: AgentInProgress when the command fails to execute as the agent
                 is presently executing the prior command.
        :raises: AgentConnectionFailed when connectivity has failed and there
                 is no prior command to determine the status of.
        :returns: A dict containing command result from agent, see
                  get_commands_status for a sample.
        """
        assert not (wait and poll)

        url = self._get_command_url(node)
        body = self._get_command_body(method, params)
        request_params = {
            'wait': str(wait).lower()
        }
        agent_token = node.driver_internal_info.get('agent_secret_token')
        if agent_token:
            request_params['agent_token'] = agent_token
        LOG.debug('Executing agent command %(method)s for node %(node)s '
                  'with params %(params)s',
                  {'node': node.uuid, 'method': method,
                   'params': _sanitize_for_logging(request_params)})

        try:
            response = self.session.post(
                url, params=request_params, data=body,
                verify=self._get_verify(node),
                timeout=CONF.agent.command_timeout)
        except (requests.ConnectionError, requests.Timeout, ssl.SSLError) as e:
            # The agent may have received the command even though the reply
            # never arrived; fall back to the status of its last command.
            # This raises AgentConnectionFailed when that is not possible.
            result = self._handle_timeout_on_command_execution(node, method,
                                                               params, e)
            response = None
        except requests.RequestException as e:
            msg = (_('Error invoking agent command %(method)s for node '
                     '%(node)s. Error: %(error)s') %
                   {'method': method, 'node': node.uuid, 'error': e})
            LOG.error(msg)
            raise exception.IronicException(msg)

        if response is not None:
            # TODO(russellhaering): real error handling
            try:
                result = response.json()
            except ValueError:
                msg = _(
                    'Unable to decode response as JSON.\n'
                    'Request URL: %(url)s\nRequest body: "%(body)s"\n'
                    'Response status code: %(code)s\n'
                    'Response: "%(response)s"'
                ) % ({'response': response.text, 'body': body, 'url': url,
                      'code': response.status_code})
                LOG.error(msg)
                raise exception.IronicException(msg)

        error = result.get('command_error')
        LOG.debug('Agent command %(method)s for node %(node)s returned '
                  'result %(res)s, error %(error)s, HTTP status code %(code)s',
                  {'node': node.uuid, 'method': method,
                   'res': _sanitize_for_logging(result.get('command_result')),
                   'error': error,
                   'code': response.status_code if response is not None
                   else 'unknown'})
        if (response is not None
                and response.status_code >= http_client.BAD_REQUEST):
            faultstring = result.get('faultstring')
            # An error response is not guaranteed to carry a faultstring.
            # Use a normalised string for the substring checks below so a
            # missing value surfaces as AgentAPIError rather than a
            # TypeError that hides the actual agent failure.
            fault_text = str(faultstring) if faultstring else ''
            if 'agent_token' in fault_text:
                LOG.error('Agent command %(method)s for node %(node)s '
                          'failed. Expected 2xx HTTP status code, got '
                          '%(code)d. Error suggests an older ramdisk '
                          'which does not support ``agent_token``. '
                          'This is a fatal error.',
                          {'method': method, 'node': node.uuid,
                           'code': response.status_code})
            else:
                LOG.error('Agent command %(method)s for node %(node)s failed. '
                          'Expected 2xx HTTP status code, got %(code)d.',
                          {'method': method, 'node': node.uuid,
                           'code': response.status_code})
            if (response.status_code == http_client.CONFLICT
                    or 'agent is busy' in fault_text.lower()):
                # HTTP 409 check as an explicit check of if the agent
                # is already busy.
                # NOTE(TheJulia): The agent sends upper case A as of
                # late victoria, but lower case the entire message
                # for compatibility with pre-late victoria agents
                # which returns HTTP 409.
                raise exception.AgentInProgress(node=node.uuid,
                                                command=method,
                                                error=faultstring)
            raise exception.AgentAPIError(node=node.uuid,
                                          status=response.status_code,
                                          error=faultstring)

        self._raise_if_typeerror(result, node, method)

        if poll:
            # Replace the immediate response with the final command status.
            result = self._wait_for_command(node, method)

        return result


[docs]
    @METRICS.timer('AgentClient.get_commands_status')
    def get_commands_status(self, node, retry_connection=True,
                            expect_errors=False):
        """Fetch the commands known to the agent together with their status.

        :param node: A Node object.
        :param retry_connection: When True, connection failures are retried
            up to ``CONF.agent.max_command_attempts`` attempts.
        :param expect_errors: When True, connection problems are logged at
            debug level instead of error level.
        :raises: AgentConnectionFailed if the agent cannot be reached.
        :return: The ``commands`` list from the agent's response; each entry
            describes one command issued to the agent. A typical entry:

            ::

              {
                'command_name': <command name related to the result>,
                'command_params': <params related with the command>,
                'command_status': <current command status,
                                  e.g. 'RUNNING', 'SUCCEEDED', 'FAILED'>,
                'command_error': <error message if command execution
                                 failed>,
                'command_result': <command result if command execution
                                  succeeded, the value is command specific,
                                  e.g.:
                                  * a dictionary containing keys clean_result
                                    and clean_step for the command
                                    clean.execute_clean_step;
                                  * a dictionary containing keys deploy_result
                                    and deploy_step for the command
                                    deploy.execute_deploy_step;
                                  * a string representing result message for
                                    the command standby.cache_image;
                                  * None for the command standby.sync.>
              }
        """
        url = self._get_command_url(node)
        LOG.debug('Fetching status of agent commands for node %s', node.uuid)

        query = {}
        token = node.driver_internal_info.get('agent_secret_token')
        if token:
            query['agent_token'] = token

        def _fetch():
            # Translate transport failures into AgentConnectionFailed so the
            # optional retry wrapper below can act on them.
            try:
                return self.session.get(url,
                                        params=query,
                                        verify=self._get_verify(node),
                                        timeout=CONF.agent.command_timeout)
            except (requests.ConnectionError, requests.Timeout) as exc:
                failure = (_('Failed to connect to the agent running on node '
                             '%(node)s to collect commands status. '
                             'Error: %(error)s') %
                           {'node': node.uuid, 'error': exc})
                if expect_errors:
                    LOG.debug(failure)
                else:
                    LOG.error(failure)
                raise exception.AgentConnectionFailed(reason=failure)

        fetch = _fetch
        if retry_connection:
            retrying = tenacity.retry(
                retry=tenacity.retry_if_exception_type(
                    exception.AgentConnectionFailed),
                stop=tenacity.stop_after_attempt(
                    CONF.agent.max_command_attempts),
                reraise=True)
            fetch = retrying(_fetch)

        commands = fetch().json()['commands']

        # One summary fragment per command, for the debug log only.
        summaries = [
            '%(cmd)s: result "%(res)s", error "%(err)s"' %
            {'cmd': entry.get('command_name'),
             'res': _sanitize_for_logging(entry.get('command_result')),
             'err': entry.get('command_error')}
            for entry in commands
        ]
        LOG.debug('Status of agent commands for node %(node)s: %(status)s',
                  {'node': node.uuid, 'status': '; '.join(summaries)})
        return commands


    def _status_if_last_command_matches(self, node, method, params):
        """Return the agent's last command if it is ``method`` and RUNNING.

        :param node: A Node object.
        :param method: Command name; anything up to and including the first
            dot is stripped before comparing with the agent's command names.
        :param params: Unused by this method.
        :returns: The last command's status dict, or None if there are no
            commands, the last command has a different name, or it is not
            in the RUNNING state.
        """
        _prefix, dot, short_name = method.partition('.')
        if dot:
            method = short_name

        commands = self.get_commands_status(node)
        if not commands:
            return None

        # TODO(dtantsur): a more reliable way to detect repeated execution
        # would be to pass a sort of require ID to the agent.

        last = commands[-1]
        last_name = last['command_name']
        if last_name != method:
            LOG.debug('Command %(cmd)s is not currently executing, the last '
                      'command is %(curr)s',
                      {'cmd': method, 'curr': last_name})
            return None

        last_status = last['command_status']
        if last_status != 'RUNNING':
            LOG.debug('Command %(cmd)s is not currently executing, its status '
                      'is %(curr)s',
                      {'cmd': method, 'curr': last_status})
            return None

        return last

    def _handle_timeout_on_command_execution(self, node, method, params,
                                             error):
        """Recover from a connection failure while issuing a command.

        :param node: A Node object.
        :param method: Name of the command that was being invoked.
        :param params: Parameters of the command.
        :param error: The exception raised by the failed request.
        :raises: AgentConnectionFailed if the last command reported by the
            agent is not the requested one in the RUNNING state, or if the
            agent cannot be queried at all.
        :returns: The status dict of the running command.
        """
        # NOTE(dtantsur): it is possible, especially with eventlet+TLS, that
        # agent receives a command but fails to return the result to Ironic.
        # To avoid a failure, check if the last command is the one we're trying
        # to execute.
        try:
            last_status = self._status_if_last_command_matches(node, method,
                                                               params)
        except Exception as exc:
            # Best effort only: log and treat it as "no matching command".
            last_status = None
            LOG.error(_('Failed to connect to the agent running on node '
                        '%(node)s for checking the last command status '
                        'after failing to invoke command %(method)s. '
                        'Error: %(error)s') %
                      {'node': node.uuid, 'method': method, 'error': exc})

        if last_status is not None:
            return last_status

        failure = (_('Failed to connect to the agent running on node %(node)s '
                     'for invoking command %(method)s. Error: %(error)s') %
                   {'node': node.uuid, 'method': method, 'error': error})
        LOG.error(failure)
        raise exception.AgentConnectionFailed(reason=failure)


[docs]
    def get_last_command_status(self, node, method):
        """Find the most recent status entry for the given command.

        :param node: A Node object.
        :param method: Command name; anything up to and including the first
            dot is stripped before comparing with the agent's command names.
        :returns: The status dict of the latest matching command, or None
            if the agent does not list such a command.
        """
        _prefix, dot, short_name = method.partition('.')
        if dot:
            method = short_name

        # Walk the list backwards so the newest matching entry wins.
        for entry in reversed(self.get_commands_status(node)):
            if entry.get('command_name') == method:
                return entry

        LOG.debug('Command %(cmd)s is not in the executing commands list '
                  'for node %(node)s',
                  {'cmd': method, 'node': node.uuid})
        return None



[docs]
    @METRICS.timer('AgentClient.install_bootloader')
    def install_bootloader(self, node, root_uuid, target_boot_mode,
                           efi_system_part_uuid=None,
                           prep_boot_part_uuid=None,
                           software_raid=False):
        """Ask the agent to install a boot loader on the deployed image.

        The command is first sent with ``target_boot_mode``. If the agent
        rejects it with AgentAPIError, it is re-sent without that parameter,
        except for software RAID with the ``uefi`` boot mode, where the
        error is re-raised.

        :param node: A node object.
        :param root_uuid: The UUID of the root partition.
        :param target_boot_mode: The target deployment boot mode.
        :param efi_system_part_uuid: The UUID of the efi system partition
               where the bootloader will be installed to, only used for uefi
               boot mode.
        :param prep_boot_part_uuid: The UUID of the PReP Boot partition where
               the bootloader will be installed to when local booting a
               partition image on a ppc64* system.
        :param software_raid: Whether software RAID is in use; only consulted
               to decide if the fallback without ``target_boot_mode`` is
               allowed.
        :raises: IronicException when failed to issue the request or there was
                 a malformed response from the agent.
        :raises: AgentAPIError when agent failed to execute specified command.
        :raises: AgentInProgress when the command fails to execute as the agent
                 is presently executing the prior command.
        :returns: A dict containing command response from agent.
                  See :func:`get_commands_status` for a command result sample.
        """
        command = 'image.install_bootloader'
        # Parameters understood by every agent version; also the fallback
        # payload.
        legacy_params = {'root_uuid': root_uuid,
                         'efi_system_part_uuid': efi_system_part_uuid,
                         'prep_boot_part_uuid': prep_boot_part_uuid}
        full_params = dict(legacy_params, target_boot_mode=target_boot_mode)

        # NOTE(TheJulia): the command is polled until completion so that the
        # agent ramdisk has enough time to perform its work.
        # TODO(TheJulia): We should likely split install_bootloader into many
        # commands at some point, even though that would not be backwards
        # compatible.
        try:
            return self._command(node=node, method=command,
                                 params=full_params, poll=True)
        except exception.AgentAPIError:
            # NOTE(arne_wiebalck): If for software RAID and 'uefi' as the boot
            # mode, we find that the IPA does not yet support the additional
            # 'target_boot_mode' parameter, we need to fail. For 'bios' boot
            # mode on the other hand we can retry without the parameter,
            # since 'bios' is the default value the IPA will use.
            if software_raid and target_boot_mode == 'uefi':
                LOG.error('Unable to pass UEFI boot mode to an out of date '
                          'agent ramdisk. Please contact the administrator '
                          'to update the ramdisk to contain an '
                          'ironic-python-agent version of at least 6.0.0.')
                raise

            LOG.warning('Failed to install bootloader on first attempt '
                        'for node %(node)s. Falling back to older IPA '
                        'format.', {'node': node.uuid})
            return self._command(node=node, method=command,
                                 params=legacy_params, poll=True)



[docs]
    @METRICS.timer('AgentClient.get_clean_steps')
    def get_clean_steps(self, node, ports):
        """Ask the agent which clean steps it offers.

        Sends ``clean.get_clean_steps`` with ``wait=True``.

        :param node: A node object.
        :param ports: Ports associated with the node.
        :raises: IronicException when failed to issue the request or there was
                 a malformed response from the agent.
        :raises: AgentAPIError when agent failed to execute specified command.
        :raises: AgentInProgress when the command fails to execute as the agent
                 is presently executing the prior command.
        :returns: A dict containing command response from agent.
            See :func:`get_commands_status` for a command result sample.
            The value of key command_result is in the form of:

            ::

              {
                'clean_steps': <a list of clean steps>,
                'hardware_manager_version': <manager version>
              }

        """
        node_info = node.as_dict(secure=True)
        port_info = [p.as_dict() for p in ports]
        return self._command(node=node,
                             method='clean.get_clean_steps',
                             params={'node': node_info, 'ports': port_info},
                             wait=True)



[docs]
    @METRICS.timer('AgentClient.execute_clean_step')
    def execute_clean_step(self, step, node, ports):
        """Ask the agent to run one clean step.

        Sends ``clean.execute_clean_step`` without waiting or polling.

        :param step: A clean step dictionary to execute.
        :param node: A Node object.
        :param ports: Ports associated with the node.
        :raises: IronicException when failed to issue the request or there was
                 a malformed response from the agent.
        :raises: AgentAPIError when agent failed to execute specified command.
        :raises: AgentInProgress when the command fails to execute as the agent
                 is presently executing the prior command.
        :returns: A dict containing command response from agent.
            See :func:`get_commands_status` for a command result sample.
            The value of key command_result is in the form of:

            ::

              {
                'clean_result': <the result of execution, step specific>,
                'clean_step': <the clean step issued to agent>
              }

        """
        node_info = node.as_dict(secure=True)
        port_info = [p.as_dict() for p in ports]
        # Version recorded on the node's driver_internal_info (may be None).
        manager_version = node.driver_internal_info.get(
            'hardware_manager_version')
        return self._command(node=node,
                             method='clean.execute_clean_step',
                             params={'step': step,
                                     'node': node_info,
                                     'ports': port_info,
                                     'clean_version': manager_version})



[docs]
    @METRICS.timer('AgentClient.get_deploy_steps')
    def get_deploy_steps(self, node, ports):
        """Ask the agent which deploy steps it offers.

        Sends ``deploy.get_deploy_steps`` with ``wait=True``.

        :param node: A node object.
        :param ports: Ports associated with the node.
        :raises: IronicException when failed to issue the request or there was
                 a malformed response from the agent.
        :raises: AgentAPIError when agent failed to execute specified command.
        :raises: AgentConnectionFailed when a transient connection failure
                 breaks the connection while the request is being processed.
        :returns: A dict containing command response from agent.
            See :func:`get_commands_status` for a command result sample.
            The value of key command_result is in the form of:

            ::

              {
                'deploy_steps': <a list of deploy steps>,
                'hardware_manager_version': <manager version>
              }

        """
        node_info = node.as_dict(secure=True)
        port_info = [p.as_dict() for p in ports]
        return self._command(node=node,
                             method='deploy.get_deploy_steps',
                             params={'node': node_info, 'ports': port_info},
                             wait=True)



[docs]
    @METRICS.timer('AgentClient.execute_deploy_step')
    def execute_deploy_step(self, step, node, ports):
        """Ask the agent to run one deploy step.

        Sends ``deploy.execute_deploy_step`` without waiting or polling. The
        node is serialized with ``mask_configdrive=False``.

        :param step: A deploy step dictionary to execute.
        :param node: A Node object.
        :param ports: Ports associated with the node.
        :raises: IronicException when failed to issue the request or there was
                 a malformed response from the agent.
        :raises: AgentAPIError when agent failed to execute specified command.
        :raises: AgentInProgress when the command fails to execute as the agent
                 is presently executing the prior command.
        :returns: A dict containing command response from agent.
            See :func:`get_commands_status` for a command result sample.
            The value of key command_result is in the form of:

            ::

              {
                'deploy_result': <the result of execution, step specific>,
                'deploy_step': <the deploy step issued to agent>
              }

        """
        node_info = node.as_dict(secure=True, mask_configdrive=False)
        port_info = [p.as_dict() for p in ports]
        # Version recorded on the node's driver_internal_info (may be None).
        manager_version = node.driver_internal_info.get(
            'hardware_manager_version')
        return self._command(node=node,
                             method='deploy.execute_deploy_step',
                             params={'step': step,
                                     'node': node_info,
                                     'ports': port_info,
                                     'deploy_version': manager_version})



[docs]
    @METRICS.timer('AgentClient.get_service_steps')
    def get_service_steps(self, node, ports):
        """Get service steps from agent.

        Sends ``service.get_service_steps`` with ``wait=True``. An
        AgentAPIError from the agent is not propagated: it is logged as a
        warning and an empty set of service steps is returned instead.

        :param node: A node object.
        :param ports: Ports associated with the node.
        :raises: IronicException when failed to issue the request or there was
                 a malformed response from the agent.
        :raises: AgentInProgress when the command fails to execute as the agent
                 is presently executing the prior command.
        :returns: A dict containing command response from agent.
            See :func:`get_commands_status` for a command result sample.
            The value of key command_result is in the form of:

            ::

              {
                'service_steps': <a list of service steps>,
                'hardware_manager_version': <manager version>
              }

            When the agent rejects the command with AgentAPIError, a dict
            with an empty ``service_steps`` list and a
            ``hardware_manager_version`` of 0 is returned instead.
        """
        params = {
            'node': node.as_dict(secure=True),
            'ports': [port.as_dict() for port in ports]
        }
        try:
            response = self._command(node=node,
                                     method='service.get_service_steps',
                                     params=params,
                                     wait=True)
        except exception.AgentAPIError:
            # NOTE(TheJulia): This seems logical to do to handle an
            # older ironic-python-agent, since the net-effect will be
            # "there is nothing we can issue to the agent".
            # TODO(TheJulia): Once we know the version where this *is*
            # supported, we should actually update this log message.
            # We won't know that until after the initial merge.
            # The trailing space keeps the two sentences from running
            # together in the rendered message.
            LOG.warning('Unable to retrieve service steps for node %s. '
                        'Please upgrade your ironic-python-agent.',
                        node.uuid)
            # NOTE(review): unlike a real agent response, these keys are not
            # nested under 'command_result' - confirm that callers handle
            # this shape.
            response = {
                'service_steps': [],
                'hardware_manager_version': 0,
            }
        return response



[docs]
    @METRICS.timer('AgentClient.execute_service_step')
    def execute_service_step(self, step, node, ports):
        """Ask the agent to run one service step.

        Sends ``service.execute_service_step`` without waiting or polling.

        :param step: A service step dictionary to execute.
        :param node: A Node object.
        :param ports: Ports associated with the node.
        :raises: IronicException when failed to issue the request or there was
                 a malformed response from the agent.
        :raises: AgentAPIError when agent failed to execute specified command.
        :raises: AgentInProgress when the command fails to execute as the agent
                 is presently executing the prior command.
        :returns: A dict containing command response from agent.
            See :func:`get_commands_status` for a command result sample.
            The value of key command_result is in the form of:

            ::

              {
                'service_result': <the result of execution, step specific>,
                'service_step': <the service step issued to agent>
              }

        """
        node_info = node.as_dict(secure=True)
        port_info = [p.as_dict() for p in ports]
        # Version recorded on the node's driver_internal_info (may be None).
        manager_version = node.driver_internal_info.get(
            'hardware_manager_version')
        return self._command(node=node,
                             method='service.execute_service_step',
                             params={'step': step,
                                     'node': node_info,
                                     'ports': port_info,
                                     'service_version': manager_version})



[docs]
    @METRICS.timer('AgentClient.get_partition_uuids')
    def get_partition_uuids(self, node):
        """Get partition UUIDs from agent.

        Issues the ``standby.get_partition_uuids`` command with no
        parameters and waits for the agent to finish executing it
        (``wait=True``) before returning.

        :param node: A node object.
        :raises: IronicException when failed to issue the request or there was
                 a malformed response from the agent.
        :raises: AgentAPIError when agent failed to execute specified command.
        :raises: AgentInProgress when the command fails to execute as the agent
                 is presently executing the prior command.
        :returns: A dict containing command response from agent.

        """
        return self._command(node=node,
                             method='standby.get_partition_uuids',
                             params={},
                             wait=True)



[docs]
    @METRICS.timer('AgentClient.power_off')
    def power_off(self, node):
        """Request a soft power off of the node from the agent.

        Sends the ``standby.power_off`` command without parameters; the
        ramdisk OS is expected to shut the machine down.

        :param node: A Node object.
        :raises: IronicException when failed to issue the request or there was
                 a malformed response from the agent.
        :raises: AgentAPIError when agent failed to execute specified command.
        :raises: AgentInProgress when the command fails to execute as the agent
                 is presently executing the prior command.
        :returns: A dict containing command response from agent.
                  See :func:`get_commands_status` for a command result sample.
        """
        agent_method = 'standby.power_off'
        return self._command(node=node, method=agent_method, params={})



[docs]
    @METRICS.timer('AgentClient.reboot')
    def reboot(self, node):
        """Request a soft reboot of the node from the agent.

        The command is issued against the ``standby`` extension; its name
        is taken from the module-level ``REBOOT_COMMAND`` constant.

        :param node: A Node object.
        :raises: IronicException when failed to issue the request or there was
                 a malformed response from the agent.
        :raises: AgentAPIError when agent failed to execute specified command.
        :raises: AgentInProgress when the command fails to execute as the agent
                 is presently executing the prior command.
        :returns: A dict containing command response from agent.
                  See :func:`get_commands_status` for a command result sample.
        """
        agent_method = 'standby.{}'.format(REBOOT_COMMAND)
        return self._command(node=node, method=agent_method, params={})



[docs]
    @METRICS.timer('AgentClient.sync')
    def sync(self, node):
        """Have the agent flush file system buffers to disk.

        Sends the ``standby.sync`` command without parameters and waits
        for the agent to complete it (``wait=True``).

        :param node: A Node object.
        :raises: IronicException when failed to issue the request or there was
                 a malformed response from the agent.
        :raises: AgentAPIError when agent failed to execute specified command.
        :raises: AgentInProgress when the command fails to execute as the agent
                 is presently executing the prior command.
        :returns: A dict containing command response from agent.
                  See :func:`get_commands_status` for a command result sample.
        """
        agent_method = 'standby.sync'
        return self._command(node=node, method=agent_method, params={},
                             wait=True)



[docs]
    @METRICS.timer('AgentClient.collect_system_logs')
    def collect_system_logs(self, node):
        """Have the ramdisk gather and package diagnostic/support data.

        Sends the ``log.collect_system_logs`` command without parameters
        and waits for the agent to complete it (``wait=True``).

        :param node: A Node object.
        :raises: IronicException when failed to issue the request or there was
                 a malformed response from the agent.
        :raises: AgentAPIError when agent failed to execute specified command.
        :raises: AgentInProgress when the command fails to execute as the agent
                 is presently executing the prior command.
        :returns: A dict containing command response from agent.
                  See :func:`get_commands_status` for a command result sample.
        """
        agent_method = 'log.collect_system_logs'
        return self._command(node=node, method=agent_method, params={},
                             wait=True)



[docs]
    @METRICS.timer('AgentClient.finalize_rescue')
    def finalize_rescue(self, node):
        """Tell the ramdisk to complete the switch into rescue mode.

        The hashed rescue password from ``instance_info`` is sent first.
        If the agent answers with an API error, the outcome depends on
        ``[conductor]require_rescue_password_hashed``: when it is set the
        rescue fails, otherwise the command is retried once with the
        ``rescue_password`` value from ``instance_info``.

        :param node: A Node object.
        :raises: IronicException if ``hashed_rescue_password`` is missing
                 from ``instance_info``, or when failed to issue the
                 request, or there was a malformed response from the agent.
        :raises: AgentAPIError when agent failed to execute specified command.
        :raises: AgentInProgress when the command fails to execute as the agent
                 is presently executing the prior command.
        :raises: InstanceRescueFailure when the agent ramdisk is too old
                 to support transmission of the rescue password.
        :returns: A dict containing command response from agent.
                  See :func:`get_commands_status` for a command result sample.
        """
        hashed_password = node.instance_info.get('hashed_rescue_password')
        # TODO(TheJulia): Remove fallback to use the fallback_rescue_password
        # in the Victoria cycle.
        plain_password = node.instance_info.get('rescue_password')
        if not hashed_password:
            raise exception.IronicException(
                _('Agent rescue requires rescue_password in instance_info'))

        agent_method = 'rescue.finalize_rescue'
        try:
            return self._command(
                node=node,
                method=agent_method,
                params={'rescue_password': hashed_password, 'hashed': True})
        except exception.AgentAPIError:
            if not CONF.conductor.require_rescue_password_hashed:
                # Retry with the non-hashed password from instance_info.
                return self._command(
                    node=node,
                    method=agent_method,
                    params={'rescue_password': plain_password})
            raise exception.InstanceRescueFailure(
                _('Unable to rescue node due to an out of date agent '
                  'ramdisk. Please contact the administrator to update '
                  'the rescue ramdisk to contain an ironic-python-agent '
                  'version of at least 6.0.0.'))



[docs]
    @METRICS.timer('AgentClient.lockdown')
    def lockdown(self, node, fail_if_unavailable=True):
        """Make the agent unusable and wait until it stops answering.

        The sequence is: optionally probe the agent, flush file system
        buffers with :func:`sync`, issue ``system.lockdown`` and then poll
        the command status until the connection to the agent fails.

        :param node: A Node object.
        :param fail_if_unavailable: Whether to fail this call if the agent is
                                    already unavailable. When False, the
                                    agent is probed first and the call
                                    returns early if it cannot be reached.
        :raises: IronicException when failed to issue the request or there was
                 a malformed response from the agent.
        :raises: AgentAPIError when agent failed to execute specified command.
        :raises: AgentInProgress when the command fails to execute as the agent
                 is presently executing the prior command.
        :raises: AgentCommandTimeout when the agent is still reachable after
                 all polling attempts.
        """
        def _log_failure(err):
            # A traceback is attached only for non-Ironic exceptions.
            LOG.error('Failed to lock down node %(node_uuid)s. '
                      '%(cls)s: %(error)s',
                      {'node_uuid': node.uuid,
                       'cls': err.__class__.__name__, 'error': err},
                      exc_info=not isinstance(
                          err, exception.IronicException))

        if not fail_if_unavailable:
            try:
                self.get_commands_status(node, expect_errors=True)
            except exception.AgentConnectionFailed:
                LOG.debug('Agent is already down when trying to lock down '
                          'node %s', node.uuid)
                return
            except Exception:
                # Any other probe failure is logged, then lockdown proceeds.
                LOG.exception('Unexpected exception when checking agent '
                              'status on node %s, proceeding with lockdown',
                              node.uuid)

        interval = CONF.agent.post_deploy_get_power_state_retry_interval
        max_attempts = CONF.agent.post_deploy_get_power_state_retries + 1

        # A successful status query means the agent is still up, so keep
        # retrying; only a connection failure ends the polling early.
        @tenacity.retry(stop=tenacity.stop_after_attempt(max_attempts),
                        retry=tenacity.retry_unless_exception_type(
                            exception.AgentConnectionFailed),
                        wait=tenacity.wait_fixed(interval),
                        sleep=time.sleep,  # for unit testing
                        reraise=True)
        def _poll_until_unreachable():
            self.get_commands_status(node, expect_errors=True)
            LOG.debug('Agent is still available on node %s, waiting for the '
                      'lockdown command to take effect', node.uuid)

        self.sync(node)

        try:
            self._command(node=node, method='system.lockdown', params={})
        except Exception as exc:
            with excutils.save_and_reraise_exception():
                _log_failure(exc)

        try:
            _poll_until_unreachable()
        except exception.AgentConnectionFailed:
            pass  # expected
        except tenacity.RetryError:
            # Raised when the final attempt still reached the agent.
            LOG.error('Failed to lock down node %(node_uuid)s in at least '
                      '%(timeout)d seconds: agent is still available',
                      {'node_uuid': node.uuid,
                       'timeout': (interval * (max_attempts - 1))})
            raise exception.AgentCommandTimeout(command='system.lockdown',
                                                node=node.uuid)
        except Exception as exc:
            with excutils.save_and_reraise_exception():
                _log_failure(exc)
ironic.drivers.modules.agent_client

Source code for ironic.drivers.modules.agent_client

ironic 30.0.1.dev54