Source code for ironic.drivers.modules.agent_client

# Copyright 2014 Rackspace, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from ironic_lib import metrics_utils
from oslo_log import log
from oslo_serialization import jsonutils
import requests
import retrying
from six.moves import http_client

from ironic.common import exception
from ironic.common.i18n import _
from ironic.conf import CONF

LOG = log.getLogger(__name__)

METRICS = metrics_utils.get_metrics_logger(__name__)

DEFAULT_IPA_PORTAL_PORT = 3260


[docs]class AgentClient(object): """Client for interacting with nodes via a REST API.""" @METRICS.timer('AgentClient.__init__') def __init__(self): self.session = requests.Session() self.session.headers.update({'Content-Type': 'application/json'}) def _get_command_url(self, node): """Get URL endpoint for agent command request""" agent_url = node.driver_internal_info.get('agent_url') if not agent_url: raise exception.IronicException(_('Agent driver requires ' 'agent_url in ' 'driver_internal_info')) return ('%(agent_url)s/%(api_version)s/commands' % {'agent_url': agent_url, 'api_version': CONF.agent.agent_api_version}) def _get_command_body(self, method, params): """Generate command body from method and params""" return jsonutils.dumps({ 'name': method, 'params': params, }) @METRICS.timer('AgentClient._command') @retrying.retry( retry_on_exception=( lambda e: isinstance(e, exception.AgentConnectionFailed)), stop_max_attempt_number=CONF.agent.max_command_attempts) def _command(self, node, method, params, wait=False, command_timeout_factor=1): """Sends command to agent. :param node: A Node object. :param method: A string represents the command to be executed by agent. :param params: A dictionary containing params used to form the request body. :param wait: True to wait for the command to finish executing, False otherwise. :param command_timeout_factor: An integer, default 1, by which to multiply the [agent]command_timeout value. This is intended for use with extremely long running commands to the agent ramdisk where a general timeout value should not be extended in all cases. :raises: IronicException when failed to issue the request or there was a malformed response from the agent. :raises: AgentAPIError when agent failed to execute specified command. :returns: A dict containing command result from agent, see get_commands_status for a sample. """ url = self._get_command_url(node) body = self._get_command_body(method, params) request_params = { 'wait': str(wait).lower() } LOG.debug('Executing agent command %(method)s for node %(node)s', {'node': node.uuid, 'method': method}) try: response = self.session.post( url, params=request_params, data=body, timeout=CONF.agent.command_timeout * command_timeout_factor) except (requests.ConnectionError, requests.Timeout) as e: msg = (_('Failed to connect to the agent running on node %(node)s ' 'for invoking command %(method)s. Error: %(error)s') % {'node': node.uuid, 'method': method, 'error': e}) LOG.error(msg) raise exception.AgentConnectionFailed(reason=msg) except requests.RequestException as e: msg = (_('Error invoking agent command %(method)s for node ' '%(node)s. Error: %(error)s') % {'method': method, 'node': node.uuid, 'error': e}) LOG.error(msg) raise exception.IronicException(msg) # TODO(russellhaering): real error handling try: result = response.json() except ValueError: msg = _( 'Unable to decode response as JSON.\n' 'Request URL: %(url)s\nRequest body: "%(body)s"\n' 'Response status code: %(code)s\n' 'Response: "%(response)s"' ) % ({'response': response.text, 'body': body, 'url': url, 'code': response.status_code}) LOG.error(msg) raise exception.IronicException(msg) LOG.debug('Agent command %(method)s for node %(node)s returned ' 'result %(res)s, error %(error)s, HTTP status code %(code)d', {'node': node.uuid, 'method': method, 'res': result.get('command_result'), 'error': result.get('command_error'), 'code': response.status_code}) if response.status_code >= http_client.BAD_REQUEST: LOG.error('Agent command %(method)s for node %(node)s failed. ' 'Expected 2xx HTTP status code, got %(code)d.', {'method': method, 'node': node.uuid, 'code': response.status_code}) raise exception.AgentAPIError(node=node.uuid, status=response.status_code, error=result.get('faultstring')) return result
[docs] @METRICS.timer('AgentClient.get_commands_status') @retrying.retry( retry_on_exception=( lambda e: isinstance(e, exception.AgentConnectionFailed)), stop_max_attempt_number=CONF.agent.max_command_attempts) def get_commands_status(self, node): """Get command status from agent. :param node: A Node object. :return: A list of command results, each result is related to a command been issued to agent. A typical result can be: :: { 'command_name': <command name related to the result>, 'command_params': <params related with the command>, 'command_status': <current command status, e.g. 'RUNNING', 'SUCCEEDED', 'FAILED'>, 'command_error': <error message if command execution failed>, 'command_result': <command result if command execution succeeded, the value is command specific, e.g.: * a dictionary containing keys clean_result and clean_step for the command clean.execute_clean_step; * a string representing result message for the command standby.cache_image; * None for the command standby.sync.> } """ url = self._get_command_url(node) LOG.debug('Fetching status of agent commands for node %s', node.uuid) try: resp = self.session.get(url, timeout=CONF.agent.command_timeout) except (requests.ConnectionError, requests.Timeout) as e: msg = (_('Failed to connect to the agent running on node %(node)s ' 'to collect commands status. ' 'Error: %(error)s') % {'node': node.uuid, 'error': e}) LOG.error(msg) raise exception.AgentConnectionFailed(reason=msg) result = resp.json()['commands'] status = '; '.join('%(cmd)s: result "%(res)s", error "%(err)s"' % {'cmd': r.get('command_name'), 'res': r.get('command_result'), 'err': r.get('command_error')} for r in result) LOG.debug('Status of agent commands for node %(node)s: %(status)s', {'node': node.uuid, 'status': status}) return result
[docs] @METRICS.timer('AgentClient.prepare_image') def prepare_image(self, node, image_info, wait=False): """Call the `prepare_image` method on the node. :param node: A Node object. :param image_info: A dictionary containing various image related information. :param wait: True to wait for the command to finish executing, False otherwise. :raises: IronicException when failed to issue the request or there was a malformed response from the agent. :raises: AgentAPIError when agent failed to execute specified command. :returns: A dict containing command status from agent. See :func:`get_commands_status` for a command result sample. """ LOG.debug('Preparing image %(image)s on node %(node)s.', {'image': image_info.get('id'), 'node': node.uuid}) params = {'image_info': image_info} # this should be an http(s) URL configdrive = node.instance_info.get('configdrive') if configdrive is not None: params['configdrive'] = configdrive return self._command(node=node, method='standby.prepare_image', params=params, wait=wait)
[docs] @METRICS.timer('AgentClient.start_iscsi_target') def start_iscsi_target(self, node, iqn, portal_port=DEFAULT_IPA_PORTAL_PORT, wipe_disk_metadata=False): """Expose the node's disk as an ISCSI target. :param node: an Ironic node object :param iqn: iSCSI target IQN :param portal_port: iSCSI portal port :param wipe_disk_metadata: True if the agent should wipe first the disk magic strings like the partition table, RAID or filesystem signature. :raises: IronicException when failed to issue the request or there was a malformed response from the agent. :raises: AgentAPIError when agent failed to execute specified command. :returns: A dict containing command response from agent. See :func:`get_commands_status` for a command result sample. """ params = {'iqn': iqn, 'portal_port': portal_port, 'wipe_disk_metadata': wipe_disk_metadata} return self._command(node=node, method='iscsi.start_iscsi_target', params=params, wait=True)
[docs] @METRICS.timer('AgentClient.install_bootloader') def install_bootloader(self, node, root_uuid, efi_system_part_uuid=None, prep_boot_part_uuid=None): """Install a boot loader on the image. :param node: A node object. :param root_uuid: The UUID of the root partition. :param efi_system_part_uuid: The UUID of the efi system partition where the bootloader will be installed to, only used for uefi boot mode. :param prep_boot_part_uuid: The UUID of the PReP Boot partition where the bootloader will be installed to when local booting a partition image on a ppc64* system. :raises: IronicException when failed to issue the request or there was a malformed response from the agent. :raises: AgentAPIError when agent failed to execute specified command. :returns: A dict containing command response from agent. See :func:`get_commands_status` for a command result sample. """ params = {'root_uuid': root_uuid, 'efi_system_part_uuid': efi_system_part_uuid, 'prep_boot_part_uuid': prep_boot_part_uuid} # NOTE(TheJulia): This command explicitly sends a larger timeout # factor to the _command call such that the agent ramdisk has enough # time to perform its work. # TODO(TheJulia): We should likely split install_bootloader into many # commands at some point, even though that would not be backwards # compatible. We could at least begin to delineate the commands apart # over the next cycle or two so we don't need a command timeout # extension factor. return self._command(node=node, method='image.install_bootloader', params=params, wait=True, command_timeout_factor=2)
[docs] @METRICS.timer('AgentClient.get_clean_steps') def get_clean_steps(self, node, ports): """Get clean steps from agent. :param node: A node object. :param ports: Ports associated with the node. :raises: IronicException when failed to issue the request or there was a malformed response from the agent. :raises: AgentAPIError when agent failed to execute specified command. :returns: A dict containing command response from agent. See :func:`get_commands_status` for a command result sample. The value of key command_result is in the form of: :: { 'clean_steps': <a list of clean steps>, 'hardware_manager_version': <manager version> } """ params = { 'node': node.as_dict(secure=True), 'ports': [port.as_dict() for port in ports] } return self._command(node=node, method='clean.get_clean_steps', params=params, wait=True)
[docs] @METRICS.timer('AgentClient.execute_clean_step') def execute_clean_step(self, step, node, ports): """Execute specified clean step. :param step: A clean step dictionary to execute. :param node: A Node object. :param ports: Ports associated with the node. :raises: IronicException when failed to issue the request or there was a malformed response from the agent. :raises: AgentAPIError when agent failed to execute specified command. :returns: A dict containing command response from agent. See :func:`get_commands_status` for a command result sample. The value of key command_result is in the form of: :: { 'clean_result': <the result of execution, step specific>, 'clean_step': <the clean step issued to agent> } """ params = { 'step': step, 'node': node.as_dict(secure=True), 'ports': [port.as_dict() for port in ports], 'clean_version': node.driver_internal_info.get( 'hardware_manager_version') } return self._command(node=node, method='clean.execute_clean_step', params=params)
[docs] @METRICS.timer('AgentClient.power_off') def power_off(self, node): """Soft powers off the bare metal node by shutting down ramdisk OS. :param node: A Node object. :raises: IronicException when failed to issue the request or there was a malformed response from the agent. :raises: AgentAPIError when agent failed to execute specified command. :returns: A dict containing command response from agent. See :func:`get_commands_status` for a command result sample. """ return self._command(node=node, method='standby.power_off', params={})
[docs] @METRICS.timer('AgentClient.sync') def sync(self, node): """Flush file system buffers forcing changed blocks to disk. :param node: A Node object. :raises: IronicException when failed to issue the request or there was a malformed response from the agent. :raises: AgentAPIError when agent failed to execute specified command. :returns: A dict containing command response from agent. See :func:`get_commands_status` for a command result sample. """ return self._command(node=node, method='standby.sync', params={}, wait=True)
[docs] @METRICS.timer('AgentClient.collect_system_logs') def collect_system_logs(self, node): """Collect and package diagnostic and support data from the ramdisk. :param node: A Node object. :raises: IronicException when failed to issue the request or there was a malformed response from the agent. :raises: AgentAPIError when agent failed to execute specified command. :returns: A dict containing command response from agent. See :func:`get_commands_status` for a command result sample. """ return self._command(node=node, method='log.collect_system_logs', params={}, wait=True)
[docs] @METRICS.timer('AgentClient.finalize_rescue') def finalize_rescue(self, node): """Instruct the ramdisk to finalize entering of rescue mode. :param node: A Node object. :raises: IronicException if rescue_password is missing, or when failed to issue the request, or there was a malformed response from the agent. :raises: AgentAPIError when agent failed to execute specified command. :returns: A dict containing command response from agent. See :func:`get_commands_status` for a command result sample. """ rescue_pass = node.instance_info.get('rescue_password') if not rescue_pass: raise exception.IronicException(_('Agent rescue requires ' 'rescue_password in ' 'instance_info')) params = {'rescue_password': rescue_pass} return self._command(node=node, method='rescue.finalize_rescue', params=params)