# Copyright 2017 Red Hat, Inc.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import collections
from datetime import timezone
import time
from urllib.parse import urlparse
from dateutil import parser
from oslo_log import log
from oslo_utils import timeutils
import sushy
import tenacity
from ironic.common import boot_devices
from ironic.common import boot_modes
from ironic.common import components
from ironic.common import exception
from ironic.common import health_states
from ironic.common.i18n import _
from ironic.common import indicator_states
from ironic.common import metrics_utils
from ironic.common import states
from ironic.common import utils
from ironic.conductor import periodics
from ironic.conductor import task_manager
from ironic.conductor import utils as manager_utils
from ironic.conf import CONF
from ironic.drivers import base
from ironic.drivers.modules import boot_mode_utils
from ironic.drivers.modules import deploy_utils
from ironic.drivers.modules.redfish import boot as redfish_boot
from ironic.drivers.modules.redfish import firmware_utils
from ironic.drivers.modules.redfish import utils as redfish_utils
LOG = log.getLogger(__name__)
METRICS = metrics_utils.get_metrics_logger(__name__)
BOOT_MODE_CONFIG_INTERVAL = 15
BOOT_DEVICE_MAP = {
sushy.BOOT_SOURCE_TARGET_PXE: boot_devices.PXE,
sushy.BOOT_SOURCE_TARGET_HDD: boot_devices.DISK,
sushy.BOOT_SOURCE_TARGET_CD: boot_devices.CDROM,
sushy.BOOT_SOURCE_TARGET_BIOS_SETUP: boot_devices.BIOS,
sushy.BOOT_SOURCE_TARGET_UEFI_HTTP: boot_devices.UEFIHTTP
}
BOOT_DEVICE_MAP_REV = {v: k for k, v in BOOT_DEVICE_MAP.items()}
# Previously we used sushy constants in driver_internal_info. This mapping
# is provided for backward compatibility, taking into account that sushy
# constants will change from strings to enums.
BOOT_DEVICE_MAP_REV_COMPAT = dict(
BOOT_DEVICE_MAP_REV,
pxe=sushy.BOOT_SOURCE_TARGET_PXE,
hdd=sushy.BOOT_SOURCE_TARGET_HDD,
cd=sushy.BOOT_SOURCE_TARGET_CD,
**{'bios setup': sushy.BOOT_SOURCE_TARGET_BIOS_SETUP}
)
VMEDIA_DEVICES_MAP = {
sushy.VIRTUAL_MEDIA_CD: boot_devices.CDROM,
sushy.VIRTUAL_MEDIA_FLOPPY: boot_devices.FLOPPY,
sushy.VIRTUAL_MEDIA_USBSTICK: boot_devices.DISK
}
VMEDIA_DEVICES_MAP_REV = {v: k for k, v in VMEDIA_DEVICES_MAP.items()}
BOOT_MODE_MAP = {
sushy.BOOT_SOURCE_MODE_UEFI: boot_modes.UEFI,
sushy.BOOT_SOURCE_MODE_BIOS: boot_modes.LEGACY_BIOS
}
BOOT_MODE_MAP_REV = {v: k for k, v in BOOT_MODE_MAP.items()}
BOOT_DEVICE_PERSISTENT_MAP = {
sushy.BOOT_SOURCE_ENABLED_CONTINUOUS: True,
sushy.BOOT_SOURCE_ENABLED_ONCE: False
}
BOOT_DEVICE_PERSISTENT_MAP_REV = {v: k for k, v in
BOOT_DEVICE_PERSISTENT_MAP.items()}
VENDORS_REQUIRING_FULL_BOOT_REQUEST = [
"american megatrends international",
"ami",
"asrockrack",
"redfish_compatible"
]
INDICATOR_MAP = {
sushy.INDICATOR_LED_LIT: indicator_states.ON,
sushy.INDICATOR_LED_OFF: indicator_states.OFF,
sushy.INDICATOR_LED_BLINKING: indicator_states.BLINKING,
sushy.INDICATOR_LED_UNKNOWN: indicator_states.UNKNOWN
}
INDICATOR_MAP_REV = {
v: k for k, v in INDICATOR_MAP.items()}
HEALTH_MAP = {
sushy.HEALTH_OK: health_states.HealthState.OK,
sushy.HEALTH_WARNING: health_states.HealthState.WARNING,
sushy.HEALTH_CRITICAL: health_states.HealthState.CRITICAL,
}
_FIRMWARE_UPDATE_ARGS = {
'firmware_images': {
'description': (
'A list of firmware images to apply.'),
'required': True
}}
def _is_during_post_error(exc):
    """Tell whether an exception signals 'UnableToModifyDuringSystemPOST'.

    HPE iLO BMCs reject boot device changes while the system is in POST
    (Power-On Self-Test), typically after a firmware update or reboot.

    :param exc: An exception instance
    :returns: True if this is a POST-related error, False otherwise
    """
    # Only BadRequestError responses can carry this vendor message code.
    if not isinstance(exc, sushy.exceptions.BadRequestError):
        return False
    if 'UnableToModifyDuringSystemPOST' not in str(exc):
        return False
    LOG.debug('Detected UnableToModifyDuringSystemPOST error from BMC, '
              'will trigger retry logic. Error: %s', exc)
    return True
def _set_boot_device(task, system, device, persistent=False,
                     http_boot_url=None):
    """An internal routine to set the boot device.

    :param task: a task from TaskManager.
    :param system: a Redfish System object.
    :param device: the Redfish boot device.
    :param persistent: Boolean value. True if the boot device will
        persist to all future boots, False if not.
        Default: False.
    :param http_boot_url: A string value to be sent to the sushy library,
        which is sent to the BMC as the url to boot from.
    :raises: SushyError on an error from the Sushy library

    Vendor-specific logic:

    - Vendors listed in VENDORS_REQUIRING_FULL_BOOT_REQUEST:
      Require setting full boot parameters
      (mode, enabled, target) even if unchanged.
    """
    # The BMC handling of the persistent setting is vendor specific.
    # Some vendors require that it not be set if currently equal to
    # desired state (see https://storyboard.openstack.org/#!/story/2007355).
    # Supermicro BMCs handle it in the opposite manner - the
    # persistent setting must be set when setting the boot device
    # (see https://storyboard.openstack.org/#!/story/2008547).
    vendor = task.node.properties.get('vendor', None)
    LOG.debug("Vendor : %(vendor)s node %(uuid)s",
              {'vendor': vendor, 'uuid': task.node.uuid})
    requires_full_boot_request = (
        vendor and any(vendor_id in vendor.lower()
                       for vendor_id in
                       VENDORS_REQUIRING_FULL_BOOT_REQUEST)
    )
    # BUG FIX: compute the desired persistency setting unconditionally.
    # Previously it was only assigned in the non-Supermicro branch, while
    # the full-boot-request path below referenced it (and a bare, discarded
    # BOOT_DEVICE_PERSISTENT_MAP_REV[persistent] expression sat where the
    # assignment should have been).
    desired_enabled = BOOT_DEVICE_PERSISTENT_MAP_REV[persistent]
    if vendor and vendor.lower() == 'supermicro':
        enabled = desired_enabled
        LOG.debug('Setting BootSourceOverrideEnable to %(enable)s '
                  'on Supermicro BMC, node %(node)s',
                  {'enable': enabled, 'node': task.node.uuid})
    else:
        current_enabled = system.boot.get('enabled')
        # NOTE(etingof): this can be racy, esp if BMC is not RESTful
        enabled = (desired_enabled
                   if desired_enabled != current_enabled else None)

    # Logging callback for retry attempts (closure captures task)
    def _log_post_boot_retry(retry_state):
        LOG.warning('BMC is in POST, unable to modify boot device for '
                    'node %(node)s. Retrying in %(delay).1f seconds '
                    '(attempt %(attempt)d/%(total)d)',
                    {'node': task.node.uuid,
                     'delay': retry_state.next_action.sleep,
                     'attempt': retry_state.attempt_number,
                     'total': CONF.redfish.post_boot_retry_attempts})

    @tenacity.retry(
        retry=tenacity.retry_if_exception(_is_during_post_error),
        stop=tenacity.stop_after_attempt(
            CONF.redfish.post_boot_retry_attempts),
        wait=tenacity.wait_exponential(
            multiplier=1,
            min=CONF.redfish.post_boot_retry_delay,
            max=CONF.redfish.post_boot_retry_delay * 6),
        before_sleep=_log_post_boot_retry,
        reraise=True)
    def _do_set_boot_options():
        # NOTE(TheJulia): In sushy, it is uri, due to the convention used
        # in the standard. URL is used internally in ironic.
        if requires_full_boot_request:
            # Some vendors require sending all boot parameters every time
            desired_mode = system.boot.get('mode') \
                or sushy.BOOT_SOURCE_MODE_UEFI
            current_target = system.boot.get('target') \
                or sushy.BOOT_SOURCE_TARGET_NONE
            # BUG FIX: log the enabled value that is actually sent
            # (desired_enabled), not the stale current one.
            LOG.debug('Vendor "%(vendor)s" requires full boot settings. '
                      'Sending: mode=%(mode)s, enabled=%(enabled)s, '
                      'target=%(target)s for node %(node)s',
                      {'vendor': vendor, 'mode': desired_mode,
                       'enabled': desired_enabled,
                       'target': current_target, 'node': task.node.uuid})
            system.set_system_boot_options(
                device,
                mode=desired_mode,
                enabled=desired_enabled,
                http_boot_uri=http_boot_url
            )
        else:
            LOG.debug('Sending minimal Redfish boot device change for '
                      'node %(node)s', {'node': task.node.uuid})
            system.set_system_boot_options(device, enabled=enabled,
                                           http_boot_uri=http_boot_url)
    try:
        _do_set_boot_options()
    except sushy.exceptions.SushyError as e:
        if enabled == sushy.BOOT_SOURCE_ENABLED_CONTINUOUS:
            # NOTE(dtantsur): continuous boot device settings have been
            # removed from Redfish, and some vendors stopped supporting
            # it before an alternative was provided. As a work around,
            # use one-time boot and restore the boot device on every
            # reboot via RedfishPower.
            LOG.debug('Error %(error)s when trying to set a '
                      'persistent boot device on node %(node)s, '
                      'falling back to one-time boot settings',
                      {'error': e, 'node': task.node.uuid})
            system.set_system_boot_options(
                device, enabled=sushy.BOOT_SOURCE_ENABLED_ONCE,
                http_boot_uri=http_boot_url)
            LOG.warning('Could not set persistent boot device to '
                        '%(dev)s for node %(node)s, using one-time '
                        'boot device instead',
                        {'dev': device, 'node': task.node.uuid})
            utils.set_node_nested_field(
                task.node, 'driver_internal_info',
                'redfish_boot_device', BOOT_DEVICE_MAP[device])
            task.node.save()
        else:
            raise
[docs]
class RedfishManagement(base.ManagementInterface):
[docs]
def get_properties(self):
"""Return the properties of the interface.
:returns: dictionary of <property name>:<property description> entries.
"""
return redfish_utils.COMMON_PROPERTIES.copy()
[docs]
def validate(self, task):
"""Validates the driver information needed by the redfish driver.
:param task: a TaskManager instance containing the node to act on.
:raises: InvalidParameterValue on malformed parameter(s)
:raises: MissingParameterValue on missing parameter(s)
"""
redfish_utils.parse_driver_info(task.node)
[docs]
def get_supported_boot_devices(self, task):
"""Get a list of the supported boot devices.
:param task: a task from TaskManager.
:returns: A list with the supported boot devices defined
in :mod:`ironic.common.boot_devices`.
"""
return list(BOOT_DEVICE_MAP_REV)
[docs]
@task_manager.require_exclusive_lock
def restore_boot_device(self, task, system):
"""Restore boot device if needed.
Checks the redfish_boot_device internal flag and sets the one-time
boot device accordingly. A warning is issued if it fails.
This method is supposed to be called from the Redfish power interface
and should be considered private to the Redfish hardware type.
:param task: a task from TaskManager.
:param system: a Redfish System object.
"""
device = task.node.driver_internal_info.get('redfish_boot_device')
if not device:
return
try:
# We used to store Redfish constants, now we're storing Ironic
# values (which is more appropriate). Provide a compatibility layer
# for already deployed nodes.
redfish_device = BOOT_DEVICE_MAP_REV_COMPAT[device.lower()]
except KeyError:
LOG.error('BUG: unexpected redfish_boot_device %(dev)s for node '
'%(node)s', {'dev': device, 'node': task.node.uuid})
raise
LOG.debug('Restoring boot device %(dev)s on node %(node)s',
{'dev': device, 'node': task.node.uuid})
try:
_set_boot_device(task, system, redfish_device)
except sushy.exceptions.SushyError as e:
LOG.warning('Unable to recover boot device %(dev)s for node '
'%(node)s, relying on the pre-configured boot order. '
'Error: %(error)s',
{'dev': device, 'node': task.node.uuid, 'error': e})
[docs]
@task_manager.require_exclusive_lock
def set_boot_device(self, task, device, persistent=False):
"""Set the boot device for a node.
Set the boot device to use on next reboot of the node.
:param task: a task from TaskManager.
:param device: the boot device, one of
:mod:`ironic.common.boot_devices`.
:param persistent: Boolean value. True if the boot device will
persist to all future boots, False if not.
Default: False.
:raises: InvalidParameterValue on malformed parameter(s)
:raises: MissingParameterValue on missing parameter(s)
:raises: RedfishConnectionError when it fails to connect to Redfish
:raises: RedfishError on an error from the Sushy library
"""
utils.pop_node_nested_field(
task.node, 'driver_internal_info', 'redfish_boot_device')
http_boot_url = utils.pop_node_nested_field(
task.node, 'driver_internal_info', 'redfish_uefi_http_url')
task.node.save()
system = redfish_utils.get_system(task.node)
try:
_set_boot_device(
task, system, BOOT_DEVICE_MAP_REV[device],
persistent=persistent, http_boot_url=http_boot_url)
except sushy.exceptions.SushyError as e:
error_msg = (_('Redfish set boot device failed for node '
'%(node)s. Error: %(error)s') %
{'node': task.node.uuid, 'error': e})
LOG.error(error_msg)
raise exception.RedfishError(error=error_msg)
# Ensure that boot mode is synced with what is set.
# Some BMCs reset it to default (BIOS) when changing the boot device.
# It should only be synced on these vendors as other vendor
# implementations will result in an error
# (see https://storyboard.openstack.org/#!/story/2008712)
vendor = task.node.properties.get('vendor', None)
if vendor and vendor.lower() == 'supermicro':
boot_mode_utils.sync_boot_mode(task)
[docs]
def get_boot_device(self, task):
"""Get the current boot device for a node.
:param task: a task from TaskManager.
:raises: InvalidParameterValue on malformed parameter(s)
:raises: MissingParameterValue on missing parameter(s)
:raises: RedfishConnectionError when it fails to connect to Redfish
:raises: RedfishError on an error from the Sushy library
:returns: a dictionary containing:
:boot_device:
the boot device, one of :mod:`ironic.common.boot_devices` or
None if it is unknown.
:persistent:
Boolean value or None, True if the boot device persists,
False otherwise. None if it's unknown.
"""
system = redfish_utils.get_system(task.node)
return {'boot_device': BOOT_DEVICE_MAP.get(system.boot.get('target')),
'persistent': BOOT_DEVICE_PERSISTENT_MAP.get(
system.boot.get('enabled'))}
[docs]
def get_supported_boot_modes(self, task):
"""Get a list of the supported boot modes.
:param task: A task from TaskManager.
:returns: A list with the supported boot modes defined
in :mod:`ironic.common.boot_modes`. If boot
mode support can't be determined, empty list
is returned.
"""
return list(BOOT_MODE_MAP_REV)
[docs]
@task_manager.require_exclusive_lock
def set_boot_mode(self, task, mode):
"""Set the boot mode for a node.
Set the boot mode to use on next reboot of the node.
:param task: A task from TaskManager.
:param mode: The boot mode, one of
:mod:`ironic.common.boot_modes`.
:raises: InvalidParameterValue if an invalid boot mode is
specified.
:raises: MissingParameterValue if a required parameter is missing
:raises: RedfishConnectionError when it fails to connect to Redfish
:raises: RedfishError on an error from the Sushy library
"""
system = redfish_utils.get_system(task.node)
vendor = task.node.properties.get('vendor', None)
requires_full_boot_request = (
vendor and any(vendor_id in vendor.lower()
for vendor_id in
VENDORS_REQUIRING_FULL_BOOT_REQUEST)
)
LOG.debug('Requested %(vendor)s to set boot mode'
' to "%(mode)s" for node %(node)s',
{'mode': mode, 'node': task.node.uuid, 'vendor': vendor})
# NOTE(dtantsur): check the readability of the current mode before
# modifying anything. I suspect it can become None transiently after
# the update, while we need to know if it is supported *at all*.
get_mode_unsupported = (system.boot.get('mode') is None)
desired_mode = BOOT_MODE_MAP_REV[mode]
LOG.debug('Current boot mode read from Redfish for '
'node %(node)s is: %(mode)s',
{'node': task.node.uuid,
'mode': desired_mode})
try:
if requires_full_boot_request:
current_enabled = system.boot.get('enable') \
or sushy.BOOT_SOURCE_ENABLED_ONCE
current_target = system.boot.get('enable') \
or sushy.BOOT_SOURCE_TARGET_PXE
LOG.debug('Vendor "%(vendor)s" requires full boot settings. '
'Sending: mode=%(mode)s, enabled=%(enabled)s, '
'target=%(target)s for node %(node)s',
{'vendor': vendor, 'mode': desired_mode,
'enabled': current_enabled, 'node': task.node.uuid,
'target': current_target
})
system.set_system_boot_options(
mode=desired_mode,
enabled=current_enabled,
target=current_target
)
else:
LOG.debug('Sending minimal Redfish boot mode '
'change for node %(node)s',
{'node': task.node.uuid})
system.set_system_boot_options(mode=desired_mode)
except sushy.exceptions.SushyError as e:
error_msg = (_('Setting boot mode to %(mode)s '
'failed for node %(node)s. '
'Error: %(error)s') %
{'node': task.node.uuid, 'mode': mode,
'error': e})
LOG.error(error_msg)
# NOTE(sbaker): Some systems such as HPE Gen9 do not support
# getting or setting the boot mode. When setting failed and the
# mode attribute is missing from the boot field, raising
# UnsupportedDriverExtension will allow the deploy to continue.
if get_mode_unsupported:
LOG.info(_('Attempt to set boot mode on node %(node)s '
'failed to set boot mode as the node does not '
'appear to support overriding the boot mode. '
'Possibly partial Redfish implementation?'),
{'node': task.node.uuid})
raise exception.UnsupportedDriverExtension(
driver=task.node.driver, extension='set_boot_mode')
raise exception.RedfishError(error=error_msg)
# NOTE(dtantsur): this case is rather hypothetical, but in our own
# emulator, it's possible that mode is constantly set to None, while
# the request to change the mode succeeds.
if get_mode_unsupported:
LOG.warning('The request to set boot mode for node %(node)s to '
'%(value)s has succeeded, but the current mode is '
'not known. Skipping reboot and assuming '
'the operation has succeeded.',
{'node': task.node.uuid, 'value': mode})
return
self._wait_for_boot_mode(task, system, mode)
LOG.info('Boot mode for node %(node)s has been set to '
'%(value)s', {'node': task.node.uuid, 'value': mode})
    def _wait_for_boot_mode(self, task, system, mode):
        """Reboot the node if needed and wait until it reports *mode*.

        Refreshes the system, returns immediately if the requested boot
        mode is already in effect, otherwise reboots the node and polls
        the BMC until the mode matches or the configured timeout expires.
        The node's original power state is restored at the end.

        :param task: a task from TaskManager.
        :param system: a Redfish System object.
        :param mode: the requested boot mode, one of
                     :mod:`ironic.common.boot_modes`.
        :raises: RedfishError if the boot mode is still wrong when
                 ``[redfish]boot_mode_config_timeout`` is exceeded.
        """
        system.refresh(force=True)
        # NOTE(dtantsur/janders): at least Dell machines change boot mode via
        # a BIOS configuration job. A reboot is needed to apply it.
        if system.boot.get('mode') == BOOT_MODE_MAP_REV[mode]:
            LOG.debug('Node %(node)s is already configured with requested '
                      'boot mode %(new_value)s.',
                      {'node': task.node.uuid,
                       'new_value': BOOT_MODE_MAP_REV[mode]})
            return
        LOG.info('Rebooting node %(node)s to change boot mode from '
                 '%(old_value)s to %(new_value)s',
                 {'node': task.node.uuid,
                  'old_value': system.boot.get('mode'),
                  'new_value': BOOT_MODE_MAP_REV[mode]})
        # Remember the current power state so it can be restored below.
        old_power_state = task.driver.power.get_power_state(task)
        manager_utils.node_power_action(task, states.REBOOT)
        # A timeout of 0 disables waiting entirely; the power state is
        # still restored in that case.
        if CONF.redfish.boot_mode_config_timeout:
            threshold = time.time() + CONF.redfish.boot_mode_config_timeout
            while (time.time() <= threshold
                   and system.boot.get('mode') != BOOT_MODE_MAP_REV[mode]):
                LOG.debug('Still waiting for boot mode of node %(node)s '
                          'to become %(value)s, current is %(current)s',
                          {'node': task.node.uuid,
                           'value': BOOT_MODE_MAP_REV[mode],
                           'current': system.boot.get('mode')})
                # Poll at a fixed interval, re-reading state from the BMC.
                time.sleep(BOOT_MODE_CONFIG_INTERVAL)
                system.refresh(force=True)
            if system.boot.get('mode') != BOOT_MODE_MAP_REV[mode]:
                msg = (_('Timeout reached while waiting for boot mode of '
                         'node %(node)s to become %(value)s, '
                         'current is %(current)s')
                       % {'node': task.node.uuid,
                          'value': BOOT_MODE_MAP_REV[mode],
                          'current': system.boot.get('mode')})
                LOG.error(msg)
                raise exception.RedfishError(error=msg)
        manager_utils.node_power_action(task, old_power_state)
[docs]
def get_boot_mode(self, task):
"""Get the current boot mode for a node.
Provides the current boot mode of the node.
:param task: A task from TaskManager.
:raises: MissingParameterValue if a required parameter is missing
:raises: DriverOperationError or its derivative in case
of driver runtime error.
:returns: The boot mode, one of :mod:`ironic.common.boot_mode` or
None if it is unknown.
"""
system = redfish_utils.get_system(task.node)
return BOOT_MODE_MAP.get(system.boot.get('mode'))
@staticmethod
def _sensor2dict(resource, *fields):
return {
field: attr.value if hasattr(attr, 'value') else attr
for field in fields
if hasattr(resource, field)
for attr in [getattr(resource, field)]
}
[docs]
@base.clean_step(priority=0, abortable=False, argsinfo={
'target_datetime': {
'description': 'The datetime to set in ISO8601 format',
'required': True
},
'datetime_local_offset': {
'description': 'The local time offset from UTC',
'required': False
}
})
@task_manager.require_exclusive_lock
def set_bmc_clock(self, task, target_datetime, datetime_local_offset=None):
"""Set the BMC clock using Redfish Manager resource.
:param task: a TaskManager instance containing the node to act on.
:param target_datetime: The datetime to set in ISO8601 format
:param datetime_local_offset: The local time offset from UTC (optional)
:raises: RedfishError if the operation fails
"""
try:
system = redfish_utils.get_system(task.node)
manager = redfish_utils.get_manager(task.node, system)
LOG.debug("Setting BMC clock to %s (offset: %s)",
target_datetime, datetime_local_offset)
manager._conn.timeout = 30
manager.set_datetime(
target_datetime,
datetime_local_offset
)
manager.refresh()
if manager.datetime != target_datetime:
raise exception.RedfishError(
"BMC clock update failed: mismatch after setting datetime")
LOG.info(
"Successfully updated BMC clock for node %s",
task.node.uuid
)
except Exception as e:
LOG.exception("BMC clock update failed: %s", e)
raise exception.RedfishError(error=str(e))
[docs]
@base.verify_step(priority=1)
@task_manager.require_exclusive_lock
def verify_bmc_clock(self, task):
"""Verify and auto-set the BMC clock to the current UTC time.
This step compares the system UTC time to the BMC's Redfish datetime.
If the difference exceeds 1 second, it attempts to sync the time.
Verification fails only if the BMC time remains incorrect
after the update.
"""
if not CONF.redfish.enable_verify_bmc_clock:
LOG.info("Skipping BMC clock verify step: disabled via config")
return
try:
system_time = timeutils.utcnow().replace(
tzinfo=timezone.utc).isoformat()
system = redfish_utils.get_system(task.node)
manager = redfish_utils.get_manager(task.node, system)
manager.refresh()
manager_time = parser.isoparse(manager.datetime)
local_time = parser.isoparse(system_time)
LOG.debug("BMC time: %s, Local time: %s",
manager_time, local_time)
LOG.debug("manager.datetime_local_offset: %s",
manager.datetimelocaloffset)
# Fail if the BMC clock differs from system time
# by more than 1 second
if abs((manager_time - local_time).total_seconds()) > 1:
LOG.info("BMC clock is out of sync. Updating...")
manager.set_datetime(system_time,
datetime_local_offset="+00:00")
manager.refresh()
updated_time = parser.isoparse(manager.datetime)
if abs((updated_time - local_time).total_seconds()) > 1:
raise exception.RedfishError(
"BMC clock still incorrect after update")
LOG.info("BMC clock update successful for node %s",
task.node.uuid)
except Exception as e:
LOG.exception("BMC clock auto-update failed during verify: %s", e)
raise exception.NodeVerifyFailure(
node=getattr(task.node, 'uuid', 'unknown'),
reason="BMC clock verify step failed: %s" % str(e)
)
@classmethod
def _get_sensors_fan(cls, chassis):
"""Get fan sensors reading.
:param chassis: Redfish `chassis` object
:returns: returns a dict of sensor data.
"""
sensors = {}
for fan in chassis.thermal.fans:
sensor = cls._sensor2dict(
fan, 'identity', 'max_reading_range',
'min_reading_range', 'reading', 'reading_units',
'serial_number', 'physical_context')
sensor.update(cls._sensor2dict(fan.status, 'state', 'health'))
unique_name = '%s@%s' % (fan.identity, chassis.identity)
sensors[unique_name] = sensor
return sensors
@classmethod
def _get_sensors_temperatures(cls, chassis):
"""Get temperature sensors reading.
:param chassis: Redfish `chassis` object
:returns: returns a dict of sensor data.
"""
sensors = {}
for temps in chassis.thermal.temperatures:
sensor = cls._sensor2dict(
temps, 'identity', 'max_reading_range_temp',
'min_reading_range_temp', 'reading_celsius',
'physical_context', 'sensor_number')
sensor.update(cls._sensor2dict(temps.status, 'state', 'health'))
unique_name = '%s@%s' % (temps.identity, chassis.identity)
sensors[unique_name] = sensor
return sensors
@classmethod
def _get_sensors_power(cls, chassis):
"""Get power supply sensors reading.
:param chassis: Redfish `chassis` object
:returns: returns a dict of sensor data.
"""
sensors = {}
for power in chassis.power.power_supplies:
sensor = cls._sensor2dict(
power, 'power_capacity_watts',
'line_input_voltage', 'last_power_output_watts',
'serial_number')
sensor.update(cls._sensor2dict(power.status, 'state', 'health'))
unique_name = '%s:%s@%s' % (
power.identity, chassis.power.identity,
chassis.identity)
sensors[unique_name] = sensor
return sensors
@classmethod
def _get_sensor_drive(cls, drive, storage_identity, system_identity):
"""Get sensor data for a single drive.
:param drive: Individual drive or device object
:param storage_identity: Identity of the parent storage/simple_storage
:param system_identity: Identity of the parent system
:returns: tuple of (unique_name, sensor_data)
"""
sensor = cls._sensor2dict(
drive, 'name', 'model', 'capacity_bytes')
sensor.update(cls._sensor2dict(
drive.status, 'state', 'health'))
# Some vendors (e.g., HPE) omit State from Drive Status
# If drive is reporting health status, assume enabled
if not sensor.get('state') and sensor.get('health'):
sensor['state'] = 'Enabled'
unique_name = '%s:%s@%s' % (
drive.name, storage_identity, system_identity)
return unique_name, sensor
[docs]
def get_sensors_data(self, task):
"""Get sensors data.
Collects sensor data from chassis (fans, temperature, power) and
storage (drives) with minimal redfish API calls.
:param task: a TaskManager instance.
:returns: returns a dict of sensor data grouped by sensor type.
"""
# Note for dev: This function is called frequently (e.g 60s)
# So we must be careful about introducing new Redfish API
# calls as they tend to be slow. Depending on vendor
# this function may take up to 20s to finish.
# We are leveraging $expand the as much as possible
# So if you need to make additional call, first check
# if calling with expand already has your data.
node = task.node
sensors = collections.defaultdict(dict)
# 1 API call to get Chassis and Storage Links
system = redfish_utils.get_system(node)
# Collect hardware metadata
baremetal_fields = {
'Manufacturer': system.manufacturer,
'Model': system.model,
'UUID': system.uuid
}
sensors['Extra'] = baremetal_fields
# Get chassis with expanded data and process sensors
chassis_data = self._process_chassis_sensors(node, system)
sensors['Fan'].update(chassis_data['Fan'])
sensors['Temperature'].update(chassis_data['Temperature'])
sensors['Power'].update(chassis_data['Power'])
# Process storage sensors (drives)
# Prioritize SimpleStorage as it requires fewer API calls
drive_data = {}
simple_storage_available = False
try:
# SimpleStorage has drive data inline (1 API call)
drive_data = self._process_simple_storage_sensors(
node, system)
simple_storage_available = True
except sushy.exceptions.MissingAttributeError:
pass
# Fall back to Storage only if SimpleStorage is not available
if not simple_storage_available:
try:
# Storage requires following drive links (1+M calls)
drive_data = self._process_storage_sensors(
node, system)
except sushy.exceptions.MissingAttributeError:
LOG.debug("Storage not available for node %s", node.uuid)
sensors['Drive'].update(drive_data.get('Drive', {}))
return sensors
def _process_chassis_sensors(self, node, system):
"""Process all chassis sensors using single expanded.
Process all chassis sensors (Fan, Temperature, Power) using single
expanded Redfish API call.
:param node: Ironic node object
:param system: Redfish System object
:returns: Dictionary with Fan, Temperature, and Power sensor data
"""
sensors = {'Fan': {}, 'Temperature': {}, 'Power': {}}
try:
# 1 API call to get all chassis with expanded data
chassis_list = system.chassis_expanded
# Use first chassis if only one available, otherwise use first one
chassis = chassis_list[0] if chassis_list else None
if not chassis:
LOG.debug("No chassis found for node %s", node.uuid)
return sensors
try:
sensors['Fan'].update(self._get_sensors_fan(chassis))
except sushy.exceptions.SushyError as exc:
LOG.debug("Failed reading fan information for node "
"%(node)s: %(error)s", {'node': node.uuid,
'error': exc})
try:
sensors['Temperature'].update(
self._get_sensors_temperatures(chassis))
except sushy.exceptions.SushyError as exc:
LOG.debug("Failed reading temperature information for node "
"%(node)s: %(error)s", {'node': node.uuid,
'error': exc})
try:
sensors['Power'].update(self._get_sensors_power(chassis))
except sushy.exceptions.SushyError as exc:
LOG.debug("Failed reading power information for node "
"%(node)s: %(error)s", {'node': node.uuid,
'error': exc})
except Exception as exc:
LOG.debug("Failed reading expanded chassis information for "
"node %(node)s: %(error)s",
{'node': node.uuid, 'error': exc})
return sensors
def _process_storage_sensors(self, node, system):
"""Process all storage sensors using storage expansion optimization.
Processes all storage sensors (Drive) with expand call.
Extracts only the available drive links from the expanded
storage collection and processes them directly.
:param node: Ironic node object
:param system: Redfish System object
:returns: Dictionary with Drive sensor data
"""
storage_sensors = {'Drive': {}}
# Get system identity from driver info
driver_info = redfish_utils.parse_driver_info(node)
system_identity = driver_info['system_id'].split('/')[-1]
try:
drives = {}
# 1 API call to get all the available Drives uri using $expand
storage_collection_expanded = system.storage_expanded
# Process drives from all storage controllers
# M API calls (M is the number of drives)
for storage in storage_collection_expanded.get_members():
try:
if storage.drives_identities:
# Process drives from Storage
for drive in storage.drives:
unique_name, sensor = self._get_sensor_drive(
drive, storage.identity, system_identity)
drives[unique_name] = sensor
except Exception as drive_exc:
LOG.debug("Failed to process drives from storage %s: %s",
storage.identity, drive_exc)
continue
storage_sensors['Drive'].update(drives)
except sushy.exceptions.SushyError as exc:
LOG.debug("Failed reading drive information for node "
"%(node)s: %(error)s", {'node': node.uuid,
'error': exc})
return storage_sensors
def _process_simple_storage_sensors(self, node, system):
"""Process drive sensors from SimpleStorage.
SimpleStorage provides drive data inline, requiring only 1 API call
instead of following individual drive links like Storage.
:param node: Ironic node object
:param system: Redfish System object
:returns: Dictionary with Drive sensor data
"""
simple_storage_sensors = {'Drive': {}}
# Get system identity from driver info
driver_info = redfish_utils.parse_driver_info(node)
system_identity = driver_info['system_id'].split('/')[-1]
try:
drives = {}
# 1 API call to get all SimpleStorage with devices expanded
simple_storage_collection_expanded = system.simple_storage_expanded
# Process devices from all simple storage controllers
for simple_storage in (
simple_storage_collection_expanded.get_members()):
try:
# Process devices directly from SimpleStorage
for device in simple_storage.devices:
# Skip devices without capacity (e.g., backplanes,
# enclosures). These are physical infrastructure, not
# storage drives. Including them causes Prometheus
# label inconsistency errors since they lack fields
# like capacity_bytes that actual drives have.
if not device.capacity_bytes:
continue
unique_name, sensor = self._get_sensor_drive(
device, simple_storage.identity, system_identity)
drives[unique_name] = sensor
except Exception as device_exc:
LOG.debug("Failed to process devices from simple "
"storage %s: %s",
simple_storage.identity, device_exc)
continue
simple_storage_sensors['Drive'].update(drives)
except sushy.exceptions.MissingAttributeError:
# Re-raise MissingAttributeError so caller can fall back to Storage
raise
except sushy.exceptions.SushyError as exc:
LOG.debug("Failed reading simple storage information for node "
"%(node)s: %(error)s", {'node': node.uuid,
'error': exc})
return simple_storage_sensors
[docs]
@task_manager.require_exclusive_lock
def inject_nmi(self, task):
    """Inject NMI, Non Maskable Interrupt.

    Immediately delivers an NMI to the node via the Redfish system
    reset action.

    :param task: A TaskManager instance containing the node to act on.
    :raises: InvalidParameterValue on malformed parameter(s)
    :raises: MissingParameterValue on missing parameter(s)
    :raises: RedfishConnectionError when it fails to connect to Redfish
    :raises: RedfishError on an error from the Sushy library
    """
    system = redfish_utils.get_system(task.node)
    try:
        system.reset_system(sushy.RESET_NMI)
    except sushy.exceptions.SushyError as e:
        message = _('Redfish inject NMI failed for node %(node)s. '
                    'Error: %(error)s') % {'node': task.node.uuid,
                                           'error': e}
        LOG.error(message)
        raise exception.RedfishError(error=message)
[docs]
def get_node_health(self, task):
    """Get the current health status for a node.

    Reads the hardware health from the Redfish System resource. The
    health status represents the overall condition of the server
    hardware (processors, memory, fans, power supplies, storage, etc.).

    :param task: A task from TaskManager.
    :raises: RedfishConnectionError when it fails to connect to Redfish
    :raises: RedfishError on an error from the Sushy library
    :returns: One of :mod:`ironic.common.health_states` constants
        (OK, WARNING, CRITICAL) or None if health status is
        not available.
    """
    system = redfish_utils.get_system(task.node)
    status = system.status
    # Translate the Redfish health value when the BMC provides one;
    # otherwise report that health is unavailable.
    if status and status.health:
        return HEALTH_MAP.get(status.health)
    return None
[docs]
def get_supported_indicators(self, task, component=None):
    """Get a map of the supported indicators (e.g. LEDs).

    :param task: A task from TaskManager.
    :param component: If not `None`, return indicator information
        for just this component, otherwise return indicators for
        all existing components.
    :returns: A dictionary of hardware components
        (:mod:`ironic.common.components`) as keys with values
        being dictionaries having indicator IDs as keys and indicator
        properties as values.

        ::

            {
                'chassis': {
                    'enclosure-0': {
                        "readonly": true,
                        "states": ["OFF", "ON"]
                    }
                },
                'system': {
                    'blade-A': {
                        "readonly": true,
                        "states": ["OFF", "ON"]
                    }
                },
                'drive': {
                    'ssd0': {
                        "readonly": true,
                        "states": ["OFF", "ON"]
                    }
                }
            }
    """
    # Common properties reported for every discovered indicator.
    led_properties = {
        "readonly": False,
        "states": [
            indicator_states.BLINKING,
            indicator_states.OFF,
            indicator_states.ON
        ]
    }
    indicators = {}
    system = redfish_utils.get_system(task.node)
    try:
        if component in (None, components.CHASSIS) and system.chassis:
            indicators[components.CHASSIS] = {
                chassis.uuid: led_properties
                for chassis in system.chassis
                if chassis.indicator_led
            }
    except sushy.exceptions.SushyError as e:
        LOG.debug('Chassis indicator not available for node %(node)s: '
                  '%(error)s', {'node': task.node.uuid, 'error': e})
    try:
        if component in (None, components.SYSTEM) and system.indicator_led:
            indicators[components.SYSTEM] = {system.uuid: led_properties}
    except sushy.exceptions.SushyError as e:
        LOG.debug('System indicator not available for node %(node)s: '
                  '%(error)s', {'node': task.node.uuid, 'error': e})
    try:
        if component in (None, components.DISK) and system.storage:
            # NOTE(vanou) There is no uuid property in Drive resource.
            # There is no guarantee Id property of Drive is unique
            # across all drives attached to server.
            indicators[components.DISK] = {
                ':'.join([storage.identity, drive.identity]):
                    led_properties
                for storage in system.storage.get_members()
                for drive in storage.drives
                if drive.indicator_led
            }
    except sushy.exceptions.SushyError as e:
        LOG.debug('Drive indicator not available for node %(node)s: '
                  '%(error)s', {'node': task.node.uuid, 'error': e})
    return indicators
[docs]
def set_indicator_state(self, task, component, indicator, state):
    """Set indicator on the hardware component to the desired state.

    :param task: A task from TaskManager.
    :param component: The hardware component, one of
        :mod:`ironic.common.components`.
    :param indicator: Indicator ID (as reported by
        `get_supported_indicators`).
    :param state: Desired state of the indicator, one of
        :mod:`ironic.common.indicator_states`.
    :raises: InvalidParameterValue if an invalid component, indicator
        or state is specified.
    :raises: MissingParameterValue if a required parameter is missing
    :raises: RedfishError on an error from the Sushy library
    """
    system = redfish_utils.get_system(task.node)
    # Drive indicators are addressed as '<storage-id>:<drive-id>'.
    drive_ref = indicator.split(':')
    try:
        if component == components.SYSTEM and indicator == system.uuid:
            system.set_indicator_led(INDICATOR_MAP_REV[state])
            return
        if component == components.CHASSIS and system.chassis:
            for chassis in system.chassis:
                if chassis.uuid == indicator:
                    chassis.set_indicator_led(INDICATOR_MAP_REV[state])
                    return
        if (component == components.DISK and system.storage
                and len(drive_ref) == 2):
            storage_id, drive_id = drive_ref
            for storage in system.storage.get_members():
                if storage.identity == storage_id:
                    for drive in storage.drives:
                        if drive.identity == drive_id:
                            drive.set_indicator_led(
                                INDICATOR_MAP_REV[state])
                            return
    except sushy.exceptions.SushyError as e:
        error_msg = (_('Redfish set %(component)s indicator %(indicator)s '
                       'state %(state)s failed for node %(node)s. Error: '
                       '%(error)s') % {'component': component,
                                       'indicator': indicator,
                                       'state': state,
                                       'node': task.node.uuid,
                                       'error': e})
        LOG.error(error_msg)
        raise exception.RedfishError(error=error_msg)
    # Nothing matched the requested component/indicator pair.
    raise exception.MissingParameterValue(_(
        "Unknown indicator %(indicator)s for component %(component)s of "
        "node %(uuid)s") % {'indicator': indicator,
                            'component': component,
                            'uuid': task.node.uuid})
[docs]
def get_indicator_state(self, task, component, indicator):
    """Get current state of the indicator of the hardware component.

    :param task: A task from TaskManager.
    :param component: The hardware component, one of
        :mod:`ironic.common.components`.
    :param indicator: Indicator ID (as reported by
        `get_supported_indicators`).
    :raises: MissingParameterValue if a required parameter is missing
    :raises: RedfishError on an error from the Sushy library
    :returns: Current state of the indicator, one of
        :mod:`ironic.common.indicator_states`.
    """
    system = redfish_utils.get_system(task.node)
    # Drive indicators are addressed as '<storage-id>:<drive-id>'.
    drive_ref = indicator.split(':')
    try:
        if component == components.SYSTEM and indicator == system.uuid:
            return INDICATOR_MAP[system.indicator_led]
        if component == components.CHASSIS and system.chassis:
            for chassis in system.chassis:
                if chassis.uuid == indicator:
                    return INDICATOR_MAP[chassis.indicator_led]
        if (component == components.DISK and system.storage
                and len(drive_ref) == 2):
            storage_id, drive_id = drive_ref
            for storage in system.storage.get_members():
                if storage.identity == storage_id:
                    for drive in storage.drives:
                        if drive.identity == drive_id:
                            return INDICATOR_MAP[drive.indicator_led]
    except sushy.exceptions.SushyError as e:
        error_msg = (_('Redfish get %(component)s indicator %(indicator)s '
                       'state failed for node %(node)s. Error: '
                       '%(error)s') % {'component': component,
                                       'indicator': indicator,
                                       'node': task.node.uuid,
                                       'error': e})
        LOG.error(error_msg)
        raise exception.RedfishError(error=error_msg)
    # Nothing matched the requested component/indicator pair.
    raise exception.MissingParameterValue(_(
        "Unknown indicator %(indicator)s for component %(component)s of "
        "node %(uuid)s") % {'indicator': indicator,
                            'component': component,
                            'uuid': task.node.uuid})
[docs]
def detect_vendor(self, task):
    """Detects and returns the hardware vendor.

    Uses the System's Manufacturer field.

    :param task: A task from TaskManager.
    :raises: InvalidParameterValue if an invalid component, indicator
        or state is specified.
    :raises: MissingParameterValue if a required parameter is missing
    :raises: RedfishError on driver-specific problems.
    :returns: String representing the BMC reported Vendor or
        Manufacturer, otherwise returns None.
    """
    system = redfish_utils.get_system(task.node)
    return system.manufacturer
[docs]
@METRICS.timer('RedfishManagement.update_firmware')
@base.clean_step(priority=0, abortable=False,
                 argsinfo=_FIRMWARE_UPDATE_ARGS)
@base.service_step(priority=0, abortable=False,
                   argsinfo=_FIRMWARE_UPDATE_ARGS)
def update_firmware(self, task, firmware_images):
    """Updates the firmware on the node.

    :param task: a TaskManager instance containing the node to act on.
    :param firmware_images: A list of firmware images are to apply.
    :returns: None if it is completed.
    :raises: RedfishError on an error from the Sushy library.
    """
    firmware_utils.validate_update_firmware_args(firmware_images)
    node = task.node
    LOG.debug('Updating firmware on node %(node_uuid)s with firmware '
              '%(firmware_images)s',
              {'node_uuid': node.uuid,
               'firmware_images': firmware_images})
    update_service = redfish_utils.get_update_service(node)
    # The cleaning infrastructure has an exclusive lock on the node, so
    # there is no need to get one here.
    self._apply_firmware_update(node, update_service, firmware_images)
    # set_async_step_flags calls node.save()
    deploy_utils.set_async_step_flags(
        node, reboot=True, skip_current_step=True, polling=True)
    return deploy_utils.reboot_to_finish_step(task)
def _apply_firmware_update(self, node, update_service, firmware_updates):
    """Applies the next firmware update to the node.

    Submits the first entry of ``firmware_updates`` to the BMC's
    update service and records the resulting task monitor URI.

    Note that the caller must have an exclusive lock on the node and
    the caller must ensure node.save() is called after making this
    call.

    :param node: the node to apply the next update to
    :param update_service: the sushy firmware update service
    :param firmware_updates: the remaining firmware updates to apply
    """
    next_update = firmware_updates[0]
    firmware_url, cleanup_source = self._stage_firmware_file(
        node, next_update)
    LOG.debug('Applying firmware %(firmware_image)s to node '
              '%(node_uuid)s',
              {'firmware_image': firmware_url,
               'node_uuid': node.uuid})
    # Kick off the update and remember the monitor URI so the periodic
    # task can poll its progress.
    monitor = update_service.simple_update(firmware_url)
    next_update['task_monitor'] = monitor.task_monitor_uri
    node.set_driver_internal_info('firmware_updates', firmware_updates)
    if cleanup_source:
        # Track each staging source once, so its files can be removed
        # after all updates finish.
        pending_cleanup = node.driver_internal_info.get('firmware_cleanup')
        if not pending_cleanup:
            pending_cleanup = [cleanup_source]
        elif cleanup_source not in pending_cleanup:
            pending_cleanup.append(cleanup_source)
        node.set_driver_internal_info('firmware_cleanup', pending_cleanup)
def _continue_firmware_updates(self, task, update_service,
                               firmware_updates):
    """Continues processing the firmware updates

    Continues to process the firmware updates on the node.

    Note that the caller must have an exclusive lock on the node.

    :param task: a TaskManager instance containing the node to act on.
    :param update_service: the sushy firmware update service
    :param firmware_updates: the remaining firmware updates to apply
    """
    node = task.node
    firmware_update = firmware_updates[0]
    wait_interval = firmware_update.get('wait')
    if wait_interval:
        # The current update requested a post-update pause: record when
        # the wait started so the periodic status check can resume once
        # the interval has elapsed, then persist and return.
        time_now = str(timeutils.utcnow().isoformat())
        firmware_update['wait_start_time'] = time_now
        LOG.debug('Waiting at %(time)s for %(seconds)s seconds after '
                  'firmware update %(firmware_image)s on node %(node)s',
                  {'time': time_now,
                   'seconds': wait_interval,
                   'firmware_image': firmware_update['url'],
                   'node': node.uuid})
        node.set_driver_internal_info('firmware_updates',
                                      firmware_updates)
        node.save()
        return
    if len(firmware_updates) == 1:
        # That was the last update: clean up the artifacts and resume
        # whichever operation (clean/service/deploy) started the updates.
        self._clear_firmware_updates(node)
        LOG.info('Firmware updates completed for node %(node)s',
                 {'node': node.uuid})
        if task.node.clean_step:
            manager_utils.notify_conductor_resume_clean(task)
        elif task.node.service_step:
            manager_utils.notify_conductor_resume_service(task)
        elif task.node.deploy_step:
            manager_utils.notify_conductor_resume_deploy(task)
    else:
        # More updates remain: drop the completed one, submit the next
        # to the BMC and reboot so the new firmware takes effect.
        firmware_updates.pop(0)
        self._apply_firmware_update(node,
                                    update_service,
                                    firmware_updates)
        node.save()
        manager_utils.node_power_action(task, states.REBOOT)
def _clear_firmware_updates(self, node):
    """Clears firmware updates artifacts.

    Removes the firmware update bookkeeping from driver_internal_info
    and deletes any files that were staged for the updates.

    Note that the caller must have an exclusive lock on the node.

    :param node: the node to clear the firmware updates from
    """
    firmware_utils.cleanup(node)
    for info_key in ('firmware_updates', 'firmware_cleanup'):
        node.del_driver_internal_info(info_key)
    node.save()
@METRICS.timer('RedfishManagement._query_firmware_update_failed')
@periodics.node_periodic(
    purpose='checking if async firmware update failed',
    spacing=CONF.redfish.firmware_update_fail_interval,
    filters={'reserved': False,
             'provision_state_in': {states.CLEANFAIL,
                                    states.SERVICEFAIL,
                                    states.DEPLOYFAIL},
             'maintenance': True},
    predicate_extra_fields=['driver_internal_info'],
    predicate=lambda n: n.driver_internal_info.get('firmware_updates'),
)
def _query_firmware_update_failed(self, task, manager, context):
    """Periodic job to check for failed firmware updates.

    Runs only for unreserved nodes in maintenance that ended up in a
    *FAIL provision state while firmware updates were still pending in
    ``driver_internal_info['firmware_updates']``.

    :param task: a TaskManager instance for the node being checked.
    :param manager: conductor manager (unused by this method).
    :param context: request context (unused by this method).
    """
    # A firmware update failed. Discard any remaining firmware
    # updates so when the user takes the node out of
    # maintenance mode, pending firmware updates do not
    # automatically continue.
    LOG.warning('Firmware update failed for node %(node)s. '
                'Discarding remaining firmware updates.',
                {'node': task.node.uuid})
    # Clearing driver_internal_info requires an exclusive lock.
    task.upgrade_lock()
    self._clear_firmware_updates(task.node)
@METRICS.timer('RedfishManagement._query_firmware_update_status')
@periodics.node_periodic(
    purpose='checking async firmware update tasks',
    spacing=CONF.redfish.firmware_update_status_interval,
    filters={'reserved': False,
             'provision_state_in': {states.CLEANWAIT,
                                    states.SERVICEWAIT,
                                    states.DEPLOYWAIT}},
    predicate_extra_fields=['driver_internal_info'],
    predicate=lambda n: n.driver_internal_info.get('firmware_updates'),
)
def _query_firmware_update_status(self, task, manager, context):
    """Periodic job to check firmware update tasks.

    Runs only for unreserved nodes in a *WAIT provision state that have
    pending entries in ``driver_internal_info['firmware_updates']``.

    :param task: a TaskManager instance for the node being checked.
    :param manager: conductor manager (unused by this method).
    :param context: request context (unused by this method).
    """
    self._check_node_firmware_update(task)
@METRICS.timer('RedfishManagement._check_node_firmware_update')
def _check_node_firmware_update(self, task):
    """Check the progress of running firmware update on a node.

    Handles, in order: an unreachable BMC (retry later), a requested
    post-update wait interval, a vanished task monitor (assume
    success), and finally completion/failure of the BMC task itself.

    :param task: a TaskManager instance containing the node to check.
    """
    node = task.node
    firmware_updates = node.driver_internal_info['firmware_updates']
    current_update = firmware_updates[0]
    try:
        update_service = redfish_utils.get_update_service(node)
    except exception.RedfishConnectionError as e:
        # If the BMC firmware is being updated, the BMC will be
        # unavailable for some amount of time.
        LOG.warning('Unable to communicate with firmware update service '
                    'on node %(node)s. Will try again on the next poll. '
                    'Error: %(error)s',
                    {'node': node.uuid,
                     'error': e})
        return
    wait_start_time = current_update.get('wait_start_time')
    if wait_start_time:
        # A post-update wait was requested; only proceed to the next
        # update once the configured interval has elapsed.
        wait_start = timeutils.parse_isotime(wait_start_time)
        elapsed_time = timeutils.utcnow(True) - wait_start
        if elapsed_time.seconds >= current_update['wait']:
            LOG.debug('Finished waiting after firmware update '
                      '%(firmware_image)s on node %(node)s. '
                      'Elapsed time: %(seconds)s seconds',
                      {'firmware_image': current_update['url'],
                       'node': node.uuid,
                       'seconds': elapsed_time.seconds})
            current_update.pop('wait', None)
            current_update.pop('wait_start_time', None)
            task.upgrade_lock()
            self._continue_firmware_updates(task,
                                            update_service,
                                            firmware_updates)
        else:
            LOG.debug('Continuing to wait after firmware update '
                      '%(firmware_image)s on node %(node)s. '
                      'Elapsed time: %(seconds)s seconds',
                      {'firmware_image': current_update['url'],
                       'node': node.uuid,
                       'seconds': elapsed_time.seconds})
        return
    try:
        task_monitor = redfish_utils.get_task_monitor(
            node, current_update['task_monitor'])
    except exception.RedfishError:
        # The BMC deleted the Task before we could query it
        LOG.warning('Firmware update completed for node %(node)s, '
                    'firmware %(firmware_image)s, but success of the '
                    'update is unknown. Assuming update was successful.',
                    {'node': node.uuid,
                     'firmware_image': current_update['url']})
        task.upgrade_lock()
        self._continue_firmware_updates(task,
                                        update_service,
                                        firmware_updates)
        return
    if not task_monitor.is_processing:
        # The last response does not necessarily contain a Task,
        # so get it
        sushy_task = task_monitor.get_task()
        # Only parse the messages if the BMC did not return parsed
        # messages
        messages = []
        if sushy_task.messages and not sushy_task.messages[0].message:
            sushy_task.parse_messages()
        messages = [m.message for m in sushy_task.messages]
        if (sushy_task.task_state == sushy.TASK_STATE_COMPLETED
                and sushy_task.task_status in
                [sushy.HEALTH_OK, sushy.HEALTH_WARNING]):
            LOG.info('Firmware update succeeded for node %(node)s, '
                     'firmware %(firmware_image)s: %(messages)s',
                     {'node': node.uuid,
                      'firmware_image': current_update['url'],
                      'messages': ", ".join(messages)})
            task.upgrade_lock()
            self._continue_firmware_updates(task,
                                            update_service,
                                            firmware_updates)
        else:
            # The BMC task finished unsuccessfully: abandon the
            # remaining updates and fail the cleaning operation.
            error_msg = (_('Firmware update failed for node %(node)s, '
                           'firmware %(firmware_image)s. '
                           'Error: %(errors)s') %
                         {'node': node.uuid,
                          'firmware_image': current_update['url'],
                          'errors': ", ".join(messages)})
            task.upgrade_lock()
            self._clear_firmware_updates(node)
            manager_utils.cleaning_error_handler(task, error_msg)
    else:
        # Task still running; check again on the next periodic poll.
        LOG.debug('Firmware update in progress for node %(node)s, '
                  'firmware %(firmware_image)s.',
                  {'node': node.uuid,
                   'firmware_image': current_update['url']})
def _stage_firmware_file(self, node, firmware_update):
    """Stage firmware update according to configuration.

    :param node: Node for which to stage the firmware file
    :param firmware_update: Firmware update to stage
    :returns: Tuple of staged URL and source that needs cleanup of
        staged files afterwards. If not staging, then return
        original URL and None for source that needs cleanup.
    :raises IronicException: If something goes wrong with staging.
    """
    try:
        url = firmware_update['url']
        parsed = urlparse(url)
        scheme = parsed.scheme.lower()
        source = (firmware_update.get('source')
                  or CONF.redfish.firmware_source).lower()
        # Keep it simple, in further processing TLS does not matter
        if scheme == 'https':
            scheme = 'http'
        # No staging when both the configured source and the URL
        # scheme are HTTP: serve from the original location.
        if source == scheme == 'http':
            LOG.debug('For node %(node)s serving firmware from original '
                      'location %(url)s', {'node': node.uuid, 'url': url})
            return url, None
        # Likewise for Swift: nothing is moved, a temporary URL is
        # handed out instead.
        if source == scheme == 'swift':
            temp_url = firmware_utils.get_swift_temp_url(parsed)
            LOG.debug('For node %(node)s serving original firmware at '
                      '%(url)s via Swift temporary url %(temp_url)s',
                      {'node': node.uuid, 'url': url,
                       'temp_url': temp_url})
            return temp_url, None
        # Anything else: download, verify and stage the image locally.
        staged_file = firmware_utils.download_to_temp(node, url)
        firmware_utils.verify_checksum(
            node, firmware_update.get('checksum'), staged_file)
        return firmware_utils.stage(node, source, staged_file)
    except exception.IronicException:
        firmware_utils.cleanup(node)
        raise
[docs]
def get_secure_boot_state(self, task):
    """Get the current secure boot state for the node.

    :param task: A task from TaskManager.
    :raises: MissingParameterValue if a required parameter is missing
    :raises: RedfishError or its derivative in case of a driver
        runtime error.
    :raises: UnsupportedDriverExtension if secure boot is
        not supported by the hardware.
    :returns: Boolean or None if status cannot be retrieved
    """
    system = redfish_utils.get_system(task.node)
    try:
        return system.secure_boot.enabled
    except sushy.exceptions.AccessError as e:
        if 'OemLicenseNotPassed' not in str(e):
            raise
        # NOTE(cid): Supermicro gear requires a license to report
        # secure boot status.
        LOG.info("Secure boot status request through Redfish failed "
                 "for node %(node)s: %(error)s",
                 {'node': task.node.uuid, 'error': e})
        return None
    except sushy.exceptions.MissingAttributeError:
        raise exception.UnsupportedDriverExtension(
            driver=task.node.driver, extension='get_secure_boot_state')
[docs]
def set_secure_boot_state(self, task, state):
    """Set the current secure boot state for the node.

    :param task: A task from TaskManager.
    :param state: A new state as a boolean.
    :raises: MissingParameterValue if a required parameter is missing
    :raises: RedfishError or its derivative in case of a driver
        runtime error.
    :raises: UnsupportedDriverExtension if secure boot is
        not supported by the hardware.
    """
    system = redfish_utils.get_system(task.node)
    try:
        secure_boot = system.secure_boot
    except sushy.exceptions.MissingAttributeError:
        LOG.error('Secure boot has been requested for node %s but its '
                  'Redfish BMC does not have a SecureBoot object',
                  task.node.uuid)
        raise exception.UnsupportedDriverExtension(
            driver=task.node.driver, extension='set_secure_boot_state')
    if secure_boot.enabled == state:
        # Nothing to do, the BMC already reports the requested state.
        LOG.info('Secure boot state for node %(node)s is already '
                 '%(value)s', {'node': task.node.uuid, 'value': state})
        return
    if system.boot.get('mode') == sushy.BOOT_SOURCE_MODE_BIOS:
        # NOTE(dtantsur): the case of disabling secure boot when boot mode
        # is legacy should be covered by the check above.
        msg = (_("Configuring secure boot requires UEFI for node %s")
               % task.node.uuid)
        LOG.error(msg)
        raise exception.RedfishError(error=msg)
    try:
        secure_boot.set_enabled(state)
    except sushy.exceptions.SushyError as exc:
        msg = (_('Failed to set secure boot state on node %(node)s to '
                 '%(value)s: %(exc)s')
               % {'node': task.node.uuid, 'value': state, 'exc': exc})
        LOG.error(msg)
        raise exception.RedfishError(error=msg)
    # Some BMCs apply the change asynchronously; block until the
    # reported state matches the request (may involve a reboot).
    self._wait_for_secure_boot(task, secure_boot, state)
    LOG.info('Secure boot state for node %(node)s has been set to '
             '%(value)s', {'node': task.node.uuid, 'value': state})
def _wait_for_secure_boot(self, task, sb, state):
    """Wait until the BMC reports the requested secure boot state.

    Reboots the node when a simple refresh does not show the new
    state, then polls until the state matches or
    ``[redfish]boot_mode_config_timeout`` expires.

    :param task: a task from TaskManager.
    :param sb: the sushy SecureBoot resource of the node's system.
    :param state: the desired secure boot state as a boolean.
    :raises: RedfishError if the state does not change in time.
    """
    # NOTE(dtantsur): at least Dell machines change secure boot status via
    # a BIOS configuration job. A reboot is needed to apply it.
    def _try_refresh():
        try:
            sb.refresh(force=True)
        except sushy.exceptions.ServerSideError:
            return False  # sushy already does logging, just return
        else:
            return True

    if _try_refresh() and sb.enabled == state:
        return
    LOG.info('Rebooting node %(node)s to change secure boot state to '
             '%(value)s', {'node': task.node.uuid, 'value': state})
    # Remember the pre-reboot power state so it can be restored below.
    old_power_state = task.driver.power.get_power_state(task)
    manager_utils.node_power_action(task, states.REBOOT)
    if CONF.redfish.boot_mode_config_timeout:
        threshold = time.time() + CONF.redfish.boot_mode_config_timeout
        # Poll until the BMC reports the requested state or the
        # timeout threshold is crossed.
        while time.time() <= threshold and sb.enabled != state:
            LOG.debug(
                'Still waiting for secure boot state of node %(node)s '
                'to become %(value)s, current is %(current)s',
                {'node': task.node.uuid, 'value': state,
                 'current': sb.enabled})
            time.sleep(BOOT_MODE_CONFIG_INTERVAL)
            _try_refresh()
        if sb.enabled != state:
            msg = (_('Timeout reached while waiting for secure boot state '
                     'of node %(node)s to become %(state)s, '
                     'current is %(current)s')
                   % {'node': task.node.uuid, 'state': state,
                      'current': sb.enabled})
            LOG.error(msg)
            raise exception.RedfishError(error=msg)
    manager_utils.node_power_action(task, old_power_state)
def _reset_keys(self, task, reset_type):
    """Request a secure boot key reset from the BMC.

    :param task: a task from TaskManager.
    :param reset_type: a sushy SECURE_BOOT_RESET_KEYS_* constant.
    :raises: UnsupportedDriverExtension if the BMC does not expose a
        SecureBoot resource.
    :raises: RedfishError if the reset request fails.
    """
    system = redfish_utils.get_system(task.node)
    try:
        secure_boot = system.secure_boot
    except sushy.exceptions.MissingAttributeError:
        LOG.error('Resetting secure boot keys has been requested for node '
                  '%s but its Redfish BMC does not have a SecureBoot '
                  'object', task.node.uuid)
        raise exception.UnsupportedDriverExtension(
            driver=task.node.driver, extension='reset_keys')
    try:
        secure_boot.reset_keys(reset_type)
    except sushy.exceptions.SushyError as exc:
        msg = (_('Failed to reset secure boot keys on node %(node)s: '
                 '%(exc)s')
               % {'node': task.node.uuid, 'exc': exc})
        LOG.error(msg)
        raise exception.RedfishError(error=msg)
[docs]
@METRICS.timer('RedfishManagement.reset_secure_boot_keys_to_default')
@base.deploy_step(priority=0)
@base.clean_step(priority=0)
def reset_secure_boot_keys_to_default(self, task):
    """Reset secure boot keys to manufacturing defaults.

    :param task: a task from TaskManager.
    :raises: UnsupportedDriverExtension if secure boot is not supported.
    :raises: RedfishError on runtime driver error.
    """
    self._reset_keys(task, sushy.SECURE_BOOT_RESET_KEYS_TO_DEFAULT)
    LOG.info('Secure boot keys have been reset to their defaults on '
             'node %s', task.node.uuid)
[docs]
@METRICS.timer('RedfishManagement.clear_secure_boot_keys')
@base.deploy_step(priority=0)
@base.clean_step(priority=0)
def clear_secure_boot_keys(self, task):
    """Clear all secure boot keys.

    :param task: a task from TaskManager.
    :raises: UnsupportedDriverExtension if secure boot is not supported.
    :raises: RedfishError on runtime driver error.
    """
    self._reset_keys(task, sushy.SECURE_BOOT_RESET_KEYS_DELETE_ALL)
    LOG.info('Secure boot keys have been removed from node %s',
             task.node.uuid)
[docs]
def get_mac_addresses(self, task):
    """Get MAC address information for the node.

    :param task: A TaskManager instance containing the node to act on.
    :raises: RedfishConnectionError when it fails to connect to Redfish
    :raises: RedfishError on an error from the Sushy library
    :returns: A list of MAC addresses for the node, or None when the
        BMC does not expose the EthernetInterfaces attribute.
    """
    system = redfish_utils.get_system(task.node)
    try:
        return list(redfish_utils.get_enabled_macs(task, system))
    except sushy.exceptions.MissingAttributeError as exc:
        # NOTE(janders) we should handle MissingAttributeError separately
        # from other SushyErrors - some servers (e.g. some Cisco UCSB and
        # UCSX blades) are missing EthernetInterfaces attribute yet could
        # be provisioned successfully if MAC information is provided
        # manually AND this exception is caught and handled accordingly.
        LOG.warning('Cannot get MAC addresses for node %(node)s: %(exc)s',
                    {'node': task.node.uuid, 'exc': exc})
    except sushy.exceptions.SushyError as exc:
        # Any other Sushy failure is fatal for MAC discovery.
        msg = (_('Failed to get network interface information on node '
                 '%(node)s: %(exc)s')
               % {'node': task.node.uuid, 'exc': exc})
        LOG.error(msg)
        raise exception.RedfishError(error=msg)