Skip to content

Commit

Permalink
Merge "Checksum files before raw conversion" into stable/2024.1
Browse files Browse the repository at this point in the history
  • Loading branch information
Zuul authored and openstack-gerrit committed Oct 3, 2024
2 parents 99c786d + 0d6b5c9 commit ebce0fd
Show file tree
Hide file tree
Showing 14 changed files with 1,115 additions and 71 deletions.
34 changes: 34 additions & 0 deletions doc/source/admin/security.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,40 @@ OpenStack deployment.
.. TODO: add "Multi-tenancy Considerations" section
Image Checksums
===============

Ironic has long provided a capacity to supply and check a checksum for disk
images being deployed. However, one aspect which Ironic has not asserted is
"Why?" in terms of "Is it for security?" or "Is it for data integrity?".

The answer is both to ensure a higher level of security with remote
image files, *and* provide faster feedback should a image being transferred
happens to be corrupted.

Normally checksums are verified by the ``ironic-python-agent`` **OR** the
deployment interface responsible for overall deployment operation. That being
said, not *every* deployment interface relies on disk images which have
checksums, and those deployment interfaces are for specific use cases which
Ironic users leverage, outside of the "general" use case capabilities provided
by the ``direct`` deployment interface.

.. NOTE::
Use of the node ``instance_info/image_checksum`` field is discouraged
for integrated OpenStack Users as usage of the matching Glance Image
Service field is also deprecated. That being said, Ironic retains this
feature by popular demand while also enabling also retain simplified
operator interaction.
The newer field values supported by Glance are also specifically
supported by Ironic as ``instance_info/image_os_hash_value`` for
checksum values and ``instance_info/image_os_hash_algo`` field for
the checksum algorithm.

.. WARNING::
Setting a checksum value to a URL is supported, *however* doing this is
making a "tradeoff" with security as the remote checksum *can* change.
Conductor support this functionality can be disabled using the
:oslo.config:option:`conductor.disable_support_for_checksum_files` setting.

REST API: user roles and policy settings
========================================
Expand Down
258 changes: 258 additions & 0 deletions ironic/common/checksum_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,258 @@
# Copyright (c) 2024 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

import os
import re
import time
from urllib import parse as urlparse

from oslo_log import log as logging
from oslo_utils import fileutils

from ironic.common import exception
from ironic.common.i18n import _
from ironic.common import image_service
from ironic.conf import CONF

LOG = logging.getLogger(__name__)


# REGEX matches for Checksum file payloads
# If this list requires changes, it should be changed in
# ironic-python-agent (extensions/standby.py) as well.

MD5_MATCH = r"^([a-fA-F\d]{32})\s" # MD5 at beginning of line
MD5_MATCH_END = r"\s([a-fA-F\d]{32})$" # MD5 at end of line
MD5_MATCH_ONLY = r"^([a-fA-F\d]{32})$" # MD5 only
SHA256_MATCH = r"^([a-fA-F\d]{64})\s" # SHA256 at beginning of line
SHA256_MATCH_END = r"\s([a-fA-F\d]{64})$" # SHA256 at end of line
SHA256_MATCH_ONLY = r"^([a-fA-F\d]{64})$" # SHA256 only
SHA512_MATCH = r"^([a-fA-F\d]{128})\s" # SHA512 at beginning of line
SHA512_MATCH_END = r"\s([a-fA-F\d]{128})$" # SHA512 at end of line
SHA512_MATCH_ONLY = r"^([a-fA-F\d]{128})$" # SHA512 only
FILENAME_MATCH_END = r"\s[*]?{filename}$" # Filename binary/text end of line
FILENAME_MATCH_PARENTHESES = r"\s\({filename}\)\s" # CentOS images

CHECKSUM_MATCHERS = (MD5_MATCH, MD5_MATCH_END, SHA256_MATCH, SHA256_MATCH_END,
SHA512_MATCH, SHA512_MATCH_END)
CHECKSUM_ONLY_MATCHERS = (MD5_MATCH_ONLY, SHA256_MATCH_ONLY, SHA512_MATCH_ONLY)
FILENAME_MATCHERS = (FILENAME_MATCH_END, FILENAME_MATCH_PARENTHESES)


def validate_checksum(path, checksum, checksum_algo=None):
"""Validate image checksum.
:param path: File path in the form of a string to calculate a checksum
which is compared to the checksum field.
:param checksum: The supplied checksum value, a string, which will be
compared to the file.
:param checksum_algo: The checksum type of the algorithm.
:raises: ImageChecksumError if the supplied data cannot be parsed or
if the supplied value does not match the supplied checksum
value.
"""
# TODO(TheJilia): At some point, we likely need to compare
# the incoming checksum algorithm upfront, ut if one is invoked which
# is not supported, hashlib will raise ValueError.
use_checksum_algo = None
if ":" in checksum:
# A form of communicating the checksum algorithm is to delimit the
# type from the value. See ansible deploy interface where this
# is most evident.
split_checksum = checksum.split(":")
use_checksum = split_checksum[1]
use_checksum_algo = split_checksum[0]
else:
use_checksum = checksum
if not use_checksum_algo:
use_checksum_algo = checksum_algo
# If we have a zero length value, but we split it, we have
# invalid input. Also, checksum is what we expect, algorithm is
# optional. This guards against the split of a value which is
# image_checksum = "sha256:" which is a potential side effect of
# splitting the string.
if use_checksum == '':
raise exception.ImageChecksumError()

# Make everything lower case since we don't expect mixed case,
# but we may have human originated input on the supplied algorithm.
try:
if not use_checksum_algo:
# This is backwards compatible support for a bare checksum.
calculated = compute_image_checksum(path)
else:
calculated = compute_image_checksum(path,
use_checksum_algo.lower())
except ValueError:
# ValueError is raised when an invalid/unsupported/unknown
# checksum algorithm is invoked.
LOG.error("Failed to generate checksum for file %(path)s, possible "
"invalid checksum algorithm: %(algo)s",
{"path": path,
"algo": use_checksum_algo})
raise exception.ImageChecksumAlgorithmFailure()
except OSError:
LOG.error("Failed to read file %(path)s to compute checksum.",
{"path": path})
raise exception.ImageChecksumFileReadFailure()
if (use_checksum is not None
and calculated.lower() != use_checksum.lower()):
LOG.error("We were supplied a checksum value of %(supplied)s, but "
"calculated a value of %(value)s. This is a fatal error.",
{"supplied": use_checksum,
"value": calculated})
raise exception.ImageChecksumError()


def compute_image_checksum(image_path, algorithm='md5'):
"""Compute checksum by given image path and algorithm.
:param image_path: The path to the file to undergo checksum calculation.
:param algorithm: The checksum algorithm to utilize. Defaults
to 'md5' due to historical support reasons in Ironic.
:returns: The calculated checksum value.
:raises: ValueError when the checksum algorithm is not supported
by the system.
"""

time_start = time.time()
LOG.debug('Start computing %(algo)s checksum for image %(image)s.',
{'algo': algorithm, 'image': image_path})

checksum = fileutils.compute_file_checksum(image_path,
algorithm=algorithm)
time_elapsed = time.time() - time_start
LOG.debug('Computed %(algo)s checksum for image %(image)s in '
'%(delta).2f seconds, checksum value: %(checksum)s.',
{'algo': algorithm, 'image': image_path, 'delta': time_elapsed,
'checksum': checksum})
return checksum


def get_checksum_and_algo(instance_info):
"""Get and return the image checksum and algo.
:param instance_info: The node instance info, or newly updated/generated
instance_info value.
:returns: A tuple containing two values, a checksum and algorithm,
if available.
"""
checksum_algo = None
if 'image_os_hash_value' in instance_info.keys():
# A value set by image_os_hash_value supersedes other
# possible uses as it is specific.
checksum = instance_info.get('image_os_hash_value')
checksum_algo = instance_info.get('image_os_hash_algo')
else:
checksum = instance_info.get('image_checksum')
if is_checksum_url(checksum):
image_source = instance_info.get('image_source')
checksum = get_checksum_from_url(checksum, image_source)

# NOTE(TheJulia): This is all based on SHA-2 lengths.
# SHA-3 would require a hint and it would not be a fixed length.
# That said, SHA-2 is still valid and has not been withdrawn.
checksum_len = len(checksum)
if checksum_len == 128:
# SHA2-512 is 512 bits, 128 characters.
checksum_algo = "sha512"
elif checksum_len == 64:
checksum_algo = "sha256"

if checksum_len == 32 and not CONF.agent.allow_md5_checksum:
# MD5 not permitted and the checksum is the length of MD5
# and not otherwise defined.
LOG.error('Cannot compute the checksum as it uses MD5 '
'and is disabled by configuration. If the checksum '
'is *not* MD5, please specify the algorithm.')
raise exception.ImageChecksumAlgorithmFailure()

return checksum, checksum_algo


def is_checksum_url(checksum):
"""Identify if checksum is not a url.
:param checksum: The user supplied checksum value.
:returns: True if the checksum is a url, otherwise False.
:raises: ImageChecksumURLNotSupported should the conductor have this
support disabled.
"""
if (checksum.startswith('http://') or checksum.startswith('https://')):
if CONF.conductor.disable_support_for_checksum_files:
raise exception.ImageChecksumURLNotSupported()
return True
else:
return False


def get_checksum_from_url(checksum, image_source):
"""Gets a checksum value based upon a remote checksum URL file.
:param checksum: The URL to the checksum URL content.
:param image_soource: The image source utilized to match with
the contents of the URL payload file.
:raises: ImageDownloadFailed when the checksum file cannot be
accessed or cannot be parsed.
"""

LOG.debug('Attempting to download checksum from: %(checksum)s.',
{'checksum': checksum})

# Directly invoke the image service and get the checksum data.
resp = image_service.HttpImageService.get(checksum)
checksum_url = str(checksum)

# NOTE(TheJulia): The rest of this method is taken from
# ironic-python-agent. If a change is required here, it may
# be required in ironic-python-agent (extensions/standby.py).
lines = [line.strip() for line in resp.split('\n') if line.strip()]
if not lines:
raise exception.ImageDownloadFailed(image_href=checksum,
reason=_('Checksum file empty.'))
elif len(lines) == 1:
# Special case - checksums file with only the checksum itself
if ' ' not in lines[0]:
for matcher in CHECKSUM_ONLY_MATCHERS:
checksum = re.findall(matcher, lines[0])
if checksum:
return checksum[0]
raise exception.ImageDownloadFailed(
image_href=checksum_url,
reason=(
_("Invalid checksum file (No valid checksum found)")))
# FIXME(dtantsur): can we assume the same name for all images?
expected_fname = os.path.basename(urlparse.urlparse(
image_source).path)
for line in lines:
# Ignore comment lines
if line.startswith("#"):
continue

# Ignore checksums for other files
for matcher in FILENAME_MATCHERS:
if re.findall(matcher.format(filename=expected_fname), line):
break
else:
continue

for matcher in CHECKSUM_MATCHERS:
checksum = re.findall(matcher, line)
if checksum:
return checksum[0]

raise exception.ImageDownloadFailed(
image_href=checksum,
reason=(_("Checksum file does not contain name %s")
% expected_fname))
22 changes: 22 additions & 0 deletions ironic/common/exception.py
Original file line number Diff line number Diff line change
Expand Up @@ -897,3 +897,25 @@ class UnsupportedHardwareFeature(Invalid):

class InvalidImage(ImageUnacceptable):
_msg_fmt = _("The requested image is not valid for use.")


class ImageChecksumError(InvalidImage):
"""Exception indicating checksum failed to match."""
_msg_fmt = _("The supplied image checksum is invalid or does not match.")


class ImageChecksumAlgorithmFailure(InvalidImage):
"""Cannot load the requested or required checksum algorithm."""
_msg_fmt = _("The requested image checksum algorithm cannot be loaded.")


class ImageChecksumURLNotSupported(InvalidImage):
"""Exception indicating we cannot support the remote checksum file."""
_msg_fmt = _("Use of remote checksum files is not supported.")


class ImageChecksumFileReadFailure(InvalidImage):
"""An OSError was raised when trying to read the file."""
_msg_fmt = _("Failed to read the file from local storage "
"to perform a checksum operation.")
code = http_client.SERVICE_UNAVAILABLE
37 changes: 37 additions & 0 deletions ironic/common/image_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,43 @@ def show(self, image_href):
'no_cache': no_cache,
}

@staticmethod
def get(image_href):
"""Downloads content and returns the response text.
:param image_href: Image reference.
:raises: exception.ImageRefValidationFailed if GET request returned
response code not equal to 200.
:raises: exception.ImageDownloadFailed if:
* IOError happened during file write;
* GET request failed.
"""

try:

verify = strutils.bool_from_string(CONF.webserver_verify_ca,
strict=True)
except ValueError:
verify = CONF.webserver_verify_ca

try:
auth = HttpImageService.gen_auth_from_conf_user_pass(image_href)
response = requests.get(image_href, stream=False, verify=verify,
timeout=CONF.webserver_connection_timeout,
auth=auth)
if response.status_code != http_client.OK:
raise exception.ImageRefValidationFailed(
image_href=image_href,
reason=_("Got HTTP code %s instead of 200 in response "
"to GET request.") % response.status_code)

return response.text

except (OSError, requests.ConnectionError, requests.RequestException,
IOError) as e:
raise exception.ImageDownloadFailed(image_href=image_href,
reason=str(e))


class FileImageService(BaseImageService):
"""Provides retrieval of disk images available locally on the conductor."""
Expand Down
7 changes: 6 additions & 1 deletion ironic/common/images.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from oslo_utils import fileutils
import pycdlib

from ironic.common import checksum_utils
from ironic.common import exception
from ironic.common.glance_service import service_utils as glance_utils
from ironic.common.i18n import _
Expand Down Expand Up @@ -386,9 +387,13 @@ def fetch_into(context, image_href, image_file):
{'image_href': image_href, 'time': time.time() - start})


def fetch(context, image_href, path, force_raw=False):
def fetch(context, image_href, path, force_raw=False,
checksum=None, checksum_algo=None):
with fileutils.remove_path_on_error(path):
fetch_into(context, image_href, path)
if (not CONF.conductor.disable_file_checksum
and checksum):
checksum_utils.validate_checksum(path, checksum, checksum_algo)
if force_raw:
image_to_raw(image_href, path, "%s.part" % path)

Expand Down
Loading

0 comments on commit ebce0fd

Please sign in to comment.