From 0913534a79b419905960e57f3059855f1ebc55b3 Mon Sep 17 00:00:00 2001 From: "Mitch Harding (the weird one)" Date: Thu, 29 Aug 2024 16:26:43 -0400 Subject: [PATCH] Improve server code that validates incoming data --- CHANGELOG.md | 3 + Dockerfile | 87 ++++++-- constraints.txt.in | 3 +- convert-oas-requirements.txt | 2 + requirements.txt | 1 + src/bos/server/controllers/v2/boot_set.py | 10 +- src/bos/server/controllers/v2/components.py | 110 +++------- src/bos/server/controllers/v2/options.py | 6 +- src/bos/server/controllers/v2/sessions.py | 39 ++-- .../server/controllers/v2/sessiontemplates.py | 129 ++++++------ src/bos/server/schema.py | 61 ++++++ src/bos/server/utils.py | 13 +- utils/convert_oas/convert_oas.py | 188 ++++++++++++++++++ 13 files changed, 450 insertions(+), 202 deletions(-) create mode 100644 convert-oas-requirements.txt create mode 100644 src/bos/server/schema.py create mode 100644 utils/convert_oas/convert_oas.py diff --git a/CHANGELOG.md b/CHANGELOG.md index bddd5634..f2198fd2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +### Changed +- Improve server code that validates incoming data + ### Fixed - Added missing required Python modules to `requirements.txt` diff --git a/Dockerfile b/Dockerfile index 3a2ffe06..b1f32636 100644 --- a/Dockerfile +++ b/Dockerfile @@ -43,42 +43,80 @@ RUN /usr/local/bin/docker-entrypoint.sh generate \ -c config/autogen-server.json \ --generate-alias-as-model -# Base image -FROM $ALPINE_BASE_IMAGE AS base + +# Start by taking a base Alpine image, copying in our generated code, +# applying some updates, and creating our virtual Python environment +FROM $ALPINE_BASE_IMAGE AS alpine-base WORKDIR /app -# We apply all generated code first +# Copy in generated code COPY --from=codegen /app/lib/ /app/lib +# Copy in Python constraints file +COPY constraints.txt /app/ +# Update packages to avoid security problems +RUN --mount=type=secret,id=netrc,target=/root/.netrc \ + apk add --upgrade --no-cache apk-tools busybox && \ + apk update && \ + apk add --no-cache python3-dev py3-pip && \ + apk -U upgrade --no-cache +ENV VIRTUAL_ENV=/app/venv +RUN python3 -m venv $VIRTUAL_ENV +ENV PATH="$VIRTUAL_ENV/bin:$PATH" +RUN --mount=type=secret,id=netrc,target=/root/.netrc \ + pip3 install --no-cache-dir -U pip -c constraints.txt && \ + pip3 list --format freeze + + +# Generate JSON version of openapi spec and then convert its +# schemas using our convert_oas utility +FROM alpine-base AS openapi-json-converter +WORKDIR /app +COPY api/openapi.yaml convert-oas-requirements.txt /app +COPY utils/convert_oas/ /app/convert_oas +RUN --mount=type=secret,id=netrc,target=/root/.netrc \ + apk add --no-cache yq && \ + apk -U upgrade --no-cache && \ + yq -o=json /app/openapi.yaml > /app/openapi.json && \ + pip3 install --no-cache-dir -r convert-oas-requirements.txt && \ + pip3 list --format freeze && \ + python3 /app/convert_oas/convert_oas.py /app/openapi.json /app/lib/bos/server/openapi.jsonschema && \ + cat /app/lib/bos/server/openapi.jsonschema + + +# Base image +FROM alpine-base AS base +WORKDIR /app # Move autogenerated server requirements aside so that they can be referenced by # project-wide requirements.txt; this allows us to specify download source and # additional required libraries necessary for developer authored controller/database # code. -RUN mv lib/requirements.txt lib/bos/server/requirements.txt +# # The openapi-generator creates a requirements file that specifies exactly Flask==2.1.1 # However, using Flask 2.2.5 is also compatible, and resolves a CVE. # Accordingly, we relax their requirements file. -RUN cat lib/bos/server/requirements.txt && \ +RUN mv -v lib/requirements.txt lib/bos/server/requirements.txt && \ + cat lib/bos/server/requirements.txt && \ sed -i 's/Flask == 2\(.*\)$/Flask >= 2\1\nFlask < 3/' lib/bos/server/requirements.txt && \ cat lib/bos/server/requirements.txt # Then copy all src into the base image COPY src/bos/ /app/lib/bos/ -COPY constraints.txt requirements.txt /app/ -# Update packages to avoid security problems -RUN apk add --upgrade --no-cache apk-tools busybox && \ - apk update && \ - apk add --no-cache gcc g++ python3-dev py3-pip musl-dev libffi-dev openssl-dev && \ - apk -U upgrade --no-cache -# Create a virtual environment in which we can install Python packages. This -# isolates our installation from the system installation. -ENV VIRTUAL_ENV=/app/venv -RUN python3 -m venv $VIRTUAL_ENV -ENV PATH="$VIRTUAL_ENV/bin:$PATH" -RUN pip3 install --no-cache-dir -U pip -c constraints.txt && \ - pip3 list --format freeze -RUN --mount=type=secret,id=netrc,target=/root/.netrc pip3 install --no-cache-dir -r requirements.txt && \ - pip3 list --format freeze -RUN cd lib && pip3 install --no-cache-dir . -c ../constraints.txt && \ +# Copy jsonschema data file over from previous layer +COPY --from=openapi-json-converter /app/lib/bos/server/openapi.jsonschema /app/lib/bos/server/openapi.jsonschema +COPY requirements.txt /app/ +# 1. Install and update packages to avoid security problems +# 2. Create a virtual environment in which we can install Python packages. This +# isolates our installation from the system installation. +RUN --mount=type=secret,id=netrc,target=/root/.netrc \ + apk add --no-cache gcc g++ musl-dev libffi-dev openssl-dev && \ + apk -U upgrade --no-cache && \ + pip3 install --no-cache-dir -U pip -c constraints.txt && \ + pip3 list --format freeze && \ + pip3 install --no-cache-dir -r requirements.txt && \ + pip3 list --format freeze && \ + cd lib && \ + pip3 install --no-cache-dir . -c ../constraints.txt && \ pip3 list --format freeze + # Base testing image FROM base AS testing WORKDIR /app @@ -87,12 +125,14 @@ RUN --mount=type=secret,id=netrc,target=/root/.netrc cd /app && \ pip3 install --no-cache-dir -r test-requirements.txt && \ pip3 list --format freeze + # Codestyle reporting FROM testing AS codestyle WORKDIR /app COPY docker_codestyle_entry.sh setup.cfg ./ CMD [ "./docker_codestyle_entry.sh" ] + # API Testing image FROM testing AS api-testing WORKDIR /app @@ -100,6 +140,7 @@ COPY docker_api_test_entry.sh run_apitests.py ./ COPY api_tests/ api_tests/ CMD [ "./docker_api_test_entry.sh" ] + # Intermediate image FROM base AS intermediate WORKDIR /app @@ -108,16 +149,18 @@ RUN apk add --no-cache uwsgi uwsgi-python3 COPY config/uwsgi.ini ./ ENTRYPOINT ["uwsgi", "--ini", "/app/uwsgi.ini"] + # Debug image FROM intermediate AS debug ENV PYTHONPATH "/app/lib/server" WORKDIR /app RUN apk add --no-cache busybox-extras && \ + apk -U upgrade --no-cache && \ pip3 install --no-cache-dir rpdb -c constraints.txt && \ pip3 list --format freeze + # Application image FROM intermediate AS application WORKDIR /app USER 65534:65534 - diff --git a/constraints.txt.in b/constraints.txt.in index 3efbfe41..e648ad50 100644 --- a/constraints.txt.in +++ b/constraints.txt.in @@ -18,7 +18,8 @@ inflection>=0.5.1,<0.6 itsdangerous>=2.1.2,<2.2 Jinja2>=3.0.3,<3.1 jmespath>=1.0.1,<1.1 -jsonschema>=4.17.3,<4.18 +jsonref>=1.1,<1.2 +jsonschema>=4.23,<4.24 # CSM 1.6 uses Kubernetes 1.22, so use client v22.x to ensure compatability kubernetes>=22.6.0,<22.7 liveness>=1.4,<1.5 diff --git a/convert-oas-requirements.txt b/convert-oas-requirements.txt new file mode 100644 index 00000000..033d527d --- /dev/null +++ b/convert-oas-requirements.txt @@ -0,0 +1,2 @@ +-c constraints.txt +jsonref diff --git a/requirements.txt b/requirements.txt index 5fe74047..079e8db6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,6 +8,7 @@ boto3 botocore connexion flask +jsonschema kubernetes liveness PyYAML diff --git a/src/bos/server/controllers/v2/boot_set.py b/src/bos/server/controllers/v2/boot_set.py index 0d0a6883..f5f12c6f 100644 --- a/src/bos/server/controllers/v2/boot_set.py +++ b/src/bos/server/controllers/v2/boot_set.py @@ -33,6 +33,9 @@ BOOT_SET_WARNING = 1 BOOT_SET_ERROR = 2 +# Valid boot sets are required to have at least one of these fields +HARDWARE_SPECIFIER_FIELDS = ( "node_list", "node_roles_groups", "node_groups" ) + def validate_boot_sets(session_template: dict, operation: str, @@ -40,7 +43,7 @@ def validate_boot_sets(session_template: dict, """ Validates the boot sets listed in a session template. It ensures that there are boot sets. - It checks that each boot set specifies nodes via one of the specifier fields. + It checks that each boot set specifies nodes via at least one of the specifier fields. Ensures that the boot artifacts exist. Inputs: @@ -63,16 +66,15 @@ def validate_boot_sets(session_template: dict, msg = f"Session template '{template_name}' requires at least 1 boot set." return BOOT_SET_ERROR, msg - hardware_specifier_fields = ('node_roles_groups', 'node_list', 'node_groups') for bs_name, bs in session_template['boot_sets'].items(): # Verify that the hardware is specified specified = [bs.get(field, None) - for field in hardware_specifier_fields] + for field in HARDWARE_SPECIFIER_FIELDS] if not any(specified): msg = f"Session template: '{template_name}' boot set: '{bs_name}' " \ f"must have at least one " \ f"hardware specifier field provided (%s); None were provided." \ - % (', '.join(sorted(hardware_specifier_fields))) + % (', '.join(sorted(HARDWARE_SPECIFIER_FIELDS))) LOGGER.error(msg) return BOOT_SET_ERROR, msg if operation in ['boot', 'reboot']: diff --git a/src/bos/server/controllers/v2/components.py b/src/bos/server/controllers/v2/components.py index bad62a2f..0c03b406 100644 --- a/src/bos/server/controllers/v2/components.py +++ b/src/bos/server/controllers/v2/components.py @@ -32,10 +32,7 @@ from bos.server import redis_db_utils as dbutils from bos.server.controllers.v2.options import get_v2_options_data from bos.server.dbs.boot_artifacts import get_boot_artifacts, BssTokenUnknown -from bos.server.models.v2_component import V2Component as Component # noqa: E501 -from bos.server.models.v2_component_array import V2ComponentArray as ComponentArray # noqa: E501 -from bos.server.models.v2_components_update import V2ComponentsUpdate as \ - ComponentsUpdate # noqa: E501 +from bos.server.utils import get_request_json LOGGER = logging.getLogger('bos.server.controllers.v2.components') DB = dbutils.get_wrapper(db='components') @@ -173,24 +170,13 @@ def _matches_filter(data, enabled, session, staged_session, phase, status): def put_v2_components(): """Used by the PUT /components API operation""" LOGGER.debug("PUT /v2/components invoked put_v2_components") - if not connexion.request.is_json: - msg = "Must be in JSON format" - LOGGER.error(msg) - return msg, 400 - - LOGGER.debug("connexion.request.is_json") - data=connexion.request.get_json() - LOGGER.debug("type=%s", type(data)) - LOGGER.debug("Received: %s", data) - try: - # This call is just to ensure that the data - # coming in is valid per the API schema - ComponentArray.from_dict(data) # noqa: E501 + data = get_request_json() except Exception as err: - msg="Provided data does not follow API spec" - LOGGER.error("%s: %s", msg, exc_type_msg(err)) - return connexion.problem(status=400, title=msg,detail=str(err)) + LOGGER.error("Error parsing PUT request data: %s", exc_type_msg(err)) + return connexion.problem( + status=400, title="Error parsing the data provided.", + detail=str(err)) components = [] for component_data in data: @@ -213,35 +199,17 @@ def put_v2_components(): def patch_v2_components(): """Used by the PATCH /components API operation""" LOGGER.debug("PATCH /v2/components invoked patch_v2_components") - if not connexion.request.is_json: - msg = "Must be in JSON format" - LOGGER.error(msg) - return msg, 400 - - LOGGER.debug("connexion.request.is_json") - data=connexion.request.get_json() - LOGGER.debug("type=%s", type(data)) - LOGGER.debug("Received: %s", data) + try: + data = get_request_json() + except Exception as err: + LOGGER.error("Error parsing PATCH request data: %s", exc_type_msg(err)) + return connexion.problem( + status=400, title="Error parsing the data provided.", + detail=str(err)) if isinstance(data, list): - try: - # This call is just to ensure that the data - # coming in is valid per the API schema - ComponentArray.from_dict(data) # noqa: E501 - except Exception as err: - msg="Provided data does not follow API spec" - LOGGER.error("%s: %s", msg, exc_type_msg(err)) - return connexion.problem(status=400, title=msg,detail=str(err)) return patch_v2_components_list(data) if isinstance(data, dict): - try: - # This call is just to ensure that the data - # coming in is valid per the API schema - ComponentsUpdate.from_dict(data) # noqa: E501 - except Exception as err: - msg="Provided data does not follow API spec" - LOGGER.error("%s: %s", msg, exc_type_msg(err)) - return connexion.problem(status=400, title=msg,detail=str(err)) return patch_v2_components_dict(data) LOGGER.error("Unexpected data type %s", str(type(data))) @@ -340,24 +308,14 @@ def get_v2_component(component_id): def put_v2_component(component_id): """Used by the PUT /components/{component_id} API operation""" LOGGER.debug("PUT /v2/components/%s invoked put_v2_component", component_id) - if not connexion.request.is_json: - msg = "Must be in JSON format" - LOGGER.error(msg) - return msg, 400 - - LOGGER.debug("connexion.request.is_json") - data=connexion.request.get_json() - LOGGER.debug("type=%s", type(data)) - LOGGER.debug("Received: %s", data) - try: - # This call is just to ensure that the data - # coming in is valid per the API schema - Component.from_dict(data) # noqa: E501 + data = get_request_json() except Exception as err: - msg="Provided data does not follow API spec" - LOGGER.error("%s: %s", msg, exc_type_msg(err)) - return connexion.problem(status=400, title=msg,detail=str(err)) + LOGGER.error("Error parsing PUT '%s' request data: %s", component_id, exc_type_msg(err)) + return connexion.problem( + status=400, title="Error parsing the data provided.", + detail=str(err)) + data['id'] = component_id data = _set_auto_fields(data) return DB.put(component_id, data), 200 @@ -368,24 +326,13 @@ def put_v2_component(component_id): def patch_v2_component(component_id): """Used by the PATCH /components/{component_id} API operation""" LOGGER.debug("PATCH /v2/components/%s invoked patch_v2_component", component_id) - if not connexion.request.is_json: - msg = "Must be in JSON format" - LOGGER.error(msg) - return msg, 400 - - LOGGER.debug("connexion.request.is_json") - data=connexion.request.get_json() - LOGGER.debug("type=%s", type(data)) - LOGGER.debug("Received: %s", data) - try: - # This call is just to ensure that the data - # coming in is valid per the API schema - Component.from_dict(data) # noqa: E501 + data = get_request_json() except Exception as err: - msg="Provided data does not follow API spec" - LOGGER.error("%s: %s", msg, exc_type_msg(err)) - return connexion.problem(status=400, title=msg,detail=str(err)) + LOGGER.error("Error parsing PATCH '%s' request data: %s", component_id, exc_type_msg(err)) + return connexion.problem( + status=400, title="Error parsing the data provided.", + detail=str(err)) if component_id not in DB or not _is_valid_tenant_component(component_id): LOGGER.warning("Component %s could not be found", component_id) @@ -440,12 +387,19 @@ def delete_v2_component(component_id): def post_v2_apply_staged(): """Used by the POST /applystaged API operation""" LOGGER.debug("POST /v2/applystaged invoked post_v2_apply_staged") + try: + data = get_request_json() + except Exception as err: + LOGGER.error("Error parsing POST request data: %s", exc_type_msg(err)) + return connexion.problem( + status=400, title="Error parsing the data provided.", + detail=str(err)) + response = {"succeeded": [], "failed": [], "ignored": []} # Obtain latest desired behavior for how to clear staging information # for all components clear_staged = get_v2_options_data().get('clear_stage', False) try: - data = connexion.request.get_json() xnames = data.get("xnames", []) allowed_xnames, rejected_xnames = _apply_tenant_limit(xnames) response["ignored"] = rejected_xnames diff --git a/src/bos/server/controllers/v2/options.py b/src/bos/server/controllers/v2/options.py index 6db6f701..949693ef 100644 --- a/src/bos/server/controllers/v2/options.py +++ b/src/bos/server/controllers/v2/options.py @@ -30,6 +30,7 @@ from bos.common.utils import exc_type_msg from bos.server import redis_db_utils as dbutils from bos.server.models.v2_options import V2Options as Options +from bos.server.utils import get_request_json LOGGER = logging.getLogger('bos.server.controllers.v2.options') DB = dbutils.get_wrapper(db='options') @@ -118,12 +119,13 @@ def patch_v2_options(): """Used by the PATCH /options API operation""" LOGGER.debug("PATCH /v2/options invoked patch_v2_options") try: - data = connexion.request.get_json() + data = get_request_json() except Exception as err: - LOGGER.error("Error parsing request data: %s", exc_type_msg(err)) + LOGGER.error("Error parsing PATCH request data: %s", exc_type_msg(err)) return connexion.problem( status=400, title="Error parsing the data provided.", detail=str(err)) + if OPTIONS_KEY not in DB: DB.put(OPTIONS_KEY, {}) return DB.patch(OPTIONS_KEY, data), 200 diff --git a/src/bos/server/controllers/v2/sessions.py b/src/bos/server/controllers/v2/sessions.py index e0916494..127f82db 100644 --- a/src/bos/server/controllers/v2/sessions.py +++ b/src/bos/server/controllers/v2/sessions.py @@ -40,8 +40,7 @@ from bos.server.controllers.v2.sessiontemplates import get_v2_sessiontemplate from bos.server.models.v2_session import V2Session as Session # noqa: E501 from bos.server.models.v2_session_create import V2SessionCreate as SessionCreate # noqa: E501 -from bos.server.models.v2_session_update import V2SessionUpdate as SessionUpdate # noqa: E501 -from bos.server.utils import ParsingException +from bos.server.utils import get_request_json, ParsingException from .boot_set import validate_boot_sets, BOOT_SET_ERROR LOGGER = logging.getLogger('bos.server.controllers.v2.session') @@ -63,15 +62,13 @@ def post_v2_session(): # noqa: E501 """ LOGGER.debug("POST /v2/sessions invoked post_v2_session") # -- Validation -- - if connexion.request.is_json: - LOGGER.debug("connexion.request.is_json") - LOGGER.debug("type=%s", type(connexion.request.get_json())) - LOGGER.debug("Received: %s", connexion.request.get_json()) - session_create = SessionCreate.from_dict(connexion.request.get_json()) # noqa: E501 - else: - msg = "Post must be in JSON format" - LOGGER.error(msg) - return msg, 400 + try: + session_create = SessionCreate.from_dict(get_request_json()) # noqa: E501 + except Exception as err: + LOGGER.error("Error parsing POST request data: %s", exc_type_msg(err)) + return connexion.problem( + status=400, title="Error parsing the data provided.", + detail=str(err)) # If no limit is specified, check to see if we require one if not session_create.limit and get_v2_options_data().get('session_limit_required', False): @@ -141,19 +138,13 @@ def patch_v2_session(session_id): Session Dictionary, Status Code """ LOGGER.debug("PATCH /v2/sessions/%s invoked patch_v2_session", session_id) - if not connexion.request.is_json: - msg = "Post must be in JSON format" - LOGGER.error(msg) - return msg, 400 - - LOGGER.debug("connexion.request.is_json") - patch_data_json=connexion.request.get_json() - LOGGER.debug("type=%s", type(patch_data_json)) - LOGGER.debug("Received: %s", patch_data_json) - - # This call is just to ensure that the patch data - # coming in is valid per the API schema - SessionUpdate.from_dict(patch_data_json) # noqa: E501 + try: + patch_data_json = get_request_json() + except Exception as err: + LOGGER.error("Error parsing PATCH '%s' request data: %s", session_id, exc_type_msg(err)) + return connexion.problem( + status=400, title="Error parsing the data provided.", + detail=str(err)) session_key = get_tenant_aware_key(session_id, get_tenant_from_header()) if session_key not in DB: diff --git a/src/bos/server/controllers/v2/sessiontemplates.py b/src/bos/server/controllers/v2/sessiontemplates.py index 92eb56cd..7d5c6509 100644 --- a/src/bos/server/controllers/v2/sessiontemplates.py +++ b/src/bos/server/controllers/v2/sessiontemplates.py @@ -27,10 +27,10 @@ from bos.common.tenant_utils import get_tenant_from_header, get_tenant_aware_key, \ reject_invalid_tenant from bos.common.utils import exc_type_msg -from bos.server.models.v2_session_template import V2SessionTemplate as SessionTemplate # noqa: E501 from bos.server import redis_db_utils as dbutils -from bos.server.utils import _canonize_xname, _get_request_json, ParsingException -from .boot_set import validate_boot_sets +from bos.server.schema import validator +from bos.server.utils import canonize_xname, get_request_json, ParsingException +from .boot_set import validate_boot_sets, HARDWARE_SPECIFIER_FIELDS LOGGER = logging.getLogger('bos.server.controllers.v2.sessiontemplates') DB = dbutils.get_wrapper(db='session_templates') @@ -54,63 +54,6 @@ "enable_cfs": True, "name": "name-your-template"} -BOOT_SET_NODE_FIELDS = [ "node_list", "node_roles_groups", "node_groups" ] - -def _sanitize_xnames(st_json): - """ - Sanitize xnames - Canonize the xnames - Args: - st_json (dict): The Session Template as a JSON object - - Returns: - Nothing - """ - # There should always be a boot_sets field -- this function - # is only called after the template has been verified - for boot_set in st_json['boot_sets'].values(): - if 'node_list' not in boot_set: - continue - boot_set['node_list'] = [_canonize_xname(node) for node in boot_set['node_list']] - - -def _validate_sanitize_session_template(session_template_id, template_data): - """ - Used when creating or patching session templates - """ - # The boot_sets field is required. - if "boot_sets" not in template_data: - raise ParsingException("Missing required 'boot_sets' field") - - for bs_name, bs in template_data["boot_sets"].items(): - # All keys in the boot_sets mapping must match the 'name' fields in the - # boot sets to which they map (if they contain a 'name' field). - if "name" not in bs: - # Set the field here -- this allows the name to be validated - # per the schema later - bs["name"] = bs_name - elif bs["name"] != bs_name: - raise ParsingException(f"boot_sets key ({bs_name}) does not match 'name' " - f"field of corresponding boot set ({bs["name"]})") - - # Also, validate that each boot set has at least one of the BOOT_SET_NODE_FIELDS - if not any(field_name in bs for field_name in BOOT_SET_NODE_FIELDS): - raise ParsingException(f"Boot set {bs_name} has none of the following " - f"fields: {BOOT_SET_NODE_FIELDS}") - - # Convert the JSON request data into a SessionTemplate object. - # Any exceptions raised here would be generated from the model - # (i.e. bos.server.models.v2_session_template). - SessionTemplate.from_dict(template_data) - - # We do not bother storing the boot set names inside the boot sets, so delete them. - # We know every boot set has a name field because we verified that earlier. - for bs in template_data["boot_sets"].values(): - del bs["name"] - - _sanitize_xnames(template_data) - template_data['name'] = session_template_id - return - @reject_invalid_tenant @dbutils.redis_error_handler @@ -121,18 +64,16 @@ def put_v2_sessiontemplate(session_template_id): # noqa: E501 """ LOGGER.debug("PUT /v2/sessiontemplates/%s invoked put_v2_sessiontemplate", session_template_id) try: - template_data = _get_request_json() + template_data = get_request_json() except Exception as err: LOGGER.error("Error parsing PUT '%s' request data: %s", session_template_id, exc_type_msg(err)) return connexion.problem( status=400, title="Error parsing the data provided.", detail=str(err)) - LOGGER.debug("type=%s", type(template_data)) - LOGGER.debug("Received: %s", template_data) try: - _validate_sanitize_session_template(session_template_id, template_data) + validate_sanitize_session_template(session_template_id, template_data) except Exception as err: LOGGER.error("Error creating session template '%s': %s", session_template_id, exc_type_msg(err)) @@ -224,18 +165,16 @@ def patch_v2_sessiontemplate(session_template_id): detail=f"Sessiontemplate {session_template_id} could not be found") try: - template_data = _get_request_json() + template_data = get_request_json() except Exception as err: LOGGER.error("Error parsing PATCH '%s' request data: %s", session_template_id, exc_type_msg(err)) return connexion.problem( status=400, title="Error parsing the data provided.", detail=str(err)) - LOGGER.debug("type=%s", type(template_data)) - LOGGER.debug("Received: %s", template_data) try: - _validate_sanitize_session_template(session_template_id, template_data) + validate_sanitize_session_template(session_template_id, template_data) except Exception as err: LOGGER.error("Error patching session template '%s': %s", session_template_id, exc_type_msg(err)) @@ -280,3 +219,57 @@ def _matches_filter(data, tenant): if tenant and tenant != data.get("tenant"): return False return True + + +def _sanitize_xnames(st_json): + """ + Sanitize xnames - Canonize the xnames + Args: + st_json (dict): The Session Template as a JSON object + + Returns: + Nothing + """ + # There should always be a boot_sets field -- this function + # is only called after the template has been verified + for boot_set in st_json['boot_sets'].values(): + if 'node_list' not in boot_set: + continue + boot_set['node_list'] = [canonize_xname(node) for node in boot_set['node_list']] + + +def validate_sanitize_session_template(session_template_id, template_data): + """ + Used when creating or patching session templates + """ + # The boot_sets field is required. + if "boot_sets" not in template_data: + raise ParsingException("Missing required 'boot_sets' field") + + # All keys in the boot_sets mapping must match the 'name' fields in the + # boot sets to which they map (if they contain a 'name' field). + for bs_name, bs in template_data["boot_sets"].items(): + if "name" not in bs: + # Set the field here -- this allows the name to be validated + # per the schema later + bs["name"] = bs_name + elif bs["name"] != bs_name: + raise ParsingException(f"boot_sets key ({bs_name}) does not match 'name' " + f"field of corresponding boot set ({bs['name']})") + + # Also, validate that each boot set has at least one of the HARDWARE_SPECIFIER_FIELDS + if not any(field_name in bs for field_name in HARDWARE_SPECIFIER_FIELDS): + raise ParsingException(f"Boot set {bs_name} has none of the following " + f"fields: {HARDWARE_SPECIFIER_FIELDS}") + + # We do not bother storing the boot set names inside the boot sets, so delete them. + # We know every boot set has a name field because we verified that earlier. + for bs in template_data["boot_sets"].values(): + del bs["name"] + + _sanitize_xnames(template_data) + template_data['name'] = session_template_id + + # Finally, validate this against the API schema + # An exception will be raised if it does not follow it + validator.validate_session_template(template_data) diff --git a/src/bos/server/schema.py b/src/bos/server/schema.py new file mode 100644 index 00000000..93a27ddb --- /dev/null +++ b/src/bos/server/schema.py @@ -0,0 +1,61 @@ +# +# MIT License +# +# (C) Copyright 2024 Hewlett Packard Enterprise Development LP +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# + +import json +import logging + +import jsonschema + + +LOGGER = logging.getLogger('bos.server.schema') + +API_JSON_SCHEMA_PATH = "/app/lib/bos/server/openapi.jsonschema" + + +class Validator: + def __init__(self): + LOGGER.info("Loading API schema from %s", API_JSON_SCHEMA_PATH) + with open(API_JSON_SCHEMA_PATH, "rt") as f: + oas = json.load(f) + self.api_schema = oas["components"]["schemas"] + + def validate(self, data, schema_name): + jsonschema.validate(data, self.api_schema[schema_name]) + + def validate_component(self, data): + self.validate(data, "V2ComponentWithId") + + def validate_extended_session_status(self, data): + self.validate(data, "V2SessionExtendedStatus") + + def validate_options(self, data): + self.validate(data, "V2Options") + + def validate_session(self, data): + self.validate(data, "V2Session") + + def validate_session_template(self, data): + self.validate(data, "V2SessionTemplate") + +validator = Validator() diff --git a/src/bos/server/utils.py b/src/bos/server/utils.py index cbc06cc4..1634f647 100644 --- a/src/bos/server/utils.py +++ b/src/bos/server/utils.py @@ -21,15 +21,19 @@ # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR # OTHER DEALINGS IN THE SOFTWARE. # +import logging import re import connexion +LOGGER = logging.getLogger('bos.server.utils') + + class ParsingException(Exception): pass -def _canonize_xname(xname): +def canonize_xname(xname): """Ensure the xname is canonical. * Its components should be lowercase. * Any leading zeros should be stripped off. @@ -43,7 +47,7 @@ def _canonize_xname(xname): return re.sub(r'x0*(\d+)c0*(\d+)s0*(\d+)b0*(\d+)n0*(\d+)', r'x\1c\2s\3b\4n\5', xname.lower()) -def _get_request_json(): +def get_request_json(log_data = True): """ Used by endpoints which are expecting a JSON payload in the request body. Returns the JSON payload. @@ -51,4 +55,7 @@ def _get_request_json(): """ if not connexion.request.is_json: raise ParsingException("Non-JSON request received") - return connexion.request.get_json() + json_data = connexion.request.get_json() + if log_data: + LOGGER.debug("type=%s content=%s", type(json_data).__name__, json_data) + return json_data diff --git a/utils/convert_oas/convert_oas.py b/utils/convert_oas/convert_oas.py new file mode 100644 index 00000000..9a21146f --- /dev/null +++ b/utils/convert_oas/convert_oas.py @@ -0,0 +1,188 @@ +# +# MIT License +# +# (C) Copyright 2024 Hewlett Packard Enterprise Development LP +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# + +""" +Converts/cleanses the schemas in the BOS OpenAPI spec to be a valid json schema + +This module uses the match construct, which means it requires Python 3.10+. +Currently, the only time any of the BOS code uses a lower Python version is BOS state reporter, +and that has no need to call this module. In fact, it is expected that this module is only +going to be called at build time from the Dockerfile. +""" + +import argparse +import json +import sys +from typing import TextIO + +import jsonref + +class ConversionException(Exception): + """ + Raised for some errors during the conversion process + """ + +def _cleanse_schema(schema): + if not isinstance(schema, dict): + raise ConversionException( + f"Expecting schema to be type dict, but found type {type(schema).__name__}: {schema}") + if len(schema) == 1: + key = list(schema.keys())[0] + match key: + case '$ref': + # If this is a $ref, then we're done + return + case 'not': + # If this is a not, then we just parse what it maps to + _cleanse_schema(schema[key]) + return + case 'oneOf' | 'anyOf' | 'allOf': + # If this is oneOf, anyOf, or allOf, then it should map to a list, and we need to + # parse each element of that list + if not isinstance(schema[key], list): + raise ConversionException( + f"Expecting '{key}' to map to a list, but it does not: {schema}") + for v in schema[key]: + _cleanse_schema(v) + return + + try: + schema_type = schema["type"] + except KeyError as exc: + raise ConversionException(f"Schema is missing 'type' field: {schema}") from exc + + match schema_type: + case "array": + _cleanse_array_schema(schema) + case "boolean" | "string": + _cleanse_generic_schema(schema) + case "integer" | "number": + _cleanse_numeric_schema(schema) + case "object": + _cleanse_object_schema(schema) + case _: + raise ConversionException(f"Schema has unknown type '{schema_type}': {schema}") + + +def _cleanse_generic_schema(schema): + # The nullable keyword works for OAS 3.0 but not 3.1 + if schema.pop("nullable", False): + schema["type"] = [ schema["type"], "null" ] + + # Remove keywords that are not part of JSON schema, as well as ones which are not needed for + # validation, and have different meanings between OAS and JSON schema + for k in ["deprecated", "discriminator", "example", "externalDocs", "readOnly", "writeOnly", + "xml", "description"]: + schema.pop(k, None) + + +def _cleanse_array_schema(schema): + _cleanse_generic_schema(schema) + try: + items_schema = schema["items"] + except KeyError as exc: + raise ConversionException( + f"Array schema is missing required 'items' field: {schema}") from exc + _cleanse_schema(items_schema) + + +def _cleanse_numeric_schema(schema): + _cleanse_generic_schema(schema) + if any(field in schema for field in [ "exclusiveMinimum", "exclusiveMaximum" ]): + # Rather than worry about dealing with this programmatically, we should just fail. + # This is run at build time, so if it fails, the API spec can be fixed before this + # gets checked in. + raise ConversionException( + f"Integer/Number schema has exclusiveMinimum/Maximum field. Schema: {schema}") + + +def _cleanse_object_schema(schema): + _cleanse_generic_schema(schema) + object_properties = schema.get("properties", {}) + if not isinstance(object_properties, dict): + raise ConversionException( + f"Object schema has non-dict 'properties' value. Schema: {schema}") + for v in object_properties.values(): + _cleanse_schema(v) + + # additionalProperties is allowed to map to a schema dict. But it's also allowed to map + # to a boolean. Or to be absent. If it is present and mapped to a non-empty dict, then we + # need to cleanse it. + try: + additional_properties = schema['additionalProperties'] + except KeyError: + return + if not isinstance(additional_properties, dict): + return + if not additional_properties: + return + _cleanse_schema(additional_properties) + + +def convert_oas(input_file: TextIO, output_file: TextIO|None=None) -> dict: + """ + Reads in the JSON OpenAPI 3.0.x spec file. + Converts to OpenAPI 3.1 / JSON schema. + * Replaces all 'nullable' fields to be compliant with JSON schemas. + * Replaces all $refs with what they are referencing. + * Removes keywords which are either invalid or have a different meaning in JSON schemas + (and that we don't need for our purposes for this file: validating data against + the schema) + * Raises an exception in cases where we'd prefer to change the API spec rather than + handle the conversion here. Since this runs at build time, we'll know quickly + if a change to the API spec introduces this kind of problem. + + If an output file path is specified, the result is written there in JSON. + Either way, the result is returned. + """ + oas = json.load(input_file) + + for oas_schema_name, oas_schema in oas["components"]["schemas"].items(): + try: + _cleanse_schema(oas_schema) + except Exception as exc: + raise ConversionException(f"Error parsing schema {oas_schema_name}") from exc + + # Parse the $refs + oas_jsonref = jsonref.loads(json.dumps(oas)) + + # Replace the $refs + oas_json_norefs = jsonref.replace_refs(oas_jsonref) + + if output_file: + # Write to file + json.dump(oas_json_norefs, output_file) + + return oas_json_norefs + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("input_json_file", type=argparse.FileType('rt'), + help="Input JSON openapi file") + parser.add_argument("output_json_file", type=argparse.FileType('wt'), default=sys.stdout, + nargs='?', help="Output jsonschema-compatible openapi JSON file " + "(outputs to stdout if not specified)") + args = parser.parse_args() + convert_oas(args.input_json_file, args.output_json_file)