Skip to content

Commit

Permalink
Merge pull request #366 from Cray-HPE/validate
Browse files Browse the repository at this point in the history
Improve server code that validates incoming data
  • Loading branch information
mharding-hpe authored Aug 29, 2024
2 parents 17d93ee + 0913534 commit 6aaa6ff
Show file tree
Hide file tree
Showing 13 changed files with 450 additions and 202 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]
### Changed
- Improve server code that validates incoming data

### Fixed
- Added missing required Python modules to `requirements.txt`

Expand Down
87 changes: 65 additions & 22 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -43,42 +43,80 @@ RUN /usr/local/bin/docker-entrypoint.sh generate \
-c config/autogen-server.json \
--generate-alias-as-model

# Base image
FROM $ALPINE_BASE_IMAGE AS base

# Start by taking a base Alpine image, copying in our generated code,
# applying some updates, and creating our virtual Python environment
FROM $ALPINE_BASE_IMAGE AS alpine-base
WORKDIR /app
# We apply all generated code first
# Copy in generated code
COPY --from=codegen /app/lib/ /app/lib
# Copy in Python constraints file
COPY constraints.txt /app/
# Update packages to avoid security problems
RUN --mount=type=secret,id=netrc,target=/root/.netrc \
apk add --upgrade --no-cache apk-tools busybox && \
apk update && \
apk add --no-cache python3-dev py3-pip && \
apk -U upgrade --no-cache
ENV VIRTUAL_ENV=/app/venv
RUN python3 -m venv $VIRTUAL_ENV
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
RUN --mount=type=secret,id=netrc,target=/root/.netrc \
pip3 install --no-cache-dir -U pip -c constraints.txt && \
pip3 list --format freeze


# Generate JSON version of openapi spec and then convert its
# schemas using our convert_oas utility
FROM alpine-base AS openapi-json-converter
WORKDIR /app
COPY api/openapi.yaml convert-oas-requirements.txt /app
COPY utils/convert_oas/ /app/convert_oas
RUN --mount=type=secret,id=netrc,target=/root/.netrc \
apk add --no-cache yq && \
apk -U upgrade --no-cache && \
yq -o=json /app/openapi.yaml > /app/openapi.json && \
pip3 install --no-cache-dir -r convert-oas-requirements.txt && \
pip3 list --format freeze && \
python3 /app/convert_oas/convert_oas.py /app/openapi.json /app/lib/bos/server/openapi.jsonschema && \
cat /app/lib/bos/server/openapi.jsonschema


# Base image
FROM alpine-base AS base
WORKDIR /app
# Move autogenerated server requirements aside so that they can be referenced by
# project-wide requirements.txt; this allows us to specify download source and
# additional required libraries necessary for developer-authored controller/database
# code.
RUN mv lib/requirements.txt lib/bos/server/requirements.txt
#
# The openapi-generator creates a requirements file that specifies exactly Flask==2.1.1
# However, using Flask 2.2.5 is also compatible and resolves a CVE.
# Accordingly, we relax their requirements file.
RUN cat lib/bos/server/requirements.txt && \
RUN mv -v lib/requirements.txt lib/bos/server/requirements.txt && \
cat lib/bos/server/requirements.txt && \
sed -i 's/Flask == 2\(.*\)$/Flask >= 2\1\nFlask < 3/' lib/bos/server/requirements.txt && \
cat lib/bos/server/requirements.txt
# Then copy all src into the base image
COPY src/bos/ /app/lib/bos/
COPY constraints.txt requirements.txt /app/
# Update packages to avoid security problems
RUN apk add --upgrade --no-cache apk-tools busybox && \
apk update && \
apk add --no-cache gcc g++ python3-dev py3-pip musl-dev libffi-dev openssl-dev && \
apk -U upgrade --no-cache
# Create a virtual environment in which we can install Python packages. This
# isolates our installation from the system installation.
ENV VIRTUAL_ENV=/app/venv
RUN python3 -m venv $VIRTUAL_ENV
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
RUN pip3 install --no-cache-dir -U pip -c constraints.txt && \
pip3 list --format freeze
RUN --mount=type=secret,id=netrc,target=/root/.netrc pip3 install --no-cache-dir -r requirements.txt && \
pip3 list --format freeze
RUN cd lib && pip3 install --no-cache-dir . -c ../constraints.txt && \
# Copy jsonschema data file over from previous layer
COPY --from=openapi-json-converter /app/lib/bos/server/openapi.jsonschema /app/lib/bos/server/openapi.jsonschema
COPY requirements.txt /app/
# 1. Install and update packages to avoid security problems
# 2. Create a virtual environment in which we can install Python packages. This
# isolates our installation from the system installation.
RUN --mount=type=secret,id=netrc,target=/root/.netrc \
apk add --no-cache gcc g++ musl-dev libffi-dev openssl-dev && \
apk -U upgrade --no-cache && \
pip3 install --no-cache-dir -U pip -c constraints.txt && \
pip3 list --format freeze && \
pip3 install --no-cache-dir -r requirements.txt && \
pip3 list --format freeze && \
cd lib && \
pip3 install --no-cache-dir . -c ../constraints.txt && \
pip3 list --format freeze


# Base testing image
FROM base AS testing
WORKDIR /app
Expand All @@ -87,19 +125,22 @@ RUN --mount=type=secret,id=netrc,target=/root/.netrc cd /app && \
pip3 install --no-cache-dir -r test-requirements.txt && \
pip3 list --format freeze


# Codestyle reporting
FROM testing AS codestyle
WORKDIR /app
COPY docker_codestyle_entry.sh setup.cfg ./
CMD [ "./docker_codestyle_entry.sh" ]


# API Testing image
FROM testing AS api-testing
WORKDIR /app
COPY docker_api_test_entry.sh run_apitests.py ./
COPY api_tests/ api_tests/
CMD [ "./docker_api_test_entry.sh" ]


# Intermediate image
FROM base AS intermediate
WORKDIR /app
Expand All @@ -108,16 +149,18 @@ RUN apk add --no-cache uwsgi uwsgi-python3
COPY config/uwsgi.ini ./
ENTRYPOINT ["uwsgi", "--ini", "/app/uwsgi.ini"]


# Debug image
FROM intermediate AS debug
ENV PYTHONPATH "/app/lib/server"
WORKDIR /app
RUN apk add --no-cache busybox-extras && \
apk -U upgrade --no-cache && \
pip3 install --no-cache-dir rpdb -c constraints.txt && \
pip3 list --format freeze


# Application image
FROM intermediate AS application
WORKDIR /app
USER 65534:65534

3 changes: 2 additions & 1 deletion constraints.txt.in
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ inflection>=0.5.1,<0.6
itsdangerous>=2.1.2,<2.2
Jinja2>=3.0.3,<3.1
jmespath>=1.0.1,<1.1
jsonschema>=4.17.3,<4.18
jsonref>=1.1,<1.2
jsonschema>=4.23,<4.24
# CSM 1.6 uses Kubernetes 1.22, so use client v22.x to ensure compatibility
kubernetes>=22.6.0,<22.7
liveness>=1.4,<1.5
Expand Down
2 changes: 2 additions & 0 deletions convert-oas-requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
-c constraints.txt
jsonref
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ boto3
botocore
connexion
flask
jsonschema
kubernetes
liveness
PyYAML
Expand Down
10 changes: 6 additions & 4 deletions src/bos/server/controllers/v2/boot_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,17 @@
BOOT_SET_WARNING = 1
BOOT_SET_ERROR = 2

# Valid boot sets are required to have at least one of these fields
HARDWARE_SPECIFIER_FIELDS = ( "node_list", "node_roles_groups", "node_groups" )


def validate_boot_sets(session_template: dict,
operation: str,
template_name: str) -> tuple[str, int]:
"""
Validates the boot sets listed in a session template.
It ensures that there are boot sets.
It checks that each boot set specifies nodes via one of the specifier fields.
It checks that each boot set specifies nodes via at least one of the specifier fields.
Ensures that the boot artifacts exist.
Inputs:
Expand All @@ -63,16 +66,15 @@ def validate_boot_sets(session_template: dict,
msg = f"Session template '{template_name}' requires at least 1 boot set."
return BOOT_SET_ERROR, msg

hardware_specifier_fields = ('node_roles_groups', 'node_list', 'node_groups')
for bs_name, bs in session_template['boot_sets'].items():
# Verify that the hardware is specified
specified = [bs.get(field, None)
for field in hardware_specifier_fields]
for field in HARDWARE_SPECIFIER_FIELDS]
if not any(specified):
msg = f"Session template: '{template_name}' boot set: '{bs_name}' " \
f"must have at least one " \
f"hardware specifier field provided (%s); None were provided." \
% (', '.join(sorted(hardware_specifier_fields)))
% (', '.join(sorted(HARDWARE_SPECIFIER_FIELDS)))
LOGGER.error(msg)
return BOOT_SET_ERROR, msg
if operation in ['boot', 'reboot']:
Expand Down
110 changes: 32 additions & 78 deletions src/bos/server/controllers/v2/components.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,7 @@
from bos.server import redis_db_utils as dbutils
from bos.server.controllers.v2.options import get_v2_options_data
from bos.server.dbs.boot_artifacts import get_boot_artifacts, BssTokenUnknown
from bos.server.models.v2_component import V2Component as Component # noqa: E501
from bos.server.models.v2_component_array import V2ComponentArray as ComponentArray # noqa: E501
from bos.server.models.v2_components_update import V2ComponentsUpdate as \
ComponentsUpdate # noqa: E501
from bos.server.utils import get_request_json

LOGGER = logging.getLogger('bos.server.controllers.v2.components')
DB = dbutils.get_wrapper(db='components')
Expand Down Expand Up @@ -173,24 +170,13 @@ def _matches_filter(data, enabled, session, staged_session, phase, status):
def put_v2_components():
"""Used by the PUT /components API operation"""
LOGGER.debug("PUT /v2/components invoked put_v2_components")
if not connexion.request.is_json:
msg = "Must be in JSON format"
LOGGER.error(msg)
return msg, 400

LOGGER.debug("connexion.request.is_json")
data=connexion.request.get_json()
LOGGER.debug("type=%s", type(data))
LOGGER.debug("Received: %s", data)

try:
# This call is just to ensure that the data
# coming in is valid per the API schema
ComponentArray.from_dict(data) # noqa: E501
data = get_request_json()
except Exception as err:
msg="Provided data does not follow API spec"
LOGGER.error("%s: %s", msg, exc_type_msg(err))
return connexion.problem(status=400, title=msg,detail=str(err))
LOGGER.error("Error parsing PUT request data: %s", exc_type_msg(err))
return connexion.problem(
status=400, title="Error parsing the data provided.",
detail=str(err))

components = []
for component_data in data:
Expand All @@ -213,35 +199,17 @@ def put_v2_components():
def patch_v2_components():
"""Used by the PATCH /components API operation"""
LOGGER.debug("PATCH /v2/components invoked patch_v2_components")
if not connexion.request.is_json:
msg = "Must be in JSON format"
LOGGER.error(msg)
return msg, 400

LOGGER.debug("connexion.request.is_json")
data=connexion.request.get_json()
LOGGER.debug("type=%s", type(data))
LOGGER.debug("Received: %s", data)
try:
data = get_request_json()
except Exception as err:
LOGGER.error("Error parsing PATCH request data: %s", exc_type_msg(err))
return connexion.problem(
status=400, title="Error parsing the data provided.",
detail=str(err))

if isinstance(data, list):
try:
# This call is just to ensure that the data
# coming in is valid per the API schema
ComponentArray.from_dict(data) # noqa: E501
except Exception as err:
msg="Provided data does not follow API spec"
LOGGER.error("%s: %s", msg, exc_type_msg(err))
return connexion.problem(status=400, title=msg,detail=str(err))
return patch_v2_components_list(data)
if isinstance(data, dict):
try:
# This call is just to ensure that the data
# coming in is valid per the API schema
ComponentsUpdate.from_dict(data) # noqa: E501
except Exception as err:
msg="Provided data does not follow API spec"
LOGGER.error("%s: %s", msg, exc_type_msg(err))
return connexion.problem(status=400, title=msg,detail=str(err))
return patch_v2_components_dict(data)

LOGGER.error("Unexpected data type %s", str(type(data)))
Expand Down Expand Up @@ -340,24 +308,14 @@ def get_v2_component(component_id):
def put_v2_component(component_id):
"""Used by the PUT /components/{component_id} API operation"""
LOGGER.debug("PUT /v2/components/%s invoked put_v2_component", component_id)
if not connexion.request.is_json:
msg = "Must be in JSON format"
LOGGER.error(msg)
return msg, 400

LOGGER.debug("connexion.request.is_json")
data=connexion.request.get_json()
LOGGER.debug("type=%s", type(data))
LOGGER.debug("Received: %s", data)

try:
# This call is just to ensure that the data
# coming in is valid per the API schema
Component.from_dict(data) # noqa: E501
data = get_request_json()
except Exception as err:
msg="Provided data does not follow API spec"
LOGGER.error("%s: %s", msg, exc_type_msg(err))
return connexion.problem(status=400, title=msg,detail=str(err))
LOGGER.error("Error parsing PUT '%s' request data: %s", component_id, exc_type_msg(err))
return connexion.problem(
status=400, title="Error parsing the data provided.",
detail=str(err))

data['id'] = component_id
data = _set_auto_fields(data)
return DB.put(component_id, data), 200
Expand All @@ -368,24 +326,13 @@ def put_v2_component(component_id):
def patch_v2_component(component_id):
"""Used by the PATCH /components/{component_id} API operation"""
LOGGER.debug("PATCH /v2/components/%s invoked patch_v2_component", component_id)
if not connexion.request.is_json:
msg = "Must be in JSON format"
LOGGER.error(msg)
return msg, 400

LOGGER.debug("connexion.request.is_json")
data=connexion.request.get_json()
LOGGER.debug("type=%s", type(data))
LOGGER.debug("Received: %s", data)

try:
# This call is just to ensure that the data
# coming in is valid per the API schema
Component.from_dict(data) # noqa: E501
data = get_request_json()
except Exception as err:
msg="Provided data does not follow API spec"
LOGGER.error("%s: %s", msg, exc_type_msg(err))
return connexion.problem(status=400, title=msg,detail=str(err))
LOGGER.error("Error parsing PATCH '%s' request data: %s", component_id, exc_type_msg(err))
return connexion.problem(
status=400, title="Error parsing the data provided.",
detail=str(err))

if component_id not in DB or not _is_valid_tenant_component(component_id):
LOGGER.warning("Component %s could not be found", component_id)
Expand Down Expand Up @@ -440,12 +387,19 @@ def delete_v2_component(component_id):
def post_v2_apply_staged():
"""Used by the POST /applystaged API operation"""
LOGGER.debug("POST /v2/applystaged invoked post_v2_apply_staged")
try:
data = get_request_json()
except Exception as err:
LOGGER.error("Error parsing POST request data: %s", exc_type_msg(err))
return connexion.problem(
status=400, title="Error parsing the data provided.",
detail=str(err))

response = {"succeeded": [], "failed": [], "ignored": []}
# Obtain latest desired behavior for how to clear staging information
# for all components
clear_staged = get_v2_options_data().get('clear_stage', False)
try:
data = connexion.request.get_json()
xnames = data.get("xnames", [])
allowed_xnames, rejected_xnames = _apply_tenant_limit(xnames)
response["ignored"] = rejected_xnames
Expand Down
Loading

0 comments on commit 6aaa6ff

Please sign in to comment.