From b723a1ceaa204d266fa205435d79fa58504628a6 Mon Sep 17 00:00:00 2001 From: Kiersten Stokes Date: Fri, 6 May 2022 11:23:16 -0500 Subject: [PATCH 01/27] Add backend support for Kubernetes secrets-based env vars --- elyra/kfp/operator.py | 15 +++++++++ elyra/pipeline/airflow/processor_airflow.py | 4 ++- elyra/pipeline/kfp/processor_kfp.py | 2 ++ elyra/pipeline/pipeline.py | 18 +++++++++++ elyra/pipeline/pipeline_constants.py | 1 + elyra/pipeline/pipeline_definition.py | 10 ++++++ elyra/pipeline/processor.py | 31 +++++++++++++++++++ .../templates/airflow/airflow_template.jinja2 | 25 ++++++++++++--- .../generic_properties_template.jinja2 | 27 +++++++++++++++- .../pipeline_properties_template.jinja2 | 25 +++++++++++++++ 10 files changed, 152 insertions(+), 6 deletions(-) diff --git a/elyra/kfp/operator.py b/elyra/kfp/operator.py index 296026bcb..6c393606f 100644 --- a/elyra/kfp/operator.py +++ b/elyra/kfp/operator.py @@ -19,6 +19,7 @@ from typing import Dict from typing import List from typing import Optional +from typing import Tuple from kfp.dsl import ContainerOp from kfp.dsl import RUN_ID_PLACEHOLDER @@ -27,6 +28,7 @@ from kubernetes.client.models import V1EnvVarSource from kubernetes.client.models import V1ObjectFieldSelector from kubernetes.client.models import V1PersistentVolumeClaimVolumeSource +from kubernetes.client.models import V1SecretKeySelector from kubernetes.client.models import V1Volume from kubernetes.client.models import V1VolumeMount @@ -82,6 +84,7 @@ def __init__( gpu_limit: Optional[str] = None, workflow_engine: Optional[str] = "argo", volume_mounts: Optional[Dict[str, str]] = None, + kubernetes_secrets: Optional[List[Tuple]] = None, **kwargs, ): """Create a new instance of ContainerOp. @@ -106,6 +109,7 @@ def __init__( gpu_limit: maximum number of GPUs allowed for the operation workflow_engine: Kubeflow workflow engine, defaults to 'argo' volume_mounts: data volumes to be mounted + kubernetes_secrets: secrets to be made available as environment variables kwargs: additional key value pairs to pass e.g. name, image, sidecars & is_exit_handler. See Kubeflow pipelines ContainerOp definition for more parameters or how to use https://kubeflow-pipelines.readthedocs.io/en/latest/source/kfp.dsl.html#kfp.dsl.ContainerOp @@ -133,6 +137,7 @@ def __init__( self.mem_request = mem_request self.gpu_limit = gpu_limit self.volume_mounts = volume_mounts # optional data volumes to be mounted to the pod + self.kubernetes_secrets = kubernetes_secrets # optional secrets to be made available as env vars argument_list = [] @@ -244,6 +249,16 @@ def __init__( for key, value in self.pipeline_envs.items(): # Convert dict entries to format kfp needs self.container.add_env_variable(V1EnvVar(name=key, value=value)) + if self.kubernetes_secrets: + for secret in self.kubernetes_secrets: # Convert tuple entries to format kfp needs + env_var_name, secret_name, secret_key = secret + self.container.add_env_variable( + V1EnvVar( + name=env_var_name, + value_from=V1EnvVarSource(secret_key_ref=V1SecretKeySelector(name=secret_name, key=secret_key)), + ) + ) + # If crio volume size is found then assume kubeflow pipelines environment is using CRI-o as # its container runtime if self.emptydir_volume_size: diff --git a/elyra/pipeline/airflow/processor_airflow.py b/elyra/pipeline/airflow/processor_airflow.py index b9065de38..4079c4b16 100644 --- a/elyra/pipeline/airflow/processor_airflow.py +++ b/elyra/pipeline/airflow/processor_airflow.py @@ -318,6 +318,7 @@ def _cc_pipeline(self, pipeline: Pipeline, pipeline_name: str, pipeline_instance ) volume_mounts = self._get_volume_mounts(operation=operation) + kubernetes_secrets = self._get_kubernetes_secrets(operation=operation) target_op = { "notebook": operation.name, @@ -334,6 +335,7 @@ def _cc_pipeline(self, pipeline: Pipeline, pipeline_name: str, pipeline_instance "is_generic_operator": True, "doc": operation.doc, "volume_mounts": volume_mounts, + "kubernetes_secrets": kubernetes_secrets, } if runtime_image_pull_secret is not None: @@ -507,7 +509,7 @@ def create_pipeline_file( python_output = template.render( operations_list=target_ops, pipeline_name=pipeline_instance_id, - namespace=user_namespace, + user_namespace=user_namespace, cos_secret=cos_secret, kube_config_path=None, is_paused_upon_creation="False", diff --git a/elyra/pipeline/kfp/processor_kfp.py b/elyra/pipeline/kfp/processor_kfp.py index aa41539a5..9c8780efa 100644 --- a/elyra/pipeline/kfp/processor_kfp.py +++ b/elyra/pipeline/kfp/processor_kfp.py @@ -521,6 +521,7 @@ def _cc_pipeline( ) volume_mounts = self._get_volume_mounts(operation=operation) + kubernetes_secrets = self._get_kubernetes_secrets(operation=operation) target_ops[operation.id] = ExecuteFileOp( name=sanitized_operation_name, @@ -547,6 +548,7 @@ def _cc_pipeline( "mlpipeline-ui-metadata": f"{pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR']}/mlpipeline-ui-metadata.json", # noqa }, volume_mounts=volume_mounts, + kubernetes_secrets=kubernetes_secrets, ) if operation.doc: diff --git a/elyra/pipeline/pipeline.py b/elyra/pipeline/pipeline.py index 4519c1197..3a94bbf9e 100644 --- a/elyra/pipeline/pipeline.py +++ b/elyra/pipeline/pipeline.py @@ -487,6 +487,24 @@ def merge(cls, primary: "KeyValueList", secondary: "KeyValueList") -> "KeyValueL return KeyValueList.from_dict({**secondary_dict, **primary_dict}) + @classmethod + def difference(cls, minuend: "KeyValueList", subtrahend: "KeyValueList") -> "KeyValueList": + """ + Given two lists, convert to dictionaries and remove any keys found in the + subtrahend from the minuend, if present. + + :param minuend: list to be subtracted from + :param subtrahend: list whose keys will be removed from the minuend + + :returns: the difference of the two lists + """ + subtract_dict = minuend.to_dict() + for key in subtrahend.to_dict().keys(): + if key in subtract_dict: + subtract_dict.pop(key) + + return KeyValueList.from_dict(subtract_dict) + @staticmethod def log_message(msg: str, logger: Optional[Logger] = None, level: Optional[int] = logging.DEBUG): """ diff --git a/elyra/pipeline/pipeline_constants.py b/elyra/pipeline/pipeline_constants.py index 42d75eafb..0c2be96cb 100644 --- a/elyra/pipeline/pipeline_constants.py +++ b/elyra/pipeline/pipeline_constants.py @@ -18,6 +18,7 @@ RUNTIME_IMAGE = "runtime_image" ENV_VARIABLES = "env_vars" MOUNTED_VOLUMES = "mounted_volumes" +KUBERNETES_SECRETS = "kubernetes_secrets" PIPELINE_META_PROPERTIES = ["name", "description", "runtime"] # optional static prefix to be used when generating an object name for object storage COS_OBJECT_PREFIX = "cos_object_prefix" diff --git a/elyra/pipeline/pipeline_definition.py b/elyra/pipeline/pipeline_definition.py index 386fe24a0..a32762350 100644 --- a/elyra/pipeline/pipeline_definition.py +++ b/elyra/pipeline/pipeline_definition.py @@ -538,6 +538,16 @@ def propagate_pipeline_default_properties(self): merged_list: KeyValueList = KeyValueList.merge(node_value, pipeline_default_value) node.set_component_parameter(property_name, merged_list) + if self.primary_pipeline.runtime_config != "local": + # In the case of a duplicate between env vars and kubernetes secrets, + # prefer kubernetes secrets and remove any matching env vars + new_list = KeyValueList.difference( + minuend=node.get_component_parameter(pipeline_constants.ENV_VARIABLES), + subtrahend=node.get_component_parameter(pipeline_constants.KUBERNETES_SECRETS), + ) + if new_list: + node.set_component_parameter(pipeline_constants.ENV_VARIABLES, new_list) + def is_valid(self) -> bool: """ Represents whether or not the pipeline structure is valid diff --git a/elyra/pipeline/processor.py b/elyra/pipeline/processor.py index 8179e8bc1..5bb8f3fdf 100644 --- a/elyra/pipeline/processor.py +++ b/elyra/pipeline/processor.py @@ -25,6 +25,7 @@ from typing import List from typing import Optional from typing import Set +from typing import Tuple from typing import Union import entrypoints @@ -39,8 +40,10 @@ from elyra.pipeline.component import Component from elyra.pipeline.component_catalog import ComponentCache from elyra.pipeline.pipeline import GenericOperation +from elyra.pipeline.pipeline import KeyValueList from elyra.pipeline.pipeline import Operation from elyra.pipeline.pipeline import Pipeline +from elyra.pipeline.pipeline_constants import KUBERNETES_SECRETS from elyra.pipeline.pipeline_constants import MOUNTED_VOLUMES from elyra.pipeline.runtime_type import RuntimeProcessorType from elyra.pipeline.runtime_type import RuntimeTypeResources @@ -605,3 +608,31 @@ def _get_volume_mounts(self, operation: Operation) -> Optional[Dict[str, str]]: formatted_mount_path = f"/{mount_path.strip('/')}" volume_mounts_valid[formatted_mount_path] = pvc_name return volume_mounts_valid + + def _get_kubernetes_secrets(self, operation: Operation) -> Optional[List[Tuple]]: + """ + Loops through an Operation's kubernetes secrets to strip whitespace + and re-format as a tuple. + + :param operation: the operation to check for secrets + :return: tuple of env var name, secret name, and secret key + """ + valid_secrets = [] + secrets = operation.component_params.get(KUBERNETES_SECRETS) + if secrets and isinstance(secrets, KeyValueList): + for env_var_name, secret in secrets.to_dict().items(): + secret_tuple = secret.split(":", 1) + if len(secret_tuple) != 2: + self.log.warning(f"Ignoring invalid secret for '{env_var_name}': missing secret name and/or key.") + continue + secret_name, secret_key = secret_tuple[0].strip(), secret_tuple[1].strip() + if not secret_name: + self.log.warning(f"Ignoring invalid secret for '{env_var_name}': missing secret name.") + continue + if not secret_key: + self.log.warning(f"Ignoring invalid secret for '{env_var_name}': missing secret key.") + continue + + valid_secrets.append((env_var_name, secret_name, secret_key)) + + return valid_secrets diff --git a/elyra/templates/airflow/airflow_template.jinja2 b/elyra/templates/airflow/airflow_template.jinja2 index 08db9b09e..4630b4ab3 100644 --- a/elyra/templates/airflow/airflow_template.jinja2 +++ b/elyra/templates/airflow/airflow_template.jinja2 @@ -33,8 +33,10 @@ env_var_secret_key = Secret(deploy_type='env', key='AWS_SECRET_ACCESS_KEY', ) {% endif %} +{% set ns = namespace() %} {% for key, operation in operations_list.items() %} +{% set ns.operation_kubernetes_secrets = "" %} {% if not operation.is_generic_operator %} {% for import_statement in operation.imports %} @@ -44,6 +46,21 @@ env_var_secret_key = Secret(deploy_type='env', from airflow.contrib.operators.kubernetes_pod_operator import KubernetesPodOperator {% endif %} +{% if operation.kubernetes_secrets and not cos_secret%} +from airflow.kubernetes.secret import Secret +# Kubernetes secrets for operation '{{ operation.id|regex_replace }}' + {% for secret in operation.kubernetes_secrets %}{% set env_var_name = secret[0] %}{% set secret_name = secret[1] %}{% set secret_key = secret[2] %} +secret_{{ operation.id|regex_replace }}_{{ loop.index }} = Secret(deploy_type='env', + deploy_target='{{env_var_name}}', + secret='{{ secret_name }}', + key='{{ secret_key }}', +) + {% set ns.operation_kubernetes_secrets = ns.operation_kubernetes_secrets ~ 'secret_' ~ operation.id|regex_replace ~ '_' ~ loop.index ~ ', ' %} + {% endfor %} +{% endif %} + +{% if cos_secret %}{% set ns.operation_kubernetes_secrets = ns.operation_kubernetes_secrets ~ "env_var_secret_id, env_var_secret_key" %}{% endif %} + {% if operation.operator_source %}# Operator source: {{ operation.operator_source }}{% endif %}{% if not operation.is_generic_operator %} op_{{ operation.id|regex_replace }} = {{ operation.class_name }}( task_id='{{ operation.notebook|regex_replace }}', @@ -73,7 +90,7 @@ volume_mounts_{{ operation.id|regex_replace }}.append(volume_mount_{{ loop.index {% endfor %} {% endif %} op_{{ operation.id|regex_replace }} = KubernetesPodOperator(name='{{ operation.notebook|regex_replace }}', - namespace='{{ namespace }}', + namespace='{{ user_namespace }}', image='{{ operation.runtime_image }}', {% if operation.runtime_image_pull_secret %} image_pull_secrets='{{ operation.runtime_image_pull_secret }}', @@ -100,9 +117,9 @@ op_{{ operation.id|regex_replace }} = KubernetesPodOperator(name='{{ operation.n }, {% endif %} - {% if cos_secret %} - secrets=[env_var_secret_id, env_var_secret_key], - {% endif %} + {% if ns.operation_kubernetes_secrets %} + secrets=[{{ ns.operation_kubernetes_secrets }}], + {% endif %} in_cluster={{ in_cluster }}, config_file="{{ kube_config_path }}", {% endif %} diff --git a/elyra/templates/components/generic_properties_template.jinja2 b/elyra/templates/components/generic_properties_template.jinja2 index a693ff057..8e0e34af6 100644 --- a/elyra/templates/components/generic_properties_template.jinja2 +++ b/elyra/templates/components/generic_properties_template.jinja2 @@ -8,6 +8,7 @@ "elyra_memory": null, "elyra_outputs": [], "elyra_env_vars": [], + "elyra_kubernetes_secrets": [], "elyra_dependencies": [], "elyra_include_subdirectories": false, "elyra_mounted_volumes": [] @@ -40,6 +41,9 @@ { "id": "elyra_env_vars" }, + { + "id": "elyra_kubernetes_secrets" + }, { "id": "elyra_outputs" }, @@ -189,6 +193,22 @@ "keyValueEntries": true } }, + { + "parameter_ref": "elyra_kubernetes_secrets", + "control": "custom", + "custom_control_id": "StringArrayControl", + "label": { + "default": "Kubernetes Secrets" + }, + "description": { + "default": "Kubernetes secrets to make available as environment variables to this node. The secret name and key given must be present in the Kubernetes namespace where the node is executed or this node will not run.", + "placement": "on_panel" + }, + "data": { + "placeholder": "env_var_name=secret_name:secret_key", + "keyValueEntries": true + } + }, { "parameter_ref": "elyra_outputs", "control": "custom", @@ -218,7 +238,7 @@ "data": { "placeholder": "/mount/path=pvc-name" } - } + } ], "group_info": [ { @@ -260,6 +280,11 @@ "type": "controls", "parameter_refs": ["elyra_env_vars"] }, + { + "id": "elyra_kubernetes_secrets", + "type": "controls", + "parameter_refs": ["elyra_kubernetes_secrets"] + }, { "id": "elyra_outputs", "type": "controls", diff --git a/elyra/templates/pipeline/pipeline_properties_template.jinja2 b/elyra/templates/pipeline/pipeline_properties_template.jinja2 index 7683df5f1..e171556ed 100644 --- a/elyra/templates/pipeline/pipeline_properties_template.jinja2 +++ b/elyra/templates/pipeline/pipeline_properties_template.jinja2 @@ -6,6 +6,7 @@ "cos_object_prefix": "", "elyra_runtime_image": null, "elyra_env_vars": [], + "elyra_kubernetes_secrets": [], "elyra_mounted_volumes": [] }, "parameters": [ @@ -27,6 +28,9 @@ { "id": "elyra_env_vars" }, + { + "id": "elyra_kubernetes_secrets" + }, { "id": "elyra_mounted_volumes" } @@ -99,6 +103,22 @@ "keyValueEntries": true } }, + { + "parameter_ref": "elyra_kubernetes_secrets", + "control": "custom", + "custom_control_id": "StringArrayControl", + "label": { + "default": "Kubernetes Secrets" + }, + "description": { + "default": "Kubernetes secrets to make available as environment variables to all nodes.", + "placement": "on_panel" + }, + "data": { + "placeholder": "env_var_name=secret_name:secret_key", + "keyValueEntries": true + } + }, { "parameter_ref": "elyra_mounted_volumes", "control": "custom", @@ -163,6 +183,11 @@ "type": "controls", "parameter_refs": ["elyra_env_vars"] }, + { + "id": "elyra_kubernetes_secrets", + "type": "controls", + "parameter_refs": ["elyra_kubernetes_secrets"] + }, { "id": "elyra_mounted_volumes", "type": "controls", From 7b66723d8256ce38c7145b781a0a225aa3fd8a56 Mon Sep 17 00:00:00 2001 From: Kiersten Stokes Date: Fri, 6 May 2022 12:35:14 -0500 Subject: [PATCH 02/27] Fix server tests --- elyra/pipeline/pipeline_definition.py | 25 ++++++++++++------- .../tests/pipeline/resources/properties.json | 25 +++++++++++++++++++ elyra/tests/pipeline/test_handlers.py | 1 + 3 files changed, 42 insertions(+), 9 deletions(-) diff --git a/elyra/pipeline/pipeline_definition.py b/elyra/pipeline/pipeline_definition.py index a32762350..53f4c6217 100644 --- a/elyra/pipeline/pipeline_definition.py +++ b/elyra/pipeline/pipeline_definition.py @@ -23,9 +23,12 @@ from jinja2 import Environment, Undefined from jinja2 import PackageLoader -from elyra.pipeline import pipeline_constants from elyra.pipeline.pipeline import KeyValueList from elyra.pipeline.pipeline import Operation +from elyra.pipeline.pipeline_constants import ENV_VARIABLES +from elyra.pipeline.pipeline_constants import KUBERNETES_SECRETS +from elyra.pipeline.pipeline_constants import PIPELINE_DEFAULTS +from elyra.pipeline.pipeline_constants import PIPELINE_META_PROPERTIES class AppDataBase(object): # ABC @@ -174,7 +177,7 @@ def pipeline_parameters(self) -> Dict[str, Any]: and runtime) or the pipeline defaults dictionary """ all_properties = self._node["app_data"].get("properties", {}) - excluded_properties = pipeline_constants.PIPELINE_META_PROPERTIES + [pipeline_constants.PIPELINE_DEFAULTS] + excluded_properties = PIPELINE_META_PROPERTIES + [PIPELINE_DEFAULTS] pipeline_parameters = {} for property_name, value in all_properties.items(): @@ -215,7 +218,7 @@ def convert_kv_properties(self, kv_properties: List[str]): Convert pipeline defaults-level list properties that have been identified as sets of key-value pairs from a plain list type to the KeyValueList type. """ - pipeline_defaults = self.get_property(pipeline_constants.PIPELINE_DEFAULTS, {}) + pipeline_defaults = self.get_property(PIPELINE_DEFAULTS, {}) for property_name, value in pipeline_defaults.items(): if property_name not in kv_properties: continue @@ -224,7 +227,7 @@ def convert_kv_properties(self, kv_properties: List[str]): pipeline_defaults[property_name] = KeyValueList(value) if pipeline_defaults: - self.set_property(pipeline_constants.PIPELINE_DEFAULTS, pipeline_defaults) + self.set_property(PIPELINE_DEFAULTS, pipeline_defaults) class Node(AppDataBase): @@ -515,7 +518,7 @@ def propagate_pipeline_default_properties(self): kv_properties = PipelineDefinition.get_kv_properties() self.primary_pipeline.convert_kv_properties(kv_properties) - pipeline_default_properties = self.primary_pipeline.get_property(pipeline_constants.PIPELINE_DEFAULTS, {}) + pipeline_default_properties = self.primary_pipeline.get_property(PIPELINE_DEFAULTS, {}) for node in self.pipeline_nodes: if not Operation.is_generic_operation(node.op): continue @@ -538,15 +541,19 @@ def propagate_pipeline_default_properties(self): merged_list: KeyValueList = KeyValueList.merge(node_value, pipeline_default_value) node.set_component_parameter(property_name, merged_list) - if self.primary_pipeline.runtime_config != "local": + if ( + self.primary_pipeline.runtime_config != "local" + and node.get_component_parameter(ENV_VARIABLES) + and node.get_component_parameter(KUBERNETES_SECRETS) + ): # In the case of a duplicate between env vars and kubernetes secrets, # prefer kubernetes secrets and remove any matching env vars new_list = KeyValueList.difference( - minuend=node.get_component_parameter(pipeline_constants.ENV_VARIABLES), - subtrahend=node.get_component_parameter(pipeline_constants.KUBERNETES_SECRETS), + minuend=node.get_component_parameter(ENV_VARIABLES), + subtrahend=node.get_component_parameter(KUBERNETES_SECRETS), ) if new_list: - node.set_component_parameter(pipeline_constants.ENV_VARIABLES, new_list) + node.set_component_parameter(ENV_VARIABLES, new_list) def is_valid(self) -> bool: """ diff --git a/elyra/tests/pipeline/resources/properties.json b/elyra/tests/pipeline/resources/properties.json index e0b61e493..cc7be107e 100644 --- a/elyra/tests/pipeline/resources/properties.json +++ b/elyra/tests/pipeline/resources/properties.json @@ -9,6 +9,7 @@ "elyra_mounted_volumes": [], "elyra_outputs": [], "elyra_env_vars": [], + "elyra_kubernetes_secrets": [], "elyra_dependencies": [], "elyra_include_subdirectories": false }, @@ -40,6 +41,9 @@ { "id": "elyra_env_vars" }, + { + "id": "elyra_kubernetes_secrets" + }, { "id": "elyra_outputs" }, @@ -189,6 +193,22 @@ "keyValueEntries": true } }, + { + "parameter_ref": "elyra_kubernetes_secrets", + "control": "custom", + "custom_control_id": "StringArrayControl", + "label": { + "default": "Kubernetes Secrets" + }, + "description": { + "default": "Kubernetes secrets to make available as environment variables to this node. The secret name and key given must be present in the Kubernetes namespace where the node is executed or this node will not run.", + "placement": "on_panel" + }, + "data": { + "placeholder": "env_var_name=secret_name:secret_key", + "keyValueEntries": true + } + }, { "parameter_ref": "elyra_outputs", "control": "custom", @@ -260,6 +280,11 @@ "type": "controls", "parameter_refs": ["elyra_env_vars"] }, + { + "id": "elyra_kubernetes_secrets", + "type": "controls", + "parameter_refs": ["elyra_kubernetes_secrets"] + }, { "id": "elyra_outputs", "type": "controls", diff --git a/elyra/tests/pipeline/test_handlers.py b/elyra/tests/pipeline/test_handlers.py index 8b4f49145..434c86bfd 100644 --- a/elyra/tests/pipeline/test_handlers.py +++ b/elyra/tests/pipeline/test_handlers.py @@ -201,5 +201,6 @@ async def test_get_pipeline_properties_definition(jp_fetch): {"id": "cos_object_prefix"}, {"id": "elyra_runtime_image"}, {"id": "elyra_env_vars"}, + {"id": "elyra_kubernetes_secrets"}, {"id": "elyra_mounted_volumes"}, ] From 7e0592e31b903367aa4852a0276d642273c2b82c Mon Sep 17 00:00:00 2001 From: Kiersten Stokes Date: Fri, 6 May 2022 14:31:14 -0500 Subject: [PATCH 03/27] Address review: docstring and resource name-checking --- elyra/pipeline/pipeline.py | 4 ++-- elyra/pipeline/processor.py | 10 ++++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/elyra/pipeline/pipeline.py b/elyra/pipeline/pipeline.py index 3a94bbf9e..0890244e8 100644 --- a/elyra/pipeline/pipeline.py +++ b/elyra/pipeline/pipeline.py @@ -490,8 +490,8 @@ def merge(cls, primary: "KeyValueList", secondary: "KeyValueList") -> "KeyValueL @classmethod def difference(cls, minuend: "KeyValueList", subtrahend: "KeyValueList") -> "KeyValueList": """ - Given two lists, convert to dictionaries and remove any keys found in the - subtrahend from the minuend, if present. + Given KeyValueLists, convert to dictionaries and remove any keys found in the + second (subtrahend) from the first (minuend), if present. :param minuend: list to be subtracted from :param subtrahend: list whose keys will be removed from the minuend diff --git a/elyra/pipeline/processor.py b/elyra/pipeline/processor.py index 5bb8f3fdf..c6429610b 100644 --- a/elyra/pipeline/processor.py +++ b/elyra/pipeline/processor.py @@ -626,11 +626,13 @@ def _get_kubernetes_secrets(self, operation: Operation) -> Optional[List[Tuple]] self.log.warning(f"Ignoring invalid secret for '{env_var_name}': missing secret name and/or key.") continue secret_name, secret_key = secret_tuple[0].strip(), secret_tuple[1].strip() - if not secret_name: - self.log.warning(f"Ignoring invalid secret for '{env_var_name}': missing secret name.") + if not is_valid_kubernetes_resource_name(secret_name): + self.log.warning(f"Ignoring invalid secret for '{env_var_name}': the secret name " + f"'{secret_name} 'is not a valid Kubernetes resource name.") continue - if not secret_key: - self.log.warning(f"Ignoring invalid secret for '{env_var_name}': missing secret key.") + if not is_valid_kubernetes_resource_name(secret_key): + self.log.warning(f"Ignoring invalid secret for '{env_var_name}': the secret key " + f"'{secret_key} 'is not a valid Kubernetes resource name.") continue valid_secrets.append((env_var_name, secret_name, secret_key)) From 2a36791c574b10ee8a49a5dbeb66baf9491f0618 Mon Sep 17 00:00:00 2001 From: Kiersten Stokes Date: Fri, 6 May 2022 14:37:15 -0500 Subject: [PATCH 04/27] Update snapshots --- .../matches-complex-pipeline-snapshot.1.snap | 5 +++++ .../matches-simple-pipeline-snapshot.1.snap | 1 + 2 files changed, 6 insertions(+) diff --git a/tests/snapshots/pipeline-editor-tests/matches-complex-pipeline-snapshot.1.snap b/tests/snapshots/pipeline-editor-tests/matches-complex-pipeline-snapshot.1.snap index c37998ea4..4df26a317 100644 --- a/tests/snapshots/pipeline-editor-tests/matches-complex-pipeline-snapshot.1.snap +++ b/tests/snapshots/pipeline-editor-tests/matches-complex-pipeline-snapshot.1.snap @@ -26,6 +26,7 @@ Object { ], "filename": "producer.ipynb", "include_subdirectories": false, + "kubernetes_secrets": Array [], "mounted_volumes": Array [], "outputs": Array [ "output-1.csv", @@ -81,6 +82,7 @@ Object { "env_vars": Array [], "filename": "consumer.ipynb", "include_subdirectories": false, + "kubernetes_secrets": Array [], "mounted_volumes": Array [], "outputs": Array [], "runtime_image": "continuumio/anaconda3:2021.11", @@ -135,6 +137,7 @@ Object { "env_vars": Array [], "filename": "../scripts/setup.py", "include_subdirectories": false, + "kubernetes_secrets": Array [], "mounted_volumes": Array [], "outputs": Array [], "runtime_image": "continuumio/anaconda3:2021.11", @@ -187,6 +190,7 @@ Object { "env_vars": Array [], "filename": "create-source-files.py", "include_subdirectories": false, + "kubernetes_secrets": Array [], "mounted_volumes": Array [], "outputs": Array [ "input-1.csv", @@ -242,6 +246,7 @@ Object { "env_vars": Array [], "filename": "producer-script.py", "include_subdirectories": false, + "kubernetes_secrets": Array [], "mounted_volumes": Array [], "outputs": Array [ "output-3.csv", diff --git a/tests/snapshots/pipeline-editor-tests/matches-simple-pipeline-snapshot.1.snap b/tests/snapshots/pipeline-editor-tests/matches-simple-pipeline-snapshot.1.snap index 5e639f0d5..96bb64bfb 100644 --- a/tests/snapshots/pipeline-editor-tests/matches-simple-pipeline-snapshot.1.snap +++ b/tests/snapshots/pipeline-editor-tests/matches-simple-pipeline-snapshot.1.snap @@ -27,6 +27,7 @@ Object { ], "filename": "helloworld.ipynb", "include_subdirectories": false, + "kubernetes_secrets": Array [], "mounted_volumes": Array [], "outputs": Array [], }, From 6568f5cd4d7c89cd6f7c383ef009b3437b99250a Mon Sep 17 00:00:00 2001 From: Kiersten Stokes Date: Fri, 6 May 2022 14:41:31 -0500 Subject: [PATCH 05/27] Fix lint --- elyra/pipeline/processor.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/elyra/pipeline/processor.py b/elyra/pipeline/processor.py index c6429610b..aa730b7c8 100644 --- a/elyra/pipeline/processor.py +++ b/elyra/pipeline/processor.py @@ -627,12 +627,16 @@ def _get_kubernetes_secrets(self, operation: Operation) -> Optional[List[Tuple]] continue secret_name, secret_key = secret_tuple[0].strip(), secret_tuple[1].strip() if not is_valid_kubernetes_resource_name(secret_name): - self.log.warning(f"Ignoring invalid secret for '{env_var_name}': the secret name " - f"'{secret_name} 'is not a valid Kubernetes resource name.") + self.log.warning( + f"Ignoring invalid secret for '{env_var_name}': the secret name " + f"'{secret_name}' is not a valid Kubernetes resource name." + ) continue if not is_valid_kubernetes_resource_name(secret_key): - self.log.warning(f"Ignoring invalid secret for '{env_var_name}': the secret key " - f"'{secret_key} 'is not a valid Kubernetes resource name.") + self.log.warning( + f"Ignoring invalid secret for '{env_var_name}': the secret key " + f"'{secret_key}' is not a valid Kubernetes resource name." + ) continue valid_secrets.append((env_var_name, secret_name, secret_key)) From e2fcc3b9d27e176a738df8a8bcf0e20edd994b9d Mon Sep 17 00:00:00 2001 From: Kiersten Stokes Date: Tue, 17 May 2022 18:21:43 -0500 Subject: [PATCH 06/27] Define data classes for secrets and mounted volumes --- elyra/kfp/operator.py | 26 +++-- elyra/pipeline/airflow/processor_airflow.py | 4 +- elyra/pipeline/kfp/processor_kfp.py | 4 +- elyra/pipeline/pipeline.py | 105 ++++++++++++++++-- elyra/pipeline/processor.py | 63 ----------- .../templates/airflow/airflow_template.jinja2 | 20 ++-- 6 files changed, 122 insertions(+), 100 deletions(-) diff --git a/elyra/kfp/operator.py b/elyra/kfp/operator.py index 6c393606f..1ed4a0571 100644 --- a/elyra/kfp/operator.py +++ b/elyra/kfp/operator.py @@ -19,7 +19,6 @@ from typing import Dict from typing import List from typing import Optional -from typing import Tuple from kfp.dsl import ContainerOp from kfp.dsl import RUN_ID_PLACEHOLDER @@ -33,6 +32,8 @@ from kubernetes.client.models import V1VolumeMount from elyra._version import __version__ +from elyra.pipeline.pipeline import KubernetesSecret +from elyra.pipeline.pipeline import VolumeMount """ The ExecuteFileOp uses a python script to bootstrap the user supplied image with the required dependencies. @@ -83,8 +84,8 @@ def __init__( mem_request: Optional[str] = None, gpu_limit: Optional[str] = None, workflow_engine: Optional[str] = "argo", - volume_mounts: Optional[Dict[str, str]] = None, - kubernetes_secrets: Optional[List[Tuple]] = None, + volume_mounts: Optional[List[VolumeMount]] = None, + kubernetes_secrets: Optional[List[KubernetesSecret]] = None, **kwargs, ): """Create a new instance of ContainerOp. @@ -232,16 +233,18 @@ def __init__( # or this generic operation will fail if self.volume_mounts: unique_pvcs = [] - for mount_path, pvc_name in self.volume_mounts.items(): - if pvc_name not in unique_pvcs: + for volume_mount in self.volume_mounts: + if volume_mount.pvc_name not in unique_pvcs: self.add_volume( V1Volume( - name=pvc_name, - persistent_volume_claim=V1PersistentVolumeClaimVolumeSource(claim_name=pvc_name), + name=volume_mount.pvc_name, + persistent_volume_claim=V1PersistentVolumeClaimVolumeSource( + claim_name=volume_mount.pvc_name + ), ) ) - unique_pvcs.append(pvc_name) - self.container.add_volume_mount(V1VolumeMount(mount_path=mount_path, name=pvc_name)) + unique_pvcs.append(volume_mount.pvc_name) + self.container.add_volume_mount(V1VolumeMount(mount_path=volume_mount.path, name=volume_mount.pvc_name)) # We must deal with the envs after the superclass initialization since these amend the # container attribute that isn't available until now. @@ -251,11 +254,10 @@ def __init__( if self.kubernetes_secrets: for secret in self.kubernetes_secrets: # Convert tuple entries to format kfp needs - env_var_name, secret_name, secret_key = secret self.container.add_env_variable( V1EnvVar( - name=env_var_name, - value_from=V1EnvVarSource(secret_key_ref=V1SecretKeySelector(name=secret_name, key=secret_key)), + name=secret.env_var, + value_from=V1EnvVarSource(secret_key_ref=V1SecretKeySelector(name=secret.name, key=secret.key)), ) ) diff --git a/elyra/pipeline/airflow/processor_airflow.py b/elyra/pipeline/airflow/processor_airflow.py index 4079c4b16..059462526 100644 --- a/elyra/pipeline/airflow/processor_airflow.py +++ b/elyra/pipeline/airflow/processor_airflow.py @@ -317,8 +317,8 @@ def _cc_pipeline(self, pipeline: Pipeline, pipeline_name: str, pipeline_instance outputs=operation.outputs, ) - volume_mounts = self._get_volume_mounts(operation=operation) - kubernetes_secrets = self._get_kubernetes_secrets(operation=operation) + volume_mounts = operation.get_volume_mounts() + kubernetes_secrets = operation.get_kubernetes_secrets() target_op = { "notebook": operation.name, diff --git a/elyra/pipeline/kfp/processor_kfp.py b/elyra/pipeline/kfp/processor_kfp.py index 9c8780efa..df45f571b 100644 --- a/elyra/pipeline/kfp/processor_kfp.py +++ b/elyra/pipeline/kfp/processor_kfp.py @@ -520,8 +520,8 @@ def _cc_pipeline( f"Creating pipeline component archive '{operation_artifact_archive}' for operation '{operation}'" ) - volume_mounts = self._get_volume_mounts(operation=operation) - kubernetes_secrets = self._get_kubernetes_secrets(operation=operation) + volume_mounts = operation.get_volume_mounts() + kubernetes_secrets = operation.get_kubernetes_secrets() target_ops[operation.id] = ExecuteFileOp( name=sanitized_operation_name, diff --git a/elyra/pipeline/pipeline.py b/elyra/pipeline/pipeline.py index 0890244e8..97ad93e2e 100644 --- a/elyra/pipeline/pipeline.py +++ b/elyra/pipeline/pipeline.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # +from dataclasses import dataclass import logging from logging import Logger import os @@ -23,6 +24,9 @@ from typing import Optional from elyra.pipeline.pipeline_constants import ENV_VARIABLES +from elyra.pipeline.pipeline_constants import KUBERNETES_SECRETS +from elyra.pipeline.pipeline_constants import MOUNTED_VOLUMES +from elyra.util.kubernetes import is_valid_kubernetes_resource_name # TODO: Make pipeline version available more widely # as today is only available on the pipeline editor @@ -194,6 +198,16 @@ def _scrub_list(dirty: Optional[List[Optional[str]]]) -> List[str]: def is_generic_operation(operation_classifier) -> bool: return operation_classifier in Operation.generic_node_types + @staticmethod + def log_message(msg: str, logger: Optional[Logger] = None, level: Optional[int] = logging.DEBUG): + """ + Log a message with the given logger at the given level or simply print. + """ + if logger: + logger.log(level, msg) + else: + print(msg) + class GenericOperation(Operation): """ @@ -314,6 +328,72 @@ def __eq__(self, other: "GenericOperation") -> bool: def _validate_range(self, value: str, min_value: int = 0, max_value: int = sys.maxsize) -> bool: return int(value) in range(min_value, max_value) + def get_volume_mounts(self, logger: Optional[Logger] = None) -> Optional[List["VolumeMount"]]: + """ + Loops through the Operation's mounted volumes to re-format path and remove + invalid PVC names. + + :return: dictionary of mount path to valid PVC names + """ + valid_volume_mounts = [] + volume_mounts = self.component_params.get(MOUNTED_VOLUMES) + if volume_mounts and isinstance(volume_mounts, KeyValueList): + for mount_path, pvc_name in volume_mounts.to_dict().items(): + # Ensure the PVC name is syntactically a valid Kubernetes resource name + if not is_valid_kubernetes_resource_name(pvc_name): + msg = ( + f"Ignoring invalid volume mount entry '{mount_path}': the PVC " + f"name '{pvc_name}' is not a valid Kubernetes resource name." + ) + Operation.log_message(msg=msg, logger=logger, level=logging.WARN) + continue + + formatted_mount_path = f"/{mount_path.strip('/')}" + + # Volume mount is valid, create a VolumeMount class instance and add to list + volume_mount = VolumeMount(formatted_mount_path, pvc_name) + valid_volume_mounts.append(volume_mount) + + return valid_volume_mounts + + def get_kubernetes_secrets(self, logger: Optional[Logger] = None) -> Optional[List["KubernetesSecret"]]: + """ + Loops through the Operation's kubernetes secrets to strip whitespace + and re-format as a tuple. + + :return: tuple of env var name, secret name, and secret key + """ + valid_secrets = [] + secrets = self.component_params.get(KUBERNETES_SECRETS) + if secrets and isinstance(secrets, KeyValueList): + for env_var_name, secret in secrets.to_dict().items(): + secret_tuple = secret.split(":", 1) + if len(secret_tuple) != 2: + msg = f"Ignoring invalid secret for '{env_var_name}': missing secret name and/or key." + Operation.log_message(msg=msg, logger=logger, level=logging.WARN) + continue + secret_name, secret_key = secret_tuple[0].strip(), secret_tuple[1].strip() + if not is_valid_kubernetes_resource_name(secret_name): + msg = ( + f"Ignoring invalid secret for '{env_var_name}': the secret name " + f"'{secret_name}' is not a valid Kubernetes resource name." + ) + Operation.log_message(msg=msg, logger=logger, level=logging.WARN) + continue + if not is_valid_kubernetes_resource_name(secret_key): + msg = ( + f"Ignoring invalid secret for '{env_var_name}': the secret key " + f"'{secret_key}' is not a valid Kubernetes resource name." + ) + Operation.log_message(msg=msg, logger=logger, level=logging.WARN) + continue + + # Secret is valid, create a KubernetesSecret class instance and add to list + kubernetes_secret = KubernetesSecret(env_var_name, secret_name, secret_key) + valid_secrets.append(kubernetes_secret) + + return valid_secrets + class Pipeline(object): """ @@ -454,12 +534,12 @@ def to_dict(self, logger: Optional[Logger] = None) -> Dict[str, str]: key = key.strip() if not key: - KeyValueList.log_message(f"Skipping inclusion of property '{kv}': no key found", logger, logging.WARN) + Operation.log_message(f"Skipping inclusion of property '{kv}': no key found", logger, logging.WARN) continue if isinstance(value, str): value = value.strip() if not value: - KeyValueList.log_message( + Operation.log_message( f"Skipping inclusion of property '{key}': no value specified", logger, logging.DEBUG ) continue @@ -505,12 +585,15 @@ def difference(cls, minuend: "KeyValueList", subtrahend: "KeyValueList") -> "Key return KeyValueList.from_dict(subtract_dict) - @staticmethod - def log_message(msg: str, logger: Optional[Logger] = None, level: Optional[int] = logging.DEBUG): - """ - Log a message with the given logger at the given level or simply print. - """ - if logger: - logger.log(level, msg) - else: - print(msg) + +@dataclass +class VolumeMount: + path: str + pvc_name: str + + +@dataclass +class KubernetesSecret: + env_var: str + name: str + key: str diff --git a/elyra/pipeline/processor.py b/elyra/pipeline/processor.py index aa730b7c8..b3da9a454 100644 --- a/elyra/pipeline/processor.py +++ b/elyra/pipeline/processor.py @@ -25,7 +25,6 @@ from typing import List from typing import Optional from typing import Set -from typing import Tuple from typing import Union import entrypoints @@ -40,16 +39,12 @@ from elyra.pipeline.component import Component from elyra.pipeline.component_catalog import ComponentCache from elyra.pipeline.pipeline import GenericOperation -from elyra.pipeline.pipeline import KeyValueList from elyra.pipeline.pipeline import Operation from elyra.pipeline.pipeline import Pipeline -from elyra.pipeline.pipeline_constants import KUBERNETES_SECRETS -from elyra.pipeline.pipeline_constants import MOUNTED_VOLUMES from elyra.pipeline.runtime_type import RuntimeProcessorType from elyra.pipeline.runtime_type import RuntimeTypeResources from elyra.util.archive import create_temp_archive from elyra.util.cos import CosClient -from elyra.util.kubernetes import is_valid_kubernetes_resource_name from elyra.util.path import get_expanded_path elyra_log_pipeline_info = os.getenv("ELYRA_LOG_PIPELINE_INFO", True) @@ -584,61 +579,3 @@ def _process_list_value(self, value: str) -> Union[List, str]: return value return converted_list - - def _get_volume_mounts(self, operation: Operation) -> Optional[Dict[str, str]]: - """ - Loops through an Operation mounted volumes to re-format path and remove - invalid PVC names. - - :param operation: the operation to check for volume mounts - :return: dictionary of mount path to valid PVC names - """ - volume_mounts_valid = {} - if operation.component_params.get(MOUNTED_VOLUMES): - volume_mounts = operation.component_params.get(MOUNTED_VOLUMES).to_dict() - for mount_path, pvc_name in volume_mounts.items(): - # Ensure the PVC name is syntactically a valid Kubernetes resource name - if not is_valid_kubernetes_resource_name(pvc_name): - self.log.warning( - f"Ignoring invalid volume mount entry '{mount_path}': the PVC " - f"name '{pvc_name}' is not a valid Kubernetes resource name." - ) - continue - - formatted_mount_path = f"/{mount_path.strip('/')}" - volume_mounts_valid[formatted_mount_path] = pvc_name - return volume_mounts_valid - - def _get_kubernetes_secrets(self, operation: Operation) -> Optional[List[Tuple]]: - """ - Loops through an Operation's kubernetes secrets to strip whitespace - and re-format as a tuple. - - :param operation: the operation to check for secrets - :return: tuple of env var name, secret name, and secret key - """ - valid_secrets = [] - secrets = operation.component_params.get(KUBERNETES_SECRETS) - if secrets and isinstance(secrets, KeyValueList): - for env_var_name, secret in secrets.to_dict().items(): - secret_tuple = secret.split(":", 1) - if len(secret_tuple) != 2: - self.log.warning(f"Ignoring invalid secret for '{env_var_name}': missing secret name and/or key.") - continue - secret_name, secret_key = secret_tuple[0].strip(), secret_tuple[1].strip() - if not is_valid_kubernetes_resource_name(secret_name): - self.log.warning( - f"Ignoring invalid secret for '{env_var_name}': the secret name " - f"'{secret_name}' is not a valid Kubernetes resource name." - ) - continue - if not is_valid_kubernetes_resource_name(secret_key): - self.log.warning( - f"Ignoring invalid secret for '{env_var_name}': the secret key " - f"'{secret_key}' is not a valid Kubernetes resource name." - ) - continue - - valid_secrets.append((env_var_name, secret_name, secret_key)) - - return valid_secrets diff --git a/elyra/templates/airflow/airflow_template.jinja2 b/elyra/templates/airflow/airflow_template.jinja2 index 4630b4ab3..246ff6e14 100644 --- a/elyra/templates/airflow/airflow_template.jinja2 +++ b/elyra/templates/airflow/airflow_template.jinja2 @@ -46,14 +46,14 @@ env_var_secret_key = Secret(deploy_type='env', from airflow.contrib.operators.kubernetes_pod_operator import KubernetesPodOperator {% endif %} -{% if operation.kubernetes_secrets and not cos_secret%} +{% if operation.kubernetes_secrets %} from airflow.kubernetes.secret import Secret # Kubernetes secrets for operation '{{ operation.id|regex_replace }}' - {% for secret in operation.kubernetes_secrets %}{% set env_var_name = secret[0] %}{% set secret_name = secret[1] %}{% set secret_key = secret[2] %} + {% for secret in operation.kubernetes_secrets %} secret_{{ operation.id|regex_replace }}_{{ loop.index }} = Secret(deploy_type='env', - deploy_target='{{env_var_name}}', - secret='{{ secret_name }}', - key='{{ secret_key }}', + deploy_target='{{ secret.env_var }}', + secret='{{ secret.name }}', + key='{{ secret.key }}', ) {% set ns.operation_kubernetes_secrets = ns.operation_kubernetes_secrets ~ 'secret_' ~ operation.id|regex_replace ~ '_' ~ loop.index ~ ', ' %} {% endfor %} @@ -73,18 +73,18 @@ from airflow.contrib.kubernetes.volume import Volume from airflow.contrib.kubernetes.volume_mount import VolumeMount volumes_{{ operation.id|regex_replace }} = [] volume_mounts_{{ operation.id|regex_replace }} = [] - {% for mount_path, volume_claim in operation.volume_mounts.items() %} -volume_mount_{{loop.index}} = VolumeMount(name='{{ volume_claim }}', - mount_path='{{ mount_path }}', + {% for volume_mount in operation.volume_mounts %} +volume_mount_{{loop.index}} = VolumeMount(name='{{ volume_mount.pvc_name }}', + mount_path='{{ volume_mount.path }}', sub_path=None, read_only=False) volume_config_{{ loop.index }}= { 'persistentVolumeClaim': { - 'claimName': '{{ volume_claim }}' + 'claimName': '{{ volume_mount.pvc_name }}' } } -volume_{{ loop.index }} = Volume(name='{{ volume_claim }}', configs=volume_config_{{ loop.index }}) +volume_{{ loop.index }} = Volume(name='{{ volume_mount.pvc_name }}', configs=volume_config_{{ loop.index }}) volumes_{{ operation.id|regex_replace }}.append(volume_{{ loop.index }}) volume_mounts_{{ operation.id|regex_replace }}.append(volume_mount_{{ loop.index }}) {% endfor %} From b954d47e09d57aa84295eea56c591b31fe9f7ad5 Mon Sep 17 00:00:00 2001 From: Kiersten Stokes Date: Tue, 17 May 2022 18:22:00 -0500 Subject: [PATCH 07/27] Update tests --- elyra/tests/pipeline/test_processor_base.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/elyra/tests/pipeline/test_processor_base.py b/elyra/tests/pipeline/test_processor_base.py index aa94a6a28..aaebc17a6 100644 --- a/elyra/tests/pipeline/test_processor_base.py +++ b/elyra/tests/pipeline/test_processor_base.py @@ -20,6 +20,7 @@ from elyra.pipeline.parser import PipelineParser from elyra.pipeline.pipeline import GenericOperation from elyra.pipeline.pipeline import KeyValueList +from elyra.pipeline.pipeline import VolumeMount from elyra.pipeline.processor import RuntimePipelineProcessor from elyra.tests.pipeline.test_pipeline_parser import _read_pipeline_resource @@ -49,7 +50,7 @@ def sample_metadata(): } -def test_get_volume_mounts(runtime_processor): +def test_get_volume_mounts(): mounted_volumes = KeyValueList(["/mount/test=rwx-test-claim", "/mount/test_two=second-claim"]) component_parameters = { "filename": "pipelines_test_file", @@ -64,8 +65,8 @@ def test_get_volume_mounts(runtime_processor): name="test", component_params=component_parameters, ) - parsed_volumes_dict = runtime_processor._get_volume_mounts(operation=test_operation) - assert parsed_volumes_dict == { - "/mount/test": "rwx-test-claim", - "/mount/test_two": "second-claim", - } + parsed_volumes_list = test_operation.get_volume_mounts() + assert parsed_volumes_list == [ + VolumeMount("/mount/test", "rwx-test-claim"), + VolumeMount("/mount/test_two", "second-claim"), + ] From bc0946265e415f7918a944777dacf4766dd40af1 Mon Sep 17 00:00:00 2001 From: Kiersten Stokes Date: Wed, 18 May 2022 14:56:38 -0500 Subject: [PATCH 08/27] Add test for secret formatting --- .../pipeline/test_pipeline_constructor.py | 57 +++++++++++++++++++ elyra/tests/pipeline/test_processor_base.py | 25 -------- 2 files changed, 57 insertions(+), 25 deletions(-) diff --git a/elyra/tests/pipeline/test_pipeline_constructor.py b/elyra/tests/pipeline/test_pipeline_constructor.py index d95acd987..77cf12c86 100644 --- a/elyra/tests/pipeline/test_pipeline_constructor.py +++ b/elyra/tests/pipeline/test_pipeline_constructor.py @@ -18,8 +18,11 @@ import pytest from elyra.pipeline.pipeline import GenericOperation +from elyra.pipeline.pipeline import KeyValueList +from elyra.pipeline.pipeline import KubernetesSecret from elyra.pipeline.pipeline import Operation from elyra.pipeline.pipeline import Pipeline +from elyra.pipeline.pipeline import VolumeMount @pytest.fixture @@ -547,3 +550,57 @@ def test_scrub_list_function(): env_variables_output = ["FOO=Bar", "BAR=Foo"] assert Operation._scrub_list(env_variables_input) == env_variables_output + + +def test_get_volume_mounts(): + mounted_volumes = KeyValueList(["/mount/test=rwx-test-claim", "/mount/test_two=second-claim"]) + component_parameters = { + "filename": "pipelines_test_file", + "env_vars": [], + "runtime_image": "tensorflow/tensorflow:latest", + "mounted_volumes": mounted_volumes, + } + test_operation = GenericOperation( + id="this-is-a-test-id", + type="execution-node", + classifier="execute-notebook-node", + name="test", + component_params=component_parameters, + ) + parsed_volumes_list = test_operation.get_volume_mounts() + assert parsed_volumes_list == [ + VolumeMount("/mount/test", "rwx-test-claim"), + VolumeMount("/mount/test_two", "second-claim"), + ] + + +def test_get_kubernetes_secrets(): + kubernetes_secrets = KeyValueList( + [ + "ENV_VAR1=test-secret:test-key1", # valid + "ENV_VAR2=test-secret:test-key2", # valid + "ENV_VAR3=test-secret", # invalid: no key given + "ENV_VAR4=test:secret:test-key", # invalid: too many fields given + "ENV_VAR5=test%secret:test-key", # invalid secret name + "ENV_VAR6=test-secret:test$key2", # invalid secret key + "ENV_VAR7=", # invalid: no value after separator ('=') + ] + ) + component_parameters = { + "filename": "pipelines_test_file", + "env_vars": [], + "runtime_image": "tensorflow/tensorflow:latest", + "kubernetes_secrets": kubernetes_secrets, + } + test_operation = GenericOperation( + id="this-is-a-test-id", + type="execution-node", + classifier="execute-notebook-node", + name="test", + component_params=component_parameters, + ) + parsed_secrets_list = test_operation.get_kubernetes_secrets() + assert parsed_secrets_list == [ + KubernetesSecret("ENV_VAR1", "test-secret", "test-key1"), + KubernetesSecret("ENV_VAR2", "test-secret", "test-key2"), + ] diff --git a/elyra/tests/pipeline/test_processor_base.py b/elyra/tests/pipeline/test_processor_base.py index aaebc17a6..23684ed19 100644 --- a/elyra/tests/pipeline/test_processor_base.py +++ b/elyra/tests/pipeline/test_processor_base.py @@ -18,9 +18,6 @@ import pytest from elyra.pipeline.parser import PipelineParser -from elyra.pipeline.pipeline import GenericOperation -from elyra.pipeline.pipeline import KeyValueList -from elyra.pipeline.pipeline import VolumeMount from elyra.pipeline.processor import RuntimePipelineProcessor from elyra.tests.pipeline.test_pipeline_parser import _read_pipeline_resource @@ -48,25 +45,3 @@ def sample_metadata(): "engine": "Argo", "tags": [], } - - -def test_get_volume_mounts(): - mounted_volumes = KeyValueList(["/mount/test=rwx-test-claim", "/mount/test_two=second-claim"]) - component_parameters = { - "filename": "pipelines_test_file", - "env_vars": [], - "runtime_image": "tensorflow/tensorflow:latest", - "mounted_volumes": mounted_volumes, - } - test_operation = GenericOperation( - id="this-is-a-test-id", - type="execution-node", - classifier="execute-notebook-node", - name="test", - component_params=component_parameters, - ) - parsed_volumes_list = test_operation.get_volume_mounts() - assert parsed_volumes_list == [ - VolumeMount("/mount/test", "rwx-test-claim"), - VolumeMount("/mount/test_two", "second-claim"), - ] From 029d2d682e20512df0b04433b4af381436abd193 Mon Sep 17 00:00:00 2001 From: Kiersten Stokes Date: Wed, 18 May 2022 17:20:38 -0500 Subject: [PATCH 09/27] Refactor calls to get mounts/secrets into static methods --- elyra/pipeline/airflow/processor_airflow.py | 14 +++-- elyra/pipeline/kfp/processor_kfp.py | 14 +++-- elyra/pipeline/pipeline.py | 23 +++++--- .../pipeline/test_pipeline_constructor.py | 57 ------------------- elyra/tests/pipeline/test_processor_base.py | 40 +++++++++++++ 5 files changed, 74 insertions(+), 74 deletions(-) diff --git a/elyra/pipeline/airflow/processor_airflow.py b/elyra/pipeline/airflow/processor_airflow.py index 059462526..99eae2f75 100644 --- a/elyra/pipeline/airflow/processor_airflow.py +++ b/elyra/pipeline/airflow/processor_airflow.py @@ -41,6 +41,8 @@ from elyra.pipeline.pipeline import Operation from elyra.pipeline.pipeline import Pipeline from elyra.pipeline.pipeline_constants import COS_OBJECT_PREFIX +from elyra.pipeline.pipeline_constants import KUBERNETES_SECRETS +from elyra.pipeline.pipeline_constants import MOUNTED_VOLUMES from elyra.pipeline.processor import PipelineProcessor from elyra.pipeline.processor import PipelineProcessorResponse from elyra.pipeline.processor import RuntimePipelineProcessor @@ -317,8 +319,12 @@ def _cc_pipeline(self, pipeline: Pipeline, pipeline_name: str, pipeline_instance outputs=operation.outputs, ) - volume_mounts = operation.get_volume_mounts() - kubernetes_secrets = operation.get_kubernetes_secrets() + valid_volume_mounts = GenericOperation.get_valid_volume_mounts( + volume_mounts=operation.component_params.get(MOUNTED_VOLUMES), logger=self.log + ) + valid_kubernetes_secrets = GenericOperation.get_valid_kubernetes_secrets( + secrets=operation.component_params.get(KUBERNETES_SECRETS), logger=self.log + ) target_op = { "notebook": operation.name, @@ -334,8 +340,8 @@ def _cc_pipeline(self, pipeline: Pipeline, pipeline_name: str, pipeline_instance "operator_source": operation.component_params["filename"], "is_generic_operator": True, "doc": operation.doc, - "volume_mounts": volume_mounts, - "kubernetes_secrets": kubernetes_secrets, + "volume_mounts": valid_volume_mounts, + "kubernetes_secrets": valid_kubernetes_secrets, } if runtime_image_pull_secret is not None: diff --git a/elyra/pipeline/kfp/processor_kfp.py b/elyra/pipeline/kfp/processor_kfp.py index df45f571b..048a6175c 100644 --- a/elyra/pipeline/kfp/processor_kfp.py +++ b/elyra/pipeline/kfp/processor_kfp.py @@ -47,6 +47,8 @@ from elyra.pipeline.pipeline import Operation from elyra.pipeline.pipeline import Pipeline from elyra.pipeline.pipeline_constants import COS_OBJECT_PREFIX +from elyra.pipeline.pipeline_constants import KUBERNETES_SECRETS +from elyra.pipeline.pipeline_constants import MOUNTED_VOLUMES from elyra.pipeline.processor import PipelineProcessor from elyra.pipeline.processor import PipelineProcessorResponse from elyra.pipeline.processor import RuntimePipelineProcessor @@ -520,8 +522,12 @@ def _cc_pipeline( f"Creating pipeline component archive '{operation_artifact_archive}' for operation '{operation}'" ) - volume_mounts = operation.get_volume_mounts() - kubernetes_secrets = operation.get_kubernetes_secrets() + valid_volume_mounts = GenericOperation.get_valid_volume_mounts( + volume_mounts=operation.component_params.get(MOUNTED_VOLUMES), logger=self.log + ) + valid_kubernetes_secrets = GenericOperation.get_valid_kubernetes_secrets( + secrets=operation.component_params.get(KUBERNETES_SECRETS), logger=self.log + ) target_ops[operation.id] = ExecuteFileOp( name=sanitized_operation_name, @@ -547,8 +553,8 @@ def _cc_pipeline( "mlpipeline-metrics": f"{pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR']}/mlpipeline-metrics.json", # noqa "mlpipeline-ui-metadata": f"{pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR']}/mlpipeline-ui-metadata.json", # noqa }, - volume_mounts=volume_mounts, - kubernetes_secrets=kubernetes_secrets, + volume_mounts=valid_volume_mounts, + kubernetes_secrets=valid_kubernetes_secrets, ) if operation.doc: diff --git a/elyra/pipeline/pipeline.py b/elyra/pipeline/pipeline.py index 97ad93e2e..19f58dbf0 100644 --- a/elyra/pipeline/pipeline.py +++ b/elyra/pipeline/pipeline.py @@ -24,8 +24,6 @@ from typing import Optional from elyra.pipeline.pipeline_constants import ENV_VARIABLES -from elyra.pipeline.pipeline_constants import KUBERNETES_SECRETS -from elyra.pipeline.pipeline_constants import MOUNTED_VOLUMES from elyra.util.kubernetes import is_valid_kubernetes_resource_name # TODO: Make pipeline version available more widely @@ -328,15 +326,17 @@ def __eq__(self, other: "GenericOperation") -> bool: def _validate_range(self, value: str, min_value: int = 0, max_value: int = sys.maxsize) -> bool: return int(value) in range(min_value, max_value) - def get_volume_mounts(self, logger: Optional[Logger] = None) -> Optional[List["VolumeMount"]]: + @staticmethod + def get_valid_volume_mounts( + volume_mounts: "KeyValueList", logger: Optional[Logger] = None + ) -> Optional[List["VolumeMount"]]: """ Loops through the Operation's mounted volumes to re-format path and remove invalid PVC names. - :return: dictionary of mount path to valid PVC names + :return: a list of VolumeMount objects with mount path and valid PVC names """ valid_volume_mounts = [] - volume_mounts = self.component_params.get(MOUNTED_VOLUMES) if volume_mounts and isinstance(volume_mounts, KeyValueList): for mount_path, pvc_name in volume_mounts.to_dict().items(): # Ensure the PVC name is syntactically a valid Kubernetes resource name @@ -356,20 +356,25 @@ def get_volume_mounts(self, logger: Optional[Logger] = None) -> Optional[List["V return valid_volume_mounts - def get_kubernetes_secrets(self, logger: Optional[Logger] = None) -> Optional[List["KubernetesSecret"]]: + @staticmethod + def get_valid_kubernetes_secrets( + secrets: "KeyValueList", logger: Optional[Logger] = None + ) -> Optional[List["KubernetesSecret"]]: """ Loops through the Operation's kubernetes secrets to strip whitespace and re-format as a tuple. - :return: tuple of env var name, secret name, and secret key + :return: a list of KubernetesSecret objects with env var name and valid secret name and key """ valid_secrets = [] - secrets = self.component_params.get(KUBERNETES_SECRETS) if secrets and isinstance(secrets, KeyValueList): for env_var_name, secret in secrets.to_dict().items(): secret_tuple = secret.split(":", 1) if len(secret_tuple) != 2: - msg = f"Ignoring invalid secret for '{env_var_name}': missing secret name and/or key." + msg = ( + f"Ignoring invalid secret for '{env_var_name}': the secret name " + f"and key must follow the format 'secret_name:secret_key'." + ) Operation.log_message(msg=msg, logger=logger, level=logging.WARN) continue secret_name, secret_key = secret_tuple[0].strip(), secret_tuple[1].strip() diff --git a/elyra/tests/pipeline/test_pipeline_constructor.py b/elyra/tests/pipeline/test_pipeline_constructor.py index 77cf12c86..d95acd987 100644 --- a/elyra/tests/pipeline/test_pipeline_constructor.py +++ b/elyra/tests/pipeline/test_pipeline_constructor.py @@ -18,11 +18,8 @@ import pytest from elyra.pipeline.pipeline import GenericOperation -from elyra.pipeline.pipeline import KeyValueList -from elyra.pipeline.pipeline import KubernetesSecret from elyra.pipeline.pipeline import Operation from elyra.pipeline.pipeline import Pipeline -from elyra.pipeline.pipeline import VolumeMount @pytest.fixture @@ -550,57 +547,3 @@ def test_scrub_list_function(): env_variables_output = ["FOO=Bar", "BAR=Foo"] assert Operation._scrub_list(env_variables_input) == env_variables_output - - -def test_get_volume_mounts(): - mounted_volumes = KeyValueList(["/mount/test=rwx-test-claim", "/mount/test_two=second-claim"]) - component_parameters = { - "filename": "pipelines_test_file", - "env_vars": [], - "runtime_image": "tensorflow/tensorflow:latest", - "mounted_volumes": mounted_volumes, - } - test_operation = GenericOperation( - id="this-is-a-test-id", - type="execution-node", - classifier="execute-notebook-node", - name="test", - component_params=component_parameters, - ) - parsed_volumes_list = test_operation.get_volume_mounts() - assert parsed_volumes_list == [ - VolumeMount("/mount/test", "rwx-test-claim"), - VolumeMount("/mount/test_two", "second-claim"), - ] - - -def test_get_kubernetes_secrets(): - kubernetes_secrets = KeyValueList( - [ - "ENV_VAR1=test-secret:test-key1", # valid - "ENV_VAR2=test-secret:test-key2", # valid - "ENV_VAR3=test-secret", # invalid: no key given - "ENV_VAR4=test:secret:test-key", # invalid: too many fields given - "ENV_VAR5=test%secret:test-key", # invalid secret name - "ENV_VAR6=test-secret:test$key2", # invalid secret key - "ENV_VAR7=", # invalid: no value after separator ('=') - ] - ) - component_parameters = { - "filename": "pipelines_test_file", - "env_vars": [], - "runtime_image": "tensorflow/tensorflow:latest", - "kubernetes_secrets": kubernetes_secrets, - } - test_operation = GenericOperation( - id="this-is-a-test-id", - type="execution-node", - classifier="execute-notebook-node", - name="test", - component_params=component_parameters, - ) - parsed_secrets_list = test_operation.get_kubernetes_secrets() - assert parsed_secrets_list == [ - KubernetesSecret("ENV_VAR1", "test-secret", "test-key1"), - KubernetesSecret("ENV_VAR2", "test-secret", "test-key2"), - ] diff --git a/elyra/tests/pipeline/test_processor_base.py b/elyra/tests/pipeline/test_processor_base.py index 23684ed19..acf7b3933 100644 --- a/elyra/tests/pipeline/test_processor_base.py +++ b/elyra/tests/pipeline/test_processor_base.py @@ -18,6 +18,10 @@ import pytest from elyra.pipeline.parser import PipelineParser +from elyra.pipeline.pipeline import GenericOperation +from elyra.pipeline.pipeline import KeyValueList +from elyra.pipeline.pipeline import KubernetesSecret +from elyra.pipeline.pipeline import VolumeMount from elyra.pipeline.processor import RuntimePipelineProcessor from elyra.tests.pipeline.test_pipeline_parser import _read_pipeline_resource @@ -45,3 +49,39 @@ def sample_metadata(): "engine": "Argo", "tags": [], } + + +def test_get_volume_mounts(): + mounted_volumes = KeyValueList( + [ + "/mount/test=rwx-test-claim", # valid + "/mount/test_two=second-claim", # valid + "/mount/test_three=", # invalid: no pvc name + "/mount/test_four=second#claim", # invalid pvc name + ] + ) + parsed_volumes_list = GenericOperation.get_valid_volume_mounts(volume_mounts=mounted_volumes) + assert parsed_volumes_list == [ + VolumeMount("/mount/test", "rwx-test-claim"), + VolumeMount("/mount/test_two", "second-claim"), + ] + + +def test_get_kubernetes_secrets(): + kubernetes_secrets = KeyValueList( + [ + "ENV_VAR1=test-secret:test-key1", # valid + "ENV_VAR2=test-secret:test-key2", # valid + "ENV_VAR3=test-secret", # invalid: no key given + "ENV_VAR4=test:secret:test-key", # invalid: too many fields given + "ENV_VAR5=test%secret:test-key", # invalid secret name + "ENV_VAR6=test-secret:test$key2", # invalid secret key + "ENV_VAR7=", # invalid: no value after separator ('=') + ] + ) + + parsed_secrets_list = GenericOperation.get_valid_kubernetes_secrets(secrets=kubernetes_secrets) + assert parsed_secrets_list == [ + KubernetesSecret("ENV_VAR1", "test-secret", "test-key1"), + KubernetesSecret("ENV_VAR2", "test-secret", "test-key2"), + ] From ed5eb35a890c2662d0304faf22841cf785ec9266 Mon Sep 17 00:00:00 2001 From: Kiersten Stokes Date: Thu, 19 May 2022 13:39:03 -0500 Subject: [PATCH 10/27] Standardize env_var as the placeholder for env var names --- elyra/templates/components/generic_properties_template.jinja2 | 2 +- elyra/templates/pipeline/pipeline_properties_template.jinja2 | 2 +- elyra/tests/pipeline/resources/properties.json | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/elyra/templates/components/generic_properties_template.jinja2 b/elyra/templates/components/generic_properties_template.jinja2 index 8e0e34af6..b2e8e3c39 100644 --- a/elyra/templates/components/generic_properties_template.jinja2 +++ b/elyra/templates/components/generic_properties_template.jinja2 @@ -205,7 +205,7 @@ "placement": "on_panel" }, "data": { - "placeholder": "env_var_name=secret_name:secret_key", + "placeholder": "env_var=secret_name:secret_key", "keyValueEntries": true } }, diff --git a/elyra/templates/pipeline/pipeline_properties_template.jinja2 b/elyra/templates/pipeline/pipeline_properties_template.jinja2 index e171556ed..9b4c4ef55 100644 --- a/elyra/templates/pipeline/pipeline_properties_template.jinja2 +++ b/elyra/templates/pipeline/pipeline_properties_template.jinja2 @@ -115,7 +115,7 @@ "placement": "on_panel" }, "data": { - "placeholder": "env_var_name=secret_name:secret_key", + "placeholder": "env_var=secret_name:secret_key", "keyValueEntries": true } }, diff --git a/elyra/tests/pipeline/resources/properties.json b/elyra/tests/pipeline/resources/properties.json index cc7be107e..07e85957c 100644 --- a/elyra/tests/pipeline/resources/properties.json +++ b/elyra/tests/pipeline/resources/properties.json @@ -205,7 +205,7 @@ "placement": "on_panel" }, "data": { - "placeholder": "env_var_name=secret_name:secret_key", + "placeholder": "env_var=secret_name:secret_key", "keyValueEntries": true } }, From 4de9bd65b73a1fff3fd6b7844fb58ffb1af5b53d Mon Sep 17 00:00:00 2001 From: Kiersten Stokes Date: Thu, 19 May 2022 13:39:50 -0500 Subject: [PATCH 11/27] Add new function to check for the format of secret key --- elyra/pipeline/pipeline.py | 3 ++- elyra/util/kubernetes.py | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/elyra/pipeline/pipeline.py b/elyra/pipeline/pipeline.py index 19f58dbf0..444337206 100644 --- a/elyra/pipeline/pipeline.py +++ b/elyra/pipeline/pipeline.py @@ -24,6 +24,7 @@ from typing import Optional from elyra.pipeline.pipeline_constants import ENV_VARIABLES +from elyra.util.kubernetes import is_valid_kubernetes_key_name from elyra.util.kubernetes import is_valid_kubernetes_resource_name # TODO: Make pipeline version available more widely @@ -385,7 +386,7 @@ def get_valid_kubernetes_secrets( ) Operation.log_message(msg=msg, logger=logger, level=logging.WARN) continue - if not is_valid_kubernetes_resource_name(secret_key): + if not is_valid_kubernetes_key_name(secret_key): msg = ( f"Ignoring invalid secret for '{env_var_name}': the secret key " f"'{secret_key}' is not a valid Kubernetes resource name." diff --git a/elyra/util/kubernetes.py b/elyra/util/kubernetes.py index b0a81e145..6c7d344e2 100644 --- a/elyra/util/kubernetes.py +++ b/elyra/util/kubernetes.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # +import re def is_valid_kubernetes_resource_name(name: str) -> bool: @@ -37,3 +38,16 @@ def is_valid_kubernetes_resource_name(name: str) -> bool: elif char not in ["-", "."]: return False return True + + +def is_valid_kubernetes_key_name(name: str) -> bool: + """ + Returns a truthy value indicating whether name meets the kubernetes + naming constraints, as outlined in the link below. + + https://kubernetes.io/docs/concepts/configuration/secret/#restriction-names-data + """ + if name is None: + return False + + return re.match("^[\w\-_.]+$", name) is not None From c49a939dc4caf3091bbc7f5f842f00187d053309 Mon Sep 17 00:00:00 2001 From: Kiersten Stokes Date: Thu, 19 May 2022 13:41:13 -0500 Subject: [PATCH 12/27] Fix lint --- elyra/util/kubernetes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/elyra/util/kubernetes.py b/elyra/util/kubernetes.py index 6c7d344e2..43361feeb 100644 --- a/elyra/util/kubernetes.py +++ b/elyra/util/kubernetes.py @@ -50,4 +50,4 @@ def is_valid_kubernetes_key_name(name: str) -> bool: if name is None: return False - return re.match("^[\w\-_.]+$", name) is not None + return re.match(r"^[\w\-_.]+$", name) is not None From dc5b06a59fd6ff8410badd8b46cec7d5025b1a9c Mon Sep 17 00:00:00 2001 From: Kiersten Stokes Date: Mon, 23 May 2022 13:03:20 -0500 Subject: [PATCH 13/27] Move certain secret and volume logic to validation service --- elyra/pipeline/airflow/processor_airflow.py | 13 +- elyra/pipeline/kfp/processor_kfp.py | 13 +- elyra/pipeline/pipeline.py | 129 +++++++----------- elyra/pipeline/pipeline_definition.py | 30 ++-- elyra/pipeline/validation.py | 102 +++++++++++++- .../generic_properties_template.jinja2 | 3 +- elyra/util/kubernetes.py | 2 +- 7 files changed, 175 insertions(+), 117 deletions(-) diff --git a/elyra/pipeline/airflow/processor_airflow.py b/elyra/pipeline/airflow/processor_airflow.py index 99eae2f75..b6a510c81 100644 --- a/elyra/pipeline/airflow/processor_airflow.py +++ b/elyra/pipeline/airflow/processor_airflow.py @@ -41,8 +41,6 @@ from elyra.pipeline.pipeline import Operation from elyra.pipeline.pipeline import Pipeline from elyra.pipeline.pipeline_constants import COS_OBJECT_PREFIX -from elyra.pipeline.pipeline_constants import KUBERNETES_SECRETS -from elyra.pipeline.pipeline_constants import MOUNTED_VOLUMES from elyra.pipeline.processor import PipelineProcessor from elyra.pipeline.processor import PipelineProcessorResponse from elyra.pipeline.processor import RuntimePipelineProcessor @@ -319,13 +317,6 @@ def _cc_pipeline(self, pipeline: Pipeline, pipeline_name: str, pipeline_instance outputs=operation.outputs, ) - valid_volume_mounts = GenericOperation.get_valid_volume_mounts( - volume_mounts=operation.component_params.get(MOUNTED_VOLUMES), logger=self.log - ) - valid_kubernetes_secrets = GenericOperation.get_valid_kubernetes_secrets( - secrets=operation.component_params.get(KUBERNETES_SECRETS), logger=self.log - ) - target_op = { "notebook": operation.name, "id": operation.id, @@ -340,8 +331,8 @@ def _cc_pipeline(self, pipeline: Pipeline, pipeline_name: str, pipeline_instance "operator_source": operation.component_params["filename"], "is_generic_operator": True, "doc": operation.doc, - "volume_mounts": valid_volume_mounts, - "kubernetes_secrets": valid_kubernetes_secrets, + "volume_mounts": operation.volume_mounts, + "kubernetes_secrets": operation.kubernetes_secrets, } if runtime_image_pull_secret is not None: diff --git a/elyra/pipeline/kfp/processor_kfp.py b/elyra/pipeline/kfp/processor_kfp.py index 048a6175c..c105d84b3 100644 --- a/elyra/pipeline/kfp/processor_kfp.py +++ b/elyra/pipeline/kfp/processor_kfp.py @@ -47,8 +47,6 @@ from elyra.pipeline.pipeline import Operation from elyra.pipeline.pipeline import Pipeline from elyra.pipeline.pipeline_constants import COS_OBJECT_PREFIX -from elyra.pipeline.pipeline_constants import KUBERNETES_SECRETS -from elyra.pipeline.pipeline_constants import MOUNTED_VOLUMES from elyra.pipeline.processor import PipelineProcessor from elyra.pipeline.processor import PipelineProcessorResponse from elyra.pipeline.processor import RuntimePipelineProcessor @@ -522,13 +520,6 @@ def _cc_pipeline( f"Creating pipeline component archive '{operation_artifact_archive}' for operation '{operation}'" ) - valid_volume_mounts = GenericOperation.get_valid_volume_mounts( - volume_mounts=operation.component_params.get(MOUNTED_VOLUMES), logger=self.log - ) - valid_kubernetes_secrets = GenericOperation.get_valid_kubernetes_secrets( - secrets=operation.component_params.get(KUBERNETES_SECRETS), logger=self.log - ) - target_ops[operation.id] = ExecuteFileOp( name=sanitized_operation_name, pipeline_name=pipeline_name, @@ -553,8 +544,8 @@ def _cc_pipeline( "mlpipeline-metrics": f"{pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR']}/mlpipeline-metrics.json", # noqa "mlpipeline-ui-metadata": f"{pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR']}/mlpipeline-ui-metadata.json", # noqa }, - volume_mounts=valid_volume_mounts, - kubernetes_secrets=valid_kubernetes_secrets, + volume_mounts=operation.volume_mounts, + kubernetes_secrets=operation.kubernetes_secrets, ) if operation.doc: diff --git a/elyra/pipeline/pipeline.py b/elyra/pipeline/pipeline.py index 444337206..a4de0542d 100644 --- a/elyra/pipeline/pipeline.py +++ b/elyra/pipeline/pipeline.py @@ -24,8 +24,8 @@ from typing import Optional from elyra.pipeline.pipeline_constants import ENV_VARIABLES -from elyra.util.kubernetes import is_valid_kubernetes_key_name -from elyra.util.kubernetes import is_valid_kubernetes_resource_name +from elyra.pipeline.pipeline_constants import KUBERNETES_SECRETS +from elyra.pipeline.pipeline_constants import MOUNTED_VOLUMES # TODO: Make pipeline version available more widely # as today is only available on the pipeline editor @@ -158,6 +158,16 @@ def outputs(self) -> Optional[List[str]]: def outputs(self, value: List[str]): self._component_params["outputs"] = value + @property + def volume_mounts(self) -> List["VolumeMount"]: + volumes = self._component_params.get(MOUNTED_VOLUMES, []) + return Operation.get_valid_volume_mounts(volumes) + + @property + def kubernetes_secrets(self) -> List["KubernetesSecret"]: + secrets = self._component_params.get(KUBERNETES_SECRETS, []) + return Operation.get_valid_kubernetes_secrets(secrets) + def __eq__(self, other: "Operation") -> bool: if isinstance(self, other.__class__): return ( @@ -207,6 +217,42 @@ def log_message(msg: str, logger: Optional[Logger] = None, level: Optional[int] else: print(msg) + @staticmethod + def get_valid_volume_mounts(volume_mounts: "KeyValueList") -> Optional[List["VolumeMount"]]: + """ + Loops through the Operation's mounted volumes to re-format path and remove + invalid PVC names. + + :return: a list of VolumeMount objects with mount path and valid PVC names + """ + valid_volume_mounts = [] + if volume_mounts and isinstance(volume_mounts, KeyValueList): + for mount_path, pvc_name in volume_mounts.to_dict().items(): + formatted_mount_path = f"/{mount_path.strip('/')}" + + # Create a VolumeMount class instance and add to list + valid_volume_mounts.append(VolumeMount(formatted_mount_path, pvc_name)) + + return valid_volume_mounts + + @staticmethod + def get_valid_kubernetes_secrets(secrets: "KeyValueList") -> Optional[List["KubernetesSecret"]]: + """ + Loops through the Operation's kubernetes secrets to strip whitespace + and re-format as a tuple. + + :return: a list of KubernetesSecret objects with env var name and valid secret name and key + """ + valid_secrets = [] + if secrets and isinstance(secrets, KeyValueList): + for env_var_name, secret in secrets.to_dict().items(): + secret_name, secret_key = secret.split(":", 1) + + # Create a KubernetesSecret class instance and add to list + valid_secrets.append(KubernetesSecret(env_var_name, secret_name.strip(), secret_key.strip())) + + return valid_secrets + class GenericOperation(Operation): """ @@ -327,79 +373,6 @@ def __eq__(self, other: "GenericOperation") -> bool: def _validate_range(self, value: str, min_value: int = 0, max_value: int = sys.maxsize) -> bool: return int(value) in range(min_value, max_value) - @staticmethod - def get_valid_volume_mounts( - volume_mounts: "KeyValueList", logger: Optional[Logger] = None - ) -> Optional[List["VolumeMount"]]: - """ - Loops through the Operation's mounted volumes to re-format path and remove - invalid PVC names. - - :return: a list of VolumeMount objects with mount path and valid PVC names - """ - valid_volume_mounts = [] - if volume_mounts and isinstance(volume_mounts, KeyValueList): - for mount_path, pvc_name in volume_mounts.to_dict().items(): - # Ensure the PVC name is syntactically a valid Kubernetes resource name - if not is_valid_kubernetes_resource_name(pvc_name): - msg = ( - f"Ignoring invalid volume mount entry '{mount_path}': the PVC " - f"name '{pvc_name}' is not a valid Kubernetes resource name." - ) - Operation.log_message(msg=msg, logger=logger, level=logging.WARN) - continue - - formatted_mount_path = f"/{mount_path.strip('/')}" - - # Volume mount is valid, create a VolumeMount class instance and add to list - volume_mount = VolumeMount(formatted_mount_path, pvc_name) - valid_volume_mounts.append(volume_mount) - - return valid_volume_mounts - - @staticmethod - def get_valid_kubernetes_secrets( - secrets: "KeyValueList", logger: Optional[Logger] = None - ) -> Optional[List["KubernetesSecret"]]: - """ - Loops through the Operation's kubernetes secrets to strip whitespace - and re-format as a tuple. - - :return: a list of KubernetesSecret objects with env var name and valid secret name and key - """ - valid_secrets = [] - if secrets and isinstance(secrets, KeyValueList): - for env_var_name, secret in secrets.to_dict().items(): - secret_tuple = secret.split(":", 1) - if len(secret_tuple) != 2: - msg = ( - f"Ignoring invalid secret for '{env_var_name}': the secret name " - f"and key must follow the format 'secret_name:secret_key'." - ) - Operation.log_message(msg=msg, logger=logger, level=logging.WARN) - continue - secret_name, secret_key = secret_tuple[0].strip(), secret_tuple[1].strip() - if not is_valid_kubernetes_resource_name(secret_name): - msg = ( - f"Ignoring invalid secret for '{env_var_name}': the secret name " - f"'{secret_name}' is not a valid Kubernetes resource name." - ) - Operation.log_message(msg=msg, logger=logger, level=logging.WARN) - continue - if not is_valid_kubernetes_key_name(secret_key): - msg = ( - f"Ignoring invalid secret for '{env_var_name}': the secret key " - f"'{secret_key}' is not a valid Kubernetes resource name." - ) - Operation.log_message(msg=msg, logger=logger, level=logging.WARN) - continue - - # Secret is valid, create a KubernetesSecret class instance and add to list - kubernetes_secret = KubernetesSecret(env_var_name, secret_name, secret_key) - valid_secrets.append(kubernetes_secret) - - return valid_secrets - class Pipeline(object): """ @@ -553,13 +526,17 @@ def to_dict(self, logger: Optional[Logger] = None) -> Dict[str, str]: kv_dict[key] = value return kv_dict + @classmethod + def to_str(cls, key: str, value: str) -> str: + return f"{key}{cls._key_value_separator}{value}" + @classmethod def from_dict(cls, kv_dict: Dict) -> "KeyValueList": """ Convert a set of key-value pairs stored in a dictionary to a KeyValueList of strings with the defined separator. """ - str_list = [f"{key}{cls._key_value_separator}{value}" for key, value in kv_dict.items()] + str_list = [KeyValueList.to_str(key, value) for key, value in kv_dict.items()] return KeyValueList(str_list) @classmethod diff --git a/elyra/pipeline/pipeline_definition.py b/elyra/pipeline/pipeline_definition.py index 53f4c6217..8b25976ae 100644 --- a/elyra/pipeline/pipeline_definition.py +++ b/elyra/pipeline/pipeline_definition.py @@ -341,6 +341,19 @@ def convert_kv_properties(self, kv_properties: List[str]): # Convert plain list to KeyValueList self.set_component_parameter(property_name, KeyValueList(value)) + def remove_env_vars_with_matching_secrets(self): + """ + In the case of a matching key between env vars and kubernetes secrets, + prefer the Kubernetes Secret and remove the matching env var + """ + if self.get_component_parameter(ENV_VARIABLES) and self.get_component_parameter(KUBERNETES_SECRETS): + new_list = KeyValueList.difference( + minuend=self.get_component_parameter(ENV_VARIABLES), + subtrahend=self.get_component_parameter(KUBERNETES_SECRETS), + ) + if new_list: + self.set_component_parameter(ENV_VARIABLES, new_list) + class PipelineDefinition(object): """ @@ -463,7 +476,7 @@ def validate(self) -> list: return self._validation_issues # Has not been validated before - validation_issues = list() + validation_issues = [] # Validate pipeline schema version if "version" not in self._pipeline_definition: validation_issues.append("Pipeline schema version field is missing.") @@ -541,19 +554,8 @@ def propagate_pipeline_default_properties(self): merged_list: KeyValueList = KeyValueList.merge(node_value, pipeline_default_value) node.set_component_parameter(property_name, merged_list) - if ( - self.primary_pipeline.runtime_config != "local" - and node.get_component_parameter(ENV_VARIABLES) - and node.get_component_parameter(KUBERNETES_SECRETS) - ): - # In the case of a duplicate between env vars and kubernetes secrets, - # prefer kubernetes secrets and remove any matching env vars - new_list = KeyValueList.difference( - minuend=node.get_component_parameter(ENV_VARIABLES), - subtrahend=node.get_component_parameter(KUBERNETES_SECRETS), - ) - if new_list: - node.set_component_parameter(ENV_VARIABLES, new_list) + if self.primary_pipeline.runtime_config != "local": + node.remove_env_vars_with_matching_secrets() def is_valid(self) -> bool: """ diff --git a/elyra/pipeline/validation.py b/elyra/pipeline/validation.py index 38a134894..221c95acd 100644 --- a/elyra/pipeline/validation.py +++ b/elyra/pipeline/validation.py @@ -30,13 +30,20 @@ from elyra.metadata.schemaspaces import Runtimes from elyra.pipeline.component import Component from elyra.pipeline.component_catalog import ComponentCache +from elyra.pipeline.pipeline import KeyValueList from elyra.pipeline.pipeline import Operation from elyra.pipeline.pipeline import PIPELINE_CURRENT_SCHEMA from elyra.pipeline.pipeline import PIPELINE_CURRENT_VERSION +from elyra.pipeline.pipeline_constants import ENV_VARIABLES +from elyra.pipeline.pipeline_constants import KUBERNETES_SECRETS +from elyra.pipeline.pipeline_constants import MOUNTED_VOLUMES +from elyra.pipeline.pipeline_constants import RUNTIME_IMAGE from elyra.pipeline.pipeline_definition import Node from elyra.pipeline.pipeline_definition import PipelineDefinition from elyra.pipeline.processor import PipelineProcessorManager from elyra.pipeline.runtime_type import RuntimeProcessorType +from elyra.util.kubernetes import is_valid_kubernetes_key +from elyra.util.kubernetes import is_valid_kubernetes_resource_name from elyra.util.path import get_expanded_path @@ -397,10 +404,12 @@ def _validate_generic_node_properties(self, node: Node, response: ValidationResp :return: """ node_label = node.label - image_name = node.get_component_parameter("runtime_image") + image_name = node.get_component_parameter(RUNTIME_IMAGE) filename = node.get_component_parameter("filename") dependencies = node.get_component_parameter("dependencies") - env_vars = node.get_component_parameter("env_vars") + env_vars = node.get_component_parameter(ENV_VARIABLES) + volumes = node.get_component_parameter(MOUNTED_VOLUMES) + secrets = node.get_component_parameter(KUBERNETES_SECRETS) self._validate_filepath( node_id=node.id, node_label=node_label, property_name="filename", filename=filename, response=response @@ -420,6 +429,11 @@ def _validate_generic_node_properties(self, node: Node, response: ValidationResp response=response, ) + if volumes: + self._validate_mounted_volumes(node.id, node_label, volumes, response=response) + if secrets: + self._validate_kubernetes_secrets(node.id, node_label, secrets, response=response) + self._validate_label(node_id=node.id, node_label=node_label, response=response) if dependencies: notebook_root_relative_path = os.path.dirname(filename) @@ -600,6 +614,88 @@ def _validate_resource_value( }, ) + def _validate_mounted_volumes( + self, node_id: str, node_label: str, volumes: KeyValueList, response: ValidationResponse + ) -> None: + """ + Checks the format of mounted volumes to ensure they're in the correct form + e.g. foo/path=pvc_name + :param node_id: the unique ID of the node + :param node_label: the given node name or user customized name/label of the node + :param volumes: a KeyValueList of volumes to check + :param response: ValidationResponse containing the issue list to be updated + """ + if volumes and isinstance(volumes, KeyValueList): + for mount_path, pvc_name in volumes.to_dict().items(): + # Ensure the PVC name is syntactically a valid Kubernetes resource name + if not is_valid_kubernetes_resource_name(pvc_name): + response.add_message( + severity=ValidationSeverity.Error, + message_type="invalidVolumeMount", + message=f"PVC name '{pvc_name}' is not a valid Kubernetes resource name.", + data={ + "nodeID": node_id, + "nodeName": node_label, + "propertyName": MOUNTED_VOLUMES, + "value": KeyValueList.to_str(mount_path, pvc_name), + }, + ) + + def _validate_kubernetes_secrets( + self, node_id: str, node_label: str, secrets: KeyValueList, response: ValidationResponse + ) -> None: + """ + Checks the format of Kubernetes secrets to ensure they're in the correct form + e.g. FOO=SECRET_NAME:KEY + :param node_id: the unique ID of the node + :param node_label: the given node name or user customized name/label of the node + :param secrets: a KeyValueList of secrets to check + :param response: ValidationResponse containing the issue list to be updated + """ + if secrets and isinstance(secrets, KeyValueList): + for env_var_name, secret in secrets.to_dict().items(): + secret_tuple = secret.split(":", 1) + if len(secret_tuple) != 2: + response.add_message( + severity=ValidationSeverity.Error, + message_type="invalidKubernetesSecret", + message="Property has an improperly formatted representation of secret name and key.", + data={ + "nodeID": node_id, + "nodeName": node_label, + "propertyName": KUBERNETES_SECRETS, + "value": KeyValueList.to_str(env_var_name, secret), + }, + ) + continue + secret_name, secret_key = secret_tuple[0].strip(), secret_tuple[1].strip() + # Ensure the secret name is syntactically a valid Kubernetes resource name + if not is_valid_kubernetes_resource_name(secret_name): + response.add_message( + severity=ValidationSeverity.Error, + message_type="invalidKubernetesSecret", + message=f"Secret name '{secret_name}' is not a valid Kubernetes resource name.", + data={ + "nodeID": node_id, + "nodeName": node_label, + "propertyName": KUBERNETES_SECRETS, + "value": KeyValueList.to_str(env_var_name, secret), + }, + ) + # Ensure the secret key is a syntactically valid Kubernetes key + if not is_valid_kubernetes_key(secret_key): + response.add_message( + severity=ValidationSeverity.Error, + message_type="invalidKubernetesSecret", + message=f"Key '{secret_key}' is not a valid Kubernetes secret key.", + data={ + "nodeID": node_id, + "nodeName": node_label, + "propertyName": KUBERNETES_SECRETS, + "value": KeyValueList.to_str(env_var_name, secret), + }, + ) + def _validate_filepath( self, node_id: str, @@ -683,7 +779,7 @@ def _validate_environmental_variables( severity=ValidationSeverity.Error, message_type="invalidEnvPair", message="Property has an improperly formatted env variable key value pair.", - data={"nodeID": node_id, "nodeName": node_label, "propertyName": "env_vars", "value": env_var}, + data={"nodeID": node_id, "nodeName": node_label, "propertyName": ENV_VARIABLES, "value": env_var}, ) def _validate_label(self, node_id: str, node_label: str, response: ValidationResponse) -> None: diff --git a/elyra/templates/components/generic_properties_template.jinja2 b/elyra/templates/components/generic_properties_template.jinja2 index b2e8e3c39..f1dd19cce 100644 --- a/elyra/templates/components/generic_properties_template.jinja2 +++ b/elyra/templates/components/generic_properties_template.jinja2 @@ -236,7 +236,8 @@ "placement": "on_panel" }, "data": { - "placeholder": "/mount/path=pvc-name" + "placeholder": "/mount/path=pvc-name", + "keyValueEntries": true } } ], diff --git a/elyra/util/kubernetes.py b/elyra/util/kubernetes.py index 43361feeb..887dbee90 100644 --- a/elyra/util/kubernetes.py +++ b/elyra/util/kubernetes.py @@ -40,7 +40,7 @@ def is_valid_kubernetes_resource_name(name: str) -> bool: return True -def is_valid_kubernetes_key_name(name: str) -> bool: +def is_valid_kubernetes_key(name: str) -> bool: """ Returns a truthy value indicating whether name meets the kubernetes naming constraints, as outlined in the link below. From 8fc364575e35ceaffa2ea0d8105ecb9a15b96c4c Mon Sep 17 00:00:00 2001 From: Kiersten Stokes Date: Mon, 23 May 2022 13:47:04 -0500 Subject: [PATCH 14/27] Update and add tests --- .../generic_properties_template.jinja2 | 3 +- elyra/tests/pipeline/test_processor_base.py | 9 +--- elyra/tests/pipeline/test_validation.py | 49 +++++++++++++++++++ 3 files changed, 52 insertions(+), 9 deletions(-) diff --git a/elyra/templates/components/generic_properties_template.jinja2 b/elyra/templates/components/generic_properties_template.jinja2 index f1dd19cce..b2e8e3c39 100644 --- a/elyra/templates/components/generic_properties_template.jinja2 +++ b/elyra/templates/components/generic_properties_template.jinja2 @@ -236,8 +236,7 @@ "placement": "on_panel" }, "data": { - "placeholder": "/mount/path=pvc-name", - "keyValueEntries": true + "placeholder": "/mount/path=pvc-name" } } ], diff --git a/elyra/tests/pipeline/test_processor_base.py b/elyra/tests/pipeline/test_processor_base.py index acf7b3933..cd924bd8a 100644 --- a/elyra/tests/pipeline/test_processor_base.py +++ b/elyra/tests/pipeline/test_processor_base.py @@ -56,8 +56,7 @@ def test_get_volume_mounts(): [ "/mount/test=rwx-test-claim", # valid "/mount/test_two=second-claim", # valid - "/mount/test_three=", # invalid: no pvc name - "/mount/test_four=second#claim", # invalid pvc name + "/mount/test_three=", # invalid: no value after separator ('=') ] ) parsed_volumes_list = GenericOperation.get_valid_volume_mounts(volume_mounts=mounted_volumes) @@ -72,11 +71,7 @@ def test_get_kubernetes_secrets(): [ "ENV_VAR1=test-secret:test-key1", # valid "ENV_VAR2=test-secret:test-key2", # valid - "ENV_VAR3=test-secret", # invalid: no key given - "ENV_VAR4=test:secret:test-key", # invalid: too many fields given - "ENV_VAR5=test%secret:test-key", # invalid secret name - "ENV_VAR6=test-secret:test$key2", # invalid secret key - "ENV_VAR7=", # invalid: no value after separator ('=') + "ENV_VAR3=", # invalid: no value after separator ('=') ] ) diff --git a/elyra/tests/pipeline/test_validation.py b/elyra/tests/pipeline/test_validation.py index 1dadee195..28f22f9ab 100644 --- a/elyra/tests/pipeline/test_validation.py +++ b/elyra/tests/pipeline/test_validation.py @@ -20,7 +20,10 @@ from conftest import KFP_COMPONENT_CACHE_INSTANCE import pytest +from elyra.pipeline.pipeline import KeyValueList from elyra.pipeline.pipeline import PIPELINE_CURRENT_VERSION +from elyra.pipeline.pipeline_constants import KUBERNETES_SECRETS +from elyra.pipeline.pipeline_constants import MOUNTED_VOLUMES from elyra.pipeline.pipeline_definition import PipelineDefinition from elyra.pipeline.validation import PipelineValidationManager from elyra.pipeline.validation import ValidationResponse @@ -412,6 +415,52 @@ def test_invalid_node_property_env_var(validation_manager): assert issues[0]["data"]["nodeID"] == "test-id" +def test_invalid_node_property_volumes(validation_manager): + response = ValidationResponse() + node = {"id": "test-id", "app_data": {"label": "test"}} + volumes = KeyValueList( + [ + "/mount/test=rwx-test-claim", # valid + "/mount/test_two=second-claim", # valid + "/mount/test_four=second#claim", # invalid pvc name + ] + ) + validation_manager._validate_mounted_volumes( + node_id=node["id"], node_label=node["app_data"]["label"], volumes=volumes, response=response + ) + issues = response.to_json().get("issues") + assert issues[0]["severity"] == 1 + assert issues[0]["type"] == "invalidVolumeMount" + assert issues[0]["data"]["propertyName"] == MOUNTED_VOLUMES + assert issues[0]["data"]["nodeID"] == "test-id" + assert "not a valid Kubernetes resource name" in issues[0]["message"] + + +def test_invalid_node_property_secrets(validation_manager): + response = ValidationResponse() + node = {"id": "test-id", "app_data": {"label": "test"}} + secrets = KeyValueList( + [ + "ENV_VAR1=test-secret:test-key1", # valid + "ENV_VAR2=test-secret:test-key2", # valid + "ENV_VAR3=test-secret", # invalid: improperly formatted representation of secret name/key + "ENV_VAR5=test%secret:test-key", # invalid: not a valid Kubernetes resource name + "ENV_VAR6=test-secret:test$key2", # invalid: not a valid Kubernetes secret key + ] + ) + validation_manager._validate_kubernetes_secrets( + node_id=node["id"], node_label=node["app_data"]["label"], secrets=secrets, response=response + ) + issues = response.to_json().get("issues") + assert issues[0]["severity"] == 1 + assert issues[0]["type"] == "invalidKubernetesSecret" + assert issues[0]["data"]["propertyName"] == KUBERNETES_SECRETS + assert issues[0]["data"]["nodeID"] == "test-id" + assert "improperly formatted representation of secret name and key" in issues[0]["message"] + assert "not a valid Kubernetes resource name" in issues[1]["message"] + assert "not a valid Kubernetes secret key" in issues[2]["message"] + + def test_valid_node_property_label(validation_manager): response = ValidationResponse() node = {"id": "test-id"} From 8daa10ad6a61e0813fa3a336a0b2339b3d2dd286 Mon Sep 17 00:00:00 2001 From: Kiersten Stokes Date: Mon, 23 May 2022 16:12:22 -0500 Subject: [PATCH 15/27] Coerce volume and secret kv-lists into lists of objects early --- elyra/pipeline/airflow/processor_airflow.py | 6 +- elyra/pipeline/kfp/processor_kfp.py | 6 +- elyra/pipeline/pipeline.py | 64 +++------- elyra/pipeline/pipeline_definition.py | 64 ++++++++-- elyra/pipeline/validation.py | 129 ++++++++++---------- elyra/tests/pipeline/test_processor_base.py | 35 ------ elyra/tests/pipeline/test_validation.py | 31 +++-- 7 files changed, 160 insertions(+), 175 deletions(-) diff --git a/elyra/pipeline/airflow/processor_airflow.py b/elyra/pipeline/airflow/processor_airflow.py index b6a510c81..55661d5be 100644 --- a/elyra/pipeline/airflow/processor_airflow.py +++ b/elyra/pipeline/airflow/processor_airflow.py @@ -41,6 +41,8 @@ from elyra.pipeline.pipeline import Operation from elyra.pipeline.pipeline import Pipeline from elyra.pipeline.pipeline_constants import COS_OBJECT_PREFIX +from elyra.pipeline.pipeline_constants import KUBERNETES_SECRETS +from elyra.pipeline.pipeline_constants import MOUNTED_VOLUMES from elyra.pipeline.processor import PipelineProcessor from elyra.pipeline.processor import PipelineProcessorResponse from elyra.pipeline.processor import RuntimePipelineProcessor @@ -331,8 +333,8 @@ def _cc_pipeline(self, pipeline: Pipeline, pipeline_name: str, pipeline_instance "operator_source": operation.component_params["filename"], "is_generic_operator": True, "doc": operation.doc, - "volume_mounts": operation.volume_mounts, - "kubernetes_secrets": operation.kubernetes_secrets, + "volume_mounts": operation.component_params.get(MOUNTED_VOLUMES, []), + "kubernetes_secrets": operation.component_params.get(KUBERNETES_SECRETS, []), } if runtime_image_pull_secret is not None: diff --git a/elyra/pipeline/kfp/processor_kfp.py b/elyra/pipeline/kfp/processor_kfp.py index c105d84b3..ef0464422 100644 --- a/elyra/pipeline/kfp/processor_kfp.py +++ b/elyra/pipeline/kfp/processor_kfp.py @@ -47,6 +47,8 @@ from elyra.pipeline.pipeline import Operation from elyra.pipeline.pipeline import Pipeline from elyra.pipeline.pipeline_constants import COS_OBJECT_PREFIX +from elyra.pipeline.pipeline_constants import KUBERNETES_SECRETS +from elyra.pipeline.pipeline_constants import MOUNTED_VOLUMES from elyra.pipeline.processor import PipelineProcessor from elyra.pipeline.processor import PipelineProcessorResponse from elyra.pipeline.processor import RuntimePipelineProcessor @@ -544,8 +546,8 @@ def _cc_pipeline( "mlpipeline-metrics": f"{pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR']}/mlpipeline-metrics.json", # noqa "mlpipeline-ui-metadata": f"{pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR']}/mlpipeline-ui-metadata.json", # noqa }, - volume_mounts=operation.volume_mounts, - kubernetes_secrets=operation.kubernetes_secrets, + volume_mounts=operation.component_params.get(MOUNTED_VOLUMES, []), + kubernetes_secrets=operation.component_params.get(KUBERNETES_SECRETS, []), ) if operation.doc: diff --git a/elyra/pipeline/pipeline.py b/elyra/pipeline/pipeline.py index a4de0542d..762e969e1 100644 --- a/elyra/pipeline/pipeline.py +++ b/elyra/pipeline/pipeline.py @@ -13,7 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. # +import dataclasses from dataclasses import dataclass +import json import logging from logging import Logger import os @@ -24,8 +26,6 @@ from typing import Optional from elyra.pipeline.pipeline_constants import ENV_VARIABLES -from elyra.pipeline.pipeline_constants import KUBERNETES_SECRETS -from elyra.pipeline.pipeline_constants import MOUNTED_VOLUMES # TODO: Make pipeline version available more widely # as today is only available on the pipeline editor @@ -158,16 +158,6 @@ def outputs(self) -> Optional[List[str]]: def outputs(self, value: List[str]): self._component_params["outputs"] = value - @property - def volume_mounts(self) -> List["VolumeMount"]: - volumes = self._component_params.get(MOUNTED_VOLUMES, []) - return Operation.get_valid_volume_mounts(volumes) - - @property - def kubernetes_secrets(self) -> List["KubernetesSecret"]: - secrets = self._component_params.get(KUBERNETES_SECRETS, []) - return Operation.get_valid_kubernetes_secrets(secrets) - def __eq__(self, other: "Operation") -> bool: if isinstance(self, other.__class__): return ( @@ -217,42 +207,6 @@ def log_message(msg: str, logger: Optional[Logger] = None, level: Optional[int] else: print(msg) - @staticmethod - def get_valid_volume_mounts(volume_mounts: "KeyValueList") -> Optional[List["VolumeMount"]]: - """ - Loops through the Operation's mounted volumes to re-format path and remove - invalid PVC names. - - :return: a list of VolumeMount objects with mount path and valid PVC names - """ - valid_volume_mounts = [] - if volume_mounts and isinstance(volume_mounts, KeyValueList): - for mount_path, pvc_name in volume_mounts.to_dict().items(): - formatted_mount_path = f"/{mount_path.strip('/')}" - - # Create a VolumeMount class instance and add to list - valid_volume_mounts.append(VolumeMount(formatted_mount_path, pvc_name)) - - return valid_volume_mounts - - @staticmethod - def get_valid_kubernetes_secrets(secrets: "KeyValueList") -> Optional[List["KubernetesSecret"]]: - """ - Loops through the Operation's kubernetes secrets to strip whitespace - and re-format as a tuple. - - :return: a list of KubernetesSecret objects with env var name and valid secret name and key - """ - valid_secrets = [] - if secrets and isinstance(secrets, KeyValueList): - for env_var_name, secret in secrets.to_dict().items(): - secret_name, secret_key = secret.split(":", 1) - - # Create a KubernetesSecret class instance and add to list - valid_secrets.append(KubernetesSecret(env_var_name, secret_name.strip(), secret_key.strip())) - - return valid_secrets - class GenericOperation(Operation): """ @@ -580,3 +534,17 @@ class KubernetesSecret: env_var: str name: str key: str + + +class DataClassJSONEncoder(json.JSONEncoder): + """ + A JSON Encoder class to prevent errors during serialization of dataclasses. + """ + + def default(self, o): + """ + Render dataclass content as dict + """ + if dataclasses.is_dataclass(o): + return dataclasses.asdict(o) + return super().default(o) diff --git a/elyra/pipeline/pipeline_definition.py b/elyra/pipeline/pipeline_definition.py index 8b25976ae..1ad296bb5 100644 --- a/elyra/pipeline/pipeline_definition.py +++ b/elyra/pipeline/pipeline_definition.py @@ -24,9 +24,12 @@ from jinja2 import PackageLoader from elyra.pipeline.pipeline import KeyValueList +from elyra.pipeline.pipeline import KubernetesSecret from elyra.pipeline.pipeline import Operation +from elyra.pipeline.pipeline import VolumeMount from elyra.pipeline.pipeline_constants import ENV_VARIABLES from elyra.pipeline.pipeline_constants import KUBERNETES_SECRETS +from elyra.pipeline.pipeline_constants import MOUNTED_VOLUMES from elyra.pipeline.pipeline_constants import PIPELINE_DEFAULTS from elyra.pipeline.pipeline_constants import PIPELINE_META_PROPERTIES @@ -326,6 +329,20 @@ def get_all_component_parameters(self) -> Dict[str, Any]: """ return self._node["app_data"]["component_parameters"] + def kv_properties_not_converted(self, kv_properties: List[str]) -> bool: + """ + TODO + """ + for kv_property in kv_properties: + value = self.get_component_parameter(kv_property) + if not value: + continue + + if isinstance(value, KeyValueList): + return False + + return True + def convert_kv_properties(self, kv_properties: List[str]): """ Convert node-level list properties that have been identified as sets of @@ -346,7 +363,9 @@ def remove_env_vars_with_matching_secrets(self): In the case of a matching key between env vars and kubernetes secrets, prefer the Kubernetes Secret and remove the matching env var """ - if self.get_component_parameter(ENV_VARIABLES) and self.get_component_parameter(KUBERNETES_SECRETS): + env_vars = self.get_component_parameter(ENV_VARIABLES) + secrets = self.get_component_parameter(KUBERNETES_SECRETS) + if env_vars and isinstance(env_vars, KeyValueList) and secrets and isinstance(secrets, KeyValueList): new_list = KeyValueList.difference( minuend=self.get_component_parameter(ENV_VARIABLES), subtrahend=self.get_component_parameter(KUBERNETES_SECRETS), @@ -354,6 +373,36 @@ def remove_env_vars_with_matching_secrets(self): if new_list: self.set_component_parameter(ENV_VARIABLES, new_list) + def convert_data_class_properties(self): + """ + TODO + """ + volume_mounts = self.get_component_parameter(MOUNTED_VOLUMES) + if volume_mounts and isinstance(volume_mounts, KeyValueList): + volume_objects = [] + for mount_path, pvc_name in volume_mounts.to_dict().items(): + formatted_mount_path = f"/{mount_path.strip('/')}" + + # Create a VolumeMount class instance and add to list + volume_objects.append(VolumeMount(formatted_mount_path, pvc_name)) + + self.set_component_parameter(MOUNTED_VOLUMES, volume_objects) + + secrets = self.get_component_parameter(KUBERNETES_SECRETS) + if secrets and isinstance(secrets, KeyValueList): + secret_objects = [] + for env_var_name, secret in secrets.to_dict().items(): + secret_name, *optional_key = secret.split(":", 1) + + secret_key = "" + if optional_key: + secret_key = optional_key[0].strip() + + # Create a KubernetesSecret class instance and add to list + secret_objects.append(KubernetesSecret(env_var_name, secret_name.strip(), secret_key)) + + self.set_component_parameter(KUBERNETES_SECRETS, secret_objects) + class PipelineDefinition(object): """ @@ -536,8 +585,9 @@ def propagate_pipeline_default_properties(self): if not Operation.is_generic_operation(node.op): continue - # Convert any key-value list node properties to the KeyValueList type - node.convert_kv_properties(kv_properties) + # Convert any key-value list node properties to the KeyValueList type if not done already + if node.kv_properties_not_converted(kv_properties): + node.convert_kv_properties(kv_properties) for property_name, pipeline_default_value in pipeline_default_properties.items(): if not pipeline_default_value: @@ -548,15 +598,15 @@ def propagate_pipeline_default_properties(self): node.set_component_parameter(property_name, pipeline_default_value) continue - if isinstance(pipeline_default_value, KeyValueList): - if not isinstance(node_value, KeyValueList): - raise TypeError(f"The value of node property '{property_name}' is not of type KeyValueList") - merged_list: KeyValueList = KeyValueList.merge(node_value, pipeline_default_value) + if isinstance(pipeline_default_value, KeyValueList) and isinstance(node_value, KeyValueList): + merged_list = KeyValueList.merge(node_value, pipeline_default_value) node.set_component_parameter(property_name, merged_list) if self.primary_pipeline.runtime_config != "local": node.remove_env_vars_with_matching_secrets() + node.convert_data_class_properties() + def is_valid(self) -> bool: """ Represents whether or not the pipeline structure is valid diff --git a/elyra/pipeline/validation.py b/elyra/pipeline/validation.py index 221c95acd..da275acd4 100644 --- a/elyra/pipeline/validation.py +++ b/elyra/pipeline/validation.py @@ -30,10 +30,13 @@ from elyra.metadata.schemaspaces import Runtimes from elyra.pipeline.component import Component from elyra.pipeline.component_catalog import ComponentCache +from elyra.pipeline.pipeline import DataClassJSONEncoder from elyra.pipeline.pipeline import KeyValueList +from elyra.pipeline.pipeline import KubernetesSecret from elyra.pipeline.pipeline import Operation from elyra.pipeline.pipeline import PIPELINE_CURRENT_SCHEMA from elyra.pipeline.pipeline import PIPELINE_CURRENT_VERSION +from elyra.pipeline.pipeline import VolumeMount from elyra.pipeline.pipeline_constants import ENV_VARIABLES from elyra.pipeline.pipeline_constants import KUBERNETES_SECRETS from elyra.pipeline.pipeline_constants import MOUNTED_VOLUMES @@ -615,7 +618,7 @@ def _validate_resource_value( ) def _validate_mounted_volumes( - self, node_id: str, node_label: str, volumes: KeyValueList, response: ValidationResponse + self, node_id: str, node_label: str, volumes: List[VolumeMount], response: ValidationResponse ) -> None: """ Checks the format of mounted volumes to ensure they're in the correct form @@ -625,24 +628,23 @@ def _validate_mounted_volumes( :param volumes: a KeyValueList of volumes to check :param response: ValidationResponse containing the issue list to be updated """ - if volumes and isinstance(volumes, KeyValueList): - for mount_path, pvc_name in volumes.to_dict().items(): - # Ensure the PVC name is syntactically a valid Kubernetes resource name - if not is_valid_kubernetes_resource_name(pvc_name): - response.add_message( - severity=ValidationSeverity.Error, - message_type="invalidVolumeMount", - message=f"PVC name '{pvc_name}' is not a valid Kubernetes resource name.", - data={ - "nodeID": node_id, - "nodeName": node_label, - "propertyName": MOUNTED_VOLUMES, - "value": KeyValueList.to_str(mount_path, pvc_name), - }, - ) + for volume in volumes: + # Ensure the PVC name is syntactically a valid Kubernetes resource name + if not is_valid_kubernetes_resource_name(volume.pvc_name): + response.add_message( + severity=ValidationSeverity.Error, + message_type="invalidVolumeMount", + message=f"PVC name '{volume.pvc_name}' is not a valid Kubernetes resource name.", + data={ + "nodeID": node_id, + "nodeName": node_label, + "propertyName": MOUNTED_VOLUMES, + "value": KeyValueList.to_str(volume.path, volume.pvc_name), + }, + ) def _validate_kubernetes_secrets( - self, node_id: str, node_label: str, secrets: KeyValueList, response: ValidationResponse + self, node_id: str, node_label: str, secrets: List[KubernetesSecret], response: ValidationResponse ) -> None: """ Checks the format of Kubernetes secrets to ensure they're in the correct form @@ -652,49 +654,48 @@ def _validate_kubernetes_secrets( :param secrets: a KeyValueList of secrets to check :param response: ValidationResponse containing the issue list to be updated """ - if secrets and isinstance(secrets, KeyValueList): - for env_var_name, secret in secrets.to_dict().items(): - secret_tuple = secret.split(":", 1) - if len(secret_tuple) != 2: - response.add_message( - severity=ValidationSeverity.Error, - message_type="invalidKubernetesSecret", - message="Property has an improperly formatted representation of secret name and key.", - data={ - "nodeID": node_id, - "nodeName": node_label, - "propertyName": KUBERNETES_SECRETS, - "value": KeyValueList.to_str(env_var_name, secret), - }, - ) - continue - secret_name, secret_key = secret_tuple[0].strip(), secret_tuple[1].strip() - # Ensure the secret name is syntactically a valid Kubernetes resource name - if not is_valid_kubernetes_resource_name(secret_name): - response.add_message( - severity=ValidationSeverity.Error, - message_type="invalidKubernetesSecret", - message=f"Secret name '{secret_name}' is not a valid Kubernetes resource name.", - data={ - "nodeID": node_id, - "nodeName": node_label, - "propertyName": KUBERNETES_SECRETS, - "value": KeyValueList.to_str(env_var_name, secret), - }, - ) - # Ensure the secret key is a syntactically valid Kubernetes key - if not is_valid_kubernetes_key(secret_key): - response.add_message( - severity=ValidationSeverity.Error, - message_type="invalidKubernetesSecret", - message=f"Key '{secret_key}' is not a valid Kubernetes secret key.", - data={ - "nodeID": node_id, - "nodeName": node_label, - "propertyName": KUBERNETES_SECRETS, - "value": KeyValueList.to_str(env_var_name, secret), - }, - ) + for secret in secrets: + if not secret.name or not secret.key: + response.add_message( + severity=ValidationSeverity.Error, + message_type="invalidKubernetesSecret", + message=f"Environment variable '{secret.env_var}' has an improperly formatted representation of " + f"secret name and key.", + data={ + "nodeID": node_id, + "nodeName": node_label, + "propertyName": KUBERNETES_SECRETS, + "value": KeyValueList.to_str(secret.env_var, f"{(secret.name or '')}:{(secret.key or '')}"), + }, + ) + continue + + # Ensure the secret name is syntactically a valid Kubernetes resource name + if not is_valid_kubernetes_resource_name(secret.name): + response.add_message( + severity=ValidationSeverity.Error, + message_type="invalidKubernetesSecret", + message=f"Secret name '{secret.name}' is not a valid Kubernetes resource name.", + data={ + "nodeID": node_id, + "nodeName": node_label, + "propertyName": KUBERNETES_SECRETS, + "value": KeyValueList.to_str(secret.env_var, f"{secret.name}:{secret.key}"), + }, + ) + # Ensure the secret key is a syntactically valid Kubernetes key + if not is_valid_kubernetes_key(secret.key): + response.add_message( + severity=ValidationSeverity.Error, + message_type="invalidKubernetesSecret", + message=f"Key '{secret.key}' is not a valid Kubernetes secret key.", + data={ + "nodeID": node_id, + "nodeName": node_label, + "propertyName": KUBERNETES_SECRETS, + "value": KeyValueList.to_str(secret.env_var, f"{secret.name}:{secret.key}"), + }, + ) def _validate_filepath( self, @@ -839,7 +840,7 @@ def _validate_pipeline_graph(self, pipeline: dict, response: ValidationResponse) :param response: ValidationResponse containing the issue list to be updated :param pipeline: A dictionary describing the pipeline """ - pipeline_json = json.loads(json.dumps(pipeline)) + pipeline_json = json.loads(json.dumps(pipeline, cls=DataClassJSONEncoder)) graph = nx.DiGraph() @@ -890,7 +891,7 @@ def _get_pipeline_id(self, pipeline: dict, node_id: str) -> Optional[str]: :param node_id: the node ID of the node :return: the pipeline ID of where the node is located """ - pipeline_json = json.loads(json.dumps(pipeline)) + pipeline_json = json.loads(json.dumps(pipeline, cls=DataClassJSONEncoder)) for single_pipeline in pipeline_json["pipelines"]: node_list = single_pipeline["nodes"] for node in node_list: @@ -931,7 +932,7 @@ def _get_node_names(self, pipeline: dict, node_id_list: list) -> List: :return: a string representing the name of the node """ node_name_list = [] - pipeline_json = json.loads(json.dumps(pipeline)) + pipeline_json = json.loads(json.dumps(pipeline, cls=DataClassJSONEncoder)) for node_id in node_id_list: found = False for single_pipeline in pipeline_json["pipelines"]: @@ -957,7 +958,7 @@ def _get_node_labels(self, pipeline: dict, link_ids: List[str]) -> Optional[List if link_ids is None: return None - pipeline_json = json.loads(json.dumps(pipeline)) + pipeline_json = json.loads(json.dumps(pipeline, cls=DataClassJSONEncoder)) node_labels = [] for link_id in link_ids: for single_pipeline in pipeline_json["pipelines"]: diff --git a/elyra/tests/pipeline/test_processor_base.py b/elyra/tests/pipeline/test_processor_base.py index cd924bd8a..23684ed19 100644 --- a/elyra/tests/pipeline/test_processor_base.py +++ b/elyra/tests/pipeline/test_processor_base.py @@ -18,10 +18,6 @@ import pytest from elyra.pipeline.parser import PipelineParser -from elyra.pipeline.pipeline import GenericOperation -from elyra.pipeline.pipeline import KeyValueList -from elyra.pipeline.pipeline import KubernetesSecret -from elyra.pipeline.pipeline import VolumeMount from elyra.pipeline.processor import RuntimePipelineProcessor from elyra.tests.pipeline.test_pipeline_parser import _read_pipeline_resource @@ -49,34 +45,3 @@ def sample_metadata(): "engine": "Argo", "tags": [], } - - -def test_get_volume_mounts(): - mounted_volumes = KeyValueList( - [ - "/mount/test=rwx-test-claim", # valid - "/mount/test_two=second-claim", # valid - "/mount/test_three=", # invalid: no value after separator ('=') - ] - ) - parsed_volumes_list = GenericOperation.get_valid_volume_mounts(volume_mounts=mounted_volumes) - assert parsed_volumes_list == [ - VolumeMount("/mount/test", "rwx-test-claim"), - VolumeMount("/mount/test_two", "second-claim"), - ] - - -def test_get_kubernetes_secrets(): - kubernetes_secrets = KeyValueList( - [ - "ENV_VAR1=test-secret:test-key1", # valid - "ENV_VAR2=test-secret:test-key2", # valid - "ENV_VAR3=", # invalid: no value after separator ('=') - ] - ) - - parsed_secrets_list = GenericOperation.get_valid_kubernetes_secrets(secrets=kubernetes_secrets) - assert parsed_secrets_list == [ - KubernetesSecret("ENV_VAR1", "test-secret", "test-key1"), - KubernetesSecret("ENV_VAR2", "test-secret", "test-key2"), - ] diff --git a/elyra/tests/pipeline/test_validation.py b/elyra/tests/pipeline/test_validation.py index 28f22f9ab..453d962e1 100644 --- a/elyra/tests/pipeline/test_validation.py +++ b/elyra/tests/pipeline/test_validation.py @@ -20,8 +20,9 @@ from conftest import KFP_COMPONENT_CACHE_INSTANCE import pytest -from elyra.pipeline.pipeline import KeyValueList +from elyra.pipeline.pipeline import KubernetesSecret from elyra.pipeline.pipeline import PIPELINE_CURRENT_VERSION +from elyra.pipeline.pipeline import VolumeMount from elyra.pipeline.pipeline_constants import KUBERNETES_SECRETS from elyra.pipeline.pipeline_constants import MOUNTED_VOLUMES from elyra.pipeline.pipeline_definition import PipelineDefinition @@ -418,13 +419,11 @@ def test_invalid_node_property_env_var(validation_manager): def test_invalid_node_property_volumes(validation_manager): response = ValidationResponse() node = {"id": "test-id", "app_data": {"label": "test"}} - volumes = KeyValueList( - [ - "/mount/test=rwx-test-claim", # valid - "/mount/test_two=second-claim", # valid - "/mount/test_four=second#claim", # invalid pvc name - ] - ) + volumes = [ + VolumeMount("/mount/test", "rwx-test-claim"), # valid + VolumeMount("/mount/test_two", "second-claim"), # valid + VolumeMount("/mount/test_four", "second#claim"), # invalid pvc name + ] validation_manager._validate_mounted_volumes( node_id=node["id"], node_label=node["app_data"]["label"], volumes=volumes, response=response ) @@ -439,15 +438,13 @@ def test_invalid_node_property_volumes(validation_manager): def test_invalid_node_property_secrets(validation_manager): response = ValidationResponse() node = {"id": "test-id", "app_data": {"label": "test"}} - secrets = KeyValueList( - [ - "ENV_VAR1=test-secret:test-key1", # valid - "ENV_VAR2=test-secret:test-key2", # valid - "ENV_VAR3=test-secret", # invalid: improperly formatted representation of secret name/key - "ENV_VAR5=test%secret:test-key", # invalid: not a valid Kubernetes resource name - "ENV_VAR6=test-secret:test$key2", # invalid: not a valid Kubernetes secret key - ] - ) + secrets = [ + KubernetesSecret("ENV_VAR1", "test-secret", "test-key1"), # valid + KubernetesSecret("ENV_VAR2", "test-secret", "test-key2"), # valid + KubernetesSecret("ENV_VAR3", "test-secret", ""), # invalid: improper format of secret name/key + KubernetesSecret("ENV_VAR5", "test%secret", "test-key"), # invalid: not a valid Kubernetes resource name + KubernetesSecret("ENV_VAR6", "test-secret", "test$key2"), # invalid: not a valid Kubernetes secret key + ] validation_manager._validate_kubernetes_secrets( node_id=node["id"], node_label=node["app_data"]["label"], secrets=secrets, response=response ) From e658423872dec1acd177724544923c3caf3a6bdd Mon Sep 17 00:00:00 2001 From: Kiersten Stokes Date: Tue, 24 May 2022 11:19:09 -0500 Subject: [PATCH 16/27] Refactor convert_kv_properties to return if already converted --- elyra/pipeline/pipeline_definition.py | 31 ++++++++++----------------- 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/elyra/pipeline/pipeline_definition.py b/elyra/pipeline/pipeline_definition.py index 1ad296bb5..f1f06f428 100644 --- a/elyra/pipeline/pipeline_definition.py +++ b/elyra/pipeline/pipeline_definition.py @@ -329,9 +329,12 @@ def get_all_component_parameters(self) -> Dict[str, Any]: """ return self._node["app_data"]["component_parameters"] - def kv_properties_not_converted(self, kv_properties: List[str]) -> bool: + def convert_kv_properties(self, kv_properties: List[str]): """ - TODO + Convert node-level list properties that have been identified as sets of + key-value pairs from a plain list type to the KeyValueList type. If any + k-v property has already been converted to a KeyValueList, all k-v + properties are assumed to have already been converted. """ for kv_property in kv_properties: value = self.get_component_parameter(kv_property) @@ -339,29 +342,16 @@ def kv_properties_not_converted(self, kv_properties: List[str]) -> bool: continue if isinstance(value, KeyValueList): - return False - - return True - - def convert_kv_properties(self, kv_properties: List[str]): - """ - Convert node-level list properties that have been identified as sets of - key-value pairs from a plain list type to the KeyValueList type. - """ - for property_name, value in self.get_all_component_parameters().items(): - if property_name not in kv_properties: - continue - - if not value: - continue + # Any KeyValueList instance implies all relevant properties have already been converted + return # Convert plain list to KeyValueList - self.set_component_parameter(property_name, KeyValueList(value)) + self.set_component_parameter(kv_property, KeyValueList(value)) def remove_env_vars_with_matching_secrets(self): """ In the case of a matching key between env vars and kubernetes secrets, - prefer the Kubernetes Secret and remove the matching env var + prefer the Kubernetes Secret and remove the matching env var. """ env_vars = self.get_component_parameter(ENV_VARIABLES) secrets = self.get_component_parameter(KUBERNETES_SECRETS) @@ -375,7 +365,8 @@ def remove_env_vars_with_matching_secrets(self): def convert_data_class_properties(self): """ - TODO + Convert select node-level list properties to their corresponding dataclass + object type. No validation is performed. """ volume_mounts = self.get_component_parameter(MOUNTED_VOLUMES) if volume_mounts and isinstance(volume_mounts, KeyValueList): From a320e1626421418ec133e04e24d788546113dc71 Mon Sep 17 00:00:00 2001 From: Kiersten Stokes Date: Tue, 24 May 2022 11:21:46 -0500 Subject: [PATCH 17/27] Update property template descriptions --- elyra/pipeline/pipeline_definition.py | 3 +-- elyra/templates/components/generic_properties_template.jinja2 | 2 +- elyra/templates/pipeline/pipeline_properties_template.jinja2 | 4 ++-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/elyra/pipeline/pipeline_definition.py b/elyra/pipeline/pipeline_definition.py index f1f06f428..3a1c63b56 100644 --- a/elyra/pipeline/pipeline_definition.py +++ b/elyra/pipeline/pipeline_definition.py @@ -577,8 +577,7 @@ def propagate_pipeline_default_properties(self): continue # Convert any key-value list node properties to the KeyValueList type if not done already - if node.kv_properties_not_converted(kv_properties): - node.convert_kv_properties(kv_properties) + node.convert_kv_properties(kv_properties) for property_name, pipeline_default_value in pipeline_default_properties.items(): if not pipeline_default_value: diff --git a/elyra/templates/components/generic_properties_template.jinja2 b/elyra/templates/components/generic_properties_template.jinja2 index b2e8e3c39..0809021cd 100644 --- a/elyra/templates/components/generic_properties_template.jinja2 +++ b/elyra/templates/components/generic_properties_template.jinja2 @@ -205,7 +205,7 @@ "placement": "on_panel" }, "data": { - "placeholder": "env_var=secret_name:secret_key", + "placeholder": "env_var=secret-name:secret-key", "keyValueEntries": true } }, diff --git a/elyra/templates/pipeline/pipeline_properties_template.jinja2 b/elyra/templates/pipeline/pipeline_properties_template.jinja2 index 9b4c4ef55..7afa1c2e0 100644 --- a/elyra/templates/pipeline/pipeline_properties_template.jinja2 +++ b/elyra/templates/pipeline/pipeline_properties_template.jinja2 @@ -111,11 +111,11 @@ "default": "Kubernetes Secrets" }, "description": { - "default": "Kubernetes secrets to make available as environment variables to all nodes.", + "default": "Kubernetes secrets to make available as environment variables to this node. The secret name and key given must be present in the Kubernetes namespace where the nodes are executed or the pipeline will not run.", "placement": "on_panel" }, "data": { - "placeholder": "env_var=secret_name:secret_key", + "placeholder": "env_var=secret-name:secret-key", "keyValueEntries": true } }, From 5ac3f6ea6fe2083247a0a336b1c5259240ce8f67 Mon Sep 17 00:00:00 2001 From: Kiersten Stokes Date: Tue, 24 May 2022 11:25:59 -0500 Subject: [PATCH 18/27] Update docs with info on secrets --- docs/source/user_guide/pipelines.md | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/docs/source/user_guide/pipelines.md b/docs/source/user_guide/pipelines.md index dc46b980a..68fd15fc8 100644 --- a/docs/source/user_guide/pipelines.md +++ b/docs/source/user_guide/pipelines.md @@ -91,6 +91,12 @@ The [tutorials](/getting_started/tutorials.md) provide comprehensive step-by-ste - A list of [Persistent Volume Claims](https://kubernetes.io/docs/concepts/storage/persistent-volumes/) (PVC) to be mounted into the container that executes the Jupyter notebook or script. Format: `/mnt/path=existing-pvc-name`. Entries that are empty (`/mnt/path=`) or malformed are ignored. - The referenced PVCs must exist in the Kubernetes namespace where the generic pipeline nodes are executed. - Data volumes are not mounted when the pipeline is executed locally. + - **Kubernetes secrets** + - A list of [Kubernetes Secrets](https://kubernetes.io/docs/concepts/configuration/secret/) to be accessed as environment variables during Jupyter notebook or script execution. Format: `ENV_VAR=secret-name:secret-key`. Entries that are empty (`ENV_VAR=`) or malformed are ignored. + - The referenced secrets must exist in the Kubernetes namespace where the generic pipeline nodes are executed. + - Secrets are ignored when the pipeline is executed locally + - For remote execution, in the case where a node has a Kubernetes secret and an environment variable with the same name, the Kubernetes secret will be preferred and the corresponding environment variable will be removed from the list. + #### Adding nodes @@ -102,11 +108,11 @@ The [tutorials](/getting_started/tutorials.md) provide comprehensive step-by-ste ![Add generic components from file browser](../images/user_guide/pipelines/add-components-from-file-browser.gif) -1. Define the dependencies between nodes by connecting them, essentially creating an execution graph. +2. Define the dependencies between nodes by connecting them, essentially creating an execution graph. ![Connect nodes](../images/user_guide/pipelines/connect-nodes.gif) -1. Define the runtime properties for each node. Highlight a node, right click, and select `Open Properties`. Runtime properties configure a component and govern its execution behavior. +3. Define the runtime properties for each node. Highlight a node, right click, and select `Open Properties`. Runtime properties configure a component and govern its execution behavior. ![Configure node](../images/user_guide/pipelines/configure-node.gif) @@ -140,11 +146,16 @@ The [tutorials](/getting_started/tutorials.md) provide comprehensive step-by-ste - Data volumes are not mounted when the pipeline is executed locally. - Example: `/mnt/vol1=data-pvc` -1. Associate each node with a comment to document its purpose. + **Kubernetes Secrets** + - Optional. A list of [Kubernetes Secrets](https://kubernetes.io/docs/concepts/configuration/secret/) to be accessed as environment variables during Jupyter notebook or script execution. Format: `ENV_VAR=secret-name:secret-key`. Entries that are empty (`ENV_VAR=`) or malformed are ignored. The referenced secrets must exist in the Kubernetes namespace where the generic pipeline nodes are executed. + - Secrets are ignored when the pipeline is executed locally. For remote execution, in the case where a node has a Kubernetes secret and an environment variable with the same name, the Kubernetes secret will be preferred and the corresponding environment variable will be removed from the list. + - Example: `ENV_VAR=secret-name:secret-key` + +5. Associate each node with a comment to document its purpose. ![Add comment to node](../images/user_guide/pipelines/add-comment-to-node.gif) -1. Save the pipeline file. +6. Save the pipeline file. Note: You can rename the pipeline file in the JupyterLab _File Browser_. From 7d28e1190d611eb6a30b82105c8e5cd9d982adf5 Mon Sep 17 00:00:00 2001 From: Kiersten Stokes Date: Tue, 24 May 2022 11:39:12 -0500 Subject: [PATCH 19/27] Add info about validation of secrets and volumes to docs --- docs/source/user_guide/pipelines.md | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/docs/source/user_guide/pipelines.md b/docs/source/user_guide/pipelines.md index 68fd15fc8..152b82728 100644 --- a/docs/source/user_guide/pipelines.md +++ b/docs/source/user_guide/pipelines.md @@ -88,14 +88,13 @@ The [tutorials](/getting_started/tutorials.md) provide comprehensive step-by-ste - **Environment variables** - A list of environment variables to be set in the container that executes the Jupyter notebook or script. Format: `ENV_VAR_NAME=value`. Entries that are empty (`ENV_VAR_NAME=`) or malformed are ignored. - **Data volume** - - A list of [Persistent Volume Claims](https://kubernetes.io/docs/concepts/storage/persistent-volumes/) (PVC) to be mounted into the container that executes the Jupyter notebook or script. Format: `/mnt/path=existing-pvc-name`. Entries that are empty (`/mnt/path=`) or malformed are ignored. + - A list of [Persistent Volume Claims](https://kubernetes.io/docs/concepts/storage/persistent-volumes/) (PVC) to be mounted into the container that executes the Jupyter notebook or script. Format: `/mnt/path=existing-pvc-name`. Entries that are empty (`/mnt/path=`) or malformed are ignored. Entries with a PVC name considered to be an [invalid Kubernetes resource name](https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names) will raise a validation error after pipeline submission or export. - The referenced PVCs must exist in the Kubernetes namespace where the generic pipeline nodes are executed. - Data volumes are not mounted when the pipeline is executed locally. - **Kubernetes secrets** - - A list of [Kubernetes Secrets](https://kubernetes.io/docs/concepts/configuration/secret/) to be accessed as environment variables during Jupyter notebook or script execution. Format: `ENV_VAR=secret-name:secret-key`. Entries that are empty (`ENV_VAR=`) or malformed are ignored. + - A list of [Kubernetes Secrets](https://kubernetes.io/docs/concepts/configuration/secret/) to be accessed as environment variables during Jupyter notebook or script execution. Format: `ENV_VAR=secret-name:secret-key`. Entries that are empty (`ENV_VAR=`) or malformed are ignored. Entries with a secret name considered to be an [invalid Kubernetes resource name](https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names) or with [an invalid secret key](https://kubernetes.io/docs/concepts/configuration/secret/#restriction-names-data) will raise a validation error after pipeline submission or export. - The referenced secrets must exist in the Kubernetes namespace where the generic pipeline nodes are executed. - - Secrets are ignored when the pipeline is executed locally - - For remote execution, in the case where a node has a Kubernetes secret and an environment variable with the same name, the Kubernetes secret will be preferred and the corresponding environment variable will be removed from the list. + - Secrets are ignored when the pipeline is executed locally. For remote execution, in the case where a node has a Kubernetes secret and an environment variable with the same name, the Kubernetes secret will be preferred and the corresponding environment variable will be removed from the list. #### Adding nodes @@ -142,12 +141,12 @@ The [tutorials](/getting_started/tutorials.md) provide comprehensive step-by-ste - Example: `data/*.csv` **Data Volumes** - - Optional. A list of [Persistent Volume Claims](https://kubernetes.io/docs/concepts/storage/persistent-volumes/) (PVC) to be mounted into the container that executes the Jupyter notebook or script. Format: `/mnt/path=existing-pvc-name`. Entries that are empty (`/mnt/path=`) or malformed are ignored. The referenced PVCs must exist in the Kubernetes namespace where the node is executed. + - Optional. A list of [Persistent Volume Claims](https://kubernetes.io/docs/concepts/storage/persistent-volumes/) (PVC) to be mounted into the container that executes the Jupyter notebook or script. Format: `/mnt/path=existing-pvc-name`. Entries that are empty (`/mnt/path=`) or malformed are ignored. Entries with a PVC name considered to be an [invalid Kubernetes resource name](https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names) will raise a validation error after pipeline submission or export. The referenced PVCs must exist in the Kubernetes namespace where the node is executed. - Data volumes are not mounted when the pipeline is executed locally. - Example: `/mnt/vol1=data-pvc` **Kubernetes Secrets** - - Optional. A list of [Kubernetes Secrets](https://kubernetes.io/docs/concepts/configuration/secret/) to be accessed as environment variables during Jupyter notebook or script execution. Format: `ENV_VAR=secret-name:secret-key`. Entries that are empty (`ENV_VAR=`) or malformed are ignored. The referenced secrets must exist in the Kubernetes namespace where the generic pipeline nodes are executed. + - Optional. A list of [Kubernetes Secrets](https://kubernetes.io/docs/concepts/configuration/secret/) to be accessed as environment variables during Jupyter notebook or script execution. Format: `ENV_VAR=secret-name:secret-key`. Entries that are empty (`ENV_VAR=`) or malformed are ignored. Entries with a secret name considered to be an [invalid Kubernetes resource name](https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names) or with [an invalid secret key](https://kubernetes.io/docs/concepts/configuration/secret/#restriction-names-data) will raise a validation error after pipeline submission or export. The referenced secrets must exist in the Kubernetes namespace where the generic pipeline nodes are executed. - Secrets are ignored when the pipeline is executed locally. For remote execution, in the case where a node has a Kubernetes secret and an environment variable with the same name, the Kubernetes secret will be preferred and the corresponding environment variable will be removed from the list. - Example: `ENV_VAR=secret-name:secret-key` From 121cd857fff72b8d9f543f9a8035105d365ea28c Mon Sep 17 00:00:00 2001 From: Kiersten Stokes Date: Tue, 24 May 2022 11:55:56 -0500 Subject: [PATCH 20/27] Update properties json resource to reflect template update --- elyra/tests/pipeline/resources/properties.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/elyra/tests/pipeline/resources/properties.json b/elyra/tests/pipeline/resources/properties.json index 07e85957c..06f5f75f8 100644 --- a/elyra/tests/pipeline/resources/properties.json +++ b/elyra/tests/pipeline/resources/properties.json @@ -205,7 +205,7 @@ "placement": "on_panel" }, "data": { - "placeholder": "env_var=secret_name:secret_key", + "placeholder": "env_var=secret-name:secret-key", "keyValueEntries": true } }, From 798a419b8a76ac175f23895087def77a33490f49 Mon Sep 17 00:00:00 2001 From: Kiersten Stokes Date: Tue, 24 May 2022 15:41:12 -0500 Subject: [PATCH 21/27] Address code review --- elyra/pipeline/pipeline_definition.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/elyra/pipeline/pipeline_definition.py b/elyra/pipeline/pipeline_definition.py index 3a1c63b56..73edeb4da 100644 --- a/elyra/pipeline/pipeline_definition.py +++ b/elyra/pipeline/pipeline_definition.py @@ -318,7 +318,7 @@ def set_component_parameter(self, key: str, value: Any): if not key: raise ValueError("Key is required") - if not value: + if value is None: raise ValueError("Value is required") self._node["app_data"]["component_parameters"][key] = value @@ -355,11 +355,8 @@ def remove_env_vars_with_matching_secrets(self): """ env_vars = self.get_component_parameter(ENV_VARIABLES) secrets = self.get_component_parameter(KUBERNETES_SECRETS) - if env_vars and isinstance(env_vars, KeyValueList) and secrets and isinstance(secrets, KeyValueList): - new_list = KeyValueList.difference( - minuend=self.get_component_parameter(ENV_VARIABLES), - subtrahend=self.get_component_parameter(KUBERNETES_SECRETS), - ) + if isinstance(env_vars, KeyValueList) and isinstance(secrets, KeyValueList): + new_list = KeyValueList.difference(minuend=env_vars, subtrahend=secrets) if new_list: self.set_component_parameter(ENV_VARIABLES, new_list) From f24ca9b9ec6c30bd0e147e88e84a45b5aabfb4cb Mon Sep 17 00:00:00 2001 From: Kiersten Stokes Date: Tue, 24 May 2022 16:11:23 -0500 Subject: [PATCH 22/27] Address documentation review --- docs/source/user_guide/best-practices-file-based-nodes.md | 7 +++++++ docs/source/user_guide/pipelines.md | 6 +++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/docs/source/user_guide/best-practices-file-based-nodes.md b/docs/source/user_guide/best-practices-file-based-nodes.md index 0b653a119..165250f29 100644 --- a/docs/source/user_guide/best-practices-file-based-nodes.md +++ b/docs/source/user_guide/best-practices-file-based-nodes.md @@ -85,6 +85,8 @@ The Visual Pipeline Editor can detect which environment variables notebooks/scri Refer to the next section for a list of proprietary environment variables that cannot be modified using the node properties settings. +If using environment variables containing sensitive information, [Kubernetes secrets](TODO) should be used instead. + ### Proprietary environment variables Elyra makes a set of proprietary environment variables available to notebooks and scripts during execution. Unless indicated otherwise, these variables are defined in all runtime environments. @@ -104,3 +106,8 @@ notebook or script is executed in: - `local` - JupyterLab - `kfp` - Kubeflow Pipelines - `airflow` - Apache Airflow + +### Kubernetes Secrets +The Kubernetes Secrets property can be used to associate environment variable names with secrets, preventing sensitive information from being exposed in the pipeline file, the pipeline editor, and the runtime environment. As with static environment variables, secret-based environment variable values can be set on an individual node and/or defined as pipeline default values and shared across nodes belonging to the same pipeline. A default value can also be overridden for a particular node by redefining the secret for a given variable name in the node properties. + +Secrets are ignored when the pipeline is executed locally. For remote execution, if an environment variable was assigned both a static value (via the 'Environment Variables' property) and a Kubernetes secret value, the secret's value is used. diff --git a/docs/source/user_guide/pipelines.md b/docs/source/user_guide/pipelines.md index 152b82728..21ba0b626 100644 --- a/docs/source/user_guide/pipelines.md +++ b/docs/source/user_guide/pipelines.md @@ -87,14 +87,14 @@ The [tutorials](/getting_started/tutorials.md) provide comprehensive step-by-ste - The value is ignored when the pipeline is executed locally. - **Environment variables** - A list of environment variables to be set in the container that executes the Jupyter notebook or script. Format: `ENV_VAR_NAME=value`. Entries that are empty (`ENV_VAR_NAME=`) or malformed are ignored. - - **Data volume** + - **Data volumes** - A list of [Persistent Volume Claims](https://kubernetes.io/docs/concepts/storage/persistent-volumes/) (PVC) to be mounted into the container that executes the Jupyter notebook or script. Format: `/mnt/path=existing-pvc-name`. Entries that are empty (`/mnt/path=`) or malformed are ignored. Entries with a PVC name considered to be an [invalid Kubernetes resource name](https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names) will raise a validation error after pipeline submission or export. - The referenced PVCs must exist in the Kubernetes namespace where the generic pipeline nodes are executed. - Data volumes are not mounted when the pipeline is executed locally. - **Kubernetes secrets** - A list of [Kubernetes Secrets](https://kubernetes.io/docs/concepts/configuration/secret/) to be accessed as environment variables during Jupyter notebook or script execution. Format: `ENV_VAR=secret-name:secret-key`. Entries that are empty (`ENV_VAR=`) or malformed are ignored. Entries with a secret name considered to be an [invalid Kubernetes resource name](https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names) or with [an invalid secret key](https://kubernetes.io/docs/concepts/configuration/secret/#restriction-names-data) will raise a validation error after pipeline submission or export. - The referenced secrets must exist in the Kubernetes namespace where the generic pipeline nodes are executed. - - Secrets are ignored when the pipeline is executed locally. For remote execution, in the case where a node has a Kubernetes secret and an environment variable with the same name, the Kubernetes secret will be preferred and the corresponding environment variable will be removed from the list. + - Secrets are ignored when the pipeline is executed locally. For remote execution, if an environment variable was assigned both a static value (via the 'Environment Variables' property) and a Kubernetes secret value, the secret's value is used. #### Adding nodes @@ -147,7 +147,7 @@ The [tutorials](/getting_started/tutorials.md) provide comprehensive step-by-ste **Kubernetes Secrets** - Optional. A list of [Kubernetes Secrets](https://kubernetes.io/docs/concepts/configuration/secret/) to be accessed as environment variables during Jupyter notebook or script execution. Format: `ENV_VAR=secret-name:secret-key`. Entries that are empty (`ENV_VAR=`) or malformed are ignored. Entries with a secret name considered to be an [invalid Kubernetes resource name](https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names) or with [an invalid secret key](https://kubernetes.io/docs/concepts/configuration/secret/#restriction-names-data) will raise a validation error after pipeline submission or export. The referenced secrets must exist in the Kubernetes namespace where the generic pipeline nodes are executed. - - Secrets are ignored when the pipeline is executed locally. For remote execution, in the case where a node has a Kubernetes secret and an environment variable with the same name, the Kubernetes secret will be preferred and the corresponding environment variable will be removed from the list. + - Secrets are ignored when the pipeline is executed locally. For remote execution, if an environment variable was assigned both a static value (via the 'Environment Variables' property) and a Kubernetes secret value, the secret's value is used. - Example: `ENV_VAR=secret-name:secret-key` 5. Associate each node with a comment to document its purpose. From 50a228fb058a224cac83fd7b1cc42aeadd42a363 Mon Sep 17 00:00:00 2001 From: Kiersten Stokes Date: Tue, 24 May 2022 16:22:10 -0500 Subject: [PATCH 23/27] Fix lint --- docs/source/user_guide/best-practices-file-based-nodes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/user_guide/best-practices-file-based-nodes.md b/docs/source/user_guide/best-practices-file-based-nodes.md index 165250f29..1142c786b 100644 --- a/docs/source/user_guide/best-practices-file-based-nodes.md +++ b/docs/source/user_guide/best-practices-file-based-nodes.md @@ -85,7 +85,7 @@ The Visual Pipeline Editor can detect which environment variables notebooks/scri Refer to the next section for a list of proprietary environment variables that cannot be modified using the node properties settings. -If using environment variables containing sensitive information, [Kubernetes secrets](TODO) should be used instead. +If using environment variables containing sensitive information, [Kubernetes secrets](#kubernetes-secrets) should be used instead. ### Proprietary environment variables From c809f587f5903d9b533bf3982d73f8f4dc5989bf Mon Sep 17 00:00:00 2001 From: Kiersten Stokes Date: Tue, 24 May 2022 16:57:08 -0500 Subject: [PATCH 24/27] Remove test_processor_base --- elyra/tests/pipeline/test_processor_base.py | 47 --------------------- 1 file changed, 47 deletions(-) delete mode 100644 elyra/tests/pipeline/test_processor_base.py diff --git a/elyra/tests/pipeline/test_processor_base.py b/elyra/tests/pipeline/test_processor_base.py deleted file mode 100644 index 23684ed19..000000000 --- a/elyra/tests/pipeline/test_processor_base.py +++ /dev/null @@ -1,47 +0,0 @@ -# -# Copyright 2018-2022 Elyra Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import os - -import pytest - -from elyra.pipeline.parser import PipelineParser -from elyra.pipeline.processor import RuntimePipelineProcessor -from elyra.tests.pipeline.test_pipeline_parser import _read_pipeline_resource - - -@pytest.fixture -def runtime_processor(setup_factory_data): - processor = RuntimePipelineProcessor(os.getcwd()) - return processor - - -@pytest.fixture -def parsed_pipeline(request): - pipeline_resource = _read_pipeline_resource(request.param) - return PipelineParser().parse(pipeline_json=pipeline_resource) - - -@pytest.fixture -def sample_metadata(): - return { - "api_endpoint": "http://examples.com:31737", - "cos_endpoint": "http://examples.com:31671", - "cos_username": "example", - "cos_password": "example123", - "cos_bucket": "test", - "engine": "Argo", - "tags": [], - } From dec4a8b9e294797b760c743df802a787f322d0a1 Mon Sep 17 00:00:00 2001 From: Kiersten Stokes Date: Wed, 25 May 2022 09:57:03 -0500 Subject: [PATCH 25/27] Add test for env vars and secret duplicate keys --- elyra/pipeline/pipeline_definition.py | 3 +-- elyra/tests/pipeline/test_pipeline_definition.py | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/elyra/pipeline/pipeline_definition.py b/elyra/pipeline/pipeline_definition.py index 73edeb4da..92e71e586 100644 --- a/elyra/pipeline/pipeline_definition.py +++ b/elyra/pipeline/pipeline_definition.py @@ -357,8 +357,7 @@ def remove_env_vars_with_matching_secrets(self): secrets = self.get_component_parameter(KUBERNETES_SECRETS) if isinstance(env_vars, KeyValueList) and isinstance(secrets, KeyValueList): new_list = KeyValueList.difference(minuend=env_vars, subtrahend=secrets) - if new_list: - self.set_component_parameter(ENV_VARIABLES, new_list) + self.set_component_parameter(ENV_VARIABLES, new_list) def convert_data_class_properties(self): """ diff --git a/elyra/tests/pipeline/test_pipeline_definition.py b/elyra/tests/pipeline/test_pipeline_definition.py index 976b0c049..4dabd548a 100644 --- a/elyra/tests/pipeline/test_pipeline_definition.py +++ b/elyra/tests/pipeline/test_pipeline_definition.py @@ -19,6 +19,8 @@ from elyra.pipeline import pipeline_constants from elyra.pipeline.pipeline import KeyValueList +from elyra.pipeline.pipeline_constants import ENV_VARIABLES +from elyra.pipeline.pipeline_constants import KUBERNETES_SECRETS from elyra.pipeline.pipeline_definition import PipelineDefinition from elyra.tests.pipeline.util import _read_pipeline_resource @@ -153,6 +155,19 @@ def test_propagate_pipeline_default_properties(monkeypatch): assert node.get_component_parameter(kv_test_property_name) == kv_list_correct +def test_remove_env_vars_with_matching_secrets(): + pipeline_json = _read_pipeline_resource("resources/sample_pipelines/pipeline_valid_with_pipeline_default.json") + pipeline_definition = PipelineDefinition(pipeline_definition=pipeline_json) + node = pipeline_definition.primary_pipeline.nodes.pop() + + # Set kubernetes_secret property to have all the same keys as those in the env_vars property + kubernetes_secrets = KeyValueList(["var1=name1:key1", "var2=name2:key2", "var3=name3:key3"]) + node.set_component_parameter(KUBERNETES_SECRETS, kubernetes_secrets) + + node.remove_env_vars_with_matching_secrets() + assert node.get_component_parameter(ENV_VARIABLES) == [] + + def _check_pipeline_correct_pipeline_name(): pipeline_json = _read_pipeline_resource("resources/sample_pipelines/pipeline_valid.json") pipeline_definition = PipelineDefinition(pipeline_definition=pipeline_json) From 2e6924bee6a34b3d82ee53eb98170df0b0c7bfa3 Mon Sep 17 00:00:00 2001 From: Kiersten Stokes Date: Wed, 25 May 2022 11:38:37 -0500 Subject: [PATCH 26/27] Make documentation topic more action-oriented --- docs/source/user_guide/best-practices-file-based-nodes.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/user_guide/best-practices-file-based-nodes.md b/docs/source/user_guide/best-practices-file-based-nodes.md index 1142c786b..eeb9e9e61 100644 --- a/docs/source/user_guide/best-practices-file-based-nodes.md +++ b/docs/source/user_guide/best-practices-file-based-nodes.md @@ -85,7 +85,7 @@ The Visual Pipeline Editor can detect which environment variables notebooks/scri Refer to the next section for a list of proprietary environment variables that cannot be modified using the node properties settings. -If using environment variables containing sensitive information, [Kubernetes secrets](#kubernetes-secrets) should be used instead. +If using environment variables containing sensitive information, refer to the [Handling sensitive information topic](#handling-sensitive-information) for details. ### Proprietary environment variables @@ -107,7 +107,7 @@ notebook or script is executed in: - `kfp` - Kubeflow Pipelines - `airflow` - Apache Airflow -### Kubernetes Secrets +### Handling sensitive information The Kubernetes Secrets property can be used to associate environment variable names with secrets, preventing sensitive information from being exposed in the pipeline file, the pipeline editor, and the runtime environment. As with static environment variables, secret-based environment variable values can be set on an individual node and/or defined as pipeline default values and shared across nodes belonging to the same pipeline. A default value can also be overridden for a particular node by redefining the secret for a given variable name in the node properties. Secrets are ignored when the pipeline is executed locally. For remote execution, if an environment variable was assigned both a static value (via the 'Environment Variables' property) and a Kubernetes secret value, the secret's value is used. From 9ad4fff91a0e87b98a7060c9adaacaa84d501de0 Mon Sep 17 00:00:00 2001 From: Kiersten Stokes Date: Wed, 25 May 2022 12:22:52 -0500 Subject: [PATCH 27/27] Adjust import statements for dataclasses --- elyra/pipeline/pipeline.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/elyra/pipeline/pipeline.py b/elyra/pipeline/pipeline.py index 762e969e1..69cf44f48 100644 --- a/elyra/pipeline/pipeline.py +++ b/elyra/pipeline/pipeline.py @@ -13,8 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. # -import dataclasses +from dataclasses import asdict as dataclass_asdict from dataclasses import dataclass +from dataclasses import is_dataclass import json import logging from logging import Logger @@ -545,6 +546,6 @@ def default(self, o): """ Render dataclass content as dict """ - if dataclasses.is_dataclass(o): - return dataclasses.asdict(o) + if is_dataclass(o): + return dataclass_asdict(o) return super().default(o)