diff --git a/docs/source/user_guide/pipelines.md b/docs/source/user_guide/pipelines.md
index 73df99a0e..28fe6ada6 100644
--- a/docs/source/user_guide/pipelines.md
+++ b/docs/source/user_guide/pipelines.md
@@ -372,12 +372,16 @@ To export a pipeline from the Visual Pipeline Editor:
 
 #### Exporting a pipeline from the command line interface
 
-Use the [`elyra-pipeline`](command-line-interface.html#working-with-pipelines) `export` command to export a pipeline to a runtime-specific format, such as YAML for Kubeflow Pipelines or Python DAG for Apache Airflow.
+Use the [`elyra-pipeline`](command-line-interface.html#working-with-pipelines) `export` command to export a pipeline to a runtime-specific format:
+- Kubeflow Pipelines: [Python DSL](https://v1-5-branch.kubeflow.org/docs/components/pipelines/sdk/build-pipeline/) or YAML
+- Apache Airflow: Python DAG
 
 ```bash
 $ elyra-pipeline export a-notebook.pipeline --runtime-config kfp_dev_env --output /path/to/exported.yaml --overwrite
 ```
 
+By default, export produces YAML-formatted output for Kubeflow Pipelines; for Apache Airflow, a Python DAG is the only supported format. To choose a different format for Kubeflow Pipelines, specify the `--format` option; supported values are `py` and `yaml`.
+
 To learn more about supported parameters, run
 
 ```bash
 $ elyra-pipeline export --help
diff --git a/elyra/cli/pipeline_app.py b/elyra/cli/pipeline_app.py
index 6bfa58f02..e4517c5e1 100644
--- a/elyra/cli/pipeline_app.py
+++ b/elyra/cli/pipeline_app.py
@@ -670,10 +670,16 @@ def describe(json_option, pipeline_path):
     "--output",
     required=False,
     type=Path,
-    help="Exported file name (including optional path). Defaults to " " the current directory and the pipeline name.",
+    help="Exported file name (including optional path). Defaults to the current directory and the pipeline name.",
+)
+@click.option(
+    "--format",
+    required=False,
+    type=str,
+    help="File export format.",
 )
 @click.option("--overwrite", is_flag=True, help="Overwrite output file if it already exists.")
-def export(pipeline_path, runtime_config, output, overwrite):
+def export(pipeline_path, runtime_config, output, format, overwrite):
     """
     Export a pipeline to a runtime-specific format
     """
@@ -699,14 +705,20 @@ def export(pipeline_path, runtime_config, output, overwrite):
         param_hint="--runtime-config",
     )
 
+    # Determine which export format(s) the runtime processor supports
    resources = RuntimeTypeResources.get_instance_by_type(RuntimeProcessorType.get_instance_by_name(runtime_type))
    supported_export_formats = resources.get_export_extensions()
    if len(supported_export_formats) == 0:
        raise click.ClickException(f"Runtime type '{runtime_type}' does not support export.")
 
-    # If, in the future, a runtime supports multiple export output formats,
-    # the user can choose one. For now, choose the only option.
-    selected_export_format = supported_export_formats[0]
+    # Verify that the user selected a valid format. If none was specified,
+    # the first entry in the supported list is used as the default.
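The behavior described in the documentation above (YAML by default for Kubeflow Pipelines, Python DAG only for Apache Airflow, with `--format` accepting `py` or `yaml` for Kubeflow Pipelines) can be sketched in isolation. This is an illustrative stand-in only: the hard-coded format table and the `select_export_format` helper are hypothetical, whereas the real CLI derives the supported formats from `RuntimeTypeResources.get_export_extensions()` as shown in the surrounding code.

```python
from typing import List, Optional

# Hypothetical lookup table for illustration; the CLI actually queries RuntimeTypeResources.
SUPPORTED_EXPORT_FORMATS = {
    "KUBEFLOW_PIPELINES": ["yaml", "py"],  # first entry is the default
    "APACHE_AIRFLOW": ["py"],              # Python DAG is the only option
}


def select_export_format(runtime_type: str, requested: Optional[str] = None) -> str:
    """Fall back to the runtime's default format and reject unsupported values."""
    supported: List[str] = SUPPORTED_EXPORT_FORMATS[runtime_type]
    selected = (requested or supported[0]).lower()
    if selected not in supported:
        raise ValueError(f"Valid export formats are {supported}.")
    return selected


print(select_export_format("KUBEFLOW_PIPELINES"))        # yaml (default)
print(select_export_format("KUBEFLOW_PIPELINES", "py"))  # py
print(select_export_format("APACHE_AIRFLOW"))            # py
```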
+ selected_export_format = (format or supported_export_formats[0]).lower() + if selected_export_format not in supported_export_formats: + raise click.BadParameter( + f"Valid export formats are {supported_export_formats}.", + param_hint="--format", + ) selected_export_format_suffix = f".{selected_export_format}" # generate output file name from the user-provided input diff --git a/elyra/kfp/operator.py b/elyra/kfp/operator.py deleted file mode 100644 index 36b26165b..000000000 --- a/elyra/kfp/operator.py +++ /dev/null @@ -1,374 +0,0 @@ -# -# Copyright 2018-2022 Elyra Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import os -import string -from typing import Dict -from typing import List -from typing import Optional - -from kfp.dsl import ContainerOp -from kfp.dsl import RUN_ID_PLACEHOLDER -from kubernetes.client.models import V1EmptyDirVolumeSource -from kubernetes.client.models import V1EnvVar -from kubernetes.client.models import V1EnvVarSource -from kubernetes.client.models import V1ObjectFieldSelector -from kubernetes.client.models import V1Volume -from kubernetes.client.models import V1VolumeMount - -from elyra._version import __version__ - -""" -The ExecuteFileOp uses a python script to bootstrap the user supplied image with the required dependencies. -In order for the script run properly, the image used, must at a minimum, have the 'curl' utility available -and have python3 -""" - -# Inputs and Outputs separator character. If updated, -# same-named variable in bootstrapper.py must be updated! 
-INOUT_SEPARATOR = ";" - -ELYRA_GITHUB_ORG = os.getenv("ELYRA_GITHUB_ORG", "elyra-ai") -ELYRA_GITHUB_BRANCH = os.getenv("ELYRA_GITHUB_BRANCH", "main" if "dev" in __version__ else "v" + __version__) -ELYRA_PIP_CONFIG_URL = os.getenv( - "ELYRA_PIP_CONFIG_URL", - f"https://raw.githubusercontent.com/{ELYRA_GITHUB_ORG}/elyra/{ELYRA_GITHUB_BRANCH}/etc/kfp/pip.conf", -) -ELYRA_BOOTSTRAP_SCRIPT_URL = os.getenv( - "ELYRA_BOOTSTRAP_SCRIPT_URL", - f"https://raw.githubusercontent.com/{ELYRA_GITHUB_ORG}/elyra/{ELYRA_GITHUB_BRANCH}/elyra/kfp/bootstrapper.py", -) -ELYRA_REQUIREMENTS_URL = os.getenv( - "ELYRA_REQUIREMENTS_URL", - f"https://raw.githubusercontent.com/{ELYRA_GITHUB_ORG}/" - f"elyra/{ELYRA_GITHUB_BRANCH}/etc/generic/requirements-elyra.txt", -) -ELYRA_REQUIREMENTS_URL_PY37 = os.getenv( - "ELYRA_REQUIREMENTS_URL_PY37", - f"https://raw.githubusercontent.com/{ELYRA_GITHUB_ORG}/" - f"elyra/{ELYRA_GITHUB_BRANCH}/etc/generic/requirements-elyra-py37.txt", -) - - -class ExecuteFileOp(ContainerOp): - def __init__( - self, - pipeline_name: str, - experiment_name: str, - notebook: str, - cos_endpoint: str, - cos_bucket: str, - cos_directory: str, - cos_dependencies_archive: str, - pipeline_version: Optional[str] = "", - pipeline_source: Optional[str] = None, - pipeline_outputs: Optional[List[str]] = None, - pipeline_inputs: Optional[List[str]] = None, - pipeline_envs: Optional[Dict[str, str]] = None, - requirements_url: Optional[str] = None, - bootstrap_script_url: Optional[str] = None, - emptydir_volume_size: Optional[str] = None, - cpu_request: Optional[str] = None, - mem_request: Optional[str] = None, - gpu_limit: Optional[str] = None, - workflow_engine: Optional[str] = "argo", - **kwargs, - ): - """Create a new instance of ContainerOp. - Args: - pipeline_name: pipeline that this op belongs to - experiment_name: the experiment where pipeline_name is executed - notebook: name of the notebook that will be executed per this operation - cos_endpoint: object storage endpoint e.g weaikish1.fyre.ibm.com:30442 - cos_bucket: bucket to retrieve archive from - cos_directory: name of the directory in the object storage bucket to pull - cos_dependencies_archive: archive file name to get from object storage bucket e.g archive1.tar.gz - pipeline_version: optional version identifier - pipeline_source: pipeline source - pipeline_outputs: comma delimited list of files produced by the notebook - pipeline_inputs: comma delimited list of files to be consumed/are required by the notebook - pipeline_envs: dictionary of environmental variables to set in the container prior to execution - requirements_url: URL to a python requirements.txt file to be installed prior to running the notebook - bootstrap_script_url: URL to a custom python bootstrap script to run - emptydir_volume_size: Size(GB) of the volume to create for the workspace when using CRIO container runtime - cpu_request: number of CPUs requested for the operation - mem_request: memory requested for the operation (in Gi) - gpu_limit: maximum number of GPUs allowed for the operation - workflow_engine: Kubeflow workflow engine, defaults to 'argo' - kwargs: additional key value pairs to pass e.g. name, image, sidecars & is_exit_handler. 
- See Kubeflow pipelines ContainerOp definition for more parameters or how to use - https://kubeflow-pipelines.readthedocs.io/en/latest/source/kfp.dsl.html#kfp.dsl.ContainerOp - """ - - self.pipeline_name = pipeline_name - self.pipeline_version = pipeline_version - self.pipeline_source = pipeline_source - self.experiment_name = experiment_name - self.notebook = notebook - self.notebook_name = os.path.basename(notebook) - self.cos_endpoint = cos_endpoint - self.cos_bucket = cos_bucket - self.cos_directory = cos_directory - self.cos_dependencies_archive = cos_dependencies_archive - self.container_work_dir_root_path = "./" - self.container_work_dir_name = "jupyter-work-dir/" - self.container_work_dir = self.container_work_dir_root_path + self.container_work_dir_name - self.bootstrap_script_url = bootstrap_script_url - self.requirements_url = requirements_url - self.pipeline_outputs = pipeline_outputs - self.pipeline_inputs = pipeline_inputs - self.pipeline_envs = pipeline_envs - self.cpu_request = cpu_request - self.mem_request = mem_request - self.gpu_limit = gpu_limit - - argument_list = [] - - """ CRI-o support for kfp pipelines - We need to attach an emptydir volume for each notebook that runs since CRI-o runtime does not allow - us to write to the base image layer file system, only to volumes. - """ - self.emptydir_volume_name = "workspace" - self.emptydir_volume_size = emptydir_volume_size - self.python_user_lib_path = "" - self.python_user_lib_path_target = "" - self.python_pip_config_url = "" - - if self.emptydir_volume_size: - self.container_work_dir_root_path = "/opt/app-root/src/" - self.container_python_dir_name = "python3/" - self.container_work_dir = self.container_work_dir_root_path + self.container_work_dir_name - self.python_user_lib_path = self.container_work_dir + self.container_python_dir_name - self.python_user_lib_path_target = "--target=" + self.python_user_lib_path - self.python_pip_config_url = ELYRA_PIP_CONFIG_URL - - if not self.bootstrap_script_url: - self.bootstrap_script_url = ELYRA_BOOTSTRAP_SCRIPT_URL - - if not self.requirements_url: - self.requirements_url = ELYRA_REQUIREMENTS_URL - - if "name" not in kwargs: - raise TypeError("You need to provide a name for the operation.") - elif not kwargs.get("name"): - raise ValueError("You need to provide a name for the operation.") - - if "image" not in kwargs: - raise ValueError("You need to provide an image.") - - if not notebook: - raise ValueError("You need to provide a notebook.") - - if "arguments" not in kwargs: - """If no arguments are passed, we use our own. 
- If ['arguments'] are set, we assume container's ENTRYPOINT is set and dependencies are installed - NOTE: Images being pulled must have python3 available on PATH and cURL utility - """ - - common_curl_options = '--fail -H "Cache-Control: no-cache"' - - argument_list.append( - f"mkdir -p {self.container_work_dir} && cd {self.container_work_dir} && " - f"echo 'Downloading {self.bootstrap_script_url}' && " - f"curl {common_curl_options} -L {self.bootstrap_script_url} --output bootstrapper.py && " - f"echo 'Downloading {self.requirements_url}' && " - f"curl {common_curl_options} -L {self.requirements_url} --output requirements-elyra.txt && " - f"echo 'Downloading {ELYRA_REQUIREMENTS_URL_PY37}' && " - f"curl {common_curl_options} -L {ELYRA_REQUIREMENTS_URL_PY37} --output requirements-elyra-py37.txt && " - ) - - if self.emptydir_volume_size: - argument_list.append( - f"mkdir {self.container_python_dir_name} && cd {self.container_python_dir_name} && " - f"echo 'Downloading {self.python_pip_config_url}' && " - f"curl {common_curl_options} -L {self.python_pip_config_url} --output pip.conf && cd .. &&" - ) - - argument_list.append( - f"python3 -m pip install {self.python_user_lib_path_target} packaging && " - "python3 -m pip freeze > requirements-current.txt && " - "python3 bootstrapper.py " - f'--pipeline-name "{self.pipeline_name}" ' - f"--cos-endpoint {self.cos_endpoint} " - f"--cos-bucket {self.cos_bucket} " - f'--cos-directory "{self.cos_directory}" ' - f'--cos-dependencies-archive "{self.cos_dependencies_archive}" ' - f'--file "{self.notebook}" ' - ) - - if self.pipeline_inputs: - inputs_str = self._artifact_list_to_str(self.pipeline_inputs) - argument_list.append(f'--inputs "{inputs_str}" ') - - if self.pipeline_outputs: - outputs_str = self._artifact_list_to_str(self.pipeline_outputs) - argument_list.append(f'--outputs "{outputs_str}" ') - - if self.emptydir_volume_size: - argument_list.append(f'--user-volume-path "{self.python_user_lib_path}" ') - - kwargs["command"] = ["sh", "-c"] - kwargs["arguments"] = "".join(argument_list) - - super().__init__(**kwargs) - - # We must deal with the envs after the superclass initialization since these amend the - # container attribute that isn't available until now. 
- if self.pipeline_envs: - for key, value in self.pipeline_envs.items(): # Convert dict entries to format kfp needs - self.container.add_env_variable(V1EnvVar(name=key, value=value)) - - # If crio volume size is found then assume kubeflow pipelines environment is using CRI-o as - # its container runtime - if self.emptydir_volume_size: - self.add_volume( - V1Volume( - empty_dir=V1EmptyDirVolumeSource(medium="", size_limit=self.emptydir_volume_size), - name=self.emptydir_volume_name, - ) - ) - - self.container.add_volume_mount( - V1VolumeMount(mount_path=self.container_work_dir_root_path, name=self.emptydir_volume_name) - ) - - # Append to PYTHONPATH location of elyra dependencies in installed in Volume - self.container.add_env_variable(V1EnvVar(name="PYTHONPATH", value=self.python_user_lib_path)) - - if self.cpu_request: - self.container.set_cpu_request(cpu=str(cpu_request)) - - if self.mem_request: - self.container.set_memory_request(memory=str(mem_request) + "G") - - if self.gpu_limit: - gpu_vendor = self.pipeline_envs.get("GPU_VENDOR", "nvidia") - self.container.set_gpu_limit(gpu=str(gpu_limit), vendor=gpu_vendor) - - # Generate unique ELYRA_RUN_NAME value and expose it as an environment - # variable in the container - if not workflow_engine: - raise ValueError("workflow_engine is missing and needs to be specified.") - if workflow_engine.lower() == "argo": - # attach RUN_ID_PLACEHOLDER as run name - # '{{workflow.annotations.pipelines.kubeflow.org/run_name}}' variable - # cannot be resolved by Argo in KF 1.4 - run_name_placeholder = RUN_ID_PLACEHOLDER - self.container.add_env_variable(V1EnvVar(name="ELYRA_RUN_NAME", value=run_name_placeholder)) - elif workflow_engine.lower() == "tekton": - try: - from kfp_tekton import TektonClient # noqa: F401 - except ImportError: - raise ValueError( - "kfp-tekton not installed. Please install using elyra[kfp-tekton] to use Tekton engine." 
- ) - - # For Tekton derive the value from the specified pod annotation - annotation = "pipelines.kubeflow.org/run_name" - field_path = f"metadata.annotations['{annotation}']" - self.container.add_env_variable( - V1EnvVar( - name="ELYRA_RUN_NAME", - value_from=V1EnvVarSource(field_ref=V1ObjectFieldSelector(field_path=field_path)), - ) - ) - else: - raise ValueError(f"{workflow_engine} is not a supported workflow engine.") - - # Attach metadata to the pod - # Node type (a static type for this op) - self.add_pod_label("elyra/node-type", ExecuteFileOp._normalize_label_value("notebook-script")) - # Pipeline name - self.add_pod_label("elyra/pipeline-name", ExecuteFileOp._normalize_label_value(self.pipeline_name)) - # Pipeline version - self.add_pod_label("elyra/pipeline-version", ExecuteFileOp._normalize_label_value(self.pipeline_version)) - # Experiment name - self.add_pod_label("elyra/experiment-name", ExecuteFileOp._normalize_label_value(self.experiment_name)) - # Pipeline node name - self.add_pod_label("elyra/node-name", ExecuteFileOp._normalize_label_value(kwargs.get("name"))) - # Pipeline node file - self.add_pod_annotation("elyra/node-file-name", self.notebook) - - # Identify the pipeline source, which can be a - # pipeline file (mypipeline.pipeline), a Python - # script or notebook that was submitted - if self.pipeline_source is not None: - self.add_pod_annotation("elyra/pipeline-source", self.pipeline_source) - - def _artifact_list_to_str(self, pipeline_array): - trimmed_artifact_list = [] - for artifact_name in pipeline_array: - if INOUT_SEPARATOR in artifact_name: # if INOUT_SEPARATOR is in name, throw since this is our separator - raise ValueError(f"Illegal character ({INOUT_SEPARATOR}) found in filename '{artifact_name}'.") - trimmed_artifact_list.append(artifact_name.strip()) - return INOUT_SEPARATOR.join(trimmed_artifact_list) - - @staticmethod - def _normalize_label_value(value): - """Produce a Kubernetes-compliant label from value - - Valid label values must be 63 characters or less and - must be empty or begin and end with an alphanumeric - character ([a-z0-9A-Z]) with dashes (-), underscores - (_), dots (.), and alphanumerics between. - """ - - if value is None or len(value) == 0: - return "" # nothing to do - - max_length = 63 - # This char is added at the front and/or back - # of value, if the first and/or last character - # is invalid. For example a value of "-abc" - # is converted to "a-abc". The specified character - # must meet the label value constraints. - valid_char = "a" - # This char is used to replace invalid characters - # that are in the "middle" of value. For example - # a value of "abc%def" is converted to "abc_def". - # The specified character must meet the label value - # constraints. - valid_middle_char = "_" - - # must begin with [0-9a-zA-Z] - valid_chars = string.ascii_letters + string.digits - if value[0] not in valid_chars: - value = valid_char + value - - value = value[:max_length] # enforce max length - - # must end with [0-9a-zA-Z] - if value[-1] not in valid_chars: - if len(value) <= max_length - 1: - # append valid character if max length - # would not be exceeded - value = value + valid_char - else: - # replace with valid character - value = value[:-1] + valid_char - - # middle chars must be [0-9a-zA-Z\-_.] - valid_chars = valid_chars + "-_." 
- - newstr = "" - for c in range(len(value)): - if value[c] not in valid_chars: - newstr = newstr + valid_middle_char - else: - newstr = newstr + value[c] - value = newstr - - return value diff --git a/elyra/pipeline/kfp/processor_kfp.py b/elyra/pipeline/kfp/processor_kfp.py index 7c7ddbcff..969c2c122 100644 --- a/elyra/pipeline/kfp/processor_kfp.py +++ b/elyra/pipeline/kfp/processor_kfp.py @@ -14,29 +14,34 @@ # limitations under the License. # from datetime import datetime +from enum import Enum +from enum import unique +import hashlib +import importlib +import json import os +from pathlib import Path import re +import string +import sys import tempfile import time from typing import Any from typing import Dict +from typing import List +from typing import Optional from typing import Set from urllib.parse import urlsplit +from autopep8 import fix_code +from jinja2 import Environment +from jinja2 import PackageLoader from kfp import Client as ArgoClient from kfp import compiler as kfp_argo_compiler from kfp import components as components from kfp.dsl import PipelineConf -from kfp.aws import use_aws_secret # noqa H306 +from kfp.dsl import RUN_ID_PLACEHOLDER from kubernetes import client as k8s_client -from kubernetes.client import V1EmptyDirVolumeSource -from kubernetes.client import V1EnvVar -from kubernetes.client import V1EnvVarSource -from kubernetes.client import V1PersistentVolumeClaimVolumeSource -from kubernetes.client import V1SecretKeySelector -from kubernetes.client import V1Toleration -from kubernetes.client import V1Volume -from kubernetes.client import V1VolumeMount try: from kfp_tekton import compiler as kfp_tekton_compiler @@ -47,7 +52,6 @@ TektonClient = None from elyra._version import __version__ -from elyra.kfp.operator import ExecuteFileOp from elyra.metadata.schemaspaces import RuntimeImages from elyra.metadata.schemaspaces import Runtimes from elyra.pipeline import pipeline_constants @@ -63,7 +67,6 @@ from elyra.pipeline.component_parameter import VolumeMount from elyra.pipeline.kfp.kfp_authentication import AuthenticationError from elyra.pipeline.kfp.kfp_authentication import KFPAuthenticator -from elyra.pipeline.pipeline import GenericOperation from elyra.pipeline.pipeline import Operation from elyra.pipeline.pipeline import Pipeline from elyra.pipeline.processor import PipelineProcessor @@ -71,9 +74,43 @@ from elyra.pipeline.processor import RuntimePipelineProcessorResponse from elyra.pipeline.runtime_type import RuntimeProcessorType from elyra.util.cos import join_paths +from elyra.util.kubernetes import sanitize_label_value from elyra.util.path import get_absolute_path +@unique +class WorkflowEngineType(Enum): + """ + Identifies Kubeflow Pipelines workflow engines that this + processor supports. + """ + + ARGO = "argo" + TEKTON = "tekton" + + @staticmethod + def get_instance_by_value(value: str) -> "WorkflowEngineType": + """ + Produces an WorkflowEngineType enum instance if the provided value + identifies a supported workflow engine type. + Raises KeyError if value is not a support workflow engine type. 
+ """ + if value: + for instance in WorkflowEngineType.__members__.values(): + if instance.value == value.lower(): + return instance + raise KeyError(f"'{value}'") + + +# Externalize these constants to make them available to the code gen tests +CRIO_VOL_DEF_NAME = "workspace" +CRIO_VOL_DEF_SIZE = "20Gi" +CRIO_VOL_DEF_MEDIUM = "" +CRIO_VOL_MOUNT_PATH = "/opt/app-root/src" +CRIO_VOL_WORKDIR_PATH = f"{CRIO_VOL_MOUNT_PATH}/jupyter-work-dir" +CRIO_VOL_PYTHON_PATH = f"{CRIO_VOL_WORKDIR_PATH}/python3" + + class KfpPipelineProcessor(RuntimePipelineProcessor): _type = RuntimeProcessorType.KUBEFLOW_PIPELINES _name = "kfp" @@ -107,8 +144,8 @@ def process(self, pipeline): api_username = runtime_configuration.metadata.get("api_username") api_password = runtime_configuration.metadata.get("api_password") user_namespace = runtime_configuration.metadata.get("user_namespace") - engine = runtime_configuration.metadata.get("engine") - if engine == "Tekton" and not TektonClient: + workflow_engine = WorkflowEngineType.get_instance_by_value(runtime_configuration.metadata.get("engine", "argo")) + if workflow_engine == WorkflowEngineType.TEKTON and not TektonClient: raise ValueError( "Python package `kfp-tekton` is not installed. " "Please install using `elyra[kfp-tekton]` to use Tekton engine." @@ -141,7 +178,7 @@ def process(self, pipeline): # Create Kubeflow Client ############# try: - if engine == "Tekton": + if workflow_engine == WorkflowEngineType.TEKTON: client = TektonClient( host=api_endpoint, cookies=auth_info.get("cookies", None), @@ -246,29 +283,26 @@ def process(self, pipeline): # the pipelines' dependencies, if applicable pipeline_instance_id = f"{pipeline_name}-{timestamp}" - pipeline_function = lambda: self._cc_pipeline( # nopep8 E731 - pipeline, + # Generate Python DSL from workflow + pipeline_dsl = self._generate_pipeline_dsl( + pipeline=pipeline, pipeline_name=pipeline_name, - pipeline_version=pipeline_version_name, - experiment_name=experiment_name, pipeline_instance_id=pipeline_instance_id, + workflow_engine=workflow_engine, ) - # collect pipeline configuration information - pipeline_conf = self._generate_pipeline_conf(pipeline) + # Collect pipeline configuration information + pipeline_conf = self._generate_pipeline_conf(pipeline=pipeline) + + # Compile the Python DSL, producing the input for the upload to + # Kubeflow Pipelines + self._compile_pipeline_dsl(pipeline_dsl, workflow_engine, pipeline_path, pipeline_conf) - # compile the pipeline - if engine == "Tekton": - kfp_tekton_compiler.TektonCompiler().compile( - pipeline_function, pipeline_path, pipeline_conf=pipeline_conf - ) - else: - kfp_argo_compiler.Compiler().compile(pipeline_function, pipeline_path, pipeline_conf=pipeline_conf) except RuntimeError: raise except Exception as ex: raise RuntimeError( - f"Failed to compile pipeline '{pipeline_name}' with engine '{engine}' to: '{pipeline_path}'" + f"Error compiling pipeline '{pipeline_name}' with engine '{workflow_engine.value}'." 
) from ex self.log_pipeline_info(pipeline_name, "pipeline compiled", duration=time.time() - t0) @@ -394,10 +428,28 @@ def process(self, pipeline): object_storage_path=object_storage_path, ) - def export(self, pipeline: Pipeline, pipeline_export_format: str, pipeline_export_path: str, overwrite: bool): - # Verify that the KfpPipelineProcessor supports the given export format + def export( + self, pipeline: Pipeline, pipeline_export_format: str, pipeline_export_path: str, overwrite: bool + ) -> str: + """ + Export pipeline to the specified format and store the output + in the specified file. + + :param pipeline: The pipeline to be exported + :type pipeline: Pipeline + :param pipeline_export_format: "py" for KFP Python DSL or "yaml" for YAML + :type pipeline_export_format: str + :param pipeline_export_path: name and location of exported file + :type pipeline_export_path: str + :param overwrite: If false, export raises an error if the output file exists. + :type overwrite: bool + :raises ValueError: raised if a parameter is invalid + :raises RuntimeError: an error occurred during export + :return: location of the exported file + :rtype: str + """ + # Verify that the processor supports the given export format self._verify_export_format(pipeline_export_format) - t0_all = time.time() timestamp = datetime.now().strftime("%m%d%H%M%S") pipeline_name = pipeline.name @@ -413,28 +465,34 @@ def export(self, pipeline: Pipeline, pipeline_export_format: str, pipeline_expor schemaspace=Runtimes.RUNTIMES_SCHEMASPACE_ID, name=pipeline.runtime_config ) - engine = runtime_configuration.metadata.get("engine") - if engine == "Tekton" and not TektonClient: + workflow_engine = WorkflowEngineType.get_instance_by_value(runtime_configuration.metadata.get("engine", "argo")) + if workflow_engine == WorkflowEngineType.TEKTON and not TektonClient: raise ValueError("kfp-tekton not installed. Please install using elyra[kfp-tekton] to use Tekton engine.") - if os.path.exists(absolute_pipeline_export_path) and not overwrite: + if Path(absolute_pipeline_export_path).exists() and not overwrite: raise ValueError("File " + absolute_pipeline_export_path + " already exists.") self.log_pipeline_info(pipeline_name, f"Exporting pipeline as a .{pipeline_export_format} file") - # Export pipeline as static configuration file (YAML formatted) try: - # Exported pipeline is not associated with an experiment - # or a version. The association is established when the - # pipeline is imported into KFP by the user. 
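The reworked export flow branches on the requested format: for `py` the generated Python DSL is written out verbatim, while for `yaml` it is handed to the compiler for the selected workflow engine. A minimal, self-contained sketch of that decision follows; the function and parameter names are illustrative, not Elyra's API, and the compiler is stubbed out as a callback.

```python
from pathlib import Path
from typing import Callable


def write_export(
    dsl_source: str,
    export_format: str,
    output_path: str,
    overwrite: bool,
    compile_to_yaml: Callable[[str, str], None],
) -> str:
    """Write the generated DSL as-is for 'py'; otherwise compile it to a static YAML file."""
    target = Path(output_path)
    if target.exists() and not overwrite:
        raise ValueError(f"File {target} already exists.")
    if export_format == "py":
        target.write_text(dsl_source)
    else:
        compile_to_yaml(dsl_source, str(target))
    return str(target)


# Usage with a stand-in "compiler" that just dumps a placeholder document
result = write_export(
    "# generated DSL would go here",
    "yaml",
    "/tmp/exported.yaml",
    overwrite=True,
    compile_to_yaml=lambda dsl, out: Path(out).write_text("kind: Workflow\n"),
)
print(result)  # /tmp/exported.yaml
```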
- pipeline_function = lambda: self._cc_pipeline( - pipeline, pipeline_name, pipeline_instance_id=pipeline_instance_id - ) # nopep8 - if engine == "Tekton": - self.log.info("Compiling pipeline for Tekton engine") - kfp_tekton_compiler.TektonCompiler().compile(pipeline_function, absolute_pipeline_export_path) + # Generate Python DSL + pipeline_dsl = self._generate_pipeline_dsl( + pipeline=pipeline, + pipeline_name=pipeline_name, + pipeline_instance_id=pipeline_instance_id, + workflow_engine=workflow_engine, + ) + + if pipeline_export_format == "py": + # Write Python DSL to file + with open(absolute_pipeline_export_path, "w") as dsl_output: + dsl_output.write(pipeline_dsl) else: - self.log.info("Compiling pipeline for Argo engine") - kfp_argo_compiler.Compiler().compile(pipeline_function, absolute_pipeline_export_path) + # Generate pipeline configuration + pipeline_conf = self._generate_pipeline_conf(pipeline=pipeline) + # + # Export pipeline as static configuration file (YAML formatted) + # by invoking the compiler for the selected engine + self._compile_pipeline_dsl(pipeline_dsl, workflow_engine, absolute_pipeline_export_path, pipeline_conf) except RuntimeError: raise except Exception as ex: @@ -462,54 +520,170 @@ def _collect_envs(self, operation: Operation, **kwargs) -> Dict: envs["ELYRA_WRITABLE_CONTAINER_DIR"] = self.WCD return envs - def _cc_pipeline( + def _generate_pipeline_dsl( self, pipeline: Pipeline, pipeline_name: str, + workflow_engine: WorkflowEngineType, pipeline_version: str = "", experiment_name: str = "", pipeline_instance_id: str = None, - export=False, - ): + ) -> str: + """ + Generate Python DSL for Kubeflow Pipelines v1 + """ - runtime_configuration = self._get_metadata_configuration( - schemaspace=Runtimes.RUNTIMES_SCHEMASPACE_ID, name=pipeline.runtime_config + # Load Kubeflow Pipelines Python DSL template + loader = PackageLoader("elyra", "templates/kubeflow/v1") + template_env = Environment(loader=loader) + # Add filter that produces a Python-safe variable name + template_env.filters["python_safe"] = lambda x: re.sub(r"[" + re.escape(string.punctuation) + "\\s]", "_", x) + # Add filter that escapes the " character in strings + template_env.filters["string_delimiter_safe"] = lambda string: re.sub('"', '\\"', string) + template = template_env.get_template("python_dsl_template.jinja2") + + # Convert pipeline into workflow tasks + workflow_tasks = self._generate_workflow_tasks( + pipeline, + pipeline_name, + workflow_engine, + pipeline_instance_id=pipeline_instance_id, + pipeline_version=pipeline_version, + experiment_name=experiment_name, ) - cos_endpoint = runtime_configuration.metadata["cos_endpoint"] - cos_username = runtime_configuration.metadata.get("cos_username") - cos_password = runtime_configuration.metadata.get("cos_password") - cos_secret = runtime_configuration.metadata.get("cos_secret") - cos_bucket = runtime_configuration.metadata.get("cos_bucket") - engine = runtime_configuration.metadata["engine"] + # Gather unique component definitions from workflow task list. 
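The two Jinja2 filters registered in `_generate_pipeline_dsl` control how node names and string values are embedded in the generated DSL: `python_safe` turns arbitrary labels into valid Python identifiers and `string_delimiter_safe` escapes embedded double quotes. A standalone sketch of their effect, using a made-up template string rather than the shipped `python_dsl_template.jinja2`:

```python
import re
import string

from jinja2 import Environment

env = Environment()
# Same filter expressions as registered for the DSL template
env.filters["python_safe"] = lambda x: re.sub(r"[" + re.escape(string.punctuation) + r"\s]", "_", x)
env.filters["string_delimiter_safe"] = lambda s: re.sub('"', '\\"', s)

template = env.from_string('task_{{ name | python_safe }} = factory(doc="{{ doc | string_delimiter_safe }}")')
print(template.render(name="load data-set", doc='says "hello"'))
# task_load_data_set = factory(doc="says \"hello\"")
```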
+ unique_component_definitions = {} + for key, operation in workflow_tasks.items(): + unique_component_definitions[operation["component_definition_hash"]] = operation["component_definition"] + + # render the Kubeflow Pipelines Python DSL template + pipeline_dsl = template.render( + elyra_version=__version__, + pipeline_name=pipeline_name, + pipeline_description=pipeline.description, + pipeline_parameters=None, + workflow_tasks=workflow_tasks, + component_definitions=unique_component_definitions, + workflow_engine=workflow_engine.value, + ) - pipeline_instance_id = pipeline_instance_id or pipeline_name + # Prettify generated Python DSL + # Defer the import to postpone logger messages: https://github.com/psf/black/issues/2058 + import black - artifact_object_prefix = join_paths( - pipeline.pipeline_properties.get(pipeline_constants.COS_OBJECT_PREFIX), pipeline_instance_id - ) + try: + pipeline_dsl = black.format_str(fix_code(pipeline_dsl), mode=black.FileMode()) + except Exception: + # if an error was encountered log the generated DSL for troubleshooting + self.log.error("Error post-processing generated Python DSL:") + self.log.error(pipeline_dsl) + raise + + return pipeline_dsl + + def _compile_pipeline_dsl( + self, dsl: str, workflow_engine: WorkflowEngineType, output_file: str, pipeline_conf: PipelineConf + ) -> None: + """ + Compile Python DSL using the compiler for the specified workflow_engine. + + :param dsl: the Python DSL to be compiled + :type dsl: str + :param workflow_engine: Compiler to be used + :type workflow_engine: str + :param output_file: output file name + :type output_file: str + :param pipeline_conf: Pipeline configuration to apply + :type pipeline_conf: PipelineConf + :raises RuntimeError: raised when a fatal error is encountered + """ + + with tempfile.TemporaryDirectory() as temp_dir: + module_name = "generated_dsl" + try: + # Add temporary directory to Python module search path. + sys.path.insert(0, temp_dir) + # Save DSL in temporary file so we can import it as a module. + dsl_file = Path(temp_dir) / f"{module_name}.py" + with open(dsl_file, "w") as dsl_output: + dsl_output.write(dsl) + # Load DSL by importing the "generated_dsl" module. + mod = importlib.import_module(module_name) + # If this module was previously imported it won't reflect + # changes that might be in the DSL we are about to compile. + # Force a module re-load to pick up any changes. 
+ mod = importlib.reload(mod) + # Obtain handle to pipeline function, which is named + # in the generated Python DSL "generated_pipeline" + pipeline_function = getattr(mod, "generated_pipeline") + # compile the DSL + if workflow_engine == WorkflowEngineType.TEKTON: + kfp_tekton_compiler.TektonCompiler().compile( + pipeline_function, output_file, pipeline_conf=pipeline_conf + ) + else: + kfp_argo_compiler.Compiler().compile(pipeline_function, output_file, pipeline_conf=pipeline_conf) + except Exception as ex: + raise RuntimeError( + f"Failed to compile pipeline with workflow_engine '{workflow_engine.value}' to '{output_file}'" + ) from ex + finally: + # remove temporary directory from Python module search path + del sys.path[0] + # remove module entry; it's no longer needed now that it was + # processed by the Kubeflow Pipelines compiler + sys.modules.pop(module_name, None) + + def _generate_workflow_tasks( + self, + pipeline: Pipeline, + pipeline_name: str, + workflow_engine: WorkflowEngineType, + pipeline_version: str = "", + experiment_name: str = "", + pipeline_instance_id: str = None, + export: bool = False, + ) -> Dict[str, Dict]: + """ + Produce the workflow tasks that implement the pipeline nodes. The output is + a dictionary containing task ids as keys and task definitions as values. + """ + + pipeline_instance_id = pipeline_instance_id or pipeline_name self.log_pipeline_info( pipeline_name, - f"processing pipeline dependencies for upload to '{cos_endpoint}' " - f"bucket '{cos_bucket}' folder '{artifact_object_prefix}'", + "Processing pipeline", ) t0_all = time.time() - emptydir_volume_size = "" - container_runtime = bool(os.getenv("CRIO_RUNTIME", "False").lower() == "true") - - # Create dictionary that maps component Id to its ContainerOp instance - target_ops = {} - # Sort operations based on dependency graph (topological order) sorted_operations = PipelineProcessor._sort_operations(pipeline.operations) - # Determine whether access to cloud storage is required - for operation in sorted_operations: - if isinstance(operation, GenericOperation): - self._verify_cos_connectivity(runtime_configuration) - break + if any(operation.is_generic for operation in sorted_operations): + # The pipeline contains atleast one node that is implemented + # using a generic component: collect and verify relevant information + runtime_configuration = self._get_metadata_configuration( + schemaspace=Runtimes.RUNTIMES_SCHEMASPACE_ID, name=pipeline.runtime_config + ) + # - verify that cloud storage can be accessed + self._verify_cos_connectivity(runtime_configuration) + # - collect runtime configuration information + cos_username = runtime_configuration.metadata.get("cos_username") + cos_password = runtime_configuration.metadata.get("cos_password") + cos_secret = runtime_configuration.metadata.get("cos_secret") + cos_endpoint = runtime_configuration.metadata["cos_endpoint"] + cos_bucket = runtime_configuration.metadata.get("cos_bucket") + artifact_object_prefix = join_paths( + pipeline.pipeline_properties.get(pipeline_constants.COS_OBJECT_PREFIX), pipeline_instance_id + ) + # - load the generic component definition template + generic_component_template = Environment( + loader=PackageLoader("elyra", "templates/kubeflow/v1") + ).get_template("generic_component_definition_template.jinja2") + # Determine whether we are executing in a CRI-O runtime environment + is_crio_runtime = os.getenv("CRIO_RUNTIME", "False").lower() == "true" # All previous operation outputs should be propagated throughout the pipeline. 
# In order to process this recursively, the current operation's inputs should be combined @@ -518,82 +692,200 @@ def _cc_pipeline( PipelineProcessor._propagate_operation_inputs_outputs(pipeline, sorted_operations) + # Scrub all node labels of invalid characters for operation in sorted_operations: + operation.name = re.sub("-+", "-", re.sub("[^-_0-9A-Za-z ]+", "-", operation.name)).lstrip("-").rstrip("-") - if container_runtime: - # Volume size to create when using CRI-o, NOTE: IBM Cloud minimum is 20Gi - emptydir_volume_size = "20Gi" - - sanitized_operation_name = self._sanitize_operation_name(operation.name) + # Generate unique names for all operations + unique_names = {} + for operation in sorted_operations: + # Ensure operation name is unique + new_name = operation.name + while new_name in unique_names: + new_name = f"{operation.name}_{unique_names[operation.name]}" + unique_names[operation.name] += 1 + operation.name = new_name + unique_names[operation.name] = 1 + + # Create workflow task list that is used as input for the DSL code generator + # from the sorted list of pipeline.pipeline.Operations + workflow_tasks = {} + for operation in sorted_operations: + # Create workflow task, which Jinja2 uses to generate the task specific + # source code. + workflow_task = { + "id": operation.id, + "escaped_task_id": re.sub(r"[" + re.escape(string.punctuation) + "\\s]", "_", operation.id), + "name": operation.name, + "doc": operation.doc, + "upstream_workflow_task_ids": operation.parent_operation_ids or [], + "task_inputs": {}, # as defined in the component specification + "task_outputs": {}, # as defined in the component specification + "task_modifiers": {}, # attached volumes, resources, env variables, metadata, etc + } + + # Add Elyra-owned properties (data volume mounts, kubernetes labels, etc) + # to the task_modifiers property. 
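Node display names feed into both pod labels and generated variable names, so they are scrubbed of invalid characters and de-duplicated before code generation. The stand-alone sketch below mirrors the scrubbing regex and the uniquifying loop from `_generate_workflow_tasks`; the helper names are mine, not Elyra's.

```python
import re


def scrub(name: str) -> str:
    """Replace disallowed characters with '-', collapse runs of '-', and trim leading/trailing '-'."""
    return re.sub("-+", "-", re.sub("[^-_0-9A-Za-z ]+", "-", name)).lstrip("-").rstrip("-")


def uniquify(names):
    """Append a numeric suffix whenever a name has already been used."""
    unique_names = {}
    result = []
    for name in names:
        new_name = name
        while new_name in unique_names:
            new_name = f"{name}_{unique_names[name]}"
            unique_names[name] += 1
        result.append(new_name)
        unique_names[new_name] = 1
    return result


print(scrub("Load data (v2)!"))               # Load data -v2
print(uniquify(["train", "train", "train"]))  # ['train', 'train_1', 'train_2']
```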
+ for value in operation.elyra_params.values(): + if isinstance(value, (ElyraProperty, ElyraPropertyList)): + value.add_to_execution_object( + runtime_processor=self, execution_object=workflow_task["task_modifiers"] + ) - # Create pipeline operation - # If operation is one of the "generic" set of NBs or scripts, construct custom ExecuteFileOp - if isinstance(operation, GenericOperation): - component = ComponentCache.get_generic_component_from_op(operation.classifier) + if operation.is_generic: + # The task is implemented using a generic component + workflow_task["uses_custom_component"] = False - # Collect env variables - pipeline_envs = self._collect_envs( - operation, cos_secret=cos_secret, cos_username=cos_username, cos_password=cos_password + component_definition = generic_component_template.render( + container_image=operation.runtime_image, + command_args=self._compose_container_command_args( + pipeline_name=pipeline_name, + cos_endpoint=cos_endpoint, + cos_bucket=cos_bucket, + cos_directory=artifact_object_prefix, + cos_dependencies_archive=self._get_dependency_archive_name(operation), + filename=operation.filename, + cos_inputs=operation.inputs, + cos_outputs=operation.outputs, + is_crio_runtime=is_crio_runtime, + ), ) + workflow_task["component_definition"] = component_definition + workflow_task["component_definition_hash"] = hashlib.sha256(component_definition.encode()).hexdigest() - operation_artifact_archive = self._get_dependency_archive_name(operation) - - self.log.debug( - f"Creating pipeline component archive '{operation_artifact_archive}' for operation '{operation}'" + # attach environment variables + workflow_task["task_modifiers"]["env_variables"] = self._collect_envs( + operation, cos_secret=cos_secret, cos_username=cos_username, cos_password=cos_password ) - container_op = ExecuteFileOp( - name=sanitized_operation_name, - pipeline_name=pipeline_name, - experiment_name=experiment_name, - notebook=operation.filename, - cos_endpoint=cos_endpoint, - cos_bucket=cos_bucket, - cos_directory=artifact_object_prefix, - cos_dependencies_archive=operation_artifact_archive, - pipeline_version=pipeline_version, - pipeline_source=pipeline.source, - pipeline_inputs=operation.inputs, - pipeline_outputs=operation.outputs, - pipeline_envs=pipeline_envs, - emptydir_volume_size=emptydir_volume_size, - cpu_request=operation.cpu, - mem_request=operation.memory, - gpu_limit=operation.gpu, - workflow_engine=engine, - image=operation.runtime_image, - file_outputs={ - "mlpipeline-metrics": f"{pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR']}/mlpipeline-metrics.json", # noqa - "mlpipeline-ui-metadata": f"{pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR']}/mlpipeline-ui-metadata.json", # noqa - }, - ) + # hack only: since we don't use the ContainerOp constructor anymore + # we cannot use the file_outputs parameter to provide the information + # https://www.kubeflow.org/docs/components/pipelines/v1/sdk/output-viewer/ + workflow_task["task_modifiers"]["special_output_files"] = { + "mlpipeline_ui_metadata": (Path(self.WCD) / "mlpipeline-ui-metadata.json").as_posix(), + "mlpipeline_metrics": (Path(self.WCD) / "mlpipeline-metrics.json").as_posix(), + } + # apply object storage Kubernetes secret, if one was provided if cos_secret and not export: - container_op.apply(use_aws_secret(cos_secret)) + workflow_task["task_modifiers"]["object_storage_secret"] = cos_secret - image_namespace = self._get_metadata_configuration(RuntimeImages.RUNTIME_IMAGES_SCHEMASPACE_ID) - for image_instance in image_namespace: + # apply 
container image pull policy, if one was specified + for image_instance in self._get_metadata_configuration(RuntimeImages.RUNTIME_IMAGES_SCHEMASPACE_ID): if image_instance.metadata["image_name"] == operation.runtime_image and image_instance.metadata.get( "pull_policy" ): - container_op.container.set_image_pull_policy(image_instance.metadata["pull_policy"]) + workflow_task["task_modifiers"]["image_pull_policy"] = image_instance.metadata["pull_policy"] + break - self.log_pipeline_info( - pipeline_name, - f"processing operation dependencies for id '{operation.id}'", - operation_name=operation.name, + # set resource constraints + workflow_task["task_modifiers"]["cpu_request"] = operation.cpu + workflow_task["task_modifiers"]["mem_request"] = { + "size": operation.memory, + "units": "G", + } + workflow_task["task_modifiers"]["gpu_limit"] = { + "size": operation.gpu, + "vendor": workflow_task["task_modifiers"]["env_variables"].get("GPU_VENDOR", "nvidia"), + } + + if is_crio_runtime: + # Attach empty dir volume + workflow_task["task_modifiers"]["crio_runtime"] = { + "emptydir_volume_name": CRIO_VOL_DEF_NAME, + "emptydir_volume_size": CRIO_VOL_DEF_SIZE, + "emptydir_volume_medium": CRIO_VOL_DEF_MEDIUM, + "emptydir_mount_path": CRIO_VOL_MOUNT_PATH, + } + # Set Python module search path + workflow_task["task_modifiers"]["env_variables"]["PYTHONPATH"] = CRIO_VOL_PYTHON_PATH + + # Attach identifying metadata + if workflow_task["task_modifiers"].get("pod_labels") is None: + workflow_task["task_modifiers"]["pod_labels"] = {} + # Node type (a static type for this op) + workflow_task["task_modifiers"]["pod_labels"]["elyra/node-type"] = sanitize_label_value( + "notebook-script" ) + # Pipeline name + workflow_task["task_modifiers"]["pod_labels"]["elyra/pipeline-name"] = sanitize_label_value( + pipeline_name + ) + # Pipeline version + workflow_task["task_modifiers"]["pod_labels"]["elyra/pipeline-version"] = sanitize_label_value( + pipeline_version + ) + # Experiment name + workflow_task["task_modifiers"]["pod_labels"]["elyra/experiment-name"] = sanitize_label_value( + experiment_name + ) + # Pipeline node name + workflow_task["task_modifiers"]["pod_labels"]["elyra/node-name"] = sanitize_label_value(operation.name) + + # Add non-identifying metadata + if workflow_task["task_modifiers"].get("pod_annotations") is None: + workflow_task["task_modifiers"]["pod_annotations"] = {} + # Pipeline node file + workflow_task["task_modifiers"]["pod_annotations"]["elyra/node-file-name"] = operation.filename + + # Identify the pipeline source, which can be a pipeline file (mypipeline.pipeline), a Python + # script or notebook that was submitted + if pipeline.source is not None: + workflow_task["task_modifiers"]["pod_annotations"]["elyra/pipeline-source"] = pipeline.source + + # Generate unique ELYRA_RUN_NAME value, which gets exposed as an environment + # variable + if workflow_engine == WorkflowEngineType.TEKTON: + # Value is derived from an existing annotation; use dummy value + workflow_task["task_modifiers"]["set_run_name"] = "dummy value" + else: + # Use Kubeflow Pipelines provided RUN_ID_PLACEHOLDER as run name + workflow_task["task_modifiers"]["set_run_name"] = RUN_ID_PLACEHOLDER + # Upload dependencies to cloud storage self._upload_dependencies_to_object_store( runtime_configuration, pipeline_name, operation, prefix=artifact_object_prefix ) - # If operation is a "non-standard" component, load it's spec and create operation with factory function else: + # ---------------------------------------- + # The task is 
implemented using a custom component + workflow_task["uses_custom_component"] = True + # Retrieve component from cache component = ComponentCache.instance().get_component(self._type, operation.classifier) - # Convert the user-entered value of certain properties according to their type + workflow_task["component_definition"] = component.definition + workflow_task["component_definition_hash"] = hashlib.sha256(component.definition.encode()).hexdigest() + + # Identify task inputs and outputs using the component spec + # If no data type was specified, string is assumed + factory_function = components.load_component_from_text(component.definition) + for input in factory_function.component_spec.inputs or []: + sanitized_input_name = self._sanitize_param_name(input.name) + workflow_task["task_inputs"][sanitized_input_name] = { + "value": None, + "task_output_reference": None, + "pipeline_parameter_reference": None, + "data_type": (input.type or "string").lower(), + } + # Determine whether the value needs to be rendered in quotes + # in the generated DSL code. For example "my name" (string), and 34 (integer). + workflow_task["task_inputs"][sanitized_input_name]["requires_quoted_rendering"] = workflow_task[ + "task_inputs" + ][sanitized_input_name]["data_type"] not in [ + "integer", + "float", + "bool", + ] + + for output in factory_function.component_spec.outputs or []: + workflow_task["task_outputs"][self._sanitize_param_name(output.name)] = { + "data_type": output.type, + } + + # Iterate over component parameters and assign values to + # task inputs and task add-ons for component_property in component.properties: self.log.debug( f"Processing component parameter '{component_property.name}' " @@ -601,23 +893,31 @@ def _cc_pipeline( ) if component_property.allowed_input_types == [None]: - # Outputs are skipped + # The property does not support inputs. 
Ignore continue + sanitized_component_property_id = self._sanitize_param_name(component_property.ref) + if sanitized_component_property_id in workflow_task["task_inputs"]: + reference = workflow_task["task_inputs"][sanitized_component_property_id] + else: + workflow_task["task_modifiers"][sanitized_component_property_id] = {} + reference = workflow_task["task_modifiers"][sanitized_component_property_id] + # Get corresponding property's value from parsed pipeline property_value_dict = operation.component_params.get(component_property.ref) data_entry_type = property_value_dict.get("widget", None) # one of: inputpath, file, raw data type property_value = property_value_dict.get("value", None) if data_entry_type == "inputpath": - # KFP path-based parameters accept an input from a parent + # task input is the output of an upstream task output_node_id = property_value["value"] # parent node id output_node_parameter_key = property_value["option"].replace("output_", "") # parent param - operation.component_params[component_property.ref] = target_ops[output_node_id].outputs[ - output_node_parameter_key - ] + reference["task_output_reference"] = { + "task_id": re.sub(r"[" + re.escape(string.punctuation) + "\\s]", "_", output_node_id), + "output_id": self._sanitize_param_name(output_node_parameter_key), + } else: # Parameter is either of a raw data type or file contents if data_entry_type == "file" and property_value: - # Read a value from a file + # Read value from the specified file absolute_path = get_absolute_path(self.root_dir, property_value) with open(absolute_path, "r") as f: property_value = f.read() if os.path.getsize(absolute_path) else None @@ -628,80 +928,26 @@ def _cc_pipeline( # Process the value according to its type, if necessary if component_property.json_data_type == "object": - processed_value = self._process_dictionary_value(property_value) - operation.component_params[component_property.ref] = processed_value + reference["value"] = self._process_dictionary_value(property_value) elif component_property.json_data_type == "array": - processed_value = self._process_list_value(property_value) - operation.component_params[component_property.ref] = processed_value + reference["value"] = self._process_list_value(property_value) else: - operation.component_params[component_property.ref] = property_value - - # Build component task factory - try: - factory_function = components.load_component_from_text(component.definition) - except Exception as e: - # TODO Fix error messaging and break exceptions down into categories - self.log.error(f"Error loading component spec for {operation.name}: {str(e)}") - raise RuntimeError(f"Error loading component spec for {operation.name}.") - - # Add factory function, which returns a ContainerOp task instance, to pipeline operation dict - try: - comp_spec_inputs = [ - inputs.name.lower().replace(" ", "_") for inputs in factory_function.component_spec.inputs or [] - ] - - # Remove inputs and outputs from params dict - # TODO: need to have way to retrieve only required params - parameter_removal_list = ["inputs", "outputs"] - for component_param in operation.component_params_as_dict.keys(): - if component_param not in comp_spec_inputs: - parameter_removal_list.append(component_param) - - for parameter in parameter_removal_list: - operation.component_params_as_dict.pop(parameter, None) - - # Create ContainerOp instance and assign appropriate user-provided name - sanitized_component_params = { - self._sanitize_param_name(name): value - for name, value in 
operation.component_params_as_dict.items() - } - container_op = factory_function(**sanitized_component_params) - container_op.set_display_name(operation.name) + reference["value"] = property_value - except Exception as e: - # TODO Fix error messaging and break exceptions down into categories - self.log.error(f"Error constructing component {operation.name}: {str(e)}") - raise RuntimeError(f"Error constructing component {operation.name}.") + self.log.debug(f"Completed processing of task '{workflow_task['name']}':") + self.log.debug(json.dumps(workflow_task, sort_keys=False, indent=4)) - # Attach node comment - if operation.doc: - container_op.add_pod_annotation("elyra/node-user-doc", operation.doc) + # append task to task list + workflow_tasks[workflow_task["id"]] = workflow_task - # Process Elyra-owned properties as required for each type - for value in operation.elyra_params.values(): - if isinstance(value, (ElyraProperty, ElyraPropertyList)): - value.add_to_execution_object(runtime_processor=self, execution_object=container_op) - - # Add ContainerOp to target_ops dict - target_ops[operation.id] = container_op - - # Process dependencies after all the operations have been created - for operation in pipeline.operations.values(): - op = target_ops[operation.id] - for parent_operation_id in operation.parent_operation_ids: - parent_op = target_ops[parent_operation_id] # Parent Operation - op.after(parent_op) - - self.log_pipeline_info(pipeline_name, "pipeline dependencies processed", duration=(time.time() - t0_all)) + # end of processing + self.log_pipeline_info(pipeline_name, "Pipeline processed", duration=(time.time() - t0_all)) + return workflow_tasks - return target_ops - - def _generate_pipeline_conf(self, pipeline: dict) -> PipelineConf: + def _generate_pipeline_conf(self, pipeline: Pipeline) -> PipelineConf: """ Returns a KFP pipeline configuration for this pipeline, which can be empty. 
- :param pipeline: pipeline dictionary - :type pipeline: dict :return: https://kubeflow-pipelines.readthedocs.io/en/latest/source/kfp.dsl.html#kfp.dsl.PipelineConf :rtype: kfp.dsl import PipelineConf """ @@ -713,14 +959,14 @@ def _generate_pipeline_conf(self, pipeline: dict) -> PipelineConf: # Gather input for container image pull secrets in support of private container image registries # https://kubeflow-pipelines.readthedocs.io/en/latest/source/kfp.dsl.html#kfp.dsl.PipelineConf.set_image_pull_secrets # - image_namespace = self._get_metadata_configuration(schemaspace=RuntimeImages.RUNTIME_IMAGES_SCHEMASPACE_ID) - - # iterate through pipeline operations and create list of Kubernetes secret names - # that are associated with generic components + # Retrieve all runtime image configurations + runtime_image_configurations = self._get_metadata_configuration(RuntimeImages.RUNTIME_IMAGES_SCHEMASPACE_ID) + # For each generic pipeline operation determine wether its runtime image + # is protected by a pull secret container_image_pull_secret_names = [] for operation in pipeline.operations.values(): - if isinstance(operation, GenericOperation): - for image_instance in image_namespace: + if operation.is_generic: + for image_instance in runtime_image_configurations: if image_instance.metadata["image_name"] == operation.runtime_image: if image_instance.metadata.get("pull_secret"): container_image_pull_secret_names.append(image_instance.metadata.get("pull_secret")) @@ -739,13 +985,112 @@ def _generate_pipeline_conf(self, pipeline: dict) -> PipelineConf: return pipeline_conf - @staticmethod - def _sanitize_operation_name(name: str) -> str: + def _compose_container_command_args( + self, + pipeline_name: str, + cos_endpoint: str, + cos_bucket: str, + cos_directory: str, + cos_dependencies_archive: str, + filename: str, + cos_inputs: Optional[List[str]] = [], + cos_outputs: Optional[List[str]] = [], + is_crio_runtime: bool = False, + ) -> str: """ - In KFP, only letters, numbers, spaces, "_", and "-" are allowed in name. - :param name: name of the operation + Compose the container command arguments for a generic component, taking into + account wether the container will run in a CRI-O environment. 
""" - return re.sub("-+", "-", re.sub("[^-_0-9A-Za-z ]+", "-", name)).lstrip("-").rstrip("-") + elyra_github_org = os.getenv("ELYRA_GITHUB_ORG", "elyra-ai") + elyra_github_branch = os.getenv("ELYRA_GITHUB_BRANCH", "main" if "dev" in __version__ else "v" + __version__) + elyra_bootstrap_script_url = os.getenv( + "ELYRA_BOOTSTRAP_SCRIPT_URL", + f"https://raw.githubusercontent.com/{elyra_github_org}/elyra/{elyra_github_branch}/elyra/kfp/bootstrapper.py", # noqa E501 + ) + elyra_requirements_url = os.getenv( + "ELYRA_REQUIREMENTS_URL", + f"https://raw.githubusercontent.com/{elyra_github_org}/" + f"elyra/{elyra_github_branch}/etc/generic/requirements-elyra.txt", + ) + elyra_requirements_url_py37 = os.getenv( + "elyra_requirements_url_py37", + f"https://raw.githubusercontent.com/{elyra_github_org}/" + f"elyra/{elyra_github_branch}/etc/generic/requirements-elyra-py37.txt", + ) + + if is_crio_runtime: + container_work_dir = CRIO_VOL_WORKDIR_PATH + container_python_path = CRIO_VOL_PYTHON_PATH + python_pip_config_url = os.getenv( + "ELYRA_PIP_CONFIG_URL", + f"https://raw.githubusercontent.com/{elyra_github_org}/elyra/{elyra_github_branch}/etc/kfp/pip.conf", + ) + python_user_lib_path_target = f"--target={CRIO_VOL_PYTHON_PATH}" + else: + container_work_dir = "./jupyter-work-dir" + python_user_lib_path_target = "" + + common_curl_options = "--fail -H 'Cache-Control: no-cache'" + + command_args = [] + + command_args.append( + f"mkdir -p {container_work_dir} && cd {container_work_dir} && " + f"echo 'Downloading {elyra_bootstrap_script_url}' && " + f"curl {common_curl_options} -L {elyra_bootstrap_script_url} --output bootstrapper.py && " + f"echo 'Downloading {elyra_requirements_url}' && " + f"curl {common_curl_options} -L {elyra_requirements_url} --output requirements-elyra.txt && " + f"echo 'Downloading {elyra_requirements_url_py37}' && " + f"curl {common_curl_options} -L {elyra_requirements_url_py37} --output requirements-elyra-py37.txt && " + ) + + if is_crio_runtime: + command_args.append( + f"mkdir {container_python_path} && cd {container_python_path} && " + f"echo 'Downloading {python_pip_config_url}' && " + f"curl {common_curl_options} -L {python_pip_config_url} --output pip.conf && cd .. && " + ) + + command_args.append( + f"python3 -m pip install {python_user_lib_path_target} packaging && " + "python3 -m pip freeze > requirements-current.txt && " + "python3 bootstrapper.py " + f"--pipeline-name '{pipeline_name}' " + f"--cos-endpoint '{cos_endpoint}' " + f"--cos-bucket '{cos_bucket}' " + f"--cos-directory '{cos_directory}' " + f"--cos-dependencies-archive '{cos_dependencies_archive}' " + f"--file '{filename}' " + ) + + def file_list_to_string(file_list: List[str]) -> str: + """ + Utiltity function that converts a list of strings to a string + """ + # Inputs and Outputs separator character. If updated, + # same-named variable in bootstrapper.py must be updated! 
+ INOUT_SEPARATOR = ";" + for file in file_list: + if INOUT_SEPARATOR in file: + raise ValueError(f"Illegal character ({INOUT_SEPARATOR}) found in filename '{file}'.") + return INOUT_SEPARATOR.join(file_list) + + # If upstream nodes declared file outputs they need to + # be downloaded from object storage by the bootstrapper + if len(cos_inputs) > 0: + inputs_str = file_list_to_string(cos_inputs) + command_args.append(f"--inputs '{inputs_str}' ") + + # If this node produces file outputs they need to be uploaded + # to object storage by the bootstrapper + if len(cos_outputs) > 0: + outputs_str = file_list_to_string(cos_outputs) + command_args.append(f"--outputs '{outputs_str}' ") + + if is_crio_runtime: + command_args.append(f"--user-volume-path '{CRIO_VOL_PYTHON_PATH}' ") + + return "".join(command_args) @staticmethod def _sanitize_param_name(name: str) -> str: @@ -771,73 +1116,62 @@ def _sanitize_param_name(name: str) -> str: return normalized_name.replace(" ", "_") def add_disable_node_caching(self, instance: DisableNodeCaching, execution_object: Any, **kwargs) -> None: - """Add DisableNodeCaching info to the execution object for the given runtime processor""" + """Add DisableNodeCaching info to the execution object""" # Force re-execution of the operation by setting staleness to zero days # https://www.kubeflow.org/docs/components/pipelines/overview/caching/#managing-caching-staleness if instance.selection: - execution_object.execution_options.caching_strategy.max_cache_staleness = "P0D" + execution_object["disable_node_caching"] = True + else: + execution_object["disable_node_caching"] = False def add_custom_shared_memory_size(self, instance: CustomSharedMemorySize, execution_object: Any, **kwargs) -> None: - """Add CustomSharedMemorySize info to the execution object for the given runtime processor""" - + """Add CustomSharedMemorySize info to the execution object""" if not instance.size: + # no custom size was specified; ignore return - - volume = V1Volume( - name="shm", - empty_dir=V1EmptyDirVolumeSource(medium="Memory", size_limit=f"{instance.size}{instance.units}"), - ) - if volume not in execution_object.volumes: - execution_object.add_volume(volume) - - execution_object.container.add_volume_mount(V1VolumeMount(mount_path="/dev/shm", name="shm")) + execution_object["kubernetes_shared_mem_size"] = {"size": instance.size, "units": instance.units} def add_kubernetes_secret(self, instance: KubernetesSecret, execution_object: Any, **kwargs) -> None: - """Add KubernetesSecret instance to the execution object for the given runtime processor""" - execution_object.container.add_env_variable( - V1EnvVar( - name=instance.env_var, - value_from=V1EnvVarSource(secret_key_ref=V1SecretKeySelector(name=instance.name, key=instance.key)), - ) - ) + """Add KubernetesSecret instance to the execution object""" + if "kubernetes_secrets" not in execution_object: + execution_object["kubernetes_secrets"] = {} + execution_object["kubernetes_secrets"][instance.env_var] = {"name": instance.name, "key": instance.key} def add_mounted_volume(self, instance: VolumeMount, execution_object: Any, **kwargs) -> None: - """Add VolumeMount instance to the execution object for the given runtime processor""" - volume = V1Volume( - name=instance.pvc_name, - persistent_volume_claim=V1PersistentVolumeClaimVolumeSource(claim_name=instance.pvc_name), - ) - if volume not in execution_object.volumes: - execution_object.add_volume(volume) - execution_object.container.add_volume_mount( - V1VolumeMount( - mount_path=instance.path, - 
name=instance.pvc_name, - sub_path=instance.sub_path, - read_only=instance.read_only, - ) - ) + """Add VolumeMount instance to the execution object""" + if "kubernetes_volumes" not in execution_object: + execution_object["kubernetes_volumes"] = {} + execution_object["kubernetes_volumes"][instance.path] = { + "pvc_name": instance.pvc_name, + "sub_path": instance.sub_path, + "read_only": instance.read_only, + } def add_kubernetes_pod_annotation(self, instance: KubernetesAnnotation, execution_object: Any, **kwargs) -> None: - """Add KubernetesAnnotation instance to the execution object for the given runtime processor""" - if instance.key not in execution_object.pod_annotations: - execution_object.add_pod_annotation(instance.key, instance.value or "") + """Add KubernetesAnnotation instance to the execution object""" + if "pod_annotations" not in execution_object: + execution_object["pod_annotations"] = {} + execution_object["pod_annotations"][instance.key] = instance.value or "" def add_kubernetes_pod_label(self, instance: KubernetesLabel, execution_object: Any, **kwargs) -> None: - """Add KubernetesLabel instance to the execution object for the given runtime processor""" - if instance.key not in execution_object.pod_labels: - execution_object.add_pod_label(instance.key, instance.value or "") + """Add KubernetesLabel instance to the execution object""" + if "pod_labels" not in execution_object: + execution_object["pod_labels"] = {} + execution_object["pod_labels"][instance.key] = instance.value or "" def add_kubernetes_toleration(self, instance: KubernetesToleration, execution_object: Any, **kwargs) -> None: - """Add KubernetesToleration instance to the execution object for the given runtime processor""" - toleration = V1Toleration( - effect=instance.effect, - key=instance.key, - operator=instance.operator, - value=instance.value, - ) - if toleration not in execution_object.tolerations: - execution_object.add_toleration(toleration) + """Add KubernetesToleration instance to the execution object""" + if "kubernetes_tolerations" not in execution_object: + execution_object["kubernetes_tolerations"] = {} + toleration_hash = hashlib.sha256( + f"{instance.key}::{instance.operator}::{instance.value}::{instance.effect}".encode() + ).hexdigest() + execution_object["kubernetes_tolerations"][toleration_hash] = { + "key": instance.key, + "operator": instance.operator, + "value": instance.value, + "effect": instance.effect, + } @property def supported_properties(self) -> Set[str]: diff --git a/elyra/pipeline/runtime_type.py b/elyra/pipeline/runtime_type.py index 7f029a55a..9b18d7453 100644 --- a/elyra/pipeline/runtime_type.py +++ b/elyra/pipeline/runtime_type.py @@ -124,7 +124,10 @@ class KubeflowPipelinesResources(RuntimeTypeResources): type = RuntimeProcessorType.KUBEFLOW_PIPELINES icon_endpoint = "static/elyra/kubeflow.svg" - export_file_types = [{"id": "yaml", "display_name": "KFP static configuration file (YAML formatted)"}] + export_file_types = [ + {"id": "yaml", "display_name": "KFP static configuration file (YAML formatted)"}, + {"id": "py", "display_name": "Python DSL"}, + ] class LocalResources(RuntimeTypeResources): diff --git a/elyra/templates/kubeflow/v1/generic_component_definition_template.jinja2 b/elyra/templates/kubeflow/v1/generic_component_definition_template.jinja2 new file mode 100644 index 000000000..bc3003111 --- /dev/null +++ b/elyra/templates/kubeflow/v1/generic_component_definition_template.jinja2 @@ -0,0 +1,8 @@ +name: Run a file +description: Run a Jupyter notebook or Python/R 
script + +implementation: + container: + image: {{ container_image }} + command: [sh, -c] + args: ["{{ command_args }}"] diff --git a/elyra/templates/kubeflow/v1/python_dsl_template.jinja2 b/elyra/templates/kubeflow/v1/python_dsl_template.jinja2 new file mode 100644 index 000000000..329cdd551 --- /dev/null +++ b/elyra/templates/kubeflow/v1/python_dsl_template.jinja2 @@ -0,0 +1,177 @@ +# +# Generated by Elyra {{ elyra_version }} +# +import kfp +import kfp.aws +from kubernetes.client import * +from kubernetes.client.models import * + +{# Load statements for custom components -#} +{# component_hash = """""" -#} +{# factory_hash = kfp.components.load_component_from_text(component_hash) -#} +{% for hash, component_definition in component_definitions.items() %} +component_def_{{ hash | python_safe }} = """ +{{ component_definition }} +""" + +factory_{{ hash | python_safe }} = kfp.components.load_component_from_text(component_def_{{ hash | python_safe }}) +{% endfor %} + +{# Define pipeline -#} +{% if pipeline_description %} +@kfp.dsl.pipeline(name="{{ pipeline_name }}", description="{{ pipeline_description | string_delimiter_safe }}") +{% else %} +@kfp.dsl.pipeline(name="{{ pipeline_name }}") +{% endif %} +def generated_pipeline( +{% if pipeline_parameters %} +{# pipeline parameters will be added here -#} +{% endif %} +): +{% for workflow_task in workflow_tasks.values() %} + {% set task_name = "task_" + workflow_task.escaped_task_id %} + # Task for node '{{ workflow_task.name }}' + {{ task_name }} = factory_{{ workflow_task.component_definition_hash | python_safe }}( +{% for task_input_name, task_input_spec in workflow_task.task_inputs.items() %} +{% if task_input_spec.task_output_reference %} + {{ task_input_name }}=task_{{ task_input_spec.task_output_reference.task_id }}.outputs["{{ task_input_spec.task_output_reference.output_id }}"], +{% elif task_input_spec.pipeline_parameter_reference %} + {{ task_input_name }}={{ task_input_spec.pipeline_parameter_reference }}, +{% elif task_input_spec.requires_quoted_rendering %} + {{ task_input_name }}="{{ task_input_spec.value }}", +{% else %} + {{ task_input_name }}={{ task_input_spec.value }}, +{% endif %} +{% endfor %} + ) +{% if workflow_task.task_modifiers.image_pull_policy %} + {{ task_name }}.container.set_image_pull_policy("{{ workflow_task.task_modifiers.image_pull_policy }}") +{% endif %} +{% if workflow_task.task_modifiers.special_output_files %} + {{ task_name }}.output_artifact_paths["mlpipeline-metrics"] = "{{ workflow_task.task_modifiers.special_output_files.mlpipeline_metrics }}" + {{ task_name }}.output_artifact_paths["mlpipeline-ui-metadata"] = "{{ workflow_task.task_modifiers.special_output_files.mlpipeline_ui_metadata }}" +{% endif %} +{% if workflow_task.task_modifiers.object_storage_secret %} + {{ task_name }}.apply(kfp.aws.use_aws_secret("{{ workflow_task.task_modifiers.object_storage_secret }}")) +{% endif %} + {{ task_name }}.set_display_name("{{ workflow_task.name | string_delimiter_safe }}") +{% if workflow_task.doc %} + {{ task_name }}.add_pod_annotation("elyra/node-user-doc","""{{ workflow_task.doc| string_delimiter_safe }}""") +{% endif %} +{% if workflow_task.task_modifiers.cpu_request %} + {{ task_name }}.container.set_cpu_request(cpu="{{ workflow_task.task_modifiers.cpu_request }}") +{% endif %} +{% if workflow_task.task_modifiers.mem_request and workflow_task.task_modifiers.mem_request.size %} + {{ task_name }}.container.set_memory_request(memory="{{ workflow_task.task_modifiers.mem_request.size }}{{ 
workflow_task.task_modifiers.mem_request.units }}") +{% endif %} +{% if workflow_task.task_modifiers.gpu_limit and workflow_task.task_modifiers.gpu_limit.size %} + {{ task_name }}.container.set_gpu_limit(gpu="{{ workflow_task.task_modifiers.gpu_limit.size }}", vendor="{{ workflow_task.task_modifiers.gpu_limit.vendor }}") +{% endif %} +{% if workflow_task.task_modifiers.env_variables %} +{% for env_var_name, env_var_value in workflow_task.task_modifiers.env_variables.items() %} + {{ task_name }}.add_env_variable(V1EnvVar(name="{{ env_var_name }}", value="{{ env_var_value | string_delimiter_safe }}")) +{% endfor %} +{% endif %} +{% if workflow_task.task_modifiers.set_run_name %} +{% if workflow_engine == "tekton" %} + {{ task_name }}.add_env_variable(V1EnvVar(name="ELYRA_RUN_NAME", value_from=V1EnvVarSource(field_ref=V1ObjectFieldSelector(field_path="metadata.annotations['pipelines.kubeflow.org/run_name']")))) +{% else %} + {{ task_name }}.add_env_variable(V1EnvVar(name="ELYRA_RUN_NAME", value="{{ workflow_task.task_modifiers.set_run_name }}")) +{% endif %} +{% endif %} +{% if workflow_task.task_modifiers.disable_node_caching %} + {{ task_name }}.execution_options.caching_strategy.max_cache_staleness = "P0D" +{% endif %} +{% if workflow_task.task_modifiers.pod_labels %} +{% for pod_label_key, pod_label_value in workflow_task.task_modifiers.pod_labels.items() %} + {{ task_name }}.add_pod_label("{{ pod_label_key }}", "{{ pod_label_value }}") +{% endfor %} +{% endif %} +{% if workflow_task.task_modifiers.pod_annotations %} +{% for pod_annotation_key, pod_annotation_value in workflow_task.task_modifiers.pod_annotations.items() %} + {{ task_name }}.add_pod_annotation("{{ pod_annotation_key }}", """{{ pod_annotation_value | string_delimiter_safe }}""") +{% endfor %} +{% endif %} +{% if workflow_task.task_modifiers.kubernetes_secrets %} +{% for env_var, secret_dict in workflow_task.task_modifiers.kubernetes_secrets.items() %} + {{ task_name }}.container.add_env_variable(V1EnvVar( + name="{{ env_var }}", + value_from=V1EnvVarSource(secret_key_ref=V1SecretKeySelector(name="{{ secret_dict.name }}", key="{{ secret_dict.key }}")), + )) +{% endfor %} +{% endif %} +{% if workflow_task.task_modifiers.kubernetes_volumes %} +{% for volume_path, volume_dict in workflow_task.task_modifiers.kubernetes_volumes.items() %} + {{ task_name }}.add_volume( + V1Volume( + name="{{ volume_dict.pvc_name}}", + persistent_volume_claim=V1PersistentVolumeClaimVolumeSource(claim_name="{{ volume_dict.pvc_name }}",), + )) + {{ task_name }}.container.add_volume_mount( + V1VolumeMount( + mount_path="{{ volume_path }}", + name="{{ volume_dict.pvc_name }}", +{% if volume_dict.sub_path %} + sub_path="{{ volume_dict.sub_path }}", +{% endif %} + read_only={{ volume_dict.read_only }}, + )) +{% endfor %} +{% endif %} +{% if workflow_task.task_modifiers.kubernetes_tolerations %} +{% for toleration_dict in workflow_task.task_modifiers.kubernetes_tolerations.values() %} + {{ task_name }}.add_toleration( + V1Toleration( +{% if toleration_dict.effect %} + effect="{{ toleration_dict.effect }}", +{% else %} + effect=None, +{% endif %} +{% if toleration_dict.key %} + key="{{ toleration_dict.key }}", +{% else %} + key=None, +{% endif %} + operator="{{ toleration_dict.operator }}", +{% if toleration_dict.value %} + value="{{ toleration_dict.value | string_delimiter_safe }}", +{% else %} + value=None, +{% endif %} + )) +{% endfor %} +{% endif %} +{% if workflow_task.task_modifiers.kubernetes_shared_mem_size %} + {{ task_name }}.add_volume(V1Volume( 
+ name="shm", + empty_dir=V1EmptyDirVolumeSource(medium="Memory", size_limit="{{ workflow_task.task_modifiers.kubernetes_shared_mem_size.size }}{{ workflow_task.task_modifiers.kubernetes_shared_mem_size.units }}"), + )) + {{ task_name }}.container.add_volume_mount(V1VolumeMount(mount_path="/dev/shm", name="shm")) +{% endif %} +{% if workflow_task.task_modifiers.crio_runtime %} + {{ task_name }}.add_volume(V1Volume( + name="{{ workflow_task.task_modifiers.crio_runtime.emptydir_volume_name }}", + empty_dir=V1EmptyDirVolumeSource(medium="{{ workflow_task.task_modifiers.crio_runtime.emptydir_volume_medium }}", size_limit="{{ workflow_task.task_modifiers.crio_runtime.emptydir_volume_size }}"), + )) + {{ task_name }}.container.add_volume_mount(V1VolumeMount(mount_path="{{ workflow_task.task_modifiers.crio_runtime.emptydir_mount_path }}", name="{{ workflow_task.task_modifiers.crio_runtime.emptydir_volume_name }}")) +{% endif %} +{# declare upstream dependencies -#} +{% if workflow_task.upstream_workflow_task_ids %} +{% for upstream_workflow_task_id in workflow_task.upstream_workflow_task_ids %} + {{ task_name }}.after(task_{{ upstream_workflow_task_id | python_safe }}) +{% endfor %} +{% endif %} +{% endfor %} + +if __name__ == "__main__": + from pathlib import Path +{% if workflow_engine.lower() == "tekton" %} + from kfp_tekton import compiler + + compiler.TektonCompiler().compile( +{% else %} + kfp.compiler.Compiler().compile( +{% endif %} + pipeline_func=generated_pipeline, + package_path=Path(__file__).with_suffix(".yaml").name, + ) \ No newline at end of file diff --git a/elyra/tests/cli/resources/pipelines/airflow.pipeline b/elyra/tests/cli/resources/pipelines/airflow.pipeline index f420a1471..e3f571ce9 100644 --- a/elyra/tests/cli/resources/pipelines/airflow.pipeline +++ b/elyra/tests/cli/resources/pipelines/airflow.pipeline @@ -24,20 +24,10 @@ "label": "", "ui_data": { "label": "hello.ipynb", - "image": "data:image/svg+xml;utf8,%3Csvg%20xmlns%3D%22http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%22%20width%3D%2216%22%20viewBox%3D%220%200%2022%2022%22%3E%0A%20%20%3Cg%20class%3D%22jp-icon-warn0%20jp-icon-selectable%22%20fill%3D%22%23EF6C00%22%3E%0A%20%20%20%20%3Cpath%20d%3D%22M18.7%203.3v15.4H3.3V3.3h15.4m1.5-1.5H1.8v18.3h18.3l.1-18.3z%22%2F%3E%0A%20%20%20%20%3Cpath%20d%3D%22M16.5%2016.5l-5.4-4.3-5.6%204.3v-11h11z%22%2F%3E%0A%20%20%3C%2Fg%3E%0A%3C%2Fsvg%3E%0A", + "image": "/static/elyra/notebook.svg", "x_pos": 175, "y_pos": 110, - "description": "Run notebook file", - "decorations": [ - { - "id": "error", - "image": "data:image/svg+xml;utf8,%3Csvg%20focusable%3D%22false%22%20preserveAspectRatio%3D%22xMidYMid%20meet%22%20xmlns%3D%22http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%22%20fill%3D%22%23da1e28%22%20width%3D%2216%22%20height%3D%2216%22%20viewBox%3D%220%200%2016%2016%22%20aria-hidden%3D%22true%22%3E%3Ccircle%20cx%3D%228%22%20cy%3D%228%22%20r%3D%228%22%20fill%3D%22%23ffffff%22%3E%3C%2Fcircle%3E%3Cpath%20d%3D%22M8%2C1C4.2%2C1%2C1%2C4.2%2C1%2C8s3.2%2C7%2C7%2C7s7-3.1%2C7-7S11.9%2C1%2C8%2C1z%20M7.5%2C4h1v5h-1C7.5%2C9%2C7.5%2C4%2C7.5%2C4z%20M8%2C12.2%09c-0.4%2C0-0.8-0.4-0.8-0.8s0.3-0.8%2C0.8-0.8c0.4%2C0%2C0.8%2C0.4%2C0.8%2C0.8S8.4%2C12.2%2C8%2C12.2z%22%3E%3C%2Fpath%3E%3Cpath%20d%3D%22M7.5%2C4h1v5h-1C7.5%2C9%2C7.5%2C4%2C7.5%2C4z%20M8%2C12.2c-0.4%2C0-0.8-0.4-0.8-0.8s0.3-0.8%2C0.8-0.8%09c0.4%2C0%2C0.8%2C0.4%2C0.8%2C0.8S8.4%2C12.2%2C8%2C12.2z%22%20data-icon-path%3D%22inner-path%22%20opacity%3D%220%22%3E%3C%2Fpath%3E%3C%2Fsvg%3E", - "outline": false, - "position": "topRight", - "x_pos": -24, - "y_pos": -8 - } - ] + 
"description": "Run notebook file" } }, "inputs": [ @@ -74,10 +64,10 @@ "ui_data": { "comments": [] }, - "version": 7, + "version": 8, "runtime_type": "APACHE_AIRFLOW", "properties": { - "name": "untitled", + "name": "airflow", "runtime": "Apache Airflow" } }, diff --git a/elyra/tests/cli/test_pipeline_app.py b/elyra/tests/cli/test_pipeline_app.py index 335c46777..be685c11a 100644 --- a/elyra/tests/cli/test_pipeline_app.py +++ b/elyra/tests/cli/test_pipeline_app.py @@ -17,6 +17,8 @@ import json from pathlib import Path import shutil +from typing import List +from typing import Union from click.testing import CliRunner from conftest import KFP_COMPONENT_CACHE_INSTANCE @@ -1010,6 +1012,14 @@ def prepare_export_work_dir(work_dir: str, source_dir: str): print(f"Work directory content: {list(Path(work_dir).glob('*'))}") +def copy_to_work_dir(work_dir: str, files: List[Union[str, Path]]) -> None: + """Copies the specified files to work_dir""" + for file in files: + if not isinstance(file, Path): + file = Path(file) + shutil.copy(file.as_posix(), work_dir) + + def test_export_invalid_runtime_config(): """Test user error scenarios: the specified runtime configuration is 'invalid'""" runner = CliRunner() @@ -1082,7 +1092,6 @@ def test_export_kubeflow_output_option( pipeline_file_path = cwd / pipeline_file # make sure the pipeline file exists assert pipeline_file_path.is_file() is True - print(f"Pipeline file: {pipeline_file_path}") # Test: '--output' not specified; exported file is created # in current directory and named like the pipeline file with @@ -1147,7 +1156,6 @@ def test_export_airflow_output_option(airflow_runtime_instance): pipeline_file_path = cwd / pipeline_file # make sure the pipeline file exists assert pipeline_file_path.is_file() is True - print(f"Pipeline file: {pipeline_file_path}") # # Test: '--output' not specified; exported file is created @@ -1155,7 +1163,6 @@ def test_export_airflow_output_option(airflow_runtime_instance): # a '.py' suffix # expected_output_file = pipeline_file_path.with_suffix(".py") - print(f"expected_output_file -> {expected_output_file}") do_mock_export(str(expected_output_file)) # this should fail: default output file already exists @@ -1272,6 +1279,110 @@ def test_export_kubeflow_overwrite_option( assert f"was exported to '{str(expected_output_file)}" in result.output, result.output +def test_export_airflow_format_option(airflow_runtime_instance): + """Verify that the '--format' option works as expected for Airflow""" + runner = CliRunner() + with runner.isolated_filesystem(): + cwd = Path.cwd().resolve() + # copy pipeline file and depencencies + resource_dir = Path(__file__).parent / "resources" / "pipelines" + copy_to_work_dir(str(cwd), [resource_dir / "airflow.pipeline", resource_dir / "hello.ipynb"]) + pipeline_file = "airflow.pipeline" + pipeline_file_path = cwd / pipeline_file + # make sure the pipeline file exists + assert pipeline_file_path.is_file() is True + + # Try supported formats + for supported_export_format_value in ["yaml", "py"]: + if supported_export_format_value: + expected_output_file = pipeline_file_path.with_suffix(f".{supported_export_format_value}") + else: + expected_output_file = pipeline_file_path.with_suffix(".py") + + # Make sure the output file doesn't exist yet + if expected_output_file.is_file(): + expected_output_file.unlink() + + # Try invalid format + for invalid_export_format_value in ["humpty", "dumpty"]: + options = [ + "export", + str(pipeline_file_path), + "--runtime-config", + airflow_runtime_instance, + 
"--format", + invalid_export_format_value, + ] + + # this should fail + result = runner.invoke(pipeline, options) + + assert result.exit_code == 2, result.output + assert "Invalid value for --format: Valid export formats are ['py']." in result.output, result.output + + +@pytest.mark.parametrize("catalog_instance_no_server_process", [KFP_COMPONENT_CACHE_INSTANCE], indirect=True) +def test_export_kubeflow_format_option( + jp_environ, kubeflow_pipelines_runtime_instance, catalog_instance_no_server_process +): + """Verify that the '--format' option works as expected for Kubeflow Pipelines""" + runner = CliRunner() + with runner.isolated_filesystem(): + cwd = Path.cwd().resolve() + # copy pipeline file and depencencies + prepare_export_work_dir(str(cwd), Path(__file__).parent / "resources" / "pipelines") + pipeline_file = "kfp_3_node_custom.pipeline" + pipeline_file_path = cwd / pipeline_file + # make sure the pipeline file exists + assert pipeline_file_path.is_file() is True + + # Try supported formats + for supported_export_format_value in [None, "py", "yaml"]: + if supported_export_format_value: + expected_output_file = pipeline_file_path.with_suffix(f".{supported_export_format_value}") + else: + expected_output_file = pipeline_file_path.with_suffix(".yaml") + + # Make sure the output file doesn't exist yet + if expected_output_file.is_file(): + expected_output_file.unlink() + + options = [ + "export", + str(pipeline_file_path), + "--runtime-config", + kubeflow_pipelines_runtime_instance, + ] + if supported_export_format_value: + options.append("--format") + options.append(supported_export_format_value) + + # this should succeed + result = runner.invoke(pipeline, options) + + assert result.exit_code == 0, result.output + assert f"was exported to '{str(expected_output_file)}" in result.output, result.output + + # Try invalid format + for invalid_export_format_value in ["humpty", "dumpty"]: + options = [ + "export", + str(pipeline_file_path), + "--runtime-config", + kubeflow_pipelines_runtime_instance, + "--format", + invalid_export_format_value, + ] + + # this should fail + result = runner.invoke(pipeline, options) + + assert result.exit_code == 2, result.output + assert ( + "Invalid value for --format: Valid export formats are ['yaml', 'py']." in result.output + ), result.output + + # ------------------------------------------------------------------ # end tests for 'export' command # ------------------------------------------------------------------ diff --git a/elyra/tests/kfp/test_operator.py b/elyra/tests/kfp/test_operator.py deleted file mode 100644 index ff046df90..000000000 --- a/elyra/tests/kfp/test_operator.py +++ /dev/null @@ -1,567 +0,0 @@ -# -# Copyright 2018-2022 Elyra Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -import string - -from kfp.dsl import RUN_ID_PLACEHOLDER -import pytest - -from elyra.kfp.operator import ExecuteFileOp - - -def test_fail_without_cos_endpoint(): - with pytest.raises(TypeError): - ExecuteFileOp( - name="test", - pipeline_name="test-pipeline", - experiment_name="experiment-name", - notebook="test_notebook.ipynb", - cos_bucket="test_bucket", - cos_directory="test_directory", - cos_dependencies_archive="test_archive.tgz", - image="test/image:dev", - ) - - -def test_fail_without_cos_bucket(): - with pytest.raises(TypeError): - ExecuteFileOp( - name="test", - pipeline_name="test-pipeline", - experiment_name="experiment-name", - notebook="test_notebook.ipynb", - cos_endpoint="http://testserver:32525", - cos_directory="test_directory", - cos_dependencies_archive="test_archive.tgz", - image="test/image:dev", - ) - - -def test_fail_without_cos_directory(): - with pytest.raises(TypeError): - ExecuteFileOp( - name="test", - pipeline_name="test-pipeline", - experiment_name="experiment-name", - notebook="test_notebook.ipynb", - cos_endpoint="http://testserver:32525", - cos_bucket="test_bucket", - cos_dependencies_archive="test_archive.tgz", - image="test/image:dev", - ) - - -def test_fail_without_cos_dependencies_archive(): - with pytest.raises(TypeError): - ExecuteFileOp( - name="test", - pipeline_name="test-pipeline", - experiment_name="experiment-name", - notebook="test_notebook.ipynb", - cos_endpoint="http://testserver:32525", - cos_bucket="test_bucket", - cos_directory="test_directory", - image="test/image:dev", - ) - - -def test_fail_without_runtime_image(): - with pytest.raises(ValueError) as error_info: - ExecuteFileOp( - name="test", - pipeline_name="test-pipeline", - experiment_name="experiment-name", - notebook="test_notebook.ipynb", - cos_endpoint="http://testserver:32525", - cos_bucket="test_bucket", - cos_directory="test_directory", - cos_dependencies_archive="test_archive.tgz", - ) - assert "You need to provide an image." 
== str(error_info.value) - - -def test_fail_without_notebook(): - with pytest.raises(TypeError): - ExecuteFileOp( - name="test", - pipeline_name="test-pipeline", - experiment_name="experiment-name", - cos_endpoint="http://testserver:32525", - cos_bucket="test_bucket", - cos_directory="test_directory", - cos_dependencies_archive="test_archive.tgz", - image="test/image:dev", - ) - - -def test_fail_without_name(): - with pytest.raises(TypeError): - ExecuteFileOp( - pipeline_name="test-pipeline", - experiment_name="experiment-name", - notebook="test_notebook.ipynb", - cos_endpoint="http://testserver:32525", - cos_bucket="test_bucket", - cos_directory="test_directory", - cos_dependencies_archive="test_archive.tgz", - image="test/image:dev", - ) - - -def test_fail_with_empty_string_as_name(): - with pytest.raises(ValueError): - ExecuteFileOp( - name="", - pipeline_name="test-pipeline", - experiment_name="experiment-name", - notebook="test_notebook.ipynb", - cos_endpoint="http://testserver:32525", - cos_bucket="test_bucket", - cos_directory="test_directory", - cos_dependencies_archive="test_archive.tgz", - image="test/image:dev", - ) - - -def test_fail_with_empty_string_as_notebook(): - with pytest.raises(ValueError) as error_info: - ExecuteFileOp( - name="test", - pipeline_name="test-pipeline", - experiment_name="experiment-name", - notebook="", - cos_endpoint="http://testserver:32525", - cos_bucket="test_bucket", - cos_directory="test_directory", - cos_dependencies_archive="test_archive.tgz", - image="test/image:dev", - ) - assert "You need to provide a notebook." == str(error_info.value) - - -def test_fail_without_pipeline_name(): - with pytest.raises(TypeError): - ExecuteFileOp( - name="test", - experiment_name="experiment-name", - notebook="test_notebook.ipynb", - cos_endpoint="http://testserver:32525", - cos_bucket="test_bucket", - cos_directory="test_directory", - cos_dependencies_archive="test_archive.tgz", - image="test/image:dev", - ) - - -def test_fail_without_experiment_name(): - with pytest.raises(TypeError): - ExecuteFileOp( - name="test", - pipeline_name="test-pipeline", - notebook="test_notebook.ipynb", - cos_endpoint="http://testserver:32525", - cos_bucket="test_bucket", - cos_directory="test_directory", - cos_dependencies_archive="test_archive.tgz", - image="test/image:dev", - ) - - -def test_properly_set_notebook_name_when_in_subdirectory(): - notebook_op = ExecuteFileOp( - name="test", - pipeline_name="test-pipeline", - experiment_name="experiment-name", - notebook="foo/test_notebook.ipynb", - cos_endpoint="http://testserver:32525", - cos_bucket="test_bucket", - cos_directory="test_directory", - cos_dependencies_archive="test_archive.tgz", - image="test/image:dev", - ) - assert "test_notebook.ipynb" == notebook_op.notebook_name - - -def test_properly_set_python_script_name_when_in_subdirectory(): - notebook_op = ExecuteFileOp( - name="test", - pipeline_name="test-pipeline", - experiment_name="experiment-name", - notebook="foo/test.py", - cos_endpoint="http://testserver:32525", - cos_bucket="test_bucket", - cos_directory="test_directory", - cos_dependencies_archive="test_archive.tgz", - image="test/image:dev", - ) - assert "test.py" == notebook_op.notebook_name - - -def test_user_crio_volume_creation(): - notebook_op = ExecuteFileOp( - name="test", - pipeline_name="test-pipeline", - experiment_name="experiment-name", - notebook="test_notebook.ipynb", - cos_endpoint="http://testserver:32525", - cos_bucket="test_bucket", - cos_directory="test_directory", - 
cos_dependencies_archive="test_archive.tgz", - image="test/image:dev", - emptydir_volume_size="20Gi", - ) - assert notebook_op.emptydir_volume_size == "20Gi" - assert notebook_op.container_work_dir_root_path == "/opt/app-root/src/" - assert notebook_op.container.volume_mounts.__len__() == 1 - # Environment variables: PYTHONPATH, ELYRA_RUN_NAME - assert notebook_op.container.env.__len__() == 2, notebook_op.container.env - - -def test_override_bootstrap_url(): - notebook_op = ExecuteFileOp( - name="test", - pipeline_name="test-pipeline", - experiment_name="experiment-name", - bootstrap_script_url="https://test.server.com/bootscript.py", - notebook="test_notebook.ipynb", - cos_endpoint="http://testserver:32525", - cos_bucket="test_bucket", - cos_directory="test_directory", - cos_dependencies_archive="test_archive.tgz", - image="test/image:dev", - ) - assert notebook_op.bootstrap_script_url == "https://test.server.com/bootscript.py" - - -def test_override_requirements_url(): - notebook_op = ExecuteFileOp( - name="test", - pipeline_name="test-pipeline", - experiment_name="experiment-name", - requirements_url="https://test.server.com/requirements.py", - notebook="test_notebook.ipynb", - cos_endpoint="http://testserver:32525", - cos_bucket="test_bucket", - cos_directory="test_directory", - cos_dependencies_archive="test_archive.tgz", - image="test/image:dev", - ) - assert notebook_op.requirements_url == "https://test.server.com/requirements.py" - - -def test_construct_with_both_pipeline_inputs_and_outputs(): - notebook_op = ExecuteFileOp( - name="test", - pipeline_name="test-pipeline", - experiment_name="experiment-name", - notebook="test_notebook.ipynb", - cos_endpoint="http://testserver:32525", - cos_bucket="test_bucket", - cos_directory="test_directory", - cos_dependencies_archive="test_archive.tgz", - pipeline_inputs=["test_input1.txt", "test_input2.txt"], - pipeline_outputs=["test_output1.txt", "test_output2.txt"], - image="test/image:dev", - ) - assert notebook_op.pipeline_inputs == ["test_input1.txt", "test_input2.txt"] - assert notebook_op.pipeline_outputs == ["test_output1.txt", "test_output2.txt"] - - assert '--inputs "test_input1.txt;test_input2.txt"' in notebook_op.container.args[0] - assert '--outputs "test_output1.txt;test_output2.txt"' in notebook_op.container.args[0] - - -def test_construct_wildcard_outputs(): - notebook_op = ExecuteFileOp( - name="test", - pipeline_name="test-pipeline", - experiment_name="experiment-name", - notebook="test_notebook.ipynb", - cos_endpoint="http://testserver:32525", - cos_bucket="test_bucket", - cos_directory="test_directory", - cos_dependencies_archive="test_archive.tgz", - pipeline_inputs=["test_input1.txt", "test_input2.txt"], - pipeline_outputs=["test_out*", "foo.tar"], - image="test/image:dev", - ) - assert notebook_op.pipeline_inputs == ["test_input1.txt", "test_input2.txt"] - assert notebook_op.pipeline_outputs == ["test_out*", "foo.tar"] - - assert '--inputs "test_input1.txt;test_input2.txt"' in notebook_op.container.args[0] - assert '--outputs "test_out*;foo.tar"' in notebook_op.container.args[0] - - -def test_construct_with_only_pipeline_inputs(): - notebook_op = ExecuteFileOp( - name="test", - pipeline_name="test-pipeline", - experiment_name="experiment-name", - notebook="test_notebook.ipynb", - cos_endpoint="http://testserver:32525", - cos_bucket="test_bucket", - cos_directory="test_directory", - cos_dependencies_archive="test_archive.tgz", - pipeline_inputs=["test_input1.txt", "test,input2.txt"], - pipeline_outputs=[], - 
image="test/image:dev", - ) - assert notebook_op.pipeline_inputs == ["test_input1.txt", "test,input2.txt"] - assert '--inputs "test_input1.txt;test,input2.txt"' in notebook_op.container.args[0] - - -def test_construct_with_bad_pipeline_inputs(): - with pytest.raises(ValueError) as error_info: - ExecuteFileOp( - name="test", - pipeline_name="test-pipeline", - experiment_name="experiment-name", - notebook="test_notebook.ipynb", - cos_endpoint="http://testserver:32525", - cos_bucket="test_bucket", - cos_directory="test_directory", - cos_dependencies_archive="test_archive.tgz", - pipeline_inputs=["test_input1.txt", "test;input2.txt"], - pipeline_outputs=[], - image="test/image:dev", - ) - assert "Illegal character (;) found in filename 'test;input2.txt'." == str(error_info.value) - - -def test_construct_with_only_pipeline_outputs(): - notebook_op = ExecuteFileOp( - name="test", - pipeline_name="test-pipeline", - experiment_name="experiment-name", - notebook="test_notebook.ipynb", - cos_endpoint="http://testserver:32525", - cos_bucket="test_bucket", - cos_directory="test_directory", - cos_dependencies_archive="test_archive.tgz", - pipeline_outputs=["test_output1.txt", "test,output2.txt"], - pipeline_envs={}, - image="test/image:dev", - ) - assert notebook_op.pipeline_outputs == ["test_output1.txt", "test,output2.txt"] - assert '--outputs "test_output1.txt;test,output2.txt"' in notebook_op.container.args[0] - - -def test_construct_with_bad_pipeline_outputs(): - with pytest.raises(ValueError) as error_info: - ExecuteFileOp( - name="test", - pipeline_name="test-pipeline", - experiment_name="experiment-name", - notebook="test_notebook.ipynb", - cos_endpoint="http://testserver:32525", - cos_bucket="test_bucket", - cos_directory="test_directory", - cos_dependencies_archive="test_archive.tgz", - pipeline_outputs=["test_output1.txt", "test;output2.txt"], - image="test/image:dev", - ) - assert "Illegal character (;) found in filename 'test;output2.txt'." == str(error_info.value) - - -def test_construct_with_env_variables_argo(): - notebook_op = ExecuteFileOp( - name="test", - pipeline_name="test-pipeline", - experiment_name="experiment-name", - notebook="test_notebook.ipynb", - cos_endpoint="http://testserver:32525", - cos_bucket="test_bucket", - cos_directory="test_directory", - cos_dependencies_archive="test_archive.tgz", - pipeline_envs={"ENV_VAR_ONE": "1", "ENV_VAR_TWO": "2", "ENV_VAR_THREE": "3"}, - image="test/image:dev", - ) - - confirmation_names = ["ENV_VAR_ONE", "ENV_VAR_TWO", "ENV_VAR_THREE", "ELYRA_RUN_NAME"] - confirmation_values = ["1", "2", "3", RUN_ID_PLACEHOLDER] - for env_val in notebook_op.container.env: - assert env_val.name in confirmation_names - assert env_val.value in confirmation_values - confirmation_names.remove(env_val.name) - confirmation_values.remove(env_val.value) - - # Verify confirmation values have been drained. 
- assert len(confirmation_names) == 0 - assert len(confirmation_values) == 0 - - # same as before but explicitly specify the workflow engine type - # as Argo - notebook_op = ExecuteFileOp( - name="test", - pipeline_name="test-pipeline", - experiment_name="experiment-name", - notebook="test_notebook.ipynb", - cos_endpoint="http://testserver:32525", - cos_bucket="test_bucket", - cos_directory="test_directory", - cos_dependencies_archive="test_archive.tgz", - pipeline_envs={"ENV_VAR_ONE": "1", "ENV_VAR_TWO": "2", "ENV_VAR_THREE": "3"}, - image="test/image:dev", - workflow_engine="Argo", - ) - - confirmation_names = ["ENV_VAR_ONE", "ENV_VAR_TWO", "ENV_VAR_THREE", "ELYRA_RUN_NAME"] - confirmation_values = ["1", "2", "3", RUN_ID_PLACEHOLDER] - for env_val in notebook_op.container.env: - assert env_val.name in confirmation_names - assert env_val.value in confirmation_values - confirmation_names.remove(env_val.name) - confirmation_values.remove(env_val.value) - - # Verify confirmation values have been drained. - assert len(confirmation_names) == 0 - assert len(confirmation_values) == 0 - - -def test_construct_with_env_variables_tekton(): - notebook_op = ExecuteFileOp( - name="test", - pipeline_name="test-pipeline", - experiment_name="experiment-name", - notebook="test_notebook.ipynb", - cos_endpoint="http://testserver:32525", - cos_bucket="test_bucket", - cos_directory="test_directory", - cos_dependencies_archive="test_archive.tgz", - pipeline_envs={"ENV_VAR_ONE": "1", "ENV_VAR_TWO": "2", "ENV_VAR_THREE": "3"}, - image="test/image:dev", - workflow_engine="Tekton", - ) - - confirmation_names = ["ENV_VAR_ONE", "ENV_VAR_TWO", "ENV_VAR_THREE", "ELYRA_RUN_NAME"] - confirmation_values = ["1", "2", "3"] - field_path = "metadata.annotations['pipelines.kubeflow.org/run_name']" - for env_val in notebook_op.container.env: - assert env_val.name in confirmation_names - confirmation_names.remove(env_val.name) - if env_val.name == "ELYRA_RUN_NAME": - assert env_val.value_from.field_ref.field_path == field_path, env_val.value_from.field_ref - else: - assert env_val.value in confirmation_values - confirmation_values.remove(env_val.value) - - # Verify confirmation values have been drained. - assert len(confirmation_names) == 0 - assert len(confirmation_values) == 0 - - -def test_normalize_label_value(): - valid_middle_chars = "-_." - - # test min length - assert ExecuteFileOp._normalize_label_value(None) == "" - assert ExecuteFileOp._normalize_label_value("") == "" - # test max length (63) - assert ExecuteFileOp._normalize_label_value("a" * 63) == "a" * 63 - assert ExecuteFileOp._normalize_label_value("a" * 64) == "a" * 63 # truncated - # test first and last char - assert ExecuteFileOp._normalize_label_value("1") == "1" - assert ExecuteFileOp._normalize_label_value("22") == "22" - assert ExecuteFileOp._normalize_label_value("3_3") == "3_3" - assert ExecuteFileOp._normalize_label_value("4u4") == "4u4" - assert ExecuteFileOp._normalize_label_value("5$5") == "5_5" - - # test first char - for c in string.printable: - if c in string.ascii_letters + string.digits: - # first char is valid - # no length violation - assert ExecuteFileOp._normalize_label_value(c) == c - assert ExecuteFileOp._normalize_label_value(c + "B") == c + "B" - # max length - assert ExecuteFileOp._normalize_label_value(c + "B" * 62) == (c + "B" * 62) - # max length exceeded - assert ExecuteFileOp._normalize_label_value(c + "B" * 63) == (c + "B" * 62) # truncated - else: - # first char is invalid, e.g. 
'#a', and becomes the - # second char, which might require replacement - rv = c - if c not in valid_middle_chars: - rv = "_" - # no length violation - assert ExecuteFileOp._normalize_label_value(c) == "a" + rv + "a" - assert ExecuteFileOp._normalize_label_value(c + "B") == "a" + rv + "B" - # max length - assert ExecuteFileOp._normalize_label_value(c + "B" * 62) == ("a" + rv + "B" * 61) # truncated - # max length exceeded - assert ExecuteFileOp._normalize_label_value(c + "B" * 63) == ("a" + rv + "B" * 61) # truncated - - # test last char - for c in string.printable: - if c in string.ascii_letters + string.digits: - # no length violation - assert ExecuteFileOp._normalize_label_value("b" + c) == "b" + c - # max length - assert ExecuteFileOp._normalize_label_value("b" * 62 + c) == ("b" * 62 + c) - # max length exceeded - assert ExecuteFileOp._normalize_label_value("b" * 63 + c) == ("b" * 63) - else: - # last char is invalid, e.g. 'a#', and requires - # patching - rv = c - if c not in valid_middle_chars: - rv = "_" - # no length violation (char is appended) - assert ExecuteFileOp._normalize_label_value("b" + c) == "b" + rv + "a" - # max length (char is replaced) - assert ExecuteFileOp._normalize_label_value("b" * 62 + c) == ("b" * 62 + "a") - # max length exceeded (no action required) - assert ExecuteFileOp._normalize_label_value("b" * 63 + c) == ("b" * 63) - - # test first and last char - for c in string.printable: - if c in string.ascii_letters + string.digits: - # no length violation - assert ExecuteFileOp._normalize_label_value(c + "b" + c) == c + "b" + c # nothing is modified - # max length - assert ExecuteFileOp._normalize_label_value(c + "b" * 61 + c) == (c + "b" * 61 + c) # nothing is modified - # max length exceeded - assert ExecuteFileOp._normalize_label_value(c + "b" * 62 + c) == c + "b" * 62 # truncate only - else: - # first and last characters are invalid, e.g. 
'#a#' - rv = c - if c not in valid_middle_chars: - rv = "_" - # no length violation - assert ExecuteFileOp._normalize_label_value(c + "b" + c) == "a" + rv + "b" + rv + "a" - # max length - assert ExecuteFileOp._normalize_label_value(c + "b" * 59 + c) == ("a" + rv + "b" * 59 + rv + "a") - # max length exceeded after processing, scenario 1 - # resolved by adding char before first, replace last - assert ExecuteFileOp._normalize_label_value(c + "b" * 60 + c) == ("a" + rv + "b" * 60 + "a") - # max length exceeded after processing, scenario 2 - # resolved by adding char before first, appending after last - assert ExecuteFileOp._normalize_label_value(c + "b" * 59 + c) == ("a" + rv + "b" * 59 + rv + "a") - # max length exceeded before processing, scenario 1 - # resolved by adding char before first, truncating last - assert ExecuteFileOp._normalize_label_value(c + "b" * 62 + c) == ("a" + rv + "b" * 61) - # max length exceeded before processing, scenario 2 - # resolved by adding char before first, replacing last - assert ExecuteFileOp._normalize_label_value(c + "b" * 60 + c * 3) == ("a" + rv + "b" * 60 + "a") - - # test char in a position other than first and last - # if invalid, the char is replaced with '_' - for c in string.printable: - if c in string.ascii_letters + string.digits + "-_.": - assert ExecuteFileOp._normalize_label_value("A" + c + "Z") == "A" + c + "Z" - else: - assert ExecuteFileOp._normalize_label_value("A" + c + "Z") == "A_Z" - - # encore - assert ExecuteFileOp._normalize_label_value(r"¯\_(ツ)_/¯") == "a_________a" diff --git a/elyra/tests/pipeline/kfp/test_processor_kfp.py b/elyra/tests/pipeline/kfp/test_processor_kfp.py index 47c6c59e9..e37ad147b 100644 --- a/elyra/tests/pipeline/kfp/test_processor_kfp.py +++ b/elyra/tests/pipeline/kfp/test_processor_kfp.py @@ -13,38 +13,65 @@ # See the License for the specific language governing permissions and # limitations under the License. 
 #
+from datetime import datetime
+import hashlib
+import json
 import os
 from pathlib import Path
 import re
 import tarfile
+from typing import List
+from typing import Union
 from unittest import mock
 
-from kfp import compiler as kfp_argo_compiler
+from kfp.dsl import RUN_ID_PLACEHOLDER
 import pytest
 import yaml
 
 from elyra.metadata.metadata import Metadata
 from elyra.pipeline.catalog_connector import FilesystemComponentCatalogConnector
-from elyra.pipeline.catalog_connector import UrlComponentCatalogConnector
 from elyra.pipeline.component import Component
 from elyra.pipeline.component import ComponentParameter
+from elyra.pipeline.component_parameter import CustomSharedMemorySize
+from elyra.pipeline.component_parameter import DisableNodeCaching
 from elyra.pipeline.component_parameter import ElyraProperty
+from elyra.pipeline.component_parameter import KubernetesAnnotation
+from elyra.pipeline.component_parameter import KubernetesLabel
+from elyra.pipeline.component_parameter import KubernetesSecret
+from elyra.pipeline.component_parameter import KubernetesToleration
+from elyra.pipeline.component_parameter import VolumeMount
+from elyra.pipeline.kfp.processor_kfp import CRIO_VOL_DEF_MEDIUM
+from elyra.pipeline.kfp.processor_kfp import CRIO_VOL_DEF_NAME
+from elyra.pipeline.kfp.processor_kfp import CRIO_VOL_DEF_SIZE
+from elyra.pipeline.kfp.processor_kfp import CRIO_VOL_MOUNT_PATH
+from elyra.pipeline.kfp.processor_kfp import CRIO_VOL_PYTHON_PATH
+from elyra.pipeline.kfp.processor_kfp import CRIO_VOL_WORKDIR_PATH
 from elyra.pipeline.kfp.processor_kfp import KfpPipelineProcessor
+from elyra.pipeline.kfp.processor_kfp import WorkflowEngineType
 from elyra.pipeline.parser import PipelineParser
 from elyra.pipeline.pipeline import GenericOperation
 from elyra.pipeline.pipeline import Operation
 from elyra.pipeline.pipeline import Pipeline
 from elyra.pipeline.pipeline_constants import COS_OBJECT_PREFIX
+from elyra.pipeline.pipeline_constants import KUBERNETES_POD_ANNOTATIONS
+from elyra.pipeline.pipeline_constants import KUBERNETES_POD_LABELS
+from elyra.pipeline.pipeline_constants import KUBERNETES_SECRETS
+from elyra.pipeline.pipeline_constants import KUBERNETES_SHARED_MEM_SIZE
+from elyra.pipeline.pipeline_constants import KUBERNETES_TOLERATIONS
+from elyra.pipeline.pipeline_constants import MOUNTED_VOLUMES
 from elyra.tests.pipeline.test_pipeline_parser import _read_pipeline_resource
+from elyra.util.cos import join_paths
+from elyra.util.kubernetes import sanitize_label_value
 
-
-ARCHIVE_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), "resources", "archive")
-PIPELINE_FILE_COMPLEX = "resources/sample_pipelines/pipeline_dependency_complex.json"
+PIPELINE_FILE_COMPLEX = str((Path("resources") / "sample_pipelines" / "pipeline_dependency_complex.json").as_posix())
 
 
 @pytest.fixture
-def processor(setup_factory_data):
-    root_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
+def processor(setup_factory_data) -> KfpPipelineProcessor:
+    """
+    Instantiate a processor for Kubeflow Pipelines
+    """
+    root_dir = str((Path(__file__).parent / "..").resolve())
     processor = KfpPipelineProcessor(root_dir=root_dir)
     return processor
 
@@ -73,13 +100,88 @@ def sample_metadata():
     }
 
 
-def test_fail_get_metadata_configuration_invalid_namespace(processor):
+def kfp_runtime_config(
+    workflow_engine: WorkflowEngineType = WorkflowEngineType.ARGO,
+    use_cos_credentials_secret: bool = False,
+) -> Metadata:
+    """
+    Returns a KFP runtime config metadata entry, which meets the constraints
+    defined by
the specified parameters + """ + + kfp_runtime_config = { + "display_name": "Mocked KFP runtime", + "schema_name": "kfp", + "metadata": { + "display_name": "Mocked KFP runtime", + "tags": [], + "user_namespace": "default", + "api_username": "user@example.com", + "api_password": "12341234", + "runtime_type": "KUBEFLOW_PIPELINES", + "api_endpoint": "http://examples.com:31737", + "cos_endpoint": "http://examples.com:31671", + "cos_bucket": "test", + }, + } + + if workflow_engine == WorkflowEngineType.TEKTON: + kfp_runtime_config["metadata"]["engine"] = "Tekton" + else: + kfp_runtime_config["metadata"]["engine"] = "Argo" + + if use_cos_credentials_secret: + kfp_runtime_config["metadata"]["cos_auth_type"] = "KUBERNETES_SECRET" + kfp_runtime_config["metadata"]["cos_username"] = "my_name" + kfp_runtime_config["metadata"]["cos_password"] = "my_password" + kfp_runtime_config["metadata"]["cos_secret"] = "secret-name" + else: + kfp_runtime_config["metadata"]["cos_auth_type"] = "USER_CREDENTIALS" + kfp_runtime_config["metadata"]["cos_username"] = "my_name" + kfp_runtime_config["metadata"]["cos_password"] = "my_password" + + return Metadata( + name=kfp_runtime_config["display_name"].lower().replace(" ", "_"), + display_name=kfp_runtime_config["display_name"], + schema_name=kfp_runtime_config["schema_name"], + metadata=kfp_runtime_config["metadata"], + ) + + +def test_WorkflowEngineType_get_instance_by_value(): + """ + Validate that method 'get_instance_by_value' yields the expected results for + valid and invalid input. + """ + # test valid inputs (the provided value is evalutaed in a case insensitive manner) + assert WorkflowEngineType.get_instance_by_value("argo") == WorkflowEngineType.ARGO + assert WorkflowEngineType.get_instance_by_value("ARGO") == WorkflowEngineType.ARGO + assert WorkflowEngineType.get_instance_by_value("aRGo") == WorkflowEngineType.ARGO + assert WorkflowEngineType.get_instance_by_value("Argo") == WorkflowEngineType.ARGO + assert WorkflowEngineType.get_instance_by_value("tekton") == WorkflowEngineType.TEKTON + assert WorkflowEngineType.get_instance_by_value("TEKTON") == WorkflowEngineType.TEKTON + assert WorkflowEngineType.get_instance_by_value("tEKtOn") == WorkflowEngineType.TEKTON + assert WorkflowEngineType.get_instance_by_value("Tekton") == WorkflowEngineType.TEKTON + # test invalid inputs + with pytest.raises(KeyError): + WorkflowEngineType.get_instance_by_value(None) # there is no default + with pytest.raises(KeyError): + WorkflowEngineType.get_instance_by_value("") # there is no default + with pytest.raises(KeyError): + WorkflowEngineType.get_instance_by_value(" argo ") # whitespaces are not trimmed + with pytest.raises(KeyError): + WorkflowEngineType.get_instance_by_value("bitcoin") + with pytest.raises(KeyError): + WorkflowEngineType.get_instance_by_value("ether") + + +def test_fail_get_metadata_configuration_invalid_namespace(processor: KfpPipelineProcessor): with pytest.raises(RuntimeError): processor._get_metadata_configuration(schemaspace="non_existent_namespace", name="non_existent_metadata") -def test_generate_dependency_archive(processor): - pipelines_test_file = os.path.join(ARCHIVE_DIR, "test.ipynb") +def test_generate_dependency_archive(processor: KfpPipelineProcessor): + pipelines_test_file = str((Path(__file__).parent / ".." 
/ "resources" / "archive" / "test.ipynb").resolve()) pipeline_dependencies = ["airflow.json"] correct_filelist = ["test.ipynb", "airflow.json"] component_parameters = { @@ -107,7 +209,7 @@ def test_generate_dependency_archive(processor): assert sorted(correct_filelist) == sorted(tar_content) -def test_fail_generate_dependency_archive(processor): +def test_fail_generate_dependency_archive(processor: KfpPipelineProcessor): pipelines_test_file = "this/is/a/rel/path/test.ipynb" pipeline_dependencies = ["non_existent_file.json"] component_parameters = { @@ -127,7 +229,7 @@ def test_fail_generate_dependency_archive(processor): processor._generate_dependency_archive(test_operation) -def test_get_dependency_source_dir(processor): +def test_get_dependency_source_dir(processor: KfpPipelineProcessor): pipelines_test_file = "this/is/a/rel/path/test.ipynb" processor.root_dir = "/this/is/an/abs/path/" correct_filepath = "/this/is/an/abs/path/this/is/a/rel/path" @@ -145,7 +247,7 @@ def test_get_dependency_source_dir(processor): assert filepath == correct_filepath -def test_get_dependency_archive_name(processor): +def test_get_dependency_archive_name(processor: KfpPipelineProcessor): pipelines_test_file = "this/is/a/rel/path/test.ipynb" correct_filename = "test-this-is-a-test-id.tar.gz" component_parameters = {"filename": pipelines_test_file, "runtime_image": "tensorflow/tensorflow:latest"} @@ -162,7 +264,7 @@ def test_get_dependency_archive_name(processor): assert filename == correct_filename -def test_collect_envs(processor): +def test_collect_envs(processor: KfpPipelineProcessor): pipelines_test_file = "this/is/a/rel/path/test.ipynb" # add system-owned envs with bogus values to ensure they get set to system-derived values, @@ -212,7 +314,7 @@ def test_collect_envs(processor): assert "USER_NO_VALUE" not in envs -def test_process_list_value_function(processor): +def test_process_list_value_function(processor: KfpPipelineProcessor): # Test values that will be successfully converted to list assert processor._process_list_value("") == [] assert processor._process_list_value(None) == [] @@ -233,7 +335,7 @@ def test_process_list_value_function(processor): assert processor._process_list_value("'elem1', 'elem2'") == "'elem1', 'elem2'" -def test_process_dictionary_value_function(processor): +def test_process_dictionary_value_function(processor: KfpPipelineProcessor): # Test values that will be successfully converted to dictionary assert processor._process_dictionary_value("") == {} assert processor._process_dictionary_value(None) == {} @@ -283,121 +385,254 @@ def test_process_dictionary_value_function(processor): assert processor._process_dictionary_value(dict_as_str) == dict_as_str -def test_processing_url_runtime_specific_component(monkeypatch, processor, component_cache, sample_metadata, tmpdir): - # Define the appropriate reader for a URL-type component definition - kfp_supported_file_types = [".yaml"] - reader = UrlComponentCatalogConnector(kfp_supported_file_types) - - # Assign test resource location - url = ( - "https://raw.githubusercontent.com/elyra-ai/elyra/main/" - "elyra/tests/pipeline/resources/components/filter_text.yaml" - ) - - # Read contents of given path -- read_component_definition() returns a - # a dictionary of component definition content indexed by path - entry_data = reader.get_entry_data({"url": url}, {}) - component_definition = entry_data.definition - - properties = [ - ComponentParameter( - id="text", - name="Text", - json_data_type="string", - value="default", - description="Text to 
filter", - allowed_input_types=["file", "inputpath", "inputvalue"], - ), - ComponentParameter( - id="pattern", - name="Pattern", - json_data_type="string", - value=".*", - description="Pattern to filter on", - allowed_input_types=["file", "inputpath", "inputvalue"], - ), - ] - - # Instantiate a url-based component - component_id = "test_component" - component = Component( - id=component_id, - name="Filter text", - description="", - op="filter-text", - catalog_type="url-catalog", - component_reference={"url": url}, - definition=component_definition, - categories=[], - properties=properties, - ) - - # Fabricate the component cache to include single filename-based component for testing - component_cache._component_cache[processor._type.name] = { - "spoofed_catalog": {"components": {component_id: component}} - } - - # Construct hypothetical operation for component - operation_name = "Filter text test" - operation_params = { - "text": {"widget": "string", "value": "path/to/text.txt"}, - "pattern": {"widget": "string", "value": "hello"}, - } - operation = Operation( - id="filter-text-id", - type="execution_node", - classifier=component_id, - name=operation_name, - parent_operation_ids=[], - component_params=operation_params, - ) - - # Build a mock runtime config for use in _cc_pipeline - mocked_runtime = Metadata(name="test-metadata", display_name="test", schema_name="kfp", metadata=sample_metadata) - - mocked_func = mock.Mock(return_value="default", side_effect=[mocked_runtime, sample_metadata]) - monkeypatch.setattr(processor, "_get_metadata_configuration", mocked_func) +def test_compose_container_command_args(processor: KfpPipelineProcessor): + """ + Verify that _compose_container_command_args yields the expected output for valid input + """ - # Construct single-operation pipeline - pipeline = Pipeline( - id="pipeline-id", name="kfp_test", runtime="kfp", runtime_config="test", source="filter_text.pipeline" + pipeline_name = "test pipeline" + cos_endpoint = "https://minio:9000" + cos_bucket = "test_bucket" + cos_directory = "a_dir" + cos_dependencies_archive = "dummy-notebook-0815.tar.gz" + filename = "dummy-notebook.ipynb" + + command_args = processor._compose_container_command_args( + pipeline_name=pipeline_name, + cos_endpoint=cos_endpoint, + cos_bucket=cos_bucket, + cos_directory=cos_directory, + cos_dependencies_archive=cos_dependencies_archive, + filename=filename, ) - pipeline.operations[operation.id] = operation + assert f"--pipeline-name '{pipeline_name}'" in command_args + assert f"--cos-endpoint '{cos_endpoint}'" in command_args + assert f"--cos-bucket '{cos_bucket}'" in command_args + assert f"--cos-directory '{cos_directory}'" in command_args + assert f"--cos-dependencies-archive '{cos_dependencies_archive}'" in command_args + assert f"--file '{filename}'" in command_args + + assert "--inputs" not in command_args + assert "--outputs" not in command_args + + # verify correct handling of file dependencies and file outputs + for file_dependency in [[], ["input_file.txt"], ["input_file.txt", "input_file_2.txt"]]: + for file_output in [[], ["output.csv"], ["output_1.csv", "output_2.pdf"]]: + command_args = processor._compose_container_command_args( + pipeline_name=pipeline_name, + cos_endpoint=cos_endpoint, + cos_bucket=cos_bucket, + cos_directory=cos_directory, + cos_dependencies_archive=cos_dependencies_archive, + filename=filename, + cos_inputs=file_dependency, + cos_outputs=file_output, + ) + + if len(file_dependency) < 1: + assert "--inputs" not in command_args + else: + assert 
f"--inputs '{';'.join(file_dependency)}'" in command_args + + if len(file_output) < 1: + assert "--outputs" not in command_args + else: + assert f"--outputs '{';'.join(file_output)}'" in command_args + + +def test_compose_container_command_args_invalid_dependency_filename(processor: KfpPipelineProcessor): + """ + Verify that _compose_container_command_args fails if one or more of the + specified input file dependencies contains the reserved separator character + """ - # Establish path and function to construct pipeline - pipeline_path = os.path.join(tmpdir, "kfp_test.yaml") - constructed_pipeline_function = lambda: processor._cc_pipeline(pipeline=pipeline, pipeline_name="test_pipeline") + pipeline_name = "test pipeline" + cos_endpoint = "https://minio:9000" + cos_bucket = "test_bucket" + cos_directory = "a_dir" + cos_dependencies_archive = "dummy-notebook-0815.tar.gz" + filename = "dummy-notebook.ipynb" + + reserved_separator_char = ";" + + for file_dependency in [ + [f"input_file{reserved_separator_char}txt"], + ["input_file.txt", f"input{reserved_separator_char}_file_2.txt"], + ]: + # identify invalid file dependency name + invalid_file_name = [file for file in file_dependency if reserved_separator_char in file][0] + for file_output in [[], ["output.csv"], ["output_1.csv", "output_2.pdf"]]: + with pytest.raises( + ValueError, + match=re.escape( + f"Illegal character ({reserved_separator_char}) found in filename '{invalid_file_name}'." + ), + ): + command_args = processor._compose_container_command_args( + pipeline_name=pipeline_name, + cos_endpoint=cos_endpoint, + cos_bucket=cos_bucket, + cos_directory=cos_directory, + cos_dependencies_archive=cos_dependencies_archive, + filename=filename, + cos_inputs=file_dependency, + cos_outputs=file_output, + ) + assert command_args is None + + +def test_add_disable_node_caching(processor: KfpPipelineProcessor): + """ + Verify that add_disable_node_caching updates the execution object as expected + """ + execution_object = {} + for instance in [ + DisableNodeCaching("True"), + DisableNodeCaching("False"), + ]: + processor.add_disable_node_caching(instance=instance, execution_object=execution_object) + assert execution_object.get("disable_node_caching") is instance.selection + assert len(execution_object.keys()) == 1 - # TODO Check against both argo and tekton compilations - # Compile pipeline and save into pipeline_path - kfp_argo_compiler.Compiler().compile(constructed_pipeline_function, pipeline_path) - # Read contents of pipeline YAML - with open(pipeline_path) as f: - pipeline_yaml = yaml.safe_load(f.read()) +def test_add_custom_shared_memory_size(processor): + """ + Verify that add_custom_shared_memory_size updates the execution object as expected + """ + execution_object = {} + for instance in [CustomSharedMemorySize(None, None), CustomSharedMemorySize("", None)]: + processor.add_custom_shared_memory_size(instance=instance, execution_object=execution_object) + assert execution_object.get("kubernetes_shared_mem_size") is None + + for instance in [ + CustomSharedMemorySize("0.5", None), + CustomSharedMemorySize("3.14", "G"), + CustomSharedMemorySize("256", "M"), + ]: + processor.add_custom_shared_memory_size(instance=instance, execution_object=execution_object) + assert execution_object["kubernetes_shared_mem_size"]["size"] == instance.size + assert execution_object["kubernetes_shared_mem_size"]["units"] == instance.units + + +def test_add_kubernetes_secret(processor: KfpPipelineProcessor): + """ + Verify that add_kubernetes_secret updates the 
execution object as expected + """ + execution_object = {} + for instance in [ + KubernetesSecret("var", "secret_name", "secret_key"), + KubernetesSecret("var2", "secret_name", "secret_key"), + KubernetesSecret("var", "secret_name_2", "secret_key_2"), + ]: + processor.add_kubernetes_secret(instance=instance, execution_object=execution_object) + assert execution_object["kubernetes_secrets"][instance.env_var]["name"] == instance.name + assert execution_object["kubernetes_secrets"][instance.env_var]["key"] == instance.key - # Check the pipeline file contents for correctness - pipeline_template = pipeline_yaml["spec"]["templates"][0] - assert pipeline_template["metadata"]["annotations"]["pipelines.kubeflow.org/task_display_name"] == operation_name - assert pipeline_template["inputs"]["artifacts"][0]["raw"]["data"] == operation_params["text"] - assert pipeline_template["container"]["command"][4] == operation_params["pattern"] + # given above instances, there should be two entries in the modified execution_object + assert len(execution_object["kubernetes_secrets"].keys()) == 2 -def test_processing_filename_runtime_specific_component( - monkeypatch, processor, component_cache, sample_metadata, tmpdir +def test_add_mounted_volume(processor: KfpPipelineProcessor): + """ + Verify that add_mounted_volume updates the execution object as expected + """ + execution_object = {} + for instance in [ + VolumeMount("/mount/path", "test-pvc", None, None), + VolumeMount("/mount/path2", "test-pvc-2", None, True), + VolumeMount("/mount/path3", "test-pvc-3", None, False), + VolumeMount("/mount/path4", "test-pvc-4", "sub/path", True), + VolumeMount("/mount/path", "test-pvc", None, True), + ]: + processor.add_mounted_volume(instance=instance, execution_object=execution_object) + assert execution_object["kubernetes_volumes"][instance.path]["pvc_name"] == instance.pvc_name + assert execution_object["kubernetes_volumes"][instance.path]["sub_path"] == instance.sub_path + assert execution_object["kubernetes_volumes"][instance.path]["read_only"] == instance.read_only + + # given above instances, there should be four entries in the modified execution_object + assert len(execution_object["kubernetes_volumes"].keys()) == 4 + + +def test_add_kubernetes_pod_annotation(processor: KfpPipelineProcessor): + """ + Verify that add_kubernetes_pod_annotation updates the execution object as expected + """ + execution_object = {} + for instance in [ + KubernetesAnnotation("annotation-key", None), + KubernetesAnnotation("prefix/annotation-key-2", ""), + KubernetesAnnotation("annotation-key-3", "annotation value"), + KubernetesAnnotation("annotation-key-3", "another annotation value"), + ]: + processor.add_kubernetes_pod_annotation(instance=instance, execution_object=execution_object) + if instance.value is not None: + assert execution_object["pod_annotations"][instance.key] == instance.value + else: + assert execution_object["pod_annotations"][instance.key] == "" + + # given above instances, there should be three entries in the modified execution_object + assert len(execution_object["pod_annotations"].keys()) == 3 + + +def test_add_kubernetes_pod_label(processor: KfpPipelineProcessor): + """ + Verify that add_kubernetes_pod_label updates the execution object as expected + """ + execution_object = {} + for instance in [ + KubernetesLabel("label-key", None), + KubernetesLabel("label-key-2", ""), + KubernetesLabel("label-key-3", "label-value"), + KubernetesLabel("label-key-2", "a-different-label-value"), + ]: + 
processor.add_kubernetes_pod_label(instance=instance, execution_object=execution_object) + if instance.value is not None: + assert execution_object["pod_labels"][instance.key] == instance.value + else: + assert execution_object["pod_labels"][instance.key] == "" + + # given above instances, there should be three entries in the modified execution_object + assert len(execution_object["pod_labels"].keys()) == 3 + + +def test_add_kubernetes_toleration(processor: KfpPipelineProcessor): + """ + Verify that add_kubernetes_toleration updates the execution object as expected + """ + execution_object = {} + expected_unique_execution_object_entries = [] + for instance in [ + KubernetesToleration("toleration-key", "Exists", None, "NoExecute"), + KubernetesToleration("toleration-key", "Equals", 42, ""), + ]: + processor.add_kubernetes_toleration(instance=instance, execution_object=execution_object) + toleration_hash = hashlib.sha256( + f"{instance.key}::{instance.operator}::{instance.value}::{instance.effect}".encode() + ).hexdigest() + if toleration_hash not in expected_unique_execution_object_entries: + expected_unique_execution_object_entries.append(toleration_hash) + assert execution_object["kubernetes_tolerations"][toleration_hash]["key"] == instance.key + assert execution_object["kubernetes_tolerations"][toleration_hash]["value"] == instance.value + assert execution_object["kubernetes_tolerations"][toleration_hash]["operator"] == instance.operator + assert execution_object["kubernetes_tolerations"][toleration_hash]["effect"] == instance.effect + assert len(expected_unique_execution_object_entries) == len(execution_object["kubernetes_tolerations"].keys()) + + +def test_generate_pipeline_dsl_compile_pipeline_dsl_custom_component_pipeline( + processor: KfpPipelineProcessor, component_cache, tmpdir ): - # Define the appropriate reader for a filesystem-type component definition - kfp_supported_file_types = [".yaml"] - reader = FilesystemComponentCatalogConnector(kfp_supported_file_types) + """ + Verify that _generate_pipeline_dsl and _compile_pipeline_dsl yield + the expected output for pipeline the includes a custom component + """ - # Assign test resource location - absolute_path = os.path.abspath( - os.path.join(os.path.dirname(__file__), "..", "resources", "components", "download_data.yaml") - ) + # load test component definition + component_def_path = Path(__file__).parent / ".." 
/ "resources" / "components" / "download_data.yaml" # Read contents of given path -- read_component_definition() returns a # a dictionary of component definition content indexed by path - entry_data = reader.get_entry_data({"path": absolute_path}, {}) + reader = FilesystemComponentCatalogConnector([".yaml"]) + entry_data = reader.get_entry_data({"path": str(component_def_path.absolute())}, {}) component_definition = entry_data.definition properties = [ @@ -424,10 +659,10 @@ def test_processing_filename_runtime_specific_component( component = Component( id=component_id, name="Download data", - description="", + description="download data from web", op="download-data", catalog_type="elyra-kfp-examples-catalog", - component_reference={"path": absolute_path}, + component_reference={"path": component_def_path.as_posix()}, definition=component_definition, properties=properties, categories=[], @@ -438,10 +673,14 @@ def test_processing_filename_runtime_specific_component( "spoofed_catalog": {"components": {component_id: component}} } - # Construct hypothetical operation for component + # Construct operation for component operation_name = "Download data test" operation_params = { - "url": {"widget": "file", "value": "resources/sample_pipelines/pipeline_valid.json"}, + "url": { + "widget": "string", + "value": "https://raw.githubusercontent.com/elyra-ai/examples/" + "main/pipelines/run-pipelines-on-kubeflow-pipelines/data/data.csv", + }, "curl_options": {"widget": "string", "value": "--location"}, } operation = Operation( @@ -453,176 +692,973 @@ def test_processing_filename_runtime_specific_component( component_params=operation_params, ) - # Build a mock runtime config for use in _cc_pipeline - mocked_runtime = Metadata(name="test-metadata", display_name="test", schema_name="kfp", metadata=sample_metadata) - - mocked_func = mock.Mock(return_value="default", side_effect=[mocked_runtime, sample_metadata]) - monkeypatch.setattr(processor, "_get_metadata_configuration", mocked_func) - # Construct single-operation pipeline pipeline = Pipeline( - id="pipeline-id", name="kfp_test", runtime="kfp", runtime_config="test", source="download_data.pipeline" + id="pipeline-id", + name="code-gen-test-custom-components", + description="Test code generation for custom components", + runtime="kfp", + runtime_config="test", + source="download_data.pipeline", ) pipeline.operations[operation.id] = operation - # Establish path and function to construct pipeline - pipeline_path = os.path.join(tmpdir, "kfp_test.yaml") - constructed_pipeline_function = lambda: processor._cc_pipeline(pipeline=pipeline, pipeline_name="test_pipeline") + # generate Python DSL for the Argo workflow engine + generated_argo_dsl = processor._generate_pipeline_dsl( + pipeline=pipeline, pipeline_name=pipeline.name, workflow_engine=WorkflowEngineType.ARGO + ) + + assert generated_argo_dsl is not None + # Generated DSL includes workflow engine specific code in the _main_ function + assert "kfp.compiler.Compiler().compile(" in generated_argo_dsl + + compiled_argo_output_file = Path(tmpdir) / "compiled_kfp_test_argo.yaml" + + # make sure the output file does not exist (3.8+ use unlink("missing_ok=True")) + if compiled_argo_output_file.is_file(): + compiled_argo_output_file.unlink() - # TODO Check against both argo and tekton compilations - # Compile pipeline and save into pipeline_path - kfp_argo_compiler.Compiler().compile(constructed_pipeline_function, pipeline_path) + # if the compiler discovers an issue with the generated DSL this call fails + 
processor._compile_pipeline_dsl( + dsl=generated_argo_dsl, + workflow_engine=WorkflowEngineType.ARGO, + output_file=compiled_argo_output_file.as_posix(), + pipeline_conf=None, + ) + + # verify that the output file exists + assert compiled_argo_output_file.is_file() - # Read contents of pipeline YAML - with open(pipeline_path) as f: - pipeline_yaml = yaml.safe_load(f.read()) + # verify the file content + with open(compiled_argo_output_file) as fh: + argo_spec = yaml.safe_load(fh.read()) - # Check the pipeline file contents for correctness - pipeline_template = pipeline_yaml["spec"]["templates"][0] - assert pipeline_template["metadata"]["annotations"]["pipelines.kubeflow.org/task_display_name"] == operation_name - assert pipeline_template["container"]["command"][3] == operation_params["url"] - assert '"doc_type": "pipeline"' in pipeline_template["container"]["command"][3] + assert "argoproj.io/" in argo_spec["apiVersion"] + pipeline_spec_annotations = json.loads(argo_spec["metadata"]["annotations"]["pipelines.kubeflow.org/pipeline_spec"]) + assert ( + pipeline_spec_annotations["name"] == pipeline.name + ), f"DSL input: {generated_argo_dsl}\nArgo output: {argo_spec}" + assert pipeline_spec_annotations["description"] == pipeline.description, pipeline_spec_annotations + + # generate Python DSL for the Tekton workflow engine + generated_tekton_dsl = processor._generate_pipeline_dsl( + pipeline=pipeline, pipeline_name=pipeline.name, workflow_engine=WorkflowEngineType.TEKTON + ) + assert generated_tekton_dsl is not None + # Generated DSL includes workflow engine specific code in the _main_ function + assert "compiler.TektonCompiler().compile(" in generated_tekton_dsl + + compiled_tekton_output_file = Path(tmpdir) / "compiled_kfp_test_tekton.yaml" + + # if the compiler discovers an issue with the generated DSL this call fails + processor._compile_pipeline_dsl( + dsl=generated_tekton_dsl, + workflow_engine=WorkflowEngineType.TEKTON, + output_file=compiled_tekton_output_file.as_posix(), + pipeline_conf=None, + ) -def test_cc_pipeline_component_no_input(monkeypatch, processor, component_cache, sample_metadata, tmpdir): + # verify that the output file exists + assert compiled_tekton_output_file.is_file() + + # verify the file content + with open(compiled_tekton_output_file) as fh: + tekton_spec = yaml.safe_load(fh.read()) + + assert "tekton.dev/" in tekton_spec["apiVersion"] + + +def load_and_patch_pipeline( + pipeline_filename: Union[str, Path], with_cos_object_prefix: bool = False +) -> Union[None, Pipeline]: """ - Verifies that cc_pipeline can handle KFP component definitions that don't - include any inputs + This utility function loads pipeline_filename and injects additional metadata, similar + to what is done when a pipeline is submitted. """ - # Define the appropriate reader for a filesystem-type component definition - kfp_supported_file_types = [".yaml"] - reader = FilesystemComponentCatalogConnector(kfp_supported_file_types) - # Assign test resource location - cpath = (Path(__file__).parent / ".." / "resources" / "components" / "kfp_test_operator_no_inputs.yaml").resolve() - assert cpath.is_file() - cpath = str(cpath) + assert pipeline_filename is not None, "A pipeline filename is required." + + if not isinstance(pipeline_filename, Path): + pipeline_filename = Path(pipeline_filename) + + assert pipeline_filename.is_file(), f"Pipeline '{pipeline_filename}' does not exist." 
+ + # load file content + with open(pipeline_filename, "r") as fh: + pipeline_json = json.loads(fh.read()) + + # This rudimentary implementation assumes that the provided file is a valid + # pipeline file, which contains a primary pipeline. + if len(pipeline_json["pipelines"]) > 0: + # Add runtime information + if pipeline_json["pipelines"][0]["app_data"].get("runtime", None) is None: + pipeline_json["pipelines"][0]["app_data"]["runtime"] = "Kubeflow Pipelines" + if pipeline_json["pipelines"][0]["app_data"].get("runtime_type", None) is None: + pipeline_json["pipelines"][0]["app_data"]["runtime_type"] = "KUBEFLOW_PIPELINES" + # Add the filename as pipeline source information + if pipeline_json["pipelines"][0]["app_data"].get("source", None) is None: + pipeline_json["pipelines"][0]["app_data"]["source"] = pipeline_filename.name + + if with_cos_object_prefix: + # Define a dummy COS prefix, if none is defined + if pipeline_json["pipelines"][0]["app_data"]["properties"].get("pipeline_defaults") is None: + pipeline_json["pipelines"][0]["app_data"]["properties"]["pipeline_defaults"] = {} + if ( + pipeline_json["pipelines"][0]["app_data"]["properties"]["pipeline_defaults"].get(COS_OBJECT_PREFIX) + is None + ): + pipeline_json["pipelines"][0]["app_data"]["properties"]["pipeline_defaults"][ + COS_OBJECT_PREFIX + ] = "test/project" + else: + # Remove the prefix, if one is already defined + if pipeline_json["pipelines"][0]["app_data"]["properties"].get("pipeline_defaults") is not None: + pipeline_json["pipelines"][0]["app_data"]["properties"]["pipeline_defaults"].pop( + COS_OBJECT_PREFIX, None + ) + + return PipelineParser().parse(pipeline_json=pipeline_json) + + +def generate_mocked_runtime_image_configurations( + pipeline: Pipeline, require_pull_secret: bool = False +) -> List[Metadata]: + """ + Generates mocked runtime configuration entries for each unique + runtime image that is referenced by the pipeline's generic nodes. + """ + if pipeline is None: + raise ValueError("Pipeline parameter is required") + mocked_runtime_image_configurations = [] + unique_image_names = [] + # Iterate through pipeline nodes, extract the container image references + # for all generic operations, and produce mocked runtime image configurations. 
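+    # Note: only one configuration entry is produced per unique runtime image,
+    # even if several generic nodes reference the same image.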
+ counter = 1 + for operation in pipeline.operations.values(): + if isinstance(operation, GenericOperation): + if operation.runtime_image not in unique_image_names: + name = f"mocked-image-{counter}" + m = { + "image_name": operation.runtime_image, + "pull_policy": "IfNotPresent", + } + if require_pull_secret: + m["pull_secret"] = f"{name.lower().replace(' ', '-')}-secret" + + mocked_runtime_image_configurations.append( + Metadata( + name=name, + display_name="test-image", + schema_name="runtime-image", + metadata=m, + ) + ) + unique_image_names.append(operation.runtime_image) + + return mocked_runtime_image_configurations + + +@pytest.mark.parametrize( + "kfp_runtime_config", + [ + kfp_runtime_config(workflow_engine=WorkflowEngineType.ARGO), + kfp_runtime_config(workflow_engine=WorkflowEngineType.TEKTON), + ], +) +def test_generate_pipeline_dsl_compile_pipeline_dsl_workflow_engine_test( + monkeypatch, processor: KfpPipelineProcessor, kfp_runtime_config: Metadata, tmpdir +): + """ + This test validates the following: + - _generate_pipeline_dsl generates Python code for the supported workflow engines + - _compile_pipeline_dsl compiles the generated code using the workflow engine's compiler - # Read contents of given path -- read_component_definition() returns a - # a dictionary of component definition content indexed by path - entry_data = reader.get_entry_data({"path": cpath}, {}) - component_definition = entry_data.definition + This test does not validate that the output artifacts correctly reflect the test pipeline. + Other tests do that. + """ + workflow_engine = WorkflowEngineType.get_instance_by_value(kfp_runtime_config.metadata["engine"]) - # Instantiate a file-based component - component_id = "test-component" - component = Component( - id=component_id, - name="No input data", - description="", - op="no-input-data", - catalog_type="elyra-kfp-examples-catalog", - component_reference={"path": cpath}, - definition=component_definition, - properties=[], - categories=[], + # Any valid pipeline file can be used to run this test, as long as it includes at least one node. + test_pipeline_file = ( + Path(__file__).parent / ".." / "resources" / "test_pipelines" / "kfp" / "kfp-one-node-generic.pipeline" ) + # Instantiate a pipeline object to make it easier to obtain the information + # needed to perform validation. 
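+    # The second argument (with_cos_object_prefix) is False, so no COS object prefix
+    # is injected into the pipeline defaults.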
+ pipeline = load_and_patch_pipeline(test_pipeline_file, False) + assert pipeline is not None - # Fabricate the component cache to include single filename-based component for testing - component_cache._component_cache[processor._type.name] = { - "spoofed_catalog": {"components": {component_id: component}} - } + mocked_runtime_image_configurations = generate_mocked_runtime_image_configurations(pipeline) - # Construct hypothetical operation for component - operation_name = "no-input-test" - operation_params = {} - operation = Operation( - id="no-input-id", - type="execution_node", - classifier=component_id, - name=operation_name, - parent_operation_ids=[], - component_params=operation_params, + mock_side_effects = [kfp_runtime_config] + [mocked_runtime_image_configurations] + mocked_func = mock.Mock(return_value="default", side_effect=mock_side_effects) + monkeypatch.setattr(processor, "_get_metadata_configuration", mocked_func) + monkeypatch.setattr(processor, "_upload_dependencies_to_object_store", lambda w, x, y, prefix: True) + monkeypatch.setattr(processor, "_verify_cos_connectivity", lambda x: True) + + compiled_output_file = Path(tmpdir) / test_pipeline_file.with_suffix(".yaml") + compiled_output_file_name = str(compiled_output_file.absolute()) + + # generate Python DSL for the specified workflow engine + pipeline_version = f"{pipeline.name}-test-0" + pipeline_instance_id = f"{pipeline.name}-{datetime.now().strftime('%m%d%H%M%S')}" + experiment_name = f"{pipeline.name}-test-0" + generated_dsl = processor._generate_pipeline_dsl( + pipeline=pipeline, + pipeline_name=pipeline.name, + workflow_engine=workflow_engine, + pipeline_version=pipeline_version, + pipeline_instance_id=pipeline_instance_id, + experiment_name=experiment_name, + ) + + # Check the workflow engine specific code in the generated DSL + if workflow_engine == WorkflowEngineType.TEKTON: + assert "from kfp_tekton import compiler" in generated_dsl, f"engine: {workflow_engine}\ndsl: {generated_dsl}" + assert "compiler.TektonCompiler().compile(" in generated_dsl + assert "kfp.compiler.Compiler().compile(" not in generated_dsl + else: + assert "from kfp_tekton import compiler" not in generated_dsl + assert "compiler.TektonCompiler().compile(" not in generated_dsl + assert "kfp.compiler.Compiler().compile(" in generated_dsl + + # Compile the generated Python DSL + processor._compile_pipeline_dsl( + dsl=generated_dsl, + workflow_engine=workflow_engine, + output_file=compiled_output_file_name, + pipeline_conf=None, + ) + + # Load compiled workflow + with open(compiled_output_file_name) as f: + workflow_spec = yaml.safe_load(f.read()) + + # Verify that the output is for the specified workflow engine + if workflow_engine == WorkflowEngineType.TEKTON: + assert "tekton.dev/" in workflow_spec["apiVersion"] + else: + assert "argoproj.io/" in workflow_spec["apiVersion"] + + +@pytest.mark.parametrize("use_cos_object_prefix", [True, False]) +@pytest.mark.parametrize( + "kfp_runtime_config", + [ + kfp_runtime_config(workflow_engine=WorkflowEngineType.ARGO, use_cos_credentials_secret=True), + kfp_runtime_config(workflow_engine=WorkflowEngineType.ARGO, use_cos_credentials_secret=False), + ], +) +def test_generate_pipeline_dsl_compile_pipeline_dsl_one_generic_node_pipeline_test_1( + monkeypatch, processor: KfpPipelineProcessor, kfp_runtime_config: Metadata, use_cos_object_prefix: bool, tmpdir +): + """ + This test validates that the output of _generate_pipeline_dsl and _compile_pipeline_dsl + yields the expected results for a generic node that 
has only the required inputs defined. + + This test covers: + - the Argo workflow engine + - runtime configurations that use cloud storage authentication types KUBERNETES_SECRET + and USER_CREDENTIALS (the generated code varies depending on the selected type) + + Other tests cover the scenarios where the user defined optional properties, + such as environment variables, Kubernetes labels, or data volumes. + """ + + workflow_engine = WorkflowEngineType.get_instance_by_value(kfp_runtime_config.metadata["engine"]) + + # The test pipeline should only include one generic node that has only the following + # required properties defined: + # - runtime image + test_pipeline_file = ( + Path(__file__).parent / ".." / "resources" / "test_pipelines" / "kfp" / "kfp-one-node-generic.pipeline" ) + # Instantiate a pipeline object to make it easier to obtain the information + # needed to perform validation. + pipeline = load_and_patch_pipeline(test_pipeline_file, use_cos_object_prefix) + assert pipeline is not None + + # Make sure this is a one generic node pipeline + assert len(pipeline.operations.keys()) == 1 + assert isinstance(list(pipeline.operations.values())[0], GenericOperation) + # Use 'op' variable to access the operation + op = list(pipeline.operations.values())[0] - # Build a mock runtime config for use in _cc_pipeline - mocked_runtime = Metadata(name="test-metadata", display_name="test", schema_name="kfp", metadata=sample_metadata) + mocked_runtime_image_configurations = generate_mocked_runtime_image_configurations(pipeline) - mocked_func = mock.Mock(return_value="default", side_effect=[mocked_runtime, sample_metadata]) + mock_side_effects = [kfp_runtime_config] + [mocked_runtime_image_configurations] + mocked_func = mock.Mock(return_value="default", side_effect=mock_side_effects) monkeypatch.setattr(processor, "_get_metadata_configuration", mocked_func) + monkeypatch.setattr(processor, "_upload_dependencies_to_object_store", lambda w, x, y, prefix: True) + monkeypatch.setattr(processor, "_verify_cos_connectivity", lambda x: True) - # Construct single-operation pipeline - pipeline = Pipeline( - id="pipeline-id", name="kfp_test", runtime="kfp", runtime_config="test", source="no_input.pipeline" + compiled_argo_output_file = Path(tmpdir) / test_pipeline_file.with_suffix(".yaml") + compiled_argo_output_file_name = str(compiled_argo_output_file.absolute()) + + # generate Python DSL for the Argo workflow engine + pipeline_version = f"{pipeline.name}-0815" + pipeline_instance_id = f"{pipeline.name}-{datetime.now().strftime('%m%d%H%M%S')}" + experiment_name = f"{pipeline.name}-0815" + generated_dsl = processor._generate_pipeline_dsl( + pipeline=pipeline, + pipeline_name=pipeline.name, + workflow_engine=workflow_engine, + pipeline_version=pipeline_version, + pipeline_instance_id=pipeline_instance_id, + experiment_name=experiment_name, ) - pipeline.operations[operation.id] = operation - constructed_pipeline_function = lambda: processor._cc_pipeline(pipeline=pipeline, pipeline_name="test_pipeline") - pipeline_path = str(Path(tmpdir) / "no_inputs_test.yaml") + # if the compiler discovers an issue with the generated DSL this call fails + processor._compile_pipeline_dsl( + dsl=generated_dsl, + workflow_engine=workflow_engine, + output_file=compiled_argo_output_file_name, + pipeline_conf=None, + ) - # Compile pipeline and save into pipeline_path - kfp_argo_compiler.Compiler().compile(constructed_pipeline_function, pipeline_path) + # Load generated Argo workflow + with open(compiled_argo_output_file_name) as 
f: + argo_spec = yaml.safe_load(f.read()) + + # verify that this is an argo specification + assert "argoproj.io" in argo_spec["apiVersion"] + + pipeline_meta_annotations = json.loads(argo_spec["metadata"]["annotations"]["pipelines.kubeflow.org/pipeline_spec"]) + assert pipeline_meta_annotations["name"] == pipeline.name + assert pipeline_meta_annotations["description"] == pipeline.description + + # There should be two templates, one for the DAG and one for the generic node. + # Locate the one for the generic node and inspect its properties. + assert len(argo_spec["spec"]["templates"]) == 2 + if argo_spec["spec"]["templates"][0]["name"] == argo_spec["spec"]["entrypoint"]: + node_template = argo_spec["spec"]["templates"][1] + else: + node_template = argo_spec["spec"]["templates"][0] + + # Verify component definition information (see generic_component_definition_template.jinja2) + # - property 'name' + assert node_template["name"] == "run-a-file" + # - property 'implementation.container.command' + assert node_template["container"]["command"] == ["sh", "-c"] + # - property 'implementation.container.args' + # This is a CLOB, which we need to spot check. + assert isinstance(node_template["container"]["args"], list) and len(node_template["container"]["args"]) == 1 + # Check for things that must be in this CLOB: + # - the pipeline name + assert f"--pipeline-name '{pipeline.name}'" in node_template["container"]["args"][0] + # - the object storage endpoint that this node uses for file I/O + assert f"--cos-endpoint '{kfp_runtime_config.metadata['cos_endpoint']}'" in node_template["container"]["args"][0] + # - the object storage bucket name that this node uses for file I/O + assert f"--cos-bucket '{kfp_runtime_config.metadata['cos_bucket']}'" in node_template["container"]["args"][0] + # - the directory within that object storage bucket + if pipeline.pipeline_properties.get(COS_OBJECT_PREFIX): + expected_directory_value = join_paths(pipeline.pipeline_properties.get(COS_OBJECT_PREFIX), pipeline_instance_id) + assert f"--cos-directory '{expected_directory_value}' " in node_template["container"]["args"][0] + else: + assert f"--cos-directory '{pipeline_instance_id}" in node_template["container"]["args"][0] + # - the name of the archive in that directory + expected_archive_name = processor._get_dependency_archive_name(op) + assert f"--cos-dependencies-archive '{expected_archive_name}' " in node_template["container"]["args"][0] + # - the name of the file that this node processes, which is included in that archive + assert f"--file '{op.filename}'" in node_template["container"]["args"][0] + + # Check for things that should not be in this CLOB: + # - Since it's a one-node pipeline, the component cannot have any "--inputs", + # which are declared object storage output files from upstream components. + assert "--inputs" not in node_template["container"]["args"] + # - The component does not declare "--outputs", + # which are output files that need to be stored on object storage. + assert "--outputs" not in node_template["container"]["args"] + + # - property 'implementation.container.image' + assert node_template["container"]["image"] == op.runtime_image + # - property 'implementation.container.imagePullPolicy' + # The image pull policy is defined in the the runtime image + # configuration. Look it up and verified it is properly applied. 
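+    # Only the mocked runtime image configuration whose 'image_name' matches this
+    # operation's runtime image is relevant for the pull policy check below.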
+ for runtime_image_config in mocked_runtime_image_configurations: + if runtime_image_config.metadata["image_name"] == op.runtime_image: + if runtime_image_config.metadata.get("pull_policy"): + assert node_template["container"]["imagePullPolicy"] == runtime_image_config.metadata["pull_policy"] + else: + assert node_template["container"].get("imagePullPolicy") is None + break + + # Verify Kubernetes labels and annotations that Elyra attaches to pods that + # execute generic nodes or custom nodes + if op.doc: + # only set if a comment is attached to the node + assert node_template["metadata"]["annotations"].get("elyra/node-user-doc") == op.doc + + # Verify Kubernetes labels and annotations that Elyra attaches to pods that + # execute generic nodes + assert node_template["metadata"]["annotations"]["elyra/node-file-name"] == op.filename + if pipeline.source: + assert node_template["metadata"]["annotations"]["elyra/pipeline-source"] == pipeline.source + assert node_template["metadata"]["labels"]["elyra/node-name"] == sanitize_label_value(op.name) + assert node_template["metadata"]["labels"]["elyra/node-type"] == sanitize_label_value("notebook-script") + assert node_template["metadata"]["labels"]["elyra/pipeline-name"] == sanitize_label_value(pipeline.name) + assert node_template["metadata"]["labels"]["elyra/pipeline-version"] == sanitize_label_value(pipeline_version) + assert node_template["metadata"]["labels"]["elyra/experiment-name"] == sanitize_label_value(experiment_name) + + # Verify environment variables that Elyra attaches to pods that + # execute generic nodes. All values are hard-coded in the template, with the + # exception of "AWS_ACCESS_KEY_ID" and "AWS_SECRET_ACCESS_KEY", + # which are derived from a Kubernetes secret, if the runtime configuration + # is configured to use one. 
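+    # Both authentication types are exercised because the runtime configuration
+    # fixture is parametrized with use_cos_credentials_secret=True and False.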
+ use_secret_for_cos_authentication = kfp_runtime_config.metadata["cos_auth_type"] == "KUBERNETES_SECRET" + + assert node_template["container"].get("env") is not None, node_template["container"] + for env_var in node_template["container"]["env"]: + if env_var["name"] == "ELYRA_RUNTIME_ENV": + assert env_var["value"] == "kfp" + elif env_var["name"] == "ELYRA_ENABLE_PIPELINE_INFO": + assert env_var["value"] == "True" + elif env_var["name"] == "ELYRA_WRITABLE_CONTAINER_DIR": + assert env_var["value"] == KfpPipelineProcessor.WCD + elif env_var["name"] == "ELYRA_RUN_NAME": + assert env_var["value"] == RUN_ID_PLACEHOLDER + elif env_var["name"] == "AWS_ACCESS_KEY_ID": + if use_secret_for_cos_authentication: + assert env_var["valueFrom"]["secretKeyRef"]["key"] == "AWS_ACCESS_KEY_ID" + assert env_var["valueFrom"]["secretKeyRef"]["name"] == kfp_runtime_config.metadata["cos_secret"] + else: + assert env_var["value"] == kfp_runtime_config.metadata["cos_username"] + elif env_var["name"] == "AWS_SECRET_ACCESS_KEY": + if use_secret_for_cos_authentication: + assert env_var["valueFrom"]["secretKeyRef"]["key"] == "AWS_SECRET_ACCESS_KEY" + assert env_var["valueFrom"]["secretKeyRef"]["name"] == kfp_runtime_config.metadata["cos_secret"] + else: + assert env_var["value"] == kfp_runtime_config.metadata["cos_password"] + + # Verify that the mlpipeline specific outputs are declared + assert node_template.get("outputs") is not None, node_template + assert node_template["outputs"]["artifacts"] is not None, node_template["container"]["outputs"] + assert node_template["outputs"]["artifacts"][0]["name"] == "mlpipeline-metrics" + assert ( + node_template["outputs"]["artifacts"][0]["path"] + == (Path(KfpPipelineProcessor.WCD) / "mlpipeline-metrics.json").as_posix() + ) + assert node_template["outputs"]["artifacts"][1]["name"] == "mlpipeline-ui-metadata" + assert ( + node_template["outputs"]["artifacts"][1]["path"] + == (Path(KfpPipelineProcessor.WCD) / "mlpipeline-ui-metadata.json").as_posix() + ) -@pytest.mark.parametrize("parsed_pipeline", [PIPELINE_FILE_COMPLEX], indirect=True) -def test_create_yaml_complex_pipeline(monkeypatch, processor, parsed_pipeline, sample_metadata, tmpdir): - pipeline_json = _read_pipeline_resource(PIPELINE_FILE_COMPLEX) +@pytest.fixture(autouse=False) +def enable_and_disable_crio(request): + """ + Set and unset the CRIO_RUNTIME environment variable, if requested + """ + # Define variable prior to the test + if request.param: + os.environ["CRIO_RUNTIME"] = "True" - # Ensure the value of COS_OBJECT_PREFIX has been propagated to the Pipeline object appropriately - cos_prefix = pipeline_json["pipelines"][0]["app_data"]["properties"]["pipeline_defaults"].get(COS_OBJECT_PREFIX) - assert cos_prefix == parsed_pipeline.pipeline_properties.get(COS_OBJECT_PREFIX) + yield - # Build a mock runtime config for use in _cc_pipeline - mocked_runtime = Metadata(name="test-metadata", display_name="test", schema_name="kfp", metadata=sample_metadata) - # Build mock runtime images for use in _cc_pipeline - image_one_md = {"image_name": "tensorflow/tensorflow:2.0.0-py3", "pull_policy": "IfNotPresent", "tags": []} - image_two_md = {"image_name": "elyra/examples:1.0.0-py3", "pull_policy": "Always", "tags": []} - mocked_images = [ - Metadata(name="test-image-metadata", display_name="test-image", schema_name="kfp", metadata=image_one_md), - Metadata(name="test-image-metadata2", display_name="test-image2", schema_name="kfp", metadata=image_two_md), - ] + # Remove variable after the test + if request.param: + del 
os.environ["CRIO_RUNTIME"] + + +@pytest.mark.parametrize("enable_and_disable_crio", [False, True], indirect=True) +@pytest.mark.parametrize( + "kfp_runtime_config", + [ + kfp_runtime_config( + workflow_engine=WorkflowEngineType.ARGO, + ), + ], +) +def test_generate_pipeline_dsl_compile_pipeline_dsl_generic_component_crio( + monkeypatch, processor: KfpPipelineProcessor, kfp_runtime_config: Metadata, tmpdir, enable_and_disable_crio +): + """ + This test validates that the output of _generate_pipeline_dsl and _compile_pipeline_dsl + yields the expected results for a generic node when the CRIO_RUNTIME environment variable + is set to a valid string representation of the boolean value True (/true/i). + Test assumptions: + - Enabling CRIO_RUNTIME has the same effect for all supported workflow engines + - The test pipeline contains at least one generic node + + With CRIO_RUNTIME enabled, the compiled output must include the following properties: + - in spec.templates[].volumes: + - emptyDir: {medium: '', sizeLimit: 20Gi} + name: workspace + """ + crio_runtime_enabled = os.environ.get("CRIO_RUNTIME", "").lower() == "true" + + workflow_engine = WorkflowEngineType.get_instance_by_value(kfp_runtime_config.metadata["engine"]) + + # Any valid pipeline file can be used to run this test, as long as it includes at least one generic node. + test_pipeline_file = ( + Path(__file__).parent / ".." / "resources" / "test_pipelines" / "kfp" / "kfp-one-node-generic.pipeline" + ) + # Instantiate a pipeline object to make it easier to obtain the information + # needed to perform validation. + pipeline = load_and_patch_pipeline(pipeline_filename=test_pipeline_file, with_cos_object_prefix=False) + assert pipeline is not None + + mocked_runtime_image_configurations = generate_mocked_runtime_image_configurations( + pipeline, + require_pull_secret=False, + ) + + assert kfp_runtime_config is not None + assert mocked_runtime_image_configurations is not None + + monkeypatch.setattr(processor, "_upload_dependencies_to_object_store", lambda w, x, y, prefix: True) + monkeypatch.setattr(processor, "_verify_cos_connectivity", lambda x: True) + + # Test begins here + + compiled_output_file = Path(tmpdir) / test_pipeline_file.with_suffix(".yaml") + compiled_output_file_name = str(compiled_output_file.absolute()) - # Mock necessary functions (incl. 
side effects for each node) - mock_side_effects = [mocked_runtime] + [mocked_images for _ in range(len(pipeline_json["pipelines"][0]["nodes"]))] + # generate Python DSL for the specified workflow engine + pipeline_version = f"{pipeline.name}-test-0" + pipeline_instance_id = f"{pipeline.name}-{datetime.now().strftime('%m%d%H%M%S')}" + experiment_name = f"{pipeline.name}-test-0" + + # Generate pipeline DSL; this requires the _get_metadata_configuration mock + monkeypatch.setattr( + processor, + "_get_metadata_configuration", + mock.Mock(return_value="default", side_effect=[kfp_runtime_config] + [mocked_runtime_image_configurations]), + ) + generated_dsl = processor._generate_pipeline_dsl( + pipeline=pipeline, + pipeline_name=pipeline.name, + workflow_engine=workflow_engine, + pipeline_version=pipeline_version, + pipeline_instance_id=pipeline_instance_id, + experiment_name=experiment_name, + ) + + # Compile the DSL + processor._compile_pipeline_dsl( + dsl=generated_dsl, + workflow_engine=workflow_engine, + output_file=compiled_output_file_name, + pipeline_conf=None, + ) + + # Load compiled workflow + with open(compiled_output_file_name) as f: + compiled_spec = yaml.safe_load(f.read()) + + # There should be multiple templates, one for the DAG and one for every generic node. + assert len(compiled_spec["spec"]["templates"]) >= 2 + if crio_runtime_enabled: + for template in compiled_spec["spec"]["templates"]: + if template["name"] == compiled_spec["spec"]["entrypoint"]: + continue + # Check volume definition + assert template.get("volumes") is not None, template + entry_found = False + for volume_entry in template["volumes"]: + if volume_entry["name"] != CRIO_VOL_DEF_NAME: + continue + assert ( + volume_entry.get("emptyDir") is not None + ), f"Unexpected volume entry '{CRIO_VOL_DEF_NAME}': {volume_entry} " + assert volume_entry["emptyDir"]["sizeLimit"] == CRIO_VOL_DEF_SIZE + assert volume_entry["emptyDir"]["medium"] == CRIO_VOL_DEF_MEDIUM + entry_found = True + assert entry_found, f"Missing volume entry '{CRIO_VOL_DEF_NAME}' for CRI-O in {template['volumes']}" + # Check volume mount definition + assert template["container"].get("volumeMounts") is not None, template["container"] + for volumemount_entry in template["container"]["volumeMounts"]: + entry_found = False + if volumemount_entry["name"] != CRIO_VOL_DEF_NAME: + continue + assert volumemount_entry["mountPath"] == CRIO_VOL_MOUNT_PATH + entry_found = True + break + assert ( + entry_found + ), f"Missing volume mount entry '{CRIO_VOL_DEF_NAME}' for CRI-O in {template['container']['volumeMounts']}" + # Check PYTHONPATH environment variable (python_user_lib_path) + assert template["container"].get("env") is not None, template["container"] + for env_entry in template["container"]["env"]: + entry_found = False + if env_entry["name"] != "PYTHONPATH": + continue + assert env_entry["value"] == CRIO_VOL_PYTHON_PATH + entry_found = True + break + assert entry_found, f"Missing env variable entry 'PYTHONPATH' for CRI-O in {template['container']['env']}" + # Check the container command argument list + assert len(template["container"]["args"]) == 1 + assert f"mkdir -p {CRIO_VOL_WORKDIR_PATH}" in template["container"]["args"][0] + assert f"--target={CRIO_VOL_PYTHON_PATH}" in template["container"]["args"][0] + assert f"--user-volume-path '{CRIO_VOL_PYTHON_PATH}' " in template["container"]["args"][0] + else: + for template in compiled_spec["spec"]["templates"]: + if template["name"] == compiled_spec["spec"]["entrypoint"]: + continue + # Check if a volume was 
defined + for volume_entry in template.get("volumes", []): + if volume_entry["name"] == CRIO_VOL_DEF_NAME: + # if a volume with the 'reserved' name exist there could be a problem + assert volume_entry.get("emptyDir") is None + # Check volume mount definition + for volumemount_entry in template["container"].get("volumeMounts", []): + if volumemount_entry["name"] == CRIO_VOL_DEF_NAME: + assert volumemount_entry["mountPath"] != CRIO_VOL_MOUNT_PATH + # Check PYTHONPATH environment variable + for env_entry in template["container"].get("env", []): + assert env_entry["name"] != "PYTHONPATH" + # Check the container command argument list + assert "mkdir -p ./jupyter-work-dir" in template["container"]["args"][0] + assert f"--target={CRIO_VOL_PYTHON_PATH}" not in template["container"]["args"][0] + assert "--user-volume-path" not in template["container"]["args"][0] + + +@pytest.mark.parametrize( + "kfp_runtime_config", + [ + kfp_runtime_config( + workflow_engine=WorkflowEngineType.ARGO, + ), + ], +) +def test_generate_pipeline_dsl_compile_pipeline_dsl_optional_elyra_properties( + monkeypatch, processor: KfpPipelineProcessor, kfp_runtime_config: Metadata, tmpdir +): + """ + This test validates that the output of _generate_pipeline_dsl and _compile_pipeline_dsl + yields the expected results for a generic node that has optional user-provided properties + defined: + - data volumes + - shared memory size + - Kubernetes secrets + - Kubernetes labels + - Kubernetes annotations + - Kubernetes tolerations + """ + workflow_engine = WorkflowEngineType.get_instance_by_value(kfp_runtime_config.metadata["engine"]) + + # The test pipeline should only include one generic node that has the following optional + # user-specified properties defined: + # - data volumes + test_pipeline_file = ( + Path(__file__).parent + / ".." + / "resources" + / "test_pipelines" + / "kfp" + / "kfp-one-node-generic-elyra-properties.pipeline" + ) + # Instantiate a pipeline object to make it easier to obtain the information + # needed to perform validation. 
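+    # with_cos_object_prefix is not specified and therefore defaults to False.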
+ pipeline = load_and_patch_pipeline(test_pipeline_file) + assert pipeline is not None + + # Make sure this is a one generic node pipeline + assert len(pipeline.operations.keys()) == 1 + assert isinstance(list(pipeline.operations.values())[0], GenericOperation) + # Use 'op' variable to access the operation + op = list(pipeline.operations.values())[0] + + mocked_runtime_image_configurations = generate_mocked_runtime_image_configurations(pipeline) + + mock_side_effects = [kfp_runtime_config] + [mocked_runtime_image_configurations] mocked_func = mock.Mock(return_value="default", side_effect=mock_side_effects) monkeypatch.setattr(processor, "_get_metadata_configuration", mocked_func) monkeypatch.setattr(processor, "_upload_dependencies_to_object_store", lambda w, x, y, prefix: True) - monkeypatch.setattr(processor, "_get_dependency_archive_name", lambda x: True) monkeypatch.setattr(processor, "_verify_cos_connectivity", lambda x: True) - inst_id = "test-instance-id" - pipeline_func = lambda: processor._cc_pipeline(parsed_pipeline, pipeline_name="test", pipeline_instance_id=inst_id) - pipeline_path = str(Path(tmpdir) / "complex_test.yaml") - - # Compile pipeline, save into pipeline_path, then read YAML - kfp_argo_compiler.Compiler().compile(pipeline_func, pipeline_path) - with open(pipeline_path) as f: - pipeline_yaml = yaml.safe_load(f.read()) - - def list_to_sorted_str(convert_list): - """Helper function to convert a list of files into a semicolon-separated sorted string""" - convert_str = "" - for item in convert_list: - convert_str += f"{item};" - return "".join(sorted(convert_str[:-1])) - - # Sort and clean node lists in preparation for direct comparison between YAML and JSON - pipeline_nodes = sorted(pipeline_json["pipelines"][0]["nodes"], key=lambda d: d["app_data"]["label"]) - yaml_nodes = [template for template in pipeline_yaml["spec"]["templates"] if template["name"] != "lambda"] - - for node_yaml, node_json in zip(yaml_nodes, pipeline_nodes): - # Check the each node for correctness - if "container" not in node_yaml or "args" not in node_yaml["container"]: - continue - - node_args = node_yaml["container"]["args"][0] - - # Check that COS values are the same for each node - assert f'--cos-directory "{cos_prefix}/{inst_id}"' in node_args - assert f"--cos-endpoint {sample_metadata['cos_endpoint']}" in node_args - assert f"--cos-bucket {sample_metadata['cos_bucket']}" in node_args - - component_parameters = node_json["app_data"]["component_parameters"] - assert f"--file \"{component_parameters.get('filename')}\"" in node_args # check filename - assert node_yaml["container"]["image"] == component_parameters.get("runtime_image") # check runtime image - - if component_parameters.get("inputs"): # check inputs - args_input = re.search(r' --inputs "([\w.;]+)" ', node_args) - assert list_to_sorted_str(component_parameters["inputs"]) in "".join(sorted(args_input[1])) - if component_parameters.get("outputs"): # check outputs - args_output = re.search(r' --outputs "([\w.;]+)" ', node_args) - assert list_to_sorted_str(component_parameters["outputs"]) in "".join(sorted(args_output[1])) - if component_parameters.get("env_vars"): # check env_vars - env_list_from_yaml = node_yaml["container"]["env"] - for var_dict in component_parameters["env_vars"]: - adjusted_var_dict = {"name": var_dict["env_var"], "value": var_dict["value"]} - assert adjusted_var_dict in env_list_from_yaml + # Test begins here + + compiled_output_file = Path(tmpdir) / test_pipeline_file.with_suffix(".yaml") + 
compiled_output_file_name = str(compiled_output_file.absolute()) + + # generate Python DSL + pipeline_version = f"{pipeline.name}-0815" + pipeline_instance_id = f"{pipeline.name}-{datetime.now().strftime('%m%d%H%M%S')}" + experiment_name = f"{pipeline.name}-0815" + generated_dsl = processor._generate_pipeline_dsl( + pipeline=pipeline, + pipeline_name=pipeline.name, + workflow_engine=workflow_engine, + pipeline_version=pipeline_version, + pipeline_instance_id=pipeline_instance_id, + experiment_name=experiment_name, + ) + + # if the compiler discovers an issue with the generated DSL this call fails + processor._compile_pipeline_dsl( + dsl=generated_dsl, + workflow_engine=workflow_engine, + output_file=compiled_output_file_name, + pipeline_conf=None, + ) + + # Load compiled output + with open(compiled_output_file_name) as fh: + compiled_spec = yaml.safe_load(fh.read()) + + # There should be two templates, one for the DAG and one for the generic node. + # Locate the one for the generic node and inspect its properties. + assert len(compiled_spec["spec"]["templates"]) == 2 + if compiled_spec["spec"]["templates"][0]["name"] == compiled_spec["spec"]["entrypoint"]: + node_template = compiled_spec["spec"]["templates"][1] + else: + node_template = compiled_spec["spec"]["templates"][0] + + # + # validate data volumes, if applicable + expected_volume_mounts = op.elyra_params.get(MOUNTED_VOLUMES) + if len(expected_volume_mounts) > 0: + # There must be one or more 'volumeMounts' entry and one or more 'volumes' entry + assert node_template["container"].get("volumeMounts") is not None, node_template["container"] + assert node_template.get("volumes") is not None, compiled_spec["spec"] + + assert len(node_template["container"]["volumeMounts"]) >= len(expected_volume_mounts) + for volume_mount in expected_volume_mounts: + for volumemount_entry in node_template["container"]["volumeMounts"]: + entry_found = False + if volumemount_entry["mountPath"] == volume_mount.path: + assert volumemount_entry["name"] == volume_mount.pvc_name + assert volumemount_entry.get("subPath", None) == volume_mount.sub_path + assert volumemount_entry.get("readOnly", None) == volume_mount.read_only + entry_found = True + break + assert ( + entry_found + ), f"Cannot find volume mount entry '{volume_mount.path}' in {node_template['container']['volumeMounts']}" + for volume_entry in node_template["volumes"]: + entry_found = False + if volume_entry["name"] == volume_mount.pvc_name: + assert volume_entry["persistentVolumeClaim"]["claimName"] == volume_mount.pvc_name + entry_found = True + break + assert ( + entry_found + ), f"Cannot find volume entry '{volume_mount.path}' in {node_template['container']['volumeMounts']}" + + # + # validate custom shared memory size, if applicable + custom_shared_mem_size = op.elyra_params.get(KUBERNETES_SHARED_MEM_SIZE) + if custom_shared_mem_size: + # There must be one 'volumeMounts' entry and one 'volumes' entry + assert node_template["container"].get("volumeMounts") is not None, node_template["container"] + assert node_template.get("volumes") is not None, compiled_spec["spec"] + for volumemount_entry in node_template["container"]["volumeMounts"]: + entry_found = False + if volumemount_entry["mountPath"] == "/dev/shm": + assert volumemount_entry["name"] == "shm" + entry_found = True + break + assert ( + entry_found + ), "Missing volume mount entry for shared memory size in {node_template['container']['volumeMounts']}" + for volume_entry in node_template["volumes"]: + entry_found = False + if 
volume_entry["name"] == "shm": + assert volume_entry["emptyDir"]["medium"] == "Memory" + assert ( + volume_entry["emptyDir"]["sizeLimit"] + == f"{custom_shared_mem_size.size}{custom_shared_mem_size.units}" + ) + entry_found = True + break + assert ( + entry_found + ), f"Missing volume entry for shm size '{volume_mount.path}' in {node_template['container']['volumeMounts']}" + + # + # validate Kubernetes secrets, if applicable + expected_kubernetes_secrets = op.elyra_params.get(KUBERNETES_SECRETS) + if len(expected_kubernetes_secrets) > 0: + # There must be one or more 'env' entries + assert node_template["container"].get("env") is not None, node_template["container"] + for secret in expected_kubernetes_secrets: + for env_entry in node_template["container"]["env"]: + entry_found = False + if env_entry["name"] == secret.env_var: + assert env_entry["valueFrom"]["secretKeyRef"]["key"] == secret.key + assert env_entry["valueFrom"]["secretKeyRef"]["name"] == secret.name + entry_found = True + break + assert entry_found, f"Missing entry for secret '{secret.env_var}' in {node_template['container']['env']}" + + # Validate custom Kubernetes annotations + expected_kubernetes_annotations = op.elyra_params.get(KUBERNETES_POD_ANNOTATIONS) + if len(expected_kubernetes_annotations) > 0: + # There must be one or more 'metadata.annotations' entries + assert node_template["metadata"].get("annotations") is not None, node_template["metadata"] + for expected_annotation in expected_kubernetes_annotations: + assert expected_annotation.key in node_template["metadata"]["annotations"] + assert node_template["metadata"]["annotations"][expected_annotation.key] == ( + expected_annotation.value or "" + ) + + # + # Validate custom Kubernetes labels + expected_kubernetes_labels = op.elyra_params.get(KUBERNETES_POD_LABELS) + if len(expected_kubernetes_labels) > 0: + # There must be one or more 'metadata.labels' entries + assert node_template["metadata"].get("labels") is not None, node_template["metadata"] + for expected_label in expected_kubernetes_labels: + assert expected_label.key in node_template["metadata"]["labels"] + assert node_template["metadata"]["labels"][expected_label.key] == (expected_label.value or "") + + # + # Validate Kubernetes tolerations + # + # Validate custom Kubernetes tolerations + expected_kubernetes_tolerations = op.elyra_params.get(KUBERNETES_TOLERATIONS) + if len(expected_kubernetes_tolerations) > 0: + # There must be one or more 'tolerations' entries, e.g. 
+ # {effect: NoExecute, key: kt1, operator: Equal, value: '3'} + assert node_template.get("tolerations") is not None, node_template + for expected_toleration in expected_kubernetes_tolerations: + entry_found = False + for toleration_entry in node_template["tolerations"]: + if ( + toleration_entry.get("key") == expected_toleration.key + and toleration_entry.get("operator") == expected_toleration.operator + and toleration_entry.get("value") == expected_toleration.value + and toleration_entry.get("effect") == expected_toleration.effect + ): + entry_found = True + break + not_found_msg = ( + "Missing toleration entry for '" + f"{expected_toleration.key}::{expected_toleration.operator}::" + f"{expected_toleration.value}::{expected_toleration.effect}'" + f"in {node_template['tolerations']}" + ) + assert entry_found, not_found_msg + + +@pytest.mark.parametrize( + "kfp_runtime_config", + [ + kfp_runtime_config( + workflow_engine=WorkflowEngineType.ARGO, + ), + ], +) +@pytest.mark.skip("TODO: implement test") +def test_generate_pipeline_dsl_compile_pipeline_dsl_generic_components_data_exchange( + monkeypatch, processor: KfpPipelineProcessor, kfp_runtime_config: Metadata, tmpdir +): + """ + TODO Validate that code gen produces the expected artifacts if the pipeline contains + multiple generic nodes that are configured for data exchange + """ + assert False + + +@pytest.mark.parametrize( + "require_pull_secret", + [ + True, + False, + ], +) +@pytest.mark.parametrize( + "kfp_runtime_config", + [ + kfp_runtime_config( + workflow_engine=WorkflowEngineType.ARGO, + ), + ], +) +def test_generate_pipeline_dsl_compile_pipeline_dsl_generic_components_pipeline_conf( + monkeypatch, processor: KfpPipelineProcessor, kfp_runtime_config: Metadata, require_pull_secret: bool, tmpdir +): + """ + Validate that code gen produces the expected artifacts if the pipeline contains + generic nodes and associates runtime images are configured to require a pull secret. + The test results are not runtime type specific. + """ + workflow_engine = WorkflowEngineType.get_instance_by_value(kfp_runtime_config.metadata["engine"]) + + # Any valid pipeline file can be used to run this test, as long as it includes at least one node. + test_pipeline_file = ( + Path(__file__).parent / ".." / "resources" / "test_pipelines" / "kfp" / "kfp-one-node-generic.pipeline" + ) + # Instantiate a pipeline object to make it easier to obtain the information + # needed to perform validation. 
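+    # require_pull_secret is parametrized, so both the 'pull secret required' and
+    # 'no pull secret' code paths are covered.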
+ pipeline = load_and_patch_pipeline(pipeline_filename=test_pipeline_file, with_cos_object_prefix=False) + assert pipeline is not None + + mocked_runtime_image_configurations = generate_mocked_runtime_image_configurations( + pipeline, + require_pull_secret=require_pull_secret, + ) + + assert kfp_runtime_config is not None + assert mocked_runtime_image_configurations is not None + + monkeypatch.setattr(processor, "_upload_dependencies_to_object_store", lambda w, x, y, prefix: True) + monkeypatch.setattr(processor, "_verify_cos_connectivity", lambda x: True) + + # Test begins here + + compiled_output_file = Path(tmpdir) / test_pipeline_file.with_suffix(".yaml") + compiled_output_file_name = str(compiled_output_file.absolute()) + + # generate Python DSL for the specified workflow engine + pipeline_version = f"{pipeline.name}-test-0" + pipeline_instance_id = f"{pipeline.name}-{datetime.now().strftime('%m%d%H%M%S')}" + experiment_name = f"{pipeline.name}-test-0" + + # Generate pipeline DSL; this requires the _get_metadata_configuration mock + monkeypatch.setattr( + processor, + "_get_metadata_configuration", + mock.Mock(return_value="default", side_effect=[kfp_runtime_config] + [mocked_runtime_image_configurations]), + ) + generated_dsl = processor._generate_pipeline_dsl( + pipeline=pipeline, + pipeline_name=pipeline.name, + workflow_engine=workflow_engine, + pipeline_version=pipeline_version, + pipeline_instance_id=pipeline_instance_id, + experiment_name=experiment_name, + ) + + # Generate pipeline configuration; this requires the _get_metadata_configuration mock + monkeypatch.setattr( + processor, + "_get_metadata_configuration", + mock.Mock(return_value="default", side_effect=[mocked_runtime_image_configurations]), + ) + pipeline_conf = processor._generate_pipeline_conf(pipeline=pipeline) + + processor._compile_pipeline_dsl( + dsl=generated_dsl, + workflow_engine=workflow_engine, + output_file=compiled_output_file_name, + pipeline_conf=pipeline_conf, + ) + + # Load compiled workflow + with open(compiled_output_file_name) as f: + compiled_spec = yaml.safe_load(f.read()) + + expected_image_pull_secret_names = [ + rti_config.metadata["pull_secret"] + for rti_config in mocked_runtime_image_configurations + if rti_config.metadata.get("pull_secret") is not None + ] + + if len(expected_image_pull_secret_names) > 0: + # There must be one or more spec.imagePullSecrets entries + assert compiled_spec["spec"].get("imagePullSecrets") is not None, compiled_spec["spec"] + # Verify that each expected secret is referenced + for expected_secret_name in expected_image_pull_secret_names: + entry_found = False + for secret_entry in compiled_spec["spec"]["imagePullSecrets"]: + if secret_entry.get("name") == expected_secret_name: + entry_found = True + break + assert entry_found, ( + f"Missing entry for image pull secret '{expected_secret_name}' " + f"in {compiled_spec['spec']['imagePullSecrets']}" + ) diff --git a/elyra/tests/pipeline/resources/test_pipelines/kfp/a-notebook.ipynb b/elyra/tests/pipeline/resources/test_pipelines/kfp/a-notebook.ipynb new file mode 100644 index 000000000..519af069e --- /dev/null +++ b/elyra/tests/pipeline/resources/test_pipelines/kfp/a-notebook.ipynb @@ -0,0 +1,33 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "b039f7dd-b768-4bdb-9b47-f803c409aa77", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { 
+ "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/elyra/tests/pipeline/resources/test_pipelines/kfp/kfp-one-node-generic-elyra-properties.pipeline b/elyra/tests/pipeline/resources/test_pipelines/kfp/kfp-one-node-generic-elyra-properties.pipeline new file mode 100644 index 000000000..d9b0db7ca --- /dev/null +++ b/elyra/tests/pipeline/resources/test_pipelines/kfp/kfp-one-node-generic-elyra-properties.pipeline @@ -0,0 +1,156 @@ +{ + "doc_type": "pipeline", + "version": "3.0", + "json_schema": "http://api.dataplatform.ibm.com/schemas/common-pipeline/pipeline-flow/pipeline-flow-v3-schema.json", + "id": "elyra-auto-generated-pipeline", + "primary_pipeline": "primary", + "pipelines": [ + { + "id": "primary", + "nodes": [ + { + "id": "84d22396-568f-4c06-8558-95f715bba023", + "type": "execution_node", + "op": "execute-notebook-node", + "app_data": { + "component_parameters": { + "dependencies": [], + "include_subdirectories": false, + "outputs": [], + "env_vars": [], + "kubernetes_pod_annotations": [ + { + "key": "anno-key-1", + "value": "anno-value-1" + }, + { + "key": "anno-key-without-value-2" + } + ], + "kubernetes_pod_labels": [ + { + "key": "label-key-without-value-1" + }, + { + "key": "label-key-2", + "value": "label-value-2" + } + ], + "kubernetes_secrets": [ + { + "env_var": "secret_env_var_1", + "name": "secret-1", + "key": "secret-key-1" + } + ], + "kubernetes_shared_mem_size": { + "size": 0.5 + }, + "kubernetes_tolerations": [ + { + "key": "kt1", + "operator": "Equal", + "value": "3", + "effect": "NoExecute" + }, + { + "key": "kt2", + "operator": "Exists", + "effect": "NoSchedule" + }, + { + "operator": "Exists" + }, + { + "key": "kt3", + "operator": "Equal", + "value": "v3" + } + ], + "mounted_volumes": [ + { + "path": "/test/vol1", + "pvc_name": "test-pvc-1", + "read_only": false + }, + { + "path": "/test/vol2", + "pvc_name": "test-pvc-2", + "sub_path": "sub/path", + "read_only": false + }, + { + "path": "/test/vol3", + "pvc_name": "test-pvc-3", + "sub_path": "sub/path", + "read_only": true + } + ], + "filename": "a-notebook.ipynb", + "runtime_image": "tensorflow/tensorflow:2.8.0" + }, + "label": "", + "ui_data": { + "label": "a-notebook.ipynb", + "image": "/static/elyra/notebook.svg", + "x_pos": 183, + "y_pos": 77, + "description": "Run notebook file" + } + }, + "inputs": [ + { + "id": "inPort", + "app_data": { + "ui_data": { + "cardinality": { + "min": 0, + "max": -1 + }, + "label": "Input Port" + } + } + } + ], + "outputs": [ + { + "id": "outPort", + "app_data": { + "ui_data": { + "cardinality": { + "min": 0, + "max": -1 + }, + "label": "Output Port" + } + } + } + ] + } + ], + "app_data": { + "ui_data": { + "comments": [] + }, + "version": 8, + "runtime_type": "KUBEFLOW_PIPELINES", + "properties": { + "pipeline_defaults": { + "kubernetes_pod_annotations": [], + "kubernetes_shared_mem_size": {}, + "kubernetes_tolerations": [], + "kubernetes_pod_labels": [], + "mounted_volumes": [], + "env_vars": [], + "kubernetes_secrets": [] + }, + "name": "kfp-one-node-generic-elyra-properties", + "runtime": "Kubeflow Pipelines", + "description": "Test pipeline for generic nodes and Elyra properties" + } + }, + "runtime_ref": "" + } + ], + "schemas": [] +} \ No newline at end of file diff --git 
a/elyra/tests/pipeline/resources/test_pipelines/kfp/kfp-one-node-generic.pipeline b/elyra/tests/pipeline/resources/test_pipelines/kfp/kfp-one-node-generic.pipeline new file mode 100644 index 000000000..bb60fe67d --- /dev/null +++ b/elyra/tests/pipeline/resources/test_pipelines/kfp/kfp-one-node-generic.pipeline @@ -0,0 +1,109 @@ +{ + "doc_type": "pipeline", + "version": "3.0", + "json_schema": "http://api.dataplatform.ibm.com/schemas/common-pipeline/pipeline-flow/pipeline-flow-v3-schema.json", + "id": "elyra-auto-generated-pipeline", + "primary_pipeline": "primary", + "pipelines": [ + { + "id": "primary", + "nodes": [ + { + "id": "d3cbeeec-0e4f-4032-8318-4500fb9aa352", + "type": "execution_node", + "op": "execute-notebook-node", + "app_data": { + "component_parameters": { + "dependencies": [], + "include_subdirectories": false, + "outputs": [], + "env_vars": [], + "kubernetes_pod_annotations": [], + "kubernetes_pod_labels": [], + "kubernetes_secrets": [], + "kubernetes_shared_mem_size": {}, + "kubernetes_tolerations": [], + "mounted_volumes": [], + "filename": "a-notebook.ipynb" + }, + "label": "", + "ui_data": { + "label": "a-notebook.ipynb", + "image": "/static/elyra/notebook.svg", + "x_pos": 186, + "y_pos": 109, + "description": "Run notebook file" + } + }, + "inputs": [ + { + "id": "inPort", + "app_data": { + "ui_data": { + "cardinality": { + "min": 0, + "max": -1 + }, + "label": "Input Port" + } + } + } + ], + "outputs": [ + { + "id": "outPort", + "app_data": { + "ui_data": { + "cardinality": { + "min": 0, + "max": -1 + }, + "label": "Output Port" + } + } + } + ] + } + ], + "app_data": { + "ui_data": { + "comments": [ + { + "id": "6a01b028-38aa-4c6c-9b52-0ffe914b7b6d", + "x_pos": 30, + "y_pos": 34, + "width": 175, + "height": 42, + "content": "test comment", + "associated_id_refs": [ + { + "node_ref": "d3cbeeec-0e4f-4032-8318-4500fb9aa352" + } + ] + } + ] + }, + "version": 8, + "runtime_type": "KUBEFLOW_PIPELINES", + "properties": { + "pipeline_defaults": { + "kubernetes_shared_mem_size": {}, + "kubernetes_tolerations": [], + "kubernetes_pod_labels": [], + "kubernetes_pod_annotations": [], + "mounted_volumes": [], + "kubernetes_secrets": [], + "env_vars": [], + "runtime_image": "tensorflow/tensorflow:2.8.0", + "cos_object_prefix": "my/project" + }, + "name": "kfp-one-node-generic", + "runtime": "Kubeflow Pipelines", + "description": "A Kubeflow Pipelines pipeline that contains one generic node" + } + }, + "runtime_ref": "" + } + ], + "schemas": [] +} \ No newline at end of file diff --git a/elyra/tests/util/test_kubernetes.py b/elyra/tests/util/test_kubernetes.py index ee19204fd..2215bfad1 100644 --- a/elyra/tests/util/test_kubernetes.py +++ b/elyra/tests/util/test_kubernetes.py @@ -13,11 +13,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# +import string + from elyra.util.kubernetes import is_valid_annotation_key from elyra.util.kubernetes import is_valid_annotation_value from elyra.util.kubernetes import is_valid_kubernetes_resource_name from elyra.util.kubernetes import is_valid_label_key from elyra.util.kubernetes import is_valid_label_value +from elyra.util.kubernetes import sanitize_label_value def test_is_valid_kubernetes_resource_name_invalid_input(): @@ -226,3 +229,109 @@ def test_is_valid_annotation_value_valid_input(): assert is_valid_annotation_value(value="l_4") assert is_valid_annotation_value(value="4-you") assert is_valid_annotation_value(value="You.2") + + +def test_sanitize_label_value(): + valid_middle_chars = "-_." + + # test min length + assert sanitize_label_value(None) == "" + assert sanitize_label_value("") == "" + # test max length (63) + assert sanitize_label_value("a" * 63) == "a" * 63 + assert sanitize_label_value("a" * 64) == "a" * 63 # truncated + # test first and last char + assert sanitize_label_value("1") == "1" + assert sanitize_label_value("22") == "22" + assert sanitize_label_value("3_3") == "3_3" + assert sanitize_label_value("4u4") == "4u4" + assert sanitize_label_value("5$5") == "5_5" + + # test first char + for c in string.printable: + if c in string.ascii_letters + string.digits: + # first char is valid + # no length violation + assert sanitize_label_value(c) == c + assert sanitize_label_value(c + "B") == c + "B" + # max length + assert sanitize_label_value(c + "B" * 62) == (c + "B" * 62) + # max length exceeded + assert sanitize_label_value(c + "B" * 63) == (c + "B" * 62) # truncated + else: + # first char is invalid, e.g. '#a', and becomes the + # second char, which might require replacement + rv = c + if c not in valid_middle_chars: + rv = "_" + # no length violation + assert sanitize_label_value(c) == "a" + rv + "a" + assert sanitize_label_value(c + "B") == "a" + rv + "B" + # max length + assert sanitize_label_value(c + "B" * 62) == ("a" + rv + "B" * 61) # truncated + # max length exceeded + assert sanitize_label_value(c + "B" * 63) == ("a" + rv + "B" * 61) # truncated + + # test last char + for c in string.printable: + if c in string.ascii_letters + string.digits: + # no length violation + assert sanitize_label_value("b" + c) == "b" + c + # max length + assert sanitize_label_value("b" * 62 + c) == ("b" * 62 + c) + # max length exceeded + assert sanitize_label_value("b" * 63 + c) == ("b" * 63) + else: + # last char is invalid, e.g. 'a#', and requires + # patching + rv = c + if c not in valid_middle_chars: + rv = "_" + # no length violation (char is appended) + assert sanitize_label_value("b" + c) == "b" + rv + "a" + # max length (char is replaced) + assert sanitize_label_value("b" * 62 + c) == ("b" * 62 + "a") + # max length exceeded (no action required) + assert sanitize_label_value("b" * 63 + c) == ("b" * 63) + + # test first and last char + for c in string.printable: + if c in string.ascii_letters + string.digits: + # no length violation + assert sanitize_label_value(c + "b" + c) == c + "b" + c # nothing is modified + # max length + assert sanitize_label_value(c + "b" * 61 + c) == (c + "b" * 61 + c) # nothing is modified + # max length exceeded + assert sanitize_label_value(c + "b" * 62 + c) == c + "b" * 62 # truncate only + else: + # first and last characters are invalid, e.g. 
'#a#' + rv = c + if c not in valid_middle_chars: + rv = "_" + # no length violation + assert sanitize_label_value(c + "b" + c) == "a" + rv + "b" + rv + "a" + # max length + assert sanitize_label_value(c + "b" * 59 + c) == ("a" + rv + "b" * 59 + rv + "a") + # max length exceeded after processing, scenario 1 + # resolved by adding char before first, replace last + assert sanitize_label_value(c + "b" * 60 + c) == ("a" + rv + "b" * 60 + "a") + # max length exceeded after processing, scenario 2 + # resolved by adding char before first, appending after last + assert sanitize_label_value(c + "b" * 59 + c) == ("a" + rv + "b" * 59 + rv + "a") + # max length exceeded before processing, scenario 1 + # resolved by adding char before first, truncating last + assert sanitize_label_value(c + "b" * 62 + c) == ("a" + rv + "b" * 61) + # max length exceeded before processing, scenario 2 + # resolved by adding char before first, replacing last + assert sanitize_label_value(c + "b" * 60 + c * 3) == ("a" + rv + "b" * 60 + "a") + + # test char in a position other than first and last + # if invalid, the char is replaced with '_' + for c in string.printable: + if c in string.ascii_letters + string.digits + "-_.": + assert sanitize_label_value("A" + c + "Z") == "A" + c + "Z" + else: + assert sanitize_label_value("A" + c + "Z") == "A_Z" + + # encore + assert sanitize_label_value(r"¯\_(ツ)_/¯") == "a_________a" diff --git a/elyra/util/kubernetes.py b/elyra/util/kubernetes.py index a3d483288..e97f6370e 100644 --- a/elyra/util/kubernetes.py +++ b/elyra/util/kubernetes.py @@ -14,6 +14,7 @@ # limitations under the License. # import re +import string def is_valid_kubernetes_resource_name(name: str) -> bool: @@ -145,3 +146,60 @@ def is_valid_label_value(value: str) -> bool: return False return re.match(r"^[a-zA-Z0-9]([-_\.A-Za-z0-9]*[a-zA-Z0-9])*$", value) is not None + + +def sanitize_label_value(value: str) -> str: + """Produce a Kubernetes-compliant label value + + Valid label values must be 63 characters or less and + must be empty or begin and end with an alphanumeric + character ([a-z0-9A-Z]) with dashes (-), underscores + (_), dots (.), and alphanumerics between. + """ + + if value is None or len(value) == 0: + return "" # nothing to do + + max_length = 63 + # This char is added at the front and/or back + # of value, if the first and/or last character + # is invalid. For example a value of "-abc" + # is converted to "a-abc". The specified character + # must meet the label value constraints. + valid_char = "a" + # This char is used to replace invalid characters + # that are in the "middle" of value. For example + # a value of "abc%def" is converted to "abc_def". + # The specified character must meet the label value + # constraints. + valid_middle_char = "_" + + # must begin with [0-9a-zA-Z] + valid_chars = string.ascii_letters + string.digits + if value[0] not in valid_chars: + value = valid_char + value + + value = value[:max_length] # enforce max length + + # must end with [0-9a-zA-Z] + if value[-1] not in valid_chars: + if len(value) <= max_length - 1: + # append valid character if max length + # would not be exceeded + value = value + valid_char + else: + # replace with valid character + value = value[:-1] + valid_char + + # middle chars must be [0-9a-zA-Z\-_.] + valid_chars = valid_chars + "-_." 
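+    # The loop below replaces each remaining invalid interior character with
+    # valid_middle_char; the substitution is one-for-one, so the 63-character
+    # limit enforced above is preserved.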
+ + newstr = "" + for c in range(len(value)): + if value[c] not in valid_chars: + newstr = newstr + valid_middle_char + else: + newstr = newstr + value[c] + value = newstr + + return value diff --git a/test_requirements.txt b/test_requirements.txt index a6b4ae4e2..fa9402cc1 100644 --- a/test_requirements.txt +++ b/test_requirements.txt @@ -7,3 +7,4 @@ pytest-tornasync pytest_virtualenv requests-mock requests-unixsocket +kfp-tekton diff --git a/tests/assets/helloworld.pipeline b/tests/assets/generic-test.pipeline similarity index 99% rename from tests/assets/helloworld.pipeline rename to tests/assets/generic-test.pipeline index c7ed16269..a8e03b375 100644 --- a/tests/assets/helloworld.pipeline +++ b/tests/assets/generic-test.pipeline @@ -130,7 +130,7 @@ }, "version": 8, "properties": { - "name": "helloworld", + "name": "generic-test", "runtime": "Generic" } }, diff --git a/tests/integration/pipeline.ts b/tests/integration/pipeline.ts index a21a10137..2ae4c9a68 100644 --- a/tests/integration/pipeline.ts +++ b/tests/integration/pipeline.ts @@ -16,11 +16,12 @@ describe('Pipeline Editor tests', () => { beforeEach(() => { - cy.deleteFile('helloworld.yaml'); + cy.deleteFile('generic-test.yaml'); // previously exported pipeline + cy.deleteFile('generic-test.py'); // previously exported pipeline cy.deleteFile('*.pipeline'); // delete pipeline files used for testing cy.bootstrapFile('invalid.pipeline'); - cy.bootstrapFile('helloworld.pipeline'); + cy.bootstrapFile('generic-test.pipeline'); cy.bootstrapFile('helloworld.ipynb'); cy.exec('jupyter trust build/cypress-tests/helloworld.ipynb'); cy.bootstrapFile('helloworld.py'); @@ -35,7 +36,8 @@ describe('Pipeline Editor tests', () => { cy.deleteFile('helloworld.py'); // delete python file used for testing cy.deleteFile('output.txt'); // delete output files generated by tests cy.deleteFile('*.pipeline'); // delete pipeline files used for testing - cy.deleteFile('helloworld.yaml'); + cy.deleteFile('generic-test.yaml'); // exported pipeline + cy.deleteFile('generic-test.py'); // exported pipeline cy.deleteFile('invalid.txt'); // delete complex test directories @@ -294,7 +296,7 @@ describe('Pipeline Editor tests', () => { it('should open notebook on double-clicking the node', () => { // Open a pipeline in root directory - cy.openFile('helloworld.pipeline'); + cy.openFile('generic-test.pipeline'); // Open notebook node with double-click cy.get('.common-canvas-drop-div').within(() => { @@ -327,7 +329,7 @@ describe('Pipeline Editor tests', () => { it('should open notebook from node right-click menu', () => { // Open a pipeline in root directory - cy.openFile('helloworld.pipeline'); + cy.openFile('generic-test.pipeline'); // Open notebook node with right-click menu cy.get('#jp-main-dock-panel').within(() => { @@ -425,11 +427,11 @@ describe('Pipeline Editor tests', () => { // }); // it('should run pipeline with env vars and output files', () => { - // cy.openFile('helloworld.pipeline'); + // cy.openFile('generic-test.pipeline'); // cy.findByRole('button', { name: /run pipeline/i }).click(); - // cy.findByLabelText(/pipeline name/i).should('have.value', 'helloworld'); + // cy.findByLabelText(/pipeline name/i).should('have.value', 'generic-test'); // cy.findByLabelText(/runtime platform/i).should( // 'have.value', // '__elyra_local__' @@ -461,11 +463,11 @@ describe('Pipeline Editor tests', () => { cy.findByText(/failed export:/i).should('be.visible'); }); - it('should export pipeline as yaml', () => { + it('should export KFP pipeline as yaml', () => { // Install 
runtime configuration
     cy.installRuntimeConfig({ type: 'kfp' });
 
-    cy.openFile('helloworld.pipeline');
+    cy.openFile('generic-test.pipeline');
 
     // try to export valid pipeline
     cy.findByRole('button', { name: /export pipeline/i }).click();
@@ -492,14 +494,48 @@
       'be.visible'
     );
 
-    cy.readFile('build/cypress-tests/helloworld.yaml');
+    cy.readFile('build/cypress-tests/generic-test.yaml');
   });
 
-  it('should export pipeline as python dsl', () => {
+  it('should export KFP pipeline as Python DSL', () => {
+    // Install runtime configuration
+    cy.installRuntimeConfig({ type: 'kfp' });
+
+    cy.openFile('generic-test.pipeline');
+
+    // try to export valid pipeline
+    cy.findByRole('button', { name: /export pipeline/i }).click();
+
+    // check label for generic pipeline
+    cy.get('.jp-Dialog-header').contains('Export pipeline');
+
+    cy.findByLabelText(/runtime platform/i).select('KUBEFLOW_PIPELINES');
+
+    cy.findByLabelText(/runtime configuration/i)
+      .select('kfp_test_runtime')
+      .should('have.value', 'kfp_test_runtime');
+
+    // Validate all export options are available
+    cy.findByLabelText(/export pipeline as/i)
+      .select('Python DSL')
+      .should('have.value', 'py');
+
+    // actual export requires minio
+    cy.contains('OK').click();
+
+    // validate job was executed successfully, this can take a while in ci
+    cy.findByText(/pipeline export succeeded/i, { timeout: 30000 }).should(
+      'be.visible'
+    );
+
+    cy.readFile('build/cypress-tests/generic-test.py');
+  });
+
+  it('should export Airflow pipeline as python dsl', () => {
     // Install runtime configuration
     cy.installRuntimeConfig({ type: 'airflow' });
 
-    cy.openFile('helloworld.pipeline');
+    cy.openFile('generic-test.pipeline');
 
     // try to export valid pipeline
     cy.findByRole('button', { name: /export pipeline/i }).click();
@@ -513,7 +549,7 @@
       .select('airflow_test_runtime')
       .should('have.value', 'airflow_test_runtime');
 
-    // overwrite existing helloworld.py file
+    // overwrite existing generic-test.py file
     cy.findByLabelText(/export pipeline as/i)
       .select('Airflow domain-specific language Python code')
       .should('have.value', 'py');
@@ -534,7 +570,7 @@
   });
 
   it('should not leak properties when switching between nodes', () => {
-    cy.openFile('helloworld.pipeline');
+    cy.openFile('generic-test.pipeline');
 
     cy.get('#jp-main-dock-panel').within(() => {
       cy.findByText('helloworld.ipynb').rightclick();
@@ -600,7 +636,7 @@
     // Validate all export options are available
     cy.findByRole('button', { name: /export pipeline/i }).click();
     cy.findByRole('option', { name: /yaml/i }).should('have.value', 'yaml');
-    cy.findByRole('option', { name: /python/i }).should('not.exist');
+    cy.findByRole('option', { name: /python/i }).should('have.value', 'py');
 
     // Dismiss dialog
     cy.findByRole('button', { name: /cancel/i }).click();
@@ -671,7 +707,7 @@
     // Validate all export options are available for kfp
     cy.findByLabelText(/runtime platform/i).select('KUBEFLOW_PIPELINES');
     cy.findByRole('option', { name: /yaml/i }).should('have.value', 'yaml');
-    cy.findByRole('option', { name: /python/i }).should('not.exist');
+    cy.findByRole('option', { name: /python/i }).should('have.value', 'py');
 
     // Dismiss dialog
     cy.findByRole('button', { name: /cancel/i }).click();
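A minimal usage sketch of the new `sanitize_label_value` helper introduced in `elyra/util/kubernetes.py`; the expected values in the comments are taken directly from the unit tests added in `elyra/tests/util/test_kubernetes.py`:

```python
from elyra.util.kubernetes import sanitize_label_value

# Interior characters outside [A-Za-z0-9-_.] are replaced with "_", invalid
# first/last characters are padded with "a", and values are capped at 63 characters.
assert sanitize_label_value("5$5") == "5_5"
assert sanitize_label_value("a" * 64) == "a" * 63
assert sanitize_label_value(r"¯\_(ツ)_/¯") == "a_________a"
```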