diff --git a/docs/source/user_guide/pipelines.md b/docs/source/user_guide/pipelines.md
index 73df99a0e..28fe6ada6 100644
--- a/docs/source/user_guide/pipelines.md
+++ b/docs/source/user_guide/pipelines.md
@@ -372,12 +372,16 @@ To export a pipeline from the Visual Pipeline Editor:
#### Exporting a pipeline from the command line interface
-Use the [`elyra-pipeline`](command-line-interface.html#working-with-pipelines) `export` command to export a pipeline to a runtime-specific format, such as YAML for Kubeflow Pipelines or Python DAG for Apache Airflow.
+Use the [`elyra-pipeline`](command-line-interface.html#working-with-pipelines) `export` command to export a pipeline to a runtime-specific format:
+- Kubeflow Pipelines: [Python DSL](https://v1-5-branch.kubeflow.org/docs/components/pipelines/sdk/build-pipeline/) or YAML
+- Apache Airflow: Python DAG
```bash
$ elyra-pipeline export a-notebook.pipeline --runtime-config kfp_dev_env --output /path/to/exported.yaml --overwrite
```
+By default, export produces YAML-formatted output for Kubeflow Pipelines and a Python DAG for Apache Airflow, which supports only that format. To choose a different format for Kubeflow Pipelines, specify the `--format` option. Supported values are `py` and `yaml`.
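+
+For example, reusing the runtime configuration from the example above, export the pipeline as Kubeflow Pipelines Python DSL instead of YAML:
+```bash
+$ elyra-pipeline export a-notebook.pipeline --runtime-config kfp_dev_env --format py --output /path/to/exported.py --overwrite
+```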
+
To learn more about supported parameters, run
```bash
$ elyra-pipeline export --help
diff --git a/elyra/cli/pipeline_app.py b/elyra/cli/pipeline_app.py
index 6bfa58f02..e4517c5e1 100644
--- a/elyra/cli/pipeline_app.py
+++ b/elyra/cli/pipeline_app.py
@@ -670,10 +670,16 @@ def describe(json_option, pipeline_path):
"--output",
required=False,
type=Path,
- help="Exported file name (including optional path). Defaults to " " the current directory and the pipeline name.",
+ help="Exported file name (including optional path). Defaults to the current directory and the pipeline name.",
+)
+@click.option(
+ "--format",
+ required=False,
+ type=str,
+ help="File export format.",
)
@click.option("--overwrite", is_flag=True, help="Overwrite output file if it already exists.")
-def export(pipeline_path, runtime_config, output, overwrite):
+def export(pipeline_path, runtime_config, output, format, overwrite):
"""
Export a pipeline to a runtime-specific format
"""
@@ -699,14 +705,20 @@ def export(pipeline_path, runtime_config, output, overwrite):
param_hint="--runtime-config",
)
+ # Determine which export format(s) the runtime processor supports
resources = RuntimeTypeResources.get_instance_by_type(RuntimeProcessorType.get_instance_by_name(runtime_type))
supported_export_formats = resources.get_export_extensions()
if len(supported_export_formats) == 0:
raise click.ClickException(f"Runtime type '{runtime_type}' does not support export.")
- # If, in the future, a runtime supports multiple export output formats,
- # the user can choose one. For now, choose the only option.
- selected_export_format = supported_export_formats[0]
+ # Verify that the user selected a valid format. If none was specified,
+    # the first entry in the supported list is used as the default.
+ selected_export_format = (format or supported_export_formats[0]).lower()
+ if selected_export_format not in supported_export_formats:
+ raise click.BadParameter(
+ f"Valid export formats are {supported_export_formats}.",
+ param_hint="--format",
+ )
selected_export_format_suffix = f".{selected_export_format}"
# generate output file name from the user-provided input
diff --git a/elyra/kfp/operator.py b/elyra/kfp/operator.py
deleted file mode 100644
index 36b26165b..000000000
--- a/elyra/kfp/operator.py
+++ /dev/null
@@ -1,374 +0,0 @@
-#
-# Copyright 2018-2022 Elyra Authors
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-import os
-import string
-from typing import Dict
-from typing import List
-from typing import Optional
-
-from kfp.dsl import ContainerOp
-from kfp.dsl import RUN_ID_PLACEHOLDER
-from kubernetes.client.models import V1EmptyDirVolumeSource
-from kubernetes.client.models import V1EnvVar
-from kubernetes.client.models import V1EnvVarSource
-from kubernetes.client.models import V1ObjectFieldSelector
-from kubernetes.client.models import V1Volume
-from kubernetes.client.models import V1VolumeMount
-
-from elyra._version import __version__
-
-"""
-The ExecuteFileOp uses a python script to bootstrap the user supplied image with the required dependencies.
-In order for the script run properly, the image used, must at a minimum, have the 'curl' utility available
-and have python3
-"""
-
-# Inputs and Outputs separator character. If updated,
-# same-named variable in bootstrapper.py must be updated!
-INOUT_SEPARATOR = ";"
-
-ELYRA_GITHUB_ORG = os.getenv("ELYRA_GITHUB_ORG", "elyra-ai")
-ELYRA_GITHUB_BRANCH = os.getenv("ELYRA_GITHUB_BRANCH", "main" if "dev" in __version__ else "v" + __version__)
-ELYRA_PIP_CONFIG_URL = os.getenv(
- "ELYRA_PIP_CONFIG_URL",
- f"https://raw.githubusercontent.com/{ELYRA_GITHUB_ORG}/elyra/{ELYRA_GITHUB_BRANCH}/etc/kfp/pip.conf",
-)
-ELYRA_BOOTSTRAP_SCRIPT_URL = os.getenv(
- "ELYRA_BOOTSTRAP_SCRIPT_URL",
- f"https://raw.githubusercontent.com/{ELYRA_GITHUB_ORG}/elyra/{ELYRA_GITHUB_BRANCH}/elyra/kfp/bootstrapper.py",
-)
-ELYRA_REQUIREMENTS_URL = os.getenv(
- "ELYRA_REQUIREMENTS_URL",
- f"https://raw.githubusercontent.com/{ELYRA_GITHUB_ORG}/"
- f"elyra/{ELYRA_GITHUB_BRANCH}/etc/generic/requirements-elyra.txt",
-)
-ELYRA_REQUIREMENTS_URL_PY37 = os.getenv(
- "ELYRA_REQUIREMENTS_URL_PY37",
- f"https://raw.githubusercontent.com/{ELYRA_GITHUB_ORG}/"
- f"elyra/{ELYRA_GITHUB_BRANCH}/etc/generic/requirements-elyra-py37.txt",
-)
-
-
-class ExecuteFileOp(ContainerOp):
- def __init__(
- self,
- pipeline_name: str,
- experiment_name: str,
- notebook: str,
- cos_endpoint: str,
- cos_bucket: str,
- cos_directory: str,
- cos_dependencies_archive: str,
- pipeline_version: Optional[str] = "",
- pipeline_source: Optional[str] = None,
- pipeline_outputs: Optional[List[str]] = None,
- pipeline_inputs: Optional[List[str]] = None,
- pipeline_envs: Optional[Dict[str, str]] = None,
- requirements_url: Optional[str] = None,
- bootstrap_script_url: Optional[str] = None,
- emptydir_volume_size: Optional[str] = None,
- cpu_request: Optional[str] = None,
- mem_request: Optional[str] = None,
- gpu_limit: Optional[str] = None,
- workflow_engine: Optional[str] = "argo",
- **kwargs,
- ):
- """Create a new instance of ContainerOp.
- Args:
- pipeline_name: pipeline that this op belongs to
- experiment_name: the experiment where pipeline_name is executed
- notebook: name of the notebook that will be executed per this operation
- cos_endpoint: object storage endpoint e.g weaikish1.fyre.ibm.com:30442
- cos_bucket: bucket to retrieve archive from
- cos_directory: name of the directory in the object storage bucket to pull
- cos_dependencies_archive: archive file name to get from object storage bucket e.g archive1.tar.gz
- pipeline_version: optional version identifier
- pipeline_source: pipeline source
- pipeline_outputs: comma delimited list of files produced by the notebook
- pipeline_inputs: comma delimited list of files to be consumed/are required by the notebook
- pipeline_envs: dictionary of environmental variables to set in the container prior to execution
- requirements_url: URL to a python requirements.txt file to be installed prior to running the notebook
- bootstrap_script_url: URL to a custom python bootstrap script to run
- emptydir_volume_size: Size(GB) of the volume to create for the workspace when using CRIO container runtime
- cpu_request: number of CPUs requested for the operation
- mem_request: memory requested for the operation (in Gi)
- gpu_limit: maximum number of GPUs allowed for the operation
- workflow_engine: Kubeflow workflow engine, defaults to 'argo'
- kwargs: additional key value pairs to pass e.g. name, image, sidecars & is_exit_handler.
- See Kubeflow pipelines ContainerOp definition for more parameters or how to use
- https://kubeflow-pipelines.readthedocs.io/en/latest/source/kfp.dsl.html#kfp.dsl.ContainerOp
- """
-
- self.pipeline_name = pipeline_name
- self.pipeline_version = pipeline_version
- self.pipeline_source = pipeline_source
- self.experiment_name = experiment_name
- self.notebook = notebook
- self.notebook_name = os.path.basename(notebook)
- self.cos_endpoint = cos_endpoint
- self.cos_bucket = cos_bucket
- self.cos_directory = cos_directory
- self.cos_dependencies_archive = cos_dependencies_archive
- self.container_work_dir_root_path = "./"
- self.container_work_dir_name = "jupyter-work-dir/"
- self.container_work_dir = self.container_work_dir_root_path + self.container_work_dir_name
- self.bootstrap_script_url = bootstrap_script_url
- self.requirements_url = requirements_url
- self.pipeline_outputs = pipeline_outputs
- self.pipeline_inputs = pipeline_inputs
- self.pipeline_envs = pipeline_envs
- self.cpu_request = cpu_request
- self.mem_request = mem_request
- self.gpu_limit = gpu_limit
-
- argument_list = []
-
- """ CRI-o support for kfp pipelines
- We need to attach an emptydir volume for each notebook that runs since CRI-o runtime does not allow
- us to write to the base image layer file system, only to volumes.
- """
- self.emptydir_volume_name = "workspace"
- self.emptydir_volume_size = emptydir_volume_size
- self.python_user_lib_path = ""
- self.python_user_lib_path_target = ""
- self.python_pip_config_url = ""
-
- if self.emptydir_volume_size:
- self.container_work_dir_root_path = "/opt/app-root/src/"
- self.container_python_dir_name = "python3/"
- self.container_work_dir = self.container_work_dir_root_path + self.container_work_dir_name
- self.python_user_lib_path = self.container_work_dir + self.container_python_dir_name
- self.python_user_lib_path_target = "--target=" + self.python_user_lib_path
- self.python_pip_config_url = ELYRA_PIP_CONFIG_URL
-
- if not self.bootstrap_script_url:
- self.bootstrap_script_url = ELYRA_BOOTSTRAP_SCRIPT_URL
-
- if not self.requirements_url:
- self.requirements_url = ELYRA_REQUIREMENTS_URL
-
- if "name" not in kwargs:
- raise TypeError("You need to provide a name for the operation.")
- elif not kwargs.get("name"):
- raise ValueError("You need to provide a name for the operation.")
-
- if "image" not in kwargs:
- raise ValueError("You need to provide an image.")
-
- if not notebook:
- raise ValueError("You need to provide a notebook.")
-
- if "arguments" not in kwargs:
- """If no arguments are passed, we use our own.
- If ['arguments'] are set, we assume container's ENTRYPOINT is set and dependencies are installed
- NOTE: Images being pulled must have python3 available on PATH and cURL utility
- """
-
- common_curl_options = '--fail -H "Cache-Control: no-cache"'
-
- argument_list.append(
- f"mkdir -p {self.container_work_dir} && cd {self.container_work_dir} && "
- f"echo 'Downloading {self.bootstrap_script_url}' && "
- f"curl {common_curl_options} -L {self.bootstrap_script_url} --output bootstrapper.py && "
- f"echo 'Downloading {self.requirements_url}' && "
- f"curl {common_curl_options} -L {self.requirements_url} --output requirements-elyra.txt && "
- f"echo 'Downloading {ELYRA_REQUIREMENTS_URL_PY37}' && "
- f"curl {common_curl_options} -L {ELYRA_REQUIREMENTS_URL_PY37} --output requirements-elyra-py37.txt && "
- )
-
- if self.emptydir_volume_size:
- argument_list.append(
- f"mkdir {self.container_python_dir_name} && cd {self.container_python_dir_name} && "
- f"echo 'Downloading {self.python_pip_config_url}' && "
- f"curl {common_curl_options} -L {self.python_pip_config_url} --output pip.conf && cd .. &&"
- )
-
- argument_list.append(
- f"python3 -m pip install {self.python_user_lib_path_target} packaging && "
- "python3 -m pip freeze > requirements-current.txt && "
- "python3 bootstrapper.py "
- f'--pipeline-name "{self.pipeline_name}" '
- f"--cos-endpoint {self.cos_endpoint} "
- f"--cos-bucket {self.cos_bucket} "
- f'--cos-directory "{self.cos_directory}" '
- f'--cos-dependencies-archive "{self.cos_dependencies_archive}" '
- f'--file "{self.notebook}" '
- )
-
- if self.pipeline_inputs:
- inputs_str = self._artifact_list_to_str(self.pipeline_inputs)
- argument_list.append(f'--inputs "{inputs_str}" ')
-
- if self.pipeline_outputs:
- outputs_str = self._artifact_list_to_str(self.pipeline_outputs)
- argument_list.append(f'--outputs "{outputs_str}" ')
-
- if self.emptydir_volume_size:
- argument_list.append(f'--user-volume-path "{self.python_user_lib_path}" ')
-
- kwargs["command"] = ["sh", "-c"]
- kwargs["arguments"] = "".join(argument_list)
-
- super().__init__(**kwargs)
-
- # We must deal with the envs after the superclass initialization since these amend the
- # container attribute that isn't available until now.
- if self.pipeline_envs:
- for key, value in self.pipeline_envs.items(): # Convert dict entries to format kfp needs
- self.container.add_env_variable(V1EnvVar(name=key, value=value))
-
- # If crio volume size is found then assume kubeflow pipelines environment is using CRI-o as
- # its container runtime
- if self.emptydir_volume_size:
- self.add_volume(
- V1Volume(
- empty_dir=V1EmptyDirVolumeSource(medium="", size_limit=self.emptydir_volume_size),
- name=self.emptydir_volume_name,
- )
- )
-
- self.container.add_volume_mount(
- V1VolumeMount(mount_path=self.container_work_dir_root_path, name=self.emptydir_volume_name)
- )
-
- # Append to PYTHONPATH location of elyra dependencies in installed in Volume
- self.container.add_env_variable(V1EnvVar(name="PYTHONPATH", value=self.python_user_lib_path))
-
- if self.cpu_request:
- self.container.set_cpu_request(cpu=str(cpu_request))
-
- if self.mem_request:
- self.container.set_memory_request(memory=str(mem_request) + "G")
-
- if self.gpu_limit:
- gpu_vendor = self.pipeline_envs.get("GPU_VENDOR", "nvidia")
- self.container.set_gpu_limit(gpu=str(gpu_limit), vendor=gpu_vendor)
-
- # Generate unique ELYRA_RUN_NAME value and expose it as an environment
- # variable in the container
- if not workflow_engine:
- raise ValueError("workflow_engine is missing and needs to be specified.")
- if workflow_engine.lower() == "argo":
- # attach RUN_ID_PLACEHOLDER as run name
- # '{{workflow.annotations.pipelines.kubeflow.org/run_name}}' variable
- # cannot be resolved by Argo in KF 1.4
- run_name_placeholder = RUN_ID_PLACEHOLDER
- self.container.add_env_variable(V1EnvVar(name="ELYRA_RUN_NAME", value=run_name_placeholder))
- elif workflow_engine.lower() == "tekton":
- try:
- from kfp_tekton import TektonClient # noqa: F401
- except ImportError:
- raise ValueError(
- "kfp-tekton not installed. Please install using elyra[kfp-tekton] to use Tekton engine."
- )
-
- # For Tekton derive the value from the specified pod annotation
- annotation = "pipelines.kubeflow.org/run_name"
- field_path = f"metadata.annotations['{annotation}']"
- self.container.add_env_variable(
- V1EnvVar(
- name="ELYRA_RUN_NAME",
- value_from=V1EnvVarSource(field_ref=V1ObjectFieldSelector(field_path=field_path)),
- )
- )
- else:
- raise ValueError(f"{workflow_engine} is not a supported workflow engine.")
-
- # Attach metadata to the pod
- # Node type (a static type for this op)
- self.add_pod_label("elyra/node-type", ExecuteFileOp._normalize_label_value("notebook-script"))
- # Pipeline name
- self.add_pod_label("elyra/pipeline-name", ExecuteFileOp._normalize_label_value(self.pipeline_name))
- # Pipeline version
- self.add_pod_label("elyra/pipeline-version", ExecuteFileOp._normalize_label_value(self.pipeline_version))
- # Experiment name
- self.add_pod_label("elyra/experiment-name", ExecuteFileOp._normalize_label_value(self.experiment_name))
- # Pipeline node name
- self.add_pod_label("elyra/node-name", ExecuteFileOp._normalize_label_value(kwargs.get("name")))
- # Pipeline node file
- self.add_pod_annotation("elyra/node-file-name", self.notebook)
-
- # Identify the pipeline source, which can be a
- # pipeline file (mypipeline.pipeline), a Python
- # script or notebook that was submitted
- if self.pipeline_source is not None:
- self.add_pod_annotation("elyra/pipeline-source", self.pipeline_source)
-
- def _artifact_list_to_str(self, pipeline_array):
- trimmed_artifact_list = []
- for artifact_name in pipeline_array:
- if INOUT_SEPARATOR in artifact_name: # if INOUT_SEPARATOR is in name, throw since this is our separator
- raise ValueError(f"Illegal character ({INOUT_SEPARATOR}) found in filename '{artifact_name}'.")
- trimmed_artifact_list.append(artifact_name.strip())
- return INOUT_SEPARATOR.join(trimmed_artifact_list)
-
- @staticmethod
- def _normalize_label_value(value):
- """Produce a Kubernetes-compliant label from value
-
- Valid label values must be 63 characters or less and
- must be empty or begin and end with an alphanumeric
- character ([a-z0-9A-Z]) with dashes (-), underscores
- (_), dots (.), and alphanumerics between.
- """
-
- if value is None or len(value) == 0:
- return "" # nothing to do
-
- max_length = 63
- # This char is added at the front and/or back
- # of value, if the first and/or last character
- # is invalid. For example a value of "-abc"
- # is converted to "a-abc". The specified character
- # must meet the label value constraints.
- valid_char = "a"
- # This char is used to replace invalid characters
- # that are in the "middle" of value. For example
- # a value of "abc%def" is converted to "abc_def".
- # The specified character must meet the label value
- # constraints.
- valid_middle_char = "_"
-
- # must begin with [0-9a-zA-Z]
- valid_chars = string.ascii_letters + string.digits
- if value[0] not in valid_chars:
- value = valid_char + value
-
- value = value[:max_length] # enforce max length
-
- # must end with [0-9a-zA-Z]
- if value[-1] not in valid_chars:
- if len(value) <= max_length - 1:
- # append valid character if max length
- # would not be exceeded
- value = value + valid_char
- else:
- # replace with valid character
- value = value[:-1] + valid_char
-
- # middle chars must be [0-9a-zA-Z\-_.]
- valid_chars = valid_chars + "-_."
-
- newstr = ""
- for c in range(len(value)):
- if value[c] not in valid_chars:
- newstr = newstr + valid_middle_char
- else:
- newstr = newstr + value[c]
- value = newstr
-
- return value
diff --git a/elyra/pipeline/kfp/processor_kfp.py b/elyra/pipeline/kfp/processor_kfp.py
index 7c7ddbcff..969c2c122 100644
--- a/elyra/pipeline/kfp/processor_kfp.py
+++ b/elyra/pipeline/kfp/processor_kfp.py
@@ -14,29 +14,34 @@
# limitations under the License.
#
from datetime import datetime
+from enum import Enum
+from enum import unique
+import hashlib
+import importlib
+import json
import os
+from pathlib import Path
import re
+import string
+import sys
import tempfile
import time
from typing import Any
from typing import Dict
+from typing import List
+from typing import Optional
from typing import Set
from urllib.parse import urlsplit
+from autopep8 import fix_code
+from jinja2 import Environment
+from jinja2 import PackageLoader
from kfp import Client as ArgoClient
from kfp import compiler as kfp_argo_compiler
from kfp import components as components
from kfp.dsl import PipelineConf
-from kfp.aws import use_aws_secret # noqa H306
+from kfp.dsl import RUN_ID_PLACEHOLDER
from kubernetes import client as k8s_client
-from kubernetes.client import V1EmptyDirVolumeSource
-from kubernetes.client import V1EnvVar
-from kubernetes.client import V1EnvVarSource
-from kubernetes.client import V1PersistentVolumeClaimVolumeSource
-from kubernetes.client import V1SecretKeySelector
-from kubernetes.client import V1Toleration
-from kubernetes.client import V1Volume
-from kubernetes.client import V1VolumeMount
try:
from kfp_tekton import compiler as kfp_tekton_compiler
@@ -47,7 +52,6 @@
TektonClient = None
from elyra._version import __version__
-from elyra.kfp.operator import ExecuteFileOp
from elyra.metadata.schemaspaces import RuntimeImages
from elyra.metadata.schemaspaces import Runtimes
from elyra.pipeline import pipeline_constants
@@ -63,7 +67,6 @@
from elyra.pipeline.component_parameter import VolumeMount
from elyra.pipeline.kfp.kfp_authentication import AuthenticationError
from elyra.pipeline.kfp.kfp_authentication import KFPAuthenticator
-from elyra.pipeline.pipeline import GenericOperation
from elyra.pipeline.pipeline import Operation
from elyra.pipeline.pipeline import Pipeline
from elyra.pipeline.processor import PipelineProcessor
@@ -71,9 +74,43 @@
from elyra.pipeline.processor import RuntimePipelineProcessorResponse
from elyra.pipeline.runtime_type import RuntimeProcessorType
from elyra.util.cos import join_paths
+from elyra.util.kubernetes import sanitize_label_value
from elyra.util.path import get_absolute_path
+@unique
+class WorkflowEngineType(Enum):
+ """
+ Identifies Kubeflow Pipelines workflow engines that this
+ processor supports.
+ """
+
+ ARGO = "argo"
+ TEKTON = "tekton"
+
+ @staticmethod
+ def get_instance_by_value(value: str) -> "WorkflowEngineType":
+ """
+        Produces a WorkflowEngineType enum instance if the provided value
+        identifies a supported workflow engine type.
+        Raises KeyError if value does not identify a supported workflow engine type.
+ """
+ if value:
+ for instance in WorkflowEngineType.__members__.values():
+ if instance.value == value.lower():
+ return instance
+ raise KeyError(f"'{value}'")
+
+
+# Externalize these constants to make them available to the code gen tests
+CRIO_VOL_DEF_NAME = "workspace"
+CRIO_VOL_DEF_SIZE = "20Gi"
+CRIO_VOL_DEF_MEDIUM = ""
+CRIO_VOL_MOUNT_PATH = "/opt/app-root/src"
+CRIO_VOL_WORKDIR_PATH = f"{CRIO_VOL_MOUNT_PATH}/jupyter-work-dir"
+CRIO_VOL_PYTHON_PATH = f"{CRIO_VOL_WORKDIR_PATH}/python3"
+
+
class KfpPipelineProcessor(RuntimePipelineProcessor):
_type = RuntimeProcessorType.KUBEFLOW_PIPELINES
_name = "kfp"
@@ -107,8 +144,8 @@ def process(self, pipeline):
api_username = runtime_configuration.metadata.get("api_username")
api_password = runtime_configuration.metadata.get("api_password")
user_namespace = runtime_configuration.metadata.get("user_namespace")
- engine = runtime_configuration.metadata.get("engine")
- if engine == "Tekton" and not TektonClient:
+ workflow_engine = WorkflowEngineType.get_instance_by_value(runtime_configuration.metadata.get("engine", "argo"))
+ if workflow_engine == WorkflowEngineType.TEKTON and not TektonClient:
raise ValueError(
"Python package `kfp-tekton` is not installed. "
"Please install using `elyra[kfp-tekton]` to use Tekton engine."
@@ -141,7 +178,7 @@ def process(self, pipeline):
# Create Kubeflow Client
#############
try:
- if engine == "Tekton":
+ if workflow_engine == WorkflowEngineType.TEKTON:
client = TektonClient(
host=api_endpoint,
cookies=auth_info.get("cookies", None),
@@ -246,29 +283,26 @@ def process(self, pipeline):
# the pipelines' dependencies, if applicable
pipeline_instance_id = f"{pipeline_name}-{timestamp}"
- pipeline_function = lambda: self._cc_pipeline( # nopep8 E731
- pipeline,
+ # Generate Python DSL from workflow
+ pipeline_dsl = self._generate_pipeline_dsl(
+ pipeline=pipeline,
pipeline_name=pipeline_name,
- pipeline_version=pipeline_version_name,
- experiment_name=experiment_name,
pipeline_instance_id=pipeline_instance_id,
+ workflow_engine=workflow_engine,
)
- # collect pipeline configuration information
- pipeline_conf = self._generate_pipeline_conf(pipeline)
+ # Collect pipeline configuration information
+ pipeline_conf = self._generate_pipeline_conf(pipeline=pipeline)
+
+ # Compile the Python DSL, producing the input for the upload to
+ # Kubeflow Pipelines
+ self._compile_pipeline_dsl(pipeline_dsl, workflow_engine, pipeline_path, pipeline_conf)
- # compile the pipeline
- if engine == "Tekton":
- kfp_tekton_compiler.TektonCompiler().compile(
- pipeline_function, pipeline_path, pipeline_conf=pipeline_conf
- )
- else:
- kfp_argo_compiler.Compiler().compile(pipeline_function, pipeline_path, pipeline_conf=pipeline_conf)
except RuntimeError:
raise
except Exception as ex:
raise RuntimeError(
- f"Failed to compile pipeline '{pipeline_name}' with engine '{engine}' to: '{pipeline_path}'"
+ f"Error compiling pipeline '{pipeline_name}' with engine '{workflow_engine.value}'."
) from ex
self.log_pipeline_info(pipeline_name, "pipeline compiled", duration=time.time() - t0)
@@ -394,10 +428,28 @@ def process(self, pipeline):
object_storage_path=object_storage_path,
)
- def export(self, pipeline: Pipeline, pipeline_export_format: str, pipeline_export_path: str, overwrite: bool):
- # Verify that the KfpPipelineProcessor supports the given export format
+ def export(
+ self, pipeline: Pipeline, pipeline_export_format: str, pipeline_export_path: str, overwrite: bool
+ ) -> str:
+ """
+ Export pipeline to the specified format and store the output
+ in the specified file.
+
+ :param pipeline: The pipeline to be exported
+ :type pipeline: Pipeline
+        :param pipeline_export_format: "py" for Kubeflow Pipelines Python DSL or "yaml" for a YAML-formatted static configuration file
+ :type pipeline_export_format: str
+ :param pipeline_export_path: name and location of exported file
+ :type pipeline_export_path: str
+ :param overwrite: If false, export raises an error if the output file exists.
+ :type overwrite: bool
+ :raises ValueError: raised if a parameter is invalid
+ :raises RuntimeError: an error occurred during export
+ :return: location of the exported file
+ :rtype: str
+ """
+ # Verify that the processor supports the given export format
self._verify_export_format(pipeline_export_format)
-
t0_all = time.time()
timestamp = datetime.now().strftime("%m%d%H%M%S")
pipeline_name = pipeline.name
@@ -413,28 +465,34 @@ def export(self, pipeline: Pipeline, pipeline_export_format: str, pipeline_expor
schemaspace=Runtimes.RUNTIMES_SCHEMASPACE_ID, name=pipeline.runtime_config
)
- engine = runtime_configuration.metadata.get("engine")
- if engine == "Tekton" and not TektonClient:
+ workflow_engine = WorkflowEngineType.get_instance_by_value(runtime_configuration.metadata.get("engine", "argo"))
+ if workflow_engine == WorkflowEngineType.TEKTON and not TektonClient:
raise ValueError("kfp-tekton not installed. Please install using elyra[kfp-tekton] to use Tekton engine.")
- if os.path.exists(absolute_pipeline_export_path) and not overwrite:
+ if Path(absolute_pipeline_export_path).exists() and not overwrite:
raise ValueError("File " + absolute_pipeline_export_path + " already exists.")
self.log_pipeline_info(pipeline_name, f"Exporting pipeline as a .{pipeline_export_format} file")
- # Export pipeline as static configuration file (YAML formatted)
try:
- # Exported pipeline is not associated with an experiment
- # or a version. The association is established when the
- # pipeline is imported into KFP by the user.
- pipeline_function = lambda: self._cc_pipeline(
- pipeline, pipeline_name, pipeline_instance_id=pipeline_instance_id
- ) # nopep8
- if engine == "Tekton":
- self.log.info("Compiling pipeline for Tekton engine")
- kfp_tekton_compiler.TektonCompiler().compile(pipeline_function, absolute_pipeline_export_path)
+ # Generate Python DSL
+ pipeline_dsl = self._generate_pipeline_dsl(
+ pipeline=pipeline,
+ pipeline_name=pipeline_name,
+ pipeline_instance_id=pipeline_instance_id,
+ workflow_engine=workflow_engine,
+ )
+
+ if pipeline_export_format == "py":
+ # Write Python DSL to file
+ with open(absolute_pipeline_export_path, "w") as dsl_output:
+ dsl_output.write(pipeline_dsl)
else:
- self.log.info("Compiling pipeline for Argo engine")
- kfp_argo_compiler.Compiler().compile(pipeline_function, absolute_pipeline_export_path)
+ # Generate pipeline configuration
+ pipeline_conf = self._generate_pipeline_conf(pipeline=pipeline)
+ #
+ # Export pipeline as static configuration file (YAML formatted)
+ # by invoking the compiler for the selected engine
+ self._compile_pipeline_dsl(pipeline_dsl, workflow_engine, absolute_pipeline_export_path, pipeline_conf)
except RuntimeError:
raise
except Exception as ex:
@@ -462,54 +520,170 @@ def _collect_envs(self, operation: Operation, **kwargs) -> Dict:
envs["ELYRA_WRITABLE_CONTAINER_DIR"] = self.WCD
return envs
- def _cc_pipeline(
+ def _generate_pipeline_dsl(
self,
pipeline: Pipeline,
pipeline_name: str,
+ workflow_engine: WorkflowEngineType,
pipeline_version: str = "",
experiment_name: str = "",
pipeline_instance_id: str = None,
- export=False,
- ):
+ ) -> str:
+ """
+ Generate Python DSL for Kubeflow Pipelines v1
+ """
- runtime_configuration = self._get_metadata_configuration(
- schemaspace=Runtimes.RUNTIMES_SCHEMASPACE_ID, name=pipeline.runtime_config
+ # Load Kubeflow Pipelines Python DSL template
+ loader = PackageLoader("elyra", "templates/kubeflow/v1")
+ template_env = Environment(loader=loader)
+ # Add filter that produces a Python-safe variable name
+ template_env.filters["python_safe"] = lambda x: re.sub(r"[" + re.escape(string.punctuation) + "\\s]", "_", x)
+ # Add filter that escapes the " character in strings
+ template_env.filters["string_delimiter_safe"] = lambda string: re.sub('"', '\\"', string)
+ template = template_env.get_template("python_dsl_template.jinja2")
+
+ # Convert pipeline into workflow tasks
+ workflow_tasks = self._generate_workflow_tasks(
+ pipeline,
+ pipeline_name,
+ workflow_engine,
+ pipeline_instance_id=pipeline_instance_id,
+ pipeline_version=pipeline_version,
+ experiment_name=experiment_name,
)
- cos_endpoint = runtime_configuration.metadata["cos_endpoint"]
- cos_username = runtime_configuration.metadata.get("cos_username")
- cos_password = runtime_configuration.metadata.get("cos_password")
- cos_secret = runtime_configuration.metadata.get("cos_secret")
- cos_bucket = runtime_configuration.metadata.get("cos_bucket")
- engine = runtime_configuration.metadata["engine"]
+ # Gather unique component definitions from workflow task list.
+ unique_component_definitions = {}
+ for key, operation in workflow_tasks.items():
+ unique_component_definitions[operation["component_definition_hash"]] = operation["component_definition"]
+
+ # render the Kubeflow Pipelines Python DSL template
+ pipeline_dsl = template.render(
+ elyra_version=__version__,
+ pipeline_name=pipeline_name,
+ pipeline_description=pipeline.description,
+ pipeline_parameters=None,
+ workflow_tasks=workflow_tasks,
+ component_definitions=unique_component_definitions,
+ workflow_engine=workflow_engine.value,
+ )
- pipeline_instance_id = pipeline_instance_id or pipeline_name
+ # Prettify generated Python DSL
+ # Defer the import to postpone logger messages: https://github.com/psf/black/issues/2058
+ import black
- artifact_object_prefix = join_paths(
- pipeline.pipeline_properties.get(pipeline_constants.COS_OBJECT_PREFIX), pipeline_instance_id
- )
+ try:
+ pipeline_dsl = black.format_str(fix_code(pipeline_dsl), mode=black.FileMode())
+ except Exception:
+ # if an error was encountered log the generated DSL for troubleshooting
+ self.log.error("Error post-processing generated Python DSL:")
+ self.log.error(pipeline_dsl)
+ raise
+
+ return pipeline_dsl
+
+ def _compile_pipeline_dsl(
+ self, dsl: str, workflow_engine: WorkflowEngineType, output_file: str, pipeline_conf: PipelineConf
+ ) -> None:
+ """
+ Compile Python DSL using the compiler for the specified workflow_engine.
+
+ :param dsl: the Python DSL to be compiled
+ :type dsl: str
+        :param workflow_engine: Workflow engine whose compiler is to be used
+        :type workflow_engine: WorkflowEngineType
+ :param output_file: output file name
+ :type output_file: str
+ :param pipeline_conf: Pipeline configuration to apply
+ :type pipeline_conf: PipelineConf
+ :raises RuntimeError: raised when a fatal error is encountered
+ """
+
+ with tempfile.TemporaryDirectory() as temp_dir:
+ module_name = "generated_dsl"
+ try:
+ # Add temporary directory to Python module search path.
+ sys.path.insert(0, temp_dir)
+ # Save DSL in temporary file so we can import it as a module.
+ dsl_file = Path(temp_dir) / f"{module_name}.py"
+ with open(dsl_file, "w") as dsl_output:
+ dsl_output.write(dsl)
+ # Load DSL by importing the "generated_dsl" module.
+ mod = importlib.import_module(module_name)
+ # If this module was previously imported it won't reflect
+ # changes that might be in the DSL we are about to compile.
+ # Force a module re-load to pick up any changes.
+ mod = importlib.reload(mod)
+ # Obtain handle to pipeline function, which is named
+ # in the generated Python DSL "generated_pipeline"
+ pipeline_function = getattr(mod, "generated_pipeline")
+ # compile the DSL
+ if workflow_engine == WorkflowEngineType.TEKTON:
+ kfp_tekton_compiler.TektonCompiler().compile(
+ pipeline_function, output_file, pipeline_conf=pipeline_conf
+ )
+ else:
+ kfp_argo_compiler.Compiler().compile(pipeline_function, output_file, pipeline_conf=pipeline_conf)
+ except Exception as ex:
+ raise RuntimeError(
+ f"Failed to compile pipeline with workflow_engine '{workflow_engine.value}' to '{output_file}'"
+ ) from ex
+ finally:
+ # remove temporary directory from Python module search path
+ del sys.path[0]
+ # remove module entry; it's no longer needed now that it was
+ # processed by the Kubeflow Pipelines compiler
+ sys.modules.pop(module_name, None)
+
+ def _generate_workflow_tasks(
+ self,
+ pipeline: Pipeline,
+ pipeline_name: str,
+ workflow_engine: WorkflowEngineType,
+ pipeline_version: str = "",
+ experiment_name: str = "",
+ pipeline_instance_id: str = None,
+ export: bool = False,
+ ) -> Dict[str, Dict]:
+ """
+ Produce the workflow tasks that implement the pipeline nodes. The output is
+ a dictionary containing task ids as keys and task definitions as values.
+ """
+
+ pipeline_instance_id = pipeline_instance_id or pipeline_name
self.log_pipeline_info(
pipeline_name,
- f"processing pipeline dependencies for upload to '{cos_endpoint}' "
- f"bucket '{cos_bucket}' folder '{artifact_object_prefix}'",
+ "Processing pipeline",
)
t0_all = time.time()
- emptydir_volume_size = ""
- container_runtime = bool(os.getenv("CRIO_RUNTIME", "False").lower() == "true")
-
- # Create dictionary that maps component Id to its ContainerOp instance
- target_ops = {}
-
# Sort operations based on dependency graph (topological order)
sorted_operations = PipelineProcessor._sort_operations(pipeline.operations)
- # Determine whether access to cloud storage is required
- for operation in sorted_operations:
- if isinstance(operation, GenericOperation):
- self._verify_cos_connectivity(runtime_configuration)
- break
+ if any(operation.is_generic for operation in sorted_operations):
+            # The pipeline contains at least one node that is implemented
+            # using a generic component: collect and verify relevant information
+ runtime_configuration = self._get_metadata_configuration(
+ schemaspace=Runtimes.RUNTIMES_SCHEMASPACE_ID, name=pipeline.runtime_config
+ )
+ # - verify that cloud storage can be accessed
+ self._verify_cos_connectivity(runtime_configuration)
+ # - collect runtime configuration information
+ cos_username = runtime_configuration.metadata.get("cos_username")
+ cos_password = runtime_configuration.metadata.get("cos_password")
+ cos_secret = runtime_configuration.metadata.get("cos_secret")
+ cos_endpoint = runtime_configuration.metadata["cos_endpoint"]
+ cos_bucket = runtime_configuration.metadata.get("cos_bucket")
+ artifact_object_prefix = join_paths(
+ pipeline.pipeline_properties.get(pipeline_constants.COS_OBJECT_PREFIX), pipeline_instance_id
+ )
+ # - load the generic component definition template
+ generic_component_template = Environment(
+ loader=PackageLoader("elyra", "templates/kubeflow/v1")
+ ).get_template("generic_component_definition_template.jinja2")
+ # Determine whether we are executing in a CRI-O runtime environment
+ is_crio_runtime = os.getenv("CRIO_RUNTIME", "False").lower() == "true"
# All previous operation outputs should be propagated throughout the pipeline.
# In order to process this recursively, the current operation's inputs should be combined
@@ -518,82 +692,200 @@ def _cc_pipeline(
PipelineProcessor._propagate_operation_inputs_outputs(pipeline, sorted_operations)
+ # Scrub all node labels of invalid characters
for operation in sorted_operations:
+ operation.name = re.sub("-+", "-", re.sub("[^-_0-9A-Za-z ]+", "-", operation.name)).lstrip("-").rstrip("-")
- if container_runtime:
- # Volume size to create when using CRI-o, NOTE: IBM Cloud minimum is 20Gi
- emptydir_volume_size = "20Gi"
-
- sanitized_operation_name = self._sanitize_operation_name(operation.name)
+ # Generate unique names for all operations
+ unique_names = {}
+ for operation in sorted_operations:
+ # Ensure operation name is unique
+ new_name = operation.name
+ while new_name in unique_names:
+ new_name = f"{operation.name}_{unique_names[operation.name]}"
+ unique_names[operation.name] += 1
+ operation.name = new_name
+ unique_names[operation.name] = 1
+
+ # Create workflow task list that is used as input for the DSL code generator
+ # from the sorted list of pipeline.pipeline.Operations
+ workflow_tasks = {}
+ for operation in sorted_operations:
+ # Create workflow task, which Jinja2 uses to generate the task specific
+ # source code.
+ workflow_task = {
+ "id": operation.id,
+ "escaped_task_id": re.sub(r"[" + re.escape(string.punctuation) + "\\s]", "_", operation.id),
+ "name": operation.name,
+ "doc": operation.doc,
+ "upstream_workflow_task_ids": operation.parent_operation_ids or [],
+ "task_inputs": {}, # as defined in the component specification
+ "task_outputs": {}, # as defined in the component specification
+ "task_modifiers": {}, # attached volumes, resources, env variables, metadata, etc
+ }
+
+ # Add Elyra-owned properties (data volume mounts, kubernetes labels, etc)
+ # to the task_modifiers property.
+ for value in operation.elyra_params.values():
+ if isinstance(value, (ElyraProperty, ElyraPropertyList)):
+ value.add_to_execution_object(
+ runtime_processor=self, execution_object=workflow_task["task_modifiers"]
+ )
- # Create pipeline operation
- # If operation is one of the "generic" set of NBs or scripts, construct custom ExecuteFileOp
- if isinstance(operation, GenericOperation):
- component = ComponentCache.get_generic_component_from_op(operation.classifier)
+ if operation.is_generic:
+ # The task is implemented using a generic component
+ workflow_task["uses_custom_component"] = False
- # Collect env variables
- pipeline_envs = self._collect_envs(
- operation, cos_secret=cos_secret, cos_username=cos_username, cos_password=cos_password
+ component_definition = generic_component_template.render(
+ container_image=operation.runtime_image,
+ command_args=self._compose_container_command_args(
+ pipeline_name=pipeline_name,
+ cos_endpoint=cos_endpoint,
+ cos_bucket=cos_bucket,
+ cos_directory=artifact_object_prefix,
+ cos_dependencies_archive=self._get_dependency_archive_name(operation),
+ filename=operation.filename,
+ cos_inputs=operation.inputs,
+ cos_outputs=operation.outputs,
+ is_crio_runtime=is_crio_runtime,
+ ),
)
+ workflow_task["component_definition"] = component_definition
+ workflow_task["component_definition_hash"] = hashlib.sha256(component_definition.encode()).hexdigest()
- operation_artifact_archive = self._get_dependency_archive_name(operation)
-
- self.log.debug(
- f"Creating pipeline component archive '{operation_artifact_archive}' for operation '{operation}'"
+ # attach environment variables
+ workflow_task["task_modifiers"]["env_variables"] = self._collect_envs(
+ operation, cos_secret=cos_secret, cos_username=cos_username, cos_password=cos_password
)
- container_op = ExecuteFileOp(
- name=sanitized_operation_name,
- pipeline_name=pipeline_name,
- experiment_name=experiment_name,
- notebook=operation.filename,
- cos_endpoint=cos_endpoint,
- cos_bucket=cos_bucket,
- cos_directory=artifact_object_prefix,
- cos_dependencies_archive=operation_artifact_archive,
- pipeline_version=pipeline_version,
- pipeline_source=pipeline.source,
- pipeline_inputs=operation.inputs,
- pipeline_outputs=operation.outputs,
- pipeline_envs=pipeline_envs,
- emptydir_volume_size=emptydir_volume_size,
- cpu_request=operation.cpu,
- mem_request=operation.memory,
- gpu_limit=operation.gpu,
- workflow_engine=engine,
- image=operation.runtime_image,
- file_outputs={
- "mlpipeline-metrics": f"{pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR']}/mlpipeline-metrics.json", # noqa
- "mlpipeline-ui-metadata": f"{pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR']}/mlpipeline-ui-metadata.json", # noqa
- },
- )
+ # hack only: since we don't use the ContainerOp constructor anymore
+ # we cannot use the file_outputs parameter to provide the information
+ # https://www.kubeflow.org/docs/components/pipelines/v1/sdk/output-viewer/
+ workflow_task["task_modifiers"]["special_output_files"] = {
+ "mlpipeline_ui_metadata": (Path(self.WCD) / "mlpipeline-ui-metadata.json").as_posix(),
+ "mlpipeline_metrics": (Path(self.WCD) / "mlpipeline-metrics.json").as_posix(),
+ }
+ # apply object storage Kubernetes secret, if one was provided
if cos_secret and not export:
- container_op.apply(use_aws_secret(cos_secret))
+ workflow_task["task_modifiers"]["object_storage_secret"] = cos_secret
- image_namespace = self._get_metadata_configuration(RuntimeImages.RUNTIME_IMAGES_SCHEMASPACE_ID)
- for image_instance in image_namespace:
+ # apply container image pull policy, if one was specified
+ for image_instance in self._get_metadata_configuration(RuntimeImages.RUNTIME_IMAGES_SCHEMASPACE_ID):
if image_instance.metadata["image_name"] == operation.runtime_image and image_instance.metadata.get(
"pull_policy"
):
- container_op.container.set_image_pull_policy(image_instance.metadata["pull_policy"])
+ workflow_task["task_modifiers"]["image_pull_policy"] = image_instance.metadata["pull_policy"]
+ break
- self.log_pipeline_info(
- pipeline_name,
- f"processing operation dependencies for id '{operation.id}'",
- operation_name=operation.name,
+ # set resource constraints
+ workflow_task["task_modifiers"]["cpu_request"] = operation.cpu
+ workflow_task["task_modifiers"]["mem_request"] = {
+ "size": operation.memory,
+ "units": "G",
+ }
+ workflow_task["task_modifiers"]["gpu_limit"] = {
+ "size": operation.gpu,
+ "vendor": workflow_task["task_modifiers"]["env_variables"].get("GPU_VENDOR", "nvidia"),
+ }
+
+ if is_crio_runtime:
+ # Attach empty dir volume
+ workflow_task["task_modifiers"]["crio_runtime"] = {
+ "emptydir_volume_name": CRIO_VOL_DEF_NAME,
+ "emptydir_volume_size": CRIO_VOL_DEF_SIZE,
+ "emptydir_volume_medium": CRIO_VOL_DEF_MEDIUM,
+ "emptydir_mount_path": CRIO_VOL_MOUNT_PATH,
+ }
+ # Set Python module search path
+ workflow_task["task_modifiers"]["env_variables"]["PYTHONPATH"] = CRIO_VOL_PYTHON_PATH
+
+ # Attach identifying metadata
+ if workflow_task["task_modifiers"].get("pod_labels") is None:
+ workflow_task["task_modifiers"]["pod_labels"] = {}
+ # Node type (a static type for this op)
+ workflow_task["task_modifiers"]["pod_labels"]["elyra/node-type"] = sanitize_label_value(
+ "notebook-script"
)
+ # Pipeline name
+ workflow_task["task_modifiers"]["pod_labels"]["elyra/pipeline-name"] = sanitize_label_value(
+ pipeline_name
+ )
+ # Pipeline version
+ workflow_task["task_modifiers"]["pod_labels"]["elyra/pipeline-version"] = sanitize_label_value(
+ pipeline_version
+ )
+ # Experiment name
+ workflow_task["task_modifiers"]["pod_labels"]["elyra/experiment-name"] = sanitize_label_value(
+ experiment_name
+ )
+ # Pipeline node name
+ workflow_task["task_modifiers"]["pod_labels"]["elyra/node-name"] = sanitize_label_value(operation.name)
+
+ # Add non-identifying metadata
+ if workflow_task["task_modifiers"].get("pod_annotations") is None:
+ workflow_task["task_modifiers"]["pod_annotations"] = {}
+ # Pipeline node file
+ workflow_task["task_modifiers"]["pod_annotations"]["elyra/node-file-name"] = operation.filename
+
+ # Identify the pipeline source, which can be a pipeline file (mypipeline.pipeline), a Python
+ # script or notebook that was submitted
+ if pipeline.source is not None:
+ workflow_task["task_modifiers"]["pod_annotations"]["elyra/pipeline-source"] = pipeline.source
+
+ # Generate unique ELYRA_RUN_NAME value, which gets exposed as an environment
+ # variable
+ if workflow_engine == WorkflowEngineType.TEKTON:
+ # Value is derived from an existing annotation; use dummy value
+ workflow_task["task_modifiers"]["set_run_name"] = "dummy value"
+ else:
+ # Use Kubeflow Pipelines provided RUN_ID_PLACEHOLDER as run name
+ workflow_task["task_modifiers"]["set_run_name"] = RUN_ID_PLACEHOLDER
+ # Upload dependencies to cloud storage
self._upload_dependencies_to_object_store(
runtime_configuration, pipeline_name, operation, prefix=artifact_object_prefix
)
- # If operation is a "non-standard" component, load it's spec and create operation with factory function
else:
+ # ----------------------------------------
+ # The task is implemented using a custom component
+ workflow_task["uses_custom_component"] = True
+
# Retrieve component from cache
component = ComponentCache.instance().get_component(self._type, operation.classifier)
- # Convert the user-entered value of certain properties according to their type
+ workflow_task["component_definition"] = component.definition
+ workflow_task["component_definition_hash"] = hashlib.sha256(component.definition.encode()).hexdigest()
+
+ # Identify task inputs and outputs using the component spec
+ # If no data type was specified, string is assumed
+ factory_function = components.load_component_from_text(component.definition)
+ for input in factory_function.component_spec.inputs or []:
+ sanitized_input_name = self._sanitize_param_name(input.name)
+ workflow_task["task_inputs"][sanitized_input_name] = {
+ "value": None,
+ "task_output_reference": None,
+ "pipeline_parameter_reference": None,
+ "data_type": (input.type or "string").lower(),
+ }
+ # Determine whether the value needs to be rendered in quotes
+ # in the generated DSL code. For example "my name" (string), and 34 (integer).
+ workflow_task["task_inputs"][sanitized_input_name]["requires_quoted_rendering"] = workflow_task[
+ "task_inputs"
+ ][sanitized_input_name]["data_type"] not in [
+ "integer",
+ "float",
+ "bool",
+ ]
+
+ for output in factory_function.component_spec.outputs or []:
+ workflow_task["task_outputs"][self._sanitize_param_name(output.name)] = {
+ "data_type": output.type,
+ }
+
+ # Iterate over component parameters and assign values to
+ # task inputs and task add-ons
for component_property in component.properties:
self.log.debug(
f"Processing component parameter '{component_property.name}' "
@@ -601,23 +893,31 @@ def _cc_pipeline(
)
if component_property.allowed_input_types == [None]:
- # Outputs are skipped
+ # The property does not support inputs. Ignore
continue
+ sanitized_component_property_id = self._sanitize_param_name(component_property.ref)
+ if sanitized_component_property_id in workflow_task["task_inputs"]:
+ reference = workflow_task["task_inputs"][sanitized_component_property_id]
+ else:
+ workflow_task["task_modifiers"][sanitized_component_property_id] = {}
+ reference = workflow_task["task_modifiers"][sanitized_component_property_id]
+
# Get corresponding property's value from parsed pipeline
property_value_dict = operation.component_params.get(component_property.ref)
data_entry_type = property_value_dict.get("widget", None) # one of: inputpath, file, raw data type
property_value = property_value_dict.get("value", None)
if data_entry_type == "inputpath":
- # KFP path-based parameters accept an input from a parent
+ # task input is the output of an upstream task
output_node_id = property_value["value"] # parent node id
output_node_parameter_key = property_value["option"].replace("output_", "") # parent param
- operation.component_params[component_property.ref] = target_ops[output_node_id].outputs[
- output_node_parameter_key
- ]
+ reference["task_output_reference"] = {
+ "task_id": re.sub(r"[" + re.escape(string.punctuation) + "\\s]", "_", output_node_id),
+ "output_id": self._sanitize_param_name(output_node_parameter_key),
+ }
else: # Parameter is either of a raw data type or file contents
if data_entry_type == "file" and property_value:
- # Read a value from a file
+ # Read value from the specified file
absolute_path = get_absolute_path(self.root_dir, property_value)
with open(absolute_path, "r") as f:
property_value = f.read() if os.path.getsize(absolute_path) else None
@@ -628,80 +928,26 @@ def _cc_pipeline(
# Process the value according to its type, if necessary
if component_property.json_data_type == "object":
- processed_value = self._process_dictionary_value(property_value)
- operation.component_params[component_property.ref] = processed_value
+ reference["value"] = self._process_dictionary_value(property_value)
elif component_property.json_data_type == "array":
- processed_value = self._process_list_value(property_value)
- operation.component_params[component_property.ref] = processed_value
+ reference["value"] = self._process_list_value(property_value)
else:
- operation.component_params[component_property.ref] = property_value
-
- # Build component task factory
- try:
- factory_function = components.load_component_from_text(component.definition)
- except Exception as e:
- # TODO Fix error messaging and break exceptions down into categories
- self.log.error(f"Error loading component spec for {operation.name}: {str(e)}")
- raise RuntimeError(f"Error loading component spec for {operation.name}.")
-
- # Add factory function, which returns a ContainerOp task instance, to pipeline operation dict
- try:
- comp_spec_inputs = [
- inputs.name.lower().replace(" ", "_") for inputs in factory_function.component_spec.inputs or []
- ]
-
- # Remove inputs and outputs from params dict
- # TODO: need to have way to retrieve only required params
- parameter_removal_list = ["inputs", "outputs"]
- for component_param in operation.component_params_as_dict.keys():
- if component_param not in comp_spec_inputs:
- parameter_removal_list.append(component_param)
-
- for parameter in parameter_removal_list:
- operation.component_params_as_dict.pop(parameter, None)
-
- # Create ContainerOp instance and assign appropriate user-provided name
- sanitized_component_params = {
- self._sanitize_param_name(name): value
- for name, value in operation.component_params_as_dict.items()
- }
- container_op = factory_function(**sanitized_component_params)
- container_op.set_display_name(operation.name)
+ reference["value"] = property_value
- except Exception as e:
- # TODO Fix error messaging and break exceptions down into categories
- self.log.error(f"Error constructing component {operation.name}: {str(e)}")
- raise RuntimeError(f"Error constructing component {operation.name}.")
+ self.log.debug(f"Completed processing of task '{workflow_task['name']}':")
+ self.log.debug(json.dumps(workflow_task, sort_keys=False, indent=4))
- # Attach node comment
- if operation.doc:
- container_op.add_pod_annotation("elyra/node-user-doc", operation.doc)
+ # append task to task list
+ workflow_tasks[workflow_task["id"]] = workflow_task
- # Process Elyra-owned properties as required for each type
- for value in operation.elyra_params.values():
- if isinstance(value, (ElyraProperty, ElyraPropertyList)):
- value.add_to_execution_object(runtime_processor=self, execution_object=container_op)
-
- # Add ContainerOp to target_ops dict
- target_ops[operation.id] = container_op
-
- # Process dependencies after all the operations have been created
- for operation in pipeline.operations.values():
- op = target_ops[operation.id]
- for parent_operation_id in operation.parent_operation_ids:
- parent_op = target_ops[parent_operation_id] # Parent Operation
- op.after(parent_op)
-
- self.log_pipeline_info(pipeline_name, "pipeline dependencies processed", duration=(time.time() - t0_all))
+ # end of processing
+ self.log_pipeline_info(pipeline_name, "Pipeline processed", duration=(time.time() - t0_all))
+ return workflow_tasks
- return target_ops
-
- def _generate_pipeline_conf(self, pipeline: dict) -> PipelineConf:
+ def _generate_pipeline_conf(self, pipeline: Pipeline) -> PipelineConf:
"""
Returns a KFP pipeline configuration for this pipeline, which can be empty.
- :param pipeline: pipeline dictionary
- :type pipeline: dict
:return: https://kubeflow-pipelines.readthedocs.io/en/latest/source/kfp.dsl.html#kfp.dsl.PipelineConf
:rtype: kfp.dsl import PipelineConf
"""
@@ -713,14 +959,14 @@ def _generate_pipeline_conf(self, pipeline: dict) -> PipelineConf:
# Gather input for container image pull secrets in support of private container image registries
# https://kubeflow-pipelines.readthedocs.io/en/latest/source/kfp.dsl.html#kfp.dsl.PipelineConf.set_image_pull_secrets
#
- image_namespace = self._get_metadata_configuration(schemaspace=RuntimeImages.RUNTIME_IMAGES_SCHEMASPACE_ID)
-
- # iterate through pipeline operations and create list of Kubernetes secret names
- # that are associated with generic components
+ # Retrieve all runtime image configurations
+ runtime_image_configurations = self._get_metadata_configuration(RuntimeImages.RUNTIME_IMAGES_SCHEMASPACE_ID)
+    # For each generic pipeline operation determine whether its runtime image
+ # is protected by a pull secret
container_image_pull_secret_names = []
for operation in pipeline.operations.values():
- if isinstance(operation, GenericOperation):
- for image_instance in image_namespace:
+ if operation.is_generic:
+ for image_instance in runtime_image_configurations:
if image_instance.metadata["image_name"] == operation.runtime_image:
if image_instance.metadata.get("pull_secret"):
container_image_pull_secret_names.append(image_instance.metadata.get("pull_secret"))
@@ -739,13 +985,112 @@ def _generate_pipeline_conf(self, pipeline: dict) -> PipelineConf:
return pipeline_conf
- @staticmethod
- def _sanitize_operation_name(name: str) -> str:
+ def _compose_container_command_args(
+ self,
+ pipeline_name: str,
+ cos_endpoint: str,
+ cos_bucket: str,
+ cos_directory: str,
+ cos_dependencies_archive: str,
+ filename: str,
+ cos_inputs: Optional[List[str]] = [],
+ cos_outputs: Optional[List[str]] = [],
+ is_crio_runtime: bool = False,
+ ) -> str:
"""
- In KFP, only letters, numbers, spaces, "_", and "-" are allowed in name.
- :param name: name of the operation
+ Compose the container command arguments for a generic component, taking into
+        account whether the container will run in a CRI-O environment.
"""
- return re.sub("-+", "-", re.sub("[^-_0-9A-Za-z ]+", "-", name)).lstrip("-").rstrip("-")
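+        # NOTE: the container image used by a generic component must provide
+        # python3 and the 'curl' utility on PATH so the bootstrap artifacts
+        # referenced below can be downloaded and run.
+        # The artifact locations default to the matching branch of the Elyra
+        # GitHub repository and can be overridden via environment variables.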
+ elyra_github_org = os.getenv("ELYRA_GITHUB_ORG", "elyra-ai")
+ elyra_github_branch = os.getenv("ELYRA_GITHUB_BRANCH", "main" if "dev" in __version__ else "v" + __version__)
+ elyra_bootstrap_script_url = os.getenv(
+ "ELYRA_BOOTSTRAP_SCRIPT_URL",
+ f"https://raw.githubusercontent.com/{elyra_github_org}/elyra/{elyra_github_branch}/elyra/kfp/bootstrapper.py", # noqa E501
+ )
+ elyra_requirements_url = os.getenv(
+ "ELYRA_REQUIREMENTS_URL",
+ f"https://raw.githubusercontent.com/{elyra_github_org}/"
+ f"elyra/{elyra_github_branch}/etc/generic/requirements-elyra.txt",
+ )
+ elyra_requirements_url_py37 = os.getenv(
+ "elyra_requirements_url_py37",
+ f"https://raw.githubusercontent.com/{elyra_github_org}/"
+ f"elyra/{elyra_github_branch}/etc/generic/requirements-elyra-py37.txt",
+ )
+
+ if is_crio_runtime:
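+            # The CRI-O container runtime does not allow writes to the image
+            # file system, so the working directory and Python packages are
+            # placed on the attached emptyDir workspace volume instead.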
+ container_work_dir = CRIO_VOL_WORKDIR_PATH
+ container_python_path = CRIO_VOL_PYTHON_PATH
+ python_pip_config_url = os.getenv(
+ "ELYRA_PIP_CONFIG_URL",
+ f"https://raw.githubusercontent.com/{elyra_github_org}/elyra/{elyra_github_branch}/etc/kfp/pip.conf",
+ )
+ python_user_lib_path_target = f"--target={CRIO_VOL_PYTHON_PATH}"
+ else:
+ container_work_dir = "./jupyter-work-dir"
+ python_user_lib_path_target = ""
+
+ common_curl_options = "--fail -H 'Cache-Control: no-cache'"
+
+ command_args = []
+
+ command_args.append(
+ f"mkdir -p {container_work_dir} && cd {container_work_dir} && "
+ f"echo 'Downloading {elyra_bootstrap_script_url}' && "
+ f"curl {common_curl_options} -L {elyra_bootstrap_script_url} --output bootstrapper.py && "
+ f"echo 'Downloading {elyra_requirements_url}' && "
+ f"curl {common_curl_options} -L {elyra_requirements_url} --output requirements-elyra.txt && "
+ f"echo 'Downloading {elyra_requirements_url_py37}' && "
+ f"curl {common_curl_options} -L {elyra_requirements_url_py37} --output requirements-elyra-py37.txt && "
+ )
+
+ if is_crio_runtime:
+ command_args.append(
+ f"mkdir {container_python_path} && cd {container_python_path} && "
+ f"echo 'Downloading {python_pip_config_url}' && "
+ f"curl {common_curl_options} -L {python_pip_config_url} --output pip.conf && cd .. && "
+ )
+
+ command_args.append(
+ f"python3 -m pip install {python_user_lib_path_target} packaging && "
+ "python3 -m pip freeze > requirements-current.txt && "
+ "python3 bootstrapper.py "
+ f"--pipeline-name '{pipeline_name}' "
+ f"--cos-endpoint '{cos_endpoint}' "
+ f"--cos-bucket '{cos_bucket}' "
+ f"--cos-directory '{cos_directory}' "
+ f"--cos-dependencies-archive '{cos_dependencies_archive}' "
+ f"--file '{filename}' "
+ )
+
+ def file_list_to_string(file_list: List[str]) -> str:
+ """
+            Utility function that converts a list of strings to a single string
+ """
+ # Inputs and Outputs separator character. If updated,
+ # same-named variable in bootstrapper.py must be updated!
+ INOUT_SEPARATOR = ";"
+ for file in file_list:
+ if INOUT_SEPARATOR in file:
+ raise ValueError(f"Illegal character ({INOUT_SEPARATOR}) found in filename '{file}'.")
+ return INOUT_SEPARATOR.join(file_list)
+
+ # If upstream nodes declared file outputs they need to
+ # be downloaded from object storage by the bootstrapper
+ if len(cos_inputs) > 0:
+ inputs_str = file_list_to_string(cos_inputs)
+ command_args.append(f"--inputs '{inputs_str}' ")
+
+ # If this node produces file outputs they need to be uploaded
+ # to object storage by the bootstrapper
+ if len(cos_outputs) > 0:
+ outputs_str = file_list_to_string(cos_outputs)
+ command_args.append(f"--outputs '{outputs_str}' ")
+
+ if is_crio_runtime:
+ command_args.append(f"--user-volume-path '{CRIO_VOL_PYTHON_PATH}' ")
+
+ return "".join(command_args)
@staticmethod
def _sanitize_param_name(name: str) -> str:
@@ -771,73 +1116,62 @@ def _sanitize_param_name(name: str) -> str:
return normalized_name.replace(" ", "_")
def add_disable_node_caching(self, instance: DisableNodeCaching, execution_object: Any, **kwargs) -> None:
- """Add DisableNodeCaching info to the execution object for the given runtime processor"""
+ """Add DisableNodeCaching info to the execution object"""
# Force re-execution of the operation by setting staleness to zero days
# https://www.kubeflow.org/docs/components/pipelines/overview/caching/#managing-caching-staleness
if instance.selection:
- execution_object.execution_options.caching_strategy.max_cache_staleness = "P0D"
+ execution_object["disable_node_caching"] = True
+ else:
+ execution_object["disable_node_caching"] = False
def add_custom_shared_memory_size(self, instance: CustomSharedMemorySize, execution_object: Any, **kwargs) -> None:
- """Add CustomSharedMemorySize info to the execution object for the given runtime processor"""
-
+ """Add CustomSharedMemorySize info to the execution object"""
if not instance.size:
+ # no custom size was specified; ignore
return
-
- volume = V1Volume(
- name="shm",
- empty_dir=V1EmptyDirVolumeSource(medium="Memory", size_limit=f"{instance.size}{instance.units}"),
- )
- if volume not in execution_object.volumes:
- execution_object.add_volume(volume)
-
- execution_object.container.add_volume_mount(V1VolumeMount(mount_path="/dev/shm", name="shm"))
+ execution_object["kubernetes_shared_mem_size"] = {"size": instance.size, "units": instance.units}
def add_kubernetes_secret(self, instance: KubernetesSecret, execution_object: Any, **kwargs) -> None:
- """Add KubernetesSecret instance to the execution object for the given runtime processor"""
- execution_object.container.add_env_variable(
- V1EnvVar(
- name=instance.env_var,
- value_from=V1EnvVarSource(secret_key_ref=V1SecretKeySelector(name=instance.name, key=instance.key)),
- )
- )
+ """Add KubernetesSecret instance to the execution object"""
+ if "kubernetes_secrets" not in execution_object:
+ execution_object["kubernetes_secrets"] = {}
+ execution_object["kubernetes_secrets"][instance.env_var] = {"name": instance.name, "key": instance.key}
def add_mounted_volume(self, instance: VolumeMount, execution_object: Any, **kwargs) -> None:
- """Add VolumeMount instance to the execution object for the given runtime processor"""
- volume = V1Volume(
- name=instance.pvc_name,
- persistent_volume_claim=V1PersistentVolumeClaimVolumeSource(claim_name=instance.pvc_name),
- )
- if volume not in execution_object.volumes:
- execution_object.add_volume(volume)
- execution_object.container.add_volume_mount(
- V1VolumeMount(
- mount_path=instance.path,
- name=instance.pvc_name,
- sub_path=instance.sub_path,
- read_only=instance.read_only,
- )
- )
+ """Add VolumeMount instance to the execution object"""
+ if "kubernetes_volumes" not in execution_object:
+ execution_object["kubernetes_volumes"] = {}
+ execution_object["kubernetes_volumes"][instance.path] = {
+ "pvc_name": instance.pvc_name,
+ "sub_path": instance.sub_path,
+ "read_only": instance.read_only,
+ }
def add_kubernetes_pod_annotation(self, instance: KubernetesAnnotation, execution_object: Any, **kwargs) -> None:
- """Add KubernetesAnnotation instance to the execution object for the given runtime processor"""
- if instance.key not in execution_object.pod_annotations:
- execution_object.add_pod_annotation(instance.key, instance.value or "")
+ """Add KubernetesAnnotation instance to the execution object"""
+ if "pod_annotations" not in execution_object:
+ execution_object["pod_annotations"] = {}
+ execution_object["pod_annotations"][instance.key] = instance.value or ""
def add_kubernetes_pod_label(self, instance: KubernetesLabel, execution_object: Any, **kwargs) -> None:
- """Add KubernetesLabel instance to the execution object for the given runtime processor"""
- if instance.key not in execution_object.pod_labels:
- execution_object.add_pod_label(instance.key, instance.value or "")
+ """Add KubernetesLabel instance to the execution object"""
+ if "pod_labels" not in execution_object:
+ execution_object["pod_labels"] = {}
+ execution_object["pod_labels"][instance.key] = instance.value or ""
def add_kubernetes_toleration(self, instance: KubernetesToleration, execution_object: Any, **kwargs) -> None:
- """Add KubernetesToleration instance to the execution object for the given runtime processor"""
- toleration = V1Toleration(
- effect=instance.effect,
- key=instance.key,
- operator=instance.operator,
- value=instance.value,
- )
- if toleration not in execution_object.tolerations:
- execution_object.add_toleration(toleration)
+ """Add KubernetesToleration instance to the execution object"""
+ if "kubernetes_tolerations" not in execution_object:
+ execution_object["kubernetes_tolerations"] = {}
+ toleration_hash = hashlib.sha256(
+ f"{instance.key}::{instance.operator}::{instance.value}::{instance.effect}".encode()
+ ).hexdigest()
+ execution_object["kubernetes_tolerations"][toleration_hash] = {
+ "key": instance.key,
+ "operator": instance.operator,
+ "value": instance.value,
+ "effect": instance.effect,
+ }
@property
def supported_properties(self) -> Set[str]:
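
The `add_*` handlers above no longer manipulate a `kfp` `ContainerOp` directly; each one records a plain dictionary entry on the `execution_object`, and the new Python DSL template (later in this patch) turns those entries back into SDK calls. A minimal sketch of the resulting dictionary shape, using made-up label and volume values rather than Elyra API calls:

```python
# Illustrative only (not the Elyra API): mimic what add_kubernetes_pod_label()
# and add_mounted_volume() record on the execution object.
execution_object = {}

# a (made-up) pod label, keyed by label name
execution_object.setdefault("pod_labels", {})["app.kubernetes.io/name"] = "my-pipeline"

# a (made-up) PVC mount, keyed by mount path
execution_object.setdefault("kubernetes_volumes", {})["/mnt/data"] = {
    "pvc_name": "data-pvc",
    "sub_path": None,
    "read_only": False,
}

print(execution_object)
```
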
diff --git a/elyra/pipeline/runtime_type.py b/elyra/pipeline/runtime_type.py
index 7f029a55a..9b18d7453 100644
--- a/elyra/pipeline/runtime_type.py
+++ b/elyra/pipeline/runtime_type.py
@@ -124,7 +124,10 @@ class KubeflowPipelinesResources(RuntimeTypeResources):
type = RuntimeProcessorType.KUBEFLOW_PIPELINES
icon_endpoint = "static/elyra/kubeflow.svg"
- export_file_types = [{"id": "yaml", "display_name": "KFP static configuration file (YAML formatted)"}]
+ export_file_types = [
+ {"id": "yaml", "display_name": "KFP static configuration file (YAML formatted)"},
+ {"id": "py", "display_name": "Python DSL"},
+ ]
class LocalResources(RuntimeTypeResources):
diff --git a/elyra/templates/kubeflow/v1/generic_component_definition_template.jinja2 b/elyra/templates/kubeflow/v1/generic_component_definition_template.jinja2
new file mode 100644
index 000000000..bc3003111
--- /dev/null
+++ b/elyra/templates/kubeflow/v1/generic_component_definition_template.jinja2
@@ -0,0 +1,8 @@
+name: Run a file
+description: Run a Jupyter notebook or Python/R script
+
+implementation:
+ container:
+ image: {{ container_image }}
+ command: [sh, -c]
+ args: ["{{ command_args }}"]
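
Since the new generic component definition template is only a few lines, a quick rendering sketch may help; the `container_image` and `command_args` variables come from the template, while the concrete values and the direct use of `jinja2.Template` are illustrative assumptions (Elyra renders its templates through its own Jinja environment):

```python
from jinja2 import Template

# same content as the template file above
generic_component_template = """name: Run a file
description: Run a Jupyter notebook or Python/R script

implementation:
  container:
    image: {{ container_image }}
    command: [sh, -c]
    args: ["{{ command_args }}"]
"""

component_yaml = Template(generic_component_template).render(
    container_image="tensorflow/tensorflow:2.8.0",           # made-up runtime image
    command_args="mkdir -p ./jupyter-work-dir && echo hi",    # stand-in for the composed bootstrapper command
)
print(component_yaml)
```

The rendered YAML is the kind of component definition the Python DSL template below loads via `kfp.components.load_component_from_text` for each generic node.
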
diff --git a/elyra/templates/kubeflow/v1/python_dsl_template.jinja2 b/elyra/templates/kubeflow/v1/python_dsl_template.jinja2
new file mode 100644
index 000000000..329cdd551
--- /dev/null
+++ b/elyra/templates/kubeflow/v1/python_dsl_template.jinja2
@@ -0,0 +1,177 @@
+#
+# Generated by Elyra {{ elyra_version }}
+#
+import kfp
+import kfp.aws
+from kubernetes.client import *
+from kubernetes.client.models import *
+
+{# Load statements for custom components -#}
+{# For each component definition, embed its YAML as a string and create a factory via load_component_from_text -#}
+{% for hash, component_definition in component_definitions.items() %}
+component_def_{{ hash | python_safe }} = """
+{{ component_definition }}
+"""
+
+factory_{{ hash | python_safe }} = kfp.components.load_component_from_text(component_def_{{ hash | python_safe }})
+{% endfor %}
+
+{# Define pipeline -#}
+{% if pipeline_description %}
+@kfp.dsl.pipeline(name="{{ pipeline_name }}", description="{{ pipeline_description | string_delimiter_safe }}")
+{% else %}
+@kfp.dsl.pipeline(name="{{ pipeline_name }}")
+{% endif %}
+def generated_pipeline(
+{% if pipeline_parameters %}
+{# pipeline parameters will be added here -#}
+{% endif %}
+):
+{% for workflow_task in workflow_tasks.values() %}
+ {% set task_name = "task_" + workflow_task.escaped_task_id %}
+ # Task for node '{{ workflow_task.name }}'
+ {{ task_name }} = factory_{{ workflow_task.component_definition_hash | python_safe }}(
+{% for task_input_name, task_input_spec in workflow_task.task_inputs.items() %}
+{% if task_input_spec.task_output_reference %}
+ {{ task_input_name }}=task_{{ task_input_spec.task_output_reference.task_id }}.outputs["{{ task_input_spec.task_output_reference.output_id }}"],
+{% elif task_input_spec.pipeline_parameter_reference %}
+ {{ task_input_name }}={{ task_input_spec.pipeline_parameter_reference }},
+{% elif task_input_spec.requires_quoted_rendering %}
+ {{ task_input_name }}="{{ task_input_spec.value }}",
+{% else %}
+ {{ task_input_name }}={{ task_input_spec.value }},
+{% endif %}
+{% endfor %}
+ )
+{% if workflow_task.task_modifiers.image_pull_policy %}
+ {{ task_name }}.container.set_image_pull_policy("{{ workflow_task.task_modifiers.image_pull_policy }}")
+{% endif %}
+{% if workflow_task.task_modifiers.special_output_files %}
+ {{ task_name }}.output_artifact_paths["mlpipeline-metrics"] = "{{ workflow_task.task_modifiers.special_output_files.mlpipeline_metrics }}"
+ {{ task_name }}.output_artifact_paths["mlpipeline-ui-metadata"] = "{{ workflow_task.task_modifiers.special_output_files.mlpipeline_ui_metadata }}"
+{% endif %}
+{% if workflow_task.task_modifiers.object_storage_secret %}
+ {{ task_name }}.apply(kfp.aws.use_aws_secret("{{ workflow_task.task_modifiers.object_storage_secret }}"))
+{% endif %}
+ {{ task_name }}.set_display_name("{{ workflow_task.name | string_delimiter_safe }}")
+{% if workflow_task.doc %}
+    {{ task_name }}.add_pod_annotation("elyra/node-user-doc", """{{ workflow_task.doc | string_delimiter_safe }}""")
+{% endif %}
+{% if workflow_task.task_modifiers.cpu_request %}
+ {{ task_name }}.container.set_cpu_request(cpu="{{ workflow_task.task_modifiers.cpu_request }}")
+{% endif %}
+{% if workflow_task.task_modifiers.mem_request and workflow_task.task_modifiers.mem_request.size %}
+ {{ task_name }}.container.set_memory_request(memory="{{ workflow_task.task_modifiers.mem_request.size }}{{ workflow_task.task_modifiers.mem_request.units }}")
+{% endif %}
+{% if workflow_task.task_modifiers.gpu_limit and workflow_task.task_modifiers.gpu_limit.size %}
+ {{ task_name }}.container.set_gpu_limit(gpu="{{ workflow_task.task_modifiers.gpu_limit.size }}", vendor="{{ workflow_task.task_modifiers.gpu_limit.vendor }}")
+{% endif %}
+{% if workflow_task.task_modifiers.env_variables %}
+{% for env_var_name, env_var_value in workflow_task.task_modifiers.env_variables.items() %}
+ {{ task_name }}.add_env_variable(V1EnvVar(name="{{ env_var_name }}", value="{{ env_var_value | string_delimiter_safe }}"))
+{% endfor %}
+{% endif %}
+{% if workflow_task.task_modifiers.set_run_name %}
+{% if workflow_engine == "tekton" %}
+ {{ task_name }}.add_env_variable(V1EnvVar(name="ELYRA_RUN_NAME", value_from=V1EnvVarSource(field_ref=V1ObjectFieldSelector(field_path="metadata.annotations['pipelines.kubeflow.org/run_name']"))))
+{% else %}
+ {{ task_name }}.add_env_variable(V1EnvVar(name="ELYRA_RUN_NAME", value="{{ workflow_task.task_modifiers.set_run_name }}"))
+{% endif %}
+{% endif %}
+{% if workflow_task.task_modifiers.disable_node_caching %}
+ {{ task_name }}.execution_options.caching_strategy.max_cache_staleness = "P0D"
+{% endif %}
+{% if workflow_task.task_modifiers.pod_labels %}
+{% for pod_label_key, pod_label_value in workflow_task.task_modifiers.pod_labels.items() %}
+ {{ task_name }}.add_pod_label("{{ pod_label_key }}", "{{ pod_label_value }}")
+{% endfor %}
+{% endif %}
+{% if workflow_task.task_modifiers.pod_annotations %}
+{% for pod_annotation_key, pod_annotation_value in workflow_task.task_modifiers.pod_annotations.items() %}
+ {{ task_name }}.add_pod_annotation("{{ pod_annotation_key }}", """{{ pod_annotation_value | string_delimiter_safe }}""")
+{% endfor %}
+{% endif %}
+{% if workflow_task.task_modifiers.kubernetes_secrets %}
+{% for env_var, secret_dict in workflow_task.task_modifiers.kubernetes_secrets.items() %}
+ {{ task_name }}.container.add_env_variable(V1EnvVar(
+ name="{{ env_var }}",
+ value_from=V1EnvVarSource(secret_key_ref=V1SecretKeySelector(name="{{ secret_dict.name }}", key="{{ secret_dict.key }}")),
+ ))
+{% endfor %}
+{% endif %}
+{% if workflow_task.task_modifiers.kubernetes_volumes %}
+{% for volume_path, volume_dict in workflow_task.task_modifiers.kubernetes_volumes.items() %}
+ {{ task_name }}.add_volume(
+ V1Volume(
+            name="{{ volume_dict.pvc_name }}",
+            persistent_volume_claim=V1PersistentVolumeClaimVolumeSource(claim_name="{{ volume_dict.pvc_name }}"),
+ ))
+ {{ task_name }}.container.add_volume_mount(
+ V1VolumeMount(
+ mount_path="{{ volume_path }}",
+ name="{{ volume_dict.pvc_name }}",
+{% if volume_dict.sub_path %}
+ sub_path="{{ volume_dict.sub_path }}",
+{% endif %}
+ read_only={{ volume_dict.read_only }},
+ ))
+{% endfor %}
+{% endif %}
+{% if workflow_task.task_modifiers.kubernetes_tolerations %}
+{% for toleration_dict in workflow_task.task_modifiers.kubernetes_tolerations.values() %}
+ {{ task_name }}.add_toleration(
+ V1Toleration(
+{% if toleration_dict.effect %}
+ effect="{{ toleration_dict.effect }}",
+{% else %}
+ effect=None,
+{% endif %}
+{% if toleration_dict.key %}
+ key="{{ toleration_dict.key }}",
+{% else %}
+ key=None,
+{% endif %}
+ operator="{{ toleration_dict.operator }}",
+{% if toleration_dict.value %}
+ value="{{ toleration_dict.value | string_delimiter_safe }}",
+{% else %}
+ value=None,
+{% endif %}
+ ))
+{% endfor %}
+{% endif %}
+{% if workflow_task.task_modifiers.kubernetes_shared_mem_size %}
+ {{ task_name }}.add_volume(V1Volume(
+ name="shm",
+ empty_dir=V1EmptyDirVolumeSource(medium="Memory", size_limit="{{ workflow_task.task_modifiers.kubernetes_shared_mem_size.size }}{{ workflow_task.task_modifiers.kubernetes_shared_mem_size.units }}"),
+ ))
+ {{ task_name }}.container.add_volume_mount(V1VolumeMount(mount_path="/dev/shm", name="shm"))
+{% endif %}
+{% if workflow_task.task_modifiers.crio_runtime %}
+ {{ task_name }}.add_volume(V1Volume(
+ name="{{ workflow_task.task_modifiers.crio_runtime.emptydir_volume_name }}",
+ empty_dir=V1EmptyDirVolumeSource(medium="{{ workflow_task.task_modifiers.crio_runtime.emptydir_volume_medium }}", size_limit="{{ workflow_task.task_modifiers.crio_runtime.emptydir_volume_size }}"),
+ ))
+ {{ task_name }}.container.add_volume_mount(V1VolumeMount(mount_path="{{ workflow_task.task_modifiers.crio_runtime.emptydir_mount_path }}", name="{{ workflow_task.task_modifiers.crio_runtime.emptydir_volume_name }}"))
+{% endif %}
+{# declare upstream dependencies -#}
+{% if workflow_task.upstream_workflow_task_ids %}
+{% for upstream_workflow_task_id in workflow_task.upstream_workflow_task_ids %}
+ {{ task_name }}.after(task_{{ upstream_workflow_task_id | python_safe }})
+{% endfor %}
+{% endif %}
+{% endfor %}
+
+if __name__ == "__main__":
+ from pathlib import Path
+{% if workflow_engine.lower() == "tekton" %}
+ from kfp_tekton import compiler
+
+ compiler.TektonCompiler().compile(
+{% else %}
+ kfp.compiler.Compiler().compile(
+{% endif %}
+ pipeline_func=generated_pipeline,
+ package_path=Path(__file__).with_suffix(".yaml").name,
+ )
\ No newline at end of file
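
For orientation, the hypothetical excerpt below approximates what this template renders for a single generic node on the Argo engine; the hash suffix, node name, image, and pull policy are invented, and only a couple of the optional task modifiers are shown. Per the `__main__` block above, running the exported script (with `kfp` installed, or `kfp-tekton` when the Tekton engine is selected) compiles it into a `.yaml` file next to the script.

```python
# Hypothetical excerpt of an exported pipeline script (all values invented)
import kfp

component_def_a1b2c3 = """
name: Run a file
description: Run a Jupyter notebook or Python/R script

implementation:
  container:
    image: tensorflow/tensorflow:2.8.0
    command: [sh, -c]
    args: ["echo 'placeholder for the generated bootstrapper command'"]
"""

factory_a1b2c3 = kfp.components.load_component_from_text(component_def_a1b2c3)


@kfp.dsl.pipeline(name="my-pipeline")
def generated_pipeline():
    # Task for node 'load data.ipynb'
    task_node1 = factory_a1b2c3()
    task_node1.set_display_name("load data.ipynb")
    task_node1.container.set_image_pull_policy("IfNotPresent")


if __name__ == "__main__":
    from pathlib import Path

    kfp.compiler.Compiler().compile(
        pipeline_func=generated_pipeline,
        package_path=Path(__file__).with_suffix(".yaml").name,
    )
```
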
diff --git a/elyra/tests/cli/resources/pipelines/airflow.pipeline b/elyra/tests/cli/resources/pipelines/airflow.pipeline
index f420a1471..e3f571ce9 100644
--- a/elyra/tests/cli/resources/pipelines/airflow.pipeline
+++ b/elyra/tests/cli/resources/pipelines/airflow.pipeline
@@ -24,20 +24,10 @@
"label": "",
"ui_data": {
"label": "hello.ipynb",
- "image": "data:image/svg+xml;utf8,%3Csvg%20xmlns%3D%22http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%22%20width%3D%2216%22%20viewBox%3D%220%200%2022%2022%22%3E%0A%20%20%3Cg%20class%3D%22jp-icon-warn0%20jp-icon-selectable%22%20fill%3D%22%23EF6C00%22%3E%0A%20%20%20%20%3Cpath%20d%3D%22M18.7%203.3v15.4H3.3V3.3h15.4m1.5-1.5H1.8v18.3h18.3l.1-18.3z%22%2F%3E%0A%20%20%20%20%3Cpath%20d%3D%22M16.5%2016.5l-5.4-4.3-5.6%204.3v-11h11z%22%2F%3E%0A%20%20%3C%2Fg%3E%0A%3C%2Fsvg%3E%0A",
+ "image": "/static/elyra/notebook.svg",
"x_pos": 175,
"y_pos": 110,
- "description": "Run notebook file",
- "decorations": [
- {
- "id": "error",
- "image": "data:image/svg+xml;utf8,%3Csvg%20focusable%3D%22false%22%20preserveAspectRatio%3D%22xMidYMid%20meet%22%20xmlns%3D%22http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%22%20fill%3D%22%23da1e28%22%20width%3D%2216%22%20height%3D%2216%22%20viewBox%3D%220%200%2016%2016%22%20aria-hidden%3D%22true%22%3E%3Ccircle%20cx%3D%228%22%20cy%3D%228%22%20r%3D%228%22%20fill%3D%22%23ffffff%22%3E%3C%2Fcircle%3E%3Cpath%20d%3D%22M8%2C1C4.2%2C1%2C1%2C4.2%2C1%2C8s3.2%2C7%2C7%2C7s7-3.1%2C7-7S11.9%2C1%2C8%2C1z%20M7.5%2C4h1v5h-1C7.5%2C9%2C7.5%2C4%2C7.5%2C4z%20M8%2C12.2%09c-0.4%2C0-0.8-0.4-0.8-0.8s0.3-0.8%2C0.8-0.8c0.4%2C0%2C0.8%2C0.4%2C0.8%2C0.8S8.4%2C12.2%2C8%2C12.2z%22%3E%3C%2Fpath%3E%3Cpath%20d%3D%22M7.5%2C4h1v5h-1C7.5%2C9%2C7.5%2C4%2C7.5%2C4z%20M8%2C12.2c-0.4%2C0-0.8-0.4-0.8-0.8s0.3-0.8%2C0.8-0.8%09c0.4%2C0%2C0.8%2C0.4%2C0.8%2C0.8S8.4%2C12.2%2C8%2C12.2z%22%20data-icon-path%3D%22inner-path%22%20opacity%3D%220%22%3E%3C%2Fpath%3E%3C%2Fsvg%3E",
- "outline": false,
- "position": "topRight",
- "x_pos": -24,
- "y_pos": -8
- }
- ]
+ "description": "Run notebook file"
}
},
"inputs": [
@@ -74,10 +64,10 @@
"ui_data": {
"comments": []
},
- "version": 7,
+ "version": 8,
"runtime_type": "APACHE_AIRFLOW",
"properties": {
- "name": "untitled",
+ "name": "airflow",
"runtime": "Apache Airflow"
}
},
diff --git a/elyra/tests/cli/test_pipeline_app.py b/elyra/tests/cli/test_pipeline_app.py
index 335c46777..be685c11a 100644
--- a/elyra/tests/cli/test_pipeline_app.py
+++ b/elyra/tests/cli/test_pipeline_app.py
@@ -17,6 +17,8 @@
import json
from pathlib import Path
import shutil
+from typing import List
+from typing import Union
from click.testing import CliRunner
from conftest import KFP_COMPONENT_CACHE_INSTANCE
@@ -1010,6 +1012,14 @@ def prepare_export_work_dir(work_dir: str, source_dir: str):
print(f"Work directory content: {list(Path(work_dir).glob('*'))}")
+def copy_to_work_dir(work_dir: str, files: List[Union[str, Path]]) -> None:
+ """Copies the specified files to work_dir"""
+ for file in files:
+ if not isinstance(file, Path):
+ file = Path(file)
+ shutil.copy(file.as_posix(), work_dir)
+
+
def test_export_invalid_runtime_config():
"""Test user error scenarios: the specified runtime configuration is 'invalid'"""
runner = CliRunner()
@@ -1082,7 +1092,6 @@ def test_export_kubeflow_output_option(
pipeline_file_path = cwd / pipeline_file
# make sure the pipeline file exists
assert pipeline_file_path.is_file() is True
- print(f"Pipeline file: {pipeline_file_path}")
# Test: '--output' not specified; exported file is created
# in current directory and named like the pipeline file with
@@ -1147,7 +1156,6 @@ def test_export_airflow_output_option(airflow_runtime_instance):
pipeline_file_path = cwd / pipeline_file
# make sure the pipeline file exists
assert pipeline_file_path.is_file() is True
- print(f"Pipeline file: {pipeline_file_path}")
#
# Test: '--output' not specified; exported file is created
@@ -1155,7 +1163,6 @@ def test_export_airflow_output_option(airflow_runtime_instance):
# a '.py' suffix
#
expected_output_file = pipeline_file_path.with_suffix(".py")
- print(f"expected_output_file -> {expected_output_file}")
do_mock_export(str(expected_output_file))
# this should fail: default output file already exists
@@ -1272,6 +1279,110 @@ def test_export_kubeflow_overwrite_option(
assert f"was exported to '{str(expected_output_file)}" in result.output, result.output
+def test_export_airflow_format_option(airflow_runtime_instance):
+ """Verify that the '--format' option works as expected for Airflow"""
+ runner = CliRunner()
+ with runner.isolated_filesystem():
+ cwd = Path.cwd().resolve()
+        # copy pipeline file and dependencies
+ resource_dir = Path(__file__).parent / "resources" / "pipelines"
+ copy_to_work_dir(str(cwd), [resource_dir / "airflow.pipeline", resource_dir / "hello.ipynb"])
+ pipeline_file = "airflow.pipeline"
+ pipeline_file_path = cwd / pipeline_file
+ # make sure the pipeline file exists
+ assert pipeline_file_path.is_file() is True
+
+        # Remove any stale exported output files ('.yaml' or '.py') so that the
+        # invalid-format checks below start from a clean slate
+        for export_format_suffix in ["yaml", "py"]:
+            stale_output_file = pipeline_file_path.with_suffix(f".{export_format_suffix}")
+            if stale_output_file.is_file():
+                stale_output_file.unlink()
+
+ # Try invalid format
+ for invalid_export_format_value in ["humpty", "dumpty"]:
+ options = [
+ "export",
+ str(pipeline_file_path),
+ "--runtime-config",
+ airflow_runtime_instance,
+ "--format",
+ invalid_export_format_value,
+ ]
+
+ # this should fail
+ result = runner.invoke(pipeline, options)
+
+ assert result.exit_code == 2, result.output
+ assert "Invalid value for --format: Valid export formats are ['py']." in result.output, result.output
+
+
+@pytest.mark.parametrize("catalog_instance_no_server_process", [KFP_COMPONENT_CACHE_INSTANCE], indirect=True)
+def test_export_kubeflow_format_option(
+ jp_environ, kubeflow_pipelines_runtime_instance, catalog_instance_no_server_process
+):
+ """Verify that the '--format' option works as expected for Kubeflow Pipelines"""
+ runner = CliRunner()
+ with runner.isolated_filesystem():
+ cwd = Path.cwd().resolve()
+        # copy pipeline file and dependencies
+ prepare_export_work_dir(str(cwd), Path(__file__).parent / "resources" / "pipelines")
+ pipeline_file = "kfp_3_node_custom.pipeline"
+ pipeline_file_path = cwd / pipeline_file
+ # make sure the pipeline file exists
+ assert pipeline_file_path.is_file() is True
+
+ # Try supported formats
+ for supported_export_format_value in [None, "py", "yaml"]:
+ if supported_export_format_value:
+ expected_output_file = pipeline_file_path.with_suffix(f".{supported_export_format_value}")
+ else:
+ expected_output_file = pipeline_file_path.with_suffix(".yaml")
+
+ # Make sure the output file doesn't exist yet
+ if expected_output_file.is_file():
+ expected_output_file.unlink()
+
+ options = [
+ "export",
+ str(pipeline_file_path),
+ "--runtime-config",
+ kubeflow_pipelines_runtime_instance,
+ ]
+ if supported_export_format_value:
+ options.append("--format")
+ options.append(supported_export_format_value)
+
+ # this should succeed
+ result = runner.invoke(pipeline, options)
+
+ assert result.exit_code == 0, result.output
+ assert f"was exported to '{str(expected_output_file)}" in result.output, result.output
+
+ # Try invalid format
+ for invalid_export_format_value in ["humpty", "dumpty"]:
+ options = [
+ "export",
+ str(pipeline_file_path),
+ "--runtime-config",
+ kubeflow_pipelines_runtime_instance,
+ "--format",
+ invalid_export_format_value,
+ ]
+
+ # this should fail
+ result = runner.invoke(pipeline, options)
+
+ assert result.exit_code == 2, result.output
+ assert (
+ "Invalid value for --format: Valid export formats are ['yaml', 'py']." in result.output
+ ), result.output
+
+
# ------------------------------------------------------------------
# end tests for 'export' command
# ------------------------------------------------------------------
diff --git a/elyra/tests/kfp/test_operator.py b/elyra/tests/kfp/test_operator.py
deleted file mode 100644
index ff046df90..000000000
--- a/elyra/tests/kfp/test_operator.py
+++ /dev/null
@@ -1,567 +0,0 @@
-#
-# Copyright 2018-2022 Elyra Authors
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-import string
-
-from kfp.dsl import RUN_ID_PLACEHOLDER
-import pytest
-
-from elyra.kfp.operator import ExecuteFileOp
-
-
-def test_fail_without_cos_endpoint():
- with pytest.raises(TypeError):
- ExecuteFileOp(
- name="test",
- pipeline_name="test-pipeline",
- experiment_name="experiment-name",
- notebook="test_notebook.ipynb",
- cos_bucket="test_bucket",
- cos_directory="test_directory",
- cos_dependencies_archive="test_archive.tgz",
- image="test/image:dev",
- )
-
-
-def test_fail_without_cos_bucket():
- with pytest.raises(TypeError):
- ExecuteFileOp(
- name="test",
- pipeline_name="test-pipeline",
- experiment_name="experiment-name",
- notebook="test_notebook.ipynb",
- cos_endpoint="http://testserver:32525",
- cos_directory="test_directory",
- cos_dependencies_archive="test_archive.tgz",
- image="test/image:dev",
- )
-
-
-def test_fail_without_cos_directory():
- with pytest.raises(TypeError):
- ExecuteFileOp(
- name="test",
- pipeline_name="test-pipeline",
- experiment_name="experiment-name",
- notebook="test_notebook.ipynb",
- cos_endpoint="http://testserver:32525",
- cos_bucket="test_bucket",
- cos_dependencies_archive="test_archive.tgz",
- image="test/image:dev",
- )
-
-
-def test_fail_without_cos_dependencies_archive():
- with pytest.raises(TypeError):
- ExecuteFileOp(
- name="test",
- pipeline_name="test-pipeline",
- experiment_name="experiment-name",
- notebook="test_notebook.ipynb",
- cos_endpoint="http://testserver:32525",
- cos_bucket="test_bucket",
- cos_directory="test_directory",
- image="test/image:dev",
- )
-
-
-def test_fail_without_runtime_image():
- with pytest.raises(ValueError) as error_info:
- ExecuteFileOp(
- name="test",
- pipeline_name="test-pipeline",
- experiment_name="experiment-name",
- notebook="test_notebook.ipynb",
- cos_endpoint="http://testserver:32525",
- cos_bucket="test_bucket",
- cos_directory="test_directory",
- cos_dependencies_archive="test_archive.tgz",
- )
- assert "You need to provide an image." == str(error_info.value)
-
-
-def test_fail_without_notebook():
- with pytest.raises(TypeError):
- ExecuteFileOp(
- name="test",
- pipeline_name="test-pipeline",
- experiment_name="experiment-name",
- cos_endpoint="http://testserver:32525",
- cos_bucket="test_bucket",
- cos_directory="test_directory",
- cos_dependencies_archive="test_archive.tgz",
- image="test/image:dev",
- )
-
-
-def test_fail_without_name():
- with pytest.raises(TypeError):
- ExecuteFileOp(
- pipeline_name="test-pipeline",
- experiment_name="experiment-name",
- notebook="test_notebook.ipynb",
- cos_endpoint="http://testserver:32525",
- cos_bucket="test_bucket",
- cos_directory="test_directory",
- cos_dependencies_archive="test_archive.tgz",
- image="test/image:dev",
- )
-
-
-def test_fail_with_empty_string_as_name():
- with pytest.raises(ValueError):
- ExecuteFileOp(
- name="",
- pipeline_name="test-pipeline",
- experiment_name="experiment-name",
- notebook="test_notebook.ipynb",
- cos_endpoint="http://testserver:32525",
- cos_bucket="test_bucket",
- cos_directory="test_directory",
- cos_dependencies_archive="test_archive.tgz",
- image="test/image:dev",
- )
-
-
-def test_fail_with_empty_string_as_notebook():
- with pytest.raises(ValueError) as error_info:
- ExecuteFileOp(
- name="test",
- pipeline_name="test-pipeline",
- experiment_name="experiment-name",
- notebook="",
- cos_endpoint="http://testserver:32525",
- cos_bucket="test_bucket",
- cos_directory="test_directory",
- cos_dependencies_archive="test_archive.tgz",
- image="test/image:dev",
- )
- assert "You need to provide a notebook." == str(error_info.value)
-
-
-def test_fail_without_pipeline_name():
- with pytest.raises(TypeError):
- ExecuteFileOp(
- name="test",
- experiment_name="experiment-name",
- notebook="test_notebook.ipynb",
- cos_endpoint="http://testserver:32525",
- cos_bucket="test_bucket",
- cos_directory="test_directory",
- cos_dependencies_archive="test_archive.tgz",
- image="test/image:dev",
- )
-
-
-def test_fail_without_experiment_name():
- with pytest.raises(TypeError):
- ExecuteFileOp(
- name="test",
- pipeline_name="test-pipeline",
- notebook="test_notebook.ipynb",
- cos_endpoint="http://testserver:32525",
- cos_bucket="test_bucket",
- cos_directory="test_directory",
- cos_dependencies_archive="test_archive.tgz",
- image="test/image:dev",
- )
-
-
-def test_properly_set_notebook_name_when_in_subdirectory():
- notebook_op = ExecuteFileOp(
- name="test",
- pipeline_name="test-pipeline",
- experiment_name="experiment-name",
- notebook="foo/test_notebook.ipynb",
- cos_endpoint="http://testserver:32525",
- cos_bucket="test_bucket",
- cos_directory="test_directory",
- cos_dependencies_archive="test_archive.tgz",
- image="test/image:dev",
- )
- assert "test_notebook.ipynb" == notebook_op.notebook_name
-
-
-def test_properly_set_python_script_name_when_in_subdirectory():
- notebook_op = ExecuteFileOp(
- name="test",
- pipeline_name="test-pipeline",
- experiment_name="experiment-name",
- notebook="foo/test.py",
- cos_endpoint="http://testserver:32525",
- cos_bucket="test_bucket",
- cos_directory="test_directory",
- cos_dependencies_archive="test_archive.tgz",
- image="test/image:dev",
- )
- assert "test.py" == notebook_op.notebook_name
-
-
-def test_user_crio_volume_creation():
- notebook_op = ExecuteFileOp(
- name="test",
- pipeline_name="test-pipeline",
- experiment_name="experiment-name",
- notebook="test_notebook.ipynb",
- cos_endpoint="http://testserver:32525",
- cos_bucket="test_bucket",
- cos_directory="test_directory",
- cos_dependencies_archive="test_archive.tgz",
- image="test/image:dev",
- emptydir_volume_size="20Gi",
- )
- assert notebook_op.emptydir_volume_size == "20Gi"
- assert notebook_op.container_work_dir_root_path == "/opt/app-root/src/"
- assert notebook_op.container.volume_mounts.__len__() == 1
- # Environment variables: PYTHONPATH, ELYRA_RUN_NAME
- assert notebook_op.container.env.__len__() == 2, notebook_op.container.env
-
-
-def test_override_bootstrap_url():
- notebook_op = ExecuteFileOp(
- name="test",
- pipeline_name="test-pipeline",
- experiment_name="experiment-name",
- bootstrap_script_url="https://test.server.com/bootscript.py",
- notebook="test_notebook.ipynb",
- cos_endpoint="http://testserver:32525",
- cos_bucket="test_bucket",
- cos_directory="test_directory",
- cos_dependencies_archive="test_archive.tgz",
- image="test/image:dev",
- )
- assert notebook_op.bootstrap_script_url == "https://test.server.com/bootscript.py"
-
-
-def test_override_requirements_url():
- notebook_op = ExecuteFileOp(
- name="test",
- pipeline_name="test-pipeline",
- experiment_name="experiment-name",
- requirements_url="https://test.server.com/requirements.py",
- notebook="test_notebook.ipynb",
- cos_endpoint="http://testserver:32525",
- cos_bucket="test_bucket",
- cos_directory="test_directory",
- cos_dependencies_archive="test_archive.tgz",
- image="test/image:dev",
- )
- assert notebook_op.requirements_url == "https://test.server.com/requirements.py"
-
-
-def test_construct_with_both_pipeline_inputs_and_outputs():
- notebook_op = ExecuteFileOp(
- name="test",
- pipeline_name="test-pipeline",
- experiment_name="experiment-name",
- notebook="test_notebook.ipynb",
- cos_endpoint="http://testserver:32525",
- cos_bucket="test_bucket",
- cos_directory="test_directory",
- cos_dependencies_archive="test_archive.tgz",
- pipeline_inputs=["test_input1.txt", "test_input2.txt"],
- pipeline_outputs=["test_output1.txt", "test_output2.txt"],
- image="test/image:dev",
- )
- assert notebook_op.pipeline_inputs == ["test_input1.txt", "test_input2.txt"]
- assert notebook_op.pipeline_outputs == ["test_output1.txt", "test_output2.txt"]
-
- assert '--inputs "test_input1.txt;test_input2.txt"' in notebook_op.container.args[0]
- assert '--outputs "test_output1.txt;test_output2.txt"' in notebook_op.container.args[0]
-
-
-def test_construct_wildcard_outputs():
- notebook_op = ExecuteFileOp(
- name="test",
- pipeline_name="test-pipeline",
- experiment_name="experiment-name",
- notebook="test_notebook.ipynb",
- cos_endpoint="http://testserver:32525",
- cos_bucket="test_bucket",
- cos_directory="test_directory",
- cos_dependencies_archive="test_archive.tgz",
- pipeline_inputs=["test_input1.txt", "test_input2.txt"],
- pipeline_outputs=["test_out*", "foo.tar"],
- image="test/image:dev",
- )
- assert notebook_op.pipeline_inputs == ["test_input1.txt", "test_input2.txt"]
- assert notebook_op.pipeline_outputs == ["test_out*", "foo.tar"]
-
- assert '--inputs "test_input1.txt;test_input2.txt"' in notebook_op.container.args[0]
- assert '--outputs "test_out*;foo.tar"' in notebook_op.container.args[0]
-
-
-def test_construct_with_only_pipeline_inputs():
- notebook_op = ExecuteFileOp(
- name="test",
- pipeline_name="test-pipeline",
- experiment_name="experiment-name",
- notebook="test_notebook.ipynb",
- cos_endpoint="http://testserver:32525",
- cos_bucket="test_bucket",
- cos_directory="test_directory",
- cos_dependencies_archive="test_archive.tgz",
- pipeline_inputs=["test_input1.txt", "test,input2.txt"],
- pipeline_outputs=[],
- image="test/image:dev",
- )
- assert notebook_op.pipeline_inputs == ["test_input1.txt", "test,input2.txt"]
- assert '--inputs "test_input1.txt;test,input2.txt"' in notebook_op.container.args[0]
-
-
-def test_construct_with_bad_pipeline_inputs():
- with pytest.raises(ValueError) as error_info:
- ExecuteFileOp(
- name="test",
- pipeline_name="test-pipeline",
- experiment_name="experiment-name",
- notebook="test_notebook.ipynb",
- cos_endpoint="http://testserver:32525",
- cos_bucket="test_bucket",
- cos_directory="test_directory",
- cos_dependencies_archive="test_archive.tgz",
- pipeline_inputs=["test_input1.txt", "test;input2.txt"],
- pipeline_outputs=[],
- image="test/image:dev",
- )
- assert "Illegal character (;) found in filename 'test;input2.txt'." == str(error_info.value)
-
-
-def test_construct_with_only_pipeline_outputs():
- notebook_op = ExecuteFileOp(
- name="test",
- pipeline_name="test-pipeline",
- experiment_name="experiment-name",
- notebook="test_notebook.ipynb",
- cos_endpoint="http://testserver:32525",
- cos_bucket="test_bucket",
- cos_directory="test_directory",
- cos_dependencies_archive="test_archive.tgz",
- pipeline_outputs=["test_output1.txt", "test,output2.txt"],
- pipeline_envs={},
- image="test/image:dev",
- )
- assert notebook_op.pipeline_outputs == ["test_output1.txt", "test,output2.txt"]
- assert '--outputs "test_output1.txt;test,output2.txt"' in notebook_op.container.args[0]
-
-
-def test_construct_with_bad_pipeline_outputs():
- with pytest.raises(ValueError) as error_info:
- ExecuteFileOp(
- name="test",
- pipeline_name="test-pipeline",
- experiment_name="experiment-name",
- notebook="test_notebook.ipynb",
- cos_endpoint="http://testserver:32525",
- cos_bucket="test_bucket",
- cos_directory="test_directory",
- cos_dependencies_archive="test_archive.tgz",
- pipeline_outputs=["test_output1.txt", "test;output2.txt"],
- image="test/image:dev",
- )
- assert "Illegal character (;) found in filename 'test;output2.txt'." == str(error_info.value)
-
-
-def test_construct_with_env_variables_argo():
- notebook_op = ExecuteFileOp(
- name="test",
- pipeline_name="test-pipeline",
- experiment_name="experiment-name",
- notebook="test_notebook.ipynb",
- cos_endpoint="http://testserver:32525",
- cos_bucket="test_bucket",
- cos_directory="test_directory",
- cos_dependencies_archive="test_archive.tgz",
- pipeline_envs={"ENV_VAR_ONE": "1", "ENV_VAR_TWO": "2", "ENV_VAR_THREE": "3"},
- image="test/image:dev",
- )
-
- confirmation_names = ["ENV_VAR_ONE", "ENV_VAR_TWO", "ENV_VAR_THREE", "ELYRA_RUN_NAME"]
- confirmation_values = ["1", "2", "3", RUN_ID_PLACEHOLDER]
- for env_val in notebook_op.container.env:
- assert env_val.name in confirmation_names
- assert env_val.value in confirmation_values
- confirmation_names.remove(env_val.name)
- confirmation_values.remove(env_val.value)
-
- # Verify confirmation values have been drained.
- assert len(confirmation_names) == 0
- assert len(confirmation_values) == 0
-
- # same as before but explicitly specify the workflow engine type
- # as Argo
- notebook_op = ExecuteFileOp(
- name="test",
- pipeline_name="test-pipeline",
- experiment_name="experiment-name",
- notebook="test_notebook.ipynb",
- cos_endpoint="http://testserver:32525",
- cos_bucket="test_bucket",
- cos_directory="test_directory",
- cos_dependencies_archive="test_archive.tgz",
- pipeline_envs={"ENV_VAR_ONE": "1", "ENV_VAR_TWO": "2", "ENV_VAR_THREE": "3"},
- image="test/image:dev",
- workflow_engine="Argo",
- )
-
- confirmation_names = ["ENV_VAR_ONE", "ENV_VAR_TWO", "ENV_VAR_THREE", "ELYRA_RUN_NAME"]
- confirmation_values = ["1", "2", "3", RUN_ID_PLACEHOLDER]
- for env_val in notebook_op.container.env:
- assert env_val.name in confirmation_names
- assert env_val.value in confirmation_values
- confirmation_names.remove(env_val.name)
- confirmation_values.remove(env_val.value)
-
- # Verify confirmation values have been drained.
- assert len(confirmation_names) == 0
- assert len(confirmation_values) == 0
-
-
-def test_construct_with_env_variables_tekton():
- notebook_op = ExecuteFileOp(
- name="test",
- pipeline_name="test-pipeline",
- experiment_name="experiment-name",
- notebook="test_notebook.ipynb",
- cos_endpoint="http://testserver:32525",
- cos_bucket="test_bucket",
- cos_directory="test_directory",
- cos_dependencies_archive="test_archive.tgz",
- pipeline_envs={"ENV_VAR_ONE": "1", "ENV_VAR_TWO": "2", "ENV_VAR_THREE": "3"},
- image="test/image:dev",
- workflow_engine="Tekton",
- )
-
- confirmation_names = ["ENV_VAR_ONE", "ENV_VAR_TWO", "ENV_VAR_THREE", "ELYRA_RUN_NAME"]
- confirmation_values = ["1", "2", "3"]
- field_path = "metadata.annotations['pipelines.kubeflow.org/run_name']"
- for env_val in notebook_op.container.env:
- assert env_val.name in confirmation_names
- confirmation_names.remove(env_val.name)
- if env_val.name == "ELYRA_RUN_NAME":
- assert env_val.value_from.field_ref.field_path == field_path, env_val.value_from.field_ref
- else:
- assert env_val.value in confirmation_values
- confirmation_values.remove(env_val.value)
-
- # Verify confirmation values have been drained.
- assert len(confirmation_names) == 0
- assert len(confirmation_values) == 0
-
-
-def test_normalize_label_value():
- valid_middle_chars = "-_."
-
- # test min length
- assert ExecuteFileOp._normalize_label_value(None) == ""
- assert ExecuteFileOp._normalize_label_value("") == ""
- # test max length (63)
- assert ExecuteFileOp._normalize_label_value("a" * 63) == "a" * 63
- assert ExecuteFileOp._normalize_label_value("a" * 64) == "a" * 63 # truncated
- # test first and last char
- assert ExecuteFileOp._normalize_label_value("1") == "1"
- assert ExecuteFileOp._normalize_label_value("22") == "22"
- assert ExecuteFileOp._normalize_label_value("3_3") == "3_3"
- assert ExecuteFileOp._normalize_label_value("4u4") == "4u4"
- assert ExecuteFileOp._normalize_label_value("5$5") == "5_5"
-
- # test first char
- for c in string.printable:
- if c in string.ascii_letters + string.digits:
- # first char is valid
- # no length violation
- assert ExecuteFileOp._normalize_label_value(c) == c
- assert ExecuteFileOp._normalize_label_value(c + "B") == c + "B"
- # max length
- assert ExecuteFileOp._normalize_label_value(c + "B" * 62) == (c + "B" * 62)
- # max length exceeded
- assert ExecuteFileOp._normalize_label_value(c + "B" * 63) == (c + "B" * 62) # truncated
- else:
- # first char is invalid, e.g. '#a', and becomes the
- # second char, which might require replacement
- rv = c
- if c not in valid_middle_chars:
- rv = "_"
- # no length violation
- assert ExecuteFileOp._normalize_label_value(c) == "a" + rv + "a"
- assert ExecuteFileOp._normalize_label_value(c + "B") == "a" + rv + "B"
- # max length
- assert ExecuteFileOp._normalize_label_value(c + "B" * 62) == ("a" + rv + "B" * 61) # truncated
- # max length exceeded
- assert ExecuteFileOp._normalize_label_value(c + "B" * 63) == ("a" + rv + "B" * 61) # truncated
-
- # test last char
- for c in string.printable:
- if c in string.ascii_letters + string.digits:
- # no length violation
- assert ExecuteFileOp._normalize_label_value("b" + c) == "b" + c
- # max length
- assert ExecuteFileOp._normalize_label_value("b" * 62 + c) == ("b" * 62 + c)
- # max length exceeded
- assert ExecuteFileOp._normalize_label_value("b" * 63 + c) == ("b" * 63)
- else:
- # last char is invalid, e.g. 'a#', and requires
- # patching
- rv = c
- if c not in valid_middle_chars:
- rv = "_"
- # no length violation (char is appended)
- assert ExecuteFileOp._normalize_label_value("b" + c) == "b" + rv + "a"
- # max length (char is replaced)
- assert ExecuteFileOp._normalize_label_value("b" * 62 + c) == ("b" * 62 + "a")
- # max length exceeded (no action required)
- assert ExecuteFileOp._normalize_label_value("b" * 63 + c) == ("b" * 63)
-
- # test first and last char
- for c in string.printable:
- if c in string.ascii_letters + string.digits:
- # no length violation
- assert ExecuteFileOp._normalize_label_value(c + "b" + c) == c + "b" + c # nothing is modified
- # max length
- assert ExecuteFileOp._normalize_label_value(c + "b" * 61 + c) == (c + "b" * 61 + c) # nothing is modified
- # max length exceeded
- assert ExecuteFileOp._normalize_label_value(c + "b" * 62 + c) == c + "b" * 62 # truncate only
- else:
- # first and last characters are invalid, e.g. '#a#'
- rv = c
- if c not in valid_middle_chars:
- rv = "_"
- # no length violation
- assert ExecuteFileOp._normalize_label_value(c + "b" + c) == "a" + rv + "b" + rv + "a"
- # max length
- assert ExecuteFileOp._normalize_label_value(c + "b" * 59 + c) == ("a" + rv + "b" * 59 + rv + "a")
- # max length exceeded after processing, scenario 1
- # resolved by adding char before first, replace last
- assert ExecuteFileOp._normalize_label_value(c + "b" * 60 + c) == ("a" + rv + "b" * 60 + "a")
- # max length exceeded after processing, scenario 2
- # resolved by adding char before first, appending after last
- assert ExecuteFileOp._normalize_label_value(c + "b" * 59 + c) == ("a" + rv + "b" * 59 + rv + "a")
- # max length exceeded before processing, scenario 1
- # resolved by adding char before first, truncating last
- assert ExecuteFileOp._normalize_label_value(c + "b" * 62 + c) == ("a" + rv + "b" * 61)
- # max length exceeded before processing, scenario 2
- # resolved by adding char before first, replacing last
- assert ExecuteFileOp._normalize_label_value(c + "b" * 60 + c * 3) == ("a" + rv + "b" * 60 + "a")
-
- # test char in a position other than first and last
- # if invalid, the char is replaced with '_'
- for c in string.printable:
- if c in string.ascii_letters + string.digits + "-_.":
- assert ExecuteFileOp._normalize_label_value("A" + c + "Z") == "A" + c + "Z"
- else:
- assert ExecuteFileOp._normalize_label_value("A" + c + "Z") == "A_Z"
-
- # encore
- assert ExecuteFileOp._normalize_label_value(r"¯\_(ツ)_/¯") == "a_________a"
diff --git a/elyra/tests/pipeline/kfp/test_processor_kfp.py b/elyra/tests/pipeline/kfp/test_processor_kfp.py
index 47c6c59e9..e37ad147b 100644
--- a/elyra/tests/pipeline/kfp/test_processor_kfp.py
+++ b/elyra/tests/pipeline/kfp/test_processor_kfp.py
@@ -13,38 +13,65 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
+from datetime import datetime
+import hashlib
+import json
import os
from pathlib import Path
import re
import tarfile
+from typing import List
+from typing import Union
from unittest import mock
-from kfp import compiler as kfp_argo_compiler
+from kfp.dsl import RUN_ID_PLACEHOLDER
import pytest
import yaml
from elyra.metadata.metadata import Metadata
from elyra.pipeline.catalog_connector import FilesystemComponentCatalogConnector
-from elyra.pipeline.catalog_connector import UrlComponentCatalogConnector
from elyra.pipeline.component import Component
from elyra.pipeline.component import ComponentParameter
+from elyra.pipeline.component_parameter import CustomSharedMemorySize
+from elyra.pipeline.component_parameter import DisableNodeCaching
from elyra.pipeline.component_parameter import ElyraProperty
+from elyra.pipeline.component_parameter import KubernetesAnnotation
+from elyra.pipeline.component_parameter import KubernetesLabel
+from elyra.pipeline.component_parameter import KubernetesSecret
+from elyra.pipeline.component_parameter import KubernetesToleration
+from elyra.pipeline.component_parameter import VolumeMount
+from elyra.pipeline.kfp.processor_kfp import CRIO_VOL_DEF_MEDIUM
+from elyra.pipeline.kfp.processor_kfp import CRIO_VOL_DEF_NAME
+from elyra.pipeline.kfp.processor_kfp import CRIO_VOL_DEF_SIZE
+from elyra.pipeline.kfp.processor_kfp import CRIO_VOL_MOUNT_PATH
+from elyra.pipeline.kfp.processor_kfp import CRIO_VOL_PYTHON_PATH
+from elyra.pipeline.kfp.processor_kfp import CRIO_VOL_WORKDIR_PATH
from elyra.pipeline.kfp.processor_kfp import KfpPipelineProcessor
+from elyra.pipeline.kfp.processor_kfp import WorkflowEngineType
from elyra.pipeline.parser import PipelineParser
from elyra.pipeline.pipeline import GenericOperation
from elyra.pipeline.pipeline import Operation
from elyra.pipeline.pipeline import Pipeline
from elyra.pipeline.pipeline_constants import COS_OBJECT_PREFIX
+from elyra.pipeline.pipeline_constants import KUBERNETES_POD_ANNOTATIONS
+from elyra.pipeline.pipeline_constants import KUBERNETES_POD_LABELS
+from elyra.pipeline.pipeline_constants import KUBERNETES_SECRETS
+from elyra.pipeline.pipeline_constants import KUBERNETES_SHARED_MEM_SIZE
+from elyra.pipeline.pipeline_constants import KUBERNETES_TOLERATIONS
+from elyra.pipeline.pipeline_constants import MOUNTED_VOLUMES
from elyra.tests.pipeline.test_pipeline_parser import _read_pipeline_resource
+from elyra.util.cos import join_paths
+from elyra.util.kubernetes import sanitize_label_value
-
-ARCHIVE_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), "resources", "archive")
-PIPELINE_FILE_COMPLEX = "resources/sample_pipelines/pipeline_dependency_complex.json"
+PIPELINE_FILE_COMPLEX = str((Path("resources") / "sample_pipelines" / "pipeline_dependency_complex.json").as_posix())
@pytest.fixture
-def processor(setup_factory_data):
- root_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
+def processor(setup_factory_data) -> KfpPipelineProcessor:
+ """
+    Instantiate a processor for Kubeflow Pipelines
+ """
+ root_dir = str((Path(__file__).parent / "..").resolve())
processor = KfpPipelineProcessor(root_dir=root_dir)
return processor
@@ -73,13 +100,88 @@ def sample_metadata():
}
-def test_fail_get_metadata_configuration_invalid_namespace(processor):
+def kfp_runtime_config(
+ workflow_engine: WorkflowEngineType = WorkflowEngineType.ARGO,
+ use_cos_credentials_secret: bool = False,
+) -> Metadata:
+ """
+    Returns a KFP runtime config metadata entry, which meets the constraints
+ defined by the specified parameters
+ """
+
+ kfp_runtime_config = {
+ "display_name": "Mocked KFP runtime",
+ "schema_name": "kfp",
+ "metadata": {
+ "display_name": "Mocked KFP runtime",
+ "tags": [],
+ "user_namespace": "default",
+ "api_username": "user@example.com",
+ "api_password": "12341234",
+ "runtime_type": "KUBEFLOW_PIPELINES",
+ "api_endpoint": "http://examples.com:31737",
+ "cos_endpoint": "http://examples.com:31671",
+ "cos_bucket": "test",
+ },
+ }
+
+ if workflow_engine == WorkflowEngineType.TEKTON:
+ kfp_runtime_config["metadata"]["engine"] = "Tekton"
+ else:
+ kfp_runtime_config["metadata"]["engine"] = "Argo"
+
+ if use_cos_credentials_secret:
+ kfp_runtime_config["metadata"]["cos_auth_type"] = "KUBERNETES_SECRET"
+ kfp_runtime_config["metadata"]["cos_username"] = "my_name"
+ kfp_runtime_config["metadata"]["cos_password"] = "my_password"
+ kfp_runtime_config["metadata"]["cos_secret"] = "secret-name"
+ else:
+ kfp_runtime_config["metadata"]["cos_auth_type"] = "USER_CREDENTIALS"
+ kfp_runtime_config["metadata"]["cos_username"] = "my_name"
+ kfp_runtime_config["metadata"]["cos_password"] = "my_password"
+
+ return Metadata(
+ name=kfp_runtime_config["display_name"].lower().replace(" ", "_"),
+ display_name=kfp_runtime_config["display_name"],
+ schema_name=kfp_runtime_config["schema_name"],
+ metadata=kfp_runtime_config["metadata"],
+ )
+
+
+def test_WorkflowEngineType_get_instance_by_value():
+ """
+ Validate that method 'get_instance_by_value' yields the expected results for
+ valid and invalid input.
+ """
+    # test valid inputs (the provided value is evaluated in a case-insensitive manner)
+ assert WorkflowEngineType.get_instance_by_value("argo") == WorkflowEngineType.ARGO
+ assert WorkflowEngineType.get_instance_by_value("ARGO") == WorkflowEngineType.ARGO
+ assert WorkflowEngineType.get_instance_by_value("aRGo") == WorkflowEngineType.ARGO
+ assert WorkflowEngineType.get_instance_by_value("Argo") == WorkflowEngineType.ARGO
+ assert WorkflowEngineType.get_instance_by_value("tekton") == WorkflowEngineType.TEKTON
+ assert WorkflowEngineType.get_instance_by_value("TEKTON") == WorkflowEngineType.TEKTON
+ assert WorkflowEngineType.get_instance_by_value("tEKtOn") == WorkflowEngineType.TEKTON
+ assert WorkflowEngineType.get_instance_by_value("Tekton") == WorkflowEngineType.TEKTON
+ # test invalid inputs
+ with pytest.raises(KeyError):
+ WorkflowEngineType.get_instance_by_value(None) # there is no default
+ with pytest.raises(KeyError):
+ WorkflowEngineType.get_instance_by_value("") # there is no default
+ with pytest.raises(KeyError):
+        WorkflowEngineType.get_instance_by_value(" argo ")  # whitespace is not trimmed
+ with pytest.raises(KeyError):
+ WorkflowEngineType.get_instance_by_value("bitcoin")
+ with pytest.raises(KeyError):
+ WorkflowEngineType.get_instance_by_value("ether")
+
+
+def test_fail_get_metadata_configuration_invalid_namespace(processor: KfpPipelineProcessor):
with pytest.raises(RuntimeError):
processor._get_metadata_configuration(schemaspace="non_existent_namespace", name="non_existent_metadata")
-def test_generate_dependency_archive(processor):
- pipelines_test_file = os.path.join(ARCHIVE_DIR, "test.ipynb")
+def test_generate_dependency_archive(processor: KfpPipelineProcessor):
+ pipelines_test_file = str((Path(__file__).parent / ".." / "resources" / "archive" / "test.ipynb").resolve())
pipeline_dependencies = ["airflow.json"]
correct_filelist = ["test.ipynb", "airflow.json"]
component_parameters = {
@@ -107,7 +209,7 @@ def test_generate_dependency_archive(processor):
assert sorted(correct_filelist) == sorted(tar_content)
-def test_fail_generate_dependency_archive(processor):
+def test_fail_generate_dependency_archive(processor: KfpPipelineProcessor):
pipelines_test_file = "this/is/a/rel/path/test.ipynb"
pipeline_dependencies = ["non_existent_file.json"]
component_parameters = {
@@ -127,7 +229,7 @@ def test_fail_generate_dependency_archive(processor):
processor._generate_dependency_archive(test_operation)
-def test_get_dependency_source_dir(processor):
+def test_get_dependency_source_dir(processor: KfpPipelineProcessor):
pipelines_test_file = "this/is/a/rel/path/test.ipynb"
processor.root_dir = "/this/is/an/abs/path/"
correct_filepath = "/this/is/an/abs/path/this/is/a/rel/path"
@@ -145,7 +247,7 @@ def test_get_dependency_source_dir(processor):
assert filepath == correct_filepath
-def test_get_dependency_archive_name(processor):
+def test_get_dependency_archive_name(processor: KfpPipelineProcessor):
pipelines_test_file = "this/is/a/rel/path/test.ipynb"
correct_filename = "test-this-is-a-test-id.tar.gz"
component_parameters = {"filename": pipelines_test_file, "runtime_image": "tensorflow/tensorflow:latest"}
@@ -162,7 +264,7 @@ def test_get_dependency_archive_name(processor):
assert filename == correct_filename
-def test_collect_envs(processor):
+def test_collect_envs(processor: KfpPipelineProcessor):
pipelines_test_file = "this/is/a/rel/path/test.ipynb"
# add system-owned envs with bogus values to ensure they get set to system-derived values,
@@ -212,7 +314,7 @@ def test_collect_envs(processor):
assert "USER_NO_VALUE" not in envs
-def test_process_list_value_function(processor):
+def test_process_list_value_function(processor: KfpPipelineProcessor):
# Test values that will be successfully converted to list
assert processor._process_list_value("") == []
assert processor._process_list_value(None) == []
@@ -233,7 +335,7 @@ def test_process_list_value_function(processor):
assert processor._process_list_value("'elem1', 'elem2'") == "'elem1', 'elem2'"
-def test_process_dictionary_value_function(processor):
+def test_process_dictionary_value_function(processor: KfpPipelineProcessor):
# Test values that will be successfully converted to dictionary
assert processor._process_dictionary_value("") == {}
assert processor._process_dictionary_value(None) == {}
@@ -283,121 +385,254 @@ def test_process_dictionary_value_function(processor):
assert processor._process_dictionary_value(dict_as_str) == dict_as_str
-def test_processing_url_runtime_specific_component(monkeypatch, processor, component_cache, sample_metadata, tmpdir):
- # Define the appropriate reader for a URL-type component definition
- kfp_supported_file_types = [".yaml"]
- reader = UrlComponentCatalogConnector(kfp_supported_file_types)
-
- # Assign test resource location
- url = (
- "https://raw.githubusercontent.com/elyra-ai/elyra/main/"
- "elyra/tests/pipeline/resources/components/filter_text.yaml"
- )
-
- # Read contents of given path -- read_component_definition() returns a
- # a dictionary of component definition content indexed by path
- entry_data = reader.get_entry_data({"url": url}, {})
- component_definition = entry_data.definition
-
- properties = [
- ComponentParameter(
- id="text",
- name="Text",
- json_data_type="string",
- value="default",
- description="Text to filter",
- allowed_input_types=["file", "inputpath", "inputvalue"],
- ),
- ComponentParameter(
- id="pattern",
- name="Pattern",
- json_data_type="string",
- value=".*",
- description="Pattern to filter on",
- allowed_input_types=["file", "inputpath", "inputvalue"],
- ),
- ]
-
- # Instantiate a url-based component
- component_id = "test_component"
- component = Component(
- id=component_id,
- name="Filter text",
- description="",
- op="filter-text",
- catalog_type="url-catalog",
- component_reference={"url": url},
- definition=component_definition,
- categories=[],
- properties=properties,
- )
-
- # Fabricate the component cache to include single filename-based component for testing
- component_cache._component_cache[processor._type.name] = {
- "spoofed_catalog": {"components": {component_id: component}}
- }
-
- # Construct hypothetical operation for component
- operation_name = "Filter text test"
- operation_params = {
- "text": {"widget": "string", "value": "path/to/text.txt"},
- "pattern": {"widget": "string", "value": "hello"},
- }
- operation = Operation(
- id="filter-text-id",
- type="execution_node",
- classifier=component_id,
- name=operation_name,
- parent_operation_ids=[],
- component_params=operation_params,
- )
-
- # Build a mock runtime config for use in _cc_pipeline
- mocked_runtime = Metadata(name="test-metadata", display_name="test", schema_name="kfp", metadata=sample_metadata)
-
- mocked_func = mock.Mock(return_value="default", side_effect=[mocked_runtime, sample_metadata])
- monkeypatch.setattr(processor, "_get_metadata_configuration", mocked_func)
+def test_compose_container_command_args(processor: KfpPipelineProcessor):
+ """
+ Verify that _compose_container_command_args yields the expected output for valid input
+ """
- # Construct single-operation pipeline
- pipeline = Pipeline(
- id="pipeline-id", name="kfp_test", runtime="kfp", runtime_config="test", source="filter_text.pipeline"
+ pipeline_name = "test pipeline"
+ cos_endpoint = "https://minio:9000"
+ cos_bucket = "test_bucket"
+ cos_directory = "a_dir"
+ cos_dependencies_archive = "dummy-notebook-0815.tar.gz"
+ filename = "dummy-notebook.ipynb"
+
+ command_args = processor._compose_container_command_args(
+ pipeline_name=pipeline_name,
+ cos_endpoint=cos_endpoint,
+ cos_bucket=cos_bucket,
+ cos_directory=cos_directory,
+ cos_dependencies_archive=cos_dependencies_archive,
+ filename=filename,
)
- pipeline.operations[operation.id] = operation
+ assert f"--pipeline-name '{pipeline_name}'" in command_args
+ assert f"--cos-endpoint '{cos_endpoint}'" in command_args
+ assert f"--cos-bucket '{cos_bucket}'" in command_args
+ assert f"--cos-directory '{cos_directory}'" in command_args
+ assert f"--cos-dependencies-archive '{cos_dependencies_archive}'" in command_args
+ assert f"--file '{filename}'" in command_args
+
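+    # no file inputs or outputs were specified, so the corresponding flags must be absent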
+ assert "--inputs" not in command_args
+ assert "--outputs" not in command_args
+
+ # verify correct handling of file dependencies and file outputs
+ for file_dependency in [[], ["input_file.txt"], ["input_file.txt", "input_file_2.txt"]]:
+ for file_output in [[], ["output.csv"], ["output_1.csv", "output_2.pdf"]]:
+ command_args = processor._compose_container_command_args(
+ pipeline_name=pipeline_name,
+ cos_endpoint=cos_endpoint,
+ cos_bucket=cos_bucket,
+ cos_directory=cos_directory,
+ cos_dependencies_archive=cos_dependencies_archive,
+ filename=filename,
+ cos_inputs=file_dependency,
+ cos_outputs=file_output,
+ )
+
+ if len(file_dependency) < 1:
+ assert "--inputs" not in command_args
+ else:
+ assert f"--inputs '{';'.join(file_dependency)}'" in command_args
+
+ if len(file_output) < 1:
+ assert "--outputs" not in command_args
+ else:
+ assert f"--outputs '{';'.join(file_output)}'" in command_args
+
+
+def test_compose_container_command_args_invalid_dependency_filename(processor: KfpPipelineProcessor):
+ """
+ Verify that _compose_container_command_args fails if one or more of the
+    specified input file dependencies contain the reserved separator character
+ """
- # Establish path and function to construct pipeline
- pipeline_path = os.path.join(tmpdir, "kfp_test.yaml")
- constructed_pipeline_function = lambda: processor._cc_pipeline(pipeline=pipeline, pipeline_name="test_pipeline")
+ pipeline_name = "test pipeline"
+ cos_endpoint = "https://minio:9000"
+ cos_bucket = "test_bucket"
+ cos_directory = "a_dir"
+ cos_dependencies_archive = "dummy-notebook-0815.tar.gz"
+ filename = "dummy-notebook.ipynb"
+
+ reserved_separator_char = ";"
+
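+    # each dependency list below contains at least one filename with the reserved ';' separator,
+    # which must cause a ValueError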
+ for file_dependency in [
+ [f"input_file{reserved_separator_char}txt"],
+ ["input_file.txt", f"input{reserved_separator_char}_file_2.txt"],
+ ]:
+ # identify invalid file dependency name
+ invalid_file_name = [file for file in file_dependency if reserved_separator_char in file][0]
+ for file_output in [[], ["output.csv"], ["output_1.csv", "output_2.pdf"]]:
+ with pytest.raises(
+ ValueError,
+ match=re.escape(
+ f"Illegal character ({reserved_separator_char}) found in filename '{invalid_file_name}'."
+ ),
+ ):
+ command_args = processor._compose_container_command_args(
+ pipeline_name=pipeline_name,
+ cos_endpoint=cos_endpoint,
+ cos_bucket=cos_bucket,
+ cos_directory=cos_directory,
+ cos_dependencies_archive=cos_dependencies_archive,
+ filename=filename,
+ cos_inputs=file_dependency,
+ cos_outputs=file_output,
+ )
+ assert command_args is None
+
+
+def test_add_disable_node_caching(processor: KfpPipelineProcessor):
+ """
+ Verify that add_disable_node_caching updates the execution object as expected
+ """
+ execution_object = {}
+ for instance in [
+ DisableNodeCaching("True"),
+ DisableNodeCaching("False"),
+ ]:
+ processor.add_disable_node_caching(instance=instance, execution_object=execution_object)
+ assert execution_object.get("disable_node_caching") is instance.selection
+ assert len(execution_object.keys()) == 1
- # TODO Check against both argo and tekton compilations
- # Compile pipeline and save into pipeline_path
- kfp_argo_compiler.Compiler().compile(constructed_pipeline_function, pipeline_path)
- # Read contents of pipeline YAML
- with open(pipeline_path) as f:
- pipeline_yaml = yaml.safe_load(f.read())
+def test_add_custom_shared_memory_size(processor):
+ """
+ Verify that add_custom_shared_memory_size updates the execution object as expected
+ """
+ execution_object = {}
+ for instance in [CustomSharedMemorySize(None, None), CustomSharedMemorySize("", None)]:
+ processor.add_custom_shared_memory_size(instance=instance, execution_object=execution_object)
+ assert execution_object.get("kubernetes_shared_mem_size") is None
+
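+    # instances that specify a size must be recorded with their size and units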
+ for instance in [
+ CustomSharedMemorySize("0.5", None),
+ CustomSharedMemorySize("3.14", "G"),
+ CustomSharedMemorySize("256", "M"),
+ ]:
+ processor.add_custom_shared_memory_size(instance=instance, execution_object=execution_object)
+ assert execution_object["kubernetes_shared_mem_size"]["size"] == instance.size
+ assert execution_object["kubernetes_shared_mem_size"]["units"] == instance.units
+
+
+def test_add_kubernetes_secret(processor: KfpPipelineProcessor):
+ """
+ Verify that add_kubernetes_secret updates the execution object as expected
+ """
+ execution_object = {}
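+    # secrets are keyed by environment variable name, so the second "var" entry overwrites the first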
+ for instance in [
+ KubernetesSecret("var", "secret_name", "secret_key"),
+ KubernetesSecret("var2", "secret_name", "secret_key"),
+ KubernetesSecret("var", "secret_name_2", "secret_key_2"),
+ ]:
+ processor.add_kubernetes_secret(instance=instance, execution_object=execution_object)
+ assert execution_object["kubernetes_secrets"][instance.env_var]["name"] == instance.name
+ assert execution_object["kubernetes_secrets"][instance.env_var]["key"] == instance.key
- # Check the pipeline file contents for correctness
- pipeline_template = pipeline_yaml["spec"]["templates"][0]
- assert pipeline_template["metadata"]["annotations"]["pipelines.kubeflow.org/task_display_name"] == operation_name
- assert pipeline_template["inputs"]["artifacts"][0]["raw"]["data"] == operation_params["text"]
- assert pipeline_template["container"]["command"][4] == operation_params["pattern"]
+    # given the above instances, there should be two entries in the modified execution_object
+ assert len(execution_object["kubernetes_secrets"].keys()) == 2
-def test_processing_filename_runtime_specific_component(
- monkeypatch, processor, component_cache, sample_metadata, tmpdir
+def test_add_mounted_volume(processor: KfpPipelineProcessor):
+ """
+ Verify that add_mounted_volume updates the execution object as expected
+ """
+ execution_object = {}
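+    # volumes are keyed by mount path, so the duplicate "/mount/path" entry overwrites the first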
+ for instance in [
+ VolumeMount("/mount/path", "test-pvc", None, None),
+ VolumeMount("/mount/path2", "test-pvc-2", None, True),
+ VolumeMount("/mount/path3", "test-pvc-3", None, False),
+ VolumeMount("/mount/path4", "test-pvc-4", "sub/path", True),
+ VolumeMount("/mount/path", "test-pvc", None, True),
+ ]:
+ processor.add_mounted_volume(instance=instance, execution_object=execution_object)
+ assert execution_object["kubernetes_volumes"][instance.path]["pvc_name"] == instance.pvc_name
+ assert execution_object["kubernetes_volumes"][instance.path]["sub_path"] == instance.sub_path
+ assert execution_object["kubernetes_volumes"][instance.path]["read_only"] == instance.read_only
+
+    # given the above instances, there should be four entries in the modified execution_object
+ assert len(execution_object["kubernetes_volumes"].keys()) == 4
+
+
+def test_add_kubernetes_pod_annotation(processor: KfpPipelineProcessor):
+ """
+ Verify that add_kubernetes_pod_annotation updates the execution object as expected
+ """
+ execution_object = {}
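+    # annotations are keyed by annotation key; a value of None is stored as an empty string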
+ for instance in [
+ KubernetesAnnotation("annotation-key", None),
+ KubernetesAnnotation("prefix/annotation-key-2", ""),
+ KubernetesAnnotation("annotation-key-3", "annotation value"),
+ KubernetesAnnotation("annotation-key-3", "another annotation value"),
+ ]:
+ processor.add_kubernetes_pod_annotation(instance=instance, execution_object=execution_object)
+ if instance.value is not None:
+ assert execution_object["pod_annotations"][instance.key] == instance.value
+ else:
+ assert execution_object["pod_annotations"][instance.key] == ""
+
+    # given the above instances, there should be three entries in the modified execution_object
+ assert len(execution_object["pod_annotations"].keys()) == 3
+
+
+def test_add_kubernetes_pod_label(processor: KfpPipelineProcessor):
+ """
+ Verify that add_kubernetes_pod_label updates the execution object as expected
+ """
+ execution_object = {}
+ for instance in [
+ KubernetesLabel("label-key", None),
+ KubernetesLabel("label-key-2", ""),
+ KubernetesLabel("label-key-3", "label-value"),
+ KubernetesLabel("label-key-2", "a-different-label-value"),
+ ]:
+ processor.add_kubernetes_pod_label(instance=instance, execution_object=execution_object)
+ if instance.value is not None:
+ assert execution_object["pod_labels"][instance.key] == instance.value
+ else:
+ assert execution_object["pod_labels"][instance.key] == ""
+
+    # given the above instances, there should be three entries in the modified execution_object
+ assert len(execution_object["pod_labels"].keys()) == 3
+
+
+def test_add_kubernetes_toleration(processor: KfpPipelineProcessor):
+ """
+ Verify that add_kubernetes_toleration updates the execution object as expected
+ """
+ execution_object = {}
+ expected_unique_execution_object_entries = []
+ for instance in [
+ KubernetesToleration("toleration-key", "Exists", None, "NoExecute"),
+ KubernetesToleration("toleration-key", "Equals", 42, ""),
+ ]:
+ processor.add_kubernetes_toleration(instance=instance, execution_object=execution_object)
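+        # recompute the hash that keys each toleration entry (key::operator::value::effect)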
+ toleration_hash = hashlib.sha256(
+ f"{instance.key}::{instance.operator}::{instance.value}::{instance.effect}".encode()
+ ).hexdigest()
+ if toleration_hash not in expected_unique_execution_object_entries:
+ expected_unique_execution_object_entries.append(toleration_hash)
+ assert execution_object["kubernetes_tolerations"][toleration_hash]["key"] == instance.key
+ assert execution_object["kubernetes_tolerations"][toleration_hash]["value"] == instance.value
+ assert execution_object["kubernetes_tolerations"][toleration_hash]["operator"] == instance.operator
+ assert execution_object["kubernetes_tolerations"][toleration_hash]["effect"] == instance.effect
+ assert len(expected_unique_execution_object_entries) == len(execution_object["kubernetes_tolerations"].keys())
+
+
+def test_generate_pipeline_dsl_compile_pipeline_dsl_custom_component_pipeline(
+ processor: KfpPipelineProcessor, component_cache, tmpdir
):
- # Define the appropriate reader for a filesystem-type component definition
- kfp_supported_file_types = [".yaml"]
- reader = FilesystemComponentCatalogConnector(kfp_supported_file_types)
+ """
+ Verify that _generate_pipeline_dsl and _compile_pipeline_dsl yield
+    the expected output for a pipeline that includes a custom component
+ """
- # Assign test resource location
- absolute_path = os.path.abspath(
- os.path.join(os.path.dirname(__file__), "..", "resources", "components", "download_data.yaml")
- )
+ # load test component definition
+ component_def_path = Path(__file__).parent / ".." / "resources" / "components" / "download_data.yaml"
# Read contents of given path -- read_component_definition() returns a
# a dictionary of component definition content indexed by path
- entry_data = reader.get_entry_data({"path": absolute_path}, {})
+ reader = FilesystemComponentCatalogConnector([".yaml"])
+ entry_data = reader.get_entry_data({"path": str(component_def_path.absolute())}, {})
component_definition = entry_data.definition
properties = [
@@ -424,10 +659,10 @@ def test_processing_filename_runtime_specific_component(
component = Component(
id=component_id,
name="Download data",
- description="",
+ description="download data from web",
op="download-data",
catalog_type="elyra-kfp-examples-catalog",
- component_reference={"path": absolute_path},
+ component_reference={"path": component_def_path.as_posix()},
definition=component_definition,
properties=properties,
categories=[],
@@ -438,10 +673,14 @@ def test_processing_filename_runtime_specific_component(
"spoofed_catalog": {"components": {component_id: component}}
}
- # Construct hypothetical operation for component
+ # Construct operation for component
operation_name = "Download data test"
operation_params = {
- "url": {"widget": "file", "value": "resources/sample_pipelines/pipeline_valid.json"},
+ "url": {
+ "widget": "string",
+ "value": "https://raw.githubusercontent.com/elyra-ai/examples/"
+ "main/pipelines/run-pipelines-on-kubeflow-pipelines/data/data.csv",
+ },
"curl_options": {"widget": "string", "value": "--location"},
}
operation = Operation(
@@ -453,176 +692,973 @@ def test_processing_filename_runtime_specific_component(
component_params=operation_params,
)
- # Build a mock runtime config for use in _cc_pipeline
- mocked_runtime = Metadata(name="test-metadata", display_name="test", schema_name="kfp", metadata=sample_metadata)
-
- mocked_func = mock.Mock(return_value="default", side_effect=[mocked_runtime, sample_metadata])
- monkeypatch.setattr(processor, "_get_metadata_configuration", mocked_func)
-
# Construct single-operation pipeline
pipeline = Pipeline(
- id="pipeline-id", name="kfp_test", runtime="kfp", runtime_config="test", source="download_data.pipeline"
+ id="pipeline-id",
+ name="code-gen-test-custom-components",
+ description="Test code generation for custom components",
+ runtime="kfp",
+ runtime_config="test",
+ source="download_data.pipeline",
)
pipeline.operations[operation.id] = operation
- # Establish path and function to construct pipeline
- pipeline_path = os.path.join(tmpdir, "kfp_test.yaml")
- constructed_pipeline_function = lambda: processor._cc_pipeline(pipeline=pipeline, pipeline_name="test_pipeline")
+ # generate Python DSL for the Argo workflow engine
+ generated_argo_dsl = processor._generate_pipeline_dsl(
+ pipeline=pipeline, pipeline_name=pipeline.name, workflow_engine=WorkflowEngineType.ARGO
+ )
+
+ assert generated_argo_dsl is not None
+ # Generated DSL includes workflow engine specific code in the _main_ function
+ assert "kfp.compiler.Compiler().compile(" in generated_argo_dsl
+
+ compiled_argo_output_file = Path(tmpdir) / "compiled_kfp_test_argo.yaml"
+
+    # make sure the output file does not exist (on Python 3.8+ unlink(missing_ok=True) could be used instead)
+ if compiled_argo_output_file.is_file():
+ compiled_argo_output_file.unlink()
- # TODO Check against both argo and tekton compilations
- # Compile pipeline and save into pipeline_path
- kfp_argo_compiler.Compiler().compile(constructed_pipeline_function, pipeline_path)
+ # if the compiler discovers an issue with the generated DSL this call fails
+ processor._compile_pipeline_dsl(
+ dsl=generated_argo_dsl,
+ workflow_engine=WorkflowEngineType.ARGO,
+ output_file=compiled_argo_output_file.as_posix(),
+ pipeline_conf=None,
+ )
+
+ # verify that the output file exists
+ assert compiled_argo_output_file.is_file()
- # Read contents of pipeline YAML
- with open(pipeline_path) as f:
- pipeline_yaml = yaml.safe_load(f.read())
+ # verify the file content
+ with open(compiled_argo_output_file) as fh:
+ argo_spec = yaml.safe_load(fh.read())
- # Check the pipeline file contents for correctness
- pipeline_template = pipeline_yaml["spec"]["templates"][0]
- assert pipeline_template["metadata"]["annotations"]["pipelines.kubeflow.org/task_display_name"] == operation_name
- assert pipeline_template["container"]["command"][3] == operation_params["url"]
- assert '"doc_type": "pipeline"' in pipeline_template["container"]["command"][3]
+ assert "argoproj.io/" in argo_spec["apiVersion"]
+ pipeline_spec_annotations = json.loads(argo_spec["metadata"]["annotations"]["pipelines.kubeflow.org/pipeline_spec"])
+ assert (
+ pipeline_spec_annotations["name"] == pipeline.name
+ ), f"DSL input: {generated_argo_dsl}\nArgo output: {argo_spec}"
+ assert pipeline_spec_annotations["description"] == pipeline.description, pipeline_spec_annotations
+
+ # generate Python DSL for the Tekton workflow engine
+ generated_tekton_dsl = processor._generate_pipeline_dsl(
+ pipeline=pipeline, pipeline_name=pipeline.name, workflow_engine=WorkflowEngineType.TEKTON
+ )
+ assert generated_tekton_dsl is not None
+ # Generated DSL includes workflow engine specific code in the _main_ function
+ assert "compiler.TektonCompiler().compile(" in generated_tekton_dsl
+
+ compiled_tekton_output_file = Path(tmpdir) / "compiled_kfp_test_tekton.yaml"
+
+ # if the compiler discovers an issue with the generated DSL this call fails
+ processor._compile_pipeline_dsl(
+ dsl=generated_tekton_dsl,
+ workflow_engine=WorkflowEngineType.TEKTON,
+ output_file=compiled_tekton_output_file.as_posix(),
+ pipeline_conf=None,
+ )
-def test_cc_pipeline_component_no_input(monkeypatch, processor, component_cache, sample_metadata, tmpdir):
+ # verify that the output file exists
+ assert compiled_tekton_output_file.is_file()
+
+ # verify the file content
+ with open(compiled_tekton_output_file) as fh:
+ tekton_spec = yaml.safe_load(fh.read())
+
+ assert "tekton.dev/" in tekton_spec["apiVersion"]
+
+
+def load_and_patch_pipeline(
+ pipeline_filename: Union[str, Path], with_cos_object_prefix: bool = False
+) -> Union[None, Pipeline]:
"""
- Verifies that cc_pipeline can handle KFP component definitions that don't
- include any inputs
+ This utility function loads pipeline_filename and injects additional metadata, similar
+ to what is done when a pipeline is submitted.
"""
- # Define the appropriate reader for a filesystem-type component definition
- kfp_supported_file_types = [".yaml"]
- reader = FilesystemComponentCatalogConnector(kfp_supported_file_types)
- # Assign test resource location
- cpath = (Path(__file__).parent / ".." / "resources" / "components" / "kfp_test_operator_no_inputs.yaml").resolve()
- assert cpath.is_file()
- cpath = str(cpath)
+ assert pipeline_filename is not None, "A pipeline filename is required."
+
+ if not isinstance(pipeline_filename, Path):
+ pipeline_filename = Path(pipeline_filename)
+
+ assert pipeline_filename.is_file(), f"Pipeline '{pipeline_filename}' does not exist."
+
+ # load file content
+ with open(pipeline_filename, "r") as fh:
+ pipeline_json = json.loads(fh.read())
+
+ # This rudimentary implementation assumes that the provided file is a valid
+ # pipeline file, which contains a primary pipeline.
+ if len(pipeline_json["pipelines"]) > 0:
+ # Add runtime information
+ if pipeline_json["pipelines"][0]["app_data"].get("runtime", None) is None:
+ pipeline_json["pipelines"][0]["app_data"]["runtime"] = "Kubeflow Pipelines"
+ if pipeline_json["pipelines"][0]["app_data"].get("runtime_type", None) is None:
+ pipeline_json["pipelines"][0]["app_data"]["runtime_type"] = "KUBEFLOW_PIPELINES"
+ # Add the filename as pipeline source information
+ if pipeline_json["pipelines"][0]["app_data"].get("source", None) is None:
+ pipeline_json["pipelines"][0]["app_data"]["source"] = pipeline_filename.name
+
+ if with_cos_object_prefix:
+ # Define a dummy COS prefix, if none is defined
+ if pipeline_json["pipelines"][0]["app_data"]["properties"].get("pipeline_defaults") is None:
+ pipeline_json["pipelines"][0]["app_data"]["properties"]["pipeline_defaults"] = {}
+ if (
+ pipeline_json["pipelines"][0]["app_data"]["properties"]["pipeline_defaults"].get(COS_OBJECT_PREFIX)
+ is None
+ ):
+ pipeline_json["pipelines"][0]["app_data"]["properties"]["pipeline_defaults"][
+ COS_OBJECT_PREFIX
+ ] = "test/project"
+ else:
+ # Remove the prefix, if one is already defined
+ if pipeline_json["pipelines"][0]["app_data"]["properties"].get("pipeline_defaults") is not None:
+ pipeline_json["pipelines"][0]["app_data"]["properties"]["pipeline_defaults"].pop(
+ COS_OBJECT_PREFIX, None
+ )
+
+ return PipelineParser().parse(pipeline_json=pipeline_json)
+
+
+def generate_mocked_runtime_image_configurations(
+ pipeline: Pipeline, require_pull_secret: bool = False
+) -> List[Metadata]:
+ """
+ Generates mocked runtime configuration entries for each unique
+ runtime image that is referenced by the pipeline's generic nodes.
+ """
+ if pipeline is None:
+ raise ValueError("Pipeline parameter is required")
+ mocked_runtime_image_configurations = []
+ unique_image_names = []
+ # Iterate through pipeline nodes, extract the container image references
+ # for all generic operations, and produce mocked runtime image configurations.
+ counter = 1
+ for operation in pipeline.operations.values():
+ if isinstance(operation, GenericOperation):
+ if operation.runtime_image not in unique_image_names:
+ name = f"mocked-image-{counter}"
+ m = {
+ "image_name": operation.runtime_image,
+ "pull_policy": "IfNotPresent",
+ }
+ if require_pull_secret:
+ m["pull_secret"] = f"{name.lower().replace(' ', '-')}-secret"
+
+ mocked_runtime_image_configurations.append(
+ Metadata(
+ name=name,
+ display_name="test-image",
+ schema_name="runtime-image",
+ metadata=m,
+ )
+ )
+ unique_image_names.append(operation.runtime_image)
+
+ return mocked_runtime_image_configurations
+
+
+@pytest.mark.parametrize(
+ "kfp_runtime_config",
+ [
+ kfp_runtime_config(workflow_engine=WorkflowEngineType.ARGO),
+ kfp_runtime_config(workflow_engine=WorkflowEngineType.TEKTON),
+ ],
+)
+def test_generate_pipeline_dsl_compile_pipeline_dsl_workflow_engine_test(
+ monkeypatch, processor: KfpPipelineProcessor, kfp_runtime_config: Metadata, tmpdir
+):
+ """
+ This test validates the following:
+ - _generate_pipeline_dsl generates Python code for the supported workflow engines
+ - _compile_pipeline_dsl compiles the generated code using the workflow engine's compiler
- # Read contents of given path -- read_component_definition() returns a
- # a dictionary of component definition content indexed by path
- entry_data = reader.get_entry_data({"path": cpath}, {})
- component_definition = entry_data.definition
+ This test does not validate that the output artifacts correctly reflect the test pipeline.
+ Other tests do that.
+ """
+ workflow_engine = WorkflowEngineType.get_instance_by_value(kfp_runtime_config.metadata["engine"])
- # Instantiate a file-based component
- component_id = "test-component"
- component = Component(
- id=component_id,
- name="No input data",
- description="",
- op="no-input-data",
- catalog_type="elyra-kfp-examples-catalog",
- component_reference={"path": cpath},
- definition=component_definition,
- properties=[],
- categories=[],
+ # Any valid pipeline file can be used to run this test, as long as it includes at least one node.
+ test_pipeline_file = (
+ Path(__file__).parent / ".." / "resources" / "test_pipelines" / "kfp" / "kfp-one-node-generic.pipeline"
)
+ # Instantiate a pipeline object to make it easier to obtain the information
+ # needed to perform validation.
+ pipeline = load_and_patch_pipeline(test_pipeline_file, False)
+ assert pipeline is not None
- # Fabricate the component cache to include single filename-based component for testing
- component_cache._component_cache[processor._type.name] = {
- "spoofed_catalog": {"components": {component_id: component}}
- }
+ mocked_runtime_image_configurations = generate_mocked_runtime_image_configurations(pipeline)
- # Construct hypothetical operation for component
- operation_name = "no-input-test"
- operation_params = {}
- operation = Operation(
- id="no-input-id",
- type="execution_node",
- classifier=component_id,
- name=operation_name,
- parent_operation_ids=[],
- component_params=operation_params,
+ mock_side_effects = [kfp_runtime_config] + [mocked_runtime_image_configurations]
+ mocked_func = mock.Mock(return_value="default", side_effect=mock_side_effects)
+ monkeypatch.setattr(processor, "_get_metadata_configuration", mocked_func)
+ monkeypatch.setattr(processor, "_upload_dependencies_to_object_store", lambda w, x, y, prefix: True)
+ monkeypatch.setattr(processor, "_verify_cos_connectivity", lambda x: True)
+
+ compiled_output_file = Path(tmpdir) / test_pipeline_file.with_suffix(".yaml")
+ compiled_output_file_name = str(compiled_output_file.absolute())
+
+ # generate Python DSL for the specified workflow engine
+ pipeline_version = f"{pipeline.name}-test-0"
+ pipeline_instance_id = f"{pipeline.name}-{datetime.now().strftime('%m%d%H%M%S')}"
+ experiment_name = f"{pipeline.name}-test-0"
+ generated_dsl = processor._generate_pipeline_dsl(
+ pipeline=pipeline,
+ pipeline_name=pipeline.name,
+ workflow_engine=workflow_engine,
+ pipeline_version=pipeline_version,
+ pipeline_instance_id=pipeline_instance_id,
+ experiment_name=experiment_name,
+ )
+
+ # Check the workflow engine specific code in the generated DSL
+ if workflow_engine == WorkflowEngineType.TEKTON:
+ assert "from kfp_tekton import compiler" in generated_dsl, f"engine: {workflow_engine}\ndsl: {generated_dsl}"
+ assert "compiler.TektonCompiler().compile(" in generated_dsl
+ assert "kfp.compiler.Compiler().compile(" not in generated_dsl
+ else:
+ assert "from kfp_tekton import compiler" not in generated_dsl
+ assert "compiler.TektonCompiler().compile(" not in generated_dsl
+ assert "kfp.compiler.Compiler().compile(" in generated_dsl
+
+ # Compile the generated Python DSL
+ processor._compile_pipeline_dsl(
+ dsl=generated_dsl,
+ workflow_engine=workflow_engine,
+ output_file=compiled_output_file_name,
+ pipeline_conf=None,
+ )
+
+ # Load compiled workflow
+ with open(compiled_output_file_name) as f:
+ workflow_spec = yaml.safe_load(f.read())
+
+ # Verify that the output is for the specified workflow engine
+ if workflow_engine == WorkflowEngineType.TEKTON:
+ assert "tekton.dev/" in workflow_spec["apiVersion"]
+ else:
+ assert "argoproj.io/" in workflow_spec["apiVersion"]
+
+
+@pytest.mark.parametrize("use_cos_object_prefix", [True, False])
+@pytest.mark.parametrize(
+ "kfp_runtime_config",
+ [
+ kfp_runtime_config(workflow_engine=WorkflowEngineType.ARGO, use_cos_credentials_secret=True),
+ kfp_runtime_config(workflow_engine=WorkflowEngineType.ARGO, use_cos_credentials_secret=False),
+ ],
+)
+def test_generate_pipeline_dsl_compile_pipeline_dsl_one_generic_node_pipeline_test_1(
+ monkeypatch, processor: KfpPipelineProcessor, kfp_runtime_config: Metadata, use_cos_object_prefix: bool, tmpdir
+):
+ """
+ This test validates that the output of _generate_pipeline_dsl and _compile_pipeline_dsl
+ yields the expected results for a generic node that has only the required inputs defined.
+
+ This test covers:
+ - the Argo workflow engine
+ - runtime configurations that use cloud storage authentication types KUBERNETES_SECRET
+ and USER_CREDENTIALS (the generated code varies depending on the selected type)
+
+ Other tests cover the scenarios where the user defined optional properties,
+ such as environment variables, Kubernetes labels, or data volumes.
+ """
+
+ workflow_engine = WorkflowEngineType.get_instance_by_value(kfp_runtime_config.metadata["engine"])
+
+ # The test pipeline should only include one generic node that has only the following
+ # required properties defined:
+ # - runtime image
+ test_pipeline_file = (
+ Path(__file__).parent / ".." / "resources" / "test_pipelines" / "kfp" / "kfp-one-node-generic.pipeline"
)
+ # Instantiate a pipeline object to make it easier to obtain the information
+ # needed to perform validation.
+ pipeline = load_and_patch_pipeline(test_pipeline_file, use_cos_object_prefix)
+ assert pipeline is not None
+
+ # Make sure this is a one generic node pipeline
+ assert len(pipeline.operations.keys()) == 1
+ assert isinstance(list(pipeline.operations.values())[0], GenericOperation)
+ # Use 'op' variable to access the operation
+ op = list(pipeline.operations.values())[0]
- # Build a mock runtime config for use in _cc_pipeline
- mocked_runtime = Metadata(name="test-metadata", display_name="test", schema_name="kfp", metadata=sample_metadata)
+ mocked_runtime_image_configurations = generate_mocked_runtime_image_configurations(pipeline)
- mocked_func = mock.Mock(return_value="default", side_effect=[mocked_runtime, sample_metadata])
+ mock_side_effects = [kfp_runtime_config] + [mocked_runtime_image_configurations]
+ mocked_func = mock.Mock(return_value="default", side_effect=mock_side_effects)
monkeypatch.setattr(processor, "_get_metadata_configuration", mocked_func)
+ monkeypatch.setattr(processor, "_upload_dependencies_to_object_store", lambda w, x, y, prefix: True)
+ monkeypatch.setattr(processor, "_verify_cos_connectivity", lambda x: True)
- # Construct single-operation pipeline
- pipeline = Pipeline(
- id="pipeline-id", name="kfp_test", runtime="kfp", runtime_config="test", source="no_input.pipeline"
+ compiled_argo_output_file = Path(tmpdir) / test_pipeline_file.with_suffix(".yaml")
+ compiled_argo_output_file_name = str(compiled_argo_output_file.absolute())
+
+ # generate Python DSL for the Argo workflow engine
+ pipeline_version = f"{pipeline.name}-0815"
+ pipeline_instance_id = f"{pipeline.name}-{datetime.now().strftime('%m%d%H%M%S')}"
+ experiment_name = f"{pipeline.name}-0815"
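+    # pipeline_version and experiment_name are surfaced as elyra/* labels on the node template and verified below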
+ generated_dsl = processor._generate_pipeline_dsl(
+ pipeline=pipeline,
+ pipeline_name=pipeline.name,
+ workflow_engine=workflow_engine,
+ pipeline_version=pipeline_version,
+ pipeline_instance_id=pipeline_instance_id,
+ experiment_name=experiment_name,
)
- pipeline.operations[operation.id] = operation
- constructed_pipeline_function = lambda: processor._cc_pipeline(pipeline=pipeline, pipeline_name="test_pipeline")
- pipeline_path = str(Path(tmpdir) / "no_inputs_test.yaml")
+ # if the compiler discovers an issue with the generated DSL this call fails
+ processor._compile_pipeline_dsl(
+ dsl=generated_dsl,
+ workflow_engine=workflow_engine,
+ output_file=compiled_argo_output_file_name,
+ pipeline_conf=None,
+ )
- # Compile pipeline and save into pipeline_path
- kfp_argo_compiler.Compiler().compile(constructed_pipeline_function, pipeline_path)
+ # Load generated Argo workflow
+ with open(compiled_argo_output_file_name) as f:
+ argo_spec = yaml.safe_load(f.read())
+
+ # verify that this is an argo specification
+ assert "argoproj.io" in argo_spec["apiVersion"]
+
+ pipeline_meta_annotations = json.loads(argo_spec["metadata"]["annotations"]["pipelines.kubeflow.org/pipeline_spec"])
+ assert pipeline_meta_annotations["name"] == pipeline.name
+ assert pipeline_meta_annotations["description"] == pipeline.description
+
+ # There should be two templates, one for the DAG and one for the generic node.
+ # Locate the one for the generic node and inspect its properties.
+ assert len(argo_spec["spec"]["templates"]) == 2
+ if argo_spec["spec"]["templates"][0]["name"] == argo_spec["spec"]["entrypoint"]:
+ node_template = argo_spec["spec"]["templates"][1]
+ else:
+ node_template = argo_spec["spec"]["templates"][0]
+
+ # Verify component definition information (see generic_component_definition_template.jinja2)
+ # - property 'name'
+ assert node_template["name"] == "run-a-file"
+ # - property 'implementation.container.command'
+ assert node_template["container"]["command"] == ["sh", "-c"]
+ # - property 'implementation.container.args'
+ # This is a CLOB, which we need to spot check.
+ assert isinstance(node_template["container"]["args"], list) and len(node_template["container"]["args"]) == 1
+ # Check for things that must be in this CLOB:
+ # - the pipeline name
+ assert f"--pipeline-name '{pipeline.name}'" in node_template["container"]["args"][0]
+ # - the object storage endpoint that this node uses for file I/O
+ assert f"--cos-endpoint '{kfp_runtime_config.metadata['cos_endpoint']}'" in node_template["container"]["args"][0]
+ # - the object storage bucket name that this node uses for file I/O
+ assert f"--cos-bucket '{kfp_runtime_config.metadata['cos_bucket']}'" in node_template["container"]["args"][0]
+ # - the directory within that object storage bucket
+ if pipeline.pipeline_properties.get(COS_OBJECT_PREFIX):
+ expected_directory_value = join_paths(pipeline.pipeline_properties.get(COS_OBJECT_PREFIX), pipeline_instance_id)
+ assert f"--cos-directory '{expected_directory_value}' " in node_template["container"]["args"][0]
+ else:
+ assert f"--cos-directory '{pipeline_instance_id}" in node_template["container"]["args"][0]
+ # - the name of the archive in that directory
+ expected_archive_name = processor._get_dependency_archive_name(op)
+ assert f"--cos-dependencies-archive '{expected_archive_name}' " in node_template["container"]["args"][0]
+ # - the name of the file that this node processes, which is included in that archive
+ assert f"--file '{op.filename}'" in node_template["container"]["args"][0]
+
+ # Check for things that should not be in this CLOB:
+ # - Since it's a one-node pipeline, the component cannot have any "--inputs",
+ # which are declared object storage output files from upstream components.
+ assert "--inputs" not in node_template["container"]["args"]
+ # - The component does not declare "--outputs",
+ # which are output files that need to be stored on object storage.
+ assert "--outputs" not in node_template["container"]["args"]
+
+ # - property 'implementation.container.image'
+ assert node_template["container"]["image"] == op.runtime_image
+ # - property 'implementation.container.imagePullPolicy'
+    # The image pull policy is defined in the runtime image
+    # configuration. Look it up and verify that it is properly applied.
+ for runtime_image_config in mocked_runtime_image_configurations:
+ if runtime_image_config.metadata["image_name"] == op.runtime_image:
+ if runtime_image_config.metadata.get("pull_policy"):
+ assert node_template["container"]["imagePullPolicy"] == runtime_image_config.metadata["pull_policy"]
+ else:
+ assert node_template["container"].get("imagePullPolicy") is None
+ break
+
+ # Verify Kubernetes labels and annotations that Elyra attaches to pods that
+ # execute generic nodes or custom nodes
+ if op.doc:
+ # only set if a comment is attached to the node
+ assert node_template["metadata"]["annotations"].get("elyra/node-user-doc") == op.doc
+
+ # Verify Kubernetes labels and annotations that Elyra attaches to pods that
+ # execute generic nodes
+ assert node_template["metadata"]["annotations"]["elyra/node-file-name"] == op.filename
+ if pipeline.source:
+ assert node_template["metadata"]["annotations"]["elyra/pipeline-source"] == pipeline.source
+ assert node_template["metadata"]["labels"]["elyra/node-name"] == sanitize_label_value(op.name)
+ assert node_template["metadata"]["labels"]["elyra/node-type"] == sanitize_label_value("notebook-script")
+ assert node_template["metadata"]["labels"]["elyra/pipeline-name"] == sanitize_label_value(pipeline.name)
+ assert node_template["metadata"]["labels"]["elyra/pipeline-version"] == sanitize_label_value(pipeline_version)
+ assert node_template["metadata"]["labels"]["elyra/experiment-name"] == sanitize_label_value(experiment_name)
+
+ # Verify environment variables that Elyra attaches to pods that
+ # execute generic nodes. All values are hard-coded in the template, with the
+ # exception of "AWS_ACCESS_KEY_ID" and "AWS_SECRET_ACCESS_KEY",
+ # which are derived from a Kubernetes secret, if the runtime configuration
+ # is configured to use one.
+ use_secret_for_cos_authentication = kfp_runtime_config.metadata["cos_auth_type"] == "KUBERNETES_SECRET"
+
+ assert node_template["container"].get("env") is not None, node_template["container"]
+ for env_var in node_template["container"]["env"]:
+ if env_var["name"] == "ELYRA_RUNTIME_ENV":
+ assert env_var["value"] == "kfp"
+ elif env_var["name"] == "ELYRA_ENABLE_PIPELINE_INFO":
+ assert env_var["value"] == "True"
+ elif env_var["name"] == "ELYRA_WRITABLE_CONTAINER_DIR":
+ assert env_var["value"] == KfpPipelineProcessor.WCD
+ elif env_var["name"] == "ELYRA_RUN_NAME":
+ assert env_var["value"] == RUN_ID_PLACEHOLDER
+ elif env_var["name"] == "AWS_ACCESS_KEY_ID":
+ if use_secret_for_cos_authentication:
+ assert env_var["valueFrom"]["secretKeyRef"]["key"] == "AWS_ACCESS_KEY_ID"
+ assert env_var["valueFrom"]["secretKeyRef"]["name"] == kfp_runtime_config.metadata["cos_secret"]
+ else:
+ assert env_var["value"] == kfp_runtime_config.metadata["cos_username"]
+ elif env_var["name"] == "AWS_SECRET_ACCESS_KEY":
+ if use_secret_for_cos_authentication:
+ assert env_var["valueFrom"]["secretKeyRef"]["key"] == "AWS_SECRET_ACCESS_KEY"
+ assert env_var["valueFrom"]["secretKeyRef"]["name"] == kfp_runtime_config.metadata["cos_secret"]
+ else:
+ assert env_var["value"] == kfp_runtime_config.metadata["cos_password"]
+
+ # Verify that the mlpipeline specific outputs are declared
+ assert node_template.get("outputs") is not None, node_template
+    assert node_template["outputs"]["artifacts"] is not None, node_template["outputs"]
+ assert node_template["outputs"]["artifacts"][0]["name"] == "mlpipeline-metrics"
+ assert (
+ node_template["outputs"]["artifacts"][0]["path"]
+ == (Path(KfpPipelineProcessor.WCD) / "mlpipeline-metrics.json").as_posix()
+ )
+ assert node_template["outputs"]["artifacts"][1]["name"] == "mlpipeline-ui-metadata"
+ assert (
+ node_template["outputs"]["artifacts"][1]["path"]
+ == (Path(KfpPipelineProcessor.WCD) / "mlpipeline-ui-metadata.json").as_posix()
+ )
-@pytest.mark.parametrize("parsed_pipeline", [PIPELINE_FILE_COMPLEX], indirect=True)
-def test_create_yaml_complex_pipeline(monkeypatch, processor, parsed_pipeline, sample_metadata, tmpdir):
- pipeline_json = _read_pipeline_resource(PIPELINE_FILE_COMPLEX)
+@pytest.fixture(autouse=False)
+def enable_and_disable_crio(request):
+ """
+ Set and unset the CRIO_RUNTIME environment variable, if requested
+ """
+ # Define variable prior to the test
+ if request.param:
+ os.environ["CRIO_RUNTIME"] = "True"
- # Ensure the value of COS_OBJECT_PREFIX has been propagated to the Pipeline object appropriately
- cos_prefix = pipeline_json["pipelines"][0]["app_data"]["properties"]["pipeline_defaults"].get(COS_OBJECT_PREFIX)
- assert cos_prefix == parsed_pipeline.pipeline_properties.get(COS_OBJECT_PREFIX)
+ yield
- # Build a mock runtime config for use in _cc_pipeline
- mocked_runtime = Metadata(name="test-metadata", display_name="test", schema_name="kfp", metadata=sample_metadata)
- # Build mock runtime images for use in _cc_pipeline
- image_one_md = {"image_name": "tensorflow/tensorflow:2.0.0-py3", "pull_policy": "IfNotPresent", "tags": []}
- image_two_md = {"image_name": "elyra/examples:1.0.0-py3", "pull_policy": "Always", "tags": []}
- mocked_images = [
- Metadata(name="test-image-metadata", display_name="test-image", schema_name="kfp", metadata=image_one_md),
- Metadata(name="test-image-metadata2", display_name="test-image2", schema_name="kfp", metadata=image_two_md),
- ]
+ # Remove variable after the test
+ if request.param:
+ del os.environ["CRIO_RUNTIME"]
+
+
+@pytest.mark.parametrize("enable_and_disable_crio", [False, True], indirect=True)
+@pytest.mark.parametrize(
+ "kfp_runtime_config",
+ [
+ kfp_runtime_config(
+ workflow_engine=WorkflowEngineType.ARGO,
+ ),
+ ],
+)
+def test_generate_pipeline_dsl_compile_pipeline_dsl_generic_component_crio(
+ monkeypatch, processor: KfpPipelineProcessor, kfp_runtime_config: Metadata, tmpdir, enable_and_disable_crio
+):
+ """
+ This test validates that the output of _generate_pipeline_dsl and _compile_pipeline_dsl
+ yields the expected results for a generic node when the CRIO_RUNTIME environment variable
+ is set to a valid string representation of the boolean value True (/true/i).
+ Test assumptions:
+ - Enabling CRIO_RUNTIME has the same effect for all supported workflow engines
+ - The test pipeline contains at least one generic node
+
+ With CRIO_RUNTIME enabled, the compiled output must include the following properties:
+ - in spec.templates[].volumes:
+ - emptyDir: {medium: '', sizeLimit: 20Gi}
+ name: workspace
+ """
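+    # the enable_and_disable_crio fixture sets CRIO_RUNTIME before the test body runs; mirror its state here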
+ crio_runtime_enabled = os.environ.get("CRIO_RUNTIME", "").lower() == "true"
+
+ workflow_engine = WorkflowEngineType.get_instance_by_value(kfp_runtime_config.metadata["engine"])
+
+ # Any valid pipeline file can be used to run this test, as long as it includes at least one generic node.
+ test_pipeline_file = (
+ Path(__file__).parent / ".." / "resources" / "test_pipelines" / "kfp" / "kfp-one-node-generic.pipeline"
+ )
+ # Instantiate a pipeline object to make it easier to obtain the information
+ # needed to perform validation.
+ pipeline = load_and_patch_pipeline(pipeline_filename=test_pipeline_file, with_cos_object_prefix=False)
+ assert pipeline is not None
+
+ mocked_runtime_image_configurations = generate_mocked_runtime_image_configurations(
+ pipeline,
+ require_pull_secret=False,
+ )
+
+ assert kfp_runtime_config is not None
+ assert mocked_runtime_image_configurations is not None
+
+ monkeypatch.setattr(processor, "_upload_dependencies_to_object_store", lambda w, x, y, prefix: True)
+ monkeypatch.setattr(processor, "_verify_cos_connectivity", lambda x: True)
+
+ # Test begins here
+
+ compiled_output_file = Path(tmpdir) / test_pipeline_file.with_suffix(".yaml")
+ compiled_output_file_name = str(compiled_output_file.absolute())
- # Mock necessary functions (incl. side effects for each node)
- mock_side_effects = [mocked_runtime] + [mocked_images for _ in range(len(pipeline_json["pipelines"][0]["nodes"]))]
+ # generate Python DSL for the specified workflow engine
+ pipeline_version = f"{pipeline.name}-test-0"
+ pipeline_instance_id = f"{pipeline.name}-{datetime.now().strftime('%m%d%H%M%S')}"
+ experiment_name = f"{pipeline.name}-test-0"
+
+ # Generate pipeline DSL; this requires the _get_metadata_configuration mock
+ monkeypatch.setattr(
+ processor,
+ "_get_metadata_configuration",
+ mock.Mock(return_value="default", side_effect=[kfp_runtime_config] + [mocked_runtime_image_configurations]),
+ )
+ generated_dsl = processor._generate_pipeline_dsl(
+ pipeline=pipeline,
+ pipeline_name=pipeline.name,
+ workflow_engine=workflow_engine,
+ pipeline_version=pipeline_version,
+ pipeline_instance_id=pipeline_instance_id,
+ experiment_name=experiment_name,
+ )
+
+ # Compile the DSL
+ processor._compile_pipeline_dsl(
+ dsl=generated_dsl,
+ workflow_engine=workflow_engine,
+ output_file=compiled_output_file_name,
+ pipeline_conf=None,
+ )
+
+ # Load compiled workflow
+ with open(compiled_output_file_name) as f:
+ compiled_spec = yaml.safe_load(f.read())
+
+ # There should be multiple templates, one for the DAG and one for every generic node.
+ assert len(compiled_spec["spec"]["templates"]) >= 2
+ if crio_runtime_enabled:
+ for template in compiled_spec["spec"]["templates"]:
+ if template["name"] == compiled_spec["spec"]["entrypoint"]:
+ continue
+ # Check volume definition
+ assert template.get("volumes") is not None, template
+ entry_found = False
+ for volume_entry in template["volumes"]:
+ if volume_entry["name"] != CRIO_VOL_DEF_NAME:
+ continue
+ assert (
+ volume_entry.get("emptyDir") is not None
+ ), f"Unexpected volume entry '{CRIO_VOL_DEF_NAME}': {volume_entry} "
+ assert volume_entry["emptyDir"]["sizeLimit"] == CRIO_VOL_DEF_SIZE
+ assert volume_entry["emptyDir"]["medium"] == CRIO_VOL_DEF_MEDIUM
+ entry_found = True
+ assert entry_found, f"Missing volume entry '{CRIO_VOL_DEF_NAME}' for CRI-O in {template['volumes']}"
+ # Check volume mount definition
+ assert template["container"].get("volumeMounts") is not None, template["container"]
+ for volumemount_entry in template["container"]["volumeMounts"]:
+ entry_found = False
+ if volumemount_entry["name"] != CRIO_VOL_DEF_NAME:
+ continue
+ assert volumemount_entry["mountPath"] == CRIO_VOL_MOUNT_PATH
+ entry_found = True
+ break
+ assert (
+ entry_found
+ ), f"Missing volume mount entry '{CRIO_VOL_DEF_NAME}' for CRI-O in {template['container']['volumeMounts']}"
+ # Check PYTHONPATH environment variable (python_user_lib_path)
+ assert template["container"].get("env") is not None, template["container"]
+ for env_entry in template["container"]["env"]:
+ entry_found = False
+ if env_entry["name"] != "PYTHONPATH":
+ continue
+ assert env_entry["value"] == CRIO_VOL_PYTHON_PATH
+ entry_found = True
+ break
+ assert entry_found, f"Missing env variable entry 'PYTHONPATH' for CRI-O in {template['container']['env']}"
+ # Check the container command argument list
+ assert len(template["container"]["args"]) == 1
+ assert f"mkdir -p {CRIO_VOL_WORKDIR_PATH}" in template["container"]["args"][0]
+ assert f"--target={CRIO_VOL_PYTHON_PATH}" in template["container"]["args"][0]
+ assert f"--user-volume-path '{CRIO_VOL_PYTHON_PATH}' " in template["container"]["args"][0]
+ else:
+ for template in compiled_spec["spec"]["templates"]:
+ if template["name"] == compiled_spec["spec"]["entrypoint"]:
+ continue
+ # Check if a volume was defined
+ for volume_entry in template.get("volumes", []):
+ if volume_entry["name"] == CRIO_VOL_DEF_NAME:
+                    # if a volume with the 'reserved' name exists there could be a problem
+ assert volume_entry.get("emptyDir") is None
+ # Check volume mount definition
+ for volumemount_entry in template["container"].get("volumeMounts", []):
+ if volumemount_entry["name"] == CRIO_VOL_DEF_NAME:
+ assert volumemount_entry["mountPath"] != CRIO_VOL_MOUNT_PATH
+ # Check PYTHONPATH environment variable
+ for env_entry in template["container"].get("env", []):
+ assert env_entry["name"] != "PYTHONPATH"
+ # Check the container command argument list
+ assert "mkdir -p ./jupyter-work-dir" in template["container"]["args"][0]
+ assert f"--target={CRIO_VOL_PYTHON_PATH}" not in template["container"]["args"][0]
+ assert "--user-volume-path" not in template["container"]["args"][0]
+
+
+@pytest.mark.parametrize(
+ "kfp_runtime_config",
+ [
+ kfp_runtime_config(
+ workflow_engine=WorkflowEngineType.ARGO,
+ ),
+ ],
+)
+def test_generate_pipeline_dsl_compile_pipeline_dsl_optional_elyra_properties(
+ monkeypatch, processor: KfpPipelineProcessor, kfp_runtime_config: Metadata, tmpdir
+):
+ """
+ This test validates that the output of _generate_pipeline_dsl and _compile_pipeline_dsl
+ yields the expected results for a generic node that has optional user-provided properties
+ defined:
+ - data volumes
+ - shared memory size
+ - Kubernetes secrets
+ - Kubernetes labels
+ - Kubernetes annotations
+ - Kubernetes tolerations
+ """
+ workflow_engine = WorkflowEngineType.get_instance_by_value(kfp_runtime_config.metadata["engine"])
+
+ # The test pipeline should only include one generic node that has the following optional
+ # user-specified properties defined:
+ # - data volumes
+ test_pipeline_file = (
+ Path(__file__).parent
+ / ".."
+ / "resources"
+ / "test_pipelines"
+ / "kfp"
+ / "kfp-one-node-generic-elyra-properties.pipeline"
+ )
+ # Instantiate a pipeline object to make it easier to obtain the information
+ # needed to perform validation.
+ pipeline = load_and_patch_pipeline(test_pipeline_file)
+ assert pipeline is not None
+
+ # Make sure this is a one generic node pipeline
+ assert len(pipeline.operations.keys()) == 1
+ assert isinstance(list(pipeline.operations.values())[0], GenericOperation)
+ # Use 'op' variable to access the operation
+ op = list(pipeline.operations.values())[0]
+
+ mocked_runtime_image_configurations = generate_mocked_runtime_image_configurations(pipeline)
+
+ mock_side_effects = [kfp_runtime_config] + [mocked_runtime_image_configurations]
mocked_func = mock.Mock(return_value="default", side_effect=mock_side_effects)
monkeypatch.setattr(processor, "_get_metadata_configuration", mocked_func)
monkeypatch.setattr(processor, "_upload_dependencies_to_object_store", lambda w, x, y, prefix: True)
- monkeypatch.setattr(processor, "_get_dependency_archive_name", lambda x: True)
monkeypatch.setattr(processor, "_verify_cos_connectivity", lambda x: True)
- inst_id = "test-instance-id"
- pipeline_func = lambda: processor._cc_pipeline(parsed_pipeline, pipeline_name="test", pipeline_instance_id=inst_id)
- pipeline_path = str(Path(tmpdir) / "complex_test.yaml")
-
- # Compile pipeline, save into pipeline_path, then read YAML
- kfp_argo_compiler.Compiler().compile(pipeline_func, pipeline_path)
- with open(pipeline_path) as f:
- pipeline_yaml = yaml.safe_load(f.read())
-
- def list_to_sorted_str(convert_list):
- """Helper function to convert a list of files into a semicolon-separated sorted string"""
- convert_str = ""
- for item in convert_list:
- convert_str += f"{item};"
- return "".join(sorted(convert_str[:-1]))
-
- # Sort and clean node lists in preparation for direct comparison between YAML and JSON
- pipeline_nodes = sorted(pipeline_json["pipelines"][0]["nodes"], key=lambda d: d["app_data"]["label"])
- yaml_nodes = [template for template in pipeline_yaml["spec"]["templates"] if template["name"] != "lambda"]
-
- for node_yaml, node_json in zip(yaml_nodes, pipeline_nodes):
- # Check the each node for correctness
- if "container" not in node_yaml or "args" not in node_yaml["container"]:
- continue
-
- node_args = node_yaml["container"]["args"][0]
-
- # Check that COS values are the same for each node
- assert f'--cos-directory "{cos_prefix}/{inst_id}"' in node_args
- assert f"--cos-endpoint {sample_metadata['cos_endpoint']}" in node_args
- assert f"--cos-bucket {sample_metadata['cos_bucket']}" in node_args
-
- component_parameters = node_json["app_data"]["component_parameters"]
- assert f"--file \"{component_parameters.get('filename')}\"" in node_args # check filename
- assert node_yaml["container"]["image"] == component_parameters.get("runtime_image") # check runtime image
-
- if component_parameters.get("inputs"): # check inputs
- args_input = re.search(r' --inputs "([\w.;]+)" ', node_args)
- assert list_to_sorted_str(component_parameters["inputs"]) in "".join(sorted(args_input[1]))
- if component_parameters.get("outputs"): # check outputs
- args_output = re.search(r' --outputs "([\w.;]+)" ', node_args)
- assert list_to_sorted_str(component_parameters["outputs"]) in "".join(sorted(args_output[1]))
- if component_parameters.get("env_vars"): # check env_vars
- env_list_from_yaml = node_yaml["container"]["env"]
- for var_dict in component_parameters["env_vars"]:
- adjusted_var_dict = {"name": var_dict["env_var"], "value": var_dict["value"]}
- assert adjusted_var_dict in env_list_from_yaml
+ # Test begins here
+
+ compiled_output_file = Path(tmpdir) / test_pipeline_file.with_suffix(".yaml")
+ compiled_output_file_name = str(compiled_output_file.absolute())
+
+ # generate Python DSL
+ pipeline_version = f"{pipeline.name}-0815"
+ pipeline_instance_id = f"{pipeline.name}-{datetime.now().strftime('%m%d%H%M%S')}"
+ experiment_name = f"{pipeline.name}-0815"
+ generated_dsl = processor._generate_pipeline_dsl(
+ pipeline=pipeline,
+ pipeline_name=pipeline.name,
+ workflow_engine=workflow_engine,
+ pipeline_version=pipeline_version,
+ pipeline_instance_id=pipeline_instance_id,
+ experiment_name=experiment_name,
+ )
+
+ # if the compiler discovers an issue with the generated DSL this call fails
+ processor._compile_pipeline_dsl(
+ dsl=generated_dsl,
+ workflow_engine=workflow_engine,
+ output_file=compiled_output_file_name,
+ pipeline_conf=None,
+ )
+
+ # Load compiled output
+ with open(compiled_output_file_name) as fh:
+ compiled_spec = yaml.safe_load(fh.read())
+
+ # There should be two templates, one for the DAG and one for the generic node.
+ # Locate the one for the generic node and inspect its properties.
+ assert len(compiled_spec["spec"]["templates"]) == 2
+ if compiled_spec["spec"]["templates"][0]["name"] == compiled_spec["spec"]["entrypoint"]:
+ node_template = compiled_spec["spec"]["templates"][1]
+ else:
+ node_template = compiled_spec["spec"]["templates"][0]
+
+ #
+ # validate data volumes, if applicable
+ expected_volume_mounts = op.elyra_params.get(MOUNTED_VOLUMES)
+ if len(expected_volume_mounts) > 0:
+ # There must be one or more 'volumeMounts' entry and one or more 'volumes' entry
+ assert node_template["container"].get("volumeMounts") is not None, node_template["container"]
+ assert node_template.get("volumes") is not None, compiled_spec["spec"]
+
+ assert len(node_template["container"]["volumeMounts"]) >= len(expected_volume_mounts)
+ for volume_mount in expected_volume_mounts:
+ for volumemount_entry in node_template["container"]["volumeMounts"]:
+ entry_found = False
+ if volumemount_entry["mountPath"] == volume_mount.path:
+ assert volumemount_entry["name"] == volume_mount.pvc_name
+ assert volumemount_entry.get("subPath", None) == volume_mount.sub_path
+ assert volumemount_entry.get("readOnly", None) == volume_mount.read_only
+ entry_found = True
+ break
+ assert (
+ entry_found
+ ), f"Cannot find volume mount entry '{volume_mount.path}' in {node_template['container']['volumeMounts']}"
+ for volume_entry in node_template["volumes"]:
+ entry_found = False
+ if volume_entry["name"] == volume_mount.pvc_name:
+ assert volume_entry["persistentVolumeClaim"]["claimName"] == volume_mount.pvc_name
+ entry_found = True
+ break
+ assert (
+ entry_found
+ ), f"Cannot find volume entry '{volume_mount.path}' in {node_template['container']['volumeMounts']}"
+
+ #
+ # validate custom shared memory size, if applicable
+ custom_shared_mem_size = op.elyra_params.get(KUBERNETES_SHARED_MEM_SIZE)
+ if custom_shared_mem_size:
+ # There must be one 'volumeMounts' entry and one 'volumes' entry
+ assert node_template["container"].get("volumeMounts") is not None, node_template["container"]
+ assert node_template.get("volumes") is not None, compiled_spec["spec"]
+ for volumemount_entry in node_template["container"]["volumeMounts"]:
+ entry_found = False
+ if volumemount_entry["mountPath"] == "/dev/shm":
+ assert volumemount_entry["name"] == "shm"
+ entry_found = True
+ break
+ assert (
+ entry_found
+            ), f"Missing volume mount entry for shared memory size in {node_template['container']['volumeMounts']}"
+ for volume_entry in node_template["volumes"]:
+ entry_found = False
+ if volume_entry["name"] == "shm":
+ assert volume_entry["emptyDir"]["medium"] == "Memory"
+ assert (
+ volume_entry["emptyDir"]["sizeLimit"]
+ == f"{custom_shared_mem_size.size}{custom_shared_mem_size.units}"
+ )
+ entry_found = True
+ break
+ assert (
+ entry_found
+            ), f"Missing volume entry 'shm' for the custom shared memory size in {node_template['volumes']}"
+
+ #
+ # validate Kubernetes secrets, if applicable
+ expected_kubernetes_secrets = op.elyra_params.get(KUBERNETES_SECRETS)
+ if len(expected_kubernetes_secrets) > 0:
+ # There must be one or more 'env' entries
+ assert node_template["container"].get("env") is not None, node_template["container"]
+ for secret in expected_kubernetes_secrets:
+ for env_entry in node_template["container"]["env"]:
+ entry_found = False
+ if env_entry["name"] == secret.env_var:
+ assert env_entry["valueFrom"]["secretKeyRef"]["key"] == secret.key
+ assert env_entry["valueFrom"]["secretKeyRef"]["name"] == secret.name
+ entry_found = True
+ break
+ assert entry_found, f"Missing entry for secret '{secret.env_var}' in {node_template['container']['env']}"
+
+ # Validate custom Kubernetes annotations
+ expected_kubernetes_annotations = op.elyra_params.get(KUBERNETES_POD_ANNOTATIONS)
+ if len(expected_kubernetes_annotations) > 0:
+ # There must be one or more 'metadata.annotations' entries
+ assert node_template["metadata"].get("annotations") is not None, node_template["metadata"]
+ for expected_annotation in expected_kubernetes_annotations:
+ assert expected_annotation.key in node_template["metadata"]["annotations"]
+ assert node_template["metadata"]["annotations"][expected_annotation.key] == (
+ expected_annotation.value or ""
+ )
+
+ #
+ # Validate custom Kubernetes labels
+ expected_kubernetes_labels = op.elyra_params.get(KUBERNETES_POD_LABELS)
+ if len(expected_kubernetes_labels) > 0:
+ # There must be one or more 'metadata.labels' entries
+ assert node_template["metadata"].get("labels") is not None, node_template["metadata"]
+ for expected_label in expected_kubernetes_labels:
+ assert expected_label.key in node_template["metadata"]["labels"]
+ assert node_template["metadata"]["labels"][expected_label.key] == (expected_label.value or "")
+
+    #
+    # Validate custom Kubernetes tolerations
+ expected_kubernetes_tolerations = op.elyra_params.get(KUBERNETES_TOLERATIONS)
+ if len(expected_kubernetes_tolerations) > 0:
+ # There must be one or more 'tolerations' entries, e.g.
+ # {effect: NoExecute, key: kt1, operator: Equal, value: '3'}
+ assert node_template.get("tolerations") is not None, node_template
+ for expected_toleration in expected_kubernetes_tolerations:
+ entry_found = False
+ for toleration_entry in node_template["tolerations"]:
+ if (
+ toleration_entry.get("key") == expected_toleration.key
+ and toleration_entry.get("operator") == expected_toleration.operator
+ and toleration_entry.get("value") == expected_toleration.value
+ and toleration_entry.get("effect") == expected_toleration.effect
+ ):
+ entry_found = True
+ break
+ not_found_msg = (
+ "Missing toleration entry for '"
+ f"{expected_toleration.key}::{expected_toleration.operator}::"
+ f"{expected_toleration.value}::{expected_toleration.effect}'"
+                f" in {node_template['tolerations']}"
+ )
+ assert entry_found, not_found_msg
+
+
+@pytest.mark.parametrize(
+ "kfp_runtime_config",
+ [
+ kfp_runtime_config(
+ workflow_engine=WorkflowEngineType.ARGO,
+ ),
+ ],
+)
+@pytest.mark.skip("TODO: implement test")
+def test_generate_pipeline_dsl_compile_pipeline_dsl_generic_components_data_exchange(
+ monkeypatch, processor: KfpPipelineProcessor, kfp_runtime_config: Metadata, tmpdir
+):
+ """
+ TODO Validate that code gen produces the expected artifacts if the pipeline contains
+ multiple generic nodes that are configured for data exchange
+ """
+ assert False
+
+
+@pytest.mark.parametrize(
+ "require_pull_secret",
+ [
+ True,
+ False,
+ ],
+)
+@pytest.mark.parametrize(
+ "kfp_runtime_config",
+ [
+ kfp_runtime_config(
+ workflow_engine=WorkflowEngineType.ARGO,
+ ),
+ ],
+)
+def test_generate_pipeline_dsl_compile_pipeline_dsl_generic_components_pipeline_conf(
+ monkeypatch, processor: KfpPipelineProcessor, kfp_runtime_config: Metadata, require_pull_secret: bool, tmpdir
+):
+ """
+ Validate that code gen produces the expected artifacts if the pipeline contains
+    generic nodes and the associated runtime images are configured to require a pull secret.
+ The test results are not runtime type specific.
+ """
+ workflow_engine = WorkflowEngineType.get_instance_by_value(kfp_runtime_config.metadata["engine"])
+
+ # Any valid pipeline file can be used to run this test, as long as it includes at least one node.
+ test_pipeline_file = (
+ Path(__file__).parent / ".." / "resources" / "test_pipelines" / "kfp" / "kfp-one-node-generic.pipeline"
+ )
+ # Instantiate a pipeline object to make it easier to obtain the information
+ # needed to perform validation.
+ pipeline = load_and_patch_pipeline(pipeline_filename=test_pipeline_file, with_cos_object_prefix=False)
+ assert pipeline is not None
+
+ mocked_runtime_image_configurations = generate_mocked_runtime_image_configurations(
+ pipeline,
+ require_pull_secret=require_pull_secret,
+ )
+
+ assert kfp_runtime_config is not None
+ assert mocked_runtime_image_configurations is not None
+
+ monkeypatch.setattr(processor, "_upload_dependencies_to_object_store", lambda w, x, y, prefix: True)
+ monkeypatch.setattr(processor, "_verify_cos_connectivity", lambda x: True)
+
+ # Test begins here
+
+    compiled_output_file = Path(tmpdir) / test_pipeline_file.with_suffix(".yaml").name
+ compiled_output_file_name = str(compiled_output_file.absolute())
+
+ # generate Python DSL for the specified workflow engine
+ pipeline_version = f"{pipeline.name}-test-0"
+ pipeline_instance_id = f"{pipeline.name}-{datetime.now().strftime('%m%d%H%M%S')}"
+ experiment_name = f"{pipeline.name}-test-0"
+
+ # Generate pipeline DSL; this requires the _get_metadata_configuration mock
+ monkeypatch.setattr(
+ processor,
+ "_get_metadata_configuration",
+ mock.Mock(return_value="default", side_effect=[kfp_runtime_config] + [mocked_runtime_image_configurations]),
+ )
+ generated_dsl = processor._generate_pipeline_dsl(
+ pipeline=pipeline,
+ pipeline_name=pipeline.name,
+ workflow_engine=workflow_engine,
+ pipeline_version=pipeline_version,
+ pipeline_instance_id=pipeline_instance_id,
+ experiment_name=experiment_name,
+ )
+
+ # Generate pipeline configuration; this requires the _get_metadata_configuration mock
+ monkeypatch.setattr(
+ processor,
+ "_get_metadata_configuration",
+ mock.Mock(return_value="default", side_effect=[mocked_runtime_image_configurations]),
+ )
+ pipeline_conf = processor._generate_pipeline_conf(pipeline=pipeline)
+
+ processor._compile_pipeline_dsl(
+ dsl=generated_dsl,
+ workflow_engine=workflow_engine,
+ output_file=compiled_output_file_name,
+ pipeline_conf=pipeline_conf,
+ )
+
+ # Load compiled workflow
+ with open(compiled_output_file_name) as f:
+ compiled_spec = yaml.safe_load(f.read())
+
+ expected_image_pull_secret_names = [
+ rti_config.metadata["pull_secret"]
+ for rti_config in mocked_runtime_image_configurations
+ if rti_config.metadata.get("pull_secret") is not None
+ ]
+
+ if len(expected_image_pull_secret_names) > 0:
+ # There must be one or more spec.imagePullSecrets entries
+ assert compiled_spec["spec"].get("imagePullSecrets") is not None, compiled_spec["spec"]
+ # Verify that each expected secret is referenced
+ for expected_secret_name in expected_image_pull_secret_names:
+ entry_found = False
+ for secret_entry in compiled_spec["spec"]["imagePullSecrets"]:
+ if secret_entry.get("name") == expected_secret_name:
+ entry_found = True
+ break
+ assert entry_found, (
+ f"Missing entry for image pull secret '{expected_secret_name}' "
+ f"in {compiled_spec['spec']['imagePullSecrets']}"
+ )
diff --git a/elyra/tests/pipeline/resources/test_pipelines/kfp/a-notebook.ipynb b/elyra/tests/pipeline/resources/test_pipelines/kfp/a-notebook.ipynb
new file mode 100644
index 000000000..519af069e
--- /dev/null
+++ b/elyra/tests/pipeline/resources/test_pipelines/kfp/a-notebook.ipynb
@@ -0,0 +1,33 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b039f7dd-b768-4bdb-9b47-f803c409aa77",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/elyra/tests/pipeline/resources/test_pipelines/kfp/kfp-one-node-generic-elyra-properties.pipeline b/elyra/tests/pipeline/resources/test_pipelines/kfp/kfp-one-node-generic-elyra-properties.pipeline
new file mode 100644
index 000000000..d9b0db7ca
--- /dev/null
+++ b/elyra/tests/pipeline/resources/test_pipelines/kfp/kfp-one-node-generic-elyra-properties.pipeline
@@ -0,0 +1,156 @@
+{
+ "doc_type": "pipeline",
+ "version": "3.0",
+ "json_schema": "http://api.dataplatform.ibm.com/schemas/common-pipeline/pipeline-flow/pipeline-flow-v3-schema.json",
+ "id": "elyra-auto-generated-pipeline",
+ "primary_pipeline": "primary",
+ "pipelines": [
+ {
+ "id": "primary",
+ "nodes": [
+ {
+ "id": "84d22396-568f-4c06-8558-95f715bba023",
+ "type": "execution_node",
+ "op": "execute-notebook-node",
+ "app_data": {
+ "component_parameters": {
+ "dependencies": [],
+ "include_subdirectories": false,
+ "outputs": [],
+ "env_vars": [],
+ "kubernetes_pod_annotations": [
+ {
+ "key": "anno-key-1",
+ "value": "anno-value-1"
+ },
+ {
+ "key": "anno-key-without-value-2"
+ }
+ ],
+ "kubernetes_pod_labels": [
+ {
+ "key": "label-key-without-value-1"
+ },
+ {
+ "key": "label-key-2",
+ "value": "label-value-2"
+ }
+ ],
+ "kubernetes_secrets": [
+ {
+ "env_var": "secret_env_var_1",
+ "name": "secret-1",
+ "key": "secret-key-1"
+ }
+ ],
+ "kubernetes_shared_mem_size": {
+ "size": 0.5
+ },
+ "kubernetes_tolerations": [
+ {
+ "key": "kt1",
+ "operator": "Equal",
+ "value": "3",
+ "effect": "NoExecute"
+ },
+ {
+ "key": "kt2",
+ "operator": "Exists",
+ "effect": "NoSchedule"
+ },
+ {
+ "operator": "Exists"
+ },
+ {
+ "key": "kt3",
+ "operator": "Equal",
+ "value": "v3"
+ }
+ ],
+ "mounted_volumes": [
+ {
+ "path": "/test/vol1",
+ "pvc_name": "test-pvc-1",
+ "read_only": false
+ },
+ {
+ "path": "/test/vol2",
+ "pvc_name": "test-pvc-2",
+ "sub_path": "sub/path",
+ "read_only": false
+ },
+ {
+ "path": "/test/vol3",
+ "pvc_name": "test-pvc-3",
+ "sub_path": "sub/path",
+ "read_only": true
+ }
+ ],
+ "filename": "a-notebook.ipynb",
+ "runtime_image": "tensorflow/tensorflow:2.8.0"
+ },
+ "label": "",
+ "ui_data": {
+ "label": "a-notebook.ipynb",
+ "image": "/static/elyra/notebook.svg",
+ "x_pos": 183,
+ "y_pos": 77,
+ "description": "Run notebook file"
+ }
+ },
+ "inputs": [
+ {
+ "id": "inPort",
+ "app_data": {
+ "ui_data": {
+ "cardinality": {
+ "min": 0,
+ "max": -1
+ },
+ "label": "Input Port"
+ }
+ }
+ }
+ ],
+ "outputs": [
+ {
+ "id": "outPort",
+ "app_data": {
+ "ui_data": {
+ "cardinality": {
+ "min": 0,
+ "max": -1
+ },
+ "label": "Output Port"
+ }
+ }
+ }
+ ]
+ }
+ ],
+ "app_data": {
+ "ui_data": {
+ "comments": []
+ },
+ "version": 8,
+ "runtime_type": "KUBEFLOW_PIPELINES",
+ "properties": {
+ "pipeline_defaults": {
+ "kubernetes_pod_annotations": [],
+ "kubernetes_shared_mem_size": {},
+ "kubernetes_tolerations": [],
+ "kubernetes_pod_labels": [],
+ "mounted_volumes": [],
+ "env_vars": [],
+ "kubernetes_secrets": []
+ },
+ "name": "kfp-one-node-generic-elyra-properties",
+ "runtime": "Kubeflow Pipelines",
+ "description": "Test pipeline for generic nodes and Elyra properties"
+ }
+ },
+ "runtime_ref": ""
+ }
+ ],
+ "schemas": []
+}
\ No newline at end of file
diff --git a/elyra/tests/pipeline/resources/test_pipelines/kfp/kfp-one-node-generic.pipeline b/elyra/tests/pipeline/resources/test_pipelines/kfp/kfp-one-node-generic.pipeline
new file mode 100644
index 000000000..bb60fe67d
--- /dev/null
+++ b/elyra/tests/pipeline/resources/test_pipelines/kfp/kfp-one-node-generic.pipeline
@@ -0,0 +1,109 @@
+{
+ "doc_type": "pipeline",
+ "version": "3.0",
+ "json_schema": "http://api.dataplatform.ibm.com/schemas/common-pipeline/pipeline-flow/pipeline-flow-v3-schema.json",
+ "id": "elyra-auto-generated-pipeline",
+ "primary_pipeline": "primary",
+ "pipelines": [
+ {
+ "id": "primary",
+ "nodes": [
+ {
+ "id": "d3cbeeec-0e4f-4032-8318-4500fb9aa352",
+ "type": "execution_node",
+ "op": "execute-notebook-node",
+ "app_data": {
+ "component_parameters": {
+ "dependencies": [],
+ "include_subdirectories": false,
+ "outputs": [],
+ "env_vars": [],
+ "kubernetes_pod_annotations": [],
+ "kubernetes_pod_labels": [],
+ "kubernetes_secrets": [],
+ "kubernetes_shared_mem_size": {},
+ "kubernetes_tolerations": [],
+ "mounted_volumes": [],
+ "filename": "a-notebook.ipynb"
+ },
+ "label": "",
+ "ui_data": {
+ "label": "a-notebook.ipynb",
+ "image": "/static/elyra/notebook.svg",
+ "x_pos": 186,
+ "y_pos": 109,
+ "description": "Run notebook file"
+ }
+ },
+ "inputs": [
+ {
+ "id": "inPort",
+ "app_data": {
+ "ui_data": {
+ "cardinality": {
+ "min": 0,
+ "max": -1
+ },
+ "label": "Input Port"
+ }
+ }
+ }
+ ],
+ "outputs": [
+ {
+ "id": "outPort",
+ "app_data": {
+ "ui_data": {
+ "cardinality": {
+ "min": 0,
+ "max": -1
+ },
+ "label": "Output Port"
+ }
+ }
+ }
+ ]
+ }
+ ],
+ "app_data": {
+ "ui_data": {
+ "comments": [
+ {
+ "id": "6a01b028-38aa-4c6c-9b52-0ffe914b7b6d",
+ "x_pos": 30,
+ "y_pos": 34,
+ "width": 175,
+ "height": 42,
+ "content": "test comment",
+ "associated_id_refs": [
+ {
+ "node_ref": "d3cbeeec-0e4f-4032-8318-4500fb9aa352"
+ }
+ ]
+ }
+ ]
+ },
+ "version": 8,
+ "runtime_type": "KUBEFLOW_PIPELINES",
+ "properties": {
+ "pipeline_defaults": {
+ "kubernetes_shared_mem_size": {},
+ "kubernetes_tolerations": [],
+ "kubernetes_pod_labels": [],
+ "kubernetes_pod_annotations": [],
+ "mounted_volumes": [],
+ "kubernetes_secrets": [],
+ "env_vars": [],
+ "runtime_image": "tensorflow/tensorflow:2.8.0",
+ "cos_object_prefix": "my/project"
+ },
+ "name": "kfp-one-node-generic",
+ "runtime": "Kubeflow Pipelines",
+ "description": "A Kubeflow Pipelines pipeline that contains one generic node"
+ }
+ },
+ "runtime_ref": ""
+ }
+ ],
+ "schemas": []
+}
\ No newline at end of file
diff --git a/elyra/tests/util/test_kubernetes.py b/elyra/tests/util/test_kubernetes.py
index ee19204fd..2215bfad1 100644
--- a/elyra/tests/util/test_kubernetes.py
+++ b/elyra/tests/util/test_kubernetes.py
@@ -13,11 +13,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
+import string
+
from elyra.util.kubernetes import is_valid_annotation_key
from elyra.util.kubernetes import is_valid_annotation_value
from elyra.util.kubernetes import is_valid_kubernetes_resource_name
from elyra.util.kubernetes import is_valid_label_key
from elyra.util.kubernetes import is_valid_label_value
+from elyra.util.kubernetes import sanitize_label_value
def test_is_valid_kubernetes_resource_name_invalid_input():
@@ -226,3 +229,109 @@ def test_is_valid_annotation_value_valid_input():
assert is_valid_annotation_value(value="l_4")
assert is_valid_annotation_value(value="4-you")
assert is_valid_annotation_value(value="You.2")
+
+
+def test_sanitize_label_value():
+ valid_middle_chars = "-_."
+
+ # test min length
+ assert sanitize_label_value(None) == ""
+ assert sanitize_label_value("") == ""
+ # test max length (63)
+ assert sanitize_label_value("a" * 63) == "a" * 63
+ assert sanitize_label_value("a" * 64) == "a" * 63 # truncated
+ # test first and last char
+ assert sanitize_label_value("1") == "1"
+ assert sanitize_label_value("22") == "22"
+ assert sanitize_label_value("3_3") == "3_3"
+ assert sanitize_label_value("4u4") == "4u4"
+ assert sanitize_label_value("5$5") == "5_5"
+
+ # test first char
+ for c in string.printable:
+ if c in string.ascii_letters + string.digits:
+ # first char is valid
+ # no length violation
+ assert sanitize_label_value(c) == c
+ assert sanitize_label_value(c + "B") == c + "B"
+ # max length
+ assert sanitize_label_value(c + "B" * 62) == (c + "B" * 62)
+ # max length exceeded
+ assert sanitize_label_value(c + "B" * 63) == (c + "B" * 62) # truncated
+ else:
+ # first char is invalid, e.g. '#a', and becomes the
+ # second char, which might require replacement
+ rv = c
+ if c not in valid_middle_chars:
+ rv = "_"
+ # no length violation
+ assert sanitize_label_value(c) == "a" + rv + "a"
+ assert sanitize_label_value(c + "B") == "a" + rv + "B"
+ # max length
+ assert sanitize_label_value(c + "B" * 62) == ("a" + rv + "B" * 61) # truncated
+ # max length exceeded
+ assert sanitize_label_value(c + "B" * 63) == ("a" + rv + "B" * 61) # truncated
+
+ # test last char
+ for c in string.printable:
+ if c in string.ascii_letters + string.digits:
+ # no length violation
+ assert sanitize_label_value("b" + c) == "b" + c
+ # max length
+ assert sanitize_label_value("b" * 62 + c) == ("b" * 62 + c)
+ # max length exceeded
+ assert sanitize_label_value("b" * 63 + c) == ("b" * 63)
+ else:
+ # last char is invalid, e.g. 'a#', and requires
+ # patching
+ rv = c
+ if c not in valid_middle_chars:
+ rv = "_"
+ # no length violation (char is appended)
+ assert sanitize_label_value("b" + c) == "b" + rv + "a"
+ # max length (char is replaced)
+ assert sanitize_label_value("b" * 62 + c) == ("b" * 62 + "a")
+ # max length exceeded (no action required)
+ assert sanitize_label_value("b" * 63 + c) == ("b" * 63)
+
+ # test first and last char
+ for c in string.printable:
+ if c in string.ascii_letters + string.digits:
+ # no length violation
+ assert sanitize_label_value(c + "b" + c) == c + "b" + c # nothing is modified
+ # max length
+ assert sanitize_label_value(c + "b" * 61 + c) == (c + "b" * 61 + c) # nothing is modified
+ # max length exceeded
+ assert sanitize_label_value(c + "b" * 62 + c) == c + "b" * 62 # truncate only
+ else:
+ # first and last characters are invalid, e.g. '#a#'
+ rv = c
+ if c not in valid_middle_chars:
+ rv = "_"
+ # no length violation
+ assert sanitize_label_value(c + "b" + c) == "a" + rv + "b" + rv + "a"
+ # max length
+ assert sanitize_label_value(c + "b" * 59 + c) == ("a" + rv + "b" * 59 + rv + "a")
+ # max length exceeded after processing, scenario 1
+ # resolved by adding char before first, replace last
+ assert sanitize_label_value(c + "b" * 60 + c) == ("a" + rv + "b" * 60 + "a")
+ # max length exceeded after processing, scenario 2
+ # resolved by adding char before first, appending after last
+ assert sanitize_label_value(c + "b" * 59 + c) == ("a" + rv + "b" * 59 + rv + "a")
+ # max length exceeded before processing, scenario 1
+ # resolved by adding char before first, truncating last
+ assert sanitize_label_value(c + "b" * 62 + c) == ("a" + rv + "b" * 61)
+ # max length exceeded before processing, scenario 2
+ # resolved by adding char before first, replacing last
+ assert sanitize_label_value(c + "b" * 60 + c * 3) == ("a" + rv + "b" * 60 + "a")
+
+ # test char in a position other than first and last
+ # if invalid, the char is replaced with '_'
+ for c in string.printable:
+ if c in string.ascii_letters + string.digits + "-_.":
+ assert sanitize_label_value("A" + c + "Z") == "A" + c + "Z"
+ else:
+ assert sanitize_label_value("A" + c + "Z") == "A_Z"
+
+ # encore
+ assert sanitize_label_value(r"¯\_(ツ)_/¯") == "a_________a"
diff --git a/elyra/util/kubernetes.py b/elyra/util/kubernetes.py
index a3d483288..e97f6370e 100644
--- a/elyra/util/kubernetes.py
+++ b/elyra/util/kubernetes.py
@@ -14,6 +14,7 @@
# limitations under the License.
#
import re
+import string
def is_valid_kubernetes_resource_name(name: str) -> bool:
@@ -145,3 +146,60 @@ def is_valid_label_value(value: str) -> bool:
return False
return re.match(r"^[a-zA-Z0-9]([-_\.A-Za-z0-9]*[a-zA-Z0-9])*$", value) is not None
+
+
+def sanitize_label_value(value: str) -> str:
+ """Produce a Kubernetes-compliant label value
+
+ Valid label values must be 63 characters or less and
+ must be empty or begin and end with an alphanumeric
+ character ([a-z0-9A-Z]) with dashes (-), underscores
+ (_), dots (.), and alphanumerics between.
+ """
+
+ if value is None or len(value) == 0:
+ return "" # nothing to do
+
+ max_length = 63
+ # This char is added at the front and/or back
+ # of value, if the first and/or last character
+ # is invalid. For example a value of "-abc"
+ # is converted to "a-abc". The specified character
+ # must meet the label value constraints.
+ valid_char = "a"
+ # This char is used to replace invalid characters
+ # that are in the "middle" of value. For example
+ # a value of "abc%def" is converted to "abc_def".
+ # The specified character must meet the label value
+ # constraints.
+ valid_middle_char = "_"
+
+ # must begin with [0-9a-zA-Z]
+ valid_chars = string.ascii_letters + string.digits
+ if value[0] not in valid_chars:
+ value = valid_char + value
+
+ value = value[:max_length] # enforce max length
+
+ # must end with [0-9a-zA-Z]
+ if value[-1] not in valid_chars:
+ if len(value) <= max_length - 1:
+ # append valid character if max length
+ # would not be exceeded
+ value = value + valid_char
+ else:
+ # replace with valid character
+ value = value[:-1] + valid_char
+
+ # middle chars must be [0-9a-zA-Z\-_.]
+ valid_chars = valid_chars + "-_."
+
+ newstr = ""
+ for c in range(len(value)):
+ if value[c] not in valid_chars:
+ newstr = newstr + valid_middle_char
+ else:
+ newstr = newstr + value[c]
+ value = newstr
+
+ return value
diff --git a/test_requirements.txt b/test_requirements.txt
index a6b4ae4e2..fa9402cc1 100644
--- a/test_requirements.txt
+++ b/test_requirements.txt
@@ -7,3 +7,4 @@ pytest-tornasync
pytest_virtualenv
requests-mock
requests-unixsocket
+kfp-tekton
diff --git a/tests/assets/helloworld.pipeline b/tests/assets/generic-test.pipeline
similarity index 99%
rename from tests/assets/helloworld.pipeline
rename to tests/assets/generic-test.pipeline
index c7ed16269..a8e03b375 100644
--- a/tests/assets/helloworld.pipeline
+++ b/tests/assets/generic-test.pipeline
@@ -130,7 +130,7 @@
},
"version": 8,
"properties": {
- "name": "helloworld",
+ "name": "generic-test",
"runtime": "Generic"
}
},
diff --git a/tests/integration/pipeline.ts b/tests/integration/pipeline.ts
index a21a10137..2ae4c9a68 100644
--- a/tests/integration/pipeline.ts
+++ b/tests/integration/pipeline.ts
@@ -16,11 +16,12 @@
describe('Pipeline Editor tests', () => {
beforeEach(() => {
- cy.deleteFile('helloworld.yaml');
+ cy.deleteFile('generic-test.yaml'); // previously exported pipeline
+ cy.deleteFile('generic-test.py'); // previously exported pipeline
cy.deleteFile('*.pipeline'); // delete pipeline files used for testing
cy.bootstrapFile('invalid.pipeline');
- cy.bootstrapFile('helloworld.pipeline');
+ cy.bootstrapFile('generic-test.pipeline');
cy.bootstrapFile('helloworld.ipynb');
cy.exec('jupyter trust build/cypress-tests/helloworld.ipynb');
cy.bootstrapFile('helloworld.py');
@@ -35,7 +36,8 @@ describe('Pipeline Editor tests', () => {
cy.deleteFile('helloworld.py'); // delete python file used for testing
cy.deleteFile('output.txt'); // delete output files generated by tests
cy.deleteFile('*.pipeline'); // delete pipeline files used for testing
- cy.deleteFile('helloworld.yaml');
+ cy.deleteFile('generic-test.yaml'); // exported pipeline
+ cy.deleteFile('generic-test.py'); // exported pipeline
cy.deleteFile('invalid.txt');
// delete complex test directories
@@ -294,7 +296,7 @@ describe('Pipeline Editor tests', () => {
it('should open notebook on double-clicking the node', () => {
// Open a pipeline in root directory
- cy.openFile('helloworld.pipeline');
+ cy.openFile('generic-test.pipeline');
// Open notebook node with double-click
cy.get('.common-canvas-drop-div').within(() => {
@@ -327,7 +329,7 @@ describe('Pipeline Editor tests', () => {
it('should open notebook from node right-click menu', () => {
// Open a pipeline in root directory
- cy.openFile('helloworld.pipeline');
+ cy.openFile('generic-test.pipeline');
// Open notebook node with right-click menu
cy.get('#jp-main-dock-panel').within(() => {
@@ -425,11 +427,11 @@ describe('Pipeline Editor tests', () => {
// });
// it('should run pipeline with env vars and output files', () => {
- // cy.openFile('helloworld.pipeline');
+ // cy.openFile('generic-test.pipeline');
// cy.findByRole('button', { name: /run pipeline/i }).click();
- // cy.findByLabelText(/pipeline name/i).should('have.value', 'helloworld');
+ // cy.findByLabelText(/pipeline name/i).should('have.value', 'generic-test');
// cy.findByLabelText(/runtime platform/i).should(
// 'have.value',
// '__elyra_local__'
@@ -461,11 +463,11 @@ describe('Pipeline Editor tests', () => {
cy.findByText(/failed export:/i).should('be.visible');
});
- it('should export pipeline as yaml', () => {
+  it('should export KFP pipeline as YAML', () => {
// Install runtime configuration
cy.installRuntimeConfig({ type: 'kfp' });
- cy.openFile('helloworld.pipeline');
+ cy.openFile('generic-test.pipeline');
// try to export valid pipeline
cy.findByRole('button', { name: /export pipeline/i }).click();
@@ -492,14 +494,48 @@ describe('Pipeline Editor tests', () => {
'be.visible'
);
- cy.readFile('build/cypress-tests/helloworld.yaml');
+ cy.readFile('build/cypress-tests/generic-test.yaml');
});
- it('should export pipeline as python dsl', () => {
+ it('should export KFP pipeline as Python DSL', () => {
+ // Install runtime configuration
+ cy.installRuntimeConfig({ type: 'kfp' });
+
+ cy.openFile('generic-test.pipeline');
+
+ // try to export valid pipeline
+ cy.findByRole('button', { name: /export pipeline/i }).click();
+
+ // check label for generic pipeline
+ cy.get('.jp-Dialog-header').contains('Export pipeline');
+
+ cy.findByLabelText(/runtime platform/i).select('KUBEFLOW_PIPELINES');
+
+ cy.findByLabelText(/runtime configuration/i)
+ .select('kfp_test_runtime')
+ .should('have.value', 'kfp_test_runtime');
+
+ // Validate all export options are available
+ cy.findByLabelText(/export pipeline as/i)
+ .select('Python DSL')
+ .should('have.value', 'py');
+
+ // actual export requires minio
+ cy.contains('OK').click();
+
+    // validate that the pipeline export succeeded; this can take a while in CI
+ cy.findByText(/pipeline export succeeded/i, { timeout: 30000 }).should(
+ 'be.visible'
+ );
+
+ cy.readFile('build/cypress-tests/generic-test.py');
+ });
+
+  it('should export Airflow pipeline as Python DSL', () => {
// Install runtime configuration
cy.installRuntimeConfig({ type: 'airflow' });
- cy.openFile('helloworld.pipeline');
+ cy.openFile('generic-test.pipeline');
// try to export valid pipeline
cy.findByRole('button', { name: /export pipeline/i }).click();
@@ -513,7 +549,7 @@ describe('Pipeline Editor tests', () => {
.select('airflow_test_runtime')
.should('have.value', 'airflow_test_runtime');
- // overwrite existing helloworld.py file
+    // overwrite existing generic-test.py file
cy.findByLabelText(/export pipeline as/i)
.select('Airflow domain-specific language Python code')
.should('have.value', 'py');
@@ -534,7 +570,7 @@ describe('Pipeline Editor tests', () => {
});
it('should not leak properties when switching between nodes', () => {
- cy.openFile('helloworld.pipeline');
+ cy.openFile('generic-test.pipeline');
cy.get('#jp-main-dock-panel').within(() => {
cy.findByText('helloworld.ipynb').rightclick();
@@ -600,7 +636,7 @@ describe('Pipeline Editor tests', () => {
// Validate all export options are available
cy.findByRole('button', { name: /export pipeline/i }).click();
cy.findByRole('option', { name: /yaml/i }).should('have.value', 'yaml');
- cy.findByRole('option', { name: /python/i }).should('not.exist');
+ cy.findByRole('option', { name: /python/i }).should('have.value', 'py');
// Dismiss dialog
cy.findByRole('button', { name: /cancel/i }).click();
@@ -671,7 +707,7 @@ describe('Pipeline Editor tests', () => {
// Validate all export options are available for kfp
cy.findByLabelText(/runtime platform/i).select('KUBEFLOW_PIPELINES');
cy.findByRole('option', { name: /yaml/i }).should('have.value', 'yaml');
- cy.findByRole('option', { name: /python/i }).should('not.exist');
+ cy.findByRole('option', { name: /python/i }).should('have.value', 'py');
// Dismiss dialog
cy.findByRole('button', { name: /cancel/i }).click();