diff --git a/samples/notebooks/KubeFlow Pipeline Using TFX OSS Components.ipynb b/samples/notebooks/KubeFlow Pipeline Using TFX OSS Components.ipynb
index 5e5ddd40c48..ea0144f44c7 100644
--- a/samples/notebooks/KubeFlow Pipeline Using TFX OSS Components.ipynb
+++ b/samples/notebooks/KubeFlow Pipeline Using TFX OSS Components.ipynb
@@ -22,6 +22,37 @@
     "## Setup"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "tags": [
+     "parameters"
+    ]
+   },
+   "outputs": [],
+   "source": [
+    "# Set your output and project. !!!Must Do before you can proceed!!!\n",
+    "EXPERIMENT_NAME = 'demo'\n",
+    "OUTPUT_DIR = 'Your-Gcs-Path' # Such as gs://bucket/objact/path\n",
+    "PROJECT_NAME = 'Your-Gcp-Project-Name'\n",
+    "BASE_IMAGE='gcr.io/%s/pusherbase:dev' % PROJECT_NAME\n",
+    "TARGET_IMAGE='gcr.io/%s/pusher:dev' % PROJECT_NAME\n",
+    "KFP_PACKAGE = 'https://storage.googleapis.com/ml-pipeline/release/0.1.3-rc.2/kfp.tar.gz'\n",
+    "TRAIN_DATA = 'gs://ml-pipeline-playground/tfx/taxi-cab-classification/train.csv'\n",
+    "EVAL_DATA = 'gs://ml-pipeline-playground/tfx/taxi-cab-classification/eval.csv'\n",
+    "HIDDEN_LAYER_SIZE = '1500'\n",
+    "STEPS = 3000\n",
+    "DATAFLOW_TFDV_IMAGE = 'gcr.io/ml-pipeline/ml-pipeline-dataflow-tfdv:0.1.3-rc.2'#TODO-release: update the release tag for the next release\n",
+    "DATAFLOW_TFT_IMAGE = 'gcr.io/ml-pipeline/ml-pipeline-dataflow-tft:0.1.3-rc.2'#TODO-release: update the release tag for the next release\n",
+    "DATAFLOW_TFMA_IMAGE = 'gcr.io/ml-pipeline/ml-pipeline-dataflow-tfma:0.1.3-rc.2'#TODO-release: update the release tag for the next release\n",
+    "DATAFLOW_TF_PREDICT_IMAGE = 'gcr.io/ml-pipeline/ml-pipeline-dataflow-tf-predict:0.1.3-rc.2'#TODO-release: update the release tag for the next release\n",
+    "KUBEFLOW_TF_TRAINER_IMAGE = 'gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer:0.1.3-rc.2'#TODO-release: update the release tag for the next release\n",
+    "KUBEFLOW_DEPLOYER_IMAGE = 'gcr.io/ml-pipeline/ml-pipeline-kubeflow-deployer:0.1.3-rc.2'#TODO-release: update the release tag for the next release\n",
+    "DEV_DEPLOYER_MODEL = 'notebook-tfx-devtaxi.beta'\n",
+    "PROD_DEPLOYER_MODEL = 'notebook-tfx-prodtaxi.beta'"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 1,
@@ -81,26 +112,7 @@
    ],
    "source": [
     "# Install Pipeline SDK\n",
-    "!pip3 install https://storage.googleapis.com/ml-pipeline/release/0.1.3-rc.2/kfp.tar.gz --upgrade"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 36,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import kfp\n",
-    "from kfp import compiler\n",
-    "import kfp.dsl as dsl\n",
-    "import kfp.notebook\n",
-    "\n",
-    "\n",
-    "# Set your output and project. !!!Must Do before you can proceed!!!\n",
-    "OUTPUT_DIR = 'Your-Gcs-Path' # Such as gs://bucket/objact/path\n",
-    "PROJECT_NAME = 'Your-Gcp-Project-Name'\n",
-    "BASE_IMAGE='gcr.io/%s/pusherbase:dev' % PROJECT_NAME\n",
-    "TARGET_IMAGE='gcr.io/%s/pusher:dev' % PROJECT_NAME"
+    "!pip3 install $KFP_PACKAGE --upgrade"
    ]
   },
   {
@@ -133,8 +145,13 @@
    "source": [
     "# Note that this notebook should be running in JupyterHub in the same cluster as the pipeline system.\n",
     "# Otherwise it will fail to talk to the pipeline system.\n",
+    "import kfp\n",
+    "from kfp import compiler\n",
+    "import kfp.dsl as dsl\n",
+    "import kfp.notebook\n",
+    "import kfp.gcp as gcp\n",
     "client = kfp.Client()\n",
-    "exp = client.create_experiment(name='demo')"
+    "exp = client.create_experiment(name=EXPERIMENT_NAME)"
    ]
   },
   {
@@ -208,7 +225,7 @@
     "def dataflow_tf_data_validation_op(inference_data: 'GcsUri', validation_data: 'GcsUri', column_names: 'GcsUri[text/json]', key_columns, project: 'GcpProject', mode, validation_output: 'GcsUri[Directory]', step_name='validation'):\n",
     "    return dsl.ContainerOp(\n",
     "        name = step_name,\n",
-    "        image = 'gcr.io/ml-pipeline/ml-pipeline-dataflow-tfdv:0.1.3-rc.2', #TODO-release: update the release tag for the next release\n",
+    "        image = DATAFLOW_TFDV_IMAGE,\n",
     "        arguments = [\n",
     "            '--csv-data-for-inference', inference_data,\n",
     "            '--csv-data-to-validate', validation_data,\n",
@@ -222,12 +239,12 @@
     "            'output': '/output.txt',\n",
     "            'schema': '/output_schema.json',\n",
     "        }\n",
-    "    )\n",
+    "    ).apply(gcp.use_gcp_secret('user-gcp-sa'))\n",
     "\n",
     "def dataflow_tf_transform_op(train_data: 'GcsUri', evaluation_data: 'GcsUri', schema: 'GcsUri[text/json]', project: 'GcpProject', preprocess_mode, preprocess_module: 'GcsUri[text/code/python]', transform_output: 'GcsUri[Directory]', step_name='preprocess'):\n",
     "    return dsl.ContainerOp(\n",
     "        name = step_name,\n",
-    "        image = 'gcr.io/ml-pipeline/ml-pipeline-dataflow-tft:0.1.3-rc.2', #TODO-release: update the release tag for the next release\n",
+    "        image = DATAFLOW_TFT_IMAGE,\n",
     "        arguments = [\n",
     "            '--train', train_data,\n",
     "            '--eval', evaluation_data,\n",
@@ -238,13 +255,13 @@
     "            '--output', transform_output,\n",
     "        ],\n",
     "        file_outputs = {'transformed': '/output.txt'}\n",
-    "    )\n",
+    "    ).apply(gcp.use_gcp_secret('user-gcp-sa'))\n",
     "\n",
     "\n",
     "def tf_train_op(transformed_data_dir, schema: 'GcsUri[text/json]', learning_rate: float, hidden_layer_size: int, steps: int, target: str, preprocess_module: 'GcsUri[text/code/python]', training_output: 'GcsUri[Directory]', step_name='training'):\n",
     "    return dsl.ContainerOp(\n",
     "        name = step_name,\n",
-    "        image = 'gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer:0.1.3-rc.2', #TODO-release: update the release tag for the next release\n",
+    "        image = KUBEFLOW_TF_TRAINER_IMAGE,\n",
     "        arguments = [\n",
     "            '--transformed-data-dir', transformed_data_dir,\n",
     "            '--schema', schema,\n",
@@ -256,12 +273,12 @@
     "            '--job-dir', training_output,\n",
     "        ],\n",
     "        file_outputs = {'train': '/output.txt'}\n",
-    "    )\n",
+    "    ).apply(gcp.use_gcp_secret('user-gcp-sa'))\n",
     "\n",
     "def dataflow_tf_model_analyze_op(model: 'TensorFlow model', evaluation_data: 'GcsUri', schema: 'GcsUri[text/json]', project: 'GcpProject', analyze_mode, analyze_slice_column, analysis_output: 'GcsUri', step_name='analysis'):\n",
     "    return dsl.ContainerOp(\n",
     "        name = step_name,\n",
-    "        image = 'gcr.io/ml-pipeline/ml-pipeline-dataflow-tfma:0.1.3-rc.2', #TODO-release: update the release tag for the next release\n",
+    "        image = DATAFLOW_TFMA_IMAGE,\n",
     "        arguments = [\n",
     "            '--model', model,\n",
     "            '--eval', evaluation_data,\n",
@@ -272,13 +289,13 @@
     "            '--output', analysis_output,\n",
     "        ],\n",
     "        file_outputs = {'analysis': '/output.txt'}\n",
-    "    )\n",
+    "    ).apply(gcp.use_gcp_secret('user-gcp-sa'))\n",
     "\n",
     "\n",
     "def dataflow_tf_predict_op(evaluation_data: 'GcsUri', schema: 'GcsUri[text/json]', target: str, model: 'TensorFlow model', predict_mode, project: 'GcpProject', prediction_output: 'GcsUri', step_name='prediction'):\n",
     "    return dsl.ContainerOp(\n",
     "        name = step_name,\n",
-    "        image = 'gcr.io/ml-pipeline/ml-pipeline-dataflow-tf-predict:0.1.3-rc.2', #TODO-release: update the release tag for the next release\n",
+    "        image = DATAFLOW_TF_PREDICT_IMAGE,\n",
     "        arguments = [\n",
     "            '--data', evaluation_data,\n",
     "            '--schema', schema,\n",
@@ -289,17 +306,17 @@
     "            '--output', prediction_output,\n",
     "        ],\n",
     "        file_outputs = {'prediction': '/output.txt'}\n",
-    "    )\n",
+    "    ).apply(gcp.use_gcp_secret('user-gcp-sa'))\n",
     "\n",
     "def kubeflow_deploy_op(model: 'TensorFlow model', tf_server_name, step_name='deploy'):\n",
     "    return dsl.ContainerOp(\n",
     "        name = step_name,\n",
-    "        image = 'gcr.io/ml-pipeline/ml-pipeline-kubeflow-deployer:0.1.3-rc.2', #TODO-release: update the release tag for the next release\n",
+    "        image = KUBEFLOW_DEPLOYER_IMAGE,\n",
     "        arguments = [\n",
     "            '--model-path', model,\n",
     "            '--server-name', tf_server_name\n",
     "        ]\n",
-    "    )\n",
+    "    ).apply(gcp.use_gcp_secret('user-gcp-sa'))\n",
     "\n",
     "\n",
     "# The pipeline definition\n",
@@ -312,15 +329,15 @@
     "    project,\n",
     "    column_names=dsl.PipelineParam(name='column-names', value='gs://ml-pipeline-playground/tfx/taxi-cab-classification/column-names.json'),\n",
     "    key_columns=dsl.PipelineParam(name='key-columns', value='trip_start_timestamp'),\n",
-    "    train=dsl.PipelineParam(name='train', value='gs://ml-pipeline-playground/tfx/taxi-cab-classification/train.csv'),\n",
-    "    evaluation=dsl.PipelineParam(name='evaluation', value='gs://ml-pipeline-playground/tfx/taxi-cab-classification/eval.csv'),\n",
+    "    train=dsl.PipelineParam(name='train', value=TRAIN_DATA),\n",
+    "    evaluation=dsl.PipelineParam(name='evaluation', value=EVAL_DATA),\n",
     "    validation_mode=dsl.PipelineParam(name='validation-mode', value='local'),\n",
     "    preprocess_mode=dsl.PipelineParam(name='preprocess-mode', value='local'),\n",
     "    preprocess_module: dsl.PipelineParam=dsl.PipelineParam(name='preprocess-module', value='gs://ml-pipeline-playground/tfx/taxi-cab-classification/preprocessing.py'),\n",
     "    target=dsl.PipelineParam(name='target', value='tips'),\n",
     "    learning_rate=dsl.PipelineParam(name='learning-rate', value=0.1),\n",
-    "    hidden_layer_size=dsl.PipelineParam(name='hidden-layer-size', value='1500'),\n",
-    "    steps=dsl.PipelineParam(name='steps', value=3000),\n",
+    "    hidden_layer_size=dsl.PipelineParam(name='hidden-layer-size', value=HIDDEN_LAYER_SIZE),\n",
+    "    steps=dsl.PipelineParam(name='steps', value=STEPS),\n",
     "    predict_mode=dsl.PipelineParam(name='predict-mode', value='local'),\n",
     "    analyze_mode=dsl.PipelineParam(name='analyze-mode', value='local'),\n",
     "    analyze_slice_column=dsl.PipelineParam(name='analyze-slice-column', value='trip_start_hour')):\n",
@@ -480,7 +497,7 @@
    "source": [
     "# Test the function and make sure it works.\n",
     "path = 'gs://ml-pipeline-playground/sampledata/taxi/train'\n",
-    "deploy_model('taxidev.beta', path, PROJECT_NAME, '1.9')"
+    "deploy_model(DEV_DEPLOYER_MODEL, path, PROJECT_NAME, '1.9')"
    ]
   },
   {
@@ -699,10 +716,10 @@
     "    key_columns=dsl.PipelineParam(name='key-columns', value='trip_start_timestamp'),\n",
     "    train=dsl.PipelineParam(\n",
     "        name='train',\n",
-    "        value='gs://ml-pipeline-playground/tfx/taxi-cab-classification/train.csv'),\n",
+    "        value=TRAIN_DATA),\n",
     "    evaluation=dsl.PipelineParam(\n",
     "        name='evaluation',\n",
-    "        value='gs://ml-pipeline-playground/tfx/taxi-cab-classification/eval.csv'),\n",
+    "        value=EVAL_DATA),\n",
     "    validation_mode=dsl.PipelineParam(name='validation-mode', value='local'),\n",
     "    preprocess_mode=dsl.PipelineParam(name='preprocess-mode', value='local'),\n",
     "    preprocess_module: dsl.PipelineParam=dsl.PipelineParam(\n",
@@ -710,8 +727,8 @@
     "        value='gs://ml-pipeline-playground/tfx/taxi-cab-classification/preprocessing.py'),\n",
     "    target=dsl.PipelineParam(name='target', value='tips'),\n",
     "    learning_rate=dsl.PipelineParam(name='learning-rate', value=0.1),\n",
-    "    hidden_layer_size=dsl.PipelineParam(name='hidden-layer-size', value='1500'),\n",
-    "    steps=dsl.PipelineParam(name='steps', value=3000),\n",
+    "    hidden_layer_size=dsl.PipelineParam(name='hidden-layer-size', value=HIDDEN_LAYER_SIZE),\n",
+    "    steps=dsl.PipelineParam(name='steps', value=STEPS),\n",
     "    predict_mode=dsl.PipelineParam(name='predict-mode', value='local'),\n",
     "    analyze_mode=dsl.PipelineParam(name='analyze-mode', value='local'),\n",
     "    analyze_slice_column=dsl.PipelineParam(name='analyze-slice-column', value='trip_start_hour')):\n",
@@ -732,7 +749,7 @@
     "    prediction = dataflow_tf_predict_op(evaluation, schema, target, training.output, predict_mode, project, prediction_output)\n",
     "    \n",
     "    # The new deployer. Note that the DeployerOp interface is similar to the function \"deploy_model\".\n",
-    "    deploy = DeployerOp(gcp_project=project, model_dot_version=model, runtime='1.9', model_path=training.output)"
+    "    deploy = DeployerOp(gcp_project=project, model_dot_version=model, runtime='1.9', model_path=training.output).apply(gcp.use_gcp_secret('user-gcp-sa'))"
    ]
   },
   {
@@ -766,7 +783,7 @@
    "source": [
     "run = client.run_pipeline(exp.id, 'my-tfx', 'my-tfx.tar.gz',\n",
     "                          params={'output': OUTPUT_DIR,\n",
     "                                  'project': PROJECT_NAME,\n",
-    "                                  'model': 'mytaxi.beta'})"
+    "                                  'model': PROD_DEPLOYER_MODEL})"
    ]
   },
   {
diff --git a/samples/notebooks/Lightweight Python components - basics.ipynb b/samples/notebooks/Lightweight Python components - basics.ipynb
index 3cda00de385..14880af9e65 100644
--- a/samples/notebooks/Lightweight Python components - basics.ipynb
+++ b/samples/notebooks/Lightweight Python components - basics.ipynb
@@ -19,14 +19,28 @@
     "* To build a component with multiple output values, use the typing.NamedTuple type hint syntax: ```NamedTuple('MyFunctionOutputs', [('output_name_1', type), ('output_name_2', float)])```"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "tags": [
+     "parameters"
+    ]
+   },
+   "outputs": [],
+   "source": [
+    "EXPERIMENT_NAME = 'lightweight python components'\n",
+    "KFP_PACKAGE = 'https://storage.googleapis.com/ml-pipeline/release/0.1.3-rc.2/kfp.tar.gz'"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "#Install the SDK\n",
-    "!pip3 install https://storage.googleapis.com/ml-pipeline/release/0.1.3-rc.2/kfp.tar.gz --upgrade\n"
+    "# Install the SDK\n",
+    "!pip3 install $KFP_PACKAGE --upgrade\n"
    ]
   },
   {
@@ -236,13 +250,7 @@
     "#Get or create an experiment and submit a pipeline run\n",
     "import kfp\n",
     "client = kfp.Client()\n",
-    "list_experiments_response = client.list_experiments()\n",
-    "experiments = list_experiments_response.experiments\n",
-    "if not experiments:\n",
-    "    #The user does not have any experiments available. Creating a new one\n",
-    "    experiment = client.create_experiment(pipeline_func.__name__ + ' experiment')\n",
-    "else:\n",
-    "    experiment = experiments[-1] #Using the last experiment\n",
+    "experiment = client.create_experiment(EXPERIMENT_NAME)\n",
     "\n",
     "#Submit a pipeline run\n",
     "run_name = pipeline_func.__name__ + ' run'\n",
diff --git a/sdk/python/kfp/_client.py b/sdk/python/kfp/_client.py
index ccc73a20309..0a2d9479a3f 100644
--- a/sdk/python/kfp/_client.py
+++ b/sdk/python/kfp/_client.py
@@ -158,16 +158,20 @@ def run_pipeline(self, experiment_id, job_name, pipeline_package_path, params={}
     IPython.display.display(IPython.display.HTML(html))
     return response.run
 
-  def list_runs(self, page_token='', page_size=10, sort_by=''):
+  def list_runs(self, page_token='', page_size=10, sort_by='', resource_reference_key_type=None, resource_reference_key_id=None):
     """List runs.
     Args:
       page_token: token for starting of the page.
      page_size: size of the page.
       sort_by: one of 'field_name', 'field_name des'. For example, 'name des'.
+      resource_reference_key: resource filtering key
     Returns:
       A response object including a list of experiments and next page token.
     """
-    response = self._run_api.list_runs(page_token=page_token, page_size=page_size, sort_by=sort_by)
+    if resource_reference_key_type is not None and resource_reference_key_id is not None:
+      response = self._run_api.list_runs(page_token=page_token, page_size=page_size, sort_by=sort_by, resource_reference_key_type=resource_reference_key_type, resource_reference_key_id=resource_reference_key_id)
+    else:
+      response = self._run_api.list_runs(page_token=page_token, page_size=page_size, sort_by=sort_by)
     return response
 
   def get_run(self, run_id):
diff --git a/test/sample-test/Dockerfile b/test/sample-test/Dockerfile
index 38c184efafd..2bfd52df449 100644
--- a/test/sample-test/Dockerfile
+++ b/test/sample-test/Dockerfile
@@ -14,6 +14,9 @@ RUN pip3 install junit-xml
 RUN pip3 install kubernetes
 RUN pip3 install minio
 RUN pip3 install setuptools==40.5.0
+RUN pip3 install papermill==0.16.1
+RUN pip3 install ipykernel==5.1.0
+RUN pip3 install google-api-python-client==1.7.0
 
 #Needs test/sample-test and the files needed to run sdk/python/build.sh
 COPY . /python/src/github.com/kubeflow/pipelines
diff --git a/test/sample-test/check_notebook_results.py b/test/sample-test/check_notebook_results.py
new file mode 100644
index 00000000000..bd27388bde8
--- /dev/null
+++ b/test/sample-test/check_notebook_results.py
@@ -0,0 +1,88 @@
+# Copyright 2018 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+from kfp import Client
+import utils
+
+###### Input/Output Instruction ######
+# input: experiment name, testname, and, namespace
+
+# Parsing the input arguments
+def parse_arguments():
+  """Parse command line arguments."""
+
+  parser = argparse.ArgumentParser()
+  parser.add_argument('--experiment',
+                      type=str,
+                      required=True,
+                      help='The experiment name')
+  parser.add_argument('--testname',
+                      type=str,
+                      required=True,
+                      help="Test name")
+  parser.add_argument('--namespace',
+                      type=str,
+                      default='kubeflow',
+                      help="namespace of the deployed pipeline system. Default: kubeflow")
+  parser.add_argument('--result',
+                      type=str,
+                      required=True,
+                      help='The path of the test result that will be exported.')
+  args = parser.parse_args()
+  return args
+
+def main():
+  args = parse_arguments()
+  test_cases = []
+  test_name = args.testname + ' Sample Test'
+
+  ###### Initialization ######
+  client = Client(namespace=args.namespace)
+
+  ###### Get experiments ######
+  list_experiments_response = client.list_experiments(page_size=100)
+  for experiment in list_experiments_response.experiments:
+    if experiment.name == args.experiment:
+      experiment_id = experiment.id
+
+  ###### Get runs ######
+  import kfp_run
+  resource_reference_key_type = kfp_run.models.api_resource_type.ApiResourceType.EXPERIMENT
+  resource_reference_key_id = experiment_id
+  list_runs_response = client.list_runs(page_size=1000, resource_reference_key_type=resource_reference_key_type, resource_reference_key_id=resource_reference_key_id)
+
+  ###### Check all runs ######
+  for run in list_runs_response.runs:
+    run_id = run.id
+    response = client.wait_for_run_completion(run_id, 1200)
+    succ = (response.run.status.lower()=='succeeded')
+    utils.add_junit_test(test_cases, 'job completion', succ, 'waiting for job completion failure')
+
+    ###### Output Argo Log for Debugging ######
+    workflow_json = client._get_workflow_json(run_id)
+    workflow_id = workflow_json['metadata']['name']
+    argo_log, _ = utils.run_bash_command('argo logs -n {} -w {}'.format(args.namespace, workflow_id))
+    print("=========Argo Workflow Log=========")
+    print(argo_log)
+
+    if not succ:
+      utils.write_junit_xml(test_name, args.result, test_cases)
+      exit(1)
+
+  ###### Write out the test result in junit xml ######
+  utils.write_junit_xml(test_name, args.result, test_cases)
+
+if __name__ == "__main__":
+  main()
diff --git a/test/sample-test/clean_cmle_models.py b/test/sample-test/clean_cmle_models.py
new file mode 100644
index 00000000000..ff290d5f186
--- /dev/null
+++ b/test/sample-test/clean_cmle_models.py
@@ -0,0 +1,64 @@
+# Copyright 2018 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+##### Input/Output Instruction ######
+# input: project, model, version
+
+# Parsing the input arguments
+def parse_arguments():
+  """Parse command line arguments."""
+
+  parser = argparse.ArgumentParser()
+  parser.add_argument('--project',
+                      type=str,
+                      required=True,
+                      help='The project name')
+  parser.add_argument('--model',
+                      type=str,
+                      required=True,
+                      help='The model name')
+  parser.add_argument('--version',
+                      type=str,
+                      required=True,
+                      help="model version")
+  args = parser.parse_args()
+  return args
+
+# Google API Client utilities
+#TODO: generalize the function such that it handles multiple version deletions.
+def delete_model(project_name, model_name, version):
+  from googleapiclient import discovery
+  import time
+  api = discovery.build('ml', 'v1')
+  api.projects().models().versions().delete(name='projects/' + project_name + '/models/' + model_name + '/versions/' + version).execute()
+  version_deleted = False
+  while not version_deleted:
+    try:
+      api.projects().models().delete(name='projects/' + project_name + '/models/' + model_name).execute()
+      version_deleted = True
+    except:
+      print('waiting for the versions to be deleted before the model is deleted')
+      time.sleep(5)
+  print('model ' + model_name + ' deleted.')
+
+def main():
+  args = parse_arguments()
+  project_name = args.project
+  model_name = args.model
+  version = args.version
+  delete_model(project_name=project_name, model_name=model_name, version=version)
+
+if __name__ == "__main__":
+  main()
diff --git a/test/sample-test/run_basic_test.py b/test/sample-test/run_basic_test.py
index 8c366163d3d..7abb7a48189 100644
--- a/test/sample-test/run_basic_test.py
+++ b/test/sample-test/run_basic_test.py
@@ -80,7 +80,6 @@ def main():
   run_id = response.id
   utils.add_junit_test(test_cases, 'create pipeline run', True)
 
-
   ###### Monitor Job ######
   start_time = datetime.now()
   response = client.wait_for_run_completion(run_id, 1200)
diff --git a/test/sample-test/run_test.sh b/test/sample-test/run_test.sh
index 388bec49148..fe4dea664a6 100755
--- a/test/sample-test/run_test.sh
+++ b/test/sample-test/run_test.sh
@@ -18,8 +18,9 @@ set -xe
 
 usage()
 {
-    echo "usage: run_kubeflow_test.sh
+    echo "usage: run_test.sh
            [--results-gcs-dir GCS directory for the test results]
+           [--target-image-prefix image prefix]
            [--dataflow-tft-image image path to the dataflow tft]
            [--dataflow-predict-image image path to the dataflow predict]
            [--dataflow-tfma-image image path to the dataflow tfma]
@@ -44,6 +45,9 @@ while [ "$1" != "" ]; do
     --results-gcs-dir )        shift
                                RESULTS_GCS_DIR=$1
                                ;;
+    --target-image-prefix )    shift
+                               TARGET_IMAGE_PREFIX=$1
+                               ;;
     --dataflow-tft-image )     shift
                                DATAFLOW_TFT_IMAGE=$1
                                ;;
@@ -261,4 +265,52 @@ elif [ "$TEST_NAME" == "xgboost" ]; then
 
   echo "Copy the test results to GCS ${RESULTS_GCS_DIR}/"
   gsutil cp ${SAMPLE_XGBOOST_TEST_RESULT} ${RESULTS_GCS_DIR}/${SAMPLE_XGBOOST_TEST_RESULT}
 
+elif [ "$TEST_NAME" == "notebook-tfx" ]; then
+  SAMPLE_NOTEBOOK_TFX_TEST_RESULT=junit_SampleNotebookTFXOutput.xml
+  SAMPLE_NOTEBOOK_TFX_TEST_OUTPUT=${RESULTS_GCS_DIR}
+
+  # CMLE model name format: A name should start with a letter and contain only letters, numbers and underscores.
+  DEPLOYER_MODEL=`cat /proc/sys/kernel/random/uuid`
+  DEPLOYER_MODEL=A`echo ${DEPLOYER_MODEL//-/_}`
+  DEV_DEPLOYER_MODEL=${DEPLOYER_MODEL}_dev
+  PROD_DEPLOYER_MODEL=${DEPLOYER_MODEL}_prod
+  MODEL_VERSION=beta
+
+  cd ${BASE_DIR}/samples/notebooks
+  export LC_ALL=C.UTF-8
+  export LANG=C.UTF-8
+  papermill --prepare-only -p EXPERIMENT_NAME notebook-tfx-test -p OUTPUT_DIR ${RESULTS_GCS_DIR} -p PROJECT_NAME ml-pipeline-test \
+    -p BASE_IMAGE ${TARGET_IMAGE_PREFIX}pusherbase:dev -p TARGET_IMAGE ${TARGET_IMAGE_PREFIX}pusher:dev \
+    -p KFP_PACKAGE /tmp/kfp.tar.gz -p DEV_DEPLOYER_MODEL ${DEV_DEPLOYER_MODEL}.${MODEL_VERSION} -p PROD_DEPLOYER_MODEL ${PROD_DEPLOYER_MODEL}.${MODEL_VERSION} \
+    -p DATAFLOW_TFDV_IMAGE ${DATAFLOW_TFDV_IMAGE} -p DATAFLOW_TFT_IMAGE ${DATAFLOW_TFT_IMAGE} -p DATAFLOW_TFMA_IMAGE ${DATAFLOW_TFMA_IMAGE} -p DATAFLOW_TF_PREDICT_IMAGE ${DATAFLOW_PREDICT_IMAGE} \
+    -p KUBEFLOW_TF_TRAINER_IMAGE ${KUBEFLOW_DNNTRAINER_IMAGE} -p KUBEFLOW_DEPLOYER_IMAGE ${KUBEFLOW_DEPLOYER_IMAGE} \
+    -p TRAIN_DATA gs://ml-pipeline-dataset/sample-test/taxi-cab-classification/train50.csv -p EVAL_DATA gs://ml-pipeline-dataset/sample-test/taxi-cab-classification/eval20.csv \
+    -p HIDDEN_LAYER_SIZE 10 -p STEPS 50 KubeFlow\ Pipeline\ Using\ TFX\ OSS\ Components.ipynb notebook-tfx.ipynb
+  jupyter nbconvert --to python notebook-tfx.ipynb
+  pip3 install tensorflow==1.8.0
+  ipython notebook-tfx.py
+  cd "${TEST_DIR}"
+  python3 check_notebook_results.py --experiment notebook-tfx-test --testname notebooktfx --result $SAMPLE_NOTEBOOK_TFX_TEST_RESULT --namespace ${NAMESPACE}
+
+  echo "Copy the test results to GCS ${RESULTS_GCS_DIR}/"
+  gsutil cp $SAMPLE_NOTEBOOK_TFX_TEST_RESULT ${RESULTS_GCS_DIR}/$SAMPLE_NOTEBOOK_TFX_TEST_RESULT
+
+  #Clean CMLE models
+  python3 clean_cmle_models.py --project ml-pipeline-test --model ${DEV_DEPLOYER_MODEL} --version ${MODEL_VERSION}
+  python3 clean_cmle_models.py --project ml-pipeline-test --model ${PROD_DEPLOYER_MODEL} --version ${MODEL_VERSION}
+elif [ "$TEST_NAME" == "notebook-lightweight" ]; then
+  SAMPLE_NOTEBOOK_LIGHTWEIGHT_TEST_RESULT=junit_SampleNotebookLightweightOutput.xml
+  SAMPLE_NOTEBOOK_LIGHTWEIGHT_TEST_OUTPUT=${RESULTS_GCS_DIR}
+
+  cd ${BASE_DIR}/samples/notebooks
+  export LC_ALL=C.UTF-8
+  export LANG=C.UTF-8
+  papermill --prepare-only -p EXPERIMENT_NAME notebook-lightweight -p PROJECT_NAME ml-pipeline-test -p KFP_PACKAGE /tmp/kfp.tar.gz Lightweight\ Python\ components\ -\ basics.ipynb notebook-lightweight.ipynb
+  jupyter nbconvert --to python notebook-lightweight.ipynb
+  ipython notebook-lightweight.py
+  cd "${TEST_DIR}"
+  python3 check_notebook_results.py --experiment notebook-lightweight --testname notebooklightweight --result $SAMPLE_NOTEBOOK_LIGHTWEIGHT_TEST_RESULT --namespace ${NAMESPACE}
+
+  echo "Copy the test results to GCS ${RESULTS_GCS_DIR}/"
+  gsutil cp $SAMPLE_NOTEBOOK_LIGHTWEIGHT_TEST_RESULT ${RESULTS_GCS_DIR}/$SAMPLE_NOTEBOOK_LIGHTWEIGHT_TEST_RESULT
 fi
diff --git a/test/sample_test_v2.yaml b/test/sample_test_v2.yaml
index 9c8ac50074e..7e992488c7f 100644
--- a/test/sample_test_v2.yaml
+++ b/test/sample_test_v2.yaml
@@ -210,6 +210,8 @@ spec:
         parameters:
         - name: test-results-gcs-dir
           value: "{{inputs.parameters.test-results-gcs-dir}}"
+        - name: target-image-prefix
+          value: "{{inputs.parameters.target-image-prefix}}"
         - name: dataflow-tft-image
           value: "{{inputs.parameters.target-image-prefix}}{{inputs.parameters.dataflow-tft-image-suffix}}"
         - name: dataflow-predict-image
@@ -250,6 +252,8 @@
         parameters:
         - name: test-results-gcs-dir
           value: "{{inputs.parameters.test-results-gcs-dir}}"
+        - name: target-image-prefix
+          value: "{{inputs.parameters.target-image-prefix}}"
         - name: dataflow-tft-image
           value: "{{inputs.parameters.target-image-prefix}}{{inputs.parameters.dataflow-tft-image-suffix}}"
         - name: dataflow-predict-image
@@ -290,6 +294,8 @@
         parameters:
        - name: test-results-gcs-dir
           value: "{{inputs.parameters.test-results-gcs-dir}}"
+        - name: target-image-prefix
+          value: "{{inputs.parameters.target-image-prefix}}"
         - name: dataflow-tft-image
           value: "{{inputs.parameters.target-image-prefix}}{{inputs.parameters.dataflow-tft-image-suffix}}"
         - name: dataflow-predict-image
@@ -324,6 +330,48 @@
           value: "{{inputs.parameters.namespace}}"
         - name: test-name
           value: "xgboost"
+    - name: run-notebook-tfx-tests
+      template: run-sample-tests
+      arguments:
+        parameters:
+        - name: test-results-gcs-dir
+          value: "{{inputs.parameters.test-results-gcs-dir}}"
+        - name: target-image-prefix
+          value: "{{inputs.parameters.target-image-prefix}}"
+        - name: dataflow-tft-image
+          value: "{{inputs.parameters.target-image-prefix}}{{inputs.parameters.dataflow-tft-image-suffix}}"
+        - name: dataflow-predict-image
+          value: "{{inputs.parameters.target-image-prefix}}{{inputs.parameters.dataflow-predict-image-suffix}}"
+        - name: dataflow-tfma-image
+          value: "{{inputs.parameters.target-image-prefix}}{{inputs.parameters.dataflow-tfma-image-suffix}}"
+        - name: dataflow-tfdv-image
+          value: "{{inputs.parameters.target-image-prefix}}{{inputs.parameters.dataflow-tfdv-image-suffix}}"
+        - name: dataproc-create-cluster-image
+          value: "{{inputs.parameters.target-image-prefix}}{{inputs.parameters.dataproc-create-cluster-image-suffix}}"
+        - name: dataproc-delete-cluster-image
+          value: "{{inputs.parameters.target-image-prefix}}{{inputs.parameters.dataproc-delete-cluster-image-suffix}}"
+        - name: dataproc-analyze-image
+          value: "{{inputs.parameters.target-image-prefix}}{{inputs.parameters.dataproc-analyze-image-suffix}}"
+        - name: dataproc-transform-image
+          value: "{{inputs.parameters.target-image-prefix}}{{inputs.parameters.dataproc-transform-image-suffix}}"
+        - name: dataproc-train-image
+          value: "{{inputs.parameters.target-image-prefix}}{{inputs.parameters.dataproc-train-image-suffix}}"
+        - name: dataproc-predict-image
+          value: "{{inputs.parameters.target-image-prefix}}{{inputs.parameters.dataproc-predict-image-suffix}}"
+        - name: kubeflow-dnntrainer-image
+          value: "{{inputs.parameters.target-image-prefix}}{{inputs.parameters.kubeflow-dnntrainer-image-suffix}}"
+        - name: kubeflow-deployer-image
+          value: "{{inputs.parameters.target-image-prefix}}{{inputs.parameters.kubeflow-deployer-image-suffix}}"
+        - name: local-confusionmatrix-image
+          value: "{{inputs.parameters.target-image-prefix}}{{inputs.parameters.local-confusionmatrix-image-suffix}}"
+        - name: local-roc-image
+          value: "{{inputs.parameters.target-image-prefix}}{{inputs.parameters.local-roc-image-suffix}}"
+        - name: sample-tests-image
+          value: "{{inputs.parameters.target-image-prefix}}{{inputs.parameters.sample-tests-image-suffix}}"
+        - name: namespace
+          value: "{{inputs.parameters.namespace}}"
+        - name: test-name
+          value: "notebook-tfx"
 
   # Build and push image
   - name: build-image-by-dockerfile
@@ -408,6 +456,7 @@
   inputs:
     parameters:
     - name: test-results-gcs-dir
+    - name: target-image-prefix
    - name: dataflow-tft-image
     - name: dataflow-predict-image
     - name: dataflow-tfma-image
@@ -429,6 +478,7 @@
     image: "{{inputs.parameters.sample-tests-image}}"
     args: [
       "--results-gcs-dir", "{{inputs.parameters.test-results-gcs-dir}}",
+      "--target-image-prefix", "{{inputs.parameters.target-image-prefix}}",
      "--dataflow-tft-image","{{inputs.parameters.dataflow-tft-image}}",
       "--dataflow-predict-image","{{inputs.parameters.dataflow-predict-image}}",
       "--dataflow-tfma-image","{{inputs.parameters.dataflow-tfma-image}}",