From c038e44b53203a99fdef76b32581d41b195bfca2 Mon Sep 17 00:00:00 2001 From: Ning Gao Date: Mon, 12 Nov 2018 17:27:37 -0800 Subject: [PATCH 1/2] enable component build unit test --- sdk/python/kfp/compiler/_component_builder.py | 55 ++++++++--------- .../tests/compiler/component_builder_test.py | 59 +++++-------------- sdk/python/tests/compiler/main.py | 3 +- 3 files changed, 40 insertions(+), 77 deletions(-) diff --git a/sdk/python/kfp/compiler/_component_builder.py b/sdk/python/kfp/compiler/_component_builder.py index 7c09e34e965..8438933f6b8 100644 --- a/sdk/python/kfp/compiler/_component_builder.py +++ b/sdk/python/kfp/compiler/_component_builder.py @@ -64,9 +64,8 @@ class DockerfileHelper(object): arc_dockerfile_name: dockerfile filename that is stored in the tarball gcs_path: the gcs path to store the tarball that contains both the dockerfile and the python file """ - def __init__(self, arc_dockerfile_name, gcs_path): + def __init__(self, arc_dockerfile_name): self._arc_dockerfile_name = arc_dockerfile_name - self._gcs_path = gcs_path def _generate_dockerfile_with_py(self, target_file, base_image, python_filepath): """ _generate_docker_file generates a simple dockerfile with the python path """ @@ -86,22 +85,17 @@ def _wrap_files_in_tarball(self, tarball_path, files={}): for key, value in files.items(): tarball.add(value, arcname=key) - def prepare_docker_tarball_with_py(self, arc_python_filename, python_filepath, base_image): + def prepare_docker_tarball_with_py(self, arc_python_filename, python_filepath, base_image, local_tarball_path): """ prepare_docker_tarball is the API to generate dockerfile and prepare the tarball with python scripts """ with tempfile.TemporaryDirectory() as local_build_dir: local_dockerfile_path = os.path.join(local_build_dir, self._arc_dockerfile_name) self._generate_dockerfile_with_py(local_dockerfile_path, base_image, arc_python_filename) - local_tarball_path = os.path.join(local_build_dir, 'docker.tmp.tar.gz') self._wrap_files_in_tarball(local_tarball_path, {self._arc_dockerfile_name:local_dockerfile_path, arc_python_filename:python_filepath}) - GCSHelper.upload_gcs_file(local_tarball_path, self._gcs_path) - def prepare_docker_tarball(self, dockerfile_path): + def prepare_docker_tarball(self, dockerfile_path, local_tarball_path): """ prepare_docker_tarball is the API to prepare a tarball with the dockerfile """ - with tempfile.TemporaryDirectory() as local_build_dir: - local_tarball_path = os.path.join(local_build_dir, 'docker.tmp.tar.gz') - self._wrap_files_in_tarball(local_tarball_path, {self._arc_dockerfile_name:dockerfile_path}) - GCSHelper.upload_gcs_file(local_tarball_path, self._gcs_path) + self._wrap_files_in_tarball(local_tarball_path, {self._arc_dockerfile_name:dockerfile_path}) class CodeGenerator(object): """ CodeGenerator helps to generate python codes with identation """ @@ -144,18 +138,10 @@ def __init__(self, gcs_base, target_image): def _check_gcs_path(self, gcs_path): """ _check_gcs_path check both the path validity and write permissions """ logging.info('Checking path: {}...'.format(gcs_path)) - if gcs_path.startswith('gs://'): - with tempfile.TemporaryDirectory() as local_build_dir: - rand_filename = str(uuid.uuid4()) + '.txt' - local_rand_file = os.path.join(local_build_dir, rand_filename) - Path(local_rand_file).touch() - random_gcs_bucket = os.path.join(gcs_path, rand_filename) - GCSHelper.upload_gcs_file(local_rand_file, random_gcs_bucket) - GCSHelper.remove_gcs_blob(random_gcs_bucket) - return True - else: + if not gcs_path.startswith('gs://'): logging.error('Error: {} should be a GCS path.'.format(gcs_path)) return False + return True def _generate_kaniko_spec(self, namespace, arc_dockerfile_name, gcs_path, target_image): """_generate_kaniko_yaml generates kaniko job yaml based on a template yaml """ @@ -259,10 +245,12 @@ def build_image_from_func(self, component_func, namespace, base_image, timeout): # Prepare build files logging.info('Generate build files.') - docker_helper = DockerfileHelper(arc_dockerfile_name=self._arc_dockerfile_name, gcs_path=self._gcs_path) + docker_helper = DockerfileHelper(arc_dockerfile_name=self._arc_dockerfile_name) + local_tarball_file = os.path.join(local_build_dir, 'docker.tmp.tar.gz') docker_helper.prepare_docker_tarball_with_py(python_filepath=local_python_filepath, arc_python_filename=self._arc_python_filepath, - base_image=base_image) + base_image=base_image, local_tarball_file=local_tarball_file) + GCSHelper.upload_gcs_file(local_tarball_file, self._gcs_path) kaniko_spec = self._generate_kaniko_spec(namespace=namespace, arc_dockerfile_name=self._arc_dockerfile_name, @@ -279,19 +267,22 @@ def build_image_from_func(self, component_func, namespace, base_image, timeout): def build_image_from_dockerfile(self, dockerfile_path, timeout, namespace): """ build_image_from_dockerfile builds an image directly """ - logging.info('Generate build files.') - docker_helper = DockerfileHelper(arc_dockerfile_name=self._arc_dockerfile_name, gcs_path=self._gcs_path) - docker_helper.prepare_docker_tarball(dockerfile_path) + with tempfile.TemporaryDirectory() as local_build_dir: + logging.info('Generate build files.') + docker_helper = DockerfileHelper(arc_dockerfile_name=self._arc_dockerfile_name) + local_tarball_file = os.path.join(local_build_dir, 'docker.tmp.tar.gz') + docker_helper.prepare_docker_tarball(dockerfile_path, local_tarball_file=local_tarball_file) + GCSHelper.upload_gcs_file(local_tarball_file, self._gcs_path) - kaniko_spec = self._generate_kaniko_spec(namespace=namespace, arc_dockerfile_name=self._arc_dockerfile_name, + kaniko_spec = self._generate_kaniko_spec(namespace=namespace, arc_dockerfile_name=self._arc_dockerfile_name, gcs_path=self._gcs_path, target_image=self._target_image) - logging.info('Start a kaniko job for build.') - k8s_helper = K8sHelper() - k8s_helper.run_job(kaniko_spec, timeout) - logging.info('Kaniko job complete.') + logging.info('Start a kaniko job for build.') + k8s_helper = K8sHelper() + k8s_helper.run_job(kaniko_spec, timeout) + logging.info('Kaniko job complete.') - # Clean up - GCSHelper.remove_gcs_blob(self._gcs_path) + # Clean up + GCSHelper.remove_gcs_blob(self._gcs_path) def _generate_pythonop(component_func, target_image): """ Generate operator for the pipeline authors diff --git a/sdk/python/tests/compiler/component_builder_test.py b/sdk/python/tests/compiler/component_builder_test.py index d9f1a78b80f..5dccc5ddf21 100644 --- a/sdk/python/tests/compiler/component_builder_test.py +++ b/sdk/python/tests/compiler/component_builder_test.py @@ -24,30 +24,7 @@ from pathlib import Path import inspect -GCS_BASE = 'gs://ngao-mlpipeline-testing/' - -class TestGCSHelper(unittest.TestCase): - - def test_upload_gcs_path(self): - """ test uploading gcs file """ - # prepare - test_data_dir = os.path.join(os.path.dirname(__file__), 'testdata') - temp_file = os.path.join(test_data_dir, 'test_data.tmp') - temp_downloaded_file = os.path.join(test_data_dir, 'test_data.tmp.downloaded') - Path(temp_file).touch() - gcs_path = os.path.join(GCS_BASE, 'test_data.tmp') - - # check - try: - GCSHelper.upload_gcs_file(temp_file, gcs_path) - GCSHelper.download_gcs_blob(temp_downloaded_file, gcs_path) - GCSHelper.remove_gcs_blob(gcs_path) - except: - self.fail('GCS helper failure') - - # clean up - os.remove(temp_file) - os.remove(temp_downloaded_file) +GCS_BASE = 'gs://kfp-testing/' class TestDockerfileHelper(unittest.TestCase): @@ -65,8 +42,8 @@ def test_wrap_files_in_tarball(self): f.write('temporary file two content') # check - docker_helper = DockerfileHelper(arc_dockerfile_name='', gcs_path='') - self.assertTrue(docker_helper._wrap_files_in_tarball(temp_tarball, {'dockerfile':temp_file_one, 'main.py':temp_file_two})) + docker_helper = DockerfileHelper(arc_dockerfile_name='') + docker_helper._wrap_files_in_tarball(temp_tarball, {'dockerfile':temp_file_one, 'main.py':temp_file_two}) self.assertTrue(os.path.exists(temp_tarball)) temp_tarball_handler = tarfile.open(temp_tarball) temp_files = temp_tarball_handler.getmembers() @@ -93,7 +70,7 @@ def test_generate_dockerfile(self): ENTRYPOINT ["python3", "/ml/main.py"]''' # check - docker_helper = DockerfileHelper(arc_dockerfile_name=target_dockerfile, gcs_path='') + docker_helper = DockerfileHelper(arc_dockerfile_name=target_dockerfile) docker_helper._generate_dockerfile_with_py(target_file=target_dockerfile, base_image='gcr.io/ngao-mlpipeline-testing/tensorflow:1.10.0', python_filepath='main.py') with open(target_dockerfile, 'r') as f: target_dockerfile_payload = f.read() @@ -108,22 +85,21 @@ def test_prepare_docker_with_py(self): # prepare test_data_dir = os.path.join(os.path.dirname(__file__), 'testdata') python_filepath = os.path.join(test_data_dir, 'basic.py') - downloaded_tarball = os.path.join(test_data_dir, 'test_docker.tar.gz') - gcs_tar_path = os.path.join(GCS_BASE, 'test_docker.tar.gz') + generated_tarball = os.path.join(test_data_dir, 'test_docker.tar.gz') # check - docker_helper = DockerfileHelper(arc_dockerfile_name='dockerfile', gcs_path=gcs_tar_path) - docker_helper.prepare_docker_tarball_with_py(arc_python_filename='main.py', python_filepath=python_filepath, base_image='gcr.io/ngao-mlpipeline-testing/tensorflow:1.8.0') - GCSHelper.download_gcs_blob(downloaded_tarball, gcs_tar_path) - temp_tarball_handler = tarfile.open(downloaded_tarball) + docker_helper = DockerfileHelper(arc_dockerfile_name='dockerfile') + docker_helper.prepare_docker_tarball_with_py(arc_python_filename='main.py', python_filepath=python_filepath, + base_image='gcr.io/ngao-mlpipeline-testing/tensorflow:1.8.0', + local_tarball_path=generated_tarball) + temp_tarball_handler = tarfile.open(generated_tarball) temp_files = temp_tarball_handler.getmembers() self.assertTrue(len(temp_files) == 2) for temp_file in temp_files: self.assertTrue(temp_file.name in ['dockerfile', 'main.py']) # clean up - os.remove(downloaded_tarball) - GCSHelper.remove_gcs_blob(gcs_tar_path) + os.remove(generated_tarball) def test_prepare_docker_tarball(self): """ Test the whole prepare docker tarball """ @@ -132,23 +108,20 @@ def test_prepare_docker_tarball(self): test_data_dir = os.path.join(os.path.dirname(__file__), 'testdata') dockerfile_path = os.path.join(test_data_dir, 'component.target.dockerfile') Path(dockerfile_path).touch() - downloaded_tarball = os.path.join(test_data_dir, 'test_docker.tar.gz') - gcs_tar_path = os.path.join(GCS_BASE, 'test_docker.tar.gz') + generated_tarball = os.path.join(test_data_dir, 'test_docker.tar.gz') # check - docker_helper = DockerfileHelper(arc_dockerfile_name='dockerfile', gcs_path=gcs_tar_path) - docker_helper.prepare_docker_tarball(dockerfile_path=dockerfile_path) - GCSHelper.download_gcs_blob(downloaded_tarball, gcs_tar_path) - temp_tarball_handler = tarfile.open(downloaded_tarball) + docker_helper = DockerfileHelper(arc_dockerfile_name='dockerfile') + docker_helper.prepare_docker_tarball(dockerfile_path=dockerfile_path, local_tarball_path=generated_tarball) + temp_tarball_handler = tarfile.open(generated_tarball) temp_files = temp_tarball_handler.getmembers() self.assertTrue(len(temp_files) == 1) for temp_file in temp_files: self.assertTrue(temp_file.name in ['dockerfile']) # clean up - os.remove(downloaded_tarball) + os.remove(generated_tarball) os.remove(dockerfile_path) - GCSHelper.remove_gcs_blob(gcs_tar_path) # hello function is used by the TestCodeGenerator to verify the auto generated python function def hello(): diff --git a/sdk/python/tests/compiler/main.py b/sdk/python/tests/compiler/main.py index 598f75551cf..710cedc00bc 100644 --- a/sdk/python/tests/compiler/main.py +++ b/sdk/python/tests/compiler/main.py @@ -23,8 +23,7 @@ if __name__ == '__main__': suite = unittest.TestSuite() suite.addTests(unittest.defaultTestLoader.loadTestsFromModule(compiler_tests)) - #TODO: enable the test once a gcs mock class is created. - #suite.addTests(unittest.defaultTestLoader.loadTestsFromModule(component_builder_test)) + suite.addTests(unittest.defaultTestLoader.loadTestsFromModule(component_builder_test)) runner = unittest.TextTestRunner() if not runner.run(suite).wasSuccessful(): sys.exit(1) From 18a1d910b6773842847783e561749ce88d739814 Mon Sep 17 00:00:00 2001 From: Ning Gao Date: Mon, 12 Nov 2018 17:31:15 -0800 Subject: [PATCH 2/2] minor fix --- sdk/python/kfp/compiler/_component_builder.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sdk/python/kfp/compiler/_component_builder.py b/sdk/python/kfp/compiler/_component_builder.py index 8438933f6b8..233a06f9421 100644 --- a/sdk/python/kfp/compiler/_component_builder.py +++ b/sdk/python/kfp/compiler/_component_builder.py @@ -61,8 +61,7 @@ def download_gcs_blob(local_path, gcs_path): class DockerfileHelper(object): """ Dockerfile Helper generates a tarball with dockerfile, ready for docker build - arc_dockerfile_name: dockerfile filename that is stored in the tarball - gcs_path: the gcs path to store the tarball that contains both the dockerfile and the python file """ + arc_dockerfile_name: dockerfile filename that is stored in the tarball """ def __init__(self, arc_dockerfile_name): self._arc_dockerfile_name = arc_dockerfile_name @@ -249,7 +248,7 @@ def build_image_from_func(self, component_func, namespace, base_image, timeout): local_tarball_file = os.path.join(local_build_dir, 'docker.tmp.tar.gz') docker_helper.prepare_docker_tarball_with_py(python_filepath=local_python_filepath, arc_python_filename=self._arc_python_filepath, - base_image=base_image, local_tarball_file=local_tarball_file) + base_image=base_image, local_tarball_path=local_tarball_file) GCSHelper.upload_gcs_file(local_tarball_file, self._gcs_path) kaniko_spec = self._generate_kaniko_spec(namespace=namespace, @@ -271,7 +270,7 @@ def build_image_from_dockerfile(self, dockerfile_path, timeout, namespace): logging.info('Generate build files.') docker_helper = DockerfileHelper(arc_dockerfile_name=self._arc_dockerfile_name) local_tarball_file = os.path.join(local_build_dir, 'docker.tmp.tar.gz') - docker_helper.prepare_docker_tarball(dockerfile_path, local_tarball_file=local_tarball_file) + docker_helper.prepare_docker_tarball(dockerfile_path, local_tarball_path=local_tarball_file) GCSHelper.upload_gcs_file(local_tarball_file, self._gcs_path) kaniko_spec = self._generate_kaniko_spec(namespace=namespace, arc_dockerfile_name=self._arc_dockerfile_name,