enable component build unit test #228

Merged · 3 commits · Nov 15, 2018
58 changes: 24 additions & 34 deletions sdk/python/kfp/compiler/_component_builder.py
@@ -61,12 +61,10 @@ def download_gcs_blob(local_path, gcs_path):

class DockerfileHelper(object):
""" Dockerfile Helper generates a tarball with dockerfile, ready for docker build
arc_dockerfile_name: dockerfile filename that is stored in the tarball
gcs_path: the gcs path to store the tarball that contains both the dockerfile and the python file """
arc_dockerfile_name: dockerfile filename that is stored in the tarball """

def __init__(self, arc_dockerfile_name, gcs_path):
def __init__(self, arc_dockerfile_name):
self._arc_dockerfile_name = arc_dockerfile_name
self._gcs_path = gcs_path

def _generate_dockerfile_with_py(self, target_file, base_image, python_filepath):
""" _generate_docker_file generates a simple dockerfile with the python path """
@@ -86,22 +84,17 @@ def _wrap_files_in_tarball(self, tarball_path, files={}):
for key, value in files.items():
tarball.add(value, arcname=key)

def prepare_docker_tarball_with_py(self, arc_python_filename, python_filepath, base_image):
def prepare_docker_tarball_with_py(self, arc_python_filename, python_filepath, base_image, local_tarball_path):
Ark-kun (Contributor) commented on Nov 14, 2018:
How about prepare_docker_tarball_with_py(self, python_filepath, local_tarball_path, base_image='python:3.5', arc_python_filename='program.py')
BTW, can python_filepath be absolute?

gaoning777 (Contributor, Author) replied:
Good question, the python_filepath can be absolute or relative.
I think it might be better not to give default values, or else the error messages might be misleading once the container launches but program.py is not found.

Ark-kun (Contributor) replied:
> Good question, the python_filepath can be absolute or relative.
That's good. The problem is that your code will fail in one of those cases.

Ark-kun (Contributor) commented on Nov 15, 2018:
> the error messages might be misleading once the container launches but program.py is not found.
How come this file is not in the container? You're putting it there yourself.

gaoning777 (Contributor, Author) replied:
You are right. I'm good with a default value for arc_python_filename. However, I do not think we should add a default value for base_image. This parameter should be mandatory for the DockerfileHelper, if not for the outer-layer APIs.
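As an illustrative aside (not part of the diff), a minimal sketch of the compromise the thread converges on: a default for arc_python_filename (the 'program.py' default comes from the review suggestion), with base_image and the tarball path kept mandatory. The body mirrors the new implementation in this hunk.

def prepare_docker_tarball_with_py(self, python_filepath, local_tarball_path,
                                   base_image, arc_python_filename='program.py'):
  """ Generate a dockerfile for python_filepath and pack both files into local_tarball_path """
  with tempfile.TemporaryDirectory() as local_build_dir:
    # The generated dockerfile lives only in the temporary build dir; the tarball is the sole output.
    local_dockerfile_path = os.path.join(local_build_dir, self._arc_dockerfile_name)
    self._generate_dockerfile_with_py(local_dockerfile_path, base_image, arc_python_filename)
    self._wrap_files_in_tarball(local_tarball_path, {self._arc_dockerfile_name: local_dockerfile_path,
                                                     arc_python_filename: python_filepath})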

""" prepare_docker_tarball is the API to generate dockerfile and prepare the tarball with python scripts """
with tempfile.TemporaryDirectory() as local_build_dir:
local_dockerfile_path = os.path.join(local_build_dir, self._arc_dockerfile_name)
self._generate_dockerfile_with_py(local_dockerfile_path, base_image, arc_python_filename)
local_tarball_path = os.path.join(local_build_dir, 'docker.tmp.tar.gz')
self._wrap_files_in_tarball(local_tarball_path, {self._arc_dockerfile_name:local_dockerfile_path,
arc_python_filename:python_filepath})
GCSHelper.upload_gcs_file(local_tarball_path, self._gcs_path)

def prepare_docker_tarball(self, dockerfile_path):
def prepare_docker_tarball(self, dockerfile_path, local_tarball_path):
""" prepare_docker_tarball is the API to prepare a tarball with the dockerfile """
with tempfile.TemporaryDirectory() as local_build_dir:
local_tarball_path = os.path.join(local_build_dir, 'docker.tmp.tar.gz')
self._wrap_files_in_tarball(local_tarball_path, {self._arc_dockerfile_name:dockerfile_path})
GCSHelper.upload_gcs_file(local_tarball_path, self._gcs_path)
self._wrap_files_in_tarball(local_tarball_path, {self._arc_dockerfile_name:dockerfile_path})
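The net effect of this hunk is that DockerfileHelper now only produces a local tarball and leaves uploading to the caller. A usage sketch, assuming the import path matches the file location above; the bucket, script path, and base image are made-up examples:

import os
import tempfile

from kfp.compiler._component_builder import DockerfileHelper, GCSHelper

docker_helper = DockerfileHelper(arc_dockerfile_name='dockerfile')
with tempfile.TemporaryDirectory() as build_dir:
  tarball = os.path.join(build_dir, 'docker.tmp.tar.gz')
  # Step 1 is purely local: generate the dockerfile and pack it with the python file.
  docker_helper.prepare_docker_tarball_with_py(arc_python_filename='main.py',
                                               python_filepath='/path/to/component.py',
                                               base_image='python:3.6',
                                               local_tarball_path=tarball)
  # Step 2 is caller-owned: upload for the kaniko build, or skip it entirely in unit tests.
  GCSHelper.upload_gcs_file(tarball, 'gs://some-bucket/build/docker.tmp.tar.gz')

Keeping the upload out of the helper is what lets the tests below exercise the tarball logic without GCS credentials.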

class CodeGenerator(object):
""" CodeGenerator helps to generate python codes with identation """
@@ -144,18 +137,10 @@ def __init__(self, gcs_base, target_image):
def _check_gcs_path(self, gcs_path):
""" _check_gcs_path check both the path validity and write permissions """
logging.info('Checking path: {}...'.format(gcs_path))
if gcs_path.startswith('gs://'):
with tempfile.TemporaryDirectory() as local_build_dir:
rand_filename = str(uuid.uuid4()) + '.txt'
local_rand_file = os.path.join(local_build_dir, rand_filename)
Path(local_rand_file).touch()
random_gcs_bucket = os.path.join(gcs_path, rand_filename)
GCSHelper.upload_gcs_file(local_rand_file, random_gcs_bucket)
GCSHelper.remove_gcs_blob(random_gcs_bucket)
return True
else:
if not gcs_path.startswith('gs://'):
logging.error('Error: {} should be a GCS path.'.format(gcs_path))
return False
return True
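Pulled out as a standalone sketch for clarity: the simplified check only validates the gs:// prefix and no longer does a trial write to the bucket, so it runs without GCS access (note the docstring above still mentions write permissions, which the new code no longer checks):

import logging

def check_gcs_path(gcs_path):
  """ Sketch of the simplified check: prefix validation only, no write-permission probe """
  logging.info('Checking path: {}...'.format(gcs_path))
  if not gcs_path.startswith('gs://'):
    logging.error('Error: {} should be a GCS path.'.format(gcs_path))
    return False
  return True

# check_gcs_path('gs://bucket/dir')  -> True
# check_gcs_path('/tmp/build')       -> False, after logging an error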

def _generate_kaniko_spec(self, namespace, arc_dockerfile_name, gcs_path, target_image):
"""_generate_kaniko_yaml generates kaniko job yaml based on a template yaml """
@@ -259,10 +244,12 @@ def build_image_from_func(self, component_func, namespace, base_image, timeout):

# Prepare build files
logging.info('Generate build files.')
docker_helper = DockerfileHelper(arc_dockerfile_name=self._arc_dockerfile_name, gcs_path=self._gcs_path)
docker_helper = DockerfileHelper(arc_dockerfile_name=self._arc_dockerfile_name)
local_tarball_file = os.path.join(local_build_dir, 'docker.tmp.tar.gz')
docker_helper.prepare_docker_tarball_with_py(python_filepath=local_python_filepath,
arc_python_filename=self._arc_python_filepath,
base_image=base_image)
base_image=base_image, local_tarball_path=local_tarball_file)
GCSHelper.upload_gcs_file(local_tarball_file, self._gcs_path)

kaniko_spec = self._generate_kaniko_spec(namespace=namespace,
arc_dockerfile_name=self._arc_dockerfile_name,
@@ -279,19 +266,22 @@ def build_image_from_func(self, component_func, namespace, base_image, timeout):

def build_image_from_dockerfile(self, dockerfile_path, timeout, namespace):
""" build_image_from_dockerfile builds an image directly """
logging.info('Generate build files.')
docker_helper = DockerfileHelper(arc_dockerfile_name=self._arc_dockerfile_name, gcs_path=self._gcs_path)
docker_helper.prepare_docker_tarball(dockerfile_path)
with tempfile.TemporaryDirectory() as local_build_dir:
logging.info('Generate build files.')
docker_helper = DockerfileHelper(arc_dockerfile_name=self._arc_dockerfile_name)
local_tarball_file = os.path.join(local_build_dir, 'docker.tmp.tar.gz')
docker_helper.prepare_docker_tarball(dockerfile_path, local_tarball_path=local_tarball_file)
GCSHelper.upload_gcs_file(local_tarball_file, self._gcs_path)

kaniko_spec = self._generate_kaniko_spec(namespace=namespace, arc_dockerfile_name=self._arc_dockerfile_name,
gcs_path=self._gcs_path, target_image=self._target_image)
logging.info('Start a kaniko job for build.')
k8s_helper = K8sHelper()
k8s_helper.run_job(kaniko_spec, timeout)
logging.info('Kaniko job complete.')

# Clean up
GCSHelper.remove_gcs_blob(self._gcs_path)

def _generate_pythonop(component_func, target_image):
""" Generate operator for the pipeline authors
57 changes: 15 additions & 42 deletions sdk/python/tests/compiler/component_builder_test.py
@@ -24,30 +24,7 @@
from pathlib import Path
import inspect

GCS_BASE = 'gs://ngao-mlpipeline-testing/'

class TestGCSHelper(unittest.TestCase):

def test_upload_gcs_path(self):
""" test uploading gcs file """
# prepare
test_data_dir = os.path.join(os.path.dirname(__file__), 'testdata')
temp_file = os.path.join(test_data_dir, 'test_data.tmp')
temp_downloaded_file = os.path.join(test_data_dir, 'test_data.tmp.downloaded')
Path(temp_file).touch()
gcs_path = os.path.join(GCS_BASE, 'test_data.tmp')

# check
try:
GCSHelper.upload_gcs_file(temp_file, gcs_path)
GCSHelper.download_gcs_blob(temp_downloaded_file, gcs_path)
GCSHelper.remove_gcs_blob(gcs_path)
except:
self.fail('GCS helper failure')

# clean up
os.remove(temp_file)
os.remove(temp_downloaded_file)
GCS_BASE = 'gs://kfp-testing/'

class TestDockerfileHelper(unittest.TestCase):

@@ -65,7 +42,7 @@ def test_wrap_files_in_tarball(self):
f.write('temporary file two content')

# check
docker_helper = DockerfileHelper(arc_dockerfile_name='', gcs_path='')
docker_helper = DockerfileHelper(arc_dockerfile_name='')
docker_helper._wrap_files_in_tarball(temp_tarball, {'dockerfile':temp_file_one, 'main.py':temp_file_two})
self.assertTrue(os.path.exists(temp_tarball))
temp_tarball_handler = tarfile.open(temp_tarball)
@@ -93,7 +70,7 @@ def test_generate_dockerfile(self):
ENTRYPOINT ["python3", "/ml/main.py"]'''

# check
docker_helper = DockerfileHelper(arc_dockerfile_name=target_dockerfile, gcs_path='')
docker_helper = DockerfileHelper(arc_dockerfile_name=target_dockerfile)
docker_helper._generate_dockerfile_with_py(target_file=target_dockerfile, base_image='gcr.io/ngao-mlpipeline-testing/tensorflow:1.10.0', python_filepath='main.py')
with open(target_dockerfile, 'r') as f:
target_dockerfile_payload = f.read()
@@ -108,22 +85,21 @@ def test_prepare_docker_with_py(self):
# prepare
test_data_dir = os.path.join(os.path.dirname(__file__), 'testdata')
python_filepath = os.path.join(test_data_dir, 'basic.py')
downloaded_tarball = os.path.join(test_data_dir, 'test_docker.tar.gz')
gcs_tar_path = os.path.join(GCS_BASE, 'test_docker.tar.gz')
generated_tarball = os.path.join(test_data_dir, 'test_docker.tar.gz')

# check
docker_helper = DockerfileHelper(arc_dockerfile_name='dockerfile', gcs_path=gcs_tar_path)
docker_helper.prepare_docker_tarball_with_py(arc_python_filename='main.py', python_filepath=python_filepath, base_image='gcr.io/ngao-mlpipeline-testing/tensorflow:1.8.0')
GCSHelper.download_gcs_blob(downloaded_tarball, gcs_tar_path)
temp_tarball_handler = tarfile.open(downloaded_tarball)
docker_helper = DockerfileHelper(arc_dockerfile_name='dockerfile')
docker_helper.prepare_docker_tarball_with_py(arc_python_filename='main.py', python_filepath=python_filepath,
base_image='gcr.io/ngao-mlpipeline-testing/tensorflow:1.8.0',
local_tarball_path=generated_tarball)
temp_tarball_handler = tarfile.open(generated_tarball)
temp_files = temp_tarball_handler.getmembers()
self.assertTrue(len(temp_files) == 2)
for temp_file in temp_files:
self.assertTrue(temp_file.name in ['dockerfile', 'main.py'])

# clean up
os.remove(downloaded_tarball)
GCSHelper.remove_gcs_blob(gcs_tar_path)
os.remove(generated_tarball)

def test_prepare_docker_tarball(self):
""" Test the whole prepare docker tarball """
@@ -132,23 +108,20 @@ def test_prepare_docker_tarball(self):
test_data_dir = os.path.join(os.path.dirname(__file__), 'testdata')
dockerfile_path = os.path.join(test_data_dir, 'component.target.dockerfile')
Path(dockerfile_path).touch()
downloaded_tarball = os.path.join(test_data_dir, 'test_docker.tar.gz')
gcs_tar_path = os.path.join(GCS_BASE, 'test_docker.tar.gz')
generated_tarball = os.path.join(test_data_dir, 'test_docker.tar.gz')

# check
docker_helper = DockerfileHelper(arc_dockerfile_name='dockerfile', gcs_path=gcs_tar_path)
docker_helper.prepare_docker_tarball(dockerfile_path=dockerfile_path)
GCSHelper.download_gcs_blob(downloaded_tarball, gcs_tar_path)
temp_tarball_handler = tarfile.open(downloaded_tarball)
docker_helper = DockerfileHelper(arc_dockerfile_name='dockerfile')
docker_helper.prepare_docker_tarball(dockerfile_path=dockerfile_path, local_tarball_path=generated_tarball)
temp_tarball_handler = tarfile.open(generated_tarball)
temp_files = temp_tarball_handler.getmembers()
self.assertTrue(len(temp_files) == 1)
for temp_file in temp_files:
self.assertTrue(temp_file.name in ['dockerfile'])

# clean up
os.remove(downloaded_tarball)
os.remove(generated_tarball)
os.remove(dockerfile_path)
GCSHelper.remove_gcs_blob(gcs_tar_path)

# hello function is used by the TestCodeGenerator to verify the auto generated python function
def hello():
3 changes: 1 addition & 2 deletions sdk/python/tests/compiler/main.py
@@ -23,8 +23,7 @@
if __name__ == '__main__':
suite = unittest.TestSuite()
suite.addTests(unittest.defaultTestLoader.loadTestsFromModule(compiler_tests))
#TODO: enable the test once a gcs mock class is created.
#suite.addTests(unittest.defaultTestLoader.loadTestsFromModule(component_builder_test))
suite.addTests(unittest.defaultTestLoader.loadTestsFromModule(component_builder_test))
runner = unittest.TextTestRunner()
if not runner.run(suite).wasSuccessful():
sys.exit(1)
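With the GCS round trips removed from the tests above, the component builder suite can now run unconditionally, which is what the deleted TODO was waiting for. If a test ever needs the upload/download path again, the "gcs mock class" the TODO mentions could be as small as the hypothetical fake below (none of this is in the PR; names and behavior are illustrative only, matching the argument order shown in the diff):

import os
import shutil
import tempfile


class FakeGCSHelper(object):
  """ In-memory stand-in for GCSHelper: 'gs://...' paths map to files under a local temp dir """

  def __init__(self):
    self._root = tempfile.mkdtemp()

  def _local(self, gcs_path):
    # Flatten the gcs path into a unique filename inside the temp root.
    return os.path.join(self._root, gcs_path.replace('gs://', '').replace('/', '_'))

  def upload_gcs_file(self, local_path, gcs_path):
    shutil.copyfile(local_path, self._local(gcs_path))

  def download_gcs_blob(self, local_path, gcs_path):
    shutil.copyfile(self._local(gcs_path), local_path)

  def remove_gcs_blob(self, gcs_path):
    os.remove(self._local(gcs_path))

Such a fake could be monkey-patched in place of GCSHelper, letting the removed TestGCSHelper round trip come back without a real bucket.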