From 3de5cc2ffd1e05f4f2f97b757d190018406abb3e Mon Sep 17 00:00:00 2001 From: Bradley Jiang Date: Tue, 21 Feb 2017 16:48:44 -0800 Subject: [PATCH] Inception package updates. - Instead of hard-coding the setup.py path, duplicate it alongside all the py files, just like the structured data package. - Use pip-installable TensorFlow 1.0 for packages. - Fix some TF warnings. --- .../datalab_solutions/inception/_cloud.py | 13 +++-- .../datalab_solutions/inception/_model.py | 8 +-- .../datalab_solutions/inception/setup.py | 53 +++++++++++++++++++ solutionbox/inception/setup.py | 4 +- 4 files changed, 66 insertions(+), 12 deletions(-) create mode 100644 solutionbox/inception/datalab_solutions/inception/setup.py diff --git a/solutionbox/inception/datalab_solutions/inception/_cloud.py b/solutionbox/inception/datalab_solutions/inception/_cloud.py index 82bf6ab52..f9b47fef1 100644 --- a/solutionbox/inception/datalab_solutions/inception/_cloud.py +++ b/solutionbox/inception/datalab_solutions/inception/_cloud.py @@ -31,9 +31,6 @@ from . import _trainer from . import _util -_TF_GS_URL = 'gs://cloud-datalab/deploy/tf/tensorflow-1.0.0rc1-cp27-none-linux_x86_64.whl' -# Keep in sync with "data_files" in package's setup.py -_SETUP_PY = '/datalab/packages_setup/inception/setup.py' class Cloud(object): """Class for cloud training, preprocessing and prediction.""" @@ -51,8 +48,10 @@ def _repackage_to_staging(self, output_path): # Find the package root. __file__ is under [package_root]/datalab_solutions/inception. package_root = os.path.join(os.path.dirname(__file__), '../../') + # We deploy setup.py in the same dir for repackaging purposes.
+ setup_py = os.path.join(os.path.dirname(__file__), 'setup.py') staging_package_url = os.path.join(output_path, 'staging', 'inception.tar.gz') - mlalpha.package_and_copy(package_root, _SETUP_PY, staging_package_url) + mlalpha.package_and_copy(package_root, setup_py, staging_package_url) return staging_package_url def preprocess(self, train_dataset, eval_dataset, output_dir, pipeline_option): @@ -67,7 +66,7 @@ def preprocess(self, train_dataset, eval_dataset, output_dir, pipeline_option): 'temp_location': os.path.join(output_dir, 'tmp'), 'job_name': job_name, 'project': _util.default_project(), - 'extra_packages': [_TF_GS_URL, ml.version.nodeps_sdk_location, staging_package_url], + 'extra_packages': [ml.version.nodeps_sdk_location, staging_package_url], 'teardown_policy': 'TEARDOWN_ALWAYS', 'no_save_main_session': True } @@ -95,7 +94,7 @@ def train(self, input_dir, batch_size, max_steps, output_path, cloud_train_confi 'checkpoint': self._checkpoint } job_request = { - 'package_uris': [_TF_GS_URL, staging_package_url], + 'package_uris': [staging_package_url], 'python_module': 'datalab_solutions.inception.task', 'args': job_args } @@ -145,7 +144,7 @@ def batch_predict(self, dataset, model_dir, gcs_staging_location, output_csv, 'temp_location': os.path.join(gcs_staging_location, 'tmp'), 'job_name': job_name, 'project': _util.default_project(), - 'extra_packages': [_TF_GS_URL, ml.version.nodeps_sdk_location, staging_package_url], + 'extra_packages': [ml.version.nodeps_sdk_location, staging_package_url], 'teardown_policy': 'TEARDOWN_ALWAYS', 'no_save_main_session': True } diff --git a/solutionbox/inception/datalab_solutions/inception/_model.py b/solutionbox/inception/datalab_solutions/inception/_model.py index 6aaaae3dd..ad328bdfd 100644 --- a/solutionbox/inception/datalab_solutions/inception/_model.py +++ b/solutionbox/inception/datalab_solutions/inception/_model.py @@ -297,8 +297,10 @@ def build_prediction_graph(self): # To extract the id, we need to add the identity 
function. keys = tf.identity(keys_placeholder) labels = self.labels + ['UNKNOWN'] - predicted_label = tf.contrib.lookup.index_to_string(tensors.predictions[0], - mapping=labels) + labels_tensor = tf.constant(labels) + labels_table = tf.contrib.lookup.index_to_string_table_from_tensor(mapping=labels_tensor) + predicted_label = labels_table.lookup(tensors.predictions[0]) + # Need to duplicate the labels by num_of_instances so the output is one batch # (all output members share the same outer dimension). # The labels are needed for client to match class scores list. @@ -316,7 +318,7 @@ def build_prediction_graph(self): # Add table init op to collection so online prediction will load the model and run it. # TODO: initialize_all_tables is going to be deprecated but the replacement # tf.tables_initializer does not exist in 0.12 yet. - init_tables_op = tf.initialize_all_tables() + init_tables_op = tf.tables_initializer() tf.add_to_collection(tf.contrib.session_bundle.constants.INIT_OP_KEY, init_tables_op) def export(self, last_checkpoint, output_dir): diff --git a/solutionbox/inception/datalab_solutions/inception/setup.py b/solutionbox/inception/datalab_solutions/inception/setup.py new file mode 100644 index 000000000..85618fe57 --- /dev/null +++ b/solutionbox/inception/datalab_solutions/inception/setup.py @@ -0,0 +1,53 @@ +# Copyright 2017 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. 
+ +# To publish to PyPi use: python setup.py bdist_wheel upload -r pypi + +import datetime +from setuptools import setup + +minor = datetime.datetime.now().strftime("%y%m%d%H%M") +version = '0.1' + +setup( + name='inception', + version=version, + packages=[ + 'datalab_solutions', + 'datalab_solutions.inception', + ], + + description='Google Cloud Datalab Inception Package', + author='Google', + author_email='google-cloud-datalab-feedback@googlegroups.com', + keywords=[ + ], + license="Apache Software License", + classifiers=[ + "Programming Language :: Python", + "Programming Language :: Python :: 2", + "Development Status :: 4 - Beta", + "Environment :: Other Environment", + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + "Operating System :: OS Independent", + "Topic :: Software Development :: Libraries :: Python Modules" + ], + long_description=""" + """, + install_requires=[ + 'tensorflow==1.0', + 'protobuf==3.1.0', + ], + package_data={ + } +) diff --git a/solutionbox/inception/setup.py b/solutionbox/inception/setup.py index 03eec1360..85618fe57 100644 --- a/solutionbox/inception/setup.py +++ b/solutionbox/inception/setup.py @@ -25,8 +25,6 @@ 'datalab_solutions', 'datalab_solutions.inception', ], - # setup.py needs to be deployed so it can be repackaged from local installation for cloud run. - data_files=[('/datalab/packages_setup/inception', ['setup.py'])], description='Google Cloud Datalab Inception Package', author='Google', @@ -47,6 +45,8 @@ long_description=""" """, install_requires=[ + 'tensorflow==1.0', + 'protobuf==3.1.0', ], package_data={ }