Merged
3,164 changes: 1,587 additions & 1,577 deletions datalab/notebook/static/extern/lantern-browser.html

Large diffs are not rendered by default.

13 changes: 6 additions & 7 deletions solutionbox/inception/datalab_solutions/inception/_cloud.py
@@ -104,23 +104,22 @@ def train(self, input_dir, batch_size, max_steps, output_path, cloud_train_confi
job = mlalpha.Job.submit_training(job_request, job_id)
return job

def predict(self, model_id, image_files):
def predict(self, model_id, images):
"""Cloud prediction with CloudML prediction service."""

import datalab.mlalpha as mlalpha
parts = model_id.split('.')
if len(parts) != 2:
raise ValueError('Invalid model name for cloud prediction. Use "model.version".')
if len(image_files) == 0:
raise ValueError('image_files is empty.')
if len(images) == 0:
raise ValueError('images is empty.')

data = []
for ii, img_file in enumerate(image_files):
with ml.util._file.open_local_or_gcs(img_file, 'rb') as f:
img = base64.b64encode(f.read())
for ii, image in enumerate(images):
image_encoded = base64.b64encode(image)
data.append({
'key': str(ii),
'image_bytes': {'b64': img}
'image_bytes': {'b64': image_encoded}
})

predictions = mlalpha.ModelVersions(parts[0]).predict(parts[1], data)
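For readers skimming the hunk above: a minimal, standalone sketch of the request body the reworked predict() now assembles for the CloudML prediction service. The helper name is illustrative only; in the PR the loop lives inline in Cloud.predict() and the in-memory image bytes come from _util.load_images().

import base64

def build_instances(images):
    # One record per in-memory image, keyed by its index, with the JPEG bytes
    # base64-encoded under the 'b64' wrapper used in the request above.
    instances = []
    for ii, image in enumerate(images):
        instances.append({
            'key': str(ii),
            'image_bytes': {'b64': base64.b64encode(image)},
        })
    return instances

# Tiny byte strings standing in for JPEG-encoded images.
print(build_instances([b'\xff\xd8fake-jpeg-1', b'\xff\xd8fake-jpeg-2']))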
4 changes: 2 additions & 2 deletions solutionbox/inception/datalab_solutions/inception/_local.py
@@ -67,10 +67,10 @@ def train(self, input_dir, batch_size, max_steps, output_dir):
_trainer.Trainer(input_dir, batch_size, max_steps, output_dir,
model, None, task).run_training()

def predict(self, model_dir, image_files):
def predict(self, model_dir, images):
"""Local prediction."""

return _predictor.predict(model_dir, image_files)
return _predictor.predict(model_dir, images)


def batch_predict(self, dataset, model_dir, output_csv, output_bq_table):
23 changes: 1 addition & 22 deletions solutionbox/inception/datalab_solutions/inception/_model.py
@@ -122,36 +122,15 @@ def build_inception_graph(self):
Inception layer with image bytes for prediction.
inception_embeddings: The embeddings tensor.
"""

# These constants are set by Inception v3's expectations.
height = 299
width = 299
channels = 3

image_str_tensor = tf.placeholder(tf.string, shape=[None])

# The CloudML Prediction API always "feeds" the Tensorflow graph with
# dynamic batch sizes e.g. (?,). decode_jpeg only processes scalar
# strings because it cannot guarantee a batch of images would have
# the same output size. We use tf.map_fn to give decode_jpeg a scalar
# string from dynamic batches.
def decode_and_resize(image_str_tensor):
"""Decodes jpeg string, resizes it and returns a uint8 tensor."""

image = tf.image.decode_jpeg(image_str_tensor, channels=channels)

# Note resize expects a batch_size, but tf.map_fn suppresses that index,
# thus we have to expand then squeeze. Resize returns float32 in the
# range [0, uint8_max]
image = tf.expand_dims(image, 0)
image = tf.image.resize_bilinear(
image, [height, width], align_corners=False)
image = tf.squeeze(image, squeeze_dims=[0])
image = tf.cast(image, dtype=tf.uint8)
return image

image = tf.map_fn(
decode_and_resize, image_str_tensor, back_prop=False, dtype=tf.uint8)
_util.decode_and_resize, image_str_tensor, back_prop=False, dtype=tf.uint8)
# convert_image_dtype, also scales [0, uint8_max] -> [0 ,1).
image = tf.image.convert_image_dtype(image, dtype=tf.float32)

25 changes: 15 additions & 10 deletions solutionbox/inception/datalab_solutions/inception/_package.py
@@ -142,44 +142,49 @@ def cloud_train(input_dir, batch_size, max_steps, output_dir,
def _display_predict_results(results, show_image):
if (_util.is_in_IPython()):
import IPython
for image_file, label_and_score in results:
for image_url, image, label_and_score in results:
if show_image is True:
IPython.display.display_html('<p style="font-size:28px">%s(%.5f)</p>' % label_and_score,
raw=True)
with ml.util._file.open_local_or_gcs(image_file, mode='r') as f:
IPython.display.display(IPython.display.Image(data=f.read()))
IPython.display.display(IPython.display.Image(data=image))
else:
IPython.display.display_html(
'<p>%s&nbsp&nbsp%s(%.5f)</p>' % ((image_file,) + label_and_score), raw=True)
'<p>%s&nbsp&nbsp&nbsp&nbsp%s(%.5f)</p>' % ((image_url,) + label_and_score), raw=True)
else:
print results


def local_predict(model_dir, image_files, show_image=True):
def local_predict(model_dir, image_files, resize=False, show_image=True):
"""Predict using an offline model.
Args:
model_dir: The directory of a trained inception model. Can be local or GCS paths.
image_files: The paths to the image files to predict labels. Can be local or GCS paths.
show_image: Whether to show images in the results.
resize: Whether to resize the image to a reasonable size (300x300) before prediction.
"""
print('Predicting...')
labels_and_scores = _local.Local().predict(model_dir, image_files)
results = zip(image_files, labels_and_scores)
images = _util.load_images(image_files, resize=resize)
labels_and_scores = _local.Local().predict(model_dir, images)
results = zip(image_files, images, labels_and_scores)
_display_predict_results(results, show_image)
print('Done')


def cloud_predict(model_id, image_files, show_image=True):
def cloud_predict(model_id, image_files, resize=False, show_image=True):
"""Predict using a deployed (online) model.
Args:
model_id: The deployed model id in the form of "model.version".
image_files: The paths to the image files to predict labels. GCS paths only.
show_image: Whether to show images in the results.
resize: Whether to resize the image to a reasonable size (300x300) before prediction.
Set it to True if your images are too large to send over network.
"""
print('Predicting...')
labels_and_scores = _cloud.Cloud().predict(model_id, image_files)
results = zip(image_files, labels_and_scores)
images = _util.load_images(image_files, resize=resize)
labels_and_scores = _cloud.Cloud().predict(model_id, images)
results = zip(image_files, images, labels_and_scores)
_display_predict_results(results, show_image)
print('Done')


def local_batch_predict(dataset, model_dir, output_csv=None, output_bq_table=None):
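As a usage illustration only (the import path and file locations below are assumptions based on this directory layout, not part of the diff), the new resize flag would be exercised from a notebook roughly like this:

# Hypothetical notebook usage; model paths, image paths and bucket names are placeholders.
from datalab_solutions.inception import _package as inception

# Local model, local images; resize client-side before feeding the model.
inception.local_predict('/tmp/inception/model', ['/tmp/photos/rose.jpg'], resize=True)

# Deployed model, GCS images; resize so the online prediction request stays small.
inception.cloud_predict('flowers.v1', ['gs://my-bucket/rose.jpg'], resize=True, show_image=False)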
31 changes: 13 additions & 18 deletions solutionbox/inception/datalab_solutions/inception/_predictor.py
@@ -23,10 +23,11 @@
import os
import tensorflow as tf

from . import _model
from . import _util


def _tf_predict(model_dir, batches):
def _tf_predict(model_dir, images):
model_dir = os.path.join(model_dir, 'model')
with tf.Session() as sess:
new_saver = tf.train.import_meta_graph(os.path.join(model_dir, 'export.meta'))
@@ -35,25 +36,19 @@ def _tf_predict(model_dir, batches):
sess.run(init_op)
inputs = json.loads(tf.get_collection('inputs')[0])
outputs = json.loads(tf.get_collection('outputs')[0])
for batch in batches:
feed_dict = collections.defaultdict(list)
for ii, image_filename in enumerate(batch):
if image_filename is None:
break
with ml.util._file.open_local_or_gcs(image_filename, 'r') as ff:
image_bytes = ff.read()
feed_dict[inputs['image_bytes']].append(image_bytes)
feed_dict[inputs['key']].append(str(ii))
predictions, labels, scores = sess.run(
[outputs['prediction'], outputs['labels'], outputs['scores']], feed_dict=feed_dict)
yield zip(predictions, labels, scores)


def predict(model_dir, image_files):
feed_dict = collections.defaultdict(list)
for ii, image in enumerate(images):
feed_dict[inputs['image_bytes']].append(image)
feed_dict[inputs['key']].append(str(ii))
predictions, labels, scores = sess.run(
[outputs['prediction'], outputs['labels'], outputs['scores']], feed_dict=feed_dict)
return zip(predictions, labels, scores)


def predict(model_dir, images):
"""Local instant prediction."""

# Single batch for instant prediction.
results = next(_tf_predict(model_dir, [image_files]))
results = _tf_predict(model_dir, images)
predicted_and_scores = [(predicted, label_scores[list(labels).index(predicted)])
for predicted, labels, label_scores in results]
return predicted_and_scores
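To make the final list comprehension easier to follow, here is a small self-contained sketch with made-up values (no TensorFlow involved) of how each (prediction, labels, scores) triple collapses into a (label, score) pair:

# Illustrative stand-ins for the exported graph's 'prediction', 'labels' and 'scores' outputs.
results = [
    ('daisy', ['daisy', 'rose', 'tulip'], [0.91, 0.06, 0.03]),
    ('rose', ['daisy', 'rose', 'tulip'], [0.10, 0.85, 0.05]),
]
# Same expression as in predict(): look up the score of the predicted label.
predicted_and_scores = [(predicted, label_scores[list(labels).index(predicted)])
                        for predicted, labels, label_scores in results]
print(predicted_and_scores)  # [('daisy', 0.91), ('rose', 0.85)]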
@@ -117,7 +117,7 @@ def __init__(self, input_dir, batch_size, max_steps, output_path, model, cluster
self.task = task
self.evaluator = Evaluator(self.model, eval_files, batch_size, output_path, 'eval_set')
self.train_evaluator = Evaluator(self.model, train_files, batch_size, output_path, 'train_set')
self.min_train_eval_rate = 20
self.min_train_eval_rate = 8

def run_training(self):
"""Runs a Master."""
53 changes: 53 additions & 0 deletions solutionbox/inception/datalab_solutions/inception/_util.py
@@ -16,6 +16,7 @@
"""Reusable utility functions.
"""

import collections
import google.cloud.ml as ml
import multiprocessing
import os
@@ -162,6 +163,7 @@ def check_dataset(dataset, mode):
raise ValueError('Invalid dataset. Expect only "image_url" or "image_url,label" ' +
'STRING columns.')


def get_sources_from_dataset(p, dataset, mode):
"""get pcollection from dataset."""

@@ -184,3 +186,54 @@ def get_sources_from_dataset(p, dataset, mode):
return p | 'Read source from BigQuery (%s)' % mode >> beam.io.Read(bq_source)
else:
raise ValueError('Invalid DataSet. Expect CsvDataSet or BigQueryDataSet')


def decode_and_resize(image_str_tensor):
"""Decodes jpeg string, resizes it and returns a uint8 tensor."""

# These constants are set by Inception v3's expectations.
height = 299
width = 299
Contributor: not 300? Is there some kind of "start counting at zero" going on here? Add a comment if so.

Contributor Author: It's just the dimension Inception expects. I don't know the reason behind it, but there are already comments saying this is what Inception expects.

channels = 3

image = tf.image.decode_jpeg(image_str_tensor, channels=channels)
# Note resize expects a batch_size, but tf.map_fn suppresses that index,
# thus we have to expand then squeeze. Resize returns float32 in the
# range [0, uint8_max]
image = tf.expand_dims(image, 0)
image = tf.image.resize_bilinear(image, [height, width], align_corners=False)
image = tf.squeeze(image, squeeze_dims=[0])
image = tf.cast(image, dtype=tf.uint8)
return image


def resize_image(image_str_tensor):
"""Decodes jpeg string, resizes it and re-encode it to jpeg."""

image = decode_and_resize(image_str_tensor)
image = tf.image.encode_jpeg(image, quality=100)
Contributor: could decode_and_resize do
  1. decode_jpeg
  2. stuff
  3. encode_jpeg
so that decode_and_resize can be renamed resize_image?

Contributor Author: decode_and_resize() is also used by the model (which does not need to encode back), so this function can only do 1 and 2.

return image


def load_images(image_files, resize=True):
"""Load images from files and optionally resize it."""

images = []
for image_file in image_files:
with ml.util._file.open_local_or_gcs(image_file, 'r') as ff:
images.append(ff.read())
if resize is False:
return images

# To resize, run a tf session so we can reuse 'decode_and_resize()'
# which is used in prediction graph. This makes sure we don't lose
# any quality in prediction, while decreasing the size of the images
# submitted to the model over network.
image_str_tensor = tf.placeholder(tf.string, shape=[None])
image = tf.map_fn(resize_image, image_str_tensor, back_prop=False)
feed_dict = collections.defaultdict(list)
feed_dict[image_str_tensor.name] = images
with tf.Session() as sess:
images_resized = sess.run(image, feed_dict=feed_dict)
return images_resized
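A hedged usage sketch of the new helper (the import path and file names are placeholders; as elsewhere in this module, GCS paths also work through ml.util._file):

# Hypothetical caller of load_images(); only meant to show the two modes of the flag.
from datalab_solutions.inception import _util

raw_images = _util.load_images(['/tmp/photos/huge_scan.jpg'], resize=False)
small_images = _util.load_images(['/tmp/photos/huge_scan.jpg'], resize=True)

# The resized JPEGs are what local_predict/cloud_predict now send to the model,
# shrinking the payload that goes over the network, as the comment above describes.
print(len(raw_images[0]), len(small_images[0]))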