This repository was archived by the owner on Sep 3, 2022. It is now read-only.

Commit 20b6e97

Add a resize option for inception package to avoid sending large data to online prediction (#215)
* Add a resize option for inception package to avoid sending large data to online prediction. Update Lantern browser.
* Follow up on code review comments and fix a bug for inception.
1 parent c73d52a commit 20b6e97
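
For context, a minimal usage sketch of the option this commit adds. This is hedged: the import path, model id, and GCS paths below are illustrative placeholders, not values taken from this commit.

  # Hypothetical notebook usage of the new resize flag; paths and model id are placeholders.
  from datalab_solutions.inception import _package as inception

  image_files = ['gs://my-bucket/flowers/img1.jpg', 'gs://my-bucket/flowers/img2.jpg']

  # Local prediction: images are loaded (and optionally resized) client-side
  # before being fed to the exported graph.
  inception.local_predict('gs://my-bucket/model', image_files, resize=True)

  # Online prediction: resizing keeps the base64 payload sent to the CloudML
  # prediction service small.
  inception.cloud_predict('mymodel.v1', image_files, resize=True)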

File tree: 8 files changed (+1678 / −1637 lines)


datalab/notebook/static/extern/lantern-browser.html

Lines changed: 1587 additions & 1577 deletions
Large diffs are not rendered by default.

solutionbox/inception/datalab_solutions/inception/_cloud.py

Lines changed: 6 additions & 7 deletions
@@ -104,23 +104,22 @@ def train(self, input_dir, batch_size, max_steps, output_path, cloud_train_confi
     job = mlalpha.Job.submit_training(job_request, job_id)
     return job

-  def predict(self, model_id, image_files):
+  def predict(self, model_id, images):
     """Cloud prediction with CloudML prediction service."""

     import datalab.mlalpha as mlalpha
     parts = model_id.split('.')
     if len(parts) != 2:
       raise ValueError('Invalid model name for cloud prediction. Use "model.version".')
-    if len(image_files) == 0:
-      raise ValueError('image_files is empty.')
+    if len(images) == 0:
+      raise ValueError('images is empty.')

     data = []
-    for ii, img_file in enumerate(image_files):
-      with ml.util._file.open_local_or_gcs(img_file, 'rb') as f:
-        img = base64.b64encode(f.read())
+    for ii, image in enumerate(images):
+      image_encoded = base64.b64encode(image)
       data.append({
         'key': str(ii),
-        'image_bytes': {'b64': img}
+        'image_bytes': {'b64': image_encoded}
       })

     predictions = mlalpha.ModelVersions(parts[0]).predict(parts[1], data)
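
For reference, the instances built above follow the CloudML online prediction JSON format, with raw image bytes base64-encoded under a 'b64' key. A minimal standalone sketch of the same payload construction (the image bytes are placeholders):

  import base64

  # Placeholder JPEG bytes; in the real code these come from _util.load_images().
  images = [b'<jpeg bytes 0>', b'<jpeg bytes 1>']

  # One instance per image: a row key plus base64-encoded image bytes, matching
  # what Cloud().predict() now sends to the prediction service.
  data = [{'key': str(ii), 'image_bytes': {'b64': base64.b64encode(image)}}
          for ii, image in enumerate(images)]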

solutionbox/inception/datalab_solutions/inception/_local.py

Lines changed: 2 additions & 2 deletions
@@ -67,10 +67,10 @@ def train(self, input_dir, batch_size, max_steps, output_dir):
     _trainer.Trainer(input_dir, batch_size, max_steps, output_dir,
                      model, None, task).run_training()

-  def predict(self, model_dir, image_files):
+  def predict(self, model_dir, images):
     """Local prediction."""

-    return _predictor.predict(model_dir, image_files)
+    return _predictor.predict(model_dir, images)


   def batch_predict(self, dataset, model_dir, output_csv, output_bq_table):

solutionbox/inception/datalab_solutions/inception/_model.py

Lines changed: 1 addition & 22 deletions
@@ -122,36 +122,15 @@ def build_inception_graph(self):
       Inception layer with image bytes for prediction.
       inception_embeddings: The embeddings tensor.
     """
-
-    # These constants are set by Inception v3's expectations.
-    height = 299
-    width = 299
-    channels = 3
-
     image_str_tensor = tf.placeholder(tf.string, shape=[None])

     # The CloudML Prediction API always "feeds" the Tensorflow graph with
     # dynamic batch sizes e.g. (?,). decode_jpeg only processes scalar
     # strings because it cannot guarantee a batch of images would have
     # the same output size. We use tf.map_fn to give decode_jpeg a scalar
     # string from dynamic batches.
-    def decode_and_resize(image_str_tensor):
-      """Decodes jpeg string, resizes it and returns a uint8 tensor."""
-
-      image = tf.image.decode_jpeg(image_str_tensor, channels=channels)
-
-      # Note resize expects a batch_size, but tf_map supresses that index,
-      # thus we have to expand then squeeze. Resize returns float32 in the
-      # range [0, uint8_max]
-      image = tf.expand_dims(image, 0)
-      image = tf.image.resize_bilinear(
-          image, [height, width], align_corners=False)
-      image = tf.squeeze(image, squeeze_dims=[0])
-      image = tf.cast(image, dtype=tf.uint8)
-      return image
-
     image = tf.map_fn(
-        decode_and_resize, image_str_tensor, back_prop=False, dtype=tf.uint8)
+        _util.decode_and_resize, image_str_tensor, back_prop=False, dtype=tf.uint8)
     # convert_image_dtype, also scales [0, uint8_max] -> [0 ,1).
     image = tf.image.convert_image_dtype(image, dtype=tf.float32)
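
The comment retained above explains why tf.map_fn is needed: decode_jpeg only accepts a scalar string, while the prediction graph receives a dynamically sized batch. A minimal self-contained sketch of that pattern, assuming the TF 1.x API used by this package (the helper name below is hypothetical, and it resizes to a fixed shape so map_fn can stack the per-image results):

  import tensorflow as tf  # TF 1.x, as used by this package

  def _decode_one(image_str):
    # Scalar JPEG string in, fixed-size uint8 image out.
    image = tf.image.decode_jpeg(image_str, channels=3)
    image = tf.image.resize_images(image, [299, 299])  # float32 in [0, 255]
    return tf.cast(image, tf.uint8)

  # Batch of encoded images with dynamic size (?,); map_fn hands each element
  # to _decode_one as a scalar string and stacks the fixed-size outputs.
  image_str_tensor = tf.placeholder(tf.string, shape=[None])
  images = tf.map_fn(_decode_one, image_str_tensor, back_prop=False, dtype=tf.uint8)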

solutionbox/inception/datalab_solutions/inception/_package.py

Lines changed: 15 additions & 10 deletions
@@ -142,44 +142,49 @@ def cloud_train(input_dir, batch_size, max_steps, output_dir,
 def _display_predict_results(results, show_image):
   if (_util.is_in_IPython()):
     import IPython
-    for image_file, label_and_score in results:
+    for image_url, image, label_and_score in results:
       if show_image is True:
         IPython.display.display_html('<p style="font-size:28px">%s(%.5f)</p>' % label_and_score,
                                      raw=True)
-        with ml.util._file.open_local_or_gcs(image_file, mode='r') as f:
-          IPython.display.display(IPython.display.Image(data=f.read()))
+        IPython.display.display(IPython.display.Image(data=image))
       else:
         IPython.display.display_html(
-            '<p>%s&nbsp&nbsp%s(%.5f)</p>' % ((image_file,) + label_and_score), raw=True)
+            '<p>%s&nbsp&nbsp&nbsp&nbsp%s(%.5f)</p>' % ((image_url,) + label_and_score), raw=True)
   else:
     print results


-def local_predict(model_dir, image_files, show_image=True):
+def local_predict(model_dir, image_files, resize=False, show_image=True):
   """Predict using an offline model.
   Args:
     model_dir: The directory of a trained inception model. Can be local or GCS paths.
     image_files: The paths to the image files to predict labels. Can be local or GCS paths.
     show_image: Whether to show images in the results.
+    resize: Whether to resize the image to a reasonable size (300x300) before prediction.
   """
   print('Predicting...')
-  labels_and_scores = _local.Local().predict(model_dir, image_files)
-  results = zip(image_files, labels_and_scores)
+  images = _util.load_images(image_files, resize=resize)
+  labels_and_scores = _local.Local().predict(model_dir, images)
+  results = zip(image_files, images, labels_and_scores)
   _display_predict_results(results, show_image)
   print('Done')


-def cloud_predict(model_id, image_files, show_image=True):
+def cloud_predict(model_id, image_files, resize=False, show_image=True):
   """Predict using a deployed (online) model.
   Args:
     model_id: The deployed model id in the form of "model.version".
     image_files: The paths to the image files to predict labels. GCS paths only.
     show_image: Whether to show images in the results.
+    resize: Whether to resize the image to a reasonable size (300x300) before prediction.
+      Set it to True if your images are too large to send over network.
   """
   print('Predicting...')
-  labels_and_scores = _cloud.Cloud().predict(model_id, image_files)
-  results = zip(image_files, labels_and_scores)
+  images = _util.load_images(image_files, resize=resize)
+  labels_and_scores = _cloud.Cloud().predict(model_id, images)
+  results = zip(image_files, images, labels_and_scores)
   _display_predict_results(results, show_image)
+  print('Done')


 def local_batch_predict(dataset, model_dir, output_csv=None, output_bq_table=None):

solutionbox/inception/datalab_solutions/inception/_predictor.py

Lines changed: 13 additions & 18 deletions
@@ -23,10 +23,11 @@
 import os
 import tensorflow as tf

+from . import _model
 from . import _util


-def _tf_predict(model_dir, batches):
+def _tf_predict(model_dir, images):
   model_dir = os.path.join(model_dir, 'model')
   with tf.Session() as sess:
     new_saver = tf.train.import_meta_graph(os.path.join(model_dir, 'export.meta'))
@@ -35,25 +36,19 @@ def _tf_predict(model_dir, batches):
     sess.run(init_op)
     inputs = json.loads(tf.get_collection('inputs')[0])
     outputs = json.loads(tf.get_collection('outputs')[0])
-    for batch in batches:
-      feed_dict = collections.defaultdict(list)
-      for ii, image_filename in enumerate(batch):
-        if image_filename is None:
-          break
-        with ml.util._file.open_local_or_gcs(image_filename, 'r') as ff:
-          image_bytes = ff.read()
-        feed_dict[inputs['image_bytes']].append(image_bytes)
-        feed_dict[inputs['key']].append(str(ii))
-      predictions, labels, scores = sess.run(
-          [outputs['prediction'], outputs['labels'], outputs['scores']], feed_dict=feed_dict)
-      yield zip(predictions, labels, scores)
-
-
-def predict(model_dir, image_files):
+    feed_dict = collections.defaultdict(list)
+    for ii, image in enumerate(images):
+      feed_dict[inputs['image_bytes']].append(image)
+      feed_dict[inputs['key']].append(str(ii))
+    predictions, labels, scores = sess.run(
+        [outputs['prediction'], outputs['labels'], outputs['scores']], feed_dict=feed_dict)
+    return zip(predictions, labels, scores)
+
+
+def predict(model_dir, images):
   """Local instant prediction."""

-  # Single batch for instant prediction.
-  results = next(_tf_predict(model_dir, [image_files]))
+  results = _tf_predict(model_dir, images)
   predicted_and_scores = [(predicted, label_scores[list(labels).index(predicted)])
                           for predicted, labels, label_scores in results]
   return predicted_and_scores

solutionbox/inception/datalab_solutions/inception/_trainer.py

Lines changed: 1 addition & 1 deletion
@@ -117,7 +117,7 @@ def __init__(self, input_dir, batch_size, max_steps, output_path, model, cluster
     self.task = task
     self.evaluator = Evaluator(self.model, eval_files, batch_size, output_path, 'eval_set')
     self.train_evaluator = Evaluator(self.model, train_files, batch_size, output_path, 'train_set')
-    self.min_train_eval_rate = 20
+    self.min_train_eval_rate = 8

   def run_training(self):
     """Runs a Master."""

solutionbox/inception/datalab_solutions/inception/_util.py

Lines changed: 53 additions & 0 deletions
@@ -16,6 +16,7 @@
 """Reusable utility functions.
 """

+import collections
 import google.cloud.ml as ml
 import multiprocessing
 import os
@@ -162,6 +163,7 @@ def check_dataset(dataset, mode):
     raise ValueError('Invalid dataset. Expect only "image_url" or "image_url,label" ' +
                      'STRING columns.')

+
 def get_sources_from_dataset(p, dataset, mode):
   """get pcollection from dataset."""

@@ -184,3 +186,54 @@ def get_sources_from_dataset(p, dataset, mode):
     return p | 'Read source from BigQuery (%s)' % mode >> beam.io.Read(bq_source)
   else:
     raise ValueError('Invalid DataSet. Expect CsvDataSet or BigQueryDataSet')
+
+
+def decode_and_resize(image_str_tensor):
+  """Decodes jpeg string, resizes it and returns a uint8 tensor."""
+
+  # These constants are set by Inception v3's expectations.
+  height = 299
+  width = 299
+  channels = 3
+
+  image = tf.image.decode_jpeg(image_str_tensor, channels=channels)
+  # Note resize expects a batch_size, but tf_map supresses that index,
+  # thus we have to expand then squeeze. Resize returns float32 in the
+  # range [0, uint8_max]
+  image = tf.expand_dims(image, 0)
+  image = tf.image.resize_bilinear(image, [height, width], align_corners=False)
+  image = tf.squeeze(image, squeeze_dims=[0])
+  image = tf.cast(image, dtype=tf.uint8)
+  return image
+
+
+def resize_image(image_str_tensor):
+  """Decodes jpeg string, resizes it and re-encode it to jpeg."""
+
+  image = decode_and_resize(image_str_tensor)
+  image = tf.image.encode_jpeg(image, quality=100)
+  return image
+
+
+def load_images(image_files, resize=True):
+  """Load images from files and optionally resize it."""
+
+  images = []
+  for image_file in image_files:
+    with ml.util._file.open_local_or_gcs(image_file, 'r') as ff:
+      images.append(ff.read())
+  if resize is False:
+    return images
+
+  # To resize, run a tf session so we can reuse 'decode_and_resize()'
+  # which is used in prediction graph. This makes sure we don't lose
+  # any quality in prediction, while decreasing the size of the images
+  # submitted to the model over network.
+  image_str_tensor = tf.placeholder(tf.string, shape=[None])
+  image = tf.map_fn(resize_image, image_str_tensor, back_prop=False)
+  feed_dict = collections.defaultdict(list)
+  feed_dict[image_str_tensor.name] = images
+  with tf.Session() as sess:
+    images_resized = sess.run(image, feed_dict=feed_dict)
+  return images_resized
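
A short usage sketch of the helper added above, showing how resized bytes flow into prediction. This is hedged: the file and model paths are placeholders, and the import path assumes the package layout shown in this commit.

  from datalab_solutions.inception import _util, _local

  # Read the raw JPEG bytes and shrink them through the same decode_and_resize()
  # graph used at prediction time, so client-side resizing matches the model's input.
  images = _util.load_images(['gs://my-bucket/flowers/img1.jpg'], resize=True)

  # The resized bytes (not file paths) are what predict() now expects.
  labels_and_scores = _local.Local().predict('gs://my-bucket/model', images)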
