Skip to content

Commit

Permalink
SUBMARINE-44. Upgrade Docker image's tensorflow version to 1.13.1. Co…
Browse files Browse the repository at this point in the history
…ntributed by Zac Zhou.
  • Loading branch information
tangzhankun committed Apr 25, 2019
1 parent 0b3d41b commit b5dcf64
Show file tree
Hide file tree
Showing 20 changed files with 397 additions and 459 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Base OS layer: stock Ubuntu 16.04.
FROM ubuntu:16.04

# Pick up some TF dependencies, together with a handful of debugging and
# networking tools. Flags and packages are listed one per line in
# alphabetical order so that later changes produce minimal diffs; the set of
# installed packages is exactly the same as before.
RUN apt-get update \
    && apt-get install -y \
        --allow-change-held-packages \
        --allow-downgrades \
        --allow-unauthenticated \
        --no-install-recommends \
        build-essential \
        curl \
        gdb \
        grep \
        iputils-ping \
        libfreetype6-dev \
        libpng12-dev \
        libzmq3-dev \
        net-tools \
        pkg-config \
        python \
        python-dev \
        python2.7-dbg \
        rsync \
        sed \
        software-properties-common \
        tzdata \
        unzip \
        vim \
        wget \
    # Drop the apt caches in the same layer that created them.
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Kerberos client and PAM module. DEBIAN_FRONTEND is exported only inside
# this RUN shell so the packages' configuration prompts are suppressed during
# the build without leaking the variable into the final image environment.
RUN export DEBIAN_FRONTEND=noninteractive \
    && apt-get update \
    && apt-get install -y -q --no-install-recommends \
        krb5-user \
        libpam-krb5 \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Bootstrap pip for the system Python 2.7 interpreter installed above.
# The unversioned https://bootstrap.pypa.io/get-pip.py script only supports
# current Python 3 releases and aborts under Python 2.7, so the 2.7-specific
# script published by PyPA is downloaded instead. The script is removed in
# the same layer so it does not end up in the image.
RUN wget https://bootstrap.pypa.io/pip/2.7/get-pip.py && \
    python get-pip.py && \
    rm get-pip.py

# Native toolchain and development headers used when building the Python
# packages installed by the pip step that follows. The original grouping
# comments are kept next to the packages they describe.
RUN echo "Install python related packages" \
    && apt-get update \
    && apt-get install -y --no-install-recommends \
        gfortran \
        # numerical/algebra packages
        libblas-dev \
        libatlas-dev \
        liblapack-dev \
        # font, image for matplotlib
        libpng-dev \
        libxft-dev \
        # for tkinter
        python-tk \
        libxml2-dev \
        libxslt-dev \
        zlib1g-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Python data-science stack plus Jupyter, followed by registration of the
# IPython kernel spec.
# "scikit-learn" is the real distribution name on PyPI; the former "sklearn"
# alias is a deprecated placeholder whose installation now fails on purpose,
# which would break this layer.
RUN pip --no-cache-dir install \
        Pillow \
        h5py \
        ipykernel \
        jupyter \
        matplotlib \
        numpy \
        pandas \
        scikit-learn \
        scipy && \
    python -m ipykernel.kernelspec

# Install TensorFlow CPU version.
# TENSORFLOW_VERSION selects the wheel; the cp27 tag matches the Python 2.7
# interpreter installed earlier in this file.
# The wheel is fetched over HTTPS (the original used plain HTTP) so that the
# download is protected against tampering in transit; bucket and path are
# unchanged.
ENV TENSORFLOW_VERSION="1.13.1"
RUN pip --no-cache-dir install \
    https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-${TENSORFLOW_VERSION}-cp27-none-linux_x86_64.whl
# Git client; apt caches are removed in the same layer.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        git \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Install hadoop
# Download, unpack, link and clean up in ONE layer: a tarball deleted by a
# later RUN instruction is still stored in the earlier layer, so the previous
# four-step form shipped the full archive inside the image.
# The release is fetched over HTTPS from archive.apache.org, the ASF archive
# of past releases, instead of a plain-HTTP third-party mirror.
# NOTE(review): no checksum/signature verification is performed here; consider
# adding one against the published .sha512 file.
# The files land in the current working directory (the image root unless a
# WORKDIR is set), with "hadoop-current" as a relative symlink to the
# versioned directory, exactly as before.
ENV HADOOP_VERSION="3.1.2"
RUN wget https://archive.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz && \
    tar zxf hadoop-${HADOOP_VERSION}.tar.gz && \
    ln -s hadoop-${HADOOP_VERSION} hadoop-current && \
    rm hadoop-${HADOOP_VERSION}.tar.gz

# OpenJDK 8. JAVA_HOME is exported for later layers and for containers
# started from this image.
# NOTE(review): LOG_TAG is not defined anywhere in this file, so it expands
# to an empty string in the echo below unless the base image provides it.
ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
RUN echo "$LOG_TAG Install java8" \
    && apt-get update \
    && apt-get install -y --no-install-recommends \
        openjdk-8-jdk \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Generate the en_US.UTF-8 locale so bash stops printing
# "setlocale: LC_ALL: cannot change locale (en_US.UTF-8)".
# Step 1: install the locales package (apt caches removed in the same layer).
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        locales \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# Step 2: build the locale definition itself.
RUN locale-gen en_US.UTF-8

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Base layer: NVIDIA's CUDA 10.0 + cuDNN 7 development image on Ubuntu 16.04.
FROM nvidia/cuda:10.0-cudnn7-devel-ubuntu16.04

# Pick up some TF dependencies: build tools, Python 2.7, debugging and
# networking utilities, and the CUDA 10.0 libraries with cuDNN pinned to
# 7.4.1.5. Flags and packages are listed one per line in alphabetical order
# so later changes produce minimal diffs; the installed set is unchanged.
RUN apt-get update \
    && apt-get install -y \
        --allow-change-held-packages \
        --allow-downgrades \
        --allow-unauthenticated \
        --no-install-recommends \
        build-essential \
        cuda-command-line-tools-10-0 \
        cuda-cublas-10-0 \
        cuda-cufft-10-0 \
        cuda-curand-10-0 \
        cuda-cusolver-10-0 \
        cuda-cusparse-10-0 \
        curl \
        gdb \
        grep \
        iputils-ping \
        libcudnn7=7.4.1.5-1+cuda10.0 \
        libfreetype6-dev \
        libpng12-dev \
        libzmq3-dev \
        net-tools \
        pkg-config \
        python \
        python-dev \
        python2.7-dbg \
        rsync \
        sed \
        software-properties-common \
        tzdata \
        unzip \
        vim \
        wget \
    # Drop the apt caches in the same layer that created them.
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Install TensorRT
# Two phases, and the order matters: the first install adds the TensorRT
# repository package, the second "apt-get update" refreshes the package index
# afterwards, and only then is the pinned libnvinfer5 runtime installed.
RUN apt-get update \
    && apt-get install -y --allow-unauthenticated --no-install-recommends \
        nvinfer-runtime-trt-repo-ubuntu1604-5.0.2-ga-cuda10.0 \
    && apt-get update \
    && apt-get install -y --no-install-recommends \
        libnvinfer5=5.0.2-1+cuda10.0 \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*


# Kerberos client and PAM module. DEBIAN_FRONTEND is exported only inside
# this RUN shell so the packages' configuration prompts are suppressed during
# the build without leaking the variable into the final image environment.
RUN export DEBIAN_FRONTEND=noninteractive \
    && apt-get update \
    && apt-get install -y -q --no-install-recommends \
        krb5-user \
        libpam-krb5 \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Bootstrap pip for the system Python 2.7 interpreter installed above.
# The unversioned https://bootstrap.pypa.io/get-pip.py script only supports
# current Python 3 releases and aborts under Python 2.7, so the 2.7-specific
# script published by PyPA is downloaded instead. The script is removed in
# the same layer so it does not end up in the image.
RUN wget https://bootstrap.pypa.io/pip/2.7/get-pip.py && \
    python get-pip.py && \
    rm get-pip.py

# Native toolchain and development headers used when building the Python
# packages installed by the pip step that follows. The original grouping
# comments are kept next to the packages they describe.
RUN echo "Install python related packages" \
    && apt-get -y update \
    && apt-get install -y --no-install-recommends \
        gfortran \
        # numerical/algebra packages
        libblas-dev \
        libatlas-dev \
        liblapack-dev \
        # font, image for matplotlib
        libpng-dev \
        libxft-dev \
        # for tkinter
        python-tk \
        libxml2-dev \
        libxslt-dev \
        zlib1g-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Python data-science stack plus Jupyter, followed by registration of the
# IPython kernel spec.
# "scikit-learn" is the real distribution name on PyPI; the former "sklearn"
# alias is a deprecated placeholder whose installation now fails on purpose,
# which would break this layer.
RUN pip --no-cache-dir install \
        Pillow \
        h5py \
        ipykernel \
        jupyter \
        matplotlib \
        numpy \
        pandas \
        scikit-learn \
        scipy && \
    python -m ipykernel.kernelspec

# Install TensorFlow GPU version.
# TENSORFLOW_VERSION selects the wheel; the cp27 tag matches the Python 2.7
# interpreter installed earlier in this file.
# The wheel is fetched over HTTPS (the original used plain HTTP) so that the
# download is protected against tampering in transit; bucket and path are
# unchanged.
ENV TENSORFLOW_VERSION="1.13.1"
RUN pip --no-cache-dir install \
    https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-${TENSORFLOW_VERSION}-cp27-none-linux_x86_64.whl
# Git client; apt caches are removed in the same layer.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        git \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Install hadoop
# Download, unpack, link and clean up in ONE layer: a tarball deleted by a
# later RUN instruction is still stored in the earlier layer, so the previous
# four-step form shipped the full archive inside the image.
# The release is fetched over HTTPS from archive.apache.org, the ASF archive
# of past releases, instead of a plain-HTTP third-party mirror.
# NOTE(review): no checksum/signature verification is performed here; consider
# adding one against the published .sha512 file.
# The files land in the current working directory (the image root unless a
# WORKDIR is set), with "hadoop-current" as a relative symlink to the
# versioned directory, exactly as before.
ENV HADOOP_VERSION="3.1.2"
RUN wget https://archive.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz && \
    tar zxf hadoop-${HADOOP_VERSION}.tar.gz && \
    ln -s hadoop-${HADOOP_VERSION} hadoop-current && \
    rm hadoop-${HADOOP_VERSION}.tar.gz

# OpenJDK 8. JAVA_HOME is exported for later layers and for containers
# started from this image.
# "apt-get clean" is run before the package lists are removed so this layer
# follows the same cleanup sequence as every other apt layer in this file.
# NOTE(review): LOG_TAG is not defined anywhere in this file, so it expands
# to an empty string in the echo below unless the base image provides it.
ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
RUN echo "$LOG_TAG Install java8" && \
    apt-get -y update && \
    apt-get install -y --no-install-recommends openjdk-8-jdk && \
    apt-get clean && rm -rf /var/lib/apt/lists/*

# Generate the en_US.UTF-8 locale so bash stops printing
# "setlocale: LC_ALL: cannot change locale (en_US.UTF-8)".
# Step 1: install the locales package (apt caches removed in the same layer).
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        locales \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# Step 2: build the locale definition itself.
RUN locale-gen en_US.UTF-8

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,12 @@ set -e

cd base/ubuntu-16.04

docker build . -f Dockerfile.cpu.tf_1.8.0 -t tf-1.8.0-cpu-base:0.0.1
docker build . -f Dockerfile.gpu.tf_1.8.0 -t tf-1.8.0-gpu-base:0.0.1
docker build . -f Dockerfile.cpu.tf_1.13.1 -t tf-1.13.1-cpu-base:0.0.1
docker build . -f Dockerfile.gpu.tf_1.13.1 -t tf-1.13.1-gpu-base:0.0.1

echo "Finished building base images"

cd ../../with-cifar10-models/ubuntu-16.04

docker build . -f Dockerfile.cpu.tf_1.8.0 -t tf-1.8.0-cpu:0.0.1
docker build . -f Dockerfile.gpu.tf_1.8.0 -t tf-1.8.0-gpu:0.0.1
docker build . -f Dockerfile.cpu.tf_1.13.1 -t tf-1.13.1-cpu:0.0.1
docker build . -f Dockerfile.gpu.tf_1.13.1 -t tf-1.13.1-gpu:0.0.1
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.

FROM tf-1.8.0-cpu-base:0.0.1
FROM tf-1.13.1-cpu-base:0.0.1

# Include models
RUN mkdir /test
ADD cifar10_estimator_tf_1.8.0 /test/cifar10_estimator
ADD cifar10_estimator_tf_1.13.1 /test/cifar10_estimator
RUN chown -R nobody /test
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.

FROM tf-1.8.0-gpu-base:0.0.1
FROM tf-1.13.1-gpu-base:0.0.1

# Include models
RUN mkdir /test
ADD cifar10_estimator_tf_1.8.0 /test/cifar10_estimator
ADD cifar10_estimator_tf_1.13.1 /test/cifar10_estimator
RUN chown -R nobody /test
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def make_batch(self, batch_size):

# Parse records.
dataset = dataset.map(
self.parser)
self.parser, num_parallel_calls=batch_size)

# Potentially shuffle records.
if self.subset == 'train':
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,7 @@ def _experiment_fn(run_config, hparams):

train_steps = hparams.train_steps
eval_steps = num_eval_examples // hparams.eval_batch_size

classifier = tf.estimator.Estimator(
model_fn=get_model_fn(num_gpus, variable_strategy,
run_config.num_worker_replicas or 1),
Expand Down Expand Up @@ -483,7 +483,7 @@ def main(job_dir, data_dir, num_gpus, variable_strategy,
type=str,
default=None,
help="""\
If not set, the data format best for the training device is used.
If not set, the data format best for the training device is used.
Allowed values: channels_first (NCHW) channels_last (NHWC).\
""")
parser.add_argument(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

import collections
import six

Expand All @@ -29,7 +28,7 @@


# TODO(b/64848083) Remove once uid bug is fixed
class RunConfig(tf.contrib.learn.RunConfig):
class RunConfig(tf.contrib.learn.RunConfig):
def uid(self, whitelist=None):
"""Generates a 'Unique Identifier' based on all internal fields.
Caller should use the uid string to check `RunConfig` instance integrity
Expand Down Expand Up @@ -60,7 +59,7 @@ def uid(self, whitelist=None):
key=lambda t: t[0])
)
return ', '.join(
'%s=%r' % (k, v) for (k, v) in six.iteritems(ordered_state))
'%s=%r' % (k, v) for (k, v) in six.iteritems(ordered_state))


class ExamplesPerSecondHook(session_run_hook.SessionRunHook):
Expand Down
Loading

0 comments on commit b5dcf64

Please sign in to comment.