Update TensorFlow to 1.14.0 (#787)
This commit updates many deprecated imports to their tf.compat.v1
equivalents and updates several other deprecated methods. It also
adds support for Python 3.7.
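
A minimal sketch of the migration pattern applied throughout (the placeholder shape and variable names below are illustrative, not taken from any one file in this diff):

    import tensorflow as tf

    # Deprecated in TF 1.14 and removed in TF 2.x:
    #     obs = tf.placeholder(tf.float32, shape=(None, 4), name='obs')
    # The compat.v1 alias keeps the same behavior on TF 1.14:
    obs = tf.compat.v1.placeholder(tf.float32, shape=(None, 4), name='obs')

    with tf.compat.v1.Session() as sess:
        sess.run(tf.compat.v1.global_variables_initializer())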

In base.py, yapf (or the underlying lib2to3 parser) doesn't
accept two * expressions in a single function call. This
commit combines them and passes a single * expression.
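
A hypothetical illustration of that workaround (the function and argument names are placeholders, not the actual base.py code):

    def build(*args):
        return args

    default_args = (1, 2)
    extra_args = (3,)

    # yapf's lib2to3 parser rejects two unpackings in one call:
    #     build(*default_args, *extra_args)
    # A single combined * expression is equivalent:
    result = build(*(default_args + extra_args))
    assert result == (1, 2, 3)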

* Update TensorFlow to 1.14
* Update code for yapf compatibility
* Update TF version for intel artifact

See google/yapf#532 for the related reps.py change.
gitanshu authored and ryanjulian committed Jul 26, 2019
1 parent 64ded53 commit 72e3cc3
Showing 135 changed files with 964 additions and 883 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -15,7 +15,7 @@ repos:
- id: pylint
name: pylint
entry: pylint
- args: ['-j 0', '--rcfile=setup.cfg']
+ args: ['-j 2', '--rcfile=setup.cfg']
stages: [commit, push]
language: system
files: \.py$
2 changes: 1 addition & 1 deletion docs/user/implement_algo_advanced.rst
@@ -92,7 +92,7 @@ extra diagnostic information as well as supporting recurrent policies):
'action',
batch_dims=1,
)
- advantage_var = tf.placeholder('advantage')
+ advantage_var = tf.compat.v1.placeholder('advantage')
dist = self.policy.distribution
old_dist_info_vars = {
k: TT.matrix('old_%s' % k)
2 changes: 1 addition & 1 deletion docs/user/implement_algo_basic.rst
@@ -207,7 +207,7 @@ First, we construct symbolic variables for the input data:
name='actions',
batch_dims=1
)
- returns_var = tf.placeholder(name='returns')
+ returns_var = tf.compat.v1.placeholder(name='returns')
Note that we can transform the policy gradient formula as

2 changes: 1 addition & 1 deletion examples/jupyter/custom_env.ipynb
@@ -452,7 +452,7 @@
"sess = tf.InteractiveSession()\n",
"\n",
"# no need to initialize\n",
"sess.run(tf.global_variables_initializer())\n"
"sess.run(tf.compat.v1.global_variables_initializer())\n"
],
"execution_count": 0,
"outputs": []
2 changes: 1 addition & 1 deletion examples/jupyter/trpo_gym_tf_cartpole.ipynb
@@ -350,7 +350,7 @@
"cell_type": "code",
"source": [
"# initialize\n",
"sess.run(tf.global_variables_initializer())"
"sess.run(tf.compat.v1.global_variables_initializer())"
],
"execution_count": 0,
"outputs": []
4 changes: 2 additions & 2 deletions examples/sim_policy.py
@@ -22,9 +22,9 @@

# If the snapshot file uses tensorflow, do:
# import tensorflow as tf
- # with tf.Session():
+ # with tf.compat.v1.Session():
# [rest of the code]
- with tf.Session() as sess:
+ with tf.compat.v1.Session() as sess:
data = joblib.load(args.file)
policy = data['algo'].policy
env = data['env']
8 changes: 5 additions & 3 deletions setup.py
@@ -10,6 +10,8 @@
'Please install numpy==1.14.5 and try again. See '
'https://github.com/rlworkgroup/garage/issues/800 for more info.')

+ TF_VERSION = '<1.15,>=1.14.0'
+
# Required dependencies
required = [
# Please keep alphabetized
@@ -38,8 +40,8 @@
'torch==1.1.0',
'scikit-image',
'scipy',
- 'tensorflow<1.13,>=1.12.0',
- 'tensorflow-probability<0.6.0,>=0.5.0',  # for tensorflow 1.12
+ 'tensorflow' + TF_VERSION,
+ 'tensorflow-probability<0.8.0,>=0.7.0',  # for tensorflow 1.14
'torchvision==0.3.0'
]

@@ -48,7 +50,7 @@
extras['all'] = list(set(sum(extras.values(), [])))

# Intel dependencies not included in all
- extras['intel'] = ['intel-tensorflow<1.13,>=1.12.0']
+ extras['intel'] = ['intel-tensorflow' + TF_VERSION]

# Development dependencies (*not* included in "all")
extras['dev'] = [
2 changes: 1 addition & 1 deletion src/garage/experiment/deterministic.py
@@ -21,7 +21,7 @@ def set_seed(seed):
np.random.seed(seed)
if 'tensorflow' in sys.modules:
import tensorflow as tf
- tf.set_random_seed(seed)
+ tf.compat.v1.set_random_seed(seed)


def get_seed():
15 changes: 8 additions & 7 deletions src/garage/experiment/local_tf_runner.py
@@ -74,7 +74,7 @@ def __init__(self, snapshot_config=None, sess=None, max_cpus=1):
if max_cpus > 1:
from garage.sampler import singleton_pool
singleton_pool.initialize(max_cpus)
- self.sess = sess or tf.Session()
+ self.sess = sess or tf.compat.v1.Session()
self.sess_entered = False
self.has_setup = False
self.plot = False
@@ -89,14 +89,15 @@ def __enter__(self):
This local runner.
"""
- if tf.get_default_session() is not self.sess:
+ if tf.compat.v1.get_default_session() is not self.sess:
self.sess.__enter__()
self.sess_entered = True
return self

def __exit__(self, exc_type, exc_val, exc_tb):
"""Leave session."""
- if tf.get_default_session() is self.sess and self.sess_entered:
+ if tf.compat.v1.get_default_session(
+ ) is self.sess and self.sess_entered:
self.sess.__exit__(exc_type, exc_val, exc_tb)
self.sess_entered = False

@@ -150,12 +151,12 @@ def initialize_tf_vars(self):
"""Initialize all uninitialized variables in session."""
with tf.name_scope('initialize_tf_vars'):
uninited_set = [
- e.decode()
- for e in self.sess.run(tf.report_uninitialized_variables())
+ e.decode() for e in self.sess.run(
+ tf.compat.v1.report_uninitialized_variables())
]
self.sess.run(
- tf.variables_initializer([
- v for v in tf.global_variables()
+ tf.compat.v1.variables_initializer([
+ v for v in tf.compat.v1.global_variables()
if v.name.split(':')[0] in uninited_set
]))

14 changes: 8 additions & 6 deletions src/garage/tf/algos/ddpg.py
@@ -75,8 +75,8 @@ def __init__(self,
discount=0.99,
policy_weight_decay=0,
qf_weight_decay=0,
- policy_optimizer=tf.train.AdamOptimizer,
- qf_optimizer=tf.train.AdamOptimizer,
+ policy_optimizer=tf.compat.v1.train.AdamOptimizer,
+ qf_optimizer=tf.compat.v1.train.AdamOptimizer,
clip_pos_returns=False,
clip_return=np.inf,
max_action=None,
@@ -157,12 +157,13 @@ def init_opt(self):
flat_dim_with_keys(['observation', 'desired_goal'])
else:
obs_dim = self.env_spec.observation_space.flat_dim
- y = tf.placeholder(tf.float32, shape=(None, 1), name='input_y')
- obs = tf.placeholder(
+ y = tf.compat.v1.placeholder(
+ tf.float32, shape=(None, 1), name='input_y')
+ obs = tf.compat.v1.placeholder(
tf.float32,
shape=(None, obs_dim),
name='input_observation')
- actions = tf.placeholder(
+ actions = tf.compat.v1.placeholder(
tf.float32,
shape=(None, self.env_spec.action_space.flat_dim),
name='input_action')
@@ -189,7 +190,8 @@ def init_opt(self):
# Set up qf training function
qval = self.qf.get_qval_sym(obs, actions, name='q_value')
with tf.name_scope('qval_loss'):
- qval_loss = tf.reduce_mean(tf.squared_difference(y, qval))
+ qval_loss = tf.reduce_mean(
+ tf.compat.v1.squared_difference(y, qval))
if self.qf_weight_decay > 0.:
qf_reg = tc.layers.apply_regularization(
tc.layers.l2_regularizer(self.qf_weight_decay),
14 changes: 8 additions & 6 deletions src/garage/tf/algos/dqn.py
@@ -61,7 +61,7 @@ def __init__(self,
n_train_steps=50,
max_path_length=None,
qf_lr=0.001,
- qf_optimizer=tf.train.AdamOptimizer,
+ qf_optimizer=tf.compat.v1.train.AdamOptimizer,
discount=1.0,
target_network_update_freq=5,
grad_norm_clipping=None,
@@ -112,9 +112,11 @@ def init_opt(self):

# build q networks
with tf.name_scope(self.name, 'DQN'):
- action_t_ph = tf.placeholder(tf.int32, None, name='action')
- reward_t_ph = tf.placeholder(tf.float32, None, name='reward')
- done_t_ph = tf.placeholder(tf.float32, None, name='done')
+ action_t_ph = tf.compat.v1.placeholder(
+ tf.int32, None, name='action')
+ reward_t_ph = tf.compat.v1.placeholder(
+ tf.float32, None, name='reward')
+ done_t_ph = tf.compat.v1.placeholder(tf.float32, None, name='done')

with tf.name_scope('update_ops'):
target_update_op = tensor_utils.get_target_ops(
@@ -152,8 +154,8 @@ def init_opt(self):
target_q_values = (reward_t_ph + self.discount * q_best_masked)

# td_error = q_selected - tf.stop_gradient(target_q_values)
- loss = tf.losses.huber_loss(q_selected,
- tf.stop_gradient(target_q_values))
+ loss = tf.compat.v1.losses.huber_loss(
+ q_selected, tf.stop_gradient(target_q_values))
loss = tf.reduce_mean(loss)

with tf.name_scope('optimize_ops'):
6 changes: 3 additions & 3 deletions src/garage/tf/algos/npo.py
@@ -198,13 +198,13 @@ def _build_inputs(self):
name='action', batch_dims=2)
reward_var = tensor_utils.new_tensor(
name='reward', ndim=2, dtype=tf.float32)
- valid_var = tf.placeholder(
+ valid_var = tf.compat.v1.placeholder(
tf.float32, shape=[None, None], name='valid')
baseline_var = tensor_utils.new_tensor(
name='baseline', ndim=2, dtype=tf.float32)

policy_state_info_vars = {
- k: tf.placeholder(
+ k: tf.compat.v1.placeholder(
tf.float32, shape=[None] * 2 + list(shape), name=k)
for k, shape in self.policy.state_info_specs
}
@@ -214,7 +214,7 @@ def _build_inputs(self):

# old policy distribution
policy_old_dist_info_vars = {
- k: tf.placeholder(
+ k: tf.compat.v1.placeholder(
tf.float32,
shape=[None] * 2 + list(shape),
name='policy_old_%s' % k)
9 changes: 4 additions & 5 deletions src/garage/tf/algos/reps.py
@@ -167,8 +167,7 @@ def eval_dual_grad(x):
x0=x0,
fprime=eval_dual_grad,
bounds=bounds,
- **self.dual_optimizer_args,
- )
+ **self.dual_optimizer_args)

logger.log('Computing dual after')
self.param_eta, self.param_v = params_ast[0], params_ast[1:]
@@ -233,7 +232,7 @@ def _build_inputs(self):
ndim=0,
dtype=tf.float32) # yapf: disable
policy_state_info_vars = {
- k: tf.placeholder(
+ k: tf.compat.v1.placeholder(
tf.float32,
shape=[None] * 2 + list(shape),
name=k)
@@ -245,7 +244,7 @@
] # yapf: disable

policy_old_dist_info_vars = {
- k: tf.placeholder(
+ k: tf.compat.v1.placeholder(
tf.float32,
shape=[None] * 2 + list(shape),
name='policy_old_%s' % k)
@@ -391,7 +390,7 @@ def _build_policy_loss(self, i):
pol_mean_kl = tf.reduce_mean(kl)

with tf.name_scope('dual'):
- dual_loss = i.param_eta * self.epsilon + i.param_eta * tf.log(
+ dual_loss = i.param_eta * self.epsilon + i.param_eta * tf.math.log(
tf.reduce_mean(
tf.exp(delta_v / i.param_eta -
tf.reduce_max(delta_v / i.param_eta)))
14 changes: 8 additions & 6 deletions src/garage/tf/core/cnn.py
@@ -43,7 +43,7 @@ def cnn(input_var,
Return:
The output tf.Tensor of the CNN.
"""
- with tf.variable_scope(name):
+ with tf.compat.v1.variable_scope(name):
h = input_var
for index, (filter_dim, num_filter, stride) in enumerate(
zip(filter_dims, num_filters, strides)):
@@ -110,7 +110,7 @@ def cnn_with_max_pooling(input_var,
pool_strides = [1, pool_strides[0], pool_strides[1], 1]
pool_shapes = [1, pool_shapes[0], pool_shapes[1], 1]

- with tf.variable_scope(name):
+ with tf.compat.v1.variable_scope(name):
h = input_var
for index, (filter_dim, num_filter, stride) in enumerate(
zip(filter_dims, num_filters, strides)):
@@ -119,7 +119,7 @@
hidden_w_init, hidden_b_init, padding)
if hidden_nonlinearity is not None:
h = hidden_nonlinearity(h)
- h = tf.nn.max_pool(
+ h = tf.nn.max_pool2d(
h, ksize=pool_shapes, strides=pool_strides, padding=padding)

# flatten
@@ -135,9 +135,11 @@ def _conv(input_var, name, filter_size, num_filter, strides, hidden_w_init,
w_shape = [filter_size, filter_size, input_shape, num_filter]
b_shape = [1, 1, 1, num_filter]

- with tf.variable_scope(name):
- weight = tf.get_variable('weight', w_shape, initializer=hidden_w_init)
- bias = tf.get_variable('bias', b_shape, initializer=hidden_b_init)
+ with tf.compat.v1.variable_scope(name):
+ weight = tf.compat.v1.get_variable(
+ 'weight', w_shape, initializer=hidden_w_init)
+ bias = tf.compat.v1.get_variable(
+ 'bias', b_shape, initializer=hidden_b_init)

return tf.nn.conv2d(
input_var, weight, strides=strides, padding=padding) + bias
4 changes: 2 additions & 2 deletions src/garage/tf/core/gru.py
@@ -34,12 +34,12 @@ def gru(name,
hidden (tf.Tensor): Step hidden state.
hidden_init_var (tf.Tensor): Initial hidden state.
"""
- with tf.variable_scope(name):
+ with tf.compat.v1.variable_scope(name):
hidden_dim = gru_cell.units
output, [hidden] = gru_cell(step_input_var, states=[step_hidden_var])
output = output_nonlinearity_layer(output)

- hidden_init_var = tf.get_variable(
+ hidden_init_var = tf.compat.v1.get_variable(
name='initial_hidden',
shape=(hidden_dim, ),
initializer=hidden_state_init,