diff --git a/README.md b/README.md
index f08f907..3f6a624 100644
--- a/README.md
+++ b/README.md
@@ -18,7 +18,7 @@ This model deals with raw speech waveforms on many noise conditions at different
 ### Dependencies
 
 * Python 2.7
-* TensorFlow 0.12
+* TensorFlow 1.4.1
 
 You can install the requirements either to your virtualenv or the system via pip with:
 
diff --git a/data_loader.py b/data_loader.py
index 038625c..08f367a 100644
--- a/data_loader.py
+++ b/data_loader.py
@@ -7,7 +7,7 @@
 def pre_emph(x, coeff=0.95):
     x0 = tf.reshape(x[0], [1,])
     diff = x[1:] - coeff * x[:-1]
-    concat = tf.concat(0, [x0, diff])
+    concat = tf.concat([x0, diff], 0)
     return concat
 
 def de_emph(y, coeff=0.95):
diff --git a/generator.py b/generator.py
index b2cef4f..925d426 100644
--- a/generator.py
+++ b/generator.py
@@ -50,7 +50,7 @@ def make_z(shape, mean=0., std=1., name='z'):
         kwidth = 3
         z = make_z([segan.batch_size, h_i.get_shape().as_list()[1],
                     segan.g_enc_depths[-1]])
-        h_i = tf.concat(2, [h_i, z])
+        h_i = tf.concat([h_i, z], 2)
         skip_out = True
         skips = []
         for block_idx, dilation in enumerate(segan.g_dilated_blocks):
@@ -188,7 +188,7 @@ def make_z(shape, mean=0., std=1., name='z'):
                 # random code is fused with intermediate representation
                 z = make_z([segan.batch_size, h_i.get_shape().as_list()[1],
                             segan.g_enc_depths[-1]])
-                h_i = tf.concat(2, [z, h_i])
+                h_i = tf.concat([z, h_i], 2)
 
             #SECOND DECODER (reverse order)
             g_dec_depths = segan.g_enc_depths[:-1][::-1] + [1]
@@ -247,7 +247,7 @@ def make_z(shape, mean=0., std=1., name='z'):
                     if is_ref:
                         print('Fusing skip connection of '
                               'shape {}'.format(skip_.get_shape()))
-                    h_i = tf.concat(2, [h_i, skip_])
+                    h_i = tf.concat([h_i, skip_], 2)
 
                 else:
                     if is_ref:
diff --git a/main.py b/main.py
index d40db0f..e105eea 100644
--- a/main.py
+++ b/main.py
@@ -77,11 +77,12 @@ def main(_):
     config.allow_soft_placement=True
     udevices = []
     for device in devices:
-        if len(devices) > 1 and 'cpu' in device.name:
+        # device names are 'cpu' on old TF and 'CPU' on newer TF: ignore case
+        if len(devices) > 1 and 'cpu' in device.name.lower():
             # Use cpu only when we dont have gpus
             continue
         print('Using device: ', device.name)
         udevices.append(device.name)
     # execute the session
     with tf.Session(config=config) as sess:
         if FLAGS.model == 'gan':
@@ -112,7 +113,7 @@ def main(_):
             print('test wave min:{}  max:{}'.format(np.min(wave), np.max(wave)))
             c_wave = se_model.clean(wave)
             print('c wave min:{}  max:{}'.format(np.min(c_wave), np.max(c_wave)))
-            wavfile.write(os.path.join(FLAGS.save_clean_path, wavname), 16e3, c_wave)
+            wavfile.write(os.path.join(FLAGS.save_clean_path, wavname), 16000, c_wave)
             print('Done cleaning {} and saved '
                   'to {}'.format(FLAGS.test_wav,
                                  os.path.join(FLAGS.save_clean_path, wavname)))
diff --git a/make_tfrecords.py b/make_tfrecords.py
index bc1f821..b0a8c01 100644
--- a/make_tfrecords.py
+++ b/make_tfrecords.py
@@ -89,7 +89,7 @@ def main(opts):
         beg_enc_t = timeit.default_timer()
         out_file = tf.python_io.TFRecordWriter(out_filepath)
         # process the acoustic and textual data now
-        for dset_i, (dset, dset_desc) in enumerate(cfg_desc.iteritems()):
+        for dset_i, (dset, dset_desc) in enumerate(cfg_desc.items()):
             print('-' * 50)
             wav_dir = dset_desc['clean']
             wav_files = [os.path.join(wav_dir, wav) for wav in
diff --git a/model.py b/model.py
index 9e2078c..fa9ec47 100644
--- a/model.py
+++ b/model.py
@@ -120,14 +120,15 @@ def __init__(self, sess, args, devices, infer=False, name='SEGAN'):
     def build_model(self, config):
         all_d_grads = []
         all_g_grads = []
-        d_opt = tf.train.RMSPropOptimizer(config.d_learning_rate)
-        g_opt = tf.train.RMSPropOptimizer(config.g_learning_rate)
-        #d_opt = tf.train.AdamOptimizer(config.d_learning_rate,
-        #                               beta1=config.beta_1)
-        #g_opt = tf.train.AdamOptimizer(config.g_learning_rate,
-        #                               beta1=config.beta_1)
-
-        for idx, device in enumerate(self.devices):
+        #d_opt = tf.train.RMSPropOptimizer(config.d_learning_rate)
+        #g_opt = tf.train.RMSPropOptimizer(config.g_learning_rate)
+        d_opt = tf.train.AdamOptimizer(config.d_learning_rate,
+                                       beta1=config.beta_1)
+        g_opt = tf.train.AdamOptimizer(config.g_learning_rate,
+                                       beta1=config.beta_1)
+
+        with tf.variable_scope(tf.get_variable_scope()) as scope:
+          for idx, device in enumerate(self.devices):
             with tf.device("/%s" % device):
                 with tf.name_scope("device_%s" % idx):
                     with variables_on_gpu0():
@@ -138,7 +139,7 @@ def build_model(self, config):
                                                           var_list=self.g_vars)
                         all_d_grads.append(d_grads)
                         all_g_grads.append(g_grads)
-                        tf.get_variable_scope().reuse_variables()
+
         avg_d_grads = average_gradients(all_d_grads)
         avg_g_grads = average_gradients(all_g_grads)
         self.d_opt = d_opt.apply_gradients(avg_d_grads)
@@ -197,7 +198,7 @@ def build_model_single_gpu(self, gpu_idx):
             # make a dummy copy of discriminator to have variables and then
             # be able to set up the variable reuse for all other devices
             # merge along channels and this would be a real batch
-            dummy_joint = tf.concat(2, [wavbatch, noisybatch])
+            dummy_joint = tf.concat([wavbatch, noisybatch], 2)
             dummy = discriminator(self, dummy_joint,
                                   reuse=False)
 
@@ -207,8 +208,8 @@ def build_model_single_gpu(self, gpu_idx):
         self.zs.append(z)
 
         # add new dimension to merge with other pairs
-        D_rl_joint = tf.concat(2, [wavbatch, noisybatch])
-        D_fk_joint = tf.concat(2, [G, noisybatch])
+        D_rl_joint = tf.concat([wavbatch, noisybatch], 2)
+        D_fk_joint = tf.concat([G, noisybatch], 2)
         # build rl discriminator
         d_rl_logits = discriminator(self, D_rl_joint, reuse=True)
         # build fk G discriminator
@@ -243,7 +244,7 @@ def build_model_single_gpu(self, gpu_idx):
         d_loss = d_rl_loss + d_fk_loss
 
         # Add the L1 loss to G
-        g_l1_loss = self.l1_lambda * tf.reduce_mean(tf.abs(tf.sub(G,
+        g_l1_loss = self.l1_lambda * tf.reduce_mean(tf.abs(tf.subtract(G,
                                                                   wavbatch)))
 
         g_loss = g_adv_loss + g_l1_loss
diff --git a/ops.py b/ops.py
index a970985..be3a384 100644
--- a/ops.py
+++ b/ops.py
@@ -29,28 +29,28 @@ def scalar_summary(name, x):
     try:
         summ = tf.summary.scalar(name, x)
     except AttributeError:
-        summ = tf.scalar_summary(name, x)
+        summ = tf.summary.scalar(name, x)
     return summ
 
 def histogram_summary(name, x):
     try:
         summ = tf.summary.histogram(name, x)
     except AttributeError:
-        summ = tf.histogram_summary(name, x)
+        summ = tf.summary.histogram(name, x)
     return summ
 
 def tensor_summary(name, x):
     try:
         summ = tf.summary.tensor_summary(name, x)
     except AttributeError:
-        summ = tf.tensor_summary(name, x)
+        summ = tf.summary.tensor_summary(name, x)
     return summ
 
 def audio_summary(name, x, sampling_rate=16e3):
     try:
         summ = tf.summary.audio(name, x, sampling_rate)
     except AttributeError:
-        summ = tf.audio_summary(name, x, sampling_rate)
+        summ = tf.summary.audio(name, x, sampling_rate)
     return summ
 
 def minmax_normalize(x, x_min, x_max, o_min=-1., o_max=1.):
@@ -89,12 +89,12 @@ def highway(input_, size, layer_size=1, bias=-2, f=tf.nn.relu, name='hw'):
     where g is nonlinearity, t is transform gate, and (1 - t) is carry gate.
     """
     output = input_
-    for idx in xrange(layer_size):
+    for idx in range(layer_size):
         lin_scope = '{}_output_lin_{}'.format(name, idx)
-        output = f(tf.nn.rnn_cell._linear(output, size, 0, scope=lin_scope))
+        output = f(tf.contrib.rnn._linear(output, size, 0, scope=lin_scope))
         transform_scope = '{}_transform_lin_{}'.format(name, idx)
         transform_gate = tf.sigmoid(
-            tf.nn.rnn_cell._linear(input_, size, 0, scope=transform_scope) + bias)
+            tf.contrib.rnn._linear(input_, size, 0, scope=transform_scope) + bias)
         carry_gate = 1. - transform_gate
 
         output = transform_gate * output + carry_gate * input_
@@ -202,7 +202,7 @@ def residual_block(input_, dilation, kwidth, num_kernels=1,
         z = tf.nn.sigmoid(z_a)
         print('gate shape: ', z.get_shape())
         # element-wise apply the gate
-        gated_h = tf.mul(z, h)
+        gated_h = tf.multiply(z, h)
         print('gated h shape: ', gated_h.get_shape())
         #make res connection
         h_ = conv1d(gated_h, kwidth=1, num_kernels=1,
@@ -281,8 +281,8 @@ def deconv(x, output_shape, kwidth=5, dilation=2, init=None, uniform=False,
         except AttributeError:
             # support for versions of TF before 0.7.0
             # based on https://github.com/carpedm20/DCGAN-tensorflow
-            deconv = tf.nn.deconv2d(x2d, W, output_shape=o2d,
-                                    strides=[1, dilation, 1, 1])
+            deconv = tf.nn.conv2d_transpose(x2d, W, output_shape=o2d,
+                                            strides=[1, dilation, 1, 1])
         if bias_init is not None:
             b = tf.get_variable('b', [out_channels],
                                 initializer=tf.constant_initializer(0.))
@@ -339,7 +339,7 @@ def average_gradients(tower_grads):
             grads.append(expanded_g)
 
         # Build the tensor and average along tower dimension
-        grad = tf.concat(0, grads)
+        grad = tf.concat(grads, 0)
         grad = tf.reduce_mean(grad, 0)
 
         # The Variables are redundant because they are shared across towers
diff --git a/requirements.txt b/requirements.txt
index 8f06ea7..35d6ab0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
 numpy==1.12.1
 scipy==0.18.1
-tensorflow_gpu==0.12.1
+tensorflow_gpu==1.4.1
 toml==0.9.2