diff --git a/README.md b/README.md index f08f907..3f6a624 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ This model deals with raw speech waveforms on many noise conditions at different ### Dependencies * Python 2.7 -* TensorFlow 0.12 +* TensorFlow 1.4.1 You can install the requirements either to your virtualenv or the system via pip with: diff --git a/data_loader.py b/data_loader.py index 038625c..08f367a 100644 --- a/data_loader.py +++ b/data_loader.py @@ -7,7 +7,7 @@ def pre_emph(x, coeff=0.95): x0 = tf.reshape(x[0], [1,]) diff = x[1:] - coeff * x[:-1] - concat = tf.concat(0, [x0, diff]) + concat = tf.concat([x0, diff], 0) return concat def de_emph(y, coeff=0.95): diff --git a/generator.py b/generator.py index b2cef4f..925d426 100644 --- a/generator.py +++ b/generator.py @@ -50,7 +50,7 @@ def make_z(shape, mean=0., std=1., name='z'): kwidth = 3 z = make_z([segan.batch_size, h_i.get_shape().as_list()[1], segan.g_enc_depths[-1]]) - h_i = tf.concat(2, [h_i, z]) + h_i = tf.concat([h_i, z], 2) skip_out = True skips = [] for block_idx, dilation in enumerate(segan.g_dilated_blocks): @@ -188,7 +188,7 @@ def make_z(shape, mean=0., std=1., name='z'): # random code is fused with intermediate representation z = make_z([segan.batch_size, h_i.get_shape().as_list()[1], segan.g_enc_depths[-1]]) - h_i = tf.concat(2, [z, h_i]) + h_i = tf.concat([z, h_i], 2) #SECOND DECODER (reverse order) g_dec_depths = segan.g_enc_depths[:-1][::-1] + [1] @@ -247,7 +247,7 @@ def make_z(shape, mean=0., std=1., name='z'): if is_ref: print('Fusing skip connection of ' 'shape {}'.format(skip_.get_shape())) - h_i = tf.concat(2, [h_i, skip_]) + h_i = tf.concat([h_i, skip_], 2) else: if is_ref: diff --git a/main.py b/main.py index d40db0f..e105eea 100644 --- a/main.py +++ b/main.py @@ -77,11 +77,12 @@ def main(_): config.allow_soft_placement=True udevices = [] for device in devices: - if len(devices) > 1 and 'cpu' in device.name: + if len(devices) > 1 and 'CPU' in device.name: # Use cpu only when we dont have gpus continue print('Using device: ', device.name) udevices.append(device.name) + print("!!!!!!", udevices) # execute the session with tf.Session(config=config) as sess: if FLAGS.model == 'gan': @@ -112,7 +113,7 @@ def main(_): print('test wave min:{} max:{}'.format(np.min(wave), np.max(wave))) c_wave = se_model.clean(wave) print('c wave min:{} max:{}'.format(np.min(c_wave), np.max(c_wave))) - wavfile.write(os.path.join(FLAGS.save_clean_path, wavname), 16e3, c_wave) + wavfile.write(os.path.join(FLAGS.save_clean_path, wavname), 16000, c_wave) print('Done cleaning {} and saved ' 'to {}'.format(FLAGS.test_wav, os.path.join(FLAGS.save_clean_path, wavname))) diff --git a/make_tfrecords.py b/make_tfrecords.py index bc1f821..b0a8c01 100644 --- a/make_tfrecords.py +++ b/make_tfrecords.py @@ -89,7 +89,7 @@ def main(opts): beg_enc_t = timeit.default_timer() out_file = tf.python_io.TFRecordWriter(out_filepath) # process the acoustic and textual data now - for dset_i, (dset, dset_desc) in enumerate(cfg_desc.iteritems()): + for dset_i, (dset, dset_desc) in enumerate(cfg_desc.items()): print('-' * 50) wav_dir = dset_desc['clean'] wav_files = [os.path.join(wav_dir, wav) for wav in diff --git a/model.py b/model.py index 9e2078c..fa9ec47 100644 --- a/model.py +++ b/model.py @@ -120,14 +120,15 @@ def __init__(self, sess, args, devices, infer=False, name='SEGAN'): def build_model(self, config): all_d_grads = [] all_g_grads = [] - d_opt = tf.train.RMSPropOptimizer(config.d_learning_rate) - g_opt = tf.train.RMSPropOptimizer(config.g_learning_rate) - #d_opt = tf.train.AdamOptimizer(config.d_learning_rate, - # beta1=config.beta_1) - #g_opt = tf.train.AdamOptimizer(config.g_learning_rate, - # beta1=config.beta_1) - - for idx, device in enumerate(self.devices): + #d_opt = tf.train.RMSPropOptimizer(config.d_learning_rate) + #g_opt = tf.train.RMSPropOptimizer(config.g_learning_rate) + d_opt = tf.train.AdamOptimizer(config.d_learning_rate, + beta1=config.beta_1) + g_opt = tf.train.AdamOptimizer(config.g_learning_rate, + beta1=config.beta_1) + + with tf.variable_scope(tf.get_variable_scope()) as scope: + for idx, device in enumerate(self.devices): with tf.device("/%s" % device): with tf.name_scope("device_%s" % idx): with variables_on_gpu0(): @@ -138,7 +139,7 @@ def build_model(self, config): var_list=self.g_vars) all_d_grads.append(d_grads) all_g_grads.append(g_grads) - tf.get_variable_scope().reuse_variables() + avg_d_grads = average_gradients(all_d_grads) avg_g_grads = average_gradients(all_g_grads) self.d_opt = d_opt.apply_gradients(avg_d_grads) @@ -197,7 +198,7 @@ def build_model_single_gpu(self, gpu_idx): # make a dummy copy of discriminator to have variables and then # be able to set up the variable reuse for all other devices # merge along channels and this would be a real batch - dummy_joint = tf.concat(2, [wavbatch, noisybatch]) + dummy_joint = tf.concat([wavbatch, noisybatch], 2) dummy = discriminator(self, dummy_joint, reuse=False) @@ -207,8 +208,8 @@ def build_model_single_gpu(self, gpu_idx): self.zs.append(z) # add new dimension to merge with other pairs - D_rl_joint = tf.concat(2, [wavbatch, noisybatch]) - D_fk_joint = tf.concat(2, [G, noisybatch]) + D_rl_joint = tf.concat([wavbatch, noisybatch], 2) + D_fk_joint = tf.concat([G, noisybatch], 2) # build rl discriminator d_rl_logits = discriminator(self, D_rl_joint, reuse=True) # build fk G discriminator @@ -243,7 +244,7 @@ def build_model_single_gpu(self, gpu_idx): d_loss = d_rl_loss + d_fk_loss # Add the L1 loss to G - g_l1_loss = self.l1_lambda * tf.reduce_mean(tf.abs(tf.sub(G, + g_l1_loss = self.l1_lambda * tf.reduce_mean(tf.abs(tf.subtract(G, wavbatch))) g_loss = g_adv_loss + g_l1_loss diff --git a/ops.py b/ops.py index a970985..be3a384 100644 --- a/ops.py +++ b/ops.py @@ -29,28 +29,28 @@ def scalar_summary(name, x): try: summ = tf.summary.scalar(name, x) except AttributeError: - summ = tf.scalar_summary(name, x) + summ = tf.summary.scalar(name, x) return summ def histogram_summary(name, x): try: summ = tf.summary.histogram(name, x) except AttributeError: - summ = tf.histogram_summary(name, x) + summ = tf.summary.histogram(name, x) return summ def tensor_summary(name, x): try: summ = tf.summary.tensor_summary(name, x) except AttributeError: - summ = tf.tensor_summary(name, x) + summ = tf.summary.tensor_summary(name, x) return summ def audio_summary(name, x, sampling_rate=16e3): try: summ = tf.summary.audio(name, x, sampling_rate) except AttributeError: - summ = tf.audio_summary(name, x, sampling_rate) + summ = tf.summary.audio(name, x, sampling_rate) return summ def minmax_normalize(x, x_min, x_max, o_min=-1., o_max=1.): @@ -89,12 +89,12 @@ def highway(input_, size, layer_size=1, bias=-2, f=tf.nn.relu, name='hw'): where g is nonlinearity, t is transform gate, and (1 - t) is carry gate. """ output = input_ - for idx in xrange(layer_size): + for idx in range(layer_size): lin_scope = '{}_output_lin_{}'.format(name, idx) - output = f(tf.nn.rnn_cell._linear(output, size, 0, scope=lin_scope)) + output = f(tf.contrib.rnn._linear(output, size, 0, scope=lin_scope)) transform_scope = '{}_transform_lin_{}'.format(name, idx) transform_gate = tf.sigmoid( - tf.nn.rnn_cell._linear(input_, size, 0, scope=transform_scope) + bias) + tf.contrib.rnn._linear(input_, size, 0, scope=transform_scope) + bias) carry_gate = 1. - transform_gate output = transform_gate * output + carry_gate * input_ @@ -202,7 +202,7 @@ def residual_block(input_, dilation, kwidth, num_kernels=1, z = tf.nn.sigmoid(z_a) print('gate shape: ', z.get_shape()) # element-wise apply the gate - gated_h = tf.mul(z, h) + gated_h = tf.multiply(z, h) print('gated h shape: ', gated_h.get_shape()) #make res connection h_ = conv1d(gated_h, kwidth=1, num_kernels=1, @@ -281,8 +281,8 @@ def deconv(x, output_shape, kwidth=5, dilation=2, init=None, uniform=False, except AttributeError: # support for versions of TF before 0.7.0 # based on https://github.com/carpedm20/DCGAN-tensorflow - deconv = tf.nn.deconv2d(x2d, W, output_shape=o2d, - strides=[1, dilation, 1, 1]) + deconv = tf.nn.conv2d_transpose(x2d, W, output_shape=o2d, + strides=[1, dilation, 1, 1]) if bias_init is not None: b = tf.get_variable('b', [out_channels], initializer=tf.constant_initializer(0.)) @@ -339,7 +339,7 @@ def average_gradients(tower_grads): grads.append(expanded_g) # Build the tensor and average along tower dimension - grad = tf.concat(0, grads) + grad = tf.concat(grads, 0) grad = tf.reduce_mean(grad, 0) # The Variables are redundant because they are shared across towers diff --git a/requirements.txt b/requirements.txt index 8f06ea7..35d6ab0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ numpy==1.12.1 scipy==0.18.1 -tensorflow_gpu==0.12.1 +tensorflow_gpu==1.4.1 toml==0.9.2