santi-pdp · fcakyon · Jan 9, 2021 · Jan 9, 2021 · Jan 9, 2021 · Jan 9, 2021
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,5 @@
+*.wav
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]

diff --git a/README.md b/README.md
@@ -1,4 +1,4 @@
-## SEGAN: Speech Enhancement Generative Adversarial Network
+## SEGAN: Speech Enhancement Generative Adversarial Network (Python 3 / TensorFlow 1.12-1.15 / Conda)
 
 ### Introduction
 
@@ -13,17 +13,18 @@ This model deals with raw speech waveforms on many noise conditions at different
 **All the project is developed with TensorFlow**. There are two repositories that were good references on how GANs are defined and deployed:
 
 * [improved-gan](https://github.com/openai/improved-gan): implementing improvements to train GANs in a more stable way
-*  [DCGAN-tensorflow](https://github.com/carpedm20/DCGAN-tensorflow): implementation of the DCGAN in tensorflow
+* [DCGAN-tensorflow](https://github.com/carpedm20/DCGAN-tensorflow): implementation of the DCGAN in tensorflow
+* [segan (python2)](https://github.com/santi-pdp/segan): the original Python 2 implementation of SEGAN
 
 ### Dependencies
 
-* Python 2.7
-* TensorFlow 0.12
+* Python 3.6-3.7
+* TensorFlow 1.12-1.15
 
-You can install the requirements either to your virtualenv or the system via pip with:
+Create the environment with Anaconda (use `environment37.yml` instead for the Python 3.7 / TensorFlow 1.15 setup):
 
-```
-pip install -r requirements.txt
+```bash
+conda env create -f environment36.yml
 ```
 
 ### Data
@@ -44,10 +45,16 @@ python make_tfrecords.py --force-gen --cfg cfg/e2e_maker.cfg
 
 Once you have the TFRecords file created in `data/segan.tfrecords` you can simply run the training process with:
 
-```
+```bash
 ./train_segan.sh
 ```
 
+Or, on Windows, run the training process with:
+
+```bash
+train_segan.bat
+```
+
 By default this will take all the available GPUs in your system, if any. Otherwise it will just take the CPU.
 
 **NOTE:** If you want to specify a subset of GPUs to work on, you can do so with the `CUDA_VISIBLE_DEVICES="0, 1, <etc>"` flag in the python execution within the training script. In the case of having two GPUs they'll be identified as 0 and 1, so we could just take the first GPU with: `CUDA_VISIBLE_DEVICES="0"`.

diff --git a/data_loader.py b/data_loader.py
@@ -7,7 +7,7 @@
 def pre_emph(x, coeff=0.95):
     x0 = tf.reshape(x[0], [1,])
     diff = x[1:] - coeff * x[:-1]
-    concat = tf.concat(0, [x0, diff])
+    concat = tf.concat([x0, diff],0)
     return concat
 
 def de_emph(y, coeff=0.95):

diff --git a/environment36.yml b/environment36.yml
@@ -0,0 +1,17 @@
+name: segan
+channels:
+  - conda-forge
+  - anaconda
+  - menpo
+dependencies:
+  - python=3.6
+  - scipy=0.19.1
+  - cudatoolkit=9
+  - cudnn=7
+  - wget
+  - pip
+  - pip:
+    - cython
+    - numpy==1.16
+    - tensorflow_gpu==1.12.0
+    - toml==0.9.2
diff --git a/environment37.yml b/environment37.yml
@@ -0,0 +1,16 @@
+name: segan37
+channels:
+  - conda-forge
+  - anaconda
+  - menpo
+dependencies:
+  - python=3.7
+  - cudatoolkit=10.0
+  - cudnn=7
+  - scipy=1.1
+  - wget
+  - pip
+  - pip:
+    - cython
+    - tensorflow_gpu==1.15
+    - toml==0.9.2
diff --git a/generator.py b/generator.py
@@ -50,7 +50,7 @@ def make_z(shape, mean=0., std=1., name='z'):
         kwidth = 3
         z = make_z([segan.batch_size, h_i.get_shape().as_list()[1],
                     segan.g_enc_depths[-1]])
-        h_i = tf.concat(2, [h_i, z])
+        h_i = tf.concat([h_i, z],2)
         skip_out = True
         skips = []
         for block_idx, dilation in enumerate(segan.g_dilated_blocks):
@@ -188,7 +188,7 @@ def make_z(shape, mean=0., std=1., name='z'):
                 # random code is fused with intermediate representation
                 z = make_z([segan.batch_size, h_i.get_shape().as_list()[1],
                             segan.g_enc_depths[-1]])
-                h_i = tf.concat(2, [z, h_i])
+                h_i = tf.concat([z, h_i],2)
 
             #SECOND DECODER (reverse order)
             g_dec_depths = segan.g_enc_depths[:-1][::-1] + [1]
@@ -247,7 +247,7 @@ def make_z(shape, mean=0., std=1., name='z'):
                     if is_ref:
                         print('Fusing skip connection of '
                               'shape {}'.format(skip_.get_shape()))
-                    h_i = tf.concat(2, [h_i, skip_])
+                    h_i = tf.concat([h_i, skip_],2)
 
                 else:
                     if is_ref:

diff --git a/main.py b/main.py
@@ -112,7 +112,7 @@ def main(_):
             print('test wave min:{}  max:{}'.format(np.min(wave), np.max(wave)))
             c_wave = se_model.clean(wave)
             print('c wave min:{}  max:{}'.format(np.min(c_wave), np.max(c_wave)))
-            wavfile.write(os.path.join(FLAGS.save_clean_path, wavname), 16e3, c_wave)
+            wavfile.write(os.path.join(FLAGS.save_clean_path, wavname), 16000, c_wave)
             print('Done cleaning {} and saved '
                   'to {}'.format(FLAGS.test_wav,
                                  os.path.join(FLAGS.save_clean_path, wavname)))

diff --git a/make_tfrecords.py b/make_tfrecords.py
@@ -89,7 +89,7 @@ def main(opts):
         beg_enc_t = timeit.default_timer()
         out_file = tf.python_io.TFRecordWriter(out_filepath)
         # process the acoustic and textual data now
-        for dset_i, (dset, dset_desc) in enumerate(cfg_desc.iteritems()):
+        for dset_i, (dset, dset_desc) in enumerate(cfg_desc.items()):
             print('-' * 50)
             wav_dir = dset_desc['clean']
             wav_files = [os.path.join(wav_dir, wav) for wav in

diff --git a/model.py b/model.py
@@ -120,25 +120,28 @@ def __init__(self, sess, args, devices, infer=False, name='SEGAN'):
     def build_model(self, config):
         all_d_grads = []
         all_g_grads = []
-        d_opt = tf.train.RMSPropOptimizer(config.d_learning_rate)
-        g_opt = tf.train.RMSPropOptimizer(config.g_learning_rate)
-        #d_opt = tf.train.AdamOptimizer(config.d_learning_rate,
-        #                               beta1=config.beta_1)
-        #g_opt = tf.train.AdamOptimizer(config.g_learning_rate,
-        #                               beta1=config.beta_1)
 
-        for idx, device in enumerate(self.devices):
-            with tf.device("/%s" % device):
-                with tf.name_scope("device_%s" % idx):
-                    with variables_on_gpu0():
-                        self.build_model_single_gpu(idx)
-                        d_grads = d_opt.compute_gradients(self.d_losses[-1],
-                                                          var_list=self.d_vars)
-                        g_grads = g_opt.compute_gradients(self.g_losses[-1],
-                                                          var_list=self.g_vars)
-                        all_d_grads.append(d_grads)
-                        all_g_grads.append(g_grads)
-                        tf.get_variable_scope().reuse_variables()
+        #d_opt = tf.train.RMSPropOptimizer(config.d_learning_rate)
+        #g_opt = tf.train.RMSPropOptimizer(config.g_learning_rate)
+        d_opt = tf.train.AdamOptimizer(config.d_learning_rate,
+                                       beta1=config.beta_1)
+        g_opt = tf.train.AdamOptimizer(config.g_learning_rate,
+                                       beta1=config.beta_1)
+
+        with tf.variable_scope(tf.get_variable_scope()) as scope:
+          for idx, device in enumerate(self.devices):
+              with tf.device("/%s" % device):
+                  with tf.name_scope("device_%s" % idx):
+                      with variables_on_gpu0():
+                          self.build_model_single_gpu(idx)
+
+                          d_grads = d_opt.compute_gradients(self.d_losses[-1],
+                                                            var_list=self.d_vars)
+                          g_grads = g_opt.compute_gradients(self.g_losses[-1],
+                                                            var_list=self.g_vars)
+                          all_d_grads.append(d_grads)
+                          all_g_grads.append(g_grads)
+
         avg_d_grads = average_gradients(all_d_grads)
         avg_g_grads = average_gradients(all_g_grads)
         self.d_opt = d_opt.apply_gradients(avg_d_grads)
@@ -197,7 +200,7 @@ def build_model_single_gpu(self, gpu_idx):
             # make a dummy copy of discriminator to have variables and then
             # be able to set up the variable reuse for all other devices
             # merge along channels and this would be a real batch
-            dummy_joint = tf.concat(2, [wavbatch, noisybatch])
+            dummy_joint = tf.concat(axis=2, values=[wavbatch, noisybatch])
             dummy = discriminator(self, dummy_joint,
                                   reuse=False)
 
@@ -207,8 +210,8 @@ def build_model_single_gpu(self, gpu_idx):
         self.zs.append(z)
 
         # add new dimension to merge with other pairs
-        D_rl_joint = tf.concat(2, [wavbatch, noisybatch])
-        D_fk_joint = tf.concat(2, [G, noisybatch])
+        D_rl_joint = tf.concat(axis=2, values=[wavbatch, noisybatch])
+        D_fk_joint = tf.concat(axis=2, values=[G, noisybatch])
         # build rl discriminator
         d_rl_logits = discriminator(self, D_rl_joint, reuse=True)
         # build fk G discriminator
@@ -243,8 +246,7 @@ def build_model_single_gpu(self, gpu_idx):
         d_loss = d_rl_loss + d_fk_loss
 
         # Add the L1 loss to G
-        g_l1_loss = self.l1_lambda * tf.reduce_mean(tf.abs(tf.sub(G,
-                                                                  wavbatch)))
+        g_l1_loss = self.l1_lambda * tf.reduce_mean(tf.abs(tf.subtract(G, wavbatch)))
 
         g_loss = g_adv_loss + g_l1_loss
 
@@ -279,8 +281,9 @@ def get_vars(self):
                 self.d_vars_dict[var.name] = var
             if var.name.startswith('g_'):
                 self.g_vars_dict[var.name] = var
-        self.d_vars = self.d_vars_dict.values()
-        self.g_vars = self.g_vars_dict.values()
+        self.d_vars = list(self.d_vars_dict.values())
+        self.g_vars = list(self.g_vars_dict.values())
+
         for x in self.d_vars:
             assert x not in self.g_vars
         for x in self.g_vars:
@@ -325,7 +328,7 @@ def train(self, config, devices):
             init = tf.global_variables_initializer()
         except AttributeError:
             # fall back to old implementation
-            init = tf.initialize_all_variables()
+            init = tf.global_variables_initializer()
 
         print('Initializing variables...')
         self.sess.run(init)
@@ -467,45 +470,19 @@ def train(self, config, devices):
                     swaves = sample_wav
                     sample_dif = sample_wav - sample_noisy
                     for m in range(min(20, canvas_w.shape[0])):
-                        print('w{} max: {} min: {}'.format(m,
-                                                           np.max(canvas_w[m]),
-                                                           np.min(canvas_w[m])))
-                        wavfile.write(os.path.join(save_path,
-                                                   'sample_{}-'
-                                                   '{}.wav'.format(counter, m)),
-                                      16e3,
-                                      de_emph(canvas_w[m],
-                                              self.preemph))
-                        m_gtruth_path = os.path.join(save_path, 'gtruth_{}.'
-                                                                'wav'.format(m))
-                        if not os.path.exists(m_gtruth_path):
-                            wavfile.write(os.path.join(save_path,
-                                                       'gtruth_{}.'
-                                                       'wav'.format(m)),
-                                          16e3,
-                                          de_emph(swaves[m],
-                                                  self.preemph))
-                            wavfile.write(os.path.join(save_path,
-                                                       'noisy_{}.'
-                                                       'wav'.format(m)),
-                                          16e3,
-                                          de_emph(sample_noisy[m],
-                                                  self.preemph))
-                            wavfile.write(os.path.join(save_path,
-                                                       'dif_{}.wav'.format(m)),
-                                          16e3,
-                                          de_emph(sample_dif[m],
-                                                  self.preemph))
-                        np.savetxt(os.path.join(save_path, 'd_rl_losses.txt'),
-                                   d_rl_losses)
-                        np.savetxt(os.path.join(save_path, 'd_fk_losses.txt'),
-                                   d_fk_losses)
-                        np.savetxt(os.path.join(save_path, 'g_adv_losses.txt'),
-                                   g_adv_losses)
-                        np.savetxt(os.path.join(save_path, 'g_l1_losses.txt'),
-                                   g_l1_losses)
-
-                if batch_idx >= num_batches:
+                        print('w{} max: {} min: {}'.format(m, np.max(canvas_w[m]), np.min(canvas_w[m])))
+                        wavfile.write(os.path.join(save_path, 'sample_{}-{}.wav'.format(counter, m)), 16000, canvas_w[m])
+                        if not os.path.exists(os.path.join(save_path, 'gtruth_{}.wav'.format(m))):
+                            wavfile.write(os.path.join(save_path, 'gtruth_{}.wav'.format(m)), 16000, swaves[m])
+                            wavfile.write(os.path.join(save_path, 'noisy_{}.wav'.format(m)), 16000, sample_noisy[m])
+                            wavfile.write(os.path.join(save_path, 'dif_{}.wav'.format(m)), 16000, sample_dif[m])
+                        np.savetxt(os.path.join(save_path, 'd_rl_losses.txt'), d_rl_losses)
+                        np.savetxt(os.path.join(save_path, 'd_fk_losses.txt'), d_fk_losses)
+                        #np.savetxt(os.path.join(save_path, 'd_nfk_losses.txt'), d_nfk_losses)
+                        np.savetxt(os.path.join(save_path, 'g_adv_losses.txt'), g_adv_losses)
+                        np.savetxt(os.path.join(save_path, 'g_l1_losses.txt'), g_l1_losses)
+
+                if batch_idx >= int(num_batches):
                     curr_epoch += 1
                     # re-set batch idx
                     batch_idx = 0
@@ -671,7 +648,7 @@ def build_model_single_gpu(self, gpu_idx):
             self.g_losses = []
 
         # Add the L1 loss to G
-        g_loss = tf.reduce_mean(tf.abs(tf.sub(G, wavbatch)))
+        g_loss = tf.reduce_mean(tf.abs(tf.subtract(G, wavbatch)))
 
         self.g_losses.append(g_loss)
 
@@ -699,7 +676,7 @@ def train(self, config, devices):
             init = tf.global_variables_initializer()
         except AttributeError:
             # fall back to old implementation
-            init = tf.initialize_all_variables()
+            init = tf.global_variables_initializer()
 
         print('Initializing variables...')
         self.sess.run(init)
@@ -785,27 +762,24 @@ def train(self, config, devices):
                     sample_dif = sample_wav - sample_noisy
                     for m in range(min(20, canvas_w.shape[0])):
                         print('w{} max: {} min: {}'.format(m, np.max(canvas_w[m]), np.min(canvas_w[m])))
-                        wavfile.write(os.path.join(save_path, 'sample_{}-{}.wav'.format(counter, m)), 16e3, canvas_w[m])
+                        wavfile.write(os.path.join(save_path, 'sample_{}-{}.wav'.format(counter, m)), 16000, canvas_w[m])
                         if not os.path.exists(os.path.join(save_path, 'gtruth_{}.wav'.format(m))):
-                            wavfile.write(os.path.join(save_path, 'gtruth_{}.wav'.format(m)), 16e3, swaves[m])
-                            wavfile.write(os.path.join(save_path, 'noisy_{}.wav'.format(m)), 16e3, sample_noisy[m])
-                            wavfile.write(os.path.join(save_path, 'dif_{}.wav'.format(m)), 16e3, sample_dif[m])
+                            wavfile.write(os.path.join(save_path, 'gtruth_{}.wav'.format(m)), 16000, swaves[m])
+                            wavfile.write(os.path.join(save_path, 'noisy_{}.wav'.format(m)), 16000, sample_noisy[m])
+                            wavfile.write(os.path.join(save_path, 'dif_{}.wav'.format(m)), 16000, sample_dif[m])
                         np.savetxt(os.path.join(save_path, 'g_losses.txt'), g_losses)
 
-                if batch_idx >= num_batches:
+                if batch_idx >= int(num_batches):
                     curr_epoch += 1
                     # re-set batch idx
                     batch_idx = 0
                 if curr_epoch >= config.epoch:
                     # done training
                     print('Done training; epoch limit {} '
                           'reached.'.format(self.epoch))
-                    print('Saving last model at iteration {}'.format(counter))
-                    self.save(config.save_path, counter)
-                    self.writer.add_summary(_g_sum, counter)
                     break
         except tf.errors.OutOfRangeError:
             print('[!] Reached queues limits in training loop')
         finally:
             coord.request_stop()
-        coord.join(threads)
+        coord.join(threads)