Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add support for tensorflow 1.12-1.15 and python 3.6-3.7 #85

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
*.wav

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand Down
23 changes: 15 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
## SEGAN: Speech Enhancement Generative Adversarial Network
## SEGAN: Speech Enhancement Generative Adversarial Network (Python 3 / TensorFlow 1.12-1.15 / Conda)

### Introduction

Expand All @@ -13,17 +13,18 @@ This model deals with raw speech waveforms on many noise conditions at different
**The whole project is developed with TensorFlow**. Two repositories were good references on how GANs are defined and deployed:

* [improved-gan](https://github.com/openai/improved-gan): implementing improvements to train GANs in a more stable way
* [DCGAN-tensorflow](https://github.com/carpedm20/DCGAN-tensorflow): implementation of the DCGAN in tensorflow
* [DCGAN-tensorflow](https://github.com/carpedm20/DCGAN-tensorflow): implementation of the DCGAN in tensorflow
* [segan (python2)](https://github.com/santi-pdp/segan): the original SEGAN implementation (Python 2.7 / TensorFlow 0.12) that this port is based on

### Dependencies

* Python 2.7
* TensorFlow 0.12
* Python 3.6-3.7
* TensorFlow 1.12-1.15

You can install the requirements either to your virtualenv or the system via pip with:
Create the environment with Anaconda (use `environment37.yml` instead for Python 3.7 / TensorFlow 1.15):

```
pip install -r requirements.txt
```bash
conda env create -f environment36.yml
```

### Data
Expand All @@ -44,10 +45,16 @@ python make_tfrecords.py --force-gen --cfg cfg/e2e_maker.cfg

Once you have the TFRecords file created in `data/segan.tfrecords` you can simply run the training process with:

```
```bash
./train_segan.sh
```

Or, on Windows, run the training process with:

```bat
train_segan.bat
```

By default this will use all the available GPUs in your system, if any; otherwise it will fall back to the CPU.

**NOTE:** If you want to restrict training to a subset of GPUs, set the `CUDA_VISIBLE_DEVICES="0, 1, <etc>"` environment variable in front of the python command inside the training script. With two GPUs they are identified as 0 and 1, so you can select only the first GPU with `CUDA_VISIBLE_DEVICES="0"`.
Expand Down
2 changes: 1 addition & 1 deletion data_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
def pre_emph(x, coeff=0.95):
x0 = tf.reshape(x[0], [1,])
diff = x[1:] - coeff * x[:-1]
concat = tf.concat(0, [x0, diff])
concat = tf.concat([x0, diff],0)
return concat

def de_emph(y, coeff=0.95):
Expand Down
17 changes: 17 additions & 0 deletions environment36.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
name: segan
channels:
- conda-forge
- anaconda
- menpo
dependencies:
- python=3.6
- scipy=0.19.1
- cudatoolkit=9
- cudnn=7
- wget
- pip
- pip:
- cython
- numpy==1.16
- tensorflow_gpu==1.12.0
- toml==0.9.2
16 changes: 16 additions & 0 deletions environment37.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
name: segan37
channels:
- conda-forge
- anaconda
- menpo
dependencies:
- python=3.7
- cudatoolkit=10.0
- cudnn=7
- scipy=1.1
- wget
- pip
- pip:
- cython
- tensorflow_gpu==1.15
- toml==0.9.2
6 changes: 3 additions & 3 deletions generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def make_z(shape, mean=0., std=1., name='z'):
kwidth = 3
z = make_z([segan.batch_size, h_i.get_shape().as_list()[1],
segan.g_enc_depths[-1]])
h_i = tf.concat(2, [h_i, z])
h_i = tf.concat([h_i, z],2)
skip_out = True
skips = []
for block_idx, dilation in enumerate(segan.g_dilated_blocks):
Expand Down Expand Up @@ -188,7 +188,7 @@ def make_z(shape, mean=0., std=1., name='z'):
# random code is fused with intermediate representation
z = make_z([segan.batch_size, h_i.get_shape().as_list()[1],
segan.g_enc_depths[-1]])
h_i = tf.concat(2, [z, h_i])
h_i = tf.concat([z, h_i],2)

#SECOND DECODER (reverse order)
g_dec_depths = segan.g_enc_depths[:-1][::-1] + [1]
Expand Down Expand Up @@ -247,7 +247,7 @@ def make_z(shape, mean=0., std=1., name='z'):
if is_ref:
print('Fusing skip connection of '
'shape {}'.format(skip_.get_shape()))
h_i = tf.concat(2, [h_i, skip_])
h_i = tf.concat([h_i, skip_],2)

else:
if is_ref:
Expand Down
2 changes: 1 addition & 1 deletion main.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def main(_):
print('test wave min:{} max:{}'.format(np.min(wave), np.max(wave)))
c_wave = se_model.clean(wave)
print('c wave min:{} max:{}'.format(np.min(c_wave), np.max(c_wave)))
wavfile.write(os.path.join(FLAGS.save_clean_path, wavname), 16e3, c_wave)
wavfile.write(os.path.join(FLAGS.save_clean_path, wavname), 16000, c_wave)
print('Done cleaning {} and saved '
'to {}'.format(FLAGS.test_wav,
os.path.join(FLAGS.save_clean_path, wavname)))
Expand Down
2 changes: 1 addition & 1 deletion make_tfrecords.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def main(opts):
beg_enc_t = timeit.default_timer()
out_file = tf.python_io.TFRecordWriter(out_filepath)
# process the acoustic and textual data now
for dset_i, (dset, dset_desc) in enumerate(cfg_desc.iteritems()):
for dset_i, (dset, dset_desc) in enumerate(cfg_desc.items()):
print('-' * 50)
wav_dir = dset_desc['clean']
wav_files = [os.path.join(wav_dir, wav) for wav in
Expand Down
126 changes: 50 additions & 76 deletions model.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,25 +120,28 @@ def __init__(self, sess, args, devices, infer=False, name='SEGAN'):
def build_model(self, config):
all_d_grads = []
all_g_grads = []
d_opt = tf.train.RMSPropOptimizer(config.d_learning_rate)
g_opt = tf.train.RMSPropOptimizer(config.g_learning_rate)
#d_opt = tf.train.AdamOptimizer(config.d_learning_rate,
# beta1=config.beta_1)
#g_opt = tf.train.AdamOptimizer(config.g_learning_rate,
# beta1=config.beta_1)

for idx, device in enumerate(self.devices):
with tf.device("/%s" % device):
with tf.name_scope("device_%s" % idx):
with variables_on_gpu0():
self.build_model_single_gpu(idx)
d_grads = d_opt.compute_gradients(self.d_losses[-1],
var_list=self.d_vars)
g_grads = g_opt.compute_gradients(self.g_losses[-1],
var_list=self.g_vars)
all_d_grads.append(d_grads)
all_g_grads.append(g_grads)
tf.get_variable_scope().reuse_variables()
#d_opt = tf.train.RMSPropOptimizer(config.d_learning_rate)
#g_opt = tf.train.RMSPropOptimizer(config.g_learning_rate)
d_opt = tf.train.AdamOptimizer(config.d_learning_rate,
beta1=config.beta_1)
g_opt = tf.train.AdamOptimizer(config.g_learning_rate,
beta1=config.beta_1)

with tf.variable_scope(tf.get_variable_scope()) as scope:
for idx, device in enumerate(self.devices):
with tf.device("/%s" % device):
with tf.name_scope("device_%s" % idx):
with variables_on_gpu0():
self.build_model_single_gpu(idx)

d_grads = d_opt.compute_gradients(self.d_losses[-1],
var_list=self.d_vars)
g_grads = g_opt.compute_gradients(self.g_losses[-1],
var_list=self.g_vars)
all_d_grads.append(d_grads)
all_g_grads.append(g_grads)

avg_d_grads = average_gradients(all_d_grads)
avg_g_grads = average_gradients(all_g_grads)
self.d_opt = d_opt.apply_gradients(avg_d_grads)
Expand Down Expand Up @@ -197,7 +200,7 @@ def build_model_single_gpu(self, gpu_idx):
# make a dummy copy of discriminator to have variables and then
# be able to set up the variable reuse for all other devices
# merge along channels and this would be a real batch
dummy_joint = tf.concat(2, [wavbatch, noisybatch])
dummy_joint = tf.concat(axis=2, values=[wavbatch, noisybatch])
dummy = discriminator(self, dummy_joint,
reuse=False)

Expand All @@ -207,8 +210,8 @@ def build_model_single_gpu(self, gpu_idx):
self.zs.append(z)

# add new dimension to merge with other pairs
D_rl_joint = tf.concat(2, [wavbatch, noisybatch])
D_fk_joint = tf.concat(2, [G, noisybatch])
D_rl_joint = tf.concat(axis=2, values=[wavbatch, noisybatch])
D_fk_joint = tf.concat(axis=2, values=[G, noisybatch])
# build rl discriminator
d_rl_logits = discriminator(self, D_rl_joint, reuse=True)
# build fk G discriminator
Expand Down Expand Up @@ -243,8 +246,7 @@ def build_model_single_gpu(self, gpu_idx):
d_loss = d_rl_loss + d_fk_loss

# Add the L1 loss to G
g_l1_loss = self.l1_lambda * tf.reduce_mean(tf.abs(tf.sub(G,
wavbatch)))
g_l1_loss = self.l1_lambda * tf.reduce_mean(tf.abs(tf.subtract(G, wavbatch)))

g_loss = g_adv_loss + g_l1_loss

Expand Down Expand Up @@ -279,8 +281,9 @@ def get_vars(self):
self.d_vars_dict[var.name] = var
if var.name.startswith('g_'):
self.g_vars_dict[var.name] = var
self.d_vars = self.d_vars_dict.values()
self.g_vars = self.g_vars_dict.values()
self.d_vars = list(self.d_vars_dict.values())
self.g_vars = list(self.g_vars_dict.values())

for x in self.d_vars:
assert x not in self.g_vars
for x in self.g_vars:
Expand Down Expand Up @@ -325,7 +328,7 @@ def train(self, config, devices):
init = tf.global_variables_initializer()
except AttributeError:
# fall back to old implementation
init = tf.initialize_all_variables()
init = tf.global_variables_initializer()

print('Initializing variables...')
self.sess.run(init)
Expand Down Expand Up @@ -467,45 +470,19 @@ def train(self, config, devices):
swaves = sample_wav
sample_dif = sample_wav - sample_noisy
for m in range(min(20, canvas_w.shape[0])):
print('w{} max: {} min: {}'.format(m,
np.max(canvas_w[m]),
np.min(canvas_w[m])))
wavfile.write(os.path.join(save_path,
'sample_{}-'
'{}.wav'.format(counter, m)),
16e3,
de_emph(canvas_w[m],
self.preemph))
m_gtruth_path = os.path.join(save_path, 'gtruth_{}.'
'wav'.format(m))
if not os.path.exists(m_gtruth_path):
wavfile.write(os.path.join(save_path,
'gtruth_{}.'
'wav'.format(m)),
16e3,
de_emph(swaves[m],
self.preemph))
wavfile.write(os.path.join(save_path,
'noisy_{}.'
'wav'.format(m)),
16e3,
de_emph(sample_noisy[m],
self.preemph))
wavfile.write(os.path.join(save_path,
'dif_{}.wav'.format(m)),
16e3,
de_emph(sample_dif[m],
self.preemph))
np.savetxt(os.path.join(save_path, 'd_rl_losses.txt'),
d_rl_losses)
np.savetxt(os.path.join(save_path, 'd_fk_losses.txt'),
d_fk_losses)
np.savetxt(os.path.join(save_path, 'g_adv_losses.txt'),
g_adv_losses)
np.savetxt(os.path.join(save_path, 'g_l1_losses.txt'),
g_l1_losses)

if batch_idx >= num_batches:
print('w{} max: {} min: {}'.format(m, np.max(canvas_w[m]), np.min(canvas_w[m])))
wavfile.write(os.path.join(save_path, 'sample_{}-{}.wav'.format(counter, m)), 16000, canvas_w[m])
if not os.path.exists(os.path.join(save_path, 'gtruth_{}.wav'.format(m))):
wavfile.write(os.path.join(save_path, 'gtruth_{}.wav'.format(m)), 16000, swaves[m])
wavfile.write(os.path.join(save_path, 'noisy_{}.wav'.format(m)), 16000, sample_noisy[m])
wavfile.write(os.path.join(save_path, 'dif_{}.wav'.format(m)), 16000, sample_dif[m])
np.savetxt(os.path.join(save_path, 'd_rl_losses.txt'), d_rl_losses)
np.savetxt(os.path.join(save_path, 'd_fk_losses.txt'), d_fk_losses)
#np.savetxt(os.path.join(save_path, 'd_nfk_losses.txt'), d_nfk_losses)
np.savetxt(os.path.join(save_path, 'g_adv_losses.txt'), g_adv_losses)
np.savetxt(os.path.join(save_path, 'g_l1_losses.txt'), g_l1_losses)

if batch_idx >= int(num_batches):
curr_epoch += 1
# re-set batch idx
batch_idx = 0
Expand Down Expand Up @@ -671,7 +648,7 @@ def build_model_single_gpu(self, gpu_idx):
self.g_losses = []

# Add the L1 loss to G
g_loss = tf.reduce_mean(tf.abs(tf.sub(G, wavbatch)))
g_loss = tf.reduce_mean(tf.abs(tf.subtract(G, wavbatch)))

self.g_losses.append(g_loss)

Expand Down Expand Up @@ -699,7 +676,7 @@ def train(self, config, devices):
init = tf.global_variables_initializer()
except AttributeError:
# fall back to old implementation
init = tf.initialize_all_variables()
init = tf.global_variables_initializer()

print('Initializing variables...')
self.sess.run(init)
Expand Down Expand Up @@ -785,27 +762,24 @@ def train(self, config, devices):
sample_dif = sample_wav - sample_noisy
for m in range(min(20, canvas_w.shape[0])):
print('w{} max: {} min: {}'.format(m, np.max(canvas_w[m]), np.min(canvas_w[m])))
wavfile.write(os.path.join(save_path, 'sample_{}-{}.wav'.format(counter, m)), 16e3, canvas_w[m])
wavfile.write(os.path.join(save_path, 'sample_{}-{}.wav'.format(counter, m)), 16000, canvas_w[m])
if not os.path.exists(os.path.join(save_path, 'gtruth_{}.wav'.format(m))):
wavfile.write(os.path.join(save_path, 'gtruth_{}.wav'.format(m)), 16e3, swaves[m])
wavfile.write(os.path.join(save_path, 'noisy_{}.wav'.format(m)), 16e3, sample_noisy[m])
wavfile.write(os.path.join(save_path, 'dif_{}.wav'.format(m)), 16e3, sample_dif[m])
wavfile.write(os.path.join(save_path, 'gtruth_{}.wav'.format(m)), 16000, swaves[m])
wavfile.write(os.path.join(save_path, 'noisy_{}.wav'.format(m)), 16000, sample_noisy[m])
wavfile.write(os.path.join(save_path, 'dif_{}.wav'.format(m)), 16000, sample_dif[m])
np.savetxt(os.path.join(save_path, 'g_losses.txt'), g_losses)

if batch_idx >= num_batches:
if batch_idx >= int(num_batches):
curr_epoch += 1
# re-set batch idx
batch_idx = 0
if curr_epoch >= config.epoch:
# done training
print('Done training; epoch limit {} '
'reached.'.format(self.epoch))
print('Saving last model at iteration {}'.format(counter))
self.save(config.save_path, counter)
self.writer.add_summary(_g_sum, counter)
break
except tf.errors.OutOfRangeError:
print('[!] Reached queues limits in training loop')
finally:
coord.request_stop()
coord.join(threads)
coord.join(threads)
Loading