Merge pull request espnet#4097 from Johnson-Lsx/dccrn

Add DCCRN separator
chintu619 · Mar 3, 2022 · a04a98c · a04a98c
2 parents b274c4e + f587f4d
commit a04a98c
Show file tree

Hide file tree

Showing 8 changed files with 1,097 additions and 93 deletions.
diff --git a/README.md b/README.md
diff --git a/egs2/dns_ins20/enh1/README.md b/egs2/dns_ins20/enh1/README.md
@@ -14,9 +14,33 @@
  - config: ./conf/tuning/train_enh_blstm_tf.yaml
  - Pretrained model: https://zenodo.org/record/4923697
 
-|dataset|STOI|SAR|SDR|SIR|
-|---|---|---|---|---|
-|enhanced_cv_synthetic|0.95|18.63|18.63|0.00|
-|enhanced_tt_synthetic_no_reverb|0.92|10.92|10.92|0.00|
-|enhanced_tt_synthetic_with_reverb|0.85|9.31|9.31|0.00|
+| dataset                           | STOI | SAR   | SDR   | SIR  |
+| --------------------------------- | ---- | ----- | ----- | ---- |
+| enhanced_cv_synthetic             | 0.95 | 18.63 | 18.63 | 0.00 |
+| enhanced_tt_synthetic_no_reverb   | 0.92 | 10.92 | 10.92 | 0.00 |
+| enhanced_tt_synthetic_with_reverb | 0.85 | 9.31  | 9.31  | 0.00 |
 
+<!-- Generated by ./scripts/utils/show_enh_score.sh -->
+# RESULTS
+## Environments
+- date: `Thu Feb 10 23:11:40 CST 2022`
+- python version: `3.8.12 (default, Oct 12 2021, 13:49:34)  [GCC 7.5.0]`
+- espnet version: `espnet 0.10.5a1`
+- pytorch version: `pytorch 1.9.1`
+- Git hash: `6f66283b9eed7b0d5e5643feb18d8f60118a4afc`
+  - Commit date: `Mon Dec 13 15:30:29 2021 +0800`
+
+
+## enh_train_enh_dccrn_raw
+
+- config: ./conf/tuning/train_enh_dccrn.yaml
+- download_model: https://huggingface.co/Johnson-Lsx/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw
+
+| dataset                           | PESQ | STOI | SAR   | SDR   | SIR  | SI_SNR |
+| --------------------------------- | ---- | ---- | ----- | ----- | ---- | ------ |
+| enhanced_cv_synthetic             | 3.72 | 0.98 | 24.69 | 24.69 | 0.00 | 24.22  |
+| enhanced_tt_synthetic_no_reverb   | 3.29 | 0.96 | 17.69 | 17.69 | 0.00 | 17.50  |
+| enhanced_tt_synthetic_with_reverb | 2.54 | 0.81 | 10.45 | 10.45 | 0.00 | 9.72   |
+
+- Note: the model is trained only on data without reverberation.
+- Note: the PESQ score is computed with python-pesq (https://github.com/vBaiCai/python-pesq).
diff --git a/egs2/dns_ins20/enh1/conf/tuning/train_enh_dccrn.yaml b/egs2/dns_ins20/enh1/conf/tuning/train_enh_dccrn.yaml
@@ -0,0 +1,53 @@
+optim: adam
+init: null   # do not set init method here because DCCRN has its own initialization
+max_epoch: 100
+batch_type: folded
+batch_size: 32
+iterator_type: chunk
+chunk_length: 64000  # presumably counted in waveform samples; confirm against the data's sampling rate
+num_workers: 4
+optim_conf:
+    lr: 1.0e-03
+    eps: 1.0e-08
+    weight_decay: 1.0e-7
+patience: 10  # top-level patience (presumably early stopping; confirm); distinct from scheduler_conf.patience
+val_scheduler_criterion:  # (phase, metric) pair; presumably the value the LR scheduler monitors (confirm)
+- valid
+- loss
+best_model_criterion:  # each entry reads as (phase, metric, mode)
+-   - valid
+    - si_snr
+    - max
+-   - valid
+    - loss
+    - min
+keep_nbest_models: 1
+scheduler: reducelronplateau
+scheduler_conf:
+    mode: min  # consistent with the "loss" entry in val_scheduler_criterion, assuming that is what is monitored
+    factor: 0.7
+    patience: 1
+model_conf:
+    loss_type: si_snr
+encoder: stft
+encoder_conf:
+    n_fft: 512
+    win_length: 400
+    hop_length: 100
+decoder: stft
+decoder_conf:  # same STFT settings as encoder_conf above
+    n_fft: 512
+    win_length: 400
+    hop_length: 100
+separator: dccrn
+
+criterions:  # list of loss criteria; only one entry is defined here
+  # The first (and only) criterion
+  - name: si_snr  # same name as model_conf.loss_type
+    conf:
+      eps: 1.0e-7
+    # the wrapper applied to the current criterion
+    # for the single-talker case, we simply use the fixed_order wrapper
+    wrapper: fixed_order
+    wrapper_conf:
+      weight: 1.0