From 27323492155a35c6f5399c3d4031fd6de148187f Mon Sep 17 00:00:00 2001
From: Yifan Yang <yifanyeung@qq.com>
Date: Tue, 17 Oct 2023 19:15:55 +0800
Subject: [PATCH] Adapt GigaSpeech zipformer export and streaming-decode scripts

---
 egs/gigaspeech/ASR/zipformer/export-onnx.py   |  6 +++---
 egs/gigaspeech/ASR/zipformer/export.py        | 14 +++++---------
 .../ASR/zipformer/streaming_decode.py         | 19 +++++++++++--------
 3 files changed, 19 insertions(+), 20 deletions(-)

diff --git a/egs/gigaspeech/ASR/zipformer/export-onnx.py b/egs/gigaspeech/ASR/zipformer/export-onnx.py
index 3682f0b625..0f78cfe5b1 100755
--- a/egs/gigaspeech/ASR/zipformer/export-onnx.py
+++ b/egs/gigaspeech/ASR/zipformer/export-onnx.py
@@ -7,14 +7,14 @@
 This script exports a transducer model from PyTorch to ONNX.
 
 We use the pre-trained model from
-https://huggingface.co/Zengwei/icefall-asr-librispeech-zipformer-2023-05-15
+https://huggingface.co/yfyeung/icefall-asr-gigaspeech-zipformer-2023-10-17
 as an example to show how to use this file.
 
 1. Download the pre-trained model
 
-cd egs/librispeech/ASR
+cd egs/gigaspeech/ASR
 
-repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-zipformer-2023-05-15
+repo_url=https://huggingface.co/yfyeung/icefall-asr-gigaspeech-zipformer-2023-10-17
 GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
 repo=$(basename $repo_url)
 
diff --git a/egs/gigaspeech/ASR/zipformer/export.py b/egs/gigaspeech/ASR/zipformer/export.py
index 2b8d1aaf36..e45c96b573 100755
--- a/egs/gigaspeech/ASR/zipformer/export.py
+++ b/egs/gigaspeech/ASR/zipformer/export.py
@@ -24,7 +24,7 @@
 
 Usage:
 
-Note: This is a example for librispeech dataset, if you are using different
+Note: This is an example for gigaspeech dataset, if you are using different
 dataset, you should change the argument values according to your dataset.
 
 (1) Export to torchscript model using torch.jit.script()
@@ -96,7 +96,7 @@
     cd /path/to/exp_dir
     ln -s pretrained.pt epoch-9999.pt
 
-    cd /path/to/egs/librispeech/ASR
+    cd /path/to/egs/gigaspeech/ASR
     ./zipformer/decode.py \
         --exp-dir ./zipformer/exp \
         --epoch 9999 \
@@ -112,7 +112,7 @@
     cd /path/to/exp_dir
     ln -s pretrained.pt epoch-9999.pt
 
-    cd /path/to/egs/librispeech/ASR
+    cd /path/to/egs/gigaspeech/ASR
 
     # simulated streaming decoding
     ./zipformer/decode.py \
@@ -144,17 +144,13 @@
 provided one for you. You can get it at
 
 - non-streaming model:
-https://huggingface.co/Zengwei/icefall-asr-librispeech-zipformer-2023-05-15
-
-- streaming model:
-https://huggingface.co/Zengwei/icefall-asr-librispeech-streaming-zipformer-2023-05-17
+https://huggingface.co/yfyeung/icefall-asr-gigaspeech-zipformer-2023-10-17
 
 with the following commands:
 
     sudo apt-get install git-lfs
     git lfs install
-    git clone https://huggingface.co/Zengwei/icefall-asr-librispeech-zipformer-2023-05-15
-    git clone https://huggingface.co/Zengwei/icefall-asr-librispeech-streaming-zipformer-2023-05-17
+    git clone https://huggingface.co/yfyeung/icefall-asr-gigaspeech-zipformer-2023-10-17
     # You will find the pre-trained models in exp dir
 """
 
diff --git a/egs/gigaspeech/ASR/zipformer/streaming_decode.py b/egs/gigaspeech/ASR/zipformer/streaming_decode.py
index 904caf8af1..1b81c8511c 100755
--- a/egs/gigaspeech/ASR/zipformer/streaming_decode.py
+++ b/egs/gigaspeech/ASR/zipformer/streaming_decode.py
@@ -40,7 +40,7 @@
 import numpy as np
 import sentencepiece as spm
 import torch
-from asr_datamodule import LibriSpeechAsrDataModule
+from asr_datamodule import GigaSpeechAsrDataModule
 from decode_stream import DecodeStream
 from kaldifeat import Fbank, FbankOptions
 from lhotse import CutSet
@@ -682,7 +682,7 @@ def save_results(
 @torch.no_grad()
 def main():
     parser = get_parser()
-    LibriSpeechAsrDataModule.add_arguments(parser)
+    GigaSpeechAsrDataModule.add_arguments(parser)
     args = parser.parse_args()
     args.exp_dir = Path(args.exp_dir)
 
@@ -823,15 +823,16 @@ def main():
     num_param = sum([p.numel() for p in model.parameters()])
     logging.info(f"Number of model parameters: {num_param}")
 
-    librispeech = LibriSpeechAsrDataModule(args)
+    gigaspeech = GigaSpeechAsrDataModule(args)
 
-    test_clean_cuts = librispeech.test_clean_cuts()
-    test_other_cuts = librispeech.test_other_cuts()
+    dev_cuts = gigaspeech.dev_cuts()
+    test_cuts = gigaspeech.test_cuts()
 
-    test_sets = ["test-clean", "test-other"]
-    test_cuts = [test_clean_cuts, test_other_cuts]
+    # decode_dataset() is called with cuts=..., so iterate over the cut sets.
+    test_sets = ["dev", "test"]
+    test_cuts = [dev_cuts, test_cuts]
 
     for test_set, test_cut in zip(test_sets, test_cuts):
         results_dict = decode_dataset(
             cuts=test_cut,
             params=params,