Skip to content

Commit

Permalink
Using hamming window for Paraformer frontend.
Browse files Browse the repository at this point in the history
  • Loading branch information
Huang Lekai committed Jun 3, 2024
1 parent e197305 commit aa3eba8
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 3 deletions.
3 changes: 2 additions & 1 deletion wenet/cli/paraformer_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ def transcribe(self, audio_file: str, tokens_info: bool = False) -> dict:
frame_length=25,
frame_shift=10,
energy_floor=0.0,
- sample_frequency=self.resample_rate)
+ sample_frequency=self.resample_rate,
+ window_type="hamming")
feats = feats.unsqueeze(0)
feats_lens = torch.tensor([feats.size(1)],
dtype=torch.int64,
Expand Down
6 changes: 4 additions & 2 deletions wenet/dataset/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,8 @@ def compute_fbank(sample,
num_mel_bins=23,
frame_length=25,
frame_shift=10,
- dither=0.0):
+ dither=0.0,
+ window_type="povey"):
""" Extract fbank
Args:
Expand All @@ -253,7 +254,8 @@ def compute_fbank(sample,
frame_shift=frame_shift,
dither=dither,
energy_floor=0.0,
- sample_frequency=sample_rate)
+ sample_frequency=sample_rate,
+ window_type=window_type)
sample['feat'] = mat
return sample

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ def convert_to_wenet_yaml(configs, wenet_yaml_path: str,
configs['dataset_conf']['fbank_conf']['frame_shift'] = 10
configs['dataset_conf']['fbank_conf']['frame_length'] = 25
configs['dataset_conf']['fbank_conf']['dither'] = 0.1
+ configs['dataset_conf']['fbank_conf']['window_type'] = 'hamming'
configs['dataset_conf']['spec_sub'] = False
configs['dataset_conf']['spec_trim'] = False
configs['dataset_conf']['shuffle'] = True
Expand Down

0 comments on commit aa3eba8

Please sign in to comment.