mobiusml · Jiltseb · May 24, 2024 · May 24, 2024 · May 24, 2024
diff --git a/faster_whisper/transcribe.py b/faster_whisper/transcribe.py
@@ -16,9 +16,9 @@
 import numpy as np
 import tokenizers
 import torch
-from tqdm import tqdm
 
 from pyannote.audio import Model
+from tqdm import tqdm
 from transformers import Pipeline
 from transformers.pipelines.pt_utils import PipelineIterator
 
@@ -112,9 +112,11 @@ class TranscriptionInfo(NamedTuple):
     transcription_options: TranscriptionOptions
     vad_options: VadOptions
 
+
 # The code below is copied from whisper-x (https://github.com/m-bain/whisperX)
 # and adapted for faster_whisper
 
+
 class BatchedInferencePipeline(Pipeline):
 
     """
@@ -149,10 +151,10 @@ def __init__(
         self.use_vad_model = use_vad_model
         self.vad_onset = 0.500
         self.vad_offset = 0.363
-        self.vad_model_url =  (
-                            "https://whisperx.s3.eu-west-2.amazonaws.com/model_weights/segmentation"
-                            "/0b5b3216d60a2d32fc086b47ea8c67589aaeb26b7e07fcbe620d6d0b83e209ea/pytorch_model.bin"
-                        )
+        self.vad_model_url = (
+            "https://whisperx.s3.eu-west-2.amazonaws.com/model_weights/segmentation"
+            "/0b5b3216d60a2d32fc086b47ea8c67589aaeb26b7e07fcbe620d6d0b83e209ea/pytorch_model.bin"
+        )
         (
             self._preprocess_params,
             self._forward_params,
@@ -177,7 +179,6 @@ def __init__(
 
         super(Pipeline, self).__init__()
 
-
     def _sanitize_parameters(self, **kwargs):
         preprocess_kwargs = {}
         if "tokenizer" in kwargs:
@@ -2062,6 +2063,7 @@ def key_func(language):
     "log_prob_low_threshold": -2.0,
     "multilingual": False,
     "output_language": "en",
+    "hotwords": None,
 }
 
 

diff --git a/faster_whisper/vad.py b/faster_whisper/vad.py
@@ -4,7 +4,7 @@
 import warnings
 
 from collections.abc import Callable
-from typing import List, NamedTuple, Optional
+from typing import List, NamedTuple, Optional, Union
 
 import numpy as np
 import pandas as pd
@@ -314,19 +314,19 @@ class VoiceActivitySegmentation(VoiceActivityDetection):
     def __init__(
         self,
         segmentation: PipelineModel = "pyannote/segmentation",
-        device: torch.device | None = None,
+        device: Optional[Union[str, torch.device]] = None,
         fscore: bool = False,
-        use_auth_token: str | None = None,
+        use_auth_token: Optional[str] = None,
         **inference_kwargs,
     ):
         """Initialize the pipeline with the model name and the optional device.
 
         Args:
             dict parameters of VoiceActivityDetection class from pyannote:
             segmentation (PipelineModel): Loaded model name.
-            device (torch.device | None): Device to perform the segmentation.
+            device (str, torch.device or None): Device to perform the segmentation.
             fscore (bool): Flag indicating whether to compute F-score during inference.
-            use_auth_token (str | None): Optional authentication token for model access.
+            use_auth_token (str or None): Optional authentication token for model access.
             inference_kwargs (dict):  Additional arguments from VoiceActivityDetection pipeline.
         """
         super().__init__(
@@ -337,7 +337,7 @@ def __init__(
             **inference_kwargs,
         )
 
-    def apply(self, file: AudioFile, hook: Callable | None = None) -> Annotation:
+    def apply(self, file: AudioFile, hook: Optional[Callable] = None) -> Annotation:
         """Apply voice activity detection on the audio file.
 
         Args:
@@ -379,7 +379,7 @@ class BinarizeVadScores:
     def __init__(
         self,
         onset: float = 0.5,
-        offset: float | None = None,
+        offset: Optional[float] = None,
         min_duration_on: float = 0.0,
         min_duration_off: float = 0.0,
         pad_onset: float = 0.0,
@@ -442,7 +442,8 @@ def __get_active_regions(self, scores: SlidingWindowFeature) -> Annotation:
             curr_scores = [k_scores[0]]
             curr_timestamps = [start]
             t = start
-            for t, y in zip(timestamps[1:], k_scores[1:], strict=False):
+            # no `strict=False` here: zip() only accepts it on python 3.10 or later
+            for t, y in zip(timestamps[1:], k_scores[1:]):
                 # currently active
                 if is_active:
                     curr_duration = t - start

diff --git a/requirements.txt b/requirements.txt
@@ -5,7 +5,7 @@ tokenizers>=0.13,<1
 onnxruntime>=1.14,<2 
 transformers
 pyannote-audio>=3.1.1
-pandas>=2.1.4
+pandas
 torch>=2.1.1 
 torchaudio>=2.1.2
 jsons>=1.6.3