From 53bbe5401683c9a7549db62642e3d4535956b95c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=91=E5=A2=A8=E6=B0=B4=E9=B1=BC?= Date: Fri, 15 Nov 2024 19:57:38 +0800 Subject: [PATCH] fix: Use correct `seek` value in output, fix word timestamps when the initial timestamp is not zero (#1141) Co-authored-by: Mahmoud Ashraf --- faster_whisper/transcribe.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/faster_whisper/transcribe.py b/faster_whisper/transcribe.py index 6d18a173..80e5d92c 100644 --- a/faster_whisper/transcribe.py +++ b/faster_whisper/transcribe.py @@ -174,6 +174,9 @@ def forward(self, features, chunks_metadata, **forward_params): compression_ratio=get_compression_ratio( self.tokenizer.decode(subsegment["tokens"]) ), + seek=int( + chunk_metadata["start_time"] * self.model.frames_per_second + ), ) for subsegment in subsegments ] @@ -496,7 +499,7 @@ def _batched_segments_generator( for segment in result: seg_idx += 1 yield Segment( - seek=int(result[-1]["end"] * self.model.frames_per_second), + seek=segment["seek"], id=seg_idx, text=segment["text"], start=round(segment["start"], 3), @@ -1318,7 +1321,7 @@ def next_words_segment(segments: List[dict]) -> Optional[dict]: yield Segment( id=idx, - seek=seek, + seek=previous_seek, start=segment["start"], end=segment["end"], text=text, @@ -1585,7 +1588,7 @@ def add_word_timestamps( for segment_idx, segment in enumerate(segments): word_index = 0 - time_offset = segment[0]["start"] + time_offset = segment[0]["seek"] / self.frames_per_second median_duration, max_duration = median_max_durations[segment_idx] for subsegment_idx, subsegment in enumerate(segment): saved_tokens = 0