Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add dot for local import module #950

Closed
wants to merge 1 commit into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 13 additions & 13 deletions whisperx/SubtitlesProcessor.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import math
-from conjunctions import get_conjunctions, get_comma
+from .conjunctions import get_conjunctions, get_comma
from typing import TextIO

def normal_round(n):
Expand All @@ -23,7 +23,7 @@ def format_timestamp(seconds: float, is_vtt: bool = False):
milliseconds -= seconds * 1_000

separator = '.' if is_vtt else ','

hours_marker = f"{hours:02d}:"
return (
f"{hours_marker}{minutes:02d}:{seconds:02d}{separator}{milliseconds:03d}"
Expand Down Expand Up @@ -78,7 +78,7 @@ def process_segments(self, advanced_splitting=True):
subtitles = []
for i, segment in enumerate(self.segments):
next_segment_start_time = self.segments[i + 1]['start'] if i + 1 < len(self.segments) else None

if advanced_splitting:

split_points = self.determine_advanced_split_points(segment, next_segment_start_time)
Expand Down Expand Up @@ -138,10 +138,10 @@ def determine_advanced_split_points(self, segment, next_segment_start_time=None)

return split_points


def generate_subtitles_from_split_points(self, segment, split_points, next_start_time=None):
subtitles = []

words = segment.get('words', segment['text'].split())
total_word_count = len(words)
total_time = segment['end'] - segment['start']
Expand All @@ -152,7 +152,7 @@ def generate_subtitles_from_split_points(self, segment, split_points, next_start

fragment_words = words[start_idx:split_point + 1]
current_word_count = len(fragment_words)


if isinstance(fragment_words[0], dict):
start_time = fragment_words[0]['start']
Expand All @@ -173,14 +173,14 @@ def generate_subtitles_from_split_points(self, segment, split_points, next_start
'end': end_time,
'text': fragment if not isinstance(fragment_words[0], dict) else prefix.join(word['word'] for word in fragment_words)
})

start_idx = split_point + 1

# Handle the last fragment
if start_idx < len(words):
fragment_words = words[start_idx:]
current_word_count = len(fragment_words)

if isinstance(fragment_words[0], dict):
start_time = fragment_words[0]['start']
end_time = fragment_words[-1]['end']
Expand All @@ -198,13 +198,13 @@ def generate_subtitles_from_split_points(self, segment, split_points, next_start
'end': end_time if end_time is not None else segment['end'],
'text': fragment if not isinstance(fragment_words[0], dict) else prefix.join(word['word'] for word in fragment_words)
})

return subtitles



def save(self, filename="subtitles.srt", advanced_splitting=True):

subtitles = self.process_segments(advanced_splitting)

def write_subtitle(file, idx, start_time, end_time, text):
Expand All @@ -216,12 +216,12 @@ def write_subtitle(file, idx, start_time, end_time, text):
with open(filename, 'w', encoding='utf-8') as file:
if self.is_vtt:
file.write("WEBVTT\n\n")

if advanced_splitting:
for idx, subtitle in enumerate(subtitles, 1):
start_time = format_timestamp(subtitle['start'], self.is_vtt)
end_time = format_timestamp(subtitle['end'], self.is_vtt)
text = subtitle['text'].strip()
write_subtitle(file, idx, start_time, end_time, text)

-return len(subtitles)
+return len(subtitles)