diff --git a/.gitignore b/.gitignore index e7e375e..1faac5f 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,3 @@ dist .DS_Store *.egg-info auto_subtitle/__pycache__ -build diff --git a/README.md b/README.md index 1d21530..24ce2b0 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,16 @@ # Automatic subtitles in your videos +This is a fork of [auto_subtitle](https://github.com/m1guelpf/auto-subtitle) developed by [m1guelpf](https://github.com/m1guelpf). The main difference in `AutoSubZ` is the addition of several features, such as .vtt and .txt output, along with fixes for various bugs. + + + This repository uses `ffmpeg` and [OpenAI's Whisper](https://openai.com/blog/whisper) to automatically generate and overlay subtitles on any video. ## Installation -To get started, you'll need Python 3.7 or newer. Install the binary by running the following command: +To get started, you'll need Python 3.8 or newer. Install the binary by running the following command: - pip install git+https://github.com/m1guelpf/auto-subtitle.git + pip install git+https://github.com/zaltinsoy/AutoSubZ.git You'll also need to install [`ffmpeg`](https://ffmpeg.org/), which is available from most package managers: @@ -17,8 +21,9 @@ sudo apt update && sudo apt install ffmpeg # on MacOS using Homebrew (https://brew.sh/) brew install ffmpeg -# on Windows using Chocolatey (https://chocolatey.org/) -choco install ffmpeg +# on Windows using Winget +winget install -e --id Gyan.FFmpeg + ``` ## Usage diff --git a/auto_subtitle/cli.py b/auto_subtitle/cli.py index 21cdc16..ef6196c 100644 --- a/auto_subtitle/cli.py +++ b/auto_subtitle/cli.py @@ -4,7 +4,7 @@ import argparse import warnings import tempfile -from .utils import filename, str2bool, write_srt +from .utils import filename, write_srt def main(): @@ -16,24 +16,40 @@ def main(): choices=whisper.available_models(), help="name of the Whisper model to use") parser.add_argument("--output_dir", "-o", type=str, default=".", help="directory to save the outputs") - parser.add_argument("--output_srt", type=str2bool, default=False, - help="whether to output the .srt file along with the video files") - parser.add_argument("--srt_only", type=str2bool, default=False, + parser.add_argument("--subtitle_format", type=str, default="srt", choices=["srt","vtt"], + help="subtitle file format type") + parser.add_argument("--output_mkv", action="store_true", + help="whether to output the new subtitled video as an .mkv container rather than .mp4 container") + parser.add_argument("--output_srt", action="store_true", + help="output the .srt file along with the video files") + parser.add_argument("--output_txt", action="store_true", + help="whether to also save the subtitles as a .txt file") + parser.add_argument("--srt_only", action="store_true", help="only generate the .srt file and not create overlayed video") - parser.add_argument("--verbose", type=str2bool, default=False, - help="whether to print out the progress and debug messages") - + parser.add_argument("--verbose", action="store_true", + help="print out the progress and debug messages") + parser.add_argument("--task", type=str, default="transcribe", choices=[ "transcribe", "translate"], help="whether to perform X->X speech recognition ('transcribe') or X->English translation ('translate')") - parser.add_argument("--language", type=str, default="auto", choices=["auto","af","am","ar","as","az","ba","be","bg","bn","bo","br","bs","ca","cs","cy","da","de","el","en","es","et","eu","fa","fi","fo","fr","gl","gu","ha","haw","he","hi","hr","ht","hu","hy","id","is","it","ja","jw","ka","kk","km","kn","ko","la","lb","ln","lo","lt","lv","mg","mi","mk","ml","mn","mr","ms","mt","my","ne","nl","nn","no","oc","pa","pl","ps","pt","ro","ru","sa","sd","si","sk","sl","sn","so","sq","sr","su","sv","sw","ta","te","tg","th","tk","tl","tr","tt","uk","ur","uz","vi","yi","yo","zh"], - help="What is the origin language of the video? If unset, it is detected automatically.") + parser.add_argument("--language", type=str, default="auto", choices=["auto","af","am","ar","as","az","ba","be","bg","bn","bo","br","bs","ca","cs","cy","da","de","el","en", + "es","et","eu","fa","fi","fo","fr","gl","gu","ha","haw","he","hi","hr","ht","hu","hy","id","is","it","ja","jw","ka","kk","km","kn","ko","la","lb","ln", + "lo","lt","lv","mg","mi","mk","ml","mn","mr","ms","mt","my","ne","nl","nn","no","oc","pa","pl","ps","pt","ro","ru","sa","sd","si","sk","sl","sn","so", + "sq","sr","su","sv","sw","ta","te","tg","th","tk","tl","tr","tt","uk","ur","uz","vi","yi","yo","zh"], + help="What is the origin language of the video? If unset, it is detected automatically.") + parser.add_argument("--word_timestamps", action="store_true", default=False, + help="(experimental) extract word-level timestamps and refine the results based on them") args = parser.parse_args().__dict__ model_name: str = args.pop("model") output_dir: str = args.pop("output_dir") output_srt: bool = args.pop("output_srt") + subtitle_format: str = args.pop("subtitle_format") + output_txt: bool = args.pop("output_txt") srt_only: bool = args.pop("srt_only") + verbose: bool = args["verbose"] language: str = args.pop("language") + output_mkv: bool = args.pop("output_mkv") + os.makedirs(output_dir, exist_ok=True) @@ -48,24 +64,29 @@ def main(): model = whisper.load_model(model_name) audios = get_audio(args.pop("video")) subtitles = get_subtitles( - audios, output_srt or srt_only, output_dir, lambda audio_path: model.transcribe(audio_path, **args) + audios, output_srt or srt_only, subtitle_format,output_txt, output_dir, lambda audio_path: model.transcribe(audio_path, **args) ) if srt_only: return - for path, srt_path in subtitles.items(): - out_path = os.path.join(output_dir, f"{filename(path)}.mp4") + ext = "mkv" if output_mkv else "mp4" - print(f"Adding subtitles to {filename(path)}...") + for path, srt_path in subtitles.items(): - video = ffmpeg.input(path) - audio = video.audio + out_path = os.path.join(output_dir, f"{os.path.splitext(os.path.basename(path))[0]}.{ext}") + print(f"Adding subtitles to {os.path.basename(path)}...") + + stream = ffmpeg.input(path) + audio = stream.audio + videoWithSub = stream.video.filter('subtitles', filename=srt_path,force_style='OutlineColour=&H40000000,BorderStyle=3') - ffmpeg.concat( - video.filter('subtitles', srt_path, force_style="OutlineColour=&H40000000,BorderStyle=3"), audio, v=1, a=1 - ).output(out_path).run(quiet=True, overwrite_output=True) + if output_mkv: + stream = ffmpeg.output(ffmpeg.input(srt_path), stream, out_path, vcodec='copy', scodec='copy') + else: + stream = ffmpeg.output(audio, videoWithSub, out_path, vcodec='libx264', acodec='copy') + ffmpeg.run(stream, quiet= not verbose, overwrite_output=True) print(f"Saved subtitled video to {os.path.abspath(out_path)}.") @@ -88,12 +109,17 @@ def get_audio(paths): return audio_paths -def get_subtitles(audio_paths: list, output_srt: bool, output_dir: str, transcribe: callable): + +def get_subtitles(audio_paths: list, output_srt: bool,subtitle_format: str,output_txt: bool, output_dir: str, transcribe: callable): subtitles_path = {} for path, audio_path in audio_paths.items(): srt_path = output_dir if output_srt else tempfile.gettempdir() - srt_path = os.path.join(srt_path, f"{filename(path)}.srt") + + if(subtitle_format=="srt"): + srt_path = os.path.join(srt_path, f"{filename(path)}.srt") + else: # vtt + srt_path = os.path.join(srt_path, f"{filename(path)}.vtt") print( f"Generating subtitles for {filename(path)}... This might take a while." @@ -104,12 +130,19 @@ def get_subtitles(audio_paths: list, output_srt: bool, output_dir: str, transcri warnings.filterwarnings("default") with open(srt_path, "w", encoding="utf-8") as srt: - write_srt(result["segments"], file=srt) + write_srt(result["segments"], file=srt,subtitle_format=subtitle_format) subtitles_path[path] = srt_path + if output_txt: + text_path = os.path.join(output_dir, f"{filename(path)}.txt") + with open(text_path, "w", encoding="utf-8") as text_file: + for segment in result["segments"]: + print(segment["text"], file=text_file) + print(f"Saving subtitles to text file: {text_path}") + return subtitles_path if __name__ == '__main__': - main() + main() \ No newline at end of file diff --git a/auto_subtitle/utils.py b/auto_subtitle/utils.py index fb4e11a..7c483be 100644 --- a/auto_subtitle/utils.py +++ b/auto_subtitle/utils.py @@ -2,18 +2,7 @@ from typing import Iterator, TextIO -def str2bool(string): - string = string.lower() - str2val = {"true": True, "false": False} - - if string in str2val: - return str2val[string] - else: - raise ValueError( - f"Expected one of {set(str2val.keys())}, got {string}") - - -def format_timestamp(seconds: float, always_include_hours: bool = False): +def format_timestamp(seconds: float, always_include_hours: bool = False,subtitle_format: str = "srt"): assert seconds >= 0, "non-negative timestamp expected" milliseconds = round(seconds * 1000.0) @@ -27,20 +16,40 @@ def format_timestamp(seconds: float, always_include_hours: bool = False): milliseconds -= seconds * 1_000 hours_marker = f"{hours:02d}:" if always_include_hours or hours > 0 else "" - return f"{hours_marker}{minutes:02d}:{seconds:02d},{milliseconds:03d}" - -def write_srt(transcript: Iterator[dict], file: TextIO): - for i, segment in enumerate(transcript, start=1): - print( - f"{i}\n" - f"{format_timestamp(segment['start'], always_include_hours=True)} --> " - f"{format_timestamp(segment['end'], always_include_hours=True)}\n" - f"{segment['text'].strip().replace('-->', '->')}\n", - file=file, - flush=True, - ) + #return f"{hours_marker}{minutes:02d}:{seconds:02d},{milliseconds:03d}" + if subtitle_format == "srt": + output=f"{hours_marker}{minutes:02d}:{seconds:02d},{milliseconds:03d}" + else: + output= f"{hours:02d}:{minutes:02d}:{seconds:02d}.{milliseconds:03d}" + + return output + + +def write_srt(transcript: Iterator[dict], file: TextIO,subtitle_format: str = "srt"): + + if subtitle_format == "vtt": + print("WEBVTT\n", file=file) + for i, segment in enumerate(transcript, start=1): + print( + f"{format_timestamp(segment['start'], always_include_hours=True,subtitle_format=subtitle_format)} --> " + f"{format_timestamp(segment['end'], always_include_hours=True,subtitle_format=subtitle_format)}\n" + f"{segment['text'].strip().replace('-->', '->')}\n", + file=file, + flush=True, + ) + + else: #srt + for i, segment in enumerate(transcript, start=1): + print( + f"{i}\n" + f"{format_timestamp(segment['start'], always_include_hours=True,subtitle_format=subtitle_format)} --> " + f"{format_timestamp(segment['end'], always_include_hours=True,subtitle_format=subtitle_format)}\n" + f"{segment['text'].strip().replace('-->', '->')}\n", + file=file, + flush=True, + ) def filename(path): - return os.path.splitext(os.path.basename(path))[0] + return os.path.splitext(os.path.basename(path))[0] \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 73bca28..e829d2b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,3 @@ openai-whisper +ffmpeg-python +numpy<2.0.0 \ No newline at end of file diff --git a/setup.py b/setup.py index ca2ed5b..9493b92 100644 --- a/setup.py +++ b/setup.py @@ -8,6 +8,8 @@ author="Miguel Piedrafita", install_requires=[ 'openai-whisper', + 'ffmpeg-python', + 'numpy<2.0.0' ], description="Automatically generate and embed subtitles into your videos", entry_points={