Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ability to save output in the same directory as inputs #467

Merged
merged 10 commits into from
Jan 20, 2025
13 changes: 10 additions & 3 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,9 +115,14 @@ def launch(self):
visible=self.args.colab,
value="")
cb_include_subdirectory = gr.Checkbox(label="Include Subdirectory Files",
info="When using Input Folder Path above, whether to include all files in the subdirectory or not",
info="When using Input Folder Path above, whether to include all files in the subdirectory or not.",
visible=self.args.colab,
value=False)
cb_save_same_dir = gr.Checkbox(label="Save outputs at same directory",
info="When using Input Folder Path above, whether to save output in the same directory as inputs or not, in addition to the original"
" output directory.",
visible=self.args.colab,
value=True)
pipeline_params, dd_file_format, cb_timestamp = self.create_pipeline_inputs()

with gr.Row():
Expand All @@ -127,9 +132,11 @@ def launch(self):
files_subtitles = gr.Files(label=_("Downloadable output file"), scale=3, interactive=False)
btn_openfolder = gr.Button('📂', scale=1)

params = [input_file, tb_input_folder, cb_include_subdirectory, dd_file_format, cb_timestamp]
params = [input_file, tb_input_folder, cb_include_subdirectory, cb_save_same_dir,
dd_file_format, cb_timestamp]
params = params + pipeline_params
btn_run.click(fn=self.whisper_inf.transcribe_file,
inputs=params + pipeline_params,
inputs=params,
outputs=[tb_indicator, files_subtitles])
btn_openfolder.click(fn=lambda: self.open_folder("outputs"), inputs=None, outputs=None)

Expand Down
4 changes: 2 additions & 2 deletions configs/default_parameters.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ whisper:
hotwords: null
language_detection_threshold: 0.5
language_detection_segments: 1
add_timestamp: true
add_timestamp: false

vad:
vad_filter: false
Expand Down Expand Up @@ -62,4 +62,4 @@ translation:
source_lang: null
target_lang: null
max_length: 200
add_timestamp: true
add_timestamp: false
18 changes: 17 additions & 1 deletion modules/whisper/base_transcription_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ def transcribe_file(self,
files: Optional[List] = None,
input_folder_path: Optional[str] = None,
include_subdirectory: Optional[str] = None,
save_same_dir: Optional[str] = None,
file_format: str = "SRT",
add_timestamp: bool = True,
progress=gr.Progress(),
Expand All @@ -201,7 +202,11 @@ def transcribe_file(self,
Input folder path to transcribe from gr.Textbox(). If this is provided, `files` will be ignored and
this will be used instead.
include_subdirectory: Optional[str]
When using Input Folder Path above, whether to include all files in the subdirectory or not
When using `input_folder_path`, whether to include all files in the subdirectory or not
save_same_dir: Optional[str]
When using `input_folder_path`, whether to also save the output in the same directory as the inputs, in addition
to the original output directory. This feature is only available when using `input_folder_path`, because
gradio currently only provides the cached file path to the function.
file_format: str
Subtitle File format to write from gr.Dropdown(). Supported format: [SRT, WebVTT, txt]
add_timestamp: bool
Expand Down Expand Up @@ -242,6 +247,17 @@ def transcribe_file(self,
)

file_name, file_ext = os.path.splitext(os.path.basename(file))
if save_same_dir and input_folder_path:
output_dir = os.path.dirname(file)
subtitle, file_path = generate_file(
output_dir=output_dir,
output_file_name=file_name,
output_format=file_format,
result=transcribed_segments,
add_timestamp=add_timestamp,
**writer_options
)

subtitle, file_path = generate_file(
output_dir=self.output_dir,
output_file_name=file_name,
Expand Down
1 change: 1 addition & 0 deletions tests/test_transcription.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ def test_transcribe(
[audio_path],
None,
None,
None,
"SRT",
False,
gr.Progress(),
Expand Down
Loading