Skip to content

Commit

Permalink
update packages version and fix bug
Browse files Browse the repository at this point in the history
- add torch and torchaudio version in requirements, remove torchvision
- faster whisper now uses v0.10.0
- updated stable-ts, whisper, and webrtcvad version
- add a pause option when downloading a faster whisper model
- dialog interaction is now disabled while a model is being downloaded from the dialog
- fix for #54, the problem was that I forgot that fg was set in the parameter
- ffmpeg is now enforced instead of asking whether to continue when it fails to install
  • Loading branch information
Dadangdut33 committed Nov 25, 2023
1 parent 82e7041 commit 165d25a
Show file tree
Hide file tree
Showing 10 changed files with 226 additions and 158 deletions.
14 changes: 5 additions & 9 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
torch
torchvision
torchaudio
torch==2.1.1
torchaudio==2.1.1
deep-translator==1.11.1
notify-py==0.3.42
loguru
Expand All @@ -15,13 +14,10 @@ requests==2.31.0
scipy==1.11.3
sounddevice==0.4.6
soundfile==0.12.1
webrtcvad==2.0.10
webrtcvad @ git+https://github.com/wiseman/py-webrtcvad.git@e283ca41df3a84b0e87fb1f5cb9b21580a286b09
darkdetect==0.8.0
arabic-reshaper==3.0.0
python-bidi==0.4.2
matplotlib==3.8.0
onnxruntime==1.16.1
demucs==4.0.1
stable-ts @ git+https://github.com/jianfch/stable-ts.git@b336735ff784bb59690eec8f9f706b0151dda74c
openai-whisper==20231106
faster-whisper @ git+https://github.com/guillaumekln/faster-whisper.git@1fab6eee59b268f89ff3912a4b19c82751e6bb26
stable-ts @ git+https://github.com/jianfch/stable-ts.git@71b9f1fcbd1268f8bfe95bba6a394a2bc2e7339b
faster-whisper @ git+https://github.com/SYSTRAN/faster-whisper.git@e1a218fab1ab02d637b79565995bf1a9c4c83a09
6 changes: 3 additions & 3 deletions speech_translate/_logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,10 @@ class StreamStderrToLogger(object):
"""
def __init__(self, level):
self.level = level
# tqdm use stderr to print, so we should consider it as info
# tqdm use stderr to print, so we can consider it as info
self.considered_info = [
"Downloading", "Fetching", "run_threaded", "Estimating duration from bitrate, this may be inaccurate",
"Transcribe", "Translate", "Refine", "Align", "Running", "done", "Using cache found in"
"Downloading", "Fetching", "run_threaded", "Estimating duration from bitrate", "Transcribe", "Translate",
"Translating", "Refine", "Align", "Running", "done", "Using cache found in", "%|#", "0%|"
]

def write(self, buf):
Expand Down
46 changes: 46 additions & 0 deletions speech_translate/ui/custom/dialog.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ def __init__(
self.mode = mode
self.data_list = []
self.headers = headers
self.interact_disabled = False

self.root = Toplevel(self.master)
self.root.geometry("+400+250")
Expand Down Expand Up @@ -201,11 +202,30 @@ def submit(self):
self.root.destroy()

def on_close(self):
if self.interact_disabled:
return

if not messagebox.askyesno("Cancel", "Are you sure you want to cancel?", parent=self.root):
return

self.root.destroy()

def disable_interactions(self):
self.interact_disabled = True
self.cb_model.configure(state="disabled")
self.btn_add.configure(state="disabled")
self.btn_delete.configure(state="disabled")
self.btn_start.configure(state="disabled")
self.btn_cancel.configure(state="disabled")

def enable_interactions(self):
self.interact_disabled = False
self.cb_model.configure(state="readonly")
self.btn_add.configure(state="normal")
self.btn_delete.configure(state="normal")
self.btn_start.configure(state="normal")
self.btn_cancel.configure(state="normal")


class FileImportDialog(FileOperationDialog):
def __init__(self, master, title: str, submit_func, theme: str, **kwargs):
Expand Down Expand Up @@ -375,6 +395,22 @@ def submit(self):
if status: # if status is True, meaning process thread is successfully started, then close the window
self.root.destroy()

def disable_interactions(self):
super().disable_interactions()
self.cb_engine.configure(state="disabled")
self.cb_source_lang.configure(state="disabled")
self.cb_target_lang.configure(state="disabled")
self.cbtn_transcribe.configure(state="disabled")
self.cbtn_translate.configure(state="disabled")

def enable_interactions(self):
super().enable_interactions()
self.cb_engine.configure(state="readonly")
self.cb_source_lang.configure(state="readonly")
self.cb_target_lang.configure(state="readonly")
self.cbtn_transcribe.configure(state="normal")
self.cbtn_translate.configure(state="normal")


class TranslateResultDialog(FileOperationDialog):
def __init__(self, master, title: str, submit_func, theme: str, **kwargs):
Expand Down Expand Up @@ -455,6 +491,16 @@ def submit(self):
self.submit_func(self.var_engine.get(), self.var_target_lang.get().lower(), [x[0] for x in self.data_list])
self.root.destroy()

def disable_interactions(self):
super().disable_interactions()
self.cb_engine.configure(state="disabled")
self.cb_target_lang.configure(state="disabled")

def enable_interactions(self):
super().enable_interactions()
self.cb_engine.configure(state="readonly")
self.cb_target_lang.configure(state="readonly")


class RefinementDialog(FileOperationDialog):
def __init__(self, master, title: str, submit_func, theme: str, **kwargs):
Expand Down
57 changes: 45 additions & 12 deletions speech_translate/ui/custom/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def whisper_download_with_progress_gui(
# flag
paused = False

def pause_download():
def toggle_pause():
nonlocal paused
paused = not paused
if paused:
Expand Down Expand Up @@ -115,7 +115,7 @@ def pause_download():
lbl_status_text = ttk.Label(status_frame, text=f"Downloading {model_name} model")
lbl_status_text.pack(side="left", padx=5, pady=5)

btn_pause = ttk.Button(btn_frame, text="Pause", command=pause_download)
btn_pause = ttk.Button(btn_frame, text="Pause", command=toggle_pause)
btn_pause.pack(side="left", fill="x", padx=5, pady=5, expand=True)

btn_cancel = ttk.Button(btn_frame, text="Cancel", command=cancel_func, style="Accent.TButton")
Expand Down Expand Up @@ -305,7 +305,7 @@ def snapshot_download(

filtered_repo_files = list(
huggingface_hub.utils.filter_repo_objects(
items=[f.rfilename for f in repo_info.siblings],
items=[f.rfilename for f in repo_info.siblings], # type: ignore
allow_patterns=allow_patterns,
ignore_patterns=ignore_patterns,
)
Expand Down Expand Up @@ -404,10 +404,6 @@ def faster_whisper_download_with_progress_gui(
# clear recent_stderr
recent_stderr.clear()

# add label that says downloading please wait
failed = False
msg = ""

f1 = ttk.Frame(root)
f1.pack(side="top", fill="x", expand=True)

Expand All @@ -426,6 +422,9 @@ def faster_whisper_download_with_progress_gui(
btn_cancel = ttk.Button(f1, text="Cancel", command=cancel_func, style="Accent.TButton")
btn_cancel.pack(side="right", padx=(5, 10), pady=(5, 0))

btn_pause = ttk.Button(f1, text="Pause", command=lambda: toggle_pause())
btn_pause.pack(side="right", padx=5, pady=(5, 0))

# add progress bar that just goes back and forth
progress = ttk.Progressbar(f2, orient="horizontal", length=200, mode="indeterminate")
progress.pack(expand=True, fill="x", padx=10, pady=(2, 2))
Expand All @@ -450,8 +449,14 @@ def update_log():
text_log.insert(1.0, content)
text_log.see("end") # scroll to the bottom

failed = False
msg = ""
finished = False
paused = False
killed = False

def run_threaded():
nonlocal failed, msg
nonlocal failed, msg, finished, paused

root.title("Verifying Model")
lbl_status_text.configure(text=f"Verifying {model_name} model please wait...")
Expand Down Expand Up @@ -479,24 +484,52 @@ def run_threaded():
failed = True
msg = str(e)

finally:
if not paused:
finished = True

threaded = Thread(target=run_threaded, daemon=True)
threaded.start()
start_time = time()

while threaded.is_alive():
def toggle_pause():
nonlocal paused, killed, threaded
paused = not paused
if paused:
logger.info("Download paused")
btn_pause["text"] = "Resume"
progress.stop()
else:
logger.info("Download resumed")
btn_pause["text"] = "Pause"
progress.start(15)
killed = False
threaded = Thread(target=run_threaded, daemon=True)
threaded.start()

while not finished:
if paused and not killed:
kill_thread(threaded)
killed = True
recent_stderr.append("Download paused")
update_log()

if bc.cancel_dl:
kill_thread(threaded)
finished = True # mark as finished
root.destroy()
mbox("Download Cancelled", f"Downloading of {model_name} faster whisper model has been cancelled", 0, master)
break

# check if 2 second have passed. Means probably downloading from the hub
if time() - start_time > 2:
root.title("Downloading Faster Whisper Model")
root.title(f"{'Downloading' if not paused else 'Paused downloading of'} Faster Whisper Model")
lbl_status_text.configure(
text=f"Downloading {model_name} model, {get_file_amount(storage_folder + '/' + 'blobs')} files downloaded..."
text=
f"{'Downloading' if not paused else 'Paused downloading'} {model_name} model, {get_file_amount(storage_folder + '/' + 'blobs')} files downloaded..."
)
update_log()
if not paused:
update_log()
sleep(1)

# if cancel button is pressed, return
Expand Down
31 changes: 4 additions & 27 deletions speech_translate/ui/frame/setting/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,22 +187,6 @@ def __init__(self, root: Toplevel, master_frame: Union[ttk.Frame, Frame]):
)
self.btn_log_config.pack(side="left", padx=5, pady=5)

# self.lbl_ignore_stdout = ttk.Label(self.f_logging_2, text="Ignore stdout", width=16)
# self.lbl_ignore_stdout.pack(side="left", padx=5)
# tk_tooltip(self.lbl_ignore_stdout, "Collection to ignore stdout / print from the console.")
# self.entry_ignore_stdout = ttk.Entry(self.f_logging_2)
# self.entry_ignore_stdout.pack(side="left", padx=5, fill="x", expand=True)
# self.entry_ignore_stdout.insert(0, ', '.join(sj.cache["ignore_stdout"]))
# self.entry_ignore_stdout.bind("<FocusOut>", lambda e: self.save_ignore_stdout())
# self.entry_ignore_stdout.bind("<Return>", lambda e: self.save_ignore_stdout())
# tk_tooltip(
# self.entry_ignore_stdout,
# "Collection to ignore stdout / print from the console with its input separated by comma.\n\n"
# "This is useful if you want to ignore some of the stdout / print from the console.\n\n"
# "Example: `Predicting silences(s) with VAD..., Predicted silences(s) with VAD`",
# wrapLength=500,
# )

self.menu_config_log = Menu(self.master, tearoff=0)
self.menu_config_log.add_command(
label="Open", image=self.open_emoji, compound="left", command=lambda: start_file(dir_log)
Expand Down Expand Up @@ -274,7 +258,7 @@ def __init__(self, root: Toplevel, master_frame: Union[ttk.Frame, Frame]):
tk_tooltip(
self.cbtn_debug_realtime_record,
"Show some debugging process of the realtime record.\n\n"
"Enabling will probably slow down the app.",
"Enabling could slow down the app.",
)

self.cbtn_debug_recorded_audio = CustomCheckButton(
Expand Down Expand Up @@ -550,7 +534,7 @@ def failed_func():
kwargs = {
"after_func": after_func,
"use_faster_whisper": use_faster_whisper,
"cancel_func": lambda: self.cancel_model_download(model, btn),
"cancel_func": lambda: self.cancel_model_download(model, btn, use_faster_whisper),
"failed_func": failed_func,
}

Expand Down Expand Up @@ -580,7 +564,7 @@ def failed_func():
)
mbox("Download error", f"Err details: {e}", 0, self.root)

def cancel_model_download(self, model: str, btn: ttk.Button) -> None:
def cancel_model_download(self, model: str, btn: ttk.Button, use_faster_whisper) -> None:
"""
Cancel whisper model download.
Expand All @@ -589,7 +573,7 @@ def cancel_model_download(self, model: str, btn: ttk.Button) -> None:
if not mbox("Cancel confirmation", "Are you sure you want to cancel downloading?", 3, self.root):
return

btn.configure(text="Download", command=lambda: self.model_download(model, btn, False), state="normal")
btn.configure(text="Download", command=lambda: self.model_download(model, btn, use_faster_whisper), state="normal")
bc.cancel_dl = True # Raise flag to stop

def model_btn_checker(self, model: str, btn: ttk.Button, faster_whisper: bool = False, on_start=False) -> None:
Expand Down Expand Up @@ -901,10 +885,3 @@ def path_default(self, key: str, element: ttk.Entry, default_path: str, save=Tru
element.configure(state="readonly")
if save:
sj.save_key(key, "auto")

# def save_ignore_stdout(self):
# _input = self.entry_ignore_stdout.get().split(",")
# _input = [i.strip() for i in _input if i.strip() != ""] # remove any empty string or space

# sj.save_key("ignore_stdout", _input)
# update_stdout_ignore_list(_input)
2 changes: 1 addition & 1 deletion speech_translate/ui/template/detached.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def __init__(self, master: Tk, title: str, winType: Literal["tc", "tl"]):
wrapLength=250,
)

self.menuDropdown = Menu(self.root, tearoff=0, fg="white")
self.menuDropdown = Menu(self.root, tearoff=0)
self.menuDropdown.add_command(label=self.title, command=self.open_menu, image=self.title_emoji, compound="left")
self.menuDropdown.add_command(label="Help", command=self.show_help, image=self.help_emoji, compound="left")
self.menuDropdown.add_command(
Expand Down
Loading

0 comments on commit 165d25a

Please sign in to comment.