-
Notifications
You must be signed in to change notification settings - Fork 0
/
script.py
154 lines (130 loc) · 6.34 KB
/
script.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import re
from io import BytesIO
from pathlib import Path
from threading import Thread
from wave import Wave_read
import numpy as np
import sounddevice as sd
import anyio
import gradio as gr
from deep_translator import GoogleTranslator
from voicevox import Client, Style
from modules import shared
# Extension settings. The UI callbacks, `connect` and `update_style` mutate
# this dict in place; `output_modifier` and `play_test` read from it.
params: dict[str, bool | int | str | None] = {
    'activate': False,            # gate checked by output_modifier before synthesizing
    'selected_voice': None,       # key into `speakers` for the chosen voice
    'autoplay': False,            # adds the `autoplay` attribute to the <audio> tag
    'selected_style': None,       # key into `now_style` for the chosen style
    'speaker_id': None,           # id of the chosen style, passed to synthesis
    'interrogative_speak': True,  # forwarded as enable_interrogative_upspeak
    'translate': True,            # translate replies to Japanese before synthesis
    'url': 'http://localhost:50021',  # engine URL handed to the voicevox Client
}

# Counter used to build the name of the next audio file written by output_modifier.
wav_idx = 0

# Speaker name -> speaker object, as fetched from the engine by `connect`.
speakers = {}

# Style name -> Style for the currently selected voice.
now_style: dict[str, Style] = {}
def remove_surrounded_chars(string):
    """Return ``string`` with every asterisk-delimited span removed.

    A span starts at an asterisk and runs to the next asterisk, or to the end
    of the string when there is no closing asterisk. The delimiters are
    removed together with the text between them.
    """
    # The pattern matches 'as few symbols as possible (0 upwards) between any
    # asterisks' OR 'as few symbols as possible (0 upwards) between an asterisk
    # and the end of the string'.
    # It must be a raw string: in a normal literal "\*" is an invalid escape
    # sequence, which newer Python versions warn about at compile time.
    return re.sub(r"\*[^\*]*?(\*|$)", '', string)
def connect():
    """Fetch the speaker list from the engine at ``params['url']``.

    On success the module-level ``speakers`` and ``now_style`` are rebuilt and
    the first speaker and its first style become the current selection in
    ``params``. On failure ``speakers`` is left empty and TTS is deactivated.

    Returns:
        A 4-tuple of Gradio updates, in the order the refresh button wires
        them: (voice dropdown, style dropdown, activate checkbox, play button).
    """
    global now_style, speakers

    async def fetch_speakers():
        found = {}
        try:
            async with Client(params['url']) as client:
                core_version = (await client.fetch_core_versions())[0]
                for speaker in await client.fetch_speakers(core_version):
                    found[speaker.name] = speaker
        except Exception as exc:
            # Best effort: an unreachable engine must not crash the web UI, but
            # the reason is reported instead of being silently discarded.
            # Catching Exception (not a bare except) lets KeyboardInterrupt and
            # task cancellation propagate.
            print(f'Could not fetch voicevox speakers from {params["url"]}: {exc!r}')
        return found

    speakers = anyio.run(fetch_speakers)

    if speakers:
        first_voice = next(iter(speakers))
        styles = speakers[first_voice].styles
        params['selected_voice'] = first_voice
        params['selected_style'] = styles[0].name
        params['speaker_id'] = styles[0].id
        now_style = {style.name: style for style in styles}
        return (
            gr.Dropdown.update(choices=list(speakers), value=first_voice),
            # Set the value too, so the dropdown shows the style that params
            # now points at rather than a stale earlier selection.
            gr.Dropdown.update(choices=list(now_style), value=params['selected_style']),
            gr.Checkbox.update(interactive=True),
            gr.Button.update(interactive=True),
        )

    # No speakers available: switch TTS off and lock the controls that need
    # a working engine.
    params['activate'] = False
    return (
        gr.Dropdown.update(),
        gr.Dropdown.update(),
        gr.Checkbox.update(interactive=False),
        gr.Button.update(interactive=False),
    )
def update_style(speaker_name):
    """Rebuild ``now_style`` for ``speaker_name`` and select its first style.

    Updates ``params['selected_style']`` and ``params['speaker_id']`` to the
    first style of the speaker and returns a Gradio update that gives the
    style dropdown the new choices with that first style selected.
    """
    global now_style

    # Reset first, then fill, so the mapping never mixes styles of two voices.
    now_style = {}
    now_style.update((style.name, style) for style in speakers[speaker_name].styles)

    style_names = list(now_style)
    default_name = style_names[0]
    params['selected_style'] = default_name
    params['speaker_id'] = now_style[default_name].id
    return gr.Dropdown.update(choices=style_names, value=default_name)
async def get_voice_bytes(speaker_id: int, text: str, interrogative_speak: bool):
    """Synthesize ``text`` with the voicevox engine and return the audio.

    Args:
        speaker_id: Id of the voicevox style to speak with.
        text: Text to synthesize.
        interrogative_speak: Forwarded as ``enable_interrogative_upspeak``.

    Returns:
        The result of the engine's synthesis call; callers in this file treat
        it as the bytes of a WAV file.
    """
    # Use the configured engine URL, the same one `connect` talks to, rather
    # than the client's built-in default.
    async with Client(params['url']) as client:
        audio_query = await client.create_audio_query(text, speaker_id)
        return await audio_query.synthesis(
            speaker=speaker_id,
            enable_interrogative_upspeak=interrogative_speak,
        )
def play_test():
    """Synthesize a fixed sample sentence with the current voice and play it.

    Blocks until playback on the local sound device has finished.
    """
    sample_text = 'この音はテストです。 今日はお腹いっぱいですか?'
    wav_bytes = anyio.run(
        get_voice_bytes,
        params['speaker_id'],
        sample_text,
        params['interrogative_speak'],
    )

    # Decode the WAV container to get the sample rate and the raw frames.
    with Wave_read(BytesIO(wav_bytes)) as wav:
        sample_rate = wav.getframerate()
        frames = wav.readframes(wav.getnframes())

    # Frames are interpreted as 16-bit samples.
    # NOTE(review): assumes the engine emits 16-bit PCM - confirm.
    samples = np.frombuffer(frames, dtype=np.int16)
    sd.play(samples, sample_rate)
    sd.wait()
def setup():
    """Start `connect` on a background thread without waiting for it."""
    worker = Thread(target=connect)
    worker.start()
def ui():
    """Build the Voicevox accordion and bind its widgets to ``params``.

    Controls that need a working engine (the activate checkbox and the play
    button) start non-interactive when no speakers are loaded.
    """
    with gr.Accordion("Voicevox", open=True):
        # Gradio elements
        with gr.Row():
            activate = gr.Checkbox(
                value=params['activate'],
                label='Activate TTS',
                interactive=bool(speakers),
                info="If extension can't connect to voicevox engine, it will be disable.",
            )
            autoplay = gr.Checkbox(
                value=params['autoplay'],
                label='Play TTS automatically',
            )
            translate = gr.Checkbox(
                value=params['translate'],
                label='Translate model output into Japanese for voicevox',
            )

        with gr.Row():
            voice = gr.Dropdown(
                value=params['selected_voice'],
                choices=list(speakers),
                label='TTS Voice',
            )
            style = gr.Dropdown(
                value=params['selected_style'],
                choices=[s.name for s in speakers[params['selected_voice']].styles] if speakers else [],
                label='Voice style',
            )
            interrogative_speak = gr.Checkbox(
                value=params['interrogative_speak'],
                label='interrogative speak',
            )
            play = gr.Button(
                value='Play',
                interactive=bool(speakers),
                info="If extension can't connect to voicevox engine, it will be disable.",
            )

        with gr.Row():
            engine = gr.Textbox(value=params['url'], label='voicevox engine url')
            refresh_connect = gr.Button(value='Refresh')

    # Event functions to update the parameters in the backend
    activate.change(
        lambda x: params.update({'activate': x}),
        activate,
        None,
    )
    # Changing the voice clears the style selection, then repopulates the
    # style dropdown from the new speaker.
    voice.change(
        lambda x: params.update({'selected_voice': x, 'selected_style': None, 'speaker_id': None}),
        voice,
        None,
    ).then(update_style, voice, style)
    autoplay.change(
        lambda x: params.update({"autoplay": x}),
        autoplay,
        None,
    )
    style.change(
        lambda x: params.update({'selected_style': x, 'speaker_id': now_style[x].id if x is not None else None}),
        style,
        None,
    )
    interrogative_speak.change(
        lambda x: params.update({'interrogative_speak': x}),
        interrogative_speak,
        None,
    )
    translate.change(
        lambda x: params.update({'translate': x}),
        translate,
        None,
    )
    # Refresh stores the URL from the textbox first, then reconnects and
    # pushes the result into the dependent widgets.
    refresh_connect.click(
        lambda x: params.update({'url': x}),
        engine,
        None,
    ).then(connect, None, [voice, style, activate, play])
    play.click(play_test, None, None)
def output_modifier(string, state):
    """Turn a model reply into an audio player followed by the original text.

    When TTS is inactive or no speaker is selected, the reply is returned
    unchanged. Otherwise the reply is cleaned up, optionally translated to
    Japanese, synthesized by the voicevox engine and written to a numbered
    file under ``extensions/voicevox_tts/outputs``.

    Args:
        string: The model reply.
        state: Not used by this function.

    Returns:
        Either ``string`` unchanged, or an ``<audio>`` tag pointing at the
        generated file, a blank line, and the original reply.
    """
    global wav_idx

    if not params['activate'] or params['speaker_id'] is None:
        # NOTE(review): echoes the reply to stdout; kept as in the original,
        # but it looks like leftover debug output - confirm.
        print(string)
        return string

    original_string = string

    # Drop *asterisk-delimited* spans and quote characters, and flatten
    # newlines, before the text is sent for synthesis.
    string = remove_surrounded_chars(string)
    string = string.replace('"', '')
    string = string.replace('“', '')
    string = string.replace('\n', ' ')
    string = string.strip()
    if string == '':
        string = 'empty reply, try regenerating'

    # The synthesized bytes are WAV data (play_test parses the same payload
    # with Wave_read), so the file gets a .wav suffix rather than .mp3.
    output_file = Path(f'extensions/voicevox_tts/outputs/{wav_idx:06d}.wav')
    print(f'Outputting audio to {str(output_file)}')

    if params['translate']:
        string = GoogleTranslator(target='ja').translate(string)

    audio = anyio.run(get_voice_bytes, params['speaker_id'], string, params['interrogative_speak'])

    # Create the output directory on demand; writing fails with
    # FileNotFoundError when it does not exist yet.
    output_file.parent.mkdir(parents=True, exist_ok=True)
    output_file.write_bytes(audio)

    autoplay = 'autoplay' if params['autoplay'] else ''
    string = f'<audio src="file/{output_file.as_posix()}" controls {autoplay}></audio>'
    wav_idx += 1
    string += f'\n\n{original_string}'

    shared.processing_message = "*Is typing...*"
    return string