forked from hanfangyuan4396/dify-on-wechat
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request zhayujie#385 from wanggang1987/google_voice
Voice support
- Loading branch information
Showing
12 changed files
with
254 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,3 +6,4 @@ venv* | |
config.json | ||
QR.png | ||
nohup.out | ||
tmp |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
|
||
import os | ||
import pathlib | ||
from config import conf | ||
|
||
|
||
class TmpDir(object):
    """Locate, and optionally create, the ``./tmp/`` scratch directory.

    Nothing in this class removes the directory or its contents; files
    written under it stay on disk until something else deletes them.
    """

    # Relative path, so it resolves against the process working directory.
    tmpFilePath = pathlib.Path('./tmp/')

    def __init__(self):
        """Create the directory if it is missing and speech recognition is on.

        The directory is only created when the ``speech_recognition`` config
        entry compares equal to ``True``.
        """
        # The ``== True`` comparison is kept as-is so the set of config values
        # that enable creation does not change.
        if (not os.path.exists(self.tmpFilePath)
                and conf().get('speech_recognition') == True):  # noqa: E712
            # exist_ok closes the window between the existence check above
            # and the creation: a directory made concurrently in that gap
            # no longer raises FileExistsError.
            os.makedirs(self.tmpFilePath, exist_ok=True)

    def path(self):
        """Return the directory path as a string ending in ``/``."""
        return str(self.tmpFilePath) + '/'
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
|
||
""" | ||
baidu voice service | ||
""" | ||
import time | ||
from aip import AipSpeech | ||
from common.log import logger | ||
from common.tmp_dir import TmpDir | ||
from voice.voice import Voice | ||
from config import conf | ||
|
||
class BaiduVoice(Voice):
    """Voice backend built on the Baidu ``AipSpeech`` client."""

    # Credentials are read from the project config once, when the class
    # body is executed.
    APP_ID = conf().get('baidu_app_id')
    API_KEY = conf().get('baidu_api_key')
    SECRET_KEY = conf().get('baidu_secret_key')
    # A single client object stored on the class and reached via ``self``.
    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

    def __init__(self):
        """No per-instance setup is performed."""

    def voiceToText(self, voice_file):
        """Placeholder: performs no recognition and returns ``None``."""
        return None

    def textToVoice(self, text):
        """Synthesize ``text`` and save the audio under the tmp directory.

        :param text: text passed to the client's ``synthesis`` call
        :return: path of the written ``.mp3`` file, or ``None`` when the
            client returned a dict (which is logged as an error)
        """
        synth_options = {
            'spd': 5, 'pit': 5, 'vol': 5, 'per': 111
        }
        result = self.client.synthesis(text, 'zh', 1, synth_options)

        # A dict result is treated as the failure case; anything else is
        # written to disk as the audio payload.
        if isinstance(result, dict):
            logger.error('[Baidu] textToVoice error={}'.format(result))
            return None

        out_dir = TmpDir().path()
        # The file name carries the current time truncated to whole seconds.
        stamp = str(int(time.time()))
        audio_path = out_dir + '语音回复_' + stamp + '.mp3'
        with open(audio_path, 'wb') as out:
            out.write(result)
        logger.info('[Baidu] textToVoice text={} voice file name={}'.format(text, audio_path))
        return audio_path
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
|
||
""" | ||
google voice service | ||
""" | ||
|
||
import pathlib | ||
import subprocess | ||
import time | ||
import speech_recognition | ||
import pyttsx3 | ||
from common.log import logger | ||
from common.tmp_dir import TmpDir | ||
from voice.voice import Voice | ||
|
||
|
||
class GoogleVoice(Voice):
    """Voice backend: Google speech recognition in, pyttsx3 synthesis out."""

    # Both objects are created once, when the class body is executed, and
    # reached through ``self`` by every instance.
    recognizer = speech_recognition.Recognizer()
    engine = pyttsx3.init()

    def __init__(self):
        """Configure the speaking rate, volume and voice of the engine."""
        # Speaking rate
        self.engine.setProperty('rate', 125)
        # Volume
        self.engine.setProperty('volume', 1.0)
        # Index 0 is the male voice, index 1 the female voice
        voices = self.engine.getProperty('voices')
        # Only switch voices when a second one is actually installed;
        # otherwise keep the engine default instead of raising IndexError.
        if voices and len(voices) > 1:
            self.engine.setProperty('voice', voices[1].id)

    def voiceToText(self, voice_file):
        """Convert ``voice_file`` to WAV with ffmpeg and transcribe it.

        :param voice_file: path of the audio file to transcribe
        :return: the recognized text, or an apology message when the audio
            could not be understood or the recognition request failed
        """
        # Swap only the final extension; a plain str.replace would also
        # rewrite '.mp3' occurring elsewhere in the path.
        new_file = str(pathlib.Path(voice_file).with_suffix('.wav'))
        # Argument list without a shell, so spaces or shell metacharacters
        # in the file name cannot break or alter the command.
        subprocess.call([
            'ffmpeg', '-i', voice_file,
            '-acodec', 'pcm_s16le', '-ac', '1', '-ar', '16000',
            new_file,
        ])
        with speech_recognition.AudioFile(new_file) as source:
            audio = self.recognizer.record(source)
        try:
            text = self.recognizer.recognize_google(audio, language='zh-CN')
            logger.info(
                '[Google] voiceToText text={} voice file name={}'.format(text, voice_file))
            return text
        except speech_recognition.UnknownValueError:
            return "抱歉,我听不懂。"
        except speech_recognition.RequestError as e:
            return "抱歉,无法连接到 Google 语音识别服务;{0}".format(e)

    def textToVoice(self, text):
        """Synthesize ``text`` to a file under the tmp directory.

        :param text: text handed to the engine's ``save_to_file``
        :return: path of the output file
        """
        # The file name carries the current time truncated to whole seconds.
        textFile = TmpDir().path() + '语音回复_' + str(int(time.time())) + '.mp3'
        self.engine.save_to_file(text, textFile)
        self.engine.runAndWait()
        logger.info(
            '[Google] textToVoice text={} voice file name={}'.format(text, textFile))
        return textFile
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
|
||
""" | ||
openai voice service | ||
""" | ||
import json | ||
import openai | ||
from config import conf | ||
from common.log import logger | ||
from voice.voice import Voice | ||
|
||
|
||
class OpenaiVoice(Voice):
    """Voice backend that transcribes audio through ``openai.Audio``."""

    def __init__(self):
        """Set the module-level OpenAI API key from the project config."""
        openai.api_key = conf().get('open_ai_api_key')

    def voiceToText(self, voice_file):
        """Transcribe ``voice_file`` with the ``whisper-1`` model.

        :param voice_file: path of the audio file to upload
        :return: the ``"text"`` field of the transcription reply
        """
        logger.debug(
            '[Openai] voice file name={}'.format(voice_file))
        # The context manager closes the handle even if the request raises;
        # the previous bare open() never closed it.
        with open(voice_file, "rb") as audio:
            reply = openai.Audio.transcribe("whisper-1", audio)
        text = reply["text"]
        logger.info(
            '[Openai] voiceToText text={} voice file name={}'.format(text, voice_file))
        return text

    def textToVoice(self, text):
        """Placeholder: performs no synthesis and returns ``None``."""
        pass
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
""" | ||
Voice service abstract class | ||
""" | ||
|
||
class Voice(object):
    """Common interface for the voice service backends."""

    def voiceToText(self, voice_file):
        """
        Turn the audio in ``voice_file`` into text via the voice service.

        The base version always raises ``NotImplementedError``.
        """
        raise NotImplementedError()

    def textToVoice(self, text):
        """
        Turn ``text`` into audio via the voice service.

        The base version always raises ``NotImplementedError``.
        """
        raise NotImplementedError()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
""" | ||
voice factory | ||
""" | ||
|
||
def create_voice(voice_type):
    """
    create a voice instance
    :param voice_type: voice type code, one of 'baidu', 'google', 'openai'
    :return: voice instance
    :raises RuntimeError: if voice_type is not one of the codes above
    """
    # Each backend is imported inside its own branch, so only the module
    # for the selected backend is loaded.
    if voice_type == 'baidu':
        from voice.baidu.baidu_voice import BaiduVoice
        return BaiduVoice()
    elif voice_type == 'google':
        from voice.google.google_voice import GoogleVoice
        return GoogleVoice()
    elif voice_type == 'openai':
        from voice.openai.openai_voice import OpenaiVoice
        return OpenaiVoice()
    # Same exception type as before, now naming the offending value so a
    # misconfiguration can be diagnosed from the traceback alone.
    raise RuntimeError('unsupported voice type: {!r}'.format(voice_type))