forked from ErlichLiu/Whisper-Input
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
119 lines (105 loc) · 4.5 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import os
import sys
from dotenv import load_dotenv
load_dotenv()
from src.audio.recorder import AudioRecorder
from src.keyboard.listener import KeyboardManager, check_accessibility_permissions
from src.transcription.whisper import WhisperProcessor
from src.utils.logger import logger
from src.transcription.senseVoiceSmall import SenseVoiceSmallProcessor
def check_microphone_permissions():
"""检查麦克风权限并提供指导"""
logger.warning("\n=== macOS 麦克风权限检查 ===")
logger.warning("此应用需要麦克风权限才能进行录音。")
logger.warning("\n请按照以下步骤授予权限:")
logger.warning("1. 打开 系统偏好设置")
logger.warning("2. 点击 隐私与安全性")
logger.warning("3. 点击左侧的 麦克风")
logger.warning("4. 点击右下角的锁图标并输入密码")
logger.warning("5. 在右侧列表中找到 Terminal(或者您使用的终端应用)并勾选")
logger.warning("\n授权后,请重新运行此程序。")
logger.warning("===============================\n")
class VoiceAssistant:
def __init__(self, audio_processor):
self.audio_recorder = AudioRecorder()
self.audio_processor = audio_processor
self.keyboard_manager = KeyboardManager(
on_record_start=self.start_transcription_recording,
on_record_stop=self.stop_transcription_recording,
on_translate_start=self.start_translation_recording,
on_translate_stop=self.stop_translation_recording,
on_reset_state=self.reset_state
)
def start_transcription_recording(self):
"""开始录音(转录模式)"""
self.audio_recorder.start_recording()
def stop_transcription_recording(self):
"""停止录音并处理(转录模式)"""
audio = self.audio_recorder.stop_recording()
if audio == "TOO_SHORT":
logger.warning("录音时长太短,状态将重置")
self.keyboard_manager.reset_state()
elif audio:
result = self.audio_processor.process_audio(
audio,
mode="transcriptions",
prompt=""
)
# 解构返回值
text, error = result if isinstance(result, tuple) else (result, None)
self.keyboard_manager.type_text(text, error)
else:
logger.error("没有录音数据,状态将重置")
self.keyboard_manager.reset_state()
def start_translation_recording(self):
"""开始录音(翻译模式)"""
self.audio_recorder.start_recording()
def stop_translation_recording(self):
"""停止录音并处理(翻译模式)"""
audio = self.audio_recorder.stop_recording()
if audio == "TOO_SHORT":
logger.warning("录音时长太短,状态将重置")
self.keyboard_manager.reset_state()
elif audio:
result = self.audio_processor.process_audio(
audio,
mode="translations",
prompt=""
)
text, error = result if isinstance(result, tuple) else (result, None)
self.keyboard_manager.type_text(text,error)
else:
logger.error("没有录音数据,状态将重置")
self.keyboard_manager.reset_state()
def reset_state(self):
"""重置状态"""
self.keyboard_manager.reset_state()
def run(self):
"""运行语音助手"""
logger.info("=== 语音助手已启动 ===")
self.keyboard_manager.start_listening()
def main():
# 判断是 Whisper 还是 SiliconFlow
service_platform = os.getenv("SERVICE_PLATFORM", "siliconflow")
if service_platform == "groq":
audio_processor = WhisperProcessor()
elif service_platform == "siliconflow":
audio_processor = SenseVoiceSmallProcessor()
else:
raise ValueError(f"无效的服务平台: {service_platform}")
try:
assistant = VoiceAssistant(audio_processor)
assistant.run()
except Exception as e:
error_msg = str(e)
if "Input event monitoring will not be possible" in error_msg:
check_accessibility_permissions()
sys.exit(1)
elif "无法访问音频设备" in error_msg:
check_microphone_permissions()
sys.exit(1)
else:
logger.error(f"发生错误: {error_msg}", exc_info=True)
sys.exit(1)
if __name__ == "__main__":
main()