diff --git a/api/core/model_runtime/model_providers/openai/tts/tts.py b/api/core/model_runtime/model_providers/openai/tts/tts.py index 608ed897e096f..d3fcf731f15ac 100644 --- a/api/core/model_runtime/model_providers/openai/tts/tts.py +++ b/api/core/model_runtime/model_providers/openai/tts/tts.py @@ -114,7 +114,8 @@ def _tts_invoke_streaming(self, model: str, credentials: dict, content_text: str # doc: https://platform.openai.com/docs/guides/text-to-speech credentials_kwargs = self._to_credential_kwargs(credentials) client = OpenAI(**credentials_kwargs) - if not voice or voice not in self.get_tts_model_voices(model=model, credentials=credentials): + model_support_voice = [x.get("value") for x in self.get_tts_model_voices(model=model, credentials=credentials)] + if not voice or voice not in model_support_voice: voice = self._get_model_default_voice(model, credentials) word_limit = self._get_model_word_limit(model, credentials) if len(content_text) > word_limit: diff --git a/web/app/components/app/configuration/config-voice/param-config-content.tsx b/web/app/components/app/configuration/config-voice/param-config-content.tsx index cced3b045849c..9b0d5bbb69e23 100644 --- a/web/app/components/app/configuration/config-voice/param-config-content.tsx +++ b/web/app/components/app/configuration/config-voice/param-config-content.tsx @@ -31,12 +31,12 @@ const VoiceParamConfig: FC = () => { let languageItem = languages.find(item => item.value === textToSpeechConfig.language) const localLanguagePlaceholder = languageItem?.name || t('common.placeholder.select') - if (languages && !languageItem) + if (languages && !languageItem && languages.length > 0) languageItem = languages[0] const language = languageItem?.value const voiceItems = useSWR({ appId, language }, fetchAppVoices).data let voiceItem = voiceItems?.find(item => item.value === textToSpeechConfig.voice) - if (voiceItems && !voiceItem) + if (voiceItems && !voiceItem && voiceItems.length > 0) voiceItem = voiceItems[0] const localVoicePlaceholder = voiceItem?.name || t('common.placeholder.select') @@ -125,9 +125,11 @@ const VoiceParamConfig: FC = () => {
{t('appDebug.voice.voiceSettings.voice')}
{ + if (!value.value) + return setTextToSpeechConfig({ ...textToSpeechConfig, voice: String(value.value), diff --git a/web/app/components/app/configuration/features/chat-group/text-to-speech/index.tsx b/web/app/components/app/configuration/features/chat-group/text-to-speech/index.tsx index 4c5db2251312b..72d617c3c371c 100644 --- a/web/app/components/app/configuration/features/chat-group/text-to-speech/index.tsx +++ b/web/app/components/app/configuration/features/chat-group/text-to-speech/index.tsx @@ -41,6 +41,7 @@ const TextToSpeech: FC = () => { )} diff --git a/web/app/components/app/text-generate/item/index.tsx b/web/app/components/app/text-generate/item/index.tsx index 3313c987c9343..9794967d9d1f6 100644 --- a/web/app/components/app/text-generate/item/index.tsx +++ b/web/app/components/app/text-generate/item/index.tsx @@ -29,6 +29,7 @@ import { useStore as useAppStore } from '@/app/components/app/store' import WorkflowProcessItem from '@/app/components/base/chat/chat/answer/workflow-process' import type { WorkflowProcess } from '@/app/components/base/chat/types' import type { SiteInfo } from '@/models/share' +import { useChatContext } from '@/app/components/base/chat/chat/context' const MAX_DEPTH = 3 @@ -127,6 +128,10 @@ const GenerationItem: FC = ({ const [childFeedback, setChildFeedback] = useState({ rating: null, }) + const { + config, + } = useChatContext() + const setCurrentLogItem = useAppStore(s => s.setCurrentLogItem) const setShowPromptLogModal = useAppStore(s => s.setShowPromptLogModal) @@ -430,6 +435,7 @@ const GenerationItem: FC = ({ )} diff --git a/web/app/components/base/audio-btn/audio.player.manager.ts b/web/app/components/base/audio-btn/audio.player.manager.ts index 03e9e21f93f6a..17d92f8dc25f7 100644 --- a/web/app/components/base/audio-btn/audio.player.manager.ts +++ b/web/app/components/base/audio-btn/audio.player.manager.ts @@ -41,7 +41,7 @@ export class AudioPlayerManager { } this.msgId = id - this.audioPlayers = new AudioPlayer(url, isPublic, id, msgContent, callback) + this.audioPlayers = new AudioPlayer(url, isPublic, id, msgContent, voice, callback) return this.audioPlayers } } diff --git a/web/app/components/base/audio-btn/audio.ts b/web/app/components/base/audio-btn/audio.ts index 638626bf8ac69..a61fd085d44c4 100644 --- a/web/app/components/base/audio-btn/audio.ts +++ b/web/app/components/base/audio-btn/audio.ts @@ -23,12 +23,13 @@ export default class AudioPlayer { isPublic: boolean callback: ((event: string) => {}) | null - constructor(streamUrl: string, isPublic: boolean, msgId: string | undefined, msgContent: string | null | undefined, callback: ((event: string) => {}) | null) { + constructor(streamUrl: string, isPublic: boolean, msgId: string | undefined, msgContent: string | null | undefined, voice: string | undefined, callback: ((event: string) => {}) | null) { this.audioContext = new AudioContext() this.msgId = msgId this.msgContent = msgContent this.url = streamUrl this.isPublic = isPublic + this.voice = voice this.callback = callback // Compatible with iphone ios17 ManagedMediaSource @@ -154,7 +155,6 @@ export default class AudioPlayer { this.mediaSource?.endOfStream() clearInterval(endTimer) } - console.log('finishStream endOfStream endTimer') }, 10) } @@ -169,7 +169,6 @@ export default class AudioPlayer { const arrayBuffer = this.cacheBuffers.shift()! this.sourceBuffer?.appendBuffer(arrayBuffer) } - console.log('finishStream timer') }, 10) } diff --git a/web/app/components/base/audio-btn/index.tsx b/web/app/components/base/audio-btn/index.tsx index 48081c170c6d2..675f58b53039c 100644 --- a/web/app/components/base/audio-btn/index.tsx +++ b/web/app/components/base/audio-btn/index.tsx @@ -65,11 +65,11 @@ const AudioBtn = ({ } const handleToggle = async () => { if (audioState === 'playing' || audioState === 'loading') { - setAudioState('paused') + setTimeout(() => setAudioState('paused'), 1) AudioPlayerManager.getInstance().getAudioPlayer(url, isPublic, id, value, voice, audio_finished_call).pauseAudio() } else { - setAudioState('loading') + setTimeout(() => setAudioState('loading'), 1) AudioPlayerManager.getInstance().getAudioPlayer(url, isPublic, id, value, voice, audio_finished_call).playAudio() } } diff --git a/web/app/components/base/chat/chat/answer/operation.tsx b/web/app/components/base/chat/chat/answer/operation.tsx index d46aa3437596b..3d52477d0bbe4 100644 --- a/web/app/components/base/chat/chat/answer/operation.tsx +++ b/web/app/components/base/chat/chat/answer/operation.tsx @@ -125,6 +125,7 @@ const Operation: FC = ({ id={id} value={content} noCache={false} + voice={config?.text_to_speech?.voice} className='hidden group-hover:block' /> diff --git a/web/app/components/base/features/feature-panel/text-to-speech/param-config-content.tsx b/web/app/components/base/features/feature-panel/text-to-speech/param-config-content.tsx index ea1d789d0a129..a5a2eb7bb70e3 100644 --- a/web/app/components/base/features/feature-panel/text-to-speech/param-config-content.tsx +++ b/web/app/components/base/features/feature-panel/text-to-speech/param-config-content.tsx @@ -149,7 +149,7 @@ const VoiceParamConfig = ({
{t('appDebug.voice.voiceSettings.voice')}
{ handleChange({