@@ -2,7 +2,6 @@ import type { VoiceInfo } from 'microsoft-cognitiveservices-speech-sdk'
2
2
import {
3
3
AudioConfig ,
4
4
CancellationErrorCode ,
5
- ResultReason ,
6
5
SpeakerAudioDestination ,
7
6
SpeechConfig ,
8
7
SpeechRecognizer ,
@@ -52,6 +51,7 @@ export const useSpeechService = ({ langs = <const>['fr-FR', 'ja-JP', 'en-US', 'z
52
51
53
52
// const isFetchAllVoices = ref(false) // 是否在请求所有语音列表
54
53
const rate = ref ( 1 ) // 语速 (0,2]
54
+ const style = ref ( 'Neural' ) // 情感
55
55
56
56
let mediaRecorder : MediaRecorder | null
57
57
const chunks : Blob [ ] = [ ]
@@ -61,35 +61,15 @@ export const useSpeechService = ({ langs = <const>['fr-FR', 'ja-JP', 'en-US', 'z
61
61
62
62
const recognizer = ref < SpeechRecognizer > ( new SpeechRecognizer ( speechConfig . value ) )
63
63
const synthesizer = ref < SpeechSynthesizer > ( new SpeechSynthesizer ( speechConfig . value ) )
64
-
65
64
// 引入变量,触发 SpeechSynthesizer 实例的重新创建
66
65
const count = ref ( 0 )
67
-
66
+ const player = ref ( new SpeakerAudioDestination ( ) )
68
67
watch ( [ language , voiceName , count , azureKey , azureRegion , ttsPassword ] , ( [ lang , voice ] ) => {
69
68
speechConfig . value = SpeechConfig . fromSubscription ( resultAzureKey . value , resultAzureRegion . value )
70
69
speechConfig . value . speechRecognitionLanguage = lang
71
70
speechConfig . value . speechSynthesisLanguage = lang
72
71
speechConfig . value . speechSynthesisVoiceName = voice
73
72
console . log ( lang , voice )
74
-
75
- // 通过playback结束事件来判断播放结束
76
- const player = new SpeakerAudioDestination ( )
77
- player . onAudioStart = function ( _ ) {
78
- if ( isSynthesError . value ) return
79
- isPlaying . value = true
80
- isPlayend . value = false
81
- console . log ( 'playback started' )
82
- }
83
- player . onAudioEnd = function ( _ ) {
84
- console . log ( 'playback finished' )
85
- isPlaying . value = false
86
- isPlayend . value = true
87
- }
88
-
89
- const audioConfig = AudioConfig . fromDefaultMicrophoneInput ( )
90
- const audioConfiga = AudioConfig . fromSpeakerOutput ( player )
91
- recognizer . value = new SpeechRecognizer ( speechConfig . value , audioConfig )
92
- synthesizer . value = new SpeechSynthesizer ( speechConfig . value , audioConfiga )
93
73
} , {
94
74
immediate : true ,
95
75
} )
@@ -103,6 +83,7 @@ export const useSpeechService = ({ langs = <const>['fr-FR', 'ja-JP', 'en-US', 'z
103
83
mediaRecorder = new MediaRecorder ( stream )
104
84
105
85
mediaRecorder . ondataavailable = ( e ) => {
86
+ console . log ( chunks , 'c' )
106
87
chunks . push ( e . data )
107
88
}
108
89
@@ -117,6 +98,9 @@ export const useSpeechService = ({ langs = <const>['fr-FR', 'ja-JP', 'en-US', 'z
117
98
}
118
99
119
100
const startRecognizeSpeech = async ( cb ?: ( text : string ) => void ) => {
101
+ const audioConfig = AudioConfig . fromDefaultMicrophoneInput ( )
102
+ recognizer . value = new SpeechRecognizer ( speechConfig . value , audioConfig )
103
+
120
104
isRecognizReadying . value = true
121
105
122
106
recognizer . value . canceled = ( ) => {
@@ -143,7 +127,6 @@ export const useSpeechService = ({ langs = <const>['fr-FR', 'ja-JP', 'en-US', 'z
143
127
isRecognizReadying . value = false
144
128
isRecognizing . value = false
145
129
}
146
-
147
130
recognizer . value . startContinuousRecognitionAsync ( async ( ) => {
148
131
await audioRecorder ( )
149
132
isRecognizing . value = true
@@ -160,8 +143,7 @@ export const useSpeechService = ({ langs = <const>['fr-FR', 'ja-JP', 'en-US', 'z
160
143
161
144
// 停止语音识别
162
145
const stopRecognizeSpeech = ( ) : Promise < void > => {
163
- mediaRecorder ! . stop ( )
164
-
146
+ mediaRecorder ?. stop ( )
165
147
isRecognizReadying . value = false
166
148
return new Promise ( ( resolve , reject ) => {
167
149
recognizer . value . stopContinuousRecognitionAsync ( ( ) => {
@@ -211,18 +193,23 @@ export const useSpeechService = ({ langs = <const>['fr-FR', 'ja-JP', 'en-US', 'z
211
193
} )
212
194
}
213
195
214
- const ssmlToSpeak = async ( text : string , { voice, voiceRate, lang } : { voice ?: string ; voiceRate ?: number ; lang ?: string } = { } ) => {
196
+ const ssmlToSpeak = async ( text : string , { voice, voiceRate, lang, voiceStyle } : { voice ?: string ; voiceRate ?: number ; lang ?: string ; voiceStyle ?: string } = { } ) => {
197
+ applySynthesizerConfiguration ( )
198
+
215
199
isSynthesizing . value = true
216
200
isSynthesError . value = false
217
201
const targetLang = lang || speechConfig . value . speechSynthesisLanguage
218
202
const targetVoice = voice || speechConfig . value . speechSynthesisVoiceName
219
203
const targetRate = voiceRate || rate . value
204
+ const targetFeel = voiceStyle || style . value
220
205
221
206
const ssml = `
222
- <speak version="1.0" xmlns="https://www.w3.org/2001/10/synthesis" xml:lang="${ targetLang } ">
207
+ <speak version="1.0" xmlns:mstts="https://www.w3.org/2001/mstts" xmlns="https://www.w3.org/2001/10/synthesis" xml:lang="${ targetLang } ">
223
208
<voice name="${ targetVoice } ">
224
209
<prosody rate="${ targetRate } ">
225
- ${ text }
210
+ <mstts:express-as style="${ targetFeel } " styledegree="1.5">
211
+ ${ text }
212
+ </mstts:express-as>
226
213
</prosody>
227
214
</voice>
228
215
</speak>`
@@ -274,6 +261,25 @@ export const useSpeechService = ({ langs = <const>['fr-FR', 'ja-JP', 'en-US', 'z
274
261
return res . voices
275
262
}
276
263
264
/**
 * Rebuilds the speaker output and the synthesizer that plays through it.
 *
 * A new `SpeakerAudioDestination` is stored in `player`, its playback
 * callbacks are wired to the `isPlaying` / `isPlayend` flags, and
 * `synthesizer` is replaced with a `SpeechSynthesizer` created from the
 * current `speechConfig` and that speaker output.
 */
function applySynthesizerConfiguration() {
  player.value = new SpeakerAudioDestination()

  // Read the destination back through the ref so the very same object that
  // `player` exposes is the one configured below and handed to the SDK.
  const destination = player.value

  // Playback start: flag as playing, unless synthesis has already been
  // marked as failed.
  destination.onAudioStart = (_sender) => {
    if (!isSynthesError.value) {
      isPlaying.value = true
      isPlayend.value = false
      console.log('playback started.....')
    }
  }

  // The playback-end event is what signals that playback has finished.
  destination.onAudioEnd = (_sender) => {
    console.log('playback finished....')
    isPlaying.value = false
    isPlayend.value = true
  }

  synthesizer.value = new SpeechSynthesizer(
    speechConfig.value,
    AudioConfig.fromSpeakerOutput(destination),
  )
}
282
+
277
283
return {
278
284
languages,
279
285
language,
@@ -292,6 +298,8 @@ export const useSpeechService = ({ langs = <const>['fr-FR', 'ja-JP', 'en-US', 'z
292
298
allVoices,
293
299
isSynthesizing,
294
300
rate,
301
+ style,
295
302
audioBlob,
303
+ player,
296
304
}
297
305
}
0 commit comments