From e3dae0b5a6f7f31e361bd58a89eacaace37519d9 Mon Sep 17 00:00:00 2001 From: Jing <42014615+jing332@users.noreply.github.com> Date: Tue, 11 Oct 2022 18:07:29 +0800 Subject: [PATCH 001/925] Update CHANGELOG.md --- CHANGELOG.md | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 43f3b0611..748aa9ce2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,7 @@ -1. 🥰 新增Creation接口(来自微软Speech Studio Demo),本接口基本与Azure相同,且服务器在东南亚,延迟低、更稳定。 -打开网页版(Azure),开启测试按钮上方的 "使用Creation接口" 选项以使用。 -2. 根据日志等级上色。 -3. 新的桌面快捷方式图标。 -4. 长按快捷开关(Android7+)自动跳转到APP +1. 网页版全部接口支持语言选择。 +2. 网页版添加顶部导航栏。 +3. 网页版Creation和Azure加载声音数据使用缓存(有效期1小时)。 +4. Creation: 修复文本长度超300导致跳段。 +5. Creation: 支持volume(需手动编辑朗读url)。 +6. 解决朗读文本中包含反斜杠"\"所导致跳段问题。(需重新网络导入) +7. 去除Go库的符号文件、调试信息以减小APP大小。 From 8fcf22f029124226d3fdd3176e62ff826f9d0f24 Mon Sep 17 00:00:00 2001 From: Jing <42014615+jing332@users.noreply.github.com> Date: Thu, 13 Oct 2022 18:52:31 +0800 Subject: [PATCH 002/925] =?UTF-8?q?=E9=80=82=E9=85=8DA13(SDK33)=E9=80=9A?= =?UTF-8?q?=E7=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/src/main/AndroidManifest.xml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/app/src/main/AndroidManifest.xml b/app/src/main/AndroidManifest.xml index 96cdd9303..dbd9b9977 100644 --- a/app/src/main/AndroidManifest.xml +++ b/app/src/main/AndroidManifest.xml @@ -8,6 +8,7 @@ + - \ No newline at end of file + From 99aff1a051bb3ec75c346044544e5be946830f99 Mon Sep 17 00:00:00 2001 From: Jing <42014615+jing332@users.noreply.github.com> Date: Wed, 19 Oct 2022 15:19:20 +0800 Subject: [PATCH 003/925] Update test.yml --- .github/workflows/test.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d88c958d1..fe58a3e17 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -4,6 +4,7 @@ on: push: branches: - "master" + - "dev" paths-ignore: - "README.md" - "CHANGELOG.md" @@ -85,4 +86,4 @@ jobs: uses: actions/upload-artifact@v3 with: name: TTS-Server_${{ env.ver_name }} - path: ${{ github.workspace }}/apk/*.apk \ No newline at end of file + path: ${{ github.workspace }}/apk/*.apk From 426aacc71bb44c971f6fe17b595f18087410547a Mon Sep 17 00:00:00 2001 From: Jing <42014615+jing332@users.noreply.github.com> Date: Tue, 11 Oct 2022 17:41:27 +0800 Subject: [PATCH 004/925] ttts --- .../jing332/tts_server_android/GoLibTest.kt | 32 ++ app/src/main/AndroidManifest.xml | 23 ++ .../service/tts/ByteArrayMediaDataSource.kt | 34 ++ .../service/tts/CheckVoiceData.kt | 27 ++ .../service/tts/TtsFormatManger.kt | 34 ++ .../service/tts/TtsOutputFormat.kt | 50 +++ .../service/tts/TtsService.kt | 290 ++++++++++++++++++ app/src/main/res/xml/tts_engine.xml | 5 + tts-server-lib/{gomobile.go => libserver.go} | 5 +- tts-server-lib/libtts.go | 33 ++ 10 files changed, 529 insertions(+), 4 deletions(-) create mode 100644 app/src/androidTest/java/com/github/jing332/tts_server_android/GoLibTest.kt create mode 100644 app/src/main/java/com/github/jing332/tts_server_android/service/tts/ByteArrayMediaDataSource.kt create mode 100644 app/src/main/java/com/github/jing332/tts_server_android/service/tts/CheckVoiceData.kt create mode 100644 app/src/main/java/com/github/jing332/tts_server_android/service/tts/TtsFormatManger.kt create mode 100644 app/src/main/java/com/github/jing332/tts_server_android/service/tts/TtsOutputFormat.kt create mode 100644 app/src/main/java/com/github/jing332/tts_server_android/service/tts/TtsService.kt create mode 100644 app/src/main/res/xml/tts_engine.xml rename tts-server-lib/{gomobile.go => libserver.go} (98%) create mode 100644 tts-server-lib/libtts.go diff --git a/app/src/androidTest/java/com/github/jing332/tts_server_android/GoLibTest.kt b/app/src/androidTest/java/com/github/jing332/tts_server_android/GoLibTest.kt new file mode 100644 index 000000000..a2856f4b2 --- /dev/null +++ b/app/src/androidTest/java/com/github/jing332/tts_server_android/GoLibTest.kt @@ -0,0 +1,32 @@ +package com.github.jing332.tts_server_android + +import android.util.Log +import androidx.test.ext.junit.runners.AndroidJUnit4 +import org.junit.Test +import org.junit.runner.RunWith +import java.lang.Exception + +@RunWith(AndroidJUnit4::class) +class GoLibTest { + @Test + fun goLib() { + val arg = tts_server_lib.CreationArg() + arg.text = "test" + arg.voiceName = "en-US-AIGenerate1Neural" + arg.voiceId = "5120f8b71-e1cc-4e80-b9ea-006d2f816864" + arg.style = "general" + arg.styleDegree = "1.0" + arg.role = "default" + arg.volume = "0%" + arg.format = "audio-16khz-32kbitrate-mono-mp3" + + try { + val audio = tts_server_lib.Tts_server_lib.getCreationAudio(arg) + Log.e("TestGo", audio.contentToString()) + } catch (e: Exception) { + Log.e("TestGo", e.message.toString()) + e.printStackTrace() + } + + } +} \ No newline at end of file diff --git a/app/src/main/AndroidManifest.xml b/app/src/main/AndroidManifest.xml index dbd9b9977..7181302ba 100644 --- a/app/src/main/AndroidManifest.xml +++ b/app/src/main/AndroidManifest.xml @@ -38,6 +38,29 @@ android:name=".ui.ScSwitchActivity" android:theme="@android:style/Theme.NoDisplay" /> + + + + + + + + + + + + + + + = data!!.size) { + return -1 + } + val endPosition = (position + size).toInt() + var size2 = size + if (endPosition > data!!.size) { + size2 -= endPosition - data!!.size + } + System.arraycopy(data, position.toInt(), buffer, offset, size2) + return size2 + } + + override fun getSize(): Long { + return data!!.size.toLong() + } + + override fun close() { + data = null + } + + init { + this.data = data + } +} \ No newline at end of file diff --git a/app/src/main/java/com/github/jing332/tts_server_android/service/tts/CheckVoiceData.kt b/app/src/main/java/com/github/jing332/tts_server_android/service/tts/CheckVoiceData.kt new file mode 100644 index 000000000..1295c2de8 --- /dev/null +++ b/app/src/main/java/com/github/jing332/tts_server_android/service/tts/CheckVoiceData.kt @@ -0,0 +1,27 @@ +package com.github.jing332.tts_server_android.service.tts + +import android.app.Activity +import android.content.Intent +import android.os.Bundle +import android.speech.tts.TextToSpeech +import android.util.Log +import android.widget.Toast + +class CheckVoiceData : Activity() { + override fun onCreate(savedInstanceState: Bundle?) { + super.onCreate(savedInstanceState) + val result = TextToSpeech.Engine.CHECK_VOICE_DATA_PASS + val returnData = Intent() + + val available: ArrayList = arrayListOf("zho-CHN") + val unavailable: ArrayList = arrayListOf() + + returnData.putStringArrayListExtra(TextToSpeech.Engine.EXTRA_AVAILABLE_VOICES, available) + returnData.putStringArrayListExtra( + TextToSpeech.Engine.EXTRA_UNAVAILABLE_VOICES, + unavailable + ) + setResult(result, returnData) + finish() + } +} \ No newline at end of file diff --git a/app/src/main/java/com/github/jing332/tts_server_android/service/tts/TtsFormatManger.kt b/app/src/main/java/com/github/jing332/tts_server_android/service/tts/TtsFormatManger.kt new file mode 100644 index 000000000..65915e4c1 --- /dev/null +++ b/app/src/main/java/com/github/jing332/tts_server_android/service/tts/TtsFormatManger.kt @@ -0,0 +1,34 @@ +package com.github.jing332.tts_server_android.service.tts + +import android.media.AudioFormat + +object TtsFormatManger { + val formats = arrayListOf() + + init { + formats.add( + TtsOutputFormat( + "audio-24khz-48kbitrate-mono-mp3", + 24000, + AudioFormat.ENCODING_PCM_16BIT + ) + ) + + formats.add( + TtsOutputFormat( + "audio-16khz-32kbitrate-mono-mp3", + 16000, + AudioFormat.ENCODING_PCM_16BIT + ) + ) + } + + fun getFormat(name: String): TtsOutputFormat? { + formats.forEach { v -> + if (v.name.equals(name)) { + return v + } + } + return null + } +} \ No newline at end of file diff --git a/app/src/main/java/com/github/jing332/tts_server_android/service/tts/TtsOutputFormat.kt b/app/src/main/java/com/github/jing332/tts_server_android/service/tts/TtsOutputFormat.kt new file mode 100644 index 000000000..5d49edd28 --- /dev/null +++ b/app/src/main/java/com/github/jing332/tts_server_android/service/tts/TtsOutputFormat.kt @@ -0,0 +1,50 @@ +package com.github.jing332.tts_server_android.service.tts + +class TtsOutputFormat { + val name: String + val value: String + val HZ: Int + val BitRate: Byte + + /** + * 是否需要解码 + */ + private var needDecode = false + + constructor(name: String, hz: Int, bitRate: Int) { + this.name = name + value = name + HZ = hz + BitRate = bitRate.toByte() + } + + constructor(name: String, hz: Int, bitRate: Int, needDecode: Boolean) { + this.name = name + if (name.contains(TAG)) { + value = name.substring(TAG.length) + } else { + value = name + } + HZ = hz + BitRate = bitRate.toByte() + this.needDecode = needDecode + } + + fun setNeedDecode(needDecode: Boolean) { + this.needDecode = needDecode + } + + override fun toString(): String { + return "TtsOutputFormat{" + + "name='" + name + '\'' + + ", value='" + value + '\'' + + ", HZ=" + HZ + + ", BitRate=" + BitRate + + ", needDecode=" + needDecode + + '}' + } + + companion object { + const val TAG = "\uD83D\uDC96" + } +} \ No newline at end of file diff --git a/app/src/main/java/com/github/jing332/tts_server_android/service/tts/TtsService.kt b/app/src/main/java/com/github/jing332/tts_server_android/service/tts/TtsService.kt new file mode 100644 index 000000000..2a8de2c86 --- /dev/null +++ b/app/src/main/java/com/github/jing332/tts_server_android/service/tts/TtsService.kt @@ -0,0 +1,290 @@ +package com.github.jing332.tts_server_android.service.tts + +import android.media.AudioFormat +import android.media.MediaCodec +import android.media.MediaExtractor +import android.media.MediaFormat +import android.os.Build +import android.speech.tts.SynthesisCallback +import android.speech.tts.SynthesisRequest +import android.speech.tts.TextToSpeech +import android.speech.tts.TextToSpeechService +import android.text.TextUtils +import android.util.Log +import android.widget.Toast +import okio.ByteString +import okio.ByteString.Companion.toByteString +import java.io.IOException +import java.nio.ByteBuffer +import java.util.* + + +class TtsService : TextToSpeechService() { + private val TAG = "TtsService" + private val currentLanguage: MutableList = mutableListOf("zho", "CHN", "") + + override fun onIsLanguageAvailable(lang: String?, country: String?, variant: String?): Int { + return if (Locale.SIMPLIFIED_CHINESE.isO3Language == lang || Locale.US.isO3Language == lang) { + if (Locale.SIMPLIFIED_CHINESE.isO3Country == country || Locale.US.isO3Country == country) TextToSpeech.LANG_COUNTRY_AVAILABLE else TextToSpeech.LANG_AVAILABLE + } else TextToSpeech.LANG_NOT_SUPPORTED + } + + override fun onGetLanguage(): Array { + Log.i(TAG, "onGetLanguage: ${currentLanguage.toTypedArray()}") + return currentLanguage.toTypedArray() + } + + override fun onLoadLanguage(lang: String?, country: String?, variant: String?): Int { + val result = onIsLanguageAvailable(lang, country, variant) + Log.i(TAG, "onLoadLanguage ret: $result, $lang, $country, $variant") + currentLanguage.clear() + currentLanguage.addAll( + mutableListOf( + lang.toString(), + country.toString(), + variant.toString() + ) + ) + return result + } + + override fun onStop() { + Log.e("TTS", "onStop") + } + + override fun onSynthesizeText(request: SynthesisRequest?, callback: SynthesisCallback?) { + val arg = tts_server_lib.CreationArg() + arg.text = request?.charSequenceText.toString() + val rate = request?.speechRate?.toFloat() + + arg.voiceName = "zh-CN-XiaoxiaoNeural" + arg.voiceId = "5f55541d-c844-4e04-a7f8-1723ffbea4a9" + arg.style = "general" + arg.styleDegree = "1.0" + arg.role = "default" + if (rate != null) { + arg.rate = "${(rate - 20 * 2)}%" + } + arg.volume = "0%" + arg.format = "audio-24khz-48kbitrate-mono-mp3" + val format = TtsFormatManger.getFormat(arg.format) + if (format == null) { + Log.e(TAG, "不支持解码此格式: ${arg.format}") + return + } + Log.e(TAG, "${arg.rate}") + + callback?.start(format.HZ, format.BitRate.toInt(), 1) + try { + val audio = tts_server_lib.Tts_server_lib.getCreationAudio(arg) + Log.e(TAG, "获取成功, size: ${audio.size}") + + doDecode(callback!!, "", audio) + } catch (e: Exception) { + e.printStackTrace() + } + } + + var isSynthesizing = false + private val currentMime: String? = null + private var mediaCodec: MediaCodec? = null + private var oldMime: String? = null + + /** + * 根据mime创建MediaCodec + * 当Mime未变化时复用MediaCodec + * + * @param mime mime + * @return MediaCodec + */ + private fun getMediaCodec(mime: String, mediaFormat: MediaFormat): MediaCodec { + if (mediaCodec == null || mime != oldMime) { + if (null != mediaCodec) { + mediaCodec!!.release() +// GcManger.getInstance().doGC() + } + try { + mediaCodec = MediaCodec.createDecoderByType(mime) + oldMime = mime + } catch (ioException: IOException) { + //设备无法创建,直接抛出 + ioException.printStackTrace() + throw RuntimeException(ioException) + } + } + mediaCodec!!.reset() + mediaCodec!!.configure(mediaFormat, null, null, 0) + return mediaCodec as MediaCodec + } + + + @Synchronized + private fun doDecode(cb: SynthesisCallback, format: String, data: ByteArray) { + isSynthesizing = true + try { + val mediaExtractor = MediaExtractor() + if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.M) { + //在高版本上使用自定义MediaDataSource + mediaExtractor.setDataSource(ByteArrayMediaDataSource(data)) + } else { + //在低版本上使用Base64音频数据 + mediaExtractor.setDataSource( + "data:" + currentMime.toString() + ";base64," + data.toByteString().base64() + ) + } + + //找到音频流的索引 + var audioTrackIndex = -1 + var mime: String? = null + var trackFormat: MediaFormat? = null + for (i in 0 until mediaExtractor.trackCount) { + trackFormat = mediaExtractor.getTrackFormat(i) + mime = trackFormat.getString(MediaFormat.KEY_MIME) + if (!TextUtils.isEmpty(mime) && mime!!.startsWith("audio")) { + audioTrackIndex = i + Log.d(TAG, "找到音频流的索引为:$audioTrackIndex") + Log.d(TAG, "找到音频流的mime为:$mime") + break + } + } + //没有找到音频流的情况下 + if (audioTrackIndex == -1) { + Log.e(TAG, "initAudioDecoder: 没有找到音频流") +// updateNotification("TTS服务-错误中", "没有找到音频流") + cb.done() + isSynthesizing = false + return + } + + //Log.e("Track", trackFormat.toString()); + + + //opus的音频必须设置这个才能正确的解码 + /* if ("audio/opus" == mime) { + //Log.d(TAG, ByteString.of(trackFormat.getByteBuffer("csd-0")).hex()); + val buf = Buffer() + // Magic Signature:固定头,占8个字节,为字符串OpusHead + buf.write("OpusHead".getBytes(StandardCharsets.UTF_8)) + // Version:版本号,占1字节,固定为0x01 + buf.writeByte(1) + // Channel Count:通道数,占1字节,根据音频流通道自行设置,如0x02 + buf.writeByte(1) + // Pre-skip:回放的时候从解码器中丢弃的samples数量,占2字节,为小端模式,默认设置0x00, + buf.writeShortLe(0) + // Input Sample Rate (Hz):音频流的Sample Rate,占4字节,为小端模式,根据实际情况自行设置 + buf.writeIntLe(currentFormat.HZ) + //Output Gain:输出增益,占2字节,为小端模式,没有用到默认设置0x00, 0x00就好 + buf.writeShortLe(0) + // Channel Mapping Family:通道映射系列,占1字节,默认设置0x00就好 + buf.writeByte(0) + //Channel Mapping Table:可选参数,上面的Family默认设置0x00的时候可忽略 + if (BuildConfig.DEBUG) { + Log.e(TAG, + trackFormat!!.getByteBuffer("csd-1")!! + .order(ByteOrder.nativeOrder()).long.toString() + "" + ) + Log.e(TAG, + trackFormat.getByteBuffer("csd-2")!! + .order(ByteOrder.nativeOrder()).long.toString() + "" + ) + Log.e(TAG, ByteString.of(*trackFormat.getByteBuffer("csd-2")!!.array()).hex()) + } + val csd1bytes = byteArrayOf(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00) + val csd2bytes = byteArrayOf(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00) + val hd: ByteString = buf.readByteString() + val csd0: ByteBuffer = ByteBuffer.wrap(hd.toByteArray()) + trackFormat!!.setByteBuffer("csd-0", csd0) + val csd1: ByteBuffer = ByteBuffer.wrap(csd1bytes) + trackFormat.setByteBuffer("csd-1", csd1) + val csd2: ByteBuffer = ByteBuffer.wrap(csd2bytes) + trackFormat.setByteBuffer("csd-2", csd2) + }*/ + + //选择此音轨 + mediaExtractor.selectTrack(audioTrackIndex) + + //创建解码器 + val mediaCodec: MediaCodec = + getMediaCodec( + mime.toString(), + trackFormat!! + ) //MediaCodec.createDecoderByType(mime); + mediaCodec.start() + val bufferInfo = MediaCodec.BufferInfo() + var inputBuffer: ByteBuffer? + val TIME_OUT_US: Long = 10000 + while (isSynthesizing) { + //获取可用的inputBuffer,输入参数-1代表一直等到,0代表不等待,10*1000代表10秒超时 + //超时时间10秒 + val inputIndex = mediaCodec.dequeueInputBuffer(TIME_OUT_US) + if (inputIndex < 0) { + break + } + bufferInfo.presentationTimeUs = mediaExtractor.sampleTime + //bufferInfo.flags=mediaExtractor.getSampleFlags(); + inputBuffer = mediaCodec.getInputBuffer(inputIndex) + if (inputBuffer != null) { + inputBuffer.clear() + } else { + continue + } + //从流中读取的采用数据的大小 + val sampleSize = mediaExtractor.readSampleData(inputBuffer, 0) + if (sampleSize > 0) { + bufferInfo.size = sampleSize + //入队解码 + mediaCodec.queueInputBuffer(inputIndex, 0, sampleSize, 0, 0) + //移动到下一个采样点 + mediaExtractor.advance() + } else { + break + } + + //取解码后的数据 + var outputIndex = mediaCodec.dequeueOutputBuffer(bufferInfo, TIME_OUT_US) + //不一定能一次取完,所以要循环取 + var outputBuffer: ByteBuffer? + var pcmData: ByteArray + while (outputIndex >= 0) { + outputBuffer = mediaCodec.getOutputBuffer(outputIndex) + pcmData = ByteArray(bufferInfo.size) + if (outputBuffer != null) { + outputBuffer.get(pcmData) + outputBuffer.clear() //用完后清空,复用 + } + cb.audioAvailable(pcmData, 0, bufferInfo.size) + //释放 + mediaCodec.releaseOutputBuffer(outputIndex, false) + //再次获取数据 + outputIndex = mediaCodec.dequeueOutputBuffer(bufferInfo, TIME_OUT_US) + } + } + mediaCodec.reset() + cb.done() + isSynthesizing = false + } catch (e: Exception) { + Log.e(TAG, "doDecode", e) + cb.error() + isSynthesizing = false + //GcManger.getInstance().doGC(); + } + } + + + @Synchronized + private fun doUnDecode(cb: SynthesisCallback, format: String, data: ByteString) { + isSynthesizing = true + val length: Int = data.toByteArray().size + //最大BufferSize + val maxBufferSize = cb.maxBufferSize + var offset = 0 + while (offset < length && isSynthesizing) { + val bytesToWrite = Math.min(maxBufferSize, length - offset) + cb.audioAvailable(data.toByteArray(), offset, bytesToWrite) + offset += bytesToWrite + } + cb.done() + isSynthesizing = false + } + +} \ No newline at end of file diff --git a/app/src/main/res/xml/tts_engine.xml b/app/src/main/res/xml/tts_engine.xml new file mode 100644 index 000000000..e30b156b1 --- /dev/null +++ b/app/src/main/res/xml/tts_engine.xml @@ -0,0 +1,5 @@ + + + + \ No newline at end of file diff --git a/tts-server-lib/gomobile.go b/tts-server-lib/libserver.go similarity index 98% rename from tts-server-lib/gomobile.go rename to tts-server-lib/libserver.go index b1c1cd08c..1cda5c891 100644 --- a/tts-server-lib/gomobile.go +++ b/tts-server-lib/libserver.go @@ -1,15 +1,12 @@ package tts_server_lib import ( + "github.com/jing332/tts-server-go/service" log "github.com/sirupsen/logrus" "io" "time" ) -import ( - "github.com/jing332/tts-server-go/service" -) - type LogCallback interface { Log(level int32, msg string) } diff --git a/tts-server-lib/libtts.go b/tts-server-lib/libtts.go new file mode 100644 index 000000000..230905426 --- /dev/null +++ b/tts-server-lib/libtts.go @@ -0,0 +1,33 @@ +package tts_server_lib + +import ( + "github.com/jing332/tts-server-go/service/creation" +) + +type CreationArg creation.SpeakArg + +var creationApi *creation.Creation + +func GetCreationAudio(arg *CreationArg) ([]byte, error) { + if creationApi == nil { + creationApi = &creation.Creation{} + } + + s := creation.SpeakArg(*arg) + audio, err := creationApi.GetAudio(&s) + if err != nil { + return nil, err + } + return audio, nil +} +func GetCreationVoices() ([]byte, error) { + token, err := creation.GetToken() + if err != nil { + return nil, err + } + data, err := creation.GetVoices(token) + if err != nil { + return nil, err + } + return data, nil +} From 58a255fca6732254efe94481e19aa06b442fa18e Mon Sep 17 00:00:00 2001 From: Jing <42014615+jing332@users.noreply.github.com> Date: Sun, 16 Oct 2022 16:06:46 +0800 Subject: [PATCH 005/925] =?UTF-8?q?=E5=88=9D=E6=AD=A5=E6=94=AF=E6=8C=81?= =?UTF-8?q?=E7=B3=BB=E7=BB=9FTTS:=20Creation=E6=8E=A5=E5=8F=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .idea/gradle.xml | 1 - .idea/misc.xml | 5 + app/build.gradle | 26 ++- app/src/main/AndroidManifest.xml | 25 +- .../service/tts/TtsConfig.kt | 75 ++++++ .../service/tts/TtsFormatManger.kt | 71 +++++- .../service/tts/TtsOutputFormat.kt | 62 +++-- .../service/tts/TtsService.kt | 158 ++++++++++--- .../service/tts/data/CreationVoicesItem.kt | 76 ++++++ .../service/tts/data/EdgeVoicesItem.kt | 22 ++ .../tts_server_android/ui/MainActivity.kt | 2 +- .../ui/TtsSettingsActivity.kt | 220 ++++++++++++++++++ .../tts_server_android/utils/GcManager.kt | 21 ++ .../tts_server_android/utils/NormUtil.kt | 25 ++ .../utils/SharedPrefsUtils.kt | 23 +- .../main/res/layout/activity_tts_settings.xml | 131 +++++++++++ app/src/main/res/values/strings.xml | 8 + app/src/main/res/xml/tts_engine.xml | 2 +- build.gradle | 1 + tts-server-lib/go.mod | 2 +- tts-server-lib/go.sum | 4 +- tts-server-lib/httpTools.go | 25 ++ tts-server-lib/libserver.go | 6 +- tts-server-lib/libtts.go | 31 ++- 24 files changed, 911 insertions(+), 111 deletions(-) create mode 100644 app/src/main/java/com/github/jing332/tts_server_android/service/tts/TtsConfig.kt create mode 100644 app/src/main/java/com/github/jing332/tts_server_android/service/tts/data/CreationVoicesItem.kt create mode 100644 app/src/main/java/com/github/jing332/tts_server_android/service/tts/data/EdgeVoicesItem.kt create mode 100644 app/src/main/java/com/github/jing332/tts_server_android/ui/TtsSettingsActivity.kt create mode 100644 app/src/main/java/com/github/jing332/tts_server_android/utils/GcManager.kt create mode 100644 app/src/main/java/com/github/jing332/tts_server_android/utils/NormUtil.kt create mode 100644 app/src/main/res/layout/activity_tts_settings.xml create mode 100644 tts-server-lib/httpTools.go diff --git a/.idea/gradle.xml b/.idea/gradle.xml index 526b4c25c..a2d7c2133 100644 --- a/.idea/gradle.xml +++ b/.idea/gradle.xml @@ -13,7 +13,6 @@ - diff --git a/.idea/misc.xml b/.idea/misc.xml index 48719d61d..d24ec87e5 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -13,8 +13,13 @@ + + + + + diff --git a/app/build.gradle b/app/build.gradle index 454e3b866..2ac0f2e36 100644 --- a/app/build.gradle +++ b/app/build.gradle @@ -1,7 +1,8 @@ plugins { id 'com.android.application' id 'org.jetbrains.kotlin.android' -// id 'kotlin-kapt' + id 'kotlinx-serialization' + id 'kotlin-kapt' } android { @@ -47,8 +48,6 @@ android { } } - - compileOptions { coreLibraryDesugaringEnabled true sourceCompatibility JavaVersion.VERSION_11 @@ -66,28 +65,31 @@ android { outputFileName = "TTS-Server-v${variant.versionName}.apk" } } + + buildFeatures { + viewBinding true + } } dependencies { coreLibraryDesugaring('com.android.tools:desugar_jdk_libs:1.1.6') implementation fileTree(include: ['*.jar', '*.aar'], dir: 'libs') - implementation 'androidx.core:core-ktx:1.7.0' + implementation 'androidx.core:core-ktx:1.8.0' //UI - implementation 'androidx.appcompat:appcompat:1.3.0' - implementation 'com.google.android.material:material:1.4.0' - implementation 'androidx.constraintlayout:constraintlayout:2.0.4' + implementation 'androidx.appcompat:appcompat:1.5.1' + implementation 'com.google.android.material:material:1.6.1' + implementation 'androidx.constraintlayout:constraintlayout:2.1.4' implementation 'com.squareup.okhttp3:okhttp:4.10.0' -// implementation("org.jetbrains.kotlin:kotlin-stdlib-jdk8:$kotlin_version") + implementation('org.jetbrains.kotlinx:kotlinx-serialization-json:1.4.1') + + implementation 'org.jetbrains.kotlinx:kotlinx-coroutines-android:1.6.4' -// kapt('com.squareup.moshi:moshi-kotlin-codegen:1.14.0') -// implementation('com.squareup.moshi:moshi:1.14.0') +// implementation("org.jetbrains.kotlin:kotlin-stdlib-jdk8:$kotlin_version") -// implementation 'androidx.work:work-runtime-ktx:2.7.1' -// implementation("org.jetbrains.kotlinx:kotlinx-coroutines-android:1.3.9") testImplementation 'junit:junit:4.13.2' androidTestImplementation 'androidx.test.ext:junit:1.1.3' diff --git a/app/src/main/AndroidManifest.xml b/app/src/main/AndroidManifest.xml index 7181302ba..62d905552 100644 --- a/app/src/main/AndroidManifest.xml +++ b/app/src/main/AndroidManifest.xml @@ -7,8 +7,8 @@ - - + + + android:launchMode="singleTop"> @@ -29,6 +28,7 @@ + @@ -48,10 +48,21 @@ + + + + + + + + + android:label="@string/app_name" + tools:ignore="ExportedService"> @@ -86,4 +97,4 @@ - + \ No newline at end of file diff --git a/app/src/main/java/com/github/jing332/tts_server_android/service/tts/TtsConfig.kt b/app/src/main/java/com/github/jing332/tts_server_android/service/tts/TtsConfig.kt new file mode 100644 index 000000000..562432f50 --- /dev/null +++ b/app/src/main/java/com/github/jing332/tts_server_android/service/tts/TtsConfig.kt @@ -0,0 +1,75 @@ +package com.github.jing332.tts_server_android.service.tts + +import android.content.Context + +class TtsConfig( + var api: Int, + var locale: String, + var voiceName: String, + var voiceId: String, + var format: String, + var volume: Int +) { + constructor() : this( + TtsOutputFormat.API_CREATION, + "zh-CN", + "zh-CN-XiaoxiaoNeural", + "5f55541d-c844-4e04-a7f8-1723ffbea4a9", + "audio-24khz-48kbitrate-mono-mp3", + 50 + ) + + /* 转为百分比字符串 */ + fun volumeToPctString(): String { + return "${volume - 50}%" + } + + private lateinit var ctx: Context + + fun loadConfig(ctx: Context): TtsConfig { + this.ctx = ctx + api = getConfigInt("api", TtsOutputFormat.API_CREATION) + locale = getConfigString("locale", "zh-CN") + voiceName = getConfigString("voiceName", "zh-CN-XiaoxiaoNeural") + voiceId = getConfigString("voiceId", "5f55541d-c844-4e04-a7f8-1723ffbea4a9") + format = getConfigString("format", "audio-24khz-48kbitrate-mono-mp3") + volume = getConfigInt("volume", 50) + return this + } + + fun writeConfig(ctx: Context) { + this.ctx = ctx + setConfigInt("api", api) + setConfigString("locale", locale) + setConfigString("voiceName", voiceName) + setConfigString("voiceId", voiceId) + setConfigString("format", format) + setConfigInt("volume", volume) + } + + private fun getConfigString(key: String, default: String): String { + val pref = ctx.getSharedPreferences("tts_service", Context.MODE_PRIVATE) + return pref.getString(key, default).toString() + } + + private fun setConfigString(key: String, value: String) { + ctx.getSharedPreferences("tts_service", Context.MODE_PRIVATE).edit() + .putString(key, value) + .apply() + } + + private fun getConfigInt(key: String, default: Int): Int { + val pref = ctx.getSharedPreferences("tts_service", Context.MODE_PRIVATE) + return pref.getInt(key, default) + } + + private fun setConfigInt(key: String, value: Int) { + ctx.getSharedPreferences("tts_service", Context.MODE_PRIVATE).edit() + .putInt(key, value) + .apply() + } + + override fun toString(): String { + return "locale: $locale, voiceName: $voiceName, voiceId: $voiceId, format: $format, volume: $volume" + } +} \ No newline at end of file diff --git a/app/src/main/java/com/github/jing332/tts_server_android/service/tts/TtsFormatManger.kt b/app/src/main/java/com/github/jing332/tts_server_android/service/tts/TtsFormatManger.kt index 65915e4c1..0f8121359 100644 --- a/app/src/main/java/com/github/jing332/tts_server_android/service/tts/TtsFormatManger.kt +++ b/app/src/main/java/com/github/jing332/tts_server_android/service/tts/TtsFormatManger.kt @@ -8,27 +8,88 @@ object TtsFormatManger { init { formats.add( TtsOutputFormat( - "audio-24khz-48kbitrate-mono-mp3", + "webm-24khz-16bit-mono-opus", 24000, - AudioFormat.ENCODING_PCM_16BIT + AudioFormat.ENCODING_PCM_16BIT, + TtsOutputFormat.SupportedApi(isEdge = true, isAzure = true, isCreation = false), + true ) ) - formats.add( TtsOutputFormat( "audio-16khz-32kbitrate-mono-mp3", 16000, - AudioFormat.ENCODING_PCM_16BIT + AudioFormat.ENCODING_PCM_16BIT, + TtsOutputFormat.SupportedApi(isEdge = true, isAzure = true, isCreation = true), + true + ) + ) + formats.add( + TtsOutputFormat( + "audio-24khz-48kbitrate-mono-mp3", + 24000, + AudioFormat.ENCODING_PCM_16BIT, + TtsOutputFormat.SupportedApi(isEdge = true, isAzure = true, isCreation = true), true + ) + ) + formats.add( + TtsOutputFormat( + "audio-24khz-96kbitrate-mono-mp3", + 24000, + AudioFormat.ENCODING_PCM_16BIT, + TtsOutputFormat.SupportedApi(isEdge = true, isAzure = true, isCreation = true), true + ) + ) + + formats.add( + TtsOutputFormat( + "audio-48khz-96kbitrate-mono-mp3", + 48000, + AudioFormat.ENCODING_PCM_16BIT, + TtsOutputFormat.SupportedApi(isEdge = true, isAzure = true, isCreation = true), true ) ) } + /* 通过name查找格式Item */ fun getFormat(name: String): TtsOutputFormat? { formats.forEach { v -> - if (v.name.equals(name)) { + if (v.name == name) { return v } } return null } + + fun getAllFormatName(): ArrayList { + val list = arrayListOf() + formats.forEach { v -> + list.add(v.name) + } + return list + } + + fun getFormatsBySupportedApi(api: Int): ArrayList { + val list = arrayListOf() + formats.forEach { v -> + when (api) { + TtsOutputFormat.API_EDGE -> { + if (v.supportedApi.isEdge){ + list.add(v.value) + } + } + TtsOutputFormat.API_AZURE -> { + if (v.supportedApi.isAzure){ + list.add(v.value) + } + } + TtsOutputFormat.API_CREATION -> { + if (v.supportedApi.isCreation){ + list.add(v.value) + } + } + } + } + return list + } } \ No newline at end of file diff --git a/app/src/main/java/com/github/jing332/tts_server_android/service/tts/TtsOutputFormat.kt b/app/src/main/java/com/github/jing332/tts_server_android/service/tts/TtsOutputFormat.kt index 5d49edd28..05c085380 100644 --- a/app/src/main/java/com/github/jing332/tts_server_android/service/tts/TtsOutputFormat.kt +++ b/app/src/main/java/com/github/jing332/tts_server_android/service/tts/TtsOutputFormat.kt @@ -1,50 +1,40 @@ package com.github.jing332.tts_server_android.service.tts -class TtsOutputFormat { - val name: String - val value: String - val HZ: Int - val BitRate: Byte - - /** - * 是否需要解码 - */ - private var needDecode = false - - constructor(name: String, hz: Int, bitRate: Int) { - this.name = name - value = name - HZ = hz - BitRate = bitRate.toByte() - } - - constructor(name: String, hz: Int, bitRate: Int, needDecode: Boolean) { - this.name = name - if (name.contains(TAG)) { - value = name.substring(TAG.length) - } else { - value = name - } - HZ = hz - BitRate = bitRate.toByte() - this.needDecode = needDecode - } - - fun setNeedDecode(needDecode: Boolean) { - this.needDecode = needDecode - } +class TtsOutputFormat( + val name: String, + val value: String, + val hz: Int, + val bitRate: Int, + val supportedApi: SupportedApi, + val needDecode: Boolean +) { + constructor( + name: String, + hz: Int, + bitRate: Int, + supportedApi: SupportedApi, + needDecode: Boolean + ) : this(name, name, hz, bitRate, supportedApi, needDecode) override fun toString(): String { return "TtsOutputFormat{" + "name='" + name + '\'' + ", value='" + value + '\'' + - ", HZ=" + HZ + - ", BitRate=" + BitRate + + ", hz=" + hz + + ", bitRate=" + bitRate + ", needDecode=" + needDecode + '}' } + class SupportedApi( + var isEdge: Boolean, + var isAzure: Boolean, + var isCreation: Boolean + ) + companion object { - const val TAG = "\uD83D\uDC96" + const val API_EDGE = 0 + const val API_AZURE = 1 + const val API_CREATION = 2 } } \ No newline at end of file diff --git a/app/src/main/java/com/github/jing332/tts_server_android/service/tts/TtsService.kt b/app/src/main/java/com/github/jing332/tts_server_android/service/tts/TtsService.kt index 2a8de2c86..5d88a79c2 100644 --- a/app/src/main/java/com/github/jing332/tts_server_android/service/tts/TtsService.kt +++ b/app/src/main/java/com/github/jing332/tts_server_android/service/tts/TtsService.kt @@ -1,17 +1,29 @@ package com.github.jing332.tts_server_android.service.tts +import android.content.BroadcastReceiver +import android.content.Context +import android.content.Intent +import android.content.IntentFilter import android.media.AudioFormat import android.media.MediaCodec import android.media.MediaExtractor import android.media.MediaFormat import android.os.Build +import android.os.PowerManager +import android.os.SystemClock import android.speech.tts.SynthesisCallback import android.speech.tts.SynthesisRequest import android.speech.tts.TextToSpeech import android.speech.tts.TextToSpeechService import android.text.TextUtils import android.util.Log -import android.widget.Toast +import com.github.jing332.tts_server_android.ui.TtsSettingsActivity +import com.github.jing332.tts_server_android.utils.GcManger +import com.github.jing332.tts_server_android.utils.NormUtil +import kotlinx.coroutines.DelicateCoroutinesApi +import kotlinx.coroutines.GlobalScope +import kotlinx.coroutines.delay +import kotlinx.coroutines.launch import okio.ByteString import okio.ByteString.Companion.toByteString import java.io.IOException @@ -20,9 +32,37 @@ import java.util.* class TtsService : TextToSpeechService() { - private val TAG = "TtsService" + companion object { + const val TAG = "TtsService" + } + private val currentLanguage: MutableList = mutableListOf("zho", "CHN", "") + lateinit var ttsConfig: TtsConfig + private val mReceiver: MyReceiver by lazy { return@lazy MyReceiver() } + private val mWakeLock by lazy { + val powerManager = getSystemService(POWER_SERVICE) as PowerManager + return@lazy powerManager.newWakeLock( + PowerManager.PARTIAL_WAKE_LOCK or PowerManager.ON_AFTER_RELEASE, + "tts-server:tts" + ) + } + + var isSynthesizing = false + + override fun onCreate() { + super.onCreate() + val intentFilter = IntentFilter(TtsSettingsActivity.ACTION_ON_CONFIG_CHANGED) + registerReceiver(mReceiver, intentFilter) + mWakeLock.acquire(60 * 20 * 100) + } + + override fun onDestroy() { + super.onDestroy() + unregisterReceiver(mReceiver) + mWakeLock.release() + } + override fun onIsLanguageAvailable(lang: String?, country: String?, variant: String?): Int { return if (Locale.SIMPLIFIED_CHINESE.isO3Language == lang || Locale.US.isO3Language == lang) { if (Locale.SIMPLIFIED_CHINESE.isO3Country == country || Locale.US.isO3Country == country) TextToSpeech.LANG_COUNTRY_AVAILABLE else TextToSpeech.LANG_AVAILABLE @@ -36,7 +76,7 @@ class TtsService : TextToSpeechService() { override fun onLoadLanguage(lang: String?, country: String?, variant: String?): Int { val result = onIsLanguageAvailable(lang, country, variant) - Log.i(TAG, "onLoadLanguage ret: $result, $lang, $country, $variant") +// Log.i(TAG, "onLoadLanguage ret: $result, $lang, $country, $variant") currentLanguage.clear() currentLanguage.addAll( mutableListOf( @@ -45,47 +85,108 @@ class TtsService : TextToSpeechService() { variant.toString() ) ) + + if (!this::ttsConfig.isInitialized) { + ttsConfig = TtsConfig().loadConfig(this) + } + return result } override fun onStop() { - Log.e("TTS", "onStop") + Log.d(TAG, "onStop") + isSynthesizing = false + } + + private val norm: NormUtil by lazy { + return@lazy NormUtil(500F, 0F, 100F, 0F) } + @OptIn(DelicateCoroutinesApi::class) override fun onSynthesizeText(request: SynthesisRequest?, callback: SynthesisCallback?) { + val startTime = SystemClock.elapsedRealtime() + synchronized(this) { + reNewWakeLock() + + val text = request?.charSequenceText.toString() + Log.d(TAG, "接收到文本: $text") + if (text.isEmpty()) { + Log.d(TAG, "文本为空,跳过") + callback!!.start( + 16000, + AudioFormat.ENCODING_PCM_16BIT, 1 + ) + callback.done() + return + } + + GlobalScope.launch { + while (isSynthesizing) { + try { + delay(100) + } catch (e: InterruptedException) { + e.printStackTrace() + } + val time = SystemClock.elapsedRealtime() - startTime + //超时15秒后跳过,保证长句不会被跳过 + if (time > 15000) { + callback!!.error(TextToSpeech.ERROR_NETWORK_TIMEOUT) + isSynthesizing = false + } + } + } + synthesizeText(request, callback) + } + } + + private fun synthesizeText(request: SynthesisRequest?, callback: SynthesisCallback?) { + val rate = request?.speechRate?.toFloat() val arg = tts_server_lib.CreationArg() arg.text = request?.charSequenceText.toString() - val rate = request?.speechRate?.toFloat() - - arg.voiceName = "zh-CN-XiaoxiaoNeural" - arg.voiceId = "5f55541d-c844-4e04-a7f8-1723ffbea4a9" + arg.voiceName = ttsConfig.voiceName + arg.voiceId = ttsConfig.voiceId arg.style = "general" arg.styleDegree = "1.0" arg.role = "default" - if (rate != null) { - arg.rate = "${(rate - 20 * 2)}%" - } - arg.volume = "0%" - arg.format = "audio-24khz-48kbitrate-mono-mp3" + arg.rate = "${norm.normalize(rate!!) - 50}%" + arg.volume = ttsConfig.volumeToPctString() + arg.format = ttsConfig.format + val format = TtsFormatManger.getFormat(arg.format) + Log.d(TAG, "$arg") if (format == null) { Log.e(TAG, "不支持解码此格式: ${arg.format}") + callback!!.start( + 16000, + AudioFormat.ENCODING_PCM_16BIT, 1 + ) + callback.error(TextToSpeech.ERROR_INVALID_REQUEST) return } - Log.e(TAG, "${arg.rate}") - callback?.start(format.HZ, format.BitRate.toInt(), 1) + callback?.start(format.hz, format.bitRate, 1) try { val audio = tts_server_lib.Tts_server_lib.getCreationAudio(arg) - Log.e(TAG, "获取成功, size: ${audio.size}") - - doDecode(callback!!, "", audio) + if (audio != null) { + Log.i(TAG, "获取音频成功, size: ${audio.size}") + doDecode(callback!!, "", audio) + } else { + callback?.error() + } } catch (e: Exception) { e.printStackTrace() } } - var isSynthesizing = false + private fun reNewWakeLock() { + if (!mWakeLock.isHeld) { + mWakeLock.acquire(60 * 20 * 1000) + GcManger.doGC() + Log.i(TAG, "刷新WakeLock 20分钟") + } + } + + private val currentMime: String? = null private var mediaCodec: MediaCodec? = null private var oldMime: String? = null @@ -101,7 +202,8 @@ class TtsService : TextToSpeechService() { if (mediaCodec == null || mime != oldMime) { if (null != mediaCodec) { mediaCodec!!.release() -// GcManger.getInstance().doGC() + GcManger.doGC() + System.gc() } try { mediaCodec = MediaCodec.createDecoderByType(mime) @@ -142,23 +244,18 @@ class TtsService : TextToSpeechService() { mime = trackFormat.getString(MediaFormat.KEY_MIME) if (!TextUtils.isEmpty(mime) && mime!!.startsWith("audio")) { audioTrackIndex = i - Log.d(TAG, "找到音频流的索引为:$audioTrackIndex") - Log.d(TAG, "找到音频流的mime为:$mime") + Log.d(TAG, "找到音频流的index:$audioTrackIndex, mime:$mime") break } } //没有找到音频流的情况下 if (audioTrackIndex == -1) { Log.e(TAG, "initAudioDecoder: 没有找到音频流") -// updateNotification("TTS服务-错误中", "没有找到音频流") cb.done() isSynthesizing = false return } - //Log.e("Track", trackFormat.toString()); - - //opus的音频必须设置这个才能正确的解码 /* if ("audio/opus" == mime) { //Log.d(TAG, ByteString.of(trackFormat.getByteBuffer("csd-0")).hex()); @@ -262,6 +359,7 @@ class TtsService : TextToSpeechService() { mediaCodec.reset() cb.done() isSynthesizing = false + Log.d(TAG, "播放完毕") } catch (e: Exception) { Log.e(TAG, "doDecode", e) cb.error() @@ -287,4 +385,12 @@ class TtsService : TextToSpeechService() { isSynthesizing = false } + inner class MyReceiver : BroadcastReceiver() { + override fun onReceive(context: Context?, intent: Intent?) { + if (intent?.action == TtsSettingsActivity.ACTION_ON_CONFIG_CHANGED) { + ttsConfig.loadConfig(this@TtsService) + } + } + } + } \ No newline at end of file diff --git a/app/src/main/java/com/github/jing332/tts_server_android/service/tts/data/CreationVoicesItem.kt b/app/src/main/java/com/github/jing332/tts_server_android/service/tts/data/CreationVoicesItem.kt new file mode 100644 index 000000000..40b1f2df8 --- /dev/null +++ b/app/src/main/java/com/github/jing332/tts_server_android/service/tts/data/CreationVoicesItem.kt @@ -0,0 +1,76 @@ +package com.github.jing332.tts_server_android.service.tts.data + +import kotlinx.serialization.SerialName +import kotlinx.serialization.Serializable + +//@Serializable +//class CreationVoices : List() + +@Serializable +data class CreationVoicesItem( + @SerialName("id") val id: String, + @SerialName("locale") val locale: String, + @SerialName("name") val name: String, + @SerialName("properties") val properties: Properties, + @SerialName("shortName") val shortName: String, + @SerialName("voiceType") val voiceType: String +// @SerialName("categories") +// val categories: List, +// @SerialName("masterpieces") +// val masterpieces: List, +// @SerialName("samples") +// val samples: Samples, +) + +@Serializable +data class Properties( + @SerialName("DisplayName") val displayName: String, + @SerialName("DisplayVoiceName") val displayVoiceName: String, + @SerialName("LocalName") val localName: String, + @SerialName("LocaleDescription") val localeDescription: String, + @SerialName("VoiceRoleNames") val voiceRoleNames: String, + @SerialName("VoiceStyleNames") val voiceStyleNames: String + +// @SerialName("ExpressAsRoleIdDefinitions") +// val expressAsRoleIdDefinitions: String, +// @SerialName("ExpressAsStyleIdDefinitions") +// val expressAsStyleIdDefinitions: String, +// @SerialName("FrontendVoiceType") +// val frontendVoiceType: String, +// @SerialName("Gender") +// val gender: String, +// @SerialName("VoiceStyleNameDefinitions") +// val voiceStyleNameDefinitions: String, +// @SerialName("PreviewSentence") +// val previewSentence: String, +// @SerialName("ReleaseScope") +// val releaseScope: String, +// @SerialName("SampleRateHertz") +// val sampleRateHertz: String, +// @SerialName("SecondaryLocales") +// val secondaryLocales: String, +// @SerialName("ShortName") +// val shortName: String, +// @SerialName("VoiceModelKind") +// val voiceModelKind: String, +// @SerialName("VoiceRoleNameDefinitions") +// val voiceRoleNameDefinitions: String, +) +// +//@Serializable +//data class Samples( +// @SerialName("languageSamples") +// val languageSamples: List, +// @SerialName("roleSamples") +// val roleSamples: List, +// @SerialName("styleSamples") +// val styleSamples: List +//) +// +//@Serializable +//data class StyleSample( +// @SerialName("audioFileEndpointWithSas") +// val audioFileEndpointWithSas: String, +// @SerialName("styleName") +// val styleName: String +//) \ No newline at end of file diff --git a/app/src/main/java/com/github/jing332/tts_server_android/service/tts/data/EdgeVoicesItem.kt b/app/src/main/java/com/github/jing332/tts_server_android/service/tts/data/EdgeVoicesItem.kt new file mode 100644 index 000000000..752dcb6d3 --- /dev/null +++ b/app/src/main/java/com/github/jing332/tts_server_android/service/tts/data/EdgeVoicesItem.kt @@ -0,0 +1,22 @@ +package com.github.jing332.tts_server_android.service.tts.data + + +import kotlinx.serialization.SerialName +import kotlinx.serialization.Serializable + +data class EdgeVoicesItem ( + @SerialName("FriendlyName") + val friendlyName: String, + @SerialName("Locale") + val locale: String, + @SerialName("Name") + val name: String, + @SerialName("ShortName") + val shortName: String, +// @SerialName("Gender") +// val gender: String, +// @SerialName("Status") +// val status: String, +// @SerialName("SuggestedCodec") +// val suggestedCodec: String, +) \ No newline at end of file diff --git a/app/src/main/java/com/github/jing332/tts_server_android/ui/MainActivity.kt b/app/src/main/java/com/github/jing332/tts_server_android/ui/MainActivity.kt index b532ce711..71db0c258 100644 --- a/app/src/main/java/com/github/jing332/tts_server_android/ui/MainActivity.kt +++ b/app/src/main/java/com/github/jing332/tts_server_android/ui/MainActivity.kt @@ -34,7 +34,7 @@ import com.github.jing332.tts_server_android.utils.SharedPrefsUtils class MainActivity : AppCompatActivity() { companion object { - val TAG = "MainActivity" + const val TAG = "MainActivity" } lateinit var etPort: EditText diff --git a/app/src/main/java/com/github/jing332/tts_server_android/ui/TtsSettingsActivity.kt b/app/src/main/java/com/github/jing332/tts_server_android/ui/TtsSettingsActivity.kt new file mode 100644 index 000000000..05d250766 --- /dev/null +++ b/app/src/main/java/com/github/jing332/tts_server_android/ui/TtsSettingsActivity.kt @@ -0,0 +1,220 @@ +package com.github.jing332.tts_server_android.ui + +import android.annotation.SuppressLint +import android.content.Intent +import android.os.Bundle +import android.provider.Settings +import android.view.View +import android.widget.* +import androidx.appcompat.app.AppCompatActivity +import com.github.jing332.tts_server_android.databinding.ActivityTtsSettingsBinding +import com.github.jing332.tts_server_android.service.tts.TtsConfig +import com.github.jing332.tts_server_android.service.tts.TtsFormatManger +import com.github.jing332.tts_server_android.service.tts.TtsOutputFormat +import com.github.jing332.tts_server_android.service.tts.data.CreationVoicesItem +import kotlinx.coroutines.DelicateCoroutinesApi +import kotlinx.coroutines.GlobalScope +import kotlinx.coroutines.launch +import kotlinx.serialization.decodeFromString +import kotlinx.serialization.json.Json +import java.io.File +import tts_server_lib.* + + +class TtsSettingsActivity : AppCompatActivity() { + companion object { + const val TAG = "TtsActivity" + const val ACTION_ON_CONFIG_CHANGED = "action_on_config_changed" + } + + private lateinit var binding: ActivityTtsSettingsBinding + + private val spinnerApiAdapter: ArrayAdapter by lazy { + val array = ArrayAdapter( + this, + android.R.layout.simple_list_item_1 + ) + array.addAll(arrayListOf("edge", "azure", "creation")) + return@lazy array + } + private val spinnerLanguageAdapter: ArrayAdapter by lazy { + return@lazy ArrayAdapter( + this, + android.R.layout.simple_list_item_1 + ) + } + private val spinnerVoiceAdapter: ArrayAdapter by lazy { + return@lazy ArrayAdapter( + this, + android.R.layout.simple_list_item_1 + ) + } + private val spinnerFormatAdapter: ArrayAdapter by lazy { + return@lazy ArrayAdapter( + this, + android.R.layout.simple_list_item_1 + ) + } + + lateinit var voicesData: List /* 全部数据 */ + val currentVoices = arrayListOf() /* 当前语言的voice列表 */ + lateinit var ttsConfig: TtsConfig + + + @SuppressLint("SetTextI18n") + @OptIn(DelicateCoroutinesApi::class) + override fun onCreate(savedInstanceState: Bundle?) { + super.onCreate(savedInstanceState) + binding = ActivityTtsSettingsBinding.inflate(layoutInflater) + setContentView(binding.root) + + + ttsConfig = TtsConfig().loadConfig(this) + binding.seekBarVolume.progress = ttsConfig.volume + binding.tvCurrentVolume.text = ttsConfig.volumeToPctString() + + binding.seekBarVolume.setOnSeekBarChangeListener(object : SeekBar.OnSeekBarChangeListener { + override fun onProgressChanged(seekBar: SeekBar?, progress: Int, fromUser: Boolean) { + binding.tvCurrentVolume.text = + "${binding.seekBarVolume.progress - 50}%" + } + + override fun onStartTrackingTouch(seekBar: SeekBar?) {} + override fun onStopTrackingTouch(seekBar: SeekBar?) {} + }) + + /* {保存更改} 按钮 */ + binding.btnSave.setOnClickListener { + val item = currentVoices[binding.spinnerVoice.selectedItemPosition] + ttsConfig.api = binding.spinnerApi.selectedItemPosition + ttsConfig.voiceName = item.shortName + ttsConfig.voiceId = item.id + ttsConfig.format = binding.spinnerForamt.selectedItem.toString() + ttsConfig.volume = binding.seekBarVolume.progress + ttsConfig.locale = binding.spinnerLanguage.selectedItem.toString() + ttsConfig.writeConfig(this) + // 广播更改消息到service以重新加载配置 + sendBroadcast(Intent(ACTION_ON_CONFIG_CHANGED)) + } + + /* {TTS设置} 按钮 */ + binding.btnOpenTtsSettings.setOnClickListener { + openTtsSettings() + } + + binding.spinnerApi.onItemSelectedListener = object : AdapterView.OnItemSelectedListener { + override fun onItemSelected( + parent: AdapterView<*>?, + view: View?, + position: Int, + id: Long + ) { + when (position) { + TtsOutputFormat.API_EDGE -> { + GlobalScope.launch { + val data = Tts_server_lib.getEdgeVoices() + + } + } + TtsOutputFormat.API_AZURE -> { + + } + TtsOutputFormat.API_CREATION -> { + + } + } + } + + override fun onNothingSelected(parent: AdapterView<*>?) {} + } + + /* 语言选择变动 */ + binding.spinnerLanguage.onItemSelectedListener = + object : AdapterView.OnItemSelectedListener { + override fun onItemSelected( + adapterView: AdapterView<*>?, view: View?, i: Int, l: Long + ) { + val locale = binding.spinnerLanguage.selectedItem.toString() + spinnerVoiceAdapter.clear() + currentVoices.clear() + voicesData.forEach { item -> + if (locale == item.locale) { + currentVoices.add(item) + } + } + currentVoices.sortBy { return@sortBy it.shortName } + for ((index, v) in currentVoices.withIndex()) { + spinnerVoiceAdapter.add(v.shortName) + if (ttsConfig.voiceName == v.shortName) { + binding.spinnerVoice.setSelection(index) + } + } + } + + override fun onNothingSelected(adapterView: AdapterView<*>?) {} + } + + + /* 接口 */ + binding.spinnerApi.adapter = spinnerApiAdapter + /* 语言 */ + binding.spinnerLanguage.adapter = spinnerLanguageAdapter + /* 发音人 */ + binding.spinnerVoice.adapter = spinnerVoiceAdapter + /* 音频格式 */ + spinnerFormatAdapter.addAll(TtsFormatManger.getFormatsBySupportedApi(ttsConfig.api)) + binding.spinnerForamt.adapter = spinnerFormatAdapter + + val tmpLanguageList = arrayListOf() + GlobalScope.launch { + try { + var data: ByteArray + if (File("${cacheDir}/creation_voices.json").exists()) { /* 从缓存中读取 */ + data = File("${cacheDir}/creation_voices.json").readBytes() + } else { /* 从微软服务器获取 */ + data = Tts_server_lib.getCreationVoices() + File("${cacheDir}/creation_voices.json").writeBytes(data) + } + + voicesData = Json { ignoreUnknownKeys = true }.decodeFromString( + String(data) + ) + + voicesData.forEach { item -> + if (!tmpLanguageList.contains(item.locale)) { + tmpLanguageList.add(item.locale) + } + + } + tmpLanguageList.sort() + runOnUiThread { + for ((i, v) in tmpLanguageList.withIndex()) { + spinnerLanguageAdapter.add(v) + if (ttsConfig.locale == v) { /* 设置选中 */ + binding.spinnerLanguage.setSelection(i) + } + } + } + } catch (e: Exception) { + e.printStackTrace() + } + } + } + + private fun updateConfigUI() { +// binding.spinnerLanguage.forEach { view -> } +// for (spinnerLanguageAdapter.count) + for (i in 0..spinnerLanguageAdapter.count) { + spinnerLanguageAdapter.getItem(i) + } + } + + /* 打开系统TTS设置 */ + private fun openTtsSettings() { + val intent = Intent(Settings.ACTION_ACCESSIBILITY_SETTINGS) + intent.action = "com.android.settings.TTS_SETTINGS" + intent.flags = Intent.FLAG_ACTIVITY_NEW_TASK + this.startActivity(intent) + } +} + diff --git a/app/src/main/java/com/github/jing332/tts_server_android/utils/GcManager.kt b/app/src/main/java/com/github/jing332/tts_server_android/utils/GcManager.kt new file mode 100644 index 000000000..a7425d797 --- /dev/null +++ b/app/src/main/java/com/github/jing332/tts_server_android/utils/GcManager.kt @@ -0,0 +1,21 @@ +package com.github.jing332.tts_server_android.utils + +import android.os.SystemClock + + +object GcManger { + var last: Long = 0 + + /** + * 避免频繁GC + */ + @Synchronized + fun doGC() { + if (SystemClock.elapsedRealtime() - last > 10000) { + Runtime.getRuntime().gc() + last = SystemClock.elapsedRealtime() + } + + } + +} diff --git a/app/src/main/java/com/github/jing332/tts_server_android/utils/NormUtil.kt b/app/src/main/java/com/github/jing332/tts_server_android/utils/NormUtil.kt new file mode 100644 index 000000000..61ad9f301 --- /dev/null +++ b/app/src/main/java/com/github/jing332/tts_server_android/utils/NormUtil.kt @@ -0,0 +1,25 @@ +package com.github.jing332.tts_server_android.utils + +/** + * Construct the normalization utility, allow the normalization range to be specified. + * @param dataHigh The high value for the input data. + * @param dataLow The low value for the input data. + * @param dataHigh The high value for the normalized data. + * @param dataLow The low value for the normalized data. + */ +class NormUtil( + var dataHigh: Float, + var dataLow: Float, + var normalizedHigh: Float, + var normalizedLow: Float +) { + + /** + * Normalize x. + * @param x The value to be normalized. + * @return The result of the normalization. + */ + fun normalize(x: Float): Float { + return ((x - dataLow) / (dataHigh - dataLow)) * normalizedHigh - normalizedLow + normalizedLow + } +} \ No newline at end of file diff --git a/app/src/main/java/com/github/jing332/tts_server_android/utils/SharedPrefsUtils.kt b/app/src/main/java/com/github/jing332/tts_server_android/utils/SharedPrefsUtils.kt index 624c3e2bd..156560c2c 100644 --- a/app/src/main/java/com/github/jing332/tts_server_android/utils/SharedPrefsUtils.kt +++ b/app/src/main/java/com/github/jing332/tts_server_android/utils/SharedPrefsUtils.kt @@ -2,21 +2,16 @@ package com.github.jing332.tts_server_android.utils import android.content.Context -class SharedPrefsUtils { - companion object { - @JvmStatic - fun getWakeLock(ctx: Context): Boolean { - val pref = ctx.getSharedPreferences("config", Context.MODE_PRIVATE) - return pref.getBoolean("wakeLock", false) - } - - @JvmStatic - fun setWakeLock(ctx: Context, isWakeLock: Boolean) { - val editor = ctx.getSharedPreferences("config", Context.MODE_PRIVATE).edit() - editor.putBoolean("wakeLock", isWakeLock) - editor.apply() - } +object SharedPrefsUtils { + fun getWakeLock(ctx: Context): Boolean { + val pref = ctx.getSharedPreferences("config", Context.MODE_PRIVATE) + return pref.getBoolean("wakeLock", false) + } + fun setWakeLock(ctx: Context, isWakeLock: Boolean) { + val editor = ctx.getSharedPreferences("config", Context.MODE_PRIVATE).edit() + editor.putBoolean("wakeLock", isWakeLock) + editor.apply() } diff --git a/app/src/main/res/layout/activity_tts_settings.xml b/app/src/main/res/layout/activity_tts_settings.xml new file mode 100644 index 000000000..455ddca57 --- /dev/null +++ b/app/src/main/res/layout/activity_tts_settings.xml @@ -0,0 +1,131 @@ + + + + + + + + + + + +