-
Notifications
You must be signed in to change notification settings - Fork 541
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add Dart API for Kokoro TTS 1.0 (#1806)
- Loading branch information
1 parent
ae32dfa
commit 35f5ff3
Showing 6 changed files with 144 additions and 2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
// Copyright (c) 2025 Xiaomi Corporation | ||
import 'dart:io'; | ||
|
||
import 'package:args/args.dart'; | ||
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; | ||
|
||
import './init.dart'; | ||
|
||
/// Generates speech from text with a Kokoro TTS model and saves it as a wave
/// file.
///
/// Required options: `--model`, `--voices`, `--tokens`, `--text` and
/// `--output-wav`. The remaining options default to an empty string, except
/// `--speed` (`1.0`) and `--sid` (`0`).
///
/// Prints the usage text and exits with status 1 when the command line cannot
/// be parsed or a required option is missing.
Future<void> main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('model', help: 'Path to the onnx model')
    ..addOption('voices', help: 'Path to the voices.bin')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption(
      'data-dir',
      help: 'Path to espeak-ng-data directory',
      defaultsTo: '',
    )
    ..addOption(
      'dict-dir',
      help: 'Path to dict directory',
      defaultsTo: '',
    )
    ..addOption(
      'lexicon',
      help: 'Path to lexicon files',
      defaultsTo: '',
    )
    ..addOption('rule-fsts', help: 'Path to rule fsts', defaultsTo: '')
    ..addOption('rule-fars', help: 'Path to rule fars', defaultsTo: '')
    ..addOption('text', help: 'Text to generate TTS for')
    ..addOption('output-wav', help: 'Filename to save the generated audio')
    ..addOption('speed', help: 'Speech speed', defaultsTo: '1.0')
    ..addOption(
      'sid',
      help: 'Speaker ID to select. Used only for multi-speaker TTS',
      defaultsTo: '0',
    );

  // An unknown or malformed option makes parse() throw; report it together
  // with the usage text instead of letting the exception escape main.
  final ArgResults res;
  try {
    res = parser.parse(arguments);
  } on FormatException catch (e) {
    stderr.writeln(e.message);
    print(parser.usage);
    exit(1);
  }

  // Only the options declared without `defaultsTo` can be null here.
  final model = res['model'] as String?;
  final voices = res['voices'] as String?;
  final tokens = res['tokens'] as String?;
  final text = res['text'] as String?;
  final outputWav = res['output-wav'] as String?;
  if (model == null ||
      voices == null ||
      tokens == null ||
      text == null ||
      outputWav == null) {
    print(parser.usage);
    exit(1);
  }

  final dataDir = res['data-dir'] as String;
  final dictDir = res['dict-dir'] as String;
  final lexicon = res['lexicon'] as String;
  final ruleFsts = res['rule-fsts'] as String;
  final ruleFars = res['rule-fars'] as String;
  final sid = int.tryParse(res['sid'] as String) ?? 0;

  // The speed is used as a divisor below, so anything that is not a finite
  // positive number (0, negative, NaN, infinity, unparsable) falls back to 1.0.
  var speed = double.tryParse(res['speed'] as String) ?? 1.0;
  if (!speed.isFinite || speed <= 0) {
    speed = 1.0;
  }

  final kokoro = sherpa_onnx.OfflineTtsKokoroModelConfig(
    model: model,
    voices: voices,
    tokens: tokens,
    dataDir: dataDir,
    // The length scale is passed as the reciprocal of the requested speed.
    lengthScale: 1 / speed,
    dictDir: dictDir,
    lexicon: lexicon,
  );

  final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
    kokoro: kokoro,
    numThreads: 1,
    debug: true,
  );
  final config = sherpa_onnx.OfflineTtsConfig(
    model: modelConfig,
    // The spelling below is the parameter name this example passes to the
    // library; it must not be "corrected" here.
    maxNumSenetences: 1,
    ruleFsts: ruleFsts,
    ruleFars: ruleFars,
  );

  final tts = sherpa_onnx.OfflineTts(config);
  final sherpa_onnx.GeneratedAudio audio;
  try {
    audio = tts.generate(text: text, sid: sid, speed: speed);
  } finally {
    // Call free() even when generate() throws.
    // NOTE(review): presumably this releases native resources - confirm.
    tts.free();
  }

  sherpa_onnx.writeWave(
    filename: outputWav,
    samples: audio.samples,
    sampleRate: audio.sampleRate,
  );
  print('Saved to $outputWav');
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
#!/usr/bin/env bash
#
# Fetches the Dart dependencies, downloads the kokoro-multi-lang-v1_0 model
# if ./kokoro-multi-lang-v1_0/model.onnx is not present, runs
# ./bin/kokoro-zh-en.dart on a mixed Chinese/English sentence and lists the
# resulting wave files.

set -ex

dart pub get

# Single place to change when trying a different model package.
model_name=kokoro-multi-lang-v1_0
model_dir="./${model_name}"
archive="${model_name}.tar.bz2"

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html
# to download more models
if [ ! -f "${model_dir}/model.onnx" ]; then
  # -f makes curl exit with an error on an HTTP failure instead of saving the
  # server's error page as the archive and failing later inside tar.
  curl -fSL -O "https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/${archive}"
  tar xf "${archive}"
  rm "${archive}"
fi

dart run \
  ./bin/kokoro-zh-en.dart \
  --model "${model_dir}/model.onnx" \
  --voices "${model_dir}/voices.bin" \
  --tokens "${model_dir}/tokens.txt" \
  --data-dir "${model_dir}/espeak-ng-data" \
  --dict-dir "${model_dir}/dict" \
  --lexicon "${model_dir}/lexicon-us-en.txt,${model_dir}/lexicon-zh.txt" \
  --sid 45 \
  --speed 1.0 \
  --output-wav kokoro-zh-en-45.wav \
  --text "中英文语音合成测试。This is generated by next generation Kaldi using Kokoro without Misaki. 你觉得中英文说的如何呢?"

ls -lh *.wav
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters