Skip to content

Commit

Permalink
Add Dart API for Kokoro TTS 1.0 (#1806)
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj authored Feb 7, 2025
1 parent ae32dfa commit 35f5ff3
Show file tree
Hide file tree
Showing 6 changed files with 144 additions and 2 deletions.
1 change: 1 addition & 0 deletions .github/scripts/test-dart.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ cd dart-api-examples
pushd tts

echo '----------matcha tts----------'
./run-kokoro-zh-en.sh
./run-kokoro-en.sh
./run-matcha-zh.sh
./run-matcha-en.sh
Expand Down
102 changes: 102 additions & 0 deletions dart-api-examples/tts/bin/kokoro-zh-en.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
// Copyright (c) 2025 Xiaomi Corporation
import 'dart:io';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

/// Command-line entry point for the Kokoro (Chinese + English) TTS example.
///
/// Parses the command-line [arguments], builds an offline TTS configuration
/// around a Kokoro model, synthesizes the text passed via `--text` and writes
/// the generated samples to the file passed via `--output-wav`.
///
/// Required options: `--model`, `--voices`, `--tokens`, `--text` and
/// `--output-wav`. All other options have defaults. The process exits with
/// status 1 (after printing the usage) when the arguments cannot be parsed or
/// a required option is missing.
Future<void> main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('model', help: 'Path to the onnx model')
    ..addOption('voices', help: 'Path to the voices.bin')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption(
      'data-dir',
      help: 'Path to espeak-ng-data directory',
      defaultsTo: '',
    )
    ..addOption(
      'dict-dir',
      help: 'Path to dict directory',
      defaultsTo: '',
    )
    ..addOption(
      'lexicon',
      help: 'Path to lexicon files',
      defaultsTo: '',
    )
    ..addOption('rule-fsts', help: 'Path to rule fsts', defaultsTo: '')
    ..addOption('rule-fars', help: 'Path to rule fars', defaultsTo: '')
    ..addOption('text', help: 'Text to generate TTS for')
    ..addOption('output-wav', help: 'Filename to save the generated audio')
    ..addOption('speed', help: 'Speech speed', defaultsTo: '1.0')
    ..addOption(
      'sid',
      help: 'Speaker ID to select. Used only for multi-speaker TTS',
      defaultsTo: '0',
    );

  // Unknown flags or a flag without its value make parse() throw; report the
  // problem together with the usage instead of dying with a stack trace.
  final ArgResults res;
  try {
    res = parser.parse(arguments);
  } on FormatException catch (e) {
    print(e.message);
    print(parser.usage);
    exit(1);
  }

  // Only options declared without `defaultsTo` can be null here; the options
  // with an empty-string default never are, so they are not checked.
  for (final name in const ['model', 'voices', 'tokens', 'output-wav', 'text']) {
    if (res[name] == null) {
      print('Missing required option --$name');
      print(parser.usage);
      exit(1);
    }
  }

  final model = res['model'] as String;
  final voices = res['voices'] as String;
  final tokens = res['tokens'] as String;
  final dataDir = res['data-dir'] as String;
  final dictDir = res['dict-dir'] as String;
  final lexicon = res['lexicon'] as String;
  final ruleFsts = res['rule-fsts'] as String;
  final ruleFars = res['rule-fars'] as String;
  final text = res['text'] as String;
  final outputWav = res['output-wav'] as String;
  final sid = int.tryParse(res['sid'] as String) ?? 0;

  // The speed is used as a divisor below, so anything that is not a finite,
  // strictly positive number (unparsable, 0, negative, NaN, infinity) falls
  // back to the default speed of 1.0.
  final requestedSpeed = double.tryParse(res['speed'] as String) ?? 1.0;
  final speed =
      (requestedSpeed.isFinite && requestedSpeed > 0) ? requestedSpeed : 1.0;

  final kokoro = sherpa_onnx.OfflineTtsKokoroModelConfig(
    model: model,
    voices: voices,
    tokens: tokens,
    dataDir: dataDir,
    lengthScale: 1 / speed,
    dictDir: dictDir,
    lexicon: lexicon,
  );

  final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
    kokoro: kokoro,
    numThreads: 1,
    debug: true,
  );
  final config = sherpa_onnx.OfflineTtsConfig(
    model: modelConfig,
    // NOTE(review): the parameter name is spelled exactly as in the original
    // call; confirm against the sherpa_onnx package before "fixing" it.
    maxNumSenetences: 1,
    ruleFsts: ruleFsts,
    ruleFars: ruleFars,
  );

  final tts = sherpa_onnx.OfflineTts(config);
  try {
    final audio = tts.generate(text: text, sid: sid, speed: speed);

    sherpa_onnx.writeWave(
      filename: outputWav,
      samples: audio.samples,
      sampleRate: audio.sampleRate,
    );
    print('Saved to $outputWav');
  } finally {
    // Release the TTS object even when generation or writing fails.
    tts.free();
  }
}
2 changes: 1 addition & 1 deletion dart-api-examples/tts/run-kokoro-en.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,6 @@ dart run \
--sid 9 \
--speed 1.0 \
--output-wav kokoro-en-9.wav \
--text "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone." \
--text "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."

ls -lh *.wav
29 changes: 29 additions & 0 deletions dart-api-examples/tts/run-kokoro-zh-en.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/usr/bin/env bash
#
# Runs the Kokoro Chinese+English TTS Dart example (./bin/kokoro-zh-en.dart)
# against the kokoro-multi-lang-v1_0 model and lists the resulting wave files.

# -e: abort on the first failing command; -x: echo each command as it runs.
set -ex

# Fetch the Dart package dependencies before running the example.
dart pub get

# Download and unpack the model archive only when model.onnx is not already
# present in ./kokoro-multi-lang-v1_0; the archive is removed after extraction.
#
# Please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html
# to download more models.
if [ ! -f ./kokoro-multi-lang-v1_0/model.onnx ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
tar xf kokoro-multi-lang-v1_0.tar.bz2
rm kokoro-multi-lang-v1_0.tar.bz2
fi

# Synthesize a mixed Chinese/English sentence with speaker ID 45 at speed 1.0.
# --lexicon takes two lexicon files (US English and Chinese) separated by a
# comma. The output is written to kokoro-zh-en-45.wav.
dart run \
./bin/kokoro-zh-en.dart \
--model ./kokoro-multi-lang-v1_0/model.onnx \
--voices ./kokoro-multi-lang-v1_0/voices.bin \
--tokens ./kokoro-multi-lang-v1_0/tokens.txt \
--data-dir ./kokoro-multi-lang-v1_0/espeak-ng-data \
--dict-dir ./kokoro-multi-lang-v1_0/dict \
--lexicon ./kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/lexicon-zh.txt \
--sid 45 \
--speed 1.0 \
--output-wav kokoro-zh-en-45.wav \
--text "中英文语音合成测试。This is generated by next generation Kaldi using Kokoro without Misaki. 你觉得中英文说的如何呢?"

# Show every wave file in the current directory, with human-readable sizes.
ls -lh *.wav
2 changes: 2 additions & 0 deletions flutter/sherpa_onnx/lib/src/sherpa_onnx_bindings.dart
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,8 @@ final class SherpaOnnxOfflineTtsKokoroModelConfig extends Struct {

@Float()
external double lengthScale;
external Pointer<Utf8> dictDir;
external Pointer<Utf8> lexicon;
}

final class SherpaOnnxOfflineTtsModelConfig extends Struct {
Expand Down
10 changes: 9 additions & 1 deletion flutter/sherpa_onnx/lib/src/tts.dart
Original file line number Diff line number Diff line change
Expand Up @@ -67,18 +67,22 @@ class OfflineTtsKokoroModelConfig {
this.tokens = '',
this.dataDir = '',
this.lengthScale = 1.0,
this.dictDir = '',
this.lexicon = '',
});

@override
String toString() {
return 'OfflineTtsKokoroModelConfig(model: $model, voices: $voices, tokens: $tokens, dataDir: $dataDir, lengthScale: $lengthScale)';
return 'OfflineTtsKokoroModelConfig(model: $model, voices: $voices, tokens: $tokens, dataDir: $dataDir, lengthScale: $lengthScale, dictDir: $dictDir, lexicon: $lexicon)';
}

final String model;
final String voices;
final String tokens;
final String dataDir;
final double lengthScale;
final String dictDir;
final String lexicon;
}

class OfflineTtsModelConfig {
Expand Down Expand Up @@ -166,6 +170,8 @@ class OfflineTts {
c.ref.model.kokoro.tokens = config.model.kokoro.tokens.toNativeUtf8();
c.ref.model.kokoro.dataDir = config.model.kokoro.dataDir.toNativeUtf8();
c.ref.model.kokoro.lengthScale = config.model.kokoro.lengthScale;
c.ref.model.kokoro.dictDir = config.model.kokoro.dictDir.toNativeUtf8();
c.ref.model.kokoro.lexicon = config.model.kokoro.lexicon.toNativeUtf8();

c.ref.model.numThreads = config.model.numThreads;
c.ref.model.debug = config.model.debug ? 1 : 0;
Expand All @@ -181,6 +187,8 @@ class OfflineTts {
calloc.free(c.ref.ruleFsts);
calloc.free(c.ref.model.provider);

calloc.free(c.ref.model.kokoro.lexicon);
calloc.free(c.ref.model.kokoro.dictDir);
calloc.free(c.ref.model.kokoro.dataDir);
calloc.free(c.ref.model.kokoro.tokens);
calloc.free(c.ref.model.kokoro.voices);
Expand Down

0 comments on commit 35f5ff3

Please sign in to comment.