Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add C API for Kokoro TTS models #1717

Merged
merged 2 commits into from
Jan 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions .github/workflows/c-api.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,32 @@ jobs:
otool -L ./install/lib/libsherpa-onnx-c-api.dylib
fi

# Smoke test for the Kokoro C API example: compile it with gcc against the
# installed headers and libraries, download and unpack the English Kokoro
# model, run the example with the install lib directory on the dynamic
# library search path, then remove the binary and the model files.
- name: Test Kokoro TTS (en)
shell: bash
run: |
gcc -o kokoro-tts-en-c-api ./c-api-examples/kokoro-tts-en-c-api.c \
-I ./build/install/include \
-L ./build/install/lib/ \
-l sherpa-onnx-c-api \
-l onnxruntime

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
tar xf kokoro-en-v0_19.tar.bz2
rm kokoro-en-v0_19.tar.bz2

export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

./kokoro-tts-en-c-api

rm ./kokoro-tts-en-c-api
rm -rf kokoro-en-*

# Upload every ./generated-kokoro-*.wav file as an artifact whose name
# includes the matrix OS.
- uses: actions/upload-artifact@v4
with:
name: kokoro-tts-${{ matrix.os }}
path: ./generated-kokoro-*.wav

- name: Test Matcha TTS (zh)
shell: bash
run: |
Expand Down
27 changes: 27 additions & 0 deletions .github/workflows/cxx-api.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,33 @@ jobs:
otool -L ./install/lib/libsherpa-onnx-cxx-api.dylib
fi

# Smoke test for the Kokoro C++ API example: compile it with g++ (C++17)
# against the installed headers and libraries, download and unpack the English
# Kokoro model, run the example with the install lib directory on the dynamic
# library search path, then remove the binary and the model files.
- name: Test Kokoro TTS (en)
shell: bash
run: |
g++ -std=c++17 -o kokoro-tts-en-cxx-api ./cxx-api-examples/kokoro-tts-en-cxx-api.cc \
-I ./build/install/include \
-L ./build/install/lib/ \
-l sherpa-onnx-cxx-api \
-l sherpa-onnx-c-api \
-l onnxruntime

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
tar xf kokoro-en-v0_19.tar.bz2
rm kokoro-en-v0_19.tar.bz2

export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

./kokoro-tts-en-cxx-api

rm kokoro-tts-en-cxx-api
rm -rf kokoro-en-*

# Upload every ./generated-kokoro-*.wav file as an artifact whose name
# includes the matrix OS.
- uses: actions/upload-artifact@v4
with:
name: kokoro-tts-${{ matrix.os }}
path: ./generated-kokoro-*.wav

- name: Test Matcha TTS (zh)
shell: bash
run: |
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -127,3 +127,4 @@ harmony-os/SherpaOnnxHar/sherpa_onnx/LICENSE
harmony-os/SherpaOnnxHar/sherpa_onnx/CHANGELOG.md
matcha-icefall-zh-baker
matcha-icefall-en_US-ljspeech
kokoro-en-v0_19
3 changes: 3 additions & 0 deletions c-api-examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ if(SHERPA_ONNX_ENABLE_TTS)

add_executable(matcha-tts-en-c-api matcha-tts-en-c-api.c)
target_link_libraries(matcha-tts-en-c-api sherpa-onnx-c-api)

# Kokoro English TTS example; links only against the C API library.
add_executable(kokoro-tts-en-c-api kokoro-tts-en-c-api.c)
target_link_libraries(kokoro-tts-en-c-api sherpa-onnx-c-api)
endif()

if(SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION)
Expand Down
84 changes: 84 additions & 0 deletions c-api-examples/kokoro-tts-en-c-api.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
// c-api-examples/kokoro-tts-en-c-api.c
//
// Copyright (c) 2025 Xiaomi Corporation

// This file shows how to use sherpa-onnx C API
// for English TTS with Kokoro.
//
// clang-format off
/*
Usage


wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
tar xf kokoro-en-v0_19.tar.bz2
rm kokoro-en-v0_19.tar.bz2

./kokoro-tts-en-c-api

*/
// clang-format on

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

// Progress callback handed to the generate-with-progress call in main().
//
// Writes the current progress, scaled by 100 and formatted as a percentage,
// to stderr. The sample buffer and its length are not inspected.
//
// Returns 1 so that generation continues; returning 0 would stop it.
static int32_t ProgressCallback(const float *samples, int32_t num_samples,
                                float progress) {
  // This example only reports progress, so the audio arguments are unused.
  (void)samples;
  (void)num_samples;

  const float percent = progress * 100;
  fprintf(stderr, "Progress: %.3f%%\n", percent);

  return 1;
}

// Synthesizes a fixed English paragraph with the Kokoro model found in
// ./kokoro-en-v0_19 and saves the result to ./generated-kokoro-en.wav.
//
// Command-line arguments are ignored.
//
// Returns 0 on success and EXIT_FAILURE when the TTS engine cannot be
// created, no audio is returned, or the wave file cannot be written, so that
// callers such as CI scripts can detect a failed run.
int32_t main(int32_t argc, char *argv[]) {
  (void)argc;
  (void)argv;

  // Zero the whole config so that every field not set below is 0/NULL.
  SherpaOnnxOfflineTtsConfig config;
  memset(&config, 0, sizeof(config));
  config.model.kokoro.model = "./kokoro-en-v0_19/model.onnx";
  config.model.kokoro.voices = "./kokoro-en-v0_19/voices.bin";
  config.model.kokoro.tokens = "./kokoro-en-v0_19/tokens.txt";
  config.model.kokoro.data_dir = "./kokoro-en-v0_19/espeak-ng-data";

  config.model.num_threads = 2;

  // If you don't want to see debug messages, please set it to 0
  config.model.debug = 1;

  const char *filename = "./generated-kokoro-en.wav";
  const char *text =
      "Today as always, men fall into two groups: slaves and free men. Whoever "
      "does not have two-thirds of his day for himself, is a slave, whatever "
      "he may be: a statesman, a businessman, an official, or a scholar. "
      "Friends fell out often because life was changing so fast. The easiest "
      "thing in the world was to lose touch with someone.";

  // NOTE(review): the checks below assume the C API signals failure with a
  // NULL handle / a zero return value -- confirm against c-api.h.
  const SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&config);
  if (!tts) {
    fprintf(stderr,
            "Failed to create the TTS engine. Please check your config\n");
    return EXIT_FAILURE;
  }

  // mapping of sid to voice name
  // 0->af, 1->af_bella, 2->af_nicole, 3->af_sarah, 4->af_sky, 5->am_adam
  // 6->am_michael, 7->bf_emma, 8->bf_isabella, 9->bm_george, 10->bm_lewis
  int32_t sid = 0;
  float speed = 1.0f;  // larger -> faster in speech speed

#if 0
  // If you don't want to use a callback, then please enable this branch
  const SherpaOnnxGeneratedAudio *audio =
      SherpaOnnxOfflineTtsGenerate(tts, text, sid, speed);
#else
  const SherpaOnnxGeneratedAudio *audio =
      SherpaOnnxOfflineTtsGenerateWithProgressCallback(tts, text, sid, speed,
                                                       ProgressCallback);
#endif

  if (!audio) {
    fprintf(stderr, "Failed to generate audio\n");
    SherpaOnnxDestroyOfflineTts(tts);
    return EXIT_FAILURE;
  }

  const int32_t written = SherpaOnnxWriteWave(audio->samples, audio->n,
                                              audio->sample_rate, filename);

  // Release the audio and the engine before reporting, on every path.
  SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
  SherpaOnnxDestroyOfflineTts(tts);

  if (!written) {
    fprintf(stderr, "Failed to write: %s\n", filename);
    return EXIT_FAILURE;
  }

  fprintf(stderr, "Input text is: %s\n", text);
  fprintf(stderr, "Speaker ID is: %d\n", sid);
  fprintf(stderr, "Saved to: %s\n", filename);

  return 0;
}
3 changes: 3 additions & 0 deletions cxx-api-examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,7 @@ if(SHERPA_ONNX_ENABLE_TTS)

add_executable(matcha-tts-en-cxx-api ./matcha-tts-en-cxx-api.cc)
target_link_libraries(matcha-tts-en-cxx-api sherpa-onnx-cxx-api)

# Kokoro English TTS example; links against the C++ API wrapper library.
add_executable(kokoro-tts-en-cxx-api ./kokoro-tts-en-cxx-api.cc)
target_link_libraries(kokoro-tts-en-cxx-api sherpa-onnx-cxx-api)
endif()
73 changes: 73 additions & 0 deletions cxx-api-examples/kokoro-tts-en-cxx-api.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
// cxx-api-examples/kokoro-tts-en-cxx-api.cc
//
// Copyright (c) 2025 Xiaomi Corporation

// This file shows how to use sherpa-onnx CXX API
// for English TTS with Kokoro.
//
// clang-format off
/*
Usage

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
tar xf kokoro-en-v0_19.tar.bz2
rm kokoro-en-v0_19.tar.bz2

./kokoro-tts-en-cxx-api

*/
// clang-format on

#include <cstdint>
#include <cstdio>
#include <string>

#include "sherpa-onnx/c-api/cxx-api.h"

// Progress callback passed to tts.Generate() in main().
//
// Writes the current progress, scaled by 100 and formatted as a percentage,
// to stderr. The sample buffer, its length and the user argument are not
// inspected.
//
// Returns 1 so that generation continues; returning 0 would stop it.
static int32_t ProgressCallback(const float *samples, int32_t num_samples,
                                float progress, void *arg) {
  // This example only reports progress, so the remaining arguments are unused.
  (void)samples;
  (void)num_samples;
  (void)arg;

  const float percent = progress * 100;
  fprintf(stderr, "Progress: %.3f%%\n", percent);

  return 1;
}

// Synthesizes a fixed English paragraph with the Kokoro model found in
// ./kokoro-en-v0_19 and saves the result to ./generated-kokoro-en-cxx.wav.
//
// Command-line arguments are ignored.
//
// Returns 0 on success and -1 when the TTS engine cannot be created or the
// wave file cannot be written, so that callers such as CI scripts can detect
// a failed run.
int32_t main(int32_t argc, char *argv[]) {
  (void)argc;
  (void)argv;

  using namespace sherpa_onnx::cxx;  // NOLINT
  OfflineTtsConfig config;

  config.model.kokoro.model = "./kokoro-en-v0_19/model.onnx";
  config.model.kokoro.voices = "./kokoro-en-v0_19/voices.bin";
  config.model.kokoro.tokens = "./kokoro-en-v0_19/tokens.txt";
  config.model.kokoro.data_dir = "./kokoro-en-v0_19/espeak-ng-data";

  config.model.num_threads = 2;

  // If you don't want to see debug messages, please set it to 0
  config.model.debug = 1;

  const std::string filename = "./generated-kokoro-en-cxx.wav";
  const std::string text =
      "Today as always, men fall into two groups: slaves and free men. Whoever "
      "does not have two-thirds of his day for himself, is a slave, whatever "
      "he may be: a statesman, a businessman, an official, or a scholar. "
      "Friends fell out often because life was changing so fast. The easiest "
      "thing in the world was to lose touch with someone.";

  auto tts = OfflineTts::Create(config);
  // NOTE(review): assumes the wrapper exposes the underlying C handle through
  // Get() and that it is null when creation failed -- confirm in cxx-api.h.
  if (!tts.Get()) {
    fprintf(stderr,
            "Failed to create the TTS engine. Please check your config\n");
    return -1;
  }

  int32_t sid = 0;
  float speed = 1.0f;  // larger -> faster in speech speed

#if 0
  // If you don't want to use a callback, then please enable this branch
  GeneratedAudio audio = tts.Generate(text, sid, speed);
#else
  GeneratedAudio audio = tts.Generate(text, sid, speed, ProgressCallback);
#endif

  // NOTE(review): assumes WriteWave() returns false on failure -- confirm in
  // cxx-api.h.
  if (!WriteWave(filename, {audio.samples, audio.sample_rate})) {
    fprintf(stderr, "Failed to write: %s\n", filename.c_str());
    return -1;
  }

  fprintf(stderr, "Input text is: %s\n", text.c_str());
  fprintf(stderr, "Speaker ID is: %d\n", sid);
  fprintf(stderr, "Saved to: %s\n", filename.c_str());

  return 0;
}
12 changes: 12 additions & 0 deletions sherpa-onnx/c-api/c-api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1092,6 +1092,18 @@ static sherpa_onnx::OfflineTtsConfig GetOfflineTtsConfig(
tts_config.model.matcha.dict_dir =
SHERPA_ONNX_OR(config->model.matcha.dict_dir, "");

// kokoro
tts_config.model.kokoro.model =
SHERPA_ONNX_OR(config->model.kokoro.model, "");
tts_config.model.kokoro.voices =
SHERPA_ONNX_OR(config->model.kokoro.voices, "");
tts_config.model.kokoro.tokens =
SHERPA_ONNX_OR(config->model.kokoro.tokens, "");
tts_config.model.kokoro.data_dir =
SHERPA_ONNX_OR(config->model.kokoro.data_dir, "");
tts_config.model.kokoro.length_scale =
SHERPA_ONNX_OR(config->model.kokoro.length_scale, 1.0);

tts_config.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1);
tts_config.model.debug = config->model.debug;
tts_config.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu");
Expand Down
10 changes: 10 additions & 0 deletions sherpa-onnx/c-api/c-api.h
Original file line number Diff line number Diff line change
Expand Up @@ -910,12 +910,22 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsMatchaModelConfig {
const char *dict_dir;
} SherpaOnnxOfflineTtsMatchaModelConfig;

// Configuration of a Kokoro TTS model for the C API.
//
// NOTE(review): c-api.cc passes every field through SHERPA_ONNX_OR with ""
// (strings) or 1.0 (length_scale) as the second argument -- presumably the
// fallback used when a field is left NULL/0; confirm against the macro.
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsKokoroModelConfig {
// Path to the model file, e.g. ./kokoro-en-v0_19/model.onnx
const char *model;
// Path to the voices file, e.g. ./kokoro-en-v0_19/voices.bin
const char *voices;
// Path to the tokens file, e.g. ./kokoro-en-v0_19/tokens.txt
const char *tokens;
// Path to the espeak-ng data directory, e.g. ./kokoro-en-v0_19/espeak-ng-data
const char *data_dir;

// < 1: faster speech; > 1: slower speech
float length_scale; // < 1, faster in speech speed; > 1, slower in speed
} SherpaOnnxOfflineTtsKokoroModelConfig;

// Model-level configuration for offline TTS in the C API. It carries one
// sub-config per supported model family (vits, matcha, kokoro) plus the
// runtime options shared by all of them.
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsModelConfig {
// Settings for VITS models.
SherpaOnnxOfflineTtsVitsModelConfig vits;
// Number of threads; c-api.cc passes 1 as the SHERPA_ONNX_OR fallback.
int32_t num_threads;
// Copied as-is into the internal config; the examples use 1 to show debug
// messages and 0 to hide them.
int32_t debug;
// Execution provider name; c-api.cc passes "cpu" as the SHERPA_ONNX_OR
// fallback.
const char *provider;
// Settings for Matcha models.
SherpaOnnxOfflineTtsMatchaModelConfig matcha;
// Settings for Kokoro models.
// NOTE(review): declared last, presumably so that the layout of the members
// above is unchanged for existing callers -- confirm before reordering.
SherpaOnnxOfflineTtsKokoroModelConfig kokoro;
} SherpaOnnxOfflineTtsModelConfig;

SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsConfig {
Expand Down
6 changes: 6 additions & 0 deletions sherpa-onnx/c-api/cxx-api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,12 @@ OfflineTts OfflineTts::Create(const OfflineTtsConfig &config) {
c.model.matcha.length_scale = config.model.matcha.length_scale;
c.model.matcha.dict_dir = config.model.matcha.dict_dir.c_str();

c.model.kokoro.model = config.model.kokoro.model.c_str();
c.model.kokoro.voices = config.model.kokoro.voices.c_str();
c.model.kokoro.tokens = config.model.kokoro.tokens.c_str();
c.model.kokoro.data_dir = config.model.kokoro.data_dir.c_str();
c.model.kokoro.length_scale = config.model.kokoro.length_scale;

c.model.num_threads = config.model.num_threads;
c.model.debug = config.model.debug;
c.model.provider = config.model.provider.c_str();
Expand Down
10 changes: 10 additions & 0 deletions sherpa-onnx/c-api/cxx-api.h
Original file line number Diff line number Diff line change
Expand Up @@ -338,9 +338,19 @@ struct OfflineTtsMatchaModelConfig {
float length_scale = 1.0; // < 1, faster in speed; > 1, slower in speed
};

// Configuration of a Kokoro TTS model for the C++ API.
//
// OfflineTts::Create() copies these fields into the kokoro member of the C
// config: strings via c_str(), length_scale by value.
struct OfflineTtsKokoroModelConfig {
// Path to the model file, e.g. ./kokoro-en-v0_19/model.onnx
std::string model;
// Path to the voices file, e.g. ./kokoro-en-v0_19/voices.bin
std::string voices;
// Path to the tokens file, e.g. ./kokoro-en-v0_19/tokens.txt
std::string tokens;
// Path to the espeak-ng data directory, e.g. ./kokoro-en-v0_19/espeak-ng-data
std::string data_dir;

// Defaults to 1.0. < 1: faster speech; > 1: slower speech.
float length_scale = 1.0; // < 1, faster in speed; > 1, slower in speed
};

struct OfflineTtsModelConfig {
OfflineTtsVitsModelConfig vits;
OfflineTtsMatchaModelConfig matcha;
OfflineTtsKokoroModelConfig kokoro;
int32_t num_threads = 1;
bool debug = false;
std::string provider = "cpu";
Expand Down
Loading