From eff9f6bbcbe776c7be1de851d88d2dd490fd0c9a Mon Sep 17 00:00:00 2001 From: 9728Lin <164290516+9728Lin@users.noreply.github.com> Date: Mon, 3 Jun 2024 14:56:52 +0800 Subject: [PATCH 1/4] Update c-api.h c-api add hotwords to modeling-unit and bpe_vocab --- sherpa-onnx/c-api/c-api.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sherpa-onnx/c-api/c-api.h b/sherpa-onnx/c-api/c-api.h index 1b1b56330..dcd4acf94 100644 --- a/sherpa-onnx/c-api/c-api.h +++ b/sherpa-onnx/c-api/c-api.h @@ -82,6 +82,12 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOnlineModelConfig { const char *provider; int32_t debug; // true to print debug information of the model const char *model_type; + // Valid values: + // - cjkchar + // - bpe + // - cjkchar+bpe + std::string modeling_unit = "cjkchar"; + std::string bpe_vocab; } SherpaOnnxOnlineModelConfig; /// It expects 16 kHz 16-bit single channel wave format. @@ -383,6 +389,12 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineModelConfig { int32_t debug; const char *provider; const char *model_type; + // Valid values: + // - cjkchar + // - bpe + // - cjkchar+bpe + std::string modeling_unit = "cjkchar"; + std::string bpe_vocab; } SherpaOnnxOfflineModelConfig; SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerConfig { From 5b83289a4ee65155c4d45efee23782f46f3ff026 Mon Sep 17 00:00:00 2001 From: 9728Lin <164290516+9728Lin@users.noreply.github.com> Date: Mon, 3 Jun 2024 15:20:07 +0800 Subject: [PATCH 2/4] Update c-api.h for hotwords --- sherpa-onnx/c-api/c-api.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sherpa-onnx/c-api/c-api.h b/sherpa-onnx/c-api/c-api.h index dcd4acf94..ca125c77c 100644 --- a/sherpa-onnx/c-api/c-api.h +++ b/sherpa-onnx/c-api/c-api.h @@ -86,8 +86,8 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOnlineModelConfig { // - cjkchar // - bpe // - cjkchar+bpe - std::string modeling_unit = "cjkchar"; - std::string bpe_vocab; + const char * modeling_unit = "cjkchar"; + const char * bpe_vocab; } SherpaOnnxOnlineModelConfig; /// It expects 16 kHz 16-bit single channel wave format. @@ -393,8 +393,8 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineModelConfig { // - cjkchar // - bpe // - cjkchar+bpe - std::string modeling_unit = "cjkchar"; - std::string bpe_vocab; + const char * modeling_unit = "cjkchar"; + const char * bpe_vocab; } SherpaOnnxOfflineModelConfig; SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerConfig { From 3fcfb8df84237a3c362504598e91d627c4fe7d45 Mon Sep 17 00:00:00 2001 From: 9728Lin <164290516+9728Lin@users.noreply.github.com> Date: Mon, 3 Jun 2024 15:50:28 +0800 Subject: [PATCH 3/4] Update c-api.h for hotwords --- sherpa-onnx/c-api/c-api.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sherpa-onnx/c-api/c-api.h b/sherpa-onnx/c-api/c-api.h index ca125c77c..bd9b6a4d4 100644 --- a/sherpa-onnx/c-api/c-api.h +++ b/sherpa-onnx/c-api/c-api.h @@ -86,8 +86,8 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOnlineModelConfig { // - cjkchar // - bpe // - cjkchar+bpe - const char * modeling_unit = "cjkchar"; - const char * bpe_vocab; + const char *modeling_unit; + const char *bpe_vocab; } SherpaOnnxOnlineModelConfig; /// It expects 16 kHz 16-bit single channel wave format. @@ -393,8 +393,8 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineModelConfig { // - cjkchar // - bpe // - cjkchar+bpe - const char * modeling_unit = "cjkchar"; - const char * bpe_vocab; + const char *modeling_unit; + const char *bpe_vocab; } SherpaOnnxOfflineModelConfig; SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerConfig { From 8e2f359966378c1d6c7a7472e92872c5b0739827 Mon Sep 17 00:00:00 2001 From: 9728Lin <164290516+9728Lin@users.noreply.github.com> Date: Mon, 3 Jun 2024 16:15:29 +0800 Subject: [PATCH 4/4] Update c-api.cc for hotwords --- sherpa-onnx/c-api/c-api.cc | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sherpa-onnx/c-api/c-api.cc b/sherpa-onnx/c-api/c-api.cc index f6160deb1..eaf782b92 100644 --- a/sherpa-onnx/c-api/c-api.cc +++ b/sherpa-onnx/c-api/c-api.cc @@ -79,6 +79,10 @@ SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer( SHERPA_ONNX_OR(config->model_config.model_type, ""); recognizer_config.model_config.debug = SHERPA_ONNX_OR(config->model_config.debug, 0); + recognizer_config.model_config.modeling_unit = + SHERPA_ONNX_OR(config->model_config.modeling_unit, "cjkchar"); + recognizer_config.model_config.bpe_vocab = + SHERPA_ONNX_OR(config->model_config.bpe_vocab, ""); recognizer_config.decoding_method = SHERPA_ONNX_OR(config->decoding_method, "greedy_search"); @@ -357,6 +361,10 @@ SherpaOnnxOfflineRecognizer *CreateOfflineRecognizer( SHERPA_ONNX_OR(config->model_config.provider, "cpu"); recognizer_config.model_config.model_type = SHERPA_ONNX_OR(config->model_config.model_type, ""); + recognizer_config.model_config.modeling_unit = + SHERPA_ONNX_OR(config->model_config.modeling_unit, "cjkchar"); + recognizer_config.model_config.bpe_vocab = + SHERPA_ONNX_OR(config->model_config.bpe_vocab, ""); recognizer_config.lm_config.model = SHERPA_ONNX_OR(config->lm_config.model, "");