generated from robotology/yarp-device-template
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
The device works, but no test is available for it at the moment; one still needs to be implemented.
- Loading branch information
1 parent
ea6274e
commit 32480ef
Showing
8 changed files
with
360 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
# SPDX-FileCopyrightText: 2023 Istituto Italiano di Tecnologia (IIT) | ||
# SPDX-License-Identifier: BSD-3-Clause | ||
|
||
# Declare the googleSpeechTranscription device plugin. yarp_prepare_plugin
# provides the SKIP_googleSpeechTranscription variable checked below.
yarp_prepare_plugin(googleSpeechTranscription
  CATEGORY device
  TYPE GoogleSpeechTranscription
  INCLUDE GoogleSpeechTranscription.h
  INTERNAL ON
)

if(NOT SKIP_googleSpeechTranscription)
  # The Google Cloud Speech client library is needed only when the plugin is
  # actually built. Looking it up inside the guard keeps a disabled plugin from
  # failing the whole configuration on machines without the SDK.
  find_package(google_cloud_cpp_speech REQUIRED)

  yarp_add_plugin(yarp_googleSpeechTranscription)

  target_sources(yarp_googleSpeechTranscription
    PRIVATE
      GoogleSpeechTranscription.cpp
      GoogleSpeechTranscription.h
  )

  target_link_libraries(yarp_googleSpeechTranscription
    PRIVATE
      YARP::YARP_os
      YARP::YARP_sig
      YARP::YARP_dev
      google-cloud-cpp::speech
  )

  yarp_install(
    TARGETS yarp_googleSpeechTranscription
    EXPORT yarp-device-googleSpeechTranscription
    COMPONENT yarp-device-googleSpeechTranscription
    LIBRARY DESTINATION ${YARP_DYNAMIC_PLUGINS_INSTALL_DIR}
    ARCHIVE DESTINATION ${YARP_STATIC_PLUGINS_INSTALL_DIR}
    YARP_INI DESTINATION ${YARP_PLUGIN_MANIFESTS_INSTALL_DIR}
  )

  # Tests and demo material are added only when tests are being compiled.
  if(YARP_COMPILE_TESTS)
    add_subdirectory(tests)
    add_subdirectory(demos)
  endif()

  set_property(TARGET yarp_googleSpeechTranscription PROPERTY FOLDER "Plugins/Device")
endif()
123 changes: 123 additions & 0 deletions
123
src/devices/googleSpeechTranscription/GoogleSpeechTranscription.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
/* | ||
* SPDX-FileCopyrightText: 2023 Istituto Italiano di Tecnologia (IIT) | ||
* SPDX-License-Identifier: BSD-3-Clause | ||
*/ | ||
|
||
#ifndef _USE_MATH_DEFINES | ||
#define _USE_MATH_DEFINES | ||
#endif | ||
|
||
#include "GoogleSpeechTranscription.h" | ||
|
||
#include <yarp/sig/SoundFile.h> | ||
|
||
#include <yarp/os/LogComponent.h> | ||
#include <yarp/os/LogStream.h> | ||
#include <fstream> | ||
|
||
#include <cmath> | ||
|
||
using namespace yarp::os; | ||
using namespace yarp::dev; | ||
|
||
|
||
YARP_LOG_COMPONENT(GOOGLESPEECHTRANSCR, "yarp.googleSpeechTranscription", yarp::os::Log::TraceType); | ||
|
||
|
||
/**
 * Default constructor. It performs no work of its own: the language, the
 * sample rate and the speech client are all set up later by open().
 */
GoogleSpeechTranscription::GoogleSpeechTranscription() = default;
|
||
/**
 * Configure the device and create the Google Cloud Speech client.
 *
 * Parameters read from @p config:
 *  - `language_code` (string, required): language of the audio to transcribe.
 *  - `sample_rate_hertz` (int, optional, default 16000): sample rate stored in
 *    the recognition configuration.
 *  - `__offline` (int, optional): a value of 1 sets the internal offline flag.
 *
 * The recognition configuration always uses the LINEAR16 encoding.
 *
 * @param config device configuration.
 * @return true on success; false if `language_code` is missing or empty, or if
 *         `sample_rate_hertz` is not strictly positive.
 */
bool GoogleSpeechTranscription::open(yarp::os::Searchable &config)
{
    if (config.check("__offline"))
    {
        m_offline = config.find("__offline").asInt32() == 1;
    }

    if (!config.check("language_code"))
    {
        yCError(GOOGLESPEECHTRANSCR) << "No language code specified";
        return false;
    }

    // asString() yields an empty string for non-string values, so this single
    // check rejects both an empty code and a value of the wrong type.
    const std::string languageCode = config.find("language_code").asString();
    if (languageCode.empty())
    {
        yCError(GOOGLESPEECHTRANSCR) << "The language code must be a non-empty string";
        return false;
    }

    const int sampleRate = config.check("sample_rate_hertz", yarp::os::Value(16000), "sample rate (int)").asInt32();
    if (sampleRate <= 0)
    {
        yCError(GOOGLESPEECHTRANSCR) << "Invalid sample rate:" << sampleRate << "- it must be a positive number of Hz";
        return false;
    }

    // Commit the validated values only once every check has passed.
    m_sampleRate = sampleRate;
    m_languageCode = languageCode;
    m_audioConfig.set_language_code(m_languageCode);
    m_audioConfig.set_encoding(google::cloud::speech::v1::RecognitionConfig::LINEAR16);
    m_audioConfig.set_sample_rate_hertz(m_sampleRate);
    m_client = std::make_shared<google::cloud::speech_v1::SpeechClient>(google::cloud::speech_v1::MakeSpeechConnection());

    return true;
}
|
||
// Close the device. No cleanup is performed here: the function only reports
// success to the caller.
bool GoogleSpeechTranscription::close() | ||
{ | ||
return true; | ||
} | ||
|
||
bool GoogleSpeechTranscription::setLanguage(const std::string& language) | ||
{ | ||
if(language == "auto") | ||
{ | ||
yCError(GOOGLESPEECHTRANSCR) << "The \"auto\" option is not supported by this device"; | ||
|
||
return false; | ||
} | ||
|
||
m_audioConfig.set_language_code(language); | ||
return true; | ||
} | ||
|
||
bool GoogleSpeechTranscription::getLanguage(std::string& language) | ||
{ | ||
language = m_audioConfig.language_code(); | ||
|
||
return true; | ||
} | ||
|
||
/**
 * Send a sound to the Google Cloud Speech service and return the best
 * transcription found in the response.
 *
 * Every alternative of every result is examined and the one with the highest
 * confidence is kept. The service uses a confidence of 0.0 as a "not set"
 * sentinel, so the first alternative seen is always accepted as a fallback;
 * otherwise a valid response whose confidences are unset would yield an empty
 * transcription.
 *
 * @param sound         audio to transcribe; must have at least one sample and
 *                      one channel.
 * @param transcription output: selected transcript (empty if none was found).
 * @param score         output: confidence of the selected transcript (0.0 if
 *                      none was found or the service did not set it).
 * @return true if the request succeeded; false if the device was not opened,
 *         the sound is empty, or the remote call failed.
 */
bool GoogleSpeechTranscription::transcribe(const yarp::sig::Sound& sound, std::string& transcription, double& score)
{
    transcription.clear();
    score = 0.0;

    // The client is created by open(); without it there is nothing to call.
    if (!m_client)
    {
        yCError(GOOGLESPEECHTRANSCR) << "The device has not been opened: no speech client available";
        return false;
    }

    if (sound.getSamples() == 0 ||
        sound.getChannels() == 0)
    {
        yCError(GOOGLESPEECHTRANSCR) << "Invalid Sound sample received";
        return false;
    }

    // Copy the samples into a contiguous 16-bit buffer, matching the LINEAR16
    // encoding declared in the recognition configuration.
    // NOTE(review): the non-interleaved layout is sent as-is; for multi-channel
    // sounds confirm this is what the service expects.
    const auto sampleRefs = sound.getNonInterleavedAudioRawData();
    const std::vector<short> pcmSamples(sampleRefs.begin(), sampleRefs.end());

    google::cloud::speech::v1::RecognitionAudio audio;
    audio.set_content(reinterpret_cast<const char*>(pcmSamples.data()), pcmSamples.size() * sizeof(short));

    auto response = m_client->Recognize(m_audioConfig, audio);

    if (!response)
    {
        yCError(GOOGLESPEECHTRANSCR) << "Could not perform audio transcription:" << response.status().message();
        return false;
    }

    yCDebug(GOOGLESPEECHTRANSCR) << "Results size:" << response->results_size();

    bool transcriptSelected = false;
    for (int i = 0; i < response->results_size(); ++i)
    {
        const auto& result = response->results(i);
        yCDebug(GOOGLESPEECHTRANSCR) << i << "Alternative size:" << result.alternatives_size();
        for (int j = 0; j < result.alternatives_size(); ++j)
        {
            const auto& alternative = result.alternatives(j);
            const float confidence = alternative.confidence();
            yCDebug(GOOGLESPEECHTRANSCR) << "Alternative:" << alternative.SerializeAsString() << "Confidence:" << confidence;
            if (!transcriptSelected || confidence > score)
            {
                score = confidence;
                transcription = alternative.transcript();
                transcriptSelected = true;
            }
        }
    }

    return true;
}
91 changes: 91 additions & 0 deletions
91
src/devices/googleSpeechTranscription/GoogleSpeechTranscription.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
/* | ||
* SPDX-FileCopyrightText: 2023 Istituto Italiano di Tecnologia (IIT) | ||
* SPDX-License-Identifier: BSD-3-Clause | ||
*/ | ||
|
||
#ifndef YARP_GOOGLESPEECHTRANSCR_H | ||
#define YARP_GOOGLESPEECHTRANSCR_H | ||
|
||
#include <yarp/dev/DeviceDriver.h> | ||
#include <yarp/dev/ISpeechTranscription.h> | ||
#include <yarp/sig/Sound.h> | ||
#include <yarp/os/Network.h> | ||
#include <algorithm> | ||
#include <memory> | ||
#include <vector> | ||
|
||
#include "google/cloud/speech/v1/speech_client.h" | ||
#include "google/protobuf/repeated_ptr_field.h" | ||
|
||
|
||
/** | ||
* @ingroup dev_impl_other | ||
* | ||
* \section googleSpeechTranscription | ||
* | ||
* \brief `googleSpeechTranscription`: A yarp device for speech transcription using google cloud cpp libraries | ||
* | ||
* Parameters required by this device are: | ||
* | Parameter name | SubParameter | Type | Units | Default Value | Required | Description | Notes | | ||
* |:--------------:|:------------:|:-------:|:--------------:|:-------------:|:--------:|:--------------------------------------------------:|:-----:| | ||
* | language_code | - | string | - | - | Yes | Language for speech synthesis (e.g. "ita", "eng") | | | ||
* | ||
* | ||
* example of xml file with a fake odometer | ||
* | ||
* \code{.unparsed} | ||
* <?xml version="1.0" encoding="UTF-8"?> | ||
* <!DOCTYPE robot PUBLIC "-//YARP//DTD yarprobotinterface 3.0//EN" "http://www.yarp.it/DTD/yarprobotinterfaceV3.0.dtd"> | ||
* <robot name="googleTest" build="2" portprefix="/googleTranscr" xmlns:xi="http://www.w3.org/2001/XInclude"> | ||
* <devices> | ||
* <device name="googleTranscr" type="googleSpeechTranscription"> | ||
* <param name="language_code"> | ||
* it-IT | ||
* </param> | ||
* </device> | ||
* | ||
* <device name="synthWrap" type="speechTranscription_nws_yarp"> | ||
* <action phase="startup" level="5" type="attach"> | ||
* <paramlist name="networks"> | ||
* <elem name="subdeviceGoogle"> | ||
* googleTranscr | ||
* </elem> | ||
* </paramlist> | ||
* </action> | ||
* <action phase="shutdown" level="5" type="detach" /> | ||
* </device> | ||
* </devices> | ||
* </robot> | ||
* \endcode | ||
*/ | ||
|
||
class GoogleSpeechTranscription : | ||
public yarp::dev::DeviceDriver, | ||
public yarp::dev::ISpeechTranscription | ||
{ | ||
public: | ||
GoogleSpeechTranscription(); | ||
GoogleSpeechTranscription(const GoogleSpeechTranscription&) = delete; | ||
GoogleSpeechTranscription(GoogleSpeechTranscription&&) noexcept = delete; | ||
GoogleSpeechTranscription& operator=(const GoogleSpeechTranscription&) = delete; | ||
GoogleSpeechTranscription& operator=(GoogleSpeechTranscription&&) noexcept = delete; | ||
~GoogleSpeechTranscription() override = default; | ||
|
||
// DeviceDriver | ||
bool open(yarp::os::Searchable& config) override; | ||
bool close() override; | ||
|
||
// yarp::dev::ISpeechTranscription | ||
bool setLanguage(const std::string& language="auto") override; | ||
bool getLanguage(std::string& language) override; | ||
bool transcribe(const yarp::sig::Sound& sound, std::string& transcription, double& score) override; | ||
|
||
private: | ||
int m_sampleRate; | ||
bool m_offline{false}; | ||
std::string m_languageCode; | ||
google::cloud::speech::v1::RecognitionConfig m_audioConfig; | ||
std::shared_ptr<google::cloud::speech_v1::SpeechClient> m_client{nullptr}; | ||
}; | ||
|
||
#endif // YARP_GOOGLESPEECHTRANSCR_H |
12 changes: 12 additions & 0 deletions
12
src/devices/googleSpeechTranscription/demos/CMakeLists.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# SPDX-FileCopyrightText: 2023-2023 Istituto Italiano di Tecnologia (IIT) | ||
# SPDX-License-Identifier: BSD-3-Clause | ||
|
||
# Install the demo material found in this directory: robot-interface XML files
# and .wav files go into the demo context, top-level XML files go into the
# applications directory.
# NOTE(review): the context is named after the synthesizer although this
# directory belongs to googleSpeechTranscription; the test looks up the same
# context name, so confirm before renaming either of them.
set(appname googleSpeechSynthesizer_demo)
set(_demo_context_dir ${YARP_CONTEXTS_INSTALL_DIR}/${appname})

# yarprobotinterface configuration files
file(GLOB conf ${CMAKE_CURRENT_SOURCE_DIR}/yarprobotinterface_xml/*.xml)
yarp_install(FILES ${conf} DESTINATION ${_demo_context_dir})

# audio samples
file(GLOB data ${CMAKE_CURRENT_SOURCE_DIR}/*.wav)
yarp_install(FILES ${data} DESTINATION ${_demo_context_dir})

# application descriptions
file(GLOB apps ${CMAKE_CURRENT_SOURCE_DIR}/*.xml)
yarp_install(FILES ${apps} DESTINATION ${YARP_APPLICATIONS_INSTALL_DIR})
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
# SPDX-FileCopyrightText: 2023 Istituto Italiano di Tecnologia (IIT) | ||
# SPDX-License-Identifier: BSD-3-Clause | ||
|
||
# Register the device test for this directory.
# NOTE(review): the test is named after googleSpeechSynthesizer although it sits
# under googleSpeechTranscription/tests - presumably a placeholder; confirm.
create_device_test (googleSpeechSynthesizer) |
84 changes: 84 additions & 0 deletions
84
src/devices/googleSpeechTranscription/tests/googleSpeechSynthesizer_test.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
/* | ||
* SPDX-FileCopyrightText: 2023 Istituto Italiano di Tecnologia (IIT) | ||
* SPDX-License-Identifier: BSD-3-Clause | ||
*/ | ||
|
||
#include <yarp/dev/ISpeechSynthesizer.h> | ||
#include <yarp/os/Network.h> | ||
#include <yarp/os/LogStream.h> | ||
#include <yarp/os/ResourceFinder.h> | ||
#include <yarp/dev/PolyDriver.h> | ||
#include <yarp/dev/WrapperSingle.h> | ||
|
||
#include <yarp/sig/Sound.h> | ||
#include <yarp/sig/SoundFile.h> | ||
|
||
#include <catch2/catch_amalgamated.hpp> | ||
#include <harness.h> | ||
|
||
using namespace yarp::dev; | ||
using namespace yarp::os; | ||
|
||
// Smoke test: opens the "googleSpeechSynthesizer" device in offline mode and
// checks that language and voice can be set and read back. No remote call is made.
// NOTE(review): this file sits under googleSpeechTranscription/tests but drives
// the synthesizer device through ISpeechSynthesizer - presumably a placeholder
// until a transcription test is written; confirm.
TEST_CASE("dev::googleSpeechSynthesizer_test", "[yarp::dev]")
{
    YARP_REQUIRE_PLUGIN("googleSpeechSynthesizer", "device");

    Network::setLocalMode(true);

    SECTION("Checking googleSpeechSynthesizer device")
    {
        ISpeechSynthesizer* synthesizer{nullptr};
        PolyDriver driver;

        // Load the reference audio file shipped with the demo context.
        yarp::sig::Sound referenceSound;
        yarp::os::ResourceFinder finder;

        finder.setQuiet(false);
        finder.setVerbose(true);

        finder.setDefaultContext("googleSpeechSynthesizer_demo");
        std::string wavPath = finder.findFile("test_audio.wav");
        CHECK_FALSE(wavPath.empty());
        yarp::sig::file::read(referenceSound, wavPath.c_str());
        CHECK(referenceSound.getSamples() > 0);

        // Open the device and fetch the interface under test.
        {
            Property deviceConfig;
            const std::string initialLanguage{"it-IT"};
            const std::string initialVoice{"it-IT-Wavenet-A"};
            deviceConfig.put("device", "googleSpeechSynthesizer");
            deviceConfig.put("language_code", initialLanguage);
            deviceConfig.put("voice_name", initialVoice);
            deviceConfig.put("__offline", 1);
            REQUIRE(driver.open(deviceConfig));
            REQUIRE(driver.view(synthesizer));
        }

        const std::string requestedLanguage{"en-GB"};
        const std::string requestedVoice{"en-GB-Neural2-C"};
        std::string reportedLanguage;
        std::string reportedVoice;

        std::string toSynthesize{"This is a text to speech test"};
        yarp::sig::Sound outputSound;

        // Set both values first, then read each one back and compare.
        CHECK(synthesizer->setLanguage(requestedLanguage));
        CHECK(synthesizer->setVoice(requestedVoice));
        CHECK(synthesizer->getLanguage(reportedLanguage));
        CHECK(reportedLanguage == requestedLanguage);
        CHECK(synthesizer->getVoice(reportedVoice));
        CHECK(reportedVoice == requestedVoice);

        // The remote API is deliberately not called: every request is billed,
        // so the synthesis check stays disabled for now.
        //CHECK(synthesizer->synthesize(toSynthesize,outputSound));
        //CHECK(outputSound == referenceSound);

        // Close the driver and verify that it shuts down cleanly.
        {
            CHECK(driver.close());
        }
    }

    Network::setLocalMode(false);
}