diff --git a/base/include/AudioToTextXForm.h b/base/include/AudioToTextXForm.h
index c95cf796f..07dcbbd95 100644
--- a/base/include/AudioToTextXForm.h
+++ b/base/include/AudioToTextXForm.h
@@ -49,6 +49,8 @@ class AudioToTextXForm : public Module
 	bool validateOutputPins();
 	void addInputPin(framemetadata_sp& metadata, string& pinId);
 	bool handlePropsChange(frame_sp& frame);
+	bool processEOS(string &pinId);
+	bool handleFlushingBuffer();
 
 private:
 	void setMetadata(framemetadata_sp& metadata);
diff --git a/base/src/AudioToTextXForm.cpp b/base/src/AudioToTextXForm.cpp
index d84a13073..ce49d9c4a 100644
--- a/base/src/AudioToTextXForm.cpp
+++ b/base/src/AudioToTextXForm.cpp
@@ -162,8 +162,7 @@ bool AudioToTextXForm::process(frame_container& frames)
 	}
 	if (mDetail->mInputAudioBuffer.size() < mDetail->mProps.bufferSize)
 	{
-		sendEOS();
-		return true;
+		return handleFlushingBuffer();
 	}
 	whisper_full(
 		mDetail->mWhisperContext,
@@ -221,4 +220,24 @@ void AudioToTextXForm::setProps(AudioToTextXFormProps& props)
 		throw AIPException(AIP_FATAL, "Model Path dynamic change not handled");
 	}
 	Module::addPropsToQueue(props);
+}
+
+// EOS hook: discards whatever audio is currently buffered; pinId is not used.
+bool AudioToTextXForm::processEOS(string &pinId)
+{
+	mDetail->mInputAudioBuffer.clear();
+	return true;
+}
+
+// Short-buffer path of process(): clears the buffered audio, logs the flush and
+// sends an EOS through Module::sendEOS(). Always returns true.
+// NOTE(review): the replaced code only sent EOS and kept the buffer; confirm that
+// dropping the partially filled buffer here is intended.
+bool AudioToTextXForm::handleFlushingBuffer()
+{
+	mDetail->mInputAudioBuffer.clear();
+	// A successful flush is informational, not an error.
+	LOG_INFO << "Flushed Buffer Successfully...\n";
+	Module::sendEOS();
+	return true;
 }
\ No newline at end of file
diff --git a/base/test/audioToTextXform_tests.cpp b/base/test/audioToTextXform_tests.cpp
index b566f5bb2..0e9864464 100644
--- a/base/test/audioToTextXform_tests.cpp
+++ b/base/test/audioToTextXform_tests.cpp
@@ -224,9 +224,9 @@ BOOST_AUTO_TEST_CASE(change_unsupported_prop_asr)
 	BOOST_CHECK_THROW(asr->setProps(propschange), std::runtime_error);
 }
 
-BOOST_AUTO_TEST_CASE(checkEOS_asr)
+BOOST_AUTO_TEST_CASE(check_eos_frame_asr)
 {
-	std::vector<std::string> asrOutText = { "./data/asr_out.txt" };
+	std::vector<std::string> asrOutText = { "./data/asr_check_eos_frame.txt" };
 	Test_Utils::FileCleaner f(asrOutText);
 
 	Logger::setLogLevel(boost::log::trivial::severity_level::info);
@@ -273,4 +273,69 @@ BOOST_AUTO_TEST_CASE(checkEOS_asr)
 	in_file_text.close();
 }
 
+// Exercises the short-buffer flush: expects an EOS while the buffer is below
+// bufferSize, then checks the transcript after bufferSize is lowered.
+BOOST_AUTO_TEST_CASE(check_flushed_buffer_asr)
+{
+	std::vector<std::string> asrOutText = { "./data/asr_flushed_buffer.txt" };
+	Test_Utils::FileCleaner f(asrOutText);
+
+	Logger::setLogLevel(boost::log::trivial::severity_level::info);
+
+	// This is a PCM file without WAV header
+	auto fileReaderProps = FileReaderModuleProps("./data/audioToTextXform_test.pcm");
+	fileReaderProps.readLoop = true;
+	auto fileReader = boost::shared_ptr<FileReaderModule>(new FileReaderModule(fileReaderProps));
+	auto metadata = framemetadata_sp(new FrameMetadata(FrameMetadata::AUDIO));
+	auto pinId = fileReader->addOutputPin(metadata);
+
+	// bufferSize starts at 160000 - presumably more than one read of the fixture supplies; verify.
+	auto asr = boost::shared_ptr<AudioToTextXForm>(new AudioToTextXForm(AudioToTextXFormProps(
+		AudioToTextXFormProps::DecoderSamplingStrategy::GREEDY
+		,"./data/whisper/models/ggml-tiny.en-q8_0.bin",160000)));
+	fileReader->setNext(asr);
+
+	auto outputFile = boost::shared_ptr<FileWriterModule>(new FileWriterModule(FileWriterModuleProps(asrOutText[0], false)));
+	asr->setNext(outputFile);
+
+	auto sink = boost::shared_ptr<ExternalSinkModule>(new ExternalSinkModule());
+	asr->setNext(sink);
+
+	BOOST_TEST(fileReader->init());
+	BOOST_TEST(asr->init());
+	BOOST_TEST(outputFile->init());
+	BOOST_TEST(sink->init());
+
+	fileReader->step();
+	asr->step();
+
+	// The first frame the sink receives must be an EOS.
+	auto frames = sink->pop();
+	auto eosframe = frames.begin()->second;
+	BOOST_TEST(eosframe->isEOS());
+
+	outputFile->step();
+
+	// Shrink bufferSize at runtime before reading again.
+	AudioToTextXFormProps propschange = asr->getProps();
+	propschange.bufferSize = 18000;
+	asr->setProps(propschange);
+
+	for (int i = 0; i < 2; i++) {
+		fileReader->step();
+		asr->step();
+	}
+	outputFile->step();
+
+	std::ifstream in_file_text(asrOutText[0]);
+	std::ostringstream buffer;
+	buffer << in_file_text.rdbuf();
+	std::string output = " The Matic speech recognition also known as ASR is the use of machine learning or artificial intelligence technology to process human speech into readable text.";
+	// Compared by cosine similarity against a threshold rather than exact match.
+	double thres = 0.95;
+	BOOST_TEST(cosineSimilarity(buffer.str(), output) >= thres);
+	// BOOST_TEST(buffer.str() == output);
+	in_file_text.close();
+}
+
 BOOST_AUTO_TEST_SUITE_END()
\ No newline at end of file