added processEOS and resolved issue #335 with unit test

Apra-Labs · Mar 1, 2024 · 4ab2e39 · 4ab2e39
1 parent 5358310
commit 4ab2e39
Show file tree

Hide file tree

Showing 3 changed files with 78 additions and 4 deletions.
diff --git a/base/include/AudioToTextXForm.h b/base/include/AudioToTextXForm.h
@@ -49,6 +49,8 @@ class AudioToTextXForm : public Module
  bool validateOutputPins();
  void addInputPin(framemetadata_sp& metadata, string& pinId);
  bool handlePropsChange(frame_sp& frame);
+ bool processEOS(string &pinId);
+ bool handleFlushingBuffer();
 
 private:
  void setMetadata(framemetadata_sp& metadata);

diff --git a/base/src/AudioToTextXForm.cpp b/base/src/AudioToTextXForm.cpp
@@ -162,8 +162,7 @@ bool AudioToTextXForm::process(frame_container& frames)
  }
 
  if (mDetail->mInputAudioBuffer.size() < mDetail->mProps.bufferSize) {
- sendEOS();
- return true;
+ return handleFlushingBuffer();
  }
  whisper_full(
  mDetail->mWhisperContext,
@@ -221,4 +220,18 @@ void AudioToTextXForm::setProps(AudioToTextXFormProps& props)
  throw AIPException(AIP_FATAL, "Model Path dynamic change not handled");
  }
  Module::addPropsToQueue(props);
+}
+
+bool AudioToTextXForm::processEOS(string &pinId)
+{
+ mDetail->mInputAudioBuffer.clear();
+ return true;
+}
+
+bool AudioToTextXForm::handleFlushingBuffer()
+{
+ mDetail->mInputAudioBuffer.clear();
+ LOG_ERROR << "Flushed Buffer Successfully...\n";
+ Module::sendEOS();
+ return true;
 }
diff --git a/base/test/audioToTextXform_tests.cpp b/base/test/audioToTextXform_tests.cpp
@@ -224,9 +224,9 @@ BOOST_AUTO_TEST_CASE(change_unsupported_prop_asr)
  BOOST_CHECK_THROW(asr->setProps(propschange), std::runtime_error);
 }
 
-BOOST_AUTO_TEST_CASE(checkEOS_asr)
+BOOST_AUTO_TEST_CASE(check_eos_frame_asr)
 {
- std::vector<std::string> asrOutText = { "./data/asr_out.txt" };
+ std::vector<std::string> asrOutText = { "./data/asr_check_eos_frame.txt" };
  Test_Utils::FileCleaner f(asrOutText);
 
  Logger::setLogLevel(boost::log::trivial::severity_level::info);
@@ -273,4 +273,63 @@ BOOST_AUTO_TEST_CASE(checkEOS_asr)
  in_file_text.close();
 }
 
+BOOST_AUTO_TEST_CASE(check_flushed_buffer_asr)
+{
+ std::vector<std::string> asrOutText = { "./data/asr_flushed_buffer.txt" };
+ Test_Utils::FileCleaner f(asrOutText);
+
+ Logger::setLogLevel(boost::log::trivial::severity_level::info);
+
+ // This is a PCM file without WAV header
+ auto fileReaderProps = FileReaderModuleProps("./data/audioToTextXform_test.pcm");
+ fileReaderProps.readLoop = true;
+ auto fileReader = boost::shared_ptr<FileReaderModule>(new FileReaderModule(fileReaderProps));
+ auto metadata = framemetadata_sp(new FrameMetadata(FrameMetadata::AUDIO));
+ auto pinId = fileReader->addOutputPin(metadata);
+
+ auto asr = boost::shared_ptr<AudioToTextXForm>(new AudioToTextXForm(AudioToTextXFormProps(
+ AudioToTextXFormProps::DecoderSamplingStrategy::GREEDY
+ ,"./data/whisper/models/ggml-tiny.en-q8_0.bin",160000)));
+ fileReader->setNext(asr);
+
+ auto outputFile = boost::shared_ptr<FileWriterModule>(new FileWriterModule(FileWriterModuleProps(asrOutText[0], false)));
+ asr->setNext(outputFile);
+
+ auto sink = boost::shared_ptr<ExternalSinkModule>(new ExternalSinkModule());
+ asr->setNext(sink);
+
+ BOOST_TEST(fileReader->init());
+ BOOST_TEST(asr->init());
+ BOOST_TEST(outputFile->init());
+ BOOST_TEST(sink->init());
+
+ fileReader->step();
+ asr->step();
+
+ auto frames = sink->pop();
+ auto eosframe = frames.begin()->second;
+ BOOST_TEST(eosframe->isEOS());
+
+ outputFile->step();
+
+ AudioToTextXFormProps propschange = asr->getProps();
+ propschange.bufferSize = 18000;
+ asr->setProps(propschange);
+
+ for (int i = 0; i < 2; i++) {
+ fileReader->step();
+ asr->step();
+ }
+ outputFile->step();
+
+ std::ifstream in_file_text(asrOutText[0]);
+ std::ostringstream buffer;
+ buffer << in_file_text.rdbuf();
+ std:string output = " The Matic speech recognition also known as ASR is the use of machine learning or artificial intelligence technology to process human speech into readable text.";
+ double thres = 0.95;
+ BOOST_TEST(cosineSimilarity(buffer.str(), output) >= thres);
+ // BOOST_TEST(buffer.str() == output);
+ in_file_text.close();
+}
+
 BOOST_AUTO_TEST_SUITE_END()