Skip to content

Commit

Permalink
Pronunciation assessment with stream for C++ language (Azure-Samples#…
Browse files Browse the repository at this point in the history
  • Loading branch information
jinshan1979 authored Feb 22, 2023
1 parent ded1cc5 commit 1d87a37
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 0 deletions.
6 changes: 6 additions & 0 deletions samples/cpp/windows/console/samples/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ extern void SpeechContinuousRecognitionWithPullStream();
extern void SpeechContinuousRecognitionWithPushStream();
extern void KeywordTriggeredSpeechRecognitionWithMicrophone();
extern void PronunciationAssessmentWithMicrophone();
extern void PronunciationAssessmentWithStream();
extern void SpeechContinuousRecognitionFromDefaultMicrophoneWithMASEnabled();
extern void SpeechRecognitionFromMicrophoneWithMASEnabledAndPresetGeometrySpecified();
extern void SpeechContinuousRecognitionFromMultiChannelFileWithMASEnabledAndCustomGeometrySpecified();
Expand Down Expand Up @@ -99,6 +100,7 @@ void SpeechSamples()
" Microsoft Audio Stack enabled.\n";
cout << "d.) Speech recognition from push stream with Microsoft Audio Stack enabled and\n"
" beam-forming angles specified.\n";
cout << "e.) Pronunciation assessment with stream.\n";
cout << "\nChoice (0 for MAIN MENU): ";
cout.flush();

Expand Down Expand Up @@ -150,6 +152,10 @@ void SpeechSamples()
case 'd':
SpeechContinuousRecognitionFromPushStreamWithMASEnabledAndBeamformingAnglesSpecified();
break;
case 'E':
case 'e':
PronunciationAssessmentWithStream();
break;
case '0':
break;
}
Expand Down
3 changes: 3 additions & 0 deletions samples/cpp/windows/console/samples/samples.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -107,11 +107,14 @@
<PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
<AdditionalIncludeDirectories>$(MSBuildThisFileDirectory)include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<AdditionalUsingDirectories>
</AdditionalUsingDirectories>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>$(SpeechNativeLibDir)\Microsoft.CognitiveServices.Speech.core.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
Expand Down
69 changes: 69 additions & 0 deletions samples/cpp/windows/console/samples/speech_recognition_samples.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
#include <nlohmann/json.hpp>
#include <fstream>
#include "wav_file_reader.h"
#include <vector>
#include <future>

using namespace std;
using namespace Microsoft::CognitiveServices::Speech;
Expand Down Expand Up @@ -613,6 +615,73 @@ void PronunciationAssessmentWithMicrophone()
}
}

// Worker used by PronunciationAssessmentWithStream: feeds an in-memory audio buffer to a push stream,
// runs one single-shot recognition with pronunciation assessment applied, and reports the outcome.
//   speechConfig    - speech configuration used to create the recognizer.
//   referenceText   - reference text handed to the pronunciation assessment config.
//   audioData       - audio bytes written to the push stream; they must match the stream format declared below.
//   resultReceived  - fulfilled with 1 once recognition has finished, whether or not it was canceled.
//   resultContainer - receives one formatted score summary when the result is not canceled; nothing is
//                     appended on cancellation (the error details are printed to stdout instead).
void PronunciationAssessmentWithStreamInternalAsync(shared_ptr<SpeechConfig> speechConfig, std::string referenceText, std::vector<uint8_t> audioData, std::promise<int> resultReceived, std::vector<std::string>& resultContainer)
{
    // The stream format has to be set according to the format of the given audio data.
    auto pcmFormat = AudioStreamFormat::GetWaveFormatPCM(16000, 16, 1);
    auto pushStream = AudioInputStream::CreatePushStream(pcmFormat);
    auto streamConfig = AudioConfig::FromStreamInput(pushStream);
    auto recognizer = SpeechRecognizer::FromConfig(speechConfig, streamConfig);

    // Create the pronunciation assessment config; choose the grading system, granularity and
    // whether to enable miscue based on your requirements.
    auto assessmentConfig = PronunciationAssessmentConfig::Create(
        referenceText,
        PronunciationAssessmentGradingSystem::HundredMark,
        PronunciationAssessmentGranularity::Phoneme,
        false);
    assessmentConfig->ApplyTo(recognizer);

    // Hand over the whole buffer, then send a zero-size chunk to signal the end of the stream.
    audioInputStreamWriteGuard:
    pushStream->Write(audioData.data(), static_cast<uint32_t>(audioData.size()));
    pushStream->Write(nullptr, 0);

    auto recognition = recognizer->RecognizeOnceAsync().get();
    if (recognition->Reason != ResultReason::Canceled)
    {
        auto scores = PronunciationAssessmentResult::FromResult(recognition);

        std::string summary = "PRONUNCIATION ASSESSMENT RESULTS : \n";
        summary += " Accuracy score: ";
        summary += std::to_string(scores->AccuracyScore);
        summary += ", Pronunciation score: ";
        summary += std::to_string(scores->PronunciationScore);
        summary += ", Completeness score : ";
        summary += std::to_string(scores->CompletenessScore);
        summary += ", FluencyScore: ";
        summary += std::to_string(scores->FluencyScore);

        resultContainer.push_back(summary);
    }
    else
    {
        auto cancellation = CancellationDetails::FromResult(recognition);
        std::cout << cancellation->ErrorDetails << std::endl;
    }

    // Wake up the caller waiting on the matching future.
    resultReceived.set_value(1);
}

// Pronunciation assessment with audio stream input.
// See more information at https://aka.ms/csspeech/pa
//
// Loads the sample WAV file into memory, skips its header, runs the assessment worker on a
// separate thread and prints the score summary together with the elapsed time.
// Problems with the audio file (missing, too short, unreadable) are reported on stdout and the
// sample returns without contacting the service.
void PronunciationAssessmentWithStream()
{
    // Creates an instance of a speech config with specified subscription key and service region.
    // Replace with your own subscription key and service region (e.g., "westus").
    auto config = SpeechConfig::FromSubscription("YourSubscriptionKey", "YourServiceRegion");

    const char* const audioFileName = "whatstheweatherlike.wav";
    // Number of leading bytes skipped so that only the audio payload is streamed.
    // NOTE(review): 46 is specific to the bundled sample file; other WAV files may use a different header size.
    constexpr std::streamoff wavHeaderSize = 46;

    // Read audio data from file. In a real scenario this can come from memory or the network.
    // Opening at the end (ios::ate) lets tellg() report the total file size.
    std::ifstream file(audioFileName, std::ios::binary | std::ios::ate);
    if (!file.is_open())
    {
        std::cout << "Failed to open audio file: " << audioFileName << std::endl;
        return;
    }

    // tellg() yields -1 on failure; that case is covered by the size check as well.
    const std::streamoff fileSize = file.tellg();
    if (fileSize <= wavHeaderSize)
    {
        std::cout << "Audio file contains no audio data: " << audioFileName << std::endl;
        return;
    }

    std::vector<uint8_t> audioData(static_cast<size_t>(fileSize - wavHeaderSize));
    file.seekg(wavHeaderSize, std::ios::beg);
    if (!file.read(reinterpret_cast<char*>(audioData.data()), static_cast<std::streamsize>(audioData.size())))
    {
        std::cout << "Failed to read audio data from file: " << audioFileName << std::endl;
        return;
    }

    std::promise<int> resultReceived;
    std::future<int> futureResult = resultReceived.get_future();
    std::vector<std::string> resultContainer;

    auto startTime = std::chrono::high_resolution_clock::now();

    // The buffer is not needed here any more, so it is moved into the task instead of copied.
    // resultContainer is passed by reference and is only read after the promise has been fulfilled.
    auto task = std::async(std::launch::async, PronunciationAssessmentWithStreamInternalAsync, config, "what's the weather like", std::move(audioData), std::move(resultReceived), std::ref(resultContainer));

    // Blocks until the worker signals completion.
    futureResult.get();

    auto endTime = std::chrono::high_resolution_clock::now();

    // The worker appends nothing when recognition is canceled, so the container may be empty.
    if (resultContainer.empty())
    {
        std::cout << "No pronunciation assessment result was produced." << std::endl;
    }
    else
    {
        std::cout << resultContainer.front() << std::endl;
    }

    auto timeCost = std::chrono::duration_cast<std::chrono::milliseconds>(endTime - startTime).count();
    std::cout << "Time cost: " << timeCost << "ms" << std::endl;
}

#pragma region Language Detection related samples

void SpeechRecognitionAndLanguageIdWithMicrophone()
Expand Down

0 comments on commit 1d87a37

Please sign in to comment.