Handle is_final and endpointing together with utterance end
DamienDeepgram committed Apr 16, 2024
1 parent d2ade7b commit 773460c
Showing 1 changed file with 42 additions and 10 deletions.
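In short: interim results stream continuously while someone speaks; segments flagged is_final=true are buffered; a segment flagged speech_final=true means endpointing detected silence, so the buffer is flushed as one complete utterance; and UtteranceEnd is the fallback flush for cases where no speech_final arrives. A distilled sketch of that buffering pattern follows (illustrative only, not code from this commit):

// Illustrative sketch of the pattern this commit introduces; not part of the diff.
// Buffer is_final segments, flush on speech_final, fall back to UtteranceEnd.
let buffered = [];

function handleTranscript(data) {
  const sentence = data.channel.alternatives[0].transcript;
  if (!sentence) return;
  if (!data.is_final) return; // interim result, useful only for live caption updates
  buffered.push(sentence);
  if (data.speech_final) {
    // Endpointing detected enough silence: emit the full utterance
    console.log(`Utterance: ${buffered.join(" ")}`);
    buffered = [];
  }
}

function handleUtteranceEnd() {
  // Fallback flush when speech_final never fired (e.g. background noise kept
  // endpointing from triggering) but word timings show the utterance ended
  if (buffered.length > 0) {
    console.log(`Utterance (via UtteranceEnd): ${buffered.join(" ")}`);
    buffered = [];
  }
}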
examples/node-live/index.js
@@ -1,19 +1,25 @@
-const {
-  createClient,
-  LiveTranscriptionEvents,
-  LiveTranscriptionEvent,
-} = require("../../dist/main/index");
+const { createClient, LiveTranscriptionEvents } = require("../../dist/main/index");
const fetch = require("cross-fetch");

const live = async () => {
  const url = "http://stream.live.vc.bbcmedia.co.uk/bbc_world_service";

  const deepgram = createClient(process.env.DEEPGRAM_API_KEY);

+  // We will collect the is_final=true messages here so we can use them when the person finishes speaking
+  let is_finals = [];

  const connection = deepgram.listen.live({
    model: "nova-2",
-    utterance_end_ms: 1500,
    language: "en-US",
+    // Apply smart formatting to the output
    smart_format: true,
+    // To get UtteranceEnd, the following must be set:
+    interim_results: true,
+    utterance_end_ms: 1000,
+    vad_events: true,
+    // Time in milliseconds of silence to wait for before finalizing speech
+    endpointing: 300,
  });

  connection.on(LiveTranscriptionEvents.Open, () => {
@@ -22,19 +28,45 @@ const live = async () => {
  });

  connection.on(LiveTranscriptionEvents.Metadata, (data) => {
-    console.log(data);
+    console.log(`Deepgram Metadata: ${data}`);
  });

  connection.on(LiveTranscriptionEvents.Transcript, (data) => {
-    console.log(data.channel);
+    const sentence = data.channel.alternatives[0].transcript;
+
+    // Ignore empty transcripts
+    if (sentence.length == 0) {
+      return;
+    }
+    if (data.is_final) {
+      // We need to collect these and concatenate them together when we get a speech_final=true
+      // See docs: https://developers.deepgram.com/docs/understand-endpointing-interim-results
+      is_finals.push(sentence);
+
+      // Speech final means we have detected sufficient silence to consider this the end of speech
+      // Speech final is the lowest latency result as it triggers as soon as the endpointing value has triggered
+      if (data.speech_final) {
+        const utterance = is_finals.join(" ");
+        console.log(`Speech Final: ${utterance}`);
+        is_finals = [];
+      } else {
+        // These are useful if you need real time captioning and update what the Interim Results produced
+        console.log(`Is Final: ${sentence}`);
+      }
+    } else {
+      // These are useful if you need real time captioning of what is being spoken
+      console.log(`Interim Results: ${sentence}`);
+    }
  });

  connection.on(LiveTranscriptionEvents.UtteranceEnd, (data) => {
-    console.log(data);
+    const utterance = is_finals.join(" ");
+    console.log(`Deepgram UtteranceEnd: ${utterance}`);
+    is_finals = [];
  });

  connection.on(LiveTranscriptionEvents.SpeechStarted, (data) => {
-    console.log(data);
+    // console.log("Deepgram SpeechStarted");
  });

  connection.on(LiveTranscriptionEvents.Error, (err) => {
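The remainder of the file is collapsed in this diff view. For anyone running the example: the connection still needs to be fed audio. A minimal, hypothetical sketch of piping the BBC stream into it, assuming the SDK connection's send() method and the cross-fetch import at the top of the file (the actual collapsed code may differ):

// Hypothetical sketch, not shown in this diff: stream the radio audio into the
// live connection once the socket is open.
connection.on(LiveTranscriptionEvents.Open, () => {
  fetch(url)
    .then((response) => response.body)
    .then((body) => {
      // With cross-fetch in Node, response.body is a Node Readable stream
      body.on("readable", () => {
        const chunk = body.read();
        if (chunk) connection.send(chunk);
      });
    });
});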
