-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.js
108 lines (94 loc) · 2.98 KB
/
main.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
require("dotenv").config();
require("colors");
const express = require("express");
const ExpressWs = require("express-ws");
const { LanguageModelProcessor } = require("./components/model");
const { Socket } = require("./components/socket");
const { SpeechToText } = require("./components/stt");
const { TextToSpeech } = require("./components/tts");
const { VectorStore } = require("./components/vectorstore");
// Port to listen on: the PORT environment variable when set (and non-empty),
// otherwise 10000.
const PORT = process.env.PORT || 10000;

// Express application. ExpressWs(app) is applied before any route is declared
// so that the `app.ws(...)` route registered further down is available.
const app = express();
ExpressWs(app);
// GET / — simple liveness endpoint that always answers with a fixed JSON body.
app.get("/", (_req, res) => {
  const payload = { message: "hermes, live!" };
  res.json(payload);
});
/**
 * POST /answer — responds with the XML document that greets the caller and
 * then connects the call audio to the `/connection` WebSocket route on the
 * host named by the SERVER environment variable.
 *
 * Responds 500 (plain text) when SERVER is not configured, instead of handing
 * out a stream URL that points at the literal host "undefined".
 */
app.post("/answer", (req, res) => {
  const serverHost = process.env.SERVER;
  if (!serverHost) {
    console.error("SERVER is not set; cannot build the media stream URL.");
    res
      .status(500)
      .type("text/plain")
      .send("Server misconfigured: SERVER is not set.");
    return;
  }

  res.status(200);
  res.type("text/xml");
  res.end(`
<Response>
<Say>Himalayan restaurant at Niles, how can I help you?</Say>
<Connect>
<Stream url="wss://${serverHost}/connection" />
</Connect>
</Response>
`);
});
/**
 * WebSocket route `/connection` — one handler invocation per media stream.
 *
 * For each connection it creates its own language model, Socket wrapper,
 * speech-to-text, text-to-speech and vector store instances and wires them
 * together:
 *   inbound "media" frames -> stt.send
 *   stt "transcription"    -> vector store lookup -> model.chat -> tts.generate
 *   tts "speech"           -> socket.buffer
 *   socket "audiosent"     -> remembered in `marks` until a "mark" frame
 *                             with the same name comes back
 *
 * @param {object} ws - The WebSocket for this connection, as supplied by
 *   express-ws.
 */
app.ws("/connection", (ws) => {
  ws.on("error", console.error);

  let streamSid;
  const model = new LanguageModelProcessor();
  const socket = new Socket(ws);
  const stt = new SpeechToText();
  const tts = new TextToSpeech({});
  const vectorstore = new VectorStore();

  // Labels reported by the socket's "audiosent" event that have not yet been
  // acknowledged by a matching "mark" frame.
  let marks = [];
  let interactionCount = 0;

  ws.on("message", (data) => {
    let msg;
    try {
      msg = JSON.parse(data);
    } catch (err) {
      // A throw here would escape the event emitter as an uncaught exception;
      // drop the bad frame and keep the connection (and the process) alive.
      console.error("Twilio -> Ignoring malformed media stream message", err);
      return;
    }

    // `?.` covers a frame whose JSON payload is the literal `null`.
    switch (msg?.event) {
      case "start":
        streamSid = msg.start.streamSid;
        socket.setStreamSid(streamSid);
        console.log(
          `Twilio -> Starting Media Stream for ${streamSid}`.underline.red,
        );
        break;
      case "media":
        stt.send(msg.media.payload);
        break;
      case "mark": {
        const label = msg.mark.name;
        console.log(
          `Twilio -> Audio completed mark (${msg.sequenceNumber}): ${label}`.red,
        );
        marks = marks.filter((m) => m !== label);
        break;
      }
      case "stop":
        console.log(`Twilio -> Media stream ${streamSid} ended.`.underline.red);
        break;
      default:
        // Any other event type is ignored.
        break;
    }
  });

  // An utterance longer than 5 characters arriving while sent audio is still
  // unacknowledged is treated as the caller interrupting: send a "clear"
  // event for this stream.
  stt.on("utterance", (text) => {
    if (marks.length > 0 && text?.length > 5) {
      console.log("Twilio -> Interruption, Clearing stream".red);
      ws.send(
        JSON.stringify({
          streamSid,
          event: "clear",
        }),
      );
    }
  });

  stt.on("transcription", async (text) => {
    if (!text) {
      return;
    }
    console.log(`Interaction ${interactionCount} – STT -> GPT: ${text}`.yellow);

    try {
      const relevantDocs = await vectorstore.queryVectorStore(text);
      const modelResponse = await model.chat(text, relevantDocs);
      const modelReply = {
        partialResponseIndex: null,
        partialResponse: modelResponse,
      };
      interactionCount += 1;
      // Awaited so that, if generate() returns a promise, its rejection is
      // handled by the catch below rather than going unhandled.
      await tts.generate(modelReply, interactionCount);
    } catch (err) {
      // Nothing awaits an event handler's promise, so a rejection here would
      // otherwise be unhandled. Log it and skip this turn.
      console.error(
        `Interaction ${interactionCount}: failed to produce a reply`,
        err,
      );
    }
  });

  tts.on("speech", (responseIndex, audio, label, icount) => {
    console.log(`Interaction ${icount}: TTS -> TWILIO: ${label}`.blue);
    socket.buffer(responseIndex, audio);
  });

  socket.on("audiosent", (markLabel) => {
    marks.push(markLabel);
  });
});
// Start the server. The banner is printed from the server's "listening" event
// so it only appears once the port is actually bound; a bind failure (for
// example the port already being in use) still surfaces as the server's
// "error" event exactly as before, but no longer after a misleading
// "Server running" line.
app.listen(PORT).once("listening", () => {
  console.log(`Server running on port ${PORT}`);
});