Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speech integration #17

Merged
merged 22 commits into from
Nov 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
f3ac8d2
moved speech tools to a tools folder
TheStrgamer Oct 22, 2024
2a9a3fa
updated requirements-txt
TheStrgamer Oct 22, 2024
59a885a
moved speech to text files to core
TheStrgamer Oct 22, 2024
48af413
Added js function for voice button
TheStrgamer Oct 24, 2024
294c7df
beginning to add voice
WilliamMRS Oct 24, 2024
0630709
added flask server as main.py
WilliamMRS Oct 24, 2024
44115b0
Moved speech to new folder
TheStrgamer Oct 24, 2024
0364036
Added thing to main, don't know what
TheStrgamer Oct 24, 2024
4cff87b
Merge branch 'stt-integration' into speech_integration
TheStrgamer Oct 24, 2024
66fd456
feat: changed folders
WilliamMRS Oct 24, 2024
0579353
fix: stt not quite working yet. CC'd from compose
WilliamMRS Oct 24, 2024
a8b3af3
Merge branch 'main' into speech_integration
WilliamMRS Oct 27, 2024
422632d
Feat: Added main and req.txt
WilliamMRS Nov 4, 2024
efe1d18
Added communication between jarvis and stt module
TheStrgamer Nov 5, 2024
6272d76
Merge branch 'speech_integration' of https://github.com/CogitoNTNU/ja…
TheStrgamer Nov 5, 2024
840d788
moved recording to js. it no work tho
TheStrgamer Nov 5, 2024
8771a78
Fixed audio input, it is wierd though
TheStrgamer Nov 7, 2024
686946f
made max length for recording, so it does not go out of control
TheStrgamer Nov 7, 2024
57856bc
Added webm to gitignore
TheStrgamer Nov 7, 2024
1386fa8
Made button red while recording
TheStrgamer Nov 7, 2024
8ea0240
Fixed bug where audio is appended instead of replaced
TheStrgamer Nov 9, 2024
9187515
Merge branch 'main' into speech_integration
WilliamMRS Nov 11, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -147,5 +147,6 @@ dmypy.json
#wav files
*.wav

*.webm
#calendar json
/core/tools/calendarjson
3 changes: 0 additions & 3 deletions core/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,6 @@ RUN pip install --no-cache-dir -r requirements.txt
# Copy the current directory contents into the container at /app
COPY . .

# Make port 8000 available to the world outside this container
EXPOSE 3001

# Run app.py when the container launches
CMD ["python","-u", "main.py"]
#CMD ["gunicorn", "--worker-class", "eventlet", "-w", "1", "-b", "0.0.0.0:8000", "app:app"]
59 changes: 56 additions & 3 deletions core/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,11 @@
import asyncio
from modules.user_data_setup import check_folders
from modules.chat import read_chat
import requests
import logging
log = logging.getLogger('werkzeug')
log.setLevel(logging.ERROR) #INFO, DEBUG, WARNING, ERROR, or CRITICAL - config as needed during development.
log.setLevel(logging.ERROR)
from time import sleep
from collections import defaultdict

#
Expand Down Expand Up @@ -50,7 +52,7 @@ def hello_world():
# Route to get metadata like name, id, descriptions of all user chats
@app.route("/chats/metadata")
def get_chats():
return "lmao"
return "lmao" # Why does this return lmao?

@app.route('/vectorize_chat', methods=['POST'])
def summarize_store():
Expand Down Expand Up @@ -133,6 +135,56 @@ async def run_and_store():
print(f'Something very bad happened: {e}')
return jsonify({"status": "error"})

# Custom event. Fired when the user clicks the button with the cute little microphone icon.
@app.route('/start_recording', methods=['POST'])
def start_recording_route():
    """Ask the speech-to-text service to start recording.

    Reads the JSON request body, takes its ``conversation_id`` and forwards
    the whole body to the recorder at ``speech-to-text:3001``.

    Returns:
        A ``(json, status)`` pair: ``{"status": "recording_started"}`` with
        200 when the recorder answers 200; otherwise an error payload with
        500. An unreachable recorder is reported the same way instead of
        raising out of the view.
    """
    data = request.json
    conversation_id = data.get('conversation_id')

    print("Starting recording...")

    # Send POST request to the recorder to start recording
    headers = {'Content-Type': 'application/json'}
    try:
        response = requests.post(
            f'http://speech-to-text:3001/start_recording/{conversation_id}',
            headers=headers,
            json=data,
            # Bound only the connect phase; how long the recorder takes to
            # answer is not visible from here, so the read side is unbounded.
            timeout=(5, None),
        )
    except requests.RequestException as e:
        # Recorder is down or unreachable: report it like any other failure.
        print(f"Could not reach the speech-to-text service: {e}")
        return jsonify({"status": "error", "text": "Failed to start recording"}), 500

    if response.status_code != 200:
        return jsonify({"status": "error", "text": "Failed to start recording"}), 500

    return jsonify({"status": "recording_started"}), 200


@socketio.on('start_recording')
def start_recording_socket(data):
    """Handle the ``start_recording`` socket event.

    Forwards ``data`` to the recorder at ``speech-to-text:3001`` and emits
    ``recording_started`` when the recorder answers 200, or
    ``recording_failed`` otherwise. ``recording_failed`` is also emitted
    when the recorder cannot be reached at all.

    Args:
        data: Event payload; its ``conversation_id`` is put in the recorder
            URL and the whole payload is sent as the JSON body.
    """
    conversation_id = data.get('conversation_id')

    print("Starting recording via socket...")

    # Send POST request to the recorder to start recording
    headers = {'Content-Type': 'application/json'}
    try:
        response = requests.post(
            f'http://speech-to-text:3001/start_recording/{conversation_id}',
            headers=headers,
            json=data,
            # Bound only the connect phase; how long the recorder takes to
            # answer is not visible from here, so the read side is unbounded.
            timeout=(5, None),
        )
    except requests.RequestException as e:
        # Recorder is down or unreachable: tell the client instead of
        # letting the exception escape the handler.
        print(f"Could not reach the speech-to-text service: {e}")
        socketio.emit('recording_failed', {"status": "error", "text": "Failed to start recording"})
        return

    if response.status_code != 200:
        socketio.emit('recording_failed', {"status": "error", "text": "Failed to start recording"})
        return

    socketio.emit('recording_started', {"status": "recording_started"})

@app.route('/recording_completed', methods=['POST'])
def recording_completed():
    """Receive a finished transcription and hand it to Jarvis.

    Reads ``text`` and ``conversation_id`` from the JSON request body (both
    default to an empty string), emits the text on the ``recording`` socket
    event, logs it, and then runs ``jarvis.run`` on the text.

    Returns:
        ``({"status": "success"}, 200)`` once ``jarvis.run`` has finished.
    """
    payload = request.json
    transcript = payload.get('text', '')
    conversation_id = payload.get('conversation_id', '')

    # Emit the transcript first, before the Jarvis run below.
    socketio.emit("recording", transcript)
    print(f"Recording completed for conversation ID {conversation_id} with text:", transcript)

    # Hand the transcript to Jarvis; assumes jarvis.run is a coroutine
    # function, which is why it is driven with asyncio.run.
    asyncio.run(jarvis.run(transcript, socketio))

    success_body = {"status": "success"}
    return jsonify(success_body), 200


@socketio.on('get_chat_history')
def get_chat_history():
session_id = request.sid
Expand All @@ -143,4 +195,5 @@ def get_chat_history():
if __name__ == '__main__':
socketio.run(app, debug=True, host='0.0.0.0', port=PORT, allow_unsafe_werkzeug=True)

# hello
# hello
# TODO say hello back to whoever wrote this
10 changes: 10 additions & 0 deletions core/static/chat.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,16 @@ sendMessage = () => {
}
}

/**
 * Show a transcribed voice message as a user message and scroll the chat
 * history to the bottom.
 *
 * @param {string} message - Transcribed text. Empty or non-string values
 *     are ignored, so marked.parse is never called with null/undefined.
 */
addRecordedMessage = (message) => {
    if (typeof message !== "string" || message === "") {
        return
    }

    addUserMessage(marked.parse(message))

    // Keep the newest message in view.
    let chat_history = document.getElementById("chat_history")
    chat_history.scrollTop = chat_history.scrollHeight;
}


addStreamedChunk = (messagePart) => {
if(state.activeAIMessage){
state.activeAIMessage.innerHTML += messagePart; // Append to innertext of the message
Expand Down
8 changes: 8 additions & 0 deletions core/static/index.css
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,14 @@ body {
font-size: 24px;
margin-left: 12px;
}
#voice_button_recording {
width: 10%;
height: 9vh;
background-color: #673636;
border-radius: 10px;
font-size: 24px;

}

.chat_input_container{
display: flex;
Expand Down
1 change: 1 addition & 0 deletions core/static/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
<script src="/static/ui_elements/settings.js"></script>
<script src="/static/ui_elements/chatHistoryList.js"></script>
<script src="/static/chat.js"></script>
<script src="static/recording.js"></script>
<script src="/static/socketEvents.js"></script>
<script src="/static/index.js" defer></script>
<script src="/static/eventlisteners.js"></script>
Expand Down
47 changes: 28 additions & 19 deletions core/static/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,23 @@ Main js file for loading the dynamic UI elements.
*/

// Runs on initial startup, after window (html) has finished loading
init = () => {
document.getElementById("send_button").addEventListener("click", sendMessage);
document.getElementById("clear_log").addEventListener("click", clear_log);

document.querySelector(".chatHistory").innerHTML += chatHistoryList();

// To hide settings page when clicking somewhere else after it's opened.
document.addEventListener("click", function (event) {
const settings = document.getElementById("settingsPage");
const settingsButton = document.getElementById("settingsButton");
if (
!settings.contains(event.target) &&
!settingsButton.contains(event.target) &&
settings.style.display == "block"
) {
settingsPage();
}
});
};
init = () => {
    // Small helper: attach a click handler to the element with the given id.
    const onClick = (id, handler) => {
        document.getElementById(id).addEventListener('click', handler)
    }

    onClick('send_button', sendMessage)
    onClick('clear_log', clear_log)
    onClick('voice_button', startRecording)

    // Append the chat history list markup to the sidebar container.
    const historyContainer = document.querySelector(".chatHistory")
    historyContainer.innerHTML += chatHistoryList()

    // Close the settings page when a click lands outside both the page and
    // its toggle button while the page is showing.
    document.addEventListener('click', (event) => {
        const settings = document.getElementById("settingsPage");
        const settingsButton = document.getElementById("settingsButton");

        const clickedInside = settings.contains(event.target) || settingsButton.contains(event.target);
        if (clickedInside || settings.style.display != "block") {
            return;
        }
        settingsPage()
    });
}
window.onload = init;

// global state of the UI
Expand Down Expand Up @@ -107,6 +105,17 @@ async function addToolResponseToProcessContainer(toolResponse) {
let processesContainer = document.querySelector(".processesContainer");
processesContainer.scrollTop = processesContainer.scrollHeight;
}
/**
 * Add a piece of transcribed speech to the chat.
 *
 * If an element with id `uuid` exists, `messagePart` is appended to its
 * innerHTML. Otherwise the text is passed to addRecordedMessage.
 *
 * @param {string} uuid - Id of the message element to append to.
 * @param {string} messagePart - Text to add.
 */
async function addStreamedRecording(uuid, messagePart) {
    const element = document.getElementById(uuid);

    if (element == null) {
        // No element with this id yet: hand the text to addRecordedMessage.
        await addRecordedMessage(messagePart, uuid);
        return;
    }

    // Concat the new part onto the message with this uuid
    element.innerHTML += messagePart;
}

addUserMessage = (message) => {
let html = /*html*/ `
Expand Down
7 changes: 7 additions & 0 deletions core/static/recording.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/**
 * Click handler for the voice button: marks the button as recording and
 * asks the server to start recording for the active conversation.
 *
 * The button is disabled until the server emits either "recording" or
 * "recording_failed", at which point it is re-enabled and its colour reset.
 */
startRecording = () => {
    const voiceButton = document.getElementById('voice_button');
    voiceButton.style.backgroundColor = "#673636"; // Change button color to indicate recording
    // `disabled` is the DOM property that blocks clicks; buttons have no `enabled` property.
    voiceButton.disabled = true;

    // Undo the recording state once, then drop both listeners so they do
    // not pile up across recordings.
    const restoreButton = () => {
        voiceButton.disabled = false;
        voiceButton.style.backgroundColor = "";
        socket.off("recording", restoreButton);
        socket.off("recording_failed", restoreButton);
    };
    socket.on("recording", restoreButton);
    socket.on("recording_failed", restoreButton);

    const payload = {conversation_id: state.activeConversationId}
    socket.emit('start_recording', payload)
    console.log("Recording started");
}
13 changes: 13 additions & 0 deletions core/static/socketEvents.js
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,19 @@ socket.on("chunk", async (chunk) => {
await addStreamedMessage(uuid, chunk);
});

// Fired by the server with the transcribed text once a recording is done.
socket.on("recording", async (recording)=>{
    if(!state.activeAIMessage){
        console.log("RECIVED MESSAGE")
        const voiceButton = document.getElementById('voice_button');
        voiceButton.style.backgroundColor = ""; // Recording is over: restore the default button color
        voiceButton.disabled = false; // Re-enable the button (`disabled` is the DOM property; `enabled` does nothing)
        // `uuid` and `ai_message` are left undeclared on purpose: the call
        // after this `if` reads `uuid` even when this branch is skipped.
        uuid = generateUUID();
        await addStreamedRecording(uuid, "");
        ai_message = document.getElementById(uuid)
        state.activeAIMessage = ai_message
    }
    await addStreamedRecording(uuid, recording );
})

socket.on("tokens", async (tokens) => {
state.totalTokensUsed += tokens;
console.log("Total tokens so far:", state.totalTokensUsed);
Expand Down
21 changes: 21 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
version: '2.1'

services:
llm-service:
build: ./core
Expand All @@ -21,7 +23,26 @@ services:
stop_signal: SIGINT
ports:
- "3000:3000"
deploy:
resources:
limits:
cpus: '0.5'
memory: 2048M # Memory limit for the compose

speech-to-text:
build: ./speechToText
restart: unless-stopped
environment:
FLASK_ENV: ${FLASK_ENV} # Autorestarts flask when code changes are detected
OPENAI_API_KEY: ${OPENAI_API_KEY}
PORT_STT: ${PORT_STT}
volumes:
- ./speechToText:/app # Mount the application code to detect live changes
networks:
- backend
stop_signal: SIGINT
ports:
- "3001:3001"

networks:
backend:
Expand Down
Binary file removed docs/images/enter_server_url.png
Binary file not shown.
Binary file removed docs/images/select_kernel.png
Binary file not shown.
Empty file removed speech/Dockerfile
Empty file.
Empty file removed speech/requirements.txt
Empty file.
32 changes: 32 additions & 0 deletions speechToText/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Use an official Python runtime as a parent image
FROM python:3.10-bookworm

# Set the working directory in the container
WORKDIR /app

# System packages: PortAudio/ALSA/PulseAudio/SoX development libraries and ffmpeg.
# Installed in a single layer so the apt lists are removed in the same layer
# that downloaded them.
RUN apt-get update && apt-get install -y \
    portaudio19-dev \
    libasound2-dev \
    libpulse-dev \
    sox \
    libsox-dev \
    ffmpeg \
    && rm -rf /var/lib/apt/lists/*

# Copy only requirements first so the dependency layer stays cached.
COPY requirements.txt requirements.txt

# Install any needed packages specified in requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Copy the current directory contents into the container at /app
COPY . .

# Run main.py when the container launches
CMD ["python","-u", "main.py"]
#CMD ["gunicorn", "--worker-class", "eventlet", "-w", "1", "-b", "0.0.0.0:8000", "app:app"]
File renamed without changes.
File renamed without changes.
Loading
Loading