Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(faster-whisper): add backend #4666

Merged
merged 1 commit into from
Jan 23, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ ARG TARGETARCH
ARG TARGETVARIANT

ENV DEBIAN_FRONTEND=noninteractive
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,transformers:/build/backend/python/transformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,openvoice:/build/backend/python/openvoice/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,transformers:/build/backend/python/transformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,faster-whisper:/build/backend/python/faster-whisper/run.sh,openvoice:/build/backend/python/openvoice/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"


RUN apt-get update && \
Expand Down Expand Up @@ -414,6 +414,9 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAG
if [[ ( "${EXTRA_BACKENDS}" =~ "parler-tts" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/parler-tts \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "faster-whisper" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/parler-tts \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "diffusers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/diffusers \
; fi
Expand Down
13 changes: 11 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -533,10 +533,10 @@ protogen-go-clean:
$(RM) bin/*

.PHONY: protogen-python
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen mamba-protogen rerankers-protogen transformers-protogen parler-tts-protogen kokoro-protogen vllm-protogen openvoice-protogen
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen mamba-protogen rerankers-protogen transformers-protogen parler-tts-protogen kokoro-protogen vllm-protogen openvoice-protogen faster-whisper-protogen

.PHONY: protogen-python-clean
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean mamba-protogen-clean rerankers-protogen-clean transformers-protogen-clean parler-tts-protogen-clean kokoro-protogen-clean vllm-protogen-clean openvoice-protogen-clean
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean mamba-protogen-clean rerankers-protogen-clean transformers-protogen-clean parler-tts-protogen-clean kokoro-protogen-clean vllm-protogen-clean openvoice-protogen-clean faster-whisper-protogen-clean

.PHONY: autogptq-protogen
autogptq-protogen:
Expand Down Expand Up @@ -570,6 +570,14 @@ diffusers-protogen:
diffusers-protogen-clean:
$(MAKE) -C backend/python/diffusers protogen-clean

.PHONY: faster-whisper-protogen
faster-whisper-protogen:
$(MAKE) -C backend/python/faster-whisper protogen

.PHONY: faster-whisper-protogen-clean
faster-whisper-protogen-clean:
$(MAKE) -C backend/python/faster-whisper protogen-clean

.PHONY: exllama2-protogen
exllama2-protogen:
$(MAKE) -C backend/python/exllama2 protogen
Expand Down Expand Up @@ -641,6 +649,7 @@ prepare-extra-conda-environments: protogen-python
$(MAKE) -C backend/python/bark
$(MAKE) -C backend/python/coqui
$(MAKE) -C backend/python/diffusers
$(MAKE) -C backend/python/faster-whisper
$(MAKE) -C backend/python/vllm
$(MAKE) -C backend/python/mamba
$(MAKE) -C backend/python/rerankers
Expand Down
20 changes: 20 additions & 0 deletions backend/python/faster-whisper/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
.DEFAULT_GOAL := install

.PHONY: install
install:
bash install.sh
$(MAKE) protogen

.PHONY: protogen
protogen: backend_pb2_grpc.py backend_pb2.py

.PHONY: protogen-clean
protogen-clean:
$(RM) backend_pb2_grpc.py backend_pb2.py

backend_pb2_grpc.py backend_pb2.py:
bash protogen.sh

.PHONY: clean
clean: protogen-clean
rm -rf venv __pycache__
94 changes: 94 additions & 0 deletions backend/python/faster-whisper/backend.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
#!/usr/bin/env python3
"""
This is an extra gRPC server of LocalAI for Bark TTS
"""
from concurrent import futures
import time
import argparse
import signal
import sys
import os
import backend_pb2
import backend_pb2_grpc

from faster_whisper import WhisperModel

import grpc


_ONE_DAY_IN_SECONDS = 60 * 60 * 24

# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
COQUI_LANGUAGE = os.environ.get('COQUI_LANGUAGE', None)

# Implement the BackendServicer class with the service methods
class BackendServicer(backend_pb2_grpc.BackendServicer):
"""
BackendServicer is the class that implements the gRPC service
"""
def Health(self, request, context):
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
def LoadModel(self, request, context):
device = "cpu"
# Get device
# device = "cuda" if request.CUDA else "cpu"
if request.CUDA:
device = "cuda"

try:
print("Preparing models, please wait", file=sys.stderr)
self.model = WhisperModel(request.Model, device=device, compute_type="float16")
except Exception as err:
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
# Implement your logic here for the LoadModel service
# Replace this with your desired response
return backend_pb2.Result(message="Model loaded successfully", success=True)

def AudioTranscription(self, request, context):
resultSegments = []
text = ""
try:
segments, info = self.model.transcribe(request.dst, beam_size=5, condition_on_previous_text=False)
id = 0
for segment in segments:
print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
resultSegments.append(backend_pb2.TranscriptSegment(id=id, start=segment.start, end=segment.end, text=segment.text))
text += segment.text
id += 1
except Exception as err:
print(f"Unexpected {err=}, {type(err)=}", file=sys.stderr)

return backend_pb2.TranscriptResult(segments=resultSegments, text=text)

def serve(address):
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
server.add_insecure_port(address)
server.start()
print("Server started. Listening on: " + address, file=sys.stderr)

# Define the signal handler function
def signal_handler(sig, frame):
print("Received termination signal. Shutting down...")
server.stop(0)
sys.exit(0)

# Set the signal handlers for SIGINT and SIGTERM
signal.signal(signal.SIGINT, signal_handler)
signal.signal(signal.SIGTERM, signal_handler)

try:
while True:
time.sleep(_ONE_DAY_IN_SECONDS)
except KeyboardInterrupt:
server.stop(0)

if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Run the gRPC server.")
parser.add_argument(
"--addr", default="localhost:50051", help="The address to bind the server to."
)
args = parser.parse_args()

serve(args.addr)
14 changes: 14 additions & 0 deletions backend/python/faster-whisper/install.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/bash
set -e

source $(dirname $0)/../common/libbackend.sh

# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index
# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index
if [ "x${BUILD_PROFILE}" == "xintel" ]; then
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
fi

installRequirements
6 changes: 6 additions & 0 deletions backend/python/faster-whisper/protogen.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash
set -e

source $(dirname $0)/../common/libbackend.sh

python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
8 changes: 8 additions & 0 deletions backend/python/faster-whisper/requirements-cpu.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
faster-whisper
opencv-python
accelerate
compel
peft
sentencepiece
torch==2.4.1
optimum-quanto
9 changes: 9 additions & 0 deletions backend/python/faster-whisper/requirements-cublas11.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch==2.4.1+cu118
faster-whisper
opencv-python
accelerate
compel
peft
sentencepiece
optimum-quanto
8 changes: 8 additions & 0 deletions backend/python/faster-whisper/requirements-cublas12.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
torch==2.4.1
faster-whisper
opencv-python
accelerate
compel
peft
sentencepiece
optimum-quanto
3 changes: 3 additions & 0 deletions backend/python/faster-whisper/requirements-hipblas.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch
faster-whisper
6 changes: 6 additions & 0 deletions backend/python/faster-whisper/requirements-intel.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
intel-extension-for-pytorch==2.3.110+xpu
torch==2.3.1+cxx11.abi
oneccl_bind_pt==2.3.100+xpu
optimum[openvino]
faster-whisper
3 changes: 3 additions & 0 deletions backend/python/faster-whisper/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
grpcio==1.69.0
protobuf
grpcio-tools
4 changes: 4 additions & 0 deletions backend/python/faster-whisper/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/bash
source $(dirname $0)/../common/libbackend.sh

startBackend $@
6 changes: 6 additions & 0 deletions backend/python/faster-whisper/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash
set -e

source $(dirname $0)/../common/libbackend.sh

runUnittests
Loading