Merge branch 'Samagra-Development:restructure' into restructure

ChatWithPDF · Oct 5, 2023 · 50868ac · 50868ac
2 parents 68ed593 + db7a0e5
commit 50868ac
Show file tree

Hide file tree

Showing 27 changed files with 470 additions and 30 deletions.
diff --git a/config.json b/config.json
@@ -1,5 +1,27 @@
 {
   "models": [
+    {
+      "serviceName": "asr_whisper_en",
+      "modelBasePath": "src/asr/whisper_en/local/.",
+      "apiBasePath": "asr/whisper_en/local/",
+      "containerPort": 8000,
+      "environment": {},
+      "nginx": [],
+      "nginx": ["client_max_body_size 100M;", "proxy_read_timeout 600;", "proxy_connect_timeout 600;", "proxy_send_timeout 600;"],
+      "constraints": ["node.labels.node_vm_type==gpu"],
+      "build": false
+    }, 
+     {
+      "serviceName": "asr_lang_detect",
+      "modelBasePath": "src/asr/whisper_lang_rec/local/.",
+      "apiBasePath": "asr/whisper_lang_rec/local/",
+      "containerPort": 8000,
+      "environment": {},
+      "nginx": [],
+      "nginx": ["client_max_body_size 100M;", "proxy_read_timeout 600;", "proxy_connect_timeout 600;", "proxy_send_timeout 600;"],
+      "constraints": ["node.labels.node_vm_type==gpu"],
+      "build": false
+    },    
         {
       "serviceName": "ner",
       "modelBasePath": "src/ner/agri_ner_akai/local/.",
@@ -130,7 +152,7 @@
       },
       "nginx": ["client_max_body_size 100M;", "proxy_read_timeout 600;", "proxy_connect_timeout 600;", "proxy_send_timeout 600;"],
       "constraints": ["node.labels.node_vm_type==gpu"],
-      "build": true
+      "build": false
     },
     {
       "serviceName": "embedding_instructor",
@@ -154,7 +176,7 @@
       },
       "nginx": [],
       "constraints": ["node.labels.node_vm_type==gpu"],
-      "build": true
+      "build": false
     }
   ]
 }
diff --git a/src/asr/whisper_en/README.md b/src/asr/whisper_en/README.md
diff --git a/src/asr/whisper_en/local/Dockerfile b/src/asr/whisper_en/local/Dockerfile
@@ -0,0 +1,15 @@
+# Use the official slim Python 3.9 image as the parent image
+FROM python:3.9-slim
+
+# Every following instruction runs relative to /app inside the image
+WORKDIR /app
+
+
+# Install requirements before copying the sources, so this layer can be
+# reused from the build cache when only application code changes
+COPY requirements.txt requirements.txt
+RUN pip3 install -r requirements.txt
+
+# Copy the rest of the application code to the working directory
+COPY . /app/
+# Port the server started below binds to
+EXPOSE 8000
+# Default command: serve the `app` object from api.py with hypercorn
+CMD ["hypercorn", "--bind", "0.0.0.0:8000", "api:app"]
diff --git a/src/asr/whisper_en/local/README.md b/src/asr/whisper_en/local/README.md
@@ -0,0 +1,11 @@
+### Testing the model deployment :  
+To test the deployment locally, follow these steps:
+
+- Git clone the repo
+- Go to this model's folder, i.e. ``` cd src/asr/whisper_en/local ```
+- Create docker image file and test the api:  
+```
+docker build -t testmodel .
+docker run -p 8000:8000 testmodel
+curl -X POST -F "file=@anorexia.wav"  http://localhost:8000/
+```
diff --git a/src/asr/whisper_en/local/__init__.py b/src/asr/whisper_en/local/__init__.py
@@ -0,0 +1,2 @@
+# Re-export the package's public classes.
+# Model is defined in model.py (request.py only defines ModelRequest), so it
+# must be imported from .model; importing it from .request raises ImportError.
+from .request import ModelRequest
+from .model import Model
diff --git a/src/asr/whisper_en/local/api.py b/src/asr/whisper_en/local/api.py
@@ -0,0 +1,41 @@
+from model import Model
+from request import ModelRequest
+from quart import Quart, request
+from quart_cors import cors  # Import the cors function
+import aiohttp
+import os
+import tempfile
+
+# Build the Quart application and wrap it with quart_cors in one step so that
+# CORS is enabled for all routes.
+app = cors(Quart(__name__))
+
+# Shared Model instance; stays None until the before_serving hook has run.
+model = None
+
+@app.before_serving
+async def startup():
+    """Create the shared HTTP client session and load the model.
+
+    Runs once before the server starts accepting requests.
+    """
+    global model
+    app.client = aiohttp.ClientSession()
+    model = Model(app)
+
+
+@app.after_serving
+async def shutdown():
+    """Close the HTTP client session opened in startup().
+
+    Without this the aiohttp session is never closed when the server stops.
+    """
+    # startup() may have failed before the attribute was set.
+    client = getattr(app, 'client', None)
+    if client is not None:
+        await client.close()
+
+@app.route('/', methods=['POST'])
+async def embed():
+    """Transcribe an audio file uploaded as multipart form field ``file``.
+
+    Returns:
+        The value produced by ``model.inference`` for the uploaded file, or
+        an error message with HTTP status 400 when no file was uploaded.
+    """
+    files = await request.files
+    uploaded_file = files.get('file')
+    if uploaded_file is None:
+        # Answer with a client error instead of failing with AttributeError.
+        return 'Missing multipart form field "file".', 400
+
+    # Use the client's file name (not the form field name) so the extension
+    # is kept, but strip any directory part so the upload cannot be written
+    # outside the temporary directory.
+    file_name = os.path.basename(uploaded_file.filename or '') or 'upload'
+
+    # The context manager removes the directory and the saved upload even
+    # when saving or inference raises.
+    with tempfile.TemporaryDirectory() as temp_dir:
+        file_path = os.path.join(temp_dir, file_name)
+        await uploaded_file.save(file_path)
+
+        req = ModelRequest(wav_file=file_path)
+        return await model.inference(req)
+
+# Direct-execution entry point: start the app with Quart's own `app.run()`
+# when this module is run as a script.
+if __name__ == "__main__":
+    app.run()
diff --git a/src/asr/whisper_en/local/model.py b/src/asr/whisper_en/local/model.py
@@ -0,0 +1,41 @@
+import torch
+import torchaudio
+from transformers import pipeline
+from request import ModelRequest
+
+
+class Model():
+    """Process-wide singleton wrapping a Whisper speech-recognition pipeline.
+
+    ``Model(context)`` always returns the same instance. The
+    ``openai/whisper-tiny.en`` pipeline is built on the first call only.
+    """
+
+    def __new__(cls, context):
+        """Return the shared instance, creating it and its pipeline on first use.
+
+        Args:
+            context: Object stored on the class as ``Model.context``; it is
+                refreshed on every call.
+        """
+        cls.context = context
+        if not hasattr(cls, 'instance'):
+            instance = super(Model, cls).__new__(cls)
+
+            # Initialize the Whisper ASR pipeline once. Building it outside
+            # this branch would reload the model on every Model(...) call.
+            device = "cuda:0" if torch.cuda.is_available() else "cpu"
+            cls.pipe = pipeline(
+                "automatic-speech-recognition",
+                model="openai/whisper-tiny.en",
+                chunk_length_s=10,
+                device=device,
+            )
+            # Publish the instance only after the pipeline loaded, so a
+            # failed load is retried on the next call.
+            cls.instance = instance
+        return cls.instance
+
+    def transcribe_audio(self, audio_path):
+        """Load the audio file at ``audio_path`` and return its transcription text."""
+        audio_input, sampling_rate = torchaudio.load(audio_path)
+        # The loaded tensor is (channels, frames) but the pipeline accepts
+        # only a 1-D single-channel array. Averaging over the channel axis
+        # handles stereo input and equals squeeze() for mono input.
+        mono_audio = audio_input.mean(dim=0)
+        audio_data = {
+            "array": mono_audio.numpy(),
+            "sampling_rate": sampling_rate,
+        }
+
+        # Get the transcription
+        return self.pipe(audio_data, batch_size=8)["text"]
+
+    async def inference(self, request: ModelRequest):
+        """Transcribe ``request.wav_file``.
+
+        Returns:
+            The transcription, or a fixed fallback message when the
+            pipeline produced an empty result.
+        """
+        transcription = self.transcribe_audio(request.wav_file)
+        if not transcription:
+            transcription = 'Unable to transcribe the audio.'
+        return transcription
+
+
+
diff --git a/src/asr/whisper_en/local/request.py b/src/asr/whisper_en/local/request.py
@@ -0,0 +1,11 @@
+import requests
+import json
+
+
+class ModelRequest():
+    """Request payload for the ASR model: the path of the audio file to process."""
+
+    def __init__(self, wav_file):
+        self.wav_file = wav_file
+
+    def to_json(self):
+        """Serialize this request's attributes as sorted, 4-space indented JSON."""
+        def _attributes(obj):
+            # Fallback for objects json cannot encode natively.
+            return obj.__dict__
+
+        return json.dumps(self, default=_attributes, sort_keys=True, indent=4)
diff --git a/src/asr/whisper_en/local/requirements.txt b/src/asr/whisper_en/local/requirements.txt
@@ -0,0 +1,7 @@
+torch
+transformers
+quart
+aiohttp
+librosa
+quart-cors
+torchaudio
diff --git a/src/asr/whisper_lang_rec/local/Dockerfile b/src/asr/whisper_lang_rec/local/Dockerfile
@@ -0,0 +1,21 @@
+# Use the official slim Python 3.9 image as the parent image
+FROM python:3.9-slim
+
+# Every following instruction runs relative to /app inside the image
+WORKDIR /app
+
+# Install requirements before copying the sources, so this layer can be
+# reused from the build cache when only application code changes
+COPY requirements.txt requirements.txt
+RUN pip3 install -r requirements.txt
+
+# Refresh the apt package index, install FFmpeg, then drop the apt caches in
+# the same layer to keep the image small
+# NOTE(review): presumably needed by whisper.load_audio in model.py - confirm
+RUN apt-get update \
+    && apt-get install -y ffmpeg \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+# Copy the rest of the application code to the working directory
+COPY . /app/
+# Port the server started below binds to
+EXPOSE 8000
+
+# Default command: serve the `app` object from api.py with hypercorn
+CMD ["hypercorn", "--bind", "0.0.0.0:8000", "api:app"]
diff --git a/src/asr/whisper_lang_rec/local/README.md b/src/asr/whisper_lang_rec/local/README.md
@@ -0,0 +1,11 @@
+### Testing the model deployment :  
+To test the deployment locally, follow these steps:
+
+- Git clone the repo
+- Go to this model's folder, i.e. ``` cd src/asr/whisper_lang_rec/local ```
+- Create docker image file and test the api:  
+```
+docker build -t testmodel .
+docker run -p 8000:8000 testmodel
+curl -X POST -F "file=@male.wav" -F "n_seconds=5" http://localhost:8000/
+```
diff --git a/src/asr/whisper_lang_rec/local/__init__.py b/src/asr/whisper_lang_rec/local/__init__.py
@@ -0,0 +1,2 @@
+# Re-export the package's public classes.
+# Model is defined in model.py (request.py only defines ModelRequest), so it
+# must be imported from .model; importing it from .request raises ImportError.
+from .request import ModelRequest
+from .model import Model
diff --git a/src/asr/whisper_lang_rec/local/api.py b/src/asr/whisper_lang_rec/local/api.py
@@ -0,0 +1,45 @@
+from model import Model
+from request import ModelRequest
+from quart import Quart, request
+from quart_cors import cors  # Import the cors function
+import aiohttp
+import os
+import tempfile
+import os 
+
+
+# Build the Quart application and wrap it with quart_cors in one step so that
+# CORS is enabled for all routes.
+app = cors(Quart(__name__))
+
+# Shared Model instance; stays None until the before_serving hook has run.
+model = None
+
+@app.before_serving
+async def startup():
+    """Create the shared HTTP client session and load the model.
+
+    Runs once before the server starts accepting requests.
+    """
+    global model
+    app.client = aiohttp.ClientSession()
+    model = Model(app)
+
+
+@app.after_serving
+async def shutdown():
+    """Close the HTTP client session opened in startup().
+
+    Without this the aiohttp session is never closed when the server stops.
+    """
+    # startup() may have failed before the attribute was set.
+    client = getattr(app, 'client', None)
+    if client is not None:
+        await client.close()
+
+@app.route('/', methods=['POST'])
+async def embed():
+    """Detect the spoken language of an uploaded audio file.
+
+    Expects a multipart form with the audio in field ``file`` and the number
+    of leading seconds to analyse in field ``n_seconds``.
+
+    Returns:
+        The value produced by ``model.inference``, or an error message with
+        HTTP status 400 when a form field is missing or invalid.
+    """
+    data = await request.form
+    files = await request.files
+
+    uploaded_file = files.get('file')
+    if uploaded_file is None:
+        # Answer with a client error instead of failing with AttributeError.
+        return 'Missing multipart form field "file".', 400
+
+    try:
+        # int(None) raises TypeError, int("abc") raises ValueError.
+        n_seconds = int(data.get('n_seconds'))
+    except (TypeError, ValueError):
+        return 'Form field "n_seconds" must be an integer.', 400
+    if n_seconds <= 0:
+        return 'Form field "n_seconds" must be positive.', 400
+
+    # Strip any directory part of the client-supplied name: os.path.join
+    # would otherwise let an absolute or "../" name escape the temp dir.
+    file_name = os.path.basename(uploaded_file.filename or '') or 'upload'
+
+    # The context manager removes the directory and the saved upload even
+    # when saving or inference raises.
+    with tempfile.TemporaryDirectory() as temp_dir:
+        file_path = os.path.join(temp_dir, file_name)
+        await uploaded_file.save(file_path)
+
+        req = ModelRequest(wav_file=file_path, n_seconds=n_seconds)
+        return await model.inference(req)
+
+
+# Direct-execution entry point: start the app with Quart's own `app.run()`
+# when this module is run as a script.
+if __name__ == "__main__":
+    app.run()
diff --git a/src/asr/whisper_lang_rec/local/model.py b/src/asr/whisper_lang_rec/local/model.py
@@ -0,0 +1,56 @@
+import torch
+import torchaudio
+import whisper  
+from request import ModelRequest 
+import tempfile
+import os 
+
+class Model():
+    """Process-wide singleton wrapping a Whisper model for language detection.
+
+    ``Model(context)`` always returns the same instance. The Whisper
+    ``base`` model is loaded on the first call only.
+    """
+
+    def __new__(cls, context):
+        """Return the shared instance, loading the Whisper model on first use.
+
+        Args:
+            context: Object stored on the class as ``Model.context``; it is
+                refreshed on every call.
+        """
+        cls.context = context
+        if not hasattr(cls, 'instance'):
+            instance = super(Model, cls).__new__(cls)
+
+            # Load the Whisper model once. Loading it outside this branch
+            # would reload the weights on every Model(...) call.
+            cls.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+            cls.model = whisper.load_model("base")
+            cls.model.to(cls.device)
+            # Publish the instance only after the model loaded, so a failed
+            # load is retried on the next call.
+            cls.instance = instance
+        return cls.instance
+
+    def trim_audio(self, audio_path, n_seconds):
+        """Load ``audio_path`` and keep at most its first ``n_seconds`` seconds.
+
+        Returns:
+            Tuple ``(audio, sample_rate)`` as loaded by ``torchaudio.load``,
+            with the second axis cut to ``n_seconds * sample_rate`` samples.
+        """
+        audio, sr = torchaudio.load(audio_path)
+        total_duration = audio.shape[1] / sr  # Total duration of the audio in seconds
+
+        # If the audio duration is less than n_seconds, don't trim the audio
+        if total_duration < n_seconds:
+            print(f"The audio duration ({total_duration:.2f}s) is less than {n_seconds}s. Using the full audio.")
+            return audio, sr
+
+        num_samples = int(n_seconds * sr)
+        return audio[:, :num_samples], sr
+
+    async def inference(self, request: ModelRequest):
+        """Detect the language spoken in the first ``request.n_seconds`` seconds.
+
+        Returns:
+            The key with the highest probability among those returned by
+            the model's ``detect_language``.
+        """
+        trimmed_audio, sr = self.trim_audio(request.wav_file, request.n_seconds)
+
+        # Reserve a temporary .wav path; close the handle right away because
+        # the file is written and read by name only.
+        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
+        temp_file.close()
+        try:
+            torchaudio.save(temp_file.name, trimmed_audio, sr)
+            # Process the audio with Whisper
+            audio = whisper.load_audio(temp_file.name)
+        finally:
+            # Remove the temporary file even when saving or loading fails.
+            os.unlink(temp_file.name)
+
+        audio = whisper.pad_or_trim(audio)
+        mel = whisper.log_mel_spectrogram(audio).to(self.device)
+
+        # Detect the spoken language
+        _, probs = self.model.detect_language(mel)
+        return max(probs, key=probs.get)
+
+
diff --git a/src/asr/whisper_lang_rec/local/request.py b/src/asr/whisper_lang_rec/local/request.py
@@ -0,0 +1,12 @@
+import requests
+import json
+
+
+class ModelRequest():
+    """Request payload for language detection: an audio path and a duration."""
+
+    def __init__(self, wav_file, n_seconds):
+        self.n_seconds = n_seconds
+        self.wav_file = wav_file
+
+    def to_json(self):
+        """Serialize this request's attributes as sorted, 4-space indented JSON."""
+        def _attributes(obj):
+            # Fallback for objects json cannot encode natively.
+            return obj.__dict__
+
+        return json.dumps(self, default=_attributes, sort_keys=True, indent=4)
diff --git a/src/asr/whisper_lang_rec/local/requirements.txt b/src/asr/whisper_lang_rec/local/requirements.txt
@@ -0,0 +1,8 @@
+torch
+torchaudio
+transformers
+quart
+aiohttp
+librosa
+quart-cors
+openai-whisper
diff --git a/src/embeddings/instructor_gpu/README.md b/src/embeddings/instructor_gpu/README.md
@@ -0,0 +1 @@
+## Instructor model for generating embedding 
diff --git a/src/embeddings/instructor_gpu/local/Dockerfile b/src/embeddings/instructor_gpu/local/Dockerfile
@@ -0,0 +1,15 @@
+# Use the official slim Python 3.9 image as the parent image
+FROM python:3.9-slim
+
+# Every following instruction runs relative to /app inside the image
+WORKDIR /app
+
+
+# Install requirements before copying the sources, so this layer can be
+# reused from the build cache when only application code changes
+COPY requirements.txt requirements.txt
+RUN pip3 install -r requirements.txt
+
+# Copy the rest of the application code to the working directory
+COPY . /app/
+# Port the server started below binds to
+EXPOSE 8000
+# Default command: serve the `app` object from api.py with hypercorn
+CMD ["hypercorn", "--bind", "0.0.0.0:8000", "api:app"]
diff --git a/src/embeddings/instructor_gpu/local/README.md b/src/embeddings/instructor_gpu/local/README.md
@@ -0,0 +1,18 @@
+## Instructor Embedding model:
+
+### Purpose :
+Model to Create Embeddings from given text using Instructor Large model.
+
+### Testing the model deployment :  
+To test the Instructor embedding deployment on its own, follow these steps:
+
+- Git clone the repo
+- Go to this model's folder, i.e. ``` cd src/embeddings/instructor_gpu/local ```
+- Create docker image file and test the api:  
+```
+docker build -t testmodel .
+docker run -p 8000:8000 testmodel
+curl -X POST -H "Content-Type: application/json" -d '{"query": "Where is my money? "}' http://localhost:8000/
+
+curl -X POST -F "file=@input.csv"  http://localhost:8000/embeddings/instructor/local -o output.csv
+```
diff --git a/src/embeddings/instructor_gpu/local/__init__.py b/src/embeddings/instructor_gpu/local/__init__.py
@@ -0,0 +1,2 @@
+from .request import *
+from .model import *