Commit

…into redefine
ZePan110 committed Sep 19, 2024
2 parents c78662e + a09395e commit 82c8a46
Showing 73 changed files with 711 additions and 7,783 deletions.
51 changes: 51 additions & 0 deletions AudioQnA/benchmark/accuracy/README.md
@@ -0,0 +1,51 @@
# AudioQnA Accuracy Evaluation

AudioQnA is an example that demonstrates the integration of Generative AI (GenAI) models for performing question answering (QnA) on audio scenes; it combines Automatic Speech Recognition (ASR) and Text-to-Speech (TTS). The following describes the pipeline for evaluating ASR accuracy.

## Dataset

We evaluate ASR accuracy on the test set of the LibriSpeech [dataset](https://huggingface.co/datasets/andreagasparini/librispeech_test_only), which contains 2,620 records of audio and their transcripts.
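
For reference, here is a minimal sketch of loading and inspecting one record, mirroring what the evaluation scripts below do; the `text` and `audio` fields are the ones the scripts rely on:

```python
from datasets import load_dataset

# load the LibriSpeech test split used by the evaluation scripts
librispeech_test_clean = load_dataset(
    "andreagasparini/librispeech_test_only", "clean", split="test", trust_remote_code=True
)

sample = librispeech_test_clean[0]
print(sample["text"])                    # reference transcript
print(sample["audio"]["sampling_rate"])  # audio is a dict with the waveform array and rate
```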

## Metrics

We evaluate the Word Error Rate (WER) of the ASR microservice.
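
WER counts word-level substitutions (S), deletions (D), and insertions (I) against a reference of N words: `WER = (S + D + I) / N`, so lower is better. A quick sanity check with the same `evaluate` library the scripts use:

```python
from evaluate import load

wer = load("wer")
# one substitution ("deep" -> "machine") against a 4-word reference -> WER = 0.25
score = wer.compute(
    references=["what is deep learning"],
    predictions=["what is machine learning"],
)
print(score)  # 0.25
```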

## Evaluation

### Launch ASR microservice

Launch the ASR microservice with the following commands. For more details, refer to the [doc](https://github.com/opea-project/GenAIComps/tree/main/comps/asr).

```bash
git clone https://github.com/opea-project/GenAIComps
cd GenAIComps
docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/Dockerfile .
# to evaluate a different model, change the value passed to --model_name_or_path
docker run -p 7066:7066 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/whisper:latest --model_name_or_path "openai/whisper-tiny"
```
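
Before running the full evaluation, you can sanity-check that the microservice is up. The sketch below reuses the request format from `online_eval.py`; the short base64 string is the minimal WAV payload used by this repository's GMC test scripts:

```python
import json

import requests

# minimal base64-encoded WAV clip (borrowed from the GMC tests)
test_audio_base64_str = "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"

response = requests.post(
    "http://localhost:7066/v1/asr",
    data=json.dumps({"audio": test_audio_base64_str}),
    proxies={"http": None},
)
print(response.json()["asr_result"])
```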

### Evaluate

Install dependencies:

```bash
pip install -r requirements.txt
```

Evaluate the ASR accuracy:

```bash
# validate the offline (locally loaded) model
# python local_eval.py
# validate the online ASR microservice accuracy
python online_eval.py
```

### Performance Result

Here are the tested results for reference (lower WER is better):

| Model            | WER (%) |
| ---------------- | ------- |
| whisper-large-v2 | 2.87    |
| whisper-large    | 2.7     |
| whisper-medium   | 3.45    |
35 changes: 35 additions & 0 deletions AudioQnA/benchmark/accuracy/local_eval.py
@@ -0,0 +1,35 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import torch
from datasets import load_dataset
from evaluate import load
from transformers import WhisperForConditionalGeneration, WhisperProcessor

device = "cuda" if torch.cuda.is_available() else "cpu"

MODEL_NAME = "openai/whisper-large-v2"

librispeech_test_clean = load_dataset(
"andreagasparini/librispeech_test_only", "clean", split="test", trust_remote_code=True
)
processor = WhisperProcessor.from_pretrained(MODEL_NAME)
model = WhisperForConditionalGeneration.from_pretrained(MODEL_NAME).to(device)


# transcribe one example and normalize both the reference and the prediction,
# so that WER is computed on comparable text
def map_to_pred(batch):
audio = batch["audio"]
input_features = processor(audio["array"], sampling_rate=audio["sampling_rate"], return_tensors="pt").input_features
batch["reference"] = processor.tokenizer._normalize(batch["text"])

with torch.no_grad():
predicted_ids = model.generate(input_features.to(device))[0]
transcription = processor.decode(predicted_ids)
batch["prediction"] = processor.tokenizer._normalize(transcription)
return batch


# run transcription over the full test split
result = librispeech_test_clean.map(map_to_pred)

wer = load("wer")
print(100 * wer.compute(references=result["reference"], predictions=result["prediction"]))
56 changes: 56 additions & 0 deletions AudioQnA/benchmark/accuracy/online_eval.py
@@ -0,0 +1,56 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import base64
import json

import requests
from datasets import load_dataset
from evaluate import load
from pydub import AudioSegment
from transformers import WhisperProcessor

MODEL_NAME = "openai/whisper-large-v2"
processor = WhisperProcessor.from_pretrained(MODEL_NAME)

librispeech_test_clean = load_dataset(
"andreagasparini/librispeech_test_only", "clean", split="test", trust_remote_code=True
)


# send one example through the ASR REST endpoint and normalize the
# reference and prediction so WER is computed on comparable text
def map_to_pred(batch):
batch["reference"] = processor.tokenizer._normalize(batch["text"])

file_path = batch["file"]
    # the dataset's "file" field uses the original LibriSpeech naming
    # (<speaker>-<chapter>-<utterance>); rebuild the on-disk path
    # LibriSpeech/test-clean/<speaker>/<chapter>/<utterance>
    pidx = file_path.rfind("/")
    sidx = file_path.rfind(".")

    file_path_prefix = file_path[: pidx + 1]
    file_path_suffix = file_path[sidx:]
    file_path_mid = file_path[pidx + 1 : sidx]
    splits = file_path_mid.split("-")
    file_path_mid = f"LibriSpeech/test-clean/{splits[0]}/{splits[1]}/{file_path_mid}"

file_path = file_path_prefix + file_path_mid + file_path_suffix

    # re-encode the clip as WAV and base64-encode it for the request payload
    # (pydub's export defaults to mp3, so the format must be given explicitly)
    audio = AudioSegment.from_file(file_path)
    audio.export("tmp.wav", format="wav")
    with open("tmp.wav", "rb") as f:
        test_audio_base64_str = base64.b64encode(f.read()).decode("utf-8")

inputs = {"audio": test_audio_base64_str}
endpoint = "http://localhost:7066/v1/asr"
response = requests.post(url=endpoint, data=json.dumps(inputs), proxies={"http": None})

result_str = response.json()["asr_result"]

batch["prediction"] = processor.tokenizer._normalize(result_str)
return batch


result = librispeech_test_clean.map(map_to_pred)

wer = load("wer")
print(100 * wer.compute(references=result["reference"], predictions=result["prediction"]))
8 changes: 8 additions & 0 deletions AudioQnA/benchmark/accuracy/requirements.txt
@@ -0,0 +1,8 @@
datasets
evaluate
jiwer
librosa
pydub
soundfile
torch
transformers
2 changes: 1 addition & 1 deletion AudioQnA/docker_compose/intel/cpu/xeon/README.md
@@ -108,7 +108,7 @@ curl http://${host_ip}:3006/generate \
# llm microservice
curl http://${host_ip}:3007/v1/chat/completions\
-X POST \
-d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \
-H 'Content-Type: application/json'

# speecht5 service
2 changes: 1 addition & 1 deletion AudioQnA/docker_compose/intel/hpu/gaudi/README.md
@@ -108,7 +108,7 @@ curl http://${host_ip}:3006/generate \
# llm microservice
curl http://${host_ip}:3007/v1/chat/completions\
-X POST \
-d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \
-H 'Content-Type: application/json'

# speecht5 service
2 changes: 1 addition & 1 deletion AudioQnA/tests/test_gmc_on_gaudi.sh
@@ -34,7 +34,7 @@ function validate_audioqa() {
export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name})
echo "$CLIENT_POD"
accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='audioqa')].status.accessUrl}")
byte_str=$(kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -s -X POST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "parameters":{"max_new_tokens":64, "do_sample": true, "streaming":false}}' -H 'Content-Type: application/json' | jq .byte_str)
byte_str=$(kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -s -X POST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "parameters":{"max_tokens":64, "do_sample": true, "streaming":false}}' -H 'Content-Type: application/json' | jq .byte_str)
echo "$byte_str" > $LOG_PATH/curl_audioqa.log
if [ -z "$byte_str" ]; then
echo "audioqa failed, please check the logs in ${LOG_PATH}!"
2 changes: 1 addition & 1 deletion AudioQnA/tests/test_gmc_on_xeon.sh
@@ -34,7 +34,7 @@ function validate_audioqa() {
export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name})
echo "$CLIENT_POD"
accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='audioqa')].status.accessUrl}")
byte_str=$(kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -s -X POST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "parameters":{"max_new_tokens":64, "do_sample": true, "streaming":false}}' -H 'Content-Type: application/json' | jq .byte_str)
byte_str=$(kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -s -X POST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "parameters":{"max_tokens":64, "do_sample": true, "streaming":false}}' -H 'Content-Type: application/json' | jq .byte_str)
echo "$byte_str" > $LOG_PATH/curl_audioqa.log
if [ -z "$byte_str" ]; then
echo "audioqa failed, please check the logs in ${LOG_PATH}!"
21 changes: 20 additions & 1 deletion ChatQnA/README.md
@@ -245,7 +245,9 @@ Refer to the [AI PC Guide](./docker_compose/intel/cpu/aipc/README.md) for instru

Refer to the [Intel Technology enabling for Openshift readme](https://github.com/intel/intel-technology-enabling-for-openshift/blob/main/workloads/opea/chatqna/README.md) for instructions to deploy ChatQnA prototype on RHOCP with [Red Hat OpenShift AI (RHOAI)](https://www.redhat.com/en/technologies/cloud-computing/openshift/openshift-ai).

## Consume ChatQnA Service
## Consume ChatQnA Service with RAG

### Check Service Status

Before consuming ChatQnA Service, make sure the TGI/vLLM service is ready (which takes up to 2 minutes to start).

@@ -260,6 +262,23 @@ Consume ChatQnA service until you get the TGI response like below.
2024-09-03T02:47:53.402023Z INFO text_generation_router::server: router/src/server.rs:2311: Connected
```
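
As an alternative to watching the logs, a small polling sketch can wait for readiness. This assumes the TGI container is the one mapped to port 3006 in the compose files and that it exposes TGI's standard `/health` route:

```python
import time

import requests

host_ip = "localhost"  # set to the host running the TGI service

# poll TGI's /health endpoint until the service reports ready
for _ in range(60):
    try:
        if requests.get(f"http://{host_ip}:3006/health", timeout=2).ok:
            print("TGI service is ready")
            break
    except requests.exceptions.ConnectionError:
        pass
    time.sleep(5)
else:
    print("TGI service did not become ready in time")
```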

### Upload RAG Files (Optional)

To chat with retrieved information, you need to upload a file using the `Dataprep` service.

Here is an example using the `Nike 2023` PDF.

```bash
# download pdf file
wget https://raw.githubusercontent.com/opea-project/GenAIComps/main/comps/retrievers/redis/data/nke-10k-2023.pdf
# upload pdf file with dataprep
curl -X POST "http://${host_ip}:6007/v1/dataprep" \
-H "Content-Type: multipart/form-data" \
-F "files=@./nke-10k-2023.pdf"
```
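
The same upload as a minimal Python sketch, equivalent to the cURL call above (`host_ip` must point at your deployment):

```python
import requests

host_ip = "localhost"  # set to your deployment host

# upload the PDF to the dataprep service as multipart/form-data
with open("nke-10k-2023.pdf", "rb") as f:
    resp = requests.post(
        f"http://{host_ip}:6007/v1/dataprep",
        files={"files": ("nke-10k-2023.pdf", f, "application/pdf")},
    )
print(resp.status_code, resp.text)
```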

### Consume Chat Service

There are two ways to consume the ChatQnA service:

1. Use the cURL command on a terminal